diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f7710589c5..1926ad5bf4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -185,6 +185,7 @@ jobs: cpp-unit-tests-run-all: ${{ steps.determine.outputs.cpp-unit-tests-run-all }} cpp-unit-tests-components: ${{ steps.determine.outputs.cpp-unit-tests-components }} component-test-batches: ${{ steps.determine.outputs.component-test-batches }} + benchmarks: ${{ steps.determine.outputs.benchmarks }} steps: - name: Check out code from GitHub uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -227,6 +228,7 @@ jobs: echo "cpp-unit-tests-run-all=$(echo "$output" | jq -r '.cpp_unit_tests_run_all')" >> $GITHUB_OUTPUT echo "cpp-unit-tests-components=$(echo "$output" | jq -c '.cpp_unit_tests_components')" >> $GITHUB_OUTPUT echo "component-test-batches=$(echo "$output" | jq -c '.component_test_batches')" >> $GITHUB_OUTPUT + echo "benchmarks=$(echo "$output" | jq -r '.benchmarks')" >> $GITHUB_OUTPUT - name: Save components graph cache if: github.ref == 'refs/heads/dev' uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 @@ -308,6 +310,38 @@ jobs: script/cpp_unit_test.py $ARGS fi + benchmarks: + name: Run CodSpeed benchmarks + runs-on: ubuntu-24.04 + needs: + - common + - determine-jobs + if: github.event_name == 'pull_request' && needs.determine-jobs.outputs.benchmarks == 'true' + steps: + - name: Check out code from GitHub + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Restore Python + uses: ./.github/actions/restore-python + with: + python-version: ${{ env.DEFAULT_PYTHON }} + cache-key: ${{ needs.common.outputs.cache-key }} + + - name: Build benchmarks + id: build + run: | + . 
venv/bin/activate + export BENCHMARK_LIB_CONFIG=$(python script/setup_codspeed_lib.py) + # --build-only prints BUILD_BINARY= to stdout + BINARY=$(script/cpp_benchmark.py --all --build-only | grep '^BUILD_BINARY=' | tail -1 | cut -d= -f2-) + echo "binary=$BINARY" >> $GITHUB_OUTPUT + + - name: Run CodSpeed benchmarks + uses: CodSpeedHQ/action@281164b0f014a4e7badd2c02cecad9b595b70537 # v4 + with: + run: ${{ steps.build.outputs.binary }} + mode: simulation + clang-tidy-single: name: ${{ matrix.name }} runs-on: ubuntu-24.04 diff --git a/esphome/core/component.h b/esphome/core/component.h index 5fdf23e128..557ba09bbc 100644 --- a/esphome/core/component.h +++ b/esphome/core/component.h @@ -598,9 +598,11 @@ class WarnIfComponentBlockingGuard { #ifdef USE_RUNTIME_STATS this->record_runtime_stats_(); #endif +#ifndef USE_BENCHMARK if (blocking_time > WARN_IF_BLOCKING_OVER_MS) [[unlikely]] { warn_blocking(this->component_, blocking_time); } +#endif return curr_time; } diff --git a/script/clang-tidy b/script/clang-tidy index 9c2899026d..f2834b44ac 100755 --- a/script/clang-tidy +++ b/script/clang-tidy @@ -231,6 +231,9 @@ def main(): cwd = os.getcwd() files = [os.path.relpath(path, cwd) for path in git_ls_files(["*.cpp"])] + # Exclude benchmark files — they require google benchmark headers not + # available in the ESP32 toolchain and use different naming conventions. 
+ files = [f for f in files if not f.startswith("tests/benchmarks/")] # Print initial file count if it's large if len(files) > 50: diff --git a/script/cpp_benchmark.py b/script/cpp_benchmark.py new file mode 100755 index 0000000000..bd92266ea6 --- /dev/null +++ b/script/cpp_benchmark.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +"""Build and run C++ benchmarks for ESPHome components using Google Benchmark.""" + +import argparse +import json +import os +from pathlib import Path +import sys + +from helpers import root_path +from test_helpers import ( + BASE_CODEGEN_COMPONENTS, + PLATFORMIO_GOOGLE_BENCHMARK_LIB, + USE_TIME_TIMEZONE_FLAG, + build_and_run, +) + +# Path to /tests/benchmarks/components +BENCHMARKS_DIR: Path = Path(root_path) / "tests" / "benchmarks" / "components" + +# Path to /tests/benchmarks/core (always included, not a component) +CORE_BENCHMARKS_DIR: Path = Path(root_path) / "tests" / "benchmarks" / "core" + +# Additional codegen components beyond the base set. +# json is needed because its to_code adds the ArduinoJson library +# (auto-loaded by api, but cpp_testing suppresses to_code unless listed). +BENCHMARK_CODEGEN_COMPONENTS = BASE_CODEGEN_COMPONENTS | {"json"} + +PLATFORMIO_OPTIONS = { + "build_unflags": [ + "-Os", # remove default size-opt + ], + "build_flags": [ + "-O2", # optimize for speed (CodSpeed recommends RelWithDebInfo) + "-g", # debug symbols for profiling + USE_TIME_TIMEZONE_FLAG, + "-DUSE_BENCHMARK", # disable WarnIfComponentBlockingGuard in finish() + ], + # Use deep+ LDF mode to ensure PlatformIO detects the benchmark + # library dependency from nested includes. + "lib_ldf_mode": "deep+", +} + + +def run_benchmarks(selected_components: list[str], build_only: bool = False) -> int: + # Allow CI to override the benchmark library (e.g. with CodSpeed's fork). + # BENCHMARK_LIB_CONFIG is a JSON string from setup_codspeed_lib.py + # containing {"lib_path": "/path/to/google_benchmark"}. 
+ lib_config_json = os.environ.get("BENCHMARK_LIB_CONFIG") + + pio_options = PLATFORMIO_OPTIONS + if lib_config_json: + lib_config = json.loads(lib_config_json) + benchmark_lib = f"benchmark=symlink://{lib_config['lib_path']}" + # These defines must be global (not just in library.json) because + # benchmark.h uses #ifdef CODSPEED_ENABLED to switch benchmark + # registration to CodSpeed-instrumented variants, and + # CODSPEED_ROOT_DIR is used to display relative file paths in reports. + project_root = Path(__file__).resolve().parent.parent + codspeed_flags = [ + "-DNDEBUG", + "-DCODSPEED_ENABLED", + "-DCODSPEED_ANALYSIS", + f'-DCODSPEED_ROOT_DIR=\\"{project_root}\\"', + ] + pio_options = { + **PLATFORMIO_OPTIONS, + "build_flags": PLATFORMIO_OPTIONS["build_flags"] + codspeed_flags, + } + else: + benchmark_lib = PLATFORMIO_GOOGLE_BENCHMARK_LIB + + return build_and_run( + selected_components=selected_components, + tests_dir=BENCHMARKS_DIR, + codegen_components=BENCHMARK_CODEGEN_COMPONENTS, + config_prefix="cppbench", + friendly_name="CPP Benchmarks", + libraries=benchmark_lib, + platformio_options=pio_options, + main_entry="main.cpp", + label="benchmarks", + build_only=build_only, + extra_include_dirs=[CORE_BENCHMARKS_DIR], + ) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Build and run C++ benchmarks for ESPHome components." 
+ ) + parser.add_argument( + "components", + nargs="*", + help="List of components to benchmark (must have files in tests/benchmarks/components/).", + ) + parser.add_argument( + "--all", + action="store_true", + help="Benchmark all components with benchmark files.", + ) + parser.add_argument( + "--build-only", + action="store_true", + help="Only build, print binary path without running.", + ) + + args = parser.parse_args() + + if args.all: + # Find all component directories that have .cpp files + components: list[str] = ( + sorted( + d.name + for d in BENCHMARKS_DIR.iterdir() + if d.is_dir() + and d.name != "__pycache__" + and (any(d.glob("*.cpp")) or any(d.glob("*.h"))) + ) + if BENCHMARKS_DIR.is_dir() + else [] + ) + else: + components: list[str] = args.components + + sys.exit(run_benchmarks(components, build_only=args.build_only)) + + +if __name__ == "__main__": + main() diff --git a/script/determine-jobs.py b/script/determine-jobs.py index 6808a3cf6c..ad08f8dce5 100755 --- a/script/determine-jobs.py +++ b/script/determine-jobs.py @@ -381,6 +381,63 @@ def determine_cpp_unit_tests( return (False, get_cpp_changed_components(cpp_files)) +# Paths within tests/benchmarks/ that contain component benchmark files +BENCHMARKS_COMPONENTS_PATH = "tests/benchmarks/components" + +# Files that, when changed, should trigger benchmark runs +BENCHMARK_INFRASTRUCTURE_FILES = frozenset( + { + "script/cpp_benchmark.py", + "script/test_helpers.py", + "script/setup_codspeed_lib.py", + } +) + + +def should_run_benchmarks(branch: str | None = None) -> bool: + """Determine if C++ benchmarks should run based on changed files. + + Benchmarks run when any of the following conditions are met: + + 1. Core C++ files changed (esphome/core/*) + 2. A directly changed component has benchmark files (no dependency expansion) + 3. 
Benchmark infrastructure changed (tests/benchmarks/*, script/cpp_benchmark.py, + script/test_helpers.py, script/setup_codspeed_lib.py) + + Unlike unit tests, benchmarks do NOT expand to dependent components. + Changing ``sensor`` does not trigger ``api`` benchmarks just because + api depends on sensor. + + Args: + branch: Branch to compare against. If None, uses default. + + Returns: + True if benchmarks should run, False otherwise. + """ + files = changed_files(branch) + if core_changed(files): + return True + + # Check if benchmark infrastructure changed + if any( + f.startswith("tests/benchmarks/") or f in BENCHMARK_INFRASTRUCTURE_FILES + for f in files + ): + return True + + # Check if any directly changed component has benchmarks + benchmarks_dir = Path(root_path) / BENCHMARKS_COMPONENTS_PATH + if not benchmarks_dir.is_dir(): + return False + benchmarked_components = { + d.name + for d in benchmarks_dir.iterdir() + if d.is_dir() and (any(d.glob("*.cpp")) or any(d.glob("*.h"))) + } + # Only direct changes — no dependency expansion + return any(get_component_from_path(f) in benchmarked_components for f in files) + + def _any_changed_file_endswith(branch: str | None, extensions: tuple[str, ...]) -> bool: """Check if a changed file ends with any of the specified extensions.""" return any(file.endswith(extensions) for file in changed_files(branch)) @@ -804,6 +861,9 @@ def main() -> None: # Determine which C++ unit tests to run cpp_run_all, cpp_components = determine_cpp_unit_tests(args.branch) + # Determine if benchmarks should run + run_benchmarks = should_run_benchmarks(args.branch) + # Split components into batches for CI testing # This intelligently groups components with similar bus configurations component_test_batches: list[str] @@ -856,6 +916,7 @@ def main() -> None: "cpp_unit_tests_run_all": cpp_run_all, "cpp_unit_tests_components": cpp_components, "component_test_batches": component_test_batches, + "benchmarks": run_benchmarks, } # Output as JSON diff 
--git a/script/setup_codspeed_lib.py b/script/setup_codspeed_lib.py new file mode 100755 index 0000000000..959c89d05b --- /dev/null +++ b/script/setup_codspeed_lib.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +"""Set up CodSpeed's google_benchmark fork as a PlatformIO library. + +CodSpeed requires their codspeed-cpp fork for CPU simulation instrumentation. +This script clones the repo and assembles a flat PlatformIO-compatible library +by combining google_benchmark sources, codspeed core, and instrument-hooks. + +PlatformIO quirks addressed: + - .cc files renamed to .cpp (PlatformIO ignores .cc) + - All sources merged into one src/ dir (PlatformIO can't compile from + multiple source directories in a single library) + - library.json created with required CodSpeed preprocessor defines + +Usage: + python script/setup_codspeed_lib.py [--output-dir DIR] + +Prints JSON to stdout with lib_path for cpp_benchmark.py. +Git output goes to stderr. + +See https://codspeed.io/docs/benchmarks/cpp#custom-build-systems +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path +import shutil +import subprocess +import sys + +# Pin to a specific release for reproducibility +CODSPEED_CPP_REPO = "https://github.com/CodSpeedHQ/codspeed-cpp.git" +CODSPEED_CPP_SHA = "e633aca00da3d0ad14e7bf424d9cb47165a29028" # v2.1.0 + +DEFAULT_OUTPUT_DIR = "/tmp/codspeed-cpp" + +# Well-known paths within the codspeed-cpp repository +GOOGLE_BENCHMARK_SUBDIR = "google_benchmark" +CORE_SUBDIR = "core" +INSTRUMENT_HOOKS_SUBDIR = Path(CORE_SUBDIR) / "instrument-hooks" +INSTRUMENT_HOOKS_INCLUDES = INSTRUMENT_HOOKS_SUBDIR / "includes" +INSTRUMENT_HOOKS_DIST = INSTRUMENT_HOOKS_SUBDIR / "dist" / "core.c" +CORE_CMAKE = Path(CORE_SUBDIR) / "CMakeLists.txt" + + +def _git(args: list[str], **kwargs: object) -> None: + """Run a git command, sending output to stderr.""" + subprocess.run( + ["git", *args], + check=True, + stdout=kwargs.pop("stdout", sys.stderr), + 
stderr=kwargs.pop("stderr", sys.stderr), + **kwargs, + ) + + +def _clone_repo(output_dir: Path) -> None: + """Shallow-clone codspeed-cpp at the pinned SHA with submodules.""" + output_dir.mkdir(parents=True, exist_ok=True) + _git(["init", str(output_dir)]) + _git(["-C", str(output_dir), "remote", "add", "origin", CODSPEED_CPP_REPO]) + _git(["-C", str(output_dir), "fetch", "--depth", "1", "origin", CODSPEED_CPP_SHA]) + _git( + ["-C", str(output_dir), "checkout", "FETCH_HEAD"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + _git( + [ + "-C", + str(output_dir), + "submodule", + "update", + "--init", + "--recursive", + "--depth", + "1", + ] + ) + + +def _read_codspeed_version(cmake_path: Path) -> str: + """Extract CODSPEED_VERSION from core/CMakeLists.txt.""" + if not cmake_path.exists(): + return "0.0.0" + for line in cmake_path.read_text().splitlines(): + if line.startswith("set(CODSPEED_VERSION"): + return line.split()[1].rstrip(")") + return "0.0.0" + + +def _rename_cc_to_cpp(src_dir: Path) -> None: + """Rename .cc files to .cpp so PlatformIO compiles them.""" + for cc_file in src_dir.glob("*.cc"): + cpp_file = cc_file.with_suffix(".cpp") + if not cpp_file.exists(): + cc_file.rename(cpp_file) + + +def _copy_if_missing(src: Path, dest: Path) -> None: + """Copy a file only if the destination doesn't already exist.""" + if not dest.exists(): + shutil.copy2(src, dest) + + +def _merge_codspeed_core_into_lib(core_src: Path, lib_src: Path) -> None: + """Copy codspeed core sources into the benchmark library src/. + + .cpp files get a ``codspeed_`` prefix to avoid name collisions with + google_benchmark's own sources. .h files keep their original names + since they're referenced by ``#include "walltime.h"`` etc. 
+ """ + for src_file in core_src.iterdir(): + if src_file.suffix == ".cpp": + _copy_if_missing(src_file, lib_src / f"codspeed_{src_file.name}") + elif src_file.suffix == ".h": + _copy_if_missing(src_file, lib_src / src_file.name) + + +def _write_library_json( + benchmark_dir: Path, + core_include: Path, + hooks_include: Path, + version: str, + project_root: Path, +) -> None: + """Write a PlatformIO library.json with CodSpeed build flags.""" + library_json = { + "name": "benchmark", + "version": "0.0.0", + "build": { + "flags": [ + f"-I{core_include}", + f"-I{hooks_include}", + # google benchmark build flags + # -O2 is critical: without it, instrument_hooks_start_benchmark_inline + # doesn't get inlined and shows up as overhead in profiles + "-O2", + "-DNDEBUG", + "-DHAVE_STD_REGEX", + "-DHAVE_STEADY_CLOCK", + "-DBENCHMARK_STATIC_DEFINE", + # CodSpeed instrumentation flags + # https://codspeed.io/docs/benchmarks/cpp#custom-build-systems + "-DCODSPEED_ENABLED", + "-DCODSPEED_ANALYSIS", + f'-DCODSPEED_VERSION=\\"{version}\\"', + f'-DCODSPEED_ROOT_DIR=\\"{project_root}\\"', + '-DCODSPEED_MODE_DISPLAY=\\"simulation\\"', + ], + "includeDir": "include", + }, + } + (benchmark_dir / "library.json").write_text( + json.dumps(library_json, indent=2) + "\n" + ) + + +def setup_codspeed_lib(output_dir: Path) -> None: + """Clone codspeed-cpp and assemble a flat PlatformIO library. 
+ + The resulting library at ``output_dir/google_benchmark/`` contains: + - google_benchmark sources (.cc renamed to .cpp) + - codspeed core sources (prefixed ``codspeed_``) + - instrument-hooks C source (as ``instrument_hooks.c``) + - library.json with all required CodSpeed defines + + Args: + output_dir: Directory to clone the repository into + """ + if not (output_dir / ".git").exists(): + _clone_repo(output_dir) + else: + # Verify the existing checkout matches the pinned SHA + result = subprocess.run( + ["git", "-C", str(output_dir), "rev-parse", "HEAD"], + capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0 or result.stdout.strip() != CODSPEED_CPP_SHA: + print( + f"Stale codspeed-cpp checkout, re-cloning at {CODSPEED_CPP_SHA}", + file=sys.stderr, + ) + shutil.rmtree(output_dir) + _clone_repo(output_dir) + + benchmark_dir = output_dir / GOOGLE_BENCHMARK_SUBDIR + lib_src = benchmark_dir / "src" + core_dir = output_dir / CORE_SUBDIR + core_include = core_dir / "include" + hooks_include = output_dir / INSTRUMENT_HOOKS_INCLUDES + hooks_dist_c = output_dir / INSTRUMENT_HOOKS_DIST + project_root = Path(__file__).resolve().parent.parent + + # 1. Rename .cc → .cpp (PlatformIO doesn't compile .cc) + _rename_cc_to_cpp(lib_src) + + # 2. Merge codspeed core sources into the library + _merge_codspeed_core_into_lib(core_dir / "src", lib_src) + + # 3. Copy instrument-hooks C source (provides instrument_hooks_* symbols) + if hooks_dist_c.exists(): + _copy_if_missing(hooks_dist_c, lib_src / "instrument_hooks.c") + + # 4. 
Write library.json + version = _read_codspeed_version(output_dir / CORE_CMAKE) + _write_library_json( + benchmark_dir, core_include, hooks_include, version, project_root + ) + + # Output JSON config for cpp_benchmark.py + print(json.dumps({"lib_path": str(benchmark_dir)})) + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--output-dir", + type=Path, + default=Path(DEFAULT_OUTPUT_DIR), + help=f"Directory to clone codspeed-cpp into (default: {DEFAULT_OUTPUT_DIR})", + ) + args = parser.parse_args() + setup_codspeed_lib(args.output_dir) + + +if __name__ == "__main__": + main() diff --git a/tests/benchmarks/components/.gitignore b/tests/benchmarks/components/.gitignore new file mode 100644 index 0000000000..163bec7b80 --- /dev/null +++ b/tests/benchmarks/components/.gitignore @@ -0,0 +1,2 @@ +/.esphome/ +/secrets.yaml diff --git a/tests/benchmarks/components/api/bench_proto_decode.cpp b/tests/benchmarks/components/api/bench_proto_decode.cpp new file mode 100644 index 0000000000..113201dd8a --- /dev/null +++ b/tests/benchmarks/components/api/bench_proto_decode.cpp @@ -0,0 +1,93 @@ +#include + +#include "esphome/components/api/api_pb2.h" +#include "esphome/components/api/api_buffer.h" + +namespace esphome::api::benchmarks { + +// Inner iteration count to amortize CodSpeed instrumentation overhead. +// Without this, the ~60ns per-iteration valgrind start/stop cost dominates +// sub-microsecond benchmarks. +static constexpr int kInnerIterations = 2000; + +// Helper: encode a message into a buffer and return it. +// Benchmarks encode once in setup, then decode the resulting bytes in a loop. +// This keeps decode benchmarks in sync with the actual protobuf schema — +// hand-encoded byte arrays would silently break when fields change. 
+template<typename T> static APIBuffer encode_message(const T &msg) { + APIBuffer buffer; + uint32_t size = msg.calculate_size(); + buffer.resize(size); + ProtoWriteBuffer writer(&buffer, 0); + msg.encode(writer); + return buffer; +} + +// --- HelloRequest decode (string + varint fields) --- + +static void Decode_HelloRequest(benchmark::State &state) { + HelloRequest source; + source.client_info = StringRef::from_lit("aioesphomeapi"); + source.api_version_major = 1; + source.api_version_minor = 10; + auto encoded = encode_message(source); + + for (auto _ : state) { + HelloRequest msg; + for (int i = 0; i < kInnerIterations; i++) { + msg.decode(encoded.data(), encoded.size()); + } + benchmark::DoNotOptimize(msg.api_version_major); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(Decode_HelloRequest); + +// --- SwitchCommandRequest decode (simple command) --- + +static void Decode_SwitchCommandRequest(benchmark::State &state) { + SwitchCommandRequest source; + source.key = 0x12345678; + source.state = true; + auto encoded = encode_message(source); + + for (auto _ : state) { + SwitchCommandRequest msg; + for (int i = 0; i < kInnerIterations; i++) { + msg.decode(encoded.data(), encoded.size()); + } + benchmark::DoNotOptimize(msg.state); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(Decode_SwitchCommandRequest); + +// --- LightCommandRequest decode (complex command with many fields) --- + +static void Decode_LightCommandRequest(benchmark::State &state) { + LightCommandRequest source; + source.key = 0x11223344; + source.has_state = true; + source.state = true; + source.has_brightness = true; + source.brightness = 0.8f; + source.has_rgb = true; + source.red = 1.0f; + source.green = 0.5f; + source.blue = 0.2f; + source.has_effect = true; + source.effect = StringRef::from_lit("rainbow"); + auto encoded = encode_message(source); + + for (auto _ : state) { + LightCommandRequest msg; + for (int i = 0; i < 
kInnerIterations; i++) { + msg.decode(encoded.data(), encoded.size()); + } + benchmark::DoNotOptimize(msg.brightness); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(Decode_LightCommandRequest); + +} // namespace esphome::api::benchmarks diff --git a/tests/benchmarks/components/api/bench_proto_encode.cpp b/tests/benchmarks/components/api/bench_proto_encode.cpp new file mode 100644 index 0000000000..656c1e17db --- /dev/null +++ b/tests/benchmarks/components/api/bench_proto_encode.cpp @@ -0,0 +1,298 @@ +#include <benchmark/benchmark.h> + +#include "esphome/components/api/api_pb2.h" +#include "esphome/components/api/api_buffer.h" + +namespace esphome::api::benchmarks { + +// Inner iteration count to amortize CodSpeed instrumentation overhead. +// Without this, the ~60ns per-iteration valgrind start/stop cost dominates +// sub-microsecond benchmarks. +static constexpr int kInnerIterations = 2000; + +// --- SensorStateResponse (highest frequency message) --- + +static void Encode_SensorStateResponse(benchmark::State &state) { + APIBuffer buffer; + SensorStateResponse msg; + msg.key = 0x12345678; + msg.state = 23.5f; + msg.missing_state = false; + uint32_t size = msg.calculate_size(); + buffer.resize(size); + + for (auto _ : state) { + for (int i = 0; i < kInnerIterations; i++) { + ProtoWriteBuffer writer(&buffer, 0); + msg.encode(writer); + } + benchmark::DoNotOptimize(buffer.data()); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(Encode_SensorStateResponse); + +static void CalculateSize_SensorStateResponse(benchmark::State &state) { + SensorStateResponse msg; + msg.key = 0x12345678; + msg.state = 23.5f; + msg.missing_state = false; + + for (auto _ : state) { + uint32_t result = 0; + for (int i = 0; i < kInnerIterations; i++) { + result += msg.calculate_size(); + } + benchmark::DoNotOptimize(result); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(CalculateSize_SensorStateResponse); + 
+// Steady state: buffer already allocated from previous iteration +static void CalcAndEncode_SensorStateResponse(benchmark::State &state) { + APIBuffer buffer; + SensorStateResponse msg; + msg.key = 0x12345678; + msg.state = 23.5f; + msg.missing_state = false; + + for (auto _ : state) { + for (int i = 0; i < kInnerIterations; i++) { + uint32_t size = msg.calculate_size(); + buffer.resize(size); + ProtoWriteBuffer writer(&buffer, 0); + msg.encode(writer); + } + benchmark::DoNotOptimize(buffer.data()); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(CalcAndEncode_SensorStateResponse); + +// Cold path: fresh buffer each iteration (measures heap allocation cost). +// Inner loop still needed to amortize CodSpeed instrumentation overhead. +// Each inner iteration creates a fresh buffer, so this measures +// alloc+calc+encode per item. +static void CalcAndEncode_SensorStateResponse_Fresh(benchmark::State &state) { + SensorStateResponse msg; + msg.key = 0x12345678; + msg.state = 23.5f; + msg.missing_state = false; + + for (auto _ : state) { + for (int i = 0; i < kInnerIterations; i++) { + APIBuffer buffer; + uint32_t size = msg.calculate_size(); + buffer.resize(size); + ProtoWriteBuffer writer(&buffer, 0); + msg.encode(writer); + benchmark::DoNotOptimize(buffer.data()); + } + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(CalcAndEncode_SensorStateResponse_Fresh); + +// --- BinarySensorStateResponse --- + +static void Encode_BinarySensorStateResponse(benchmark::State &state) { + APIBuffer buffer; + BinarySensorStateResponse msg; + msg.key = 0xAABBCCDD; + msg.state = true; + msg.missing_state = false; + uint32_t size = msg.calculate_size(); + buffer.resize(size); + + for (auto _ : state) { + for (int i = 0; i < kInnerIterations; i++) { + ProtoWriteBuffer writer(&buffer, 0); + msg.encode(writer); + } + benchmark::DoNotOptimize(buffer.data()); + } + state.SetItemsProcessed(state.iterations() * 
kInnerIterations); +} +BENCHMARK(Encode_BinarySensorStateResponse); + +// --- HelloResponse (string fields) --- + +static void Encode_HelloResponse(benchmark::State &state) { + APIBuffer buffer; + HelloResponse msg; + msg.api_version_major = 1; + msg.api_version_minor = 10; + msg.server_info = StringRef::from_lit("esphome v2026.3.0"); + msg.name = StringRef::from_lit("living-room-sensor"); + uint32_t size = msg.calculate_size(); + buffer.resize(size); + + for (auto _ : state) { + for (int i = 0; i < kInnerIterations; i++) { + ProtoWriteBuffer writer(&buffer, 0); + msg.encode(writer); + } + benchmark::DoNotOptimize(buffer.data()); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(Encode_HelloResponse); + +// --- LightStateResponse (complex multi-field message) --- + +static void Encode_LightStateResponse(benchmark::State &state) { + APIBuffer buffer; + LightStateResponse msg; + msg.key = 0x11223344; + msg.state = true; + msg.brightness = 0.8f; + msg.color_mode = enums::COLOR_MODE_RGB_WHITE; + msg.color_brightness = 1.0f; + msg.red = 1.0f; + msg.green = 0.5f; + msg.blue = 0.2f; + msg.white = 0.0f; + msg.color_temperature = 4000.0f; + msg.cold_white = 0.0f; + msg.warm_white = 0.0f; + msg.effect = StringRef::from_lit("rainbow"); + uint32_t size = msg.calculate_size(); + buffer.resize(size); + + for (auto _ : state) { + for (int i = 0; i < kInnerIterations; i++) { + ProtoWriteBuffer writer(&buffer, 0); + msg.encode(writer); + } + benchmark::DoNotOptimize(buffer.data()); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(Encode_LightStateResponse); + +static void CalculateSize_LightStateResponse(benchmark::State &state) { + LightStateResponse msg; + msg.key = 0x11223344; + msg.state = true; + msg.brightness = 0.8f; + msg.color_mode = enums::COLOR_MODE_RGB_WHITE; + msg.color_brightness = 1.0f; + msg.red = 1.0f; + msg.green = 0.5f; + msg.blue = 0.2f; + msg.white = 0.0f; + msg.color_temperature = 4000.0f; + 
msg.cold_white = 0.0f; + msg.warm_white = 0.0f; + msg.effect = StringRef::from_lit("rainbow"); + + for (auto _ : state) { + uint32_t result = 0; + for (int i = 0; i < kInnerIterations; i++) { + result += msg.calculate_size(); + } + benchmark::DoNotOptimize(result); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(CalculateSize_LightStateResponse); + +// --- DeviceInfoResponse (nested submessages: 20 devices + 20 areas) --- + +static DeviceInfoResponse make_device_info_response() { + DeviceInfoResponse msg; + msg.name = StringRef::from_lit("living-room-sensor"); + msg.mac_address = StringRef::from_lit("AA:BB:CC:DD:EE:FF"); + msg.esphome_version = StringRef::from_lit("2026.3.0"); + msg.compilation_time = StringRef::from_lit("Mar 16 2026, 12:00:00"); + msg.model = StringRef::from_lit("esp32-poe-iso"); + msg.manufacturer = StringRef::from_lit("Olimex"); + msg.friendly_name = StringRef::from_lit("Living Room Sensor"); +#ifdef USE_DEVICES + for (uint32_t i = 0; i < ESPHOME_DEVICE_COUNT && i < 20; i++) { + msg.devices[i].device_id = i + 1; + msg.devices[i].name = StringRef::from_lit("device"); + msg.devices[i].area_id = (i % 20) + 1; + } +#endif +#ifdef USE_AREAS + for (uint32_t i = 0; i < ESPHOME_AREA_COUNT && i < 20; i++) { + msg.areas[i].area_id = i + 1; + msg.areas[i].name = StringRef::from_lit("area"); + } +#endif + return msg; +} + +static void CalculateSize_DeviceInfoResponse(benchmark::State &state) { + auto msg = make_device_info_response(); + + for (auto _ : state) { + uint32_t result = 0; + for (int i = 0; i < kInnerIterations; i++) { + result += msg.calculate_size(); + } + benchmark::DoNotOptimize(result); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(CalculateSize_DeviceInfoResponse); + +static void Encode_DeviceInfoResponse(benchmark::State &state) { + auto msg = make_device_info_response(); + APIBuffer buffer; + uint32_t total_size = msg.calculate_size(); + buffer.resize(total_size); + 
+ for (auto _ : state) { + for (int i = 0; i < kInnerIterations; i++) { + ProtoWriteBuffer writer(&buffer, 0); + msg.encode(writer); + } + benchmark::DoNotOptimize(buffer.data()); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(Encode_DeviceInfoResponse); + +// Steady state: buffer already allocated from previous iteration +static void CalcAndEncode_DeviceInfoResponse(benchmark::State &state) { + auto msg = make_device_info_response(); + APIBuffer buffer; + + for (auto _ : state) { + for (int i = 0; i < kInnerIterations; i++) { + uint32_t size = msg.calculate_size(); + buffer.resize(size); + ProtoWriteBuffer writer(&buffer, 0); + msg.encode(writer); + } + benchmark::DoNotOptimize(buffer.data()); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(CalcAndEncode_DeviceInfoResponse); + +// Cold path: fresh buffer each iteration (measures heap allocation cost). +// Inner loop still needed to amortize CodSpeed instrumentation overhead. +// Each inner iteration creates a fresh buffer, so this measures +// alloc+calc+encode per item. 
+static void CalcAndEncode_DeviceInfoResponse_Fresh(benchmark::State &state) { + auto msg = make_device_info_response(); + + for (auto _ : state) { + for (int i = 0; i < kInnerIterations; i++) { + APIBuffer buffer; + uint32_t size = msg.calculate_size(); + buffer.resize(size); + ProtoWriteBuffer writer(&buffer, 0); + msg.encode(writer); + benchmark::DoNotOptimize(buffer.data()); + } + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(CalcAndEncode_DeviceInfoResponse_Fresh); + +} // namespace esphome::api::benchmarks diff --git a/tests/benchmarks/components/api/bench_proto_varint.cpp b/tests/benchmarks/components/api/bench_proto_varint.cpp new file mode 100644 index 0000000000..0b5ccc2b7d --- /dev/null +++ b/tests/benchmarks/components/api/bench_proto_varint.cpp @@ -0,0 +1,133 @@ +#include <benchmark/benchmark.h> + +#include "esphome/components/api/proto.h" +#include "esphome/components/api/api_buffer.h" + +namespace esphome::api::benchmarks { + +// Inner iteration count to amortize CodSpeed instrumentation overhead. +// Without this, the ~60ns per-iteration valgrind start/stop cost dominates +// sub-microsecond benchmarks. 
+static constexpr int kInnerIterations = 2000; + +// --- ProtoVarInt::parse() benchmarks --- + +static void ProtoVarInt_Parse_SingleByte(benchmark::State &state) { + uint8_t buf[] = {0x42}; // value = 66 + + for (auto _ : state) { + ProtoVarIntResult result{}; + for (int i = 0; i < kInnerIterations; i++) { + result = ProtoVarInt::parse(buf, sizeof(buf)); + } + benchmark::DoNotOptimize(result); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(ProtoVarInt_Parse_SingleByte); + +static void ProtoVarInt_Parse_TwoByte(benchmark::State &state) { + uint8_t buf[] = {0x80, 0x01}; // value = 128 + + for (auto _ : state) { + ProtoVarIntResult result{}; + for (int i = 0; i < kInnerIterations; i++) { + result = ProtoVarInt::parse(buf, sizeof(buf)); + } + benchmark::DoNotOptimize(result); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(ProtoVarInt_Parse_TwoByte); + +static void ProtoVarInt_Parse_FiveByte(benchmark::State &state) { + uint8_t buf[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x0F}; + + for (auto _ : state) { + ProtoVarIntResult result{}; + for (int i = 0; i < kInnerIterations; i++) { + result = ProtoVarInt::parse(buf, sizeof(buf)); + } + benchmark::DoNotOptimize(result); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(ProtoVarInt_Parse_FiveByte); + +// --- Varint encoding benchmarks --- + +static void Encode_Varint_Small(benchmark::State &state) { + APIBuffer buffer; + buffer.resize(16); + + for (auto _ : state) { + for (int i = 0; i < kInnerIterations; i++) { + ProtoWriteBuffer writer(&buffer, 0); + writer.encode_varint_raw(42); + } + benchmark::DoNotOptimize(buffer.data()); + } + state.SetItemsProcessed(state.iterations() * kInnerIterations); +} +BENCHMARK(Encode_Varint_Small); + +static void Encode_Varint_Large(benchmark::State &state) { + APIBuffer buffer; + buffer.resize(16); + + for (auto _ : state) { + for (int i = 0; i < kInnerIterations; i++) { + ProtoWriteBuffer 
writer(&buffer, 0);
+      writer.encode_varint_raw(300);
+    }
+    benchmark::DoNotOptimize(buffer.data());
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(Encode_Varint_Large);
+
+static void Encode_Varint_MaxUint32(benchmark::State &state) {
+  APIBuffer buffer;
+  buffer.resize(16);
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ProtoWriteBuffer writer(&buffer, 0);
+      writer.encode_varint_raw(0xFFFFFFFF);
+    }
+    benchmark::DoNotOptimize(buffer.data());
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(Encode_Varint_MaxUint32);
+
+// --- ProtoSize::varint() benchmarks ---
+
+static void ProtoSize_Varint_Small(benchmark::State &state) {
+  // Use varying input to prevent constant folding.
+  // Values 0-127 all take 1 byte but the compiler can't prove that.
+  for (auto _ : state) {
+    uint32_t result = 0;
+    for (int i = 0; i < kInnerIterations; i++) {
+      result += ProtoSize::varint(static_cast<uint32_t>(i) & 0x7F);
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(ProtoSize_Varint_Small);
+
+static void ProtoSize_Varint_Large(benchmark::State &state) {
+  // Use varying input to prevent constant folding.
+  for (auto _ : state) {
+    uint32_t result = 0;
+    for (int i = 0; i < kInnerIterations; i++) {
+      result += ProtoSize::varint(0xFFFF0000 | static_cast<uint32_t>(i));
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(ProtoSize_Varint_Large);
+
+}  // namespace esphome::api::benchmarks
diff --git a/tests/benchmarks/components/api/benchmark.yaml b/tests/benchmarks/components/api/benchmark.yaml
new file mode 100644
index 0000000000..bfc24d7440
--- /dev/null
+++ b/tests/benchmarks/components/api/benchmark.yaml
@@ -0,0 +1,114 @@
+# Components needed for API protobuf benchmarks.
+# Merged into the base config before validation so all +# dependencies get proper defaults. +# +# esphome: sub-keys are merged into the base config. +esphome: + areas: + - id: area_1 + name: "Area 1" + - id: area_2 + name: "Area 2" + - id: area_3 + name: "Area 3" + - id: area_4 + name: "Area 4" + - id: area_5 + name: "Area 5" + - id: area_6 + name: "Area 6" + - id: area_7 + name: "Area 7" + - id: area_8 + name: "Area 8" + - id: area_9 + name: "Area 9" + - id: area_10 + name: "Area 10" + - id: area_11 + name: "Area 11" + - id: area_12 + name: "Area 12" + - id: area_13 + name: "Area 13" + - id: area_14 + name: "Area 14" + - id: area_15 + name: "Area 15" + - id: area_16 + name: "Area 16" + - id: area_17 + name: "Area 17" + - id: area_18 + name: "Area 18" + - id: area_19 + name: "Area 19" + - id: area_20 + name: "Area 20" + devices: + - id: device_1 + name: "Device 1" + area_id: area_1 + - id: device_2 + name: "Device 2" + area_id: area_2 + - id: device_3 + name: "Device 3" + area_id: area_3 + - id: device_4 + name: "Device 4" + area_id: area_4 + - id: device_5 + name: "Device 5" + area_id: area_5 + - id: device_6 + name: "Device 6" + area_id: area_6 + - id: device_7 + name: "Device 7" + area_id: area_7 + - id: device_8 + name: "Device 8" + area_id: area_8 + - id: device_9 + name: "Device 9" + area_id: area_9 + - id: device_10 + name: "Device 10" + area_id: area_10 + - id: device_11 + name: "Device 11" + area_id: area_11 + - id: device_12 + name: "Device 12" + area_id: area_12 + - id: device_13 + name: "Device 13" + area_id: area_13 + - id: device_14 + name: "Device 14" + area_id: area_14 + - id: device_15 + name: "Device 15" + area_id: area_15 + - id: device_16 + name: "Device 16" + area_id: area_16 + - id: device_17 + name: "Device 17" + area_id: area_17 + - id: device_18 + name: "Device 18" + area_id: area_18 + - id: device_19 + name: "Device 19" + area_id: area_19 + - id: device_20 + name: "Device 20" + area_id: area_20 + +api: +sensor: +binary_sensor: +light: 
+switch:
diff --git a/tests/benchmarks/components/main.cpp b/tests/benchmarks/components/main.cpp
new file mode 100644
index 0000000000..9bc0c31a15
--- /dev/null
+++ b/tests/benchmarks/components/main.cpp
@@ -0,0 +1,42 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/components/logger/logger.h"
+
+/*
+This special main.cpp provides the entry point for Google Benchmark.
+It replaces the default ESPHome main with a benchmark runner.
+
+*/
+
+// Auto generated code by esphome
+// ========== AUTO GENERATED INCLUDE BLOCK BEGIN ===========
+// ========== AUTO GENERATED INCLUDE BLOCK END ===========
+
+void original_setup() {
+  // Code-generated App initialization (pre_setup, area/device registration, etc.)
+
+  // ========== AUTO GENERATED CODE BEGIN ===========
+  // =========== AUTO GENERATED CODE END ============
+}
+
+void setup() {
+  // Run auto-generated initialization (App.pre_setup, area/device registration,
+  // looping_components_.init, etc.) so benchmarks that use App work correctly.
+  original_setup();
+
+  // Log functions call global_logger->log_vprintf_() without a null check,
+  // so we must set up a Logger before any test that triggers logging.
+  static esphome::logger::Logger test_logger(0);
+  test_logger.set_log_level(ESPHOME_LOG_LEVEL);
+  test_logger.pre_setup();
+
+  int argc = 1;
+  char arg0[] = "benchmark";
+  char *argv[] = {arg0, nullptr};
+  ::benchmark::Initialize(&argc, argv);
+  ::benchmark::RunSpecifiedBenchmarks();
+  ::benchmark::Shutdown();
+  exit(0);
+}
+
+void loop() {}
diff --git a/tests/benchmarks/core/bench_application_loop.cpp b/tests/benchmarks/core/bench_application_loop.cpp
new file mode 100644
index 0000000000..dde78ae739
--- /dev/null
+++ b/tests/benchmarks/core/bench_application_loop.cpp
@@ -0,0 +1,22 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/core/application.h"
+
+namespace esphome::benchmarks {
+
+// Benchmark Application::loop() with no registered components.
+// App is initialized by original_setup() in main.cpp (code-generated
+// pre_setup, area/device registration, looping_components_.init).
+// This measures the baseline overhead of the main loop: scheduler,
+// timing, before/after loop tasks, and yield_with_select_.
+static void ApplicationLoop_Empty(benchmark::State &state) {
+  // Set loop interval to 0 so yield_with_select_ returns immediately
+  // instead of sleeping. This benchmarks the loop overhead, not the sleep.
+  App.set_loop_interval(0);
+  for (auto _ : state) {
+    App.loop();
+  }
+}
+BENCHMARK(ApplicationLoop_Empty);
+
+}  // namespace esphome::benchmarks
diff --git a/tests/benchmarks/core/bench_helpers.cpp b/tests/benchmarks/core/bench_helpers.cpp
new file mode 100644
index 0000000000..c6e1e6930e
--- /dev/null
+++ b/tests/benchmarks/core/bench_helpers.cpp
@@ -0,0 +1,41 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/core/helpers.h"
+
+namespace esphome::benchmarks {
+
+// Inner iteration count to amortize CodSpeed instrumentation overhead.
+// Without this, the ~60ns per-iteration valgrind start/stop cost dominates
+// sub-microsecond benchmarks.
+static constexpr int kInnerIterations = 2000;
+
+// --- random_float() ---
+// Ported from ol.yaml:148 "Random Float Benchmark"
+
+static void RandomFloat(benchmark::State &state) {
+  for (auto _ : state) {
+    float result = 0.0f;
+    for (int i = 0; i < kInnerIterations; i++) {
+      result += random_float();
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(RandomFloat);
+
+// --- random_uint32() ---
+
+static void RandomUint32(benchmark::State &state) {
+  for (auto _ : state) {
+    uint32_t result = 0;
+    for (int i = 0; i < kInnerIterations; i++) {
+      result += random_uint32();
+    }
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(RandomUint32);
+
+}  // namespace esphome::benchmarks
diff --git a/tests/benchmarks/core/bench_logger.cpp b/tests/benchmarks/core/bench_logger.cpp
new file mode 100644
index 0000000000..b7e9a1c4ea
--- /dev/null
+++ b/tests/benchmarks/core/bench_logger.cpp
@@ -0,0 +1,54 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/core/log.h"
+
+namespace esphome::benchmarks {
+
+// Inner iteration count to amortize CodSpeed instrumentation overhead.
+// Without this, the ~60ns per-iteration valgrind start/stop cost dominates
+// sub-microsecond benchmarks.
+static constexpr int kInnerIterations = 2000;
+
+static const char *const TAG = "bench";
+
+// --- Log a message with no format specifiers (fastest path) ---
+
+static void Logger_NoFormat(benchmark::State &state) {
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ESP_LOGW(TAG, "Something happened");
+    }
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(Logger_NoFormat);
+
+// --- Log a message with 3 uint32_t format specifiers ---
+
+static void Logger_3Uint32(benchmark::State &state) {
+  uint32_t a = 12345, b = 67890, c = 99999;
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ESP_LOGW(TAG, "Values: %" PRIu32 " %" PRIu32 " %" PRIu32, a, b, c);
+    }
+    benchmark::DoNotOptimize(a);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(Logger_3Uint32);
+
+// --- Log a message with 3 floats (common for sensor values) ---
+
+static void Logger_3Float(benchmark::State &state) {
+  float temp = 23.456f, humidity = 67.89f, pressure = 1013.25f;
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      ESP_LOGW(TAG, "Sensor: %.2f %.1f %.2f", temp, humidity, pressure);
+    }
+    benchmark::DoNotOptimize(temp);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(Logger_3Float);
+
+}  // namespace esphome::benchmarks
diff --git a/tests/benchmarks/core/bench_scheduler.cpp b/tests/benchmarks/core/bench_scheduler.cpp
new file mode 100644
index 0000000000..764f17ed73
--- /dev/null
+++ b/tests/benchmarks/core/bench_scheduler.cpp
@@ -0,0 +1,133 @@
+#include <benchmark/benchmark.h>
+
+#include "esphome/core/scheduler.h"
+#include "esphome/core/hal.h"
+
+namespace esphome::benchmarks {
+
+// Inner iteration count to amortize CodSpeed instrumentation overhead.
+// Without this, the ~60ns per-iteration valgrind start/stop cost dominates
+// sub-microsecond benchmarks.
+static constexpr int kInnerIterations = 2000;
+
+// --- Scheduler fast path: no work to do ---
+
+static void Scheduler_Call_NoWork(benchmark::State &state) {
+  Scheduler scheduler;
+  uint32_t now = millis();
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      scheduler.call(now);
+    }
+    benchmark::DoNotOptimize(now);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(Scheduler_Call_NoWork);
+
+// --- Scheduler with timers: call() when timers exist but aren't due ---
+
+static void Scheduler_Call_TimersNotDue(benchmark::State &state) {
+  Scheduler scheduler;
+  Component dummy_component;
+
+  // Add some timeouts far in the future
+  for (int i = 0; i < 10; i++) {
+    scheduler.set_timeout(&dummy_component, static_cast<uint32_t>(i), 1000000, []() {});
+  }
+  scheduler.process_to_add();
+
+  uint32_t now = millis();
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      scheduler.call(now);
+    }
+    benchmark::DoNotOptimize(now);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(Scheduler_Call_TimersNotDue);
+
+// --- Scheduler with 5 intervals firing every call ---
+
+static void Scheduler_Call_5IntervalsFiring(benchmark::State &state) {
+  Scheduler scheduler;
+  Component dummy_component;
+  int fire_count = 0;
+
+  // Benchmarks the heap-based scheduler dispatch with 5 callbacks firing.
+  // Uses monotonically increasing fake time so intervals reliably fire every call.
+  // USE_BENCHMARK ifdef in component.h disables WarnIfComponentBlockingGuard
+  // (fake now > real millis() would cause underflow in finish()).
+  // interval=0 would cause an infinite loop (reschedules at same now).
+  for (int i = 0; i < 5; i++) {
+    scheduler.set_interval(&dummy_component, static_cast<uint32_t>(i), 1, [&fire_count]() { fire_count++; });
+  }
+  scheduler.process_to_add();
+
+  uint32_t now = millis() + 100;
+
+  for (auto _ : state) {
+    scheduler.call(now);
+    now++;
+    benchmark::DoNotOptimize(fire_count);
+  }
+}
+BENCHMARK(Scheduler_Call_5IntervalsFiring);
+
+// --- Scheduler: set_timeout registration ---
+
+static void Scheduler_SetTimeout(benchmark::State &state) {
+  Scheduler scheduler;
+  Component dummy_component;
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      scheduler.set_timeout(&dummy_component, static_cast<uint32_t>(i % 5), 1000, []() {});
+    }
+    scheduler.process_to_add();
+    benchmark::DoNotOptimize(scheduler);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(Scheduler_SetTimeout);
+
+// --- Scheduler: set_interval registration ---
+
+static void Scheduler_SetInterval(benchmark::State &state) {
+  Scheduler scheduler;
+  Component dummy_component;
+
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      scheduler.set_interval(&dummy_component, static_cast<uint32_t>(i % 5), 1000, []() {});
+    }
+    scheduler.process_to_add();
+    benchmark::DoNotOptimize(scheduler);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(Scheduler_SetInterval);
+
+// --- Scheduler: defer registration (set_timeout with delay=0) ---
+
+static void Scheduler_Defer(benchmark::State &state) {
+  Scheduler scheduler;
+  Component dummy_component;
+
+  // defer() is Component::defer which calls set_timeout(delay=0).
+  // Call set_timeout directly since defer() is protected.
+  for (auto _ : state) {
+    for (int i = 0; i < kInnerIterations; i++) {
+      scheduler.set_timeout(&dummy_component, static_cast<uint32_t>(i % 5), 0, []() {});
+    }
+    scheduler.process_to_add();
+    benchmark::DoNotOptimize(scheduler);
+  }
+  state.SetItemsProcessed(state.iterations() * kInnerIterations);
+}
+BENCHMARK(Scheduler_Defer);
+
+}  // namespace esphome::benchmarks
diff --git a/tests/script/test_determine_jobs.py b/tests/script/test_determine_jobs.py
index 5c81ad374b..29535d1fd3 100644
--- a/tests/script/test_determine_jobs.py
+++ b/tests/script/test_determine_jobs.py
@@ -1821,3 +1821,151 @@ def test_component_batching_beta_branch_40_per_batch(
     all_components.extend(batch_str.split())
     assert len(all_components) == 120
     assert set(all_components) == set(component_names)
+
+
+# --- should_run_benchmarks tests ---
+
+
+def test_should_run_benchmarks_core_change() -> None:
+    """Test benchmarks trigger on core C++ file changes."""
+    with patch.object(
+        determine_jobs, "changed_files", return_value=["esphome/core/scheduler.cpp"]
+    ):
+        assert determine_jobs.should_run_benchmarks() is True
+
+
+def test_should_run_benchmarks_core_header_change() -> None:
+    """Test benchmarks trigger on core header changes."""
+    with patch.object(
+        determine_jobs, "changed_files", return_value=["esphome/core/helpers.h"]
+    ):
+        assert determine_jobs.should_run_benchmarks() is True
+
+
+def test_should_run_benchmarks_benchmark_infra_change() -> None:
+    """Test benchmarks trigger on benchmark infrastructure changes."""
+    for infra_file in [
+        "script/cpp_benchmark.py",
+        "script/test_helpers.py",
+        "script/setup_codspeed_lib.py",
+    ]:
+        with patch.object(determine_jobs, "changed_files", return_value=[infra_file]):
+            assert determine_jobs.should_run_benchmarks() is True, (
+                f"Expected benchmarks to run for {infra_file}"
+            )
+
+
+def test_should_run_benchmarks_benchmark_file_change() -> None:
+    """Test benchmarks trigger on benchmark file changes."""
+    with patch.object(
+        determine_jobs,
"changed_files", + return_value=["tests/benchmarks/components/api/bench_proto_encode.cpp"], + ): + assert determine_jobs.should_run_benchmarks() is True + + +def test_should_run_benchmarks_core_benchmark_file_change() -> None: + """Test benchmarks trigger on core benchmark file changes.""" + with patch.object( + determine_jobs, + "changed_files", + return_value=["tests/benchmarks/core/bench_scheduler.cpp"], + ): + assert determine_jobs.should_run_benchmarks() is True + + +def test_should_run_benchmarks_benchmarked_component_change(tmp_path: Path) -> None: + """Test benchmarks trigger when a benchmarked component changes.""" + # Create a fake benchmarks directory with an 'api' component + benchmarks_dir = tmp_path / "tests" / "benchmarks" / "components" / "api" + benchmarks_dir.mkdir(parents=True) + (benchmarks_dir / "bench_proto_encode.cpp").write_text("// benchmark") + + with ( + patch.object( + determine_jobs, + "changed_files", + return_value=["esphome/components/api/proto.h"], + ), + patch.object(determine_jobs, "root_path", str(tmp_path)), + patch.object( + determine_jobs, + "BENCHMARKS_COMPONENTS_PATH", + "tests/benchmarks/components", + ), + ): + assert determine_jobs.should_run_benchmarks() is True + + +def test_should_run_benchmarks_non_benchmarked_component_change( + tmp_path: Path, +) -> None: + """Test benchmarks do NOT trigger for non-benchmarked component changes.""" + # Create a fake benchmarks directory with only 'api' + benchmarks_dir = tmp_path / "tests" / "benchmarks" / "components" / "api" + benchmarks_dir.mkdir(parents=True) + (benchmarks_dir / "bench_proto_encode.cpp").write_text("// benchmark") + + with ( + patch.object( + determine_jobs, + "changed_files", + return_value=["esphome/components/sensor/__init__.py"], + ), + patch.object(determine_jobs, "root_path", str(tmp_path)), + patch.object( + determine_jobs, + "BENCHMARKS_COMPONENTS_PATH", + "tests/benchmarks/components", + ), + ): + assert determine_jobs.should_run_benchmarks() is False + 
+ +def test_should_run_benchmarks_no_dependency_expansion(tmp_path: Path) -> None: + """Test benchmarks do NOT expand to dependent components. + + Changing 'sensor' should not trigger 'api' benchmarks even if api + depends on sensor. This is intentional — benchmark runs should be + targeted to directly changed components only. + """ + benchmarks_dir = tmp_path / "tests" / "benchmarks" / "components" / "api" + benchmarks_dir.mkdir(parents=True) + (benchmarks_dir / "bench_proto_encode.cpp").write_text("// benchmark") + + with ( + patch.object( + determine_jobs, + "changed_files", + # sensor is a dependency of api, but benchmarks don't expand + return_value=["esphome/components/sensor/sensor.cpp"], + ), + patch.object(determine_jobs, "root_path", str(tmp_path)), + patch.object( + determine_jobs, + "BENCHMARKS_COMPONENTS_PATH", + "tests/benchmarks/components", + ), + ): + assert determine_jobs.should_run_benchmarks() is False + + +def test_should_run_benchmarks_unrelated_change() -> None: + """Test benchmarks do NOT trigger for unrelated changes.""" + with patch.object(determine_jobs, "changed_files", return_value=["README.md"]): + assert determine_jobs.should_run_benchmarks() is False + + +def test_should_run_benchmarks_no_changes() -> None: + """Test benchmarks do NOT trigger with no changes.""" + with patch.object(determine_jobs, "changed_files", return_value=[]): + assert determine_jobs.should_run_benchmarks() is False + + +def test_should_run_benchmarks_with_branch() -> None: + """Test should_run_benchmarks passes branch to changed_files.""" + with patch.object(determine_jobs, "changed_files") as mock_changed: + mock_changed.return_value = [] + determine_jobs.should_run_benchmarks("release") + mock_changed.assert_called_with("release")