From f132b7dc07f2f402eab87a8ee445a64c5b403e22 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kevin.ahrendt@openhomefoundation.org>
Date: Fri, 24 Apr 2026 10:09:03 -0400
Subject: [PATCH] [media_player][speaker][speaker_source] Centralize preferred
 format codegen (#14771)

---
 esphome/components/media_player/__init__.py   | 111 +++++++++++-
 .../speaker/media_player/__init__.py          | 160 ++++--------------
 .../components/speaker_source/media_player.py |  83 +--------
 .../speaker/common-media_player.yaml          |   2 +-
 4 files changed, 156 insertions(+), 200 deletions(-)

diff --git a/esphome/components/media_player/__init__.py b/esphome/components/media_player/__init__.py
index 1c2c474645..d1db868ace 100644
--- a/esphome/components/media_player/__init__.py
+++ b/esphome/components/media_player/__init__.py
@@ -1,20 +1,31 @@
+from collections.abc import Callable
+
 from esphome import automation
 import esphome.codegen as cg
+from esphome.components import audio
 import esphome.config_validation as cv
 from esphome.const import (
     CONF_ENTITY_CATEGORY,
+    CONF_FORMAT,
     CONF_ICON,
     CONF_ID,
+    CONF_NUM_CHANNELS,
     CONF_ON_IDLE,
     CONF_ON_STATE,
     CONF_ON_TURN_OFF,
     CONF_ON_TURN_ON,
+    CONF_SAMPLE_RATE,
     CONF_VOLUME,
 )
 from esphome.core import CORE
-from esphome.core.entity_helpers import entity_duplicate_validator, setup_entity
+from esphome.core.entity_helpers import (
+    entity_duplicate_validator,
+    inherit_property_from,
+    setup_entity,
+)
 from esphome.coroutine import CoroPriority, coroutine_with_priority
-from esphome.cpp_generator import MockObjClass
+from esphome.cpp_generator import MockObj, MockObjClass
+from esphome.types import ConfigType
 
 CODEOWNERS = ["@jesserockz"]
 
@@ -34,6 +45,102 @@ MEDIA_PLAYER_FORMAT_PURPOSE_ENUM = {
     "announcement": MediaPlayerFormatPurpose.PURPOSE_ANNOUNCEMENT,
 }
 
+# Public API for external components. Do not remove.
+FORMAT_MAPPING = {  # config format name -> struct "format" value
+    "FLAC": "flac",
+    "MP3": "mp3",
+    "OPUS": "opus",
+    "WAV": "wav",
+}
+
+
+def build_supported_format_struct(
+    format_config: ConfigType, purpose: MockObj
+) -> cg.StructInitializer:
+    """Create the MediaPlayerSupportedFormat initializer for one format config.
+
+    Raises KeyError if the format has no FORMAT_MAPPING entry (e.g. "NONE").
+    Public API for external components. Do not remove.
+    """
+    codec = format_config[CONF_FORMAT]
+    fields = [
+        ("format", FORMAT_MAPPING[codec]),
+        ("sample_rate", format_config[CONF_SAMPLE_RATE]),
+        ("num_channels", format_config[CONF_NUM_CHANNELS]),
+        ("purpose", purpose),
+    ]
+
+    # MP3 must not carry sample_bytes: ffmpeg transcoding in Home Assistant
+    # fails when the number of bytes per sample is specified for MP3.
+    if codec != "MP3":
+        fields.append(("sample_bytes", 2))
+    return cg.StructInitializer(MediaPlayerSupportedFormat, *fields)
+
+
+def validate_preferred_format(
+    component_name: str, audio_device_key: str
+) -> Callable[[ConfigType], ConfigType]:
+    """Create a validator that fills in and checks a preferred-format config.
+
+    Public API for external components. Do not remove.
+    """
+
+    def _validate(config: ConfigType) -> ConfigType:
+        # Fill in channel count and sample rate from the audio device when unset
+        for inherited in (CONF_NUM_CHANNELS, CONF_SAMPLE_RATE):
+            inherit_property_from(inherited, audio_device_key)(config)
+        sample_rate = config.get(CONF_SAMPLE_RATE)
+        # Opus only supports 48 kHz
+        if sample_rate != 48000 and config.get(CONF_FORMAT) == "OPUS":
+            raise cv.Invalid("Opus only supports a sample rate of 48000 Hz")
+
+        # Check the settings against the audio device (always 16-bit samples)
+        check = audio.final_validate_audio_schema(
+            component_name,
+            audio_device=audio_device_key,
+            bits_per_sample=16,
+            channels=config.get(CONF_NUM_CHANNELS),
+            sample_rate=sample_rate,
+        )
+        check(config)
+        return config
+
+    return _validate
+
+
+def request_codecs_for_format_configs(
+    config: ConfigType, format_config_keys: list[str]
+) -> None:
+    """Request codec support for the formats used by the given format configs.
+
+    Each key in ``format_config_keys`` is looked up in ``config``; missing or
+    empty entries are skipped. FLAC, MP3 and OPUS formats request their own
+    codec, and a "NONE" format (accepts any format) requests all three.
+    Other formats request nothing.
+
+    Public API for external components. Do not remove.
+    """
+    # Collect the format of every present, non-empty format config; the set
+    # may also hold "NONE" and formats with no codec request, such as "WAV"
+    configured = {
+        entry[CONF_FORMAT] for entry in map(config.get, format_config_keys) if entry
+    }
+
+    # Request functions keyed by format name, in the order they are called
+    codec_requests = {
+        "FLAC": audio.request_flac_support,
+        "MP3": audio.request_mp3_support,
+        "OPUS": audio.request_opus_support,
+    }
+
+    # "NONE" accepts any format, so every codec is requested; otherwise only
+    # the codecs of the configured formats are
+    request_all = "NONE" in configured
+    for name, request in codec_requests.items():
+        if request_all or name in configured:
+            request()
+
+
 # Local config key constants
 CONF_ANNOUNCEMENT = "announcement"
 CONF_ON_PLAY = "on_play"
diff --git a/esphome/components/speaker/media_player/__init__.py b/esphome/components/speaker/media_player/__init__.py
index 9b496637da..abfd599808 100644
--- a/esphome/components/speaker/media_player/__init__.py
+++ b/esphome/components/speaker/media_player/__init__.py
@@ -32,7 +32,6 @@ from esphome.const import (
     CONF_URL,
 )
 from esphome.core import CORE, HexInt
-from esphome.core.entity_helpers import inherit_property_from
 from esphome.external_files import download_content
 
 _LOGGER = logging.getLogger(__name__)
@@ -44,16 +43,12 @@ DEPENDENCIES = ["network"]
 CODEOWNERS = ["@kahrendt", "@synesthesiam"]
 DOMAIN = "media_player"
 
-CODEC_SUPPORT_ALL = "all"
-CODEC_SUPPORT_NEEDED = "needed"
-CODEC_SUPPORT_NONE = "none"
-
 TYPE_LOCAL = "local"
 TYPE_WEB = "web"
 
 CONF_ANNOUNCEMENT = "announcement"
 CONF_ANNOUNCEMENT_PIPELINE = "announcement_pipeline"
-CONF_CODEC_SUPPORT_ENABLED = "codec_support_enabled"
+CONF_CODEC_SUPPORT_ENABLED = "codec_support_enabled"  # Remove before 2026.10.0
 CONF_ENQUEUE = "enqueue"
 CONF_MEDIA_FILE = "media_file"
 CONF_MEDIA_PIPELINE = "media_pipeline"
@@ -106,43 +101,10 @@ def _download_web_file(value):
     return value
 
 
-# Returns a media_player.MediaPlayerSupportedFormat struct with the configured
-# format, sample rate, number of channels, purpose, and bytes per sample
-def _get_supported_format_struct(pipeline, type):
-    args = [
-        media_player.MediaPlayerSupportedFormat,
-    ]
-
-    if pipeline[CONF_FORMAT] == "FLAC":
-        args.append(("format", "flac"))
-    elif pipeline[CONF_FORMAT] == "MP3":
-        args.append(("format", "mp3"))
-    elif pipeline[CONF_FORMAT] == "OPUS":
-        args.append(("format", "opus"))
-    elif pipeline[CONF_FORMAT] == "WAV":
-        args.append(("format", "wav"))
-
-    args.append(("sample_rate", pipeline[CONF_SAMPLE_RATE]))
-    args.append(("num_channels", pipeline[CONF_NUM_CHANNELS]))
-
-    if type == "MEDIA":
-        args.append(
-            (
-                "purpose",
-                media_player.MEDIA_PLAYER_FORMAT_PURPOSE_ENUM["default"],
-            )
-        )
-    elif type == "ANNOUNCEMENT":
-        args.append(
-            (
-                "purpose",
-                media_player.MEDIA_PLAYER_FORMAT_PURPOSE_ENUM["announcement"],
-            )
-        )
-    if pipeline[CONF_FORMAT] != "MP3":
-        args.append(("sample_bytes", 2))
-
-    return cg.StructInitializer(*args)
+_PURPOSE_MAP = {  # pipeline type tag used in to_code -> format purpose enum value
+    "MEDIA": media_player.MEDIA_PLAYER_FORMAT_PURPOSE_ENUM["default"],
+    "ANNOUNCEMENT": media_player.MEDIA_PLAYER_FORMAT_PURPOSE_ENUM["announcement"],
+}
 
 
 def _file_schema(value):
@@ -210,25 +172,9 @@ def _validate_file_shorthand(value):
     )
 
 
-def _validate_pipeline(config):
-    # Inherit transcoder settings from speaker if not manually set
-    inherit_property_from(CONF_NUM_CHANNELS, CONF_SPEAKER)(config)
-    inherit_property_from(CONF_SAMPLE_RATE, CONF_SPEAKER)(config)
-
-    # Opus only supports 48 kHz
-    if config.get(CONF_FORMAT) == "OPUS" and config.get(CONF_SAMPLE_RATE) != 48000:
-        raise cv.Invalid("Opus only supports a sample rate of 48000 Hz")
-
-    # Validate the transcoder settings is compatible with the speaker
-    audio.final_validate_audio_schema(
-        "speaker media_player",
-        audio_device=CONF_SPEAKER,
-        bits_per_sample=16,
-        channels=config.get(CONF_NUM_CHANNELS),
-        sample_rate=config.get(CONF_SAMPLE_RATE),
-    )(config)
-
-    return config
+_validate_pipeline = media_player.validate_preferred_format(
+    "speaker media_player", CONF_SPEAKER
+)
 
 
 def _validate_repeated_speaker(config):
@@ -245,59 +191,34 @@ def _validate_repeated_speaker(config):
 
 
 def _final_validate(config):
-    # Normalize boolean values to string equivalents
-    codec_mode = config[CONF_CODEC_SUPPORT_ENABLED]
-    if codec_mode is True:
-        codec_mode = CODEC_SUPPORT_ALL
-    elif codec_mode is False:
-        codec_mode = CODEC_SUPPORT_NONE
+    # Remove before 2026.10.0
+    if CONF_CODEC_SUPPORT_ENABLED in config:
+        _LOGGER.warning(
+            "'%s' is deprecated and will be removed in 2026.10.0. "
+            "Codec support is now automatically determined from the pipeline "
+            "'format' setting. Set format to 'NONE' to enable all codecs.",
+            CONF_CODEC_SUPPORT_ENABLED,
+        )
 
-    use_codec = codec_mode != CODEC_SUPPORT_NONE
-
-    # In "needed" mode, collect formats from pipelines and files
-    needed_formats = set()
-    need_all = False
-    if codec_mode == CODEC_SUPPORT_NEEDED:
-        for pipeline_key in (CONF_ANNOUNCEMENT_PIPELINE, CONF_MEDIA_PIPELINE):
-            if pipeline := config.get(pipeline_key):
-                fmt = pipeline[CONF_FORMAT]
-                if fmt == "NONE":
-                    # No preferred format means any format could arrive
-                    need_all = True
-                else:
-                    needed_formats.add(fmt)
+    # Request codecs based on pipeline formats
+    media_player.request_codecs_for_format_configs(
+        config, [CONF_ANNOUNCEMENT_PIPELINE, CONF_MEDIA_PIPELINE]
+    )
 
+    # Validate local files and request any additional codecs they need
     for file_config in config.get(CONF_FILES, []):
         _, media_file_type = _read_audio_file_and_type(file_config)
         if str(media_file_type) == str(audio.AUDIO_FILE_TYPE_ENUM["NONE"]):
             raise cv.Invalid("Unsupported local media file")
-        if not use_codec and str(media_file_type) != str(
-            audio.AUDIO_FILE_TYPE_ENUM["WAV"]
-        ):
-            # Only wav files are supported
-            raise cv.Invalid(
-                f"Unsupported local media file type, set {CONF_CODEC_SUPPORT_ENABLED} to true or convert the media file to wav"
-            )
-        # In "needed" mode, add file format to needed codecs
-        if codec_mode == CODEC_SUPPORT_NEEDED:
-            for fmt_name, fmt_enum in audio.AUDIO_FILE_TYPE_ENUM.items():
-                if str(media_file_type) == str(fmt_enum):
-                    if fmt_name not in ("WAV", "NONE"):
-                        needed_formats.add(fmt_name)
-                    break
-
-    # Request codec support
-    if codec_mode == CODEC_SUPPORT_ALL or need_all:
-        audio.request_flac_support()
-        audio.request_mp3_support()
-        audio.request_opus_support()
-    elif codec_mode == CODEC_SUPPORT_NEEDED:
-        if "FLAC" in needed_formats:
-            audio.request_flac_support()
-        if "MP3" in needed_formats:
-            audio.request_mp3_support()
-        if "OPUS" in needed_formats:
-            audio.request_opus_support()
+        for fmt_name, fmt_enum in audio.AUDIO_FILE_TYPE_ENUM.items():
+            if str(media_file_type) == str(fmt_enum):
+                if fmt_name == "FLAC":
+                    audio.request_flac_support()
+                elif fmt_name == "MP3":
+                    audio.request_mp3_support()
+                elif fmt_name == "OPUS":
+                    audio.request_opus_support()
+                break
 
     return config
 
@@ -362,17 +283,8 @@ CONFIG_SCHEMA = cv.All(
             cv.Optional(CONF_BUFFER_SIZE, default=1000000): cv.int_range(
                 min=4000, max=4000000
             ),
-            cv.Optional(
-                CONF_CODEC_SUPPORT_ENABLED, default=CODEC_SUPPORT_NEEDED
-            ): cv.Any(
-                cv.boolean,
-                cv.one_of(
-                    CODEC_SUPPORT_ALL,
-                    CODEC_SUPPORT_NEEDED,
-                    CODEC_SUPPORT_NONE,
-                    lower=True,
-                ),
-            ),
+            # Remove before 2026.10.0
+            cv.Optional(CONF_CODEC_SUPPORT_ENABLED): cv.Any(cv.boolean, cv.string),
             cv.Optional(CONF_FILES): cv.ensure_list(MEDIA_FILE_TYPE_SCHEMA),
             cv.Optional(CONF_TASK_STACK_IN_PSRAM): cv.All(
                 cv.boolean, cv.requires_component(psram.DOMAIN)
@@ -432,8 +344,8 @@ async def to_code(config):
     if announcement_pipeline_config[CONF_FORMAT] != "NONE":
         cg.add(
             var.set_announcement_format(
-                _get_supported_format_struct(
-                    announcement_pipeline_config, "ANNOUNCEMENT"
+                media_player.build_supported_format_struct(
+                    announcement_pipeline_config, _PURPOSE_MAP["ANNOUNCEMENT"]
                 )
             )
         )
@@ -444,7 +356,9 @@ async def to_code(config):
         if media_pipeline_config[CONF_FORMAT] != "NONE":
             cg.add(
                 var.set_media_format(
-                    _get_supported_format_struct(media_pipeline_config, "MEDIA")
+                    media_player.build_supported_format_struct(
+                        media_pipeline_config, _PURPOSE_MAP["MEDIA"]
+                    )
                 )
             )
 
diff --git a/esphome/components/speaker_source/media_player.py b/esphome/components/speaker_source/media_player.py
index 70feeac318..b6653fe543 100644
--- a/esphome/components/speaker_source/media_player.py
+++ b/esphome/components/speaker_source/media_player.py
@@ -17,7 +17,6 @@ from esphome.const import (
     CONF_SPEAKER,
 )
 from esphome.core import ID
-from esphome.core.entity_helpers import inherit_property_from
 from esphome.cpp_generator import MockObj, TemplateArgsType
 from esphome.types import ConfigType
 
@@ -65,53 +64,9 @@ SetPlaylistDelayAction = speaker_source_ns.class_(
 )
 
 
-FORMAT_MAPPING = {
-    "FLAC": "flac",
-    "MP3": "mp3",
-    "OPUS": "opus",
-    "WAV": "wav",
-}
-
-
-# Returns a media_player.MediaPlayerSupportedFormat struct with the configured
-# format, sample rate, number of channels, purpose, and bytes per sample
-def _get_supported_format_struct(pipeline: ConfigType, purpose: MockObj):
-    args = [
-        media_player.MediaPlayerSupportedFormat,
-    ]
-
-    args.append(("format", FORMAT_MAPPING[pipeline[CONF_FORMAT]]))
-
-    args.append(("sample_rate", pipeline[CONF_SAMPLE_RATE]))
-    args.append(("num_channels", pipeline[CONF_NUM_CHANNELS]))
-    args.append(("purpose", purpose))
-
-    # Omit sample_bytes for MP3: ffmpeg transcoding in Home Assistant fails
-    # if the number of bytes per sample is specified for MP3.
-    if pipeline[CONF_FORMAT] != "MP3":
-        args.append(("sample_bytes", 2))
-
-    return cg.StructInitializer(*args)
-
-
-def _validate_pipeline(config: ConfigType) -> ConfigType:
-    # Inherit settings from speaker if not manually set
-    inherit_property_from(CONF_NUM_CHANNELS, CONF_SPEAKER)(config)
-    inherit_property_from(CONF_SAMPLE_RATE, CONF_SPEAKER)(config)
-
-    # Opus only supports 48 kHz
-    if config.get(CONF_FORMAT) == "OPUS" and config.get(CONF_SAMPLE_RATE) != 48000:
-        raise cv.Invalid("Opus only supports a sample rate of 48000 Hz")
-
-    audio.final_validate_audio_schema(
-        "speaker_source media_player",
-        audio_device=CONF_SPEAKER,
-        bits_per_sample=16,
-        channels=config.get(CONF_NUM_CHANNELS),
-        sample_rate=config.get(CONF_SAMPLE_RATE),
-    )(config)
-
-    return config
+_validate_pipeline = media_player.validate_preferred_format(
+    "speaker_source media_player", CONF_SPEAKER
+)
 
 
 PIPELINE_SCHEMA = cv.Schema(
@@ -198,31 +153,9 @@ CONFIG_SCHEMA = cv.All(
 
 
 def _final_validate_codecs(config: ConfigType) -> ConfigType:
-    # "NONE" means the pipeline accepts any format at runtime, so all optional codecs must be available.
-    # When a specific format is set, only that codec is requested.
-    needed_formats: set[str] = set()
-    need_all = False
-
-    for pipeline_key in (CONF_ANNOUNCEMENT_PIPELINE, CONF_MEDIA_PIPELINE):
-        if pipeline := config.get(pipeline_key):
-            fmt = pipeline[CONF_FORMAT]
-            if fmt == "NONE":
-                need_all = True
-            else:
-                needed_formats.add(fmt)
-
-    if need_all:
-        audio.request_flac_support()
-        audio.request_mp3_support()
-        audio.request_opus_support()
-    else:
-        if "FLAC" in needed_formats:
-            audio.request_flac_support()
-        if "MP3" in needed_formats:
-            audio.request_mp3_support()
-        if "OPUS" in needed_formats:
-            audio.request_opus_support()
-
+    media_player.request_codecs_for_format_configs(
+        config, [CONF_ANNOUNCEMENT_PIPELINE, CONF_MEDIA_PIPELINE]
+    )
     return config
 
 
@@ -264,7 +197,9 @@ async def to_code(config: ConfigType) -> None:
                 cg.add(
                     var.set_format(
                         pipeline_enum,
-                        _get_supported_format_struct(pipeline_config, purpose),
+                        media_player.build_supported_format_struct(
+                            pipeline_config, purpose
+                        ),
                     )
                 )
 
diff --git a/tests/components/speaker/common-media_player.yaml b/tests/components/speaker/common-media_player.yaml
index c958c0d912..a849e04b33 100644
--- a/tests/components/speaker/common-media_player.yaml
+++ b/tests/components/speaker/common-media_player.yaml
@@ -11,9 +11,9 @@ media_player:
     id: speaker_media_player_id
     announcement_pipeline:
       speaker: speaker_id
+      format: NONE
     buffer_size: 1000000
     volume_increment: 0.02
     volume_max: 0.95
     volume_min: 0.0
     task_stack_in_psram: true
-    codec_support_enabled: all