Files
esphome/esphome/components/audio/__init__.py
T
2026-05-06 17:22:18 +00:00

433 lines
16 KiB
Python

from dataclasses import dataclass, field
import esphome.codegen as cg
from esphome.components.esp32 import (
add_idf_component,
add_idf_sdkconfig_option,
include_builtin_idf_component,
)
import esphome.config_validation as cv
from esphome.const import (
CONF_BITS_PER_SAMPLE,
CONF_NUM_CHANNELS,
CONF_SAMPLE_RATE,
CONF_SIZE,
)
from esphome.core import CORE
import esphome.final_validate as fv
CODEOWNERS = ["@kahrendt"]
DOMAIN = "audio"
audio_ns = cg.esphome_ns.namespace("audio")
AudioFile = audio_ns.struct("AudioFile")
AudioFileType = audio_ns.enum("AudioFileType", is_class=True)
AUDIO_FILE_TYPE_ENUM = {
"NONE": AudioFileType.NONE,
"WAV": AudioFileType.WAV,
"MP3": AudioFileType.MP3,
"FLAC": AudioFileType.FLAC,
"OPUS": AudioFileType.OPUS,
}
MEMORY_PSRAM = "psram"
MEMORY_INTERNAL = "internal"
MEMORY_LOCATIONS = [MEMORY_PSRAM, MEMORY_INTERNAL]
@dataclass
class FlacOptions:
buffer_memory: str | None = None
@dataclass
class Mp3Options:
buffer_memory: str | None = None
@dataclass
class OpusPseudostackOptions:
threadsafe: bool | None = None
buffer_memory: str | None = None
size: int | None = None
@dataclass
class OpusOptions:
floating_point: bool | None = None
state_memory: str | None = None
pseudostack: OpusPseudostackOptions = field(default_factory=OpusPseudostackOptions)
@dataclass
class AudioData:
flac_support: bool = False
mp3_support: bool = False
opus_support: bool = False
wav_support: bool = False
micro_decoder_support: bool = False
flac: FlacOptions = field(default_factory=FlacOptions)
mp3: Mp3Options = field(default_factory=Mp3Options)
opus: OpusOptions = field(default_factory=OpusOptions)
def _get_data() -> AudioData:
if DOMAIN not in CORE.data:
CORE.data[DOMAIN] = AudioData()
return CORE.data[DOMAIN]
def request_flac_support() -> None:
"""Request FLAC codec support for audio decoding."""
_get_data().flac_support = True
def request_mp3_support() -> None:
"""Request MP3 codec support for audio decoding."""
_get_data().mp3_support = True
def request_opus_support() -> None:
"""Request Opus codec support for audio decoding."""
_get_data().opus_support = True
def request_wav_support() -> None:
"""Request WAV codec support for audio decoding."""
_get_data().wav_support = True
def request_micro_decoder_support() -> None:
"""Request micro-decoder library support for audio decoding."""
_get_data().micro_decoder_support = True
CONF_MIN_BITS_PER_SAMPLE = "min_bits_per_sample"
CONF_MAX_BITS_PER_SAMPLE = "max_bits_per_sample"
CONF_MIN_CHANNELS = "min_channels"
CONF_MAX_CHANNELS = "max_channels"
CONF_MIN_SAMPLE_RATE = "min_sample_rate"
CONF_MAX_SAMPLE_RATE = "max_sample_rate"
CONF_CODECS = "codecs"
CONF_WAV = "wav"
CONF_FLAC = "flac"
CONF_MP3 = "mp3"
CONF_OPUS = "opus"
CONF_BUFFER_MEMORY = "buffer_memory"
CONF_FLOATING_POINT = "floating_point"
CONF_STATE_MEMORY = "state_memory"
CONF_PSEUDOSTACK = "pseudostack"
CONF_THREADSAFE = "threadsafe"
_MEMORY_LOCATION_VALIDATOR = cv.one_of(*MEMORY_LOCATIONS, lower=True)
def _maybe_empty_codec(schema):
"""Wrap a codec dict schema so that a bare key (None value) is treated as an empty dict."""
def validator(value):
if value is None:
value = {}
return schema(value)
return validator
CODEC_FLAC_SCHEMA = cv.Schema(
{
cv.Optional(CONF_BUFFER_MEMORY): _MEMORY_LOCATION_VALIDATOR,
}
)
CODEC_MP3_SCHEMA = cv.Schema(
{
cv.Optional(CONF_BUFFER_MEMORY): _MEMORY_LOCATION_VALIDATOR,
}
)
OPUS_PSEUDOSTACK_SCHEMA = cv.Schema(
{
cv.Optional(CONF_THREADSAFE): cv.boolean,
cv.Optional(CONF_BUFFER_MEMORY): _MEMORY_LOCATION_VALIDATOR,
cv.Optional(CONF_SIZE): cv.int_range(60000, 240000),
}
)
CODEC_OPUS_SCHEMA = cv.Schema(
{
cv.Optional(CONF_FLOATING_POINT): cv.boolean,
cv.Optional(CONF_STATE_MEMORY): _MEMORY_LOCATION_VALIDATOR,
cv.Optional(CONF_PSEUDOSTACK): _maybe_empty_codec(OPUS_PSEUDOSTACK_SCHEMA),
}
)
CODEC_WAV_SCHEMA = cv.Schema({})
CODECS_SCHEMA = cv.Schema(
{
cv.Optional(CONF_FLAC): _maybe_empty_codec(CODEC_FLAC_SCHEMA),
cv.Optional(CONF_MP3): _maybe_empty_codec(CODEC_MP3_SCHEMA),
cv.Optional(CONF_OPUS): _maybe_empty_codec(CODEC_OPUS_SCHEMA),
cv.Optional(CONF_WAV): _maybe_empty_codec(CODEC_WAV_SCHEMA),
}
)
CONFIG_SCHEMA = cv.All(
cv.Schema(
{
cv.Optional(CONF_CODECS): _maybe_empty_codec(CODECS_SCHEMA),
}
),
cv.only_on_esp32,
)
AUDIO_COMPONENT_SCHEMA = cv.Schema(
{
cv.Optional(CONF_BITS_PER_SAMPLE): cv.int_range(8, 32),
cv.Optional(CONF_NUM_CHANNELS): cv.int_range(1, 2),
cv.Optional(CONF_SAMPLE_RATE): cv.int_range(8000, 48000),
}
)
def set_stream_limits(
min_bits_per_sample: int = cv.UNDEFINED,
max_bits_per_sample: int = cv.UNDEFINED,
min_channels: int = cv.UNDEFINED,
max_channels: int = cv.UNDEFINED,
min_sample_rate: int = cv.UNDEFINED,
max_sample_rate: int = cv.UNDEFINED,
):
"""Sets the limits for the audio stream that audio component can handle
When the component sinks audio (e.g., a speaker), these indicate the limits to the audio it can receive.
When the component sources audio (e.g., a microphone), these indicate the limits to the audio it can send.
"""
def set_limits_in_config(config):
if min_bits_per_sample is not cv.UNDEFINED:
config[CONF_MIN_BITS_PER_SAMPLE] = min_bits_per_sample
if max_bits_per_sample is not cv.UNDEFINED:
config[CONF_MAX_BITS_PER_SAMPLE] = max_bits_per_sample
if min_channels is not cv.UNDEFINED:
config[CONF_MIN_CHANNELS] = min_channels
if max_channels is not cv.UNDEFINED:
config[CONF_MAX_CHANNELS] = max_channels
if min_sample_rate is not cv.UNDEFINED:
config[CONF_MIN_SAMPLE_RATE] = min_sample_rate
if max_sample_rate is not cv.UNDEFINED:
config[CONF_MAX_SAMPLE_RATE] = max_sample_rate
return set_limits_in_config
def final_validate_audio_schema(
name: str,
*,
audio_device: str,
bits_per_sample: int = cv.UNDEFINED,
channels: int = cv.UNDEFINED,
sample_rate: int = cv.UNDEFINED,
enabled_channels: list[int] = cv.UNDEFINED,
audio_device_issue: bool = False,
):
"""Validates audio compatibility when passed between different components.
The component derived from ``AUDIO_COMPONENT_SCHEMA`` should call ``set_stream_limits`` in a validator to specify its compatible settings
- If audio_device_issue is True, then the error message indicates the user should adjust the AUDIO_COMPONENT_SCHEMA derived component's configuration to match the values passed to this function
- If audio_device_issue is False, then the error message indicates the user should adjust the configuration of the component calling this function, as it falls out of the valid stream limits
Args:
name (str): Friendly name of the component calling this function with an audio component to validate
audio_device (str): The configuration parameter name that contains the ID of an AUDIO_COMPONENT_SCHEMA derived component to validate against
bits_per_sample (int, optional): The desired bits per sample
channels (int, optional): The desired number of channels
sample_rate (int, optional): The desired sample rate
enabled_channels (list[int], optional): The desired enabled channels
audio_device_issue (bool, optional): Format the error message to indicate the problem is in the configuration for the ``audio_device`` component. Defaults to False.
"""
def validate_audio_compatiblity(audio_config):
audio_schema = {}
if bits_per_sample is not cv.UNDEFINED:
try:
cv.int_range(
min=audio_config.get(CONF_MIN_BITS_PER_SAMPLE),
max=audio_config.get(CONF_MAX_BITS_PER_SAMPLE),
)(bits_per_sample)
except cv.Invalid as exc:
if audio_device_issue:
error_string = f"Invalid configuration for the specified {audio_device}. The {name} component requires {bits_per_sample} bits per sample."
else:
error_string = f"Invalid configuration for the {name} component. The {CONF_BITS_PER_SAMPLE} {str(exc)}"
raise cv.Invalid(error_string) from exc
if channels is not cv.UNDEFINED:
try:
cv.int_range(
min=audio_config.get(CONF_MIN_CHANNELS),
max=audio_config.get(CONF_MAX_CHANNELS),
)(channels)
except cv.Invalid as exc:
if audio_device_issue:
error_string = f"Invalid configuration for the specified {audio_device}. The {name} component requires {channels} channels."
else:
error_string = f"Invalid configuration for the {name} component. The {CONF_NUM_CHANNELS} {str(exc)}"
raise cv.Invalid(error_string) from exc
if sample_rate is not cv.UNDEFINED:
try:
cv.int_range(
min=audio_config.get(CONF_MIN_SAMPLE_RATE),
max=audio_config.get(CONF_MAX_SAMPLE_RATE),
)(sample_rate)
except cv.Invalid as exc:
if audio_device_issue:
error_string = f"Invalid configuration for the specified {audio_device}. The {name} component requires a {sample_rate} sample rate."
else:
error_string = f"Invalid configuration for the {name} component. The {CONF_SAMPLE_RATE} {str(exc)}"
raise cv.Invalid(error_string) from exc
if enabled_channels is not cv.UNDEFINED:
for channel in enabled_channels:
try:
# Channels are 0-indexed
cv.int_range(
min=0,
max=audio_config.get(CONF_MAX_CHANNELS) - 1,
)(channel)
except cv.Invalid as exc:
if audio_device_issue:
error_string = f"Invalid configuration for the specified {audio_device}. The {name} component requires channel {channel}."
else:
error_string = f"Invalid configuration for the {name} component. Enabled channel {channel} {str(exc)}"
raise cv.Invalid(error_string) from exc
return cv.Schema(audio_schema, extra=cv.ALLOW_EXTRA)(audio_config)
return cv.Schema(
{
cv.Required(audio_device): fv.id_declaration_match_schema(
validate_audio_compatiblity
)
},
extra=cv.ALLOW_EXTRA,
)
def _emit_memory_pair(value: str | None, psram_key: str, internal_key: str) -> None:
if value == MEMORY_PSRAM:
add_idf_sdkconfig_option(psram_key, True)
add_idf_sdkconfig_option(internal_key, False)
elif value == MEMORY_INTERNAL:
add_idf_sdkconfig_option(psram_key, False)
add_idf_sdkconfig_option(internal_key, True)
async def to_code(config):
# Re-enable ESP-IDF's HTTP client (excluded by default to save compile time)
include_builtin_idf_component("esp_http_client")
add_idf_component(
name="esphome/esp-audio-libs",
ref="3.0.0",
)
data = _get_data()
# Merge user-supplied codec configuration (additive: presence enables the codec)
if codecs_config := config.get(CONF_CODECS):
if (flac_config := codecs_config.get(CONF_FLAC)) is not None:
data.flac_support = True
if (buffer_memory := flac_config.get(CONF_BUFFER_MEMORY)) is not None:
data.flac.buffer_memory = buffer_memory
if (mp3_config := codecs_config.get(CONF_MP3)) is not None:
data.mp3_support = True
if (buffer_memory := mp3_config.get(CONF_BUFFER_MEMORY)) is not None:
data.mp3.buffer_memory = buffer_memory
if (opus_config := codecs_config.get(CONF_OPUS)) is not None:
data.opus_support = True
floating_point = opus_config.get(CONF_FLOATING_POINT)
if floating_point is not None:
data.opus.floating_point = floating_point
if (state_memory := opus_config.get(CONF_STATE_MEMORY)) is not None:
data.opus.state_memory = state_memory
if (pseudostack_config := opus_config.get(CONF_PSEUDOSTACK)) is not None:
threadsafe = pseudostack_config.get(CONF_THREADSAFE)
if threadsafe is not None:
data.opus.pseudostack.threadsafe = threadsafe
if (
buffer_memory := pseudostack_config.get(CONF_BUFFER_MEMORY)
) is not None:
data.opus.pseudostack.buffer_memory = buffer_memory
if (size := pseudostack_config.get(CONF_SIZE)) is not None:
data.opus.pseudostack.size = size
if CONF_WAV in codecs_config:
data.wav_support = True
if data.micro_decoder_support:
add_idf_component(name="esphome/micro-decoder", ref="0.2.0")
# All codecs are enabled by default in micro-decoder, so disable the ones that aren't requested to save flash
if not data.flac_support:
add_idf_sdkconfig_option("CONFIG_MICRO_DECODER_CODEC_FLAC", False)
if not data.mp3_support:
add_idf_sdkconfig_option("CONFIG_MICRO_DECODER_CODEC_MP3", False)
if not data.opus_support:
add_idf_sdkconfig_option("CONFIG_MICRO_DECODER_CODEC_OPUS", False)
if not data.wav_support:
add_idf_sdkconfig_option("CONFIG_MICRO_DECODER_CODEC_WAV", False)
# Configure each codec library.
# Adds a define and IDF component for legacy `audio_decoder.cpp`.
if data.flac_support:
cg.add_define("USE_AUDIO_FLAC_SUPPORT")
add_idf_component(name="esphome/micro-flac", ref="0.2.0")
_emit_memory_pair(
data.flac.buffer_memory,
"CONFIG_MICRO_FLAC_PREFER_PSRAM",
"CONFIG_MICRO_FLAC_PREFER_INTERNAL",
)
if data.mp3_support:
cg.add_define("USE_AUDIO_MP3_SUPPORT")
add_idf_component(name="esphome/micro-mp3", ref="0.2.0")
_emit_memory_pair(
data.mp3.buffer_memory,
"CONFIG_MP3_DECODER_PREFER_PSRAM",
"CONFIG_MP3_DECODER_PREFER_INTERNAL",
)
if data.opus_support:
cg.add_define("USE_AUDIO_OPUS_SUPPORT")
add_idf_component(name="esphome/micro-opus", ref="0.4.1")
if data.opus.floating_point is not None:
add_idf_sdkconfig_option(
"CONFIG_OPUS_FLOATING_POINT", data.opus.floating_point
)
_emit_memory_pair(
data.opus.state_memory,
"CONFIG_OPUS_STATE_PREFER_PSRAM",
"CONFIG_OPUS_STATE_PREFER_INTERNAL",
)
if data.opus.pseudostack.threadsafe is True:
add_idf_sdkconfig_option("CONFIG_OPUS_THREADSAFE_PSEUDOSTACK", True)
add_idf_sdkconfig_option("CONFIG_OPUS_NONTHREADSAFE_PSEUDOSTACK", False)
elif data.opus.pseudostack.threadsafe is False:
add_idf_sdkconfig_option("CONFIG_OPUS_THREADSAFE_PSEUDOSTACK", False)
add_idf_sdkconfig_option("CONFIG_OPUS_NONTHREADSAFE_PSEUDOSTACK", True)
_emit_memory_pair(
data.opus.pseudostack.buffer_memory,
"CONFIG_OPUS_PSEUDOSTACK_PREFER_PSRAM",
"CONFIG_OPUS_PSEUDOSTACK_PREFER_INTERNAL",
)
if data.opus.pseudostack.size is not None:
add_idf_sdkconfig_option(
"CONFIG_OPUS_PSEUDOSTACK_SIZE", data.opus.pseudostack.size
)
if data.wav_support:
cg.add_define("USE_AUDIO_WAV_SUPPORT")
add_idf_component(name="esphome/micro-wav", ref="0.2.0")