From 8ca6ee4349acb28878d4c15ef32d67999b96f139 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Tue, 10 Mar 2026 15:25:26 -0500 Subject: [PATCH] [speaker_source] Add new media player (#14649) Co-authored-by: J. Nick Koston --- CODEOWNERS | 1 + esphome/components/speaker_source/__init__.py | 0 .../components/speaker_source/media_player.py | 212 +++++++ .../speaker_source_media_player.cpp | 546 ++++++++++++++++++ .../speaker_source_media_player.h | 217 +++++++ tests/components/speaker_source/common.yaml | 43 ++ .../speaker_source/test.esp32-idf.yaml | 9 + tests/components/speaker_source/test.wav | Bin 0 -> 46 bytes 8 files changed, 1028 insertions(+) create mode 100644 esphome/components/speaker_source/__init__.py create mode 100644 esphome/components/speaker_source/media_player.py create mode 100644 esphome/components/speaker_source/speaker_source_media_player.cpp create mode 100644 esphome/components/speaker_source/speaker_source_media_player.h create mode 100644 tests/components/speaker_source/common.yaml create mode 100644 tests/components/speaker_source/test.esp32-idf.yaml create mode 100644 tests/components/speaker_source/test.wav diff --git a/CODEOWNERS b/CODEOWNERS index a95e100cbf..12aff01e73 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -459,6 +459,7 @@ esphome/components/sonoff_d1/* @anatoly-savchenkov esphome/components/sound_level/* @kahrendt esphome/components/speaker/* @jesserockz @kahrendt esphome/components/speaker/media_player/* @kahrendt @synesthesiam +esphome/components/speaker_source/* @kahrendt esphome/components/spi/* @clydebarrow @esphome/core esphome/components/spi_device/* @clydebarrow esphome/components/spi_led_strip/* @clydebarrow diff --git a/esphome/components/speaker_source/__init__.py b/esphome/components/speaker_source/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/esphome/components/speaker_source/media_player.py b/esphome/components/speaker_source/media_player.py new file mode 100644 index 
0000000000..a44cdcbf01 --- /dev/null +++ b/esphome/components/speaker_source/media_player.py @@ -0,0 +1,212 @@ +from esphome import automation +import esphome.codegen as cg +from esphome.components import audio, media_player, media_source, speaker +import esphome.config_validation as cv +from esphome.const import ( + CONF_FORMAT, + CONF_ID, + CONF_NUM_CHANNELS, + CONF_SAMPLE_RATE, + CONF_SPEAKER, +) +from esphome.core.entity_helpers import inherit_property_from +from esphome.types import ConfigType + +AUTO_LOAD = ["audio"] +DEPENDENCIES = ["media_source", "speaker"] + +CODEOWNERS = ["@kahrendt"] + +CONF_MEDIA_PIPELINE = "media_pipeline" +CONF_ON_MUTE = "on_mute" +CONF_ON_UNMUTE = "on_unmute" +CONF_ON_VOLUME = "on_volume" +CONF_SOURCES = "sources" +CONF_VOLUME_INCREMENT = "volume_increment" +CONF_VOLUME_INITIAL = "volume_initial" +CONF_VOLUME_MAX = "volume_max" +CONF_VOLUME_MIN = "volume_min" + +speaker_source_ns = cg.esphome_ns.namespace("speaker_source") + +SpeakerSourceMediaPlayer = speaker_source_ns.class_( + "SpeakerSourceMediaPlayer", cg.Component, media_player.MediaPlayer +) + +PipelineContext = speaker_source_ns.struct("PipelineContext") + +Pipeline = speaker_source_ns.enum("Pipeline") + + +FORMAT_MAPPING = { + "FLAC": "flac", + "MP3": "mp3", + "OPUS": "opus", + "WAV": "wav", +} + + +# Returns a media_player.MediaPlayerSupportedFormat struct with the configured +# format, sample rate, number of channels, purpose, and bytes per sample +def _get_supported_format_struct(pipeline: ConfigType): + args = [ + media_player.MediaPlayerSupportedFormat, + ] + + args.append(("format", FORMAT_MAPPING[pipeline[CONF_FORMAT]])) + + args.append(("sample_rate", pipeline[CONF_SAMPLE_RATE])) + args.append(("num_channels", pipeline[CONF_NUM_CHANNELS])) + args.append(("purpose", media_player.MEDIA_PLAYER_FORMAT_PURPOSE_ENUM["default"])) + + # Omit sample_bytes for MP3: ffmpeg transcoding in Home Assistant fails + # if the number of bytes per sample is specified for MP3. 
+ if pipeline[CONF_FORMAT] != "MP3": + args.append(("sample_bytes", 2)) + + return cg.StructInitializer(*args) + + +def _validate_pipeline(config: ConfigType) -> ConfigType: + # Inherit settings from speaker if not manually set + inherit_property_from(CONF_NUM_CHANNELS, CONF_SPEAKER)(config) + inherit_property_from(CONF_SAMPLE_RATE, CONF_SPEAKER)(config) + + # Opus only supports 48 kHz + if config.get(CONF_FORMAT) == "OPUS" and config.get(CONF_SAMPLE_RATE) != 48000: + raise cv.Invalid("Opus only supports a sample rate of 48000 Hz") + + audio.final_validate_audio_schema( + "speaker_source media_player", + audio_device=CONF_SPEAKER, + bits_per_sample=16, + channels=config.get(CONF_NUM_CHANNELS), + sample_rate=config.get(CONF_SAMPLE_RATE), + )(config) + + return config + + +PIPELINE_SCHEMA = cv.Schema( + { + cv.GenerateID(): cv.declare_id( + PipelineContext + ), # Needed to inherit audio settings from the speaker + cv.Required(CONF_SPEAKER): cv.use_id(speaker.Speaker), + cv.Required(CONF_SOURCES): cv.All( + cv.ensure_list(cv.use_id(media_source.MediaSource)), + cv.Length(min=1), + ), + cv.Optional(CONF_FORMAT, default="FLAC"): cv.enum(audio.AUDIO_FILE_TYPE_ENUM), + cv.Optional(CONF_SAMPLE_RATE): cv.int_range(min=1), + cv.Optional(CONF_NUM_CHANNELS): cv.int_range(1, 2), + } +) + + +def _validate_volume_settings(config: ConfigType) -> ConfigType: + # CONF_VOLUME_INITIAL is in the scaled volume domain (0.0-1.0) and doesn't need to be validated + if config[CONF_VOLUME_MIN] > config[CONF_VOLUME_MAX]: + raise cv.Invalid( + f"{CONF_VOLUME_MIN} ({config[CONF_VOLUME_MIN]}) must be less than or equal to {CONF_VOLUME_MAX} ({config[CONF_VOLUME_MAX]})" + ) + return config + + +CONFIG_SCHEMA = cv.All( + cv.Schema( + { + cv.Optional(CONF_VOLUME_INCREMENT, default=0.05): cv.percentage, + cv.Optional(CONF_VOLUME_INITIAL, default=0.5): cv.percentage, + cv.Optional(CONF_VOLUME_MAX, default=1.0): cv.percentage, + cv.Optional(CONF_VOLUME_MIN, default=0.0): cv.percentage, + 
cv.Required(CONF_MEDIA_PIPELINE): PIPELINE_SCHEMA, + cv.Optional(CONF_ON_MUTE): automation.validate_automation(single=True), + cv.Optional(CONF_ON_UNMUTE): automation.validate_automation(single=True), + cv.Optional(CONF_ON_VOLUME): automation.validate_automation(single=True), + } + ) + .extend(cv.COMPONENT_SCHEMA) + .extend(media_player.media_player_schema(SpeakerSourceMediaPlayer)), + cv.only_on_esp32, + _validate_volume_settings, +) + + +def _final_validate_codecs(config: ConfigType) -> ConfigType: + pipeline = config[CONF_MEDIA_PIPELINE] + fmt = pipeline[CONF_FORMAT] + if fmt == "NONE": + audio.request_flac_support() + audio.request_mp3_support() + audio.request_opus_support() + elif fmt == "FLAC": + audio.request_flac_support() + elif fmt == "MP3": + audio.request_mp3_support() + elif fmt == "OPUS": + audio.request_opus_support() + + return config + + +FINAL_VALIDATE_SCHEMA = cv.All( + cv.Schema( + { + cv.Required(CONF_MEDIA_PIPELINE): _validate_pipeline, + }, + extra=cv.ALLOW_EXTRA, + ), + _final_validate_codecs, +) + + +async def to_code(config: ConfigType) -> None: + var = cg.new_Pvariable(config[CONF_ID]) + await cg.register_component(var, config) + await media_player.register_media_player(var, config) + + cg.add(var.set_volume_increment(config[CONF_VOLUME_INCREMENT])) + cg.add(var.set_volume_initial(config[CONF_VOLUME_INITIAL])) + cg.add(var.set_volume_max(config[CONF_VOLUME_MAX])) + cg.add(var.set_volume_min(config[CONF_VOLUME_MIN])) + + pipeline_config = config[CONF_MEDIA_PIPELINE] + pipeline_enum = Pipeline.MEDIA_PIPELINE + + for source in pipeline_config[CONF_SOURCES]: + src = await cg.get_variable(source) + cg.add(var.add_media_source(pipeline_enum, src)) + + cg.add( + var.set_speaker( + pipeline_enum, + await cg.get_variable(pipeline_config[CONF_SPEAKER]), + ) + ) + if pipeline_config[CONF_FORMAT] != "NONE": + cg.add( + var.set_format( + pipeline_enum, + _get_supported_format_struct(pipeline_config), + ) + ) + + if on_mute := 
config.get(CONF_ON_MUTE): + await automation.build_automation( + var.get_mute_trigger(), + [], + on_mute, + ) + if on_unmute := config.get(CONF_ON_UNMUTE): + await automation.build_automation( + var.get_unmute_trigger(), + [], + on_unmute, + ) + if on_volume := config.get(CONF_ON_VOLUME): + await automation.build_automation( + var.get_volume_trigger(), + [(cg.float_, "x")], + on_volume, + ) diff --git a/esphome/components/speaker_source/speaker_source_media_player.cpp b/esphome/components/speaker_source/speaker_source_media_player.cpp new file mode 100644 index 0000000000..a3679891d2 --- /dev/null +++ b/esphome/components/speaker_source/speaker_source_media_player.cpp @@ -0,0 +1,546 @@ +#include "speaker_source_media_player.h" + +#ifdef USE_ESP32 + +#include "esphome/core/helpers.h" +#include "esphome/core/log.h" + +namespace esphome::speaker_source { + +static constexpr uint32_t MEDIA_CONTROLS_QUEUE_LENGTH = 20; + +static const char *const TAG = "speaker_source_media_player"; + +// SourceBinding method implementations (defined here because SpeakerSourceMediaPlayer is forward-declared in the +// header) + +// THREAD CONTEXT: Called from media source decode task thread +size_t SourceBinding::write_audio(const uint8_t *data, size_t length, uint32_t timeout_ms, + const audio::AudioStreamInfo &stream_info) { + return this->player->handle_media_output_(this->pipeline, this->source, data, length, timeout_ms, stream_info); +} + +// THREAD CONTEXT: Called from main loop (media source's loop() calls set_state_ which calls report_state) +void SourceBinding::report_state(media_source::MediaSourceState state) { + this->player->handle_media_state_changed_(this->pipeline, this->source, state); +} + +// THREAD CONTEXT: Called from media source task thread; uses defer() to marshal to main loop +void SourceBinding::request_volume(float volume) { + this->player->defer([this, volume]() { this->player->handle_volume_request_(volume); }); +} + +// THREAD CONTEXT: Called from media 
source task thread; uses defer() to marshal to main loop +void SourceBinding::request_mute(bool is_muted) { + this->player->defer([this, is_muted]() { this->player->handle_mute_request_(is_muted); }); +} + +// THREAD CONTEXT: Called from media source task thread; uses defer() to marshal to main loop +void SourceBinding::request_play_uri(const std::string &uri) { + this->player->defer([this, uri]() { this->player->handle_play_uri_request_(this->pipeline, uri); }); +} + +// THREAD CONTEXT: Called during code generation setup (main loop) +void SpeakerSourceMediaPlayer::add_media_source(uint8_t pipeline, media_source::MediaSource *media_source) { + auto &binding = + this->pipelines_[pipeline].sources.emplace_back(std::make_unique(this, media_source, pipeline)); + media_source->set_listener(binding.get()); +} + +void SpeakerSourceMediaPlayer::dump_config() { + ESP_LOGCONFIG(TAG, + "Speaker Source Media Player:\n" + " Volume Increment: %.2f\n" + " Volume Min: %.2f\n" + " Volume Max: %.2f", + this->volume_increment_, this->volume_min_, this->volume_max_); +} + +void SpeakerSourceMediaPlayer::setup() { + this->state = media_player::MEDIA_PLAYER_STATE_IDLE; + + this->media_control_command_queue_ = xQueueCreate(MEDIA_CONTROLS_QUEUE_LENGTH, sizeof(MediaPlayerControlCommand)); + + this->pref_ = this->make_entity_preference(); + + VolumeRestoreState volume_restore_state; + if (this->pref_.load(&volume_restore_state)) { + this->set_volume_(volume_restore_state.volume); + this->set_mute_state_(volume_restore_state.is_muted); + } else { + this->set_volume_(this->volume_initial_); + this->set_mute_state_(false); + } + + // Register callbacks to receive playback notifications from speakers + for (size_t i = 0; i < this->pipelines_.size(); i++) { + if (this->pipelines_[i].is_configured()) { + this->pipelines_[i].speaker->add_audio_output_callback([this, i](uint32_t frames, int64_t timestamp) { + this->handle_speaker_playback_callback_(frames, timestamp, i); + }); + } + } +} + +// 
THREAD CONTEXT: Called from the speaker's playback callback task (not main loop) +void SpeakerSourceMediaPlayer::handle_speaker_playback_callback_(uint32_t frames, int64_t timestamp, uint8_t pipeline) { + PipelineContext &ps = this->pipelines_[pipeline]; + + // Load once so the null check and use below are consistent + media_source::MediaSource *active_source = ps.active_source.load(std::memory_order_relaxed); + if (active_source == nullptr) { + return; + } + + // CAS loop to safely subtract frames without underflow. If pending_frames is reset to 0 (new source + // starting) between the load and the subtract, compare_exchange_weak will fail and reload the current value. + uint32_t current = ps.pending_frames.load(std::memory_order_relaxed); + uint32_t source_frames; + do { + source_frames = std::min(frames, current); + } while (source_frames > 0 && + !ps.pending_frames.compare_exchange_weak(current, current - source_frames, std::memory_order_relaxed)); + + if (source_frames > 0) { + // Notify the source about the played audio + active_source->notify_audio_played(source_frames, timestamp); + } +} + +// THREAD CONTEXT: Called from main loop via defer() +void SpeakerSourceMediaPlayer::handle_volume_request_(float volume) { + // Update the media player's volume + this->set_volume_(volume); + this->publish_state(); +} + +// THREAD CONTEXT: Called from main loop via defer() +void SpeakerSourceMediaPlayer::handle_mute_request_(bool is_muted) { + // Update the media player's mute state + this->set_mute_state_(is_muted); + this->publish_state(); +} + +// THREAD CONTEXT: Called from main loop via defer() +void SpeakerSourceMediaPlayer::handle_play_uri_request_(uint8_t pipeline, const std::string &uri) { + // Smart source is requesting the player to play a different URI + auto call = this->make_call(); + call.set_media_url(uri); + call.perform(); +} + +// THREAD CONTEXT: Called from main loop (media source's loop() calls set_state_ which calls report_state) +void 
SpeakerSourceMediaPlayer::handle_media_state_changed_(uint8_t pipeline, media_source::MediaSource *source, + media_source::MediaSourceState state) { + PipelineContext &ps = this->pipelines_[pipeline]; + + if (state == media_source::MediaSourceState::IDLE) { + // Source went idle - clear stopping flag if this was the source we asked to stop + if (ps.stopping_source == source) { + ps.stopping_source = nullptr; + } + + // Clear pending flag if this was the source we asked to play + if (ps.pending_source == source) { + ps.pending_source = nullptr; + } + + // Source went idle - clear it if it's the active source + if (ps.active_source == source) { + ps.last_source = ps.active_source; + ps.active_source = nullptr; + + // Finish the speaker to ensure it's ready for the next playback + ps.speaker->finish(); + } + } else if (state == media_source::MediaSourceState::PLAYING) { + // Source started playing - make it the active source if no one else is active + if (ps.active_source == nullptr) { + ps.active_source = source; + ps.last_source = nullptr; + + // Clear pending flag now that the source is active + if (ps.pending_source == source) { + ps.pending_source = nullptr; + } + } + } +} + +// THREAD CONTEXT: Called from media source decode task thread (not main loop). +// Reads ps.active_source (atomic), writes ps.pending_frames (atomic), and calls +// ps.speaker methods (speaker pointer is immutable after setup). +size_t SpeakerSourceMediaPlayer::handle_media_output_(uint8_t pipeline, media_source::MediaSource *source, + const uint8_t *data, size_t length, uint32_t timeout_ms, + const audio::AudioStreamInfo &stream_info) { + PipelineContext &ps = this->pipelines_[pipeline]; + + // Single read; the if-body only uses ps.speaker (immutable after setup) and the source parameter. 
+ if (ps.active_source == source) { + // This source is active - play the audio + if (ps.speaker->get_audio_stream_info() != stream_info) { + // Setup the speaker to play this stream + ps.speaker->set_audio_stream_info(stream_info); + vTaskDelay(pdMS_TO_TICKS(timeout_ms)); + return 0; + } + size_t bytes_written = ps.speaker->play(data, length, pdMS_TO_TICKS(timeout_ms)); + if (bytes_written > 0) { + // Track frames sent to speaker for this source + ps.pending_frames.fetch_add(stream_info.bytes_to_frames(bytes_written), std::memory_order_relaxed); + } + return bytes_written; + } + + // Not the active source - wait for state callback to set us as active when we transition to PLAYING + vTaskDelay(pdMS_TO_TICKS(timeout_ms)); + return 0; +} + +media_player::MediaPlayerState SpeakerSourceMediaPlayer::get_media_pipeline_state_( + media_source::MediaSource *source) const { + if (source != nullptr) { + switch (source->get_state()) { + case media_source::MediaSourceState::PLAYING: + return media_player::MEDIA_PLAYER_STATE_PLAYING; + case media_source::MediaSourceState::PAUSED: + return media_player::MEDIA_PLAYER_STATE_PAUSED; + case media_source::MediaSourceState::ERROR: + ESP_LOGE(TAG, "Source error"); + return media_player::MEDIA_PLAYER_STATE_IDLE; + case media_source::MediaSourceState::IDLE: + default: + return media_player::MEDIA_PLAYER_STATE_IDLE; + } + } + + return media_player::MEDIA_PLAYER_STATE_IDLE; +} + +void SpeakerSourceMediaPlayer::loop() { + // Process queued control commands + MediaPlayerControlCommand control_command; + + // Use peek to check command without removing it + if (xQueuePeek(this->media_control_command_queue_, &control_command, 0) == pdTRUE) { + bool command_executed = false; + uint8_t pipeline = control_command.pipeline; + + switch (control_command.type) { + case MediaPlayerControlCommand::PLAY_URI: { + command_executed = this->try_execute_play_uri_(*control_command.data.uri, pipeline); + break; + } + + case 
MediaPlayerControlCommand::SEND_COMMAND: { + PipelineContext &ps = this->pipelines_[pipeline]; + + // Determine target source: prefer active, fall back to last + media_source::MediaSource *target_source = nullptr; + if (ps.active_source != nullptr) { + target_source = ps.active_source; + } else if (ps.last_source != nullptr) { + target_source = ps.last_source; + } + + media_player::MediaPlayerCommand player_command = control_command.data.command; + switch (player_command) { + case media_player::MEDIA_PLAYER_COMMAND_TOGGLE: { + media_source::MediaSource *active_source = ps.active_source; + if ((active_source != nullptr) && (active_source->get_state() == media_source::MediaSourceState::PLAYING)) { + if (target_source != nullptr) { + target_source->handle_command(media_source::MediaSourceCommand::PAUSE); + } + } else { + if (target_source != nullptr) { + target_source->handle_command(media_source::MediaSourceCommand::PLAY); + } + } + break; + } + + case media_player::MEDIA_PLAYER_COMMAND_PLAY: { + if (target_source != nullptr) { + target_source->handle_command(media_source::MediaSourceCommand::PLAY); + } + break; + } + + case media_player::MEDIA_PLAYER_COMMAND_PAUSE: { + if (target_source != nullptr) { + target_source->handle_command(media_source::MediaSourceCommand::PAUSE); + } + break; + } + + case media_player::MEDIA_PLAYER_COMMAND_STOP: { + if (target_source != nullptr) { + target_source->handle_command(media_source::MediaSourceCommand::STOP); + } + break; + } + + default: + break; + } + + command_executed = true; + break; + } + } + + // Only remove from queue if successfully executed + if (command_executed) { + xQueueReceive(this->media_control_command_queue_, &control_command, 0); + + // Delete the allocated string for PLAY_URI commands + if (control_command.type == MediaPlayerControlCommand::PLAY_URI) { + delete control_command.data.uri; + } + } + } + + // Update state based on active sources + media_player::MediaPlayerState old_state = this->state; + + 
PipelineContext &media_ps = this->pipelines_[MEDIA_PIPELINE]; + this->state = this->get_media_pipeline_state_(media_ps.active_source); + + if (this->state != old_state) { + this->publish_state(); + ESP_LOGD(TAG, "State changed to %s", media_player::media_player_state_to_string(this->state)); + } +} + +media_source::MediaSource *SpeakerSourceMediaPlayer::find_source_for_uri_(const std::string &uri, uint8_t pipeline) { + PipelineContext &ps = this->pipelines_[pipeline]; + media_source::MediaSource *first_match = nullptr; + for (auto &binding : ps.sources) { + if (binding->source->can_handle(uri)) { + // Prefer an idle source; otherwise remember the first match (will be stopped by try_execute_play_uri_) + if (binding->source->get_state() == media_source::MediaSourceState::IDLE) { + return binding->source; + } + if (first_match == nullptr) { + first_match = binding->source; + } + } + } + return first_match; +} + +bool SpeakerSourceMediaPlayer::try_execute_play_uri_(const std::string &uri, uint8_t pipeline) { + // Find target source + media_source::MediaSource *target_source = this->find_source_for_uri_(uri, pipeline); + if (target_source == nullptr) { + ESP_LOGW(TAG, "No source for URI"); + ESP_LOGV(TAG, "URI: %s", uri.c_str()); + return true; // Remove from queue (unrecoverable) + } + + PipelineContext &ps = this->pipelines_[pipeline]; + + media_source::MediaSource *active_source = ps.active_source; + + // If active source exists and is not IDLE, stop it and wait + if (active_source != nullptr) { + media_source::MediaSourceState active_state = active_source->get_state(); + if (active_state != media_source::MediaSourceState::IDLE) { + // Only send STOP command once per source - check if we've already asked this source to stop + if (ps.stopping_source != active_source) { + ESP_LOGV(TAG, "Pipeline %u: stopping active source", pipeline); + active_source->handle_command(media_source::MediaSourceCommand::STOP); + ps.speaker->stop(); + ps.stopping_source = active_source; + } 
+ return false; // Leave in queue, retry next loop + } + } + + // Also check target source directly - handles case where source errored before PLAYING state + media_source::MediaSourceState target_state = target_source->get_state(); + if (target_state != media_source::MediaSourceState::IDLE) { + // Only send STOP command once per source + if (ps.stopping_source != target_source) { + ESP_LOGV(TAG, "Pipeline %u: target source busy, stopping", pipeline); + target_source->handle_command(media_source::MediaSourceCommand::STOP); + ps.speaker->stop(); + ps.stopping_source = target_source; + } + return false; // Leave in queue, retry next loop + } + + // Clear stopping flag since we're past the stopping phase + ps.stopping_source = nullptr; + + // Check if speaker is ready + if (!ps.speaker->is_stopped()) { + return false; // Speaker not ready yet, retry later + } + + // Set pending source so handle_media_state_changed_ can recognize it when the source transitions to PLAYING + ps.pending_source = target_source; + + // Speaker is ready, try to play + if (!target_source->play_uri(uri)) { + ESP_LOGE(TAG, "Pipeline %u: Failed to play URI: %s", pipeline, uri.c_str()); + ps.pending_source = nullptr; + } + + // Reset pending frame counter for this pipeline since we're starting a new source + ps.pending_frames.store(0, std::memory_order_relaxed); + + return true; // Remove from queue +} + +// THREAD CONTEXT: Called from main loop only. 
Entry points: +// - HA/automation commands (direct) +// - handle_play_uri_request_() via make_call().perform() (deferred from source tasks) +void SpeakerSourceMediaPlayer::control(const media_player::MediaPlayerCall &call) { + if (!this->is_ready()) { + return; + } + + MediaPlayerControlCommand control_command; + control_command.pipeline = MEDIA_PIPELINE; + + auto media_url = call.get_media_url(); + if (media_url.has_value()) { + control_command.type = MediaPlayerControlCommand::PLAY_URI; + // Heap allocation is unavoidable: URIs from Home Assistant are arbitrary-length (media URLs with tokens + // can easily exceed 500 bytes). Deleted after the command is consumed. FreeRTOS queues require items to be + // copyable, so we store a pointer to the string in the queue rather than the string itself. + control_command.data.uri = new std::string(media_url.value()); + if (xQueueSend(this->media_control_command_queue_, &control_command, 0) != pdTRUE) { + delete control_command.data.uri; + ESP_LOGE(TAG, "Queue full, URI dropped"); + } + return; + } + + auto volume = call.get_volume(); + if (volume.has_value()) { + this->set_volume_(volume.value()); + this->publish_state(); + return; + } + + auto cmd = call.get_command(); + if (cmd.has_value()) { + switch (cmd.value()) { + case media_player::MEDIA_PLAYER_COMMAND_MUTE: + this->set_mute_state_(true); + break; + case media_player::MEDIA_PLAYER_COMMAND_UNMUTE: + this->set_mute_state_(false); + break; + case media_player::MEDIA_PLAYER_COMMAND_VOLUME_UP: + this->set_volume_(std::min(1.0f, this->volume + this->volume_increment_)); + break; + case media_player::MEDIA_PLAYER_COMMAND_VOLUME_DOWN: + this->set_volume_(std::max(0.0f, this->volume - this->volume_increment_)); + break; + default: + // Queue command for processing in loop() + control_command.type = MediaPlayerControlCommand::SEND_COMMAND; + control_command.data.command = cmd.value(); + if (xQueueSend(this->media_control_command_queue_, &control_command, 0) != pdTRUE) { + 
ESP_LOGE(TAG, "Queue full, command dropped"); + } + return; + } + this->publish_state(); + } +} + +media_player::MediaPlayerTraits SpeakerSourceMediaPlayer::get_traits() { + auto traits = media_player::MediaPlayerTraits(); + traits.set_supports_pause(true); + + for (const auto &ps : this->pipelines_) { + if (ps.format.has_value()) { + traits.get_supported_formats().push_back(ps.format.value()); + } + } + + return traits; +} + +void SpeakerSourceMediaPlayer::save_volume_restore_state_() { + VolumeRestoreState volume_restore_state; + volume_restore_state.volume = this->volume; + volume_restore_state.is_muted = this->is_muted_; + this->pref_.save(&volume_restore_state); +} + +void SpeakerSourceMediaPlayer::set_mute_state_(bool mute_state, bool publish) { + if (this->is_muted_ == mute_state) { + return; + } + + for (auto &ps : this->pipelines_) { + if (ps.is_configured()) { + ps.speaker->set_mute_state(mute_state); + } + } + + this->is_muted_ = mute_state; + + if (publish) { + this->save_volume_restore_state_(); + } + + // Notify all media sources about the mute state change + for (auto &ps : this->pipelines_) { + for (auto &binding : ps.sources) { + binding->source->notify_mute_changed(mute_state); + } + } + + if (mute_state) { + this->defer([this]() { this->mute_trigger_.trigger(); }); + } else { + this->defer([this]() { this->unmute_trigger_.trigger(); }); + } +} + +void SpeakerSourceMediaPlayer::set_volume_(float volume, bool publish) { + // Remap the volume to fit within the configured limits + float bounded_volume = remap(volume, 0.0f, 1.0f, this->volume_min_, this->volume_max_); + + for (auto &ps : this->pipelines_) { + if (ps.is_configured()) { + ps.speaker->set_volume(bounded_volume); + } + } + + if (publish) { + this->volume = volume; + } + + // Notify all media sources about the volume change + for (auto &ps : this->pipelines_) { + for (auto &binding : ps.sources) { + binding->source->notify_volume_changed(volume); + } + } + + // Turn on the mute state if 
the volume is effectively zero, off otherwise. + // Pass publish=false to avoid saving twice. + if (volume < 0.001) { + this->set_mute_state_(true, false); + } else { + this->set_mute_state_(false, false); + } + + // Save after mute mutation so the restored state has the correct is_muted_ value + if (publish) { + this->save_volume_restore_state_(); + } + + this->defer([this, volume]() { this->volume_trigger_.trigger(volume); }); +} + +} // namespace esphome::speaker_source + +#endif // USE_ESP32 diff --git a/esphome/components/speaker_source/speaker_source_media_player.h b/esphome/components/speaker_source/speaker_source_media_player.h new file mode 100644 index 0000000000..7896fef295 --- /dev/null +++ b/esphome/components/speaker_source/speaker_source_media_player.h @@ -0,0 +1,217 @@ +#pragma once + +#include "esphome/core/defines.h" + +#ifdef USE_ESP32 + +#include "esphome/components/audio/audio.h" +#include "esphome/components/media_source/media_source.h" +#include "esphome/components/media_player/media_player.h" +#include "esphome/components/speaker/speaker.h" + +#include "esphome/core/automation.h" +#include "esphome/core/component.h" +#include "esphome/core/preferences.h" + +#include +#include +#include +#include +#include +#include + +namespace esphome::speaker_source { + +// THREADING MODEL: +// This component coordinates media sources that run their own decode tasks with speakers +// that have their own playback callback tasks. Three thread contexts exist: +// +// - Main loop task: setup(), loop(), dump_config(), handle_media_state_changed_(), +// handle_volume_request_(), handle_mute_request_(), handle_play_uri_request_(), +// set_volume_(), set_mute_state_(), control(), get_media_pipeline_state_(), +// find_source_for_uri_(), try_execute_play_uri_(), save_volume_restore_state_() +// +// - Media source task(s): handle_media_output_() via SourceBinding::write_audio(). +// Called from each source's decode task thread when streaming audio data. 
+// Reads ps.active_source (atomic), writes ps.pending_frames (atomic), and calls +// ps.speaker methods (speaker pointer is immutable after setup). +// +// - Speaker callback task: handle_speaker_playback_callback_() via speaker's +// add_audio_output_callback(). Called when the speaker finishes writing frames to the DAC. +// Reads ps.active_source (atomic), writes ps.pending_frames (atomic), and calls +// active_source->notify_audio_played(). +// +// control() is only called from the main loop (HA/automation commands). +// Source tasks use defer() for all requests (volume, mute, play_uri). +// +// Thread-safe communication: +// - FreeRTOS queue (media_control_command_queue_): control() -> loop() for play/command dispatch +// - defer(): SourceBinding::request_volume/request_mute/request_play_uri -> main loop +// - Atomic fields (active_source, pending_frames): shared between all three thread contexts +// +// Non-atomic pipeline fields (last_source, stopping_source, pending_source) are only accessed +// from the main loop thread. + +enum Pipeline : uint8_t { + MEDIA_PIPELINE = 0, +}; + +// Forward declaration +class SpeakerSourceMediaPlayer; + +/// @brief Per-source listener binding that captures the source pointer at registration time. +/// Each binding implements MediaSourceListener and forwards callbacks to the player with the source identified. +/// Defined before PipelineContext so pipelines can own their bindings directly. 
+struct SourceBinding : public media_source::MediaSourceListener { + SourceBinding(SpeakerSourceMediaPlayer *player, media_source::MediaSource *source, uint8_t pipeline) + : player(player), source(source), pipeline(pipeline) {} + SpeakerSourceMediaPlayer *player; + media_source::MediaSource *source; + uint8_t pipeline; + + // Implementations are in the .cpp file because SpeakerSourceMediaPlayer is only forward-declared here + size_t write_audio(const uint8_t *data, size_t length, uint32_t timeout_ms, + const audio::AudioStreamInfo &stream_info) override; + void report_state(media_source::MediaSourceState state) override; + void request_volume(float volume) override; + void request_mute(bool is_muted) override; + void request_play_uri(const std::string &uri) override; +}; + +struct PipelineContext { + speaker::Speaker *speaker{nullptr}; + optional format; + + std::atomic active_source{nullptr}; + media_source::MediaSource *last_source{nullptr}; + media_source::MediaSource *stopping_source{nullptr}; // Source we've asked to stop, awaiting IDLE + media_source::MediaSource *pending_source{nullptr}; // Source we've asked to play, awaiting PLAYING + + // Each SourceBinding pairs a MediaSource* with its listener implementation. + // Uses unique_ptr so binding addresses are stable and set_listener() can be called in add_media_source(). + // Uses std::vector because the count varies across instances (multiple speaker_source media players may exist). + std::vector> sources; + + // Track frames sent to speaker to correlate with playback callbacks. + // Atomic because it is written from the main loop/source tasks and read/decremented from the speaker playback + // callback. 
+ std::atomic pending_frames{0}; + + /// @brief Check if this pipeline is configured (has a speaker assigned) + bool is_configured() const { return this->speaker != nullptr; } +}; + +struct MediaPlayerControlCommand { + enum Type : uint8_t { + PLAY_URI, // Find a source that can handle this URI and play it + SEND_COMMAND, // Send command to active source + }; + Type type; + uint8_t pipeline; + + union { + std::string *uri; // Owned pointer, must delete after xQueueReceive (for PLAY_URI) + media_player::MediaPlayerCommand command; + } data; +}; + +struct VolumeRestoreState { + float volume; + bool is_muted; +}; + +class SpeakerSourceMediaPlayer : public Component, public media_player::MediaPlayer { + friend struct SourceBinding; + + public: + float get_setup_priority() const override { return esphome::setup_priority::PROCESSOR; } + void setup() override; + void loop() override; + void dump_config() override; + + // MediaPlayer implementations + media_player::MediaPlayerTraits get_traits() override; + bool is_muted() const override { return this->is_muted_; } + + // Percentage to increase or decrease the volume for volume up or volume down commands + void set_volume_increment(float volume_increment) { this->volume_increment_ = volume_increment; } + + // Volume used initially on first boot when no volume had been previously saved + void set_volume_initial(float volume_initial) { this->volume_initial_ = volume_initial; } + + void set_volume_max(float volume_max) { this->volume_max_ = volume_max; } + void set_volume_min(float volume_min) { this->volume_min_ = volume_min; } + + /// @brief Adds a media source to a pipeline and registers this player as its listener + void add_media_source(uint8_t pipeline, media_source::MediaSource *media_source); + + void set_speaker(uint8_t pipeline, speaker::Speaker *speaker) { this->pipelines_[pipeline].speaker = speaker; } + void set_format(uint8_t pipeline, const media_player::MediaPlayerSupportedFormat &format) { + 
this->pipelines_[pipeline].format = format; + } + + Trigger<> *get_mute_trigger() { return &this->mute_trigger_; } + Trigger<> *get_unmute_trigger() { return &this->unmute_trigger_; } + Trigger *get_volume_trigger() { return &this->volume_trigger_; } + + protected: + // Callbacks from source bindings (pipeline index is captured at binding creation time) + size_t handle_media_output_(uint8_t pipeline, media_source::MediaSource *source, const uint8_t *data, size_t length, + uint32_t timeout_ms, const audio::AudioStreamInfo &stream_info); + void handle_media_state_changed_(uint8_t pipeline, media_source::MediaSource *source, + media_source::MediaSourceState state); + void handle_volume_request_(float volume); + void handle_mute_request_(bool is_muted); + void handle_play_uri_request_(uint8_t pipeline, const std::string &uri); + + void handle_speaker_playback_callback_(uint32_t frames, int64_t timestamp, uint8_t pipeline); + + // Receives commands from HA or from the voice assistant component + // Sends commands to the media_control_command_queue_ + void control(const media_player::MediaPlayerCall &call) override; + + /// @brief Updates this->volume and saves volume/mute state to flash for restoration if publish is true. + void set_volume_(float volume, bool publish = true); + + /// @brief Sets the mute state. + /// @param mute_state If true, audio will be muted. If false, audio will be unmuted + /// @param publish If true, saves volume/mute state to flash for restoration + void set_mute_state_(bool mute_state, bool publish = true); + + /// @brief Saves the current volume and mute state to the flash for restoration. 
+ void save_volume_restore_state_(); + + /// @brief Determine media player state from the media pipeline's active source + /// @param media_source Active source for the media pipeline (may be nullptr) + /// @return The appropriate MediaPlayerState + media_player::MediaPlayerState get_media_pipeline_state_(media_source::MediaSource *media_source) const; + + bool try_execute_play_uri_(const std::string &uri, uint8_t pipeline); + media_source::MediaSource *find_source_for_uri_(const std::string &uri, uint8_t pipeline); + QueueHandle_t media_control_command_queue_; + + // Pipeline context for media pipeline. See THREADING MODEL at top of namespace for access rules. + std::array pipelines_; + + // Used to save volume/mute state for restoration on reboot + ESPPreferenceObject pref_; + + Trigger<> mute_trigger_; + Trigger<> unmute_trigger_; + Trigger volume_trigger_; + + // The amount to change the volume on volume up/down commands + float volume_increment_; + + // The initial volume used by Setup when no previous volume was saved + float volume_initial_; + + float volume_max_; + float volume_min_; + + bool is_muted_{false}; +}; + +} // namespace esphome::speaker_source + +#endif // USE_ESP32 diff --git a/tests/components/speaker_source/common.yaml b/tests/components/speaker_source/common.yaml new file mode 100644 index 0000000000..cfcb065f57 --- /dev/null +++ b/tests/components/speaker_source/common.yaml @@ -0,0 +1,43 @@ +i2s_audio: + i2s_lrclk_pin: ${i2s_bclk_pin} + i2s_bclk_pin: ${i2s_lrclk_pin} + i2s_mclk_pin: ${i2s_mclk_pin} + +speaker: + - platform: i2s_audio + id: speaker_id + dac_type: external + i2s_dout_pin: ${i2s_dout_pin} + sample_rate: 48000 + num_channels: 2 + +audio_file: + - id: test_audio + file: + type: local + path: $component_dir/test.wav + +media_source: + - platform: audio_file + id: audio_file_source + +media_player: + - platform: speaker_source + id: media_player_id + name: Media Player + volume_increment: 0.02 + volume_initial: 0.75 + volume_max: 
0.95 + volume_min: 0.0 + media_pipeline: + speaker: speaker_id + format: FLAC + num_channels: 1 + sources: + - audio_file_source + on_mute: + - media_player.pause: + id: media_player_id + on_unmute: + - media_player.play: + id: media_player_id diff --git a/tests/components/speaker_source/test.esp32-idf.yaml b/tests/components/speaker_source/test.esp32-idf.yaml new file mode 100644 index 0000000000..e2439ebdf2 --- /dev/null +++ b/tests/components/speaker_source/test.esp32-idf.yaml @@ -0,0 +1,9 @@ +substitutions: + scl_pin: GPIO16 + sda_pin: GPIO17 + i2s_bclk_pin: GPIO27 + i2s_lrclk_pin: GPIO26 + i2s_mclk_pin: GPIO25 + i2s_dout_pin: GPIO23 + +<<: !include common.yaml diff --git a/tests/components/speaker_source/test.wav b/tests/components/speaker_source/test.wav new file mode 100644 index 0000000000000000000000000000000000000000..f9d07ef2238eb2fcb355055466d3789ee1a1fe0b GIT binary patch literal 46 vcmWIYbaPW