From ea2b2b39201a80c13d2ae1620179b2edc7bb40e9 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Mon, 4 May 2026 21:12:26 -0400 Subject: [PATCH] [audio_file] Use microDecoder library instead of manual task management/decoding (#16237) --- .../audio_file/media_source/__init__.py | 28 +- .../media_source/audio_file_media_source.cpp | 332 +++++++----------- .../media_source/audio_file_media_source.h | 43 ++- 3 files changed, 165 insertions(+), 238 deletions(-) diff --git a/esphome/components/audio_file/media_source/__init__.py b/esphome/components/audio_file/media_source/__init__.py index e9e292a2b2..635a51b610 100644 --- a/esphome/components/audio_file/media_source/__init__.py +++ b/esphome/components/audio_file/media_source/__init__.py @@ -1,5 +1,7 @@ +from typing import Any + import esphome.codegen as cg -from esphome.components import media_source, psram +from esphome.components import audio, esp32, media_source, psram import esphome.config_validation as cv from esphome.const import CONF_ID, CONF_TASK_STACK_IN_PSRAM from esphome.types import ConfigType @@ -13,19 +15,30 @@ AudioFileMediaSource = audio_file_ns.class_( "AudioFileMediaSource", cg.Component, media_source.MediaSource ) + +def _request_micro_decoder(config: ConfigType) -> ConfigType: + audio.request_micro_decoder_support() + return config + + +def _validate_task_stack_in_psram(value: Any) -> bool: + if value := cv.boolean(value): + return cv.requires_component(psram.DOMAIN)(value) + return value + + CONFIG_SCHEMA = cv.All( media_source.media_source_schema( AudioFileMediaSource, ) .extend( { - cv.Optional(CONF_TASK_STACK_IN_PSRAM): cv.All( - cv.boolean, cv.requires_component(psram.DOMAIN) - ), + cv.Optional(CONF_TASK_STACK_IN_PSRAM): _validate_task_stack_in_psram, } ) .extend(cv.COMPONENT_SCHEMA), cv.only_on_esp32, + _request_micro_decoder, ) @@ -34,5 +47,8 @@ async def to_code(config: ConfigType) -> None: await cg.register_component(var, config) await media_source.register_media_source(var, config) - if CONF_TASK_STACK_IN_PSRAM in config: - cg.add(var.set_task_stack_in_psram(config[CONF_TASK_STACK_IN_PSRAM])) + if config.get(CONF_TASK_STACK_IN_PSRAM): + cg.add(var.set_task_stack_in_psram(True)) + esp32.add_idf_sdkconfig_option( + "CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY", True + ) diff --git a/esphome/components/audio_file/media_source/audio_file_media_source.cpp b/esphome/components/audio_file/media_source/audio_file_media_source.cpp index fbb5ecd88d..0cda1eca9e 100644 --- a/esphome/components/audio_file/media_source/audio_file_media_source.cpp +++ b/esphome/components/audio_file/media_source/audio_file_media_source.cpp @@ -2,281 +2,185 @@ #ifdef USE_ESP32 -#include "esphome/components/audio/audio_decoder.h" +#include "esphome/core/log.h" + +#include +#include -#include #include namespace esphome::audio_file { -namespace { // anonymous namespace for internal linkage -struct AudioSinkAdapter : public audio::AudioSinkCallback { - media_source::MediaSource *source; - audio::AudioStreamInfo stream_info; - - size_t audio_sink_write(uint8_t *data, size_t length, TickType_t ticks_to_wait) override { - return this->source->write_output(data, length, pdTICKS_TO_MS(ticks_to_wait), this->stream_info); - } -}; -} // namespace - -#if defined(USE_AUDIO_OPUS_SUPPORT) -static constexpr uint32_t DECODE_TASK_STACK_SIZE = 5 * 1024; -#else -static constexpr uint32_t DECODE_TASK_STACK_SIZE = 3 * 1024; -#endif - static const char *const TAG = "audio_file_media_source"; -enum EventGroupBits : uint32_t { - // Requests to start playback (set by play_uri, handled by loop) - REQUEST_START = (1 << 0), - // Commands from main loop to decode task - COMMAND_STOP = (1 << 1), - COMMAND_PAUSE = (1 << 2), - // Decode task lifecycle signals (one-shot, cleared by loop) - TASK_STARTING = (1 << 7), - TASK_RUNNING = (1 << 8), - TASK_STOPPING = (1 << 9), - TASK_STOPPED = (1 << 10), - TASK_ERROR = (1 << 11), - // Decode task state (level-triggered, set/cleared by decode task) - TASK_PAUSED = (1 << 12), - ALL_BITS = 0x00FFFFFF, // All valid FreeRTOS event group bits -}; +static constexpr uint32_t AUDIO_WRITE_TIMEOUT_MS = 50; +static constexpr size_t DECODER_TASK_STACK_SIZE = 5120; +static constexpr uint8_t DECODER_TASK_PRIORITY = 2; +static constexpr uint32_t PAUSE_POLL_DELAY_MS = 20; +static constexpr char URI_PREFIX[] = "audio-file://"; + +namespace { // anonymous namespace for internal linkage + +// audio::AudioFileType and micro_decoder::AudioFileType use different numeric layouts (audio's +// values shift with USE_AUDIO_*_SUPPORT defines; micro_decoder's are fixed and guarded by +// MICRO_DECODER_CODEC_*). The codec request flow in audio/__init__.py keeps the two sets of +// guards aligned, so a switch with matching #ifdefs covers all reachable cases. +micro_decoder::AudioFileType to_micro_decoder_type(audio::AudioFileType type) { + switch (type) { +#ifdef USE_AUDIO_FLAC_SUPPORT + case audio::AudioFileType::FLAC: + return micro_decoder::AudioFileType::FLAC; +#endif +#ifdef USE_AUDIO_MP3_SUPPORT + case audio::AudioFileType::MP3: + return micro_decoder::AudioFileType::MP3; +#endif +#ifdef USE_AUDIO_OPUS_SUPPORT + case audio::AudioFileType::OPUS: + return micro_decoder::AudioFileType::OPUS; +#endif +#ifdef USE_AUDIO_WAV_SUPPORT + case audio::AudioFileType::WAV: + return micro_decoder::AudioFileType::WAV; +#endif + default: + return micro_decoder::AudioFileType::NONE; + } +} + +} // namespace void AudioFileMediaSource::dump_config() { - ESP_LOGCONFIG(TAG, "Audio File Media Source:"); - ESP_LOGCONFIG(TAG, " Task Stack in PSRAM: %s", this->task_stack_in_psram_ ? "Yes" : "No"); + ESP_LOGCONFIG(TAG, + "Audio File Media Source:\n" + " Decoder Task Stack in PSRAM: %s", + YESNO(this->decoder_task_stack_in_psram_)); } void AudioFileMediaSource::setup() { this->disable_loop(); - this->event_group_ = xEventGroupCreate(); - if (this->event_group_ == nullptr) { - ESP_LOGE(TAG, "Failed to create event group"); + micro_decoder::DecoderConfig config; + config.audio_write_timeout_ms = AUDIO_WRITE_TIMEOUT_MS; + config.decoder_priority = DECODER_TASK_PRIORITY; + config.decoder_stack_size = DECODER_TASK_STACK_SIZE; + config.decoder_stack_in_psram = this->decoder_task_stack_in_psram_; + + this->decoder_ = std::make_unique(config); + if (this->decoder_ == nullptr) { + ESP_LOGE(TAG, "Failed to allocate decoder"); this->mark_failed(); return; } + this->decoder_->set_listener(this); } -void AudioFileMediaSource::loop() { - EventBits_t event_bits = xEventGroupGetBits(this->event_group_); +void AudioFileMediaSource::loop() { this->decoder_->loop(); } - if (event_bits & REQUEST_START) { - xEventGroupClearBits(this->event_group_, REQUEST_START); - this->decoding_state_ = AudioFileDecodingState::START_TASK; - } - - switch (this->decoding_state_) { - case AudioFileDecodingState::START_TASK: { - if (!this->decode_task_.is_created()) { - xEventGroupClearBits(this->event_group_, ALL_BITS); - if (!this->decode_task_.create(decode_task, "AudioFileDec", DECODE_TASK_STACK_SIZE, this, 1, - this->task_stack_in_psram_)) { - ESP_LOGE(TAG, "Failed to create task"); - this->status_momentary_error("task_create", 1000); - this->set_state_(media_source::MediaSourceState::ERROR); - this->decoding_state_ = AudioFileDecodingState::IDLE; - return; - } - } - this->decoding_state_ = AudioFileDecodingState::DECODING; - break; - } - case AudioFileDecodingState::DECODING: { - if (event_bits & TASK_STARTING) { - ESP_LOGD(TAG, "Starting"); - xEventGroupClearBits(this->event_group_, TASK_STARTING); - } - - if (event_bits & TASK_RUNNING) { - ESP_LOGV(TAG, "Started"); - xEventGroupClearBits(this->event_group_, TASK_RUNNING); - this->set_state_(media_source::MediaSourceState::PLAYING); - } - - if ((event_bits & TASK_PAUSED) && this->get_state() != media_source::MediaSourceState::PAUSED) { - this->set_state_(media_source::MediaSourceState::PAUSED); - } else if (!(event_bits & TASK_PAUSED) && this->get_state() == media_source::MediaSourceState::PAUSED) { - this->set_state_(media_source::MediaSourceState::PLAYING); - } - - if (event_bits & TASK_STOPPING) { - ESP_LOGV(TAG, "Stopping"); - xEventGroupClearBits(this->event_group_, TASK_STOPPING); - } - - if (event_bits & TASK_ERROR) { - // Report error so the orchestrator knows playback failed; task will have already logged the specific error - this->set_state_(media_source::MediaSourceState::ERROR); - } - - if (event_bits & TASK_STOPPED) { - ESP_LOGD(TAG, "Stopped"); - xEventGroupClearBits(this->event_group_, ALL_BITS); - - this->decode_task_.deallocate(); - this->set_state_(media_source::MediaSourceState::IDLE); - this->decoding_state_ = AudioFileDecodingState::IDLE; - } - break; - } - case AudioFileDecodingState::IDLE: { - if (this->get_state() == media_source::MediaSourceState::ERROR && !this->status_has_error()) { - this->set_state_(media_source::MediaSourceState::IDLE); - } - break; - } - } - - if ((this->decoding_state_ == AudioFileDecodingState::IDLE) && - (this->get_state() == media_source::MediaSourceState::IDLE)) { - this->disable_loop(); - } -} +bool AudioFileMediaSource::can_handle(const std::string &uri) const { return uri.starts_with(URI_PREFIX); } // Called from the orchestrator's main loop, so no synchronization needed with loop() bool AudioFileMediaSource::play_uri(const std::string &uri) { - if (!this->is_ready() || this->is_failed() || this->status_has_error() || !this->has_listener() || - xEventGroupGetBits(this->event_group_) & REQUEST_START) { + if (!this->is_ready() || this->is_failed() || this->status_has_error() || !this->has_listener()) { return false; } - // Check if source is already playing if (this->get_state() != media_source::MediaSourceState::IDLE) { ESP_LOGE(TAG, "Cannot play '%s': source is busy", uri.c_str()); return false; } - // Validate URI starts with "audio-file://" - if (!uri.starts_with("audio-file://")) { + if (!uri.starts_with(URI_PREFIX)) { ESP_LOGE(TAG, "Invalid URI: '%s'", uri.c_str()); return false; } - // Strip "audio-file://" prefix and find the file - const char *file_id = uri.c_str() + 13; // "audio-file://" is 13 characters - + const char *file_id = uri.c_str() + sizeof(URI_PREFIX) - 1; + this->current_file_ = nullptr; for (const auto &named_file : get_named_audio_files()) { if (strcmp(named_file.file_id, file_id) == 0) { this->current_file_ = named_file.file; - xEventGroupSetBits(this->event_group_, EventGroupBits::REQUEST_START); - this->enable_loop(); - return true; + break; } } - ESP_LOGE(TAG, "Unknown file: '%s'", file_id); + if (this->current_file_ == nullptr) { + ESP_LOGE(TAG, "Unknown file: '%s'", file_id); + return false; + } + + micro_decoder::AudioFileType type = to_micro_decoder_type(this->current_file_->file_type); + if (this->decoder_->play_buffer(this->current_file_->data, this->current_file_->length, type)) { + this->pause_.store(false, std::memory_order_relaxed); + this->enable_loop(); + return true; + } + + ESP_LOGE(TAG, "Failed to start playback of '%s'", file_id); return false; } // Called from the orchestrator's main loop, so no synchronization needed with loop() void AudioFileMediaSource::handle_command(media_source::MediaSourceCommand command) { - if (this->decoding_state_ != AudioFileDecodingState::DECODING) { - return; - } - switch (command) { case media_source::MediaSourceCommand::STOP: - xEventGroupSetBits(this->event_group_, EventGroupBits::COMMAND_STOP); + this->decoder_->stop(); break; case media_source::MediaSourceCommand::PAUSE: - xEventGroupSetBits(this->event_group_, EventGroupBits::COMMAND_PAUSE); + // Only valid while actively playing; ignoring from IDLE/ERROR/PAUSED prevents the state + // machine from getting stuck in PAUSED when no playback is active (which would block the + // next play_uri() call via its IDLE-state precondition). + if (this->get_state() != media_source::MediaSourceState::PLAYING) + break; + // PAUSE does not stop the decoder task. Instead, on_audio_write() returns 0 and temporarily + // yields, which fills any internal buffering and applies back pressure that effectively + // pauses the decoder task. + this->set_state_(media_source::MediaSourceState::PAUSED); + this->pause_.store(true, std::memory_order_relaxed); break; case media_source::MediaSourceCommand::PLAY: - xEventGroupClearBits(this->event_group_, EventGroupBits::COMMAND_PAUSE); + if (this->get_state() != media_source::MediaSourceState::PAUSED) + break; + this->set_state_(media_source::MediaSourceState::PLAYING); + this->pause_.store(false, std::memory_order_relaxed); break; default: break; } } -void AudioFileMediaSource::decode_task(void *params) { - AudioFileMediaSource *this_source = static_cast(params); +// Called from the decoder task. Forwards to the orchestrator's listener, which is responsible for +// being thread-safe with respect to its own audio writer. +size_t AudioFileMediaSource::on_audio_write(const uint8_t *data, size_t length, uint32_t timeout_ms) { + if (this->pause_.load(std::memory_order_relaxed)) { + vTaskDelay(pdMS_TO_TICKS(PAUSE_POLL_DELAY_MS)); + return 0; + } + return this->write_output(data, length, timeout_ms, this->stream_info_); +} - do { // do-while(false) ensures RAII objects are destroyed on all exit paths via break +// Called from the decoder task before the first on_audio_write(). +void AudioFileMediaSource::on_stream_info(const micro_decoder::AudioStreamInfo &info) { + this->stream_info_ = audio::AudioStreamInfo(info.get_bits_per_sample(), info.get_channels(), info.get_sample_rate()); +} - xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_STARTING); - - // 0 bytes for input transfer buffer makes it an inplace buffer - std::unique_ptr decoder = make_unique(0, 4096); - - esp_err_t err = decoder->start(this_source->current_file_->file_type); - if (err != ESP_OK) { - ESP_LOGE(TAG, "Failed to start decoder: %s", esp_err_to_name(err)); - xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_ERROR | EventGroupBits::TASK_STOPPING); +// microDecoder invokes on_state_change() from inside decoder_->loop(), so this runs on the main +// loop thread and it's safe to call set_state_() directly. +void AudioFileMediaSource::on_state_change(micro_decoder::DecoderState state) { + switch (state) { + case micro_decoder::DecoderState::IDLE: + this->set_state_(media_source::MediaSourceState::IDLE); + this->disable_loop(); break; - } - - // Add the file as a const data source - decoder->add_source(this_source->current_file_->data, this_source->current_file_->length); - - xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_RUNNING); - - AudioSinkAdapter audio_sink; - bool has_stream_info = false; - - while (true) { - EventBits_t event_bits = xEventGroupGetBits(this_source->event_group_); - - if (event_bits & EventGroupBits::COMMAND_STOP) { - break; - } - - bool paused = event_bits & EventGroupBits::COMMAND_PAUSE; - decoder->set_pause_output_state(paused); - if (paused) { - xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_PAUSED); - vTaskDelay(pdMS_TO_TICKS(20)); - } else { - xEventGroupClearBits(this_source->event_group_, EventGroupBits::TASK_PAUSED); - } - - // Will stop gracefully once finished with the current file - audio::AudioDecoderState decoder_state = decoder->decode(true); - - if (decoder_state == audio::AudioDecoderState::FINISHED) { - break; - } else if (decoder_state == audio::AudioDecoderState::FAILED) { - ESP_LOGE(TAG, "Decoder failed"); - xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_ERROR); - break; - } - - if (!has_stream_info && decoder->get_audio_stream_info().has_value()) { - has_stream_info = true; - - audio::AudioStreamInfo stream_info = decoder->get_audio_stream_info().value(); - - ESP_LOGD(TAG, "Bits per sample: %d, Channels: %d, Sample rate: %" PRIu32, stream_info.get_bits_per_sample(), - stream_info.get_channels(), stream_info.get_sample_rate()); - - if (stream_info.get_bits_per_sample() != 16 || stream_info.get_channels() > 2) { - ESP_LOGE(TAG, "Incompatible audio stream. Only 16 bits per sample and 1 or 2 channels are supported"); - xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_ERROR); - break; - } - - audio_sink.source = this_source; - audio_sink.stream_info = stream_info; - esp_err_t err = decoder->add_sink(&audio_sink); - if (err != ESP_OK) { - ESP_LOGE(TAG, "Failed to add sink: %s", esp_err_to_name(err)); - xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_ERROR); - break; - } - } - } - - xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_STOPPING); - } while (false); - - // All RAII objects from the do-while block (decoder, audio_sink, etc.) are now destroyed. - - xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_STOPPED); - vTaskSuspend(nullptr); // Suspend this task indefinitely until the loop method deletes it + case micro_decoder::DecoderState::PLAYING: + this->set_state_(media_source::MediaSourceState::PLAYING); + break; + case micro_decoder::DecoderState::FAILED: + this->set_state_(media_source::MediaSourceState::ERROR); + break; + default: + break; + } } } // namespace esphome::audio_file diff --git a/esphome/components/audio_file/media_source/audio_file_media_source.h b/esphome/components/audio_file/media_source/audio_file_media_source.h index 75e18c13b8..2c6189f272 100644 --- a/esphome/components/audio_file/media_source/audio_file_media_source.h +++ b/esphome/components/audio_file/media_source/audio_file_media_source.h @@ -8,41 +8,48 @@ #include "esphome/components/audio_file/audio_file.h" #include "esphome/components/media_source/media_source.h" #include "esphome/core/component.h" -#include "esphome/core/static_task.h" -#include -#include +#include +#include + +#include +#include +#include namespace esphome::audio_file { -enum class AudioFileDecodingState : uint8_t { - START_TASK, - DECODING, - IDLE, -}; - -class AudioFileMediaSource : public Component, public media_source::MediaSource { +// Inherits from two unrelated listener-style interfaces: +// - media_source::MediaSource: this source reports state and writes audio *to* an orchestrator +// (the orchestrator calls set_listener() on us with a MediaSourceListener*). +// - micro_decoder::DecoderListener: the underlying decoder calls back *into* us with decoded +// audio and state changes (we call decoder_->set_listener(this) in setup()). +class AudioFileMediaSource : public Component, public media_source::MediaSource, public micro_decoder::DecoderListener { public: void setup() override; void loop() override; void dump_config() override; + void set_task_stack_in_psram(bool task_stack_in_psram) { this->decoder_task_stack_in_psram_ = task_stack_in_psram; } + // MediaSource interface implementation bool play_uri(const std::string &uri) override; void handle_command(media_source::MediaSourceCommand command) override; - bool can_handle(const std::string &uri) const override { return uri.starts_with("audio-file://"); } + bool can_handle(const std::string &uri) const override; - void set_task_stack_in_psram(bool task_stack_in_psram) { this->task_stack_in_psram_ = task_stack_in_psram; } + // DecoderListener interface implementation + size_t on_audio_write(const uint8_t *data, size_t length, uint32_t timeout_ms) override; + void on_stream_info(const micro_decoder::AudioStreamInfo &info) override; + void on_state_change(micro_decoder::DecoderState state) override; protected: - static void decode_task(void *params); - + std::unique_ptr decoder_; + audio::AudioStreamInfo stream_info_; audio::AudioFile *current_file_{nullptr}; - AudioFileDecodingState decoding_state_{AudioFileDecodingState::IDLE}; - EventGroupHandle_t event_group_{nullptr}; - StaticTask decode_task_; - bool task_stack_in_psram_{false}; + // Written from the main loop in handle_command(), read from the decoder task in + // on_audio_write(). Must be atomic to avoid a data race. + std::atomic pause_{false}; + bool decoder_task_stack_in_psram_{false}; }; } // namespace esphome::audio_file