[audio_file] Use microDecoder library instead of manual task management/decoding (#16237)
CI / Create common environment (push) Has been cancelled
CI / Check pylint (push) Has been cancelled
CI / Run script/ci-custom (push) Has been cancelled
CI / Check import esphome.__main__ time (push) Has been cancelled
CI / Test downstream esphome/device-builder (push) Has been cancelled
CI / Run pytest (macOS-latest, 3.11) (push) Has been cancelled
CI / Run pytest (macOS-latest, 3.14) (push) Has been cancelled
CI / Run pytest (ubuntu-latest, 3.11) (push) Has been cancelled
CI / Run pytest (ubuntu-latest, 3.13) (push) Has been cancelled
CI / Run pytest (ubuntu-latest, 3.14) (push) Has been cancelled
CI / Run pytest (windows-latest, 3.11) (push) Has been cancelled
CI / Run pytest (windows-latest, 3.14) (push) Has been cancelled
CI / Determine which jobs to run (push) Has been cancelled
CI / Run integration tests (${{ matrix.bucket.name }}) (push) Has been cancelled
CI / Run C++ unit tests (push) Has been cancelled
CI / Run CodSpeed benchmarks (push) Has been cancelled
CI / Run script/clang-tidy for ESP32 IDF (push) Has been cancelled
CI / Run script/clang-tidy for ESP8266 (push) Has been cancelled
CI / Run script/clang-tidy for ZEPHYR (push) Has been cancelled
CI / Run script/clang-tidy for ESP32 Arduino (push) Has been cancelled
CI / Run script/clang-tidy for ESP32 Arduino 1/4 (push) Has been cancelled
CI / Run script/clang-tidy for ESP32 Arduino 2/4 (push) Has been cancelled
CI / Run script/clang-tidy for ESP32 Arduino 3/4 (push) Has been cancelled
CI / Run script/clang-tidy for ESP32 Arduino 4/4 (push) Has been cancelled
CI / Test components batch (${{ matrix.components }}) (push) Has been cancelled
CI / pre-commit.ci lite (push) Has been cancelled
CI / Build target branch for memory impact (push) Has been cancelled
CI / Build PR branch for memory impact (push) Has been cancelled
CI / Comment memory impact (push) Has been cancelled
CI / CI Status (push) Has been cancelled
CI for docker images / Build docker containers (docker, ubuntu-24.04) (push) Has been cancelled
CI for docker images / Build docker containers (docker, ubuntu-24.04-arm) (push) Has been cancelled
CI for docker images / Build docker containers (ha-addon, ubuntu-24.04) (push) Has been cancelled
CI for docker images / Build docker containers (ha-addon, ubuntu-24.04-arm) (push) Has been cancelled

This commit is contained in:
Kevin Ahrendt
2026-05-04 21:12:26 -04:00
committed by GitHub
parent f33d137669
commit ea2b2b3920
3 changed files with 165 additions and 238 deletions
@@ -1,5 +1,7 @@
from typing import Any
import esphome.codegen as cg
from esphome.components import media_source, psram
from esphome.components import audio, esp32, media_source, psram
import esphome.config_validation as cv
from esphome.const import CONF_ID, CONF_TASK_STACK_IN_PSRAM
from esphome.types import ConfigType
@@ -13,19 +15,30 @@ AudioFileMediaSource = audio_file_ns.class_(
"AudioFileMediaSource", cg.Component, media_source.MediaSource
)
def _request_micro_decoder(config: ConfigType) -> ConfigType:
    """Schema hook that calls ``audio.request_micro_decoder_support()``.

    The configuration is handed back untouched, so the function can be used as
    one of the steps of a ``cv.All`` chain.
    """
    audio.request_micro_decoder_support()
    return config
def _validate_task_stack_in_psram(value: Any) -> bool:
    """Validate the ``task_stack_in_psram`` option.

    The raw value is first run through ``cv.boolean``. A falsy result is
    returned as-is; a truthy result is additionally passed through
    ``cv.requires_component(psram.DOMAIN)``, and that validator's result is
    returned.
    """
    enabled = cv.boolean(value)
    if not enabled:
        # Option is off: no PSRAM requirement to enforce.
        return enabled
    return cv.requires_component(psram.DOMAIN)(enabled)
CONFIG_SCHEMA = cv.All(
media_source.media_source_schema(
AudioFileMediaSource,
)
.extend(
{
cv.Optional(CONF_TASK_STACK_IN_PSRAM): cv.All(
cv.boolean, cv.requires_component(psram.DOMAIN)
),
cv.Optional(CONF_TASK_STACK_IN_PSRAM): _validate_task_stack_in_psram,
}
)
.extend(cv.COMPONENT_SCHEMA),
cv.only_on_esp32,
_request_micro_decoder,
)
@@ -34,5 +47,8 @@ async def to_code(config: ConfigType) -> None:
await cg.register_component(var, config)
await media_source.register_media_source(var, config)
if CONF_TASK_STACK_IN_PSRAM in config:
cg.add(var.set_task_stack_in_psram(config[CONF_TASK_STACK_IN_PSRAM]))
if config.get(CONF_TASK_STACK_IN_PSRAM):
cg.add(var.set_task_stack_in_psram(True))
esp32.add_idf_sdkconfig_option(
"CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY", True
)
@@ -2,281 +2,185 @@
#ifdef USE_ESP32
#include "esphome/components/audio/audio_decoder.h"
#include "esphome/core/log.h"
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <cinttypes>
#include <cstring>
namespace esphome::audio_file {
namespace { // anonymous namespace for internal linkage
struct AudioSinkAdapter : public audio::AudioSinkCallback {
media_source::MediaSource *source;
audio::AudioStreamInfo stream_info;
size_t audio_sink_write(uint8_t *data, size_t length, TickType_t ticks_to_wait) override {
return this->source->write_output(data, length, pdTICKS_TO_MS(ticks_to_wait), this->stream_info);
}
};
} // namespace
#if defined(USE_AUDIO_OPUS_SUPPORT)
static constexpr uint32_t DECODE_TASK_STACK_SIZE = 5 * 1024;
#else
static constexpr uint32_t DECODE_TASK_STACK_SIZE = 3 * 1024;
#endif
static const char *const TAG = "audio_file_media_source";
// Bit assignments for the event group shared between the main loop and the decode task.
enum EventGroupBits : uint32_t {
  // Playback start request: raised by play_uri(), consumed by loop()
  REQUEST_START = 1U << 0,

  // Commands sent from the main loop to the decode task
  COMMAND_STOP = 1U << 1,
  COMMAND_PAUSE = 1U << 2,

  // One-shot lifecycle signals raised by the decode task and cleared by loop()
  TASK_STARTING = 1U << 7,
  TASK_RUNNING = 1U << 8,
  TASK_STOPPING = 1U << 9,
  TASK_STOPPED = 1U << 10,
  TASK_ERROR = 1U << 11,

  // Level-triggered state that the decode task both sets and clears
  TASK_PAUSED = 1U << 12,

  // Mask covering every valid FreeRTOS event group bit
  ALL_BITS = 0x00FFFFFF,
};
static constexpr uint32_t AUDIO_WRITE_TIMEOUT_MS = 50;
static constexpr size_t DECODER_TASK_STACK_SIZE = 5120;
static constexpr uint8_t DECODER_TASK_PRIORITY = 2;
static constexpr uint32_t PAUSE_POLL_DELAY_MS = 20;
static constexpr char URI_PREFIX[] = "audio-file://";
namespace { // anonymous namespace for internal linkage
// Translates an audio::AudioFileType into the matching micro_decoder::AudioFileType.
//
// The two enums cannot simply be cast into each other: audio's numeric values shift with the
// USE_AUDIO_*_SUPPORT defines, whereas micro_decoder's values are fixed and guarded by
// MICRO_DECODER_CODEC_*. The codec request flow in audio/__init__.py keeps both sets of guards
// aligned, so a switch carrying the same #ifdefs covers every reachable case. Any value without
// an enabled case maps to micro_decoder::AudioFileType::NONE.
micro_decoder::AudioFileType to_micro_decoder_type(audio::AudioFileType type) {
  micro_decoder::AudioFileType converted = micro_decoder::AudioFileType::NONE;
  switch (type) {
#ifdef USE_AUDIO_FLAC_SUPPORT
    case audio::AudioFileType::FLAC:
      converted = micro_decoder::AudioFileType::FLAC;
      break;
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
    case audio::AudioFileType::MP3:
      converted = micro_decoder::AudioFileType::MP3;
      break;
#endif
#ifdef USE_AUDIO_OPUS_SUPPORT
    case audio::AudioFileType::OPUS:
      converted = micro_decoder::AudioFileType::OPUS;
      break;
#endif
#ifdef USE_AUDIO_WAV_SUPPORT
    case audio::AudioFileType::WAV:
      converted = micro_decoder::AudioFileType::WAV;
      break;
#endif
    default:
      break;
  }
  return converted;
}
} // namespace
void AudioFileMediaSource::dump_config() {
ESP_LOGCONFIG(TAG, "Audio File Media Source:");
ESP_LOGCONFIG(TAG, " Task Stack in PSRAM: %s", this->task_stack_in_psram_ ? "Yes" : "No");
ESP_LOGCONFIG(TAG,
"Audio File Media Source:\n"
" Decoder Task Stack in PSRAM: %s",
YESNO(this->decoder_task_stack_in_psram_));
}
// Component setup hook: disables the loop, fills a micro_decoder::DecoderConfig from the
// file-level constants and the PSRAM stack flag, creates the decoder, and registers this object
// as the decoder's listener. The component is marked failed if the decoder pointer is null.
// NOTE(review): this span comes from a rendered diff, so the event-group statements and the
// decoder statements appear together; confirm which ones belong to the current revision.
void AudioFileMediaSource::setup() {
this->disable_loop();
this->event_group_ = xEventGroupCreate();
if (this->event_group_ == nullptr) {
ESP_LOGE(TAG, "Failed to create event group");
micro_decoder::DecoderConfig config;
config.audio_write_timeout_ms = AUDIO_WRITE_TIMEOUT_MS;
config.decoder_priority = DECODER_TASK_PRIORITY;
config.decoder_stack_size = DECODER_TASK_STACK_SIZE;
config.decoder_stack_in_psram = this->decoder_task_stack_in_psram_;
// NOTE(review): std::make_unique signals allocation failure by throwing std::bad_alloc (or
// terminating when exceptions are disabled); it does not return nullptr, so the null check
// below cannot observe a failed allocation.
this->decoder_ = std::make_unique<micro_decoder::DecoderSource>(config);
if (this->decoder_ == nullptr) {
ESP_LOGE(TAG, "Failed to allocate decoder");
this->mark_failed();
return;
}
// Decoder callbacks (audio writes, stream info, state changes) are delivered to this object.
this->decoder_->set_listener(this);
}
void AudioFileMediaSource::loop() {
EventBits_t event_bits = xEventGroupGetBits(this->event_group_);
void AudioFileMediaSource::loop() { this->decoder_->loop(); }
if (event_bits & REQUEST_START) {
xEventGroupClearBits(this->event_group_, REQUEST_START);
this->decoding_state_ = AudioFileDecodingState::START_TASK;
}
switch (this->decoding_state_) {
case AudioFileDecodingState::START_TASK: {
if (!this->decode_task_.is_created()) {
xEventGroupClearBits(this->event_group_, ALL_BITS);
if (!this->decode_task_.create(decode_task, "AudioFileDec", DECODE_TASK_STACK_SIZE, this, 1,
this->task_stack_in_psram_)) {
ESP_LOGE(TAG, "Failed to create task");
this->status_momentary_error("task_create", 1000);
this->set_state_(media_source::MediaSourceState::ERROR);
this->decoding_state_ = AudioFileDecodingState::IDLE;
return;
}
}
this->decoding_state_ = AudioFileDecodingState::DECODING;
break;
}
case AudioFileDecodingState::DECODING: {
if (event_bits & TASK_STARTING) {
ESP_LOGD(TAG, "Starting");
xEventGroupClearBits(this->event_group_, TASK_STARTING);
}
if (event_bits & TASK_RUNNING) {
ESP_LOGV(TAG, "Started");
xEventGroupClearBits(this->event_group_, TASK_RUNNING);
this->set_state_(media_source::MediaSourceState::PLAYING);
}
if ((event_bits & TASK_PAUSED) && this->get_state() != media_source::MediaSourceState::PAUSED) {
this->set_state_(media_source::MediaSourceState::PAUSED);
} else if (!(event_bits & TASK_PAUSED) && this->get_state() == media_source::MediaSourceState::PAUSED) {
this->set_state_(media_source::MediaSourceState::PLAYING);
}
if (event_bits & TASK_STOPPING) {
ESP_LOGV(TAG, "Stopping");
xEventGroupClearBits(this->event_group_, TASK_STOPPING);
}
if (event_bits & TASK_ERROR) {
// Report error so the orchestrator knows playback failed; task will have already logged the specific error
this->set_state_(media_source::MediaSourceState::ERROR);
}
if (event_bits & TASK_STOPPED) {
ESP_LOGD(TAG, "Stopped");
xEventGroupClearBits(this->event_group_, ALL_BITS);
this->decode_task_.deallocate();
this->set_state_(media_source::MediaSourceState::IDLE);
this->decoding_state_ = AudioFileDecodingState::IDLE;
}
break;
}
case AudioFileDecodingState::IDLE: {
if (this->get_state() == media_source::MediaSourceState::ERROR && !this->status_has_error()) {
this->set_state_(media_source::MediaSourceState::IDLE);
}
break;
}
}
if ((this->decoding_state_ == AudioFileDecodingState::IDLE) &&
(this->get_state() == media_source::MediaSourceState::IDLE)) {
this->disable_loop();
}
}
// Reports whether this source accepts the given URI, i.e. whether it begins with URI_PREFIX.
bool AudioFileMediaSource::can_handle(const std::string &uri) const {
  const bool has_prefix = uri.starts_with(URI_PREFIX);
  return has_prefix;
}
// Called from the orchestrator's main loop, so no synchronization needed with loop()
bool AudioFileMediaSource::play_uri(const std::string &uri) {
if (!this->is_ready() || this->is_failed() || this->status_has_error() || !this->has_listener() ||
xEventGroupGetBits(this->event_group_) & REQUEST_START) {
if (!this->is_ready() || this->is_failed() || this->status_has_error() || !this->has_listener()) {
return false;
}
// Check if source is already playing
if (this->get_state() != media_source::MediaSourceState::IDLE) {
ESP_LOGE(TAG, "Cannot play '%s': source is busy", uri.c_str());
return false;
}
// Validate URI starts with "audio-file://"
if (!uri.starts_with("audio-file://")) {
if (!uri.starts_with(URI_PREFIX)) {
ESP_LOGE(TAG, "Invalid URI: '%s'", uri.c_str());
return false;
}
// Strip "audio-file://" prefix and find the file
const char *file_id = uri.c_str() + 13; // "audio-file://" is 13 characters
const char *file_id = uri.c_str() + sizeof(URI_PREFIX) - 1;
this->current_file_ = nullptr;
for (const auto &named_file : get_named_audio_files()) {
if (strcmp(named_file.file_id, file_id) == 0) {
this->current_file_ = named_file.file;
xEventGroupSetBits(this->event_group_, EventGroupBits::REQUEST_START);
this->enable_loop();
return true;
break;
}
}
ESP_LOGE(TAG, "Unknown file: '%s'", file_id);
if (this->current_file_ == nullptr) {
ESP_LOGE(TAG, "Unknown file: '%s'", file_id);
return false;
}
micro_decoder::AudioFileType type = to_micro_decoder_type(this->current_file_->file_type);
if (this->decoder_->play_buffer(this->current_file_->data, this->current_file_->length, type)) {
this->pause_.store(false, std::memory_order_relaxed);
this->enable_loop();
return true;
}
ESP_LOGE(TAG, "Failed to start playback of '%s'", file_id);
return false;
}
// Called from the orchestrator's main loop, so no synchronization needed with loop()
void AudioFileMediaSource::handle_command(media_source::MediaSourceCommand command) {
if (this->decoding_state_ != AudioFileDecodingState::DECODING) {
return;
}
switch (command) {
case media_source::MediaSourceCommand::STOP:
xEventGroupSetBits(this->event_group_, EventGroupBits::COMMAND_STOP);
this->decoder_->stop();
break;
case media_source::MediaSourceCommand::PAUSE:
xEventGroupSetBits(this->event_group_, EventGroupBits::COMMAND_PAUSE);
// Only valid while actively playing; ignoring from IDLE/ERROR/PAUSED prevents the state
// machine from getting stuck in PAUSED when no playback is active (which would block the
// next play_uri() call via its IDLE-state precondition).
if (this->get_state() != media_source::MediaSourceState::PLAYING)
break;
// PAUSE does not stop the decoder task. Instead, on_audio_write() returns 0 and temporarily
// yields, which fills any internal buffering and applies back pressure that effectively
// pauses the decoder task.
this->set_state_(media_source::MediaSourceState::PAUSED);
this->pause_.store(true, std::memory_order_relaxed);
break;
case media_source::MediaSourceCommand::PLAY:
xEventGroupClearBits(this->event_group_, EventGroupBits::COMMAND_PAUSE);
if (this->get_state() != media_source::MediaSourceState::PAUSED)
break;
this->set_state_(media_source::MediaSourceState::PLAYING);
this->pause_.store(false, std::memory_order_relaxed);
break;
default:
break;
}
}
void AudioFileMediaSource::decode_task(void *params) {
AudioFileMediaSource *this_source = static_cast<AudioFileMediaSource *>(params);
// Decoder-task callback that delivers decoded audio. The data is forwarded to the orchestrator's
// listener, which is responsible for being thread-safe with respect to its own audio writer.
// Returns the value reported by write_output(), or 0 while playback is paused.
size_t AudioFileMediaSource::on_audio_write(const uint8_t *data, size_t length, uint32_t timeout_ms) {
  const bool paused = this->pause_.load(std::memory_order_relaxed);
  if (!paused) {
    return this->write_output(data, length, timeout_ms, this->stream_info_);
  }
  // Paused: accept nothing and yield for one poll interval before reporting zero bytes written.
  vTaskDelay(pdMS_TO_TICKS(PAUSE_POLL_DELAY_MS));
  return 0;
}
do { // do-while(false) ensures RAII objects are destroyed on all exit paths via break
// Decoder-task callback invoked before the first on_audio_write(). Copies the decoder's stream
// description (bits per sample, channel count, sample rate) into the audio::AudioStreamInfo
// that accompanies every subsequent write.
void AudioFileMediaSource::on_stream_info(const micro_decoder::AudioStreamInfo &info) {
  const auto bits_per_sample = info.get_bits_per_sample();
  const auto channels = info.get_channels();
  const auto sample_rate = info.get_sample_rate();
  this->stream_info_ = audio::AudioStreamInfo(bits_per_sample, channels, sample_rate);
}
xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_STARTING);
// 0 bytes for input transfer buffer makes it an inplace buffer
std::unique_ptr<audio::AudioDecoder> decoder = make_unique<audio::AudioDecoder>(0, 4096);
esp_err_t err = decoder->start(this_source->current_file_->file_type);
if (err != ESP_OK) {
ESP_LOGE(TAG, "Failed to start decoder: %s", esp_err_to_name(err));
xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_ERROR | EventGroupBits::TASK_STOPPING);
// microDecoder invokes on_state_change() from inside decoder_->loop(), so this runs on the main
// loop thread and it's safe to call set_state_() directly.
void AudioFileMediaSource::on_state_change(micro_decoder::DecoderState state) {
switch (state) {
case micro_decoder::DecoderState::IDLE:
this->set_state_(media_source::MediaSourceState::IDLE);
this->disable_loop();
break;
}
// Add the file as a const data source
decoder->add_source(this_source->current_file_->data, this_source->current_file_->length);
xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_RUNNING);
AudioSinkAdapter audio_sink;
bool has_stream_info = false;
while (true) {
EventBits_t event_bits = xEventGroupGetBits(this_source->event_group_);
if (event_bits & EventGroupBits::COMMAND_STOP) {
break;
}
bool paused = event_bits & EventGroupBits::COMMAND_PAUSE;
decoder->set_pause_output_state(paused);
if (paused) {
xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_PAUSED);
vTaskDelay(pdMS_TO_TICKS(20));
} else {
xEventGroupClearBits(this_source->event_group_, EventGroupBits::TASK_PAUSED);
}
// Will stop gracefully once finished with the current file
audio::AudioDecoderState decoder_state = decoder->decode(true);
if (decoder_state == audio::AudioDecoderState::FINISHED) {
break;
} else if (decoder_state == audio::AudioDecoderState::FAILED) {
ESP_LOGE(TAG, "Decoder failed");
xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_ERROR);
break;
}
if (!has_stream_info && decoder->get_audio_stream_info().has_value()) {
has_stream_info = true;
audio::AudioStreamInfo stream_info = decoder->get_audio_stream_info().value();
ESP_LOGD(TAG, "Bits per sample: %d, Channels: %d, Sample rate: %" PRIu32, stream_info.get_bits_per_sample(),
stream_info.get_channels(), stream_info.get_sample_rate());
if (stream_info.get_bits_per_sample() != 16 || stream_info.get_channels() > 2) {
ESP_LOGE(TAG, "Incompatible audio stream. Only 16 bits per sample and 1 or 2 channels are supported");
xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_ERROR);
break;
}
audio_sink.source = this_source;
audio_sink.stream_info = stream_info;
esp_err_t err = decoder->add_sink(&audio_sink);
if (err != ESP_OK) {
ESP_LOGE(TAG, "Failed to add sink: %s", esp_err_to_name(err));
xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_ERROR);
break;
}
}
}
xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_STOPPING);
} while (false);
// All RAII objects from the do-while block (decoder, audio_sink, etc.) are now destroyed.
xEventGroupSetBits(this_source->event_group_, EventGroupBits::TASK_STOPPED);
vTaskSuspend(nullptr); // Suspend this task indefinitely until the loop method deletes it
case micro_decoder::DecoderState::PLAYING:
this->set_state_(media_source::MediaSourceState::PLAYING);
break;
case micro_decoder::DecoderState::FAILED:
this->set_state_(media_source::MediaSourceState::ERROR);
break;
default:
break;
}
}
} // namespace esphome::audio_file
@@ -8,41 +8,48 @@
#include "esphome/components/audio_file/audio_file.h"
#include "esphome/components/media_source/media_source.h"
#include "esphome/core/component.h"
#include "esphome/core/static_task.h"
#include <freertos/FreeRTOS.h>
#include <freertos/event_groups.h>
#include <micro_decoder/decoder_source.h>
#include <micro_decoder/types.h>
#include <atomic>
#include <memory>
#include <string>
namespace esphome::audio_file {
// Phases of the decoding state machine tracked by AudioFileMediaSource.
enum class AudioFileDecodingState : uint8_t {
  START_TASK = 0,  // a decode task has been requested and must be started
  DECODING = 1,    // the decode task is active
  IDLE = 2,        // nothing is being decoded
};
class AudioFileMediaSource : public Component, public media_source::MediaSource {
// Inherits from two unrelated listener-style interfaces:
// - media_source::MediaSource: this source reports state and writes audio *to* an orchestrator
// (the orchestrator calls set_listener() on us with a MediaSourceListener*).
// - micro_decoder::DecoderListener: the underlying decoder calls back *into* us with decoded
// audio and state changes (we call decoder_->set_listener(this) in setup()).
class AudioFileMediaSource : public Component, public media_source::MediaSource, public micro_decoder::DecoderListener {
public:
void setup() override;
void loop() override;
void dump_config() override;
void set_task_stack_in_psram(bool task_stack_in_psram) { this->decoder_task_stack_in_psram_ = task_stack_in_psram; }
// MediaSource interface implementation
bool play_uri(const std::string &uri) override;
void handle_command(media_source::MediaSourceCommand command) override;
bool can_handle(const std::string &uri) const override { return uri.starts_with("audio-file://"); }
bool can_handle(const std::string &uri) const override;
void set_task_stack_in_psram(bool task_stack_in_psram) { this->task_stack_in_psram_ = task_stack_in_psram; }
// DecoderListener interface implementation
size_t on_audio_write(const uint8_t *data, size_t length, uint32_t timeout_ms) override;
void on_stream_info(const micro_decoder::AudioStreamInfo &info) override;
void on_state_change(micro_decoder::DecoderState state) override;
protected:
static void decode_task(void *params);
std::unique_ptr<micro_decoder::DecoderSource> decoder_;
audio::AudioStreamInfo stream_info_;
audio::AudioFile *current_file_{nullptr};
AudioFileDecodingState decoding_state_{AudioFileDecodingState::IDLE};
EventGroupHandle_t event_group_{nullptr};
StaticTask decode_task_;
bool task_stack_in_psram_{false};
// Written from the main loop in handle_command(), read from the decoder task in
// on_audio_write(). Must be atomic to avoid a data race.
std::atomic<bool> pause_{false};
bool decoder_task_stack_in_psram_{false};
};
} // namespace esphome::audio_file