[audio_http] Add a media source for playing audio from HTTP URLs (#15741)

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Kevin Ahrendt
2026-04-23 16:53:52 -04:00
committed by GitHub
parent f757cd1210
commit d759f1a567
7 changed files with 290 additions and 0 deletions
+1
View File
@@ -56,6 +56,7 @@ esphome/components/audio_adc/* @kbx81
esphome/components/audio_dac/* @kbx81
esphome/components/audio_file/* @kahrendt
esphome/components/audio_file/media_source/* @kahrendt
esphome/components/audio_http/* @kahrendt
esphome/components/axs15231/* @clydebarrow
esphome/components/b_parasite/* @rbaron
esphome/components/ballu/* @bazuchan
@@ -0,0 +1,163 @@
#include "audio_http_media_source.h"
#ifdef USE_ESP32
#include "esphome/core/log.h"
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <algorithm>
namespace esphome::audio_http {
// Log tag passed to the ESP_LOG* macros in this file.
static const char *const TAG = "audio_http_media_source";
// Decoder task / buffer tuning. Kept here as constants so the header stays free of magic numbers.
// Unless noted otherwise, each value is copied into micro_decoder::DecoderConfig by setup().
// Upper bound for the transfer buffer; setup() uses the smaller of this and half the ring buffer size.
static constexpr size_t DEFAULT_TRANSFER_BUFFER_SIZE = 8 * 1024; // Staging buffer between HTTP reader and decoder
static constexpr uint32_t HTTP_TIMEOUT_MS = 5000; // HTTP connect/read timeout
static constexpr uint32_t AUDIO_WRITE_TIMEOUT_MS = 50; // Max blocking time per on_audio_write() call
static constexpr uint32_t READER_WRITE_TIMEOUT_MS = 50; // Max blocking time when writing into the ring buffer
// Priorities assigned to DecoderConfig::reader_priority / decoder_priority.
static constexpr uint8_t READER_TASK_PRIORITY = 2;
static constexpr uint8_t DECODER_TASK_PRIORITY = 2;
// Stack sizes assigned to DecoderConfig::reader_stack_size / decoder_stack_size.
static constexpr size_t READER_TASK_STACK_SIZE = 4096;
static constexpr size_t DECODER_TASK_STACK_SIZE = 5120;
// Not part of DecoderConfig: how long on_audio_write() delays before returning 0 while paused.
static constexpr uint32_t PAUSE_POLL_DELAY_MS = 20;
// URI schemes accepted by can_handle() and play_uri().
static constexpr const char *const HTTP_URI_PREFIX = "http://";
static constexpr const char *const HTTPS_URI_PREFIX = "https://";
// Logs the configured buffer size and whether the decoder task stack is placed in PSRAM.
void AudioHTTPMediaSource::dump_config() {
  const size_t configured_buffer_size = this->buffer_size_;
  const bool stack_in_psram = this->decoder_task_stack_in_psram_;
  ESP_LOGCONFIG(TAG,
                "Audio HTTP Media Source:\n"
                " Buffer Size: %zu bytes\n"
                " Decoder Task Stack in PSRAM: %s",
                configured_buffer_size, YESNO(stack_in_psram));
}
void AudioHTTPMediaSource::setup() {
this->disable_loop();
micro_decoder::DecoderConfig config;
config.ring_buffer_size = this->buffer_size_;
// Keep the transfer buffer smaller than the ring buffer so the reader can top up the ring
// while the decoder is still draining it, instead of oscillating between empty and full.
config.transfer_buffer_size = std::min(DEFAULT_TRANSFER_BUFFER_SIZE, this->buffer_size_ / 2);
config.http_timeout_ms = HTTP_TIMEOUT_MS;
config.audio_write_timeout_ms = AUDIO_WRITE_TIMEOUT_MS;
config.reader_write_timeout_ms = READER_WRITE_TIMEOUT_MS;
config.reader_priority = READER_TASK_PRIORITY;
config.decoder_priority = DECODER_TASK_PRIORITY;
config.reader_stack_size = READER_TASK_STACK_SIZE;
config.decoder_stack_size = DECODER_TASK_STACK_SIZE;
config.decoder_stack_in_psram = this->decoder_task_stack_in_psram_;
this->decoder_ = std::make_unique<micro_decoder::DecoderSource>(config);
if (this->decoder_ == nullptr) {
ESP_LOGE(TAG, "Failed to allocate decoder");
this->mark_failed();
return;
}
this->decoder_->set_listener(this); // We inherit from micro_decoder::DecoderListener
}
// Forwards each main-loop iteration to the decoder.
void AudioHTTPMediaSource::loop() {
  this->decoder_->loop();
}
// Returns true when the URI begins with "http://" or "https://".
bool AudioHTTPMediaSource::can_handle(const std::string &uri) const {
  if (uri.starts_with(HTTP_URI_PREFIX)) {
    return true;
  }
  return uri.starts_with(HTTPS_URI_PREFIX);
}
// Starts playback of an HTTP(S) URL and returns true only when the decoder accepted it.
// Invoked from the orchestrator's main loop, so it needs no synchronization with loop().
bool AudioHTTPMediaSource::play_uri(const std::string &uri) {
  // Refuse silently when the component is not operational or nobody is listening.
  const bool operational = this->is_ready() && !this->is_failed() && !this->status_has_error();
  if (!operational || !this->has_listener()) {
    return false;
  }

  // Only one stream at a time: a new URL is accepted solely from the IDLE state.
  const bool idle = this->get_state() == media_source::MediaSourceState::IDLE;
  if (!idle) {
    ESP_LOGE(TAG, "Cannot play '%s': source is busy", uri.c_str());
    return false;
  }

  // The URI has to use the "http://" or "https://" scheme.
  const bool scheme_ok = uri.starts_with(HTTP_URI_PREFIX) || uri.starts_with(HTTPS_URI_PREFIX);
  if (!scheme_ok) {
    ESP_LOGE(TAG, "Invalid URI: '%s'", uri.c_str());
    return false;
  }

  if (!this->decoder_->play_url(uri)) {
    ESP_LOGE(TAG, "Failed to start playback of '%s'", uri.c_str());
    return false;
  }

  // Playback started: clear any pause left over from a previous stream and start driving the
  // decoder from loop().
  this->pause_.store(false, std::memory_order_relaxed);
  this->enable_loop();
  return true;
}
// Applies a STOP, PAUSE or PLAY command; every other command is ignored.
// Invoked from the orchestrator's main loop, so it needs no synchronization with loop().
void AudioHTTPMediaSource::handle_command(media_source::MediaSourceCommand command) {
  if (command == media_source::MediaSourceCommand::STOP) {
    this->decoder_->stop();
    return;
  }

  if (command == media_source::MediaSourceCommand::PAUSE) {
    // Pausing is honoured only while actively playing. Accepting it from IDLE/ERROR/PAUSED could
    // leave the state machine stuck in PAUSED with no playback running, which would then block
    // the next play_uri() call through its IDLE-state precondition.
    if (this->get_state() == media_source::MediaSourceState::PLAYING) {
      // The decoder task keeps running. on_audio_write() starts returning 0 and yielding, so the
      // ring buffer fills up and the resulting back pressure effectively pauses both the decoder
      // and the HTTP reader tasks.
      this->set_state_(media_source::MediaSourceState::PAUSED);
      this->pause_.store(true, std::memory_order_relaxed);
    }
    return;
  }

  if (command == media_source::MediaSourceCommand::PLAY) {
    // Resume only from PAUSED; never fabricate a PLAYING state out of IDLE/ERROR.
    if (this->get_state() == media_source::MediaSourceState::PAUSED) {
      this->set_state_(media_source::MediaSourceState::PLAYING);
      this->pause_.store(false, std::memory_order_relaxed);
    }
    return;
  }
}
// Runs on the decoder task. Hands decoded audio to the orchestrator's listener, which is
// responsible for being thread-safe with respect to its own audio writer. Returns the value
// reported by write_output(), or 0 while paused.
size_t AudioHTTPMediaSource::on_audio_write(const uint8_t *data, size_t length, uint32_t timeout_ms) {
  const bool paused = this->pause_.load(std::memory_order_relaxed);
  if (!paused) {
    return this->write_output(data, length, timeout_ms, this->stream_info_);
  }

  // Paused: consume nothing and yield briefly instead of returning immediately.
  vTaskDelay(pdMS_TO_TICKS(PAUSE_POLL_DELAY_MS));
  return 0;
}
// Runs on the decoder task ahead of the first on_audio_write(). Stores the stream format
// (bits per sample, channel count, sample rate) that on_audio_write() passes to write_output().
void AudioHTTPMediaSource::on_stream_info(const micro_decoder::AudioStreamInfo &info) {
  const auto bits_per_sample = info.get_bits_per_sample();
  const auto channels = info.get_channels();
  const auto sample_rate = info.get_sample_rate();
  this->stream_info_ = audio::AudioStreamInfo(bits_per_sample, channels, sample_rate);
}
// Translates decoder states into media source states. microDecoder calls this from within
// decoder_->loop(), i.e. on the main loop thread, so calling set_state_() directly is safe.
// Decoder states other than IDLE, PLAYING and FAILED are ignored.
void AudioHTTPMediaSource::on_state_change(micro_decoder::DecoderState state) {
  if (state == micro_decoder::DecoderState::IDLE) {
    // Nothing left to drive once the decoder is idle, so stop looping as well.
    this->set_state_(media_source::MediaSourceState::IDLE);
    this->disable_loop();
  } else if (state == micro_decoder::DecoderState::PLAYING) {
    this->set_state_(media_source::MediaSourceState::PLAYING);
  } else if (state == micro_decoder::DecoderState::FAILED) {
    this->set_state_(media_source::MediaSourceState::ERROR);
  }
}
} // namespace esphome::audio_http
#endif // USE_ESP32
@@ -0,0 +1,59 @@
#pragma once
#include "esphome/core/defines.h"
#ifdef USE_ESP32
#include "esphome/components/audio/audio.h"
#include "esphome/components/media_source/media_source.h"
#include "esphome/core/component.h"
#include <micro_decoder/decoder_source.h>
#include <micro_decoder/types.h>
#include <atomic>
#include <memory>
#include <string>
namespace esphome::audio_http {
// Inherits from two unrelated listener-style interfaces:
// - media_source::MediaSource: this source reports state and writes audio *to* an orchestrator
// (the orchestrator calls set_listener() on us with a MediaSourceListener*).
// - micro_decoder::DecoderListener: the underlying decoder calls back *into* us with decoded
// audio and state changes (we call decoder_->set_listener(this) in setup()).
// The two set_listener() methods live on different base classes and serve opposite directions.
/// Media source that plays audio from "http://" / "https://" URLs through a micro_decoder::DecoderSource.
class AudioHTTPMediaSource : public Component, public media_source::MediaSource, public micro_decoder::DecoderListener {
 public:
  /// Creates and configures the decoder and registers this object as its listener.
  void setup() override;
  /// Forwards the main-loop iteration to the decoder.
  void loop() override;
  /// Logs the buffer size and the PSRAM stack setting.
  void dump_config() override;
  /// Sets the size in bytes that setup() uses for the decoder's ring buffer.
  void set_buffer_size(size_t buffer_size) { this->buffer_size_ = buffer_size; }
  /// Selects whether setup() asks the decoder to place its task stack in PSRAM.
  void set_task_stack_in_psram(bool task_stack_in_psram) { this->decoder_task_stack_in_psram_ = task_stack_in_psram; }
  // MediaSource interface implementation
  /// Starts playback of the given URL; returns false if the source is busy, not ready, or the URI is rejected.
  bool play_uri(const std::string &uri) override;
  /// Handles STOP / PAUSE / PLAY; other commands are ignored.
  void handle_command(media_source::MediaSourceCommand command) override;
  /// Returns true for URIs starting with "http://" or "https://".
  bool can_handle(const std::string &uri) const override;
  // DecoderListener interface implementation
  /// Called from the decoder task with decoded audio; returns 0 while paused.
  size_t on_audio_write(const uint8_t *data, size_t length, uint32_t timeout_ms) override;
  /// Called from the decoder task before the first on_audio_write(); records the stream format.
  void on_stream_info(const micro_decoder::AudioStreamInfo &info) override;
  /// Maps decoder state changes onto media source states.
  void on_state_change(micro_decoder::DecoderState state) override;
 protected:
  // Created in setup(); used by loop(), play_uri() and handle_command().
  std::unique_ptr<micro_decoder::DecoderSource> decoder_;
  // Stream format stored by on_stream_info() and passed along with every write_output() call.
  audio::AudioStreamInfo stream_info_;
  // Ring buffer size in bytes handed to the decoder configuration.
  size_t buffer_size_{50000};
  // Written from the main loop in handle_command(), read from the decoder task in
  // on_audio_write(). Must be atomic to avoid a data race.
  std::atomic<bool> pause_{false};
  // When true, the decoder configuration requests the decoder task stack in PSRAM.
  bool decoder_task_stack_in_psram_{false};
};
} // namespace esphome::audio_http
#endif // USE_ESP32
@@ -0,0 +1,59 @@
from typing import Any
import esphome.codegen as cg
from esphome.components import audio, esp32, media_source, psram
import esphome.config_validation as cv
from esphome.const import CONF_BUFFER_SIZE, CONF_ID, CONF_TASK_STACK_IN_PSRAM
from esphome.types import ConfigType
# Maintainer handle for this component.
CODEOWNERS = ["@kahrendt"]
# NOTE(review): presumably pulls in the audio component that this module relies on
# (audio.request_micro_decoder_support is called during validation) - confirm.
AUTO_LOAD = ["audio"]
# Code generator handles for the C++ namespace and class of this component.
audio_http_ns = cg.esphome_ns.namespace("audio_http")
AudioHTTPMediaSource = audio_http_ns.class_(
    "AudioHTTPMediaSource", cg.Component, media_source.MediaSource
)
def _request_micro_decoder(config: ConfigType) -> ConfigType:
    """Request micro decoder support from the audio component.

    Used as the last step of CONFIG_SCHEMA; the configuration is returned unchanged.
    """
    audio.request_micro_decoder_support()
    return config
def _validate_task_stack_in_psram(value: Any) -> bool:
    """Validate the PSRAM task stack option.

    The value is validated as a boolean first. The psram component is only
    required when the option is enabled, so a disabled option never triggers
    the requires_component check.
    """
    enabled = cv.boolean(value)
    if not enabled:
        return enabled
    return cv.requires_component(psram.DOMAIN)(enabled)
# Validation pipeline: the media source base schema plus this component's options,
# restricted to ESP32, followed by the request for micro decoder support.
CONFIG_SCHEMA = cv.All(
    media_source.media_source_schema(AudioHTTPMediaSource)
    .extend(
        {
            # Buffer size passed to set_buffer_size(); accepted range is 5000..1000000.
            cv.Optional(CONF_BUFFER_SIZE, default=50_000): cv.int_range(
                min=5_000, max=1_000_000
            ),
            # No default: when the key is absent, to_code() leaves the C++ default in place.
            cv.Optional(CONF_TASK_STACK_IN_PSRAM): _validate_task_stack_in_psram,
        }
    )
    .extend(cv.COMPONENT_SCHEMA),
    cv.only_on_esp32,
    _request_micro_decoder,
)
async def to_code(config: ConfigType) -> None:
    """Generate the C++ setup code for one audio_http media source.

    Registers the object as a component and as a media source, then emits the
    setter calls. When the task stack is configured to live in PSRAM, the
    CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY sdkconfig option is enabled too.
    """
    source = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(source, config)
    await media_source.register_media_source(source, config)

    stack_in_psram = config.get(CONF_TASK_STACK_IN_PSRAM)
    if stack_in_psram:
        cg.add(source.set_task_stack_in_psram(True))
        esp32.add_idf_sdkconfig_option(
            "CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY", True
        )

    buffer_size = config[CONF_BUFFER_SIZE]
    cg.add(source.set_buffer_size(buffer_size))
+7
View File
@@ -0,0 +1,7 @@
psram:
media_source:
- platform: audio_http
id: audio_http_source
buffer_size: 100000
task_stack_in_psram: true
@@ -0,0 +1 @@
<<: !include common.yaml