[speaker_source] Add new media player (#14649)

Co-authored-by: J. Nick Koston <nick@home-assistant.io>
This commit is contained in:
Kevin Ahrendt
2026-03-10 15:25:26 -05:00
committed by GitHub
parent 780e009bf4
commit 8ca6ee4349
8 changed files with 1028 additions and 0 deletions

View File

@@ -459,6 +459,7 @@ esphome/components/sonoff_d1/* @anatoly-savchenkov
esphome/components/sound_level/* @kahrendt
esphome/components/speaker/* @jesserockz @kahrendt
esphome/components/speaker/media_player/* @kahrendt @synesthesiam
esphome/components/speaker_source/* @kahrendt
esphome/components/spi/* @clydebarrow @esphome/core
esphome/components/spi_device/* @clydebarrow
esphome/components/spi_led_strip/* @clydebarrow

View File

@@ -0,0 +1,212 @@
from esphome import automation
import esphome.codegen as cg
from esphome.components import audio, media_player, media_source, speaker
import esphome.config_validation as cv
from esphome.const import (
CONF_FORMAT,
CONF_ID,
CONF_NUM_CHANNELS,
CONF_SAMPLE_RATE,
CONF_SPEAKER,
)
from esphome.core.entity_helpers import inherit_property_from
from esphome.types import ConfigType
# Component metadata. NOTE(review): presumably read by the ESPHome loader
# (components to auto-load, required components, maintainers) - the consumer is
# not visible in this file.
AUTO_LOAD = ["audio"]
DEPENDENCIES = ["media_source", "speaker"]
CODEOWNERS = ["@kahrendt"]
# Configuration keys specific to this platform; used by the schemas and
# to_code() in this file.
CONF_MEDIA_PIPELINE = "media_pipeline"
CONF_ON_MUTE = "on_mute"
CONF_ON_UNMUTE = "on_unmute"
CONF_ON_VOLUME = "on_volume"
CONF_SOURCES = "sources"
CONF_VOLUME_INCREMENT = "volume_increment"
CONF_VOLUME_INITIAL = "volume_initial"
CONF_VOLUME_MAX = "volume_max"
CONF_VOLUME_MIN = "volume_min"
# Code generation handles for the "speaker_source" namespace and the class,
# struct and enum referenced from it.
speaker_source_ns = cg.esphome_ns.namespace("speaker_source")
SpeakerSourceMediaPlayer = speaker_source_ns.class_(
"SpeakerSourceMediaPlayer", cg.Component, media_player.MediaPlayer
)
PipelineContext = speaker_source_ns.struct("PipelineContext")
Pipeline = speaker_source_ns.enum("Pipeline")
# Maps the configured format name to the lowercase string stored in the
# "format" field of the supported format struct. There is no "NONE" entry:
# to_code() only builds the struct when the configured format is not "NONE".
FORMAT_MAPPING = {
"FLAC": "flac",
"MP3": "mp3",
"OPUS": "opus",
"WAV": "wav",
}
def _get_supported_format_struct(pipeline: ConfigType):
    """Build the initializer for the pipeline's supported audio format.

    Args:
        pipeline: Validated media pipeline config. Its format must be a key of
            FORMAT_MAPPING, and the sample rate and channel count must be set.

    Returns:
        A cg.StructInitializer for media_player.MediaPlayerSupportedFormat
        carrying the format, sample rate, number of channels, purpose and,
        for every format except MP3, the bytes per sample.
    """
    audio_format = pipeline[CONF_FORMAT]
    initializer_args = [
        media_player.MediaPlayerSupportedFormat,
        ("format", FORMAT_MAPPING[audio_format]),
        ("sample_rate", pipeline[CONF_SAMPLE_RATE]),
        ("num_channels", pipeline[CONF_NUM_CHANNELS]),
        ("purpose", media_player.MEDIA_PLAYER_FORMAT_PURPOSE_ENUM["default"]),
    ]
    # Omit sample_bytes for MP3: ffmpeg transcoding in Home Assistant fails
    # if the number of bytes per sample is specified for MP3.
    if audio_format != "MP3":
        initializer_args.append(("sample_bytes", 2))
    return cg.StructInitializer(*initializer_args)
def _validate_pipeline(config: ConfigType) -> ConfigType:
    """Final-validate the media pipeline configuration.

    Args:
        config: The media pipeline section of the media player config.

    Returns:
        The same config, after the inherited properties have been applied.

    Raises:
        cv.Invalid: If the format is OPUS and the sample rate is not 48000 Hz.
    """
    # Inherit settings from speaker if not manually set
    for inherited_key in (CONF_NUM_CHANNELS, CONF_SAMPLE_RATE):
        inherit_property_from(inherited_key, CONF_SPEAKER)(config)

    # Opus only supports 48 kHz
    is_opus = config.get(CONF_FORMAT) == "OPUS"
    if is_opus and config.get(CONF_SAMPLE_RATE) != 48000:
        raise cv.Invalid("Opus only supports a sample rate of 48000 Hz")

    # Run the shared audio validator for the speaker with 16-bit samples and
    # the (possibly inherited) channel count and sample rate.
    validate_audio = audio.final_validate_audio_schema(
        "speaker_source media_player",
        audio_device=CONF_SPEAKER,
        bits_per_sample=16,
        channels=config.get(CONF_NUM_CHANNELS),
        sample_rate=config.get(CONF_SAMPLE_RATE),
    )
    validate_audio(config)
    return config
# Schema for the `media_pipeline` option: the output speaker, the media sources
# feeding it, and the audio format settings of the pipeline.
PIPELINE_SCHEMA = cv.Schema(
{
cv.GenerateID(): cv.declare_id(
PipelineContext
), # Needed to inherit audio settings from the speaker
cv.Required(CONF_SPEAKER): cv.use_id(speaker.Speaker),
# One or more media source IDs; an empty list is rejected.
cv.Required(CONF_SOURCES): cv.All(
cv.ensure_list(cv.use_id(media_source.MediaSource)),
cv.Length(min=1),
),
cv.Optional(CONF_FORMAT, default="FLAC"): cv.enum(audio.AUDIO_FILE_TYPE_ENUM),
# Sample rate and channel count have no default here; _validate_pipeline
# inherits them from the speaker when they are not set.
cv.Optional(CONF_SAMPLE_RATE): cv.int_range(min=1),
cv.Optional(CONF_NUM_CHANNELS): cv.int_range(1, 2),
}
)
def _validate_volume_settings(config: ConfigType) -> ConfigType:
    """Ensure the configured minimum volume does not exceed the maximum.

    Args:
        config: Media player config with the volume_min and volume_max keys set.

    Returns:
        The unchanged config when the limits are consistent.

    Raises:
        cv.Invalid: If volume_min is greater than volume_max.
    """
    # CONF_VOLUME_INITIAL is in the scaled volume domain (0.0-1.0) and doesn't need to be validated
    limits_are_consistent = not config[CONF_VOLUME_MIN] > config[CONF_VOLUME_MAX]
    if limits_are_consistent:
        return config
    raise cv.Invalid(
        f"{CONF_VOLUME_MIN} ({config[CONF_VOLUME_MIN]}) must be less than or equal to {CONF_VOLUME_MAX} ({config[CONF_VOLUME_MAX]})"
    )
# Top-level schema for the speaker_source media player platform. The platform
# options are combined with the component and media player base schemas, the
# platform is restricted by cv.only_on_esp32, and the volume limits are
# cross-checked by _validate_volume_settings.
CONFIG_SCHEMA = cv.All(
cv.Schema(
{
# Volume options are validated as percentages.
cv.Optional(CONF_VOLUME_INCREMENT, default=0.05): cv.percentage,
cv.Optional(CONF_VOLUME_INITIAL, default=0.5): cv.percentage,
cv.Optional(CONF_VOLUME_MAX, default=1.0): cv.percentage,
cv.Optional(CONF_VOLUME_MIN, default=0.0): cv.percentage,
cv.Required(CONF_MEDIA_PIPELINE): PIPELINE_SCHEMA,
# Optional automations; each is validated as a single automation.
cv.Optional(CONF_ON_MUTE): automation.validate_automation(single=True),
cv.Optional(CONF_ON_UNMUTE): automation.validate_automation(single=True),
cv.Optional(CONF_ON_VOLUME): automation.validate_automation(single=True),
}
)
.extend(cv.COMPONENT_SCHEMA)
.extend(media_player.media_player_schema(SpeakerSourceMediaPlayer)),
cv.only_on_esp32,
_validate_volume_settings,
)
def _final_validate_codecs(config: ConfigType) -> ConfigType:
    """Request decoder support for the media pipeline's configured format.

    FLAC, MP3 and OPUS each request their own codec, "NONE" requests all
    three, and any other format requests nothing.

    Args:
        config: The full media player config.

    Returns:
        The unchanged config.
    """
    fmt = config[CONF_MEDIA_PIPELINE][CONF_FORMAT]
    # "NONE" appears in every test so it enables all codecs, in the same
    # FLAC, MP3, OPUS order as an explicit chain would.
    if fmt in ("NONE", "FLAC"):
        audio.request_flac_support()
    if fmt in ("NONE", "MP3"):
        audio.request_mp3_support()
    if fmt in ("NONE", "OPUS"):
        audio.request_opus_support()
    return config
# Final validation: _validate_pipeline runs on the media pipeline section,
# then _final_validate_codecs requests codec support for the configured
# format. extra=cv.ALLOW_EXTRA lets the keys not listed here pass through.
FINAL_VALIDATE_SCHEMA = cv.All(
cv.Schema(
{
cv.Required(CONF_MEDIA_PIPELINE): _validate_pipeline,
},
extra=cv.ALLOW_EXTRA,
),
_final_validate_codecs,
)
async def to_code(config: ConfigType) -> None:
    """Generate the C++ setup code for a speaker_source media player.

    Creates and registers the player, forwards the volume settings, wires the
    media pipeline's sources and speaker, sets the supported format unless the
    configured format is "NONE", and builds the optional mute, unmute and
    volume automations.

    Args:
        config: The validated media player config.
    """
    var = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(var, config)
    await media_player.register_media_player(var, config)

    # Each volume option is passed unchanged to its matching C++ setter.
    volume_setters = (
        (var.set_volume_increment, CONF_VOLUME_INCREMENT),
        (var.set_volume_initial, CONF_VOLUME_INITIAL),
        (var.set_volume_max, CONF_VOLUME_MAX),
        (var.set_volume_min, CONF_VOLUME_MIN),
    )
    for setter, conf_key in volume_setters:
        cg.add(setter(config[conf_key]))

    pipeline_config = config[CONF_MEDIA_PIPELINE]
    pipeline_enum = Pipeline.MEDIA_PIPELINE

    for source_id in pipeline_config[CONF_SOURCES]:
        source_var = await cg.get_variable(source_id)
        cg.add(var.add_media_source(pipeline_enum, source_var))

    speaker_var = await cg.get_variable(pipeline_config[CONF_SPEAKER])
    cg.add(var.set_speaker(pipeline_enum, speaker_var))

    # With format "NONE" no supported format is set on the pipeline.
    if pipeline_config[CONF_FORMAT] != "NONE":
        supported_format = _get_supported_format_struct(pipeline_config)
        cg.add(var.set_format(pipeline_enum, supported_format))

    # (config key, trigger getter, template arguments) for each automation.
    # The volume trigger passes a float named "x" to its automation.
    trigger_specs = (
        (CONF_ON_MUTE, var.get_mute_trigger, []),
        (CONF_ON_UNMUTE, var.get_unmute_trigger, []),
        (CONF_ON_VOLUME, var.get_volume_trigger, [(cg.float_, "x")]),
    )
    for conf_key, trigger_getter, trigger_args in trigger_specs:
        if automation_config := config.get(conf_key):
            await automation.build_automation(
                trigger_getter(),
                trigger_args,
                automation_config,
            )

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,217 @@
#pragma once
#include "esphome/core/defines.h"
#ifdef USE_ESP32
#include "esphome/components/audio/audio.h"
#include "esphome/components/media_source/media_source.h"
#include "esphome/components/media_player/media_player.h"
#include "esphome/components/speaker/speaker.h"
#include "esphome/core/automation.h"
#include "esphome/core/component.h"
#include "esphome/core/preferences.h"
#include <array>
#include <atomic>
#include <memory>
#include <vector>
#include <freertos/FreeRTOS.h>
#include <freertos/queue.h>
namespace esphome::speaker_source {
// THREADING MODEL:
// This component coordinates media sources that run their own decode tasks with speakers
// that have their own playback callback tasks. Three thread contexts exist:
//
// - Main loop task: setup(), loop(), dump_config(), handle_media_state_changed_(),
// handle_volume_request_(), handle_mute_request_(), handle_play_uri_request_(),
// set_volume_(), set_mute_state_(), control(), get_media_pipeline_state_(),
// find_source_for_uri_(), try_execute_play_uri_(), save_volume_restore_state_()
//
// - Media source task(s): handle_media_output_() via SourceBinding::write_audio().
// Called from each source's decode task thread when streaming audio data.
// Reads ps.active_source (atomic), writes ps.pending_frames (atomic), and calls
// ps.speaker methods (speaker pointer is immutable after setup).
//
// - Speaker callback task: handle_speaker_playback_callback_() via speaker's
// add_audio_output_callback(). Called when the speaker finishes writing frames to the DAC.
// Reads ps.active_source (atomic), writes ps.pending_frames (atomic), and calls
// active_source->notify_audio_played().
//
// control() is only called from the main loop (HA/automation commands).
// Source tasks use defer() for all requests (volume, mute, play_uri).
//
// Thread-safe communication:
// - FreeRTOS queue (media_control_command_queue_): control() -> loop() for play/command dispatch
// - defer(): SourceBinding::request_volume/request_mute/request_play_uri -> main loop
// - Atomic fields (active_source, pending_frames): shared between all three thread contexts
//
// Non-atomic pipeline fields (last_source, stopping_source, pending_source) are only accessed
// from the main loop thread.
/// @brief Identifies a pipeline of the media player.
/// The value is used as an index into SpeakerSourceMediaPlayer::pipelines_ (see set_speaker() and set_format()).
/// Only the media pipeline exists, matching the single-element pipelines_ array.
enum Pipeline : uint8_t {
MEDIA_PIPELINE = 0,
};
// Forward declaration
class SpeakerSourceMediaPlayer;
/// @brief Per-source listener binding that captures the source pointer at registration time.
/// Each binding implements MediaSourceListener and forwards callbacks to the player with the source identified.
/// Defined before PipelineContext so pipelines can own their bindings directly.
struct SourceBinding : public media_source::MediaSourceListener {
SourceBinding(SpeakerSourceMediaPlayer *player, media_source::MediaSource *source, uint8_t pipeline)
: player(player), source(source), pipeline(pipeline) {}
// Player that receives the forwarded callbacks
SpeakerSourceMediaPlayer *player;
// Source this binding listens to
media_source::MediaSource *source;
// Index of the pipeline the source belongs to, captured at binding creation time
uint8_t pipeline;
// Implementations are in the .cpp file because SpeakerSourceMediaPlayer is only forward-declared here
// write_audio() is called from the source's decode task and reaches the player's handle_media_output_()
// (see THREADING MODEL).
size_t write_audio(const uint8_t *data, size_t length, uint32_t timeout_ms,
const audio::AudioStreamInfo &stream_info) override;
// NOTE(review): presumably forwards to the player's handle_media_state_changed_() - confirm in the .cpp file
void report_state(media_source::MediaSourceState state) override;
// The request_* callbacks hand their request to the main loop via defer() (see THREADING MODEL).
void request_volume(float volume) override;
void request_mute(bool is_muted) override;
void request_play_uri(const std::string &uri) override;
};
/// @brief State of one pipeline: its speaker, optional format, registered sources and source tracking.
/// See THREADING MODEL at the top of the namespace for which thread contexts may touch which field.
struct PipelineContext {
// Speaker this pipeline outputs to; the pointer is immutable after setup
speaker::Speaker *speaker{nullptr};
// Assigned by SpeakerSourceMediaPlayer::set_format(); stays empty if that is never called
optional<media_player::MediaPlayerSupportedFormat> format;
// Atomic because it is shared between the main loop, media source tasks and the speaker callback task
std::atomic<media_source::MediaSource *> active_source{nullptr};
// The following three pointers are only accessed from the main loop thread
media_source::MediaSource *last_source{nullptr};
media_source::MediaSource *stopping_source{nullptr}; // Source we've asked to stop, awaiting IDLE
media_source::MediaSource *pending_source{nullptr}; // Source we've asked to play, awaiting PLAYING
// Each SourceBinding pairs a MediaSource* with its listener implementation.
// Uses unique_ptr so binding addresses are stable and set_listener() can be called in add_media_source().
// Uses std::vector because the count varies across instances (multiple speaker_source media players may exist).
std::vector<std::unique_ptr<SourceBinding>> sources;
// Track frames sent to speaker to correlate with playback callbacks.
// Atomic because it is written from the main loop/source tasks and read/decremented from the speaker playback
// callback.
std::atomic<uint32_t> pending_frames{0};
/// @brief Check if this pipeline is configured (has a speaker assigned)
bool is_configured() const { return this->speaker != nullptr; }
};
/// @brief A control request tagged with its type and target pipeline.
/// NOTE(review): presumably the item type of media_control_command_queue_ (control() -> loop()) - confirm in the
/// .cpp file.
struct MediaPlayerControlCommand {
enum Type : uint8_t {
PLAY_URI, // Find a source that can handle this URI and play it
SEND_COMMAND, // Send command to active source
};
// Selects which member of `data` is meaningful
Type type;
// Index of the pipeline the command targets
uint8_t pipeline;
union {
std::string *uri; // Owned pointer, must delete after xQueueReceive (for PLAY_URI)
media_player::MediaPlayerCommand command;
} data;
};
/// @brief Volume and mute state pair.
/// NOTE(review): presumably the layout saved to flash by save_volume_restore_state_() - confirm in the .cpp file.
struct VolumeRestoreState {
float volume;
bool is_muted;
};
/// @brief Media player component that coordinates media sources (which run their own decode tasks) with a speaker.
/// See THREADING MODEL at the top of the namespace for which methods run in which thread context.
class SpeakerSourceMediaPlayer : public Component, public media_player::MediaPlayer {
// SourceBinding forwards listener callbacks into the protected handle_*_ methods
friend struct SourceBinding;
public:
float get_setup_priority() const override { return esphome::setup_priority::PROCESSOR; }
void setup() override;
void loop() override;
void dump_config() override;
// MediaPlayer implementations
media_player::MediaPlayerTraits get_traits() override;
bool is_muted() const override { return this->is_muted_; }
// Percentage to increase or decrease the volume for volume up or volume down commands
void set_volume_increment(float volume_increment) { this->volume_increment_ = volume_increment; }
// Volume used initially on first boot when no volume had been previously saved
void set_volume_initial(float volume_initial) { this->volume_initial_ = volume_initial; }
// Volume limits; these setters store the values without validating them
void set_volume_max(float volume_max) { this->volume_max_ = volume_max; }
void set_volume_min(float volume_min) { this->volume_min_ = volume_min; }
/// @brief Adds a media source to a pipeline and registers this player as its listener
void add_media_source(uint8_t pipeline, media_source::MediaSource *media_source);
// `pipeline` indexes pipelines_ directly and is not bounds-checked here
void set_speaker(uint8_t pipeline, speaker::Speaker *speaker) { this->pipelines_[pipeline].speaker = speaker; }
void set_format(uint8_t pipeline, const media_player::MediaPlayerSupportedFormat &format) {
this->pipelines_[pipeline].format = format;
}
Trigger<> *get_mute_trigger() { return &this->mute_trigger_; }
Trigger<> *get_unmute_trigger() { return &this->unmute_trigger_; }
Trigger<float> *get_volume_trigger() { return &this->volume_trigger_; }
protected:
// Callbacks from source bindings (pipeline index is captured at binding creation time)
// handle_media_output_() runs on the calling source's decode task; the other handlers run on the main loop
size_t handle_media_output_(uint8_t pipeline, media_source::MediaSource *source, const uint8_t *data, size_t length,
uint32_t timeout_ms, const audio::AudioStreamInfo &stream_info);
void handle_media_state_changed_(uint8_t pipeline, media_source::MediaSource *source,
media_source::MediaSourceState state);
void handle_volume_request_(float volume);
void handle_mute_request_(bool is_muted);
void handle_play_uri_request_(uint8_t pipeline, const std::string &uri);
// Runs on the speaker callback task when the speaker finishes writing frames to the DAC
void handle_speaker_playback_callback_(uint32_t frames, int64_t timestamp, uint8_t pipeline);
// Receives commands from HA or from the voice assistant component
// Sends commands to the media_control_command_queue_
void control(const media_player::MediaPlayerCall &call) override;
/// @brief Updates this->volume and saves volume/mute state to flash for restoration if publish is true.
void set_volume_(float volume, bool publish = true);
/// @brief Sets the mute state.
/// @param mute_state If true, audio will be muted. If false, audio will be unmuted
/// @param publish If true, saves volume/mute state to flash for restoration
void set_mute_state_(bool mute_state, bool publish = true);
/// @brief Saves the current volume and mute state to the flash for restoration.
void save_volume_restore_state_();
/// @brief Determine media player state from the media pipeline's active source
/// @param media_source Active source for the media pipeline (may be nullptr)
/// @return The appropriate MediaPlayerState
media_player::MediaPlayerState get_media_pipeline_state_(media_source::MediaSource *media_source) const;
// NOTE(review): return value presumably reports whether playback of the URI was started - confirm in the .cpp file
bool try_execute_play_uri_(const std::string &uri, uint8_t pipeline);
// NOTE(review): presumably returns nullptr when no source of the pipeline handles the URI - confirm in the .cpp file
media_source::MediaSource *find_source_for_uri_(const std::string &uri, uint8_t pipeline);
// FreeRTOS queue carrying play/command requests from control() to loop()
QueueHandle_t media_control_command_queue_;
// Pipeline context for media pipeline. See THREADING MODEL at top of namespace for access rules.
std::array<PipelineContext, 1> pipelines_;
// Used to save volume/mute state for restoration on reboot
ESPPreferenceObject pref_;
Trigger<> mute_trigger_;
Trigger<> unmute_trigger_;
Trigger<float> volume_trigger_;
// The amount to change the volume on volume up/down commands
float volume_increment_;
// The initial volume used by Setup when no previous volume was saved
float volume_initial_;
// Volume limits assigned through set_volume_max() / set_volume_min()
float volume_max_;
float volume_min_;
bool is_muted_{false};
};
} // namespace esphome::speaker_source
#endif // USE_ESP32

View File

@@ -0,0 +1,43 @@
# Shared I2S bus for the test speaker. Each pin option takes the substitution
# of the same name, which the per-board test file defines.
i2s_audio:
  i2s_lrclk_pin: ${i2s_lrclk_pin}
  i2s_bclk_pin: ${i2s_bclk_pin}
  i2s_mclk_pin: ${i2s_mclk_pin}
speaker:
- platform: i2s_audio
id: speaker_id
dac_type: external
i2s_dout_pin: ${i2s_dout_pin}
sample_rate: 48000
num_channels: 2
audio_file:
- id: test_audio
file:
type: local
path: $component_dir/test.wav
media_source:
- platform: audio_file
id: audio_file_source
media_player:
- platform: speaker_source
id: media_player_id
name: Media Player
volume_increment: 0.02
volume_initial: 0.75
volume_max: 0.95
volume_min: 0.0
media_pipeline:
speaker: speaker_id
format: FLAC
num_channels: 1
sources:
- audio_file_source
on_mute:
- media_player.pause:
id: media_player_id
on_unmute:
- media_player.play:
id: media_player_id

View File

@@ -0,0 +1,9 @@
substitutions:
scl_pin: GPIO16
sda_pin: GPIO17
i2s_bclk_pin: GPIO27
i2s_lrclk_pin: GPIO26
i2s_mclk_pin: GPIO25
i2s_dout_pin: GPIO23
<<: !include common.yaml

Binary file not shown.