[i2s_audio] Optimize SPDIF encoder and suport higher bit depth audio (#16504)

Co-authored-by: Keith Burzinski <kbx81x@gmail.com>
This commit is contained in:
Kevin Ahrendt
2026-05-19 14:37:41 -04:00
committed by Jesse Hills
parent 65e1e210de
commit 302938f875
6 changed files with 372 additions and 262 deletions
@@ -89,10 +89,10 @@ def _set_num_channels_from_config(config):
def _set_stream_limits(config):
if config.get(CONF_SPDIF_MODE, False):
# SPDIF mode: fixed to 16-bit stereo at configured sample rate
# SPDIF mode: 16/24/32-bit audio and stereo at configured sample rate
audio.set_stream_limits(
min_bits_per_sample=16,
max_bits_per_sample=16,
max_bits_per_sample=32,
min_channels=2,
max_channels=2,
min_sample_rate=config.get(CONF_SAMPLE_RATE),
@@ -213,9 +213,6 @@ def _final_validate(config):
)
if config[CONF_CHANNEL] != CONF_STEREO:
raise cv.Invalid("SPDIF mode only supports stereo channel configuration")
# bits_per_sample is converted to float by the schema
if config[CONF_BITS_PER_SAMPLE] != 16:
raise cv.Invalid("SPDIF mode only supports 16 bits per sample")
if not config[CONF_USE_APLL]:
raise cv.Invalid(
"SPDIF mode requires 'use_apll: true' for accurate clock generation"
@@ -411,8 +411,9 @@ esp_err_t I2SAudioSpeakerSPDIF::start_i2s_driver(audio::AudioStreamInfo &audio_s
this->sample_rate_, audio_stream_info.get_sample_rate());
return ESP_ERR_NOT_SUPPORTED;
}
if (audio_stream_info.get_bits_per_sample() != 16) {
ESP_LOGE(TAG, "Only supports 16 bits per sample");
const uint8_t bits_per_sample = audio_stream_info.get_bits_per_sample();
if (bits_per_sample != 16 && bits_per_sample != 24 && bits_per_sample != 32) {
ESP_LOGE(TAG, "Only supports 16, 24, or 32 bits per sample (got %u)", (unsigned) bits_per_sample);
return ESP_ERR_NOT_SUPPORTED;
}
if (audio_stream_info.get_channels() != 2) {
@@ -420,11 +421,8 @@ esp_err_t I2SAudioSpeakerSPDIF::start_i2s_driver(audio::AudioStreamInfo &audio_s
return ESP_ERR_NOT_SUPPORTED;
}
if (this->slot_bit_width_ != I2S_SLOT_BIT_WIDTH_AUTO &&
(i2s_slot_bit_width_t) audio_stream_info.get_bits_per_sample() > this->slot_bit_width_) {
ESP_LOGE(TAG, "Stream bits per sample must be less than or equal to the speaker's configuration");
return ESP_ERR_NOT_SUPPORTED;
}
// Tell the encoder what input width to expect. 32-bit input is truncated to 24-bit on the wire.
this->spdif_encoder_->set_bytes_per_sample(bits_per_sample / 8);
if (!this->parent_->try_lock()) {
ESP_LOGE(TAG, "Parent bus is busy");
File diff suppressed because it is too large Load Diff
@@ -24,8 +24,6 @@ static constexpr uint16_t SPDIF_BLOCK_SIZE_BYTES = SPDIF_BLOCK_SAMPLES * (EMULAT
static constexpr uint32_t SPDIF_BLOCK_SIZE_U32 = SPDIF_BLOCK_SIZE_BYTES / sizeof(uint32_t); // 3072 bytes / 4 = 768
// I2S frame count for one SPDIF block (for new driver where frame = 8 bytes for 32-bit stereo)
static constexpr uint32_t SPDIF_BLOCK_I2S_FRAMES = SPDIF_BLOCK_SIZE_BYTES / 8; // 3072 / 8 = 384 frames
// PCM bytes needed for one complete SPDIF block (192 stereo frames * 2 bytes per sample * 2 channels)
static constexpr uint16_t SPDIF_PCM_BYTES_PER_BLOCK = SPDIF_BLOCK_SAMPLES * 2 * 2; // = 768 bytes
/// Callback signature for block completion (raw function pointer for minimal overhead)
/// @param user_ctx User context pointer passed during callback registration
@@ -64,8 +62,16 @@ class SPDIFEncoder {
/// @brief Check if currently in preload mode
bool is_preload_mode() const { return this->preload_mode_; }
/// @brief Set input PCM width: 2 = 16-bit, 3 = 24-bit, 4 = 32-bit (truncated to 24-bit on the wire).
/// Must be called before write() if input width changes from the default (16-bit). Triggers a
/// channel-status rebuild to reflect the new word length.
void set_bytes_per_sample(uint8_t bytes_per_sample);
/// @brief Get the configured input PCM width in bytes per sample
uint8_t get_bytes_per_sample() const { return this->bytes_per_sample_; }
/// @brief Convert PCM audio data to SPDIF BMC encoded data
/// @param src Source PCM audio data (16-bit stereo)
/// @param src Source PCM audio data (stereo, width matches set_bytes_per_sample)
/// @param size Size of source data in bytes
/// @param ticks_to_wait Timeout for blocking writes
/// @param blocks_sent Optional pointer to receive the number of complete SPDIF blocks sent
@@ -74,17 +80,6 @@ class SPDIFEncoder {
esp_err_t write(const uint8_t *src, size_t size, TickType_t ticks_to_wait, uint32_t *blocks_sent = nullptr,
size_t *bytes_consumed = nullptr);
/// @brief Get the number of PCM bytes currently pending in the partial block buffer
/// @return Number of pending PCM bytes (0 to SPDIF_PCM_BYTES_PER_BLOCK - 1)
size_t get_pending_pcm_bytes() const;
/// @brief Get the number of PCM frames currently pending in the partial block buffer
/// @return Number of pending PCM frames (0 to SPDIF_BLOCK_SAMPLES - 1)
uint32_t get_pending_frames() const { return this->get_pending_pcm_bytes() / 4; }
/// @brief Check if there is a partial block pending
bool has_pending_data() const { return this->spdif_block_ptr_ != this->spdif_block_buf_.get(); }
/// @brief Emit one complete SPDIF block: pad any pending partial block with silence and send,
/// or send a full silence block if nothing is pending. Always produces exactly one block on success.
/// @param ticks_to_wait Timeout for blocking writes
@@ -95,7 +90,7 @@ class SPDIFEncoder {
void reset();
/// @brief Set the sample rate for Channel Status Block encoding
/// @param sample_rate Sample rate in Hz (e.g., 44100, 48000, 96000)
/// @param sample_rate Sample rate in Hz (e.g., 44100, 48000)
/// Call this before writing audio data to ensure correct channel status.
void set_sample_rate(uint32_t sample_rate);
@@ -103,8 +98,19 @@ class SPDIFEncoder {
uint32_t get_sample_rate() const { return this->sample_rate_; }
protected:
/// @brief Encode a single 16-bit PCM sample into the current block position
HOT void encode_sample_(const uint8_t *pcm_sample);
/// @brief Encode a single stereo silence frame at the current block position.
/// @note Used only by flush_with_silence_typed_ to pad; the hot write path inlines the
/// encoding body directly into write_typed_ to keep block_ptr / frame_in_block_ in registers.
template<uint8_t Bps> void encode_silence_frame_();
/// @brief Templated write loop. Called from the public write() via runtime dispatch on bytes_per_sample_.
template<uint8_t Bps>
HOT esp_err_t write_typed_(const uint8_t *src, size_t size, TickType_t ticks_to_wait, uint32_t *blocks_sent,
size_t *bytes_consumed);
/// @brief Templated flush-with-silence. Pads the pending block with zeros at the configured width
/// (or builds a full silence block when nothing is pending) and sends it. Always emits one block.
template<uint8_t Bps> esp_err_t flush_with_silence_typed_(TickType_t ticks_to_wait);
/// @brief Send the completed block via the appropriate callback
esp_err_t send_block_(TickType_t ticks_to_wait);
@@ -112,15 +118,6 @@ class SPDIFEncoder {
/// @brief Build the channel status block from current configuration
void build_channel_status_();
/// @brief Get the channel status bit for a specific frame
/// @param frame Frame number (0-191)
/// @return The C bit value for this frame
ESPHOME_ALWAYS_INLINE inline bool get_channel_status_bit_(uint8_t frame) const {
// Channel status is 192 bits transmitted over 192 frames
// Bit N is transmitted in frame N, LSB-first within each byte
return (this->channel_status_[frame >> 3] >> (frame & 7)) & 1;
}
// Member ordering optimized to minimize padding (largest alignment first)
// 4-byte aligned members (pointers and uint32_t)
@@ -133,9 +130,13 @@ class SPDIFEncoder {
uint32_t sample_rate_{48000}; // Sample rate for Channel Status Block encoding
// 1-byte aligned members (grouped together to avoid internal padding)
uint8_t frame_in_block_{0}; // 0-191, tracks stereo frame position within block
bool is_left_channel_{true}; // Alternates L/R for stereo samples
bool preload_mode_{false}; // Whether to use preload callback vs write callback
uint8_t bytes_per_sample_{2}; // Input PCM width: 2/3/4 (16/24/32-bit). 32-bit truncates to 24-bit on the wire.
uint8_t frame_in_block_{0}; // 0-191, tracks stereo frame position within block
bool preload_mode_{false}; // Whether to use preload callback vs write callback
// True when spdif_block_buf_ currently holds a complete full-silence block valid for the active
// channel status. A full silence block is deterministic for a given sample rate and word length,
// so when this is set flush_with_silence() can re-send the buffer verbatim instead of re-encoding.
bool block_buf_is_silence_block_{false};
// Channel Status Block (192 bits = 24 bytes, transmitted over 192 frames)
// Placed last since std::array<uint8_t> has 1-byte alignment
@@ -1,13 +1,3 @@
substitutions:
i2s_bclk_pin: GPIO27
i2s_lrclk_pin: GPIO26
i2s_mclk_pin: GPIO25
i2s_dout_pin: GPIO12
spdif_data_pin: GPIO4
packages:
i2c: !include ../../test_build_components/common/i2c/esp32-idf.yaml
i2s_audio:
- id: i2s_output
@@ -20,6 +10,5 @@ speaker:
use_apll: true
timeout: 2s
sample_rate: 48000
bits_per_sample: 16bit
channel: stereo
i2s_mode: primary
@@ -0,0 +1,8 @@
substitutions:
i2s_bclk_pin: GPIO27
i2s_lrclk_pin: GPIO26
i2s_mclk_pin: GPIO25
i2s_dout_pin: GPIO12
spdif_data_pin: GPIO4
<<: !include common-spdif_mode.yaml