[i2s_audio] Optimize SPDIF encoder and support higher bit depth audio (#16504)

Co-authored-by: Keith Burzinski <kbx81x@gmail.com>
2026-05-21 17:39:00 +08:00 · 2026-05-19 14:37:41 -04:00
parent 65e1e210de
commit 302938f875
6 changed files with 372 additions and 262 deletions
@@ -89,10 +89,10 @@ def _set_num_channels_from_config(config):

 def _set_stream_limits(config):
    if config.get(CONF_SPDIF_MODE, False):
-        # SPDIF mode: fixed to 16-bit stereo at configured sample rate
+        # SPDIF mode: 16/24/32-bit stereo audio at the configured sample rate
        audio.set_stream_limits(
            min_bits_per_sample=16,
-            max_bits_per_sample=16,
+            max_bits_per_sample=32,
            min_channels=2,
            max_channels=2,
            min_sample_rate=config.get(CONF_SAMPLE_RATE),
@@ -213,9 +213,6 @@ def _final_validate(config):
            )
        if config[CONF_CHANNEL] != CONF_STEREO:
            raise cv.Invalid("SPDIF mode only supports stereo channel configuration")
-        # bits_per_sample is converted to float by the schema
-        if config[CONF_BITS_PER_SAMPLE] != 16:
-            raise cv.Invalid("SPDIF mode only supports 16 bits per sample")
        if not config[CONF_USE_APLL]:
            raise cv.Invalid(
                "SPDIF mode requires 'use_apll: true' for accurate clock generation"
@@ -411,8 +411,9 @@ esp_err_t I2SAudioSpeakerSPDIF::start_i2s_driver(audio::AudioStreamInfo &audio_s
             this->sample_rate_, audio_stream_info.get_sample_rate());
    return ESP_ERR_NOT_SUPPORTED;
  }
-  if (audio_stream_info.get_bits_per_sample() != 16) {
-    ESP_LOGE(TAG, "Only supports 16 bits per sample");
+  const uint8_t bits_per_sample = audio_stream_info.get_bits_per_sample();
+  if (bits_per_sample != 16 && bits_per_sample != 24 && bits_per_sample != 32) {
+    ESP_LOGE(TAG, "Only supports 16, 24, or 32 bits per sample (got %u)", (unsigned) bits_per_sample);
    return ESP_ERR_NOT_SUPPORTED;
  }
  if (audio_stream_info.get_channels() != 2) {
@@ -420,11 +421,8 @@ esp_err_t I2SAudioSpeakerSPDIF::start_i2s_driver(audio::AudioStreamInfo &audio_s
    return ESP_ERR_NOT_SUPPORTED;
  }

-  if (this->slot_bit_width_ != I2S_SLOT_BIT_WIDTH_AUTO &&
-      (i2s_slot_bit_width_t) audio_stream_info.get_bits_per_sample() > this->slot_bit_width_) {
-    ESP_LOGE(TAG, "Stream bits per sample must be less than or equal to the speaker's configuration");
-    return ESP_ERR_NOT_SUPPORTED;
-  }
+  // Tell the encoder what input width to expect. 32-bit input is truncated to 24-bit on the wire.
+  this->spdif_encoder_->set_bytes_per_sample(bits_per_sample / 8);

  if (!this->parent_->try_lock()) {
    ESP_LOGE(TAG, "Parent bus is busy");
@@ -24,8 +24,6 @@ static constexpr uint16_t SPDIF_BLOCK_SIZE_BYTES = SPDIF_BLOCK_SAMPLES * (EMULAT
 static constexpr uint32_t SPDIF_BLOCK_SIZE_U32 = SPDIF_BLOCK_SIZE_BYTES / sizeof(uint32_t);  // 3072 bytes / 4 = 768
 // I2S frame count for one SPDIF block (for new driver where frame = 8 bytes for 32-bit stereo)
 static constexpr uint32_t SPDIF_BLOCK_I2S_FRAMES = SPDIF_BLOCK_SIZE_BYTES / 8;  // 3072 / 8 = 384 frames
-// PCM bytes needed for one complete SPDIF block (192 stereo frames * 2 bytes per sample * 2 channels)
-static constexpr uint16_t SPDIF_PCM_BYTES_PER_BLOCK = SPDIF_BLOCK_SAMPLES * 2 * 2;  // = 768 bytes

 /// Callback signature for block completion (raw function pointer for minimal overhead)
 /// @param user_ctx User context pointer passed during callback registration
@@ -64,8 +62,16 @@ class SPDIFEncoder {
  /// @brief Check if currently in preload mode
  bool is_preload_mode() const { return this->preload_mode_; }

+  /// @brief Set input PCM width: 2 = 16-bit, 3 = 24-bit, 4 = 32-bit (truncated to 24-bit on the wire).
+  /// Must be called before write() if input width changes from the default (16-bit). Triggers a
+  /// channel-status rebuild to reflect the new word length.
+  void set_bytes_per_sample(uint8_t bytes_per_sample);
+
+  /// @brief Get the configured input PCM width in bytes per sample
+  uint8_t get_bytes_per_sample() const { return this->bytes_per_sample_; }
+
  /// @brief Convert PCM audio data to SPDIF BMC encoded data
-  /// @param src Source PCM audio data (16-bit stereo)
+  /// @param src Source PCM audio data (stereo, width matches set_bytes_per_sample)
  /// @param size Size of source data in bytes
  /// @param ticks_to_wait Timeout for blocking writes
  /// @param blocks_sent Optional pointer to receive the number of complete SPDIF blocks sent
@@ -74,17 +80,6 @@ class SPDIFEncoder {
  esp_err_t write(const uint8_t *src, size_t size, TickType_t ticks_to_wait, uint32_t *blocks_sent = nullptr,
                  size_t *bytes_consumed = nullptr);

-  /// @brief Get the number of PCM bytes currently pending in the partial block buffer
-  /// @return Number of pending PCM bytes (0 to SPDIF_PCM_BYTES_PER_BLOCK - 1)
-  size_t get_pending_pcm_bytes() const;
-
-  /// @brief Get the number of PCM frames currently pending in the partial block buffer
-  /// @return Number of pending PCM frames (0 to SPDIF_BLOCK_SAMPLES - 1)
-  uint32_t get_pending_frames() const { return this->get_pending_pcm_bytes() / 4; }
-
-  /// @brief Check if there is a partial block pending
-  bool has_pending_data() const { return this->spdif_block_ptr_ != this->spdif_block_buf_.get(); }
-
  /// @brief Emit one complete SPDIF block: pad any pending partial block with silence and send,
  /// or send a full silence block if nothing is pending. Always produces exactly one block on success.
  /// @param ticks_to_wait Timeout for blocking writes
@@ -95,7 +90,7 @@ class SPDIFEncoder {
  void reset();

  /// @brief Set the sample rate for Channel Status Block encoding
-  /// @param sample_rate Sample rate in Hz (e.g., 44100, 48000, 96000)
+  /// @param sample_rate Sample rate in Hz (e.g., 44100, 48000)
  /// Call this before writing audio data to ensure correct channel status.
  void set_sample_rate(uint32_t sample_rate);

@@ -103,8 +98,19 @@ class SPDIFEncoder {
  uint32_t get_sample_rate() const { return this->sample_rate_; }

 protected:
-  /// @brief Encode a single 16-bit PCM sample into the current block position
-  HOT void encode_sample_(const uint8_t *pcm_sample);
+  /// @brief Encode a single stereo silence frame at the current block position.
+  /// @note Used only by flush_with_silence_typed_ to pad; the hot write path inlines the
+  /// encoding body directly into write_typed_ to keep block_ptr / frame_in_block_ in registers.
+  template<uint8_t Bps> void encode_silence_frame_();
+
+  /// @brief Templated write loop. Called from the public write() via runtime dispatch on bytes_per_sample_.
+  template<uint8_t Bps>
+  HOT esp_err_t write_typed_(const uint8_t *src, size_t size, TickType_t ticks_to_wait, uint32_t *blocks_sent,
+                             size_t *bytes_consumed);
+
+  /// @brief Templated flush-with-silence. Pads the pending block with zeros at the configured width
+  /// (or builds a full silence block when nothing is pending) and sends it. Always emits one block.
+  template<uint8_t Bps> esp_err_t flush_with_silence_typed_(TickType_t ticks_to_wait);

  /// @brief Send the completed block via the appropriate callback
  esp_err_t send_block_(TickType_t ticks_to_wait);
@@ -112,15 +118,6 @@ class SPDIFEncoder {
  /// @brief Build the channel status block from current configuration
  void build_channel_status_();

-  /// @brief Get the channel status bit for a specific frame
-  /// @param frame Frame number (0-191)
-  /// @return The C bit value for this frame
-  ESPHOME_ALWAYS_INLINE inline bool get_channel_status_bit_(uint8_t frame) const {
-    // Channel status is 192 bits transmitted over 192 frames
-    // Bit N is transmitted in frame N, LSB-first within each byte
-    return (this->channel_status_[frame >> 3] >> (frame & 7)) & 1;
-  }
-
  // Member ordering optimized to minimize padding (largest alignment first)

  // 4-byte aligned members (pointers and uint32_t)
@@ -133,9 +130,13 @@ class SPDIFEncoder {
  uint32_t sample_rate_{48000};                  // Sample rate for Channel Status Block encoding

  // 1-byte aligned members (grouped together to avoid internal padding)
-  uint8_t frame_in_block_{0};   // 0-191, tracks stereo frame position within block
-  bool is_left_channel_{true};  // Alternates L/R for stereo samples
-  bool preload_mode_{false};    // Whether to use preload callback vs write callback
+  uint8_t bytes_per_sample_{2};  // Input PCM width: 2/3/4 (16/24/32-bit). 32-bit truncates to 24-bit on the wire.
+  uint8_t frame_in_block_{0};    // 0-191, tracks stereo frame position within block
+  bool preload_mode_{false};     // Whether to use preload callback vs write callback
+  // True when spdif_block_buf_ currently holds a complete full-silence block valid for the active
+  // channel status. A full silence block is deterministic for a given sample rate and word length,
+  // so when this is set flush_with_silence() can re-send the buffer verbatim instead of re-encoding.
+  bool block_buf_is_silence_block_{false};

  // Channel Status Block (192 bits = 24 bytes, transmitted over 192 frames)
  // Placed last since std::array<uint8_t> has 1-byte alignment
@@ -1,13 +1,3 @@
-substitutions:
-  i2s_bclk_pin: GPIO27
-  i2s_lrclk_pin: GPIO26
-  i2s_mclk_pin: GPIO25
-  i2s_dout_pin: GPIO12
-  spdif_data_pin: GPIO4
-
-packages:
-  i2c: !include ../../test_build_components/common/i2c/esp32-idf.yaml
-
 i2s_audio:
  - id: i2s_output

@@ -20,6 +10,5 @@ speaker:
    use_apll: true
    timeout: 2s
    sample_rate: 48000
-    bits_per_sample: 16bit
    channel: stereo
    i2s_mode: primary
@@ -0,0 +1,8 @@
+substitutions:
+  i2s_bclk_pin: GPIO27
+  i2s_lrclk_pin: GPIO26
+  i2s_mclk_pin: GPIO25
+  i2s_dout_pin: GPIO12
+  spdif_data_pin: GPIO4
+
+<<: !include common-spdif_mode.yaml