diff --git a/esphome/components/i2s_audio/speaker/__init__.py b/esphome/components/i2s_audio/speaker/__init__.py
index 759cc40ca9e..8215d8b518b 100644
--- a/esphome/components/i2s_audio/speaker/__init__.py
+++ b/esphome/components/i2s_audio/speaker/__init__.py
@@ -89,10 +89,10 @@ def _set_num_channels_from_config(config):
 
 def _set_stream_limits(config):
     if config.get(CONF_SPDIF_MODE, False):
-        # SPDIF mode: fixed to 16-bit stereo at configured sample rate
+        # SPDIF mode: 16-, 24-, or 32-bit stereo audio at the configured sample rate
         audio.set_stream_limits(
             min_bits_per_sample=16,
-            max_bits_per_sample=16,
+            max_bits_per_sample=32,
             min_channels=2,
             max_channels=2,
             min_sample_rate=config.get(CONF_SAMPLE_RATE),
@@ -213,9 +213,6 @@ def _final_validate(config):
             )
         if config[CONF_CHANNEL] != CONF_STEREO:
             raise cv.Invalid("SPDIF mode only supports stereo channel configuration")
-        # bits_per_sample is converted to float by the schema
-        if config[CONF_BITS_PER_SAMPLE] != 16:
-            raise cv.Invalid("SPDIF mode only supports 16 bits per sample")
         if not config[CONF_USE_APLL]:
             raise cv.Invalid(
                 "SPDIF mode requires 'use_apll: true' for accurate clock generation"
diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp b/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp
index 877f67775b0..989bcf29770 100644
--- a/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp
+++ b/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp
@@ -411,8 +411,9 @@ esp_err_t I2SAudioSpeakerSPDIF::start_i2s_driver(audio::AudioStreamInfo &audio_s
              this->sample_rate_, audio_stream_info.get_sample_rate());
     return ESP_ERR_NOT_SUPPORTED;
   }
-  if (audio_stream_info.get_bits_per_sample() != 16) {
-    ESP_LOGE(TAG, "Only supports 16 bits per sample");
+  const uint8_t bits_per_sample = audio_stream_info.get_bits_per_sample();
+  if (bits_per_sample != 16 && bits_per_sample != 24 && bits_per_sample != 32) {
+    ESP_LOGE(TAG, "Only supports 16, 24, or 32 bits per sample (got %u)", (unsigned) bits_per_sample);
     return ESP_ERR_NOT_SUPPORTED;
   }
   if (audio_stream_info.get_channels() != 2) {
@@ -420,11 +421,8 @@ esp_err_t I2SAudioSpeakerSPDIF::start_i2s_driver(audio::AudioStreamInfo &audio_s
     return ESP_ERR_NOT_SUPPORTED;
   }
 
-  if (this->slot_bit_width_ != I2S_SLOT_BIT_WIDTH_AUTO &&
-      (i2s_slot_bit_width_t) audio_stream_info.get_bits_per_sample() > this->slot_bit_width_) {
-    ESP_LOGE(TAG, "Stream bits per sample must be less than or equal to the speaker's configuration");
-    return ESP_ERR_NOT_SUPPORTED;
-  }
+  // Tell the encoder what input width to expect. 32-bit input is truncated to 24-bit on the wire.
+  this->spdif_encoder_->set_bytes_per_sample(bits_per_sample / 8);
 
   if (!this->parent_->try_lock()) {
     ESP_LOGE(TAG, "Parent bus is busy");
diff --git a/esphome/components/i2s_audio/speaker/spdif_encoder.cpp b/esphome/components/i2s_audio/speaker/spdif_encoder.cpp
index 42a72346ccb..30146e0a70c 100644
--- a/esphome/components/i2s_audio/speaker/spdif_encoder.cpp
+++ b/esphome/components/i2s_audio/speaker/spdif_encoder.cpp
@@ -17,7 +17,7 @@ static constexpr uint8_t PREAMBLE_M = 0x1d;  // Left channel (not block start)
 static constexpr uint8_t PREAMBLE_W = 0x1b;  // Right channel
 
 // BMC encoding of 4 zero bits starting at phase HIGH: 00_11_00_11 = 0x33
-// Since both aux nibbles (bits 4-7, 8-11) are zero for 16-bit audio and phase is preserved, both are 0x33.
+// Used as a constant in the 16-bit subframe path, where bits 4-11 are always zero.
 static constexpr uint32_t BMC_ZERO_NIBBLE = 0x33;
 
 // Constexpr BMC encoder for compile-time LUT generation.
@@ -36,21 +36,43 @@ static constexpr uint16_t bmc_lut_encode(uint32_t data, uint8_t num_bits) {
   return bmc;
 }
 
-// 4-bit BMC lookup table: 16 entries (16 bytes in flash)
-// Index: 4-bit data value (0-15), always phase=true start
+// Parity of the low num_bits bits of value: 1 if an odd number are set, else 0. Compile-time LUT helper.
+static constexpr uint32_t bmc_lut_parity(uint32_t value, uint32_t num_bits) {
+  uint32_t odd = 0;
+  for (uint32_t remaining = num_bits; remaining != 0; --remaining, value >>= 1)
+    odd ^= value & 1u;
+  return odd;
+}
+
+// Combined BMC + phase-delta lookup tables.
+// Each entry packs the BMC pattern (lower bits, phase=high start) together with
+// a phase-mask delta in bits 16-31 (0xFFFF if the input has odd parity, else 0).
+// XORing the delta into the running phase mask propagates parity across chunks
+// without an explicit popcount.
+
+// 4-bit BMC lookup table, indexed by the nibble value: 16 x uint32_t = 64 bytes in flash.
+// Bits 0-7   : 8-bit BMC pattern, encoded from a phase=high start
+// Bits 16-31 : phase-mask delta (all ones when the nibble has odd parity, otherwise zero)
 static constexpr auto BMC_LUT_4 = [] {
-  std::array<uint8_t, 16> t{};
-  for (uint32_t i = 0; i < 16; i++)
-    t[i] = static_cast<uint8_t>(bmc_lut_encode(i, 4));
+  std::array<uint32_t, 16> t{};
+  for (uint32_t value = 0; value < t.size(); ++value) {
+    const uint32_t pattern = bmc_lut_encode(value, 4);
+    const uint32_t parity_delta = (bmc_lut_parity(value, 4) != 0u) ? 0xFFFF0000u : 0u;
+    t[value] = pattern | parity_delta;
+  }
   return t;
 }();
 
-// 8-bit BMC lookup table: 256 entries (512 bytes in flash)
-// Index: 8-bit data value (0-255), always phase=true start
+// 8-bit BMC lookup table, indexed by the byte value: 256 x uint32_t = 1024 bytes in flash.
+// Bits 0-15  : 16-bit BMC pattern, encoded from a phase=high start
+// Bits 16-31 : phase-mask delta (all ones when the byte has odd parity, otherwise zero)
 static constexpr auto BMC_LUT_8 = [] {
-  std::array<uint16_t, 256> t{};
-  for (uint32_t i = 0; i < 256; i++)
-    t[i] = bmc_lut_encode(i, 8);
+  std::array<uint32_t, 256> t{};
+  for (uint32_t value = 0; value < t.size(); ++value) {
+    const uint32_t pattern = bmc_lut_encode(value, 8);
+    const uint32_t parity_delta = (bmc_lut_parity(value, 8) != 0u) ? 0xFFFF0000u : 0u;
+    t[value] = pattern | parity_delta;
+  }
   return t;
 }();
 
@@ -63,7 +85,7 @@ bool SPDIFEncoder::setup() {
   }
   ESP_LOGV(TAG, "Buffer allocated (%zu bytes)", SPDIF_BLOCK_SIZE_BYTES);
 
-  // Build initial channel status block with default sample rate
+  // Build initial channel status block with default sample rate and width
   this->build_channel_status_();
 
   this->reset();
@@ -73,7 +95,7 @@ bool SPDIFEncoder::setup() {
 void SPDIFEncoder::reset() {
   this->spdif_block_ptr_ = this->spdif_block_buf_.get();
   this->frame_in_block_ = 0;
-  this->is_left_channel_ = true;
+  this->block_buf_is_silence_block_ = false;  // next silence flush must rebuild the block rather than re-send it
 }
 
 void SPDIFEncoder::set_sample_rate(uint32_t sample_rate) {
@@ -84,31 +106,27 @@ void SPDIFEncoder::set_sample_rate(uint32_t sample_rate) {
   }
 }
 
+void SPDIFEncoder::set_bytes_per_sample(uint8_t bytes_per_sample) {
+  // Accept only 2, 3 or 4 bytes (16/24/32-bit PCM); anything else is logged and ignored.
+  if (bytes_per_sample < 2 || bytes_per_sample > 4) {
+    ESP_LOGE(TAG, "Unsupported bytes per sample: %u", (unsigned) bytes_per_sample);
+    return;
+  }
+  if (this->bytes_per_sample_ == bytes_per_sample)
+    return;  // Width unchanged: keep the channel status and any partially built block.
+  this->bytes_per_sample_ = bytes_per_sample;
+  this->build_channel_status_();  // Re-derive the channel status for the new width.
+  this->reset();  // Drop any partial block built at the previous width so widths never mix on the wire.
+  ESP_LOGD(TAG, "Input width set to %u-bit", (unsigned) bytes_per_sample * 8);
+}
+
 void SPDIFEncoder::build_channel_status_() {
   // IEC 60958-3 Consumer Channel Status Block (192 bits = 24 bytes)
-  // Transmitted LSB-first within each byte, one bit per frame via C bit
-  //
-  // Byte 0: Control bits
-  //   Bit 0: 0 = Consumer format (not professional AES3)
-  //   Bit 1: 0 = PCM audio (not non-audio data like AC3)
-  //   Bit 2: 0 = No copyright assertion
-  //   Bits 3-5: 000 = No pre-emphasis
-  //   Bits 6-7: 00 = Mode 0 (basic consumer format)
-  //
-  // Byte 1: Category code (0x00 = general, 0x01 = CD, etc.)
-  //
-  // Byte 2: Source/channel numbers
-  //   Bits 0-3: Source number (0 = unspecified)
-  //   Bits 4-7: Channel number (0 = unspecified)
-  //
-  // Byte 3: Sample frequency and clock accuracy
-  //   Bits 0-3: Sample frequency code
-  //   Bits 4-5: Clock accuracy (00 = Level II, ±1000 ppm, appropriate for ESP32)
-  //   Bits 6-7: Reserved (0)
-  //
-  // Bytes 4-23: Reserved (zeros for basic compliance)
+  // Transmitted LSB-first within each byte, one bit per frame via C bit.
+
+  // Any cached silence block was built for the previous channel status; it is now stale.
+  this->block_buf_is_silence_block_ = false;
 
-  // Clear all bytes first
   this->channel_status_.fill(0);
 
   // Byte 0: Consumer, PCM audio, no copyright, no pre-emphasis, Mode 0
@@ -140,132 +158,148 @@ void SPDIFEncoder::build_channel_status_() {
   // Byte 3: freq_code in bits 0-3, clock accuracy (00) in bits 4-5
   this->channel_status_[3] = freq_code;  // Clock accuracy bits 4-5 are already 0
 
-  // Bytes 4-23 remain zero (word length not specified, no original sample freq, etc.)
+  // Byte 4: Word length encoding (IEC 60958-3 consumer); bit 0 of the byte is transmitted first
+  //   bit 0:    max length flag (0 = max 20 bits, 1 = max 24 bits)
+  //   bits 1-3: word length code relative to the max (bit 1 is the first of the three on the wire)
+  // For our supported widths:
+  //   16-bit (max 20): flag 0, code 1 -> (1 << 1) | 0 = 0x02 -- "16 bits, max 20"
+  //   24-bit (max 24): flag 1, code 5 -> (5 << 1) | 1 = 0x0B -- "24 bits, max 24"
+  //   32-bit input is truncated to 24-bit on the wire, so use the 24-bit code.
+  uint8_t word_length_code;
+  switch (this->bytes_per_sample_) {
+    case 2:
+      word_length_code = 0x02;
+      break;
+    case 3:  // 24-bit and 32-bit (truncated) input share the 24-bit code
+    case 4:
+      word_length_code = 0x0B;  // not 0x0D: that sets code 6, which announces a 21-bit word
+      break;
+    default:
+      word_length_code = 0x00;  // not specified
+      break;
+  }
+  this->channel_status_[4] = word_length_code;
 }
 
-HOT void SPDIFEncoder::encode_sample_(const uint8_t *pcm_sample) {
-  // ============================================================================
-  // Build raw 32-bit subframe (IEC 60958 format)
-  // ============================================================================
-  // Bit layout:
-  //   Bits 0-3:   Preamble (handled separately, not in raw_subframe)
-  //   Bits 4-7:   Auxiliary audio data (zeros for 16-bit audio)
-  //   Bits 8-11:  Audio LSB extension (zeros for 16-bit audio)
-  //   Bits 12-27: 16-bit audio sample (MSB-aligned in 20-bit audio field)
-  //   Bit 28:     V (Validity) - 0 = valid audio
-  //   Bit 29:     U (User data) - 0
-  //   Bit 30:     C (Channel status) - from channel status block
-  //   Bit 31:     P (Parity) - even parity over bits 4-31
-  // ============================================================================
+// Returns channel-status bit number `frame` (byte frame / 8, bit frame % 8 within that byte)
+// positioned at bit 30, the C slot of a raw subframe, so the result can be OR'd in directly.
+ESPHOME_ALWAYS_INLINE static inline uint32_t c_bit_for_frame(const std::array<uint8_t, 24> &channel_status,
+                                                             uint32_t frame) {
+  return ((static_cast<uint32_t>(channel_status[frame / 8u]) >> (frame % 8u)) & 1u) << 30;
+}
 
-  // Place 16-bit audio sample at bits 12-27 (little-endian input: [0]=LSB, [1]=MSB)
-  uint32_t raw_subframe = (static_cast<uint32_t>(pcm_sample[1]) << 20) | (static_cast<uint32_t>(pcm_sample[0]) << 12);
+// ============================================================================
+// IEC 60958 subframe bit layout
+// ============================================================================
+//   Bits 0-3:   Preamble (handled separately, not in raw_subframe)
+//   Bits 4-7:   Auxiliary audio data / 24-bit audio LSB
+//   Bits 8-11:  Audio LSB extension (zero for 16-bit, low nibble of audio for 24-bit)
+//   Bits 12-27: Audio sample (16 high bits in 16-bit mode, mid 16 bits in 24-bit mode)
+//   Bit 28:     V (Validity) - 0 = valid audio
+//   Bit 29:     U (User data) - 0
+//   Bit 30:     C (Channel status) - from channel status block
+//   Bit 31:     P (Parity) - even parity over bits 4-31
+// ============================================================================
 
-  // V = 0 (valid audio), U = 0 (no user data)
-  // C = channel status bit for current frame (same bit used for both L and R subframes)
-  bool c_bit = this->get_channel_status_bit_(this->frame_in_block_);
-  if (c_bit) {
-    raw_subframe |= (1U << 30);
+// Pack one little-endian PCM sample of Bps bytes into the audio bits of a raw IEC 60958 subframe.
+// Bits 28-31 (V, U, C, P) are left clear; OR-ing in the C bit and parity is the caller's job.
+template<uint8_t Bps> ESPHOME_ALWAYS_INLINE static inline uint32_t build_raw_subframe(const uint8_t *pcm_sample) {
+  static_assert(Bps == 2 || Bps == 3 || Bps == 4, "Unsupported bytes per sample");
+  // The two most significant input bytes always occupy bits 12-27, whatever the width.
+  const uint32_t upper = (static_cast<uint32_t>(pcm_sample[Bps - 1]) << 20) |
+                         (static_cast<uint32_t>(pcm_sample[Bps - 2]) << 12);
+  if constexpr (Bps == 2) {
+    // 16-bit input: nothing below bit 12, so bits 4-11 stay zero.
+    return upper;
+  } else {
+    // 24-bit input contributes its lowest byte to bits 4-11. 32-bit input is truncated to
+    // 24 bits by skipping its lowest byte, so the byte used is pcm_sample[1]. In both
+    // cases that is the third byte counted from the most significant end.
+    return upper | (static_cast<uint32_t>(pcm_sample[Bps - 3]) << 4);
   }
+}
 
-  // Calculate even parity over bits 4-30
-  // This ensures consistent BMC ending phase regardless of audio content
-  uint32_t bits_4_30 = (raw_subframe >> 4) & 0x07FFFFFF;  // 27 bits (4-30)
-  uint32_t ones_count = __builtin_popcount(bits_4_30);
-  uint32_t parity = ones_count & 1;  // 1 if odd count, 0 if even
-  raw_subframe |= parity << 31;      // Set P bit to make total even
+// BMC-encode a subframe and write the two output uint32 words to dst. Caller passes
+// raw_subframe with the C bit already merged in (bit 30) and the P bit cleared (bit 31 = 0). P is
+// derived from the cumulative parity-mask delta of the per-byte LUT lookups.
+//
+// I2S halfword swap means word[0] transmits as: bits 24-31, 16-23, 8-15, 0-7.
+// word[1] transmits as: bits 16-31, 0-15. Within each halfword, MSB-first.
+// All preambles end at phase HIGH, so phase=true at the start of bit 4.
+//
+// P-bit derivation: BMC_LUT_*'s upper half encodes the parity of the input chunk. Each
+// chunk's parity delta is shifted down (`lut >> 16`) into a phase_mask that lives in the
+// low 16 bits, so the same value can also be XORed against subsequent BMC patterns to
+// invert phase. XOR'ing those deltas through all chunks (with bit 31 = 0) yields the
+// parity of bits 4-30 in the low bits of phase_mask -- the required value of the P bit
+// for even total parity. The BMC of bit 31 lives in bit 0 of the high-byte BMC output
+// (i = 7 maps to position (8-1-7)*2 = 0); flipping the source bit flips only the lower
+// BMC bit (= phase XOR bit), so applying P is `bmc_24_31 ^= phase_mask & 1u`.
+template<uint8_t Bps>
+ESPHOME_ALWAYS_INLINE static inline void bmc_encode_subframe(uint32_t raw_subframe, uint8_t preamble, uint32_t *dst) {
+  if constexpr (Bps == 2) {
+    // 16-bit path: bits 4-11 are zero, encoded inline as BMC_ZERO_NIBBLE constants.
+    // Each zero bit toggles the phase exactly once, so eight of them starting at phase=HIGH end at
+    // phase=HIGH and bits 12-15 start encoding at phase=true. Zero bits add nothing to parity.
+    uint32_t nibble = (raw_subframe >> 12) & 0xF;
+    uint32_t lut_n = BMC_LUT_4[nibble];
+    uint32_t bmc_12_15 = lut_n & 0xFFu;
+    uint32_t phase_mask = lut_n >> 16;  // 0xFFFFu if odd parity, else 0
 
-  // ============================================================================
-  // Select preamble based on position in block and channel
-  // ============================================================================
-  // B = block start (left channel, frame 0 of 192-frame block)
-  // M = left channel (frames 1-191)
-  // W = right channel (all frames)
-  uint8_t preamble;
-  if (this->is_left_channel_) {
-    preamble = (this->frame_in_block_ == 0) ? PREAMBLE_B : PREAMBLE_M;
+    uint32_t byte_mid = (raw_subframe >> 16) & 0xFF;
+    uint32_t lut_m = BMC_LUT_8[byte_mid];
+    uint32_t bmc_16_23 = (lut_m & 0xFFFFu) ^ phase_mask;
+    phase_mask ^= lut_m >> 16;
+
+    uint32_t byte_hi = (raw_subframe >> 24) & 0xFF;  // bit 7 (= P) is 0 by precondition
+    uint32_t lut_h = BMC_LUT_8[byte_hi];
+    uint32_t bmc_24_31 = (lut_h & 0xFFFFu) ^ phase_mask;
+    phase_mask ^= lut_h >> 16;
+    // phase_mask now reflects parity of bits 4-30. Apply P by flipping bit 0 of bmc_24_31.
+    bmc_24_31 ^= phase_mask & 1u;
+
+    dst[0] = bmc_12_15 | (BMC_ZERO_NIBBLE << 8) | (BMC_ZERO_NIBBLE << 16) | (static_cast<uint32_t>(preamble) << 24);
+    dst[1] = bmc_24_31 | (bmc_16_23 << 16);  // bits 16-31 = bmc(16-23), bits 0-15 = bmc(24-31)
   } else {
-    preamble = PREAMBLE_W;
+    // 24-bit (and 32-bit truncated) path: bits 4-11 are live audio.
+    uint32_t byte_lo = (raw_subframe >> 4) & 0xFF;
+    uint32_t lut_l = BMC_LUT_8[byte_lo];
+    uint32_t bmc_4_11 = lut_l & 0xFFFFu;
+    uint32_t phase_mask = lut_l >> 16;  // 0xFFFFu if odd parity, else 0
+
+    uint32_t nibble = (raw_subframe >> 12) & 0xF;
+    uint32_t lut_n = BMC_LUT_4[nibble];
+    uint32_t bmc_12_15 = (lut_n & 0xFFu) ^ (phase_mask & 0xFFu);  // nibble pattern is 8 bits, so mask to 8
+    phase_mask ^= lut_n >> 16;
+
+    uint32_t byte_mid = (raw_subframe >> 16) & 0xFF;
+    uint32_t lut_m = BMC_LUT_8[byte_mid];
+    uint32_t bmc_16_23 = (lut_m & 0xFFFFu) ^ phase_mask;
+    phase_mask ^= lut_m >> 16;
+
+    uint32_t byte_hi = (raw_subframe >> 24) & 0xFF;  // bit 7 (= P) is 0 by precondition
+    uint32_t lut_h = BMC_LUT_8[byte_hi];
+    uint32_t bmc_24_31 = (lut_h & 0xFFFFu) ^ phase_mask;
+    phase_mask ^= lut_h >> 16;
+    bmc_24_31 ^= phase_mask & 1u;  // apply P: phase_mask now holds the parity of bits 4-30
+
+    // word[0]: bits 24-31 = preamble, bits 8-23 = bmc(4-11), bits 0-7 = bmc(12-15)
+    // word[1]: bits 16-31 = bmc(16-23), bits 0-15 = bmc(24-31)
+    dst[0] = bmc_12_15 | (bmc_4_11 << 8) | (static_cast<uint32_t>(preamble) << 24);
+    dst[1] = bmc_24_31 | (bmc_16_23 << 16);
   }
+}
 
-  // ============================================================================
-  // BMC encode the data portion (bits 4-31) using lookup tables
-  // ============================================================================
-  // The I2S uses 16-bit halfword swap: bits 16-31 transmit before bits 0-15.
-  // This applies to BOTH word[0] and word[1].
-  //
-  // word[0] transmission order: [16-23] → [24-31] → [0-7] → [8-15]
-  // For correct S/PDIF subframe order (preamble → aux → audio):
-  //   - bits 16-23: preamble (8 BMC bits)
-  //   - bits 24-31: BMC(subframe bits 4-7) - first aux nibble
-  //   - bits 0-7:   BMC(subframe bits 8-11) - second aux nibble
-  //   - bits 8-15:  BMC(subframe bits 12-15) - audio low nibble
-  //
-  // word[1] transmission order: [16-31] → [0-15]
-  // For correct S/PDIF subframe order:
-  //   - bits 16-31: BMC(subframe bits 16-23) - audio mid byte
-  //   - bits 0-15:  BMC(subframe bits 24-31) - audio high nibble + VUCP
-  // ============================================================================
-
-  // All preambles end at phase HIGH. Bits 4-11 are always zero for 16-bit audio;
-  // two zero nibbles flip phase 8 times total → back to HIGH.
-  // So bits 12-15 always start encoding at phase=true.
-
-  // Bits 12-15: 4-bit LUT lookup (always phase=true start)
-  uint32_t nibble = (raw_subframe >> 12) & 0xF;
-  uint32_t bmc_12_15 = BMC_LUT_4[nibble];
-
-  // Phase tracking via branchless XOR mask:
-  // - 0x0000 means phase=true (use LUT value directly)
-  // - 0xFFFF means phase=false (complement LUT value)
-  // End phase = start XOR (popcount & 1) since zero-bits flip phase,
-  // and for even bit widths: #zeros parity == popcount parity.
-  uint32_t phase_mask = -(__builtin_popcount(nibble) & 1u) & 0xFFFF;
-
-  // Bits 16-23: 8-bit LUT lookup with phase correction
-  uint32_t byte_mid = (raw_subframe >> 16) & 0xFF;
-  uint32_t bmc_16_23 = BMC_LUT_8[byte_mid] ^ phase_mask;
-  phase_mask ^= -(__builtin_popcount(byte_mid) & 1u) & 0xFFFF;
-
-  // Bits 24-31: 8-bit LUT lookup with phase correction
-  uint32_t byte_hi = (raw_subframe >> 24) & 0xFF;
-  uint32_t bmc_24_31 = BMC_LUT_8[byte_hi] ^ phase_mask;
-
-  // ============================================================================
-  // Combine with correct positioning for I2S transmission
-  // ============================================================================
-  // I2S with halfword swap: transmits bits 16-31, then bits 0-15.
-  // Within each halfword, MSB (highest bit) is transmitted first.
-  //
-  // For upper halfword (bits 16-31): bit 31 → bit 16
-  // For lower halfword (bits 0-15):  bit 15 → bit 0
-  //
-  // Desired S/PDIF order: preamble → bmc_4_7 → bmc_8_11 → bmc_12_15
-  //
-  // word[0] layout for correct transmission:
-  //   bits 24-31: preamble        (transmitted 1st, as MSB of upper halfword)
-  //   bits 16-23: BMC_ZERO_NIBBLE (transmitted 2nd, aux bits 4-7)
-  //   bits 8-15:  BMC_ZERO_NIBBLE (transmitted 3rd, aux bits 8-11)
-  //   bits 0-7:   bmc_12_15       (transmitted 4th, audio low nibble)
-  //
-  // word[1] layout:
-  //   bits 16-31: bmc_16_23 (transmitted 5th)
-  //   bits 0-15:  bmc_24_31 (transmitted 6th)
-  this->spdif_block_ptr_[0] =
-      bmc_12_15 | (BMC_ZERO_NIBBLE << 8) | (BMC_ZERO_NIBBLE << 16) | (static_cast<uint32_t>(preamble) << 24);
-  this->spdif_block_ptr_[1] = bmc_24_31 | (bmc_16_23 << 16);
-  this->spdif_block_ptr_ += 2;
-
-  // ============================================================================
-  // Update position tracking
-  // ============================================================================
-  if (!this->is_left_channel_) {
-    // Completed a stereo frame, advance frame counter
-    if (++this->frame_in_block_ >= SPDIF_BLOCK_SAMPLES) {
-      this->frame_in_block_ = 0;
-    }
+template<uint8_t Bps> void SPDIFEncoder::encode_silence_frame_() {  // appends one all-zero L+R frame
+  static constexpr uint8_t SILENCE[4] = {0, 0, 0, 0};  // sized for the widest (4-byte) sample
+  uint32_t raw = build_raw_subframe<Bps>(SILENCE) | c_bit_for_frame(this->channel_status_, this->frame_in_block_);
+  uint8_t preamble_l = (this->frame_in_block_ == 0) ? PREAMBLE_B : PREAMBLE_M;  // B only on frame 0 of a block
+  bmc_encode_subframe<Bps>(raw, preamble_l, this->spdif_block_ptr_);  // left subframe
+  bmc_encode_subframe<Bps>(raw, PREAMBLE_W, this->spdif_block_ptr_ + 2);  // right subframe, same raw value
+  this->spdif_block_ptr_ += 4;  // two uint32 words per subframe, two subframes per frame
+  if (++this->frame_in_block_ >= SPDIF_BLOCK_SAMPLES) {
+    this->frame_in_block_ = 0;  // wrap at the block boundary
   }
-  this->is_left_channel_ = !this->is_left_channel_;
 }
 
 esp_err_t SPDIFEncoder::send_block_(TickType_t ticks_to_wait) {
@@ -295,79 +329,162 @@ esp_err_t SPDIFEncoder::send_block_(TickType_t ticks_to_wait) {
   return err;
 }
 
-size_t SPDIFEncoder::get_pending_pcm_bytes() const {
-  if (this->spdif_block_ptr_ == nullptr || this->spdif_block_buf_ == nullptr) {
-    return 0;
+template<uint8_t Bps>
+HOT esp_err_t SPDIFEncoder::write_typed_(const uint8_t *src, size_t size, TickType_t ticks_to_wait,
+                                         uint32_t *blocks_sent, size_t *bytes_consumed) {
+  const uint8_t *pcm_data = src;
+  const uint8_t *const pcm_end = src + size;
+  uint32_t block_count = 0;
+
+  // Hot state lives in locals so the compiler can keep it in registers across the
+  // per-frame encoding work; byte writes through block_ptr may alias the member fields,
+  // which would block register allocation if the encoding read them directly from this->*.
+  uint32_t *block_ptr = this->spdif_block_ptr_;
+  uint32_t *const block_buf = this->spdif_block_buf_.get();
+  uint32_t *const block_end = block_buf + SPDIF_BLOCK_SIZE_U32;
+  uint32_t frame = this->frame_in_block_;
+  const std::array<uint8_t, 24> &channel_status = this->channel_status_;
+
+  auto save_state = [&]() {
+    this->spdif_block_ptr_ = block_ptr;
+    this->frame_in_block_ = static_cast<uint8_t>(frame);
+  };
+
+  auto report_out_params = [&]() {
+    if (blocks_sent != nullptr)
+      *blocks_sent = block_count;
+    if (bytes_consumed != nullptr)
+      *bytes_consumed = pcm_data - src;
+  };
+
+  // Send a completed block if the buffer is full, propagating any error. send_block_ is described
+  // here as resetting this->spdif_block_ptr_ on success and leaving it alone on error; block_ptr mirrors that.
+  // NOTE(review): send_block_ runs before save_state(), so the member pointer is stale then -- confirm unused.
+  auto maybe_send = [&]() -> esp_err_t {
+    if (block_ptr >= block_end) {
+      esp_err_t err = this->send_block_(ticks_to_wait);
+      if (err != ESP_OK) {
+        save_state();
+        report_out_params();
+        return err;
+      }
+      block_ptr = block_buf;
+      ++block_count;
+    }
+    return ESP_OK;
+  };
+
+  // Hot path: encode L+R pairs in two peeled sub-loops. Frame 0 carries the only
+  // buffer-full check and uses PREAMBLE_B (a block fills exactly when frame wraps from
+  // 191 back to 0). Frames 1..191 use PREAMBLE_M and need no buffer-full check or
+  // preamble branch. The encoding body is inlined here so block_ptr lives in a register
+  // for the duration of the loop.
+  while (pcm_data + 2 * Bps <= pcm_end) {  // whole stereo frames only; a trailing partial frame is not consumed
+    if (frame == 0) {
+      esp_err_t err = maybe_send();
+      if (err != ESP_OK)
+        return err;
+
+      uint32_t c_bit = c_bit_for_frame(channel_status, 0);
+      uint32_t raw_l = build_raw_subframe<Bps>(pcm_data) | c_bit;
+      uint32_t raw_r = build_raw_subframe<Bps>(pcm_data + Bps) | c_bit;
+      bmc_encode_subframe<Bps>(raw_l, PREAMBLE_B, block_ptr);
+      bmc_encode_subframe<Bps>(raw_r, PREAMBLE_W, block_ptr + 2);
+      block_ptr += 4;
+      frame = 1;
+      pcm_data += 2 * Bps;
+    }
+
+    // The inner loop runs until min(SPDIF_BLOCK_SAMPLES, frame + input_frames). The
+    // input-size bound is folded into end_frame so a single `frame < end_frame` test
+    // governs termination.
+    uint32_t input_frames = static_cast<uint32_t>(pcm_end - pcm_data) / (2u * Bps);
+    uint32_t end_frame = SPDIF_BLOCK_SAMPLES;
+    if (frame + input_frames < end_frame)
+      end_frame = frame + input_frames;
+
+    while (frame < end_frame) {
+      uint32_t c_bit = c_bit_for_frame(channel_status, frame);
+      uint32_t raw_l = build_raw_subframe<Bps>(pcm_data) | c_bit;
+      uint32_t raw_r = build_raw_subframe<Bps>(pcm_data + Bps) | c_bit;
+      bmc_encode_subframe<Bps>(raw_l, PREAMBLE_M, block_ptr);
+      bmc_encode_subframe<Bps>(raw_r, PREAMBLE_W, block_ptr + 2);
+      block_ptr += 4;
+      ++frame;
+      pcm_data += 2 * Bps;
+    }
+    if (frame >= SPDIF_BLOCK_SAMPLES)
+      frame = 0;  // block complete; the next pass or the tail below sends it
   }
-  // Each PCM sample (2 bytes) produces 2 uint32_t values in the SPDIF buffer
-  // So pending uint32s / 2 = pending samples, and each sample is 2 bytes
-  size_t pending_uint32s = this->spdif_block_ptr_ - this->spdif_block_buf_.get();
-  size_t pending_samples = pending_uint32s / 2;
-  return pending_samples * 2;  // 2 bytes per sample
+
+  // Send any complete block that was just finished.
+  if (block_ptr >= block_end) {
+    esp_err_t err = this->send_block_(ticks_to_wait);
+    if (err != ESP_OK) {
+      save_state();
+      report_out_params();
+      return err;
+    }
+    block_ptr = block_buf;
+    ++block_count;
+  }
+
+  save_state();
+  report_out_params();
+  return ESP_OK;
 }
 
 HOT esp_err_t SPDIFEncoder::write(const uint8_t *src, size_t size, TickType_t ticks_to_wait, uint32_t *blocks_sent,
                                   size_t *bytes_consumed) {
-  const uint8_t *pcm_data = src;
-  const uint8_t *pcm_end = src + size;
-  uint32_t block_count = 0;
+  // Incoming PCM means the buffer can no longer be treated as a cached full-silence block.
+  if (size != 0)
+    this->block_buf_is_silence_block_ = false;
+
+  // Hand off to the encoder instantiation that matches the configured input width.
+  const uint8_t width = this->bytes_per_sample_;
+  if (width == 2)
+    return this->write_typed_<2>(src, size, ticks_to_wait, blocks_sent, bytes_consumed);
+  if (width == 3)
+    return this->write_typed_<3>(src, size, ticks_to_wait, blocks_sent, bytes_consumed);
+  if (width == 4)
+    return this->write_typed_<4>(src, size, ticks_to_wait, blocks_sent, bytes_consumed);
+
+  return ESP_ERR_INVALID_STATE;  // width is not one of the supported values
+}
 
-  while (pcm_data < pcm_end) {
-    // Check if there's a pending complete block from a previous failed send
-    if (this->spdif_block_ptr_ >= &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) {
-      esp_err_t err = this->send_block_(ticks_to_wait);
-      if (err != ESP_OK) {
-        if (blocks_sent != nullptr) {
-          *blocks_sent = block_count;
-        }
-        if (bytes_consumed != nullptr) {
-          *bytes_consumed = pcm_data - src;
-        }
-        return err;
-      }
-      ++block_count;
+template<uint8_t Bps> esp_err_t SPDIFEncoder::flush_with_silence_typed_(TickType_t ticks_to_wait) {
+  // If a complete block is already pending (from a previous failed send), emit just that block.
+  // Otherwise pad the partial block with silence (or generate a full silence block if empty) and
+  // send. Always emits exactly one block on success.
+  if (this->spdif_block_ptr_ < &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) {
+    const bool was_empty = (this->spdif_block_ptr_ == this->spdif_block_buf_.get());  // write pointer at buffer start
+    // Continuous-silence idle case: a full silence block is byte-identical every time for the
+    // active channel status, so when the buffer already holds one, re-send it as-is.
+    if (was_empty && this->block_buf_is_silence_block_) {
+      return this->send_block_(ticks_to_wait);  // NOTE(review): assumes send_block_ always sends the whole buffer
     }
-
-    // Encode one 16-bit sample
-    this->encode_sample_(pcm_data);
-    pcm_data += 2;
-  }
-
-  // Send any complete block that was just finished
-  if (this->spdif_block_ptr_ >= &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) {
-    esp_err_t err = this->send_block_(ticks_to_wait);
-    if (err != ESP_OK) {
-      if (blocks_sent != nullptr) {
-        *blocks_sent = block_count;
-      }
-      if (bytes_consumed != nullptr) {
-        *bytes_consumed = pcm_data - src;
-      }
-      return err;
+    // Pad with silence frames at the configured width.
+    while (this->spdif_block_ptr_ < &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) {
+      this->encode_silence_frame_<Bps>();
     }
-    ++block_count;
+    // The buffer is a reusable full-silence block only if it was built entirely from silence; a
+    // partial real-audio block padded out with silence is not.
+    this->block_buf_is_silence_block_ = was_empty;
   }
-
-  if (blocks_sent != nullptr) {
-    *blocks_sent = block_count;
-  }
-  if (bytes_consumed != nullptr) {
-    *bytes_consumed = size;
-  }
-  return ESP_OK;
+  return this->send_block_(ticks_to_wait);  // buffer is full here: padded above or already complete on entry
 }
 
 esp_err_t SPDIFEncoder::flush_with_silence(TickType_t ticks_to_wait) {
-  // If a complete block is already pending (from a previous failed send), emit just that block.
-  // Otherwise pad the partial block with silence (or generate a full silence block if empty)
-  // and send. Always emits exactly one block on success.
-  if (this->spdif_block_ptr_ < &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) {
-    static const uint8_t SILENCE[2] = {0, 0};
-    while (this->spdif_block_ptr_ < &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) {
-      this->encode_sample_(SILENCE);
-    }
+  // Route to the flush instantiation for the configured input width; any other value is an error.
+  const uint8_t width = this->bytes_per_sample_;
+  if (width == 2)
+    return this->flush_with_silence_typed_<2>(ticks_to_wait);
+  if (width == 3)
+    return this->flush_with_silence_typed_<3>(ticks_to_wait);
+  if (width == 4)
+    return this->flush_with_silence_typed_<4>(ticks_to_wait);
+
+  return ESP_ERR_INVALID_STATE;
-  return this->send_block_(ticks_to_wait);
 }
 
 }  // namespace esphome::i2s_audio
diff --git a/esphome/components/i2s_audio/speaker/spdif_encoder.h b/esphome/components/i2s_audio/speaker/spdif_encoder.h
index 8c5e0688419..9e23a858f76 100644
--- a/esphome/components/i2s_audio/speaker/spdif_encoder.h
+++ b/esphome/components/i2s_audio/speaker/spdif_encoder.h
@@ -24,8 +24,6 @@ static constexpr uint16_t SPDIF_BLOCK_SIZE_BYTES = SPDIF_BLOCK_SAMPLES * (EMULAT
 static constexpr uint32_t SPDIF_BLOCK_SIZE_U32 = SPDIF_BLOCK_SIZE_BYTES / sizeof(uint32_t);  // 3072 bytes / 4 = 768
 // I2S frame count for one SPDIF block (for new driver where frame = 8 bytes for 32-bit stereo)
 static constexpr uint32_t SPDIF_BLOCK_I2S_FRAMES = SPDIF_BLOCK_SIZE_BYTES / 8;  // 3072 / 8 = 384 frames
-// PCM bytes needed for one complete SPDIF block (192 stereo frames * 2 bytes per sample * 2 channels)
-static constexpr uint16_t SPDIF_PCM_BYTES_PER_BLOCK = SPDIF_BLOCK_SAMPLES * 2 * 2;  // = 768 bytes
 
 /// Callback signature for block completion (raw function pointer for minimal overhead)
 /// @param user_ctx User context pointer passed during callback registration
@@ -64,8 +62,16 @@ class SPDIFEncoder {
   /// @brief Check if currently in preload mode
   bool is_preload_mode() const { return this->preload_mode_; }
 
+  /// @brief Set input PCM width: 2 = 16-bit, 3 = 24-bit, 4 = 32-bit (truncated to 24-bit on the wire).
+  /// Must be called before write() if input width changes from the default (16-bit). Triggers a
+  /// channel-status rebuild to reflect the new word length.
+  void set_bytes_per_sample(uint8_t bytes_per_sample);
+
+  /// @brief Get the configured input PCM width in bytes per sample
+  uint8_t get_bytes_per_sample() const { return this->bytes_per_sample_; }
+
   /// @brief Convert PCM audio data to SPDIF BMC encoded data
-  /// @param src Source PCM audio data (16-bit stereo)
+  /// @param src Source PCM audio data (stereo, width matches set_bytes_per_sample)
   /// @param size Size of source data in bytes
   /// @param ticks_to_wait Timeout for blocking writes
   /// @param blocks_sent Optional pointer to receive the number of complete SPDIF blocks sent
@@ -74,17 +80,6 @@ class SPDIFEncoder {
   esp_err_t write(const uint8_t *src, size_t size, TickType_t ticks_to_wait, uint32_t *blocks_sent = nullptr,
                   size_t *bytes_consumed = nullptr);
 
-  /// @brief Get the number of PCM bytes currently pending in the partial block buffer
-  /// @return Number of pending PCM bytes (0 to SPDIF_PCM_BYTES_PER_BLOCK - 1)
-  size_t get_pending_pcm_bytes() const;
-
-  /// @brief Get the number of PCM frames currently pending in the partial block buffer
-  /// @return Number of pending PCM frames (0 to SPDIF_BLOCK_SAMPLES - 1)
-  uint32_t get_pending_frames() const { return this->get_pending_pcm_bytes() / 4; }
-
-  /// @brief Check if there is a partial block pending
-  bool has_pending_data() const { return this->spdif_block_ptr_ != this->spdif_block_buf_.get(); }
-
   /// @brief Emit one complete SPDIF block: pad any pending partial block with silence and send,
   /// or send a full silence block if nothing is pending. Always produces exactly one block on success.
   /// @param ticks_to_wait Timeout for blocking writes
@@ -95,7 +90,7 @@ class SPDIFEncoder {
   void reset();
 
   /// @brief Set the sample rate for Channel Status Block encoding
-  /// @param sample_rate Sample rate in Hz (e.g., 44100, 48000, 96000)
+  /// @param sample_rate Sample rate in Hz (e.g., 44100, 48000)
   /// Call this before writing audio data to ensure correct channel status.
   void set_sample_rate(uint32_t sample_rate);
 
@@ -103,8 +98,19 @@ class SPDIFEncoder {
   uint32_t get_sample_rate() const { return this->sample_rate_; }
 
  protected:
-  /// @brief Encode a single 16-bit PCM sample into the current block position
-  HOT void encode_sample_(const uint8_t *pcm_sample);
+  /// @brief Encode a single stereo silence frame at the current block position.
+  /// @note Used only by flush_with_silence_typed_ to pad; the hot write path inlines the
+  /// encoding body directly into write_typed_ to keep block_ptr / frame_in_block_ in registers.
+  template<uint8_t Bps> void encode_silence_frame_();
+
+  /// @brief Templated write loop. Called from the public write() via runtime dispatch on bytes_per_sample_.
+  template<uint8_t Bps>
+  HOT esp_err_t write_typed_(const uint8_t *src, size_t size, TickType_t ticks_to_wait, uint32_t *blocks_sent,
+                             size_t *bytes_consumed);
+
+  /// @brief Templated flush-with-silence. Pads the pending block with zeros at the configured width
+  /// (or builds a full silence block when nothing is pending) and sends it. Emits exactly one block on success.
+  template<uint8_t Bps> esp_err_t flush_with_silence_typed_(TickType_t ticks_to_wait);
 
   /// @brief Send the completed block via the appropriate callback
   esp_err_t send_block_(TickType_t ticks_to_wait);
@@ -112,15 +118,6 @@ class SPDIFEncoder {
   /// @brief Build the channel status block from current configuration
   void build_channel_status_();
 
-  /// @brief Get the channel status bit for a specific frame
-  /// @param frame Frame number (0-191)
-  /// @return The C bit value for this frame
-  ESPHOME_ALWAYS_INLINE inline bool get_channel_status_bit_(uint8_t frame) const {
-    // Channel status is 192 bits transmitted over 192 frames
-    // Bit N is transmitted in frame N, LSB-first within each byte
-    return (this->channel_status_[frame >> 3] >> (frame & 7)) & 1;
-  }
-
   // Member ordering optimized to minimize padding (largest alignment first)
 
   // 4-byte aligned members (pointers and uint32_t)
@@ -133,9 +130,13 @@ class SPDIFEncoder {
   uint32_t sample_rate_{48000};                  // Sample rate for Channel Status Block encoding
 
   // 1-byte aligned members (grouped together to avoid internal padding)
-  uint8_t frame_in_block_{0};   // 0-191, tracks stereo frame position within block
-  bool is_left_channel_{true};  // Alternates L/R for stereo samples
-  bool preload_mode_{false};    // Whether to use preload callback vs write callback
+  uint8_t bytes_per_sample_{2};  // Input PCM width: 2/3/4 (16/24/32-bit). 32-bit truncates to 24-bit on the wire.
+  uint8_t frame_in_block_{0};    // 0-191, tracks stereo frame position within block
+  bool preload_mode_{false};     // Whether to use preload callback vs write callback
+  // True when spdif_block_buf_ currently holds a complete full-silence block valid for the active
+  // channel status. A full silence block is deterministic for a given sample rate and word length,
+  // so when this is set, flush_with_silence() can re-send the buffer verbatim instead of re-encoding.
+  bool block_buf_is_silence_block_{false};
 
   // Channel Status Block (192 bits = 24 bytes, transmitted over 192 frames)
   // Placed last since std::array<uint8_t> has 1-byte alignment
diff --git a/tests/components/speaker/spdif_mode.esp32-idf.yaml b/tests/components/i2s_audio/common-spdif_mode.yaml
similarity index 52%
rename from tests/components/speaker/spdif_mode.esp32-idf.yaml
rename to tests/components/i2s_audio/common-spdif_mode.yaml
index 4d6859feaed..374a4bce1e3 100644
--- a/tests/components/speaker/spdif_mode.esp32-idf.yaml
+++ b/tests/components/i2s_audio/common-spdif_mode.yaml
@@ -1,13 +1,3 @@
-substitutions:
-  i2s_bclk_pin: GPIO27
-  i2s_lrclk_pin: GPIO26
-  i2s_mclk_pin: GPIO25
-  i2s_dout_pin: GPIO12
-  spdif_data_pin: GPIO4
-
-packages:
-  i2c: !include ../../test_build_components/common/i2c/esp32-idf.yaml
-
 i2s_audio:
   - id: i2s_output
 
@@ -20,6 +10,5 @@ speaker:
     use_apll: true
     timeout: 2s
     sample_rate: 48000
-    bits_per_sample: 16bit
     channel: stereo
     i2s_mode: primary
diff --git a/tests/components/i2s_audio/test-spdif_speaker.esp32-idf.yaml b/tests/components/i2s_audio/test-spdif_speaker.esp32-idf.yaml
new file mode 100644
index 00000000000..a69d808d1db
--- /dev/null
+++ b/tests/components/i2s_audio/test-spdif_speaker.esp32-idf.yaml
@@ -0,0 +1,8 @@
+substitutions:
+  i2s_bclk_pin: GPIO27
+  i2s_lrclk_pin: GPIO26
+  i2s_mclk_pin: GPIO25
+  i2s_dout_pin: GPIO12
+  spdif_data_pin: GPIO4
+
+<<: !include common-spdif_mode.yaml