From 2886cb2f9696fe0178b9b88badd9cd5ec146cf46 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt
Date: Fri, 8 May 2026 11:55:45 +0000
Subject: [PATCH] Add a RingBufferAudioSource class that works like a transfer
 buffer but directly accesses ring buffer memory (except for a small hold
 buffer to handle split up samples/frames)

---
Reviewer notes (this section is ignored by git am):
 - Line breaks and template arguments were reconstructed from a flattened copy
   of this patch in which everything inside angle brackets had been stripped.
   NOTE(review): RingBuffer as the std::shared_ptr element type is assumed from
   the surrounding component - confirm before applying.
 - Changes relative to the flattened copy: copy construction/assignment of
   RingBufferAudioSource is deleted (its destructor releases the acquired ring
   buffer item), splice_buffer_ is value-initialized, and the overridden
   AudioReadableBuffer methods are documented. The header hunk count and the
   diffstat were updated accordingly; the index blob ids were left untouched.

 .../audio/audio_transfer_buffer.cpp           | 122 ++++++++++++++++++
 .../components/audio/audio_transfer_buffer.h  |  82 ++++++++++++
 2 files changed, 204 insertions(+)

diff --git a/esphome/components/audio/audio_transfer_buffer.cpp b/esphome/components/audio/audio_transfer_buffer.cpp
index 6ee9e4d28c..62aa161c2f 100644
--- a/esphome/components/audio/audio_transfer_buffer.cpp
+++ b/esphome/components/audio/audio_transfer_buffer.cpp
@@ -207,6 +207,128 @@ void ConstAudioSourceBuffer::consume(size_t bytes) {
   this->data_start_ += bytes;
 }
 
+std::unique_ptr<RingBufferAudioSource> RingBufferAudioSource::create(
+    std::shared_ptr<RingBuffer> ring_buffer, size_t max_fill_bytes, uint8_t alignment_bytes) {
+  if (ring_buffer == nullptr || max_fill_bytes == 0 || alignment_bytes == 0 || alignment_bytes > MAX_ALIGNMENT_BYTES) {
+    return nullptr;
+  }
+  return make_unique<RingBufferAudioSource>(std::move(ring_buffer), max_fill_bytes, alignment_bytes);
+}
+
+RingBufferAudioSource::~RingBufferAudioSource() {
+  if (this->acquired_item_ != nullptr) {
+    this->ring_buffer_->receive_release(this->acquired_item_);
+    this->acquired_item_ = nullptr;
+  }
+}
+
+void RingBufferAudioSource::release_item_() {
+  if (this->acquired_item_ == nullptr) {
+    return;
+  }
+  if (this->item_trailing_length_ > 0) {
+    // Copy the trailing sub-frame bytes into the splice buffer before returning the item; the next
+    // fill() will complete the frame from the head of the next chunk.
+    std::memcpy(this->splice_buffer_, this->item_trailing_ptr_, this->item_trailing_length_);
+    this->splice_length_ = this->item_trailing_length_;
+    this->item_trailing_ptr_ = nullptr;
+    this->item_trailing_length_ = 0;
+  }
+  this->ring_buffer_->receive_release(this->acquired_item_);
+  this->acquired_item_ = nullptr;
+}
+
+void RingBufferAudioSource::consume(size_t bytes) {
+  bytes = std::min(bytes, this->current_available_);
+  this->current_data_ += bytes;
+  this->current_available_ -= bytes;
+  // Release of the held item and promotion of queued data are deferred to fill() so callers see new
+  // data as a fresh return value rather than appearing silently after consume().
+}
+
+bool RingBufferAudioSource::has_buffered_data() const {
+  return (this->current_available_ > 0) || (this->queued_length_ > 0) || (this->splice_length_ > 0) ||
+         (this->ring_buffer_->available() > 0);
+}
+
+size_t RingBufferAudioSource::fill(TickType_t ticks_to_wait, bool pre_shift) {
+  if (this->current_available_ > 0) {
+    // Caller has not finished consuming the current exposure
+    return 0;
+  }
+
+  // If a queued region (the aligned remainder of the new chunk after a splice frame) is waiting,
+  // promote it to the exposed region and report its size as fresh data.
+  if (this->queued_length_ > 0) {
+    this->current_data_ = this->queued_data_;
+    this->current_available_ = this->queued_length_;
+    this->queued_data_ = nullptr;
+    this->queued_length_ = 0;
+    return this->current_available_;
+  }
+
+  // Nothing exposed and nothing queued: release the previously held item (saving any sub-frame tail
+  // to splice_buffer_) and acquire a new chunk.
+  this->release_item_();
+
+  size_t chunk_length = 0;
+  void *item = this->ring_buffer_->receive_acquire(chunk_length, this->max_fill_bytes_, ticks_to_wait);
+  if (item == nullptr) {
+    return 0;
+  }
+
+  uint8_t *chunk_data = static_cast<uint8_t *>(item);
+  bool exposing_splice_frame = false;
+
+  // Complete any pending splice frame from the head of the new chunk.
+  if (this->splice_length_ > 0) {
+    const size_t needed = static_cast<size_t>(this->alignment_bytes_) - this->splice_length_;
+    if (chunk_length < needed) {
+      // Not enough data to complete the spliced frame yet; absorb everything and wait for more.
+      std::memcpy(this->splice_buffer_ + this->splice_length_, chunk_data, chunk_length);
+      this->splice_length_ += chunk_length;
+      this->ring_buffer_->receive_release(item);
+      return 0;
+    }
+    std::memcpy(this->splice_buffer_ + this->splice_length_, chunk_data, needed);
+    chunk_data += needed;
+    chunk_length -= needed;
+    this->splice_length_ = 0;
+    exposing_splice_frame = true;
+  }
+
+  this->acquired_item_ = item;
+
+  // Split the remaining chunk into its aligned region and a (possibly zero) sub-frame trailing tail.
+  const size_t trailing = (this->alignment_bytes_ > 1) ? (chunk_length % this->alignment_bytes_) : 0;
+  const size_t aligned_bytes = chunk_length - trailing;
+  if (trailing > 0) {
+    this->item_trailing_ptr_ = chunk_data + aligned_bytes;
+    this->item_trailing_length_ = trailing;
+  }
+
+  if (exposing_splice_frame) {
+    // Expose the spliced frame from splice_buffer_, queuing the chunk's aligned region for the next
+    // fill() call.
+    this->current_data_ = this->splice_buffer_;
+    this->current_available_ = this->alignment_bytes_;
+    this->queued_data_ = chunk_data;
+    this->queued_length_ = aligned_bytes;
+    return this->alignment_bytes_;
+  }
+
+  if (aligned_bytes == 0) {
+    // The entire chunk is a sub-frame tail (only possible when alignment exceeds chunk size). Save it
+    // to the splice buffer and release the item so the next fill() can complete the frame.
+    this->release_item_();
+    return 0;
+  }
+
+  this->current_data_ = chunk_data;
+  this->current_available_ = aligned_bytes;
+  return aligned_bytes;
+}
+
 }  // namespace esphome::audio
 
 #endif
diff --git a/esphome/components/audio/audio_transfer_buffer.h b/esphome/components/audio/audio_transfer_buffer.h
index 68151bf4e2..1b599c4f64 100644
--- a/esphome/components/audio/audio_transfer_buffer.h
+++ b/esphome/components/audio/audio_transfer_buffer.h
@@ -214,6 +214,88 @@ class ConstAudioSourceBuffer : public AudioReadableBuffer {
   size_t length_{0};
 };
 
+/// @brief Zero-copy audio source that reads directly from a ring buffer's internal storage.
+///
+/// Optionally enforces a minimum read alignment (e.g. one audio frame). When alignment_bytes > 1, the
+/// source transparently stitches frames that straddle the ring buffer's wrap boundary by buffering the
+/// trailing partial frame from one chunk and joining it with the head of the next chunk in a small
+/// internal splice buffer, so callers always see frame-aligned data.
+class RingBufferAudioSource : public AudioReadableBuffer {
+ public:
+  /// Maximum supported alignment. Sized to cover 32-bit samples across up to 2 channels (8 bytes).
+  static constexpr size_t MAX_ALIGNMENT_BYTES = 8;
+
+  /// @brief Creates a new ring-buffer-backed audio source after validating its parameters.
+  /// @param ring_buffer The ring buffer to read from. Must be non-null.
+  /// @param max_fill_bytes Soft cap on bytes acquired per fill() call. Must be > 0.
+  /// @param alignment_bytes Minimum exposed-region alignment in bytes (defaults to 1, i.e. byte-aligned).
+  ///     Pass bytes_per_frame to make every exposed region a whole number of frames. Must be in
+  ///     [1, MAX_ALIGNMENT_BYTES].
+  /// @return unique_ptr if parameters are valid, nullptr otherwise
+  static std::unique_ptr<RingBufferAudioSource> create(std::shared_ptr<RingBuffer> ring_buffer,
+                                                       size_t max_fill_bytes, uint8_t alignment_bytes = 1);
+
+  explicit RingBufferAudioSource(std::shared_ptr<RingBuffer> ring_buffer, size_t max_fill_bytes,
+                                 uint8_t alignment_bytes)
+      : ring_buffer_(std::move(ring_buffer)), max_fill_bytes_(max_fill_bytes), alignment_bytes_(alignment_bytes) {}
+
+  ~RingBufferAudioSource() override;
+
+  // Non-copyable: the destructor releases the acquired ring buffer item, so a copy would release it twice.
+  RingBufferAudioSource(const RingBufferAudioSource &) = delete;
+  RingBufferAudioSource &operator=(const RingBufferAudioSource &) = delete;
+
+  // AudioReadableBuffer interface
+  /// @brief Pointer to the currently exposed region (nullptr until fill() has exposed data).
+  const uint8_t *data() const override { return this->current_data_; }
+  /// @brief Number of unconsumed bytes in the currently exposed region.
+  size_t available() const override { return this->current_available_; }
+  /// @brief Always returns 0.
+  size_t free() const override { return 0; }
+  /// @brief Advances the exposed region by up to `bytes` bytes (clamped to available()).
+  void consume(size_t bytes) override;
+  /// @brief True if exposed, queued, or partially spliced bytes remain, or the ring buffer reports data available.
+  bool has_buffered_data() const override;
+  /// @brief Exposes the next region of audio data once the current one has been fully consumed.
+  /// @param ticks_to_wait Ticks passed to the ring buffer's receive_acquire() call.
+  /// @param pre_shift Not used by this implementation.
+  /// @return Bytes newly exposed; 0 if data is still unconsumed, nothing was acquired, or no full frame is ready.
+  size_t fill(TickType_t ticks_to_wait, bool pre_shift) override;
+
+  /// @brief Returns a mutable pointer to the acquired ring buffer data.
+  /// Use only when the caller is the sole consumer and data will be discarded after use.
+  uint8_t *mutable_data() { return this->current_data_; }
+
+ protected:
+  /// @brief Releases the currently held ring buffer item, first copying any trailing sub-frame bytes
+  /// into the splice buffer so they can be stitched with the next chunk.
+  void release_item_();
+
+  std::shared_ptr<RingBuffer> ring_buffer_;
+  size_t max_fill_bytes_;
+
+  void *acquired_item_{nullptr};
+  uint8_t *current_data_{nullptr};
+
+  // Sub-frame trailing bytes inside the held item that will be copied to splice_buffer_ on release.
+  uint8_t *item_trailing_ptr_{nullptr};
+
+  // After the currently-exposed splice frame is consumed, fill() will promote this region (the aligned
+  // remainder of the new chunk) to the exposed region. queued_length_ == 0 when nothing is queued.
+  uint8_t *queued_data_{nullptr};
+
+  // Splice buffer holds the start of a partial frame whose remainder lives at the head of the next
+  // chunk. While splice_length_ > 0, the buffer is incomplete and waiting for completion bytes.
+  uint8_t splice_buffer_[MAX_ALIGNMENT_BYTES]{};
+
+  size_t current_available_{0};
+  size_t item_trailing_length_{0};
+  size_t queued_length_{0};
+  size_t splice_length_{0};
+
+  uint8_t alignment_bytes_;
+};
+
 }  // namespace esphome::audio
 
 #endif