mirror of
https://github.com/esphome/esphome.git
synced 2026-06-02 11:08:06 +08:00
[api][voice_assistant] Add second audio channel for voice_assistant (#16265)
Co-authored-by: Kevin Ahrendt <kevin.ahrendt@openhomefoundation.org> Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
This commit is contained in:
@@ -2026,6 +2026,7 @@ message VoiceAssistantAudio {
|
|||||||
|
|
||||||
bytes data = 1 [(pointer_to_buffer) = true];
|
bytes data = 1 [(pointer_to_buffer) = true];
|
||||||
bool end = 2;
|
bool end = 2;
|
||||||
|
bytes data2 = 3 [(pointer_to_buffer) = true];
|
||||||
}
|
}
|
||||||
|
|
||||||
enum VoiceAssistantTimerEvent {
|
enum VoiceAssistantTimerEvent {
|
||||||
|
|||||||
@@ -2893,6 +2893,11 @@ bool VoiceAssistantAudio::decode_length(uint32_t field_id, ProtoLengthDelimited
|
|||||||
this->data_len = value.size();
|
this->data_len = value.size();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case 3: {
|
||||||
|
this->data2 = value.data();
|
||||||
|
this->data2_len = value.size();
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -2902,12 +2907,14 @@ uint8_t *VoiceAssistantAudio::encode(ProtoWriteBuffer &buffer PROTO_ENCODE_DEBUG
|
|||||||
uint8_t *__restrict__ pos = buffer.get_pos();
|
uint8_t *__restrict__ pos = buffer.get_pos();
|
||||||
ProtoEncode::encode_bytes(pos PROTO_ENCODE_DEBUG_ARG, 1, this->data, this->data_len);
|
ProtoEncode::encode_bytes(pos PROTO_ENCODE_DEBUG_ARG, 1, this->data, this->data_len);
|
||||||
ProtoEncode::encode_bool(pos PROTO_ENCODE_DEBUG_ARG, 2, this->end);
|
ProtoEncode::encode_bool(pos PROTO_ENCODE_DEBUG_ARG, 2, this->end);
|
||||||
|
ProtoEncode::encode_bytes(pos PROTO_ENCODE_DEBUG_ARG, 3, this->data2, this->data2_len);
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
uint32_t VoiceAssistantAudio::calculate_size() const {
|
uint32_t VoiceAssistantAudio::calculate_size() const {
|
||||||
uint32_t size = 0;
|
uint32_t size = 0;
|
||||||
size += ProtoSize::calc_length(1, this->data_len);
|
size += ProtoSize::calc_length(1, this->data_len);
|
||||||
size += ProtoSize::calc_bool(1, this->end);
|
size += ProtoSize::calc_bool(1, this->end);
|
||||||
|
size += ProtoSize::calc_length(1, this->data2_len);
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
bool VoiceAssistantTimerEventResponse::decode_varint(uint32_t field_id, proto_varint_value_t value) {
|
bool VoiceAssistantTimerEventResponse::decode_varint(uint32_t field_id, proto_varint_value_t value) {
|
||||||
|
|||||||
@@ -2436,13 +2436,15 @@ class VoiceAssistantEventResponse final : public ProtoDecodableMessage {
|
|||||||
class VoiceAssistantAudio final : public ProtoDecodableMessage {
|
class VoiceAssistantAudio final : public ProtoDecodableMessage {
|
||||||
public:
|
public:
|
||||||
static constexpr uint8_t MESSAGE_TYPE = 106;
|
static constexpr uint8_t MESSAGE_TYPE = 106;
|
||||||
static constexpr uint8_t ESTIMATED_SIZE = 21;
|
static constexpr uint8_t ESTIMATED_SIZE = 40;
|
||||||
#ifdef HAS_PROTO_MESSAGE_DUMP
|
#ifdef HAS_PROTO_MESSAGE_DUMP
|
||||||
const LogString *message_name() const override { return LOG_STR("voice_assistant_audio"); }
|
const LogString *message_name() const override { return LOG_STR("voice_assistant_audio"); }
|
||||||
#endif
|
#endif
|
||||||
const uint8_t *data{nullptr};
|
const uint8_t *data{nullptr};
|
||||||
uint16_t data_len{0};
|
uint16_t data_len{0};
|
||||||
bool end{false};
|
bool end{false};
|
||||||
|
const uint8_t *data2{nullptr};
|
||||||
|
uint16_t data2_len{0};
|
||||||
uint8_t *encode(ProtoWriteBuffer &buffer PROTO_ENCODE_DEBUG_PARAM) const;
|
uint8_t *encode(ProtoWriteBuffer &buffer PROTO_ENCODE_DEBUG_PARAM) const;
|
||||||
uint32_t calculate_size() const;
|
uint32_t calculate_size() const;
|
||||||
#ifdef HAS_PROTO_MESSAGE_DUMP
|
#ifdef HAS_PROTO_MESSAGE_DUMP
|
||||||
|
|||||||
@@ -2174,6 +2174,7 @@ const char *VoiceAssistantAudio::dump_to(DumpBuffer &out) const {
|
|||||||
MessageDumpHelper helper(out, ESPHOME_PSTR("VoiceAssistantAudio"));
|
MessageDumpHelper helper(out, ESPHOME_PSTR("VoiceAssistantAudio"));
|
||||||
dump_bytes_field(out, ESPHOME_PSTR("data"), this->data, this->data_len);
|
dump_bytes_field(out, ESPHOME_PSTR("data"), this->data, this->data_len);
|
||||||
dump_field(out, ESPHOME_PSTR("end"), this->end);
|
dump_field(out, ESPHOME_PSTR("end"), this->end);
|
||||||
|
dump_bytes_field(out, ESPHOME_PSTR("data2"), this->data2, this->data2_len);
|
||||||
return out.c_str();
|
return out.c_str();
|
||||||
}
|
}
|
||||||
const char *VoiceAssistantTimerEventResponse::dump_to(DumpBuffer &out) const {
|
const char *VoiceAssistantTimerEventResponse::dump_to(DumpBuffer &out) const {
|
||||||
|
|||||||
@@ -53,6 +53,8 @@ CONF_ON_TIMER_CANCELLED = "on_timer_cancelled"
|
|||||||
CONF_ON_TIMER_FINISHED = "on_timer_finished"
|
CONF_ON_TIMER_FINISHED = "on_timer_finished"
|
||||||
CONF_ON_TIMER_TICK = "on_timer_tick"
|
CONF_ON_TIMER_TICK = "on_timer_tick"
|
||||||
|
|
||||||
|
MAX_MICROPHONE_SOURCES = 2
|
||||||
|
|
||||||
|
|
||||||
voice_assistant_ns = cg.esphome_ns.namespace("voice_assistant")
|
voice_assistant_ns = cg.esphome_ns.namespace("voice_assistant")
|
||||||
VoiceAssistant = voice_assistant_ns.class_("VoiceAssistant", cg.Component)
|
VoiceAssistant = voice_assistant_ns.class_("VoiceAssistant", cg.Component)
|
||||||
@@ -90,13 +92,20 @@ CONFIG_SCHEMA = cv.All(
|
|||||||
cv.Schema(
|
cv.Schema(
|
||||||
{
|
{
|
||||||
cv.GenerateID(): cv.declare_id(VoiceAssistant),
|
cv.GenerateID(): cv.declare_id(VoiceAssistant),
|
||||||
cv.Optional(
|
cv.Optional(CONF_MICROPHONE, default=[{}]): cv.All(
|
||||||
CONF_MICROPHONE, default={}
|
cv.ensure_list(
|
||||||
): microphone.microphone_source_schema(
|
microphone.microphone_source_schema(
|
||||||
min_bits_per_sample=16,
|
min_bits_per_sample=16,
|
||||||
max_bits_per_sample=16,
|
max_bits_per_sample=16,
|
||||||
min_channels=1,
|
min_channels=1,
|
||||||
max_channels=1,
|
max_channels=1,
|
||||||
|
)
|
||||||
|
),
|
||||||
|
cv.Length(
|
||||||
|
min=1,
|
||||||
|
max=MAX_MICROPHONE_SOURCES,
|
||||||
|
msg=f"Voice Assistant supports at most {MAX_MICROPHONE_SOURCES} microphone sources",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
cv.Exclusive(CONF_MEDIA_PLAYER, "output"): cv.use_id(
|
cv.Exclusive(CONF_MEDIA_PLAYER, "output"): cv.use_id(
|
||||||
media_player.MediaPlayer
|
media_player.MediaPlayer
|
||||||
@@ -179,10 +188,10 @@ CONFIG_SCHEMA = cv.All(
|
|||||||
FINAL_VALIDATE_SCHEMA = cv.All(
|
FINAL_VALIDATE_SCHEMA = cv.All(
|
||||||
cv.Schema(
|
cv.Schema(
|
||||||
{
|
{
|
||||||
cv.Optional(
|
cv.Optional(CONF_MICROPHONE): cv.ensure_list(
|
||||||
CONF_MICROPHONE
|
microphone.final_validate_microphone_source_schema(
|
||||||
): microphone.final_validate_microphone_source_schema(
|
"voice_assistant", sample_rate=16000
|
||||||
"voice_assistant", sample_rate=16000
|
)
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
extra=cv.ALLOW_EXTRA,
|
extra=cv.ALLOW_EXTRA,
|
||||||
@@ -194,9 +203,14 @@ async def to_code(config):
|
|||||||
var = cg.new_Pvariable(config[CONF_ID])
|
var = cg.new_Pvariable(config[CONF_ID])
|
||||||
await cg.register_component(var, config)
|
await cg.register_component(var, config)
|
||||||
|
|
||||||
mic_source = await microphone.microphone_source_to_code(config[CONF_MICROPHONE])
|
mic_sources = config[CONF_MICROPHONE]
|
||||||
|
mic_source = await microphone.microphone_source_to_code(mic_sources[0])
|
||||||
cg.add(var.set_microphone_source(mic_source))
|
cg.add(var.set_microphone_source(mic_source))
|
||||||
|
|
||||||
|
if len(mic_sources) > 1:
|
||||||
|
mic_source2 = await microphone.microphone_source_to_code(mic_sources[1])
|
||||||
|
cg.add(var.set_microphone_source2(mic_source2))
|
||||||
|
|
||||||
if CONF_MICRO_WAKE_WORD in config:
|
if CONF_MICRO_WAKE_WORD in config:
|
||||||
mww = await cg.get_variable(config[CONF_MICRO_WAKE_WORD])
|
mww = await cg.get_variable(config[CONF_MICRO_WAKE_WORD])
|
||||||
cg.add(var.set_micro_wake_word(mww))
|
cg.add(var.set_micro_wake_word(mww))
|
||||||
|
|||||||
@@ -31,11 +31,21 @@ VoiceAssistant::VoiceAssistant() { global_voice_assistant = this; }
|
|||||||
void VoiceAssistant::setup() {
|
void VoiceAssistant::setup() {
|
||||||
this->mic_source_->add_data_callback([this](const std::vector<uint8_t> &data) {
|
this->mic_source_->add_data_callback([this](const std::vector<uint8_t> &data) {
|
||||||
std::shared_ptr<ring_buffer::RingBuffer> temp_ring_buffer = this->ring_buffer_;
|
std::shared_ptr<ring_buffer::RingBuffer> temp_ring_buffer = this->ring_buffer_;
|
||||||
if (this->ring_buffer_.use_count() > 1) {
|
if (temp_ring_buffer != nullptr) {
|
||||||
temp_ring_buffer->write((void *) data.data(), data.size());
|
temp_ring_buffer->write((void *) data.data(), data.size());
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Second microphone channel
|
||||||
|
if (this->mic_source2_ != nullptr) {
|
||||||
|
this->mic_source2_->add_data_callback([this](const std::vector<uint8_t> &data) {
|
||||||
|
std::shared_ptr<ring_buffer::RingBuffer> temp_ring_buffer = this->ring_buffer2_;
|
||||||
|
if (temp_ring_buffer != nullptr) {
|
||||||
|
temp_ring_buffer->write((void *) data.data(), data.size());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef USE_MEDIA_PLAYER
|
#ifdef USE_MEDIA_PLAYER
|
||||||
if (this->media_player_ != nullptr) {
|
if (this->media_player_ != nullptr) {
|
||||||
this->media_player_->add_on_state_callback([this](media_player::MediaPlayerState state) {
|
this->media_player_->add_on_state_callback([this](media_player::MediaPlayerState state) {
|
||||||
@@ -115,9 +125,9 @@ bool VoiceAssistant::allocate_buffers_() {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (this->ring_buffer_.use_count() == 0) {
|
if (this->ring_buffer_ == nullptr) {
|
||||||
this->ring_buffer_ = ring_buffer::RingBuffer::create(RING_BUFFER_SIZE);
|
this->ring_buffer_ = ring_buffer::RingBuffer::create(RING_BUFFER_SIZE);
|
||||||
if (this->ring_buffer_.use_count() == 0) {
|
if (this->ring_buffer_ == nullptr) {
|
||||||
ESP_LOGE(TAG, "Could not allocate ring buffer");
|
ESP_LOGE(TAG, "Could not allocate ring buffer");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -132,6 +142,26 @@ bool VoiceAssistant::allocate_buffers_() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Second microphone channel
|
||||||
|
if (this->mic_source2_ != nullptr) {
|
||||||
|
if (this->ring_buffer2_ == nullptr) {
|
||||||
|
this->ring_buffer2_ = ring_buffer::RingBuffer::create(RING_BUFFER_SIZE);
|
||||||
|
if (this->ring_buffer2_ == nullptr) {
|
||||||
|
ESP_LOGE(TAG, "Could not allocate second ring buffer");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this->send_buffer2_ == nullptr) {
|
||||||
|
RAMAllocator<uint8_t> send_allocator;
|
||||||
|
this->send_buffer2_ = send_allocator.allocate(SEND_BUFFER_SIZE);
|
||||||
|
if (this->send_buffer2_ == nullptr) {
|
||||||
|
ESP_LOGW(TAG, "Could not allocate second send buffer");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -144,6 +174,15 @@ void VoiceAssistant::clear_buffers_() {
|
|||||||
this->ring_buffer_->reset();
|
this->ring_buffer_->reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Second microphone channel
|
||||||
|
if (this->send_buffer2_ != nullptr) {
|
||||||
|
memset(this->send_buffer2_, 0, SEND_BUFFER_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this->ring_buffer2_ != nullptr) {
|
||||||
|
this->ring_buffer2_->reset();
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef USE_SPEAKER
|
#ifdef USE_SPEAKER
|
||||||
if ((this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr)) {
|
if ((this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr)) {
|
||||||
memset(this->speaker_buffer_, 0, SPEAKER_BUFFER_SIZE);
|
memset(this->speaker_buffer_, 0, SPEAKER_BUFFER_SIZE);
|
||||||
@@ -162,10 +201,17 @@ void VoiceAssistant::deallocate_buffers_() {
|
|||||||
this->send_buffer_ = nullptr;
|
this->send_buffer_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->ring_buffer_.use_count() > 0) {
|
this->ring_buffer_.reset();
|
||||||
this->ring_buffer_.reset();
|
|
||||||
|
// Second microphone channel
|
||||||
|
if (this->send_buffer2_ != nullptr) {
|
||||||
|
RAMAllocator<uint8_t> send_deallocator;
|
||||||
|
send_deallocator.deallocate(this->send_buffer2_, SEND_BUFFER_SIZE);
|
||||||
|
this->send_buffer2_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this->ring_buffer2_.reset();
|
||||||
|
|
||||||
#ifdef USE_SPEAKER
|
#ifdef USE_SPEAKER
|
||||||
if ((this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr)) {
|
if ((this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr)) {
|
||||||
RAMAllocator<uint8_t> speaker_deallocator;
|
RAMAllocator<uint8_t> speaker_deallocator;
|
||||||
@@ -183,7 +229,8 @@ void VoiceAssistant::reset_conversation_id() {
|
|||||||
void VoiceAssistant::loop() {
|
void VoiceAssistant::loop() {
|
||||||
if (this->api_client_ == nullptr && this->state_ != State::IDLE && this->state_ != State::STOP_MICROPHONE &&
|
if (this->api_client_ == nullptr && this->state_ != State::IDLE && this->state_ != State::STOP_MICROPHONE &&
|
||||||
this->state_ != State::STOPPING_MICROPHONE) {
|
this->state_ != State::STOPPING_MICROPHONE) {
|
||||||
if (this->mic_source_->is_running() || this->state_ == State::STARTING_MICROPHONE) {
|
if (this->mic_source_->is_running() || (this->mic_source2_ && this->mic_source2_->is_running()) ||
|
||||||
|
this->state_ == State::STARTING_MICROPHONE) {
|
||||||
this->set_state_(State::STOP_MICROPHONE, State::IDLE);
|
this->set_state_(State::STOP_MICROPHONE, State::IDLE);
|
||||||
} else {
|
} else {
|
||||||
this->set_state_(State::IDLE, State::IDLE);
|
this->set_state_(State::IDLE, State::IDLE);
|
||||||
@@ -215,11 +262,14 @@ void VoiceAssistant::loop() {
|
|||||||
this->clear_buffers_();
|
this->clear_buffers_();
|
||||||
|
|
||||||
this->mic_source_->start();
|
this->mic_source_->start();
|
||||||
|
if (this->mic_source2_) {
|
||||||
|
this->mic_source2_->start();
|
||||||
|
}
|
||||||
this->set_state_(State::STARTING_MICROPHONE);
|
this->set_state_(State::STARTING_MICROPHONE);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case State::STARTING_MICROPHONE: {
|
case State::STARTING_MICROPHONE: {
|
||||||
if (this->mic_source_->is_running()) {
|
if (this->mic_source_->is_running() && (!this->mic_source2_ || this->mic_source2_->is_running())) {
|
||||||
this->set_state_(this->desired_state_);
|
this->set_state_(this->desired_state_);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -266,15 +316,44 @@ void VoiceAssistant::loop() {
|
|||||||
break; // State changed when udp server port received
|
break; // State changed when udp server port received
|
||||||
}
|
}
|
||||||
case State::STREAMING_MICROPHONE: {
|
case State::STREAMING_MICROPHONE: {
|
||||||
size_t available = this->ring_buffer_->available();
|
if (this->audio_mode_ == AUDIO_MODE_API) {
|
||||||
while (available >= SEND_BUFFER_SIZE) {
|
// API audio
|
||||||
size_t read_bytes = this->ring_buffer_->read((void *) this->send_buffer_, SEND_BUFFER_SIZE, 0);
|
// Both microphone channels are sent, if configured
|
||||||
if (this->audio_mode_ == AUDIO_MODE_API) {
|
bool is_available = this->ring_buffer_->available() >= SEND_BUFFER_SIZE;
|
||||||
|
bool is_available2 = false;
|
||||||
|
if (this->mic_source2_) {
|
||||||
|
is_available2 = this->ring_buffer2_->available() >= SEND_BUFFER_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (is_available || is_available2) {
|
||||||
api::VoiceAssistantAudio msg;
|
api::VoiceAssistantAudio msg;
|
||||||
msg.data = this->send_buffer_;
|
|
||||||
msg.data_len = read_bytes;
|
if (is_available) {
|
||||||
|
size_t read_bytes = this->ring_buffer_->read((void *) this->send_buffer_, SEND_BUFFER_SIZE, 0);
|
||||||
|
msg.data = this->send_buffer_;
|
||||||
|
msg.data_len = read_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second microphone channel
|
||||||
|
if (is_available2) {
|
||||||
|
size_t read_bytes = this->ring_buffer2_->read((void *) this->send_buffer2_, SEND_BUFFER_SIZE, 0);
|
||||||
|
msg.data2 = this->send_buffer2_;
|
||||||
|
msg.data2_len = read_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
this->api_client_->send_message(msg);
|
this->api_client_->send_message(msg);
|
||||||
} else {
|
is_available = this->ring_buffer_->available() >= SEND_BUFFER_SIZE;
|
||||||
|
if (this->mic_source2_) {
|
||||||
|
is_available2 = this->ring_buffer2_->available() >= SEND_BUFFER_SIZE;
|
||||||
|
} else {
|
||||||
|
is_available2 = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// UDP (will eventually be deprecated)
|
||||||
|
// Only the primary microphone channel is used
|
||||||
|
while (this->ring_buffer_->available() >= SEND_BUFFER_SIZE) {
|
||||||
|
size_t read_bytes = this->ring_buffer_->read((void *) this->send_buffer_, SEND_BUFFER_SIZE, 0);
|
||||||
if (!this->udp_socket_running_) {
|
if (!this->udp_socket_running_) {
|
||||||
if (!this->start_udp_socket_()) {
|
if (!this->start_udp_socket_()) {
|
||||||
this->set_state_(State::STOP_MICROPHONE, State::IDLE);
|
this->set_state_(State::STOP_MICROPHONE, State::IDLE);
|
||||||
@@ -284,14 +363,23 @@ void VoiceAssistant::loop() {
|
|||||||
this->socket_->sendto(this->send_buffer_, read_bytes, 0, (struct sockaddr *) &this->dest_addr_,
|
this->socket_->sendto(this->send_buffer_, read_bytes, 0, (struct sockaddr *) &this->dest_addr_,
|
||||||
sizeof(this->dest_addr_));
|
sizeof(this->dest_addr_));
|
||||||
}
|
}
|
||||||
available = this->ring_buffer_->available();
|
} // audio mode
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case State::STOP_MICROPHONE: {
|
case State::STOP_MICROPHONE: {
|
||||||
if (this->mic_source_->is_running()) {
|
// Check both microphone channels
|
||||||
this->mic_source_->stop();
|
bool is_running = this->mic_source_->is_running();
|
||||||
|
bool is_running2 = false;
|
||||||
|
if (this->mic_source2_) {
|
||||||
|
is_running2 = this->mic_source2_->is_running();
|
||||||
|
}
|
||||||
|
if (is_running || is_running2) {
|
||||||
|
if (is_running) {
|
||||||
|
this->mic_source_->stop();
|
||||||
|
}
|
||||||
|
if (is_running2) {
|
||||||
|
this->mic_source2_->stop();
|
||||||
|
}
|
||||||
this->set_state_(State::STOPPING_MICROPHONE);
|
this->set_state_(State::STOPPING_MICROPHONE);
|
||||||
} else {
|
} else {
|
||||||
this->set_state_(this->desired_state_);
|
this->set_state_(this->desired_state_);
|
||||||
@@ -299,7 +387,13 @@ void VoiceAssistant::loop() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case State::STOPPING_MICROPHONE: {
|
case State::STOPPING_MICROPHONE: {
|
||||||
if (this->mic_source_->is_stopped()) {
|
// Check both microphone channels
|
||||||
|
bool is_stopped = this->mic_source_->is_stopped();
|
||||||
|
bool is_stopped2 = true;
|
||||||
|
if (this->mic_source2_) {
|
||||||
|
is_stopped2 = this->mic_source2_->is_stopped();
|
||||||
|
}
|
||||||
|
if (is_stopped && is_stopped2) {
|
||||||
this->set_state_(this->desired_state_);
|
this->set_state_(this->desired_state_);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -504,7 +598,8 @@ void VoiceAssistant::start_streaming() {
|
|||||||
ESP_LOGD(TAG, "Client started, streaming microphone");
|
ESP_LOGD(TAG, "Client started, streaming microphone");
|
||||||
this->audio_mode_ = AUDIO_MODE_API;
|
this->audio_mode_ = AUDIO_MODE_API;
|
||||||
|
|
||||||
if (this->mic_source_->is_running()) {
|
// Both microphone channels
|
||||||
|
if (this->mic_source_->is_running() && (!this->mic_source2_ || this->mic_source2_->is_running())) {
|
||||||
this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);
|
this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);
|
||||||
} else {
|
} else {
|
||||||
this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);
|
this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);
|
||||||
@@ -520,6 +615,10 @@ void VoiceAssistant::start_streaming(struct sockaddr_storage *addr, uint16_t por
|
|||||||
ESP_LOGD(TAG, "Client started, streaming microphone");
|
ESP_LOGD(TAG, "Client started, streaming microphone");
|
||||||
this->audio_mode_ = AUDIO_MODE_UDP;
|
this->audio_mode_ = AUDIO_MODE_UDP;
|
||||||
|
|
||||||
|
if (this->mic_source2_ != nullptr) {
|
||||||
|
ESP_LOGW(TAG, "UDP audio mode does not support a second microphone channel; only the primary will be streamed");
|
||||||
|
}
|
||||||
|
|
||||||
memcpy(&this->dest_addr_, addr, sizeof(this->dest_addr_));
|
memcpy(&this->dest_addr_, addr, sizeof(this->dest_addr_));
|
||||||
if (this->dest_addr_.ss_family == AF_INET) {
|
if (this->dest_addr_.ss_family == AF_INET) {
|
||||||
((struct sockaddr_in *) &this->dest_addr_)->sin_port = htons(port);
|
((struct sockaddr_in *) &this->dest_addr_)->sin_port = htons(port);
|
||||||
@@ -534,6 +633,7 @@ void VoiceAssistant::start_streaming(struct sockaddr_storage *addr, uint16_t por
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Only primary microphone channel over UDP
|
||||||
if (this->mic_source_->is_running()) {
|
if (this->mic_source_->is_running()) {
|
||||||
this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);
|
this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -40,6 +40,7 @@ enum VoiceAssistantFeature : uint32_t {
|
|||||||
FEATURE_TIMERS = 1 << 3,
|
FEATURE_TIMERS = 1 << 3,
|
||||||
FEATURE_ANNOUNCE = 1 << 4,
|
FEATURE_ANNOUNCE = 1 << 4,
|
||||||
FEATURE_START_CONVERSATION = 1 << 5,
|
FEATURE_START_CONVERSATION = 1 << 5,
|
||||||
|
FEATURE_MULTI_CHANNEL_AUDIO = 1 << 6,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class State {
|
enum class State {
|
||||||
@@ -120,6 +121,7 @@ class VoiceAssistant : public Component {
|
|||||||
void failed_to_start();
|
void failed_to_start();
|
||||||
|
|
||||||
void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; }
|
void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; }
|
||||||
|
void set_microphone_source2(microphone::MicrophoneSource *mic_source2) { this->mic_source2_ = mic_source2; }
|
||||||
#ifdef USE_MICRO_WAKE_WORD
|
#ifdef USE_MICRO_WAKE_WORD
|
||||||
void set_micro_wake_word(micro_wake_word::MicroWakeWord *mww) { this->micro_wake_word_ = mww; }
|
void set_micro_wake_word(micro_wake_word::MicroWakeWord *mww) { this->micro_wake_word_ = mww; }
|
||||||
#endif
|
#endif
|
||||||
@@ -149,6 +151,9 @@ class VoiceAssistant : public Component {
|
|||||||
uint32_t flags = 0;
|
uint32_t flags = 0;
|
||||||
flags |= VoiceAssistantFeature::FEATURE_VOICE_ASSISTANT;
|
flags |= VoiceAssistantFeature::FEATURE_VOICE_ASSISTANT;
|
||||||
flags |= VoiceAssistantFeature::FEATURE_API_AUDIO;
|
flags |= VoiceAssistantFeature::FEATURE_API_AUDIO;
|
||||||
|
if (this->mic_source2_ != nullptr) {
|
||||||
|
flags |= VoiceAssistantFeature::FEATURE_MULTI_CHANNEL_AUDIO;
|
||||||
|
}
|
||||||
#ifdef USE_SPEAKER
|
#ifdef USE_SPEAKER
|
||||||
if (this->speaker_ != nullptr) {
|
if (this->speaker_ != nullptr) {
|
||||||
flags |= VoiceAssistantFeature::FEATURE_SPEAKER;
|
flags |= VoiceAssistantFeature::FEATURE_SPEAKER;
|
||||||
@@ -276,6 +281,7 @@ class VoiceAssistant : public Component {
|
|||||||
bool timer_tick_running_{false};
|
bool timer_tick_running_{false};
|
||||||
|
|
||||||
microphone::MicrophoneSource *mic_source_{nullptr};
|
microphone::MicrophoneSource *mic_source_{nullptr};
|
||||||
|
microphone::MicrophoneSource *mic_source2_{nullptr};
|
||||||
#ifdef USE_SPEAKER
|
#ifdef USE_SPEAKER
|
||||||
void write_speaker_();
|
void write_speaker_();
|
||||||
speaker::Speaker *speaker_{nullptr};
|
speaker::Speaker *speaker_{nullptr};
|
||||||
@@ -301,6 +307,7 @@ class VoiceAssistant : public Component {
|
|||||||
std::string wake_word_;
|
std::string wake_word_;
|
||||||
|
|
||||||
std::shared_ptr<ring_buffer::RingBuffer> ring_buffer_;
|
std::shared_ptr<ring_buffer::RingBuffer> ring_buffer_;
|
||||||
|
std::shared_ptr<ring_buffer::RingBuffer> ring_buffer2_;
|
||||||
|
|
||||||
bool use_wake_word_;
|
bool use_wake_word_;
|
||||||
uint8_t noise_suppression_level_;
|
uint8_t noise_suppression_level_;
|
||||||
@@ -309,6 +316,7 @@ class VoiceAssistant : public Component {
|
|||||||
uint32_t conversation_timeout_;
|
uint32_t conversation_timeout_;
|
||||||
|
|
||||||
uint8_t *send_buffer_{nullptr};
|
uint8_t *send_buffer_{nullptr};
|
||||||
|
uint8_t *send_buffer2_{nullptr};
|
||||||
|
|
||||||
bool continuous_{false};
|
bool continuous_{false};
|
||||||
bool silence_detection_;
|
bool silence_detection_;
|
||||||
|
|||||||
@@ -31,6 +31,11 @@ microphone:
|
|||||||
i2s_din_pin: ${i2s_din_pin}
|
i2s_din_pin: ${i2s_din_pin}
|
||||||
adc_type: external
|
adc_type: external
|
||||||
pdm: false
|
pdm: false
|
||||||
|
- platform: i2s_audio
|
||||||
|
id: mic_id_external2
|
||||||
|
i2s_din_pin: ${i2s_din_pin2}
|
||||||
|
adc_type: external
|
||||||
|
pdm: false
|
||||||
|
|
||||||
speaker:
|
speaker:
|
||||||
- platform: i2s_audio
|
- platform: i2s_audio
|
||||||
@@ -40,9 +45,12 @@ speaker:
|
|||||||
|
|
||||||
voice_assistant:
|
voice_assistant:
|
||||||
microphone:
|
microphone:
|
||||||
microphone: mic_id_external
|
- microphone: mic_id_external
|
||||||
gain_factor: 4
|
gain_factor: 4
|
||||||
channels: 0
|
channels: 0
|
||||||
|
- microphone: mic_id_external2
|
||||||
|
gain_factor: 4
|
||||||
|
channels: 0
|
||||||
speaker: speaker_id
|
speaker: speaker_id
|
||||||
micro_wake_word: mww_id
|
micro_wake_word: mww_id
|
||||||
conversation_timeout: 60s
|
conversation_timeout: 60s
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ substitutions:
|
|||||||
i2s_bclk_pin: GPIO5
|
i2s_bclk_pin: GPIO5
|
||||||
i2s_mclk_pin: GPIO15
|
i2s_mclk_pin: GPIO15
|
||||||
i2s_din_pin: GPIO13
|
i2s_din_pin: GPIO13
|
||||||
|
i2s_din_pin2: GPIO14
|
||||||
i2s_dout_pin: GPIO12
|
i2s_dout_pin: GPIO12
|
||||||
|
|
||||||
<<: !include common-idf.yaml
|
<<: !include common-idf.yaml
|
||||||
|
|||||||
Reference in New Issue
Block a user