// 代码拉取完成,页面将自动刷新
// 同步操作将从 houguoxiong/esp32s3-ai-chat 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
// 确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
#include <wakeup_detect_houguoxiong_inferencing.h>
#include <WiFi.h>
#include <HTTPClient.h>
#include <ArduinoJson.h>
#include <driver/i2s.h>
#include <UrlEncode.h>
#include <base64.hpp>
// I2S config for MAX98357A
#define I2S_OUT_PORT I2S_NUM_1
#define I2S_OUT_BCLK 15
#define I2S_OUT_LRC 16
#define I2S_OUT_DOUT 7
// INMP441 config
#define I2S_IN_PORT I2S_NUM_0
#define I2S_IN_BCLK 4
#define I2S_IN_LRC 5
#define I2S_IN_DIN 6
// WiFi credentials
const char* ssid = "";
const char* password = "";
// Baidu API credentials (used for speech recognition and speech synthesis)
const char* baidu_api_key = "";
const char* baidu_secret_key = "";
// Baidu Qianfan large-model (LLM) credentials.
// Declared const like the credentials above: they point at string literals,
// which must never be written through.
const char* qianfan_api_key = "";
const char* qianfan_secret_key = "";
// Audio recording settings
#define SAMPLE_RATE 16000
#define RECORD_TIME_SECONDS 15
#define BUFFER_SIZE (SAMPLE_RATE * RECORD_TIME_SECONDS)
// 唤醒词阈值,阈值越大,要求识别的唤醒词更精准
#define PRED_VALUE_THRESHOLD 0.8
/**
 * Audio buffers, pointers and selectors.
 *
 * Bookkeeping for the sample window handed to the classifier. It is filled by
 * audio_inference_callback() and consumed via microphone_inference_record()
 * and microphone_audio_signal_get_data().
 */
typedef struct {
int16_t* buffer;     // sample storage, allocated in microphone_inference_start()
uint8_t buf_ready;   // set to 1 by audio_inference_callback() when the write index wraps; cleared by microphone_inference_record()
uint32_t buf_count;  // index of the next sample to write into buffer
uint32_t n_samples;  // number of samples after which buf_count wraps to 0
} inference_t;
// Inference window state used by the microphone_* helpers in this file.
static inference_t inference;
// Element count of sampleBuffer. The same value is handed to capture_samples()
// as its task argument, where it is used as the byte count of each I2S read.
static const uint32_t sample_buffer_size = 2048;
// Staging buffer that capture_samples() reads I2S data into.
static signed short sampleBuffer[sample_buffer_size];
static bool debug_nn = false; // Set this to true to see e.g. features generated from the raw signal
// true: capture_samples() reads the microphone for wake-word detection.
// false: mainChat() records the microphone for a chat turn.
static bool record_status = true;
/**
 * @brief Arduino setup: serial, LED, WiFi, both I2S ports, the chat task and
 *        the wake-word capture.
 *
 * Blocks until WiFi reports WL_CONNECTED. Installs the I2S TX driver on
 * I2S_OUT_PORT and the I2S RX driver on I2S_IN_PORT, starts the mainChat task,
 * prints the classifier settings and finally starts microphone capture via
 * microphone_inference_start().
 */
void setup() {
// Set the serial baud rate
Serial.begin(115200);
// Configure the built-in LED pin as an output and initialise it LOW
pinMode(LED_BUILTIN, OUTPUT);
digitalWrite(LED_BUILTIN, LOW); //Turn off
// Connect to WiFi
WiFi.begin(ssid, password);
while (WiFi.status() != WL_CONNECTED) {
delay(1000);
Serial.println("Connecting to WiFi...");
}
Serial.println("Connected to WiFi");
// Initialize I2S for audio output
i2s_config_t i2s_config_out = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX),
.sample_rate = SAMPLE_RATE,
.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
.channel_format = I2S_CHANNEL_FMT_ONLY_RIGHT,
.communication_format = (i2s_comm_format_t)(I2S_COMM_FORMAT_STAND_I2S),
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
.dma_buf_count = 8,
.dma_buf_len = 1024,
};
i2s_pin_config_t pin_config = {
.bck_io_num = I2S_OUT_BCLK,
.ws_io_num = I2S_OUT_LRC,
.data_out_num = I2S_OUT_DOUT,
.data_in_num = -1
};
// NOTE(review): the return codes of i2s_driver_install()/i2s_set_pin() are not checked here.
i2s_driver_install(I2S_OUT_PORT, &i2s_config_out, 0, NULL);
i2s_set_pin(I2S_OUT_PORT, &pin_config);
// Initialize I2S for audio input
i2s_config_t i2s_config_in = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
.sample_rate = SAMPLE_RATE,
.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT, // Original author's note: the INMP441 outputs 32-bit data (the port is nevertheless configured for 16-bit samples here)
.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
.communication_format = i2s_comm_format_t(I2S_COMM_FORMAT_STAND_I2S),
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
.dma_buf_count = 8,
.dma_buf_len = 1024,
};
i2s_pin_config_t pin_config_in = {
.bck_io_num = I2S_IN_BCLK,
.ws_io_num = I2S_IN_LRC,
.data_out_num = -1,
.data_in_num = I2S_IN_DIN
};
i2s_driver_install(I2S_IN_PORT, &i2s_config_in, 0, NULL);
i2s_set_pin(I2S_IN_PORT, &pin_config_in);
// Start the main chat task (32 KiB stack, priority 10)
xTaskCreate(mainChat, "mainChat", 1024 * 32, NULL, 10, NULL);
// summary of inferencing settings (from model_metadata.h)
ei_printf("Inferencing settings:\n");
ei_printf("\tInterval: ");
ei_printf_float((float)EI_CLASSIFIER_INTERVAL_MS);
ei_printf(" ms.\n");
ei_printf("\tFrame size: %d\n", EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE);
ei_printf("\tSample length: %d ms.\n", EI_CLASSIFIER_RAW_SAMPLE_COUNT / 16);
ei_printf("\tNo. of classes: %d\n", sizeof(ei_classifier_inferencing_categories) / sizeof(ei_classifier_inferencing_categories[0]));
ei_printf("\nStarting continious inference in 2 seconds...\n");
ei_sleep(2000);
// Allocate the inference window and start the capture task; bail out of setup() on failure
if (microphone_inference_start(EI_CLASSIFIER_RAW_SAMPLE_COUNT) == false) {
ei_printf("ERR: Could not allocate audio buffer (size %d), this could be due to the window length of your model\r\n", EI_CLASSIFIER_RAW_SAMPLE_COUNT);
return;
}
ei_printf("Recording...\n");
}
/**
* @brief Arduino main function. Runs the inferencing loop.
*/
void loop() {
bool m = microphone_inference_record();
if (!m) {
ei_printf("ERR: Failed to record audio...\n");
return;
}
signal_t signal;
signal.total_length = EI_CLASSIFIER_RAW_SAMPLE_COUNT;
signal.get_data = µphone_audio_signal_get_data;
ei_impulse_result_t result = { 0 };
EI_IMPULSE_ERROR r = run_classifier(&signal, &result, debug_nn);
if (r != EI_IMPULSE_OK) {
ei_printf("ERR: Failed to run classifier (%d)\n", r);
return;
}
int pred_index = -1; // Initialize pred_index
float pred_value = PRED_VALUE_THRESHOLD; // Initialize pred_value
// print the predictions
ei_printf("Predictions ");
ei_printf("(DSP: %d ms., Classification: %d ms., Anomaly: %d ms.)",
result.timing.dsp, result.timing.classification, result.timing.anomaly);
ei_printf(": \n");
for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT; ix++) {
ei_printf(" %s: ", result.classification[ix].label);
ei_printf_float(result.classification[ix].value);
ei_printf("\n");
if (result.classification[0].value > pred_value) {
pred_index = 0;
}
}
// Display inference result
if (pred_index == 0) {
digitalWrite(LED_BUILTIN, HIGH); //Turn on
Serial.println("playAudio_Zai");
playAudio_Zai();
record_status = false;
}
#if EI_CLASSIFIER_HAS_ANOMALY == 1
ei_printf(" anomaly score: ");
ei_printf_float(result.anomaly);
ei_printf("\n");
#endif
}
/**
 * @brief Append freshly captured samples to the inference window.
 *
 * Copies n_bytes / 2 samples from sampleBuffer into inference.buffer. Each
 * time the write index reaches inference.n_samples it wraps to 0 and
 * inference.buf_ready is raised.
 *
 * @param[in] n_bytes Number of bytes of sampleBuffer to consume.
 */
static void audio_inference_callback(uint32_t n_bytes) {
  const uint32_t sample_count = n_bytes >> 1;
  for (uint32_t idx = 0; idx < sample_count; ++idx) {
    inference.buffer[inference.buf_count] = sampleBuffer[idx];
    inference.buf_count += 1;
    if (inference.buf_count < inference.n_samples) {
      continue;
    }
    // Window complete: restart at the beginning and signal the consumer.
    inference.buf_count = 0;
    inference.buf_ready = 1;
  }
}
static void capture_samples(void* arg) {
const int32_t i2s_bytes_to_read = (uint32_t)arg;
size_t bytes_read = i2s_bytes_to_read;
while (1) {
if (record_status) {
/* read data at once from i2s - Modified for XIAO ESP2S3 Sense and I2S.h library */
i2s_read(I2S_IN_PORT, (void*)sampleBuffer, i2s_bytes_to_read, &bytes_read, 100);
// esp_i2s::i2s_read(esp_i2s::I2S_NUM_0, (void *)sampleBuffer, i2s_bytes_to_read, &bytes_read, 100);
if (bytes_read <= 0) {
ei_printf("Error in I2S read : %d", bytes_read);
} else {
if (bytes_read < i2s_bytes_to_read) {
ei_printf("Partial I2S read");
}
// scale the data (otherwise the sound is too quiet)
for (int x = 0; x < i2s_bytes_to_read / 2; x++) {
sampleBuffer[x] = (int16_t)(sampleBuffer[x]) * 8;
}
audio_inference_callback(i2s_bytes_to_read);
}
}
delay(1);
}
vTaskDelete(NULL);
}
/**
* @brief Init inferencing struct and setup/start PDM
*
* @param[in] n_samples The n samples
*
* @return { description_of_the_return_value }
*/
static bool microphone_inference_start(uint32_t n_samples) {
inference.buffer = (int16_t*)malloc(n_samples * sizeof(int16_t));
if (inference.buffer == NULL) {
return false;
}
inference.buf_count = 0;
inference.n_samples = n_samples;
inference.buf_ready = 0;
ei_sleep(100);
record_status = true;
xTaskCreate(capture_samples, "CaptureSamples", 1024 * 32, (void*)sample_buffer_size, 10, NULL);
return true;
}
/**
 * @brief Block until a full inference window is available.
 *
 * Polls inference.buf_ready every 10 ms, then clears the flag.
 *
 * @return Always true.
 */
static bool microphone_inference_record(void) {
  for (;;) {
    if (inference.buf_ready != 0) {
      break;
    }
    delay(10);
  }
  inference.buf_ready = 0;
  return true;
}
/**
 * @brief Signal getter: convert a slice of the inference window to float.
 *
 * @param[in]  offset  Index of the first sample in inference.buffer.
 * @param[in]  length  Number of samples to convert.
 * @param[out] out_ptr Destination for the converted samples.
 *
 * @return Always 0.
 */
static int microphone_audio_signal_get_data(size_t offset, size_t length, float* out_ptr) {
  int16_t* window_start = inference.buffer + offset;
  numpy::int16_to_float(window_start, out_ptr, length);
  return 0;
}
/**
 * @brief Release the inference window buffer.
 *
 * sampleBuffer is a statically allocated array and must not be passed to
 * free(); only the heap buffer obtained with malloc() in
 * microphone_inference_start() is released here.
 *
 * NOTE(review): this does not stop the capture task; make sure nothing is
 * still writing to inference.buffer before calling it.
 */
static void microphone_inference_end(void) {
  free(inference.buffer);
  inference.buffer = NULL;
}
#if !defined(EI_CLASSIFIER_SENSOR) || EI_CLASSIFIER_SENSOR != EI_CLASSIFIER_SENSOR_MICROPHONE
#error "Invalid model for current sensor."
#endif
void mainChat(void* arg) {
//获取access token
String baidu_access_token = "";
String qianfan_access_token = "";
baidu_access_token = getAccessToken(baidu_api_key, baidu_secret_key);
qianfan_access_token = getAccessToken(qianfan_api_key, qianfan_secret_key);
while (1) {
if (!record_status) {
// Record audio from INMP441
// 分配内存
uint8_t* pcm_data = (uint8_t*)ps_malloc(BUFFER_SIZE);
if (!pcm_data) {
Serial.println("Failed to allocate memory for pcm_data");
return;
}
Serial.println("i2s_read");
// 开始循环录音,将录制结果保存在pcm_data中
size_t bytes_read = 0, recordingSize = 0, ttsSize = 0;
int16_t data[512];
size_t noVoicePre = 0, noVoiceCur = 0, noVoiceTotal = 0, VoiceCnt = 0;
bool recording = true;
while (1) {
// 记录刚开始的时间
noVoicePre = millis();
// i2s录音
esp_err_t result = i2s_read(I2S_IN_PORT, data, sizeof(data), &bytes_read, portMAX_DELAY);
memcpy(pcm_data + recordingSize, data, bytes_read);
recordingSize += bytes_read;
Serial.printf("%x recordingSize: %d bytes_read :%d\n", pcm_data + recordingSize, recordingSize, bytes_read);
// 计算平均值
uint32_t sum_data = 0;
for (int i = 0; i < bytes_read / 2; i++) {
sum_data += abs(data[i]);
}
sum_data = sum_data / bytes_read;
Serial.printf("sum_data :%d\n", sum_data);
// 判断当没有说话时间超过一定时间时就退出录音
noVoiceCur = millis();
if (sum_data < 15) {
noVoiceTotal += noVoiceCur - noVoicePre;
} else {
noVoiceTotal = 0;
VoiceCnt += 1;
}
Serial.printf("noVoiceCur :%d noVoicePre :%d noVoiceTotal :%d\n", noVoiceCur, noVoicePre, noVoiceTotal);
if (noVoiceTotal > 1000) {
recording = false;
}
if (!recording || (recordingSize >= BUFFER_SIZE - bytes_read)) {
Serial.printf("record done: %d", recordingSize);
break;
}
}
// 设置唤醒录音状态为true,此后可以唤醒
record_status = true;
// 此时一直没有说话,则退出被唤醒状态
if (VoiceCnt == 0) {
digitalWrite(LED_BUILTIN, LOW); //Turn off
recordingSize = 0;
// 释放内存
free(pcm_data);
continue;
}
if (recordingSize > 0) {
// 音频转文本(语音识别API访问)
String recognizedText = baiduSTT_Send(baidu_access_token, pcm_data, recordingSize);
Serial.println("Recognized text: " + recognizedText);
// 访问千帆大模型(LLM大模型API访问)
String ernieResponse = baiduErnieBot_Get(qianfan_access_token, recognizedText.c_str());
Serial.println("Ernie Bot response: " + ernieResponse);
// 文本转音频tts并通过MAX98357A输出(语音合成API访问)
baiduTTS_Send(baidu_access_token, ernieResponse);
Serial.println("ttsSize: ");
Serial.println(ttsSize);
}
// 释放内存
free(pcm_data);
// 设置唤醒录音状态为false,此后继续录音对话
record_status = false;
}
delay(10);
}
}
/**
 * @brief Request an OAuth access token from the Baidu token endpoint.
 *
 * @param[in] api_key    Client id sent as client_id.
 * @param[in] secret_key Client secret sent as client_secret.
 *
 * @return The access token, or an empty String when the request fails or the
 *         response carries no "access_token" field.
 */
String getAccessToken(const char* api_key, const char* secret_key) {
  String access_token = "";
  HTTPClient http;

  // Build the request URL
  String url = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=";
  url += api_key;
  url += "&client_secret=";
  url += secret_key;
  http.begin(url);

  int httpCode = http.POST("");
  if (httpCode == HTTP_CODE_OK) {
    String response = http.getString();
    DynamicJsonDocument doc(1024);
    DeserializationError err = deserializeJson(doc, response);
    if (err) {
      // Report the problem but still try to read the token below.
      Serial.printf("[HTTP] access_token response parse error: %s\n", err.c_str());
    }
    // Default to "" so a missing field is not turned into the text "null",
    // which would slip past the callers' empty-token checks.
    access_token = doc["access_token"] | "";
    // %s needs a C string; passing the String object itself is undefined behaviour.
    Serial.printf("[HTTP] GET access_token: %s\n", access_token.c_str());
  } else {
    Serial.printf("[HTTP] GET... failed, error: %s\n", http.errorToString(httpCode).c_str());
  }
  http.end();
  return access_token;
}
/**
 * @brief Send recorded PCM audio to the Baidu speech recognition endpoint.
 *
 * Base64-encodes the audio, wraps it in the JSON request body and POSTs it.
 * Both buffers are sized exactly from the input instead of a fixed 1.4x
 * estimate, and every exit path releases what it allocated.
 *
 * @param[in] access_token  Baidu access token; an empty token aborts the call.
 * @param[in] audioData     PCM bytes to recognise.
 * @param[in] audioDataSize Number of bytes in audioData.
 *
 * @return The "result" field of the response converted to a String, or an
 *         empty String on any failure.
 */
String baiduSTT_Send(String access_token, uint8_t* audioData, int audioDataSize) {
  String recognizedText = "";
  if (access_token == "") {
    Serial.println("access_token is null");
    return recognizedText;
  }
  if (audioData == NULL || audioDataSize <= 0) {
    Serial.println("audio data is empty");
    return recognizedText;
  }

  // Base64 turns every 3 input bytes into 4 characters (padded), plus a NUL.
  const size_t base64_capacity = 4 * (((size_t)audioDataSize + 2) / 3) + 1;
  unsigned char* audioDataBase64 = (unsigned char*)ps_malloc(base64_capacity);
  if (!audioDataBase64) {
    Serial.println("Failed to allocate memory for audioDataBase64");
    return recognizedText;
  }
  // Base64 encode audio data
  encode_base64(audioData, audioDataSize, audioDataBase64);
  const size_t encoded_len = strlen((const char*)audioDataBase64);

  // JSON body = head (fixed fields, token, len) + base64 speech + tail.
  static const char json_head_fmt[] =
    "{\"format\":\"pcm\",\"rate\":16000,\"dev_pid\":1537,\"channel\":1,"
    "\"cuid\":\"57722200\",\"token\":\"%s\",\"len\":%d,\"speech\":\"";
  static const char json_tail[] = "\"}";
  const int head_len = snprintf(NULL, 0, json_head_fmt, access_token.c_str(), audioDataSize);
  if (head_len < 0) {
    Serial.println("Failed to format data_json");
    free(audioDataBase64);
    return recognizedText;
  }
  const size_t body_len = (size_t)head_len + encoded_len + (sizeof(json_tail) - 1);
  char* data_json = (char*)ps_malloc(body_len + 1);
  if (!data_json) {
    Serial.println("Failed to allocate memory for data_json");
    free(audioDataBase64);
    return recognizedText;
  }
  snprintf(data_json, (size_t)head_len + 1, json_head_fmt, access_token.c_str(), audioDataSize);
  memcpy(data_json + head_len, audioDataBase64, encoded_len);
  memcpy(data_json + head_len + encoded_len, json_tail, sizeof(json_tail));  // copies the NUL too
  // The encoded copy is no longer needed; release it before the request.
  free(audioDataBase64);

  // Create and send the HTTP request
  HTTPClient http_client;
  http_client.begin("http://vop.baidu.com/server_api");
  http_client.addHeader("Content-Type", "application/json");
  // Send straight from the buffer to avoid duplicating the body into a String.
  int httpCode = http_client.POST((uint8_t*)data_json, body_len);
  if (httpCode > 0) {
    if (httpCode == HTTP_CODE_OK) {
      // Read the response
      String response = http_client.getString();
      Serial.println(response);
      // Extract "result" from the JSON response
      DynamicJsonDocument responseDoc(2048);
      deserializeJson(responseDoc, response);
      recognizedText = responseDoc["result"].as<String>();
    }
  } else {
    Serial.printf("[HTTP] POST failed, error: %s\n", http_client.errorToString(httpCode).c_str());
  }

  // Release the request body
  free(data_json);
  http_client.end();
  return recognizedText;
}
/**
 * @brief Ask the Baidu Qianfan chat endpoint (ernie-lite-8k) for a reply.
 *
 * Appends a fixed role instruction to the prompt, sends it as a single user
 * message and returns the "result" field of the response.
 *
 * @param[in] access_token Qianfan access token.
 * @param[in] prompt       Recognised user text.
 *
 * @return The model's reply; a fixed message when the token or prompt is
 *         empty; an empty String when the HTTP request does not return 200.
 */
String baiduErnieBot_Get(String access_token, String prompt) {
  if (access_token == "") {
    Serial.println("access_token is null");
    return String("获取access token失败");
  }
  if (prompt.length() == 0) {
    return String("识别出错了");
  }

  // Role instruction appended after the user's text
  prompt += "你是一个语音助手,类似朋友的角色进行回答下面的问题,并且要求最多20个字简短的回答。";

  // Qianfan chat endpoint with the token as a query parameter
  String endpoint = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-lite-8k?access_token=";
  endpoint += access_token;

  HTTPClient http;
  http.begin(endpoint);
  http.addHeader("Content-Type", "application/json");

  // Request document: { "messages": [ { role, content } ], flags... }
  DynamicJsonDocument requestDoc(2048);
  JsonArray messageList = requestDoc.createNestedArray("messages");
  JsonObject userMessage = messageList.createNestedObject();
  userMessage["role"] = "user";
  userMessage["content"] = prompt;
  requestDoc["disable_search"] = false;
  requestDoc["enable_citation"] = false;

  // Serialise and send
  String payload;
  serializeJson(requestDoc, payload);
  const int status = http.POST(payload);

  String reply = "";
  if (status != HTTP_CODE_OK) {
    Serial.printf("[HTTP] POST... failed, error: %s\n", http.errorToString(status).c_str());
  } else {
    // Read and parse the response
    String body = http.getString();
    Serial.println(body);
    DynamicJsonDocument replyDoc(2048);
    deserializeJson(replyDoc, body);
    reply = replyDoc["result"].as<String>();
  }

  // Close the connection and hand back the reply
  http.end();
  return reply;
}
/**
 * @brief Synthesise text with the Baidu text2audio endpoint and play it.
 *
 * Builds the GET URL with the twice URL-encoded text and the voice parameters,
 * then, when the response has an audio content type, streams the body to
 * playAudio() in 512-byte chunks and finally clears the I2S DMA buffer.
 *
 * @param[in] access_token Baidu access token; an empty token aborts the call.
 * @param[in] text         Text to speak; empty text aborts the call.
 */
void baiduTTS_Send(String access_token, String text) {
  if (access_token == "") {
    Serial.println("access_token is null");
    return;
  }
  if (text.length() == 0) {
    Serial.println("text is null");
    return;
  }

  // Voice parameters sent as per/spd/pit/vol/aue
  const int per = 1;
  const int spd = 5;
  const int pit = 5;
  const int vol = 10;
  const int aue = 6;

  // URL-encode the text twice
  String encodedText = urlEncode(text);
  encodedText = urlEncode(encodedText);

  // Assemble the request URL
  String url = "https://tsn.baidu.com/text2audio";
  url += "?tok=";
  url += access_token;
  url += "&tex=";
  url += encodedText;
  url += "&per=" + String(per);
  url += "&spd=" + String(spd);
  url += "&pit=" + String(pit);
  url += "&vol=" + String(vol);
  url += "&aue=" + String(aue);
  url += "&cuid=esp32s3";
  url += "&lan=zh";
  url += "&ctp=1";

  // Response headers to collect
  const char* header[] = { "Content-Type", "Content-Length" };

  // Create and send the request
  HTTPClient http;
  http.begin(url);
  http.collectHeaders(header, 2);
  const int httpResponseCode = http.GET();

  if (httpResponseCode <= 0) {
    Serial.print("Error code: ");
    Serial.println(httpResponseCode);
  } else if (httpResponseCode != HTTP_CODE_OK) {
    Serial.println("Failed to receive audio file");
  } else {
    String contentType = http.header("Content-Type");
    Serial.println(contentType);
    if (contentType.startsWith("audio")) {
      Serial.println("合成成功");
      // Stream the returned audio
      Stream* stream = http.getStreamPtr();
      uint8_t chunk[512];
      // 200 ms read timeout, to avoid noise at the end of playback
      stream->setTimeout(200);
      for (;;) {
        if (!http.connected()) {
          break;
        }
        const size_t got = stream->readBytes(chunk, sizeof(chunk));
        if (got == 0) {
          break;
        }
        // Play this chunk
        playAudio(chunk, got);
        delay(1);
      }
      // Clear the I2S DMA buffer
      clearAudio();
    } else if (contentType.equals("application/json")) {
      Serial.println("合成出现错误");
    } else {
      Serial.println("未知的Content-Type");
    }
  }
  http.end();
}
/**
 * @brief Write a block of audio bytes to the output I2S port.
 *
 * Does nothing for an empty block; otherwise waits (portMAX_DELAY) on
 * i2s_write() for I2S_OUT_PORT.
 *
 * @param[in] audioData     Bytes to play.
 * @param[in] audioDataSize Number of bytes in audioData.
 */
void playAudio(uint8_t* audioData, size_t audioDataSize) {
  if (audioDataSize == 0) {
    return;
  }
  size_t written = 0;
  int16_t* samples = (int16_t*)audioData;
  i2s_write(I2S_OUT_PORT, samples, audioDataSize, &written, portMAX_DELAY);
}
/**
 * @brief Zero the DMA buffer of the output I2S port and log "clearAudio".
 */
void clearAudio(void) {
// Clear the I2S DMA buffer
i2s_zero_dma_buffer(I2S_OUT_PORT);
Serial.print("clearAudio");
}
// Play zai audio data using MAX98357A
//
// Decodes the base64 clip stored in `zai` into a temporary PSRAM buffer,
// plays it through playAudio(), waits 200 ms, clears the I2S DMA buffer and
// frees the temporary buffer.
void playAudio_Zai(void) {
// NOTE(review): the base64 string literal below is truncated in this copy of
// the file (opening quote only, no data, no closing quote or semicolon). The
// clip data must be restored from the upstream repository before this compiles.
// NOTE(review): the decode buffer is a fixed 16000 * 3 bytes and the decoded
// length is not checked against it; confirm the restored clip fits.
const char* zai = "
// Allocate the decode buffer
uint8_t* decode_data = (uint8_t*)ps_malloc(16000 * 3);
if (!decode_data) {
Serial.println("Failed to allocate memory for decode_data");
return;
}
// Decode the base64 clip
int decoded_length = decode_base64((unsigned char*)zai, (unsigned char*)decode_data);
// Play it
playAudio(decode_data, decoded_length);
// delay 200ms
delay(200);
// Clear the I2S DMA buffer
clearAudio();
// Release the decode buffer
free(decode_data);
}
// 此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
// 如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。