From b27241b7bcd866d3c742eb9efcc6c3a2738e186e Mon Sep 17 00:00:00 2001 From: Paul Date: Mon, 3 Jun 2024 17:57:55 +0800 Subject: [PATCH] =?UTF-8?q?Feat:=20=E4=B8=BA=E5=BE=AE=E8=BD=AF=20SDK=20?= =?UTF-8?q?=E5=8E=8B=E7=BC=A9=E9=9F=B3=E9=A2=91=E5=88=B0=2016000=20?= =?UTF-8?q?=E7=9A=84=E6=AF=94=E7=89=B9=E7=8E=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/App.css | 8 ++-- src/App.tsx | 88 ++++++++++++++++++----------------------- src/utils/audio.ts | 63 +++++++++++++++++++++++++++++ src/utils/microphone.ts | 32 +++------------ 4 files changed, 110 insertions(+), 81 deletions(-) create mode 100644 src/utils/audio.ts diff --git a/src/App.css b/src/App.css index b9d355d..5520db6 100644 --- a/src/App.css +++ b/src/App.css @@ -34,9 +34,7 @@ } .card { - padding: 2em; -} - -.read-the-docs { - color: #888; + gap: 1em; + display: flex; + justify-content: center; } diff --git a/src/App.tsx b/src/App.tsx index 116558b..8bfe50c 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -1,49 +1,36 @@ +import { useEffect, useRef, useState } from "react"; import reactLogo from "./assets/react.svg"; import viteLogo from "/vite.svg"; import "./App.css"; import { client, start } from "./utils/mqtt"; -import { encode, startAudio, stopAudio } from "./utils/microphone"; +import { startAudio, stopAudio, context } from "./utils/microphone"; +import { arrayBufferToBase64, downSampleAudioFrame, getMergedPCMData } from "./utils/audio"; let mqttData; let buffers: Float32Array[] = []; -start().then(([client, data]) => { - console.log("mqtt connected"); - - mqttData = data; -}); - -function arrayBufferToBase64(buffer) { - var binary = ""; - var bytes = new Uint8Array(buffer); - var len = bytes.byteLength; - for (var i = 0; i < len; i++) { - binary += String.fromCharCode(bytes[i]); - } - return window.btoa(binary); -} - const getAudio = () => { buffers = []; - startAudio((buffer) => { - // console.log(buffer); - // console.log(typeof buffer); - // const a = encode(buffer); + startAudio((float32Arr) => { + // 降比特率到 16000 + const downedFloat32Arr = downSampleAudioFrame(float32Arr, context.sampleRate, 16000); + // 转 base64 打包给后端 + const sendItem = arrayBufferToBase64(downedFloat32Arr.buffer); + // 存储一遍压缩后的音频 + buffers.push(downedFloat32Arr); - const a = arrayBufferToBase64(buffer.buffer); - - buffers.push(buffer); - - // console.log(a); - client.publish(mqttData.recognition_topic, a); + client.publish(mqttData.recognition_topic, sendItem); }); }; const onDownload = () => { - const data = getMergedPCMData(); + if (buffers.length === 0) { + return; + } + const data = getMergedPCMData(buffers); const blob = new Blob([data.buffer], { type: 'audio/wave' }); const a = document.createElement('a'); @@ -52,29 +39,27 @@ const onDownload = () => { a.click(); }; -const getMergedPCMData = () => { - // 合并所有片段 - let totalLength = 0; - for (const buffer of buffers) { - totalLength += buffer.length; - } - - const mergedBuffer = new Float32Array(totalLength); - let offset = 0; - for (const buffer of buffers) { - mergedBuffer.set(buffer, offset); - offset += buffer.length; - } - - // 清除存储的片段数据(可选) - // this.outputBuffer = []; - - // 如果需要16位PCM,可以在这里进行转换 - - return mergedBuffer; -}; - function App() { + const loaded = useRef(false); + const [mqttStatus, setMqttStatus] = useState(false); + + useEffect(() => { + if (loaded.current) { + return; + } + + loaded.current = true; + + // 启用 MQTT 并连接 + start().then(([client, data]) => { + console.log("mqtt connected"); + + mqttData = data; + + setMqttStatus(true); + }); + }, []); + return ( <>
@@ -87,6 +72,9 @@ function App() {

确保 MQTT 已连接,再点击 getAudio 开始录音,stopAudio 结束

点击 download 下载音频原始数据(Float32Array 拼接的),用 Audacity 音频软件可加载。

+

MQTT 连接状态:{String(mqttStatus)}

+

当前输入设备比特率:{context.sampleRate}hz

+

当前输出音频比特率:16000hz

diff --git a/src/utils/audio.ts b/src/utils/audio.ts new file mode 100644 index 0000000..1117717 --- /dev/null +++ b/src/utils/audio.ts @@ -0,0 +1,63 @@ +// 降低采样率,默认输入可能是 48000 或者 44100HZ,微软要求是 16000HZ +// 代码复制于微软 SDK +export const downSampleAudioFrame = ( + srcFrame: Float32Array, + srcRate: number, + dstRate: number +) => { + if (dstRate === srcRate || dstRate > srcRate) { + return srcFrame; + } + + const ratio = srcRate / dstRate; + const dstLength = Math.round(srcFrame.length / ratio); + const dstFrame = new Float32Array(dstLength); + let srcOffset = 0; + let dstOffset = 0; + while (dstOffset < dstLength) { + const nextSrcOffset = Math.round((dstOffset + 1) * ratio); + let accum = 0; + let count = 0; + while (srcOffset < nextSrcOffset && srcOffset < srcFrame.length) { + accum += srcFrame[srcOffset++]; + count++; + } + dstFrame[dstOffset++] = accum / count; + } + + return dstFrame; +}; + +// 合并多个 Float32 的音频片段 +export const getMergedPCMData = (buffers: Float32Array[]) => { + // 合并所有片段 + let totalLength = 0; + for (const buffer of buffers) { + totalLength += buffer.length; + } + + const mergedBuffer = new Float32Array(totalLength); + let offset = 0; + for (const buffer of buffers) { + mergedBuffer.set(buffer, offset); + offset += buffer.length; + } + + // 清除存储的片段数据(可选) + // this.outputBuffer = []; + + // 如果需要16位PCM,可以在这里进行转换 + + return mergedBuffer; +}; + +// 转 Base64 +export const arrayBufferToBase64 = (buffer: ArrayBuffer) => { + let binary = ""; + const bytes = new Uint8Array(buffer); + const len = bytes.byteLength; + for (let i = 0; i < len; i++) { + binary += String.fromCharCode(bytes[i]); + } + return window.btoa(binary); +} diff --git a/src/utils/microphone.ts b/src/utils/microphone.ts index 94c5d9a..42eea20 100644 --- a/src/utils/microphone.ts +++ b/src/utils/microphone.ts @@ -1,8 +1,8 @@ -const context = new AudioContext(); +export const context = new AudioContext(); let stream: MediaStream; -export async function createAudio(context: AudioContext, send: (buffer: any) => void) { +export async function createAudio(context: AudioContext, send: (buffer: Float32Array) => void) { if (stream) { return; } @@ -22,12 +22,7 @@ export async function createAudio(context: AudioContext, send: (buffer: any) => }); node.port.onmessage = ({ data }) => { - // a.send(data); - // const a = encode(data); - - // buffers.push(a); - // console.log(data); - send(data); + send(data as Float32Array); }; // const oscillator = new OscillatorNode(context); @@ -45,27 +40,12 @@ export async function createAudio(context: AudioContext, send: (buffer: any) => context.resume(); } -export const startAudio = (send: (buffer: any) => void) => { +export const startAudio = (send: (buffer: Float32Array) => void) => { createAudio(context, send); + + return context; } export const stopAudio = () => { context.close(); } - -function floatTo16BitPCM(view, offset, input) { - for (let i = 0; i < input.length; i++ , offset += 2) { - const s = Math.max(-1, Math.min(1, input[i])); - view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true); - } -} - -export function encode(actualAudioFrame) { - const audioLength = actualAudioFrame.length * 2; - - const buffer = new ArrayBuffer(audioLength); - const view = new DataView(buffer); - floatTo16BitPCM(view, 0, actualAudioFrame); - - return buffer; -}