Feat: 为微软 SDK 将音频降采样到 16000 Hz 的采样率

This commit is contained in:
Paul 2024-06-03 17:57:55 +08:00
parent 745dab518f
commit b27241b7bc
4 changed files with 110 additions and 81 deletions

View File

@ -34,9 +34,7 @@
} }
.card { .card {
padding: 2em; gap: 1em;
} display: flex;
justify-content: center;
.read-the-docs {
color: #888;
} }

View File

@ -1,49 +1,36 @@
import { useEffect, useRef, useState } from "react";
import reactLogo from "./assets/react.svg"; import reactLogo from "./assets/react.svg";
import viteLogo from "/vite.svg"; import viteLogo from "/vite.svg";
import "./App.css"; import "./App.css";
import { client, start } from "./utils/mqtt"; import { client, start } from "./utils/mqtt";
import { encode, startAudio, stopAudio } from "./utils/microphone"; import { startAudio, stopAudio, context } from "./utils/microphone";
import { arrayBufferToBase64, downSampleAudioFrame, getMergedPCMData } from "./utils/audio";
let mqttData; let mqttData;
let buffers: Float32Array[] = []; let buffers: Float32Array[] = [];
start().then(([client, data]) => {
console.log("mqtt connected");
mqttData = data;
});
function arrayBufferToBase64(buffer) {
var binary = "";
var bytes = new Uint8Array(buffer);
var len = bytes.byteLength;
for (var i = 0; i < len; i++) {
binary += String.fromCharCode(bytes[i]);
}
return window.btoa(binary);
}
const getAudio = () => { const getAudio = () => {
buffers = []; buffers = [];
startAudio((buffer) => { startAudio((float32Arr) => {
// console.log(buffer); // 降比特率到 16000
// console.log(typeof buffer); const downedFloat32Arr = downSampleAudioFrame(float32Arr, context.sampleRate, 16000);
// const a = encode(buffer); // 转 base64 打包给后端
const sendItem = arrayBufferToBase64(downedFloat32Arr.buffer);
// 存储一遍压缩后的音频
buffers.push(downedFloat32Arr);
const a = arrayBufferToBase64(buffer.buffer); client.publish(mqttData.recognition_topic, sendItem);
buffers.push(buffer);
// console.log(a);
client.publish(mqttData.recognition_topic, a);
}); });
}; };
const onDownload = () => { const onDownload = () => {
const data = getMergedPCMData(); if (buffers.length === 0) {
return;
}
const data = getMergedPCMData(buffers);
const blob = new Blob([data.buffer], { type: 'audio/wave' }); const blob = new Blob([data.buffer], { type: 'audio/wave' });
const a = document.createElement('a'); const a = document.createElement('a');
@ -52,29 +39,27 @@ const onDownload = () => {
a.click(); a.click();
}; };
const getMergedPCMData = () => {
// 合并所有片段
let totalLength = 0;
for (const buffer of buffers) {
totalLength += buffer.length;
}
const mergedBuffer = new Float32Array(totalLength);
let offset = 0;
for (const buffer of buffers) {
mergedBuffer.set(buffer, offset);
offset += buffer.length;
}
// 清除存储的片段数据(可选)
// this.outputBuffer = [];
// 如果需要16位PCM可以在这里进行转换
return mergedBuffer;
};
function App() { function App() {
const loaded = useRef(false);
const [mqttStatus, setMqttStatus] = useState(false);
useEffect(() => {
if (loaded.current) {
return;
}
loaded.current = true;
// 启用 MQTT 并连接
start().then(([client, data]) => {
console.log("mqtt connected");
mqttData = data;
setMqttStatus(true);
});
}, []);
return ( return (
<> <>
<div> <div>
@ -87,6 +72,9 @@ function App() {
</div> </div>
<p> MQTT getAudio stopAudio </p> <p> MQTT getAudio stopAudio </p>
<p> download Float32Array Audacity </p> <p> download Float32Array Audacity </p>
<p>MQTT {String(mqttStatus)}</p>
<p>{context.sampleRate}hz</p>
<p>16000hz</p>
<div className="card"> <div className="card">
<button onClick={getAudio}>getAudio</button> <button onClick={getAudio}>getAudio</button>
<button onClick={stopAudio}>stopAudio</button> <button onClick={stopAudio}>stopAudio</button>

63
src/utils/audio.ts Normal file
View File

@ -0,0 +1,63 @@
/**
 * Reduces the sample rate of one audio frame by averaging.
 *
 * Each output sample is the mean of the consecutive source samples that fall
 * into its window; window edges are placed at `Math.round(k * srcRate / dstRate)`.
 * The original note says capture input is typically 48000 Hz or 44100 Hz while
 * the Microsoft SDK requires 16000 Hz, and that the algorithm was copied from
 * the Microsoft SDK.
 *
 * @param srcFrame Samples of the incoming frame.
 * @param srcRate  Sample rate of `srcFrame`, in Hz.
 * @param dstRate  Desired sample rate, in Hz.
 * @returns A new `Float32Array` of `Math.round(srcFrame.length / (srcRate / dstRate))`
 *          samples, or `srcFrame` itself (not a copy) when `dstRate` is not
 *          lower than `srcRate`.
 */
export const downSampleAudioFrame = (
  srcFrame: Float32Array,
  srcRate: number,
  dstRate: number
) => {
  // This helper only ever reduces the rate; equal or higher targets pass through.
  if (dstRate >= srcRate) {
    return srcFrame;
  }

  const step = srcRate / dstRate;
  const outLength = Math.round(srcFrame.length / step);
  const output = new Float32Array(outLength);

  // `cursor` walks the source exactly once across all output windows.
  let cursor = 0;
  for (let outIndex = 0; outIndex < outLength; outIndex++) {
    const windowEnd = Math.round((outIndex + 1) * step);
    let sum = 0;
    let taken = 0;
    for (; cursor < windowEnd && cursor < srcFrame.length; cursor++) {
      sum += srcFrame[cursor];
      taken += 1;
    }
    output[outIndex] = sum / taken;
  }

  return output;
};
/**
 * Concatenates several Float32 audio chunks into one contiguous array.
 *
 * The input chunks are read but not modified or cleared. The result stays in
 * 32-bit float form; no conversion to 16-bit PCM is performed here.
 *
 * @param buffers Audio chunks, in playback order.
 * @returns A new `Float32Array` holding every chunk back to back.
 */
export const getMergedPCMData = (buffers: Float32Array[]) => {
  // First pass: size the destination.
  let sampleCount = 0;
  for (let i = 0; i < buffers.length; i++) {
    sampleCount += buffers[i].length;
  }

  // Second pass: copy each chunk to its position.
  const merged = new Float32Array(sampleCount);
  let writeAt = 0;
  for (let i = 0; i < buffers.length; i++) {
    const chunk = buffers[i];
    merged.set(chunk, writeAt);
    writeAt += chunk.length;
  }

  return merged;
};
/**
 * Encodes the raw bytes of a buffer as a Base64 string.
 *
 * Every byte is mapped to the character with the same code (0-255) and the
 * resulting binary string is passed to `btoa`. The global `btoa` is used
 * rather than `window.btoa` so the helper also works where `window` is not
 * defined (Web Workers, Node.js, server-side rendering).
 *
 * @param buffer Buffer whose bytes are encoded; both `ArrayBuffer` and
 *               `SharedArrayBuffer` are accepted.
 * @returns The Base64 representation of `buffer`; an empty string for an
 *          empty buffer.
 */
export const arrayBufferToBase64 = (buffer: ArrayBufferLike) => {
  const bytes = new Uint8Array(buffer);
  let binary = "";
  for (let i = 0; i < bytes.byteLength; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  return btoa(binary);
};

View File

@ -1,8 +1,8 @@
const context = new AudioContext(); export const context = new AudioContext();
let stream: MediaStream; let stream: MediaStream;
export async function createAudio(context: AudioContext, send: (buffer: any) => void) { export async function createAudio(context: AudioContext, send: (buffer: Float32Array) => void) {
if (stream) { if (stream) {
return; return;
} }
@ -22,12 +22,7 @@ export async function createAudio(context: AudioContext, send: (buffer: any) =>
}); });
node.port.onmessage = ({ data }) => { node.port.onmessage = ({ data }) => {
// a.send(data); send(data as Float32Array);
// const a = encode(data);
// buffers.push(a);
// console.log(data);
send(data);
}; };
// const oscillator = new OscillatorNode(context); // const oscillator = new OscillatorNode(context);
@ -45,27 +40,12 @@ export async function createAudio(context: AudioContext, send: (buffer: any) =>
context.resume(); context.resume();
} }
export const startAudio = (send: (buffer: any) => void) => { export const startAudio = (send: (buffer: Float32Array) => void) => {
createAudio(context, send); createAudio(context, send);
return context;
} }
export const stopAudio = () => { export const stopAudio = () => {
context.close(); context.close();
} }
function floatTo16BitPCM(view, offset, input) {
for (let i = 0; i < input.length; i++ , offset += 2) {
const s = Math.max(-1, Math.min(1, input[i]));
view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
}
}
export function encode(actualAudioFrame) {
const audioLength = actualAudioFrame.length * 2;
const buffer = new ArrayBuffer(audioLength);
const view = new DataView(buffer);
floatTo16BitPCM(view, 0, actualAudioFrame);
return buffer;
}