Feat: 为微软 SDK 将音频降采样到 16000 Hz 采样率

This commit is contained in:
Paul 2024-06-03 17:57:55 +08:00
parent 745dab518f
commit b27241b7bc
4 changed files with 110 additions and 81 deletions

View File

@ -34,9 +34,7 @@
}
.card {
padding: 2em;
}
.read-the-docs {
color: #888;
gap: 1em;
display: flex;
justify-content: center;
}

View File

@ -1,49 +1,36 @@
import { useEffect, useRef, useState } from "react";
import reactLogo from "./assets/react.svg";
import viteLogo from "/vite.svg";
import "./App.css";
import { client, start } from "./utils/mqtt";
import { encode, startAudio, stopAudio } from "./utils/microphone";
import { startAudio, stopAudio, context } from "./utils/microphone";
import { arrayBufferToBase64, downSampleAudioFrame, getMergedPCMData } from "./utils/audio";
let mqttData;
let buffers: Float32Array[] = [];
start().then(([client, data]) => {
console.log("mqtt connected");
mqttData = data;
});
/**
 * Encodes the bytes of `buffer` as a Base64 string.
 *
 * Every byte is turned into the character with the same code unit, and the
 * resulting binary string is handed to `window.btoa`.
 */
function arrayBufferToBase64(buffer) {
  const octets = new Uint8Array(buffer);
  let binaryString = "";
  for (let pos = 0; pos < octets.byteLength; pos += 1) {
    binaryString = binaryString + String.fromCharCode(octets[pos]);
  }
  return window.btoa(binaryString);
}
const getAudio = () => {
buffers = [];
startAudio((buffer) => {
// console.log(buffer);
// console.log(typeof buffer);
// const a = encode(buffer);
startAudio((float32Arr) => {
// 降比特率到 16000
const downedFloat32Arr = downSampleAudioFrame(float32Arr, context.sampleRate, 16000);
// 转 base64 打包给后端
const sendItem = arrayBufferToBase64(downedFloat32Arr.buffer);
// 存储一遍压缩后的音频
buffers.push(downedFloat32Arr);
const a = arrayBufferToBase64(buffer.buffer);
buffers.push(buffer);
// console.log(a);
client.publish(mqttData.recognition_topic, a);
client.publish(mqttData.recognition_topic, sendItem);
});
};
const onDownload = () => {
const data = getMergedPCMData();
if (buffers.length === 0) {
return;
}
const data = getMergedPCMData(buffers);
const blob = new Blob([data.buffer], { type: 'audio/wave' });
const a = document.createElement('a');
@ -52,29 +39,27 @@ const onDownload = () => {
a.click();
};
// Joins every chunk stored in the module-level `buffers` array into a single
// Float32Array, preserving chunk order.
const getMergedPCMData = () => {
  // First pass: work out how many samples the merged array must hold.
  let sampleCount = 0;
  for (let i = 0; i < buffers.length; i++) {
    sampleCount += buffers[i].length;
  }

  // Second pass: copy each chunk in at the running write position.
  const merged = new Float32Array(sampleCount);
  let writeAt = 0;
  for (let i = 0; i < buffers.length; i++) {
    const chunk = buffers[i];
    merged.set(chunk, writeAt);
    writeAt += chunk.length;
  }

  // The stored chunks are intentionally left untouched (clearing them is optional).
  // A conversion to 16-bit PCM could be applied here if it is ever needed.
  return merged;
};
function App() {
const loaded = useRef(false);
const [mqttStatus, setMqttStatus] = useState(false);
useEffect(() => {
if (loaded.current) {
return;
}
loaded.current = true;
// 启用 MQTT 并连接
start().then(([client, data]) => {
console.log("mqtt connected");
mqttData = data;
setMqttStatus(true);
});
}, []);
return (
<>
<div>
@ -87,6 +72,9 @@ function App() {
</div>
<p> MQTT getAudio stopAudio </p>
<p> download Float32Array Audacity </p>
<p>MQTT {String(mqttStatus)}</p>
<p>{context.sampleRate}hz</p>
<p>16000hz</p>
<div className="card">
<button onClick={getAudio}>getAudio</button>
<button onClick={stopAudio}>stopAudio</button>

63
src/utils/audio.ts Normal file
View File

@ -0,0 +1,63 @@
/**
 * Lowers the sample rate of one audio frame by averaging runs of consecutive
 * source samples.
 *
 * The input may arrive at 48000 Hz or 44100 Hz, while Microsoft requires
 * 16000 Hz. The algorithm is copied from the Microsoft SDK.
 *
 * @param srcFrame - Samples recorded at `srcRate`.
 * @param srcRate - Sample rate of `srcFrame`, in Hz.
 * @param dstRate - Sample rate wanted for the result, in Hz.
 * @returns A new frame at `dstRate`, or `srcFrame` itself (not a copy) when
 *   `dstRate` is not lower than `srcRate`.
 */
export const downSampleAudioFrame = (
  srcFrame: Float32Array,
  srcRate: number,
  dstRate: number
) => {
  // Only ever downsample; equal or higher target rates pass straight through.
  if (dstRate >= srcRate) {
    return srcFrame;
  }

  const step = srcRate / dstRate;
  const outLength = Math.round(srcFrame.length / step);
  const out = new Float32Array(outLength);

  let readPos = 0;
  for (let writePos = 0; writePos < outLength; writePos++) {
    // Exclusive end of the source window that feeds this output sample.
    const windowEnd = Math.round((writePos + 1) * step);
    let sum = 0;
    let taken = 0;
    for (; readPos < windowEnd && readPos < srcFrame.length; readPos++) {
      sum += srcFrame[readPos];
      taken++;
    }
    // Each output sample is the mean of the source samples in its window.
    out[writePos] = sum / taken;
  }
  return out;
};
/**
 * Concatenates several Float32 audio chunks into one contiguous array.
 *
 * @param buffers - Chunks to join, in playback order.
 * @returns A new Float32Array holding every chunk back to back; the input
 *   chunks are not modified or cleared.
 */
export const getMergedPCMData = (buffers: Float32Array[]) => {
  // Size the destination to fit all chunks.
  const sampleCount = buffers.reduce((sum, chunk) => sum + chunk.length, 0);
  const merged = new Float32Array(sampleCount);

  // Copy each chunk in at the running write position.
  let writeAt = 0;
  for (const chunk of buffers) {
    merged.set(chunk, writeAt);
    writeAt += chunk.length;
  }

  // A conversion to 16-bit PCM could be applied here if it is ever needed.
  return merged;
};
/**
 * Encodes the raw bytes of a buffer as a Base64 string.
 *
 * Calls the global `btoa` instead of `window.btoa`, so the helper also works
 * in environments that expose `btoa` without a `window` object (for example
 * Web Workers or Node.js).
 *
 * @param buffer - Bytes to encode.
 * @returns The Base64 text for `buffer`; an empty string for an empty buffer.
 */
export const arrayBufferToBase64 = (buffer: ArrayBuffer) => {
  const bytes = new Uint8Array(buffer);
  // Convert in bounded chunks: one fromCharCode call per chunk avoids a string
  // append per byte while keeping the argument count well below engine limits.
  const CHUNK_SIZE = 0x8000;
  const parts: string[] = [];
  for (let start = 0; start < bytes.length; start += CHUNK_SIZE) {
    parts.push(String.fromCharCode(...bytes.subarray(start, start + CHUNK_SIZE)));
  }
  return btoa(parts.join(""));
};

View File

@ -1,8 +1,8 @@
const context = new AudioContext();
export const context = new AudioContext();
let stream: MediaStream;
export async function createAudio(context: AudioContext, send: (buffer: any) => void) {
export async function createAudio(context: AudioContext, send: (buffer: Float32Array) => void) {
if (stream) {
return;
}
@ -22,12 +22,7 @@ export async function createAudio(context: AudioContext, send: (buffer: any) =>
});
node.port.onmessage = ({ data }) => {
// a.send(data);
// const a = encode(data);
// buffers.push(a);
// console.log(data);
send(data);
send(data as Float32Array);
};
// const oscillator = new OscillatorNode(context);
@ -45,27 +40,12 @@ export async function createAudio(context: AudioContext, send: (buffer: any) =>
context.resume();
}
export const startAudio = (send: (buffer: any) => void) => {
export const startAudio = (send: (buffer: Float32Array) => void) => {
createAudio(context, send);
return context;
}
/**
 * Stops audio processing by closing the module-level `AudioContext`.
 *
 * `AudioContext.close()` returns a promise that rejects when the context has
 * already been closed, so calling this twice used to leave an unhandled
 * rejection. The call is now skipped for a closed context and any other
 * failure is logged.
 */
export const stopAudio = () => {
  if (context.state === "closed") {
    return;
  }
  // close() is asynchronous; report failures instead of leaving the promise floating.
  context.close().catch((err: unknown) => {
    console.error("failed to close AudioContext", err);
  });
}
/**
 * Writes float samples into `view` as little-endian signed 16-bit integers.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000 and
 * non-negative values by 0x7FFF. Samples are written two bytes apart, starting
 * at byte `offset`.
 */
function floatTo16BitPCM(view, offset, input) {
  let writeAt = offset;
  for (let idx = 0; idx < input.length; idx++) {
    const clamped = Math.max(-1, Math.min(1, input[idx]));
    const scale = clamped < 0 ? 0x8000 : 0x7FFF;
    view.setInt16(writeAt, clamped * scale, true);
    writeAt += 2;
  }
}
/**
 * Converts a frame of float samples into a buffer of 16-bit PCM data.
 *
 * Allocates two bytes per sample and fills them via `floatTo16BitPCM`,
 * starting at byte offset 0.
 */
export function encode(actualAudioFrame) {
  const BYTES_PER_SAMPLE = 2;
  const pcmBytes = new ArrayBuffer(actualAudioFrame.length * BYTES_PER_SAMPLE);
  floatTo16BitPCM(new DataView(pcmBytes), 0, actualAudioFrame);
  return pcmBytes;
}