// File listing metadata: 125 lines, 4.6 KiB, JavaScript.
import WebSocket from 'ws';
|
||
|
|
/**
 * Client for a WebSocket speech-to-text service.
 *
 * Each call to {@link SpeechTranscriber#transcribe} opens its own socket to
 * `options.serviceUrl`, sends a single `transcribe` request and settles when the
 * service reports `done`, reports a failure, closes the connection, or the
 * request times out.
 */
export class SpeechTranscriber {
    /** Settings read by this class: `serviceUrl`, `language` and `requestTimeoutMs`. */
    options;

    /** Logger invoked as `logger.info(fields, message)` / `logger.warn(fields, message)`. */
    logger;

    /**
     * @param {object} options - Service settings (`serviceUrl`, `language`, `requestTimeoutMs`).
     * @param {object} logger - Logger providing `info` and `warn`.
     */
    constructor(options, logger) {
        this.options = options;
        this.logger = logger;
    }

    /**
     * Transcribes one audio clip.
     *
     * @param {string} requestId - Identifier sent with the request; events carrying a
     *   different `id` are logged and ignored.
     * @param {string} audioBase64 - Base64 audio, or a complete `data:` URL.
     * @param {string} [mimeType] - MIME type used when `audioBase64` is not already a
     *   `data:` URL; defaults to `audio/webm`.
     * @returns {Promise<string>} The trimmed transcript from the `done` event.
     * @throws {Error} Rejects on timeout, connection error, unexpected close, a failure
     *   event from the service, or a `done` event without transcript text.
     */
    async transcribe(requestId, audioBase64, mimeType) {
        const audio = this.normalizeAudioPayload(audioBase64, mimeType);

        return await new Promise((resolve, reject) => {
            let settled = false;
            const socket = new WebSocket(this.options.serviceUrl);

            // Settles the promise at most once and releases the timer and the socket.
            const finish = (handler) => {
                if (settled) {
                    return;
                }
                settled = true;
                clearTimeout(timeout);
                socket.removeAllListeners();
                // Closing a socket that is still connecting aborts the handshake, and the
                // socket then emits 'error' asynchronously. An 'error' event without any
                // listener is thrown as an uncaught exception, so keep a no-op listener.
                socket.on('error', () => {});
                if (socket.readyState === WebSocket.CONNECTING || socket.readyState === WebSocket.OPEN) {
                    socket.close();
                }
                handler();
            };

            const fail = (error) => {
                finish(() => {
                    reject(error);
                });
            };

            const timeout = setTimeout(() => {
                fail(new Error(`The transcription service timed out after ${this.options.requestTimeoutMs}ms.`));
            }, this.options.requestTimeoutMs);

            socket.on('open', () => {
                try {
                    socket.send(JSON.stringify({
                        type: 'transcribe',
                        id: requestId,
                        language: this.options.language,
                        audio,
                    }));
                }
                catch (error) {
                    fail(error instanceof Error ? error : new Error('Could not send transcription request.'));
                }
            });

            socket.on('message', (payload) => {
                const event = this.parseEvent(payload);
                if (!event) {
                    return;
                }
                if (event.id && event.id !== requestId) {
                    this.logger.warn({ requestId, event }, 'Ignored transcription event for another request');
                    return;
                }
                if (event.type === 'start') {
                    this.logger.info({ requestId, model: event.model, language: event.language }, 'Speech transcription started');
                    return;
                }
                if (event.type === 'delta') {
                    // Partial results are not used; only the final text is returned.
                    return;
                }
                if (event.type === 'done') {
                    // Validate before settling: a throw in here would escape into the
                    // socket's emitter and leave the promise pending forever.
                    if (typeof event.text !== 'string') {
                        fail(new Error('The transcription service finished without returning transcript text.'));
                        return;
                    }
                    const transcript = event.text.trim();
                    finish(() => {
                        resolve(transcript);
                    });
                    return;
                }

                // Any other event type is treated as a failure report.
                if (typeof event.message === 'string' && event.message.trim()) {
                    fail(new Error(event.message));
                    return;
                }
                if (event.type === 'error') {
                    fail(new Error('The transcription service reported an error without details.'));
                    return;
                }
                fail(new Error(`The transcription service sent an unexpected "${String(event.type)}" event.`));
            });

            socket.on('error', (error) => {
                fail(error instanceof Error ? error : new Error('The transcription service connection failed.'));
            });

            socket.on('close', (code, reasonBuffer) => {
                // The reason argument may be absent; treat that as "no reason given".
                const reason = reasonBuffer?.toString().trim() ?? '';
                const detail = reason
                    ? `The transcription service closed the connection unexpectedly (code=${code}, reason=${reason}).`
                    : `The transcription service closed the connection unexpectedly (code=${code}).`;
                fail(new Error(detail));
            });
        });
    }

    /**
     * Builds the `data:` URL sent as the request's `audio` field.
     *
     * @param {string} audioBase64 - Base64 audio, or an existing `data:` URL (returned trimmed, unchanged).
     * @param {string} [mimeType] - MIME type for the URL; blank or missing falls back to `audio/webm`.
     * @returns {string} A `data:<mime>;base64,<payload>` URL.
     * @throws {TypeError} If `audioBase64` is not a string.
     */
    normalizeAudioPayload(audioBase64, mimeType) {
        if (typeof audioBase64 !== 'string') {
            throw new TypeError('The audio payload must be a base64 string or a data URL.');
        }
        const trimmedAudio = audioBase64.trim();
        if (trimmedAudio.startsWith('data:')) {
            return trimmedAudio;
        }
        // `||` is intentional: an empty MIME type must fall back to the default too.
        const normalizedMimeType = (typeof mimeType === 'string' ? mimeType.trim() : '') || 'audio/webm';
        return `data:${normalizedMimeType};base64,${trimmedAudio}`;
    }

    /**
     * Decodes one incoming socket message into an event object.
     *
     * @param {string|Buffer|ArrayBuffer|Buffer[]} payload - Raw message data.
     * @returns {object|null} The parsed event, or `null` when the message is empty,
     *   is not valid JSON, or is JSON that is not an object (those are logged and ignored).
     */
    parseEvent(payload) {
        const message = this.rawDataToString(payload).trim();
        if (!message) {
            return null;
        }

        let parsed;
        try {
            parsed = JSON.parse(message);
        }
        catch {
            this.logger.warn({ transcriptionPayload: message }, 'Ignored non-JSON transcription event');
            return null;
        }

        // Valid JSON that is not an object (number, string, array, null) has no event fields.
        if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
            this.logger.warn({ transcriptionPayload: message }, 'Ignored non-object transcription event');
            return null;
        }
        return parsed;
    }

    /**
     * Converts raw socket message data to a UTF-8 string.
     *
     * @param {string|Buffer|ArrayBuffer|Buffer[]} payload - Raw message data.
     * @returns {string} The decoded text.
     */
    rawDataToString(payload) {
        if (typeof payload === 'string') {
            return payload;
        }
        if (payload instanceof ArrayBuffer) {
            return Buffer.from(payload).toString('utf8');
        }
        if (Array.isArray(payload)) {
            // Fragmented messages arrive as a list of buffers.
            return Buffer.concat(payload).toString('utf8');
        }
        return payload.toString('utf8');
    }
}
|