Files
PrivateChat/server/dist/speech-transcriber.js

125 lines
4.6 KiB
JavaScript
Raw Normal View History

2026-03-11 03:08:27 +01:00
import WebSocket from 'ws';
export class SpeechTranscriber {
options;
logger;
constructor(options, logger) {
this.options = options;
this.logger = logger;
}
async transcribe(requestId, audioBase64, mimeType) {
const audio = this.normalizeAudioPayload(audioBase64, mimeType);
return await new Promise((resolve, reject) => {
let settled = false;
const socket = new WebSocket(this.options.serviceUrl);
const finish = (handler) => {
if (settled) {
return;
}
settled = true;
clearTimeout(timeout);
socket.removeAllListeners();
if (socket.readyState === WebSocket.CONNECTING || socket.readyState === WebSocket.OPEN) {
socket.close();
}
handler();
};
const timeout = setTimeout(() => {
finish(() => {
reject(new Error(`The transcription service timed out after ${this.options.requestTimeoutMs}ms.`));
});
}, this.options.requestTimeoutMs);
socket.on('open', () => {
try {
socket.send(JSON.stringify({
type: 'transcribe',
id: requestId,
language: this.options.language,
audio,
}));
}
catch (error) {
finish(() => {
reject(error instanceof Error ? error : new Error('Could not send transcription request.'));
});
}
});
socket.on('message', (payload) => {
const event = this.parseEvent(payload);
if (!event) {
return;
}
if (event.id && event.id !== requestId) {
this.logger.warn({ requestId, event }, 'Ignored transcription event for another request');
return;
}
if (event.type === 'start') {
this.logger.info({ requestId, model: event.model, language: event.language }, 'Speech transcription started');
return;
}
if (event.type === 'delta') {
return;
}
if (event.type === 'done') {
finish(() => {
resolve(event.text.trim());
});
return;
}
finish(() => {
reject(new Error(event.message));
});
});
socket.on('error', (error) => {
finish(() => {
reject(error instanceof Error ? error : new Error('The transcription service connection failed.'));
});
});
socket.on('close', (code, reasonBuffer) => {
if (settled) {
return;
}
const reason = reasonBuffer.toString().trim();
const detail = reason
? `The transcription service closed the connection unexpectedly (code=${code}, reason=${reason}).`
: `The transcription service closed the connection unexpectedly (code=${code}).`;
finish(() => {
reject(new Error(detail));
});
});
});
}
normalizeAudioPayload(audioBase64, mimeType) {
const trimmedAudio = audioBase64.trim();
if (trimmedAudio.startsWith('data:')) {
return trimmedAudio;
}
const normalizedMimeType = mimeType.trim() || 'audio/webm';
return `data:${normalizedMimeType};base64,${trimmedAudio}`;
}
parseEvent(payload) {
const message = this.rawDataToString(payload).trim();
if (!message) {
return null;
}
try {
return JSON.parse(message);
}
catch {
this.logger.warn({ transcriptionPayload: message }, 'Ignored non-JSON transcription event');
return null;
}
}
rawDataToString(payload) {
if (typeof payload === 'string') {
return payload;
}
if (payload instanceof ArrayBuffer) {
return Buffer.from(payload).toString('utf8');
}
if (Array.isArray(payload)) {
return Buffer.concat(payload).toString('utf8');
}
return payload.toString('utf8');
}
}