Local multi-language STT

This commit is contained in:
2026-03-26 19:38:54 +01:00
parent cc14b4d1b7
commit f2bf70bc7d
18 changed files with 1334 additions and 517 deletions

45
server/dist/index.js vendored
View File

@@ -16,7 +16,6 @@ import { generateAuthenticationOptions, generateRegistrationOptions, verifyAuthe
import Fastify from 'fastify';
import { Redis } from 'ioredis';
import { z } from 'zod';
import { SpeechTranscriber } from './speech-transcriber.js';
// Load environment variables from the `.env` file located two directory levels
// above this module's directory (resolved relative to `import.meta.url`, not
// the process working directory).
dotenv.config({ path: fileURLToPath(new URL('../../.env', import.meta.url)) });
// Filesystem path of that same directory two levels up.
// NOTE(review): presumably the repository root, as the name suggests — confirm
// against the deployed layout.
const projectRootPath = fileURLToPath(new URL('../../', import.meta.url));
const registerSchema = z.object({
@@ -90,12 +89,6 @@ const signalMessageSchema = z.discriminatedUnion('type', [
z.object({
type: z.literal('ping'),
}),
z.object({
type: z.literal('speech-transcription'),
requestId: z.string().uuid(),
mimeType: z.string().trim().min(1).max(128),
audioBase64: z.string().min(1).max(32_000_000),
}),
]);
// Fastify server instance with its built-in logger enabled and `trustProxy`
// turned on.
const app = Fastify({ logger: true, trustProxy: true });
// Hard-coded username.
// NOTE(review): presumably the account allowed to approve registrations; its
// usage is not visible in this part of the file — confirm.
const approvalAdminUsername = 'ladparis';
@@ -106,9 +99,6 @@ const frontendDistPath = resolveProjectPath(process.env.PRIVATECHAT_WEB_DIST_DIR
// --- Runtime configuration ---------------------------------------------------
// Each setting reads an environment variable and falls back to the literal
// default shown when that variable is unset (`??` only replaces null/undefined,
// so an empty-string value is kept as-is).

// Ollama server base URL; any trailing slashes are stripped.
const ollamaServerUrl = (process.env.PRIVATECHAT_OLLAMA_URL ?? 'http://192.168.1.19:11434').replace(/\/+$/, '');
// Ollama image model identifier and image size string.
const ollamaImageModel = process.env.PRIVATECHAT_OLLAMA_IMAGE_MODEL ?? 'x/z-image-turbo:latest';
const ollamaImageSize = process.env.PRIVATECHAT_OLLAMA_IMAGE_SIZE ?? '1024x1024';
// Speech transcription settings: WebSocket service URL, language ('auto' by
// default) and a timeout that defaults to 120_000 (milliseconds, per the name).
const speechTranscriptionServiceUrl = process.env.PRIVATECHAT_TRANSCRIPTION_WS_URL ?? 'wss://whisper.dubertrand.fr';
const speechTranscriptionLanguage = process.env.PRIVATECHAT_TRANSCRIPTION_LANGUAGE ?? 'auto';
// NOTE(review): the three Number(...) conversions below are not validated here —
// a non-numeric value yields NaN and an empty string yields 0.
const speechTranscriptionTimeoutMs = Number(process.env.PRIVATECHAT_TRANSCRIPTION_TIMEOUT_MS ?? 120_000);
// Session lifetime; the default expression is 12 hours expressed in seconds.
const sessionTtlSeconds = Number(process.env.SESSION_TTL_SECONDS ?? 60 * 60 * 12);
// WebAuthn challenge lifetime; the default expression is 5 minutes in seconds.
const webAuthnChallengeTtlSeconds = Number(process.env.WEBAUTHN_CHALLENGE_TTL_SECONDS ?? 5 * 60);
// Result of parseAllowedOrigins applied to the raw CORS_ORIGIN value (which may
// be undefined). The parsing rules live in parseAllowedOrigins, not shown here.
const allowedCorsOrigins = parseAllowedOrigins(process.env.CORS_ORIGIN);
@@ -121,11 +111,6 @@ const frontendIndexPath = path.join(frontendDistPath, 'index.html');
// True when the file at frontendIndexPath exists at startup (checked once,
// synchronously).
const hasFrontendBuild = fs.existsSync(frontendIndexPath);
// Promise-returning wrappers around callback-style APIs.
const convertOfficeDocument = promisify(libreOffice.convertWithOptions);
const execFileAsync = promisify(execFile);
// Module-level SpeechTranscriber built from the transcription settings above;
// the Fastify logger (`app.log`) is passed as the second constructor argument.
const speechTranscriber = new SpeechTranscriber({
serviceUrl: speechTranscriptionServiceUrl,
language: speechTranscriptionLanguage,
requestTimeoutMs: speechTranscriptionTimeoutMs,
}, app.log);
// Create the parent directories of the SQLite file and the master key file
// (including missing ancestors) before the key is loaded below.
fs.mkdirSync(path.dirname(sqlitePath), { recursive: true });
fs.mkdirSync(path.dirname(masterKeyPath), { recursive: true });
// NOTE(review): going by the helper names, this loads the master key from
// masterKeyPath (creating it if absent) and derives the encryption key from it —
// both helpers are defined elsewhere; confirm their exact behavior.
const encryptionKey = deriveEncryptionKey(loadOrCreateMasterKey(masterKeyPath));
@@ -782,25 +767,6 @@ async function handleSocketMessage(userId, sessionId, socket, rawMessage) {
}
return;
}
if (parsed.type === 'speech-transcription') {
try {
const text = await transcribeAudioPayload(parsed.requestId, parsed.audioBase64, parsed.mimeType);
send(socket, {
type: 'speech-transcribed',
requestId: parsed.requestId,
text,
});
}
catch (error) {
app.log.warn({ err: error, userId }, 'Speech transcription failed');
send(socket, {
type: 'speech-transcription-error',
requestId: parsed.requestId,
message: error instanceof Error ? error.message : 'Speech transcription failed.',
});
}
return;
}
let delivered = 0;
const recipientSockets = socketsByUserId.get(parsed.to);
if (recipientSockets) {
@@ -1257,23 +1223,12 @@ function parseClientMessage(rawMessage) {
prompt: parsed.data.prompt,
};
}
if (parsed.data.type === 'speech-transcription') {
return {
type: 'speech-transcription',
requestId: parsed.data.requestId,
mimeType: parsed.data.mimeType,
audioBase64: parsed.data.audioBase64,
};
}
return {
type: 'signal',
to: parsed.data.to,
signal: normalizeSignal(parsed.data.signal),
};
}
/**
 * Forwards one audio payload to the module-level `speechTranscriber` and
 * resolves with whatever its `transcribe` call resolves to.
 *
 * The three arguments are passed through unchanged and in the same order.
 * Any rejection from `speechTranscriber.transcribe` propagates to the caller.
 *
 * @param {string} requestId - Identifier of the transcription request.
 * @param {string} audioBase64 - Audio payload (base64 text, per the name).
 * @param {string} mimeType - MIME type accompanying the audio payload.
 * @returns {Promise<*>} The transcriber's result.
 */
async function transcribeAudioPayload(requestId, audioBase64, mimeType) {
    const transcript = await speechTranscriber.transcribe(requestId, audioBase64, mimeType);
    return transcript;
}
async function generateImageFromPrompt(prompt) {
const abortController = new AbortController();
const timeoutId = setTimeout(() => abortController.abort(), 120_000);