Dictation through AI
This commit is contained in:
82
server/dist/index.js
vendored
82
server/dist/index.js
vendored
@@ -1,5 +1,7 @@
|
||||
import crypto from 'node:crypto';
|
||||
import fs from 'node:fs';
|
||||
import fsPromises from 'node:fs/promises';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { TextEncoder } from 'node:util';
|
||||
@@ -13,6 +15,7 @@ import { generateAuthenticationOptions, generateRegistrationOptions, verifyAuthe
|
||||
import Fastify from 'fastify';
|
||||
import { Redis } from 'ioredis';
|
||||
import { z } from 'zod';
|
||||
import { WhisperTranscriber } from './whisper-transcriber.js';
|
||||
dotenv.config({ path: fileURLToPath(new URL('../../.env', import.meta.url)) });
|
||||
const projectRootPath = fileURLToPath(new URL('../../', import.meta.url));
|
||||
const registerSchema = z.object({
|
||||
@@ -81,6 +84,12 @@ const signalMessageSchema = z.discriminatedUnion('type', [
|
||||
z.object({
|
||||
type: z.literal('ping'),
|
||||
}),
|
||||
z.object({
|
||||
type: z.literal('speech-transcription'),
|
||||
requestId: z.string().uuid(),
|
||||
mimeType: z.string().trim().min(1).max(128),
|
||||
audioBase64: z.string().min(1).max(32_000_000),
|
||||
}),
|
||||
]);
|
||||
const app = Fastify({ logger: true, trustProxy: true });
|
||||
const approvalAdminUsername = 'ladparis';
|
||||
@@ -91,6 +100,11 @@ const frontendDistPath = resolveProjectPath(process.env.PRIVATECHAT_WEB_DIST_DIR
|
||||
// Base URL of the Ollama server. Taken from PRIVATECHAT_OLLAMA_URL when set,
// otherwise a hard-coded LAN address; any trailing slashes are stripped.
const ollamaServerUrl = (process.env.PRIVATECHAT_OLLAMA_URL ?? 'http://192.168.1.19:11434').replace(/\/+$/, '');
|
||||
// Ollama image model name and image size, each overridable through its env variable.
const ollamaImageModel = process.env.PRIVATECHAT_OLLAMA_IMAGE_MODEL ?? 'x/z-image-turbo:latest';
|
||||
const ollamaImageSize = process.env.PRIVATECHAT_OLLAMA_IMAGE_SIZE ?? '1024x1024';
|
||||
// Whisper settings. Each value is read from its PRIVATECHAT_WHISPER_* env variable
// with the literal shown as fallback, and is later passed to the WhisperTranscriber
// constructor as pythonExecutable / model / device / computeType.
const whisperPythonExecutable = process.env.PRIVATECHAT_WHISPER_PYTHON ?? 'python3';
|
||||
const whisperModel = process.env.PRIVATECHAT_WHISPER_MODEL ?? 'small';
|
||||
const whisperDevice = process.env.PRIVATECHAT_WHISPER_DEVICE ?? 'cpu';
|
||||
const whisperComputeType = process.env.PRIVATECHAT_WHISPER_COMPUTE_TYPE ?? 'int8';
|
||||
// Path of the transcription script, resolved by resolveProjectPath (defined elsewhere
// in this file); passed to WhisperTranscriber as scriptPath.
const whisperScriptPath = resolveProjectPath('server/scripts/transcribe_whisper.py');
|
||||
// TTLs in seconds. Defaults: 60 * 60 * 12 (12 hours) for sessions and 5 * 60 (5 minutes)
// for WebAuthn challenges. NOTE(review): Number() yields NaN for a non-numeric env
// value and nothing on these lines validates the result - confirm it is checked elsewhere.
const sessionTtlSeconds = Number(process.env.SESSION_TTL_SECONDS ?? 60 * 60 * 12);
|
||||
const webAuthnChallengeTtlSeconds = Number(process.env.WEBAUTHN_CHALLENGE_TTL_SECONDS ?? 5 * 60);
|
||||
// CORS origins derived from CORS_ORIGIN by parseAllowedOrigins (defined elsewhere in this file).
const allowedCorsOrigins = parseAllowedOrigins(process.env.CORS_ORIGIN);
|
||||
@@ -101,6 +115,13 @@ const webAuthnRpName = process.env.WEBAUTHN_RP_NAME ?? 'PrivateChat';
|
||||
// WebAuthn user-verification setting, resolved from WEBAUTHN_USER_VERIFICATION by a
// helper defined elsewhere in this file.
const webAuthnUserVerification = resolveWebAuthnUserVerification(process.env.WEBAUTHN_USER_VERIFICATION);
|
||||
// Location of the frontend entry page inside frontendDistPath.
const frontendIndexPath = path.join(frontendDistPath, 'index.html');
|
||||
// Synchronous check, evaluated once when this module loads, for whether index.html exists.
const hasFrontendBuild = fs.existsSync(frontendIndexPath);
|
||||
// Single module-level transcriber instance, configured from the whisper* constants and
// given the Fastify logger (app.log) as its second constructor argument.
// transcribeAudioPayload() calls its transcribe(requestId, audioPath) method.
const whisperTranscriber = new WhisperTranscriber({
|
||||
pythonExecutable: whisperPythonExecutable,
|
||||
scriptPath: whisperScriptPath,
|
||||
|
||||
model: whisperModel,
|
||||
device: whisperDevice,
|
||||
|
||||
computeType: whisperComputeType,
|
||||
}, app.log);
|
||||
// Create the parent directories of sqlitePath and masterKeyPath; with recursive: true
// an already-existing directory is not an error.
fs.mkdirSync(path.dirname(sqlitePath), { recursive: true });
|
||||
fs.mkdirSync(path.dirname(masterKeyPath), { recursive: true });
|
||||
// Encryption key produced by deriveEncryptionKey from the result of
// loadOrCreateMasterKey(masterKeyPath); both helpers are defined elsewhere in this file.
const encryptionKey = deriveEncryptionKey(loadOrCreateMasterKey(masterKeyPath));
|
||||
@@ -719,6 +740,25 @@ async function handleSocketMessage(userId, sessionId, socket, rawMessage) {
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (parsed.type === 'speech-transcription') {
|
||||
try {
|
||||
const text = await transcribeAudioPayload(parsed.requestId, parsed.audioBase64, parsed.mimeType);
|
||||
send(socket, {
|
||||
type: 'speech-transcribed',
|
||||
requestId: parsed.requestId,
|
||||
text,
|
||||
});
|
||||
}
|
||||
catch (error) {
|
||||
app.log.warn({ err: error, userId }, 'Whisper transcription failed');
|
||||
send(socket, {
|
||||
type: 'speech-transcription-error',
|
||||
requestId: parsed.requestId,
|
||||
message: error instanceof Error ? error.message : 'Speech transcription failed.',
|
||||
});
|
||||
}
|
||||
return;
|
||||
}
|
||||
let delivered = 0;
|
||||
const recipientSockets = socketsByUserId.get(parsed.to);
|
||||
if (recipientSockets) {
|
||||
@@ -1095,12 +1135,54 @@ function parseClientMessage(rawMessage) {
|
||||
prompt: parsed.data.prompt,
|
||||
};
|
||||
}
|
||||
if (parsed.data.type === 'speech-transcription') {
|
||||
return {
|
||||
type: 'speech-transcription',
|
||||
requestId: parsed.data.requestId,
|
||||
mimeType: parsed.data.mimeType,
|
||||
audioBase64: parsed.data.audioBase64,
|
||||
};
|
||||
}
|
||||
return {
|
||||
type: 'signal',
|
||||
to: parsed.data.to,
|
||||
signal: normalizeSignal(parsed.data.signal),
|
||||
};
|
||||
}
|
||||
/**
 * Transcribe a base64-encoded audio clip.
 *
 * A fresh scratch directory is created under the OS temp directory, the decoded
 * audio is written into it as `dictation-<requestId>.<ext>`, and the file is
 * handed to the shared `whisperTranscriber`. The scratch directory is removed
 * afterwards whether writing/transcription succeeded or threw.
 *
 * @param {string} requestId - Request identifier; used in the file name and forwarded to the transcriber.
 * @param {string} audioBase64 - Audio bytes encoded as base64.
 * @param {string} mimeType - MIME type of the audio; selects the file extension.
 * @returns {Promise<*>} Whatever `whisperTranscriber.transcribe` resolves to.
 */
async function transcribeAudioPayload(requestId, audioBase64, mimeType) {
    const scratchDirPrefix = path.join(os.tmpdir(), 'privatechat-whisper-');
    const scratchDir = await fsPromises.mkdtemp(scratchDirPrefix);
    const fileName = `dictation-${requestId}.${audioExtensionForMimeType(mimeType)}`;
    const audioPath = path.join(scratchDir, fileName);

    try {
        const audioBytes = Buffer.from(audioBase64, 'base64');
        await fsPromises.writeFile(audioPath, audioBytes);
        const transcript = await whisperTranscriber.transcribe(requestId, audioPath);
        return transcript;
    }
    finally {
        // Always discard the scratch directory (and the audio file inside it).
        await fsPromises.rm(scratchDir, { recursive: true, force: true });
    }
}
|
||||
/**
 * Pick the file extension to use for an uploaded audio clip.
 *
 * Only the media type (the part before the first `;`) is matched, compared
 * case-insensitively and with surrounding whitespace removed. This way
 * parameterised values such as `audio/ogg; codecs=opus` or
 * `audio/mp4;codecs=mp4a.40.2` resolve to their container's extension instead
 * of falling through to the default.
 *
 * @param {string} mimeType - MIME type reported by the client, optionally with parameters.
 * @returns {string} Extension without a leading dot; `'webm'` for unrecognised types.
 */
function audioExtensionForMimeType(mimeType) {
    // Drop any parameters (";codecs=...") and whitespace around the media type.
    const [mediaType] = mimeType.toLowerCase().split(';');
    switch (mediaType.trim()) {
        case 'audio/webm':
            return 'webm';
        case 'audio/ogg':
            return 'ogg';
        case 'audio/mp4':
        case 'audio/m4a':
            return 'm4a';
        case 'audio/mpeg':
        case 'audio/mp3':
            return 'mp3';
        case 'audio/wav':
        case 'audio/wave':
        case 'audio/x-wav':
            return 'wav';
        default:
            // Unknown types keep the historical fallback.
            return 'webm';
    }
}
|
||||
async function generateImageFromPrompt(prompt) {
|
||||
const abortController = new AbortController();
|
||||
const timeoutId = setTimeout(() => abortController.abort(), 120_000);
|
||||
|
||||
Reference in New Issue
Block a user