Files
PrivateChat/server/scripts/transcribe_whisper.py

93 lines
2.5 KiB
Python

#!/usr/bin/env python3
import argparse
import json
import sys
def emit(payload):
    """Write *payload* to stdout as a single line of JSON and flush at once.

    Every message this worker sends goes through here, so each one occupies
    exactly one line on stdout. Flushing per message makes the line visible
    to whatever is reading stdout without waiting for the buffer to fill.
    """
    encoded = json.dumps(payload)
    print(encoded, flush=True)
def load_model(model_name, device, compute_type):
    """Import faster-whisper and build a ``WhisperModel``.

    The import happens inside the function so that a failure to import the
    package can be reported as a ``fatal`` JSON message instead of an
    uncaught traceback at module import time.

    Args:
        model_name: Passed to ``WhisperModel`` as its first positional argument.
        device: Forwarded as the ``device`` keyword argument.
        compute_type: Forwarded as the ``compute_type`` keyword argument.

    Returns:
        The constructed ``WhisperModel`` instance.

    Raises:
        SystemExit: With status 1, after emitting a ``fatal`` message, when
            either the import or the model construction fails.
    """
    try:
        from faster_whisper import WhisperModel
    except Exception as import_error:
        install_hint = {
            "type": "fatal",
            "message": "faster-whisper is not installed. Run `python3 -m pip install -r server/requirements-whisper.txt`.",
        }
        emit(install_hint)
        raise SystemExit(1) from import_error

    try:
        whisper_model = WhisperModel(model_name, device=device, compute_type=compute_type)
    except Exception as load_error:
        load_failure = {
            "type": "fatal",
            "message": f"Could not load the faster-whisper model '{model_name}': {load_error}",
        }
        emit(load_failure)
        raise SystemExit(1) from load_error
    else:
        return whisper_model
def transcribe(model, request_id, audio_path):
    """Transcribe one audio file and emit the outcome as a JSON message.

    On success a ``result`` message carrying the joined, stripped segment
    text is emitted; on any exception an ``error`` message is emitted
    instead. Both messages echo ``request_id`` as ``requestId``. Nothing is
    returned and no exception is propagated from the transcription itself.

    Args:
        model: Object exposing ``transcribe(audio_path, vad_filter=..., beam_size=...)``
            that returns a ``(segments, info)`` pair.
        request_id: Identifier copied into the emitted message.
        audio_path: Passed straight through to ``model.transcribe``.
    """
    try:
        segments, _info = model.transcribe(audio_path, vad_filter=True, beam_size=5)
        # The segments are iterated inside the try so that a failure raised
        # while consuming them is reported like any other transcription error.
        pieces = [segment.text for segment in segments]
        transcript = "".join(pieces).strip()
        emit({"type": "result", "requestId": request_id, "text": transcript})
    except Exception as failure:
        error_message = {
            "type": "error",
            "requestId": request_id,
            "message": f"Whisper transcription failed: {failure}",
        }
        emit(error_message)
def main():
    """Run the persistent transcription worker.

    Parses ``--model``, ``--device`` and ``--compute-type``, loads the model
    once, emits a ``ready`` message, and then reads one JSON request per line
    from stdin until EOF. Each request must be a JSON object containing
    ``requestId`` and ``audioPath``. A malformed request is answered with an
    ``error`` message and the loop carries on, so one bad line never takes
    the worker down.

    Returns:
        0 once stdin is exhausted.

    Raises:
        SystemExit: Propagated from ``load_model`` when the model cannot be
            loaded, or from ``argparse`` on invalid command-line arguments.
    """
    parser = argparse.ArgumentParser(description="Persistent faster-whisper transcription worker")
    parser.add_argument("--model", default="small")
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--compute-type", default="int8")
    args = parser.parse_args()

    model = load_model(args.model, args.device, args.compute_type)
    emit({"type": "ready", "model": args.model})

    for raw_line in sys.stdin:
        line = raw_line.strip()
        if not line:
            continue

        # Broad catch on purpose: this is the worker's top-level boundary and
        # any parse failure must become an error message, not a crash.
        try:
            payload = json.loads(line)
        except Exception as exc:
            emit({"type": "error", "message": f"Invalid request JSON: {exc}"})
            continue

        # json.loads accepts any JSON value. A list, string, number or null
        # has no .get(), so without this guard a syntactically valid but
        # non-object line would raise AttributeError and kill the worker.
        if not isinstance(payload, dict):
            emit(
                {
                    "type": "error",
                    "message": "Invalid request JSON: expected an object.",
                }
            )
            continue

        request_id = payload.get("requestId")
        audio_path = payload.get("audioPath")
        if not request_id or not audio_path:
            emit(
                {
                    "type": "error",
                    "requestId": request_id,
                    "message": "Missing requestId or audioPath.",
                }
            )
            continue

        transcribe(model, request_id, audio_path)

    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())