Dictation through AI

This commit is contained in:
2026-03-11 00:26:49 +01:00
parent d2c4152ea7
commit 189f989c0d
12 changed files with 911 additions and 2 deletions

View File

@@ -1,5 +1,5 @@
import { CommonModule } from '@angular/common';
import { Component, computed, effect, ElementRef, inject, OnDestroy, signal, ViewChild } from '@angular/core';
import { Component, computed, effect, ElementRef, inject, NgZone, OnDestroy, signal, ViewChild } from '@angular/core';
import { toSignal } from '@angular/core/rxjs-interop';
import { FormsModule } from '@angular/forms';
import { ActivatedRoute, Router, RouterLink } from '@angular/router';
@@ -18,6 +18,7 @@ import type { ChatEntry, ConnectionState, PeerSummary } from './models';
export class ChatPageComponent implements OnDestroy {
private readonly route = inject(ActivatedRoute);
private readonly router = inject(Router);
private readonly ngZone = inject(NgZone);
private readonly routeParamMap = toSignal(this.route.paramMap, {
initialValue: this.route.snapshot.paramMap,
});
@@ -28,6 +29,14 @@ export class ChatPageComponent implements OnDestroy {
private voiceChunks: Blob[] = [];
private discardRecordedVoice = false;
private recordingPeerId: string | null = null;
private dictationRecorder: MediaRecorder | null = null;
private dictationStream: MediaStream | null = null;
private dictationChunks: Blob[] = [];
private dictationBaseText = '';
private discardRecordedDictation = false;
private dictationCompletionPromise: Promise<void> | null = null;
private resolveDictationCompletion: (() => void) | null = null;
private dictationApplyToken = 0;
@ViewChild('callAudioElement')
set callAudioElementRef(value: ElementRef<HTMLAudioElement> | undefined) {
this.callAudioElement = value;
@@ -39,6 +48,8 @@ export class ChatPageComponent implements OnDestroy {
readonly forwardingEntryId = signal<string | null>(null);
readonly emojiPickerOpen = signal(false);
readonly isRecordingVoice = signal(false);
readonly isDictating = signal(false);
readonly isTranscribingDictation = signal(false);
readonly emojiOptions = [
'😀', '😁', '😂', '🤣', '😊',
'😉', '😍', '😘', '😎', '🤔',
@@ -152,6 +163,7 @@ export class ChatPageComponent implements OnDestroy {
}
/**
 * Angular teardown hook: stops dictation and voice recording and detaches the call audio source.
 */
ngOnDestroy(): void {
// Called with discard = true; the returned promise is intentionally not awaited (`void`).
void this.stopDictation(true);
// NOTE(review): the `true` argument presumably means "discard the recording" — confirm against stopVoiceRecording.
this.stopVoiceRecording(true);
this.detachCallAudioSource();
}
@@ -174,6 +186,7 @@ export class ChatPageComponent implements OnDestroy {
return;
}
await this.stopDictation(false);
await this.session.sendText(peerId, this.messageText);
this.messageText = '';
this.emojiPickerOpen.set(false);
@@ -188,6 +201,7 @@ export class ChatPageComponent implements OnDestroy {
return;
}
await this.stopDictation(false);
const requested = await this.session.requestGeneratedImage(peerId, this.messageText);
if (!requested) {
@@ -262,6 +276,92 @@ export class ChatPageComponent implements OnDestroy {
input.value = '';
}
/**
 * Starts dictation recording, or stops it (keeping the audio) when a recording is already running.
 *
 * When starting, the current composer text is remembered as the base text, the microphone is
 * requested, and a MediaRecorder is wired so that stopping it hands the recorded audio to
 * `transcribeDictation`. Failures are reported through `session.error`.
 *
 * @param textarea Composer textarea that receives the transcribed text once it is available.
 * @returns A promise that settles once recording has started, or once the stop request has completed.
 */
async toggleDictation(textarea: HTMLTextAreaElement): Promise<void> {
  if (this.isDictating()) {
    await this.stopDictation(false);
    return;
  }

  // A previous take is still being transcribed or has not settled yet. Starting another one
  // would overwrite the shared completion promise/resolver and leave the earlier waiter hanging.
  if (this.isTranscribingDictation() || this.dictationCompletionPromise) {
    return;
  }

  const peerId = this.peerId();
  if (!peerId) {
    return;
  }

  if (typeof MediaRecorder === 'undefined' || typeof navigator === 'undefined') {
    this.session.error.set('This browser does not support dictation recording.');
    return;
  }

  if (typeof navigator.mediaDevices?.getUserMedia !== 'function') {
    this.session.error.set('This browser cannot access the microphone for dictation.');
    return;
  }

  this.dictationBaseText = this.messageText;
  this.discardRecordedDictation = false;
  this.dictationApplyToken += 1;
  // Captured before the await so a discard or a newer start request issued while the
  // microphone prompt is open can be detected afterwards.
  const applyToken = this.dictationApplyToken;

  // Kept outside the try block so the catch handler can release the microphone even when
  // the failure happens before the stream has been stored on the component.
  let stream: MediaStream | null = null;

  try {
    const acquiredStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    stream = acquiredStream;

    if (applyToken !== this.dictationApplyToken) {
      // This start request was superseded while waiting for the microphone: release it and
      // leave the component state to whoever superseded us.
      for (const track of acquiredStream.getTracks()) {
        track.stop();
      }
      return;
    }

    const preferredMimeType = this.preferredVoiceMimeType();
    const recorder = preferredMimeType
      ? new MediaRecorder(acquiredStream, { mimeType: preferredMimeType })
      : new MediaRecorder(acquiredStream);

    this.dictationChunks = [];
    this.dictationStream = acquiredStream;
    this.dictationRecorder = recorder;
    this.dictationCompletionPromise = new Promise<void>((resolve) => {
      this.resolveDictationCompletion = resolve;
    });

    recorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        this.dictationChunks.push(event.data);
      }
    };

    recorder.onerror = () => {
      this.ngZone.run(() => {
        this.session.error.set('Could not record dictation audio.');
        this.cleanupDictationRecorder();
        this.finishDictationCompletion();
      });
    };

    recorder.onstop = () => {
      // Snapshot everything that cleanupDictationRecorder() resets before calling it.
      const shouldDiscard = this.discardRecordedDictation;
      const mimeType = recorder.mimeType || preferredMimeType || 'audio/webm';
      const blob = new Blob(this.dictationChunks, { type: mimeType });

      this.ngZone.run(() => {
        this.cleanupDictationRecorder();
        if (shouldDiscard || blob.size === 0) {
          this.finishDictationCompletion();
          return;
        }

        this.isTranscribingDictation.set(true);
        void this.transcribeDictation(blob, textarea, applyToken);
      });
    };

    recorder.start();
    this.isDictating.set(true);
    this.session.error.set(null);
  } catch {
    // Release the microphone even if the recorder could not be constructed or started.
    if (stream) {
      for (const track of stream.getTracks()) {
        track.stop();
      }
    }

    if (applyToken !== this.dictationApplyToken) {
      // Superseded while the request was pending: do not touch state owned by a newer request.
      return;
    }

    this.session.error.set('Could not start dictation recording.');
    this.cleanupDictationRecorder();
    this.finishDictationCompletion();
  }
}
async toggleVoiceRecording(): Promise<void> {
if (this.isRecordingVoice()) {
this.stopVoiceRecording(false);
@@ -482,6 +582,7 @@ export class ChatPageComponent implements OnDestroy {
return;
}
await this.stopDictation(true);
this.stopVoiceRecording(true);
this.forwardingEntryId.set(null);
this.emojiPickerOpen.set(false);
@@ -532,6 +633,106 @@ export class ChatPageComponent implements OnDestroy {
return candidates.find((candidate) => MediaRecorder.isTypeSupported(candidate)) ?? '';
}
/**
 * Stops the current dictation, if any, and waits for its completion promise.
 *
 * @param discard When true, invalidates pending transcription results and restores the base
 *   text captured for the dictation; when false, the current composer text becomes the base
 *   text that a transcript is merged onto.
 */
private async stopDictation(discard: boolean): Promise<void> {
  // Captured first: the branches below may clear the field before we get to await it.
  const pending = this.dictationCompletionPromise;

  if (!discard) {
    this.dictationBaseText = this.messageText;
  } else {
    // Bumping the token makes any in-flight transcription skip applying its result.
    this.dictationApplyToken += 1;
    this.messageText = this.dictationBaseText || this.messageText;
    this.handleMessageTextChange(this.messageText);
    this.isTranscribingDictation.set(false);
  }

  const recorder = this.dictationRecorder;
  if (!recorder) {
    if (!pending) {
      // Nothing is recording and nothing is pending: drop the base text again.
      this.dictationBaseText = '';
    }
  } else {
    this.discardRecordedDictation = discard;
    if (recorder.state === 'inactive') {
      // NOTE(review): a recorder may already report 'inactive' after stop() was requested but
      // before its stop event has been handled — confirm this branch cannot drop pending audio.
      this.cleanupDictationRecorder();
      this.finishDictationCompletion();
    } else {
      recorder.stop();
    }
  }

  if (pending) {
    await pending;
  }
}
/**
 * Stops every track of the dictation stream (when one exists) and resets the recorder,
 * stream, chunk buffer, discard flag and the `isDictating` signal.
 */
private cleanupDictationRecorder(): void {
  this.dictationStream?.getTracks().forEach((track) => {
    track.stop();
  });

  this.dictationRecorder = null;
  this.dictationStream = null;
  this.dictationChunks = [];
  this.discardRecordedDictation = false;
  this.isDictating.set(false);
}
/**
 * Resolves the pending dictation completion promise (if any) and clears the completion
 * bookkeeping together with the remembered base text.
 */
private finishDictationCompletion(): void {
  const settle = this.resolveDictationCompletion;

  this.resolveDictationCompletion = null;
  this.dictationCompletionPromise = null;
  this.dictationBaseText = '';

  // Promise continuations run asynchronously, so resolving after the reset is equivalent.
  if (settle) {
    settle();
  }
}
/**
 * Requests a transcription for the recorded audio and, if this dictation is still the current
 * one, merges the transcript into the composer.
 *
 * @param blob Recorded dictation audio.
 * @param textarea Composer textarea that receives the merged text.
 * @param applyToken Value of `dictationApplyToken` when the recording started; results are
 *   ignored once the component's token has moved on.
 */
private async transcribeDictation(blob: Blob, textarea: HTMLTextAreaElement, applyToken: number): Promise<void> {
  const isStillCurrent = (): boolean => applyToken === this.dictationApplyToken;

  try {
    const transcript = await this.session.requestSpeechTranscription(blob);
    if (isStillCurrent()) {
      const mergedText = this.mergeDictatedText(this.dictationBaseText, transcript);
      this.applyDictatedText(mergedText, textarea);
    }
  } catch {
    if (isStillCurrent()) {
      this.session.error.set('Dictation transcription failed.');
    }
  } finally {
    if (isStillCurrent()) {
      this.isTranscribingDictation.set(false);
    }
    // Always settle the completion promise so callers awaiting stopDictation() are released.
    this.finishDictationCompletion();
  }
}
/**
 * Appends a transcript to the existing composer text.
 *
 * @param baseText Text that was in the composer before the transcript arrived.
 * @param transcript Raw transcription result.
 * @returns `baseText` unchanged when the transcript is blank; the trimmed transcript when
 *   `baseText` is blank; otherwise both joined by a single space, with trailing whitespace of
 *   `baseText` removed.
 */
private mergeDictatedText(baseText: string, transcript: string): string {
  const spoken = transcript.trim();
  if (spoken.length === 0) {
    return baseText;
  }

  const hasExistingText = baseText.trim().length > 0;
  return hasExistingText ? [baseText.trimEnd(), spoken].join(' ') : spoken;
}
/**
 * Writes the dictated text into the component state and the textarea, then moves the caret
 * to the end of the text.
 *
 * @param text Full composer text to apply.
 * @param textarea Composer textarea to update and focus.
 */
private applyDictatedText(text: string, textarea: HTMLTextAreaElement): void {
  const caret = text.length;

  this.messageText = text;
  textarea.value = text;
  this.composerSelectionStart = caret;
  this.composerSelectionEnd = caret;
  this.handleMessageTextChange(text);

  // Focus and caret placement are deferred to a microtask, after the synchronous updates above.
  queueMicrotask(() => {
    textarea.focus();
    textarea.setSelectionRange(caret, caret);
  });
}
private syncCallAudioSource(): void {
const audio = this.callAudioElement?.nativeElement;