Dictation through AI

2026-03-11 00:26:49 +01:00
parent d2c4152ea7
commit 189f989c0d
12 changed files with 911 additions and 2 deletions
--- a/client/src/app/chat-page.component.ts
+++ b/client/src/app/chat-page.component.ts
@@ -1,5 +1,5 @@
 import { CommonModule } from '@angular/common';
-import { Component, computed, effect, ElementRef, inject, OnDestroy, signal, ViewChild } from '@angular/core';
+import { Component, computed, effect, ElementRef, inject, NgZone, OnDestroy, signal, ViewChild } from '@angular/core';
 import { toSignal } from '@angular/core/rxjs-interop';
 import { FormsModule } from '@angular/forms';
 import { ActivatedRoute, Router, RouterLink } from '@angular/router';
@@ -18,6 +18,7 @@ import type { ChatEntry, ConnectionState, PeerSummary } from './models';
 export class ChatPageComponent implements OnDestroy {
  private readonly route = inject(ActivatedRoute);
  private readonly router = inject(Router);
+  private readonly ngZone = inject(NgZone);
  private readonly routeParamMap = toSignal(this.route.paramMap, {
    initialValue: this.route.snapshot.paramMap,
  });
@@ -28,6 +29,14 @@ export class ChatPageComponent implements OnDestroy {
  private voiceChunks: Blob[] = [];
  private discardRecordedVoice = false;
  private recordingPeerId: string | null = null;
+  private dictationRecorder: MediaRecorder | null = null;
+  private dictationStream: MediaStream | null = null;
+  private dictationChunks: Blob[] = [];
+  private dictationBaseText = '';
+  private discardRecordedDictation = false;
+  private dictationCompletionPromise: Promise<void> | null = null;
+  private resolveDictationCompletion: (() => void) | null = null;
+  private dictationApplyToken = 0;
  @ViewChild('callAudioElement')
  set callAudioElementRef(value: ElementRef<HTMLAudioElement> | undefined) {
    this.callAudioElement = value;
@@ -39,6 +48,8 @@ export class ChatPageComponent implements OnDestroy {
  readonly forwardingEntryId = signal<string | null>(null);
  readonly emojiPickerOpen = signal(false);
  readonly isRecordingVoice = signal(false);
+  readonly isDictating = signal(false);
+  readonly isTranscribingDictation = signal(false);
  readonly emojiOptions = [
    '😀', '😁', '😂', '🤣', '😊',
    '😉', '😍', '😘', '😎', '🤔',
@@ -152,6 +163,7 @@ export class ChatPageComponent implements OnDestroy {
  }

  ngOnDestroy(): void { // Teardown hook: releases dictation, voice-recording and call-audio resources.
+    void this.stopDictation(true); // discard=true drops any in-progress dictation; the returned promise is deliberately not awaited
    this.stopVoiceRecording(true);
    this.detachCallAudioSource();
  }
@@ -174,6 +186,7 @@ export class ChatPageComponent implements OnDestroy {
      return;
    }

+    await this.stopDictation(false);
    await this.session.sendText(peerId, this.messageText);
    this.messageText = '';
    this.emojiPickerOpen.set(false);
@@ -188,6 +201,7 @@ export class ChatPageComponent implements OnDestroy {
      return;
    }

+    await this.stopDictation(false);
    const requested = await this.session.requestGeneratedImage(peerId, this.messageText);

    if (!requested) {
@@ -262,6 +276,92 @@ export class ChatPageComponent implements OnDestroy {
    input.value = '';
  }

+  /**
+   * Toggles microphone dictation for the message composer.
+   *
+   * When a dictation recording is active it is stopped (keeping the audio so
+   * it gets transcribed). Otherwise, unless a transcription is still running
+   * or no peer is selected, the microphone is requested and a new recording
+   * session is started. When the recorder stops, the captured audio is handed
+   * to `transcribeDictation`, which writes the result into `textarea`.
+   *
+   * @param textarea Composer textarea that receives the transcribed text.
+   * @returns A promise that settles once the recording has been started or
+   *          stopped; it does not wait for a transcription started later.
+   */
+  async toggleDictation(textarea: HTMLTextAreaElement): Promise<void> {
+    if (this.isDictating()) {
+      await this.stopDictation(false);
+      return;
+    }
+
+    if (this.isTranscribingDictation()) {
+      return;
+    }
+    const peerId = this.peerId();
+
+    if (!peerId) {
+      return;
+    }
+
+    if (typeof MediaRecorder === 'undefined' || typeof navigator === 'undefined') {
+      this.session.error.set('This browser does not support dictation recording.');
+      return;
+    }
+
+    if (typeof navigator.mediaDevices?.getUserMedia !== 'function') {
+      this.session.error.set('This browser cannot access the microphone for dictation.');
+      return;
+    }
+
+    this.dictationBaseText = this.messageText;
+    this.discardRecordedDictation = false;
+    this.dictationApplyToken += 1;
+    // Capture the token BEFORE awaiting the microphone: a discard or a second
+    // toggle that happens while the permission prompt is open bumps the token,
+    // which lets this call detect that it has become stale.
+    const applyToken = this.dictationApplyToken;
+
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+
+      if (applyToken !== this.dictationApplyToken) {
+        // Superseded while waiting for the microphone: stop the tracks we just
+        // acquired and leave the state owned by the newer request untouched.
+        for (const track of stream.getTracks()) {
+          track.stop();
+        }
+        return;
+      }
+
+      // Register the stream before constructing the recorder so the catch
+      // path below can stop its tracks if MediaRecorder creation throws.
+      this.dictationStream = stream;
+      this.dictationChunks = [];
+
+      const preferredMimeType = this.preferredVoiceMimeType();
+      const recorder = preferredMimeType
+        ? new MediaRecorder(stream, { mimeType: preferredMimeType })
+        : new MediaRecorder(stream);
+
+      this.dictationRecorder = recorder;
+      this.dictationCompletionPromise = new Promise<void>((resolve) => {
+        this.resolveDictationCompletion = resolve;
+      });
+
+      recorder.ondataavailable = (event) => {
+        if (event.data.size > 0) {
+          this.dictationChunks.push(event.data);
+        }
+      };
+
+      recorder.onerror = () => {
+        this.ngZone.run(() => {
+          this.session.error.set('Could not record dictation audio.');
+          this.cleanupDictationRecorder();
+          this.finishDictationCompletion();
+        });
+      };
+
+      recorder.onstop = () => {
+        // Read the flag and build the blob first: cleanupDictationRecorder()
+        // resets both the discard flag and the chunk list.
+        const shouldDiscard = this.discardRecordedDictation;
+        const mimeType = recorder.mimeType || preferredMimeType || 'audio/webm';
+        const blob = new Blob(this.dictationChunks, { type: mimeType });
+
+        this.ngZone.run(() => {
+          this.cleanupDictationRecorder();
+
+          if (shouldDiscard || blob.size === 0) {
+            this.finishDictationCompletion();
+            return;
+          }
+
+          this.isTranscribingDictation.set(true);
+          void this.transcribeDictation(blob, textarea, applyToken);
+        });
+      };
+
+      recorder.start();
+      this.isDictating.set(true);
+      this.session.error.set(null);
+    } catch {
+      if (applyToken !== this.dictationApplyToken) {
+        // A stale request failed; the current state belongs to a newer
+        // request (or was already discarded), so do not reset it.
+        return;
+      }
+
+      this.session.error.set('Could not start dictation recording.');
+      this.cleanupDictationRecorder();
+      this.finishDictationCompletion();
+    }
+  }
+
  async toggleVoiceRecording(): Promise<void> {
    if (this.isRecordingVoice()) {
      this.stopVoiceRecording(false);
@@ -482,6 +582,7 @@ export class ChatPageComponent implements OnDestroy {
      return;
    }

+    await this.stopDictation(true);
    this.stopVoiceRecording(true);
    this.forwardingEntryId.set(null);
    this.emojiPickerOpen.set(false);
@@ -532,6 +633,106 @@ export class ChatPageComponent implements OnDestroy {
    return candidates.find((candidate) => MediaRecorder.isTypeSupported(candidate)) ?? '';
  }

+  /**
+   * Stops the current dictation session, if any, and waits for it to finish.
+   *
+   * @param discard When true, the dictation token is advanced so a pending
+   *                transcription result is ignored, the composer text is
+   *                restored from the stored base text (when non-empty), and
+   *                the transcribing flag is cleared. When false, the current
+   *                composer text becomes the base text for the transcript.
+   * @returns A promise that resolves once the completion promise that was
+   *          pending at call time (if there was one) has resolved.
+   */
+  private async stopDictation(discard: boolean): Promise<void> {
+    // Snapshot up front: the steps below may clear the field.
+    const pending = this.dictationCompletionPromise;
+
+    if (!discard) {
+      this.dictationBaseText = this.messageText;
+    } else {
+      this.dictationApplyToken += 1;
+      this.messageText = this.dictationBaseText || this.messageText;
+      this.handleMessageTextChange(this.messageText);
+      this.isTranscribingDictation.set(false);
+    }
+
+    const recorder = this.dictationRecorder;
+
+    if (recorder) {
+      // The recorder's stop handler reads this flag to decide whether to transcribe.
+      this.discardRecordedDictation = discard;
+
+      if (recorder.state === 'inactive') {
+        this.cleanupDictationRecorder();
+        this.finishDictationCompletion();
+      } else {
+        recorder.stop();
+      }
+    } else if (!pending) {
+      // Nothing recording and nothing pending: no base text needs to be kept.
+      this.dictationBaseText = '';
+    }
+
+    if (pending) {
+      await pending;
+    }
+  }
+
+  /**
+   * Stops every track of the dictation stream (when one exists) and resets
+   * the recorder, stream, chunk list, discard flag and `isDictating` signal.
+   */
+  private cleanupDictationRecorder(): void {
+    this.dictationStream?.getTracks().forEach((mediaTrack) => {
+      mediaTrack.stop();
+    });
+
+    this.dictationStream = null;
+    this.dictationRecorder = null;
+    this.discardRecordedDictation = false;
+    this.dictationChunks = [];
+    this.isDictating.set(false);
+  }
+
+  /**
+   * Resolves the pending dictation completion promise (if a resolver is
+   * stored), then clears the resolver, the promise and the base text.
+   */
+  private finishDictationCompletion(): void {
+    const release = this.resolveDictationCompletion;
+
+    if (release) {
+      release();
+    }
+
+    this.dictationCompletionPromise = null;
+    this.resolveDictationCompletion = null;
+    this.dictationBaseText = '';
+  }
+
+  /**
+   * Sends recorded dictation audio for transcription and, if the session is
+   * still current, merges the transcript into the composer.
+   *
+   * @param blob Recorded audio to transcribe.
+   * @param textarea Composer textarea that receives the merged text.
+   * @param applyToken Value of `dictationApplyToken` when the recording
+   *                   started; a mismatch means the session was discarded or
+   *                   superseded, so the result and any error are ignored.
+   * @returns A promise that resolves after the session's completion promise
+   *          has been released. Failures are reported through
+   *          `session.error` rather than thrown.
+   */
+  private async transcribeDictation(blob: Blob, textarea: HTMLTextAreaElement, applyToken: number): Promise<void> {
+    // Snapshot this session's completion handles before the first await. If
+    // the session is discarded and a new dictation starts while the request
+    // is in flight, the fields will point at the NEW session.
+    const sessionCompletion = this.dictationCompletionPromise;
+    const resolveSession = this.resolveDictationCompletion;
+
+    try {
+      const transcript = await this.session.requestSpeechTranscription(blob);
+
+      if (applyToken !== this.dictationApplyToken) {
+        return;
+      }
+
+      this.applyDictatedText(this.mergeDictatedText(this.dictationBaseText, transcript), textarea);
+    } catch {
+      if (applyToken === this.dictationApplyToken) {
+        this.session.error.set('Dictation transcription failed.');
+      }
+    } finally {
+      if (applyToken === this.dictationApplyToken) {
+        this.isTranscribingDictation.set(false);
+      }
+
+      if (this.dictationCompletionPromise === sessionCompletion) {
+        // Still the active session: resolve and reset the shared state.
+        this.finishDictationCompletion();
+      } else {
+        // A newer session owns the shared state. Release only this session's
+        // waiters; do not resolve the newer promise or clear its base text.
+        resolveSession?.();
+      }
+    }
+  }
+
+  /**
+   * Combines the existing composer text with a new transcript.
+   *
+   * @param baseText Text that was in the composer before the transcript.
+   * @param transcript Raw transcript; surrounding whitespace is trimmed.
+   * @returns `baseText` unchanged when the transcript is blank; the trimmed
+   *          transcript alone when `baseText` is blank; otherwise `baseText`
+   *          without trailing whitespace, a single space, and the transcript.
+   */
+  private mergeDictatedText(baseText: string, transcript: string): string {
+    const spoken = transcript.trim();
+
+    if (spoken.length === 0) {
+      return baseText;
+    }
+
+    const hasExistingText = baseText.trim().length > 0;
+
+    return hasExistingText ? baseText.trimEnd() + ' ' + spoken : spoken;
+  }
+
+  /**
+   * Writes `text` into the composer model and the textarea, records the
+   * caret position at the end of the text, and notifies the message-text
+   * change handler. Focus and the DOM selection are set in a microtask.
+   *
+   * @param text Full composer text to apply.
+   * @param textarea Composer textarea element to update and focus.
+   */
+  private applyDictatedText(text: string, textarea: HTMLTextAreaElement): void {
+    const caret = text.length;
+
+    this.messageText = text;
+    textarea.value = text;
+    this.composerSelectionStart = caret;
+    this.composerSelectionEnd = caret;
+    this.handleMessageTextChange(text);
+
+    queueMicrotask(() => {
+      textarea.focus();
+      textarea.setSelectionRange(caret, caret);
+    });
+  }
+
  private syncCallAudioSource(): void {
    const audio = this.callAudioElement?.nativeElement;