Dictation through AI
This commit is contained in:
@@ -314,6 +314,30 @@
|
||||
{{ isRecordingVoice() ? '⏹️' : '🎙️' }}
|
||||
</button>
|
||||
|
||||
<!--
  Dictation toggle. Starts a recording, or stops it and requests a transcript.
  While a recording is active the button stays enabled even if the peer or the
  signaling connection is no longer ready, so the user can always stop the
  microphone capture. It is disabled while a transcript is pending.
-->
<button
  class="composer-dictation"
  type="button"
  [disabled]="
    isTranscribingDictation() ||
    (!isDictating() && (!session.isSelectedPeerReady() || session.signalingState() !== 'connected'))
  "
  (click)="toggleDictation(composerTextarea)"
  [title]="
    isDictating()
      ? 'Stop dictation and transcribe'
      : isTranscribingDictation()
        ? 'Transcribing dictated audio'
        : 'Start dictation'
  "
  [attr.aria-label]="
    isDictating()
      ? 'Stop dictation and transcribe'
      : isTranscribingDictation()
        ? 'Transcribing dictated audio'
        : 'Start dictation'
  "
  [class.composer-dictation-active]="isDictating() || isTranscribingDictation()"
>
  {{ isDictating() ? '🛑' : isTranscribingDictation() ? '⏳' : '🗣️' }}
</button>
|
||||
|
||||
<input
|
||||
#fileInput
|
||||
class="composer-file-input"
|
||||
|
||||
@@ -357,6 +357,7 @@
|
||||
|
||||
.composer-camera,
|
||||
.composer-call,
|
||||
.composer-dictation,
|
||||
.composer-hangup,
|
||||
.composer-voice,
|
||||
.composer-image-generate,
|
||||
@@ -398,6 +399,12 @@
|
||||
background: var(--badge-background);
|
||||
}
|
||||
|
||||
/* Idle look of the dictation button: pastel gradient with the page text colour. */
.composer-dictation {
  background: linear-gradient(135deg, #f6d8ff, #ffcadb);
  color: var(--page-text);
}
|
||||
|
||||
.composer-dictation-active,
|
||||
.composer-hangup,
|
||||
.composer-voice-recording {
|
||||
color: #fff;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { CommonModule } from '@angular/common';
|
||||
import { Component, computed, effect, ElementRef, inject, OnDestroy, signal, ViewChild } from '@angular/core';
|
||||
import { Component, computed, effect, ElementRef, inject, NgZone, OnDestroy, signal, ViewChild } from '@angular/core';
|
||||
import { toSignal } from '@angular/core/rxjs-interop';
|
||||
import { FormsModule } from '@angular/forms';
|
||||
import { ActivatedRoute, Router, RouterLink } from '@angular/router';
|
||||
@@ -18,6 +18,7 @@ import type { ChatEntry, ConnectionState, PeerSummary } from './models';
|
||||
export class ChatPageComponent implements OnDestroy {
|
||||
private readonly route = inject(ActivatedRoute);
|
||||
private readonly router = inject(Router);
|
||||
private readonly ngZone = inject(NgZone);
|
||||
private readonly routeParamMap = toSignal(this.route.paramMap, {
|
||||
initialValue: this.route.snapshot.paramMap,
|
||||
});
|
||||
@@ -28,6 +29,14 @@ export class ChatPageComponent implements OnDestroy {
|
||||
private voiceChunks: Blob[] = [];
|
||||
private discardRecordedVoice = false;
|
||||
private recordingPeerId: string | null = null;
|
||||
private dictationRecorder: MediaRecorder | null = null;
|
||||
private dictationStream: MediaStream | null = null;
|
||||
private dictationChunks: Blob[] = [];
|
||||
private dictationBaseText = '';
|
||||
private discardRecordedDictation = false;
|
||||
private dictationCompletionPromise: Promise<void> | null = null;
|
||||
private resolveDictationCompletion: (() => void) | null = null;
|
||||
private dictationApplyToken = 0;
|
||||
@ViewChild('callAudioElement')
|
||||
set callAudioElementRef(value: ElementRef<HTMLAudioElement> | undefined) {
|
||||
this.callAudioElement = value;
|
||||
@@ -39,6 +48,8 @@ export class ChatPageComponent implements OnDestroy {
|
||||
readonly forwardingEntryId = signal<string | null>(null);
|
||||
readonly emojiPickerOpen = signal(false);
|
||||
readonly isRecordingVoice = signal(false);
|
||||
readonly isDictating = signal(false);
|
||||
readonly isTranscribingDictation = signal(false);
|
||||
readonly emojiOptions = [
|
||||
'😀', '😁', '😂', '🤣', '😊',
|
||||
'😉', '😍', '😘', '😎', '🤔',
|
||||
@@ -152,6 +163,7 @@ export class ChatPageComponent implements OnDestroy {
|
||||
}
|
||||
|
||||
/**
 * Angular teardown hook: discards any dictation in progress, discards any
 * voice recording in progress, and detaches the call audio source.
 */
ngOnDestroy(): void {
  // Fire-and-forget: the hook is synchronous, so the returned promise is not awaited.
  void this.stopDictation(true);
  this.stopVoiceRecording(true);
  this.detachCallAudioSource();
}
|
||||
@@ -174,6 +186,7 @@ export class ChatPageComponent implements OnDestroy {
|
||||
return;
|
||||
}
|
||||
|
||||
await this.stopDictation(false);
|
||||
await this.session.sendText(peerId, this.messageText);
|
||||
this.messageText = '';
|
||||
this.emojiPickerOpen.set(false);
|
||||
@@ -188,6 +201,7 @@ export class ChatPageComponent implements OnDestroy {
|
||||
return;
|
||||
}
|
||||
|
||||
await this.stopDictation(false);
|
||||
const requested = await this.session.requestGeneratedImage(peerId, this.messageText);
|
||||
|
||||
if (!requested) {
|
||||
@@ -262,6 +276,92 @@ export class ChatPageComponent implements OnDestroy {
|
||||
input.value = '';
|
||||
}
|
||||
|
||||
/**
 * Toggles dictation for the composer.
 *
 * - If a recording is active, stops it (keeping the audio) and waits for the
 *   resulting completion promise.
 * - If a transcript is still pending, or no peer is selected, does nothing.
 * - Otherwise asks for microphone access, starts a `MediaRecorder`, and wires
 *   handlers so that stopping the recorder hands the recorded blob to
 *   `transcribeDictation`.
 *
 * Failures are reported through `session.error`; the method does not throw for
 * recording errors.
 *
 * @param textarea Composer textarea that receives the transcribed text.
 */
async toggleDictation(textarea: HTMLTextAreaElement): Promise<void> {
  if (this.isDictating()) {
    await this.stopDictation(false);
    return;
  }

  if (this.isTranscribingDictation()) {
    return;
  }

  const peerId = this.peerId();

  if (!peerId) {
    return;
  }

  if (typeof MediaRecorder === 'undefined' || typeof navigator === 'undefined') {
    this.session.error.set('This browser does not support dictation recording.');
    return;
  }

  if (typeof navigator.mediaDevices?.getUserMedia !== 'function') {
    this.session.error.set('This browser cannot access the microphone for dictation.');
    return;
  }

  this.dictationBaseText = this.messageText;
  this.discardRecordedDictation = false;
  this.dictationApplyToken += 1;

  // Capture the token BEFORE awaiting the permission prompt. Anything that bumps
  // it meanwhile (a discard, or a second start request) supersedes this attempt.
  const applyToken = this.dictationApplyToken;
  let stream: MediaStream | null = null;

  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });

    if (applyToken !== this.dictationApplyToken) {
      // Superseded while waiting for the microphone: release it and leave the
      // shared dictation state to whoever superseded this attempt.
      for (const track of stream.getTracks()) {
        track.stop();
      }

      return;
    }

    const preferredMimeType = this.preferredVoiceMimeType();
    const recorder = preferredMimeType
      ? new MediaRecorder(stream, { mimeType: preferredMimeType })
      : new MediaRecorder(stream);

    // Release anyone still awaiting an earlier session's completion before the
    // resolver is replaced; otherwise that promise could never settle.
    this.resolveDictationCompletion?.();

    this.dictationChunks = [];
    this.dictationStream = stream;
    this.dictationRecorder = recorder;
    this.dictationCompletionPromise = new Promise<void>((resolve) => {
      this.resolveDictationCompletion = resolve;
    });

    recorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        this.dictationChunks.push(event.data);
      }
    };

    recorder.onerror = () => {
      this.ngZone.run(() => {
        this.session.error.set('Could not record dictation audio.');
        this.cleanupDictationRecorder();
        this.finishDictationCompletion();
      });
    };

    recorder.onstop = () => {
      // Read the discard flag and build the blob before cleanup resets them.
      const shouldDiscard = this.discardRecordedDictation;
      const mimeType = recorder.mimeType || preferredMimeType || 'audio/webm';
      const blob = new Blob(this.dictationChunks, { type: mimeType });

      this.ngZone.run(() => {
        this.cleanupDictationRecorder();

        if (shouldDiscard || blob.size === 0) {
          this.finishDictationCompletion();
          return;
        }

        this.isTranscribingDictation.set(true);
        void this.transcribeDictation(blob, textarea, applyToken);
      });
    };

    recorder.start();
    this.isDictating.set(true);
    this.session.error.set(null);
  } catch {
    // If the stream was acquired but never stored on the component (e.g. the
    // MediaRecorder constructor threw), cleanupDictationRecorder() cannot see
    // it, so stop its tracks here to avoid leaving the microphone open.
    if (stream && this.dictationStream !== stream) {
      for (const track of stream.getTracks()) {
        track.stop();
      }
    }

    if (applyToken !== this.dictationApplyToken) {
      // A newer attempt owns the shared state now; do not tear it down.
      return;
    }

    this.session.error.set('Could not start dictation recording.');
    this.cleanupDictationRecorder();
    this.finishDictationCompletion();
  }
}
|
||||
|
||||
async toggleVoiceRecording(): Promise<void> {
|
||||
if (this.isRecordingVoice()) {
|
||||
this.stopVoiceRecording(false);
|
||||
@@ -482,6 +582,7 @@ export class ChatPageComponent implements OnDestroy {
|
||||
return;
|
||||
}
|
||||
|
||||
await this.stopDictation(true);
|
||||
this.stopVoiceRecording(true);
|
||||
this.forwardingEntryId.set(null);
|
||||
this.emojiPickerOpen.set(false);
|
||||
@@ -532,6 +633,106 @@ export class ChatPageComponent implements OnDestroy {
|
||||
return candidates.find((candidate) => MediaRecorder.isTypeSupported(candidate)) ?? '';
|
||||
}
|
||||
|
||||
/**
 * Stops the current dictation session, if any, and waits for it to finish.
 *
 * @param discard When true, invalidates any pending transcript (by bumping the
 *   apply token), restores the composer text captured at dictation start when
 *   that text is non-empty, and clears the transcribing flag. When false, the
 *   current composer text becomes the base the transcript is merged onto.
 * @returns A promise that settles once the completion promise that existed on
 *   entry (if any) has resolved.
 */
private async stopDictation(discard: boolean): Promise<void> {
  // Snapshot first: the calls below may clear the field.
  const pendingCompletion = this.dictationCompletionPromise;

  if (!discard) {
    this.dictationBaseText = this.messageText;
  } else {
    this.dictationApplyToken += 1;
    this.messageText = this.dictationBaseText || this.messageText;
    this.handleMessageTextChange(this.messageText);
    this.isTranscribingDictation.set(false);
  }

  const activeRecorder = this.dictationRecorder;

  if (activeRecorder) {
    this.discardRecordedDictation = discard;

    if (activeRecorder.state === 'inactive') {
      // Nothing left to stop, so tear down directly.
      this.cleanupDictationRecorder();
      this.finishDictationCompletion();
    } else {
      activeRecorder.stop();
    }
  } else if (!pendingCompletion) {
    // No recording and no pending transcript: nothing to merge onto.
    this.dictationBaseText = '';
  }

  if (pendingCompletion) {
    await pendingCompletion;
  }
}
|
||||
|
||||
/**
 * Stops every track of the dictation stream (if one is held) and resets the
 * recorder, stream, chunk buffer, discard flag and `isDictating` signal.
 */
private cleanupDictationRecorder(): void {
  this.dictationStream?.getTracks().forEach((track) => {
    track.stop();
  });

  this.dictationRecorder = null;
  this.dictationStream = null;
  this.dictationChunks = [];
  this.discardRecordedDictation = false;
  this.isDictating.set(false);
}
|
||||
|
||||
/**
 * Resolves the current dictation completion promise (if any), then clears the
 * resolver, the promise and the stored base text.
 */
private finishDictationCompletion(): void {
  const release = this.resolveDictationCompletion;

  if (release) {
    release();
  }

  this.resolveDictationCompletion = null;
  this.dictationCompletionPromise = null;
  this.dictationBaseText = '';
}
|
||||
|
||||
/**
 * Sends recorded audio for transcription and merges the result into the composer.
 *
 * The transcript is applied only if `applyToken` still matches
 * `dictationApplyToken`, i.e. the session was not discarded or superseded while
 * the request was in flight. Failures are reported via `session.error` (again
 * only when the token still matches); the method itself never rejects.
 *
 * @param blob Recorded audio to transcribe.
 * @param textarea Composer textarea that receives the merged text.
 * @param applyToken Token identifying the dictation session this audio belongs to.
 */
private async transcribeDictation(blob: Blob, textarea: HTMLTextAreaElement, applyToken: number): Promise<void> {
  // Captured before the first await so we can later tell whether the completion
  // promise still belongs to this session or was replaced by a newer one.
  const ownCompletion = this.dictationCompletionPromise;

  try {
    const transcript = await this.session.requestSpeechTranscription(blob);

    if (applyToken !== this.dictationApplyToken) {
      return;
    }

    this.applyDictatedText(this.mergeDictatedText(this.dictationBaseText, transcript), textarea);
  } catch {
    if (applyToken === this.dictationApplyToken) {
      this.session.error.set('Dictation transcription failed.');
    }
  } finally {
    if (applyToken === this.dictationApplyToken) {
      this.isTranscribingDictation.set(false);
    }

    // Only finish the completion this call owns. If a newer session installed
    // its own promise in the meantime, resolving it here would release its
    // waiters early and wipe its base text.
    if (this.dictationCompletionPromise === ownCompletion) {
      this.finishDictationCompletion();
    }
  }
}
|
||||
|
||||
/**
 * Combines existing composer text with a transcript.
 *
 * @param baseText Text that was in the composer before the transcript arrived.
 * @param transcript Raw transcript; surrounding whitespace is ignored.
 * @returns `baseText` unchanged when the transcript is blank; the trimmed
 *   transcript alone when `baseText` is blank; otherwise `baseText` without
 *   trailing whitespace, one space, then the trimmed transcript.
 */
private mergeDictatedText(baseText: string, transcript: string): string {
  const spoken = transcript.trim();

  if (spoken.length === 0) {
    return baseText;
  }

  const hasExistingText = baseText.trim().length > 0;

  return hasExistingText ? `${baseText.trimEnd()} ${spoken}` : spoken;
}
|
||||
|
||||
/**
 * Writes `text` into both the component model and the textarea, records the
 * caret at the end of the text, notifies `handleMessageTextChange`, and then,
 * in a microtask, focuses the textarea with the caret at the end.
 *
 * @param text Full composer text to apply.
 * @param textarea Composer textarea element to update and focus.
 */
private applyDictatedText(text: string, textarea: HTMLTextAreaElement): void {
  const caret = text.length;

  this.messageText = text;
  textarea.value = text;
  this.composerSelectionStart = caret;
  this.composerSelectionEnd = caret;
  this.handleMessageTextChange(text);

  queueMicrotask(() => {
    textarea.focus();
    textarea.setSelectionRange(caret, caret);
  });
}
|
||||
|
||||
private syncCallAudioSource(): void {
|
||||
const audio = this.callAudioElement?.nativeElement;
|
||||
|
||||
|
||||
@@ -170,6 +170,10 @@ export class ChatSessionService {
|
||||
string,
|
||||
{ peerId: string; prompt: string; waitMessageId: string }
|
||||
>();
|
||||
private readonly pendingSpeechTranscriptionRequests = new Map<
|
||||
string,
|
||||
{ resolve: (text: string) => void; reject: (reason?: unknown) => void }
|
||||
>();
|
||||
private readonly remoteVideoStreams = signal<Array<{ peerId: string; stream: MediaStream }>>([]);
|
||||
private readonly remoteAudioStreams = signal<Array<{ peerId: string; stream: MediaStream }>>([]);
|
||||
private readonly activeCameraPeerId = signal<string | null>(null);
|
||||
@@ -916,6 +920,32 @@ export class ChatSessionService {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Sends recorded audio over the signaling WebSocket as a `speech-transcription`
 * request and resolves with the transcript once the matching response arrives.
 *
 * @param audioBlob Recorded audio; its MIME type is forwarded, defaulting to
 *   `audio/webm` when the blob has none.
 * @returns The transcribed text.
 * @throws Error if the WebSocket is not open, either on entry or after the
 *   audio has been encoded. The returned promise also rejects if sending
 *   fails or the pending request is rejected elsewhere in this service.
 */
async requestSpeechTranscription(audioBlob: Blob): Promise<string> {
  if (!this.websocket || this.websocket.readyState !== WebSocket.OPEN) {
    throw new Error('You must be connected to signaling before using dictation.');
  }

  const requestId = crypto.randomUUID();
  const audioBase64 = await this.blobToBase64(audioBlob);

  // The socket may have closed or been replaced while the audio was being
  // encoded. Sending on a missing or closing socket does not throw, so without
  // this check the request would be registered and then never settle.
  const websocket = this.websocket;

  if (!websocket || websocket.readyState !== WebSocket.OPEN) {
    throw new Error('Signaling connection closed during dictation.');
  }

  return new Promise<string>((resolve, reject) => {
    this.pendingSpeechTranscriptionRequests.set(requestId, { resolve, reject });

    try {
      this.error.set(null);
      websocket.send(JSON.stringify({
        type: 'speech-transcription',
        requestId,
        mimeType: audioBlob.type || 'audio/webm',
        audioBase64,
      }));
    } catch (error) {
      // Sending failed synchronously: unregister so the entry does not linger.
      this.pendingSpeechTranscriptionRequests.delete(requestId);
      reject(error);
    }
  });
}
|
||||
|
||||
private async loadAccessKeys(): Promise<void> {
|
||||
const token = this.token();
|
||||
|
||||
@@ -990,6 +1020,7 @@ export class ChatSessionService {
|
||||
const shouldReconnect = this.websocket === websocket && !this.suppressSocketReconnect;
|
||||
|
||||
this.stopWebSocketHeartbeat();
|
||||
this.rejectPendingSpeechTranscriptions('Signaling connection closed during dictation.');
|
||||
this.signalingState.set('disconnected');
|
||||
this.status.set('Signaling connection closed.');
|
||||
|
||||
@@ -1014,6 +1045,7 @@ export class ChatSessionService {
|
||||
|
||||
private disconnectWebSocket(): void {
|
||||
this.stopWebSocketHeartbeat();
|
||||
this.rejectPendingSpeechTranscriptions('Signaling connection closed during dictation.');
|
||||
|
||||
if (this.websocket) {
|
||||
this.suppressSocketReconnect = true;
|
||||
@@ -1055,6 +1087,12 @@ export class ChatSessionService {
|
||||
case 'image-generation-error':
|
||||
this.handleGeneratedImageError(event);
|
||||
break;
|
||||
case 'speech-transcribed':
|
||||
this.handleSpeechTranscribed(event);
|
||||
break;
|
||||
case 'speech-transcription-error':
|
||||
this.handleSpeechTranscriptionError(event);
|
||||
break;
|
||||
case 'pong':
|
||||
break;
|
||||
case 'error':
|
||||
@@ -1109,6 +1147,28 @@ export class ChatSessionService {
|
||||
this.error.set(event.message);
|
||||
}
|
||||
|
||||
/**
 * Handles a `speech-transcribed` server event: resolves and unregisters the
 * pending request with the same `requestId`. Events with no matching pending
 * request are ignored.
 */
private handleSpeechTranscribed(event: Extract<ServerEvent, { type: 'speech-transcribed' }>): void {
  const { requestId, text } = event;
  const pending = this.pendingSpeechTranscriptionRequests.get(requestId);

  if (pending) {
    this.pendingSpeechTranscriptionRequests.delete(requestId);
    pending.resolve(text);
  }
}
|
||||
|
||||
/**
 * Handles a `speech-transcription-error` server event: rejects and unregisters
 * the matching pending request (when one exists) and always publishes the
 * server's message through `error`.
 */
private handleSpeechTranscriptionError(event: Extract<ServerEvent, { type: 'speech-transcription-error' }>): void {
  const { requestId, message } = event;
  const pending = this.pendingSpeechTranscriptionRequests.get(requestId);

  if (pending) {
    this.pendingSpeechTranscriptionRequests.delete(requestId);
    pending.reject(new Error(message));
  }

  this.error.set(message);
}
|
||||
|
||||
private async restoreSession(): Promise<void> {
|
||||
const token = this.token();
|
||||
|
||||
@@ -2024,6 +2084,18 @@ export class ChatSessionService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Rejects every pending speech-transcription request with a fresh `Error`
 * carrying `message`, then empties the pending map. Does nothing when no
 * request is pending.
 */
private rejectPendingSpeechTranscriptions(message: string): void {
  const pending = this.pendingSpeechTranscriptionRequests;

  if (pending.size > 0) {
    pending.forEach(({ reject }) => {
      reject(new Error(message));
    });

    pending.clear();
  }
}
|
||||
|
||||
private clearLocalAuth(statusMessage: string): void {
|
||||
this.clearWebSocketReconnect();
|
||||
this.disconnectWebSocket();
|
||||
@@ -2034,6 +2106,7 @@ export class ChatSessionService {
|
||||
this.stopRingtone();
|
||||
this.releasePreloadedRingtone();
|
||||
this.pendingImageGenerationRequests.clear();
|
||||
this.rejectPendingSpeechTranscriptions('Session ended during dictation.');
|
||||
this.remoteVideoStreams.set([]);
|
||||
this.remoteAudioStreams.set([]);
|
||||
this.remoteVideoModalPeerId.set(null);
|
||||
@@ -2060,6 +2133,19 @@ export class ChatSessionService {
|
||||
this.removeStorage('privatechat.user');
|
||||
}
|
||||
|
||||
/**
 * Encodes a blob's bytes as a base64 string.
 *
 * Bytes are converted to a binary string in slices of 0x8000 so that no single
 * `String.fromCharCode` call receives more than that many arguments.
 */
private async blobToBase64(blob: Blob): Promise<string> {
  const bytes = new Uint8Array(await blob.arrayBuffer());
  const chunkSize = 0x8000;
  const pieces: string[] = [];

  for (let offset = 0; offset < bytes.length; offset += chunkSize) {
    pieces.push(String.fromCharCode(...bytes.subarray(offset, offset + chunkSize)));
  }

  return btoa(pieces.join(''));
}
|
||||
|
||||
private async loadPersistedMessages(userId: string): Promise<void> {
|
||||
const messageEncryptionKey = this.messageEncryptionKey;
|
||||
|
||||
|
||||
@@ -130,6 +130,16 @@ export type ServerEvent =
|
||||
peerId: string;
|
||||
message: string;
|
||||
}
|
||||
| {
|
||||
type: 'speech-transcribed';
|
||||
requestId: string;
|
||||
text: string;
|
||||
}
|
||||
| {
|
||||
type: 'speech-transcription-error';
|
||||
requestId: string;
|
||||
message: string;
|
||||
}
|
||||
| { type: 'pong' }
|
||||
| { type: 'error'; message: string };
|
||||
|
||||
|
||||
Reference in New Issue
Block a user