From 755ef4bb80120ee5de5bf6b5b3819ea34f8cfee0 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Thu, 31 Jul 2025 12:23:36 -0700 Subject: [PATCH] STT append spoken text (#4216) * STT append spoken text * remove default args from calls * fix bug where message and input were out of sync * update diffs --- .../PromptInput/SpeechToText/index.jsx | 25 ++++++++--- .../ChatContainer/PromptInput/index.jsx | 8 +++- .../WorkspaceChat/ChatContainer/index.jsx | 42 +++++++++++++++---- 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/SpeechToText/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/SpeechToText/index.jsx index cac5daa0..9e701b85 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/SpeechToText/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/SpeechToText/index.jsx @@ -1,4 +1,4 @@ -import { useEffect, useCallback } from "react"; +import { useEffect, useCallback, useRef } from "react"; import { Microphone } from "@phosphor-icons/react"; import { Tooltip } from "react-tooltip"; import _regeneratorRuntime from "regenerator-runtime"; @@ -19,6 +19,7 @@ const SILENCE_INTERVAL = 3_200; // wait in seconds of silence before closing. * @returns {React.ReactElement} The SpeechToText component */ export default function SpeechToText({ sendCommand }) { + const previousTranscriptRef = useRef(""); const { transcript, listening, @@ -39,6 +40,7 @@ export default function SpeechToText({ sendCommand }) { } resetTranscript(); + previousTranscriptRef.current = ""; SpeechRecognition.startListening({ continuous: browserSupportsContinuousListening, language: window?.navigator?.language ?? "en-US", @@ -47,14 +49,19 @@ export default function SpeechToText({ sendCommand }) { function endSTTSession() { SpeechRecognition.stopListening(); - if (transcript.length > 0) { + + // If auto submit is enabled, send an empty string to the chat window to submit the current transcript + // since every chunk of text should have been streamed to the chat window by now. + if (Appearance.get("autoSubmitSttInput")) { sendCommand({ - text: transcript, - autoSubmit: Appearance.get("autoSubmitSttInput"), + text: "", + autoSubmit: true, + writeMode: "append", }); } resetTranscript(); + previousTranscriptRef.current = ""; clearTimeout(timeout); } @@ -95,7 +102,15 @@ export default function SpeechToText({ sendCommand }) { useEffect(() => { if (transcript?.length > 0 && listening) { - sendCommand({ text: transcript }); + const previousTranscript = previousTranscriptRef.current; + const newContent = transcript.slice(previousTranscript.length); + + // Stream just the diff of the new content since transcript is an accumulating string. + // and not just the new content transcribed. + if (newContent.length > 0) + sendCommand({ text: newContent, writeMode: "append" }); + + previousTranscriptRef.current = transcript; clearTimeout(timeout); timeout = setTimeout(() => { endSTTSession(); diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx index 8a441293..2a716aad 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx @@ -25,6 +25,7 @@ import useTextSize from "@/hooks/useTextSize"; import { useTranslation } from "react-i18next"; import Appearance from "@/models/appearance"; +export const PROMPT_INPUT_ID = "primary-prompt-input"; export const PROMPT_INPUT_EVENT = "set_prompt_input"; const MAX_EDIT_STACK_SIZE = 100; @@ -51,10 +52,12 @@ export default function PromptInput({ * To prevent too many re-renders we remotely listen for updates from the parent * via an event cycle. Otherwise, using message as a prop leads to a re-render every * change on the input. - * @param {Event} e + * @param {{detail: {messageContent: string, writeMode: 'replace' | 'append'}}} e */ function handlePromptUpdate(e) { - setPromptInput(e?.detail ?? ""); + const { messageContent, writeMode = "replace" } = e?.detail ?? {}; + if (writeMode === "append") setPromptInput((prev) => prev + messageContent); + else setPromptInput(messageContent ?? ""); } useEffect(() => { @@ -261,6 +264,7 @@ export default function PromptInput({