| | import {useCallback, useEffect, useLayoutEffect, useRef, useState} from 'react'; |
| | import Button from '@mui/material/Button'; |
| | import Typography from '@mui/material/Typography'; |
| | import InputLabel from '@mui/material/InputLabel'; |
| | import FormControl from '@mui/material/FormControl'; |
| | import Select, {SelectChangeEvent} from '@mui/material/Select'; |
| | import MenuItem from '@mui/material/MenuItem'; |
| | import Stack from '@mui/material/Stack'; |
| | import seamlessLogoUrl from './assets/seamless.svg'; |
| | import { |
| | AgentCapabilities, |
| | BaseResponse, |
| | BrowserAudioStreamConfig, |
| | DynamicConfig, |
| | PartialDynamicConfig, |
| | SUPPORTED_INPUT_SOURCES, |
| | SUPPORTED_OUTPUT_MODES, |
| | ServerExceptionData, |
| | ServerSpeechData, |
| | ServerState, |
| | ServerTextData, |
| | StartStreamEventConfig, |
| | StreamingStatus, |
| | SupportedInputSource, |
| | SupportedOutputMode, |
| | TranslationSentences, |
| | } from './types/StreamingTypes'; |
| | import FormLabel from '@mui/material/FormLabel'; |
| | import RadioGroup from '@mui/material/RadioGroup'; |
| | import FormControlLabel from '@mui/material/FormControlLabel'; |
| | import Radio from '@mui/material/Radio'; |
| | import './StreamingInterface.css'; |
| | import RoomConfig from './RoomConfig'; |
| | import Divider from '@mui/material/Divider'; |
| | import {useSocket} from './useSocket'; |
| | import {RoomState} from './types/RoomState'; |
| | import useStable from './useStable'; |
| | import float32To16BitPCM from './float32To16BitPCM'; |
| | import createBufferedSpeechPlayer from './createBufferedSpeechPlayer'; |
| | import Checkbox from '@mui/material/Checkbox'; |
| | import Alert from '@mui/material/Alert'; |
| | import isScrolledToDocumentBottom from './isScrolledToDocumentBottom'; |
| | import Box from '@mui/material/Box'; |
| | import Slider from '@mui/material/Slider'; |
| | import VolumeDown from '@mui/icons-material/VolumeDown'; |
| | import VolumeUp from '@mui/icons-material/VolumeUp'; |
| | import Mic from '@mui/icons-material/Mic'; |
| | import MicOff from '@mui/icons-material/MicOff'; |
| | import XRDialog from './react-xr/XRDialog'; |
| | import getTranslationSentencesFromReceivedData from './getTranslationSentencesFromReceivedData'; |
| | import { |
| | sliceTranslationSentencesUpToIndex, |
| | getTotalSentencesLength, |
| | } from './sliceTranslationSentencesUtils'; |
| | import Blink from './Blink'; |
| | import {CURSOR_BLINK_INTERVAL_MS} from './cursorBlinkInterval'; |
| | import {getURLParams} from './URLParams'; |
| | import debug from './debug'; |
| | import DebugSection from './DebugSection'; |
| | import Switch from '@mui/material/Switch'; |
| | import Grid from '@mui/material/Grid'; |
| | import {getLanguageFromThreeLetterCode} from './languageLookup'; |
| | import HeadphonesIcon from '@mui/icons-material/Headphones'; |
| |
|
/**
 * Default audio-processing constraints per input source.
 *
 * These values are the fallback whenever the user has not explicitly toggled
 * the matching checkbox (the override state is still `null`), and they are
 * also the default `config` argument of the two stream-request helpers.
 */
const AUDIO_STREAM_DEFAULTS = {
  // Defaults applied to streams requested through getUserMedia.
  userMedia: {echoCancellation: false, noiseSuppression: true},
  // Defaults applied to streams requested through getDisplayMedia.
  displayMedia: {echoCancellation: false, noiseSuppression: false},
} as const;
| |
|
/**
 * Requests a single-channel audio stream through
 * `navigator.mediaDevices.getUserMedia`.
 *
 * @param config Audio constraints merged into the request; `channelCount: 1`
 *   is always applied on top of them.
 * @returns The resolved `MediaStream`. Any rejection from `getUserMedia`
 *   propagates to the caller.
 */
async function requestUserMediaAudioStream(
  config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['userMedia'],
) {
  const audioConstraints = {...config, channelCount: 1};
  const mediaStream = await navigator.mediaDevices.getUserMedia({
    audio: audioConstraints,
  });

  // Log what the browser actually granted, which may differ from the request.
  const grantedSettings = mediaStream.getAudioTracks()?.[0]?.getSettings();
  console.debug(
    '[requestUserMediaAudioStream] stream created with settings:',
    grantedSettings,
  );

  return mediaStream;
}
| |
|
/**
 * Requests a single-channel audio stream through
 * `navigator.mediaDevices.getDisplayMedia`.
 *
 * @param config Audio constraints merged into the request; `channelCount: 1`
 *   is always applied on top of them.
 * @returns The resolved `MediaStream`. Any rejection from `getDisplayMedia`
 *   propagates to the caller.
 */
async function requestDisplayMediaAudioStream(
  config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['displayMedia'],
) {
  const audioConstraints = {...config, channelCount: 1};
  const mediaStream = await navigator.mediaDevices.getDisplayMedia({
    audio: audioConstraints,
  });

  // Log what the browser actually granted, which may differ from the request.
  const grantedSettings = mediaStream.getAudioTracks()?.[0]?.getSettings();
  console.debug(
    '[requestDisplayMediaAudioStream] stream created with settings:',
    grantedSettings,
  );

  return mediaStream;
}
| |
|
/** Label shown on the start/stop button for each streaming status. */
const buttonLabelMap: Record<StreamingStatus, string> = {
  stopped: 'Start Streaming',
  starting: 'Starting...',
  running: 'Stop Streaming',
};
| |
|
/** Sent to the server as `buffer_limit` in the `configure_stream` payload. */
const BUFFER_LIMIT = 1;

/** Threshold (px) passed to `isScrolledToDocumentBottom` in the scroll listener. */
const SCROLLED_TO_BOTTOM_THRESHOLD_PX = 36;

/** Slope applied to the part of a slider value that exceeds 1. */
const GAIN_MULTIPLIER_OVER_1 = 3;

/**
 * Maps a raw volume-slider value to the gain handed to the speech player.
 *
 * Values up to and including 1 pass through unchanged; above 1 the excess is
 * multiplied by `GAIN_MULTIPLIER_OVER_1` (so 2 -> 4 and 3 -> 7).
 *
 * @param value Raw (unscaled) slider value.
 * @returns The scaled gain.
 */
const getGainScaledValue = (value: number): number =>
  value > 1 ? (value - 1) * GAIN_MULTIPLIER_OVER_1 + 1 : value;

/** Active-session count at which the degraded-performance warning is shown. */
const TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD = 2;

/** Upper bound on the number of server exceptions kept in component state. */
const MAX_SERVER_EXCEPTIONS_TRACKED = 500;

/** Delay (ms) between successive steps of the animated text index. */
export const TYPING_ANIMATION_DELAY_MS = 6;
| |
|
| | export default function StreamingInterface() { |
| | const urlParams = getURLParams(); |
| | const debugParam = urlParams.debug; |
| | const [animateTextDisplay, setAnimateTextDisplay] = useState<boolean>( |
| | urlParams.animateTextDisplay, |
| | ); |
| |
|
| | const socketObject = useSocket(); |
| | const {socket, clientID} = socketObject; |
| |
|
| | const [serverState, setServerState] = useState<ServerState | null>(null); |
| | const [agent, setAgent] = useState<AgentCapabilities | null>(null); |
| | const model = agent?.name ?? null; |
| | const agentsCapabilities: Array<AgentCapabilities> = |
| | serverState?.agentsCapabilities ?? []; |
| | const currentAgent: AgentCapabilities | null = |
| | agentsCapabilities.find((agent) => agent.name === model) ?? null; |
| |
|
| | const [serverExceptions, setServerExceptions] = useState< |
| | Array<ServerExceptionData> |
| | >([]); |
| | const [roomState, setRoomState] = useState<RoomState | null>(null); |
| | const roomID = roomState?.room_id ?? null; |
| | const isSpeaker = |
| | (clientID != null && roomState?.speakers.includes(clientID)) ?? false; |
| | const isListener = |
| | (clientID != null && roomState?.listeners.includes(clientID)) ?? false; |
| |
|
| | const [streamingStatus, setStreamingStatus] = |
| | useState<StreamingStatus>('stopped'); |
| |
|
| | const isStreamConfiguredRef = useRef<boolean>(false); |
| | const [hasMaxSpeakers, setHasMaxSpeakers] = useState<boolean>(false); |
| |
|
| | const [outputMode, setOutputMode] = useState<SupportedOutputMode>('s2s&t'); |
| | const [inputSource, setInputSource] = |
| | useState<SupportedInputSource>('userMedia'); |
| | const [enableNoiseSuppression, setEnableNoiseSuppression] = useState< |
| | boolean | null |
| | >(null); |
| | const [enableEchoCancellation, setEnableEchoCancellation] = useState< |
| | boolean | null |
| | >(null); |
| |
|
| | |
| | const [targetLang, setTargetLang] = useState<string | null>(null); |
| | const [enableExpressive, setEnableExpressive] = useState<boolean | null>( |
| | null, |
| | ); |
| |
|
| | const [serverDebugFlag, setServerDebugFlag] = useState<boolean>( |
| | debugParam ?? false, |
| | ); |
| |
|
| | const [receivedData, setReceivedData] = useState<Array<ServerTextData>>([]); |
| | const [ |
| | translationSentencesAnimatedIndex, |
| | setTranslationSentencesAnimatedIndex, |
| | ] = useState<number>(0); |
| |
|
| | const lastTranslationResultRef = useRef<HTMLDivElement | null>(null); |
| |
|
| | const [inputStream, setInputStream] = useState<MediaStream | null>(null); |
| | const [inputStreamSource, setInputStreamSource] = |
| | useState<MediaStreamAudioSourceNode | null>(null); |
| | const audioContext = useStable<AudioContext>(() => new AudioContext()); |
| | const [scriptNodeProcessor, setScriptNodeProcessor] = |
| | useState<ScriptProcessorNode | null>(null); |
| |
|
| | const [muted, setMuted] = useState<boolean>(false); |
| | |
| | |
| | const mutedRef = useRef<boolean>(muted); |
| | useEffect(() => { |
| | mutedRef.current = muted; |
| | }, [muted]); |
| |
|
| | const [gain, setGain] = useState<number>(1); |
| |
|
| | const isScrolledToBottomRef = useRef<boolean>(isScrolledToDocumentBottom()); |
| |
|
| | |
| | |
| | const streamFixedConfigOptionsDisabled = |
| | streamingStatus !== 'stopped' || roomID == null; |
| |
|
| | const bufferedSpeechPlayer = useStable(() => { |
| | const player = createBufferedSpeechPlayer({ |
| | onStarted: () => { |
| | console.debug('📢 PLAYBACK STARTED 📢'); |
| | }, |
| | onEnded: () => { |
| | console.debug('🛑 PLAYBACK ENDED 🛑'); |
| | }, |
| | }); |
| |
|
| | |
| | player.start(); |
| | return player; |
| | }); |
| |
|
| | const translationSentencesBase: TranslationSentences = |
| | getTranslationSentencesFromReceivedData(receivedData); |
| |
|
| | const translationSentencesBaseTotalLength = getTotalSentencesLength( |
| | translationSentencesBase, |
| | ); |
| |
|
| | const translationSentences: TranslationSentences = animateTextDisplay |
| | ? sliceTranslationSentencesUpToIndex( |
| | translationSentencesBase, |
| | translationSentencesAnimatedIndex, |
| | ) |
| | : translationSentencesBase; |
| |
|
| | |
| | const translationSentencesWithEmptyStartingString = |
| | streamingStatus === 'running' && translationSentences.length === 0 |
| | ? [''] |
| | : translationSentences; |
| |
|
| | |
| | |
| | |
| |
|
// Stores the newly selected agent. When its name differs from the previously
// stored agent's name, the target language is reset to the new agent's first
// target language (or null) and the expressive override is cleared.
const setAgentAndUpdateParams = useCallback(
  (newAgent: AgentCapabilities | null) => {
    setAgent((previousAgent) => {
      const agentChanged = previousAgent?.name !== newAgent?.name;
      if (agentChanged) {
        const defaultTargetLang = newAgent?.targetLangs[0] ?? null;
        setTargetLang(defaultTargetLang);
        setEnableExpressive(null);
      }
      return newAgent;
    });
  },
  [],
);
| |
|
// Emits `set_dynamic_config` with the given partial config and resolves once
// the server acknowledges with status 'ok'.
//
// Rejects with an Error when the socket is unavailable or when the server
// acknowledges with any other status, so callers always receive a real Error
// (with a message and stack) rather than `undefined`.
const onSetDynamicConfig = useCallback(
  (partialConfig: PartialDynamicConfig): Promise<void> =>
    new Promise<void>((resolve, reject) => {
      if (socket == null) {
        reject(new Error('[onSetDynamicConfig] socket is null '));
        return;
      }

      socket.emit(
        'set_dynamic_config',
        partialConfig,
        (result: BaseResponse) => {
          console.log('[emit result: set_dynamic_config]', result);
          if (result.status === 'ok') {
            resolve();
            return;
          }
          reject(
            new Error(
              `[onSetDynamicConfig] set_dynamic_config returned status: ${result.status}`,
            ),
          );
        },
      );
    }),
  [socket],
);
| |
|
// Sends the `configure_stream` event for the currently selected agent and
// resolves once the server acknowledges with status 'ok'. Rejects when the
// socket or the agent name is missing, or on any other acknowledged status.
// Also records whether the server answered with the 'max_speakers' message
// and keeps `isStreamConfiguredRef` in sync with the outcome.
const configureStreamAsync = ({sampleRate}: {sampleRate: number}) =>
  new Promise<void>((resolve, reject) => {
    if (socket == null) {
      reject(new Error('[configureStreamAsync] socket is null '));
      return;
    }

    const modelName = agent?.name ?? null;
    if (modelName == null) {
      reject(new Error('[configureStreamAsync] modelName is null '));
      return;
    }

    const config: StartStreamEventConfig = {
      event: 'config',
      rate: sampleRate,
      model_name: modelName,
      debug: serverDebugFlag,
      async_processing: true,
      buffer_limit: BUFFER_LIMIT,
      model_type: outputMode,
    };

    console.log('[configureStreamAsync] sending config', config);

    socket.emit('configure_stream', config, (statusObject) => {
      setHasMaxSpeakers(statusObject.message === 'max_speakers');

      const configured = statusObject.status === 'ok';
      isStreamConfiguredRef.current = configured;

      if (!configured) {
        reject(
          new Error(
            `[configureStreamAsync] configure_stream returned status: ${statusObject.status}`,
          ),
        );
        return;
      }

      console.debug('[configureStreamAsync] stream configured!', statusObject);
      resolve();
    });
  });
| |
|
// Starts a speaker streaming session:
//   1. requests the input MediaStream for the selected input source,
//   2. wires it through a ScriptProcessorNode that emits `incoming_audio`,
//   3. sends the dynamic config and then the stream config to the server.
// The status goes 'stopped' -> 'starting' -> 'running'. On any failure the
// status returns to 'stopped' and every resource acquired so far is released.
const startStreaming = async () => {
  if (streamingStatus !== 'stopped') {
    console.warn(
      `Attempting to start stream when status is ${streamingStatus}`,
    );
    return;
  }

  setStreamingStatus('starting');

  // The flag may still be true from an earlier session. Clear it so that the
  // new processor does not emit audio before the server has acknowledged the
  // new `configure_stream` request.
  isStreamConfiguredRef.current = false;

  if (audioContext.state === 'suspended') {
    console.warn('audioContext was suspended! resuming...');
    try {
      await audioContext.resume();
    } catch (e) {
      // Without this the status would stay stuck on 'starting'.
      console.error('[startStreaming] audioContext.resume() failed:', e);
      setStreamingStatus('stopped');
      return;
    }
  }

  let stream: MediaStream;

  try {
    if (inputSource === 'userMedia') {
      stream = await requestUserMediaAudioStream({
        noiseSuppression:
          enableNoiseSuppression ??
          AUDIO_STREAM_DEFAULTS['userMedia'].noiseSuppression,
        echoCancellation:
          enableEchoCancellation ??
          AUDIO_STREAM_DEFAULTS['userMedia'].echoCancellation,
      });
    } else if (inputSource === 'displayMedia') {
      stream = await requestDisplayMediaAudioStream({
        noiseSuppression:
          enableNoiseSuppression ??
          AUDIO_STREAM_DEFAULTS['displayMedia'].noiseSuppression,
        echoCancellation:
          enableEchoCancellation ??
          AUDIO_STREAM_DEFAULTS['displayMedia'].echoCancellation,
      });
    } else {
      throw new Error(`Unsupported input source requested: ${inputSource}`);
    }
    setInputStream(stream);
  } catch (e) {
    console.error('[startStreaming] media stream request failed:', e);
    setStreamingStatus('stopped');
    return;
  }

  const mediaStreamSource = audioContext.createMediaStreamSource(stream);
  setInputStreamSource(mediaStreamSource);

  // NOTE(review): ScriptProcessorNode is deprecated in the Web Audio API in
  // favour of AudioWorklet; kept here to preserve the existing behaviour.
  const scriptProcessor = audioContext.createScriptProcessor(16384, 1, 1);
  setScriptNodeProcessor(scriptProcessor);

  scriptProcessor.onaudioprocess = (event) => {
    if (isStreamConfiguredRef.current === false) {
      console.debug('[onaudioprocess] stream is not configured yet!');
      return;
    }
    if (socket == null) {
      console.warn('[onaudioprocess] socket is null in onaudioprocess');
      return;
    }

    if (mutedRef.current) {
      // While muted, emit a single zero sample instead of the captured audio.
      // NOTE(review): presumably this keeps the server-side stream fed; confirm.
      const mostlyEmptyInt16Array = new Int16Array(1);
      socket.emit('incoming_audio', mostlyEmptyInt16Array);
    } else {
      const float32Audio = event.inputBuffer.getChannelData(0);
      const pcm16Audio = float32To16BitPCM(float32Audio);
      socket.emit('incoming_audio', pcm16Audio);
    }

    debug()?.sentAudio(event);
  };

  mediaStreamSource.connect(scriptProcessor);
  scriptProcessor.connect(audioContext.destination);

  bufferedSpeechPlayer.start();

  try {
    if (targetLang == null) {
      throw new Error('[startStreaming] targetLang cannot be nullish');
    }

    // Send the complete dynamic config before configuring the stream.
    const fullDynamicConfig: DynamicConfig = {
      targetLanguage: targetLang,
      expressive: enableExpressive,
    };

    await onSetDynamicConfig(fullDynamicConfig);

    await configureStreamAsync({
      sampleRate: audioContext.sampleRate,
    });
  } catch (e) {
    console.error('configureStreamAsync failed', e);

    // Release everything acquired above. Otherwise the capture tracks stay
    // live and the orphaned processor keeps firing `onaudioprocess`, which
    // could emit duplicate audio once a later session is configured.
    scriptProcessor.onaudioprocess = null;
    mediaStreamSource.disconnect();
    scriptProcessor.disconnect();
    stream.getTracks().forEach((track) => track.stop());

    setStreamingStatus('stopped');
    return;
  }

  setStreamingStatus('running');
};
| |
|
// Stops the current streaming session: halts playback, tears down the audio
// graph, stops the capture tracks, tells the server via `stop_stream`, and
// sets the status back to 'stopped'. A no-op (with a warning) when already
// stopped.
const stopStreaming = useCallback(async () => {
  if (streamingStatus === 'stopped') {
    console.warn(
      `Attempting to stop stream when status is ${streamingStatus}`,
    );
    return;
  }

  bufferedSpeechPlayer.stop();

  // The server-side stream is going away, so a later session must wait for
  // its own `configure_stream` acknowledgement before emitting audio.
  isStreamConfiguredRef.current = false;

  if (inputStreamSource == null || scriptNodeProcessor == null) {
    console.error(
      'inputStreamSource || scriptNodeProcessor is null in stopStreaming',
    );
  } else {
    // Detach the handler first so no further audio is emitted during teardown.
    scriptNodeProcessor.onaudioprocess = null;
    inputStreamSource.disconnect(scriptNodeProcessor);
    scriptNodeProcessor.disconnect(audioContext.destination);
  }

  // Always release the capture tracks, even if the node state above was
  // unexpectedly missing, so the browser stops recording.
  inputStream?.getTracks().forEach((track) => track.stop());

  if (socket == null) {
    console.warn('Unable to emit stop_stream because socket is null');
  } else {
    socket.emit('stop_stream', (result) => {
      console.debug('[emit result: stop_stream]', result);
    });
  }

  setStreamingStatus('stopped');
}, [
  audioContext.destination,
  bufferedSpeechPlayer,
  inputStream,
  inputStreamSource,
  scriptNodeProcessor,
  socket,
  streamingStatus,
]);
| |
|
// Emits `clear_transcript_for_all` on the socket; does nothing while the
// socket is unavailable.
const onClearTranscriptForAll = useCallback(() => {
  socket?.emit('clear_transcript_for_all');
}, [socket]);
| |
|
| | |
| | |
| | |
| |
|
// Mirrors `room_state_update` socket events into `roomState`.
useEffect(() => {
  if (socket == null) {
    return;
  }

  const handleRoomStateUpdate = (nextRoomState: RoomState) => {
    setRoomState(nextRoomState);
  };

  socket.on('room_state_update', handleRoomStateUpdate);
  return () => {
    socket.off('room_state_update', handleRoomStateUpdate);
  };
}, [socket]);
| |
|
// Subscribes to translation output: text events are appended to
// `receivedData`, speech events are queued on the buffered speech player.
useEffect(() => {
  if (socket == null) {
    return;
  }

  const handleTranslationText = (textData: ServerTextData) => {
    setReceivedData((previous) => [...previous, textData]);
    debug()?.receivedText(textData.payload);
  };

  const handleTranslationSpeech = (speechData: ServerSpeechData) => {
    bufferedSpeechPlayer.addAudioToBuffer(
      speechData.payload,
      speechData.sample_rate,
    );
  };

  socket.on('translation_text', handleTranslationText);
  socket.on('translation_speech', handleTranslationSpeech);

  return () => {
    socket.off('translation_text', handleTranslationText);
    socket.off('translation_speech', handleTranslationSpeech);
  };
}, [bufferedSpeechPlayer, socket]);
| |
|
// Tracks `server_state_update` events. Besides storing the new state it:
//  - stops the local stream when another client holds an active server lock
//    while this client is running, and
//  - selects the first advertised agent when no agent has been chosen yet.
useEffect(() => {
  if (socket == null) {
    return;
  }

  const handleServerStateUpdate = (newServerState: ServerState) => {
    setServerState(newServerState);

    const lockedByAnotherClient =
      newServerState.serverLock?.isActive === true &&
      newServerState.serverLock?.clientID !== clientID;
    if (lockedByAnotherClient && streamingStatus === 'running') {
      stopStreaming();
    }

    const firstAgentNullable = newServerState.agentsCapabilities[0];
    if (agent == null && firstAgentNullable != null) {
      setAgentAndUpdateParams(firstAgentNullable);
    }
  };

  socket.on('server_state_update', handleServerStateUpdate);
  return () => {
    socket.off('server_state_update', handleServerStateUpdate);
  };
}, [
  agent,
  clientID,
  setAgentAndUpdateParams,
  socket,
  stopStreaming,
  streamingStatus,
]);
| |
|
// Collects `server_exception` events, newest first, capped at
// MAX_SERVER_EXCEPTIONS_TRACKED entries. Each entry gains a locale-formatted
// `timeStringClient` derived from the event's `timeEpochMs`.
useEffect(() => {
  if (socket == null) {
    return;
  }

  const handleServerException = (
    exceptionDataWithoutClientTime: ServerExceptionData,
  ) => {
    const timeStringClient = new Date(
      exceptionDataWithoutClientTime.timeEpochMs,
    ).toLocaleString();
    const exceptionData = {
      ...exceptionDataWithoutClientTime,
      timeStringClient,
    };

    setServerExceptions((previous) =>
      [exceptionData, ...previous].slice(0, MAX_SERVER_EXCEPTIONS_TRACKED),
    );
    console.error(
      `[server_exception] The server encountered an exception: ${exceptionData.message}`,
      exceptionData,
    );
  };

  socket.on('server_exception', handleServerException);
  return () => {
    socket.off('server_exception', handleServerException);
  };
}, [socket]);
| |
|
// On `clear_transcript`, drops all received text and rewinds the typing
// animation index to the start.
useEffect(() => {
  if (socket == null) {
    return;
  }

  const handleClearTranscript = () => {
    setReceivedData([]);
    setTranslationSentencesAnimatedIndex(0);
  };

  socket.on('clear_transcript', handleClearTranscript);
  return () => {
    socket.off('clear_transcript', handleClearTranscript);
  };
}, [socket]);
| |
|
// Keeps `isScrolledToBottomRef` up to date as the document scrolls, using
// SCROLLED_TO_BOTTOM_THRESHOLD_PX as the threshold argument.
useEffect(() => {
  const handleScroll = () => {
    isScrolledToBottomRef.current = isScrolledToDocumentBottom(
      SCROLLED_TO_BOTTOM_THRESHOLD_PX,
    )
      ? true
      : false;
  };

  document.addEventListener('scroll', handleScroll);
  return () => {
    document.removeEventListener('scroll', handleScroll);
  };
}, []);
| |
|
// After new data arrives, scroll the last transcript chunk into view, but
// only if the scroll listener last recorded the page as being at the bottom.
useLayoutEffect(() => {
  const lastResultElement = lastTranslationResultRef.current;
  if (lastResultElement == null) {
    return;
  }
  if (isScrolledToBottomRef.current) {
    lastResultElement.scrollIntoView();
  }
}, [receivedData]);
| |
|
// Drives the typing animation: while the animated index trails the total
// sentence length, advance it by one every TYPING_ANIMATION_DELAY_MS.
useEffect(() => {
  if (!animateTextDisplay) {
    return;
  }

  const hasMoreToAnimate =
    translationSentencesAnimatedIndex < translationSentencesBaseTotalLength;
  if (!hasMoreToAnimate) {
    debug()?.endRenderText();
    return;
  }

  const timeoutId = setTimeout(() => {
    setTranslationSentencesAnimatedIndex((previous) => previous + 1);
    debug()?.startRenderText();
  }, TYPING_ANIMATION_DELAY_MS);

  return () => clearTimeout(timeoutId);
}, [
  animateTextDisplay,
  translationSentencesAnimatedIndex,
  translationSentencesBaseTotalLength,
]);
| |
|
| | |
| | |
| | |
| |
|
// Handles volume slider movement. The player receives the scaled gain, while
// React state keeps the raw slider position so the thumb stays where the
// user put it.
const handleVolumeSliderChange = (
  _event: Event,
  newValue: number | number[],
) => {
  if (typeof newValue !== 'number') {
    console.error(`[volume slider] Unexpected non-number value: ${newValue}`);
    return;
  }
  // We want the actual gain node to use the scaled value
  bufferedSpeechPlayer.setGain(getGainScaledValue(newValue));
  // But we want react state to keep track of the non-scaled value
  setGain(newValue);
};

// Volume slider shared by the listener-only and speaker+listener layouts.
const volumeSliderNode = (
  <Stack
    spacing={2}
    direction="row"
    sx={{mb: 1, width: '100%'}}
    alignItems="center">
    <VolumeDown color="primary" />
    <Slider
      aria-label="Volume"
      defaultValue={1}
      scale={getGainScaledValue}
      min={0}
      max={3}
      step={0.1}
      marks={[
        {value: 0, label: '0%'},
        {value: 1, label: '100%'},
        {value: 2, label: '400%'},
        {value: 3, label: '700%'},
      ]}
      valueLabelFormat={(value) => `${(value * 100).toFixed(0)}%`}
      valueLabelDisplay="auto"
      value={gain}
      onChange={handleVolumeSliderChange}
    />
    <VolumeUp color="primary" />
  </Stack>
);
| |
|
// XR dialog element, rendered in both the speaker and listener-only layouts.
// Text animation in the main view is switched off while AR is visible and
// restored to the URL-param setting when AR is hidden again.
const xrDialogComponent = (
  <XRDialog
    animateTextDisplay={
      animateTextDisplay &&
      // Strict equality: both operands are numbers, so no coercion is wanted.
      translationSentencesAnimatedIndex === translationSentencesBaseTotalLength
    }
    bufferedSpeechPlayer={bufferedSpeechPlayer}
    translationSentences={translationSentences}
    roomState={roomState}
    roomID={roomID}
    startStreaming={startStreaming}
    stopStreaming={stopStreaming}
    debugParam={debugParam}
    onARHidden={() => {
      setAnimateTextDisplay(urlParams.animateTextDisplay);
    }}
    onARVisible={() => setAnimateTextDisplay(false)}
  />
);
| |
|
| | return ( |
| | <div className="app-wrapper-sra"> |
| | <Box |
| | // eslint-disable-next-line @typescript-eslint/ban-ts-comment |
| | // @ts-ignore Not sure why it's complaining about complexity here |
| | sx={{width: '100%', maxWidth: '660px', minWidth: '320px'}}> |
| | <div className="main-container-sra"> |
| | <div className="top-section-sra horizontal-padding-sra"> |
| | <div className="header-container-sra"> |
| | <img |
| | src={seamlessLogoUrl} |
| | className="header-icon-sra" |
| | alt="Seamless Translation Logo" |
| | height={24} |
| | width={24} |
| | /> |
| |
|
| | <div> |
| | <Typography variant="h1" sx={{color: '#65676B'}}> |
| | Seamless Translation |
| | </Typography> |
| | </div> |
| | </div> |
| | <div className="header-container-sra"> |
| | <div> |
| | <Typography variant="body2" sx={{color: '#65676B'}}> |
| | Welcome! This space is limited to one speaker at a time. |
| | If using the live HF space, sharing room code to listeners on another |
| | IP address may not work because it's running on different replicas. |
| | Use headphones if you are both speaker and listener to prevent feedback. |
| | <br/> |
| | If max speakers reached, please duplicate the space <a target="_blank" rel="noopener noreferrer" href="https://huggingface.co/spaces/facebook/seamless-streaming?duplicate=true">here</a>. |
| | In your duplicated space, join a room as speaker or listener (or both), |
| | and share the room code to invite listeners. |
| | <br/> |
| | Check out the seamless_communication <a target="_blank" rel="noopener noreferrer" href="https://github.com/facebookresearch/seamless_communication/tree/main">README</a> for more information. |
| | <br/> |
| | SeamlessStreaming model is a research model and is not released |
| | for production deployment. It is important to use a microphone with |
| | noise cancellation (for e.g. a smartphone), otherwise you may see model hallucination on noises. |
| | It works best if you pause every couple of sentences, or you may wish adjust the VAD threshold |
| | in the model config. The real-time performance will degrade |
| | if you try streaming multiple speakers at the same time. |
| | </Typography> |
| | </div> |
| | </div> |
| | <Stack spacing="22px" direction="column"> |
| | <Box> |
| | <RoomConfig |
| | roomState={roomState} |
| | serverState={serverState} |
| | streamingStatus={streamingStatus} |
| | onJoinRoomOrUpdateRoles={() => { |
| | // If the user has switched from speaker to listener we need to tell the |
| | // player to play eagerly, since currently the listener doesn't have any stop/start controls |
| | bufferedSpeechPlayer.start(); |
| | }} |
| | /> |
| |
|
| | {isListener && !isSpeaker && ( |
| | <Box |
| | sx={{ |
| | paddingX: 6, |
| | paddingBottom: 2, |
| | marginY: 2, |
| | display: 'flex', |
| | flexDirection: 'column', |
| | alignItems: 'center', |
| | }}> |
| | {volumeSliderNode} |
| | </Box> |
| | )} |
| | </Box> |
| |
|
| | {isSpeaker && ( |
| | <> |
| | <Divider /> |
| |
|
<Stack spacing="12px" direction="column">
  {/* This label needs its own id: "output-modes-radio-group-label" belongs
      to the Output label, which the output radio group references through
      aria-labelledby. Duplicate ids make that reference ambiguous. */}
  <FormLabel id="model-selector-form-label">Model</FormLabel>
  <FormControl
    disabled={
      streamFixedConfigOptionsDisabled || agentsCapabilities.length === 0
    }
    fullWidth
    sx={{minWidth: '14em'}}>
    <InputLabel id="model-selector-input-label">Model</InputLabel>
    <Select
      labelId="model-selector-input-label"
      label="Model"
      onChange={(e: SelectChangeEvent) => {
        const newAgent =
          agentsCapabilities.find(
            (candidate) => e.target.value === candidate.name,
          ) ?? null;
        if (newAgent == null) {
          console.error('Unable to find agent with name', e.target.value);
        }
        setAgentAndUpdateParams(newAgent);
      }}
      value={model ?? ''}>
      {agentsCapabilities.map((candidate) => (
        <MenuItem value={candidate.name} key={candidate.name}>
          {candidate.name}
        </MenuItem>
      ))}
    </Select>
  </FormControl>
</Stack>
| |
|
| | <Stack spacing={0.5}> |
| | <FormLabel id="output-modes-radio-group-label"> |
| | Output |
| | </FormLabel> |
| |
|
<Box sx={{paddingTop: 2, paddingBottom: 1}}>
  <FormControl fullWidth sx={{minWidth: '14em'}}>
    <InputLabel id="target-selector-input-label">Target Language</InputLabel>
    <Select
      labelId="target-selector-input-label"
      label="Target Language"
      onChange={(e: SelectChangeEvent) => {
        setTargetLang(e.target.value);
        // onSetDynamicConfig rejects when the socket is missing or the
        // server answers with a non-ok status; handle that here so it does
        // not surface as an unhandled promise rejection.
        onSetDynamicConfig({
          targetLanguage: e.target.value,
        }).catch((error: unknown) => {
          console.error(
            '[target language] failed to update dynamic config',
            error,
          );
        });
      }}
      value={targetLang ?? ''}>
      {currentAgent?.targetLangs.map((langCode) => {
        // Look the language name up once per option.
        const languageName = getLanguageFromThreeLetterCode(langCode);
        return (
          <MenuItem value={langCode} key={langCode}>
            {languageName != null
              ? `${languageName} (${langCode})`
              : langCode}
          </MenuItem>
        );
      })}
    </Select>
  </FormControl>
</Box>
| |
|
| | <Grid container> |
| | <Grid item xs={12} sm={4}> |
| | <FormControl |
| | disabled={streamFixedConfigOptionsDisabled}> |
| | <RadioGroup |
| | aria-labelledby="output-modes-radio-group-label" |
| | value={outputMode} |
| | onChange={(e) => |
| | setOutputMode( |
| | e.target.value as SupportedOutputMode, |
| | ) |
| | } |
| | name="output-modes-radio-buttons-group"> |
| | { |
| | // TODO: Use supported modalities from agentCapabilities |
| | SUPPORTED_OUTPUT_MODES.map(({value, label}) => ( |
| | <FormControlLabel |
| | key={value} |
| | value={value} |
| | control={<Radio />} |
| | label={label} |
| | /> |
| | )) |
| | } |
| | </RadioGroup> |
| | </FormControl> |
| | </Grid> |
| |
|
| | <Grid item xs={12} sm={8}> |
| | <Stack |
| | direction="column" |
| | spacing={1} |
| | alignItems="flex-start" |
| | sx={{flexGrow: 1}}> |
{currentAgent?.dynamicParams?.includes('expressive') && (
  <FormControlLabel
    control={
      <Switch
        checked={enableExpressive ?? false}
        onChange={(event: React.ChangeEvent<HTMLInputElement>) => {
          const newValue = event.target.checked;
          setEnableExpressive(newValue);
          // onSetDynamicConfig rejects when the socket is missing or the
          // server answers with a non-ok status; handle that here so it
          // does not surface as an unhandled promise rejection.
          onSetDynamicConfig({
            expressive: newValue,
          }).catch((error: unknown) => {
            console.error(
              '[expressive] failed to update dynamic config',
              error,
            );
          });
        }}
      />
    }
    label="Expressive"
  />
)}
| |
|
| | {isListener && ( |
| | <Box |
| | sx={{ |
| | flexGrow: 1, |
| | paddingX: 1.5, |
| | paddingY: 1.5, |
| | width: '100%', |
| | }}> |
| | {volumeSliderNode} |
| | </Box> |
| | )} |
| | </Stack> |
| | </Grid> |
| | </Grid> |
| | </Stack> |
| |
|
| | <Stack |
| | direction="row" |
| | spacing={2} |
| | justifyContent="space-between"> |
| | <Box sx={{flex: 1}}> |
| | <FormControl disabled={streamFixedConfigOptionsDisabled}> |
| | <FormLabel id="input-source-radio-group-label"> |
| | Input Source |
| | </FormLabel> |
| | <RadioGroup |
| | aria-labelledby="input-source-radio-group-label" |
| | value={inputSource} |
| | onChange={(e: React.ChangeEvent<HTMLInputElement>) => |
| | setInputSource( |
| | e.target.value as SupportedInputSource, |
| | ) |
| | } |
| | name="input-source-radio-buttons-group"> |
| | {SUPPORTED_INPUT_SOURCES.map(({label, value}) => ( |
| | <FormControlLabel |
| | key={value} |
| | value={value} |
| | control={<Radio />} |
| | label={label} |
| | /> |
| | ))} |
| | </RadioGroup> |
| | </FormControl> |
| | </Box> |
| |
|
| | <Box sx={{flex: 1, flexGrow: 2}}> |
| | <FormControl disabled={streamFixedConfigOptionsDisabled}> |
| | <FormLabel>Options</FormLabel> |
| | <FormControlLabel |
| | control={ |
| | <Checkbox |
| | checked={ |
| | enableNoiseSuppression ?? |
| | AUDIO_STREAM_DEFAULTS[inputSource] |
| | .noiseSuppression |
| | } |
| | onChange={( |
| | event: React.ChangeEvent<HTMLInputElement>, |
| | ) => |
| | setEnableNoiseSuppression(event.target.checked) |
| | } |
| | /> |
| | } |
| | label="Noise Suppression" |
| | /> |
| | <FormControlLabel |
| | control={ |
| | <Checkbox |
| | checked={ |
| | enableEchoCancellation ?? |
| | AUDIO_STREAM_DEFAULTS[inputSource] |
| | .echoCancellation |
| | } |
| | onChange={( |
| | event: React.ChangeEvent<HTMLInputElement>, |
| | ) => |
| | setEnableEchoCancellation(event.target.checked) |
| | } |
| | /> |
| | } |
| | label="Echo Cancellation (not recommended)" |
| | /> |
| | <FormControlLabel |
| | control={ |
| | <Checkbox |
| | checked={serverDebugFlag} |
| | onChange={( |
| | event: React.ChangeEvent<HTMLInputElement>, |
| | ) => setServerDebugFlag(event.target.checked)} |
| | /> |
| | } |
| | label="Enable Server Debugging" |
| | /> |
| | </FormControl> |
| | </Box> |
| | </Stack> |
| |
|
| | {isSpeaker && |
| | isListener && |
| | inputSource === 'userMedia' && |
| | !enableEchoCancellation && |
| | gain !== 0 && ( |
| | <div> |
| | <Alert severity="warning" icon={<HeadphonesIcon />}> |
| | Headphones required to prevent feedback. |
| | </Alert> |
| | </div> |
| | )} |
| |
|
| | {isSpeaker && enableEchoCancellation && ( |
| | <div> |
| | <Alert severity="warning"> |
| | We don't recommend using echo cancellation as it may |
| | distort the input audio. If possible, use headphones and |
| | disable echo cancellation instead. |
| | </Alert> |
| | </div> |
| | )} |
| |
|
| | <Stack direction="row" spacing={2}> |
| | {streamingStatus === 'stopped' ? ( |
| | <Button |
| | variant="contained" |
| | onClick={startStreaming} |
| | disabled={ |
| | roomID == null || |
| | // Prevent users from starting streaming if there is a server lock with an active session |
| | (serverState?.serverLock?.isActive === true && |
| | serverState.serverLock.clientID !== clientID) |
| | }> |
| | {buttonLabelMap[streamingStatus]} |
| | </Button> |
| | ) : ( |
| | <Button |
| | variant="contained" |
| | color={ |
| | streamingStatus === 'running' ? 'error' : 'primary' |
| | } |
| | disabled={ |
| | streamingStatus === 'starting' || roomID == null |
| | } |
| | onClick={stopStreaming}> |
| | {buttonLabelMap[streamingStatus]} |
| | </Button> |
| | )} |
| |
|
| | <Box> |
| | <Button |
| | variant="contained" |
| | aria-label={muted ? 'Unmute' : 'Mute'} |
| | color={muted ? 'info' : 'primary'} |
| | onClick={() => setMuted((prev) => !prev)} |
| | sx={{ |
| | borderRadius: 100, |
| | paddingX: 0, |
| | minWidth: '36px', |
| | }}> |
| | {muted ? <MicOff /> : <Mic />} |
| | </Button> |
| | </Box> |
| |
|
| | {roomID == null ? null : ( |
| | <Box |
| | sx={{ |
| | flexGrow: 1, |
| | display: 'flex', |
| | justifyContent: 'flex-end', |
| | }}> |
| | {xrDialogComponent} |
| | </Box> |
| | )} |
| | </Stack> |
| |
|
| | {serverExceptions.length > 0 && ( |
| | <div> |
| | <Alert severity="error"> |
| | {`The server encountered an exception. See the browser console for details. You may need to refresh the page to continue using the app.`} |
| | </Alert> |
| | </div> |
| | )} |
| | {serverState != null && hasMaxSpeakers && ( |
| | <div> |
| | <Alert severity="error"> |
| | {`Maximum number of speakers reached. Please try again at a later time.`} |
| | </Alert> |
| | </div> |
| | )} |
| | {serverState != null && |
| | serverState.totalActiveTranscoders >= |
| | TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD && ( |
| | <div> |
| | <Alert severity="warning"> |
| | {`The server currently has ${serverState?.totalActiveTranscoders} active streaming sessions. Performance may be degraded.`} |
| | </Alert> |
| | </div> |
| | )} |
| |
|
| | {serverState?.serverLock != null && |
| | serverState.serverLock.clientID !== clientID && ( |
| | <div> |
| | <Alert severity="warning"> |
| | {`The server is currently locked. Priority will be given to that client when they are streaming, and your streaming session may be halted abruptly.`} |
| | </Alert> |
| | </div> |
| | )} |
| | </> |
| | )} |
| | </Stack> |
| |
|
| | {isListener && !isSpeaker && ( |
| | <Box sx={{marginBottom: 1, marginTop: 2}}> |
| | {xrDialogComponent} |
| | </Box> |
| | )} |
| | </div> |
| |
|
| | {debugParam && roomID != null && <DebugSection />} |
| |
|
| | <div className="translation-text-container-sra horizontal-padding-sra"> |
| | <Stack |
| | direction="row" |
| | spacing={2} |
| | sx={{mb: '16px', alignItems: 'center'}}> |
| | <Typography variant="h1" sx={{fontWeight: 700, flexGrow: 1}}> |
| | Transcript |
| | </Typography> |
| | {isSpeaker && ( |
| | <Button |
| | variant="text" |
| | size="small" |
| | onClick={onClearTranscriptForAll}> |
| | Clear Transcript for All |
| | </Button> |
| | )} |
| | </Stack> |
| | <Stack direction="row"> |
| | <div className="translation-text-sra"> |
| | {translationSentencesWithEmptyStartingString.map( |
| | (sentence, index, arr) => { |
| | const isLast = index === arr.length - 1; |
| | const maybeRef = isLast |
| | ? {ref: lastTranslationResultRef} |
| | : {}; |
| | return ( |
| | <div className="text-chunk-sra" key={index} {...maybeRef}> |
| | <Typography variant="body1"> |
| | {sentence} |
| | {animateTextDisplay && isLast && ( |
| | <Blink |
| | intervalMs={CURSOR_BLINK_INTERVAL_MS} |
| | shouldBlink={ |
| | (roomState?.activeTranscoders ?? 0) > 0 |
| | }> |
| | <Typography |
| | component="span" |
| | variant="body1" |
| | sx={{ |
| | display: 'inline-block', |
| | transform: 'scaleY(1.25) translateY(-1px)', |
| | }}> |
| | {'|'} |
| | </Typography> |
| | </Blink> |
| | )} |
| | </Typography> |
| | </div> |
| | ); |
| | }, |
| | )} |
| | </div> |
| | </Stack> |
| | </div> |
| | </div> |
| | </Box> |
| | </div> |
| | ); |
| | } |
| |
|