diff options
Diffstat (limited to 'autogpts/autogpt/autogpt/speech/say.py')
-rw-r--r-- | autogpts/autogpt/autogpt/speech/say.py | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/autogpts/autogpt/autogpt/speech/say.py b/autogpts/autogpt/autogpt/speech/say.py new file mode 100644 index 000000000..04ab3a4bc --- /dev/null +++ b/autogpts/autogpt/autogpt/speech/say.py @@ -0,0 +1,79 @@ +""" Text to speech module """ +from __future__ import annotations + +import os +import threading +from threading import Semaphore +from typing import Literal, Optional + +from autogpt.core.configuration.schema import SystemConfiguration, UserConfigurable + +from .base import VoiceBase +from .eleven_labs import ElevenLabsConfig, ElevenLabsSpeech +from .gtts import GTTSVoice +from .macos_tts import MacOSTTS +from .stream_elements_speech import StreamElementsConfig, StreamElementsSpeech + +_QUEUE_SEMAPHORE = Semaphore( + 1 +) # The amount of sounds to queue before blocking the main thread + + +class TTSConfig(SystemConfiguration): + speak_mode: bool = False + elevenlabs: Optional[ElevenLabsConfig] = None + streamelements: Optional[StreamElementsConfig] = None + provider: Literal[ + "elevenlabs", "gtts", "macos", "streamelements" + ] = UserConfigurable( + default="gtts", + from_env=lambda: os.getenv("TEXT_TO_SPEECH_PROVIDER") + or ( + "macos" + if os.getenv("USE_MAC_OS_TTS") + else "elevenlabs" + if os.getenv("ELEVENLABS_API_KEY") + else "streamelements" + if os.getenv("USE_BRIAN_TTS") + else "gtts" + ), + ) # type: ignore + + +class TextToSpeechProvider: + def __init__(self, config: TTSConfig): + self._config = config + self._default_voice_engine, self._voice_engine = self._get_voice_engine(config) + + def say(self, text, voice_index: int = 0) -> None: + def _speak() -> None: + success = self._voice_engine.say(text, voice_index) + if not success: + self._default_voice_engine.say(text, voice_index) + _QUEUE_SEMAPHORE.release() + + if self._config.speak_mode: + _QUEUE_SEMAPHORE.acquire(True) + thread = threading.Thread(target=_speak) + thread.start() + + def __repr__(self): + return "{class_name}(provider={voice_engine_name})".format( + class_name=self.__class__.__name__, + voice_engine_name=self._voice_engine.__class__.__name__, + ) + + @staticmethod + def _get_voice_engine(config: TTSConfig) -> tuple[VoiceBase, VoiceBase]: + """Get the voice engine to use for the given configuration""" + tts_provider = config.provider + if tts_provider == "elevenlabs": + voice_engine = ElevenLabsSpeech(config.elevenlabs) + elif tts_provider == "macos": + voice_engine = MacOSTTS() + elif tts_provider == "streamelements": + voice_engine = StreamElementsSpeech(config.streamelements) + else: + voice_engine = GTTSVoice() + + return GTTSVoice(), voice_engine |