1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
"""ElevenLabs speech module"""
from __future__ import annotations
import logging
import os
import requests
from playsound import playsound
from autogpt.core.configuration import SystemConfiguration, UserConfigurable
from .base import VoiceBase
logger = logging.getLogger(__name__)
PLACEHOLDERS = {"your-voice-id"}
class ElevenLabsConfig(SystemConfiguration):
api_key: str = UserConfigurable()
voice_id: str = UserConfigurable()
class ElevenLabsSpeech(VoiceBase):
"""ElevenLabs speech class"""
def _setup(self, config: ElevenLabsConfig) -> None:
"""Set up the voices, API key, etc.
Returns:
None: None
"""
default_voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"]
voice_options = {
"Rachel": "21m00Tcm4TlvDq8ikWAM",
"Domi": "AZnzlk1XvdvUeBnXmlld",
"Bella": "EXAVITQu4vr4xnSDxMaL",
"Antoni": "ErXwobaYiN019PkySvjV",
"Elli": "MF3mGyEYCl7XYWbV9V6O",
"Josh": "TxGEqnHWrfWFTfGW9XjX",
"Arnold": "VR6AewLTigWG4xSOukaG",
"Adam": "pNInz6obpgDQGcFmaJgB",
"Sam": "yoZ06aMxZJJ28mfd3POQ",
}
self._headers = {
"Content-Type": "application/json",
"xi-api-key": config.api_key,
}
self._voices = default_voices.copy()
if config.voice_id in voice_options:
config.voice_id = voice_options[config.voice_id]
self._use_custom_voice(config.voice_id, 0)
def _use_custom_voice(self, voice, voice_index) -> None:
"""Use a custom voice if provided and not a placeholder
Args:
voice (str): The voice ID
voice_index (int): The voice index
Returns:
None: None
"""
# Placeholder values that should be treated as empty
if voice and voice not in PLACEHOLDERS:
self._voices[voice_index] = voice
def _speech(self, text: str, voice_index: int = 0) -> bool:
"""Speak text using elevenlabs.io's API
Args:
text (str): The text to speak
voice_index (int, optional): The voice to use. Defaults to 0.
Returns:
bool: True if the request was successful, False otherwise
"""
tts_url = (
f"https://api.elevenlabs.io/v1/text-to-speech/{self._voices[voice_index]}"
)
response = requests.post(tts_url, headers=self._headers, json={"text": text})
if response.status_code == 200:
with open("speech.mpeg", "wb") as f:
f.write(response.content)
playsound("speech.mpeg", True)
os.remove("speech.mpeg")
return True
else:
logger.warn("Request failed with status code:", response.status_code)
logger.info("Response content:", response.content)
return False
|