about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Reinier van der Leer <pwuts@agpt.co> 2024-01-19 17:31:59 +0100
committer: Reinier van der Leer <pwuts@agpt.co> 2024-01-19 17:31:59 +0100
commit: 354106be7b7d049b30dc42cd2f806ad3cc20aac7 (patch)
tree: 9fa7f61e7f7fa9eab418db75c865ff4be2c59519
parent: fix(benchmark): Fix challenge input artifact upload (diff)
download: Auto-GPT-354106be7b7d049b30dc42cd2f806ad3cc20aac7.tar.gz
Auto-GPT-354106be7b7d049b30dc42cd2f806ad3cc20aac7.tar.bz2
Auto-GPT-354106be7b7d049b30dc42cd2f806ad3cc20aac7.zip
feat(agent/llm): Add cost tracking and logging to `AgentProtocolServer`
-rw-r--r-- autogpts/autogpt/autogpt/app/agent_protocol_server.py | 24
-rw-r--r-- autogpts/autogpt/autogpt/app/main.py | 5
-rw-r--r-- autogpts/autogpt/autogpt/core/resource/model_providers/openai.py | 68
-rw-r--r-- autogpts/autogpt/autogpt/core/resource/model_providers/schema.py | 21
-rw-r--r-- autogpts/autogpt/autogpt/core/resource/schema.py | 8
5 files changed, 84 insertions, 42 deletions
diff --git a/autogpts/autogpt/autogpt/app/agent_protocol_server.py b/autogpts/autogpt/autogpt/app/agent_protocol_server.py
index 7bebca930..e7474d1ad 100644
--- a/autogpts/autogpt/autogpt/app/agent_protocol_server.py
+++ b/autogpts/autogpt/autogpt/app/agent_protocol_server.py
@@ -34,6 +34,7 @@ from autogpt.commands.user_interaction import ask_user
from autogpt.config import Config
from autogpt.core.resource.model_providers import ChatModelProvider
from autogpt.core.resource.model_providers.openai import OpenAIProvider
+from autogpt.core.resource.model_providers.schema import ModelProviderBudget
from autogpt.file_workspace import (
FileWorkspace,
FileWorkspaceBackendName,
@@ -46,6 +47,8 @@ logger = logging.getLogger(__name__)
class AgentProtocolServer:
+ _task_budgets: dict[str, ModelProviderBudget]
+
def __init__(
self,
app_config: Config,
@@ -56,6 +59,7 @@ class AgentProtocolServer:
self.db = database
self.llm_provider = llm_provider
self.agent_manager = AgentManager(app_data_dir=app_config.app_data_dir)
+ self._task_budgets = {}
async def start(self, port: int = 8000, router: APIRouter = base_router):
"""Start the agent server."""
@@ -127,10 +131,13 @@ class AgentProtocolServer:
app_config=self.app_config,
llm_provider=self._get_task_llm_provider(task),
)
+
+ # Assign an ID and a folder to the Agent and persist it
agent_id = task_agent.state.agent_id = task_agent_id(task.task_id)
logger.debug(f"New agent ID: {agent_id}")
task_agent.attach_fs(self.app_config.app_data_dir / "agents" / agent_id)
task_agent.state.save_to_json_file(task_agent.file_manager.state_file_path)
+
return task
async def list_tasks(self, page: int = 1, pageSize: int = 10) -> TaskListResponse:
@@ -224,6 +231,10 @@ class AgentProtocolServer:
step_id=step.step_id,
output=execute_command_args["reason"],
)
+ logger.info(
+ f"Total LLM cost for task {task_id}: "
+ f"${round(agent.llm_provider.get_incurred_cost(), 2)}"
+ )
return step
if execute_command == ask_user.__name__: # HACK
@@ -310,6 +321,10 @@ class AgentProtocolServer:
additional_output=additional_output,
)
+ logger.debug(
+ f"Running total LLM cost for task {task_id}: "
+ f"${round(agent.llm_provider.get_incurred_cost(), 2)}"
+ )
agent.state.save_to_json_file(agent.file_manager.state_file_path)
return step
@@ -437,6 +452,12 @@ class AgentProtocolServer:
task_llm_provider_config = self.llm_provider._configuration.copy(deep=True)
_extra_request_headers = task_llm_provider_config.extra_request_headers
+ task_llm_budget = self._task_budgets.get(
+ task.task_id, self.llm_provider.default_settings.budget.copy(deep=True)
+ )
+ if task.task_id not in self._task_budgets:
+ self._task_budgets[task.task_id] = task_llm_budget
+
_extra_request_headers["AP-TaskID"] = task.task_id
if step_id:
_extra_request_headers["AP-StepID"] = step_id
@@ -445,7 +466,8 @@ class AgentProtocolServer:
if isinstance(self.llm_provider, OpenAIProvider):
settings = self.llm_provider._settings.copy()
- settings.configuration = task_llm_provider_config
+ settings.budget = task_llm_budget
+ settings.configuration = task_llm_provider_config # type: ignore
return OpenAIProvider(
settings=settings,
logger=logger.getChild(f"Task-{task.task_id}_OpenAIProvider"),
diff --git a/autogpts/autogpt/autogpt/app/main.py b/autogpts/autogpt/autogpt/app/main.py
index a9cbc563b..20f1d9872 100644
--- a/autogpts/autogpt/autogpt/app/main.py
+++ b/autogpts/autogpt/autogpt/app/main.py
@@ -371,6 +371,11 @@ async def run_auto_gpt_server(
)
await server.start(port=port)
+ logging.getLogger().info(
+ f"Total OpenAI session cost: "
+ f"${round(sum(b.total_cost for b in server._task_budgets.values()), 2)}"
+ )
+
def _configure_openai_provider(config: Config) -> OpenAIProvider:
"""Create a configured OpenAIProvider object.
diff --git a/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py b/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py
index fe4cfbcab..464babe2e 100644
--- a/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py
+++ b/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py
@@ -241,15 +241,10 @@ class OpenAICredentials(ModelProviderCredentials):
return {"model": deployment_id}
-class OpenAIModelProviderBudget(ModelProviderBudget):
- graceful_shutdown_threshold: float = UserConfigurable()
- warning_threshold: float = UserConfigurable()
-
-
class OpenAISettings(ModelProviderSettings):
configuration: OpenAIConfiguration
credentials: Optional[OpenAICredentials]
- budget: OpenAIModelProviderBudget
+ budget: ModelProviderBudget
class OpenAIProvider(
@@ -262,7 +257,7 @@ class OpenAIProvider(
retries_per_request=10,
),
credentials=None,
- budget=OpenAIModelProviderBudget(
+ budget=ModelProviderBudget(
total_budget=math.inf,
total_cost=0.0,
remaining_budget=math.inf,
@@ -271,11 +266,10 @@ class OpenAIProvider(
completion_tokens=0,
total_tokens=0,
),
- graceful_shutdown_threshold=0.005,
- warning_threshold=0.01,
),
)
+ _budget: ModelProviderBudget
_configuration: OpenAIConfiguration
def __init__(
@@ -307,10 +301,6 @@ class OpenAIProvider(
"""Get the token limit for a given model."""
return OPEN_AI_MODELS[model_name].max_tokens
- def get_remaining_budget(self) -> float:
- """Get the remaining budget."""
- return self._budget.remaining_budget
-
@classmethod
def get_tokenizer(cls, model_name: OpenAIModelName) -> ModelTokenizer:
return tiktoken.encoding_for_model(model_name)
@@ -379,45 +369,60 @@ class OpenAIProvider(
model_prompt += completion_kwargs["messages"]
del completion_kwargs["messages"]
+ cost = 0.0
attempts = 0
while True:
- response = await self._create_chat_completion(
+ _response = await self._create_chat_completion(
messages=model_prompt,
**completion_kwargs,
)
- response_message = response.choices[0].message
+ _response_msg = _response.choices[0].message
if (
tool_calls_compat_mode
- and response_message.content
- and not response_message.tool_calls
+ and _response_msg.content
+ and not _response_msg.tool_calls
):
tool_calls = list(
- _tool_calls_compat_extract_calls(response_message.content)
+ _tool_calls_compat_extract_calls(_response_msg.content)
)
- elif response_message.tool_calls:
+ elif _response_msg.tool_calls:
tool_calls = [
- AssistantToolCall(**tc.dict()) for tc in response_message.tool_calls
+ AssistantToolCall(**tc.dict()) for tc in _response_msg.tool_calls
]
else:
tool_calls = None
assistant_message = AssistantChatMessage(
- content=response_message.content,
+ content=_response_msg.content,
tool_calls=tool_calls,
)
+ response = ChatModelResponse(
+ response=assistant_message,
+ model_info=OPEN_AI_CHAT_MODELS[model_name],
+ prompt_tokens_used=(
+ _response.usage.prompt_tokens if _response.usage else 0
+ ),
+ completion_tokens_used=(
+ _response.usage.completion_tokens if _response.usage else 0
+ ),
+ )
+ cost += self._budget.update_usage_and_cost(response)
+ self._logger.debug(
+ f"Completion usage: {response.prompt_tokens_used} input, "
+ f"{response.completion_tokens_used} output - ${round(cost, 2)}"
+ )
+
# If parsing the response fails, append the error to the prompt, and let the
# LLM fix its mistake(s).
try:
attempts += 1
- parsed_response = completion_parser(assistant_message)
+ response.parsed_result = completion_parser(assistant_message)
break
except Exception as e:
self._logger.warning(f"Parsing attempt #{attempts} failed: {e}")
- self._logger.debug(
- f"Parsing failed on response: '''{response_message}'''"
- )
+ self._logger.debug(f"Parsing failed on response: '''{_response_msg}'''")
if attempts < self._configuration.fix_failed_parse_tries:
model_prompt.append(
ChatMessage.system(f"ERROR PARSING YOUR RESPONSE:\n\n{e}")
@@ -425,16 +430,9 @@ class OpenAIProvider(
else:
raise
- response = ChatModelResponse(
- response=assistant_message,
- parsed_result=parsed_response,
- model_info=OPEN_AI_CHAT_MODELS[model_name],
- prompt_tokens_used=response.usage.prompt_tokens if response.usage else 0,
- completion_tokens_used=(
- response.usage.completion_tokens if response.usage else 0
- ),
- )
- self._budget.update_usage_and_cost(response)
+ if attempts > 1:
+ self._logger.debug(f"Total cost for {attempts} attempts: {round(cost, 2)}")
+
return response
async def create_embedding(
diff --git a/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py b/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py
index e2599b0aa..2ed667725 100644
--- a/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py
+++ b/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py
@@ -1,5 +1,6 @@
import abc
import enum
+import math
from typing import (
Callable,
ClassVar,
@@ -208,8 +209,12 @@ class ModelProviderBudget(ProviderBudget):
def update_usage_and_cost(
self,
model_response: ModelResponse,
- ) -> None:
- """Update the usage and cost of the provider."""
+ ) -> float:
+ """Update the usage and cost of the provider.
+
+ Returns:
+ float: The (calculated) cost of the given model response.
+ """
model_info = model_response.model_info
self.usage.update_usage(model_response)
incurred_cost = (
@@ -218,6 +223,7 @@ class ModelProviderBudget(ProviderBudget):
)
self.total_cost += incurred_cost
self.remaining_budget -= incurred_cost
+ return incurred_cost
class ModelProviderSettings(ProviderSettings):
@@ -232,6 +238,7 @@ class ModelProvider(abc.ABC):
default_settings: ClassVar[ModelProviderSettings]
+ _budget: Optional[ModelProviderBudget]
_configuration: ModelProviderConfiguration
@abc.abstractmethod
@@ -246,9 +253,15 @@ class ModelProvider(abc.ABC):
def get_token_limit(self, model_name: str) -> int:
...
- @abc.abstractmethod
+ def get_incurred_cost(self) -> float:
+ if self._budget:
+ return self._budget.total_cost
+ return 0
+
def get_remaining_budget(self) -> float:
- ...
+ if self._budget:
+ return self._budget.remaining_budget
+ return math.inf
class ModelTokenizer(Protocol):
diff --git a/autogpts/autogpt/autogpt/core/resource/schema.py b/autogpts/autogpt/autogpt/core/resource/schema.py
index ed7a94d02..d8cc1de31 100644
--- a/autogpts/autogpt/autogpt/core/resource/schema.py
+++ b/autogpts/autogpt/autogpt/core/resource/schema.py
@@ -31,8 +31,12 @@ class ProviderBudget(SystemConfiguration):
usage: ProviderUsage
@abc.abstractmethod
- def update_usage_and_cost(self, *args, **kwargs) -> None:
- """Update the usage and cost of the resource."""
+ def update_usage_and_cost(self, *args, **kwargs) -> float:
+ """Update the usage and cost of the provider.
+
+ Returns:
+ float: The (calculated) cost of the given model response.
+ """
...