From 354106be7b7d049b30dc42cd2f806ad3cc20aac7 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Fri, 19 Jan 2024 17:31:59 +0100 Subject: feat(agent/llm): Add cost tracking and logging to `AgentProtocolServer` --- .../autogpt/autogpt/app/agent_protocol_server.py | 24 +++++++- autogpts/autogpt/autogpt/app/main.py | 5 ++ .../core/resource/model_providers/openai.py | 68 +++++++++++----------- .../core/resource/model_providers/schema.py | 21 +++++-- autogpts/autogpt/autogpt/core/resource/schema.py | 8 ++- 5 files changed, 84 insertions(+), 42 deletions(-) diff --git a/autogpts/autogpt/autogpt/app/agent_protocol_server.py b/autogpts/autogpt/autogpt/app/agent_protocol_server.py index 7bebca930..e7474d1ad 100644 --- a/autogpts/autogpt/autogpt/app/agent_protocol_server.py +++ b/autogpts/autogpt/autogpt/app/agent_protocol_server.py @@ -34,6 +34,7 @@ from autogpt.commands.user_interaction import ask_user from autogpt.config import Config from autogpt.core.resource.model_providers import ChatModelProvider from autogpt.core.resource.model_providers.openai import OpenAIProvider +from autogpt.core.resource.model_providers.schema import ModelProviderBudget from autogpt.file_workspace import ( FileWorkspace, FileWorkspaceBackendName, @@ -46,6 +47,8 @@ logger = logging.getLogger(__name__) class AgentProtocolServer: + _task_budgets: dict[str, ModelProviderBudget] + def __init__( self, app_config: Config, @@ -56,6 +59,7 @@ class AgentProtocolServer: self.db = database self.llm_provider = llm_provider self.agent_manager = AgentManager(app_data_dir=app_config.app_data_dir) + self._task_budgets = {} async def start(self, port: int = 8000, router: APIRouter = base_router): """Start the agent server.""" @@ -127,10 +131,13 @@ class AgentProtocolServer: app_config=self.app_config, llm_provider=self._get_task_llm_provider(task), ) + + # Assign an ID and a folder to the Agent and persist it agent_id = task_agent.state.agent_id = task_agent_id(task.task_id) logger.debug(f"New agent ID: {agent_id}") task_agent.attach_fs(self.app_config.app_data_dir / "agents" / agent_id) task_agent.state.save_to_json_file(task_agent.file_manager.state_file_path) + return task async def list_tasks(self, page: int = 1, pageSize: int = 10) -> TaskListResponse: @@ -224,6 +231,10 @@ class AgentProtocolServer: step_id=step.step_id, output=execute_command_args["reason"], ) + logger.info( + f"Total LLM cost for task {task_id}: " + f"${round(agent.llm_provider.get_incurred_cost(), 2)}" + ) return step if execute_command == ask_user.__name__: # HACK @@ -310,6 +321,10 @@ class AgentProtocolServer: additional_output=additional_output, ) + logger.debug( + f"Running total LLM cost for task {task_id}: " + f"${round(agent.llm_provider.get_incurred_cost(), 2)}" + ) agent.state.save_to_json_file(agent.file_manager.state_file_path) return step @@ -437,6 +452,12 @@ class AgentProtocolServer: task_llm_provider_config = self.llm_provider._configuration.copy(deep=True) _extra_request_headers = task_llm_provider_config.extra_request_headers + task_llm_budget = self._task_budgets.get( + task.task_id, self.llm_provider.default_settings.budget.copy(deep=True) + ) + if task.task_id not in self._task_budgets: + self._task_budgets[task.task_id] = task_llm_budget + _extra_request_headers["AP-TaskID"] = task.task_id if step_id: _extra_request_headers["AP-StepID"] = step_id @@ -445,7 +466,8 @@ class AgentProtocolServer: if isinstance(self.llm_provider, OpenAIProvider): settings = self.llm_provider._settings.copy() - settings.configuration = task_llm_provider_config + settings.budget = task_llm_budget + settings.configuration = task_llm_provider_config # type: ignore return OpenAIProvider( settings=settings, logger=logger.getChild(f"Task-{task.task_id}_OpenAIProvider"), diff --git a/autogpts/autogpt/autogpt/app/main.py b/autogpts/autogpt/autogpt/app/main.py index a9cbc563b..20f1d9872 100644 --- a/autogpts/autogpt/autogpt/app/main.py +++ b/autogpts/autogpt/autogpt/app/main.py @@ -371,6 +371,11 @@ async def run_auto_gpt_server( ) await server.start(port=port) + logging.getLogger().info( + f"Total OpenAI session cost: " + f"${round(sum(b.total_cost for b in server._task_budgets.values()), 2)}" + ) + def _configure_openai_provider(config: Config) -> OpenAIProvider: """Create a configured OpenAIProvider object. diff --git a/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py b/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py index fe4cfbcab..464babe2e 100644 --- a/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py +++ b/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py @@ -241,15 +241,10 @@ class OpenAICredentials(ModelProviderCredentials): return {"model": deployment_id} -class OpenAIModelProviderBudget(ModelProviderBudget): - graceful_shutdown_threshold: float = UserConfigurable() - warning_threshold: float = UserConfigurable() - - class OpenAISettings(ModelProviderSettings): configuration: OpenAIConfiguration credentials: Optional[OpenAICredentials] - budget: OpenAIModelProviderBudget + budget: ModelProviderBudget class OpenAIProvider( @@ -262,7 +257,7 @@ class OpenAIProvider( retries_per_request=10, ), credentials=None, - budget=OpenAIModelProviderBudget( + budget=ModelProviderBudget( total_budget=math.inf, total_cost=0.0, remaining_budget=math.inf, @@ -271,11 +266,10 @@ class OpenAIProvider( completion_tokens=0, total_tokens=0, ), - graceful_shutdown_threshold=0.005, - warning_threshold=0.01, ), ) + _budget: ModelProviderBudget _configuration: OpenAIConfiguration def __init__( @@ -307,10 +301,6 @@ class OpenAIProvider( """Get the token limit for a given model.""" return OPEN_AI_MODELS[model_name].max_tokens - def get_remaining_budget(self) -> float: - """Get the remaining budget.""" - return self._budget.remaining_budget - @classmethod def get_tokenizer(cls, model_name: OpenAIModelName) -> ModelTokenizer: return tiktoken.encoding_for_model(model_name) @@ -379,45 +369,60 @@ class OpenAIProvider( model_prompt += completion_kwargs["messages"] del completion_kwargs["messages"] + cost = 0.0 attempts = 0 while True: - response = await self._create_chat_completion( + _response = await self._create_chat_completion( messages=model_prompt, **completion_kwargs, ) - response_message = response.choices[0].message + _response_msg = _response.choices[0].message if ( tool_calls_compat_mode - and response_message.content - and not response_message.tool_calls + and _response_msg.content + and not _response_msg.tool_calls ): tool_calls = list( - _tool_calls_compat_extract_calls(response_message.content) + _tool_calls_compat_extract_calls(_response_msg.content) ) - elif response_message.tool_calls: + elif _response_msg.tool_calls: tool_calls = [ - AssistantToolCall(**tc.dict()) for tc in response_message.tool_calls + AssistantToolCall(**tc.dict()) for tc in _response_msg.tool_calls ] else: tool_calls = None assistant_message = AssistantChatMessage( - content=response_message.content, + content=_response_msg.content, tool_calls=tool_calls, ) + response = ChatModelResponse( + response=assistant_message, + model_info=OPEN_AI_CHAT_MODELS[model_name], + prompt_tokens_used=( + _response.usage.prompt_tokens if _response.usage else 0 + ), + completion_tokens_used=( + _response.usage.completion_tokens if _response.usage else 0 + ), + ) + cost += self._budget.update_usage_and_cost(response) + self._logger.debug( + f"Completion usage: {response.prompt_tokens_used} input, " + f"{response.completion_tokens_used} output - ${round(cost, 2)}" + ) + # If parsing the response fails, append the error to the prompt, and let the # LLM fix its mistake(s). try: attempts += 1 - parsed_response = completion_parser(assistant_message) + response.parsed_result = completion_parser(assistant_message) break except Exception as e: self._logger.warning(f"Parsing attempt #{attempts} failed: {e}") - self._logger.debug( - f"Parsing failed on response: '''{response_message}'''" - ) + self._logger.debug(f"Parsing failed on response: '''{_response_msg}'''") if attempts < self._configuration.fix_failed_parse_tries: model_prompt.append( ChatMessage.system(f"ERROR PARSING YOUR RESPONSE:\n\n{e}") @@ -425,16 +430,9 @@ class OpenAIProvider( else: raise - response = ChatModelResponse( - response=assistant_message, - parsed_result=parsed_response, - model_info=OPEN_AI_CHAT_MODELS[model_name], - prompt_tokens_used=response.usage.prompt_tokens if response.usage else 0, - completion_tokens_used=( - response.usage.completion_tokens if response.usage else 0 - ), - ) - self._budget.update_usage_and_cost(response) + if attempts > 1: + self._logger.debug(f"Total cost for {attempts} attempts: {round(cost, 2)}") + return response async def create_embedding( diff --git a/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py b/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py index e2599b0aa..2ed667725 100644 --- a/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py +++ b/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py @@ -1,5 +1,6 @@ import abc import enum +import math from typing import ( Callable, ClassVar, @@ -208,8 +209,12 @@ class ModelProviderBudget(ProviderBudget): def update_usage_and_cost( self, model_response: ModelResponse, - ) -> None: - """Update the usage and cost of the provider.""" + ) -> float: + """Update the usage and cost of the provider. + + Returns: + float: The (calculated) cost of the given model response. + """ model_info = model_response.model_info self.usage.update_usage(model_response) incurred_cost = ( @@ -218,6 +223,7 @@ class ModelProviderBudget(ProviderBudget): ) self.total_cost += incurred_cost self.remaining_budget -= incurred_cost + return incurred_cost class ModelProviderSettings(ProviderSettings): @@ -232,6 +238,7 @@ class ModelProvider(abc.ABC): default_settings: ClassVar[ModelProviderSettings] + _budget: Optional[ModelProviderBudget] _configuration: ModelProviderConfiguration @abc.abstractmethod @@ -246,9 +253,15 @@ class ModelProvider(abc.ABC): def get_token_limit(self, model_name: str) -> int: ... - @abc.abstractmethod + def get_incurred_cost(self) -> float: + if self._budget: + return self._budget.total_cost + return 0 + def get_remaining_budget(self) -> float: - ... + if self._budget: + return self._budget.remaining_budget + return math.inf class ModelTokenizer(Protocol): diff --git a/autogpts/autogpt/autogpt/core/resource/schema.py b/autogpts/autogpt/autogpt/core/resource/schema.py index ed7a94d02..d8cc1de31 100644 --- a/autogpts/autogpt/autogpt/core/resource/schema.py +++ b/autogpts/autogpt/autogpt/core/resource/schema.py @@ -31,8 +31,12 @@ class ProviderBudget(SystemConfiguration): usage: ProviderUsage @abc.abstractmethod - def update_usage_and_cost(self, *args, **kwargs) -> None: - """Update the usage and cost of the resource.""" + def update_usage_and_cost(self, *args, **kwargs) -> float: + """Update the usage and cost of the provider. + + Returns: + float: The (calculated) cost of the given model response. + """ ... -- cgit v1.2.3