author    Reinier van der Leer <pwuts@agpt.co>  2024-04-22 18:29:43 +0200
committer Reinier van der Leer <pwuts@agpt.co>  2024-04-22 18:40:48 +0200
commit    4db4ca08b2f4e9673a4b2fb4cd869f09ea282b5b (patch)
tree      732c19e6dc4b77e1b4463ecc31ebf646684c02e3
parent    feat(agent/core): Add `max_output_tokens` parameter to `create_chat_completio... (diff)
refactor(agent): Tweak `model_providers.schema` for easier use
- Set default values for `ProviderBudget` / `ModelProviderBudget` fields
- Remove redundant field redefinitions on the `ModelProviderBudget` class
- Change the `ModelProviderUsage.update_usage(..)` and `ModelProviderBudget.update_usage_and_cost(..)` signatures for easier use
- Change `ModelProviderBudget.usage` from `ModelProviderUsage` to `defaultdict[str, ModelProviderUsage]` for per-model usage tracking
- Fix the `ChatModelInfo`/`EmbeddingModelInfo` `service` attribute: rename it from `llm_service` to `service` to match the base class, and fix its type. This makes it unnecessary to specify the `service` field when creating a `ChatModelInfo` or `EmbeddingModelInfo` object.
- Use `defaultdict(ModelProviderBudget)` for task budget tracking in agent_protocol_server.py
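Taken together, the new budget API can be exercised roughly as in the sketch below (a minimal illustration assembled from the signatures in this diff; the token counts are hypothetical):

    from autogpt.core.resource.model_providers.openai import (
        OPEN_AI_CHAT_MODELS,
        OpenAIModelName,
    )
    from autogpt.core.resource.model_providers.schema import ModelProviderBudget

    budget = ModelProviderBudget()  # field defaults now make this valid
    cost = budget.update_usage_and_cost(
        model_info=OPEN_AI_CHAT_MODELS[OpenAIModelName.GPT3_v3],
        input_tokens_used=1500,  # hypothetical token counts
        output_tokens_used=300,
    )
    # usage is now tracked per model name rather than as a single aggregate
    print(budget.usage[OpenAIModelName.GPT3_v3].prompt_tokens)  # 1500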
-rw-r--r--  autogpts/autogpt/autogpt/app/agent_protocol_server.py             |  7
-rw-r--r--  autogpts/autogpt/autogpt/core/resource/model_providers/openai.py  | 36
-rw-r--r--  autogpts/autogpt/autogpt/core/resource/model_providers/schema.py  | 38
-rw-r--r--  autogpts/autogpt/autogpt/core/resource/schema.py                   |  7
-rw-r--r--  autogpts/autogpt/tests/unit/test_config.py                        |  3
5 files changed, 29 insertions(+), 62 deletions(-)
diff --git a/autogpts/autogpt/autogpt/app/agent_protocol_server.py b/autogpts/autogpt/autogpt/app/agent_protocol_server.py
index dd40545b6..fe0a3a0ee 100644
--- a/autogpts/autogpt/autogpt/app/agent_protocol_server.py
+++ b/autogpts/autogpt/autogpt/app/agent_protocol_server.py
@@ -1,6 +1,7 @@
import logging
import os
import pathlib
+from collections import defaultdict
from io import BytesIO
from uuid import uuid4
@@ -60,7 +61,7 @@ class AgentProtocolServer:
self.file_storage = file_storage
self.llm_provider = llm_provider
self.agent_manager = AgentManager(file_storage)
- self._task_budgets = {}
+ self._task_budgets = defaultdict(ModelProviderBudget)
async def start(self, port: int = 8000, router: APIRouter = base_router):
"""Start the agent server."""
@@ -461,9 +462,7 @@ class AgentProtocolServer:
"""
Configures the LLM provider with headers to link outgoing requests to the task.
"""
- task_llm_budget = self._task_budgets.get(
- task.task_id, self.llm_provider.default_settings.budget.copy(deep=True)
- )
+ task_llm_budget = self._task_budgets[task.task_id]
task_llm_provider_config = self.llm_provider._configuration.copy(deep=True)
_extra_request_headers = task_llm_provider_config.extra_request_headers
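The `defaultdict` removes the need for the explicit `.get(..., default)` dance above: the first lookup of a new task id creates a fresh `ModelProviderBudget` on the fly. A small sketch of the behaviour being relied on (the task id is hypothetical):

    from collections import defaultdict

    from autogpt.core.resource.model_providers.schema import ModelProviderBudget

    task_budgets = defaultdict(ModelProviderBudget)
    budget = task_budgets["task-123"]          # implicitly creates a default budget
    assert task_budgets["task-123"] is budget  # later lookups reuse the same object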
diff --git a/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py b/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py
index 2ebb56638..874282764 100644
--- a/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py
+++ b/autogpts/autogpt/autogpt/core/resource/model_providers/openai.py
@@ -1,6 +1,5 @@
import enum
import logging
-import math
import os
from pathlib import Path
from typing import Any, Callable, Coroutine, Iterator, Optional, ParamSpec, TypeVar
@@ -37,9 +36,7 @@ from autogpt.core.resource.model_providers.schema import (
ModelProviderConfiguration,
ModelProviderCredentials,
ModelProviderName,
- ModelProviderService,
ModelProviderSettings,
- ModelProviderUsage,
ModelTokenizer,
)
from autogpt.core.utils.json_schema import JSONSchema
@@ -49,7 +46,6 @@ _T = TypeVar("_T")
_P = ParamSpec("_P")
OpenAIEmbeddingParser = Callable[[Embedding], Embedding]
-OpenAIChatParser = Callable[[str], dict]
class OpenAIModelName(str, enum.Enum):
@@ -87,7 +83,6 @@ OPEN_AI_EMBEDDING_MODELS = {
for info in [
EmbeddingModelInfo(
name=OpenAIModelName.EMBEDDING_v2,
- service=ModelProviderService.EMBEDDING,
provider_name=ModelProviderName.OPENAI,
prompt_token_cost=0.0001 / 1000,
max_tokens=8191,
@@ -95,7 +90,6 @@ OPEN_AI_EMBEDDING_MODELS = {
),
EmbeddingModelInfo(
name=OpenAIModelName.EMBEDDING_v3_S,
- service=ModelProviderService.EMBEDDING,
provider_name=ModelProviderName.OPENAI,
prompt_token_cost=0.00002 / 1000,
max_tokens=8191,
@@ -103,7 +97,6 @@ OPEN_AI_EMBEDDING_MODELS = {
),
EmbeddingModelInfo(
name=OpenAIModelName.EMBEDDING_v3_L,
- service=ModelProviderService.EMBEDDING,
provider_name=ModelProviderName.OPENAI,
prompt_token_cost=0.00013 / 1000,
max_tokens=8191,
@@ -118,7 +111,6 @@ OPEN_AI_CHAT_MODELS = {
for info in [
ChatModelInfo(
name=OpenAIModelName.GPT3_v1,
- service=ModelProviderService.CHAT,
provider_name=ModelProviderName.OPENAI,
prompt_token_cost=0.0015 / 1000,
completion_token_cost=0.002 / 1000,
@@ -127,7 +119,6 @@ OPEN_AI_CHAT_MODELS = {
),
ChatModelInfo(
name=OpenAIModelName.GPT3_v2_16k,
- service=ModelProviderService.CHAT,
provider_name=ModelProviderName.OPENAI,
prompt_token_cost=0.003 / 1000,
completion_token_cost=0.004 / 1000,
@@ -136,7 +127,6 @@ OPEN_AI_CHAT_MODELS = {
),
ChatModelInfo(
name=OpenAIModelName.GPT3_v3,
- service=ModelProviderService.CHAT,
provider_name=ModelProviderName.OPENAI,
prompt_token_cost=0.001 / 1000,
completion_token_cost=0.002 / 1000,
@@ -145,7 +135,6 @@ OPEN_AI_CHAT_MODELS = {
),
ChatModelInfo(
name=OpenAIModelName.GPT3_v4,
- service=ModelProviderService.CHAT,
provider_name=ModelProviderName.OPENAI,
prompt_token_cost=0.0005 / 1000,
completion_token_cost=0.0015 / 1000,
@@ -154,7 +143,6 @@ OPEN_AI_CHAT_MODELS = {
),
ChatModelInfo(
name=OpenAIModelName.GPT4_v1,
- service=ModelProviderService.CHAT,
provider_name=ModelProviderName.OPENAI,
prompt_token_cost=0.03 / 1000,
completion_token_cost=0.06 / 1000,
@@ -163,7 +151,6 @@ OPEN_AI_CHAT_MODELS = {
),
ChatModelInfo(
name=OpenAIModelName.GPT4_v1_32k,
- service=ModelProviderService.CHAT,
provider_name=ModelProviderName.OPENAI,
prompt_token_cost=0.06 / 1000,
completion_token_cost=0.12 / 1000,
@@ -172,7 +159,6 @@ OPEN_AI_CHAT_MODELS = {
),
ChatModelInfo(
name=OpenAIModelName.GPT4_TURBO,
- service=ModelProviderService.CHAT,
provider_name=ModelProviderName.OPENAI,
prompt_token_cost=0.01 / 1000,
completion_token_cost=0.03 / 1000,
@@ -305,21 +291,12 @@ class OpenAIProvider(
retries_per_request=10,
),
credentials=None,
- budget=ModelProviderBudget(
- total_budget=math.inf,
- total_cost=0.0,
- remaining_budget=math.inf,
- usage=ModelProviderUsage(
- prompt_tokens=0,
- completion_tokens=0,
- total_tokens=0,
- ),
- ),
+ budget=ModelProviderBudget(),
)
- _budget: ModelProviderBudget
_configuration: OpenAIConfiguration
_credentials: OpenAICredentials
+ _budget: ModelProviderBudget
def __init__(
self,
@@ -648,12 +625,9 @@ class OpenAIProvider(
prompt_tokens_used = completion_tokens_used = 0
cost = self._budget.update_usage_and_cost(
- ChatModelResponse(
- response=AssistantChatMessage(content=None),
- model_info=OPEN_AI_CHAT_MODELS[model],
- prompt_tokens_used=prompt_tokens_used,
- completion_tokens_used=completion_tokens_used,
- )
+ model_info=OPEN_AI_CHAT_MODELS[model],
+ input_tokens_used=prompt_tokens_used,
+ output_tokens_used=completion_tokens_used,
)
self._logger.debug(
f"Completion usage: {prompt_tokens_used} input, "
diff --git a/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py b/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py
index 327718c11..dd69b526e 100644
--- a/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py
+++ b/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py
@@ -1,6 +1,7 @@
import abc
import enum
import math
+from collections import defaultdict
from typing import (
Any,
Callable,
@@ -90,7 +91,7 @@ class AssistantToolCallDict(TypedDict):
class AssistantChatMessage(ChatMessage):
- role: Literal["assistant"] = "assistant"
+ role: Literal[ChatMessage.Role.ASSISTANT] = ChatMessage.Role.ASSISTANT
content: Optional[str]
tool_calls: Optional[list[AssistantToolCall]] = None
@@ -187,39 +188,34 @@ class ModelProviderUsage(ProviderUsage):
completion_tokens: int = 0
prompt_tokens: int = 0
- total_tokens: int = 0
def update_usage(
self,
- model_response: ModelResponse,
+ input_tokens_used: int,
+ output_tokens_used: int = 0,
) -> None:
- self.completion_tokens += model_response.completion_tokens_used
- self.prompt_tokens += model_response.prompt_tokens_used
- self.total_tokens += (
- model_response.completion_tokens_used + model_response.prompt_tokens_used
- )
+ self.prompt_tokens += input_tokens_used
+ self.completion_tokens += output_tokens_used
class ModelProviderBudget(ProviderBudget):
- total_budget: float = UserConfigurable()
- total_cost: float
- remaining_budget: float
- usage: ModelProviderUsage
+ usage: defaultdict[str, ModelProviderUsage] = defaultdict(ModelProviderUsage)
def update_usage_and_cost(
self,
- model_response: ModelResponse,
+ model_info: ModelInfo,
+ input_tokens_used: int,
+ output_tokens_used: int = 0,
) -> float:
"""Update the usage and cost of the provider.
Returns:
float: The (calculated) cost of the given model response.
"""
- model_info = model_response.model_info
- self.usage.update_usage(model_response)
+ self.usage[model_info.name].update_usage(input_tokens_used, output_tokens_used)
incurred_cost = (
- model_response.completion_tokens_used * model_info.completion_token_cost
- + model_response.prompt_tokens_used * model_info.prompt_token_cost
+ output_tokens_used * model_info.completion_token_cost
+ + input_tokens_used * model_info.prompt_token_cost
)
self.total_cost += incurred_cost
self.remaining_budget -= incurred_cost
@@ -230,7 +226,7 @@ class ModelProviderSettings(ProviderSettings):
resource_type: ResourceType = ResourceType.MODEL
configuration: ModelProviderConfiguration
credentials: ModelProviderCredentials
- budget: ModelProviderBudget
+ budget: Optional[ModelProviderBudget] = None
class ModelProvider(abc.ABC):
@@ -238,8 +234,8 @@ class ModelProvider(abc.ABC):
default_settings: ClassVar[ModelProviderSettings]
- _budget: Optional[ModelProviderBudget]
_configuration: ModelProviderConfiguration
+ _budget: Optional[ModelProviderBudget] = None
@abc.abstractmethod
def count_tokens(self, text: str, model_name: str) -> int:
@@ -284,7 +280,7 @@ class ModelTokenizer(Protocol):
class EmbeddingModelInfo(ModelInfo):
"""Struct for embedding model information."""
- llm_service = ModelProviderService.EMBEDDING
+ service: Literal[ModelProviderService.EMBEDDING] = ModelProviderService.EMBEDDING
max_tokens: int
embedding_dimensions: int
@@ -322,7 +318,7 @@ class EmbeddingModelProvider(ModelProvider):
class ChatModelInfo(ModelInfo):
"""Struct for language model information."""
- llm_service = ModelProviderService.CHAT
+ service: Literal[ModelProviderService.CHAT] = ModelProviderService.CHAT
max_tokens: int
has_function_call_api: bool = False
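With `usage` as a `defaultdict[str, ModelProviderUsage]`, each model gets its own token counters on first use, and the removed `total_tokens` field can be derived on demand. A minimal illustration (the model names are hypothetical):

    from collections import defaultdict

    from autogpt.core.resource.model_providers.schema import ModelProviderUsage

    usage: defaultdict[str, ModelProviderUsage] = defaultdict(ModelProviderUsage)
    usage["gpt-4"].update_usage(input_tokens_used=800, output_tokens_used=150)
    usage["gpt-3.5-turbo"].update_usage(input_tokens_used=200)

    # equivalent of the dropped `total_tokens` aggregate:
    total = sum(u.prompt_tokens + u.completion_tokens for u in usage.values())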
diff --git a/autogpts/autogpt/autogpt/core/resource/schema.py b/autogpts/autogpt/autogpt/core/resource/schema.py
index d8cc1de31..0da275ee2 100644
--- a/autogpts/autogpt/autogpt/core/resource/schema.py
+++ b/autogpts/autogpt/autogpt/core/resource/schema.py
@@ -1,5 +1,6 @@
import abc
import enum
+import math
from pydantic import BaseModel, SecretBytes, SecretField, SecretStr
@@ -25,9 +26,9 @@ class ProviderUsage(SystemConfiguration, abc.ABC):
class ProviderBudget(SystemConfiguration):
- total_budget: float = UserConfigurable()
- total_cost: float
- remaining_budget: float
+ total_budget: float = UserConfigurable(math.inf)
+ total_cost: float = 0
+ remaining_budget: float = math.inf
usage: ProviderUsage
@abc.abstractmethod
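These defaults are what allow `ModelProviderBudget()` to be constructed with no arguments elsewhere in this commit: a new budget starts out unlimited and untouched. A sketch of the resulting invariants:

    import math

    from autogpt.core.resource.model_providers.schema import ModelProviderBudget

    budget = ModelProviderBudget()
    assert budget.total_budget == math.inf
    assert budget.total_cost == 0
    assert budget.remaining_budget == math.inf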
diff --git a/autogpts/autogpt/tests/unit/test_config.py b/autogpts/autogpt/tests/unit/test_config.py
index 2eca547e8..70d1b65b0 100644
--- a/autogpts/autogpt/tests/unit/test_config.py
+++ b/autogpts/autogpt/tests/unit/test_config.py
@@ -18,7 +18,6 @@ from autogpt.core.resource.model_providers.openai import OpenAIModelName
from autogpt.core.resource.model_providers.schema import (
ChatModelInfo,
ModelProviderName,
- ModelProviderService,
)
@@ -153,7 +152,6 @@ async def test_create_config_gpt4only(config: Config) -> None:
) as mock_get_models:
mock_get_models.return_value = [
ChatModelInfo(
- service=ModelProviderService.CHAT,
name=GPT_4_MODEL,
provider_name=ModelProviderName.OPENAI,
max_tokens=4096,
@@ -174,7 +172,6 @@ async def test_create_config_gpt3only(config: Config) -> None:
) as mock_get_models:
mock_get_models.return_value = [
ChatModelInfo(
- service=ModelProviderService.CHAT,
name=GPT_3_MODEL,
provider_name=ModelProviderName.OPENAI,
max_tokens=4096,