Diffstat (limited to 'autogpts/autogpt/autogpt/agents')
-rw-r--r-- [l---------]  autogpts/autogpt/autogpt/agents/README.md                      | 38
-rw-r--r--               autogpts/autogpt/autogpt/agents/agent.py                       | 43
-rw-r--r--               autogpts/autogpt/autogpt/agents/base.py                        | 15
-rw-r--r--               autogpts/autogpt/autogpt/agents/prompt_strategies/one_shot.py  | 42
4 files changed, 83 insertions(+), 55 deletions(-)
diff --git a/autogpts/autogpt/autogpt/agents/README.md b/autogpts/autogpt/autogpt/agents/README.md
index adc158547..4ab573243 120000..100644
--- a/autogpts/autogpt/autogpt/agents/README.md
+++ b/autogpts/autogpt/autogpt/agents/README.md
@@ -1 +1,37 @@
-../../../../docs/content/AutoGPT/component agent/agents.md
\ No newline at end of file
+# 🤖 Agents
+
+An agent is composed of [🧩 Components](./components.md) and is responsible for executing pipelines and some additional logic. The base class for all agents is `BaseAgent`; it contains the necessary logic to collect components and execute protocols.
+
+## Important methods
+
+`BaseAgent` provides two abstract methods that any agent needs in order to work properly:
+1. `propose_action`: proposes an action based on the current state of the agent; returns a `ThoughtProcessOutput`.
+2. `execute`: executes the proposed action; returns an `ActionResult`. How the two methods interact is sketched below.
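+
+In a typical run loop, the two methods are called in alternation. A minimal, hypothetical sketch (the real signatures are defined on `BaseAgent` and may take additional parameters):
+
+```py
+async def run(agent: BaseAgent) -> None:
+    # Hypothetical driver loop: alternate between proposing and executing
+    while True:
+        proposal = await agent.propose_action()
+        result = await agent.execute(proposal)
+        ...  # inspect `result` and decide whether to continue or stop
+```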
+
+## AutoGPT Agent
+
+`Agent` is the main agent provided by AutoGPT. It is a subclass of `BaseAgent` that comes with all the [Built-in Components](./built-in-components.md) and implements the essential abstract methods of `BaseAgent`: `propose_action` and `execute`.
+
+## Building your own Agent
+
+The easiest way to build your own agent is to extend the `Agent` class and add additional components. By doing this you can reuse the existing components and the default logic for executing [⚙️ Protocols](./protocols.md).
+
+```py
+class MyComponent(AgentComponent):
+    pass
+
+class MyAgent(Agent):
+    def __init__(
+        self,
+        settings: AgentSettings,
+        llm_provider: ChatModelProvider,
+        file_storage: FileStorage,
+        legacy_config: Config,
+    ):
+        # Call the parent constructor to bring in the default components
+        super().__init__(settings, llm_provider, file_storage, legacy_config)
+        # Add your custom component
+        self.my_component = MyComponent()
+```
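+
+Note that no explicit registration call is needed: `BaseAgent` collects components from the agent's attributes, so assigning `MyComponent()` to `self.my_component` is enough to make it part of the agent.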
+
+For more customization, you can override `propose_action` and `execute`, or even subclass `BaseAgent` directly. This gives you full control over the agent's components and behavior. Have a look at the [implementation of Agent](https://github.com/Significant-Gravitas/AutoGPT/tree/master/autogpts/autogpt/autogpt/agents/agent.py) for more details.
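+
+For example, a minimal sketch (hypothetical, with a simplified signature; see `Agent` for the real one) that wraps the default `propose_action`:
+
+```py
+class MyVerboseAgent(Agent):
+    async def propose_action(self) -> OneShotAgentActionProposal:
+        # Run the default proposal pipeline first
+        proposal = await super().propose_action()
+        # Then add custom behavior, e.g. logging the chosen tool
+        print(f"Proposing tool call: {proposal.use_tool.name}")
+        return proposal
+```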
diff --git a/autogpts/autogpt/autogpt/agents/agent.py b/autogpts/autogpt/autogpt/agents/agent.py
index 3572cbed0..4a66a7ca4 100644
--- a/autogpts/autogpt/autogpt/agents/agent.py
+++ b/autogpts/autogpt/autogpt/agents/agent.py
@@ -19,7 +19,6 @@ from autogpt.components.event_history import EventHistoryComponent
 from autogpt.core.configuration import Configurable
 from autogpt.core.prompting import ChatPrompt
 from autogpt.core.resource.model_providers import (
-    AssistantChatMessage,
     AssistantFunctionCall,
     ChatMessage,
     ChatModelProvider,
@@ -27,7 +26,7 @@ from autogpt.core.resource.model_providers import (
 )
 from autogpt.core.runner.client_lib.logging.helpers import dump_prompt
 from autogpt.file_storage.base import FileStorage
-from autogpt.llm.providers.openai import get_openai_command_specs
+from autogpt.llm.providers.openai import function_specs_from_commands
 from autogpt.logs.log_cycle import (
     CURRENT_CONTEXT_FILE_NAME,
     NEXT_ACTION_FILE_NAME,
@@ -46,7 +45,6 @@ from autogpt.utils.exceptions import (
     AgentException,
     AgentTerminated,
     CommandExecutionError,
-    InvalidArgumentError,
     UnknownCommandError,
 )
@@ -104,7 +102,11 @@ class Agent(BaseAgent, Configurable[AgentSettings]):
         self.ai_profile = settings.ai_profile
         self.directives = settings.directives
         prompt_config = OneShotAgentPromptStrategy.default_configuration.copy(deep=True)
-        prompt_config.use_functions_api = settings.config.use_functions_api
+        prompt_config.use_functions_api = (
+            settings.config.use_functions_api
+            # Anthropic currently doesn't support tools + prefilling :(
+            and self.llm.provider_name != "anthropic"
+        )
         self.prompt_strategy = OneShotAgentPromptStrategy(prompt_config, logger)
         self.commands: list[Command] = []
@@ -172,7 +174,7 @@ class Agent(BaseAgent, Configurable[AgentSettings]):
             task=self.state.task,
             ai_profile=self.state.ai_profile,
             ai_directives=directives,
-            commands=get_openai_command_specs(self.commands),
+            commands=function_specs_from_commands(self.commands),
             include_os_info=self.legacy_config.execute_local_commands,
         )
@@ -202,12 +204,9 @@ class Agent(BaseAgent, Configurable[AgentSettings]):
         ] = await self.llm_provider.create_chat_completion(
             prompt.messages,
             model_name=self.llm.name,
-            completion_parser=self.parse_and_validate_response,
-            functions=(
-                get_openai_command_specs(self.commands)
-                if self.config.use_functions_api
-                else []
-            ),
+            completion_parser=self.prompt_strategy.parse_response_content,
+            functions=prompt.functions,
+            prefill_response=prompt.prefill_response,
         )
         result = response.parsed_result
@@ -223,28 +222,6 @@ class Agent(BaseAgent, Configurable[AgentSettings]):
         return result
-    def parse_and_validate_response(
-        self, llm_response: AssistantChatMessage
-    ) -> OneShotAgentActionProposal:
-        parsed_response = self.prompt_strategy.parse_response_content(llm_response)
-
-        # Validate command arguments
-        command_name = parsed_response.use_tool.name
-        command = self._get_command(command_name)
-        if arg_errors := command.validate_args(parsed_response.use_tool.arguments)[1]:
-            fmt_errors = [
-                f"{'.'.join(str(p) for p in f.path)}: {f.message}"
-                if f.path
-                else f.message
-                for f in arg_errors
-            ]
-            raise InvalidArgumentError(
-                f"The set of arguments supplied for {command_name} is invalid:\n"
-                + "\n".join(fmt_errors)
-            )
-
-        return parsed_response
-
     async def execute(
         self,
         proposal: OneShotAgentActionProposal,
diff --git a/autogpts/autogpt/autogpt/agents/base.py b/autogpts/autogpt/autogpt/agents/base.py
index cf8e3cac8..515515701 100644
--- a/autogpts/autogpt/autogpt/agents/base.py
+++ b/autogpts/autogpt/autogpt/agents/base.py
@@ -39,11 +39,12 @@ from autogpt.core.configuration import (
     SystemSettings,
     UserConfigurable,
 )
-from autogpt.core.resource.model_providers import AssistantFunctionCall
-from autogpt.core.resource.model_providers.openai import (
-    OPEN_AI_CHAT_MODELS,
-    OpenAIModelName,
+from autogpt.core.resource.model_providers import (
+    CHAT_MODELS,
+    AssistantFunctionCall,
+    ModelName,
 )
+from autogpt.core.resource.model_providers.openai import OpenAIModelName
 from autogpt.models.utils import ModelWithSummary
 from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
@@ -56,8 +57,8 @@ P = ParamSpec("P")
 class BaseAgentConfiguration(SystemConfiguration):
     allow_fs_access: bool = UserConfigurable(default=False)
-    fast_llm: OpenAIModelName = UserConfigurable(default=OpenAIModelName.GPT3_16k)
-    smart_llm: OpenAIModelName = UserConfigurable(default=OpenAIModelName.GPT4)
+    fast_llm: ModelName = UserConfigurable(default=OpenAIModelName.GPT3_16k)
+    smart_llm: ModelName = UserConfigurable(default=OpenAIModelName.GPT4)
     use_functions_api: bool = UserConfigurable(default=False)
     default_cycle_instruction: str = DEFAULT_TRIGGERING_PROMPT
@@ -174,7 +175,7 @@ class BaseAgent(Configurable[BaseAgentSettings], metaclass=AgentMeta):
         llm_name = (
             self.config.smart_llm if self.config.big_brain else self.config.fast_llm
         )
-        return OPEN_AI_CHAT_MODELS[llm_name]
+        return CHAT_MODELS[llm_name]
     @property
     def send_token_limit(self) -> int:
diff --git a/autogpts/autogpt/autogpt/agents/prompt_strategies/one_shot.py b/autogpts/autogpt/autogpt/agents/prompt_strategies/one_shot.py
index 53fadaa7c..ff08f4669 100644
--- a/autogpts/autogpt/autogpt/agents/prompt_strategies/one_shot.py
+++ b/autogpts/autogpt/autogpt/agents/prompt_strategies/one_shot.py
@@ -122,7 +122,7 @@ class OneShotAgentPromptStrategy(PromptStrategy):
         1. System prompt
         3. `cycle_instruction`
         """
-        system_prompt = self.build_system_prompt(
+        system_prompt, response_prefill = self.build_system_prompt(
             ai_profile=ai_profile,
             ai_directives=ai_directives,
             commands=commands,
@@ -131,24 +131,34 @@ class OneShotAgentPromptStrategy(PromptStrategy):
         final_instruction_msg = ChatMessage.user(self.config.choose_action_instruction)
-        prompt = ChatPrompt(
+        return ChatPrompt(
             messages=[
                 ChatMessage.system(system_prompt),
                 ChatMessage.user(f'"""{task}"""'),
                 *messages,
                 final_instruction_msg,
             ],
+            prefill_response=response_prefill,
+            functions=commands if self.config.use_functions_api else [],
         )
-        return prompt
-
     def build_system_prompt(
         self,
         ai_profile: AIProfile,
         ai_directives: AIDirectives,
         commands: list[CompletionModelFunction],
         include_os_info: bool,
-    ) -> str:
+    ) -> tuple[str, str]:
+        """
+        Builds the system prompt.
+
+        Returns:
+            str: The system prompt body
+            str: The desired start for the LLM's response; used to steer the output
+        """
+        response_fmt_instruction, response_prefill = self.response_format_instruction(
+            self.config.use_functions_api
+        )
         system_prompt_parts = (
             self._generate_intro_prompt(ai_profile)
             + (self._generate_os_info() if include_os_info else [])
@@ -169,16 +179,16 @@ class OneShotAgentPromptStrategy(PromptStrategy):
                 " in the next message. Your job is to complete the task while following"
                 " your directives as given above, and terminate when your task is done."
             ]
-            + [
-                "## RESPONSE FORMAT\n"
-                + self.response_format_instruction(self.config.use_functions_api)
-            ]
+            + ["## RESPONSE FORMAT\n" + response_fmt_instruction]
         )
         # Join non-empty parts together into paragraph format
-        return "\n\n".join(filter(None, system_prompt_parts)).strip("\n")
+        return (
+            "\n\n".join(filter(None, system_prompt_parts)).strip("\n"),
+            response_prefill,
+        )
-    def response_format_instruction(self, use_functions_api: bool) -> str:
+    def response_format_instruction(self, use_functions_api: bool) -> tuple[str, str]:
         response_schema = self.response_schema.copy(deep=True)
         if (
             use_functions_api
@@ -193,11 +203,15 @@ class OneShotAgentPromptStrategy(PromptStrategy):
             "\n",
             response_schema.to_typescript_object_interface(_RESPONSE_INTERFACE_NAME),
         )
+        response_prefill = f'{{\n    "{list(response_schema.properties.keys())[0]}":'
         return (
-            f"YOU MUST ALWAYS RESPOND WITH A JSON OBJECT OF THE FOLLOWING TYPE:\n"
-            f"{response_format}"
-            + ("\n\nYOU MUST ALSO INVOKE A TOOL!" if use_functions_api else "")
+            (
+                f"YOU MUST ALWAYS RESPOND WITH A JSON OBJECT OF THE FOLLOWING TYPE:\n"
+                f"{response_format}"
+                + ("\n\nYOU MUST ALSO INVOKE A TOOL!" if use_functions_api else "")
+            ),
+            response_prefill,
         )
     def _generate_intro_prompt(self, ai_profile: AIProfile) -> list[str]: