From ada2e19829155f08541c1358cbf384a8e01ff6f7 Mon Sep 17 00:00:00 2001
From: Reinier van der Leer
Date: Thu, 2 May 2024 00:43:11 +0200
Subject: refactor(agent)!: Use Pydantic models for `Agent` process output (#7116)

* Introduce `BaseAgentActionProposal`, `OneShotAgentActionProposal`, and `AssistantThoughts` models to replace `ThoughtProcessOutput` and `DEFAULT_RESPONSE_SCHEMA`
* Refactor and clean up code, since we no longer need to do as much type checking everywhere
* Tweak `OneShot` response format instruction

Granular:
* `autogpt.agents.prompt_strategies.one_shot`
  * Replace `ThoughtProcessOutput`, `DEFAULT_RESPONSE_SCHEMA`, and the parsing logic with `AssistantThoughts` and `OneShotAgentActionProposal`
  * (TANGENTIAL) Move response format instruction into main system prompt message
  * (TANGENTIAL) Adjust response format instruction
* `autogpt.agents.base`
  * Add `BaseAgentActionProposal` base model -> replace `ThoughtProcessOutput`
  * Change signature of `execute` method to accept `BaseAgentActionProposal` instead of separate `command_name` and `command_args`
  * Add `do_not_execute(proposal, feedback)` abstract method, replacing `execute("human_feedback", ..., feedback)`
  * Move `history` definition from `BaseAgentSettings` to `AgentSettings` (the only place where it's used anyway)
* `autogpt.models`
  * Add `.utils` > `ModelWithSummary` base model
  * Make the models in `.action_history` (`Episode`, `EpisodicActionHistory`) generic, with a type parameter for the type of `Episode.action`
* `autogpt.core.resource.model_providers.schema`
  * Add `__str__` to `AssistantFunctionCall`, which pretty-prints the function call

All other changes are a direct result of the changes above.

## BREAKING CHANGE:
* Due to the change in `autogpt.models.action_history`, the application can no longer load or resume agents saved before this change, and vice versa.
* The `additional_output` field in the response of `execute_step` has changed slightly: * Type of `.thoughts.plan` has changed from `str` to `list[str]` * `.command` -> `.use_tool` * `.command.args` -> `.use_tool.arguments` --- autogpts/autogpt/autogpt/agents/__init__.py | 11 +- autogpts/autogpt/autogpt/agents/agent.py | 135 ++++++------- autogpts/autogpt/autogpt/agents/base.py | 36 ++-- .../autogpt/autogpt/agents/features/watchdog.py | 22 +-- .../autogpt/agents/prompt_strategies/one_shot.py | 217 +++++---------------- autogpts/autogpt/autogpt/agents/protocols.py | 4 +- .../autogpt/autogpt/app/agent_protocol_server.py | 83 ++++---- autogpts/autogpt/autogpt/app/main.py | 159 ++++++++------- .../autogpt/autogpt/components/event_history.py | 20 +- .../core/resource/model_providers/schema.py | 4 + autogpts/autogpt/autogpt/models/action_history.py | 46 +++-- autogpts/autogpt/autogpt/models/utils.py | 10 + 12 files changed, 307 insertions(+), 440 deletions(-) create mode 100644 autogpts/autogpt/autogpt/models/utils.py diff --git a/autogpts/autogpt/autogpt/agents/__init__.py b/autogpts/autogpt/autogpt/agents/__init__.py index 94a5f42a5..f6ae48c59 100644 --- a/autogpts/autogpt/autogpt/agents/__init__.py +++ b/autogpts/autogpt/autogpt/agents/__init__.py @@ -1,4 +1,9 @@ -from .agent import Agent -from .base import AgentThoughts, BaseAgent, CommandArgs, CommandName +from .agent import Agent, OneShotAgentActionProposal +from .base import BaseAgent, BaseAgentActionProposal -__all__ = ["BaseAgent", "Agent", "CommandName", "CommandArgs", "AgentThoughts"] +__all__ = [ + "BaseAgent", + "Agent", + "BaseAgentActionProposal", + "OneShotAgentActionProposal", +] diff --git a/autogpts/autogpt/autogpt/agents/agent.py b/autogpts/autogpt/autogpt/agents/agent.py index 0ffd2db2a..3572cbed0 100644 --- a/autogpts/autogpt/autogpt/agents/agent.py +++ b/autogpts/autogpt/autogpt/agents/agent.py @@ -8,7 +8,6 @@ from typing import TYPE_CHECKING, Optional import sentry_sdk from pydantic import Field -from autogpt.agents.prompt_strategies.one_shot import OneShotAgentPromptStrategy from autogpt.commands.execute_code import CodeExecutorComponent from autogpt.commands.git_operations import GitOperationsComponent from autogpt.commands.image_gen import ImageGeneratorComponent @@ -19,9 +18,11 @@ from autogpt.commands.web_selenium import WebSeleniumComponent from autogpt.components.event_history import EventHistoryComponent from autogpt.core.configuration import Configurable from autogpt.core.prompting import ChatPrompt -from autogpt.core.resource.model_providers import ChatMessage, ChatModelProvider -from autogpt.core.resource.model_providers.schema import ( +from autogpt.core.resource.model_providers import ( AssistantChatMessage, + AssistantFunctionCall, + ChatMessage, + ChatModelProvider, ChatModelResponse, ) from autogpt.core.runner.client_lib.logging.helpers import dump_prompt @@ -33,12 +34,12 @@ from autogpt.logs.log_cycle import ( USER_INPUT_FILE_NAME, LogCycleHandler, ) -from autogpt.logs.utils import fmt_kwargs from autogpt.models.action_history import ( ActionErrorResult, ActionInterruptedByHuman, ActionResult, ActionSuccessResult, + EpisodicActionHistory, ) from autogpt.models.command import Command, CommandOutput from autogpt.utils.exceptions import ( @@ -49,15 +50,14 @@ from autogpt.utils.exceptions import ( UnknownCommandError, ) -from .base import ( - BaseAgent, - BaseAgentConfiguration, - BaseAgentSettings, - ThoughtProcessOutput, -) +from .base import BaseAgent, BaseAgentConfiguration, BaseAgentSettings from 
.features.agent_file_manager import FileManagerComponent from .features.context import ContextComponent from .features.watchdog import WatchdogComponent +from .prompt_strategies.one_shot import ( + OneShotAgentActionProposal, + OneShotAgentPromptStrategy, +) from .protocols import ( AfterExecute, AfterParse, @@ -79,6 +79,11 @@ class AgentConfiguration(BaseAgentConfiguration): class AgentSettings(BaseAgentSettings): config: AgentConfiguration = Field(default_factory=AgentConfiguration) + history: EpisodicActionHistory[OneShotAgentActionProposal] = Field( + default_factory=EpisodicActionHistory[OneShotAgentActionProposal] + ) + """(STATE) The action history of the agent.""" + class Agent(BaseAgent, Configurable[AgentSettings]): default_settings: AgentSettings = AgentSettings( @@ -137,7 +142,7 @@ class Agent(BaseAgent, Configurable[AgentSettings]): self.event_history = settings.history self.legacy_config = legacy_config - async def propose_action(self) -> ThoughtProcessOutput: + async def propose_action(self) -> OneShotAgentActionProposal: """Proposes the next action to execute, based on the task and current state. Returns: @@ -188,12 +193,12 @@ class Agent(BaseAgent, Configurable[AgentSettings]): async def complete_and_parse( self, prompt: ChatPrompt, exception: Optional[Exception] = None - ) -> ThoughtProcessOutput: + ) -> OneShotAgentActionProposal: if exception: prompt.messages.append(ChatMessage.system(f"Error: {exception}")) response: ChatModelResponse[ - ThoughtProcessOutput + OneShotAgentActionProposal ] = await self.llm_provider.create_chat_completion( prompt.messages, model_name=self.llm.name, @@ -210,7 +215,7 @@ class Agent(BaseAgent, Configurable[AgentSettings]): self.state.ai_profile.ai_name, self.created_at, self.config.cycle_count, - result.thoughts, + result.thoughts.dict(), NEXT_ACTION_FILE_NAME, ) @@ -220,13 +225,13 @@ class Agent(BaseAgent, Configurable[AgentSettings]): def parse_and_validate_response( self, llm_response: AssistantChatMessage - ) -> ThoughtProcessOutput: + ) -> OneShotAgentActionProposal: parsed_response = self.prompt_strategy.parse_response_content(llm_response) # Validate command arguments - command_name = parsed_response.command_name + command_name = parsed_response.use_tool.name command = self._get_command(command_name) - if arg_errors := command.validate_args(parsed_response.command_args)[1]: + if arg_errors := command.validate_args(parsed_response.use_tool.arguments)[1]: fmt_errors = [ f"{'.'.join(str(p) for p in f.path)}: {f.message}" if f.path @@ -242,49 +247,50 @@ class Agent(BaseAgent, Configurable[AgentSettings]): async def execute( self, - command_name: str, - command_args: dict[str, str] = {}, - user_input: str = "", + proposal: OneShotAgentActionProposal, + user_feedback: str = "", ) -> ActionResult: - result: ActionResult - - if command_name == "human_feedback": - result = ActionInterruptedByHuman(feedback=user_input) - self.log_cycle_handler.log_cycle( - self.state.ai_profile.ai_name, - self.created_at, - self.config.cycle_count, - user_input, - USER_INPUT_FILE_NAME, + tool = proposal.use_tool + + # Get commands + self.commands = await self.run_pipeline(CommandProvider.get_commands) + self._remove_disabled_commands() + + try: + return_value = await self._execute_tool(tool) + + result = ActionSuccessResult(outputs=return_value) + except AgentTerminated: + raise + except AgentException as e: + result = ActionErrorResult.from_exception(e) + logger.warning(f"{tool} raised an error: {e}") + sentry_sdk.capture_exception(e) + + result_tlength = 
self.llm_provider.count_tokens(str(result), self.llm.name) + if result_tlength > self.send_token_limit // 3: + result = ActionErrorResult( + reason=f"Command {tool.name} returned too much output. " + "Do not execute this command again with the same arguments." ) - else: - # Get commands - self.commands = await self.run_pipeline(CommandProvider.get_commands) - self._remove_disabled_commands() - - try: - return_value = await self._execute_command( - command_name=command_name, - arguments=command_args, - ) - - result = ActionSuccessResult(outputs=return_value) - except AgentTerminated: - raise - except AgentException as e: - result = ActionErrorResult.from_exception(e) - logger.warning( - f"{command_name}({fmt_kwargs(command_args)}) raised an error: {e}" - ) - sentry_sdk.capture_exception(e) - - result_tlength = self.llm_provider.count_tokens(str(result), self.llm.name) - if result_tlength > self.send_token_limit // 3: - result = ActionErrorResult( - reason=f"Command {command_name} returned too much output. " - "Do not execute this command again with the same arguments." - ) + await self.run_pipeline(AfterExecute.after_execute, result) + + logger.debug("\n".join(self.trace)) + + return result + + async def do_not_execute( + self, denied_proposal: OneShotAgentActionProposal, user_feedback: str + ) -> ActionResult: + result = ActionInterruptedByHuman(feedback=user_feedback) + self.log_cycle_handler.log_cycle( + self.state.ai_profile.ai_name, + self.created_at, + self.config.cycle_count, + user_feedback, + USER_INPUT_FILE_NAME, + ) await self.run_pipeline(AfterExecute.after_execute, result) @@ -292,24 +298,19 @@ class Agent(BaseAgent, Configurable[AgentSettings]): return result - async def _execute_command( - self, - command_name: str, - arguments: dict[str, str], - ) -> CommandOutput: + async def _execute_tool(self, tool_call: AssistantFunctionCall) -> CommandOutput: """Execute the command and return the result Args: - command_name (str): The name of the command to execute - arguments (dict): The arguments for the command + tool_call (AssistantFunctionCall): The tool call to execute Returns: - str: The result of the command + str: The execution result """ # Execute a native command with the same name or alias, if it exists - command = self._get_command(command_name) + command = self._get_command(tool_call.name) try: - result = command(**arguments) + result = command(**tool_call.arguments) if inspect.isawaitable(result): return await result return result diff --git a/autogpts/autogpt/autogpt/agents/base.py b/autogpts/autogpt/autogpt/agents/base.py index e293441d8..cf8e3cac8 100644 --- a/autogpts/autogpt/autogpt/agents/base.py +++ b/autogpts/autogpt/autogpt/agents/base.py @@ -22,6 +22,7 @@ if TYPE_CHECKING: from autogpt.core.resource.model_providers.schema import ( ChatModelInfo, ) + from autogpt.models.action_history import ActionResult from autogpt.agents import protocols as _protocols from autogpt.agents.components import ( @@ -38,11 +39,12 @@ from autogpt.core.configuration import ( SystemSettings, UserConfigurable, ) +from autogpt.core.resource.model_providers import AssistantFunctionCall from autogpt.core.resource.model_providers.openai import ( OPEN_AI_CHAT_MODELS, OpenAIModelName, ) -from autogpt.models.action_history import ActionResult, EpisodicActionHistory +from autogpt.models.utils import ModelWithSummary from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT logger = logging.getLogger(__name__) @@ -50,10 +52,6 @@ logger = logging.getLogger(__name__) T = TypeVar("T") P = 
ParamSpec("P") -CommandName = str -CommandArgs = dict[str, str] -AgentThoughts = dict[str, Any] - class BaseAgentConfiguration(SystemConfiguration): allow_fs_access: bool = UserConfigurable(default=False) @@ -131,9 +129,6 @@ class BaseAgentSettings(SystemSettings): config: BaseAgentConfiguration = Field(default_factory=BaseAgentConfiguration) """The configuration for this BaseAgent subsystem instance.""" - history: EpisodicActionHistory = Field(default_factory=EpisodicActionHistory) - """(STATE) The action history of the agent.""" - class AgentMeta(ABCMeta): def __call__(cls, *args, **kwargs): @@ -144,13 +139,9 @@ class AgentMeta(ABCMeta): return instance -class ThoughtProcessOutput(BaseModel): - command_name: str = "" - command_args: dict[str, Any] = Field(default_factory=dict) - thoughts: dict[str, Any] = Field(default_factory=dict) - - def to_tuple(self) -> tuple[CommandName, CommandArgs, AgentThoughts]: - return self.command_name, self.command_args, self.thoughts +class BaseAgentActionProposal(BaseModel): + thoughts: str | ModelWithSummary + use_tool: AssistantFunctionCall = None class BaseAgent(Configurable[BaseAgentSettings], metaclass=AgentMeta): @@ -190,15 +181,22 @@ class BaseAgent(Configurable[BaseAgentSettings], metaclass=AgentMeta): return self.config.send_token_limit or self.llm.max_tokens * 3 // 4 @abstractmethod - async def propose_action(self) -> ThoughtProcessOutput: + async def propose_action(self) -> BaseAgentActionProposal: ... @abstractmethod async def execute( self, - command_name: str, - command_args: dict[str, str] = {}, - user_input: str = "", + proposal: BaseAgentActionProposal, + user_feedback: str = "", + ) -> ActionResult: + ... + + @abstractmethod + async def do_not_execute( + self, + denied_proposal: BaseAgentActionProposal, + user_feedback: str, ) -> ActionResult: ... 
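Illustration (not part of the patch): the new interface replaces the old `(command_name, command_args, thoughts)` tuple with a single model that bundles the LLM's reasoning and one tool invocation. A minimal self-contained sketch mirroring the models above — pydantic v1 style as used in the codebase; the `__str__` body here only approximates the patch's `fmt_kwargs` helper:

from typing import Any
from pydantic import BaseModel


class AssistantFunctionCall(BaseModel):
    # Mirrors autogpt.core.resource.model_providers.schema.AssistantFunctionCall
    name: str
    arguments: dict[str, Any]

    def __str__(self) -> str:
        # Approximates fmt_kwargs(self.arguments) from the patch above
        return f"{self.name}({', '.join(f'{k}={v!r}' for k, v in self.arguments.items())})"


class BaseAgentActionProposal(BaseModel):
    # Mirrors autogpt.agents.base.BaseAgentActionProposal; in the real model,
    # `thoughts` may also be a ModelWithSummary (e.g. AssistantThoughts)
    thoughts: str
    use_tool: AssistantFunctionCall


proposal = BaseAgentActionProposal(
    thoughts="The file has to be opened before it can be edited.",
    use_tool=AssistantFunctionCall(name="open_file", arguments={"file_path": "notes.md"}),
)
# The proposal is now passed around as a whole instead of an unpacked tuple,
# e.g. `await agent.execute(proposal)` or `await agent.do_not_execute(proposal, feedback)`
print(proposal.use_tool)  # open_file(file_path='notes.md')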
diff --git a/autogpts/autogpt/autogpt/agents/features/watchdog.py b/autogpts/autogpt/autogpt/agents/features/watchdog.py index 8f3758ef4..9a623d6a5 100644 --- a/autogpts/autogpt/autogpt/agents/features/watchdog.py +++ b/autogpts/autogpt/autogpt/agents/features/watchdog.py @@ -1,13 +1,11 @@ import logging -from autogpt.agents.base import ThoughtProcessOutput +from autogpt.agents.base import BaseAgentActionProposal, BaseAgentConfiguration from autogpt.agents.components import ComponentSystemError from autogpt.agents.features.context import ContextComponent from autogpt.agents.protocols import AfterParse from autogpt.models.action_history import EpisodicActionHistory -from ..base import BaseAgentConfiguration - logger = logging.getLogger(__name__) @@ -20,13 +18,15 @@ class WatchdogComponent(AfterParse): run_after = [ContextComponent] def __init__( - self, config: BaseAgentConfiguration, event_history: EpisodicActionHistory + self, + config: BaseAgentConfiguration, + event_history: EpisodicActionHistory[BaseAgentActionProposal], ): self.config = config self.event_history = event_history self.revert_big_brain = False - def after_parse(self, result: ThoughtProcessOutput) -> None: + def after_parse(self, result: BaseAgentActionProposal) -> None: if self.revert_big_brain: self.config.big_brain = False self.revert_big_brain = False @@ -38,18 +38,18 @@ class WatchdogComponent(AfterParse): previous_cycle = self.event_history.episodes[ self.event_history.cursor - 1 ] - previous_command = previous_cycle.action.name - previous_command_args = previous_cycle.action.args + previous_command = previous_cycle.action.use_tool.name + previous_command_args = previous_cycle.action.use_tool.arguments rethink_reason = "" - if not result.command_name: + if not result.use_tool: rethink_reason = "AI did not specify a command" elif ( - result.command_name == previous_command - and result.command_args == previous_command_args + result.use_tool.name == previous_command + and result.use_tool.arguments == previous_command_args ): - rethink_reason = f"Repetitive command detected ({result.command_name})" + rethink_reason = f"Repetitive command detected ({result.use_tool.name})" if rethink_reason: logger.info(f"{rethink_reason}, re-thinking with SMART_LLM...") diff --git a/autogpts/autogpt/autogpt/agents/prompt_strategies/one_shot.py b/autogpts/autogpt/autogpt/agents/prompt_strategies/one_shot.py index 0be7b8673..53fadaa7c 100644 --- a/autogpts/autogpt/autogpt/agents/prompt_strategies/one_shot.py +++ b/autogpts/autogpt/autogpt/agents/prompt_strategies/one_shot.py @@ -6,8 +6,9 @@ import re from logging import Logger import distro +from pydantic import Field -from autogpt.agents.base import ThoughtProcessOutput +from autogpt.agents.base import BaseAgentActionProposal from autogpt.config import AIDirectives, AIProfile from autogpt.core.configuration.schema import SystemConfiguration, UserConfigurable from autogpt.core.prompting import ( @@ -22,9 +23,32 @@ from autogpt.core.resource.model_providers.schema import ( ) from autogpt.core.utils.json_schema import JSONSchema from autogpt.core.utils.json_utils import extract_dict_from_json +from autogpt.models.utils import ModelWithSummary from autogpt.prompts.utils import format_numbered_list from autogpt.utils.exceptions import InvalidAgentResponseError +_RESPONSE_INTERFACE_NAME = "AssistantResponse" + + +class AssistantThoughts(ModelWithSummary): + observations: str = Field( + ..., description="Relevant observations from your last action (if any)" + ) + text: str = Field(..., 
description="Thoughts") + reasoning: str = Field(..., description="Reasoning behind the thoughts") + self_criticism: str = Field(..., description="Constructive self-criticism") + plan: list[str] = Field( + ..., description="Short list that conveys the long-term plan" + ) + speak: str = Field(..., description="Summary of thoughts, to say to user") + + def summary(self) -> str: + return self.text + + +class OneShotAgentActionProposal(BaseAgentActionProposal): + thoughts: AssistantThoughts + class OneShotAgentPromptConfiguration(SystemConfiguration): DEFAULT_BODY_TEMPLATE: str = ( @@ -51,70 +75,7 @@ class OneShotAgentPromptConfiguration(SystemConfiguration): "and respond using the JSON schema specified previously:" ) - DEFAULT_RESPONSE_SCHEMA = JSONSchema( - type=JSONSchema.Type.OBJECT, - properties={ - "thoughts": JSONSchema( - type=JSONSchema.Type.OBJECT, - required=True, - properties={ - "observations": JSONSchema( - description=( - "Relevant observations from your last action (if any)" - ), - type=JSONSchema.Type.STRING, - required=False, - ), - "text": JSONSchema( - description="Thoughts", - type=JSONSchema.Type.STRING, - required=True, - ), - "reasoning": JSONSchema( - type=JSONSchema.Type.STRING, - required=True, - ), - "self_criticism": JSONSchema( - description="Constructive self-criticism", - type=JSONSchema.Type.STRING, - required=True, - ), - "plan": JSONSchema( - description=( - "Short markdown-style bullet list that conveys the " - "long-term plan" - ), - type=JSONSchema.Type.STRING, - required=True, - ), - "speak": JSONSchema( - description="Summary of thoughts, to say to user", - type=JSONSchema.Type.STRING, - required=True, - ), - }, - ), - "command": JSONSchema( - type=JSONSchema.Type.OBJECT, - required=True, - properties={ - "name": JSONSchema( - type=JSONSchema.Type.STRING, - required=True, - ), - "args": JSONSchema( - type=JSONSchema.Type.OBJECT, - required=True, - ), - }, - ), - }, - ) - body_template: str = UserConfigurable(default=DEFAULT_BODY_TEMPLATE) - response_schema: dict = UserConfigurable( - default_factory=DEFAULT_RESPONSE_SCHEMA.to_dict - ) choose_action_instruction: str = UserConfigurable( default=DEFAULT_CHOOSE_ACTION_INSTRUCTION ) @@ -139,7 +100,7 @@ class OneShotAgentPromptStrategy(PromptStrategy): logger: Logger, ): self.config = configuration - self.response_schema = JSONSchema.from_dict(configuration.response_schema) + self.response_schema = JSONSchema.from_dict(OneShotAgentActionProposal.schema()) self.logger = logger @property @@ -168,19 +129,12 @@ class OneShotAgentPromptStrategy(PromptStrategy): include_os_info=include_os_info, ) - user_task = f'"""{task}"""' - - response_format_instr = self.response_format_instruction( - self.config.use_functions_api - ) - messages.append(ChatMessage.system(response_format_instr)) - final_instruction_msg = ChatMessage.user(self.config.choose_action_instruction) prompt = ChatPrompt( messages=[ ChatMessage.system(system_prompt), - ChatMessage.user(user_task), + ChatMessage.user(f'"""{task}"""'), *messages, final_instruction_msg, ], @@ -215,6 +169,10 @@ class OneShotAgentPromptStrategy(PromptStrategy): " in the next message. Your job is to complete the task while following" " your directives as given above, and terminate when your task is done." 
] + + [ + "## RESPONSE FORMAT\n" + + self.response_format_instruction(self.config.use_functions_api) + ] ) # Join non-empty parts together into paragraph format @@ -225,27 +183,21 @@ class OneShotAgentPromptStrategy(PromptStrategy): if ( use_functions_api and response_schema.properties - and "command" in response_schema.properties + and "use_tool" in response_schema.properties ): - del response_schema.properties["command"] + del response_schema.properties["use_tool"] # Unindent for performance response_format = re.sub( r"\n\s+", "\n", - response_schema.to_typescript_object_interface("Response"), - ) - - instruction = ( - "Respond with pure JSON containing your thoughts, " "and invoke a tool." - if use_functions_api - else "Respond with pure JSON." + response_schema.to_typescript_object_interface(_RESPONSE_INTERFACE_NAME), ) return ( - f"{instruction} " - "The JSON object should be compatible with the TypeScript type `Response` " - f"from the following:\n{response_format}" + f"YOU MUST ALWAYS RESPOND WITH A JSON OBJECT OF THE FOLLOWING TYPE:\n" + f"{response_format}" + + ("\n\nYOU MUST ALSO INVOKE A TOOL!" if use_functions_api else "") ) def _generate_intro_prompt(self, ai_profile: AIProfile) -> list[str]: @@ -309,7 +261,7 @@ class OneShotAgentPromptStrategy(PromptStrategy): def parse_response_content( self, response: AssistantChatMessage, - ) -> ThoughtProcessOutput: + ) -> OneShotAgentActionProposal: if not response.content: raise InvalidAgentResponseError("Assistant response has no text content") @@ -323,92 +275,13 @@ class OneShotAgentPromptStrategy(PromptStrategy): ) assistant_reply_dict = extract_dict_from_json(response.content) self.logger.debug( - "Validating object extracted from LLM response:\n" + "Parsing object extracted from LLM response:\n" f"{json.dumps(assistant_reply_dict, indent=4)}" ) - response_schema = self.response_schema.copy(deep=True) - if ( - self.config.use_functions_api - and response_schema.properties - and "command" in response_schema.properties - ): - del response_schema.properties["command"] - _, errors = response_schema.validate_object(assistant_reply_dict) - if errors: - raise InvalidAgentResponseError( - "Validation of response failed:\n " - + ";\n ".join([str(e) for e in errors]) - ) - - # Get command name and arguments - command_name, arguments = extract_command( - assistant_reply_dict, response, self.config.use_functions_api - ) - return ThoughtProcessOutput( - command_name=command_name, - command_args=arguments, - thoughts=assistant_reply_dict, - ) - - -############# -# Utilities # -############# - - -def extract_command( - assistant_reply_json: dict, - assistant_reply: AssistantChatMessage, - use_openai_functions_api: bool, -) -> tuple[str, dict[str, str]]: - """Parse the response and return the command name and arguments - - Args: - assistant_reply_json (dict): The response object from the AI - assistant_reply (AssistantChatMessage): The model response from the AI - config (Config): The config object - - Returns: - tuple: The command name and arguments - - Raises: - json.decoder.JSONDecodeError: If the response is not valid JSON - - Exception: If any other error occurs - """ - if use_openai_functions_api: - if not assistant_reply.tool_calls: - raise InvalidAgentResponseError("Assistant did not use any tools") - assistant_reply_json["command"] = { - "name": assistant_reply.tool_calls[0].function.name, - "args": assistant_reply.tool_calls[0].function.arguments, - } - try: - if not isinstance(assistant_reply_json, dict): - raise 
InvalidAgentResponseError( - f"The previous message sent was not a dictionary {assistant_reply_json}" - ) - - if "command" not in assistant_reply_json: - raise InvalidAgentResponseError("Missing 'command' object in JSON") - - command = assistant_reply_json["command"] - if not isinstance(command, dict): - raise InvalidAgentResponseError("'command' object is not a dictionary") - - if "name" not in command: - raise InvalidAgentResponseError("Missing 'name' field in 'command' object") - - command_name = command["name"] - - # Use an empty dictionary if 'args' field is not present in 'command' object - arguments = command.get("args", {}) - - return command_name, arguments - - except json.decoder.JSONDecodeError: - raise InvalidAgentResponseError("Invalid JSON") - - except Exception as e: - raise InvalidAgentResponseError(str(e)) + parsed_response = OneShotAgentActionProposal.parse_obj(assistant_reply_dict) + if self.config.use_functions_api: + if not response.tool_calls: + raise InvalidAgentResponseError("Assistant did not use a tool") + parsed_response.use_tool = response.tool_calls[0].function + return parsed_response diff --git a/autogpts/autogpt/autogpt/agents/protocols.py b/autogpts/autogpt/autogpt/agents/protocols.py index 07984ba08..22fab67f9 100644 --- a/autogpts/autogpt/autogpt/agents/protocols.py +++ b/autogpts/autogpt/autogpt/agents/protocols.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Iterator from autogpt.agents.components import AgentComponent if TYPE_CHECKING: - from autogpt.agents.base import ThoughtProcessOutput + from autogpt.agents.base import BaseAgentActionProposal from autogpt.core.resource.model_providers.schema import ChatMessage from autogpt.models.action_history import ActionResult from autogpt.models.command import Command @@ -35,7 +35,7 @@ class MessageProvider(AgentComponent): class AfterParse(AgentComponent): @abstractmethod - def after_parse(self, result: "ThoughtProcessOutput") -> None: + def after_parse(self, result: "BaseAgentActionProposal") -> None: ... diff --git a/autogpts/autogpt/autogpt/app/agent_protocol_server.py b/autogpts/autogpt/autogpt/app/agent_protocol_server.py index 3c0a49a68..cdaf1f460 100644 --- a/autogpts/autogpt/autogpt/app/agent_protocol_server.py +++ b/autogpts/autogpt/autogpt/app/agent_protocol_server.py @@ -33,11 +33,9 @@ from autogpt.agent_factory.generators import generate_agent_for_task from autogpt.agent_manager import AgentManager from autogpt.app.utils import is_port_free from autogpt.config import Config -from autogpt.core.resource.model_providers import ChatModelProvider +from autogpt.core.resource.model_providers import ChatModelProvider, ModelProviderBudget from autogpt.core.resource.model_providers.openai import OpenAIProvider -from autogpt.core.resource.model_providers.schema import ModelProviderBudget from autogpt.file_storage import FileStorage -from autogpt.logs.utils import fmt_kwargs from autogpt.models.action_history import ActionErrorResult, ActionSuccessResult from autogpt.utils.exceptions import AgentFinished from autogpt.utils.utils import DEFAULT_ASK_COMMAND, DEFAULT_FINISH_COMMAND @@ -201,7 +199,7 @@ class AgentProtocolServer: # To prevent this from interfering with the agent's process, we ignore the input # of this first step request, and just generate the first step proposal. 
is_init_step = not bool(agent.event_history) - execute_command, execute_command_args, execute_result = None, None, None + last_proposal, tool_result = None, None execute_approved = False # HACK: only for compatibility with AGBenchmark @@ -215,13 +213,11 @@ class AgentProtocolServer: and agent.event_history.current_episode and not agent.event_history.current_episode.result ): - execute_command = agent.event_history.current_episode.action.name - execute_command_args = agent.event_history.current_episode.action.args + last_proposal = agent.event_history.current_episode.action execute_approved = not user_input logger.debug( - f"Agent proposed command" - f" {execute_command}({fmt_kwargs(execute_command_args)})." + f"Agent proposed command {last_proposal.use_tool}." f" User input/feedback: {repr(user_input)}" ) @@ -229,24 +225,25 @@ class AgentProtocolServer: step = await self.db.create_step( task_id=task_id, input=step_request, - is_last=execute_command == DEFAULT_FINISH_COMMAND and execute_approved, + is_last=( + last_proposal is not None + and last_proposal.use_tool.name == DEFAULT_FINISH_COMMAND + and execute_approved + ), ) agent.llm_provider = self._get_task_llm_provider(task, step.step_id) # Execute previously proposed action - if execute_command: - assert execute_command_args is not None + if last_proposal: agent.file_manager.workspace.on_write_file = ( lambda path: self._on_agent_write_file( task=task, step=step, relative_path=path ) ) - if execute_command == DEFAULT_ASK_COMMAND: - execute_result = ActionSuccessResult(outputs=user_input) - agent.event_history.register_result(execute_result) - elif not execute_command: - execute_result = None + if last_proposal.use_tool.name == DEFAULT_ASK_COMMAND: + tool_result = ActionSuccessResult(outputs=user_input) + agent.event_history.register_result(tool_result) elif execute_approved: step = await self.db.update_step( task_id=task_id, @@ -256,10 +253,7 @@ class AgentProtocolServer: try: # Execute previously proposed action - execute_result = await agent.execute( - command_name=execute_command, - command_args=execute_command_args, - ) + tool_result = await agent.execute(last_proposal) except AgentFinished: additional_output = {} task_total_cost = agent.llm_provider.get_incurred_cost() @@ -273,25 +267,20 @@ class AgentProtocolServer: step = await self.db.update_step( task_id=task_id, step_id=step.step_id, - output=execute_command_args["reason"], + output=last_proposal.use_tool.arguments["reason"], additional_output=additional_output, ) await agent.file_manager.save_state() return step else: assert user_input - execute_result = await agent.execute( - command_name="human_feedback", # HACK - command_args={}, - user_input=user_input, - ) + tool_result = await agent.do_not_execute(last_proposal, user_input) # Propose next action try: - next_command, next_command_args, raw_output = ( - await agent.propose_action() - ).to_tuple() - logger.debug(f"AI output: {raw_output}") + assistant_response = await agent.propose_action() + next_tool_to_use = assistant_response.use_tool + logger.debug(f"AI output: {assistant_response.thoughts}") except Exception as e: step = await self.db.update_step( task_id=task_id, @@ -304,44 +293,44 @@ class AgentProtocolServer: # Format step output output = ( ( - f"`{execute_command}({fmt_kwargs(execute_command_args)})` returned:" - + ("\n\n" if "\n" in str(execute_result) else " ") - + f"{execute_result}\n\n" + f"`{last_proposal.use_tool}` returned:" + + ("\n\n" if "\n" in str(tool_result) else " ") + + f"{tool_result}\n\n" ) - if 
execute_command_args and execute_command != DEFAULT_ASK_COMMAND + if last_proposal and last_proposal.use_tool.name != DEFAULT_ASK_COMMAND else "" ) - output += f"{raw_output['thoughts']['speak']}\n\n" + output += f"{assistant_response.thoughts.speak}\n\n" output += ( - f"Next Command: {next_command}({fmt_kwargs(next_command_args)})" - if next_command != DEFAULT_ASK_COMMAND - else next_command_args["question"] + f"Next Command: {next_tool_to_use}" + if next_tool_to_use.name != DEFAULT_ASK_COMMAND + else next_tool_to_use.arguments["question"] ) additional_output = { **( { "last_action": { - "name": execute_command, - "args": execute_command_args, + "name": last_proposal.use_tool.name, + "args": last_proposal.use_tool.arguments, "result": ( "" - if execute_result is None + if tool_result is None else ( - orjson.loads(execute_result.json()) - if not isinstance(execute_result, ActionErrorResult) + orjson.loads(tool_result.json()) + if not isinstance(tool_result, ActionErrorResult) else { - "error": str(execute_result.error), - "reason": execute_result.reason, + "error": str(tool_result.error), + "reason": tool_result.reason, } ) ), }, } - if not is_init_step + if last_proposal and tool_result else {} ), - **raw_output, + **assistant_response.dict(), } task_cumulative_cost = agent.llm_provider.get_incurred_cost() diff --git a/autogpts/autogpt/autogpt/app/main.py b/autogpts/autogpt/autogpt/app/main.py index f256c8707..aaab5fe48 100644 --- a/autogpts/autogpt/autogpt/app/main.py +++ b/autogpts/autogpt/autogpt/app/main.py @@ -18,11 +18,12 @@ from forge.sdk.db import AgentDB if TYPE_CHECKING: from autogpt.agents.agent import Agent + from autogpt.agents.base import BaseAgentActionProposal from autogpt.agent_factory.configurators import configure_agent_with_state, create_agent from autogpt.agent_factory.profile_generator import generate_agent_profile_for_task from autogpt.agent_manager import AgentManager -from autogpt.agents import AgentThoughts, CommandArgs, CommandName +from autogpt.agents.prompt_strategies.one_shot import AssistantThoughts from autogpt.commands.execute_code import ( is_docker_available, we_are_running_in_a_docker_container, @@ -40,6 +41,7 @@ from autogpt.file_storage import FileStorageBackendName, get_storage from autogpt.logs.config import configure_logging from autogpt.logs.helpers import print_attribute, speak from autogpt.models.action_history import ActionInterruptedByHuman +from autogpt.models.utils import ModelWithSummary from autogpt.utils.exceptions import AgentTerminated, InvalidAgentResponseError from autogpt.utils.utils import DEFAULT_FINISH_COMMAND @@ -227,13 +229,12 @@ async def run_auto_gpt( ) if ( - agent.event_history.current_episode - and agent.event_history.current_episode.action.name - == DEFAULT_FINISH_COMMAND - and not agent.event_history.current_episode.result + (current_episode := agent.event_history.current_episode) + and current_episode.action.use_tool.name == DEFAULT_FINISH_COMMAND + and not current_episode.result ): # Agent was resumed after `finish` -> rewrite result of `finish` action - finish_reason = agent.event_history.current_episode.action.args["reason"] + finish_reason = current_episode.action.use_tool.arguments["reason"] print(f"Agent previously self-terminated; reason: '{finish_reason}'") new_assignment = clean_input( config, "Please give a follow-up question or assignment:" @@ -531,11 +532,7 @@ async def run_interaction_loop( # Have the agent determine the next action to take. 
with spinner: try: - ( - command_name, - command_args, - assistant_reply_dict, - ) = (await agent.propose_action()).to_tuple() + action_proposal = await agent.propose_action() except InvalidAgentResponseError as e: logger.warning(f"The agent's thoughts could not be parsed: {e}") consecutive_failures += 1 @@ -558,9 +555,7 @@ async def run_interaction_loop( # Print the assistant's thoughts and the next command to the user. update_user( ai_profile, - command_name, - command_args, - assistant_reply_dict, + action_proposal, speak_mode=legacy_config.tts_config.speak_mode, ) @@ -569,12 +564,12 @@ async def run_interaction_loop( ################## handle_stop_signal() if cycles_remaining == 1: # Last cycle - user_feedback, user_input, new_cycles_remaining = await get_user_feedback( + feedback_type, feedback, new_cycles_remaining = await get_user_feedback( legacy_config, ai_profile, ) - if user_feedback == UserFeedback.AUTHORIZE: + if feedback_type == UserFeedback.AUTHORIZE: if new_cycles_remaining is not None: # Case 1: User is altering the cycle budget. if cycle_budget > 1: @@ -598,13 +593,13 @@ async def run_interaction_loop( "-=-=-=-=-=-=-= COMMAND AUTHORISED BY USER -=-=-=-=-=-=-=", extra={"color": Fore.MAGENTA}, ) - elif user_feedback == UserFeedback.EXIT: + elif feedback_type == UserFeedback.EXIT: logger.warning("Exiting...") exit() else: # user_feedback == UserFeedback.TEXT - command_name = "human_feedback" + pass else: - user_input = "" + feedback = "" # First log new-line so user can differentiate sections better in console print() if cycles_remaining != math.inf: @@ -619,33 +614,31 @@ async def run_interaction_loop( # Decrement the cycle counter first to reduce the likelihood of a SIGINT # happening during command execution, setting the cycles remaining to 1, # and then having the decrement set it to 0, exiting the application. - if command_name != "human_feedback": + if not feedback: cycles_remaining -= 1 - if not command_name: + if not action_proposal.use_tool: continue handle_stop_signal() - if command_name: - result = await agent.execute(command_name, command_args, user_input) + if not feedback: + result = await agent.execute(action_proposal) + else: + result = await agent.do_not_execute(action_proposal, feedback) - if result.status == "success": - logger.info( - result, extra={"title": "SYSTEM:", "title_color": Fore.YELLOW} - ) - elif result.status == "error": - logger.warning( - f"Command {command_name} returned an error: " - f"{result.error or result.reason}" - ) + if result.status == "success": + logger.info(result, extra={"title": "SYSTEM:", "title_color": Fore.YELLOW}) + elif result.status == "error": + logger.warning( + f"Command {action_proposal.use_tool.name} returned an error: " + f"{result.error or result.reason}" + ) def update_user( ai_profile: AIProfile, - command_name: CommandName, - command_args: CommandArgs, - assistant_reply_dict: AgentThoughts, + action_proposal: "BaseAgentActionProposal", speak_mode: bool = False, ) -> None: """Prints the assistant's thoughts and the next command to the user. 
@@ -661,18 +654,19 @@ def update_user( print_assistant_thoughts( ai_name=ai_profile.ai_name, - assistant_reply_json_valid=assistant_reply_dict, + thoughts=action_proposal.thoughts, speak_mode=speak_mode, ) if speak_mode: - speak(f"I want to execute {command_name}") + speak(f"I want to execute {action_proposal.use_tool.name}") # First log new-line so user can differentiate sections better in console print() + safe_tool_name = remove_ansi_escape(action_proposal.use_tool.name) logger.info( - f"COMMAND = {Fore.CYAN}{remove_ansi_escape(command_name)}{Style.RESET_ALL} " - f"ARGUMENTS = {Fore.CYAN}{command_args}{Style.RESET_ALL}", + f"COMMAND = {Fore.CYAN}{safe_tool_name}{Style.RESET_ALL} " + f"ARGUMENTS = {Fore.CYAN}{action_proposal.use_tool.arguments}{Style.RESET_ALL}", extra={ "title": "NEXT ACTION:", "title_color": Fore.CYAN, @@ -741,56 +735,59 @@ async def get_user_feedback( def print_assistant_thoughts( ai_name: str, - assistant_reply_json_valid: dict, + thoughts: str | ModelWithSummary | AssistantThoughts, speak_mode: bool = False, ) -> None: logger = logging.getLogger(__name__) - assistant_thoughts_reasoning = None - assistant_thoughts_plan = None - assistant_thoughts_speak = None - assistant_thoughts_criticism = None - - assistant_thoughts = assistant_reply_json_valid.get("thoughts", {}) - assistant_thoughts_text = remove_ansi_escape(assistant_thoughts.get("text", "")) - if assistant_thoughts: - assistant_thoughts_reasoning = remove_ansi_escape( - assistant_thoughts.get("reasoning", "") - ) - assistant_thoughts_plan = remove_ansi_escape(assistant_thoughts.get("plan", "")) - assistant_thoughts_criticism = remove_ansi_escape( - assistant_thoughts.get("self_criticism", "") - ) - assistant_thoughts_speak = remove_ansi_escape( - assistant_thoughts.get("speak", "") - ) - print_attribute( - f"{ai_name.upper()} THOUGHTS", assistant_thoughts_text, title_color=Fore.YELLOW + thoughts_text = remove_ansi_escape( + thoughts.text + if isinstance(thoughts, AssistantThoughts) + else thoughts.summary() + if isinstance(thoughts, ModelWithSummary) + else thoughts ) - print_attribute("REASONING", assistant_thoughts_reasoning, title_color=Fore.YELLOW) - if assistant_thoughts_plan: - print_attribute("PLAN", "", title_color=Fore.YELLOW) - # If it's a list, join it into a string - if isinstance(assistant_thoughts_plan, list): - assistant_thoughts_plan = "\n".join(assistant_thoughts_plan) - elif isinstance(assistant_thoughts_plan, dict): - assistant_thoughts_plan = str(assistant_thoughts_plan) - - # Split the input_string using the newline character and dashes - lines = assistant_thoughts_plan.split("\n") - for line in lines: - line = line.lstrip("- ") - logger.info(line.strip(), extra={"title": "- ", "title_color": Fore.GREEN}) print_attribute( - "CRITICISM", f"{assistant_thoughts_criticism}", title_color=Fore.YELLOW + f"{ai_name.upper()} THOUGHTS", thoughts_text, title_color=Fore.YELLOW ) - # Speak the assistant's thoughts - if assistant_thoughts_speak: - if speak_mode: - speak(assistant_thoughts_speak) - else: - print_attribute("SPEAK", assistant_thoughts_speak, title_color=Fore.YELLOW) + if isinstance(thoughts, AssistantThoughts): + print_attribute( + "REASONING", remove_ansi_escape(thoughts.reasoning), title_color=Fore.YELLOW + ) + if assistant_thoughts_plan := remove_ansi_escape( + "\n".join(f"- {p}" for p in thoughts.plan) + ): + print_attribute("PLAN", "", title_color=Fore.YELLOW) + # If it's a list, join it into a string + if isinstance(assistant_thoughts_plan, list): + assistant_thoughts_plan = 
"\n".join(assistant_thoughts_plan) + elif isinstance(assistant_thoughts_plan, dict): + assistant_thoughts_plan = str(assistant_thoughts_plan) + + # Split the input_string using the newline character and dashes + lines = assistant_thoughts_plan.split("\n") + for line in lines: + line = line.lstrip("- ") + logger.info( + line.strip(), extra={"title": "- ", "title_color": Fore.GREEN} + ) + print_attribute( + "CRITICISM", + remove_ansi_escape(thoughts.self_criticism), + title_color=Fore.YELLOW, + ) + + # Speak the assistant's thoughts + if assistant_thoughts_speak := remove_ansi_escape(thoughts.speak): + if speak_mode: + speak(assistant_thoughts_speak) + else: + print_attribute( + "SPEAK", assistant_thoughts_speak, title_color=Fore.YELLOW + ) + else: + speak(thoughts_text) def remove_ansi_escape(s: str) -> str: diff --git a/autogpts/autogpt/autogpt/components/event_history.py b/autogpts/autogpt/autogpt/components/event_history.py index a66b18e2d..fd3b5100c 100644 --- a/autogpts/autogpt/autogpt/components/event_history.py +++ b/autogpts/autogpt/autogpt/components/event_history.py @@ -1,12 +1,11 @@ -from typing import Callable, Iterator, Optional +from typing import Callable, Generic, Iterator, Optional -from autogpt.agents.base import ThoughtProcessOutput from autogpt.agents.features.watchdog import WatchdogComponent from autogpt.agents.protocols import AfterExecute, AfterParse, MessageProvider from autogpt.config.config import Config from autogpt.core.resource.model_providers.schema import ChatMessage, ChatModelProvider from autogpt.models.action_history import ( - Action, + AP, ActionResult, Episode, EpisodicActionHistory, @@ -14,14 +13,14 @@ from autogpt.models.action_history import ( from autogpt.prompts.utils import indent -class EventHistoryComponent(MessageProvider, AfterParse, AfterExecute): +class EventHistoryComponent(MessageProvider, AfterParse, AfterExecute, Generic[AP]): """Keeps track of the event history and provides a summary of the steps.""" run_after = [WatchdogComponent] def __init__( self, - event_history: EpisodicActionHistory, + event_history: EpisodicActionHistory[AP], max_tokens: int, count_tokens: Callable[[str], int], legacy_config: Config, @@ -41,15 +40,8 @@ class EventHistoryComponent(MessageProvider, AfterParse, AfterExecute): ): yield ChatMessage.system(f"## Progress on your Task so far\n\n{progress}") - def after_parse(self, result: ThoughtProcessOutput) -> None: - if result.command_name: - self.event_history.register_action( - Action( - name=result.command_name, - args=result.command_args, - reasoning=result.thoughts["thoughts"]["reasoning"], - ) - ) + def after_parse(self, result: AP) -> None: + self.event_history.register_action(result) async def after_execute(self, result: ActionResult) -> None: self.event_history.register_result(result) diff --git a/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py b/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py index dd69b526e..60df855f2 100644 --- a/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py +++ b/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py @@ -26,6 +26,7 @@ from autogpt.core.resource.schema import ( ResourceType, ) from autogpt.core.utils.json_schema import JSONSchema +from autogpt.logs.utils import fmt_kwargs class ModelProviderService(str, enum.Enum): @@ -72,6 +73,9 @@ class AssistantFunctionCall(BaseModel): name: str arguments: dict[str, Any] + def __str__(self) -> str: + return f"{self.name}({fmt_kwargs(self.arguments)})" + class 
AssistantFunctionCallDict(TypedDict): name: str diff --git a/autogpts/autogpt/autogpt/models/action_history.py b/autogpts/autogpt/autogpt/models/action_history.py index 2358e2236..d433cd80d 100644 --- a/autogpts/autogpt/autogpt/models/action_history.py +++ b/autogpts/autogpt/autogpt/models/action_history.py @@ -1,10 +1,13 @@ from __future__ import annotations import asyncio -from typing import TYPE_CHECKING, Any, Iterator, Literal, Optional +from typing import TYPE_CHECKING, Any, Generic, Iterator, Literal, Optional, TypeVar from pydantic import BaseModel, Field +from pydantic.generics import GenericModel +from autogpt.agents.base import BaseAgentActionProposal +from autogpt.models.utils import ModelWithSummary from autogpt.processing.text import summarize_text from autogpt.prompts.utils import format_numbered_list, indent @@ -13,18 +16,6 @@ if TYPE_CHECKING: from autogpt.core.resource.model_providers import ChatModelProvider -class Action(BaseModel): - name: str - args: dict[str, Any] - reasoning: str - - def format_call(self) -> str: - return ( - f"{self.name}" - f"({', '.join([f'{a}={repr(v)}' for a, v in self.args.items()])})" - ) - - class ActionSuccessResult(BaseModel): outputs: Any status: Literal["success"] = "success" @@ -86,15 +77,22 @@ class ActionInterruptedByHuman(BaseModel): ActionResult = ActionSuccessResult | ActionErrorResult | ActionInterruptedByHuman +AP = TypeVar("AP", bound=BaseAgentActionProposal) + -class Episode(BaseModel): - action: Action +class Episode(GenericModel, Generic[AP]): + action: AP result: ActionResult | None summary: str | None = None def format(self): - step = f"Executed `{self.action.format_call()}`\n" - step += f'- **Reasoning:** "{self.action.reasoning}"\n' + step = f"Executed `{self.action.use_tool}`\n" + reasoning = ( + _r.summary() + if isinstance(_r := self.action.thoughts, ModelWithSummary) + else _r + ) + step += f'- **Reasoning:** "{reasoning}"\n' step += ( "- **Status:** " f"`{self.result.status if self.result else 'did_not_finish'}`\n" @@ -113,28 +111,28 @@ class Episode(BaseModel): return step def __str__(self) -> str: - executed_action = f"Executed `{self.action.format_call()}`" + executed_action = f"Executed `{self.action.use_tool}`" action_result = f": {self.result}" if self.result else "." 
return executed_action + action_result -class EpisodicActionHistory(BaseModel): +class EpisodicActionHistory(GenericModel, Generic[AP]): """Utility container for an action history""" - episodes: list[Episode] = Field(default_factory=list) + episodes: list[Episode[AP]] = Field(default_factory=list) cursor: int = 0 _lock = asyncio.Lock() @property - def current_episode(self) -> Episode | None: + def current_episode(self) -> Episode[AP] | None: if self.cursor == len(self): return None return self[self.cursor] - def __getitem__(self, key: int) -> Episode: + def __getitem__(self, key: int) -> Episode[AP]: return self.episodes[key] - def __iter__(self) -> Iterator[Episode]: + def __iter__(self) -> Iterator[Episode[AP]]: return iter(self.episodes) def __len__(self) -> int: @@ -143,7 +141,7 @@ class EpisodicActionHistory(BaseModel): def __bool__(self) -> bool: return len(self.episodes) > 0 - def register_action(self, action: Action) -> None: + def register_action(self, action: AP) -> None: if not self.current_episode: self.episodes.append(Episode(action=action, result=None)) assert self.current_episode diff --git a/autogpts/autogpt/autogpt/models/utils.py b/autogpts/autogpt/autogpt/models/utils.py new file mode 100644 index 000000000..1b4db2175 --- /dev/null +++ b/autogpts/autogpt/autogpt/models/utils.py @@ -0,0 +1,10 @@ +from abc import ABC, abstractmethod + +from pydantic import BaseModel + + +class ModelWithSummary(BaseModel, ABC): + @abstractmethod + def summary(self) -> str: + """Should produce a human readable summary of the model content.""" + pass -- cgit v1.2.3 From d57ccf7ec9e8ce6c1c2d5077e1488403bdcf35c7 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Thu, 2 May 2024 02:41:03 +0200 Subject: fix(agent): Fix `open_file` and `open_folder` commands They weren't ported properly to the new component-based architecture: the `@sanitize_path` decorator was removed, causing path handling issues. --- .../autogpt/autogpt/agents/features/context.py | 32 ++++++++++------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/autogpts/autogpt/autogpt/agents/features/context.py b/autogpts/autogpt/autogpt/agents/features/context.py index 5de7b7967..405c6e559 100644 --- a/autogpts/autogpt/autogpt/agents/features/context.py +++ b/autogpts/autogpt/autogpt/agents/features/context.py @@ -71,34 +71,32 @@ class ContextComponent(MessageProvider, CommandProvider): ) } ) - async def open_file(self, file_path: Path) -> str: + async def open_file(self, file_path: str | Path) -> str: """Opens a file for editing or continued viewing; creates it if it does not exist yet. Note: If you only need to read or write a file once, use `write_to_file` instead. 
Args: - file_path (Path): The path of the file to open + file_path (str | Path): The path of the file to open Returns: str: A status message indicating what happened """ - # Try to make the file path relative - relative_file_path = None - with contextlib.suppress(ValueError): - relative_file_path = file_path.relative_to(self.workspace.root) + if not isinstance(file_path, Path): + file_path = Path(file_path) created = False if not self.workspace.exists(file_path): await self.workspace.write_file(file_path, "") created = True - file_path = relative_file_path or file_path + # Try to make the file path relative + with contextlib.suppress(ValueError): + file_path = file_path.relative_to(self.workspace.root) file = FileContextItem(path=file_path) - self.context.add(file) - return ( f"File {file_path}{' created,' if created else ''} has been opened" " and added to the context ✅" @@ -113,31 +111,29 @@ class ContextComponent(MessageProvider, CommandProvider): ) } ) - def open_folder(self, path: Path) -> str: + def open_folder(self, path: str | Path) -> str: """Open a folder to keep track of its content Args: - path (Path): The path of the folder to open + path (str | Path): The path of the folder to open Returns: str: A status message indicating what happened """ - # Try to make the path relative - relative_path = None - with contextlib.suppress(ValueError): - relative_path = path.relative_to(self.workspace.root) + if not isinstance(path, Path): + path = Path(path) if not self.workspace.exists(path): raise FileNotFoundError( f"open_folder {path} failed: no such file or directory" ) - path = relative_path or path + # Try to make the path relative + with contextlib.suppress(ValueError): + path = path.relative_to(self.workspace.root) folder = FolderContextItem(path=path) - self.context.add(folder) - return f"Folder {path} has been opened and added to the context ✅" @command( -- cgit v1.2.3
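Illustration (not part of the patch): the fix in both commands comes down to the same order of operations — coerce the incoming `str` to `Path`, perform any workspace checks or writes on it, and only then try to make it workspace-relative. A self-contained sketch of that pattern; `normalize_workspace_path` is an illustrative name, not a helper from the codebase:

import contextlib
from pathlib import Path


def normalize_workspace_path(path: str | Path, workspace_root: Path) -> Path:
    """Coerce to Path, then store it workspace-relative when possible."""
    if not isinstance(path, Path):
        path = Path(path)
    # Paths outside the workspace stay absolute instead of raising ValueError
    with contextlib.suppress(ValueError):
        path = path.relative_to(workspace_root)
    return path


root = Path("/workspace")
print(normalize_workspace_path("/workspace/src/notes.md", root))  # src/notes.md
print(normalize_workspace_path("/tmp/other.txt", root))           # /tmp/other.txt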