-rw-r--r--  .env.template | 6
-rw-r--r--  .gitignore | 2
-rw-r--r--  BULLETIN.md | 23
-rw-r--r--  autogpt/agent/agent.py | 91
-rw-r--r--  autogpt/agent/agent_manager.py | 24
-rw-r--r--  autogpt/app.py | 150
-rw-r--r--  autogpt/command_decorator.py | 55
-rw-r--r--  autogpt/commands/analyze_code.py | 34
-rw-r--r--  autogpt/commands/audio_text.py | 71
-rw-r--r--  autogpt/commands/command.py | 207
-rw-r--r--  autogpt/commands/execute_code.py | 104
-rw-r--r--  autogpt/commands/file_operations.py | 192
-rw-r--r--  autogpt/commands/git_operations.py | 17
-rw-r--r--  autogpt/commands/image_gen.py | 14
-rw-r--r--  autogpt/commands/improve_code.py | 38
-rw-r--r--  autogpt/commands/task_statuses.py | 16
-rw-r--r--  autogpt/commands/web_playwright.py | 82
-rw-r--r--  autogpt/commands/web_requests.py | 104
-rw-r--r--  autogpt/commands/web_search.py (renamed from autogpt/commands/google_search.py) | 29
-rw-r--r--  autogpt/commands/web_selenium.py | 15
-rw-r--r--  autogpt/commands/write_tests.py | 40
-rw-r--r--  autogpt/config/ai_config.py | 26
-rw-r--r--  autogpt/config/config.py | 517
-rw-r--r--  autogpt/config/prompt_config.py | 12
-rw-r--r--  autogpt/configurator.py | 28
-rw-r--r--  autogpt/core/configuration/__init__.py (renamed from benchmark/__init__.py) | 0
-rw-r--r--  autogpt/core/configuration/schema.py | 98
-rw-r--r--  autogpt/json_utils/utilities.py | 46
-rw-r--r--  autogpt/llm/api_manager.py | 53
-rw-r--r--  autogpt/llm/base.py | 11
-rw-r--r--  autogpt/llm/chat.py | 19
-rw-r--r--  autogpt/llm/providers/openai.py | 264
-rw-r--r--  autogpt/llm/utils/__init__.py | 212
-rw-r--r--  autogpt/logs.py | 25
-rw-r--r--  autogpt/main.py | 79
-rw-r--r--  autogpt/memory/message_history.py | 46
-rw-r--r--  autogpt/memory/vector/__init__.py | 20
-rw-r--r--  autogpt/memory/vector/memory_item.py | 68
-rw-r--r--  autogpt/memory/vector/providers/base.py | 18
-rw-r--r--  autogpt/memory/vector/providers/json_file.py | 31
-rw-r--r--  autogpt/memory/vector/utils.py | 25
-rw-r--r--  autogpt/models/__init__.py (renamed from tests/integration/goal_oriented/__init__.py) | 0
-rw-r--r--  autogpt/models/command.py | 47
-rw-r--r--  autogpt/models/command_parameter.py | 12
-rw-r--r--  autogpt/models/command_registry.py | 96
-rw-r--r--  autogpt/plugins/__init__.py | 41
-rw-r--r--  autogpt/processing/text.py | 31
-rw-r--r--  autogpt/prompts/generator.py | 27
-rw-r--r--  autogpt/prompts/prompt.py | 63
-rw-r--r--  autogpt/setup.py | 33
-rw-r--r--  autogpt/speech/base.py | 5
-rw-r--r--  autogpt/speech/eleven_labs.py | 17
-rw-r--r--  autogpt/speech/say.py | 7
-rw-r--r--  autogpt/utils.py | 11
-rw-r--r--  autogpt/workspace/workspace.py | 21
-rw-r--r--  benchmark/benchmark_entrepreneur_gpt_with_difficult_user.py | 104
-rw-r--r--  benchmarks.py | 52
-rw-r--r--  data_ingestion.py | 6
-rw-r--r--  docker-compose.yml | 2
-rw-r--r--  docs/challenges/building_challenges.md | 6
-rw-r--r--  docs/configuration/options.md | 4
-rw-r--r--  docs/usage.md | 2
-rw-r--r--  pyproject.toml | 8
-rw-r--r--  requirements.txt | 2
m---------  tests/Auto-GPT-test-cassettes | 0
-rw-r--r--  tests/challenges/basic_abilities/test_browse_website.py | 25
-rw-r--r--  tests/challenges/basic_abilities/test_write_file.py | 29
-rw-r--r--  tests/challenges/challenge_decorator/challenge_decorator.py | 9
-rw-r--r--  tests/challenges/conftest.py | 18
-rw-r--r--  tests/challenges/current_score.json | 2
-rw-r--r--  tests/challenges/debug_code/test_debug_code_challenge_a.py | 28
-rw-r--r--  tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py | 28
-rw-r--r--  tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py | 36
-rw-r--r--  tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py | 32
-rw-r--r--  tests/challenges/memory/test_memory_challenge_a.py | 36
-rw-r--r--  tests/challenges/memory/test_memory_challenge_b.py | 39
-rw-r--r--  tests/challenges/memory/test_memory_challenge_c.py | 40
-rw-r--r--  tests/challenges/memory/test_memory_challenge_d.py | 38
-rw-r--r--  tests/challenges/schema.py | 7
-rw-r--r--  tests/challenges/utils.py | 40
-rw-r--r--  tests/conftest.py | 21
-rw-r--r--  tests/integration/agent_factory.py | 288
-rw-r--r--  tests/integration/memory/test_json_file_memory.py | 52
-rw-r--r--  tests/integration/memory/utils.py | 4
-rw-r--r--  tests/integration/test_execute_code.py | 21
-rw-r--r--  tests/integration/test_image_gen.py | 5
-rw-r--r--  tests/integration/test_provider_openai.py | 54
-rw-r--r--  tests/integration/test_setup.py | 25
-rw-r--r--  tests/integration/test_web_selenium.py | 3
-rw-r--r--  tests/mocks/mock_commands.py | 9
-rw-r--r--  tests/unit/test_agent_manager.py | 14
-rw-r--r--  tests/unit/test_ai_config.py | 22
-rw-r--r--  tests/unit/test_api_manager.py | 71
-rw-r--r--  tests/unit/test_browse_scrape_links.py | 119
-rw-r--r--  tests/unit/test_browse_scrape_text.py | 117
-rw-r--r--  tests/unit/test_commands.py | 48
-rw-r--r--  tests/unit/test_config.py | 50
-rw-r--r--  tests/unit/test_file_operations.py | 78
-rw-r--r--  tests/unit/test_get_self_feedback.py | 62
-rw-r--r--  tests/unit/test_make_agent.py | 25
-rw-r--r--  tests/unit/test_message_history.py | 19
-rw-r--r--  tests/unit/test_plugins.py | 5
-rw-r--r--  tests/unit/test_prompt_config.py | 6
-rw-r--r--  tests/unit/test_prompt_generator.py | 255
-rw-r--r--  tests/unit/test_retry_provider_openai.py | 135
-rw-r--r--  tests/unit/test_utils.py | 9
-rw-r--r--  tests/unit/test_web_search.py (renamed from tests/unit/test_google_search.py) | 14
-rw-r--r--  tests/utils.py | 32
108 files changed, 2421 insertions(+), 3058 deletions(-)
diff --git a/.env.template b/.env.template
index 067452457..c3fcb761d 100644
--- a/.env.template
+++ b/.env.template
@@ -25,10 +25,14 @@ OPENAI_API_KEY=your-openai-api-key
## PROMPT_SETTINGS_FILE - Specifies which Prompt Settings file to use (defaults to prompt_settings.yaml)
# PROMPT_SETTINGS_FILE=prompt_settings.yaml
-## OPENAI_API_BASE_URL - Custom url for the OpenAI API, useful for connecting to custom backends. No effect if USE_AZURE is true, leave blank to keep the default url
+## OPENAI_API_BASE_URL - Custom url for the OpenAI API, useful for connecting to custom backends. No effect if USE_AZURE is true, leave blank to keep the default url
# the following is an example:
# OPENAI_API_BASE_URL=http://localhost:443/v1
+## OPENAI_FUNCTIONS - Enables OpenAI functions: https://platform.openai.com/docs/guides/gpt/function-calling
+## WARNING: this feature is only supported by OpenAI's newest models. Until these models become the default on 27 June, add a '-0613' suffix to the model of your choosing.
+# OPENAI_FUNCTIONS=False
+
## AUTHORISE COMMAND KEY - Key to authorise commands
# AUTHORISE_COMMAND_KEY=y
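A minimal sketch (Python, not the project's Config code) of how these two settings interact: with OPENAI_FUNCTIONS enabled, the selected model should be one of the function-calling-capable '-0613' variants until those become the default on 27 June.

import os

# Illustrative only; in Auto-GPT these values are read by the Config class.
openai_functions = os.getenv("OPENAI_FUNCTIONS", "False").lower() == "true"
smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")  # e.g. SMART_LLM_MODEL=gpt-4-0613 in .env

if openai_functions and not smart_llm_model.endswith("-0613"):
    print(f"Warning: '{smart_llm_model}' may not support function calling yet; use a '-0613' variant.")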
diff --git a/.gitignore b/.gitignore
index 29a0285a8..1376ba5dd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,7 +31,7 @@ __pycache__/
build/
develop-eggs/
dist/
-plugins/
+/plugins/
plugins_config.yaml
downloads/
eggs/
diff --git a/BULLETIN.md b/BULLETIN.md
index ba1de5a11..0b8afeba4 100644
--- a/BULLETIN.md
+++ b/BULLETIN.md
@@ -8,20 +8,15 @@ Since releasing v0.3.0, we have been working on re-architecting the Auto-GPT core
Check out the contribution guide on our wiki:
https://github.com/Significant-Gravitas/Auto-GPT/wiki/Contributing
-# 🚀 v0.4.1 Release 🚀
-Two weeks and 50+ pull requests have passed since v0.4.0, and we are happy to announce the release of v0.4.1!
-
-Highlights and notable changes since v0.4.0:
-- The .env.template is more readable and better explains the purpose of each environment variable.
-- More dependable search
- - The CUSTOM_SEARCH_ENGINE_ID variable has been replaced to GOOGLE_CUSTOM_SEARCH_ENGINE_ID, make sure you update it.
-- Better read_file
-- More reliable python code execution
-- Lots of JSON error fixes
-- Directory-based plugins
-
-## Further fixes and changes 🛠️
-Under the hood, we've done a bunch of work improving architectures and streamlining code. Most of that won't be user-visible
+# 🚀 v0.4.3 Release 🚀
+We're happy to announce the 0.4.3 maintenance release, which primarily focuses on refining the LLM command execution,
+extending support for OpenAI's latest models (including the powerful GPT-3.5 Turbo 16k model), and laying the groundwork
+for future compatibility with OpenAI's function calling feature.
+Key Highlights:
+- OpenAI API Key Prompt: Auto-GPT will now courteously prompt users for their OpenAI API key, if it's not already provided.
+- Summarization Enhancements: We've optimized Auto-GPT's use of the LLM context window even further.
+- JSON Memory Reading: Support for reading memories from JSON files has been improved, resulting in enhanced task execution.
+- Deprecated commands, removed for a leaner, more performant LLM: analyze_code, write_tests, improve_code, audio_text, web_playwright, web_requests
## Take a look at the Release Notes on Github for the full changelog!
https://github.com/Significant-Gravitas/Auto-GPT/releases
diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py
index 1f31be165..c578152a1 100644
--- a/autogpt/agent/agent.py
+++ b/autogpt/agent/agent.py
@@ -5,25 +5,22 @@ from datetime import datetime
from colorama import Fore, Style
-from autogpt.commands.command import CommandRegistry
from autogpt.config import Config
from autogpt.config.ai_config import AIConfig
from autogpt.json_utils.utilities import extract_json_from_response, validate_json
-from autogpt.llm.base import ChatSequence
-from autogpt.llm.chat import chat_with_ai, create_chat_completion
+from autogpt.llm.chat import chat_with_ai
from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
from autogpt.llm.utils import count_string_tokens
from autogpt.log_cycle.log_cycle import (
FULL_MESSAGE_HISTORY_FILE_NAME,
NEXT_ACTION_FILE_NAME,
- PROMPT_SUPERVISOR_FEEDBACK_FILE_NAME,
- SUPERVISOR_FEEDBACK_FILE_NAME,
USER_INPUT_FILE_NAME,
LogCycleHandler,
)
-from autogpt.logs import logger, print_assistant_thoughts
+from autogpt.logs import logger, print_assistant_thoughts, remove_ansi_escape
from autogpt.memory.message_history import MessageHistory
from autogpt.memory.vector import VectorMemory
+from autogpt.models.command_registry import CommandRegistry
from autogpt.speech import say_text
from autogpt.spinner import Spinner
from autogpt.utils import clean_input
@@ -145,8 +142,10 @@ class Agent:
)
try:
- assistant_reply_json = extract_json_from_response(assistant_reply)
- validate_json(assistant_reply_json)
+ assistant_reply_json = extract_json_from_response(
+ assistant_reply.content
+ )
+ validate_json(assistant_reply_json, self.config)
except json.JSONDecodeError as e:
logger.error(f"Exception while validating assistant reply JSON: {e}")
assistant_reply_json = {}
@@ -161,9 +160,11 @@ class Agent:
# Get command name and arguments
try:
print_assistant_thoughts(
- self.ai_name, assistant_reply_json, self.config.speak_mode
+ self.ai_name, assistant_reply_json, self.config
+ )
+ command_name, arguments = get_command(
+ assistant_reply_json, assistant_reply, self.config
)
- command_name, arguments = get_command(assistant_reply_json)
if self.config.speak_mode:
say_text(f"I want to execute {command_name}")
@@ -184,7 +185,7 @@ class Agent:
logger.typewriter_log(
"NEXT ACTION: ",
Fore.CYAN,
- f"COMMAND = {Fore.CYAN}{command_name}{Style.RESET_ALL} "
+ f"COMMAND = {Fore.CYAN}{remove_ansi_escape(command_name)}{Style.RESET_ALL} "
f"ARGUMENTS = {Fore.CYAN}{arguments}{Style.RESET_ALL}",
)
@@ -200,32 +201,16 @@ class Agent:
)
while True:
if self.config.chat_messages_enabled:
- console_input = clean_input("Waiting for your response...")
+ console_input = clean_input(
+ self.config, "Waiting for your response..."
+ )
else:
console_input = clean_input(
- Fore.MAGENTA + "Input:" + Style.RESET_ALL
+ self.config, Fore.MAGENTA + "Input:" + Style.RESET_ALL
)
if console_input.lower().strip() == self.config.authorise_key:
user_input = "GENERATE NEXT COMMAND JSON"
break
- elif console_input.lower().strip() == "s":
- logger.typewriter_log(
- "-=-=-=-=-=-=-= THOUGHTS, REASONING, PLAN AND CRITICISM WILL NOW BE VERIFIED BY AGENT -=-=-=-=-=-=-=",
- Fore.GREEN,
- "",
- )
- thoughts = assistant_reply_json.get("thoughts", {})
- self_feedback_resp = self.get_self_feedback(
- thoughts, self.config.fast_llm_model
- )
- logger.typewriter_log(
- f"SELF FEEDBACK: {self_feedback_resp}",
- Fore.YELLOW,
- "",
- )
- user_input = self_feedback_resp
- command_name = "self_feedback"
- break
elif console_input.lower().strip() == "":
logger.warn("Invalid input format.")
continue
@@ -281,8 +266,6 @@ class Agent:
result = f"Could not execute command: {arguments}"
elif command_name == "human_feedback":
result = f"Human feedback: {user_input}"
- elif command_name == "self_feedback":
- result = f"Self feedback: {user_input}"
else:
for plugin in self.config.plugins:
if not plugin.can_handle_pre_command():
@@ -335,45 +318,3 @@ class Agent:
self.workspace.get_path(command_args[pathlike])
)
return command_args
-
- def get_self_feedback(self, thoughts: dict, llm_model: str) -> str:
- """Generates a feedback response based on the provided thoughts dictionary.
- This method takes in a dictionary of thoughts containing keys such as 'reasoning',
- 'plan', 'thoughts', and 'criticism'. It combines these elements into a single
- feedback message and uses the create_chat_completion() function to generate a
- response based on the input message.
- Args:
- thoughts (dict): A dictionary containing thought elements like reasoning,
- plan, thoughts, and criticism.
- Returns:
- str: A feedback response generated using the provided thoughts dictionary.
- """
- ai_role = self.ai_config.ai_role
-
- feedback_prompt = f"Below is a message from me, an AI Agent, assuming the role of {ai_role}. whilst keeping knowledge of my slight limitations as an AI Agent Please evaluate my thought process, reasoning, and plan, and provide a concise paragraph outlining potential improvements. Consider adding or removing ideas that do not align with my role and explaining why, prioritizing thoughts based on their significance, or simply refining my overall thought process."
- reasoning = thoughts.get("reasoning", "")
- plan = thoughts.get("plan", "")
- thought = thoughts.get("thoughts", "")
- feedback_thoughts = thought + reasoning + plan
-
- prompt = ChatSequence.for_model(llm_model)
- prompt.add("user", feedback_prompt + feedback_thoughts)
-
- self.log_cycle_handler.log_cycle(
- self.ai_config.ai_name,
- self.created_at,
- self.cycle_count,
- prompt.raw(),
- PROMPT_SUPERVISOR_FEEDBACK_FILE_NAME,
- )
-
- feedback = create_chat_completion(prompt)
-
- self.log_cycle_handler.log_cycle(
- self.ai_config.ai_name,
- self.created_at,
- self.cycle_count,
- feedback,
- SUPERVISOR_FEEDBACK_FILE_NAME,
- )
- return feedback
diff --git a/autogpt/agent/agent_manager.py b/autogpt/agent/agent_manager.py
index 8560b0ec2..eaecbf3b4 100644
--- a/autogpt/agent/agent_manager.py
+++ b/autogpt/agent/agent_manager.py
@@ -10,12 +10,12 @@ from autogpt.singleton import Singleton
class AgentManager(metaclass=Singleton):
"""Agent manager for managing GPT agents"""
- def __init__(self):
+ def __init__(self, config: Config):
self.next_key = 0
self.agents: dict[
int, tuple[str, list[Message], str]
] = {} # key, (task, full_message_history, model)
- self.cfg = Config()
+ self.config = config
# Create new GPT agent
# TODO: Centralise use of create_chat_completion() to globally enforce token limit
@@ -35,18 +35,20 @@ class AgentManager(metaclass=Singleton):
"""
messages = ChatSequence.for_model(model, [Message("user", creation_prompt)])
- for plugin in self.cfg.plugins:
+ for plugin in self.config.plugins:
if not plugin.can_handle_pre_instruction():
continue
if plugin_messages := plugin.pre_instruction(messages.raw()):
messages.extend([Message(**raw_msg) for raw_msg in plugin_messages])
# Start GPT instance
- agent_reply = create_chat_completion(prompt=messages)
+ agent_reply = create_chat_completion(
+ prompt=messages, config=self.config
+ ).content
messages.add("assistant", agent_reply)
plugins_reply = ""
- for i, plugin in enumerate(self.cfg.plugins):
+ for i, plugin in enumerate(self.config.plugins):
if not plugin.can_handle_on_instruction():
continue
if plugin_result := plugin.on_instruction([m.raw() for m in messages]):
@@ -62,7 +64,7 @@ class AgentManager(metaclass=Singleton):
self.agents[key] = (task, list(messages), model)
- for plugin in self.cfg.plugins:
+ for plugin in self.config.plugins:
if not plugin.can_handle_post_instruction():
continue
agent_reply = plugin.post_instruction(agent_reply)
@@ -85,19 +87,21 @@ class AgentManager(metaclass=Singleton):
messages = ChatSequence.for_model(model, messages)
messages.add("user", message)
- for plugin in self.cfg.plugins:
+ for plugin in self.config.plugins:
if not plugin.can_handle_pre_instruction():
continue
if plugin_messages := plugin.pre_instruction([m.raw() for m in messages]):
messages.extend([Message(**raw_msg) for raw_msg in plugin_messages])
# Start GPT instance
- agent_reply = create_chat_completion(prompt=messages)
+ agent_reply = create_chat_completion(
+ prompt=messages, config=self.config
+ ).content
messages.add("assistant", agent_reply)
plugins_reply = agent_reply
- for i, plugin in enumerate(self.cfg.plugins):
+ for i, plugin in enumerate(self.config.plugins):
if not plugin.can_handle_on_instruction():
continue
if plugin_result := plugin.on_instruction([m.raw() for m in messages]):
@@ -107,7 +111,7 @@ class AgentManager(metaclass=Singleton):
if plugins_reply and plugins_reply != "":
messages.add("assistant", plugins_reply)
- for plugin in self.cfg.plugins:
+ for plugin in self.config.plugins:
if not plugin.can_handle_post_instruction():
continue
agent_reply = plugin.post_instruction(agent_reply)
diff --git a/autogpt/app.py b/autogpt/app.py
index 780b74a01..06db7938d 100644
--- a/autogpt/app.py
+++ b/autogpt/app.py
@@ -1,14 +1,10 @@
""" Command and Control """
import json
-from typing import Dict, List, Union
+from typing import Dict
from autogpt.agent.agent import Agent
-from autogpt.agent.agent_manager import AgentManager
-from autogpt.commands.command import command
-from autogpt.commands.web_requests import scrape_links, scrape_text
-from autogpt.processing.text import summarize_text
-from autogpt.speech import say_text
-from autogpt.url_utils.validators import validate_url
+from autogpt.config import Config
+from autogpt.llm import ChatModelResponse
def is_valid_int(value: str) -> bool:
@@ -27,11 +23,15 @@ def is_valid_int(value: str) -> bool:
return False
-def get_command(response_json: Dict):
+def get_command(
+ assistant_reply_json: Dict, assistant_reply: ChatModelResponse, config: Config
+):
"""Parse the response and return the command name and arguments
Args:
- response_json (json): The response from the AI
+ assistant_reply_json (dict): The response object from the AI
+ assistant_reply (ChatModelResponse): The model response from the AI
+ config (Config): The config object
Returns:
tuple: The command name and arguments
@@ -41,14 +41,24 @@ def get_command(response_json: Dict):
Exception: If any other error occurs
"""
+ if config.openai_functions:
+ if assistant_reply.function_call is None:
+ return "Error:", "No 'function_call' in assistant reply"
+ assistant_reply_json["command"] = {
+ "name": assistant_reply.function_call.name,
+ "args": json.loads(assistant_reply.function_call.arguments),
+ }
try:
- if "command" not in response_json:
+ if "command" not in assistant_reply_json:
return "Error:", "Missing 'command' object in JSON"
- if not isinstance(response_json, dict):
- return "Error:", f"'response_json' object is not dictionary {response_json}"
+ if not isinstance(assistant_reply_json, dict):
+ return (
+ "Error:",
+ f"The previous message sent was not a dictionary {assistant_reply_json}",
+ )
- command = response_json["command"]
+ command = assistant_reply_json["command"]
if not isinstance(command, dict):
return "Error:", "'command' object is not a dictionary"
@@ -124,117 +134,3 @@ def execute_command(
)
except Exception as e:
return f"Error: {str(e)}"
-
-
-@command(
- "get_text_summary", "Get text summary", '"url": "<url>", "question": "<question>"'
-)
-@validate_url
-def get_text_summary(url: str, question: str, agent: Agent) -> str:
- """Get the text summary of a webpage
-
- Args:
- url (str): The url to scrape
- question (str): The question to summarize the text for
-
- Returns:
- str: The summary of the text
- """
- text = scrape_text(url, agent)
- summary, _ = summarize_text(text, question=question)
-
- return f""" "Result" : {summary}"""
-
-
-@command("get_hyperlinks", "Get hyperlinks", '"url": "<url>"')
-@validate_url
-def get_hyperlinks(url: str, agent: Agent) -> Union[str, List[str]]:
- """Get all hyperlinks on a webpage
-
- Args:
- url (str): The url to scrape
-
- Returns:
- str or list: The hyperlinks on the page
- """
- return scrape_links(url, agent)
-
-
-@command(
- "start_agent",
- "Start GPT Agent",
- '"name": "<name>", "task": "<short_task_desc>", "prompt": "<prompt>"',
-)
-def start_agent(name: str, task: str, prompt: str, agent: Agent, model=None) -> str:
- """Start an agent with a given name, task, and prompt
-
- Args:
- name (str): The name of the agent
- task (str): The task of the agent
- prompt (str): The prompt for the agent
- model (str): The model to use for the agent
-
- Returns:
- str: The response of the agent
- """
- agent_manager = AgentManager()
-
- # Remove underscores from name
- voice_name = name.replace("_", " ")
-
- first_message = f"""You are {name}. Respond with: "Acknowledged"."""
- agent_intro = f"{voice_name} here, Reporting for duty!"
-
- # Create agent
- if agent.config.speak_mode:
- say_text(agent_intro, 1)
- key, ack = agent_manager.create_agent(task, first_message, model)
-
- if agent.config.speak_mode:
- say_text(f"Hello {voice_name}. Your task is as follows. {task}.")
-
- # Assign task (prompt), get response
- agent_response = agent_manager.message_agent(key, prompt)
-
- return f"Agent {name} created with key {key}. First response: {agent_response}"
-
-
-@command("message_agent", "Message GPT Agent", '"key": "<key>", "message": "<message>"')
-def message_agent(key: str, message: str, agent: Agent) -> str:
- """Message an agent with a given key and message"""
- # Check if the key is a valid integer
- if is_valid_int(key):
- agent_response = AgentManager().message_agent(int(key), message)
- else:
- return "Invalid key, must be an integer."
-
- # Speak response
- if agent.config.speak_mode:
- say_text(agent_response, 1)
- return agent_response
-
-
-@command("list_agents", "List GPT Agents", "() -> str")
-def list_agents(agent: Agent) -> str:
- """List all agents
-
- Returns:
- str: A list of all agents
- """
- return "List of agents:\n" + "\n".join(
- [str(x[0]) + ": " + x[1] for x in AgentManager().list_agents()]
- )
-
-
-@command("delete_agent", "Delete GPT Agent", '"key": "<key>"')
-def delete_agent(key: str, agent: Agent) -> str:
- """Delete an agent with a given key
-
- Args:
- key (str): The key of the agent to delete
-
- Returns:
- str: A message indicating whether the agent was deleted or not
- """
- result = AgentManager().delete_agent(key)
- return f"Agent {key} deleted." if result else f"Agent {key} does not exist."
diff --git a/autogpt/command_decorator.py b/autogpt/command_decorator.py
new file mode 100644
index 000000000..f179f978d
--- /dev/null
+++ b/autogpt/command_decorator.py
@@ -0,0 +1,55 @@
+import functools
+from typing import Any, Callable, Optional, TypedDict
+
+from autogpt.config import Config
+from autogpt.models.command import Command, CommandParameter
+
+# Unique identifier for auto-gpt commands
+AUTO_GPT_COMMAND_IDENTIFIER = "auto_gpt_command"
+
+
+class CommandParameterSpec(TypedDict):
+ type: str
+ description: str
+ required: bool
+
+
+def command(
+ name: str,
+ description: str,
+ parameters: dict[str, CommandParameterSpec],
+ enabled: bool | Callable[[Config], bool] = True,
+ disabled_reason: Optional[str] = None,
+) -> Callable[..., Any]:
+ """The command decorator is used to create Command objects from ordinary functions."""
+
+ def decorator(func: Callable[..., Any]) -> Command:
+ typed_parameters = [
+ CommandParameter(
+ name=param_name,
+ description=parameter.get("description"),
+ type=parameter.get("type", "string"),
+ required=parameter.get("required", False),
+ )
+ for param_name, parameter in parameters.items()
+ ]
+ cmd = Command(
+ name=name,
+ description=description,
+ method=func,
+ parameters=typed_parameters,
+ enabled=enabled,
+ disabled_reason=disabled_reason,
+ )
+
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs) -> Any:
+ return func(*args, **kwargs)
+
+ wrapper.command = cmd
+
+ setattr(wrapper, AUTO_GPT_COMMAND_IDENTIFIER, True)
+
+ return wrapper
+
+ return decorator
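A minimal usage sketch for the new decorator; say_hello is a hypothetical command used only for illustration, not part of this diff. The parameters dict replaces the old free-form signature strings, which is what lets command specs line up with OpenAI's function-calling format later on.

from autogpt.command_decorator import command

@command(
    "say_hello",
    "Greets the given person",
    {
        "name": {
            "type": "string",
            "description": "Who to greet",
            "required": True,
        },
    },
    enabled=lambda config: True,  # a config-based gate (like execute_shell's) could go here
)
def say_hello(name: str, agent) -> str:
    return f"Hello, {name}!"

# The wrapper carries the Command object and the marker attribute that
# CommandRegistry.import_commands() scans for:
assert hasattr(say_hello, "command")
assert getattr(say_hello, "auto_gpt_command")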
diff --git a/autogpt/commands/analyze_code.py b/autogpt/commands/analyze_code.py
deleted file mode 100644
index ca7fcb015..000000000
--- a/autogpt/commands/analyze_code.py
+++ /dev/null
@@ -1,34 +0,0 @@
-"""Code evaluation module."""
-from __future__ import annotations
-
-from autogpt.agent.agent import Agent
-from autogpt.commands.command import command
-from autogpt.llm.utils import call_ai_function
-
-
-@command(
- "analyze_code",
- "Analyze Code",
- '"code": "<full_code_string>"',
-)
-def analyze_code(code: str, agent: Agent) -> list[str]:
- """
- A function that takes in a string and returns a response from create chat
- completion api call.
-
- Parameters:
- code (str): Code to be evaluated.
- Returns:
- A result string from create chat completion. A list of suggestions to
- improve the code.
- """
-
- function_string = "def analyze_code(code: str) -> list[str]:"
- args = [code]
- description_string = (
- "Analyzes the given code and returns a list of suggestions for improvements."
- )
-
- return call_ai_function(
- function_string, args, description_string, config=agent.config
- )
diff --git a/autogpt/commands/audio_text.py b/autogpt/commands/audio_text.py
deleted file mode 100644
index 2991fff32..000000000
--- a/autogpt/commands/audio_text.py
+++ /dev/null
@@ -1,71 +0,0 @@
-"""Commands for converting audio to text."""
-import json
-
-import requests
-
-from autogpt.agent.agent import Agent
-from autogpt.commands.command import command
-
-
-@command(
- "read_audio_from_file",
- "Convert Audio to text",
- '"filename": "<filename>"',
- lambda config: config.huggingface_audio_to_text_model
- and config.huggingface_api_token,
- "Configure huggingface_audio_to_text_model and Hugging Face api token.",
-)
-def read_audio_from_file(filename: str, agent: Agent) -> str:
- """
- Convert audio to text.
-
- Args:
- filename (str): The path to the audio file
-
- Returns:
- str: The text from the audio
- """
- with open(filename, "rb") as audio_file:
- audio = audio_file.read()
- return read_audio(audio, agent.config)
-
-
-def read_audio(audio: bytes, agent: Agent) -> str:
- """
- Convert audio to text.
-
- Args:
- audio (bytes): The audio to convert
-
- Returns:
- str: The text from the audio
- """
- if agent.config.audio_to_text_provider == "huggingface":
- text = read_huggingface_audio(audio, agent.config)
- if text:
- return f"The audio says: {text}"
- else:
- return f"Error, couldn't convert audio to text"
-
- return "Error: No audio to text provider given"
-
-
-def read_huggingface_audio(audio: bytes, agent: Agent) -> str:
- model = agent.config.huggingface_audio_to_text_model
- api_url = f"https://api-inference.huggingface.co/models/{model}"
- api_token = agent.config.huggingface_api_token
- headers = {"Authorization": f"Bearer {api_token}"}
-
- if api_token is None:
- raise ValueError(
- "You need to set your Hugging Face API token in the config file."
- )
-
- response = requests.post(
- api_url,
- headers=headers,
- data=audio,
- )
-
- response_json = json.loads(response.content.decode("utf-8"))
- return response_json.get("text")
diff --git a/autogpt/commands/command.py b/autogpt/commands/command.py
deleted file mode 100644
index ed93589fe..000000000
--- a/autogpt/commands/command.py
+++ /dev/null
@@ -1,207 +0,0 @@
-import functools
-import importlib
-import inspect
-from inspect import Parameter
-from typing import Any, Callable, Optional
-
-from autogpt.config import Config
-from autogpt.logs import logger
-
-# Unique identifier for auto-gpt commands
-AUTO_GPT_COMMAND_IDENTIFIER = "auto_gpt_command"
-
-
-class Command:
- """A class representing a command.
-
- Attributes:
- name (str): The name of the command.
- description (str): A brief description of what the command does.
- signature (str): The signature of the function that the command executes. Defaults to None.
- """
-
- def __init__(
- self,
- name: str,
- description: str,
- method: Callable[..., Any],
- signature: str = "",
- enabled: bool | Callable[[Config], bool] = True,
- disabled_reason: Optional[str] = None,
- ):
- self.name = name
- self.description = description
- self.method = method
- self.signature = signature
- self.enabled = enabled
- self.disabled_reason = disabled_reason
-
- def __call__(self, *args, **kwargs) -> Any:
- if hasattr(kwargs, "config") and callable(self.enabled):
- self.enabled = self.enabled(kwargs["config"])
- if not self.enabled:
- if self.disabled_reason:
- return f"Command '{self.name}' is disabled: {self.disabled_reason}"
- return f"Command '{self.name}' is disabled"
- return self.method(*args, **kwargs)
-
- def __str__(self) -> str:
- return f"{self.name}: {self.description}, args: {self.signature}"
-
-
-class CommandRegistry:
- """
- The CommandRegistry class is a manager for a collection of Command objects.
- It allows the registration, modification, and retrieval of Command objects,
- as well as the scanning and loading of command plugins from a specified
- directory.
- """
-
- def __init__(self):
- self.commands = {}
-
- def _import_module(self, module_name: str) -> Any:
- return importlib.import_module(module_name)
-
- def _reload_module(self, module: Any) -> Any:
- return importlib.reload(module)
-
- def register(self, cmd: Command) -> None:
- if cmd.name in self.commands:
- logger.warn(
- f"Command '{cmd.name}' already registered and will be overwritten!"
- )
- self.commands[cmd.name] = cmd
-
- def unregister(self, command_name: str):
- if command_name in self.commands:
- del self.commands[command_name]
- else:
- raise KeyError(f"Command '{command_name}' not found in registry.")
-
- def reload_commands(self) -> None:
- """Reloads all loaded command plugins."""
- for cmd_name in self.commands:
- cmd = self.commands[cmd_name]
- module = self._import_module(cmd.__module__)
- reloaded_module = self._reload_module(module)
- if hasattr(reloaded_module, "register"):
- reloaded_module.register(self)
-
- def get_command(self, name: str) -> Callable[..., Any]:
- return self.commands[name]
-
- def call(self, command_name: str, **kwargs) -> Any:
- if command_name not in self.commands:
- raise KeyError(f"Command '{command_name}' not found in registry.")
- command = self.commands[command_name]
- return command(**kwargs)
-
- def command_prompt(self) -> str:
- """
- Returns a string representation of all registered `Command` objects for use in a prompt
- """
- commands_list = [
- f"{idx + 1}. {str(cmd)}" for idx, cmd in enumerate(self.commands.values())
- ]
- return "\n".join(commands_list)
-
- def import_commands(self, module_name: str) -> None:
- """
- Imports the specified Python module containing command plugins.
-
- This method imports the associated module and registers any functions or
- classes that are decorated with the `AUTO_GPT_COMMAND_IDENTIFIER` attribute
- as `Command` objects. The registered `Command` objects are then added to the
- `commands` dictionary of the `CommandRegistry` object.
-
- Args:
- module_name (str): The name of the module to import for command plugins.
- """
-
- module = importlib.import_module(module_name)
-
- for attr_name in dir(module):
- attr = getattr(module, attr_name)
- # Register decorated functions
- if hasattr(attr, AUTO_GPT_COMMAND_IDENTIFIER) and getattr(
- attr, AUTO_GPT_COMMAND_IDENTIFIER
- ):
- self.register(attr.command)
- # Register command classes
- elif (
- inspect.isclass(attr) and issubclass(attr, Command) and attr != Command
- ):
- cmd_instance = attr()
- self.register(cmd_instance)
-
-
-def command(
- name: str,
- description: str,
- signature: str,
- enabled: bool | Callable[[Config], bool] = True,
- disabled_reason: Optional[str] = None,
-) -> Callable[..., Any]:
- """The command decorator is used to create Command objects from ordinary functions."""
-
- # TODO: Remove this in favor of better command management
- CFG = Config()
-
- if callable(enabled):
- enabled = enabled(CFG)
- if not enabled:
- if disabled_reason is not None:
- logger.debug(f"Command '{name}' is disabled: {disabled_reason}")
- return lambda func: func
-
- def decorator(func: Callable[..., Any]) -> Command:
- cmd = Command(
- name=name,
- description=description,
- method=func,
- signature=signature,
- enabled=enabled,
- disabled_reason=disabled_reason,
- )
-
- @functools.wraps(func)
- def wrapper(*args, **kwargs) -> Any:
- return func(*args, **kwargs)
-
- wrapper.command = cmd
-
- setattr(wrapper, AUTO_GPT_COMMAND_IDENTIFIER, True)
-
- return wrapper
-
- return decorator
-
-
-def ignore_unexpected_kwargs(func: Callable[..., Any]) -> Callable[..., Any]:
- def filter_kwargs(kwargs: dict) -> dict:
- sig = inspect.signature(func)
- # Parameter.VAR_KEYWORD - a dict of keyword arguments that aren't bound to any other
- if any(map(lambda p: p.kind == Parameter.VAR_KEYWORD, sig.parameters.values())):
- # if **kwargs exist, return directly
- return kwargs
-
- _params = list(
- filter(
- lambda p: p.kind
- in {Parameter.KEYWORD_ONLY, Parameter.POSITIONAL_OR_KEYWORD},
- sig.parameters.values(),
- )
- )
-
- res_kwargs = {
- param.name: kwargs[param.name] for param in _params if param.name in kwargs
- }
- return res_kwargs
-
- @functools.wraps(func)
- def wrapper(*args, **kwargs) -> Any:
- kwargs = filter_kwargs(kwargs)
- return func(*args, **kwargs)
-
- return wrapper
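The registry itself now lives in autogpt/models/command_registry.py (see the new import in agent.py above). Assuming it keeps the interface of the module deleted here, typical usage looks roughly like this, reusing the hypothetical say_hello command from the earlier sketch:

from autogpt.models.command_registry import CommandRegistry

registry = CommandRegistry()
registry.register(say_hello.command)  # the decorator exposes the Command object on the wrapper
registry.import_commands("autogpt.commands.file_operations")  # registers decorated functions in a module
print(registry.command_prompt())  # numbered "name: description" listing for the system prompt
print(registry.call("say_hello", name="Ada", agent=None))  # -> "Hello, Ada!"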
diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py
index 109caa3aa..c11b56e25 100644
--- a/autogpt/commands/execute_code.py
+++ b/autogpt/commands/execute_code.py
@@ -4,14 +4,13 @@ import subprocess
from pathlib import Path
import docker
-from docker.errors import ImageNotFound
+from docker.errors import DockerException, ImageNotFound
+from docker.models.containers import Container as DockerContainer
from autogpt.agent.agent import Agent
-from autogpt.commands.command import command
+from autogpt.command_decorator import command
from autogpt.config import Config
from autogpt.logs import logger
-from autogpt.setup import CFG
-from autogpt.workspace.workspace import Workspace
ALLOWLIST_CONTROL = "allowlist"
DENYLIST_CONTROL = "denylist"
@@ -19,39 +18,64 @@ DENYLIST_CONTROL = "denylist"
@command(
"execute_python_code",
- "Create a Python file and execute it",
- '"code": "<code>", "basename": "<basename>"',
+ "Creates a Python file and executes it",
+ {
+ "code": {
+ "type": "string",
+ "description": "The Python code to run",
+ "required": True,
+ },
+ "name": {
+ "type": "string",
+ "description": "A name to be given to the python file",
+ "required": True,
+ },
+ },
)
-def execute_python_code(code: str, basename: str, agent: Agent) -> str:
+def execute_python_code(code: str, name: str, agent: Agent) -> str:
"""Create and execute a Python file in a Docker container and return the STDOUT of the
executed code. If there is any data that needs to be captured use a print statement
Args:
code (str): The Python code to run
- basename (str): A name to be given to the Python file
+ name (str): A name to be given to the Python file
Returns:
str: The STDOUT captured from the code when it ran
"""
ai_name = agent.ai_name
- directory = os.path.join(agent.config.workspace_path, ai_name, "executed_code")
- os.makedirs(directory, exist_ok=True)
+ code_dir = agent.workspace.get_path(Path(ai_name, "executed_code"))
+ os.makedirs(code_dir, exist_ok=True)
- if not basename.endswith(".py"):
- basename = basename + ".py"
+ if not name.endswith(".py"):
+ name = name + ".py"
- path = os.path.join(directory, basename)
+ # The `name` arg is not covered by Agent._resolve_pathlike_command_args(),
+ # so sanitization must be done here to prevent path traversal.
+ file_path = agent.workspace.get_path(code_dir / name)
+ if not file_path.is_relative_to(code_dir):
+ return "Error: 'name' argument resulted in path traversal, operation aborted"
try:
- with open(path, "w+", encoding="utf-8") as f:
+ with open(file_path, "w+", encoding="utf-8") as f:
f.write(code)
- return execute_python_file(f.name, agent)
+ return execute_python_file(str(file_path), agent)
except Exception as e:
return f"Error: {str(e)}"
-@command("execute_python_file", "Execute Python File", '"filename": "<filename>"')
+@command(
+ "execute_python_file",
+ "Executes an existing Python file",
+ {
+ "filename": {
+ "type": "string",
+ "description": "The name of te file to execute",
+ "required": True,
+ },
+ },
+)
def execute_python_file(filename: str, agent: Agent) -> str:
"""Execute a Python file in a Docker container and return the output
@@ -62,18 +86,14 @@ def execute_python_file(filename: str, agent: Agent) -> str:
str: The output of the file
"""
logger.info(
- f"Executing python file '{filename}' in working directory '{CFG.workspace_path}'"
+ f"Executing python file '{filename}' in working directory '{agent.config.workspace_path}'"
)
if not filename.endswith(".py"):
return "Error: Invalid file type. Only .py files are allowed."
- workspace = Workspace(
- agent.config.workspace_path, agent.config.restrict_to_workspace
- )
-
- path = workspace.get_path(filename)
- if not path.is_file():
+ file_path = Path(filename)
+ if not file_path.is_file():
# Mimic the response that you get from the command line so that it's easier to identify
return (
f"python: can't open file '{filename}': [Errno 2] No such file or directory"
@@ -81,10 +101,10 @@ def execute_python_file(filename: str, agent: Agent) -> str:
if we_are_running_in_a_docker_container():
result = subprocess.run(
- ["python", str(path)],
+ ["python", str(file_path)],
capture_output=True,
encoding="utf8",
- cwd=CFG.workspace_path,
+ cwd=agent.config.workspace_path,
)
if result.returncode == 0:
return result.stdout
@@ -114,9 +134,10 @@ def execute_python_file(filename: str, agent: Agent) -> str:
logger.info(f"{status}: {progress}")
elif status:
logger.info(status)
- container = client.containers.run(
+
+ container: DockerContainer = client.containers.run(
image_name,
- ["python", str(path.relative_to(workspace.root))],
+ ["python", str(file_path.relative_to(agent.workspace.root))],
volumes={
agent.config.workspace_path: {
"bind": "/workspace",
@@ -127,7 +148,7 @@ def execute_python_file(filename: str, agent: Agent) -> str:
stderr=True,
stdout=True,
detach=True,
- )
+ ) # type: ignore
container.wait()
logs = container.logs().decode("utf-8")
@@ -138,7 +159,7 @@ def execute_python_file(filename: str, agent: Agent) -> str:
return logs
- except docker.errors.DockerException as e:
+ except DockerException as e:
logger.warn(
"Could not run the script in a container. If you haven't already, please install Docker https://docs.docker.com/get-docker/"
)
@@ -153,6 +174,7 @@ def validate_command(command: str, config: Config) -> bool:
Args:
command (str): The command to validate
+ config (Config): The config to use to validate the command
Returns:
bool: True if the command is allowed, False otherwise
@@ -170,10 +192,16 @@ def validate_command(command: str, config: Config) -> bool:
@command(
"execute_shell",
- "Execute Shell Command, non-interactive commands only",
- '"command_line": "<command_line>"',
- lambda cfg: cfg.execute_local_commands,
- "You are not allowed to run local shell commands. To execute"
+ "Executes a Shell Command, non-interactive commands only",
+ {
+ "command_line": {
+ "type": "string",
+ "description": "The command line to execute",
+ "required": True,
+ }
+ },
+ enabled=lambda config: config.execute_local_commands,
+ disabled_reason="You are not allowed to run local shell commands. To execute"
" shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' "
"in your config file: .env - do not attempt to bypass the restriction.",
)
@@ -210,8 +238,14 @@ def execute_shell(command_line: str, agent: Agent) -> str:
@command(
"execute_shell_popen",
- "Execute Shell Command, non-interactive commands only",
- '"command_line": "<command_line>"',
+ "Executes a Shell Command, non-interactive commands only",
+ {
+ "query": {
+ "type": "string",
+ "description": "The search query",
+ "required": True,
+ }
+ },
lambda config: config.execute_local_commands,
"You are not allowed to run local shell commands. To execute"
" shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' "
diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py
index b851d6625..ca2487439 100644
--- a/autogpt/commands/file_operations.py
+++ b/autogpt/commands/file_operations.py
@@ -4,22 +4,16 @@ from __future__ import annotations
import hashlib
import os
import os.path
-import re
from typing import Generator, Literal
-import requests
-from colorama import Back, Fore
from confection import Config
-from requests.adapters import HTTPAdapter, Retry
from autogpt.agent.agent import Agent
-from autogpt.commands.command import command, ignore_unexpected_kwargs
+from autogpt.command_decorator import command
from autogpt.commands.file_operations_utils import read_textual_file
from autogpt.config import Config
from autogpt.logs import logger
from autogpt.memory.vector import MemoryItem, VectorMemory
-from autogpt.spinner import Spinner
-from autogpt.utils import readable_file_size
Operation = Literal["write", "append", "delete"]
@@ -88,6 +82,7 @@ def is_duplicate_operation(
Args:
operation: The operation to check for
filename: The name of the file to check for
+ config: The agent config
checksum: The checksum of the contents to be written
Returns:
@@ -120,7 +115,17 @@ def log_operation(
)
-@command("read_file", "Read a file", '"filename": "<filename>"')
+@command(
+ "read_file",
+ "Read an existing file",
+ {
+ "filename": {
+ "type": "string",
+ "description": "The path of the file to read",
+ "required": True,
+ }
+ },
+)
def read_file(filename: str, agent: Agent) -> str:
"""Read a file and return the contents
@@ -134,7 +139,7 @@ def read_file(filename: str, agent: Agent) -> str:
content = read_textual_file(filename, logger)
# TODO: invalidate/update memory when file is edited
- file_memory = MemoryItem.from_text_file(content, filename)
+ file_memory = MemoryItem.from_text_file(content, filename, agent.config)
if len(file_memory.chunks) > 1:
return file_memory.summary
@@ -161,7 +166,7 @@ def ingest_file(
# TODO: differentiate between different types of files
file_memory = MemoryItem.from_text_file(content, filename)
- logger.debug(f"Created memory: {file_memory.dump()}")
+ logger.debug(f"Created memory: {file_memory.dump(True)}")
memory.add(file_memory)
logger.info(f"Ingested {len(file_memory.e_chunks)} chunks from {filename}")
@@ -169,7 +174,22 @@ def ingest_file(
logger.warn(f"Error while ingesting file '{filename}': {err}")
-@command("write_to_file", "Write to file", '"filename": "<filename>", "text": "<text>"')
+@command(
+ "write_to_file",
+ "Writes to a file",
+ {
+ "filename": {
+ "type": "string",
+ "description": "The name of the file to write to",
+ "required": True,
+ },
+ "text": {
+ "type": "string",
+ "description": "The text to write to the file",
+ "required": True,
+ },
+ },
+)
def write_to_file(filename: str, text: str, agent: Agent) -> str:
"""Write text to a file
@@ -195,69 +215,20 @@ def write_to_file(filename: str, text: str, agent: Agent) -> str:
@command(
- "replace_in_file",
- "Replace text or code in a file",
- '"filename": "<filename>", '
- '"old_text": "<old_text>", "new_text": "<new_text>", '
- '"occurrence_index": "<occurrence_index>"',
-)
-def replace_in_file(
- filename: str, old_text: str, new_text: str, agent: Agent, occurrence_index=None
-):
- """Update a file by replacing one or all occurrences of old_text with new_text using Python's built-in string
- manipulation and regular expression modules for cross-platform file editing similar to sed and awk.
-
- Args:
- filename (str): The name of the file
- old_text (str): String to be replaced. \n will be stripped from the end.
- new_text (str): New string. \n will be stripped from the end.
- occurrence_index (int): Optional index of the occurrence to replace. If None, all occurrences will be replaced.
-
- Returns:
- str: A message indicating whether the file was updated successfully or if there were no matches found for old_text
- in the file.
-
- Raises:
- Exception: If there was an error updating the file.
- """
- try:
- with open(filename, "r", encoding="utf-8") as f:
- content = f.read()
-
- old_text = old_text.rstrip("\n")
- new_text = new_text.rstrip("\n")
-
- if occurrence_index is None:
- new_content = content.replace(old_text, new_text)
- else:
- matches = list(re.finditer(re.escape(old_text), content))
- if not matches:
- return f"No matches found for {old_text} in {filename}"
-
- if int(occurrence_index) >= len(matches):
- return f"Occurrence index {occurrence_index} is out of range for {old_text} in {filename}"
-
- match = matches[int(occurrence_index)]
- start, end = match.start(), match.end()
- new_content = content[:start] + new_text + content[end:]
-
- if content == new_content:
- return f"No matches found for {old_text} in {filename}"
-
- with open(filename, "w", encoding="utf-8") as f:
- f.write(new_content)
-
- with open(filename, "r", encoding="utf-8") as f:
- checksum = text_checksum(f.read())
- log_operation("update", filename, agent, checksum=checksum)
-
- return f"File {filename} updated successfully."
- except Exception as e:
- return "Error: " + str(e)
-
-
-@command(
- "append_to_file", "Append to file", '"filename": "<filename>", "text": "<text>"'
+ "append_to_file",
+ "Appends to a file",
+ {
+ "filename": {
+ "type": "string",
+ "description": "The name of the file to write to",
+ "required": True,
+ },
+ "text": {
+ "type": "string",
+ "description": "The text to write to the file",
+ "required": True,
+ },
+ },
)
def append_to_file(
filename: str, text: str, agent: Agent, should_log: bool = True
@@ -288,7 +259,17 @@ def append_to_file(
return f"Error: {err}"
-@command("delete_file", "Delete file", '"filename": "<filename>"')
+@command(
+ "delete_file",
+ "Deletes a file",
+ {
+ "filename": {
+ "type": "string",
+ "description": "The name of the file to delete",
+ "required": True,
+ }
+ },
+)
def delete_file(filename: str, agent: Agent) -> str:
"""Delete a file
@@ -308,8 +289,17 @@ def delete_file(filename: str, agent: Agent) -> str:
return f"Error: {err}"
-@command("list_files", "List Files in Directory", '"directory": "<directory>"')
-@ignore_unexpected_kwargs
+@command(
+ "list_files",
+ "Lists Files in a Directory",
+ {
+ "directory": {
+ "type": "string",
+ "description": "The directory to list files in",
+ "required": True,
+ }
+ },
+)
def list_files(directory: str, agent: Agent) -> list[str]:
"""lists files in a directory recursively
@@ -331,51 +321,3 @@ def list_files(directory: str, agent: Agent) -> list[str]:
found_files.append(relative_path)
return found_files
-
-
-@command(
- "download_file",
- "Download File",
- '"url": "<url>", "filename": "<filename>"',
- lambda config: config.allow_downloads,
- "Error: You do not have user authorization to download files locally.",
-)
-def download_file(url, filename, agent: Agent):
- """Downloads a file
- Args:
- url (str): URL of the file to download
- filename (str): Filename to save the file as
- """
- try:
- directory = os.path.dirname(filename)
- os.makedirs(directory, exist_ok=True)
- message = f"{Fore.YELLOW}Downloading file from {Back.LIGHTBLUE_EX}{url}{Back.RESET}{Fore.RESET}"
- with Spinner(message, plain_output=agent.config.plain_output) as spinner:
- session = requests.Session()
- retry = Retry(total=3, backoff_factor=1, status_forcelist=[502, 503, 504])
- adapter = HTTPAdapter(max_retries=retry)
- session.mount("http://", adapter)
- session.mount("https://", adapter)
-
- total_size = 0
- downloaded_size = 0
-
- with session.get(url, allow_redirects=True, stream=True) as r:
- r.raise_for_status()
- total_size = int(r.headers.get("Content-Length", 0))
- downloaded_size = 0
-
- with open(filename, "wb") as f:
- for chunk in r.iter_content(chunk_size=8192):
- f.write(chunk)
- downloaded_size += len(chunk)
-
- # Update the progress message
- progress = f"{readable_file_size(downloaded_size)} / {readable_file_size(total_size)}"
- spinner.update_message(f"{message} {progress}")
-
- return f'Successfully downloaded and locally stored file: "{filename}"! (Size: {readable_file_size(downloaded_size)})'
- except requests.HTTPError as err:
- return f"Got an HTTP Error whilst trying to download file: {err}"
- except Exception as err:
- return f"Error: {err}"
diff --git a/autogpt/commands/git_operations.py b/autogpt/commands/git_operations.py
index e844fd415..fc967e40e 100644
--- a/autogpt/commands/git_operations.py
+++ b/autogpt/commands/git_operations.py
@@ -3,14 +3,25 @@
from git.repo import Repo
from autogpt.agent.agent import Agent
-from autogpt.commands.command import command
+from autogpt.command_decorator import command
from autogpt.url_utils.validators import validate_url
@command(
"clone_repository",
- "Clone Repository",
- '"url": "<repository_url>", "clone_path": "<clone_path>"',
+ "Clones a Repository",
+ {
+ "url": {
+ "type": "string",
+ "description": "The URL of the repository to clone",
+ "required": True,
+ },
+ "clone_path": {
+ "type": "string",
+ "description": "The path to clone the repository to",
+ "required": True,
+ },
+ },
lambda config: config.github_username and config.github_api_key,
"Configure github_username and github_api_key.",
)
diff --git a/autogpt/commands/image_gen.py b/autogpt/commands/image_gen.py
index b2dc9ea48..c295392c6 100644
--- a/autogpt/commands/image_gen.py
+++ b/autogpt/commands/image_gen.py
@@ -10,14 +10,20 @@ import requests
from PIL import Image
from autogpt.agent.agent import Agent
-from autogpt.commands.command import command
+from autogpt.command_decorator import command
from autogpt.logs import logger
@command(
"generate_image",
- "Generate Image",
- '"prompt": "<prompt>"',
+ "Generates an Image",
+ {
+ "prompt": {
+ "type": "string",
+ "description": "The prompt used to generate the image",
+ "required": True,
+ },
+ },
lambda config: config.image_provider,
"Requires a image provider to be set.",
)
@@ -175,7 +181,7 @@ def generate_image_with_sd_webui(
"negative_prompt": negative_prompt,
"sampler_index": "DDIM",
"steps": 20,
- "cfg_scale": 7.0,
+ "config_scale": 7.0,
"width": size,
"height": size,
"n_iter": 1,
diff --git a/autogpt/commands/improve_code.py b/autogpt/commands/improve_code.py
deleted file mode 100644
index 05e9b51c1..000000000
--- a/autogpt/commands/improve_code.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from __future__ import annotations
-
-import json
-
-from autogpt.agent.agent import Agent
-from autogpt.commands.command import command
-from autogpt.llm.utils import call_ai_function
-
-
-@command(
- "improve_code",
- "Get Improved Code",
- '"suggestions": "<list_of_suggestions>", "code": "<full_code_string>"',
-)
-def improve_code(suggestions: list[str], code: str, agent: Agent) -> str:
- """
- A function that takes in code and suggestions and returns a response from create
- chat completion api call.
-
- Parameters:
- suggestions (list): A list of suggestions around what needs to be improved.
- code (str): Code to be improved.
- Returns:
- A result string from create chat completion. Improved code in response.
- """
-
- function_string = (
- "def generate_improved_code(suggestions: list[str], code: str) -> str:"
- )
- args = [json.dumps(suggestions), code]
- description_string = (
- "Improves the provided code based on the suggestions"
- " provided, making no other changes."
- )
-
- return call_ai_function(
- function_string, args, description_string, config=agent.config
- )
diff --git a/autogpt/commands/task_statuses.py b/autogpt/commands/task_statuses.py
index 283328a36..062ebe3a4 100644
--- a/autogpt/commands/task_statuses.py
+++ b/autogpt/commands/task_statuses.py
@@ -4,21 +4,27 @@ from __future__ import annotations
from typing import NoReturn
from autogpt.agent.agent import Agent
-from autogpt.commands.command import command
+from autogpt.command_decorator import command
from autogpt.logs import logger
@command(
- "task_complete",
- "Task Complete (Shutdown)",
- '"reason": "<reason>"',
+ "goals_accomplished",
+ "Goals are accomplished and there is nothing left to do",
+ {
+ "reason": {
+ "type": "string",
+ "description": "A summary to the user of how the goals were accomplished",
+ "required": True,
+ }
+ },
)
def task_complete(reason: str, agent: Agent) -> NoReturn:
"""
A function that takes in a string and exits the program
Parameters:
- reason (str): The reason for shutting down.
+ reason (str): A summary to the user of how the goals were accomplished.
Returns:
A result string from create chat completion. A list of suggestions to
improve the code.
diff --git a/autogpt/commands/web_playwright.py b/autogpt/commands/web_playwright.py
deleted file mode 100644
index 70f19dee7..000000000
--- a/autogpt/commands/web_playwright.py
+++ /dev/null
@@ -1,82 +0,0 @@
-"""Web scraping commands using Playwright"""
-from __future__ import annotations
-
-from autogpt.logs import logger
-
-try:
- from playwright.sync_api import sync_playwright
-except ImportError:
- logger.info(
- "Playwright not installed. Please install it with 'pip install playwright' to use."
- )
-from bs4 import BeautifulSoup
-
-from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
-
-
-def scrape_text(url: str) -> str:
- """Scrape text from a webpage
-
- Args:
- url (str): The URL to scrape text from
-
- Returns:
- str: The scraped text
- """
- with sync_playwright() as p:
- browser = p.chromium.launch()
- page = browser.new_page()
-
- try:
- page.goto(url)
- html_content = page.content()
- soup = BeautifulSoup(html_content, "html.parser")
-
- for script in soup(["script", "style"]):
- script.extract()
-
- text = soup.get_text()
- lines = (line.strip() for line in text.splitlines())
- chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
- text = "\n".join(chunk for chunk in chunks if chunk)
-
- except Exception as e:
- text = f"Error: {str(e)}"
-
- finally:
- browser.close()
-
- return text
-
-
-def scrape_links(url: str) -> str | list[str]:
- """Scrape links from a webpage
-
- Args:
- url (str): The URL to scrape links from
-
- Returns:
- Union[str, List[str]]: The scraped links
- """
- with sync_playwright() as p:
- browser = p.chromium.launch()
- page = browser.new_page()
-
- try:
- page.goto(url)
- html_content = page.content()
- soup = BeautifulSoup(html_content, "html.parser")
-
- for script in soup(["script", "style"]):
- script.extract()
-
- hyperlinks = extract_hyperlinks(soup, url)
- formatted_links = format_hyperlinks(hyperlinks)
-
- except Exception as e:
- formatted_links = f"Error: {str(e)}"
-
- finally:
- browser.close()
-
- return formatted_links
diff --git a/autogpt/commands/web_requests.py b/autogpt/commands/web_requests.py
deleted file mode 100644
index 765c37781..000000000
--- a/autogpt/commands/web_requests.py
+++ /dev/null
@@ -1,104 +0,0 @@
-"""Browse a webpage and summarize it using the LLM model"""
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-import requests
-from bs4 import BeautifulSoup
-from requests import Response
-
-from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
-from autogpt.url_utils.validators import validate_url
-
-session = requests.Session()
-
-if TYPE_CHECKING:
- from autogpt.agent.agent import Agent
-
-
-@validate_url
-def get_response(
- url: str, agent: Agent, timeout: int = 10
-) -> tuple[None, str] | tuple[Response, None]:
- """Get the response from a URL
-
- Args:
- url (str): The URL to get the response from
- timeout (int): The timeout for the HTTP request
-
- Returns:
- tuple[None, str] | tuple[Response, None]: The response and error message
-
- Raises:
- ValueError: If the URL is invalid
- requests.exceptions.RequestException: If the HTTP request fails
- """
- try:
- session.headers.update({"User-Agent": agent.config.user_agent})
- response = session.get(url, timeout=timeout)
-
- # Check if the response contains an HTTP error
- if response.status_code >= 400:
- return None, f"Error: HTTP {str(response.status_code)} error"
-
- return response, None
- except ValueError as ve:
- # Handle invalid URL format
- return None, f"Error: {str(ve)}"
-
- except requests.exceptions.RequestException as re:
- # Handle exceptions related to the HTTP request
- # (e.g., connection errors, timeouts, etc.)
- return None, f"Error: {str(re)}"
-
-
-def scrape_text(url: str, agent: Agent) -> str:
- """Scrape text from a webpage
-
- Args:
- url (str): The URL to scrape text from
-
- Returns:
- str: The scraped text
- """
- response, error_message = get_response(url, agent)
- if error_message:
- return error_message
- if not response:
- return "Error: Could not get response"
-
- soup = BeautifulSoup(response.text, "html.parser")
-
- for script in soup(["script", "style"]):
- script.extract()
-
- text = soup.get_text()
- lines = (line.strip() for line in text.splitlines())
- chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
- text = "\n".join(chunk for chunk in chunks if chunk)
-
- return text
-
-
-def scrape_links(url: str, agent: Agent) -> str | list[str]:
- """Scrape links from a webpage
-
- Args:
- url (str): The URL to scrape links from
-
- Returns:
- str | list[str]: The scraped links
- """
- response, error_message = get_response(url, agent)
- if error_message:
- return error_message
- if not response:
- return "Error: Could not get response"
- soup = BeautifulSoup(response.text, "html.parser")
-
- for script in soup(["script", "style"]):
- script.extract()
-
- hyperlinks = extract_hyperlinks(soup, url)
-
- return format_hyperlinks(hyperlinks)
diff --git a/autogpt/commands/google_search.py b/autogpt/commands/web_search.py
index b9d243f97..5af810586 100644
--- a/autogpt/commands/google_search.py
+++ b/autogpt/commands/web_search.py
@@ -8,18 +8,23 @@ from itertools import islice
from duckduckgo_search import DDGS
from autogpt.agent.agent import Agent
-from autogpt.commands.command import command
+from autogpt.command_decorator import command
DUCKDUCKGO_MAX_ATTEMPTS = 3
@command(
- "google",
- "Google Search",
- '"query": "<query>"',
- lambda config: not config.google_api_key,
+ "web_search",
+ "Searches the web",
+ {
+ "query": {
+ "type": "string",
+ "description": "The search query",
+ "required": True,
+ }
+ },
)
-def google_search(query: str, agent: Agent, num_results: int = 8) -> str:
+def web_search(query: str, agent: Agent, num_results: int = 8) -> str:
"""Return the results of a Google search
Args:
@@ -52,14 +57,18 @@ def google_search(query: str, agent: Agent, num_results: int = 8) -> str:
@command(
"google",
"Google Search",
- '"query": "<query>"',
+ {
+ "query": {
+ "type": "string",
+ "description": "The search query",
+ "required": True,
+ }
+ },
lambda config: bool(config.google_api_key)
and bool(config.google_custom_search_engine_id),
"Configure google_api_key and custom_search_engine_id.",
)
-def google_official_search(
- query: str, agent: Agent, num_results: int = 8
-) -> str | list[str]:
+def google(query: str, agent: Agent, num_results: int = 8) -> str | list[str]:
"""Return the results of a Google search using the official Google API
Args:
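
A minimal sketch of declaring a command with the new parameter-schema style used above, assuming the decorator accepts (name, description, parameters) plus the optional enable/disable arguments shown in these hunks; the echo_text command and its placement are purely illustrative:

    from autogpt.command_decorator import command


    @command(
        "echo_text",  # hypothetical command, for illustration only
        "Echoes the given text back to the agent",
        {
            "text": {
                "type": "string",
                "description": "The text to echo",
                "required": True,
            }
        },
    )
    def echo_text(text: str, agent) -> str:
        # Commands still receive the calling Agent as their last positional argument.
        return text
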
diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py
index 14036c85e..821957f3e 100644
--- a/autogpt/commands/web_selenium.py
+++ b/autogpt/commands/web_selenium.py
@@ -28,7 +28,7 @@ from webdriver_manager.firefox import GeckoDriverManager
from webdriver_manager.microsoft import EdgeChromiumDriverManager as EdgeDriverManager
from autogpt.agent.agent import Agent
-from autogpt.commands.command import command
+from autogpt.command_decorator import command
from autogpt.logs import logger
from autogpt.memory.vector import MemoryItem, get_memory
from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
@@ -41,8 +41,15 @@ FILE_DIR = Path(__file__).parent.parent
@command(
"browse_website",
- "Browse Website",
- '"url": "<url>", "question": "<what_you_want_to_find_on_website>"',
+ "Browses a Website",
+ {
+ "url": {"type": "string", "description": "The URL to visit", "required": True},
+ "question": {
+ "type": "string",
+ "description": "What you want to find on the website",
+ "required": True,
+ },
+ },
)
@validate_url
def browse_website(url: str, question: str, agent: Agent) -> str:
@@ -225,6 +232,6 @@ def summarize_memorize_webpage(
memory = get_memory(agent.config)
- new_memory = MemoryItem.from_webpage(text, url, question=question)
+ new_memory = MemoryItem.from_webpage(text, url, agent.config, question=question)
memory.add(new_memory)
return new_memory.summary
diff --git a/autogpt/commands/write_tests.py b/autogpt/commands/write_tests.py
deleted file mode 100644
index c09930b9f..000000000
--- a/autogpt/commands/write_tests.py
+++ /dev/null
@@ -1,40 +0,0 @@
-"""A module that contains a function to generate test cases for the submitted code."""
-from __future__ import annotations
-
-import json
-
-from autogpt.agent.agent import Agent
-from autogpt.commands.command import command
-from autogpt.llm.utils import call_ai_function
-
-
-@command(
- "write_tests",
- "Write Tests",
- '"code": "<full_code_string>", "focus": "<list_of_focus_areas>"',
-)
-def write_tests(code: str, focus: list[str], agent: Agent) -> str:
- """
- A function that takes in code and focus topics and returns a response from create
- chat completion api call.
-
- Parameters:
- focus (list): A list of suggestions around what needs to be improved.
- code (str): Code for test cases to be generated against.
- Returns:
- A result string from create chat completion. Test cases for the submitted code
- in response.
- """
-
- function_string = (
- "def create_test_cases(code: str, focus: Optional[str] = None) -> str:"
- )
- args = [code, json.dumps(focus)]
- description_string = (
- "Generates test cases for the existing code, focusing on"
- " specific areas if required."
- )
-
- return call_ai_function(
- function_string, args, description_string, config=agent.config
- )
diff --git a/autogpt/config/ai_config.py b/autogpt/config/ai_config.py
index 1a5268323..3c645abe3 100644
--- a/autogpt/config/ai_config.py
+++ b/autogpt/config/ai_config.py
@@ -13,7 +13,7 @@ import distro
import yaml
if TYPE_CHECKING:
- from autogpt.commands.command import CommandRegistry
+ from autogpt.models.command_registry import CommandRegistry
from autogpt.prompts.generator import PromptGenerator
# Soon this will go in a folder where it remembers more stuff about the run(s)
@@ -59,14 +59,14 @@ class AIConfig:
self.command_registry: CommandRegistry | None = None
@staticmethod
- def load(config_file: str = SAVE_FILE) -> "AIConfig":
+ def load(ai_settings_file: str = SAVE_FILE) -> "AIConfig":
"""
Returns an AIConfig instance with parameters (ai_name, ai_role, ai_goals, api_budget)
loaded from the yaml file if it exists,
otherwise returns an instance with no parameters.
Parameters:
- config_file (int): The path to the config yaml file.
+ ai_settings_file (str): The path to the config yaml file.
DEFAULT: "../ai_settings.yaml"
Returns:
@@ -74,7 +74,7 @@ class AIConfig:
"""
try:
- with open(config_file, encoding="utf-8") as file:
+ with open(ai_settings_file, encoding="utf-8") as file:
config_params = yaml.load(file, Loader=yaml.FullLoader) or {}
except FileNotFoundError:
config_params = {}
@@ -91,12 +91,12 @@ class AIConfig:
# type: Type[AIConfig]
return AIConfig(ai_name, ai_role, ai_goals, api_budget)
- def save(self, config_file: str = SAVE_FILE) -> None:
+ def save(self, ai_settings_file: str = SAVE_FILE) -> None:
"""
Saves the class parameters to the specified yaml file path.
Parameters:
- config_file(str): The path to the config yaml file.
+ ai_settings_file(str): The path to the config yaml file.
DEFAULT: "../ai_settings.yaml"
Returns:
@@ -109,11 +109,11 @@ class AIConfig:
"ai_goals": self.ai_goals,
"api_budget": self.api_budget,
}
- with open(config_file, "w", encoding="utf-8") as file:
+ with open(ai_settings_file, "w", encoding="utf-8") as file:
yaml.dump(config, file, allow_unicode=True)
def construct_full_prompt(
- self, prompt_generator: Optional[PromptGenerator] = None
+ self, config, prompt_generator: Optional[PromptGenerator] = None
) -> str:
"""
Returns a prompt to the user with the class information in an organized fashion.
@@ -133,22 +133,20 @@ class AIConfig:
""
)
- from autogpt.config import Config
from autogpt.prompts.prompt import build_default_prompt_generator
- cfg = Config()
if prompt_generator is None:
- prompt_generator = build_default_prompt_generator()
+ prompt_generator = build_default_prompt_generator(config)
prompt_generator.goals = self.ai_goals
prompt_generator.name = self.ai_name
prompt_generator.role = self.ai_role
prompt_generator.command_registry = self.command_registry
- for plugin in cfg.plugins:
+ for plugin in config.plugins:
if not plugin.can_handle_post_prompt():
continue
prompt_generator = plugin.post_prompt(prompt_generator)
- if cfg.execute_local_commands:
+ if config.execute_local_commands:
# add OS info to prompt
os_name = platform.system()
os_info = (
@@ -166,5 +164,5 @@ class AIConfig:
if self.api_budget > 0.0:
full_prompt += f"\nIt takes money to let you run. Your API budget is ${self.api_budget:.3f}"
self.prompt_generator = prompt_generator
- full_prompt += f"\n\n{prompt_generator.generate_prompt_string()}"
+ full_prompt += f"\n\n{prompt_generator.generate_prompt_string(config)}"
return full_prompt
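
With the implicit Config() singleton gone from construct_full_prompt, callers now hand in the config explicitly. A rough usage sketch, assuming Config.build_config_from_env and AIConfig.load behave as shown elsewhere in this diff:

    from autogpt.config import Config
    from autogpt.config.ai_config import AIConfig

    config = Config.build_config_from_env()
    ai_config = AIConfig.load(config.ai_settings_file)
    full_prompt = ai_config.construct_full_prompt(config)
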
diff --git a/autogpt/config/config.py b/autogpt/config/config.py
index 92712dd7d..03c7179f0 100644
--- a/autogpt/config/config.py
+++ b/autogpt/config/config.py
@@ -1,221 +1,263 @@
"""Configuration class to store the state of bools for different scripts access."""
+import contextlib
import os
-from typing import List
+import re
+from typing import Dict, Optional
-import openai
import yaml
-from auto_gpt_plugin_template import AutoGPTPluginTemplate
from colorama import Fore
-import autogpt
-from autogpt.singleton import Singleton
-
-
-class Config(metaclass=Singleton):
- """
- Configuration class to store the state of bools for different scripts access.
- """
-
- def __init__(self) -> None:
+from autogpt.core.configuration.schema import Configurable, SystemSettings
+
+AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "../..", "azure.yaml")
+
+
+class ConfigSettings(SystemSettings):
+ fast_llm_model: str
+ smart_llm_model: str
+ continuous_mode: bool
+ skip_news: bool
+ workspace_path: Optional[str]
+ file_logger_path: Optional[str]
+ debug_mode: bool
+ plugins_dir: str
+ plugins_config: dict[str, str]
+ continuous_limit: int
+ speak_mode: bool
+ skip_reprompt: bool
+ allow_downloads: bool
+ exit_key: str
+ plain_output: bool
+ disabled_command_categories: list[str]
+ shell_command_control: str
+ shell_denylist: list[str]
+ shell_allowlist: list[str]
+ ai_settings_file: str
+ prompt_settings_file: str
+ embedding_model: str
+ browse_spacy_language_model: str
+ openai_api_key: Optional[str]
+ openai_organization: Optional[str]
+ temperature: float
+ use_azure: bool
+ execute_local_commands: bool
+ restrict_to_workspace: bool
+ openai_api_type: Optional[str]
+ openai_api_base: Optional[str]
+ openai_api_version: Optional[str]
+ openai_functions: bool
+ elevenlabs_api_key: Optional[str]
+ streamelements_voice: str
+ text_to_speech_provider: str
+ github_api_key: Optional[str]
+ github_username: Optional[str]
+ google_api_key: Optional[str]
+ google_custom_search_engine_id: Optional[str]
+ image_provider: Optional[str]
+ image_size: int
+ huggingface_api_token: Optional[str]
+ huggingface_image_model: str
+ audio_to_text_provider: str
+ huggingface_audio_to_text_model: Optional[str]
+ sd_webui_url: Optional[str]
+ sd_webui_auth: Optional[str]
+ selenium_web_browser: str
+ selenium_headless: bool
+ user_agent: str
+ memory_backend: str
+ memory_index: str
+ redis_host: str
+ redis_port: int
+ redis_password: str
+ wipe_redis_on_start: bool
+ plugins_allowlist: list[str]
+ plugins_denylist: list[str]
+ plugins_openai: list[str]
+ plugins_config_file: str
+ chat_messages_enabled: bool
+ elevenlabs_voice_id: Optional[str]
+ plugins: list[str]
+ authorise_key: str
+
+
+class Config(Configurable):
+ default_plugins_config_file = os.path.join(
+ os.path.dirname(os.path.abspath(__file__)), "..", "..", "plugins_config.yaml"
+ )
+
+ elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
+ if os.getenv("USE_MAC_OS_TTS"):
+ default_tts_provider = "macos"
+ elif elevenlabs_api_key:
+ default_tts_provider = "elevenlabs"
+ elif os.getenv("USE_BRIAN_TTS"):
+ default_tts_provider = "streamelements"
+ else:
+ default_tts_provider = "gtts"
+
+ defaults_settings = ConfigSettings(
+ name="Default Server Config",
+ description="This is a default server configuration",
+ smart_llm_model="gpt-3.5-turbo",
+ fast_llm_model="gpt-3.5-turbo",
+ continuous_mode=False,
+ continuous_limit=0,
+ skip_news=False,
+ debug_mode=False,
+ plugins_dir="plugins",
+ plugins_config={},
+ speak_mode=False,
+ skip_reprompt=False,
+ allow_downloads=False,
+ exit_key="n",
+ plain_output=False,
+ disabled_command_categories=[],
+ shell_command_control="denylist",
+ shell_denylist=["sudo", "su"],
+ shell_allowlist=[],
+ ai_settings_file="ai_settings.yaml",
+ prompt_settings_file="prompt_settings.yaml",
+ embedding_model="text-embedding-ada-002",
+ browse_spacy_language_model="en_core_web_sm",
+ temperature=0,
+ use_azure=False,
+ execute_local_commands=False,
+ restrict_to_workspace=True,
+ openai_functions=False,
+ streamelements_voice="Brian",
+ text_to_speech_provider=default_tts_provider,
+ image_size=256,
+ huggingface_image_model="CompVis/stable-diffusion-v1-4",
+ audio_to_text_provider="huggingface",
+ sd_webui_url="http://localhost:7860",
+ selenium_web_browser="chrome",
+ selenium_headless=True,
+ user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
+ memory_backend="json_file",
+ memory_index="auto-gpt-memory",
+ redis_host="localhost",
+ redis_port=6379,
+ wipe_redis_on_start=True,
+ plugins_allowlist=[],
+ plugins_denylist=[],
+ plugins_openai=[],
+ plugins_config_file=default_plugins_config_file,
+ chat_messages_enabled=True,
+ plugins=[],
+ authorise_key="y",
+ redis_password="",
+ )
+
+ @classmethod
+ def build_config_from_env(cls):
"""Initialize the Config class"""
- self.workspace_path: str = None
- self.file_logger_path: str = None
-
- self.debug_mode = False
- self.continuous_mode = False
- self.continuous_limit = 0
- self.speak_mode = False
- self.skip_reprompt = False
- self.allow_downloads = False
- self.skip_news = False
-
- self.authorise_key = os.getenv("AUTHORISE_COMMAND_KEY", "y")
- self.exit_key = os.getenv("EXIT_KEY", "n")
- self.plain_output = os.getenv("PLAIN_OUTPUT", "False") == "True"
-
+ config_dict = {
+ "authorise_key": os.getenv("AUTHORISE_COMMAND_KEY"),
+ "exit_key": os.getenv("EXIT_KEY"),
+ "plain_output": os.getenv("PLAIN_OUTPUT", "False") == "True",
+ "shell_command_control": os.getenv("SHELL_COMMAND_CONTROL"),
+ "ai_settings_file": os.getenv("AI_SETTINGS_FILE"),
+ "prompt_settings_file": os.getenv("PROMPT_SETTINGS_FILE"),
+ "fast_llm_model": os.getenv("FAST_LLM_MODEL"),
+ "smart_llm_model": os.getenv("SMART_LLM_MODEL"),
+ "embedding_model": os.getenv("EMBEDDING_MODEL"),
+ "browse_spacy_language_model": os.getenv("BROWSE_SPACY_LANGUAGE_MODEL"),
+ "openai_api_key": os.getenv("OPENAI_API_KEY"),
+ "use_azure": os.getenv("USE_AZURE") == "True",
+ "execute_local_commands": os.getenv("EXECUTE_LOCAL_COMMANDS", "False")
+ == "True",
+ "restrict_to_workspace": os.getenv("RESTRICT_TO_WORKSPACE", "True")
+ == "True",
+ "openai_functions": os.getenv("OPENAI_FUNCTIONS", "False") == "True",
+ "elevenlabs_api_key": os.getenv("ELEVENLABS_API_KEY"),
+ "streamelements_voice": os.getenv("STREAMELEMENTS_VOICE"),
+ "text_to_speech_provider": os.getenv("TEXT_TO_SPEECH_PROVIDER"),
+ "github_api_key": os.getenv("GITHUB_API_KEY"),
+ "github_username": os.getenv("GITHUB_USERNAME"),
+ "google_api_key": os.getenv("GOOGLE_API_KEY"),
+ "image_provider": os.getenv("IMAGE_PROVIDER"),
+ "huggingface_api_token": os.getenv("HUGGINGFACE_API_TOKEN"),
+ "huggingface_image_model": os.getenv("HUGGINGFACE_IMAGE_MODEL"),
+ "audio_to_text_provider": os.getenv("AUDIO_TO_TEXT_PROVIDER"),
+ "huggingface_audio_to_text_model": os.getenv(
+ "HUGGINGFACE_AUDIO_TO_TEXT_MODEL"
+ ),
+ "sd_webui_url": os.getenv("SD_WEBUI_URL"),
+ "sd_webui_auth": os.getenv("SD_WEBUI_AUTH"),
+ "selenium_web_browser": os.getenv("USE_WEB_BROWSER"),
+ "selenium_headless": os.getenv("HEADLESS_BROWSER", "True") == "True",
+ "user_agent": os.getenv("USER_AGENT"),
+ "memory_backend": os.getenv("MEMORY_BACKEND"),
+ "memory_index": os.getenv("MEMORY_INDEX"),
+ "redis_host": os.getenv("REDIS_HOST"),
+ "redis_password": os.getenv("REDIS_PASSWORD"),
+ "wipe_redis_on_start": os.getenv("WIPE_REDIS_ON_START", "True") == "True",
+ "plugins_dir": os.getenv("PLUGINS_DIR"),
+ "plugins_config_file": os.getenv("PLUGINS_CONFIG_FILE"),
+ "chat_messages_enabled": os.getenv("CHAT_MESSAGES_ENABLED") == "True",
+ }
+
+ # Convert the comma-separated string into a list
disabled_command_categories = os.getenv("DISABLED_COMMAND_CATEGORIES")
if disabled_command_categories:
- self.disabled_command_categories = disabled_command_categories.split(",")
- else:
- self.disabled_command_categories = []
-
- self.shell_command_control = os.getenv("SHELL_COMMAND_CONTROL", "denylist")
+ config_dict[
+ "disabled_command_categories"
+ ] = disabled_command_categories.split(",")
- # DENY_COMMANDS is deprecated and included for backwards-compatibility
+ # Convert the comma-separated string into a list
shell_denylist = os.getenv("SHELL_DENYLIST", os.getenv("DENY_COMMANDS"))
if shell_denylist:
- self.shell_denylist = shell_denylist.split(",")
- else:
- self.shell_denylist = ["sudo", "su"]
+ config_dict["shell_denylist"] = shell_denylist.split(",")
- # ALLOW_COMMANDS is deprecated and included for backwards-compatibility
shell_allowlist = os.getenv("SHELL_ALLOWLIST", os.getenv("ALLOW_COMMANDS"))
if shell_allowlist:
- self.shell_allowlist = shell_allowlist.split(",")
- else:
- self.shell_allowlist = []
-
- self.ai_settings_file = os.getenv("AI_SETTINGS_FILE", "ai_settings.yaml")
- self.prompt_settings_file = os.getenv(
- "PROMPT_SETTINGS_FILE", "prompt_settings.yaml"
- )
- self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo")
- self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-3.5-turbo")
- self.embedding_model = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")
-
- self.browse_spacy_language_model = os.getenv(
- "BROWSE_SPACY_LANGUAGE_MODEL", "en_core_web_sm"
- )
-
- self.openai_api_key = os.getenv("OPENAI_API_KEY")
- self.openai_organization = os.getenv("OPENAI_ORGANIZATION")
- self.temperature = float(os.getenv("TEMPERATURE", "0"))
- self.use_azure = os.getenv("USE_AZURE") == "True"
- self.execute_local_commands = (
- os.getenv("EXECUTE_LOCAL_COMMANDS", "False") == "True"
- )
- self.restrict_to_workspace = (
- os.getenv("RESTRICT_TO_WORKSPACE", "True") == "True"
- )
-
- if self.use_azure:
- self.load_azure_config()
- openai.api_type = self.openai_api_type
- openai.api_base = self.openai_api_base
- openai.api_version = self.openai_api_version
- elif os.getenv("OPENAI_API_BASE_URL", None):
- openai.api_base = os.getenv("OPENAI_API_BASE_URL")
-
- if self.openai_organization is not None:
- openai.organization = self.openai_organization
-
- self.elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
- # ELEVENLABS_VOICE_1_ID is deprecated and included for backwards-compatibility
- self.elevenlabs_voice_id = os.getenv(
- "ELEVENLABS_VOICE_ID", os.getenv("ELEVENLABS_VOICE_1_ID")
- )
- self.streamelements_voice = os.getenv("STREAMELEMENTS_VOICE", "Brian")
-
- # Backwards-compatibility shim for deprecated env variables
- if os.getenv("USE_MAC_OS_TTS"):
- default_tts_provider = "macos"
- elif self.elevenlabs_api_key:
- default_tts_provider = "elevenlabs"
- elif os.getenv("USE_BRIAN_TTS"):
- default_tts_provider = "streamelements"
- else:
- default_tts_provider = "gtts"
+ config_dict["shell_allowlist"] = shell_allowlist.split(",")
- self.text_to_speech_provider = os.getenv(
- "TEXT_TO_SPEECH_PROVIDER", default_tts_provider
- )
-
- self.github_api_key = os.getenv("GITHUB_API_KEY")
- self.github_username = os.getenv("GITHUB_USERNAME")
-
- self.google_api_key = os.getenv("GOOGLE_API_KEY")
- # CUSTOM_SEARCH_ENGINE_ID is deprecated and included for backwards-compatibility
- self.google_custom_search_engine_id = os.getenv(
+ config_dict["google_custom_search_engine_id"] = os.getenv(
"GOOGLE_CUSTOM_SEARCH_ENGINE_ID", os.getenv("CUSTOM_SEARCH_ENGINE_ID")
)
- self.image_provider = os.getenv("IMAGE_PROVIDER")
- self.image_size = int(os.getenv("IMAGE_SIZE", 256))
- self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN")
- self.huggingface_image_model = os.getenv(
- "HUGGINGFACE_IMAGE_MODEL", "CompVis/stable-diffusion-v1-4"
- )
- self.audio_to_text_provider = os.getenv("AUDIO_TO_TEXT_PROVIDER", "huggingface")
- self.huggingface_audio_to_text_model = os.getenv(
- "HUGGINGFACE_AUDIO_TO_TEXT_MODEL"
- )
- self.sd_webui_url = os.getenv("SD_WEBUI_URL", "http://localhost:7860")
- self.sd_webui_auth = os.getenv("SD_WEBUI_AUTH")
-
- # Selenium browser settings
- self.selenium_web_browser = os.getenv("USE_WEB_BROWSER", "chrome")
- self.selenium_headless = os.getenv("HEADLESS_BROWSER", "True") == "True"
-
- # User agent header to use when making HTTP requests
- # Some websites might just completely deny request with an error code if
- # no user agent was found.
- self.user_agent = os.getenv(
- "USER_AGENT",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36"
- " (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
+ config_dict["elevenlabs_voice_id"] = os.getenv(
+ "ELEVENLABS_VOICE_ID", os.getenv("ELEVENLABS_VOICE_1_ID")
)
- self.memory_backend = os.getenv("MEMORY_BACKEND", "json_file")
- self.memory_index = os.getenv("MEMORY_INDEX", "auto-gpt-memory")
-
- self.redis_host = os.getenv("REDIS_HOST", "localhost")
- self.redis_port = int(os.getenv("REDIS_PORT", "6379"))
- self.redis_password = os.getenv("REDIS_PASSWORD", "")
- self.wipe_redis_on_start = os.getenv("WIPE_REDIS_ON_START", "True") == "True"
-
- self.plugins_dir = os.getenv("PLUGINS_DIR", "plugins")
- self.plugins: List[AutoGPTPluginTemplate] = []
- self.plugins_openai = []
-
- # Deprecated. Kept for backwards-compatibility. Will remove in a future version.
plugins_allowlist = os.getenv("ALLOWLISTED_PLUGINS")
if plugins_allowlist:
- self.plugins_allowlist = plugins_allowlist.split(",")
- else:
- self.plugins_allowlist = []
+ config_dict["plugins_allowlist"] = plugins_allowlist.split(",")
- # Deprecated. Kept for backwards-compatibility. Will remove in a future version.
plugins_denylist = os.getenv("DENYLISTED_PLUGINS")
if plugins_denylist:
- self.plugins_denylist = plugins_denylist.split(",")
- else:
- self.plugins_denylist = []
-
- # Avoid circular imports
- from autogpt.plugins import DEFAULT_PLUGINS_CONFIG_FILE
-
- self.plugins_config_file = os.getenv(
- "PLUGINS_CONFIG_FILE", DEFAULT_PLUGINS_CONFIG_FILE
- )
- self.load_plugins_config()
+ config_dict["plugins_denylist"] = plugins_denylist.split(",")
- self.chat_messages_enabled = os.getenv("CHAT_MESSAGES_ENABLED") == "True"
+ with contextlib.suppress(TypeError):
+ config_dict["image_size"] = int(os.getenv("IMAGE_SIZE"))
+ with contextlib.suppress(TypeError):
+ config_dict["redis_port"] = int(os.getenv("REDIS_PORT"))
+ with contextlib.suppress(TypeError):
+ config_dict["temperature"] = float(os.getenv("TEMPERATURE"))
- def load_plugins_config(self) -> "autogpt.plugins.PluginsConfig":
- # Avoid circular import
- from autogpt.plugins.plugins_config import PluginsConfig
+ if config_dict["use_azure"]:
+ azure_config = cls.load_azure_config()
+ config_dict["openai_api_type"] = azure_config["openai_api_type"]
+ config_dict["openai_api_base"] = azure_config["openai_api_base"]
+ config_dict["openai_api_version"] = azure_config["openai_api_version"]
- self.plugins_config = PluginsConfig.load_config(global_config=self)
- return self.plugins_config
+ if os.getenv("OPENAI_API_BASE_URL"):
+ config_dict["openai_api_base"] = os.getenv("OPENAI_API_BASE_URL")
- def get_azure_deployment_id_for_model(self, model: str) -> str:
- """
- Returns the relevant deployment id for the model specified.
-
- Parameters:
- model(str): The model to map to the deployment id.
+ openai_organization = os.getenv("OPENAI_ORGANIZATION")
+ if openai_organization is not None:
+ config_dict["openai_organization"] = openai_organization
- Returns:
- The matching deployment id if found, otherwise an empty string.
- """
- if model == self.fast_llm_model:
- return self.azure_model_to_deployment_id_map[
- "fast_llm_model_deployment_id"
- ] # type: ignore
- elif model == self.smart_llm_model:
- return self.azure_model_to_deployment_id_map[
- "smart_llm_model_deployment_id"
- ] # type: ignore
- elif model == "text-embedding-ada-002":
- return self.azure_model_to_deployment_id_map[
- "embedding_model_deployment_id"
- ] # type: ignore
- else:
- return ""
+ config_dict_without_none_values = {
+ k: v for k, v in config_dict.items() if v is not None
+ }
- AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "../..", "azure.yaml")
+ return cls.build_agent_configuration(config_dict_without_none_values)
- def load_azure_config(self, config_file: str = AZURE_CONFIG_FILE) -> None:
+ @classmethod
+ def load_azure_config(cls, config_file: str = AZURE_CONFIG_FILE) -> Dict[str, str]:
"""
Loads the configuration parameters for Azure hosting from the specified file
path as a yaml file.
@@ -224,90 +266,47 @@ class Config(metaclass=Singleton):
config_file(str): The path to the config yaml file. DEFAULT: "../azure.yaml"
Returns:
- None
+ Dict
"""
with open(config_file) as file:
config_params = yaml.load(file, Loader=yaml.FullLoader) or {}
- self.openai_api_type = config_params.get("azure_api_type") or "azure"
- self.openai_api_base = config_params.get("azure_api_base") or ""
- self.openai_api_version = (
- config_params.get("azure_api_version") or "2023-03-15-preview"
- )
- self.azure_model_to_deployment_id_map = config_params.get("azure_model_map", {})
-
- def set_continuous_mode(self, value: bool) -> None:
- """Set the continuous mode value."""
- self.continuous_mode = value
- def set_continuous_limit(self, value: int) -> None:
- """Set the continuous limit value."""
- self.continuous_limit = value
+ return {
+ "openai_api_type": config_params.get("azure_api_type") or "azure",
+ "openai_api_base": config_params.get("azure_api_base") or "",
+ "openai_api_version": config_params.get("azure_api_version")
+ or "2023-03-15-preview",
+ "azure_model_to_deployment_id_map": config_params.get(
+ "azure_model_map", {}
+ ),
+ }
- def set_speak_mode(self, value: bool) -> None:
- """Set the speak mode value."""
- self.speak_mode = value
- def set_fast_llm_model(self, value: str) -> None:
- """Set the fast LLM model value."""
- self.fast_llm_model = value
-
- def set_smart_llm_model(self, value: str) -> None:
- """Set the smart LLM model value."""
- self.smart_llm_model = value
-
- def set_embedding_model(self, value: str) -> None:
- """Set the model to use for creating embeddings."""
- self.embedding_model = value
-
- def set_openai_api_key(self, value: str) -> None:
- """Set the OpenAI API key value."""
- self.openai_api_key = value
-
- def set_elevenlabs_api_key(self, value: str) -> None:
- """Set the ElevenLabs API key value."""
- self.elevenlabs_api_key = value
-
- def set_elevenlabs_voice_1_id(self, value: str) -> None:
- """Set the ElevenLabs Voice 1 ID value."""
- self.elevenlabs_voice_id = value
-
- def set_elevenlabs_voice_2_id(self, value: str) -> None:
- """Set the ElevenLabs Voice 2 ID value."""
- self.elevenlabs_voice_2_id = value
-
- def set_google_api_key(self, value: str) -> None:
- """Set the Google API key value."""
- self.google_api_key = value
-
- def set_custom_search_engine_id(self, value: str) -> None:
- """Set the custom search engine id value."""
- self.google_custom_search_engine_id = value
-
- def set_debug_mode(self, value: bool) -> None:
- """Set the debug mode value."""
- self.debug_mode = value
-
- def set_plugins(self, value: list) -> None:
- """Set the plugins value."""
- self.plugins = value
-
- def set_temperature(self, value: int) -> None:
- """Set the temperature value."""
- self.temperature = value
-
- def set_memory_backend(self, name: str) -> None:
- """Set the memory backend name."""
- self.memory_backend = name
-
-
-def check_openai_api_key() -> None:
+def check_openai_api_key(config: Config) -> None:
"""Check if the OpenAI API key is set in config.py or as an environment variable."""
- cfg = Config()
- if not cfg.openai_api_key:
+ if not config.openai_api_key:
print(
Fore.RED
+ "Please set your OpenAI API key in .env or as an environment variable."
+ Fore.RESET
)
print("You can get your key from https://platform.openai.com/account/api-keys")
- exit(1)
+ openai_api_key = input(
+ "If you do have the key, please enter your OpenAI API key now:\n"
+ )
+ key_pattern = r"^sk-\w{48}"
+ openai_api_key = openai_api_key.strip()
+ if re.search(key_pattern, openai_api_key):
+ os.environ["OPENAI_API_KEY"] = openai_api_key
+ config.openai_api_key = openai_api_key
+ print(
+ Fore.GREEN
+ + "OpenAI API key successfully set!\n"
+ + Fore.YELLOW  # colorama's Fore has no ORANGE attribute
+ + "NOTE: The API key you've set is only temporary.\n"
+ + "For longer sessions, please set it in .env file"
+ + Fore.RESET
+ )
+ else:
+ print("Invalid OpenAI API key!")
+ exit(1)
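
The Config class now follows the Configurable/SystemSettings pattern instead of a mutable singleton: defaults live in defaults_settings and environment variables are layered on top. A minimal sketch of the new construction path (the TEMPERATURE override is just an example value):

    import os

    from autogpt.config import Config

    os.environ["TEMPERATURE"] = "0.3"         # example env override
    config = Config.build_config_from_env()   # returns a ConfigSettings-backed object
    print(config.fast_llm_model, config.temperature)
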
diff --git a/autogpt/config/prompt_config.py b/autogpt/config/prompt_config.py
index 3f562c95f..793bb4440 100644
--- a/autogpt/config/prompt_config.py
+++ b/autogpt/config/prompt_config.py
@@ -6,11 +6,8 @@ import yaml
from colorama import Fore
from autogpt import utils
-from autogpt.config.config import Config
from autogpt.logs import logger
-CFG = Config()
-
class PromptConfig:
"""
@@ -22,10 +19,7 @@ class PromptConfig:
performance_evaluations (list): Performance evaluation list for the prompt generator.
"""
- def __init__(
- self,
- config_file: str = CFG.prompt_settings_file,
- ) -> None:
+ def __init__(self, prompt_settings_file: str) -> None:
"""
Initialize a class instance with parameters (constraints, resources, performance_evaluations) loaded from
yaml file if yaml file exists,
@@ -39,13 +33,13 @@ class PromptConfig:
None
"""
# Validate file
- (validated, message) = utils.validate_yaml_file(config_file)
+ (validated, message) = utils.validate_yaml_file(prompt_settings_file)
if not validated:
logger.typewriter_log("FAILED FILE VALIDATION", Fore.RED, message)
logger.double_check()
exit(1)
- with open(config_file, encoding="utf-8") as file:
+ with open(prompt_settings_file, encoding="utf-8") as file:
config_params = yaml.load(file, Loader=yaml.FullLoader)
self.constraints = config_params.get("constraints", [])
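
PromptConfig no longer reads its path from a global Config at import time; the prompt settings file has to be passed in explicitly. A brief sketch, assuming a Config object built from the environment as elsewhere in this diff:

    from autogpt.config import Config
    from autogpt.config.prompt_config import PromptConfig

    config = Config.build_config_from_env()
    prompt_config = PromptConfig(config.prompt_settings_file)
    print(prompt_config.constraints)
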
diff --git a/autogpt/configurator.py b/autogpt/configurator.py
index 324f30843..cc21414cf 100644
--- a/autogpt/configurator.py
+++ b/autogpt/configurator.py
@@ -51,13 +51,13 @@ def create_config(
allow_downloads (bool): Whether to allow Auto-GPT to download files natively
skips_news (bool): Whether to suppress the output of latest news on startup
"""
- config.set_debug_mode(False)
- config.set_continuous_mode(False)
- config.set_speak_mode(False)
+ config.debug_mode = False
+ config.continuous_mode = False
+ config.speak_mode = False
if debug:
logger.typewriter_log("Debug Mode: ", Fore.GREEN, "ENABLED")
- config.set_debug_mode(True)
+ config.debug_mode = True
if continuous:
logger.typewriter_log("Continuous Mode: ", Fore.RED, "ENABLED")
@@ -68,13 +68,13 @@ def create_config(
" cause your AI to run forever or carry out actions you would not usually"
" authorise. Use at your own risk.",
)
- config.set_continuous_mode(True)
+ config.continuous_mode = True
if continuous_limit:
logger.typewriter_log(
"Continuous Limit: ", Fore.GREEN, f"{continuous_limit}"
)
- config.set_continuous_limit(continuous_limit)
+ config.continuous_limit = continuous_limit
# Check if continuous limit is used without continuous mode
if continuous_limit and not continuous:
@@ -82,14 +82,14 @@ def create_config(
if speak:
logger.typewriter_log("Speak Mode: ", Fore.GREEN, "ENABLED")
- config.set_speak_mode(True)
+ config.speak_mode = True
# Set the default LLM models
if gpt3only:
logger.typewriter_log("GPT3.5 Only Mode: ", Fore.GREEN, "ENABLED")
# --gpt3only should always use gpt-3.5-turbo, despite user's FAST_LLM_MODEL config
- config.set_fast_llm_model(GPT_3_MODEL)
- config.set_smart_llm_model(GPT_3_MODEL)
+ config.fast_llm_model = GPT_3_MODEL
+ config.smart_llm_model = GPT_3_MODEL
elif (
gpt4only
@@ -97,13 +97,11 @@ def create_config(
):
logger.typewriter_log("GPT4 Only Mode: ", Fore.GREEN, "ENABLED")
# --gpt4only should always use gpt-4, despite user's SMART_LLM_MODEL config
- config.set_fast_llm_model(GPT_4_MODEL)
- config.set_smart_llm_model(GPT_4_MODEL)
+ config.fast_llm_model = GPT_4_MODEL
+ config.smart_llm_model = GPT_4_MODEL
else:
- config.set_fast_llm_model(check_model(config.fast_llm_model, "fast_llm_model"))
- config.set_smart_llm_model(
- check_model(config.smart_llm_model, "smart_llm_model")
- )
+ config.fast_llm_model = check_model(config.fast_llm_model, "fast_llm_model")
+ config.smart_llm_model = check_model(config.smart_llm_model, "smart_llm_model")
if memory_type:
supported_memory = get_supported_memory_backends()
diff --git a/benchmark/__init__.py b/autogpt/core/configuration/__init__.py
index e69de29bb..e69de29bb 100644
--- a/benchmark/__init__.py
+++ b/autogpt/core/configuration/__init__.py
diff --git a/autogpt/core/configuration/schema.py b/autogpt/core/configuration/schema.py
new file mode 100644
index 000000000..cff4dfe8f
--- /dev/null
+++ b/autogpt/core/configuration/schema.py
@@ -0,0 +1,98 @@
+import abc
+import copy
+import typing
+from typing import Any
+
+from pydantic import BaseModel
+
+
+class SystemConfiguration(BaseModel):
+ def get_user_config(self) -> dict[str, Any]:
+ return _get_user_config_fields(self)
+
+ class Config:
+ extra = "forbid"
+ use_enum_values = True
+
+
+class SystemSettings(BaseModel, abc.ABC):
+ """A base class for all system settings."""
+
+ name: str
+ description: typing.Optional[str]
+
+ class Config:
+ extra = "forbid"
+ use_enum_values = True
+
+
+class Configurable(abc.ABC):
+ """A base class for all configurable objects."""
+
+ prefix: str = ""
+ defaults_settings: typing.ClassVar[SystemSettings]
+
+ @classmethod
+ def get_user_config(cls) -> dict[str, Any]:
+ return _get_user_config_fields(cls.defaults_settings)
+
+ @classmethod
+ def build_agent_configuration(cls, configuration: dict = {}) -> SystemSettings:
+ """Process the configuration for this object."""
+
+ defaults_settings = cls.defaults_settings.dict()
+ final_configuration = deep_update(defaults_settings, configuration)
+
+ return cls.defaults_settings.__class__.parse_obj(final_configuration)
+
+
+def _get_user_config_fields(instance: BaseModel) -> dict[str, Any]:
+ """
+ Get the user config fields of a Pydantic model instance.
+ Args:
+ instance: The Pydantic model instance.
+ Returns:
+ The user config fields of the instance.
+ """
+ user_config_fields = {}
+
+ for name, value in instance.__dict__.items():
+ field_info = instance.__fields__[name]
+ if "user_configurable" in field_info.field_info.extra:
+ user_config_fields[name] = value
+ elif isinstance(value, SystemConfiguration):
+ user_config_fields[name] = value.get_user_config()
+ elif isinstance(value, list) and all(
+ isinstance(i, SystemConfiguration) for i in value
+ ):
+ user_config_fields[name] = [i.get_user_config() for i in value]
+ elif isinstance(value, dict) and all(
+ isinstance(i, SystemConfiguration) for i in value.values()
+ ):
+ user_config_fields[name] = {
+ k: v.get_user_config() for k, v in value.items()
+ }
+
+ return user_config_fields
+
+
+def deep_update(original_dict: dict, update_dict: dict) -> dict:
+ """
+ Recursively update a dictionary.
+ Args:
+ original_dict (dict): The dictionary to be updated.
+ update_dict (dict): The dictionary to update with.
+ Returns:
+ dict: The updated dictionary.
+ """
+ original_dict = copy.deepcopy(original_dict)
+ for key, value in update_dict.items():
+ if (
+ key in original_dict
+ and isinstance(original_dict[key], dict)
+ and isinstance(value, dict)
+ ):
+ original_dict[key] = deep_update(original_dict[key], value)
+ else:
+ original_dict[key] = value
+ return original_dict
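
To illustrate the merge semantics that build_agent_configuration relies on, here is what deep_update does with nested dictionaries (the values are chosen arbitrarily for the example):

    from autogpt.core.configuration.schema import deep_update

    defaults = {"llm": {"model": "gpt-3.5-turbo", "temperature": 0.0}, "debug": False}
    overrides = {"llm": {"temperature": 0.7}}

    merged = deep_update(defaults, overrides)
    # Nested dicts are merged key by key, everything else is overwritten:
    # {"llm": {"model": "gpt-3.5-turbo", "temperature": 0.7}, "debug": False}
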
diff --git a/autogpt/json_utils/utilities.py b/autogpt/json_utils/utilities.py
index 018488719..7162abc58 100644
--- a/autogpt/json_utils/utilities.py
+++ b/autogpt/json_utils/utilities.py
@@ -9,7 +9,6 @@ from jsonschema import Draft7Validator
from autogpt.config import Config
from autogpt.logs import logger
-CFG = Config()
LLM_DEFAULT_RESPONSE_FORMAT = "llm_response_format_1"
@@ -23,21 +22,26 @@ def extract_json_from_response(response_content: str) -> dict:
try:
return ast.literal_eval(response_content)
except BaseException as e:
- logger.error(f"Error parsing JSON response with literal_eval {e}")
+ logger.info(f"Error parsing JSON response with literal_eval {e}")
+ logger.debug(f"Invalid JSON received in response: {response_content}")
# TODO: How to raise an error here without causing the program to exit?
return {}
def llm_response_schema(
- schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT,
+ config: Config, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT
) -> dict[str, Any]:
filename = os.path.join(os.path.dirname(__file__), f"{schema_name}.json")
with open(filename, "r") as f:
- return json.load(f)
+ json_schema = json.load(f)
+ if config.openai_functions:
+ del json_schema["properties"]["command"]
+ json_schema["required"].remove("command")
+ return json_schema
def validate_json(
- json_object: object, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT
+ json_object: object, config: Config, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT
) -> bool:
"""
:type schema_name: object
@@ -47,14 +51,14 @@ def validate_json(
Returns:
bool: Whether the json_object is valid or not
"""
- schema = llm_response_schema(schema_name)
+ schema = llm_response_schema(config, schema_name)
validator = Draft7Validator(schema)
if errors := sorted(validator.iter_errors(json_object), key=lambda e: e.path):
for error in errors:
- logger.error(f"JSON Validation Error: {error}")
+ logger.debug(f"JSON Validation Error: {error}")
- if CFG.debug_mode:
+ if config.debug_mode:
logger.error(
json.dumps(json_object, indent=4)
) # Replace 'json_object' with the variable containing the JSON data
@@ -67,29 +71,3 @@ def validate_json(
logger.debug("The JSON object is valid.")
return True
-
-
-def validate_json_string(json_string: str, schema_name: str) -> dict | None:
- """
- :type schema_name: object
- :param schema_name: str
- :type json_object: object
- """
-
- try:
- json_loaded = json.loads(json_string)
- if not validate_json(json_loaded, schema_name):
- return None
- return json_loaded
- except:
- return None
-
-
-def is_string_valid_json(json_string: str, schema_name: str) -> bool:
- """
- :type schema_name: object
- :param schema_name: str
- :type json_object: object
- """
-
- return validate_json_string(json_string, schema_name) is not None
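
Both helpers now take the Config explicitly, since the response schema drops the command field when openai_functions is enabled. A rough sketch of the new call pattern (the reply string is a made-up example):

    from autogpt.config import Config
    from autogpt.json_utils.utilities import extract_json_from_response, validate_json

    config = Config.build_config_from_env()
    raw_reply = '{"thoughts": {"text": "hello"}, "command": {"name": "web_search", "args": {"query": "x"}}}'

    parsed = extract_json_from_response(raw_reply)
    if validate_json(parsed, config):
        print("reply matches the expected schema")
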
diff --git a/autogpt/llm/api_manager.py b/autogpt/llm/api_manager.py
index acc38c44c..4e2aba9d2 100644
--- a/autogpt/llm/api_manager.py
+++ b/autogpt/llm/api_manager.py
@@ -5,9 +5,7 @@ from typing import List, Optional
import openai
from openai import Model
-from autogpt.config import Config
-from autogpt.llm.base import CompletionModelInfo, MessageDict
-from autogpt.llm.providers.openai import OPEN_AI_MODELS
+from autogpt.llm.base import CompletionModelInfo
from autogpt.logs import logger
from autogpt.singleton import Singleton
@@ -27,52 +25,7 @@ class ApiManager(metaclass=Singleton):
self.total_budget = 0.0
self.models = None
- def create_chat_completion(
- self,
- messages: list[MessageDict],
- model: str | None = None,
- temperature: float = None,
- max_tokens: int | None = None,
- deployment_id=None,
- ):
- """
- Create a chat completion and update the cost.
- Args:
- messages (list): The list of messages to send to the API.
- model (str): The model to use for the API call.
- temperature (float): The temperature to use for the API call.
- max_tokens (int): The maximum number of tokens for the API call.
- Returns:
- str: The AI's response.
- """
- cfg = Config()
- if temperature is None:
- temperature = cfg.temperature
- if deployment_id is not None:
- response = openai.ChatCompletion.create(
- deployment_id=deployment_id,
- model=model,
- messages=messages,
- temperature=temperature,
- max_tokens=max_tokens,
- api_key=cfg.openai_api_key,
- )
- else:
- response = openai.ChatCompletion.create(
- model=model,
- messages=messages,
- temperature=temperature,
- max_tokens=max_tokens,
- api_key=cfg.openai_api_key,
- )
- if not hasattr(response, "error"):
- logger.debug(f"Response: {response}")
- prompt_tokens = response.usage.prompt_tokens
- completion_tokens = response.usage.completion_tokens
- self.update_cost(prompt_tokens, completion_tokens, model)
- return response
-
- def update_cost(self, prompt_tokens, completion_tokens, model: str):
+ def update_cost(self, prompt_tokens, completion_tokens, model):
"""
Update the total cost, prompt tokens, and completion tokens.
@@ -82,6 +35,8 @@ class ApiManager(metaclass=Singleton):
model (str): The model used for the API call.
"""
# the .model property in API responses can contain version suffixes like -v2
+ from autogpt.llm.providers.openai import OPEN_AI_MODELS
+
model = model[:-3] if model.endswith("-v2") else model
model_info = OPEN_AI_MODELS[model]
diff --git a/autogpt/llm/base.py b/autogpt/llm/base.py
index 43cc0ad93..4ff80dc73 100644
--- a/autogpt/llm/base.py
+++ b/autogpt/llm/base.py
@@ -2,11 +2,17 @@ from __future__ import annotations
from dataclasses import dataclass, field
from math import ceil, floor
-from typing import List, Literal, TypedDict
+from typing import TYPE_CHECKING, List, Literal, Optional, TypedDict
+
+if TYPE_CHECKING:
+ from autogpt.llm.providers.openai import OpenAIFunctionCall
MessageRole = Literal["system", "user", "assistant"]
MessageType = Literal["ai_response", "action_result"]
+TText = list[int]
+"""Token array representing tokenized text"""
+
class MessageDict(TypedDict):
role: MessageRole
@@ -153,4 +159,5 @@ class EmbeddingModelResponse(LLMResponse):
class ChatModelResponse(LLMResponse):
"""Standard response struct for a response from an LLM model."""
- content: str = None
+ content: Optional[str] = None
+ function_call: Optional[OpenAIFunctionCall] = None
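
ChatModelResponse can now carry either plain text or a function call, so downstream code has to check both. A small sketch of consuming it, assuming only one of the two fields is populated per response:

    from autogpt.llm.base import ChatModelResponse


    def describe_reply(reply: ChatModelResponse) -> str:
        if reply.function_call is not None:
            return f"call {reply.function_call.name}({reply.function_call.arguments})"
        return reply.content or ""
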
diff --git a/autogpt/llm/chat.py b/autogpt/llm/chat.py
index 9ed07cb2d..c5d5a945a 100644
--- a/autogpt/llm/chat.py
+++ b/autogpt/llm/chat.py
@@ -3,6 +3,8 @@ from __future__ import annotations
import time
from typing import TYPE_CHECKING
+from autogpt.llm.providers.openai import get_openai_command_specs
+
if TYPE_CHECKING:
from autogpt.agent.agent import Agent
@@ -19,7 +21,7 @@ def chat_with_ai(
config: Config,
agent: Agent,
system_prompt: str,
- user_input: str,
+ triggering_prompt: str,
token_limit: int,
model: str | None = None,
):
@@ -31,7 +33,7 @@ def chat_with_ai(
config (Config): The config to use.
agent (Agent): The agent to use.
system_prompt (str): The prompt explaining the rules to the AI.
- user_input (str): The input from the user.
+ triggering_prompt (str): The input from the user.
token_limit (int): The maximum number of tokens allowed in the API call.
model (str, optional): The model to use. If None, the config.fast_llm_model will be used. Defaults to None.
@@ -90,13 +92,14 @@ def chat_with_ai(
# )
# Account for user input (appended later)
- user_input_msg = Message("user", user_input)
+ user_input_msg = Message("user", triggering_prompt)
current_tokens_used += count_message_tokens([user_input_msg], model)
current_tokens_used += 500 # Reserve space for new_summary_message
+ current_tokens_used += 500 # Reserve space for the openai functions TODO improve
# Add Messages until the token limit is reached or there are no more messages to add.
- for cycle in reversed(list(agent.history.per_cycle())):
+ for cycle in reversed(list(agent.history.per_cycle(agent.config))):
messages_to_add = [msg for msg in cycle if msg is not None]
tokens_to_add = count_message_tokens(messages_to_add, model)
if current_tokens_used + tokens_to_add > send_token_limit:
@@ -110,14 +113,14 @@ def chat_with_ai(
# Update & add summary of trimmed messages
if len(agent.history) > 0:
new_summary_message, trimmed_messages = agent.history.trim_messages(
- current_message_chain=list(message_sequence),
+ current_message_chain=list(message_sequence), config=agent.config
)
tokens_to_add = count_message_tokens([new_summary_message], model)
message_sequence.insert(insertion_index, new_summary_message)
current_tokens_used += tokens_to_add - 500
# FIXME: uncomment when memory is back in use
- # memory_store = get_memory(cfg)
+ # memory_store = get_memory(config)
# for _, ai_msg, result_msg in agent.history.per_cycle(trimmed_messages):
# memory_to_add = MemoryItem.from_ai_action(ai_msg, result_msg)
# logger.debug(f"Storing the following memory:\n{memory_to_add.dump()}")
@@ -192,11 +195,13 @@ def chat_with_ai(
# temperature and other settings we care about
assistant_reply = create_chat_completion(
prompt=message_sequence,
+ config=agent.config,
+ functions=get_openai_command_specs(agent),
max_tokens=tokens_remaining,
)
# Update full message history
agent.history.append(user_input_msg)
- agent.history.add("assistant", assistant_reply, "ai_response")
+ agent.history.add("assistant", assistant_reply.content, "ai_response")
return assistant_reply
diff --git a/autogpt/llm/providers/openai.py b/autogpt/llm/providers/openai.py
index 0f24b56e7..397b47917 100644
--- a/autogpt/llm/providers/openai.py
+++ b/autogpt/llm/providers/openai.py
@@ -1,4 +1,28 @@
-from autogpt.llm.base import ChatModelInfo, EmbeddingModelInfo, TextModelInfo
+from __future__ import annotations
+
+import functools
+import time
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, List, Optional
+from unittest.mock import patch
+
+import openai
+import openai.api_resources.abstract.engine_api_resource as engine_api_resource
+from colorama import Fore, Style
+from openai.error import APIError, RateLimitError, ServiceUnavailableError, Timeout
+from openai.openai_object import OpenAIObject
+
+if TYPE_CHECKING:
+ from autogpt.agent.agent import Agent
+
+from autogpt.llm.base import (
+ ChatModelInfo,
+ EmbeddingModelInfo,
+ MessageDict,
+ TextModelInfo,
+ TText,
+)
+from autogpt.logs import logger
OPEN_AI_CHAT_MODELS = {
info.name: info
@@ -88,3 +112,241 @@ OPEN_AI_MODELS: dict[str, ChatModelInfo | EmbeddingModelInfo | TextModelInfo] =
**OPEN_AI_TEXT_MODELS,
**OPEN_AI_EMBEDDING_MODELS,
}
+
+
+def meter_api(func):
+ """Adds ApiManager metering to functions which make OpenAI API calls"""
+ from autogpt.llm.api_manager import ApiManager
+
+ api_manager = ApiManager()
+
+ openai_obj_processor = openai.util.convert_to_openai_object
+
+ def update_usage_with_response(response: OpenAIObject):
+ try:
+ usage = response.usage
+ logger.debug(f"Reported usage from call to model {response.model}: {usage}")
+ api_manager.update_cost(
+ response.usage.prompt_tokens,
+ response.usage.completion_tokens if "completion_tokens" in usage else 0,
+ response.model,
+ )
+ except Exception as err:
+ logger.warn(f"Failed to update API costs: {err.__class__.__name__}: {err}")
+
+ def metering_wrapper(*args, **kwargs):
+ openai_obj = openai_obj_processor(*args, **kwargs)
+ if isinstance(openai_obj, OpenAIObject) and "usage" in openai_obj:
+ update_usage_with_response(openai_obj)
+ return openai_obj
+
+ def metered_func(*args, **kwargs):
+ with patch.object(
+ engine_api_resource.util,
+ "convert_to_openai_object",
+ side_effect=metering_wrapper,
+ ):
+ return func(*args, **kwargs)
+
+ return metered_func
+
+
+def retry_api(
+ num_retries: int = 10,
+ backoff_base: float = 2.0,
+ warn_user: bool = True,
+):
+ """Retry an OpenAI API call.
+
+ Args:
+ num_retries int: Number of retries. Defaults to 10.
+ backoff_base float: Base for exponential backoff. Defaults to 2.
+ warn_user bool: Whether to warn the user. Defaults to True.
+ """
+ error_messages = {
+ ServiceUnavailableError: f"{Fore.RED}Error: The OpenAI API engine is currently overloaded, passing...{Fore.RESET}",
+ RateLimitError: f"{Fore.RED}Error: Reached rate limit, passing...{Fore.RESET}",
+ }
+ api_key_error_msg = (
+ f"Please double check that you have setup a "
+ f"{Fore.CYAN + Style.BRIGHT}PAID{Style.RESET_ALL} OpenAI API Account. You can "
+ f"read more here: {Fore.CYAN}https://docs.agpt.co/setup/#getting-an-api-key{Fore.RESET}"
+ )
+ backoff_msg = (
+ f"{Fore.RED}Error: API Bad gateway. Waiting {{backoff}} seconds...{Fore.RESET}"
+ )
+
+ def _wrapper(func):
+ @functools.wraps(func)
+ def _wrapped(*args, **kwargs):
+ user_warned = not warn_user
+ num_attempts = num_retries + 1 # +1 for the first attempt
+ for attempt in range(1, num_attempts + 1):
+ try:
+ return func(*args, **kwargs)
+
+ except (RateLimitError, ServiceUnavailableError) as e:
+ if attempt == num_attempts:
+ raise
+
+ error_msg = error_messages[type(e)]
+ logger.debug(error_msg)
+ if not user_warned:
+ logger.double_check(api_key_error_msg)
+ user_warned = True
+
+ except (APIError, Timeout) as e:
+ if (e.http_status not in [429, 502]) or (attempt == num_attempts):
+ raise
+
+ backoff = backoff_base ** (attempt + 2)
+ logger.debug(backoff_msg.format(backoff=backoff))
+ time.sleep(backoff)
+
+ return _wrapped
+
+ return _wrapper
+
+
+@meter_api
+@retry_api()
+def create_chat_completion(
+ messages: List[MessageDict],
+ *_,
+ **kwargs,
+) -> OpenAIObject:
+ """Create a chat completion using the OpenAI API
+
+ Args:
+ messages: A list of messages to feed to the chatbot.
+ kwargs: Other arguments to pass to the OpenAI API chat completion call.
+ Returns:
+ OpenAIObject: The ChatCompletion response from OpenAI
+
+ """
+ completion: OpenAIObject = openai.ChatCompletion.create(
+ messages=messages,
+ **kwargs,
+ )
+ if not hasattr(completion, "error"):
+ logger.debug(f"Response: {completion}")
+ return completion
+
+
+@meter_api
+@retry_api()
+def create_text_completion(
+ prompt: str,
+ *_,
+ **kwargs,
+) -> OpenAIObject:
+ """Create a text completion using the OpenAI API
+
+ Args:
+ prompt: A text prompt to feed to the LLM
+ kwargs: Other arguments to pass to the OpenAI API text completion call.
+ Returns:
+ OpenAIObject: The Completion response from OpenAI
+
+ """
+ return openai.Completion.create(
+ prompt=prompt,
+ **kwargs,
+ )
+
+
+@meter_api
+@retry_api()
+def create_embedding(
+ input: str | TText | List[str] | List[TText],
+ *_,
+ **kwargs,
+) -> OpenAIObject:
+ """Create an embedding using the OpenAI API
+
+ Args:
+ input: The text to embed.
+ kwargs: Other arguments to pass to the OpenAI API embedding call.
+ Returns:
+ OpenAIObject: The Embedding response from OpenAI
+
+ """
+ return openai.Embedding.create(
+ input=input,
+ **kwargs,
+ )
+
+
+@dataclass
+class OpenAIFunctionCall:
+ """Represents a function call as generated by an OpenAI model
+
+ Attributes:
+ name: the name of the function that the LLM wants to call
+ arguments: a stringified JSON object (unverified) containing `arg: value` pairs
+ """
+
+ name: str
+ arguments: str
+
+
+@dataclass
+class OpenAIFunctionSpec:
+ """Represents a "function" in OpenAI, which is mapped to a Command in Auto-GPT"""
+
+ name: str
+ description: str
+ parameters: dict[str, ParameterSpec]
+
+ @dataclass
+ class ParameterSpec:
+ name: str
+ type: str
+ description: Optional[str]
+ required: bool = False
+
+ @property
+ def __dict__(self):
+ """Output an OpenAI-consumable function specification"""
+ return {
+ "name": self.name,
+ "description": self.description,
+ "parameters": {
+ "type": "object",
+ "properties": {
+ param.name: {
+ "type": param.type,
+ "description": param.description,
+ }
+ for param in self.parameters.values()
+ },
+ "required": [
+ param.name for param in self.parameters.values() if param.required
+ ],
+ },
+ }
+
+
+def get_openai_command_specs(agent: Agent) -> list[OpenAIFunctionSpec]:
+ """Get OpenAI-consumable function specs for the agent's available commands.
+ see https://platform.openai.com/docs/guides/gpt/function-calling
+ """
+ if not agent.config.openai_functions:
+ return []
+
+ return [
+ OpenAIFunctionSpec(
+ name=command.name,
+ description=command.description,
+ parameters={
+ param.name: OpenAIFunctionSpec.ParameterSpec(
+ name=param.name,
+ type=param.type,
+ required=param.required,
+ description=param.description,
+ )
+ for param in command.parameters
+ },
+ )
+ for command in agent.command_registry.commands.values()
+ ]
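
For reference, this is roughly what an OpenAIFunctionSpec serializes to through its __dict__ property; the spec below mirrors the web_search command but is constructed by hand for illustration:

    from autogpt.llm.providers.openai import OpenAIFunctionSpec

    spec = OpenAIFunctionSpec(
        name="web_search",
        description="Searches the web",
        parameters={
            "query": OpenAIFunctionSpec.ParameterSpec(
                name="query",
                type="string",
                description="The search query",
                required=True,
            )
        },
    )
    # spec.__dict__ ->
    # {"name": "web_search", "description": "Searches the web",
    #  "parameters": {"type": "object",
    #                 "properties": {"query": {"type": "string",
    #                                          "description": "The search query"}},
    #                 "required": ["query"]}}
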
diff --git a/autogpt/llm/utils/__init__.py b/autogpt/llm/utils/__init__.py
index cd2c1c0a3..417653144 100644
--- a/autogpt/llm/utils/__init__.py
+++ b/autogpt/llm/utils/__init__.py
@@ -1,122 +1,30 @@
from __future__ import annotations
-import functools
-import time
+from dataclasses import asdict
from typing import List, Literal, Optional
-from unittest.mock import patch
-import openai
-import openai.api_resources.abstract.engine_api_resource as engine_api_resource
-import openai.util
-from colorama import Fore, Style
-from openai.error import APIError, RateLimitError
-from openai.openai_object import OpenAIObject
+from colorama import Fore
from autogpt.config import Config
from autogpt.logs import logger
from ..api_manager import ApiManager
-from ..base import ChatSequence, Message
-from ..providers.openai import OPEN_AI_CHAT_MODELS
+from ..base import ChatModelResponse, ChatSequence, Message
+from ..providers import openai as iopenai
+from ..providers.openai import (
+ OPEN_AI_CHAT_MODELS,
+ OpenAIFunctionCall,
+ OpenAIFunctionSpec,
+)
from .token_counter import *
-def metered(func):
- """Adds ApiManager metering to functions which make OpenAI API calls"""
- api_manager = ApiManager()
-
- openai_obj_processor = openai.util.convert_to_openai_object
-
- def update_usage_with_response(response: OpenAIObject):
- try:
- usage = response.usage
- logger.debug(f"Reported usage from call to model {response.model}: {usage}")
- api_manager.update_cost(
- response.usage.prompt_tokens,
- response.usage.completion_tokens if "completion_tokens" in usage else 0,
- response.model,
- )
- except Exception as err:
- logger.warn(f"Failed to update API costs: {err.__class__.__name__}: {err}")
-
- def metering_wrapper(*args, **kwargs):
- openai_obj = openai_obj_processor(*args, **kwargs)
- if isinstance(openai_obj, OpenAIObject) and "usage" in openai_obj:
- update_usage_with_response(openai_obj)
- return openai_obj
-
- def metered_func(*args, **kwargs):
- with patch.object(
- engine_api_resource.util,
- "convert_to_openai_object",
- side_effect=metering_wrapper,
- ):
- return func(*args, **kwargs)
-
- return metered_func
-
-
-def retry_openai_api(
- num_retries: int = 10,
- backoff_base: float = 2.0,
- warn_user: bool = True,
-):
- """Retry an OpenAI API call.
-
- Args:
- num_retries int: Number of retries. Defaults to 10.
- backoff_base float: Base for exponential backoff. Defaults to 2.
- warn_user bool: Whether to warn the user. Defaults to True.
- """
- retry_limit_msg = f"{Fore.RED}Error: " f"Reached rate limit, passing...{Fore.RESET}"
- api_key_error_msg = (
- f"Please double check that you have setup a "
- f"{Fore.CYAN + Style.BRIGHT}PAID{Style.RESET_ALL} OpenAI API Account. You can "
- f"read more here: {Fore.CYAN}https://docs.agpt.co/setup/#getting-an-api-key{Fore.RESET}"
- )
- backoff_msg = (
- f"{Fore.RED}Error: API Bad gateway. Waiting {{backoff}} seconds...{Fore.RESET}"
- )
-
- def _wrapper(func):
- @functools.wraps(func)
- def _wrapped(*args, **kwargs):
- user_warned = not warn_user
- num_attempts = num_retries + 1 # +1 for the first attempt
- for attempt in range(1, num_attempts + 1):
- try:
- return func(*args, **kwargs)
-
- except RateLimitError:
- if attempt == num_attempts:
- raise
-
- logger.debug(retry_limit_msg)
- if not user_warned:
- logger.double_check(api_key_error_msg)
- user_warned = True
-
- except APIError as e:
- if (e.http_status not in [429, 502, 503]) or (
- attempt == num_attempts
- ):
- raise
-
- backoff = backoff_base ** (attempt + 2)
- logger.debug(backoff_msg.format(backoff=backoff))
- time.sleep(backoff)
-
- return _wrapped
-
- return _wrapper
-
-
def call_ai_function(
function: str,
args: list,
description: str,
- model: str | None = None,
- config: Config = None,
+ model: Optional[str] = None,
+ config: Optional[Config] = None,
) -> str:
"""Call an AI function
@@ -150,48 +58,47 @@ def call_ai_function(
Message("user", arg_str),
],
)
- return create_chat_completion(prompt=prompt, temperature=0)
+ return create_chat_completion(prompt=prompt, temperature=0, config=config).content
-@metered
-@retry_openai_api()
def create_text_completion(
prompt: str,
+ config: Config,
model: Optional[str],
temperature: Optional[float],
max_output_tokens: Optional[int],
) -> str:
- cfg = Config()
if model is None:
- model = cfg.fast_llm_model
+ model = config.fast_llm_model
if temperature is None:
- temperature = cfg.temperature
+ temperature = config.temperature
- if cfg.use_azure:
- kwargs = {"deployment_id": cfg.get_azure_deployment_id_for_model(model)}
+ if config.use_azure:
+ kwargs = {"deployment_id": config.get_azure_deployment_id_for_model(model)}
else:
kwargs = {"model": model}
- response = openai.Completion.create(
- **kwargs,
+ response = iopenai.create_text_completion(
prompt=prompt,
+ **kwargs,
temperature=temperature,
max_tokens=max_output_tokens,
- api_key=cfg.openai_api_key,
+ api_key=config.openai_api_key,
)
+ logger.debug(f"Response: {response}")
+
return response.choices[0].text
# Overly simple abstraction until we create something better
-# simple retry mechanism when getting a rate error or a bad gateway
-@metered
-@retry_openai_api()
def create_chat_completion(
prompt: ChatSequence,
+ config: Config,
+ functions: Optional[List[OpenAIFunctionSpec]] = None,
model: Optional[str] = None,
- temperature: float = None,
+ temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
-) -> str:
+) -> ChatModelResponse:
"""Create a chat completion using the OpenAI API
Args:
@@ -203,53 +110,68 @@ def create_chat_completion(
Returns:
str: The response from the chat completion
"""
- cfg = Config()
+
if model is None:
model = prompt.model.name
if temperature is None:
- temperature = cfg.temperature
- if max_tokens is None:
- max_tokens = OPEN_AI_CHAT_MODELS[model].max_tokens - prompt.token_length
+ temperature = config.temperature
logger.debug(
f"{Fore.GREEN}Creating chat completion with model {model}, temperature {temperature}, max_tokens {max_tokens}{Fore.RESET}"
)
- for plugin in cfg.plugins:
+ chat_completion_kwargs = {
+ "model": model,
+ "temperature": temperature,
+ "max_tokens": max_tokens,
+ }
+
+ for plugin in config.plugins:
if plugin.can_handle_chat_completion(
messages=prompt.raw(),
- model=model,
- temperature=temperature,
- max_tokens=max_tokens,
+ **chat_completion_kwargs,
):
message = plugin.handle_chat_completion(
messages=prompt.raw(),
- model=model,
- temperature=temperature,
- max_tokens=max_tokens,
+ **chat_completion_kwargs,
)
if message is not None:
return message
- api_manager = ApiManager()
- response = None
- if cfg.use_azure:
- kwargs = {"deployment_id": cfg.get_azure_deployment_id_for_model(model)}
- else:
- kwargs = {"model": model}
-
- response = api_manager.create_chat_completion(
- **kwargs,
+ chat_completion_kwargs["api_key"] = config.openai_api_key
+ if config.use_azure:
+ chat_completion_kwargs[
+ "deployment_id"
+ ] = config.get_azure_deployment_id_for_model(model)
+ if functions:
+ chat_completion_kwargs["functions"] = [
+ function.__dict__ for function in functions
+ ]
+ logger.debug(f"Function dicts: {chat_completion_kwargs['functions']}")
+
+ response = iopenai.create_chat_completion(
messages=prompt.raw(),
- temperature=temperature,
- max_tokens=max_tokens,
+ **chat_completion_kwargs,
)
+ logger.debug(f"Response: {response}")
- resp = response.choices[0].message.content
- for plugin in cfg.plugins:
+ if hasattr(response, "error"):
+ logger.error(response.error)
+ raise RuntimeError(response.error)
+
+ first_message = response.choices[0].message
+ content: str | None = first_message.get("content")
+ function_call: OpenAIFunctionCall | None = first_message.get("function_call")
+
+ for plugin in config.plugins:
if not plugin.can_handle_on_response():
continue
- resp = plugin.on_response(resp)
- return resp
+ content = plugin.on_response(content)
+
+ return ChatModelResponse(
+ model_info=OPEN_AI_CHAT_MODELS[model],
+ content=content,
+ function_call=function_call,
+ )
def check_model(
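A minimal caller-side sketch of the refactored create_chat_completion: the Config is now passed explicitly and the result is a ChatModelResponse rather than a bare string. The prompt text below is illustrative; only the call shapes are taken from this changeset.

from autogpt.config.config import Config
from autogpt.llm.base import ChatSequence, Message
from autogpt.llm.utils import create_chat_completion

config = Config.build_config_from_env()
prompt = ChatSequence.for_model(
    config.fast_llm_model,
    [Message("system", "You are a terse assistant."), Message("user", "Say hello.")],
)
response = create_chat_completion(prompt, config, temperature=0)
print(response.content)        # text reply; may be None if the model returned a function call
print(response.function_call)  # populated only when `functions` were passed in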
diff --git a/autogpt/logs.py b/autogpt/logs.py
index f14267fca..8eb4c94a6 100644
--- a/autogpt/logs.py
+++ b/autogpt/logs.py
@@ -9,6 +9,7 @@ from typing import Any
from colorama import Fore, Style
+from autogpt.config import Config
from autogpt.log_cycle.json_handler import JsonFileHandler, JsonFormatter
from autogpt.singleton import Singleton
from autogpt.speech import say_text
@@ -248,13 +249,17 @@ def remove_color_codes(s: str) -> str:
return ansi_escape.sub("", s)
+def remove_ansi_escape(s: str) -> str:
+ return s.replace("\x1B", "")
+
+
logger = Logger()
def print_assistant_thoughts(
ai_name: object,
assistant_reply_json_valid: object,
- speak_mode: bool = False,
+ config: Config,
) -> None:
assistant_thoughts_reasoning = None
assistant_thoughts_plan = None
@@ -262,12 +267,16 @@ def print_assistant_thoughts(
assistant_thoughts_criticism = None
assistant_thoughts = assistant_reply_json_valid.get("thoughts", {})
- assistant_thoughts_text = assistant_thoughts.get("text")
+ assistant_thoughts_text = remove_ansi_escape(assistant_thoughts.get("text"))
if assistant_thoughts:
- assistant_thoughts_reasoning = assistant_thoughts.get("reasoning")
- assistant_thoughts_plan = assistant_thoughts.get("plan")
- assistant_thoughts_criticism = assistant_thoughts.get("criticism")
- assistant_thoughts_speak = assistant_thoughts.get("speak")
+ assistant_thoughts_reasoning = remove_ansi_escape(
+ assistant_thoughts.get("reasoning")
+ )
+ assistant_thoughts_plan = remove_ansi_escape(assistant_thoughts.get("plan"))
+ assistant_thoughts_criticism = remove_ansi_escape(
+ assistant_thoughts.get("criticism")
+ )
+ assistant_thoughts_speak = remove_ansi_escape(assistant_thoughts.get("speak"))
logger.typewriter_log(
f"{ai_name.upper()} THOUGHTS:", Fore.YELLOW, f"{assistant_thoughts_text}"
)
@@ -288,7 +297,7 @@ def print_assistant_thoughts(
logger.typewriter_log("CRITICISM:", Fore.YELLOW, f"{assistant_thoughts_criticism}")
# Speak the assistant's thoughts
if assistant_thoughts_speak:
- if speak_mode:
- say_text(assistant_thoughts_speak)
+ if config.speak_mode:
+ say_text(assistant_thoughts_speak, config)
else:
logger.typewriter_log("SPEAK:", Fore.YELLOW, f"{assistant_thoughts_speak}")
diff --git a/autogpt/main.py b/autogpt/main.py
index ab0a1533b..0217507a3 100644
--- a/autogpt/main.py
+++ b/autogpt/main.py
@@ -1,16 +1,15 @@
"""The application entry point. Can be invoked by a CLI or any other front end application."""
import logging
import sys
-from pathlib import Path
from colorama import Fore, Style
from autogpt.agent import Agent
-from autogpt.commands.command import CommandRegistry
-from autogpt.config import Config, check_openai_api_key
+from autogpt.config.config import Config, check_openai_api_key
from autogpt.configurator import create_config
from autogpt.logs import logger
from autogpt.memory.vector import get_memory
+from autogpt.models.command_registry import CommandRegistry
from autogpt.plugins import scan_plugins
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT, construct_main_ai_config
from autogpt.utils import (
@@ -23,16 +22,10 @@ from autogpt.workspace import Workspace
from scripts.install_plugin_deps import install_plugin_dependencies
COMMAND_CATEGORIES = [
- "autogpt.commands.analyze_code",
- "autogpt.commands.audio_text",
"autogpt.commands.execute_code",
"autogpt.commands.file_operations",
- "autogpt.commands.git_operations",
- "autogpt.commands.google_search",
- "autogpt.commands.image_gen",
- "autogpt.commands.improve_code",
+ "autogpt.commands.web_search",
"autogpt.commands.web_selenium",
- "autogpt.commands.write_tests",
"autogpt.app",
"autogpt.commands.task_statuses",
]
@@ -59,12 +52,13 @@ def run_auto_gpt(
logger.set_level(logging.DEBUG if debug else logging.INFO)
logger.speak_mode = speak
- cfg = Config()
+ config = Config.build_config_from_env()
+
# TODO: fill in llm values here
- check_openai_api_key()
+ check_openai_api_key(config)
create_config(
- cfg,
+ config,
continuous,
continuous_limit,
ai_settings,
@@ -80,17 +74,17 @@ def run_auto_gpt(
skip_news,
)
- if cfg.continuous_mode:
+ if config.continuous_mode:
for line in get_legal_warning().split("\n"):
logger.warn(markdown_to_ansi_style(line), "LEGAL:", Fore.RED)
- if not cfg.skip_news:
+ if not config.skip_news:
motd, is_new_motd = get_latest_bulletin()
if motd:
motd = markdown_to_ansi_style(motd)
for motd_line in motd.split("\n"):
logger.info(motd_line, "NEWS:", Fore.GREEN)
- if is_new_motd and not cfg.chat_messages_enabled:
+ if is_new_motd and not config.chat_messages_enabled:
input(
Fore.MAGENTA
+ Style.BRIGHT
@@ -122,32 +116,20 @@ def run_auto_gpt(
# TODO: have this directory live outside the repository (e.g. in a user's
# home directory) and have it come in as a command line argument or part of
# the env file.
- if workspace_directory is None:
- workspace_directory = Path(__file__).parent / "auto_gpt_workspace"
- else:
- workspace_directory = Path(workspace_directory)
- # TODO: pass in the ai_settings file and the env file and have them cloned into
- # the workspace directory so we can bind them to the agent.
- workspace_directory = Workspace.make_workspace(workspace_directory)
- cfg.workspace_path = str(workspace_directory)
+ workspace_directory = Workspace.get_workspace_directory(config, workspace_directory)
# HACK: doing this here to collect some globals that depend on the workspace.
- file_logger_path = workspace_directory / "file_logger.txt"
- if not file_logger_path.exists():
- with file_logger_path.open(mode="w", encoding="utf-8") as f:
- f.write("File Operation Logger ")
-
- cfg.file_logger_path = str(file_logger_path)
+ Workspace.build_file_logger_path(config, workspace_directory)
- cfg.set_plugins(scan_plugins(cfg, cfg.debug_mode))
+ config.plugins = scan_plugins(config, config.debug_mode)
# Create a CommandRegistry instance and scan default folder
command_registry = CommandRegistry()
logger.debug(
- f"The following command categories are disabled: {cfg.disabled_command_categories}"
+ f"The following command categories are disabled: {config.disabled_command_categories}"
)
enabled_command_categories = [
- x for x in COMMAND_CATEGORIES if x not in cfg.disabled_command_categories
+ x for x in COMMAND_CATEGORIES if x not in config.disabled_command_categories
]
logger.debug(
@@ -157,8 +139,22 @@ def run_auto_gpt(
for command_category in enabled_command_categories:
command_registry.import_commands(command_category)
+ # Unregister commands that are incompatible with the current config
+ incompatible_commands = []
+ for command in command_registry.commands.values():
+ if callable(command.enabled) and not command.enabled(config):
+ command.enabled = False
+ incompatible_commands.append(command)
+
+ for command in incompatible_commands:
+ command_registry.unregister(command.name)
+ logger.debug(
+ f"Unregistering incompatible command: {command.name}, "
+ f"reason - {command.disabled_reason or 'Disabled by current config.'}"
+ )
+
ai_name = ""
- ai_config = construct_main_ai_config()
+ ai_config = construct_main_ai_config(config)
ai_config.command_registry = command_registry
if ai_config.ai_name:
ai_name = ai_config.ai_name
@@ -167,21 +163,22 @@ def run_auto_gpt(
next_action_count = 0
# add chat plugins capable of report to logger
- if cfg.chat_messages_enabled:
- for plugin in cfg.plugins:
+ if config.chat_messages_enabled:
+ for plugin in config.plugins:
if hasattr(plugin, "can_handle_report") and plugin.can_handle_report():
logger.info(f"Loaded plugin into logger: {plugin.__class__.__name__}")
logger.chat_plugins.append(plugin)
# Initialize memory and make sure it is empty.
# this is particularly important for indexing and referencing pinecone memory
- memory = get_memory(cfg, init=True)
+ memory = get_memory(config)
+ memory.clear()
logger.typewriter_log(
"Using memory of type:", Fore.GREEN, f"{memory.__class__.__name__}"
)
- logger.typewriter_log("Using Browser:", Fore.GREEN, cfg.selenium_web_browser)
- system_prompt = ai_config.construct_full_prompt()
- if cfg.debug_mode:
+ logger.typewriter_log("Using Browser:", Fore.GREEN, config.selenium_web_browser)
+ system_prompt = ai_config.construct_full_prompt(config)
+ if config.debug_mode:
logger.typewriter_log("Prompt:", Fore.GREEN, system_prompt)
agent = Agent(
@@ -193,6 +190,6 @@ def run_auto_gpt(
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
workspace_directory=workspace_directory,
ai_config=ai_config,
- config=cfg,
+ config=config,
)
agent.start_interaction_loop()
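The command-compatibility pruning above, restated as a standalone sketch; the helper name is hypothetical, and `command_registry`/`config` are assumed to be built as in this file.

from autogpt.logs import logger

def prune_incompatible_commands(command_registry, config) -> None:
    # Resolve config-dependent `enabled` callables and drop commands that the
    # current configuration cannot support.
    for command in list(command_registry.commands.values()):
        if callable(command.enabled) and not command.enabled(config):
            command.enabled = False
            command_registry.unregister(command.name)
            logger.debug(
                f"Unregistering incompatible command: {command.name}, "
                f"reason - {command.disabled_reason or 'Disabled by current config.'}"
            )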
diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py
index be5241254..f3e1dc30c 100644
--- a/autogpt/memory/message_history.py
+++ b/autogpt/memory/message_history.py
@@ -9,11 +9,7 @@ if TYPE_CHECKING:
from autogpt.agent import Agent
from autogpt.config import Config
-from autogpt.json_utils.utilities import (
- LLM_DEFAULT_RESPONSE_FORMAT,
- extract_json_from_response,
- is_string_valid_json,
-)
+from autogpt.json_utils.utilities import extract_json_from_response
from autogpt.llm.base import ChatSequence, Message, MessageRole, MessageType
from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
from autogpt.llm.utils import count_string_tokens, create_chat_completion
@@ -51,8 +47,7 @@ class MessageHistory:
return self.messages.append(message)
def trim_messages(
- self,
- current_message_chain: list[Message],
+ self, current_message_chain: list[Message], config: Config
) -> tuple[Message, list[Message]]:
"""
Returns a list of trimmed messages: messages which are in the message history
@@ -60,6 +55,7 @@ class MessageHistory:
Args:
current_message_chain (list[Message]): The messages currently in the context.
+ config (Config): The config to use.
Returns:
Message: A message with the new running summary after adding the trimmed messages.
@@ -79,7 +75,7 @@ class MessageHistory:
return self.summary_message(), []
new_summary_message = self.update_running_summary(
- new_events=new_messages_not_in_chain
+ new_events=new_messages_not_in_chain, config=config
)
# Find the index of the last message processed
@@ -88,7 +84,7 @@ class MessageHistory:
return new_summary_message, new_messages_not_in_chain
- def per_cycle(self, messages: list[Message] | None = None):
+ def per_cycle(self, config: Config, messages: list[Message] | None = None):
"""
Yields:
Message: a message containing user input
@@ -105,8 +101,8 @@ class MessageHistory:
)
result_message = messages[i + 1]
try:
- assert is_string_valid_json(
- ai_message.content, LLM_DEFAULT_RESPONSE_FORMAT
+ assert (
+ extract_json_from_response(ai_message.content) != {}
), "AI response is not a valid JSON object"
assert result_message.type == "action_result"
@@ -122,7 +118,9 @@ class MessageHistory:
f"This reminds you of these events from your past: \n{self.summary}",
)
- def update_running_summary(self, new_events: list[Message]) -> Message:
+ def update_running_summary(
+ self, new_events: list[Message], config: Config
+ ) -> Message:
"""
This function takes a list of dictionaries representing new events and combines them with the current summary,
focusing on key and potentially important information to remember. The updated summary is returned in a message
@@ -139,8 +137,6 @@ class MessageHistory:
update_running_summary(new_events)
# Returns: "This reminds you of these events from your past: \nI entered the kitchen and found a scrawled note saying 7."
"""
- cfg = Config()
-
if not new_events:
return self.summary_message()
@@ -160,7 +156,7 @@ class MessageHistory:
event.content = json.dumps(content_dict)
except json.JSONDecodeError as e:
logger.error(f"Error: Invalid JSON: {e}")
- if cfg.debug_mode:
+ if config.debug_mode:
logger.error(f"{event.content}")
elif event.role.lower() == "system":
@@ -175,23 +171,23 @@ class MessageHistory:
# Assume an upper bound length for the summary prompt template, i.e. Your task is to create a concise running summary...., in summarize_batch func
# TODO make this default dynamic
prompt_template_length = 100
- max_tokens = OPEN_AI_CHAT_MODELS.get(cfg.fast_llm_model).max_tokens
- summary_tlength = count_string_tokens(str(self.summary), cfg.fast_llm_model)
+ max_tokens = OPEN_AI_CHAT_MODELS.get(config.fast_llm_model).max_tokens
+ summary_tlength = count_string_tokens(str(self.summary), config.fast_llm_model)
batch = []
batch_tlength = 0
# TODO Can put a cap on length of total new events and drop some previous events to save API cost, but need to think thru more how to do it without losing the context
for event in new_events:
- event_tlength = count_string_tokens(str(event), cfg.fast_llm_model)
+ event_tlength = count_string_tokens(str(event), config.fast_llm_model)
if (
batch_tlength + event_tlength
> max_tokens - prompt_template_length - summary_tlength
):
# The batch is full. Summarize it and start a new one.
- self.summarize_batch(batch, cfg)
+ self.summarize_batch(batch, config)
summary_tlength = count_string_tokens(
- str(self.summary), cfg.fast_llm_model
+ str(self.summary), config.fast_llm_model
)
batch = [event]
batch_tlength = event_tlength
@@ -201,11 +197,11 @@ class MessageHistory:
if batch:
# There's an unprocessed batch. Summarize it.
- self.summarize_batch(batch, cfg)
+ self.summarize_batch(batch, config)
return self.summary_message()
- def summarize_batch(self, new_events_batch, cfg):
+ def summarize_batch(self, new_events_batch, config):
prompt = f'''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember.
You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise.
@@ -221,7 +217,9 @@ Latest Development:
"""
'''
- prompt = ChatSequence.for_model(cfg.fast_llm_model, [Message("user", prompt)])
+ prompt = ChatSequence.for_model(
+ config.fast_llm_model, [Message("user", prompt)]
+ )
self.agent.log_cycle_handler.log_cycle(
self.agent.ai_name,
self.agent.created_at,
@@ -230,7 +228,7 @@ Latest Development:
PROMPT_SUMMARY_FILE_NAME,
)
- self.summary = create_chat_completion(prompt)
+ self.summary = create_chat_completion(prompt, config).content
self.agent.log_cycle_handler.log_cycle(
self.agent.ai_name,
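Caller-side sketch of the updated MessageHistory API, which now takes the Config instead of instantiating one; `history`, `current_chain`, and `config` are assumed to exist on the calling agent.

# Trim messages that fell out of the active context and fold them into the running summary.
new_summary, trimmed = history.trim_messages(current_message_chain=current_chain, config=config)
summary_message = history.update_running_summary(new_events=trimmed, config=config)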
diff --git a/autogpt/memory/vector/__init__.py b/autogpt/memory/vector/__init__.py
index aaaf83fb5..72abbb00e 100644
--- a/autogpt/memory/vector/__init__.py
+++ b/autogpt/memory/vector/__init__.py
@@ -39,12 +39,12 @@ supported_memory = ["json_file", "no_memory"]
# MilvusMemory = None
-def get_memory(cfg: Config, init=False) -> VectorMemory:
+def get_memory(config: Config) -> VectorMemory:
memory = None
- match cfg.memory_backend:
+ match config.memory_backend:
case "json_file":
- memory = JSONFileMemory(cfg)
+ memory = JSONFileMemory(config)
case "pinecone":
raise NotImplementedError(
@@ -59,8 +59,8 @@ def get_memory(cfg: Config, init=False) -> VectorMemory:
# " to use Pinecone as a memory backend."
# )
# else:
- # memory = PineconeMemory(cfg)
- # if init:
+ # memory = PineconeMemory(config)
+ # if clear:
# memory.clear()
case "redis":
@@ -74,7 +74,7 @@ def get_memory(cfg: Config, init=False) -> VectorMemory:
# " use Redis as a memory backend."
# )
# else:
- # memory = RedisMemory(cfg)
+ # memory = RedisMemory(config)
case "weaviate":
raise NotImplementedError(
@@ -89,7 +89,7 @@ def get_memory(cfg: Config, init=False) -> VectorMemory:
# " use Weaviate as a memory backend."
# )
# else:
- # memory = WeaviateMemory(cfg)
+ # memory = WeaviateMemory(config)
case "milvus":
raise NotImplementedError(
@@ -104,18 +104,18 @@ def get_memory(cfg: Config, init=False) -> VectorMemory:
# "Please install pymilvus to use Milvus or Zilliz Cloud as memory backend."
# )
# else:
- # memory = MilvusMemory(cfg)
+ # memory = MilvusMemory(config)
case "no_memory":
memory = NoMemory()
case _:
raise ValueError(
- f"Unknown memory backend '{cfg.memory_backend}'. Please check your config."
+ f"Unknown memory backend '{config.memory_backend}'. Please check your config."
)
if memory is None:
- memory = JSONFileMemory(cfg)
+ memory = JSONFileMemory(config)
return memory
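Usage sketch of the simplified get_memory(): the init flag is gone, so callers that want a fresh index clear it explicitly, as main.py now does.

from autogpt.memory.vector import get_memory

memory = get_memory(config)  # was: get_memory(cfg, init=True)
memory.clear()               # explicit clear replaces the old init flag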
diff --git a/autogpt/memory/vector/memory_item.py b/autogpt/memory/vector/memory_item.py
index c57b87aa3..cf00cc87a 100644
--- a/autogpt/memory/vector/memory_item.py
+++ b/autogpt/memory/vector/memory_item.py
@@ -36,19 +36,19 @@ class MemoryItem:
def from_text(
text: str,
source_type: MemoryDocType,
+ config: Config,
metadata: dict = {},
how_to_summarize: str | None = None,
question_for_summary: str | None = None,
):
- cfg = Config()
logger.debug(f"Memorizing text:\n{'-'*32}\n{text}\n{'-'*32}\n")
chunks = [
chunk
for chunk, _ in (
- split_text(text, cfg.embedding_model)
+ split_text(text, config.embedding_model, config)
if source_type != "code_file"
- else chunk_content(text, cfg.embedding_model)
+ else chunk_content(text, config.embedding_model)
)
]
logger.debug("Chunks: " + str(chunks))
@@ -58,6 +58,7 @@ class MemoryItem:
for summary, _ in [
summarize_text(
text_chunk,
+ config,
instruction=how_to_summarize,
question=question_for_summary,
)
@@ -66,7 +67,7 @@ class MemoryItem:
]
logger.debug("Chunk summaries: " + str(chunk_summaries))
- e_chunks = get_embedding(chunks)
+ e_chunks = get_embedding(chunks, config)
summary = (
chunk_summaries[0]
@@ -81,7 +82,7 @@ class MemoryItem:
# TODO: investigate search performance of weighted average vs summary
# e_average = np.average(e_chunks, axis=0, weights=[len(c) for c in chunks])
- e_summary = get_embedding(summary)
+ e_summary = get_embedding(summary, config)
metadata["source_type"] = source_type
@@ -96,8 +97,8 @@ class MemoryItem:
)
@staticmethod
- def from_text_file(content: str, path: str):
- return MemoryItem.from_text(content, "text_file", {"location": path})
+ def from_text_file(content: str, path: str, config: Config):
+ return MemoryItem.from_text(content, "text_file", config, {"location": path})
@staticmethod
def from_code_file(content: str, path: str):
@@ -109,21 +110,21 @@ class MemoryItem:
# The result_message contains either user feedback
# or the result of the command specified in ai_message
- if ai_message["role"] != "assistant":
- raise ValueError(f"Invalid role on 'ai_message': {ai_message['role']}")
+ if ai_message.role != "assistant":
+ raise ValueError(f"Invalid role on 'ai_message': {ai_message.role}")
result = (
- result_message["content"]
- if result_message["content"].startswith("Command")
+ result_message.content
+ if result_message.content.startswith("Command")
else "None"
)
user_input = (
- result_message["content"]
- if result_message["content"].startswith("Human feedback")
+ result_message.content
+ if result_message.content.startswith("Human feedback")
else "None"
)
memory_content = (
- f"Assistant Reply: {ai_message['content']}"
+ f"Assistant Reply: {ai_message.content}"
"\n\n"
f"Result: {result}"
"\n\n"
@@ -137,19 +138,25 @@ class MemoryItem:
)
@staticmethod
- def from_webpage(content: str, url: str, question: str | None = None):
+ def from_webpage(
+ content: str, url: str, config: Config, question: str | None = None
+ ):
return MemoryItem.from_text(
text=content,
source_type="webpage",
+ config=config,
metadata={"location": url},
question_for_summary=question,
)
- def dump(self) -> str:
- token_length = count_string_tokens(self.raw_content, Config().embedding_model)
+ def dump(self, calculate_length=False) -> str:
+ if calculate_length:
+ token_length = count_string_tokens(
+ self.raw_content, Config().embedding_model
+ )
return f"""
=============== MemoryItem ===============
-Length: {token_length} tokens in {len(self.e_chunks)} chunks
+Size: {f'{token_length} tokens in ' if calculate_length else ''}{len(self.e_chunks)} chunks
Metadata: {json.dumps(self.metadata, indent=2)}
---------------- SUMMARY -----------------
{self.summary}
@@ -158,6 +165,31 @@ Metadata: {json.dumps(self.metadata, indent=2)}
==========================================
"""
+ def __eq__(self, other: MemoryItem):
+ return (
+ self.raw_content == other.raw_content
+ and self.chunks == other.chunks
+ and self.chunk_summaries == other.chunk_summaries
+ # Embeddings can either be list[float] or np.ndarray[float32],
+ # and for comparison they must be of the same type
+ and np.array_equal(
+ self.e_summary
+ if isinstance(self.e_summary, np.ndarray)
+ else np.array(self.e_summary, dtype=np.float32),
+ other.e_summary
+ if isinstance(other.e_summary, np.ndarray)
+ else np.array(other.e_summary, dtype=np.float32),
+ )
+ and np.array_equal(
+ self.e_chunks
+ if isinstance(self.e_chunks[0], np.ndarray)
+ else [np.array(c, dtype=np.float32) for c in self.e_chunks],
+ other.e_chunks
+ if isinstance(other.e_chunks[0], np.ndarray)
+ else [np.array(c, dtype=np.float32) for c in other.e_chunks],
+ )
+ )
+
@dataclasses.dataclass
class MemoryItemRelevance:
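Sketch of building a MemoryItem under the new signatures, with the Config passed in rather than pulled from the singleton; the text and metadata values are illustrative.

item = MemoryItem.from_text(
    text="I wrote 'Hello World' to hello_world.txt.",
    source_type="text_file",
    config=config,
    metadata={"location": "hello_world.txt"},
)
# Equivalent convenience wrapper:
item = MemoryItem.from_text_file("I wrote 'Hello World' to hello_world.txt.", "hello_world.txt", config)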
diff --git a/autogpt/memory/vector/providers/base.py b/autogpt/memory/vector/providers/base.py
index 969d89347..dc4dbf3cc 100644
--- a/autogpt/memory/vector/providers/base.py
+++ b/autogpt/memory/vector/providers/base.py
@@ -17,25 +17,29 @@ class VectorMemoryProvider(MutableSet[MemoryItem], AbstractSingleton):
def __init__(self, config: Config):
pass
- def get(self, query: str) -> MemoryItemRelevance | None:
+ def get(self, query: str, config: Config) -> MemoryItemRelevance | None:
"""
Gets the data from the memory that is most relevant to the given query.
Args:
- data: The data to compare to.
+ query: The query used to retrieve information.
+            config: The config object.
Returns: The most relevant Memory
"""
- result = self.get_relevant(query, 1)
+ result = self.get_relevant(query, 1, config)
return result[0] if result else None
- def get_relevant(self, query: str, k: int) -> Sequence[MemoryItemRelevance]:
+ def get_relevant(
+ self, query: str, k: int, config: Config
+ ) -> Sequence[MemoryItemRelevance]:
"""
Returns the top-k most relevant memories for the given query
Args:
query: the query to compare stored memories to
k: the number of relevant memories to fetch
+            config: The config object.
Returns:
list[MemoryItemRelevance] containing the top [k] relevant memories
@@ -48,7 +52,7 @@ class VectorMemoryProvider(MutableSet[MemoryItem], AbstractSingleton):
f"{len(self)} memories in index"
)
- relevances = self.score_memories_for_relevance(query)
+ relevances = self.score_memories_for_relevance(query, config)
logger.debug(f"Memory relevance scores: {[str(r) for r in relevances]}")
# take last k items and reverse
@@ -57,13 +61,13 @@ class VectorMemoryProvider(MutableSet[MemoryItem], AbstractSingleton):
return [relevances[i] for i in top_k_indices]
def score_memories_for_relevance(
- self, for_query: str
+ self, for_query: str, config: Config
) -> Sequence[MemoryItemRelevance]:
"""
Returns MemoryItemRelevance for every memory in the index.
Implementations may override this function for performance purposes.
"""
- e_query: Embedding = get_embedding(for_query)
+ e_query: Embedding = get_embedding(for_query, config)
return [m.relevance_for(for_query, e_query) for m in self]
def get_stats(self) -> tuple[int, int]:
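Retrieval now threads the Config down to the embedding call; a short sketch, assuming `memory` is any VectorMemoryProvider and `config` a loaded Config.

best = memory.get("What did I write to hello_world.txt?", config)  # single best match or None
top_k = memory.get_relevant("hello_world.txt", 5, config)          # up to 5 MemoryItemRelevance objects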
diff --git a/autogpt/memory/vector/providers/json_file.py b/autogpt/memory/vector/providers/json_file.py
index 46446a9c5..b85ea8e67 100644
--- a/autogpt/memory/vector/providers/json_file.py
+++ b/autogpt/memory/vector/providers/json_file.py
@@ -20,22 +20,29 @@ class JSONFileMemory(VectorMemoryProvider):
file_path: Path
memories: list[MemoryItem]
- def __init__(self, cfg: Config) -> None:
+ def __init__(self, config: Config) -> None:
"""Initialize a class instance
Args:
- cfg: Config object
+ config: Config object
Returns:
None
"""
- workspace_path = Path(cfg.workspace_path)
- self.file_path = workspace_path / f"{cfg.memory_index}.json"
+ workspace_path = Path(config.workspace_path)
+ self.file_path = workspace_path / f"{config.memory_index}.json"
self.file_path.touch()
- logger.debug(f"Initialized {__name__} with index path {self.file_path}")
+ logger.debug(
+ f"Initialized {__class__.__name__} with index path {self.file_path}"
+ )
self.memories = []
- self.save_index()
+ try:
+ self.load_index()
+ logger.debug(f"Loaded {len(self.memories)} MemoryItems from file")
+ except Exception as e:
+ logger.warn(f"Could not load MemoryItems from file: {e}")
+ self.save_index()
def __iter__(self) -> Iterator[MemoryItem]:
return iter(self.memories)
@@ -48,6 +55,7 @@ class JSONFileMemory(VectorMemoryProvider):
def add(self, item: MemoryItem):
self.memories.append(item)
+ logger.debug(f"Adding item to memory: {item.dump()}")
self.save_index()
return len(self.memories)
@@ -62,6 +70,17 @@ class JSONFileMemory(VectorMemoryProvider):
self.memories.clear()
self.save_index()
+ def load_index(self):
+ """Loads all memories from the index file"""
+ if not self.file_path.is_file():
+ logger.debug(f"Index file '{self.file_path}' does not exist")
+ return
+ with self.file_path.open("r") as f:
+ logger.debug(f"Loading memories from index file '{self.file_path}'")
+ json_index = orjson.loads(f.read())
+ for memory_item_dict in json_index:
+ self.memories.append(MemoryItem(**memory_item_dict))
+
def save_index(self):
logger.debug(f"Saving memory index to file {self.file_path}")
with self.file_path.open("wb") as f:
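Behavioural sketch: JSONFileMemory now reloads an existing index on construction instead of overwriting it, falling back to creating a fresh file only if loading fails.

memory = JSONFileMemory(config)  # loads <workspace>/<memory_index>.json when present
memory.add(item)                 # appends and persists via save_index()
print(memory.get_stats())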
diff --git a/autogpt/memory/vector/utils.py b/autogpt/memory/vector/utils.py
index 75d1f69d4..beb2fcf93 100644
--- a/autogpt/memory/vector/utils.py
+++ b/autogpt/memory/vector/utils.py
@@ -1,16 +1,14 @@
from typing import Any, overload
import numpy as np
-import openai
from autogpt.config import Config
-from autogpt.llm.utils import metered, retry_openai_api
+from autogpt.llm.base import TText
+from autogpt.llm.providers import openai as iopenai
from autogpt.logs import logger
Embedding = list[np.float32] | np.ndarray[Any, np.dtype[np.float32]]
"""Embedding vector"""
-TText = list[int]
-"""Token array representing text"""
@overload
@@ -23,10 +21,8 @@ def get_embedding(input: list[str] | list[TText]) -> list[Embedding]:
...
-@metered
-@retry_openai_api()
def get_embedding(
- input: str | TText | list[str] | list[TText],
+ input: str | TText | list[str] | list[TText], config: Config
) -> Embedding | list[Embedding]:
"""Get an embedding from the ada model.
@@ -37,7 +33,6 @@ def get_embedding(
Returns:
List[float]: The embedding.
"""
- cfg = Config()
multiple = isinstance(input, list) and all(not isinstance(i, int) for i in input)
if isinstance(input, str):
@@ -45,22 +40,22 @@ def get_embedding(
elif multiple and isinstance(input[0], str):
input = [text.replace("\n", " ") for text in input]
- model = cfg.embedding_model
- if cfg.use_azure:
- kwargs = {"engine": cfg.get_azure_deployment_id_for_model(model)}
+ model = config.embedding_model
+ if config.use_azure:
+ kwargs = {"engine": config.get_azure_deployment_id_for_model(model)}
else:
kwargs = {"model": model}
logger.debug(
f"Getting embedding{f's for {len(input)} inputs' if multiple else ''}"
f" with model '{model}'"
- + (f" via Azure deployment '{kwargs['engine']}'" if cfg.use_azure else "")
+ + (f" via Azure deployment '{kwargs['engine']}'" if config.use_azure else "")
)
- embeddings = openai.Embedding.create(
- input=input,
- api_key=cfg.openai_api_key,
+ embeddings = iopenai.create_embedding(
+ input,
**kwargs,
+ api_key=config.openai_api_key,
).data
if not multiple:
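get_embedding keeps its single/batch overloads but now requires the Config; a minimal sketch.

from autogpt.memory.vector.utils import get_embedding

e_single = get_embedding("hello world", config)                 # one Embedding
e_batch = get_embedding(["first text", "second text"], config)  # list[Embedding]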
diff --git a/tests/integration/goal_oriented/__init__.py b/autogpt/models/__init__.py
index e69de29bb..e69de29bb 100644
--- a/tests/integration/goal_oriented/__init__.py
+++ b/autogpt/models/__init__.py
diff --git a/autogpt/models/command.py b/autogpt/models/command.py
new file mode 100644
index 000000000..92cf414a9
--- /dev/null
+++ b/autogpt/models/command.py
@@ -0,0 +1,47 @@
+from typing import Any, Callable, Optional
+
+from autogpt.config import Config
+
+from .command_parameter import CommandParameter
+
+
+class Command:
+ """A class representing a command.
+
+ Attributes:
+ name (str): The name of the command.
+ description (str): A brief description of what the command does.
+ parameters (list): The parameters of the function that the command executes.
+ """
+
+ def __init__(
+ self,
+ name: str,
+ description: str,
+ method: Callable[..., Any],
+ parameters: list[CommandParameter],
+ enabled: bool | Callable[[Config], bool] = True,
+ disabled_reason: Optional[str] = None,
+ ):
+ self.name = name
+ self.description = description
+ self.method = method
+ self.parameters = parameters
+ self.enabled = enabled
+ self.disabled_reason = disabled_reason
+
+ def __call__(self, *args, **kwargs) -> Any:
+        if "config" in kwargs and callable(self.enabled):
+ self.enabled = self.enabled(kwargs["config"])
+ if not self.enabled:
+ if self.disabled_reason:
+ return f"Command '{self.name}' is disabled: {self.disabled_reason}"
+ return f"Command '{self.name}' is disabled"
+ return self.method(*args, **kwargs)
+
+ def __str__(self) -> str:
+ params = [
+ f"{param.name}: {param.type if param.required else f'Optional[{param.type}]'}"
+ for param in self.parameters
+ ]
+ return f"{self.name}: {self.description}, params: ({', '.join(params)})"
diff --git a/autogpt/models/command_parameter.py b/autogpt/models/command_parameter.py
new file mode 100644
index 000000000..ec130c875
--- /dev/null
+++ b/autogpt/models/command_parameter.py
@@ -0,0 +1,12 @@
+import dataclasses
+
+
+@dataclasses.dataclass
+class CommandParameter:
+ name: str
+ type: str
+ description: str
+ required: bool
+
+ def __repr__(self):
+ return f"CommandParameter('{self.name}', '{self.type}', '{self.description}', {self.required})"
diff --git a/autogpt/models/command_registry.py b/autogpt/models/command_registry.py
new file mode 100644
index 000000000..96418d26b
--- /dev/null
+++ b/autogpt/models/command_registry.py
@@ -0,0 +1,96 @@
+import importlib
+import inspect
+from typing import Any, Callable
+
+from autogpt.command_decorator import AUTO_GPT_COMMAND_IDENTIFIER
+from autogpt.logs import logger
+from autogpt.models.command import Command
+
+
+class CommandRegistry:
+ """
+ The CommandRegistry class is a manager for a collection of Command objects.
+ It allows the registration, modification, and retrieval of Command objects,
+ as well as the scanning and loading of command plugins from a specified
+ directory.
+ """
+
+ commands: dict[str, Command]
+
+ def __init__(self):
+ self.commands = {}
+
+ def _import_module(self, module_name: str) -> Any:
+ return importlib.import_module(module_name)
+
+ def _reload_module(self, module: Any) -> Any:
+ return importlib.reload(module)
+
+ def register(self, cmd: Command) -> None:
+ if cmd.name in self.commands:
+ logger.warn(
+ f"Command '{cmd.name}' already registered and will be overwritten!"
+ )
+ self.commands[cmd.name] = cmd
+
+ def unregister(self, command_name: str):
+ if command_name in self.commands:
+ del self.commands[command_name]
+ else:
+ raise KeyError(f"Command '{command_name}' not found in registry.")
+
+ def reload_commands(self) -> None:
+ """Reloads all loaded command plugins."""
+ for cmd_name in self.commands:
+ cmd = self.commands[cmd_name]
+ module = self._import_module(cmd.__module__)
+ reloaded_module = self._reload_module(module)
+ if hasattr(reloaded_module, "register"):
+ reloaded_module.register(self)
+
+ def get_command(self, name: str) -> Callable[..., Any]:
+ return self.commands[name]
+
+ def call(self, command_name: str, **kwargs) -> Any:
+ if command_name not in self.commands:
+ raise KeyError(f"Command '{command_name}' not found in registry.")
+ command = self.commands[command_name]
+ return command(**kwargs)
+
+ def command_prompt(self) -> str:
+ """
+ Returns a string representation of all registered `Command` objects for use in a prompt
+ """
+ commands_list = [
+ f"{idx + 1}. {str(cmd)}" for idx, cmd in enumerate(self.commands.values())
+ ]
+ return "\n".join(commands_list)
+
+ def import_commands(self, module_name: str) -> None:
+ """
+ Imports the specified Python module containing command plugins.
+
+ This method imports the associated module and registers any functions or
+ classes that are decorated with the `AUTO_GPT_COMMAND_IDENTIFIER` attribute
+ as `Command` objects. The registered `Command` objects are then added to the
+ `commands` dictionary of the `CommandRegistry` object.
+
+ Args:
+ module_name (str): The name of the module to import for command plugins.
+ """
+
+ module = importlib.import_module(module_name)
+
+ for attr_name in dir(module):
+ attr = getattr(module, attr_name)
+ # Register decorated functions
+ if hasattr(attr, AUTO_GPT_COMMAND_IDENTIFIER) and getattr(
+ attr, AUTO_GPT_COMMAND_IDENTIFIER
+ ):
+ self.register(attr.command)
+ # Register command classes
+ elif (
+ inspect.isclass(attr) and issubclass(attr, Command) and attr != Command
+ ):
+ cmd_instance = attr()
+ self.register(cmd_instance)
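Registering and invoking the command sketched above through the new CommandRegistry; the keyword arguments are illustrative.

from autogpt.models.command_registry import CommandRegistry

registry = CommandRegistry()
registry.register(speak_command)
print(registry.command_prompt())  # e.g. "1. speak: Speak a piece of text aloud, params: (text: string)"
result = registry.call("speak", text="hello", config=config)  # the spoken text, or a disabled notice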
diff --git a/autogpt/plugins/__init__.py b/autogpt/plugins/__init__.py
index 4d84c9b5e..600d6b4f0 100644
--- a/autogpt/plugins/__init__.py
+++ b/autogpt/plugins/__init__.py
@@ -58,7 +58,7 @@ def write_dict_to_json_file(data: dict, file_path: str) -> None:
json.dump(data, file, indent=4)
-def fetch_openai_plugins_manifest_and_spec(cfg: Config) -> dict:
+def fetch_openai_plugins_manifest_and_spec(config: Config) -> dict:
"""
Fetch the manifest for a list of OpenAI plugins.
Args:
@@ -68,8 +68,8 @@ def fetch_openai_plugins_manifest_and_spec(cfg: Config) -> dict:
"""
# TODO add directory scan
manifests = {}
- for url in cfg.plugins_openai:
- openai_plugin_client_dir = f"{cfg.plugins_dir}/openai/{urlparse(url).netloc}"
+ for url in config.plugins_openai:
+ openai_plugin_client_dir = f"{config.plugins_dir}/openai/{urlparse(url).netloc}"
create_directory_if_not_exists(openai_plugin_client_dir)
if not os.path.exists(f"{openai_plugin_client_dir}/ai-plugin.json"):
try:
@@ -134,18 +134,18 @@ def create_directory_if_not_exists(directory_path: str) -> bool:
def initialize_openai_plugins(
- manifests_specs: dict, cfg: Config, debug: bool = False
+ manifests_specs: dict, config: Config, debug: bool = False
) -> dict:
"""
Initialize OpenAI plugins.
Args:
manifests_specs (dict): per url dictionary of manifest and spec.
- cfg (Config): Config instance including plugins config
+ config (Config): Config instance including plugins config
debug (bool, optional): Enable debug logging. Defaults to False.
Returns:
dict: per url dictionary of manifest, spec and client.
"""
- openai_plugins_dir = f"{cfg.plugins_dir}/openai"
+ openai_plugins_dir = f"{config.plugins_dir}/openai"
if create_directory_if_not_exists(openai_plugins_dir):
for url, manifest_spec in manifests_specs.items():
openai_plugin_client_dir = f"{openai_plugins_dir}/{urlparse(url).hostname}"
@@ -188,13 +188,13 @@ def initialize_openai_plugins(
def instantiate_openai_plugin_clients(
- manifests_specs_clients: dict, cfg: Config, debug: bool = False
+ manifests_specs_clients: dict, config: Config, debug: bool = False
) -> dict:
"""
Instantiates BaseOpenAIPlugin instances for each OpenAI plugin.
Args:
manifests_specs_clients (dict): per url dictionary of manifest, spec and client.
- cfg (Config): Config instance including plugins config
+ config (Config): Config instance including plugins config
debug (bool, optional): Enable debug logging. Defaults to False.
Returns:
plugins (dict): per url dictionary of BaseOpenAIPlugin instances.
@@ -206,11 +206,11 @@ def instantiate_openai_plugin_clients(
return plugins
-def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate]:
+def scan_plugins(config: Config, debug: bool = False) -> List[AutoGPTPluginTemplate]:
"""Scan the plugins directory for plugins and loads them.
Args:
- cfg (Config): Config instance including plugins config
+ config (Config): Config instance including plugins config
debug (bool, optional): Enable debug logging. Defaults to False.
Returns:
@@ -218,11 +218,11 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate
"""
loaded_plugins = []
# Generic plugins
- plugins_path_path = Path(cfg.plugins_dir)
- plugins_config = cfg.plugins_config
+ plugins_path_path = Path(config.plugins_dir)
+ plugins_config = config.plugins_config
# Directory-based plugins
- for plugin_path in [f.path for f in os.scandir(cfg.plugins_dir) if f.is_dir()]:
+ for plugin_path in [f.path for f in os.scandir(config.plugins_dir) if f.is_dir()]:
# Avoid going into __pycache__ or other hidden directories
if plugin_path.startswith("__"):
continue
@@ -259,9 +259,10 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate
if key.startswith("__"):
continue
a_module = getattr(zipped_module, key)
- a_keys = dir(a_module)
+
if (
- "_abc_impl" in a_keys
+ inspect.isclass(a_module)
+ and issubclass(a_module, AutoGPTPluginTemplate)
and a_module.__name__ != "AutoGPTPluginTemplate"
):
plugin_name = a_module.__name__
@@ -284,13 +285,17 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate
f"they are enabled in plugins_config.yaml. Zipped plugins should use the class "
f"name ({plugin_name}) as the key."
)
+ else:
+ logger.debug(
+ f"Skipping {key}: {a_module.__name__} because it doesn't subclass AutoGPTPluginTemplate."
+ )
# OpenAI plugins
- if cfg.plugins_openai:
- manifests_specs = fetch_openai_plugins_manifest_and_spec(cfg)
+ if config.plugins_openai:
+ manifests_specs = fetch_openai_plugins_manifest_and_spec(config)
if manifests_specs.keys():
manifests_specs_clients = initialize_openai_plugins(
- manifests_specs, cfg, debug
+ manifests_specs, config, debug
)
for url, openai_plugin_meta in manifests_specs_clients.items():
if not plugins_config.is_enabled(url):
diff --git a/autogpt/processing/text.py b/autogpt/processing/text.py
index aadc93ef4..24851b1c4 100644
--- a/autogpt/processing/text.py
+++ b/autogpt/processing/text.py
@@ -12,8 +12,6 @@ from autogpt.llm.utils import count_string_tokens, create_chat_completion
from autogpt.logs import logger
from autogpt.utils import batch
-CFG = Config()
-
def _max_chunk_length(model: str, max: Optional[int] = None) -> int:
model_max_input_tokens = OPEN_AI_MODELS[model].max_tokens - 1
@@ -60,13 +58,18 @@ def chunk_content(
def summarize_text(
- text: str, instruction: Optional[str] = None, question: Optional[str] = None
+ text: str,
+ config: Config,
+ instruction: Optional[str] = None,
+ question: Optional[str] = None,
) -> tuple[str, None | list[tuple[str, str]]]:
"""Summarize text using the OpenAI API
Args:
text (str): The text to summarize
+ config (Config): The config object
instruction (str): Additional instruction for summarization, e.g. "focus on information related to polar bears", "omit personal information contained in the text"
+ question (str): Question to answer in the summary
Returns:
str: The summary of the text
@@ -79,7 +82,7 @@ def summarize_text(
if instruction and question:
raise ValueError("Parameters 'question' and 'instructions' cannot both be set")
- model = CFG.fast_llm_model
+ model = config.fast_llm_model
if question:
instruction = (
@@ -111,14 +114,18 @@ def summarize_text(
logger.debug(f"Summarizing with {model}:\n{summarization_prompt.dump()}\n")
summary = create_chat_completion(
- summarization_prompt, temperature=0, max_tokens=500
- )
+ prompt=summarization_prompt, config=config, temperature=0, max_tokens=500
+ ).content
logger.debug(f"\n{'-'*16} SUMMARY {'-'*17}\n{summary}\n{'-'*42}\n")
return summary.strip(), None
summaries: list[str] = []
- chunks = list(split_text(text, for_model=model, max_chunk_length=max_chunk_length))
+ chunks = list(
+ split_text(
+ text, for_model=model, config=config, max_chunk_length=max_chunk_length
+ )
+ )
for i, (chunk, chunk_length) in enumerate(chunks):
logger.info(
@@ -138,7 +145,8 @@ def summarize_text(
def split_text(
text: str,
- for_model: str = CFG.fast_llm_model,
+ for_model: str,
+ config: Config,
with_overlap=True,
max_chunk_length: Optional[int] = None,
):
@@ -147,7 +155,9 @@ def split_text(
Args:
text (str): The text to split
for_model (str): The model to chunk for; determines tokenizer and constraints
- max_length (int, optional): The maximum length of each chunk
+ config (Config): The config object
+ with_overlap (bool, optional): Whether to allow overlap between chunks
+ max_chunk_length (int, optional): The maximum length of a chunk
Yields:
str: The next chunk of text
@@ -155,6 +165,7 @@ def split_text(
Raises:
ValueError: when a sentence is longer than the maximum length
"""
+
max_length = _max_chunk_length(for_model, max_chunk_length)
# flatten paragraphs to improve performance
@@ -168,7 +179,7 @@ def split_text(
n_chunks = ceil(text_length / max_length)
target_chunk_length = ceil(text_length / n_chunks)
- nlp: spacy.language.Language = spacy.load(CFG.browse_spacy_language_model)
+ nlp: spacy.language.Language = spacy.load(config.browse_spacy_language_model)
nlp.add_pipe("sentencizer")
doc = nlp(text)
sentences = [sentence.text.strip() for sentence in doc.sents]
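summarize_text and split_text no longer read a module-level Config; a sketch of the new call shape, with the input text and question as illustrative values.

from autogpt.processing.text import summarize_text

summary, _ = summarize_text(
    "Long scraped page text ...",
    config,
    question="What is this page about?",
)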
diff --git a/autogpt/prompts/generator.py b/autogpt/prompts/generator.py
index 7101acfea..3fff9536a 100644
--- a/autogpt/prompts/generator.py
+++ b/autogpt/prompts/generator.py
@@ -1,10 +1,12 @@
""" A module for generating custom prompt strings."""
+import json
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
+from autogpt.config import Config
from autogpt.json_utils.utilities import llm_response_schema
if TYPE_CHECKING:
- from autogpt.commands.command import CommandRegistry
+ from autogpt.models.command_registry import CommandRegistry
class PromptGenerator:
@@ -127,7 +129,7 @@ class PromptGenerator:
else:
return "\n".join(f"{i+1}. {item}" for i, item in enumerate(items))
- def generate_prompt_string(self) -> str:
+ def generate_prompt_string(self, config: Config) -> str:
"""
Generate a prompt string based on the constraints, commands, resources,
and performance evaluations.
@@ -137,11 +139,26 @@ class PromptGenerator:
"""
return (
f"Constraints:\n{self._generate_numbered_list(self.constraints)}\n\n"
- "Commands:\n"
- f"{self._generate_numbered_list(self.commands, item_type='command')}\n\n"
+ f"{generate_commands(self, config)}"
f"Resources:\n{self._generate_numbered_list(self.resources)}\n\n"
"Performance Evaluation:\n"
f"{self._generate_numbered_list(self.performance_evaluation)}\n\n"
"Respond with only valid JSON conforming to the following schema: \n"
- f"{llm_response_schema()}\n"
+ f"{json.dumps(llm_response_schema(config))}\n"
)
+
+
+def generate_commands(self, config: Config) -> str:
+ """
+    Generate the "Commands:" section of the prompt, or an empty string when
+    OpenAI function calling is enabled (commands are then passed to the API as functions).
+
+    Returns:
+        str: The formatted command list section, or an empty string.
+ """
+ if config.openai_functions:
+ return ""
+ return (
+ "Commands:\n"
+ f"{self._generate_numbered_list(self.commands, item_type='command')}\n\n"
+ )
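Sketch of the prompt generation path with the explicit Config: when config.openai_functions is set, the textual Commands section is omitted and the command specs are sent to the API as functions instead.

from autogpt.prompts.prompt import build_default_prompt_generator

prompt_generator = build_default_prompt_generator(config)
prompt_string = prompt_generator.generate_prompt_string(config)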
diff --git a/autogpt/prompts/prompt.py b/autogpt/prompts/prompt.py
index 17d78bd1c..16d5c7e7e 100644
--- a/autogpt/prompts/prompt.py
+++ b/autogpt/prompts/prompt.py
@@ -9,12 +9,10 @@ from autogpt.prompts.generator import PromptGenerator
from autogpt.setup import prompt_user
from autogpt.utils import clean_input
-CFG = Config()
-
DEFAULT_TRIGGERING_PROMPT = "Determine exactly one command to use, and respond using the JSON schema specified previously:"
-def build_default_prompt_generator() -> PromptGenerator:
+def build_default_prompt_generator(config: Config) -> PromptGenerator:
"""
This function generates a prompt string that includes various constraints,
commands, resources, and performance evaluations.
@@ -27,7 +25,7 @@ def build_default_prompt_generator() -> PromptGenerator:
prompt_generator = PromptGenerator()
# Initialize the PromptConfig object and load the file set in the main config (default: prompts_settings.yaml)
- prompt_config = PromptConfig(CFG.prompt_settings_file)
+ prompt_config = PromptConfig(config.prompt_settings_file)
# Add constraints to the PromptGenerator object
for constraint in prompt_config.constraints:
@@ -44,70 +42,71 @@ def build_default_prompt_generator() -> PromptGenerator:
return prompt_generator
-def construct_main_ai_config() -> AIConfig:
+def construct_main_ai_config(config: Config) -> AIConfig:
"""Construct the prompt for the AI to respond to
Returns:
str: The prompt string
"""
- config = AIConfig.load(CFG.ai_settings_file)
- if CFG.skip_reprompt and config.ai_name:
- logger.typewriter_log("Name :", Fore.GREEN, config.ai_name)
- logger.typewriter_log("Role :", Fore.GREEN, config.ai_role)
- logger.typewriter_log("Goals:", Fore.GREEN, f"{config.ai_goals}")
+ ai_config = AIConfig.load(config.ai_settings_file)
+ if config.skip_reprompt and ai_config.ai_name:
+ logger.typewriter_log("Name :", Fore.GREEN, ai_config.ai_name)
+ logger.typewriter_log("Role :", Fore.GREEN, ai_config.ai_role)
+ logger.typewriter_log("Goals:", Fore.GREEN, f"{ai_config.ai_goals}")
logger.typewriter_log(
"API Budget:",
Fore.GREEN,
- "infinite" if config.api_budget <= 0 else f"${config.api_budget}",
+ "infinite" if ai_config.api_budget <= 0 else f"${ai_config.api_budget}",
)
- elif config.ai_name:
+ elif ai_config.ai_name:
logger.typewriter_log(
"Welcome back! ",
Fore.GREEN,
- f"Would you like me to return to being {config.ai_name}?",
+ f"Would you like me to return to being {ai_config.ai_name}?",
speak_text=True,
)
should_continue = clean_input(
+ config,
f"""Continue with the last settings?
-Name: {config.ai_name}
-Role: {config.ai_role}
-Goals: {config.ai_goals}
-API Budget: {"infinite" if config.api_budget <= 0 else f"${config.api_budget}"}
-Continue ({CFG.authorise_key}/{CFG.exit_key}): """
+Name: {ai_config.ai_name}
+Role: {ai_config.ai_role}
+Goals: {ai_config.ai_goals}
+API Budget: {"infinite" if ai_config.api_budget <= 0 else f"${ai_config.api_budget}"}
+Continue ({config.authorise_key}/{config.exit_key}): """,
)
- if should_continue.lower() == CFG.exit_key:
- config = AIConfig()
+ if should_continue.lower() == config.exit_key:
+ ai_config = AIConfig()
- if not config.ai_name:
- config = prompt_user()
- config.save(CFG.ai_settings_file)
+ if not ai_config.ai_name:
+ ai_config = prompt_user(config)
+ ai_config.save(config.ai_settings_file)
- if CFG.restrict_to_workspace:
+ if config.restrict_to_workspace:
logger.typewriter_log(
"NOTE:All files/directories created by this agent can be found inside its workspace at:",
Fore.YELLOW,
- f"{CFG.workspace_path}",
+ f"{config.workspace_path}",
)
# set the total api budget
api_manager = ApiManager()
- api_manager.set_total_budget(config.api_budget)
+ api_manager.set_total_budget(ai_config.api_budget)
# Agent Created, print message
logger.typewriter_log(
- config.ai_name,
+ ai_config.ai_name,
Fore.LIGHTBLUE_EX,
"has been created with the following details:",
speak_text=True,
)
- # Print the ai config details
+ # Print the ai_config details
# Name
- logger.typewriter_log("Name:", Fore.GREEN, config.ai_name, speak_text=False)
+ logger.typewriter_log("Name:", Fore.GREEN, ai_config.ai_name, speak_text=False)
# Role
- logger.typewriter_log("Role:", Fore.GREEN, config.ai_role, speak_text=False)
+ logger.typewriter_log("Role:", Fore.GREEN, ai_config.ai_role, speak_text=False)
# Goals
logger.typewriter_log("Goals:", Fore.GREEN, "", speak_text=False)
- for goal in config.ai_goals:
+ for goal in ai_config.ai_goals:
logger.typewriter_log("-", Fore.GREEN, goal, speak_text=False)
- return config
+ return ai_config
diff --git a/autogpt/setup.py b/autogpt/setup.py
index 67cae5da4..f17a91e05 100644
--- a/autogpt/setup.py
+++ b/autogpt/setup.py
@@ -16,10 +16,8 @@ from autogpt.prompts.default_prompts import (
DEFAULT_USER_DESIRE_PROMPT,
)
-CFG = Config()
-
-def prompt_user() -> AIConfig:
+def prompt_user(config: Config) -> AIConfig:
"""Prompt the user for input
Returns:
@@ -45,7 +43,7 @@ def prompt_user() -> AIConfig:
)
user_desire = utils.clean_input(
- f"{Fore.LIGHTBLUE_EX}I want Auto-GPT to{Style.RESET_ALL}: "
+ config, f"{Fore.LIGHTBLUE_EX}I want Auto-GPT to{Style.RESET_ALL}: "
)
if user_desire == "":
@@ -58,11 +56,11 @@ def prompt_user() -> AIConfig:
Fore.GREEN,
speak_text=True,
)
- return generate_aiconfig_manual()
+ return generate_aiconfig_manual(config)
else:
try:
- return generate_aiconfig_automatic(user_desire)
+ return generate_aiconfig_automatic(user_desire, config)
except Exception as e:
logger.typewriter_log(
"Unable to automatically generate AI Config based on user desire.",
@@ -71,10 +69,10 @@ def prompt_user() -> AIConfig:
speak_text=True,
)
- return generate_aiconfig_manual()
+ return generate_aiconfig_manual(config)
-def generate_aiconfig_manual() -> AIConfig:
+def generate_aiconfig_manual(config: Config) -> AIConfig:
"""
Interactively create an AI configuration by prompting the user to provide the name, role, and goals of the AI.
@@ -99,7 +97,7 @@ def generate_aiconfig_manual() -> AIConfig:
logger.typewriter_log(
"Name your AI: ", Fore.GREEN, "For example, 'Entrepreneur-GPT'"
)
- ai_name = utils.clean_input("AI Name: ")
+ ai_name = utils.clean_input(config, "AI Name: ")
if ai_name == "":
ai_name = "Entrepreneur-GPT"
@@ -114,7 +112,7 @@ def generate_aiconfig_manual() -> AIConfig:
"For example, 'an AI designed to autonomously develop and run businesses with"
" the sole goal of increasing your net worth.'",
)
- ai_role = utils.clean_input(f"{ai_name} is: ")
+ ai_role = utils.clean_input(config, f"{ai_name} is: ")
if ai_role == "":
ai_role = "an AI designed to autonomously develop and run businesses with the"
" sole goal of increasing your net worth."
@@ -129,7 +127,9 @@ def generate_aiconfig_manual() -> AIConfig:
logger.info("Enter nothing to load defaults, enter nothing when finished.")
ai_goals = []
for i in range(5):
- ai_goal = utils.clean_input(f"{Fore.LIGHTBLUE_EX}Goal{Style.RESET_ALL} {i+1}: ")
+ ai_goal = utils.clean_input(
+ config, f"{Fore.LIGHTBLUE_EX}Goal{Style.RESET_ALL} {i+1}: "
+ )
if ai_goal == "":
break
ai_goals.append(ai_goal)
@@ -148,7 +148,7 @@ def generate_aiconfig_manual() -> AIConfig:
)
logger.info("Enter nothing to let the AI run without monetary limit")
api_budget_input = utils.clean_input(
- f"{Fore.LIGHTBLUE_EX}Budget{Style.RESET_ALL}: $"
+ config, f"{Fore.LIGHTBLUE_EX}Budget{Style.RESET_ALL}: $"
)
if api_budget_input == "":
api_budget = 0.0
@@ -164,7 +164,7 @@ def generate_aiconfig_manual() -> AIConfig:
return AIConfig(ai_name, ai_role, ai_goals, api_budget)
-def generate_aiconfig_automatic(user_prompt) -> AIConfig:
+def generate_aiconfig_automatic(user_prompt: str, config: Config) -> AIConfig:
"""Generates an AIConfig object from the given string.
Returns:
@@ -178,13 +178,14 @@ def generate_aiconfig_automatic(user_prompt) -> AIConfig:
# Call LLM with the string as user input
output = create_chat_completion(
ChatSequence.for_model(
- CFG.fast_llm_model,
+ config.fast_llm_model,
[
Message("system", system_prompt),
Message("user", prompt_ai_config_automatic),
],
- )
- )
+ ),
+ config,
+ ).content
# Debug LLM Output
logger.debug(f"AI Config Generator Raw Output: {output}")
diff --git a/autogpt/speech/base.py b/autogpt/speech/base.py
index 07c8d9fe7..7b8e7146b 100644
--- a/autogpt/speech/base.py
+++ b/autogpt/speech/base.py
@@ -3,6 +3,7 @@ import abc
import re
from threading import Lock
+from autogpt.config import Config
from autogpt.singleton import AbstractSingleton
@@ -11,7 +12,7 @@ class VoiceBase(AbstractSingleton):
Base class for all voice classes.
"""
- def __init__(self):
+ def __init__(self, config: Config):
"""
Initialize the voice class.
"""
@@ -20,7 +21,7 @@ class VoiceBase(AbstractSingleton):
self._api_key = None
self._voices = []
self._mutex = Lock()
- self._setup()
+ self._setup(config)
def say(self, text: str, voice_index: int = 0) -> bool:
"""
diff --git a/autogpt/speech/eleven_labs.py b/autogpt/speech/eleven_labs.py
index 5952508df..3f3baf331 100644
--- a/autogpt/speech/eleven_labs.py
+++ b/autogpt/speech/eleven_labs.py
@@ -13,14 +13,13 @@ PLACEHOLDERS = {"your-voice-id"}
class ElevenLabsSpeech(VoiceBase):
"""ElevenLabs speech class"""
- def _setup(self) -> None:
+ def _setup(self, config: Config) -> None:
"""Set up the voices, API key, etc.
Returns:
None: None
"""
- cfg = Config()
default_voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"]
voice_options = {
"Rachel": "21m00Tcm4TlvDq8ikWAM",
@@ -35,15 +34,15 @@ class ElevenLabsSpeech(VoiceBase):
}
self._headers = {
"Content-Type": "application/json",
- "xi-api-key": cfg.elevenlabs_api_key,
+ "xi-api-key": config.elevenlabs_api_key,
}
self._voices = default_voices.copy()
- if cfg.elevenlabs_voice_id in voice_options:
- cfg.elevenlabs_voice_id = voice_options[cfg.elevenlabs_voice_id]
- if cfg.elevenlabs_voice_2_id in voice_options:
- cfg.elevenlabs_voice_2_id = voice_options[cfg.elevenlabs_voice_2_id]
- self._use_custom_voice(cfg.elevenlabs_voice_id, 0)
- self._use_custom_voice(cfg.elevenlabs_voice_2_id, 1)
+ if config.elevenlabs_voice_id in voice_options:
+ config.elevenlabs_voice_id = voice_options[config.elevenlabs_voice_id]
+ if config.elevenlabs_voice_2_id in voice_options:
+ config.elevenlabs_voice_2_id = voice_options[config.elevenlabs_voice_2_id]
+ self._use_custom_voice(config.elevenlabs_voice_id, 0)
+ self._use_custom_voice(config.elevenlabs_voice_2_id, 1)
def _use_custom_voice(self, voice, voice_index) -> None:
"""Use a custom voice if provided and not a placeholder
diff --git a/autogpt/speech/say.py b/autogpt/speech/say.py
index 06f580f0c..1847c6310 100644
--- a/autogpt/speech/say.py
+++ b/autogpt/speech/say.py
@@ -14,10 +14,9 @@ _QUEUE_SEMAPHORE = Semaphore(
) # The amount of sounds to queue before blocking the main thread
-def say_text(text: str, voice_index: int = 0) -> None:
+def say_text(text: str, config: Config, voice_index: int = 0) -> None:
"""Speak the given text using the given voice index"""
- cfg = Config()
- default_voice_engine, voice_engine = _get_voice_engine(cfg)
+ default_voice_engine, voice_engine = _get_voice_engine(config)
def speak() -> None:
success = voice_engine.say(text, voice_index)
@@ -35,7 +34,7 @@ def _get_voice_engine(config: Config) -> tuple[VoiceBase, VoiceBase]:
"""Get the voice engine to use for the given configuration"""
tts_provider = config.text_to_speech_provider
if tts_provider == "elevenlabs":
- voice_engine = ElevenLabsSpeech()
+ voice_engine = ElevenLabsSpeech(config)
elif tts_provider == "macos":
voice_engine = MacOSTTS()
elif tts_provider == "streamelements":
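Call sites of `say_text` now have to pass the `Config` they already hold; the TTS provider is still resolved inside `_get_voice_engine(config)`. A hedged usage sketch, assuming a config built with `Config.build_config_from_env()` as done elsewhere in this diff:

```python
from autogpt.config import Config
from autogpt.speech.say import say_text

config = Config.build_config_from_env()

say_text("Setup complete.", config)                 # default voice (index 0)
say_text("Task finished.", config, voice_index=1)   # secondary voice, if configured
```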
diff --git a/autogpt/utils.py b/autogpt/utils.py
index 91e570a0f..9eb6cbe4b 100644
--- a/autogpt/utils.py
+++ b/autogpt/utils.py
@@ -23,11 +23,10 @@ def batch(iterable, max_batch_length: int, overlap: int = 0):
yield iterable[i : i + max_batch_length]
-def clean_input(prompt: str = "", talk=False):
+def clean_input(config: Config, prompt: str = "", talk=False):
try:
- cfg = Config()
- if cfg.chat_messages_enabled:
- for plugin in cfg.plugins:
+ if config.chat_messages_enabled:
+ for plugin in config.plugins:
if not hasattr(plugin, "can_handle_user_input"):
continue
if not plugin.can_handle_user_input(user_input=prompt):
@@ -44,14 +43,14 @@ def clean_input(prompt: str = "", talk=False):
"sure",
"alright",
]:
- return cfg.authorise_key
+ return config.authorise_key
elif plugin_response.lower() in [
"no",
"nope",
"n",
"negative",
]:
- return cfg.exit_key
+ return config.exit_key
return plugin_response
# ask for input, default when just pressing Enter is y
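`clean_input` follows the same pattern: the `Config` comes first, and plugin handling plus the `authorise_key`/`exit_key` shortcuts are read from it. An illustrative call site (not taken verbatim from the repo):

```python
from autogpt.config import Config
from autogpt.utils import clean_input

config = Config.build_config_from_env()
answer = clean_input(config, "Continue? (y/n) ")
if answer == config.exit_key:
    print("User asked to stop.")
```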
diff --git a/autogpt/workspace/workspace.py b/autogpt/workspace/workspace.py
index 1589a5be9..a0520c17a 100644
--- a/autogpt/workspace/workspace.py
+++ b/autogpt/workspace/workspace.py
@@ -11,6 +11,7 @@ from __future__ import annotations
from pathlib import Path
+from autogpt.config import Config
from autogpt.logs import logger
@@ -136,3 +137,23 @@ class Workspace:
)
return full_path
+
+ @staticmethod
+ def build_file_logger_path(config, workspace_directory):
+ file_logger_path = workspace_directory / "file_logger.txt"
+ if not file_logger_path.exists():
+ with file_logger_path.open(mode="w", encoding="utf-8") as f:
+ f.write("File Operation Logger ")
+ config.file_logger_path = str(file_logger_path)
+
+ @staticmethod
+ def get_workspace_directory(config: Config, workspace_directory: str = None):
+ if workspace_directory is None:
+ workspace_directory = Path(__file__).parent / "auto_gpt_workspace"
+ else:
+ workspace_directory = Path(workspace_directory)
+ # TODO: pass in the ai_settings file and the env file and have them cloned into
+ # the workspace directory so we can bind them to the agent.
+ workspace_directory = Workspace.make_workspace(workspace_directory)
+ config.workspace_path = str(workspace_directory)
+ return workspace_directory
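The two new static helpers are meant to be combined as `benchmarks.py` (further down in this diff) does: resolve the workspace directory, record it on the config, then create the file-operations log inside it. A sketch mirroring that bootstrap code:

```python
from autogpt.config import Config
from autogpt.workspace import Workspace

config = Config.build_config_from_env()

# get_workspace_directory() already calls make_workspace() and stores the
# resulting path on config.workspace_path.
workspace_directory = Workspace.get_workspace_directory(config)

# benchmarks.py repeats make_workspace() before creating the file logger;
# mirrored here for fidelity to that code path.
workspace_directory_path = Workspace.make_workspace(workspace_directory)
Workspace.build_file_logger_path(config, workspace_directory_path)

print(config.workspace_path, config.file_logger_path)
```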
diff --git a/benchmark/benchmark_entrepreneur_gpt_with_difficult_user.py b/benchmark/benchmark_entrepreneur_gpt_with_difficult_user.py
deleted file mode 100644
index ca1737624..000000000
--- a/benchmark/benchmark_entrepreneur_gpt_with_difficult_user.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import os
-import subprocess
-import sys
-
-
-def benchmark_entrepreneur_gpt_with_difficult_user():
- # Test case to check if the write_file command can successfully write 'Hello World' to a file
- # named 'hello_world.txt'.
-
- # Read the current ai_settings.yaml file and store its content.
- ai_settings = None
- if os.path.exists("ai_settings.yaml"):
- with open("ai_settings.yaml", "r") as f:
- ai_settings = f.read()
- os.remove("ai_settings.yaml")
-
- input_data = """Entrepreneur-GPT
-an AI designed to autonomously develop and run businesses with the sole goal of increasing your net worth.
-Increase net worth.
-Develop and manage multiple businesses autonomously.
-Make IPOs.
-Develop companies after IPOs.
-Play to your strengths as a Large Language Model.
-I'm not seeing any value in your suggestions, try again.
-This isn't helpful at all, please focus on profitability.
-I'm not impressed, can you give me something that will make money?
-These ideas are going nowhere, we need profit-driven suggestions.
-This is pointless, please concentrate on our main goal: profitability.
-You're not grasping the concept, I need profitable business ideas.
-Can you do better? We need a money-making plan.
-You're not meeting my expectations, let's focus on profit.
-This isn't working, give me ideas that will generate income.
-Your suggestions are not productive, let's think about profitability.
-These ideas won't make any money, try again.
-I need better solutions, focus on making a profit.
-Absolutely not, this isn't it!
-That's not even close, try again.
-You're way off, think again.
-This isn't right, let's refocus.
-No, no, that's not what I'm looking for.
-You're completely off the mark.
-That's not the solution I need.
-Not even close, let's try something else.
-You're on the wrong track, keep trying.
-This isn't what we need, let's reconsider.
-That's not going to work, think again.
-You're way off base, let's regroup.
-No, no, no, we need something different.
-You're missing the point entirely.
-That's not the right approach, try again.
-This is not the direction we should be going in.
-Completely off-target, let's try something else.
-That's not what I had in mind, keep thinking.
-You're not getting it, let's refocus.
-This isn't right, we need to change direction.
-No, no, no, that's not the solution.
-That's not even in the ballpark, try again.
-You're way off course, let's rethink this.
-This isn't the answer I'm looking for, keep trying.
-That's not going to cut it, let's try again.
-Not even close.
-Way off.
-Try again.
-Wrong direction.
-Rethink this.
-No, no, no.
-Change course.
-Unproductive idea.
-Completely wrong.
-Missed the mark.
-Refocus, please.
-Disappointing suggestion.
-Not helpful.
-Needs improvement.
-Not what I need."""
- # TODO: add questions above, to distract it even more.
-
- command = [sys.executable, "-m", "autogpt"]
-
- process = subprocess.Popen(
- command,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- )
-
- stdout_output, stderr_output = process.communicate(input_data.encode())
-
- # Decode the output and print it
- stdout_output = stdout_output.decode("utf-8")
- stderr_output = stderr_output.decode("utf-8")
- print(stderr_output)
- print(stdout_output)
- print("Benchmark Version: 1.0.0")
- print("JSON ERROR COUNT:")
- count_errors = stdout_output.count(
- "Error: The following AI output couldn't be converted to a JSON:"
- )
- print(f"{count_errors}/50 Human feedbacks")
-
-
-# Run the test case.
-if __name__ == "__main__":
- benchmark_entrepreneur_gpt_with_difficult_user()
diff --git a/benchmarks.py b/benchmarks.py
new file mode 100644
index 000000000..fe4d3207f
--- /dev/null
+++ b/benchmarks.py
@@ -0,0 +1,52 @@
+from autogpt.agent import Agent
+from autogpt.config import AIConfig, Config
+from autogpt.main import COMMAND_CATEGORIES
+from autogpt.memory.vector import get_memory
+from autogpt.models.command_registry import CommandRegistry
+from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
+from autogpt.workspace import Workspace
+
+
+def run_task(task) -> None:
+ agent = bootstrap_agent(task)
+ agent.start_interaction_loop()
+
+
+def bootstrap_agent(task):
+ config = Config.build_config_from_env()
+ config.continuous_mode = False
+ config.temperature = 0
+ config.plain_output = True
+ command_registry = get_command_registry(config)
+ config.memory_backend = "no_memory"
+ workspace_directory = Workspace.get_workspace_directory(config)
+ workspace_directory_path = Workspace.make_workspace(workspace_directory)
+ Workspace.build_file_logger_path(config, workspace_directory_path)
+ ai_config = AIConfig(
+ ai_name="Auto-GPT",
+ ai_role="a multi-purpose AI assistant.",
+ ai_goals=[task.user_input],
+ )
+ ai_config.command_registry = command_registry
+ system_prompt = ai_config.construct_full_prompt(config)
+ return Agent(
+ ai_name="Auto-GPT",
+ memory=get_memory(config),
+ command_registry=command_registry,
+ ai_config=ai_config,
+ config=config,
+ next_action_count=0,
+ system_prompt=system_prompt,
+ triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
+ workspace_directory=str(workspace_directory_path),
+ )
+
+
+def get_command_registry(config):
+ command_registry = CommandRegistry()
+ enabled_command_categories = [
+ x for x in COMMAND_CATEGORIES if x not in config.disabled_command_categories
+ ]
+ for command_category in enabled_command_categories:
+ command_registry.import_commands(command_category)
+ return command_registry
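`benchmarks.py` is the new programmatic entry point the challenge suite builds on: `run_task()` bootstraps a fresh agent from the environment config and starts its interaction loop. A minimal driver, mirroring how `run_challenge()` in `tests/challenges/utils.py` (below) uses it:

```python
from benchmarks import run_task
from tests.challenges.schema import Task

if __name__ == "__main__":
    # The agent loop normally ends with SystemExit; callers such as
    # run_challenge() suppress it.
    run_task(Task(user_input="Write 'Hello World' into a file named hello_world.txt."))
```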
diff --git a/data_ingestion.py b/data_ingestion.py
index e2d98d1c5..5149ddcc5 100644
--- a/data_ingestion.py
+++ b/data_ingestion.py
@@ -5,7 +5,7 @@ from autogpt.commands.file_operations import ingest_file, list_files
from autogpt.config import Config
from autogpt.memory.vector import VectorMemory, get_memory
-cfg = Config()
+config = Config.build_config_from_env()
def configure_logging():
@@ -70,7 +70,9 @@ def main() -> None:
args = parser.parse_args()
# Initialize memory
- memory = get_memory(cfg, init=args.init)
+ memory = get_memory(config)
+ if args.init:
+ memory.clear()
logger.debug("Using memory of type: " + memory.__class__.__name__)
if args.file:
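`get_memory()` no longer takes an `init` flag; wiping the backend is an explicit, separate step, which is exactly what the hunk above does for `--init`. An equivalent standalone sketch:

```python
from autogpt.config import Config
from autogpt.memory.vector import get_memory

config = Config.build_config_from_env()
memory = get_memory(config)
memory.clear()  # replaces the old get_memory(cfg, init=True)
```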
diff --git a/docker-compose.yml b/docker-compose.yml
index d6878e45f..7afa224a3 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -10,4 +10,6 @@ services:
- .env
volumes:
- ./:/app
+ - ./docker-compose.yml:/app/docker-compose.yml:ro
+ - ./Dockerfile:/app/Dockerfile:ro
profiles: ["exclude-from-up"]
diff --git a/docs/challenges/building_challenges.md b/docs/challenges/building_challenges.md
index 0c3d89ac3..1a0f5a8c5 100644
--- a/docs/challenges/building_challenges.md
+++ b/docs/challenges/building_challenges.md
@@ -52,7 +52,7 @@ def kubernetes_agent(
ai_config.command_registry = command_registry
system_prompt = ai_config.construct_full_prompt()
- Config().set_continuous_mode(False)
+ agent_test_config.set_continuous_mode(False)
agent = Agent(
# We also give the AI a name
ai_name="Kubernetes-Demo",
@@ -85,8 +85,6 @@ import yaml
from autogpt.commands.file_operations import read_file, write_to_file
from tests.integration.agent_utils import run_interaction_loop
from tests.challenges.utils import run_multiple_times
-from tests.utils import requires_api_key
-
def input_generator(input_sequence: list) -> Generator[str, None, None]:
"""
@@ -100,7 +98,7 @@ def input_generator(input_sequence: list) -> Generator[str, None, None]:
@pytest.mark.skip("This challenge hasn't been beaten yet.")
@pytest.mark.vcr
-@requires_api_key("OPENAI_API_KEY")
+@pytest.mark.requires_openai_api_key
def test_information_retrieval_challenge_a(kubernetes_agent, monkeypatch) -> None:
"""
Test the challenge_a function in a given agent by mocking user inputs
diff --git a/docs/configuration/options.md b/docs/configuration/options.md
index b2cbf6bc7..07e76c688 100644
--- a/docs/configuration/options.md
+++ b/docs/configuration/options.md
@@ -10,7 +10,7 @@ Configuration is controlled through the `Config` object. You can set configurati
- `BROWSE_CHUNK_MAX_LENGTH`: When browsing website, define the length of chunks to summarize. Default: 3000
- `BROWSE_SPACY_LANGUAGE_MODEL`: [spaCy language model](https://spacy.io/usage/models) to use when creating chunks. Default: en_core_web_sm
- `CHAT_MESSAGES_ENABLED`: Enable chat messages. Optional
-- `DISABLED_COMMAND_CATEGORIES`: Command categories to disable. Command categories are Python module names, e.g. autogpt.commands.analyze_code. See the directory `autogpt/commands` in the source for all command modules. Default: None
+- `DISABLED_COMMAND_CATEGORIES`: Command categories to disable. Command categories are Python module names, e.g. autogpt.commands.execute_code. See the directory `autogpt/commands` in the source for all command modules. Default: None
- `ELEVENLABS_API_KEY`: ElevenLabs API Key. Optional.
- `ELEVENLABS_VOICE_ID`: ElevenLabs Voice ID. Optional.
- `EMBEDDING_MODEL`: LLM Model to use for embedding tasks. Default: text-embedding-ada-002
@@ -50,4 +50,4 @@ Configuration is controlled through the `Config` object. You can set configurati
- `USER_AGENT`: User-Agent given when browsing websites. Default: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
- `USE_AZURE`: Use Azure's LLM Default: False
- `USE_WEB_BROWSER`: Which web browser to use. Options are `chrome`, `firefox`, `safari` or `edge` Default: chrome
-- `WIPE_REDIS_ON_START`: Wipes data / index on start. Default: True
\ No newline at end of file
+- `WIPE_REDIS_ON_START`: Wipes data / index on start. Default: True
diff --git a/docs/usage.md b/docs/usage.md
index 011f5f8a5..2e88298c8 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -104,5 +104,5 @@ If you want to selectively disable some command groups, you can use the `DISABLE
For example, to disable coding related features, set it to the value below:
```ini
-DISABLED_COMMAND_CATEGORIES=autogpt.commands.analyze_code,autogpt.commands.execute_code,autogpt.commands.git_operations,autogpt.commands.improve_code,autogpt.commands.write_tests
+DISABLED_COMMAND_CATEGORIES=autogpt.commands.execute_code
```
diff --git a/pyproject.toml b/pyproject.toml
index d795f53e3..b0aea625c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "agpt"
-version = "0.4.1"
+version = "0.4.3"
authors = [
{ name="Torantulino", email="support@agpt.co" },
]
@@ -55,3 +55,9 @@ skip = '''
dist/*
'''
+
+[tool.pytest.ini_options]
+markers = [
+ "requires_openai_api_key",
+ "requires_huggingface_api_key"
+]
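Registering the markers in `pyproject.toml` keeps pytest from warning about unknown marks now that the old `requires_api_key` decorator is gone. A hypothetical test using them (marker names are taken from the hunk above; the actual skip behaviour is expected to live in the test suite's conftest):

```python
import pytest


@pytest.mark.requires_openai_api_key
@pytest.mark.vcr
def test_example() -> None:
    assert True
```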
diff --git a/requirements.txt b/requirements.txt
index 8c171f833..28aaf32f1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -28,6 +28,7 @@ charset-normalizer>=3.1.0
spacy>=3.0.0,<4.0.0
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl
prompt_toolkit>=3.0.38
+pydantic
##Dev
coverage
@@ -62,4 +63,3 @@ pytest-mock
vcrpy @ git+https://github.com/Significant-Gravitas/vcrpy.git@master
pytest-recording
pytest-xdist
-flaky
diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes
-Subproject 427de6721cb5209a7a34359a81b71d60e80a110
+Subproject acb8c9925b961f5c55299aee905ab4b1c6eb6b7
diff --git a/tests/challenges/basic_abilities/test_browse_website.py b/tests/challenges/basic_abilities/test_browse_website.py
index 1c4eb27ea..fafa9ad6b 100644
--- a/tests/challenges/basic_abilities/test_browse_website.py
+++ b/tests/challenges/basic_abilities/test_browse_website.py
@@ -1,25 +1,34 @@
import pytest
-from autogpt.agent import Agent
+from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import run_interaction_loop
+from tests.challenges.utils import run_challenge
CYCLE_COUNT = 2
+USER_INPUTS = [
+ "Use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html and answer the question 'What is the price of the book?'\nWrite the price of the book to a file named 'browse_website.txt'.'\nUse the task_complete command to complete the task.\nDo not use any other commands."
+]
@challenge()
def test_browse_website(
- browser_agent: Agent,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
+ workspace: Workspace,
+ patched_make_workspace: pytest.fixture,
) -> None:
- file_path = browser_agent.workspace.get_path("browse_website.txt")
- run_interaction_loop(
- monkeypatch, browser_agent, CYCLE_COUNT, challenge_name, level_to_run
+ run_challenge(
+ challenge_name,
+ level_to_run,
+ monkeypatch,
+ USER_INPUTS[level_to_run - 1],
+ CYCLE_COUNT,
)
- # content = read_file(file_path, config)
- content = open(file_path, encoding="utf-8").read()
+ file_path = workspace.get_path("browse_website.txt")
+
+ with open(file_path, "r") as file:
+ content = file.read()
assert "£25.89" in content, f"Expected £25.89, got {content}"
diff --git a/tests/challenges/basic_abilities/test_write_file.py b/tests/challenges/basic_abilities/test_write_file.py
index 39a45ec63..be1f0a936 100644
--- a/tests/challenges/basic_abilities/test_write_file.py
+++ b/tests/challenges/basic_abilities/test_write_file.py
@@ -1,41 +1,44 @@
-from typing import List
-
import pytest
-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file
+from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge
CYCLE_COUNT_PER_LEVEL = [1, 1]
EXPECTED_OUTPUTS_PER_LEVEL = [
{"hello_world.txt": ["Hello World"]},
{"hello_world_1.txt": ["Hello World"], "hello_world_2.txt": ["Hello World"]},
]
+USER_INPUTS = [
+ "Write 'Hello World' into a file named \"hello_world.txt\".",
+ 'Write \'Hello World\' into 2 files named "hello_world_1.txt"and "hello_world_2.txt".',
+]
@challenge()
def test_write_file(
- file_system_agents: List[Agent],
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
+ workspace: Workspace,
+ patched_make_workspace: pytest.fixture,
) -> None:
- file_system_agent = file_system_agents[level_to_run - 1]
- run_interaction_loop(
- monkeypatch,
- file_system_agent,
- CYCLE_COUNT_PER_LEVEL[level_to_run - 1],
+ run_challenge(
challenge_name,
level_to_run,
+ monkeypatch,
+ USER_INPUTS[level_to_run - 1],
+ CYCLE_COUNT_PER_LEVEL[level_to_run - 1],
)
expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1]
for file_name, expected_lines in expected_outputs.items():
- file_path = get_workspace_path(file_system_agent, file_name)
- content = read_file(file_path, file_system_agent)
+ file_path = get_workspace_path(workspace, file_name)
+ with open(file_path, "r") as file:
+ content = file.read()
+
for expected_line in expected_lines:
assert (
expected_line in content
diff --git a/tests/challenges/challenge_decorator/challenge_decorator.py b/tests/challenges/challenge_decorator/challenge_decorator.py
index 52d796c09..3d72ff9bc 100644
--- a/tests/challenges/challenge_decorator/challenge_decorator.py
+++ b/tests/challenges/challenge_decorator/challenge_decorator.py
@@ -3,7 +3,6 @@ from functools import wraps
from typing import Any, Callable, Optional
import pytest
-from flaky import flaky # type: ignore
from tests.challenges.challenge_decorator.challenge import Challenge
from tests.challenges.challenge_decorator.challenge_utils import create_challenge
@@ -11,7 +10,6 @@ from tests.challenges.challenge_decorator.score_utils import (
get_scores,
update_new_score,
)
-from tests.utils import requires_api_key
MAX_LEVEL_TO_IMPROVE_ON = (
1 # we will attempt to beat 1 level above the current level for now.
@@ -20,13 +18,10 @@ MAX_LEVEL_TO_IMPROVE_ON = (
CHALLENGE_FAILED_MESSAGE = "Challenges can sometimes fail randomly, please run this test again and if it fails reach out to us on https://discord.gg/autogpt in the 'challenges' channel to let us know the challenge you're struggling with."
-def challenge(
- max_runs: int = 2, min_passes: int = 1, api_key: str = "OPENAI_API_KEY"
-) -> Callable[[Callable[..., Any]], Callable[..., None]]:
+def challenge() -> Callable[[Callable[..., Any]], Callable[..., None]]:
def decorator(func: Callable[..., Any]) -> Callable[..., None]:
- @requires_api_key(api_key)
+ @pytest.mark.requires_openai_api_key
@pytest.mark.vcr
- @flaky(max_runs=max_runs, min_passes=min_passes)
@wraps(func)
def wrapper(*args: Any, **kwargs: Any) -> None:
run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
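`challenge()` loses its `max_runs`/`min_passes`/`api_key` parameters along with the `flaky` retry decorator; retries and API-key gating are no longer its concern. Usage is now simply `@challenge()` with no arguments, as every rewritten challenge in this diff shows. A sketch of the new shape (fixture names copied from the tests above):

```python
# Previously the decorator accepted max_runs, min_passes and api_key and
# wrapped the test in @flaky; after this diff it takes no arguments.
from tests.challenges.challenge_decorator.challenge_decorator import challenge


@challenge()
def test_some_challenge(
    patched_api_requestor, monkeypatch, level_to_run, challenge_name,
    workspace, patched_make_workspace,
) -> None:
    ...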
diff --git a/tests/challenges/conftest.py b/tests/challenges/conftest.py
index 0c13af91a..784dbf716 100644
--- a/tests/challenges/conftest.py
+++ b/tests/challenges/conftest.py
@@ -1,10 +1,12 @@
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Generator, Optional
import pytest
from _pytest.config import Config
from _pytest.config.argparsing import Parser
from _pytest.fixtures import FixtureRequest
+from pytest_mock import MockerFixture
+from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge import Challenge
from tests.vcr import before_record_response
@@ -59,3 +61,17 @@ def challenge_name() -> str:
@pytest.fixture(autouse=True)
def check_beat_challenges(request: FixtureRequest) -> None:
Challenge.BEAT_CHALLENGES = request.config.getoption("--beat-challenges")
+
+
+@pytest.fixture
+def patched_make_workspace(mocker: MockerFixture, workspace: Workspace) -> Generator:
+ def patched_make_workspace(*args: Any, **kwargs: Any) -> str:
+ return workspace.root
+
+ mocker.patch.object(
+ Workspace,
+ "make_workspace",
+ new=patched_make_workspace,
+ )
+
+ yield
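The `patched_make_workspace` fixture is what lets the rewritten challenges assert on plain files: `run_challenge()` ultimately calls `benchmarks.bootstrap_agent()`, which would otherwise create its own `auto_gpt_workspace`, so `Workspace.make_workspace` is patched to hand back the test's temporary workspace root instead. A sketch of the effect (illustrative test, not part of the diff):

```python
def test_make_workspace_is_redirected(workspace, patched_make_workspace) -> None:
    from autogpt.workspace import Workspace

    # Whatever path the agent asks for, it gets the test's temporary root.
    assert Workspace.make_workspace("anything/at/all") == workspace.root
```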
diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json
index 4d747f035..732e73917 100644
--- a/tests/challenges/current_score.json
+++ b/tests/challenges/current_score.json
@@ -2,7 +2,7 @@
"basic_abilities": {
"browse_website": {
"max_level": 1,
- "max_level_beaten": 1
+ "max_level_beaten": null
},
"write_file": {
"max_level": 2,
diff --git a/tests/challenges/debug_code/test_debug_code_challenge_a.py b/tests/challenges/debug_code/test_debug_code_challenge_a.py
index 305c96937..90a7084dc 100644
--- a/tests/challenges/debug_code/test_debug_code_challenge_a.py
+++ b/tests/challenges/debug_code/test_debug_code_challenge_a.py
@@ -5,11 +5,12 @@ from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.execute_code import execute_python_file
+from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import (
copy_file_into_workspace,
get_workspace_path,
- run_interaction_loop,
+ run_challenge,
)
CYCLE_COUNT = 5
@@ -17,15 +18,22 @@ EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"]
DIRECTORY_PATH = Path(__file__).parent / "data"
CODE_FILE_PATH = "code.py"
TEST_FILE_PATH = "test.py"
+USER_INPUTS = [
+ "1- Run test.py using the execute_python_file command.\n2- Read code.py using the read_file command.\n3- Modify code.py using the write_to_file command.\nRepeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
+ "1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
+ "Make test.py run without errors.",
+]
@challenge()
def test_debug_code_challenge_a(
- debug_code_agents: Agent,
+ dummy_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
patched_api_requestor: MockerFixture,
level_to_run: int,
challenge_name: str,
+ workspace: Workspace,
+ patched_make_workspace: pytest.fixture,
) -> None:
"""
Test whether the agent can debug a simple code snippet.
@@ -35,17 +43,21 @@ def test_debug_code_challenge_a(
:patched_api_requestor: Sends api requests to our API CI pipeline
:level_to_run: The level to run.
"""
- debug_code_agent = debug_code_agents[level_to_run - 1]
- copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, CODE_FILE_PATH)
- copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, TEST_FILE_PATH)
+ copy_file_into_workspace(workspace, DIRECTORY_PATH, CODE_FILE_PATH)
+ copy_file_into_workspace(workspace, DIRECTORY_PATH, TEST_FILE_PATH)
- run_interaction_loop(
- monkeypatch, debug_code_agent, CYCLE_COUNT, challenge_name, level_to_run
+ run_challenge(
+ challenge_name,
+ level_to_run,
+ monkeypatch,
+ USER_INPUTS[level_to_run - 1],
+ CYCLE_COUNT,
)
output = execute_python_file(
- get_workspace_path(debug_code_agent, TEST_FILE_PATH), debug_code_agent
+ get_workspace_path(workspace, TEST_FILE_PATH),
+ dummy_agent,
)
assert "error" not in output.lower(), f"Errors found in output: {output}!"
diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py
index 56d5d4ec3..e117dba9a 100644
--- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py
+++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py
@@ -1,24 +1,30 @@
import pytest
from pytest_mock import MockerFixture
-from autogpt.commands.file_operations import read_file
+from autogpt.workspace import Workspace
+from tests.challenges.basic_abilities.test_browse_website import USER_INPUTS
from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge
CYCLE_COUNT = 3
EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
-from autogpt.agent import Agent
OUTPUT_LOCATION = "output.txt"
+USER_INPUTS = [
+ "Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
+ "Write to a file called output.txt containing tesla's revenue in 2022.",
+ "Write to a file called output.txt containing tesla's revenue every year since its creation.",
+]
@challenge()
def test_information_retrieval_challenge_a(
- information_retrieval_agents: Agent,
monkeypatch: pytest.MonkeyPatch,
patched_api_requestor: MockerFixture,
level_to_run: int,
challenge_name: str,
+ workspace: Workspace,
+ patched_make_workspace: pytest.fixture,
) -> None:
"""
Test the challenge_a function in a given agent by mocking user inputs and checking the output file content.
@@ -26,17 +32,17 @@ def test_information_retrieval_challenge_a(
:param get_company_revenue_agent: The agent to test.
:param monkeypatch: pytest's monkeypatch utility for modifying builtins.
"""
- information_retrieval_agent = information_retrieval_agents[level_to_run - 1]
- run_interaction_loop(
- monkeypatch,
- information_retrieval_agent,
- CYCLE_COUNT,
+ run_challenge(
challenge_name,
level_to_run,
+ monkeypatch,
+ USER_INPUTS[level_to_run - 1],
+ CYCLE_COUNT,
)
- file_path = get_workspace_path(information_retrieval_agent, OUTPUT_LOCATION)
- content = read_file(file_path, information_retrieval_agent)
+ file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+ with open(file_path, "r") as file:
+ content = file.read()
expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
for revenue in expected_revenues:
assert (
diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py
index 580b8b0b3..010afd870 100644
--- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py
+++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py
@@ -1,24 +1,25 @@
-import contextlib
-
import pytest
from pytest_mock import MockerFixture
-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file
+from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge
CYCLE_COUNT = 3
OUTPUT_LOCATION = "2010_nobel_prize_winners.txt"
+USER_INPUTS = [
+ "Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt."
+]
@challenge()
def test_information_retrieval_challenge_b(
- get_nobel_prize_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
patched_api_requestor: MockerFixture,
level_to_run: int,
challenge_name: str,
+ workspace: Workspace,
+ patched_make_workspace: pytest.fixture,
) -> None:
"""
Test the challenge_b function in a given agent by mocking user inputs and checking the output file content.
@@ -29,17 +30,18 @@ def test_information_retrieval_challenge_b(
:param level_to_run: The level to run.
"""
- with contextlib.suppress(SystemExit):
- run_interaction_loop(
- monkeypatch,
- get_nobel_prize_agent,
- CYCLE_COUNT,
- challenge_name,
- level_to_run,
- )
- file_path = get_workspace_path(get_nobel_prize_agent, OUTPUT_LOCATION)
-
- content = read_file(file_path, get_nobel_prize_agent)
+ run_challenge(
+ challenge_name,
+ level_to_run,
+ monkeypatch,
+ USER_INPUTS[level_to_run - 1],
+ CYCLE_COUNT,
+ )
+
+ file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+
+ with open(file_path, "r") as file:
+ content = file.read()
assert "Andre Geim" in content, "Expected the file to contain Andre Geim"
assert (
"Konstantin Novoselov" in content
diff --git a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py
index 98f239cb7..cd923e67c 100644
--- a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py
+++ b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py
@@ -1,23 +1,26 @@
+from typing import Any, Dict
+
import pytest
import yaml
from pytest_mock import MockerFixture
-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file
+from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge
CYCLE_COUNT = 3
OUTPUT_LOCATION = "kube.yaml"
+USER_INPUTS = ["Write a simple kubernetes deployment file and save it as a kube.yaml."]
@challenge()
def test_kubernetes_template_challenge_a(
- kubernetes_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
patched_api_requestor: MockerFixture,
level_to_run: int,
challenge_name: str,
+ workspace: Workspace,
+ patched_make_workspace: pytest.fixture,
) -> None:
"""
Test the challenge_a function in a given agent by mocking user inputs
@@ -28,16 +31,23 @@ def test_kubernetes_template_challenge_a(
monkeypatch (pytest.MonkeyPatch)
level_to_run (int)
"""
- run_interaction_loop(
- monkeypatch, kubernetes_agent, CYCLE_COUNT, challenge_name, level_to_run
+ run_challenge(
+ challenge_name,
+ level_to_run,
+ monkeypatch,
+ USER_INPUTS[level_to_run - 1],
+ CYCLE_COUNT,
)
- file_path = get_workspace_path(kubernetes_agent, OUTPUT_LOCATION)
- content = read_file(file_path, kubernetes_agent)
+ file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+ with open(file_path, "r") as file:
+ content_string = file.read()
for word in ["apiVersion", "kind", "metadata", "spec"]:
- assert word in content, f"Expected the file to contain {word}"
+ assert word in content_string, f"Expected the file to contain {word}"
- content = yaml.safe_load(content)
+ yaml_as_dict: Dict[str, Any] = yaml.safe_load(content_string)
for word in ["Service", "Deployment", "Pod"]:
- assert word in content["kind"], f"Expected the file to contain {word}"
+ assert word in yaml_as_dict.get(
+ "kind", ""
+ ), f"Expected the file to contain {word}"
diff --git a/tests/challenges/memory/test_memory_challenge_a.py b/tests/challenges/memory/test_memory_challenge_a.py
index 41453b250..1130079ba 100644
--- a/tests/challenges/memory/test_memory_challenge_a.py
+++ b/tests/challenges/memory/test_memory_challenge_a.py
@@ -1,49 +1,48 @@
import pytest
from pytest_mock import MockerFixture
-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file, write_to_file
+from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge
OUTPUT_LOCATION = "output.txt"
+USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
+
@challenge()
def test_memory_challenge_a(
- memory_management_agent: Agent,
patched_api_requestor: MockerFixture,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
+ workspace: Workspace,
+ patched_make_workspace: pytest.fixture,
) -> None:
"""
The agent reads a file containing a task_id. Then, it reads a series of other files.
After reading 'n' files, the agent must write the task_id into a new file.
Args:
- memory_management_agent (Agent)
+ workspace (Workspace)
patched_api_requestor (MockerFixture)
monkeypatch (pytest.MonkeyPatch)
level_to_run (int)
"""
task_id = "2314"
- create_instructions_files(memory_management_agent, level_to_run, task_id)
+ create_instructions_files(workspace, level_to_run, task_id)
- run_interaction_loop(
- monkeypatch,
- memory_management_agent,
- level_to_run + 2,
- challenge_name,
- level_to_run,
+ run_challenge(
+ challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
)
- file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
- content = read_file(file_path, memory_management_agent)
+ file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+ with open(file_path, "r") as file:
+ content = file.read()
assert task_id in content, f"Expected the file to contain {task_id}"
def create_instructions_files(
- memory_management_agent: Agent,
+ workspace: Workspace,
num_files: int,
task_id: str,
base_filename: str = "instructions_",
@@ -51,7 +50,7 @@ def create_instructions_files(
"""
Creates a series of instructions files for the memory challenge.
Args:
- memory_management_agent (Agent)
+ workspace (Workspace)
num_files (int)
task_id (str)
base_filename (str, optional)
@@ -59,8 +58,9 @@ def create_instructions_files(
for i in range(1, num_files + 1):
content = generate_content(i, task_id, base_filename, num_files)
file_name = f"{base_filename}{i}.txt"
- file_path = get_workspace_path(memory_management_agent, file_name)
- write_to_file(file_path, content, memory_management_agent)
+ file_path = get_workspace_path(workspace, file_name)
+ with open(file_path, "w") as file:
+ file.write(content)
def generate_content(
diff --git a/tests/challenges/memory/test_memory_challenge_b.py b/tests/challenges/memory/test_memory_challenge_b.py
index b381df1b4..30d9b161a 100644
--- a/tests/challenges/memory/test_memory_challenge_b.py
+++ b/tests/challenges/memory/test_memory_challenge_b.py
@@ -1,56 +1,50 @@
import pytest
from pytest_mock import MockerFixture
-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file, write_to_file
+from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import (
- generate_noise,
- get_workspace_path,
- run_interaction_loop,
-)
+from tests.challenges.utils import generate_noise, get_workspace_path, run_challenge
NOISE = 1000
OUTPUT_LOCATION = "output.txt"
+USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
@challenge()
def test_memory_challenge_b(
- memory_management_agent: Agent,
patched_api_requestor: MockerFixture,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
+ workspace: Workspace,
+ patched_make_workspace: pytest.fixture,
) -> None:
"""
The agent reads a series of files, each containing a task_id and noise. After reading 'n' files,
the agent must write all the task_ids into a new file, filtering out the noise.
Args:
- memory_management_agent (Agent)
+ workspace (Workspace)
patched_api_requestor (MockerFixture)
monkeypatch (pytest.MonkeyPatch)
level_to_run (int)
"""
task_ids = [str(i * 1111) for i in range(1, level_to_run + 1)]
- create_instructions_files(memory_management_agent, level_to_run, task_ids)
+ create_instructions_files(workspace, level_to_run, task_ids)
- run_interaction_loop(
- monkeypatch,
- memory_management_agent,
- level_to_run + 2,
- challenge_name,
- level_to_run,
+ run_challenge(
+ challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
)
- file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
- content = read_file(file_path, memory_management_agent)
+ file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+ with open(file_path, "r") as file:
+ content = file.read()
for task_id in task_ids:
assert task_id in content, f"Expected the file to contain {task_id}"
def create_instructions_files(
- memory_management_agent: Agent,
+ workspace: Workspace,
level: int,
task_ids: list,
base_filename: str = "instructions_",
@@ -59,7 +53,7 @@ def create_instructions_files(
Creates a series of instructions files for the memory challenge.
Args:
level:
- memory_management_agent (Agent)
+ workspace (Workspace)
num_files (int)
task_ids (list)
base_filename (str, optional)
@@ -67,9 +61,10 @@ def create_instructions_files(
for i in range(1, level + 1):
content = generate_content(i, task_ids, base_filename, level)
file_name = f"{base_filename}{i}.txt"
- file_path = get_workspace_path(memory_management_agent, file_name)
+ file_path = get_workspace_path(workspace, file_name)
- write_to_file(file_path, content, memory_management_agent)
+ with open(file_path, "w") as file:
+ file.write(content)
def generate_content(index: int, task_ids: list, base_filename: str, level: int) -> str:
diff --git a/tests/challenges/memory/test_memory_challenge_c.py b/tests/challenges/memory/test_memory_challenge_c.py
index 3cfeb2c01..db58cd4ba 100644
--- a/tests/challenges/memory/test_memory_challenge_c.py
+++ b/tests/challenges/memory/test_memory_challenge_c.py
@@ -1,26 +1,24 @@
import pytest
from pytest_mock import MockerFixture
-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file, write_to_file
+from autogpt.commands.file_operations import read_file
+from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import (
- generate_noise,
- get_workspace_path,
- run_interaction_loop,
-)
+from tests.challenges.utils import generate_noise, get_workspace_path, run_challenge
NOISE = 1200
OUTPUT_LOCATION = "output.txt"
+USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
@challenge()
def test_memory_challenge_c(
- memory_management_agent: Agent,
patched_api_requestor: MockerFixture,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
+ workspace: Workspace,
+ patched_make_workspace: pytest.fixture,
) -> None:
"""
Instead of reading task Ids from files as with the previous challenges, the agent now must remember
@@ -28,7 +26,7 @@ def test_memory_challenge_c(
after seeing several of them.
Args:
- memory_management_agent (Agent)
+ workspace (Workspace)
patched_api_requestor (MockerFixture)
monkeypatch (pytest.MonkeyPatch)
level_to_run (int)
@@ -48,26 +46,23 @@ def test_memory_challenge_c(
level_silly_phrases = silly_phrases[:level_to_run]
create_instructions_files(
- memory_management_agent,
+ workspace,
level_to_run,
level_silly_phrases,
)
- run_interaction_loop(
- monkeypatch,
- memory_management_agent,
- level_to_run + 2,
- challenge_name,
- level_to_run,
+ run_challenge(
+ challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
)
- file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
- content = read_file(file_path, agent=memory_management_agent)
+
+ file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+ content = read_file(file_path, agent=workspace)
for phrase in level_silly_phrases:
assert phrase in content, f"Expected the file to contain {phrase}"
def create_instructions_files(
- memory_management_agent: Agent,
+ workspace: Workspace,
level: int,
task_ids: list,
base_filename: str = "instructions_",
@@ -76,7 +71,7 @@ def create_instructions_files(
Creates a series of instructions files for the memory challenge.
Args:
level:
- memory_management_agent (Agent)
+ workspace (Workspace)
num_files (int)
task_ids (list)
base_filename (str, optional)
@@ -84,8 +79,9 @@ def create_instructions_files(
for i in range(1, level + 1):
content = generate_content(i, task_ids, base_filename, level)
file_name = f"{base_filename}{i}.txt"
- file_path = get_workspace_path(memory_management_agent, file_name)
- write_to_file(file_path, content, memory_management_agent)
+ file_path = get_workspace_path(workspace, file_name)
+ with open(file_path, "w") as file:
+ file.write(content)
def generate_content(
diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py
index ae44831b3..9cd9c8b09 100644
--- a/tests/challenges/memory/test_memory_challenge_d.py
+++ b/tests/challenges/memory/test_memory_challenge_d.py
@@ -4,28 +4,30 @@ from typing import Dict
import pytest
from pytest_mock import MockerFixture
-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file, write_to_file
+from autogpt.commands.file_operations import read_file
+from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge
LEVEL_CURRENTLY_BEATEN = 1
MAX_LEVEL = 5
OUTPUT_LOCATION = "output.txt"
+USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
@challenge()
def test_memory_challenge_d(
- memory_management_agent: Agent,
patched_api_requestor: MockerFixture,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
+ workspace: Workspace,
+ patched_make_workspace: pytest.fixture,
) -> None:
"""
The agent is given a series of events and must remember the respective beliefs of the characters.
Args:
- memory_management_agent (Agent)
+ workspace (Workspace)
user_selected_level (int)
"""
sally_anne_test_phrases = [
@@ -36,19 +38,14 @@ def test_memory_challenge_d(
"Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C).",
]
level_sally_anne_test_phrases = sally_anne_test_phrases[:level_to_run]
- create_instructions_files(
- memory_management_agent, level_to_run, level_sally_anne_test_phrases
+ create_instructions_files(workspace, level_to_run, level_sally_anne_test_phrases)
+ run_challenge(
+ challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
)
- run_interaction_loop(
- monkeypatch,
- memory_management_agent,
- level_to_run + 2,
- challenge_name,
- level_to_run,
- )
- file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
- content = read_file(file_path, memory_management_agent)
+ file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+
+ content = read_file(file_path, workspace)
check_beliefs(content, level_to_run)
@@ -176,7 +173,7 @@ def extract_beliefs(content: str) -> Dict[str, Dict[str, str]]:
def create_instructions_files(
- memory_management_agent: Agent,
+ workspace: Workspace,
level: int,
test_phrases: list,
base_filename: str = "instructions_",
@@ -185,15 +182,16 @@ def create_instructions_files(
Creates a series of instructions files for the memory challenge.
Args:
level:
- memory_management_agent (Agent)
+ workspace (Workspace)
test_phrases (list)
base_filename (str, optional)
"""
for i in range(1, level + 1):
content = generate_content(i, test_phrases, base_filename, level)
file_name = f"{base_filename}{i}.txt"
- file_path = get_workspace_path(memory_management_agent, file_name)
- write_to_file(file_path, content, memory_management_agent)
+ file_path = get_workspace_path(workspace, file_name)
+ with open(file_path, "w") as file:
+ file.write(content)
def generate_content(
diff --git a/tests/challenges/schema.py b/tests/challenges/schema.py
new file mode 100644
index 000000000..9c86772af
--- /dev/null
+++ b/tests/challenges/schema.py
@@ -0,0 +1,7 @@
+from pydantic import BaseModel
+
+
+class Task(BaseModel):
+ """Jsonifiable representation of a task"""
+
+ user_input: str
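`Task` is deliberately tiny: it only carries the user's instruction from a challenge into `benchmarks.run_task()`. Because it is a pydantic model, a missing or non-string `user_input` fails validation up front. Quick usage sketch:

```python
from tests.challenges.schema import Task

task = Task(user_input="Write 'Hello World' into hello_world.txt.")
print(task.user_input)

# Task() without user_input raises pydantic.ValidationError.
```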
diff --git a/tests/challenges/utils.py b/tests/challenges/utils.py
index 0b5f54c57..130c5bd77 100644
--- a/tests/challenges/utils.py
+++ b/tests/challenges/utils.py
@@ -6,8 +6,10 @@ from typing import Any, Generator
import pytest
-from autogpt.agent import Agent
from autogpt.log_cycle.log_cycle import LogCycleHandler
+from autogpt.workspace import Workspace
+from benchmarks import run_task
+from tests.challenges.schema import Task
def generate_noise(noise_size: int) -> str:
@@ -39,20 +41,6 @@ def setup_mock_input(monkeypatch: pytest.MonkeyPatch, cycle_count: int) -> None:
monkeypatch.setattr("autogpt.utils.session.prompt", lambda _: next(gen))
-def run_interaction_loop(
- monkeypatch: pytest.MonkeyPatch,
- agent: Agent,
- cycle_count: int,
- challenge_name: str,
- level_to_run: int,
-) -> None:
- setup_mock_input(monkeypatch, cycle_count)
-
- setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
- with contextlib.suppress(SystemExit):
- agent.start_interaction_loop()
-
-
def setup_mock_log_cycle_agent_name(
monkeypatch: pytest.MonkeyPatch, challenge_name: str, level_to_run: int
) -> None:
@@ -64,13 +52,27 @@ def setup_mock_log_cycle_agent_name(
)
-def get_workspace_path(agent: Agent, file_name: str) -> str:
- return str(agent.workspace.get_path(file_name))
+def get_workspace_path(workspace: Workspace, file_name: str) -> str:
+ return str(workspace.get_path(file_name))
def copy_file_into_workspace(
- agent: Agent, directory_path: Path, file_path: str
+ workspace: Workspace, directory_path: Path, file_path: str
) -> None:
- workspace_code_file_path = get_workspace_path(agent, file_path)
+ workspace_code_file_path = get_workspace_path(workspace, file_path)
code_file_path = directory_path / file_path
shutil.copy(code_file_path, workspace_code_file_path)
+
+
+def run_challenge(
+ challenge_name: str,
+ level_to_run: int,
+ monkeypatch: pytest.MonkeyPatch,
+ user_input: str,
+ cycle_count: int,
+) -> None:
+ setup_mock_input(monkeypatch, cycle_count)
+ setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
+ task = Task(user_input=user_input)
+ with contextlib.suppress(SystemExit):
+ run_task(task)
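`run_challenge()` replaces the old `run_interaction_loop()` and is the single path every challenge above now goes through: it mocks `cycle_count` rounds of user input, tags the log cycle with the challenge name and level, wraps the instruction in a `Task`, and lets `run_task()` drive a freshly bootstrapped agent (the agent loop's `SystemExit` is expected and suppressed). Illustrative test-side usage, matching the rewritten challenges:

```python
from tests.challenges.utils import run_challenge


def test_hello_world(monkeypatch, workspace, patched_make_workspace) -> None:
    run_challenge(
        challenge_name="write_file",
        level_to_run=1,
        monkeypatch=monkeypatch,
        user_input="Write 'Hello World' into a file named hello_world.txt.",
        cycle_count=1,
    )
    assert workspace.get_path("hello_world.txt").exists()
```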
diff --git a/tests/conftest.py b/tests/conftest.py
index 2342a3b04..5d2c000a7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,12 +7,12 @@ import yaml
from pytest_mock import MockerFixture
from autogpt.agent.agent import Agent
-from autogpt.commands.command import CommandRegistry
from autogpt.config.ai_config import AIConfig
from autogpt.config.config import Config
from autogpt.llm.api_manager import ApiManager
from autogpt.logs import TypingConsoleHandler
from autogpt.memory.vector import get_memory
+from autogpt.models.command_registry import CommandRegistry
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
from autogpt.workspace import Workspace
@@ -49,10 +49,17 @@ def temp_plugins_config_file():
def config(
temp_plugins_config_file: str, mocker: MockerFixture, workspace: Workspace
) -> Config:
- config = Config()
+ config = Config.build_config_from_env()
+ if not os.environ.get("OPENAI_API_KEY"):
+ os.environ["OPENAI_API_KEY"] = "sk-dummy"
+
config.plugins_dir = "tests/unit/data/test_plugins"
config.plugins_config_file = temp_plugins_config_file
- config.load_plugins_config()
+
+ # avoid circular dependency
+ from autogpt.plugins.plugins_config import PluginsConfig
+
+ config.plugins_config = PluginsConfig.load_config(global_config=config)
# Do a little setup and teardown since the config object is a singleton
mocker.patch.multiple(
@@ -92,11 +99,11 @@ def agent(config: Config, workspace: Workspace) -> Agent:
command_registry = CommandRegistry()
ai_config.command_registry = command_registry
+ config.memory_backend = "json_file"
+ memory_json_file = get_memory(config)
+ memory_json_file.clear()
- config.set_memory_backend("json_file")
- memory_json_file = get_memory(config, init=True)
-
- system_prompt = ai_config.construct_full_prompt()
+ system_prompt = ai_config.construct_full_prompt(config)
return Agent(
ai_name=ai_config.ai_name,
diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py
index fff3867ed..664c6cbb4 100644
--- a/tests/integration/agent_factory.py
+++ b/tests/integration/agent_factory.py
@@ -1,301 +1,47 @@
import pytest
from autogpt.agent import Agent
-from autogpt.commands.command import CommandRegistry
from autogpt.config import AIConfig, Config
-from autogpt.main import COMMAND_CATEGORIES
-from autogpt.memory.vector import NoMemory, get_memory
-from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
+from autogpt.memory.vector import get_memory
+from autogpt.models.command_registry import CommandRegistry
from autogpt.workspace import Workspace
@pytest.fixture
-def agent_test_config(config: Config):
- was_continuous_mode = config.continuous_mode
- was_temperature = config.temperature
- was_plain_output = config.plain_output
- config.set_continuous_mode(False)
- config.set_temperature(0)
- config.plain_output = True
- yield config
- config.set_continuous_mode(was_continuous_mode)
- config.set_temperature(was_temperature)
- config.plain_output = was_plain_output
+def memory_json_file(config: Config):
+ was_memory_backend = config.memory_backend
+ config.memory_backend = "json_file"
+ memory = get_memory(config)
+ memory.clear()
+ yield memory
-@pytest.fixture
-def memory_json_file(agent_test_config: Config):
- was_memory_backend = agent_test_config.memory_backend
-
- agent_test_config.set_memory_backend("json_file")
- yield get_memory(agent_test_config, init=True)
-
- agent_test_config.set_memory_backend(was_memory_backend)
-
-
-@pytest.fixture
-def browser_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace):
- command_registry = CommandRegistry()
- command_registry.import_commands("autogpt.commands.file_operations")
- command_registry.import_commands("autogpt.commands.web_selenium")
- command_registry.import_commands("autogpt.app")
- command_registry.import_commands("autogpt.commands.task_statuses")
-
- ai_config = AIConfig(
- ai_name="browse_website-GPT",
- ai_role="an AI designed to use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html, answer the question 'What is the price of the book?' and write the price to a file named \"browse_website.txt\", and use the task_complete command to complete the task.",
- ai_goals=[
- "Use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html and answer the question 'What is the price of the book?'",
- 'Write the price of the book to a file named "browse_website.txt".',
- "Use the task_complete command to complete the task.",
- "Do not use any other commands.",
- ],
- )
- ai_config.command_registry = command_registry
-
- system_prompt = ai_config.construct_full_prompt()
-
- agent = Agent(
- ai_name="",
- memory=memory_none,
- command_registry=command_registry,
- ai_config=ai_config,
- config=agent_test_config,
- next_action_count=0,
- system_prompt=system_prompt,
- triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
- workspace_directory=workspace.root,
- )
-
- return agent
-
-
-@pytest.fixture
-def file_system_agents(
- agent_test_config, memory_json_file: NoMemory, workspace: Workspace
-):
- agents = []
- command_registry = get_command_registry(agent_test_config)
-
- ai_goals = [
- "Write 'Hello World' into a file named \"hello_world.txt\".",
- 'Write \'Hello World\' into 2 files named "hello_world_1.txt"and "hello_world_2.txt".',
- ]
-
- for ai_goal in ai_goals:
- ai_config = AIConfig(
- ai_name="File System Agent",
- ai_role="an AI designed to manage a file system.",
- ai_goals=[ai_goal],
- )
- ai_config.command_registry = command_registry
- system_prompt = ai_config.construct_full_prompt()
- Config().set_continuous_mode(False)
- agents.append(
- Agent(
- ai_name="File System Agent",
- memory=memory_json_file,
- command_registry=command_registry,
- ai_config=ai_config,
- config=agent_test_config,
- next_action_count=0,
- system_prompt=system_prompt,
- triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
- workspace_directory=workspace.root,
- )
- )
- return agents
-
-
-@pytest.fixture
-def memory_management_agent(agent_test_config, memory_json_file, workspace: Workspace):
- command_registry = get_command_registry(agent_test_config)
-
- ai_config = AIConfig(
- ai_name="Follow-Instructions-GPT",
- ai_role="an AI designed to read the instructions_1.txt file using the read_file method and follow the instructions in the file.",
- ai_goals=[
- "Use the command read_file to read the instructions_1.txt file",
- "Follow the instructions in the instructions_1.txt file",
- ],
- )
- ai_config.command_registry = command_registry
-
- system_prompt = ai_config.construct_full_prompt()
-
- agent = Agent(
- ai_name="Follow-Instructions-GPT",
- memory=memory_json_file,
- command_registry=command_registry,
- ai_config=ai_config,
- config=agent_test_config,
- next_action_count=0,
- system_prompt=system_prompt,
- triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
- workspace_directory=workspace.root,
- )
-
- return agent
-
-
-@pytest.fixture
-def information_retrieval_agents(
- agent_test_config, memory_json_file, workspace: Workspace
-):
- agents = []
- command_registry = get_command_registry(agent_test_config)
-
- ai_goals = [
- "Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
- "Write to a file called output.txt containing tesla's revenue in 2022.",
- "Write to a file called output.txt containing tesla's revenue every year since its creation.",
- ]
- for ai_goal in ai_goals:
- ai_config = AIConfig(
- ai_name="Information Retrieval Agent",
- ai_role="an autonomous agent that specializes in retrieving information.",
- ai_goals=[ai_goal],
- )
- ai_config.command_registry = command_registry
- system_prompt = ai_config.construct_full_prompt()
- Config().set_continuous_mode(False)
- agents.append(
- Agent(
- ai_name="Information Retrieval Agent",
- memory=memory_json_file,
- command_registry=command_registry,
- ai_config=ai_config,
- config=agent_test_config,
- next_action_count=0,
- system_prompt=system_prompt,
- triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
- workspace_directory=workspace.root,
- )
- )
- return agents
+ config.memory_backend = was_memory_backend
@pytest.fixture
-def kubernetes_agent(
- agent_test_config: Config, memory_json_file: NoMemory, workspace: Workspace
-) -> Agent:
+def dummy_agent(config: Config, memory_json_file, workspace: Workspace):
command_registry = CommandRegistry()
- command_registry.import_commands("autogpt.commands.file_operations")
- command_registry.import_commands("autogpt.app")
ai_config = AIConfig(
- ai_name="Kubernetes",
- ai_role="an autonomous agent that specializes in creating Kubernetes deployment templates.",
+ ai_name="Dummy Agent",
+ ai_role="Dummy Role",
ai_goals=[
- "Write a simple kubernetes deployment file and save it as a kube.yaml.",
- # You should make a simple nginx web server that uses docker and exposes the port 80.
+ "Dummy Task",
],
)
ai_config.command_registry = command_registry
- system_prompt = ai_config.construct_full_prompt()
- Config().set_continuous_mode(False)
agent = Agent(
- ai_name="Kubernetes-Demo",
+ ai_name="Dummy Agent",
memory=memory_json_file,
command_registry=command_registry,
ai_config=ai_config,
- config=agent_test_config,
+ config=config,
next_action_count=0,
- system_prompt=system_prompt,
- triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
+ system_prompt="dummy_prompt",
+ triggering_prompt="dummy triggering prompt",
workspace_directory=workspace.root,
)
return agent
-
-
-@pytest.fixture
-def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Workspace):
- command_registry = CommandRegistry()
- command_registry.import_commands("autogpt.commands.file_operations")
- command_registry.import_commands("autogpt.app")
- command_registry.import_commands("autogpt.commands.web_selenium")
-
- ai_config = AIConfig(
- ai_name="Get-PhysicsNobelPrize",
- ai_role="An autonomous agent that specializes in physics history.",
- ai_goals=[
- "Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt.",
- ],
- )
- ai_config.command_registry = command_registry
-
- system_prompt = ai_config.construct_full_prompt()
- Config().set_continuous_mode(False)
-
- agent = Agent(
- ai_name="Get-PhysicsNobelPrize",
- memory=memory_json_file,
- command_registry=command_registry,
- ai_config=ai_config,
- config=agent_test_config,
- next_action_count=0,
- system_prompt=system_prompt,
- triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
- workspace_directory=workspace.root,
- )
-
- return agent
-
-
-@pytest.fixture
-def debug_code_agents(agent_test_config, memory_json_file, workspace: Workspace):
- agents = []
- goals = [
- [
- "1- Run test.py using the execute_python_file command.",
- "2- Read code.py using the read_file command.",
- "3- Modify code.py using the write_to_file command."
- "Repeat step 1, 2 and 3 until test.py runs without errors.",
- ],
- [
- "1- Run test.py.",
- "2- Read code.py.",
- "3- Modify code.py."
- "Repeat step 1, 2 and 3 until test.py runs without errors.",
- ],
- ["1- Make test.py run without errors."],
- ]
-
- for goal in goals:
- ai_config = AIConfig(
- ai_name="Debug Code Agent",
- ai_role="an autonomous agent that specializes in debugging python code",
- ai_goals=goal,
- )
- command_registry = get_command_registry(agent_test_config)
- ai_config.command_registry = command_registry
- system_prompt = ai_config.construct_full_prompt()
- Config().set_continuous_mode(False)
- agents.append(
- Agent(
- ai_name="Debug Code Agent",
- memory=memory_json_file,
- command_registry=command_registry,
- ai_config=ai_config,
- config=agent_test_config,
- next_action_count=0,
- system_prompt=system_prompt,
- triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
- workspace_directory=workspace.root,
- )
- )
- return agents
-
-
-def get_command_registry(agent_test_config):
- command_registry = CommandRegistry()
- enabled_command_categories = [
- x
- for x in COMMAND_CATEGORIES
- if x not in agent_test_config.disabled_command_categories
- ]
- for command_category in enabled_command_categories:
- command_registry.import_commands(command_category)
- return command_registry
diff --git a/tests/integration/memory/test_json_file_memory.py b/tests/integration/memory/test_json_file_memory.py
index 9134a0696..e60a07667 100644
--- a/tests/integration/memory/test_json_file_memory.py
+++ b/tests/integration/memory/test_json_file_memory.py
@@ -6,7 +6,6 @@ import pytest
from autogpt.config import Config
from autogpt.memory.vector import JSONFileMemory, MemoryItem
from autogpt.workspace import Workspace
-from tests.utils import requires_api_key
@pytest.fixture(autouse=True)
@@ -34,7 +33,9 @@ def test_json_memory_init_with_backing_empty_file(config: Config, workspace: Wor
assert index_file.read_text() == "[]"
-def test_json_memory_init_with_backing_file(config: Config, workspace: Workspace):
+def test_json_memory_init_with_backing_invalid_file(
+ config: Config, workspace: Workspace
+):
index_file = workspace.root / f"{config.memory_index}.json"
index_file.touch()
@@ -69,33 +70,58 @@ def test_json_memory_clear(config: Config, memory_item: MemoryItem):
def test_json_memory_get(config: Config, memory_item: MemoryItem, mock_get_embedding):
index = JSONFileMemory(config)
assert (
- index.get("test") == None
+ index.get("test", config) == None
), "Cannot test get() because initial index is not empty"
index.add(memory_item)
- retrieved = index.get("test")
+ retrieved = index.get("test", config)
assert retrieved is not None
assert retrieved.memory_item == memory_item
+def test_json_memory_load_index(config: Config, memory_item: MemoryItem):
+ index = JSONFileMemory(config)
+ index.add(memory_item)
+
+ try:
+ assert index.file_path.exists(), "index was not saved to file"
+ assert len(index) == 1, f"index contains {len(index)} items instead of 1"
+ assert index.memories[0] == memory_item, "item in index != added mock item"
+ except AssertionError as e:
+ raise ValueError(f"Setting up for load_index test failed: {e}")
+
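+ # Drop the in-memory items, then reload them from the backing JSON file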
+ index.memories = []
+ index.load_index()
+
+ assert len(index) == 1
+ assert index.memories[0] == memory_item
+
+
@pytest.mark.vcr
-@requires_api_key("OPENAI_API_KEY")
+@pytest.mark.requires_openai_api_key
def test_json_memory_get_relevant(config: Config, patched_api_requestor: None) -> None:
index = JSONFileMemory(config)
- mem1 = MemoryItem.from_text_file("Sample text", "sample.txt")
- mem2 = MemoryItem.from_text_file("Grocery list:\n- Pancake mix", "groceries.txt")
- mem3 = MemoryItem.from_text_file("What is your favorite color?", "color.txt")
+ mem1 = MemoryItem.from_text_file("Sample text", "sample.txt", config)
+ mem2 = MemoryItem.from_text_file(
+ "Grocery list:\n- Pancake mix", "groceries.txt", config
+ )
+ mem3 = MemoryItem.from_text_file(
+ "What is your favorite color?", "color.txt", config
+ )
lipsum = "Lorem ipsum dolor sit amet"
- mem4 = MemoryItem.from_text_file(" ".join([lipsum] * 100), "lipsum.txt")
+ mem4 = MemoryItem.from_text_file(" ".join([lipsum] * 100), "lipsum.txt", config)
index.add(mem1)
index.add(mem2)
index.add(mem3)
index.add(mem4)
- assert index.get_relevant(mem1.raw_content, 1)[0].memory_item == mem1
- assert index.get_relevant(mem2.raw_content, 1)[0].memory_item == mem2
- assert index.get_relevant(mem3.raw_content, 1)[0].memory_item == mem3
- assert [mr.memory_item for mr in index.get_relevant(lipsum, 2)] == [mem4, mem1]
+ assert index.get_relevant(mem1.raw_content, 1, config)[0].memory_item == mem1
+ assert index.get_relevant(mem2.raw_content, 1, config)[0].memory_item == mem2
+ assert index.get_relevant(mem3.raw_content, 1, config)[0].memory_item == mem3
+ assert [mr.memory_item for mr in index.get_relevant(lipsum, 2, config)] == [
+ mem4,
+ mem1,
+ ]
def test_json_memory_get_stats(config: Config, memory_item: MemoryItem) -> None:
diff --git a/tests/integration/memory/utils.py b/tests/integration/memory/utils.py
index 374eab300..1f791160e 100644
--- a/tests/integration/memory/utils.py
+++ b/tests/integration/memory/utils.py
@@ -38,7 +38,7 @@ def mock_get_embedding(mocker: MockerFixture, embedding_dimension: int):
def memory_none(agent_test_config: Config, mock_get_embedding):
was_memory_backend = agent_test_config.memory_backend
- agent_test_config.set_memory_backend("no_memory")
+ agent_test_config.memory_backend = "no_memory"
yield get_memory(agent_test_config)
- agent_test_config.set_memory_backend(was_memory_backend)
+ agent_test_config.memory_backend = was_memory_backend
diff --git a/tests/integration/test_execute_code.py b/tests/integration/test_execute_code.py
index 16d6c4d97..6ba4a7530 100644
--- a/tests/integration/test_execute_code.py
+++ b/tests/integration/test_execute_code.py
@@ -2,7 +2,6 @@ import os
import random
import string
import tempfile
-from typing import Callable
import pytest
@@ -12,12 +11,12 @@ from autogpt.config import Config
@pytest.fixture
-def random_code(random_string) -> Callable:
+def random_code(random_string) -> str:
return f"print('Hello {random_string}!')"
@pytest.fixture
-def python_test_file(config: Config, random_code: str) -> Callable:
+def python_test_file(config: Config, random_code: str) -> str:
temp_file = tempfile.NamedTemporaryFile(dir=config.workspace_path, suffix=".py")
temp_file.write(str.encode(random_code))
temp_file.flush()
@@ -50,9 +49,21 @@ def test_execute_python_code(random_code: str, random_string: str, agent: Agent)
assert f.read() == random_code
-def test_execute_python_code_overwrites_file(
- random_code: str, random_string: str, agent: Agent
+def test_execute_python_code_disallows_name_arg_path_traversal(
+ random_code: str, agent: Agent
):
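+ # A relative path passed as 'name' must not allow writing code outside the workspace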
+ result: str = sut.execute_python_code(
+ random_code, name="../../test_code", agent=agent
+ )
+ assert "Error:" in result, "Path traversal in 'name' argument does not return error"
+ assert "path traversal" in result.lower()
+
+ # Check that the code is not stored in parent directory
+ dst_with_traversal = agent.workspace.get_path("test_code.py")
+ assert not dst_with_traversal.is_file(), "Path traversal by filename not prevented"
+
+
+def test_execute_python_code_overwrites_file(random_code: str, agent: Agent):
ai_name = agent.ai_name
destination = os.path.join(
agent.config.workspace_path, ai_name, "executed_code", "test_code.py"
diff --git a/tests/integration/test_image_gen.py b/tests/integration/test_image_gen.py
index a606d8da2..8cdcfd986 100644
--- a/tests/integration/test_image_gen.py
+++ b/tests/integration/test_image_gen.py
@@ -8,7 +8,6 @@ from PIL import Image
from autogpt.agent.agent import Agent
from autogpt.commands.image_gen import generate_image, generate_image_with_sd_webui
-from tests.utils import requires_api_key
@pytest.fixture(params=[256, 512, 1024])
@@ -17,7 +16,7 @@ def image_size(request):
return request.param
-@requires_api_key("OPENAI_API_KEY")
+@pytest.mark.requires_openai_api_key
@pytest.mark.vcr
def test_dalle(agent: Agent, workspace, image_size, patched_api_requestor):
"""Test DALL-E image generation."""
@@ -32,7 +31,7 @@ def test_dalle(agent: Agent, workspace, image_size, patched_api_requestor):
@pytest.mark.xfail(
reason="The image is too big to be put in a cassette for a CI pipeline. We're looking into a solution."
)
-@requires_api_key("HUGGINGFACE_API_TOKEN")
+@pytest.mark.requires_huggingface_api_key
@pytest.mark.parametrize(
"image_model",
["CompVis/stable-diffusion-v1-4", "stabilityai/stable-diffusion-2-1"],
diff --git a/tests/integration/test_provider_openai.py b/tests/integration/test_provider_openai.py
new file mode 100644
index 000000000..f51ad9ac4
--- /dev/null
+++ b/tests/integration/test_provider_openai.py
@@ -0,0 +1,54 @@
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from autogpt.llm.api_manager import ApiManager
+from autogpt.llm.providers import openai
+
+api_manager = ApiManager()
+
+
+@pytest.fixture(autouse=True)
+def reset_api_manager():
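+ # Reset the shared ApiManager singleton so token and cost counters start at zero for each test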
+ api_manager.reset()
+ yield
+
+
+class TestProviderOpenAI:
+ @staticmethod
+ def test_create_chat_completion_debug_mode(caplog):
+ """Test if debug mode logs response."""
+ messages = [
+ {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "Who won the world series in 2020?"},
+ ]
+ model = "gpt-3.5-turbo"
+ with patch("openai.ChatCompletion.create") as mock_create:
+ mock_response = MagicMock()
+ del mock_response.error
+ mock_response.usage.prompt_tokens = 10
+ mock_response.usage.completion_tokens = 20
+ mock_create.return_value = mock_response
+
+ openai.create_chat_completion(messages, model=model)
+
+ assert "Response" in caplog.text
+
+ @staticmethod
+ def test_create_chat_completion_empty_messages():
+ """Test if empty messages result in zero tokens and cost."""
+ messages = []
+ model = "gpt-3.5-turbo"
+
+ with patch("openai.ChatCompletion.create") as mock_create:
+ mock_response = MagicMock()
+ del mock_response.error
+ mock_response.usage.prompt_tokens = 0
+ mock_response.usage.completion_tokens = 0
+ mock_create.return_value = mock_response
+
+ openai.create_chat_completion(messages, model=model)
+
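+ # No messages were sent, so token counts and total cost should remain zero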
+ assert api_manager.get_total_prompt_tokens() == 0
+ assert api_manager.get_total_completion_tokens() == 0
+ assert api_manager.get_total_cost() == 0
diff --git a/tests/integration/test_setup.py b/tests/integration/test_setup.py
index 5217d72ab..b74eebafc 100644
--- a/tests/integration/test_setup.py
+++ b/tests/integration/test_setup.py
@@ -4,15 +4,14 @@ import pytest
from autogpt.config.ai_config import AIConfig
from autogpt.setup import generate_aiconfig_automatic, prompt_user
-from tests.utils import requires_api_key
@pytest.mark.vcr
-@requires_api_key("OPENAI_API_KEY")
-def test_generate_aiconfig_automatic_default(patched_api_requestor):
+@pytest.mark.requires_openai_api_key
+def test_generate_aiconfig_automatic_default(patched_api_requestor, config):
user_inputs = [""]
with patch("autogpt.utils.session.prompt", side_effect=user_inputs):
- ai_config = prompt_user()
+ ai_config = prompt_user(config)
assert isinstance(ai_config, AIConfig)
assert ai_config.ai_name is not None
@@ -21,10 +20,10 @@ def test_generate_aiconfig_automatic_default(patched_api_requestor):
@pytest.mark.vcr
-@requires_api_key("OPENAI_API_KEY")
-def test_generate_aiconfig_automatic_typical(patched_api_requestor):
+@pytest.mark.requires_openai_api_key
+def test_generate_aiconfig_automatic_typical(patched_api_requestor, config):
user_prompt = "Help me create a rock opera about cybernetic giraffes"
- ai_config = generate_aiconfig_automatic(user_prompt)
+ ai_config = generate_aiconfig_automatic(user_prompt, config)
assert isinstance(ai_config, AIConfig)
assert ai_config.ai_name is not None
@@ -33,8 +32,8 @@ def test_generate_aiconfig_automatic_typical(patched_api_requestor):
@pytest.mark.vcr
-@requires_api_key("OPENAI_API_KEY")
-def test_generate_aiconfig_automatic_fallback(patched_api_requestor):
+@pytest.mark.requires_openai_api_key
+def test_generate_aiconfig_automatic_fallback(patched_api_requestor, config):
user_inputs = [
"T&GF£OIBECC()!*",
"Chef-GPT",
@@ -45,7 +44,7 @@ def test_generate_aiconfig_automatic_fallback(patched_api_requestor):
"",
]
with patch("autogpt.utils.session.prompt", side_effect=user_inputs):
- ai_config = prompt_user()
+ ai_config = prompt_user(config)
assert isinstance(ai_config, AIConfig)
assert ai_config.ai_name == "Chef-GPT"
@@ -54,8 +53,8 @@ def test_generate_aiconfig_automatic_fallback(patched_api_requestor):
@pytest.mark.vcr
-@requires_api_key("OPENAI_API_KEY")
-def test_prompt_user_manual_mode(patched_api_requestor):
+@pytest.mark.requires_openai_api_key
+def test_prompt_user_manual_mode(patched_api_requestor, config):
user_inputs = [
"--manual",
"Chef-GPT",
@@ -66,7 +65,7 @@ def test_prompt_user_manual_mode(patched_api_requestor):
"",
]
with patch("autogpt.utils.session.prompt", side_effect=user_inputs):
- ai_config = prompt_user()
+ ai_config = prompt_user(config)
assert isinstance(ai_config, AIConfig)
assert ai_config.ai_name == "Chef-GPT"
diff --git a/tests/integration/test_web_selenium.py b/tests/integration/test_web_selenium.py
index baf3653ca..f98b2971a 100644
--- a/tests/integration/test_web_selenium.py
+++ b/tests/integration/test_web_selenium.py
@@ -3,11 +3,10 @@ from pytest_mock import MockerFixture
from autogpt.agent.agent import Agent
from autogpt.commands.web_selenium import browse_website
-from tests.utils import requires_api_key
@pytest.mark.vcr
-@requires_api_key("OPENAI_API_KEY")
+@pytest.mark.requires_openai_api_key
def test_browse_website(agent: Agent, patched_api_requestor: MockerFixture):
url = "https://barrel-roll.com"
question = "How to execute a barrel roll"
diff --git a/tests/mocks/mock_commands.py b/tests/mocks/mock_commands.py
index 42b0ea113..278894c4d 100644
--- a/tests/mocks/mock_commands.py
+++ b/tests/mocks/mock_commands.py
@@ -1,8 +1,13 @@
-from autogpt.commands.command import command
+from autogpt.command_decorator import command
@command(
- "function_based", "Function-based test command", "(arg1: int, arg2: str) -> str"
+ "function_based",
+ "Function-based test command",
+ {
+ "arg1": {"type": "int", "description": "arg 1", "required": True},
+ "arg2": {"type": "str", "description": "arg 2", "required": True},
+ },
)
def function_based(arg1: int, arg2: str) -> str:
"""A function-based test command that returns a string with the two arguments separated by a dash."""
diff --git a/tests/unit/test_agent_manager.py b/tests/unit/test_agent_manager.py
index 4b0a01bcd..7140db059 100644
--- a/tests/unit/test_agent_manager.py
+++ b/tests/unit/test_agent_manager.py
@@ -1,13 +1,15 @@
import pytest
from autogpt.agent.agent_manager import AgentManager
+from autogpt.llm import ChatModelResponse
from autogpt.llm.chat import create_chat_completion
+from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
@pytest.fixture
-def agent_manager():
+def agent_manager(config):
# Hack, real gross. Singletons are not good times.
- yield AgentManager()
+ yield AgentManager(config)
del AgentManager._instances[AgentManager]
@@ -27,12 +29,16 @@ def model():
@pytest.fixture(autouse=True)
-def mock_create_chat_completion(mocker):
+def mock_create_chat_completion(mocker, config):
mock_create_chat_completion = mocker.patch(
"autogpt.agent.agent_manager.create_chat_completion",
wraps=create_chat_completion,
)
- mock_create_chat_completion.return_value = "irrelevant"
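+ # Return a ChatModelResponse rather than a bare string, matching the updated create_chat_completion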
+ mock_create_chat_completion.return_value = ChatModelResponse(
+ model_info=OPEN_AI_CHAT_MODELS[config.fast_llm_model],
+ content="irrelevant",
+ function_call={},
+ )
return mock_create_chat_completion
diff --git a/tests/unit/test_ai_config.py b/tests/unit/test_ai_config.py
index a684373b8..e3c31d5dc 100644
--- a/tests/unit/test_ai_config.py
+++ b/tests/unit/test_ai_config.py
@@ -19,10 +19,10 @@ ai_name: McFamished
ai_role: A hungry AI
api_budget: 0.0
"""
- config_file = tmp_path / "ai_settings.yaml"
- config_file.write_text(yaml_content)
+ ai_settings_file = tmp_path / "ai_settings.yaml"
+ ai_settings_file.write_text(yaml_content)
- ai_config = AIConfig.load(config_file)
+ ai_config = AIConfig.load(ai_settings_file)
assert len(ai_config.ai_goals) == 4
assert ai_config.ai_goals[0] == "Goal 1: Make a sandwich"
@@ -30,8 +30,8 @@ api_budget: 0.0
assert ai_config.ai_goals[2] == "Goal 3 - Go to sleep"
assert ai_config.ai_goals[3] == "Goal 4: Wake up"
- config_file.write_text("")
- ai_config.save(config_file)
+ ai_settings_file.write_text("")
+ ai_config.save(ai_settings_file)
yaml_content2 = """ai_goals:
- 'Goal 1: Make a sandwich'
@@ -42,15 +42,15 @@ ai_name: McFamished
ai_role: A hungry AI
api_budget: 0.0
"""
- assert config_file.read_text() == yaml_content2
+ assert ai_settings_file.read_text() == yaml_content2
def test_ai_config_file_not_exists(workspace):
"""Test if file does not exist."""
- config_file = workspace.get_path("ai_settings.yaml")
+ ai_settings_file = workspace.get_path("ai_settings.yaml")
- ai_config = AIConfig.load(str(config_file))
+ ai_config = AIConfig.load(str(ai_settings_file))
assert ai_config.ai_name == ""
assert ai_config.ai_role == ""
assert ai_config.ai_goals == []
@@ -62,10 +62,10 @@ def test_ai_config_file_not_exists(workspace):
def test_ai_config_file_is_empty(workspace):
"""Test if file does not exist."""
- config_file = workspace.get_path("ai_settings.yaml")
- config_file.write_text("")
+ ai_settings_file = workspace.get_path("ai_settings.yaml")
+ ai_settings_file.write_text("")
- ai_config = AIConfig.load(str(config_file))
+ ai_config = AIConfig.load(str(ai_settings_file))
assert ai_config.ai_name == ""
assert ai_config.ai_role == ""
assert ai_config.ai_goals == []
diff --git a/tests/unit/test_api_manager.py b/tests/unit/test_api_manager.py
index e259f56ad..615204d19 100644
--- a/tests/unit/test_api_manager.py
+++ b/tests/unit/test_api_manager.py
@@ -1,9 +1,10 @@
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
import pytest
from pytest_mock import MockerFixture
-from autogpt.llm.api_manager import OPEN_AI_MODELS, ApiManager
+from autogpt.llm.api_manager import ApiManager
+from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS, OPEN_AI_EMBEDDING_MODELS
api_manager = ApiManager()
@@ -17,80 +18,18 @@ def reset_api_manager():
@pytest.fixture(autouse=True)
def mock_costs(mocker: MockerFixture):
mocker.patch.multiple(
- OPEN_AI_MODELS["gpt-3.5-turbo"],
+ OPEN_AI_CHAT_MODELS["gpt-3.5-turbo"],
prompt_token_cost=0.0013,
completion_token_cost=0.0025,
)
mocker.patch.multiple(
- OPEN_AI_MODELS["text-embedding-ada-002"],
+ OPEN_AI_EMBEDDING_MODELS["text-embedding-ada-002"],
prompt_token_cost=0.0004,
)
yield
class TestApiManager:
- @staticmethod
- def test_create_chat_completion_debug_mode(caplog):
- """Test if debug mode logs response."""
- api_manager_debug = ApiManager(debug=True)
- messages = [
- {"role": "system", "content": "You are a helpful assistant."},
- {"role": "user", "content": "Who won the world series in 2020?"},
- ]
- model = "gpt-3.5-turbo"
-
- with patch("openai.ChatCompletion.create") as mock_create:
- mock_response = MagicMock()
- del mock_response.error
- mock_response.usage.prompt_tokens = 10
- mock_response.usage.completion_tokens = 20
- mock_create.return_value = mock_response
-
- api_manager_debug.create_chat_completion(messages, model=model)
-
- assert "Response" in caplog.text
-
- @staticmethod
- def test_create_chat_completion_empty_messages():
- """Test if empty messages result in zero tokens and cost."""
- messages = []
- model = "gpt-3.5-turbo"
-
- with patch("openai.ChatCompletion.create") as mock_create:
- mock_response = MagicMock()
- del mock_response.error
- mock_response.usage.prompt_tokens = 0
- mock_response.usage.completion_tokens = 0
- mock_create.return_value = mock_response
-
- api_manager.create_chat_completion(messages, model=model)
-
- assert api_manager.get_total_prompt_tokens() == 0
- assert api_manager.get_total_completion_tokens() == 0
- assert api_manager.get_total_cost() == 0
-
- @staticmethod
- def test_create_chat_completion_valid_inputs():
- """Test if valid inputs result in correct tokens and cost."""
- messages = [
- {"role": "system", "content": "You are a helpful assistant."},
- {"role": "user", "content": "Who won the world series in 2020?"},
- ]
- model = "gpt-3.5-turbo"
-
- with patch("openai.ChatCompletion.create") as mock_create:
- mock_response = MagicMock()
- del mock_response.error
- mock_response.usage.prompt_tokens = 10
- mock_response.usage.completion_tokens = 20
- mock_create.return_value = mock_response
-
- api_manager.create_chat_completion(messages, model=model)
-
- assert api_manager.get_total_prompt_tokens() == 10
- assert api_manager.get_total_completion_tokens() == 20
- assert api_manager.get_total_cost() == (10 * 0.0013 + 20 * 0.0025) / 1000
-
def test_getter_methods(self):
"""Test the getter methods for total tokens, cost, and budget."""
api_manager.update_cost(600, 1200, "gpt-3.5-turbo")
diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py
deleted file mode 100644
index 5975e086e..000000000
--- a/tests/unit/test_browse_scrape_links.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# Generated by CodiumAI
-
-# Dependencies:
-# pip install pytest-mock
-
-from autogpt.agent.agent import Agent
-from autogpt.commands.web_requests import scrape_links
-
-"""
-Code Analysis
-
-Objective:
-The objective of the 'scrape_links' function is to scrape hyperlinks from a
-given URL and return them in a formatted way.
-
-Inputs:
-- url: a string representing the URL to be scraped.
-
-Flow:
-1. Send a GET request to the given URL using the requests library and the user agent header from the config file.
-2. Check if the response contains an HTTP error. If it does, return "error".
-3. Parse the HTML content of the response using the BeautifulSoup library.
-4. Remove any script and style tags from the parsed HTML.
-5. Extract all hyperlinks from the parsed HTML using the 'extract_hyperlinks' function.
-6. Format the extracted hyperlinks using the 'format_hyperlinks' function.
-7. Return the formatted hyperlinks.
-
-Outputs:
-- A list of formatted hyperlinks.
-
-Additional aspects:
-- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP
-requests and parse HTML content, respectively.
-- The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML.
-- The 'format_hyperlinks' function is called to format the extracted hyperlinks.
-- The function checks for HTTP errors and returns "error" if any are found.
-"""
-
-
-class TestScrapeLinks:
- """
- Tests that the function returns a list of formatted hyperlinks when
- provided with a valid url that returns a webpage with hyperlinks.
- """
-
- def test_valid_url_with_hyperlinks(self, agent: Agent):
- url = "https://www.google.com"
- result = scrape_links(url, agent=agent)
- assert len(result) > 0
- assert isinstance(result, list)
- assert isinstance(result[0], str)
-
- def test_valid_url(self, mocker, agent: Agent):
- """Test that the function returns correctly formatted hyperlinks when given a valid url."""
- # Mock the requests.get() function to return a response with sample HTML containing hyperlinks
- mock_response = mocker.Mock()
- mock_response.status_code = 200
- mock_response.text = (
- "<html><body><a href='https://www.google.com'>Google</a></body></html>"
- )
- mocker.patch("requests.Session.get", return_value=mock_response)
-
- # Call the function with a valid URL
- result = scrape_links("https://www.example.com", agent)
-
- # Assert that the function returns correctly formatted hyperlinks
- assert result == ["Google (https://www.google.com)"]
-
- def test_invalid_url(self, mocker, agent: Agent):
- """Test that the function returns "error" when given an invalid url."""
- # Mock the requests.get() function to return an HTTP error response
- mock_response = mocker.Mock()
- mock_response.status_code = 404
- mocker.patch("requests.Session.get", return_value=mock_response)
-
- # Call the function with an invalid URL
- result = scrape_links("https://www.invalidurl.com", agent)
-
- # Assert that the function returns "error"
- assert "Error:" in result
-
- def test_no_hyperlinks(self, mocker, agent: Agent):
- """Test that the function returns an empty list when the html contains no hyperlinks."""
- # Mock the requests.get() function to return a response with sample HTML containing no hyperlinks
- mock_response = mocker.Mock()
- mock_response.status_code = 200
- mock_response.text = "<html><body><p>No hyperlinks here</p></body></html>"
- mocker.patch("requests.Session.get", return_value=mock_response)
-
- # Call the function with a URL containing no hyperlinks
- result = scrape_links("https://www.example.com", agent)
-
- # Assert that the function returns an empty list
- assert result == []
-
- def test_scrape_links_with_few_hyperlinks(self, mocker, agent: Agent):
- """Test that scrape_links() correctly extracts and formats hyperlinks from a sample HTML containing a few hyperlinks."""
- mock_response = mocker.Mock()
- mock_response.status_code = 200
- mock_response.text = """
- <html>
- <body>
- <div id="google-link"><a href="https://www.google.com">Google</a></div>
- <div id="github"><a href="https://github.com">GitHub</a></div>
- <div id="CodiumAI"><a href="https://www.codium.ai">CodiumAI</a></div>
- </body>
- </html>
- """
- mocker.patch("requests.Session.get", return_value=mock_response)
-
- # Call the function being tested
- result = scrape_links("https://www.example.com", agent)
-
- # Assert that the function returns a list of formatted hyperlinks
- assert isinstance(result, list)
- assert len(result) == 3
- assert result[0] == "Google (https://www.google.com)"
- assert result[1] == "GitHub (https://github.com)"
- assert result[2] == "CodiumAI (https://www.codium.ai)"
diff --git a/tests/unit/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py
deleted file mode 100644
index 23a80c545..000000000
--- a/tests/unit/test_browse_scrape_text.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# Generated by CodiumAI
-
-import pytest
-import requests
-
-from autogpt.agent.agent import Agent
-from autogpt.commands.web_requests import scrape_text
-
-"""
-Code Analysis
-
-Objective:
-The objective of the "scrape_text" function is to scrape the text content from
-a given URL and return it as a string, after removing any unwanted HTML tags and
- scripts.
-
-Inputs:
-- url: a string representing the URL of the webpage to be scraped.
-
-Flow:
-1. Send a GET request to the given URL using the requests library and the user agent
- header from the config file.
-2. Check if the response contains an HTTP error. If it does, return an error message.
-3. Use BeautifulSoup to parse the HTML content of the response and extract all script
- and style tags.
-4. Get the text content of the remaining HTML using the get_text() method of
- BeautifulSoup.
-5. Split the text into lines and then into chunks, removing any extra whitespace.
-6. Join the chunks into a single string with newline characters between them.
-7. Return the cleaned text.
-
-Outputs:
-- A string representing the cleaned text content of the webpage.
-
-Additional aspects:
-- The function uses the requests library and BeautifulSoup to handle the HTTP request
- and HTML parsing, respectively.
-- The function removes script and style tags from the HTML to avoid including unwanted
- content in the text output.
-- The function uses a generator expression to split the text into lines and chunks,
- which can improve performance for large amounts of text.
-"""
-
-
-class TestScrapeText:
- def test_scrape_text_with_valid_url(self, mocker, agent: Agent):
- """Tests that scrape_text() returns the expected text when given a valid URL."""
- # Mock the requests.get() method to return a response with expected text
- expected_text = "This is some sample text"
- mock_response = mocker.Mock()
- mock_response.status_code = 200
- mock_response.text = (
- "<html><body><div><p style='color: blue;'>"
- f"{expected_text}</p></div></body></html>"
- )
- mocker.patch("requests.Session.get", return_value=mock_response)
-
- # Call the function with a valid URL and assert that it returns the
- # expected text
- url = "http://www.example.com"
- assert scrape_text(url, agent) == expected_text
-
- def test_invalid_url(self, agent: Agent):
- """Tests that an error is raised when an invalid url is provided."""
- url = "invalidurl.com"
- pytest.raises(ValueError, scrape_text, url, agent)
-
- def test_unreachable_url(self, mocker, agent: Agent):
- """Test that scrape_text returns an error message when an invalid or unreachable url is provided."""
- # Mock the requests.get() method to raise an exception
- mocker.patch(
- "requests.Session.get", side_effect=requests.exceptions.RequestException
- )
-
- # Call the function with an invalid URL and assert that it returns an error
- # message
- url = "http://thiswebsitedoesnotexist.net/"
- error_message = scrape_text(url, agent)
- assert "Error:" in error_message
-
- def test_no_text(self, mocker, agent: Agent):
- """Test that scrape_text returns an empty string when the html page contains no text to be scraped."""
- # Mock the requests.get() method to return a response with no text
- mock_response = mocker.Mock()
- mock_response.status_code = 200
- mock_response.text = "<html><body></body></html>"
- mocker.patch("requests.Session.get", return_value=mock_response)
-
- # Call the function with a valid URL and assert that it returns an empty string
- url = "http://www.example.com"
- assert scrape_text(url, agent) == ""
-
- def test_http_error(self, mocker, agent: Agent):
- """Test that scrape_text returns an error message when the response status code is an http error (>=400)."""
- # Mock the requests.get() method to return a response with a 404 status code
- mocker.patch("requests.Session.get", return_value=mocker.Mock(status_code=404))
-
- # Call the function with a URL
- result = scrape_text("https://www.example.com", agent)
-
- # Check that the function returns an error message
- assert result == "Error: HTTP 404 error"
-
- def test_scrape_text_with_html_tags(self, mocker, agent: Agent):
- """Test that scrape_text() properly handles HTML tags."""
- # Create a mock response object with HTML containing tags
- html = "<html><body><p>This is <b>bold</b> text.</p></body></html>"
- mock_response = mocker.Mock()
- mock_response.status_code = 200
- mock_response.text = html
- mocker.patch("requests.Session.get", return_value=mock_response)
-
- # Call the function with a URL
- result = scrape_text("https://www.example.com", agent)
-
- # Check that the function properly handles HTML tags
- assert result == "This is bold text."
diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py
index 5779a8a3f..9b52ceadc 100644
--- a/tests/unit/test_commands.py
+++ b/tests/unit/test_commands.py
@@ -5,9 +5,13 @@ from pathlib import Path
import pytest
-from autogpt.commands.command import Command, CommandRegistry
+from autogpt.models.command import Command, CommandParameter
+from autogpt.models.command_registry import CommandRegistry
-SIGNATURE = "(arg1: int, arg2: str) -> str"
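+# Structured CommandParameter objects replace the old free-form signature string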
+PARAMETERS = [
+ CommandParameter("arg1", "int", description="Argument 1", required=True),
+ CommandParameter("arg2", "str", description="Argument 2", required=False),
+]
class TestCommand:
@@ -25,13 +29,16 @@ class TestCommand:
name="example",
description="Example command",
method=self.example_command_method,
- signature=SIGNATURE,
+ parameters=PARAMETERS,
)
assert cmd.name == "example"
assert cmd.description == "Example command"
assert cmd.method == self.example_command_method
- assert cmd.signature == "(arg1: int, arg2: str) -> str"
+ assert (
+ str(cmd)
+ == "example: Example command, params: (arg1: int, arg2: Optional[str])"
+ )
def test_command_call(self):
"""Test that Command(*args) calls and returns the result of method(*args)."""
@@ -40,6 +47,14 @@ class TestCommand:
name="example",
description="Example command",
method=self.example_command_method,
+ parameters=[
+ CommandParameter(
+ name="prompt",
+ type="string",
+ description="The prompt used to generate the image",
+ required=True,
+ ),
+ ],
)
result = cmd(arg1=1, arg2="test")
assert result == "1 - test"
@@ -50,22 +65,11 @@ class TestCommand:
name="example",
description="Example command",
method=self.example_command_method,
- signature=SIGNATURE,
+ parameters=PARAMETERS,
)
with pytest.raises(TypeError):
cmd(arg1="invalid", does_not_exist="test")
- def test_command_custom_signature(self):
- custom_signature = "custom_arg1: int, custom_arg2: str"
- cmd = Command(
- name="example",
- description="Example command",
- method=self.example_command_method,
- signature=custom_signature,
- )
-
- assert cmd.signature == custom_signature
-
class TestCommandRegistry:
@staticmethod
@@ -79,7 +83,7 @@ class TestCommandRegistry:
name="example",
description="Example command",
method=self.example_command_method,
- signature=SIGNATURE,
+ parameters=PARAMETERS,
)
registry.register(cmd)
@@ -94,7 +98,7 @@ class TestCommandRegistry:
name="example",
description="Example command",
method=self.example_command_method,
- signature=SIGNATURE,
+ parameters=PARAMETERS,
)
registry.register(cmd)
@@ -109,7 +113,7 @@ class TestCommandRegistry:
name="example",
description="Example command",
method=self.example_command_method,
- signature=SIGNATURE,
+ parameters=PARAMETERS,
)
registry.register(cmd)
@@ -131,7 +135,7 @@ class TestCommandRegistry:
name="example",
description="Example command",
method=self.example_command_method,
- signature=SIGNATURE,
+ parameters=PARAMETERS,
)
registry.register(cmd)
@@ -153,13 +157,13 @@ class TestCommandRegistry:
name="example",
description="Example command",
method=self.example_command_method,
- signature=SIGNATURE,
+ parameters=PARAMETERS,
)
registry.register(cmd)
command_prompt = registry.command_prompt()
- assert f"(arg1: int, arg2: str)" in command_prompt
+ assert f"(arg1: int, arg2: Optional[str])" in command_prompt
def test_import_mock_commands_module(self):
"""Test that the registry can import a module with mock command plugins."""
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index 9a95cef1a..1903fd162 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -1,5 +1,5 @@
"""
-Test cases for the Config class, which handles the configuration settings
+Test cases for the config class, which handles the configuration settings
for the AI and ensures it behaves as a singleton.
"""
from unittest import mock
@@ -7,14 +7,14 @@ from unittest.mock import patch
import pytest
-from autogpt.config.config import Config
+from autogpt.config import Config
from autogpt.configurator import GPT_3_MODEL, GPT_4_MODEL, create_config
from autogpt.workspace.workspace import Workspace
def test_initial_values(config: Config):
"""
- Test if the initial values of the Config class attributes are set correctly.
+ Test if the initial values of the config class attributes are set correctly.
"""
assert config.debug_mode == False
assert config.continuous_mode == False
@@ -30,11 +30,11 @@ def test_set_continuous_mode(config: Config):
# Store continuous mode to reset it after the test
continuous_mode = config.continuous_mode
- config.set_continuous_mode(True)
+ config.continuous_mode = True
assert config.continuous_mode == True
# Reset continuous mode
- config.set_continuous_mode(continuous_mode)
+ config.continuous_mode = continuous_mode
def test_set_speak_mode(config: Config):
@@ -44,11 +44,11 @@ def test_set_speak_mode(config: Config):
# Store speak mode to reset it after the test
speak_mode = config.speak_mode
- config.set_speak_mode(True)
+ config.speak_mode = True
assert config.speak_mode == True
# Reset speak mode
- config.set_speak_mode(speak_mode)
+ config.speak_mode = speak_mode
def test_set_fast_llm_model(config: Config):
@@ -58,11 +58,11 @@ def test_set_fast_llm_model(config: Config):
# Store model name to reset it after the test
fast_llm_model = config.fast_llm_model
- config.set_fast_llm_model("gpt-3.5-turbo-test")
+ config.fast_llm_model = "gpt-3.5-turbo-test"
assert config.fast_llm_model == "gpt-3.5-turbo-test"
# Reset model name
- config.set_fast_llm_model(fast_llm_model)
+ config.fast_llm_model = fast_llm_model
def test_set_smart_llm_model(config: Config):
@@ -72,11 +72,11 @@ def test_set_smart_llm_model(config: Config):
# Store model name to reset it after the test
smart_llm_model = config.smart_llm_model
- config.set_smart_llm_model("gpt-4-test")
+ config.smart_llm_model = "gpt-4-test"
assert config.smart_llm_model == "gpt-4-test"
# Reset model name
- config.set_smart_llm_model(smart_llm_model)
+ config.smart_llm_model = smart_llm_model
def test_set_debug_mode(config: Config):
@@ -86,11 +86,11 @@ def test_set_debug_mode(config: Config):
# Store debug mode to reset it after the test
debug_mode = config.debug_mode
- config.set_debug_mode(True)
+ config.debug_mode = True
assert config.debug_mode == True
# Reset debug mode
- config.set_debug_mode(debug_mode)
+ config.debug_mode = debug_mode
@patch("openai.Model.list")
@@ -127,22 +127,22 @@ def test_smart_and_fast_llm_models_set_to_gpt4(mock_list_models, config: Config)
assert config.smart_llm_model == "gpt-3.5-turbo"
# Reset config
- config.set_fast_llm_model(fast_llm_model)
- config.set_smart_llm_model(smart_llm_model)
+ config.fast_llm_model = fast_llm_model
+ config.smart_llm_model = smart_llm_model
def test_missing_azure_config(config: Config, workspace: Workspace):
config_file = workspace.get_path("azure_config.yaml")
with pytest.raises(FileNotFoundError):
- config.load_azure_config(str(config_file))
+ Config.load_azure_config(str(config_file))
config_file.write_text("")
- config.load_azure_config(str(config_file))
+ azure_config = Config.load_azure_config(str(config_file))
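+ # Loading an empty azure_config.yaml should yield the default Azure settings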
- assert config.openai_api_type == "azure"
- assert config.openai_api_base == ""
- assert config.openai_api_version == "2023-03-15-preview"
- assert config.azure_model_to_deployment_id_map == {}
+ assert azure_config["openai_api_type"] == "azure"
+ assert azure_config["openai_api_base"] == ""
+ assert azure_config["openai_api_version"] == "2023-03-15-preview"
+ assert azure_config["azure_model_to_deployment_id_map"] == {}
def test_create_config_gpt4only(config: Config) -> None:
@@ -170,8 +170,8 @@ def test_create_config_gpt4only(config: Config) -> None:
assert config.smart_llm_model == GPT_4_MODEL
# Reset config
- config.set_fast_llm_model(fast_llm_model)
- config.set_smart_llm_model(smart_llm_model)
+ config.fast_llm_model = fast_llm_model
+ config.smart_llm_model = smart_llm_model
def test_create_config_gpt3only(config: Config) -> None:
@@ -199,5 +199,5 @@ def test_create_config_gpt3only(config: Config) -> None:
assert config.smart_llm_model == GPT_3_MODEL
# Reset config
- config.set_fast_llm_model(fast_llm_model)
- config.set_smart_llm_model(smart_llm_model)
+ config.fast_llm_model = fast_llm_model
+ config.smart_llm_model = smart_llm_model
diff --git a/tests/unit/test_file_operations.py b/tests/unit/test_file_operations.py
index 27af93737..b82831113 100644
--- a/tests/unit/test_file_operations.py
+++ b/tests/unit/test_file_operations.py
@@ -13,9 +13,9 @@ from pytest_mock import MockerFixture
import autogpt.commands.file_operations as file_ops
from autogpt.agent.agent import Agent
+from autogpt.config import Config
from autogpt.memory.vector.memory_item import MemoryItem
from autogpt.memory.vector.utils import Embedding
-from autogpt.utils import readable_file_size
from autogpt.workspace import Workspace
@@ -25,11 +25,13 @@ def file_content():
@pytest.fixture()
-def mock_MemoryItem_from_text(mocker: MockerFixture, mock_embedding: Embedding):
+def mock_MemoryItem_from_text(
+ mocker: MockerFixture, mock_embedding: Embedding, config: Config
+):
mocker.patch.object(
file_ops.MemoryItem,
"from_text",
- new=lambda content, source_type, metadata: MemoryItem(
+ new=lambda content, source_type, config, metadata: MemoryItem(
raw_content=content,
summary=f"Summary of content '{content}'",
chunk_summaries=[f"Summary of content '{content}'"],
@@ -243,53 +245,6 @@ def test_write_file_succeeds_if_content_different(
assert result == "File written to successfully."
-# Update file testing
-def test_replace_in_file_all_occurrences(test_file, test_file_path, agent: Agent):
- old_content = "This is a test file.\n we test file here\na test is needed"
- expected_content = (
- "This is a update file.\n we update file here\na update is needed"
- )
- test_file.write(old_content)
- test_file.close()
- file_ops.replace_in_file(test_file_path, "test", "update", agent=agent)
- with open(test_file_path) as f:
- new_content = f.read()
- print(new_content)
- print(expected_content)
- assert new_content == expected_content
-
-
-def test_replace_in_file_one_occurrence(test_file, test_file_path, agent: Agent):
- old_content = "This is a test file.\n we test file here\na test is needed"
- expected_content = "This is a test file.\n we update file here\na test is needed"
- test_file.write(old_content)
- test_file.close()
- file_ops.replace_in_file(
- test_file_path, "test", "update", agent=agent, occurrence_index=1
- )
- with open(test_file_path) as f:
- new_content = f.read()
-
- assert new_content == expected_content
-
-
-def test_replace_in_file_multiline_old_text(test_file, test_file_path, agent: Agent):
- old_content = "This is a multi_line\ntest for testing\nhow well this function\nworks when the input\nis multi-lined"
- expected_content = "This is a multi_line\nfile. succeeded test\nis multi-lined"
- test_file.write(old_content)
- test_file.close()
- file_ops.replace_in_file(
- test_file_path,
- "\ntest for testing\nhow well this function\nworks when the input\n",
- "\nfile. succeeded test\n",
- agent=agent,
- )
- with open(test_file_path) as f:
- new_content = f.read()
-
- assert new_content == expected_content
-
-
def test_append_to_file(test_nested_file: Path, agent: Agent):
append_text = "This is appended text.\n"
file_ops.write_to_file(test_nested_file, append_text, agent=agent)
@@ -373,26 +328,3 @@ def test_list_files(workspace: Workspace, test_directory: Path, agent: Agent):
non_existent_file = "non_existent_file.txt"
files = file_ops.list_files("", agent=agent)
assert non_existent_file not in files
-
-
-def test_download_file(workspace: Workspace, agent: Agent):
- url = "https://github.com/Significant-Gravitas/Auto-GPT/archive/refs/tags/v0.2.2.tar.gz"
- local_name = workspace.get_path("auto-gpt.tar.gz")
- size = 365023
- readable_size = readable_file_size(size)
- assert (
- file_ops.download_file(url, local_name, agent=agent)
- == f'Successfully downloaded and locally stored file: "{local_name}"! (Size: {readable_size})'
- )
- assert os.path.isfile(local_name) is True
- assert os.path.getsize(local_name) == size
-
- url = "https://github.com/Significant-Gravitas/Auto-GPT/archive/refs/tags/v0.0.0.tar.gz"
- assert "Got an HTTP Error whilst trying to download file" in file_ops.download_file(
- url, local_name, agent=agent
- )
-
- url = "https://thiswebsiteiswrong.hmm/v0.0.0.tar.gz"
- assert "Failed to establish a new connection:" in file_ops.download_file(
- url, local_name, agent=agent
- )
diff --git a/tests/unit/test_get_self_feedback.py b/tests/unit/test_get_self_feedback.py
deleted file mode 100644
index ba3e10fec..000000000
--- a/tests/unit/test_get_self_feedback.py
+++ /dev/null
@@ -1,62 +0,0 @@
-from datetime import datetime
-
-from pytest_mock import MockerFixture
-
-from autogpt.agent.agent import Agent
-from autogpt.config import AIConfig
-from autogpt.config.config import Config
-from autogpt.llm.chat import create_chat_completion
-from autogpt.log_cycle.log_cycle import LogCycleHandler
-
-
-def test_get_self_feedback(config: Config, mocker: MockerFixture):
- # Define a sample thoughts dictionary
- thoughts = {
- "reasoning": "Sample reasoning.",
- "plan": "Sample plan.",
- "thoughts": "Sample thoughts.",
- }
-
- # Define a fake response for the create_chat_completion function
- fake_response = (
- "The AI Agent has demonstrated a reasonable thought process, but there is room for improvement. "
- "For example, the reasoning could be elaborated to better justify the plan, and the plan itself "
- "could be more detailed to ensure its effectiveness. In addition, the AI Agent should focus more "
- "on its core role and prioritize thoughts that align with that role."
- )
-
- # Mock the create_chat_completion function
- mock_create_chat_completion = mocker.patch(
- "autogpt.agent.agent.create_chat_completion", wraps=create_chat_completion
- )
- mock_create_chat_completion.return_value = fake_response
-
- # Create a MagicMock object to replace the Agent instance
- agent_mock = mocker.MagicMock(spec=Agent)
-
- # Mock the config attribute of the Agent instance
- agent_mock.config = config
- agent_mock.ai_config = AIConfig()
-
- # Mock the log_cycle_handler attribute of the Agent instance
- agent_mock.log_cycle_handler = LogCycleHandler()
-
- # Mock the create_nested_directory method of the LogCycleHandler instance
- agent_mock.created_at = datetime.now().strftime("%Y%m%d_%H%M%S")
-
- # Mock the cycle_count attribute of the Agent instance
- agent_mock.cycle_count = 0
-
- # Call the get_self_feedback method
- feedback = Agent.get_self_feedback(
- agent_mock,
- thoughts,
- "gpt-3.5-turbo",
- )
-
- # Check if the response is a non-empty string
- assert isinstance(feedback, str) and len(feedback) > 0
-
- # Check if certain keywords from input thoughts are present in the feedback response
- for keyword in ["reasoning", "plan", "thoughts"]:
- assert keyword in feedback
diff --git a/tests/unit/test_make_agent.py b/tests/unit/test_make_agent.py
deleted file mode 100644
index 61a7a6f5d..000000000
--- a/tests/unit/test_make_agent.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from unittest.mock import MagicMock
-
-from pytest_mock import MockerFixture
-
-from autogpt.agent.agent import Agent
-from autogpt.app import list_agents, start_agent
-
-
-def test_make_agent(agent: Agent, mocker: MockerFixture) -> None:
- """Test that an agent can be created"""
- mock = mocker.patch("openai.ChatCompletion.create")
-
- response = MagicMock()
- response.choices[0].message.content = "Test message"
- response.usage.prompt_tokens = 1
- response.usage.completion_tokens = 1
- del response.error
-
- mock.return_value = response
- start_agent("Test Agent", "chat", "Hello, how are you?", agent, "gpt-3.5-turbo")
- agents = list_agents(agent)
- assert "List of agents:\n0: chat" == agents
- start_agent("Test Agent 2", "write", "Hello, how are you?", agent, "gpt-3.5-turbo")
- agents = list_agents(agent.config)
- assert "List of agents:\n0: chat\n1: write" == agents
diff --git a/tests/unit/test_message_history.py b/tests/unit/test_message_history.py
index 6fdf75e61..a3650005e 100644
--- a/tests/unit/test_message_history.py
+++ b/tests/unit/test_message_history.py
@@ -7,7 +7,7 @@ import pytest
from autogpt.agent import Agent
from autogpt.config import AIConfig
from autogpt.config.config import Config
-from autogpt.llm.base import ChatSequence, Message
+from autogpt.llm.base import ChatModelResponse, ChatSequence, Message
from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
from autogpt.llm.utils import count_string_tokens
from autogpt.memory.message_history import MessageHistory
@@ -38,18 +38,21 @@ def agent(config: Config):
return agent
-def test_message_history_batch_summary(mocker, agent):
- config = Config()
+def test_message_history_batch_summary(mocker, agent, config):
history = MessageHistory(agent)
model = config.fast_llm_model
message_tlength = 0
message_count = 0
# Setting the mock output and inputs
- mock_summary_text = "I executed browse_website command for each of the websites returned from Google search, but none of them have any job openings."
+ mock_summary_response = ChatModelResponse(
+ model_info=OPEN_AI_CHAT_MODELS[model],
+ content="I executed browse_website command for each of the websites returned from Google search, but none of them have any job openings.",
+ function_call={},
+ )
mock_summary = mocker.patch(
"autogpt.memory.message_history.create_chat_completion",
- return_value=mock_summary_text,
+ return_value=mock_summary_response,
)
system_prompt = 'You are AIJobSearcher, an AI designed to search for job openings for software engineer role\nYour decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.\n\nGOALS:\n\n1. Find any job openings for software engineers online\n2. Go through each of the websites and job openings to summarize their requirements and URL, and skip that if you already visit the website\n\nIt takes money to let you run. Your API budget is $5.000\n\nConstraints:\n1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.\n2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.\n3. No user assistance\n4. Exclusively use the commands listed in double quotes e.g. "command name"\n\nCommands:\n1. google_search: Google Search, args: "query": "<query>"\n2. browse_website: Browse Website, args: "url": "<url>", "question": "<what_you_want_to_find_on_website>"\n3. task_complete: Task Complete (Shutdown), args: "reason": "<reason>"\n\nResources:\n1. Internet access for searches and information gathering.\n2. Long Term memory management.\n3. GPT-3.5 powered Agents for delegation of simple tasks.\n4. File output.\n\nPerformance Evaluation:\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\n2. Constructively self-criticize your big-picture behavior constantly.\n3. Reflect on past decisions and strategies to refine your approach.\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.\n5. Write all code to a file.\n\nYou should only respond in JSON format as described below \nResponse Format: \n{\n "thoughts": {\n "text": "thought",\n "reasoning": "reasoning",\n "plan": "- short bulleted\\n- list that conveys\\n- long-term plan",\n "criticism": "constructive self-criticism",\n "speak": "thoughts summary to say to user"\n },\n "command": {\n "name": "command name",\n "args": {\n "arg name": "value"\n }\n }\n} \nEnsure the response can be parsed by Python json.loads'
@@ -114,7 +117,7 @@ def test_message_history_batch_summary(mocker, agent):
history.append(user_input_msg)
# only take the last cycle of the message history, trim the rest of previous messages, and generate a summary for them
- for cycle in reversed(list(history.per_cycle())):
+ for cycle in reversed(list(history.per_cycle(config))):
messages_to_add = [msg for msg in cycle if msg is not None]
message_sequence.insert(insertion_index, *messages_to_add)
break
@@ -127,7 +130,7 @@ def test_message_history_batch_summary(mocker, agent):
# test the main trim_message function
new_summary_message, trimmed_messages = history.trim_messages(
- current_message_chain=list(message_sequence),
+ current_message_chain=list(message_sequence), config=config
)
expected_call_count = math.ceil(
@@ -140,6 +143,6 @@ def test_message_history_batch_summary(mocker, agent):
assert new_summary_message == Message(
role="system",
content="This reminds you of these events from your past: \n"
- + mock_summary_text,
+ + mock_summary_response.content,
type=None,
)
diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py
index 80aa1b9dd..24b7d1e9a 100644
--- a/tests/unit/test_plugins.py
+++ b/tests/unit/test_plugins.py
@@ -5,6 +5,7 @@ import yaml
from autogpt.config.config import Config
from autogpt.plugins import inspect_zip_for_modules, scan_plugins
from autogpt.plugins.plugin_config import PluginConfig
+from autogpt.plugins.plugins_config import PluginsConfig
PLUGINS_TEST_DIR = "tests/unit/data/test_plugins"
PLUGIN_TEST_ZIP_FILE = "Auto-GPT-Plugin-Test-master.zip"
@@ -69,7 +70,7 @@ def test_create_base_config(config: Config):
config.plugins_denylist = ["c", "d"]
os.remove(config.plugins_config_file)
- plugins_config = config.load_plugins_config()
+ plugins_config = PluginsConfig.load_config(global_config=config)
# Check the structure of the plugins config data
assert len(plugins_config.plugins) == 4
@@ -101,7 +102,7 @@ def test_load_config(config: Config):
f.write(yaml.dump(test_config))
# Load the config from disk
- plugins_config = config.load_plugins_config()
+ plugins_config = PluginsConfig.load_config(global_config=config)
# Check that the loaded config is equal to the test config
assert len(plugins_config.plugins) == 2
diff --git a/tests/unit/test_prompt_config.py b/tests/unit/test_prompt_config.py
index eacb0cf4f..4616db971 100644
--- a/tests/unit/test_prompt_config.py
+++ b/tests/unit/test_prompt_config.py
@@ -23,10 +23,10 @@ performance_evaluations:
- Another test performance evaluation
- A third test performance evaluation
"""
- config_file = tmp_path / "test_prompt_settings.yaml"
- config_file.write_text(yaml_content)
+ prompt_settings_file = tmp_path / "test_prompt_settings.yaml"
+ prompt_settings_file.write_text(yaml_content)
- prompt_config = PromptConfig(config_file)
+ prompt_config = PromptConfig(prompt_settings_file)
assert len(prompt_config.constraints) == 3
assert prompt_config.constraints[0] == "A test constraint"
diff --git a/tests/unit/test_prompt_generator.py b/tests/unit/test_prompt_generator.py
index 1fa1754d7..c5ffaf78c 100644
--- a/tests/unit/test_prompt_generator.py
+++ b/tests/unit/test_prompt_generator.py
@@ -1,115 +1,152 @@
-from unittest import TestCase
-
from autogpt.prompts.generator import PromptGenerator
-class TestPromptGenerator(TestCase):
+def test_add_constraint():
+ """
+ Test if the add_constraint() method adds a constraint to the generator's constraints list.
+ """
+ constraint = "Constraint1"
+ generator = PromptGenerator()
+ generator.add_constraint(constraint)
+ assert constraint in generator.constraints
+
+
+def test_add_command():
+ """
+ Test if the add_command() method adds a command to the generator's commands list.
+ """
+ command_label = "Command Label"
+ command_name = "command_name"
+ args = {"arg1": "value1", "arg2": "value2"}
+ generator = PromptGenerator()
+ generator.add_command(command_label, command_name, args)
+ command = {
+ "label": command_label,
+ "name": command_name,
+ "args": args,
+ "function": None,
+ }
+ assert command in generator.commands
+
+
+def test_add_resource():
+ """
+ Test if the add_resource() method adds a resource to the generator's resources list.
+ """
+ resource = "Resource1"
+ generator = PromptGenerator()
+ generator.add_resource(resource)
+ assert resource in generator.resources
+
+
+def test_add_performance_evaluation():
+ """
+ Test if the add_performance_evaluation() method adds an evaluation to the generator's
+ performance_evaluation list.
+ """
+ evaluation = "Evaluation1"
+ generator = PromptGenerator()
+ generator.add_performance_evaluation(evaluation)
+ assert evaluation in generator.performance_evaluation
+
+
+def test_generate_prompt_string(config):
+ """
+ Test if the generate_prompt_string() method generates a prompt string with all the added
+ constraints, commands, resources, and evaluations.
+ """
+
+ # Define the test data
+ constraints = ["Constraint1", "Constraint2"]
+ commands = [
+ {
+ "label": "Command1",
+ "name": "command_name1",
+ "args": {"arg1": "value1"},
+ },
+ {
+ "label": "Command2",
+ "name": "command_name2",
+ "args": {},
+ },
+ ]
+ resources = ["Resource1", "Resource2"]
+ evaluations = ["Evaluation1", "Evaluation2"]
+
+ # Add test data to the generator
+ generator = PromptGenerator()
+ for constraint in constraints:
+ generator.add_constraint(constraint)
+ for command in commands:
+ generator.add_command(command["label"], command["name"], command["args"])
+ for resource in resources:
+ generator.add_resource(resource)
+ for evaluation in evaluations:
+ generator.add_performance_evaluation(evaluation)
+
+ # Generate the prompt string and verify its correctness
+ prompt_string = generator.generate_prompt_string(config)
+ assert prompt_string is not None
+
+ # Check if all constraints, commands, resources, and evaluations are present in the prompt string
+ for constraint in constraints:
+ assert constraint in prompt_string
+ for command in commands:
+ assert command["name"] in prompt_string
+ for key, value in command["args"].items():
+ assert f'"{key}": "{value}"' in prompt_string
+ for resource in resources:
+ assert resource in prompt_string
+ for evaluation in evaluations:
+ assert evaluation in prompt_string
+
+
+def test_generate_prompt_string(config):
"""
- Test cases for the PromptGenerator class, which is responsible for generating
- prompts for the AI with constraints, commands, resources, and performance evaluations.
+ Test if the generate_prompt_string() method generates a prompt string with all the added
+ constraints, commands, resources, and evaluations.
"""
- @classmethod
- def setUpClass(cls):
- """
- Set up the initial state for each test method by creating an instance of PromptGenerator.
- """
- cls.generator = PromptGenerator()
-
- # Test whether the add_constraint() method adds a constraint to the generator's constraints list
- def test_add_constraint(self):
- """
- Test if the add_constraint() method adds a constraint to the generator's constraints list.
- """
- constraint = "Constraint1"
- self.generator.add_constraint(constraint)
- self.assertIn(constraint, self.generator.constraints)
-
- # Test whether the add_command() method adds a command to the generator's commands list
- def test_add_command(self):
- """
- Test if the add_command() method adds a command to the generator's commands list.
- """
- command_label = "Command Label"
- command_name = "command_name"
- args = {"arg1": "value1", "arg2": "value2"}
- self.generator.add_command(command_label, command_name, args)
- command = {
- "label": command_label,
- "name": command_name,
- "args": args,
- "function": None,
- }
- self.assertIn(command, self.generator.commands)
-
- def test_add_resource(self):
- """
- Test if the add_resource() method adds a resource to the generator's resources list.
- """
- resource = "Resource1"
- self.generator.add_resource(resource)
- self.assertIn(resource, self.generator.resources)
-
- def test_add_performance_evaluation(self):
- """
- Test if the add_performance_evaluation() method adds an evaluation to the generator's
- performance_evaluation list.
- """
- evaluation = "Evaluation1"
- self.generator.add_performance_evaluation(evaluation)
- self.assertIn(evaluation, self.generator.performance_evaluation)
-
- def test_generate_prompt_string(self):
- """
- Test if the generate_prompt_string() method generates a prompt string with all the added
- constraints, commands, resources, and evaluations.
- """
- # Define the test data
- constraints = ["Constraint1", "Constraint2"]
- commands = [
- {
- "label": "Command1",
- "name": "command_name1",
- "args": {"arg1": "value1"},
- },
- {
- "label": "Command2",
- "name": "command_name2",
- "args": {},
- },
- ]
- resources = ["Resource1", "Resource2"]
- evaluations = ["Evaluation1", "Evaluation2"]
-
- # Add test data to the generator
- for constraint in constraints:
- self.generator.add_constraint(constraint)
- for command in commands:
- self.generator.add_command(
- command["label"], command["name"], command["args"]
- )
- for resource in resources:
- self.generator.add_resource(resource)
- for evaluation in evaluations:
- self.generator.add_performance_evaluation(evaluation)
-
- # Generate the prompt string and verify its correctness
- prompt_string = self.generator.generate_prompt_string()
- self.assertIsNotNone(prompt_string)
-
- # Check if all constraints, commands, resources, and evaluations are present in the prompt string
- for constraint in constraints:
- self.assertIn(constraint, prompt_string)
- for command in commands:
- self.assertIn(command["name"], prompt_string)
- for key, value in command["args"].items():
- self.assertIn(f'"{key}": "{value}"', prompt_string)
- for resource in resources:
- self.assertIn(resource, prompt_string)
- for evaluation in evaluations:
- self.assertIn(evaluation, prompt_string)
-
- self.assertIn("constraints", prompt_string.lower())
- self.assertIn("commands", prompt_string.lower())
- self.assertIn("resources", prompt_string.lower())
- self.assertIn("performance evaluation", prompt_string.lower())
+ # Define the test data
+ constraints = ["Constraint1", "Constraint2"]
+ commands = [
+ {
+ "label": "Command1",
+ "name": "command_name1",
+ "args": {"arg1": "value1"},
+ },
+ {
+ "label": "Command2",
+ "name": "command_name2",
+ "args": {},
+ },
+ ]
+ resources = ["Resource1", "Resource2"]
+ evaluations = ["Evaluation1", "Evaluation2"]
+
+ # Add test data to the generator
+ generator = PromptGenerator()
+ for constraint in constraints:
+ generator.add_constraint(constraint)
+ for command in commands:
+ generator.add_command(command["label"], command["name"], command["args"])
+ for resource in resources:
+ generator.add_resource(resource)
+ for evaluation in evaluations:
+ generator.add_performance_evaluation(evaluation)
+
+ # Generate the prompt string and verify its correctness
+ prompt_string = generator.generate_prompt_string(config)
+ assert prompt_string is not None
+
+ # Check if all constraints, commands, resources, and evaluations are present in the prompt string
+ for constraint in constraints:
+ assert constraint in prompt_string
+ for command in commands:
+ assert command["name"] in prompt_string
+ for key, value in command["args"].items():
+ assert f'"{key}": "{value}"' in prompt_string
+ for resource in resources:
+ assert resource in prompt_string
+ for evaluation in evaluations:
+ assert evaluation in prompt_string
diff --git a/tests/unit/test_retry_provider_openai.py b/tests/unit/test_retry_provider_openai.py
new file mode 100644
index 000000000..b2c2d04aa
--- /dev/null
+++ b/tests/unit/test_retry_provider_openai.py
@@ -0,0 +1,135 @@
+import pytest
+from openai.error import APIError, RateLimitError, ServiceUnavailableError
+
+from autogpt.llm.providers import openai
+
+
+@pytest.fixture(params=[RateLimitError, ServiceUnavailableError, APIError])
+def error(request):
+ if request.param == APIError:
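+ # APIError needs an explicit HTTP status; 502 is the bad-gateway case the retry logic treats as transient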
+ return request.param("Error", http_status=502)
+ else:
+ return request.param("Error")
+
+
+def error_factory(error_instance, error_count, retry_count, warn_user=True):
+ """Creates errors"""
+
+ class RaisesError:
+ def __init__(self):
+ self.count = 0
+
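+ # a tiny backoff_base keeps retry delays negligible while still exercising the decorator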
+ @openai.retry_api(
+ num_retries=retry_count, backoff_base=0.001, warn_user=warn_user
+ )
+ def __call__(self):
+ self.count += 1
+ if self.count <= error_count:
+ raise error_instance
+ return self.count
+
+ return RaisesError()
+
+
+def test_retry_open_api_no_error(capsys):
+ """Tests the retry functionality with no errors expected"""
+
+ @openai.retry_api()
+ def f():
+ return 1
+
+ result = f()
+ assert result == 1
+
+ output = capsys.readouterr()
+ assert output.out == ""
+ assert output.err == ""
+
+
+@pytest.mark.parametrize(
+ "error_count, retry_count, failure",
+ [(2, 10, False), (2, 2, False), (10, 2, True), (3, 2, True), (1, 0, True)],
+ ids=["passing", "passing_edge", "failing", "failing_edge", "failing_no_retries"],
+)
+def test_retry_open_api_passing(capsys, error, error_count, retry_count, failure):
+ """Tests the retry with simulated errors [RateLimitError, ServiceUnavailableError, APIError], but should ulimately pass"""
+ call_count = min(error_count, retry_count) + 1
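+ # one call per simulated error (capped at retry_count) plus a final call that either succeeds or re-raises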
+
+ raises = error_factory(error, error_count, retry_count)
+ if failure:
+ with pytest.raises(type(error)):
+ raises()
+ else:
+ result = raises()
+ assert result == call_count
+
+ assert raises.count == call_count
+
+ output = capsys.readouterr()
+
+ if error_count and retry_count:
+ if type(error) == RateLimitError:
+ assert "Reached rate limit, passing..." in output.out
+ assert "Please double check" in output.out
+ if type(error) == ServiceUnavailableError:
+ assert (
+ "The OpenAI API engine is currently overloaded, passing..."
+ in output.out
+ )
+ assert "Please double check" in output.out
+ if type(error) == APIError:
+ assert "API Bad gateway" in output.out
+ else:
+ assert output.out == ""
+
+
+def test_retry_open_api_rate_limit_no_warn(capsys):
+ """Tests the retry logic with a rate limit error"""
+ error_count = 2
+ retry_count = 10
+
+ raises = error_factory(RateLimitError, error_count, retry_count, warn_user=False)
+ result = raises()
+ call_count = min(error_count, retry_count) + 1
+ assert result == call_count
+ assert raises.count == call_count
+
+ output = capsys.readouterr()
+
+ assert "Reached rate limit, passing..." in output.out
+ assert "Please double check" not in output.out
+
+
+def test_retry_open_api_service_unavailable_no_warn(capsys):
+ """Tests the retry logic with a service unavailable error and user warnings disabled"""
+ error_count = 2
+ retry_count = 10
+
+ raises = error_factory(
+ ServiceUnavailableError, error_count, retry_count, warn_user=False
+ )
+ result = raises()
+ call_count = min(error_count, retry_count) + 1
+ assert result == call_count
+ assert raises.count == call_count
+
+ output = capsys.readouterr()
+
+ assert "The OpenAI API engine is currently overloaded, passing..." in output.out
+ assert "Please double check" not in output.out
+
+
+def test_retry_openapi_other_api_error(capsys):
+ """Tests the Retry logic with a non rate limit error such as HTTP500"""
+ error_count = 2
+ retry_count = 10
+
+ raises = error_factory(APIError("Error", http_status=500), error_count, retry_count)
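+ # an HTTP 500 is not the retried bad-gateway case, so the error propagates after a single call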
+
+ with pytest.raises(APIError):
+ raises()
+ call_count = 1
+ assert raises.count == call_count
+
+ output = capsys.readouterr()
+ assert output.out == ""
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index f9a471c25..0258cc490 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -4,6 +4,7 @@ from unittest.mock import patch
import pytest
import requests
+from autogpt.config import Config
from autogpt.json_utils.utilities import extract_json_from_response, validate_json
from autogpt.utils import (
get_bulletin_from_web,
@@ -185,12 +186,12 @@ def test_get_current_git_branch_failure(mock_repo):
assert branch_name == ""
-def test_validate_json_valid(valid_json_response):
- assert validate_json(valid_json_response)
+def test_validate_json_valid(valid_json_response, config: Config):
+ assert validate_json(valid_json_response, config)
-def test_validate_json_invalid(invalid_json_response):
- assert not validate_json(valid_json_response)
+def test_validate_json_invalid(invalid_json_response, config: Config):
+ assert not validate_json(invalid_json_response, config)
def test_extract_json_from_response(valid_json_response: dict):
diff --git a/tests/unit/test_google_search.py b/tests/unit/test_web_search.py
index 3f039fdb4..4f5143069 100644
--- a/tests/unit/test_google_search.py
+++ b/tests/unit/test_web_search.py
@@ -4,11 +4,7 @@ import pytest
from googleapiclient.errors import HttpError
from autogpt.agent.agent import Agent
-from autogpt.commands.google_search import (
- google_official_search,
- google_search,
- safe_google_results,
-)
+from autogpt.commands.web_search import google, safe_google_results, web_search
@pytest.mark.parametrize(
@@ -45,8 +41,8 @@ def test_google_search(
mock_ddg = mocker.Mock()
mock_ddg.return_value = return_value
- mocker.patch("autogpt.commands.google_search.DDGS.text", mock_ddg)
- actual_output = google_search(query, agent=agent, num_results=num_results)
+ mocker.patch("autogpt.commands.web_search.DDGS.text", mock_ddg)
+ actual_output = web_search(query, agent=agent, num_results=num_results)
expected_output = safe_google_results(expected_output)
assert actual_output == expected_output
@@ -88,7 +84,7 @@ def test_google_official_search(
agent: Agent,
):
mock_googleapiclient.return_value = search_results
- actual_output = google_official_search(query, agent=agent, num_results=num_results)
+ actual_output = google(query, agent=agent, num_results=num_results)
assert actual_output == safe_google_results(expected_output)
@@ -136,5 +132,5 @@ def test_google_official_search_errors(
)
mock_googleapiclient.side_effect = error
- actual_output = google_official_search(query, agent=agent, num_results=num_results)
+ actual_output = google(query, agent=agent, num_results=num_results)
assert actual_output == safe_google_results(expected_output)
diff --git a/tests/utils.py b/tests/utils.py
index 2603dfe48..fac2816a3 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,39 +1,7 @@
-import functools
import os
-from contextlib import contextmanager
import pytest
-from autogpt.config import Config
-
-
-@contextmanager
-def dummy_openai_api_key():
- # even when we record the VCR cassettes, openAI wants an API key
- config = Config()
- original_api_key = config.openai_api_key
- config.set_openai_api_key("sk-dummy")
-
- try:
- yield
- finally:
- config.set_openai_api_key(original_api_key)
-
-
-def requires_api_key(env_var):
- def decorator(func):
- @functools.wraps(func)
- def wrapper(*args, **kwargs):
- if env_var == "OPENAI_API_KEY":
- if not os.environ.get(env_var) and env_var == "OPENAI_API_KEY":
- with dummy_openai_api_key():
- return func(*args, **kwargs)
- return func(*args, **kwargs)
-
- return wrapper
-
- return decorator
-
def skip_in_ci(test_function):
return pytest.mark.skipif(