Diffstat (limited to 'autogpts/autogpt/autogpt/memory/vector/memory_item.py')
-rw-r--r--  autogpts/autogpt/autogpt/memory/vector/memory_item.py  164
1 file changed, 94 insertions(+), 70 deletions(-)
diff --git a/autogpts/autogpt/autogpt/memory/vector/memory_item.py b/autogpts/autogpt/autogpt/memory/vector/memory_item.py
index 7ca8c7880..8d03d0209 100644
--- a/autogpts/autogpt/autogpt/memory/vector/memory_item.py
+++ b/autogpts/autogpt/autogpt/memory/vector/memory_item.py
@@ -9,7 +9,11 @@ import numpy as np
 from pydantic import BaseModel
 
 from autogpt.config import Config
-from autogpt.core.resource.model_providers import ChatMessage
+from autogpt.core.resource.model_providers import (
+    ChatMessage,
+    ChatModelProvider,
+    EmbeddingModelProvider,
+)
 from autogpt.processing.text import chunk_content, split_text, summarize_text
 
 from .utils import Embedding, get_embedding
@@ -19,7 +23,6 @@ logger = logging.getLogger(__name__)
 MemoryDocType = Literal["webpage", "text_file", "code_file", "agent_history"]
 
 
-# FIXME: implement validators instead of allowing arbitrary types
 class MemoryItem(BaseModel, arbitrary_types_allowed=True):
     """Memory object containing raw content as well as embeddings"""
 
@@ -34,8 +37,56 @@ class MemoryItem(BaseModel, arbitrary_types_allowed=True):
     def relevance_for(self, query: str, e_query: Embedding | None = None):
         return MemoryItemRelevance.of(self, query, e_query)
 
-    @staticmethod
-    def from_text(
+    def dump(self, calculate_length=False) -> str:
+        n_chunks = len(self.e_chunks)
+        return f"""
+=============== MemoryItem ===============
+Size: {n_chunks} chunks
+Metadata: {json.dumps(self.metadata, indent=2)}
+---------------- SUMMARY -----------------
+{self.summary}
+------------------ RAW -------------------
+{self.raw_content}
+==========================================
+"""
+
+    def __eq__(self, other: MemoryItem):
+        return (
+            self.raw_content == other.raw_content
+            and self.chunks == other.chunks
+            and self.chunk_summaries == other.chunk_summaries
+            # Embeddings can either be list[float] or np.ndarray[float32],
+            # and for comparison they must be of the same type
+            and np.array_equal(
+                self.e_summary
+                if isinstance(self.e_summary, np.ndarray)
+                else np.array(self.e_summary, dtype=np.float32),
+                other.e_summary
+                if isinstance(other.e_summary, np.ndarray)
+                else np.array(other.e_summary, dtype=np.float32),
+            )
+            and np.array_equal(
+                self.e_chunks
+                if isinstance(self.e_chunks[0], np.ndarray)
+                else [np.array(c, dtype=np.float32) for c in self.e_chunks],
+                other.e_chunks
+                if isinstance(other.e_chunks[0], np.ndarray)
+                else [np.array(c, dtype=np.float32) for c in other.e_chunks],
+            )
+        )
+
+
+class MemoryItemFactory:
+    def __init__(
+        self,
+        llm_provider: ChatModelProvider,
+        embedding_provider: EmbeddingModelProvider,
+    ):
+        self.llm_provider = llm_provider
+        self.embedding_provider = embedding_provider
+
+    async def from_text(
+        self,
         text: str,
         source_type: MemoryDocType,
         config: Config,
@@ -52,9 +103,19 @@ class MemoryItem(BaseModel, arbitrary_types_allowed=True):
         chunks = [
             chunk
             for chunk, _ in (
-                split_text(text, config.embedding_model, config)
+                split_text(
+                    text=text,
+                    config=config,
+                    max_chunk_length=1000,  # arbitrary, but shorter ~= better
+                    tokenizer=self.llm_provider.get_tokenizer(config.fast_llm),
+                )
                 if source_type != "code_file"
-                else chunk_content(text, config.embedding_model)
+                # TODO: chunk code based on structure/outline
+                else chunk_content(
+                    content=text,
+                    max_chunk_length=1000,
+                    tokenizer=self.llm_provider.get_tokenizer(config.fast_llm),
+                )
             )
         ]
         logger.debug("Chunks: " + str(chunks))
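Note for reviewers: `split_text` and `chunk_content` now take an explicit `tokenizer` and a token-denominated `max_chunk_length` instead of inferring limits from `config.embedding_model`. A minimal sketch of that token-budgeted chunking pattern, assuming a tiktoken-style tokenizer (the helper name `chunk_by_tokens` and the `cl100k_base` encoding are illustrative assumptions, not the project's actual `split_text` implementation, which also splits on sentence boundaries):

```python
import tiktoken


def chunk_by_tokens(text: str, max_chunk_length: int = 1000) -> list[str]:
    """Slice `text` into pieces of at most `max_chunk_length` tokens."""
    enc = tiktoken.get_encoding("cl100k_base")  # assumption: GPT-style tokenizer
    tokens = enc.encode(text)
    # Decode fixed-size windows of the token stream back into text chunks.
    return [
        enc.decode(tokens[i : i + max_chunk_length])
        for i in range(0, len(tokens), max_chunk_length)
    ]
```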
@@ -62,34 +123,38 @@ class MemoryItem(BaseModel, arbitrary_types_allowed=True):
         chunk_summaries = [
             summary
             for summary, _ in [
-                summarize_text(
-                    text_chunk,
-                    config,
+                await summarize_text(
+                    text=text_chunk,
                     instruction=how_to_summarize,
                     question=question_for_summary,
+                    llm_provider=self.llm_provider,
+                    config=config,
                 )
                 for text_chunk in chunks
             ]
         ]
         logger.debug("Chunk summaries: " + str(chunk_summaries))
 
-        e_chunks = get_embedding(chunks, config)
+        e_chunks = get_embedding(chunks, config, self.embedding_provider)
 
         summary = (
             chunk_summaries[0]
             if len(chunks) == 1
-            else summarize_text(
-                "\n\n".join(chunk_summaries),
-                config,
-                instruction=how_to_summarize,
-                question=question_for_summary,
+            else (
+                await summarize_text(
+                    text="\n\n".join(chunk_summaries),
+                    instruction=how_to_summarize,
+                    question=question_for_summary,
+                    llm_provider=self.llm_provider,
+                    config=config,
+                )
             )[0]
         )
         logger.debug("Total summary: " + summary)
 
         # TODO: investigate search performance of weighted average vs summary
         # e_average = np.average(e_chunks, axis=0, weights=[len(c) for c in chunks])
-        e_summary = get_embedding(summary, config)
+        e_summary = get_embedding(summary, config, self.embedding_provider)
 
         metadata["source_type"] = source_type
 
@@ -103,17 +168,14 @@ class MemoryItem(BaseModel, arbitrary_types_allowed=True):
             metadata=metadata,
         )
 
-    @staticmethod
-    def from_text_file(content: str, path: str, config: Config):
-        return MemoryItem.from_text(content, "text_file", config, {"location": path})
+    def from_text_file(self, content: str, path: str, config: Config):
+        return self.from_text(content, "text_file", config, {"location": path})
 
-    @staticmethod
-    def from_code_file(content: str, path: str):
+    def from_code_file(self, content: str, path: str):
         # TODO: implement tailored code memories
-        return MemoryItem.from_text(content, "code_file", {"location": path})
+        return self.from_text(content, "code_file", {"location": path})
 
-    @staticmethod
-    def from_ai_action(ai_message: ChatMessage, result_message: ChatMessage):
+    def from_ai_action(self, ai_message: ChatMessage, result_message: ChatMessage):
 
         # The result_message contains either user feedback
         # or the result of the command specified in ai_message
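The summarization above is a two-stage map-reduce: each chunk is summarized with the now-asynchronous `summarize_text`, and multi-chunk items get a second pass over the joined per-chunk summaries. A stripped-down sketch of that control flow, with a stand-in `summarize` coroutine (hypothetical; the real `summarize_text` takes more parameters and returns a tuple):

```python
from typing import Awaitable, Callable


async def map_reduce_summary(
    chunks: list[str],
    summarize: Callable[[str], Awaitable[str]],  # stand-in for summarize_text
) -> str:
    # Map: summarize each chunk. The diff awaits these sequentially;
    # asyncio.gather would be the concurrent variant.
    chunk_summaries = [await summarize(chunk) for chunk in chunks]
    # Reduce: a single chunk needs no second summarization pass.
    if len(chunks) == 1:
        return chunk_summaries[0]
    return await summarize("\n\n".join(chunk_summaries))
```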
" + "Do not mention the human feedback if there is none.", + ), ) - @staticmethod def from_webpage( - content: str, url: str, config: Config, question: str | None = None + self, content: str, url: str, config: Config, question: str | None = None ): - return MemoryItem.from_text( + return self.from_text( text=content, source_type="webpage", config=config, @@ -156,47 +221,6 @@ class MemoryItem(BaseModel, arbitrary_types_allowed=True): question_for_summary=question, ) - def dump(self, calculate_length=False) -> str: - if calculate_length: - token_length = self.llm_provider.count_tokens( - self.raw_content, Config().embedding_model - ) - return f""" -=============== MemoryItem =============== -Size: {f'{token_length} tokens in ' if calculate_length else ''}{len(self.e_chunks)} chunks -Metadata: {json.dumps(self.metadata, indent=2)} ----------------- SUMMARY ----------------- -{self.summary} ------------------- RAW ------------------- -{self.raw_content} -========================================== -""" - - def __eq__(self, other: MemoryItem): - return ( - self.raw_content == other.raw_content - and self.chunks == other.chunks - and self.chunk_summaries == other.chunk_summaries - # Embeddings can either be list[float] or np.ndarray[float32], - # and for comparison they must be of the same type - and np.array_equal( - self.e_summary - if isinstance(self.e_summary, np.ndarray) - else np.array(self.e_summary, dtype=np.float32), - other.e_summary - if isinstance(other.e_summary, np.ndarray) - else np.array(other.e_summary, dtype=np.float32), - ) - and np.array_equal( - self.e_chunks - if isinstance(self.e_chunks[0], np.ndarray) - else [np.array(c, dtype=np.float32) for c in self.e_chunks], - other.e_chunks - if isinstance(other.e_chunks[0], np.ndarray) - else [np.array(c, dtype=np.float32) for c in other.e_chunks], - ) - ) - class MemoryItemRelevance(BaseModel): """ |