From e428130e4a70396787d34aba397d3974f7459b6f Mon Sep 17 00:00:00 2001
From: Reinier van der Leer <pwuts@agpt.co>
Date: Thu, 14 Dec 2023 02:04:56 +0100
Subject: fix(agent/file_operations): Fix read_file command in GCS and S3
 workspaces

- Update the `read_file` function in `file_operations.py` to pass the file's extension to the `decode_textual_file` function.
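- Modify the `decode_textual_file` function in `file_operations_utils.py` to accept the file extension as an argument instead of deriving it from `file.name`, and stop assuming that file objects have a `name` attribute in its debug logging and error messages.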
- Update the `content` property in the `FileContextItem` class in `context_item.py` to pass the file's extension to the `decode_textual_file` function.
- Update the `test_parsers` function in `test_text_file_parsers.py` to pass the file extension to the `decode_textual_file` function.
---
 autogpts/autogpt/autogpt/commands/file_operations.py   |  2 +-
 .../autogpt/autogpt/commands/file_operations_utils.py  | 18 +++++++++++-------
 autogpts/autogpt/autogpt/models/context_item.py        |  4 +++-
 autogpts/autogpt/tests/unit/test_text_file_parsers.py  |  3 ++-
 4 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/autogpts/autogpt/autogpt/commands/file_operations.py b/autogpts/autogpt/autogpt/commands/file_operations.py
index 9d70b3874..29c92b346 100644
--- a/autogpts/autogpt/autogpt/commands/file_operations.py
+++ b/autogpts/autogpt/autogpt/commands/file_operations.py
@@ -150,7 +150,7 @@ def read_file(filename: str | Path, agent: Agent) -> str:
         str: The contents of the file
     """
     file = agent.workspace.open_file(filename, binary=True)
-    content = decode_textual_file(file, logger)
+    content = decode_textual_file(file, os.path.splitext(filename)[1], logger)
 
     # # TODO: invalidate/update memory when file is edited
     # file_memory = MemoryItem.from_text_file(content, str(filename), agent.config)
diff --git a/autogpts/autogpt/autogpt/commands/file_operations_utils.py b/autogpts/autogpt/autogpt/commands/file_operations_utils.py
index e7c001e93..683c70dfb 100644
--- a/autogpts/autogpt/autogpt/commands/file_operations_utils.py
+++ b/autogpts/autogpt/autogpt/commands/file_operations_utils.py
@@ -24,7 +24,10 @@ class ParserStrategy(ABC):
 class TXTParser(ParserStrategy):
     def read(self, file: BinaryIO) -> str:
         charset_match = charset_normalizer.from_bytes(file.read()).best()
-        logger.debug(f"Reading '{file.name}' with encoding '{charset_match.encoding}'")
+        logger.debug(
+            f"Reading {getattr(file, 'name', 'file')} "
+            f"with encoding '{charset_match.encoding}'"
+        )
         return str(charset_match)
 
 
@@ -95,7 +98,9 @@ class FileContext:
         self.parser = parser
 
     def decode_file(self, file: BinaryIO) -> str:
-        self.logger.debug(f"Reading file {file.name} with parser {self.parser}")
+        self.logger.debug(
+            f"Reading {getattr(file, 'name', 'file')} with parser {self.parser}"
+        )
         return self.parser.read(file)
 
 
@@ -133,15 +138,14 @@ def is_file_binary_fn(file: BinaryIO):
     return False
 
 
-def decode_textual_file(file: BinaryIO, logger: logging.Logger) -> str:
+def decode_textual_file(file: BinaryIO, ext: str, logger: logging.Logger) -> str:
     if not file.readable():
-        raise ValueError(f"read_file failed: {file.name} is not a file")
+        raise ValueError(f"{repr(file)} is not readable")
 
-    file_extension = os.path.splitext(file.name)[1].lower()
-    parser = extension_to_parser.get(file_extension)
+    parser = extension_to_parser.get(ext.lower())
     if not parser:
         if is_file_binary_fn(file):
-            raise ValueError(f"Unsupported binary file format: {file_extension}")
+            raise ValueError(f"Unsupported binary file format: {ext}")
         # fallback to txt file parser (to support script and code files loading)
         parser = TXTParser()
     file_context = FileContext(parser, logger)
diff --git a/autogpts/autogpt/autogpt/models/context_item.py b/autogpts/autogpt/autogpt/models/context_item.py
index 0e5d9a373..a669bdcc8 100644
--- a/autogpts/autogpt/autogpt/models/context_item.py
+++ b/autogpts/autogpt/autogpt/models/context_item.py
@@ -1,4 +1,5 @@
 import logging
+import os.path
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Optional
@@ -56,8 +57,9 @@ class FileContextItem(BaseModel, ContextItem):
 
     @property
     def content(self) -> str:
+        # TODO: use workspace.open_file()
         with open(self.file_path, "rb") as file:
-            return decode_textual_file(file, logger)
+            return decode_textual_file(file, os.path.splitext(file.name)[1], logger)
 
 
 class FolderContextItem(BaseModel, ContextItem):
diff --git a/autogpts/autogpt/tests/unit/test_text_file_parsers.py b/autogpts/autogpt/tests/unit/test_text_file_parsers.py
index d17de9c8c..c13241580 100644
--- a/autogpts/autogpt/tests/unit/test_text_file_parsers.py
+++ b/autogpts/autogpt/tests/unit/test_text_file_parsers.py
@@ -1,5 +1,6 @@
 import json
 import logging
+import os.path
 import tempfile
 from pathlib import Path
 from xml.etree import ElementTree
@@ -159,7 +160,7 @@ binary_files_extensions = [".pdf", ".docx"]
 def test_parsers(file_extension, c_file_creator):
     created_file_path = Path(c_file_creator())
     with open(created_file_path, "rb") as file:
-        loaded_text = decode_textual_file(file, logger)
+        loaded_text = decode_textual_file(file, os.path.splitext(file.name)[1], logger)
 
         assert plain_text_str in loaded_text
 
-- 
cgit v1.2.3