about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Reinier van der Leer <pwuts@agpt.co> 2023-12-14 02:04:56 +0100
committer: Reinier van der Leer <pwuts@agpt.co> 2023-12-14 02:07:22 +0100
commit: e428130e4a70396787d34aba397d3974f7459b6f (patch)
tree: 4d132253fd1d6c8c0d8767c1b5650557ad227f16
parent: lint: Remove unused import in autogpt/core/utils/json_schema.py (diff)
download: Auto-GPT-e428130e4a70396787d34aba397d3974f7459b6f.tar.gz
Auto-GPT-e428130e4a70396787d34aba397d3974f7459b6f.tar.bz2
Auto-GPT-e428130e4a70396787d34aba397d3974f7459b6f.zip
fix(agent/file_operations): Fix read_file command in GCS and S3 workspaces
- Update the `read_file` function in `file_operations.py` to pass the file's extension to the `decode_textual_file` function.
- Modify the `decode_textual_file` function in `file_operations_utils.py` to accept the file extension as an argument.
- Update the `content` property in the `FileContextItem` class in `context_item.py` to pass the file's extension to the `decode_textual_file` function.
- Update the `test_parsers` function in `test_text_file_parsers.py` to pass the file extension to the `decode_textual_file` function.
-rw-r--r-- autogpts/autogpt/autogpt/commands/file_operations.py 2
-rw-r--r-- autogpts/autogpt/autogpt/commands/file_operations_utils.py 18
-rw-r--r-- autogpts/autogpt/autogpt/models/context_item.py 4
-rw-r--r-- autogpts/autogpt/tests/unit/test_text_file_parsers.py 3
4 files changed, 17 insertions, 10 deletions
diff --git a/autogpts/autogpt/autogpt/commands/file_operations.py b/autogpts/autogpt/autogpt/commands/file_operations.py
index 9d70b3874..29c92b346 100644
--- a/autogpts/autogpt/autogpt/commands/file_operations.py
+++ b/autogpts/autogpt/autogpt/commands/file_operations.py
@@ -150,7 +150,7 @@ def read_file(filename: str | Path, agent: Agent) -> str:
str: The contents of the file
"""
file = agent.workspace.open_file(filename, binary=True)
- content = decode_textual_file(file, logger)
+ content = decode_textual_file(file, os.path.splitext(filename)[1], logger)
# # TODO: invalidate/update memory when file is edited
# file_memory = MemoryItem.from_text_file(content, str(filename), agent.config)
diff --git a/autogpts/autogpt/autogpt/commands/file_operations_utils.py b/autogpts/autogpt/autogpt/commands/file_operations_utils.py
index e7c001e93..683c70dfb 100644
--- a/autogpts/autogpt/autogpt/commands/file_operations_utils.py
+++ b/autogpts/autogpt/autogpt/commands/file_operations_utils.py
@@ -24,7 +24,10 @@ class ParserStrategy(ABC):
class TXTParser(ParserStrategy):
def read(self, file: BinaryIO) -> str:
charset_match = charset_normalizer.from_bytes(file.read()).best()
- logger.debug(f"Reading '{file.name}' with encoding '{charset_match.encoding}'")
+ logger.debug(
+ f"Reading {getattr(file, 'name', 'file')} "
+ f"with encoding '{charset_match.encoding}'"
+ )
return str(charset_match)
@@ -95,7 +98,9 @@ class FileContext:
self.parser = parser
def decode_file(self, file: BinaryIO) -> str:
- self.logger.debug(f"Reading file {file.name} with parser {self.parser}")
+ self.logger.debug(
+ f"Reading {getattr(file, 'name', 'file')} with parser {self.parser}"
+ )
return self.parser.read(file)
@@ -133,15 +138,14 @@ def is_file_binary_fn(file: BinaryIO):
return False
-def decode_textual_file(file: BinaryIO, logger: logging.Logger) -> str:
+def decode_textual_file(file: BinaryIO, ext: str, logger: logging.Logger) -> str:
if not file.readable():
- raise ValueError(f"read_file failed: {file.name} is not a file")
+ raise ValueError(f"{repr(file)} is not readable")
- file_extension = os.path.splitext(file.name)[1].lower()
- parser = extension_to_parser.get(file_extension)
+ parser = extension_to_parser.get(ext.lower())
if not parser:
if is_file_binary_fn(file):
- raise ValueError(f"Unsupported binary file format: {file_extension}")
+ raise ValueError(f"Unsupported binary file format: {ext}")
# fallback to txt file parser (to support script and code files loading)
parser = TXTParser()
file_context = FileContext(parser, logger)
diff --git a/autogpts/autogpt/autogpt/models/context_item.py b/autogpts/autogpt/autogpt/models/context_item.py
index 0e5d9a373..a669bdcc8 100644
--- a/autogpts/autogpt/autogpt/models/context_item.py
+++ b/autogpts/autogpt/autogpt/models/context_item.py
@@ -1,4 +1,5 @@
import logging
+import os.path
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Optional
@@ -56,8 +57,9 @@ class FileContextItem(BaseModel, ContextItem):
@property
def content(self) -> str:
+ # TODO: use workspace.open_file()
with open(self.file_path, "rb") as file:
- return decode_textual_file(file, logger)
+ return decode_textual_file(file, os.path.splitext(file.name)[1], logger)
class FolderContextItem(BaseModel, ContextItem):
diff --git a/autogpts/autogpt/tests/unit/test_text_file_parsers.py b/autogpts/autogpt/tests/unit/test_text_file_parsers.py
index d17de9c8c..c13241580 100644
--- a/autogpts/autogpt/tests/unit/test_text_file_parsers.py
+++ b/autogpts/autogpt/tests/unit/test_text_file_parsers.py
@@ -1,5 +1,6 @@
import json
import logging
+import os.path
import tempfile
from pathlib import Path
from xml.etree import ElementTree
@@ -159,7 +160,7 @@ binary_files_extensions = [".pdf", ".docx"]
def test_parsers(file_extension, c_file_creator):
created_file_path = Path(c_file_creator())
with open(created_file_path, "rb") as file:
- loaded_text = decode_textual_file(file, logger)
+ loaded_text = decode_textual_file(file, os.path.splitext(file.name)[1], logger)
assert plain_text_str in loaded_text