From e428130e4a70396787d34aba397d3974f7459b6f Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Thu, 14 Dec 2023 02:04:56 +0100 Subject: fix(agent/file_operations): Fix read_file command in GCS and S3 workspaces - Update the `read_file` function in `file_operations.py` to pass the file's extension to the `decode_textual_file` function. - Modify the `decode_textual_file` function in `file_operations_utils.py` to accept the file extension as an argument. - Update the `content` property in the `FileContextItem` class in `context_item.py` to pass the file's extension to the `decode_textual_file` function. - Update the `test_parsers` function in `test_text_file_parsers.py` to pass the file extension to the `decode_textual_file` function. --- autogpts/autogpt/autogpt/commands/file_operations.py | 2 +- .../autogpt/autogpt/commands/file_operations_utils.py | 18 +++++++++++------- autogpts/autogpt/autogpt/models/context_item.py | 4 +++- autogpts/autogpt/tests/unit/test_text_file_parsers.py | 3 ++- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/autogpts/autogpt/autogpt/commands/file_operations.py b/autogpts/autogpt/autogpt/commands/file_operations.py index 9d70b3874..29c92b346 100644 --- a/autogpts/autogpt/autogpt/commands/file_operations.py +++ b/autogpts/autogpt/autogpt/commands/file_operations.py @@ -150,7 +150,7 @@ def read_file(filename: str | Path, agent: Agent) -> str: str: The contents of the file """ file = agent.workspace.open_file(filename, binary=True) - content = decode_textual_file(file, logger) + content = decode_textual_file(file, os.path.splitext(filename)[1], logger) # # TODO: invalidate/update memory when file is edited # file_memory = MemoryItem.from_text_file(content, str(filename), agent.config) diff --git a/autogpts/autogpt/autogpt/commands/file_operations_utils.py b/autogpts/autogpt/autogpt/commands/file_operations_utils.py index e7c001e93..683c70dfb 100644 --- a/autogpts/autogpt/autogpt/commands/file_operations_utils.py +++ b/autogpts/autogpt/autogpt/commands/file_operations_utils.py @@ -24,7 +24,10 @@ class ParserStrategy(ABC): class TXTParser(ParserStrategy): def read(self, file: BinaryIO) -> str: charset_match = charset_normalizer.from_bytes(file.read()).best() - logger.debug(f"Reading '{file.name}' with encoding '{charset_match.encoding}'") + logger.debug( + f"Reading {getattr(file, 'name', 'file')} " + f"with encoding '{charset_match.encoding}'" + ) return str(charset_match) @@ -95,7 +98,9 @@ class FileContext: self.parser = parser def decode_file(self, file: BinaryIO) -> str: - self.logger.debug(f"Reading file {file.name} with parser {self.parser}") + self.logger.debug( + f"Reading {getattr(file, 'name', 'file')} with parser {self.parser}" + ) return self.parser.read(file) @@ -133,15 +138,14 @@ def is_file_binary_fn(file: BinaryIO): return False -def decode_textual_file(file: BinaryIO, logger: logging.Logger) -> str: +def decode_textual_file(file: BinaryIO, ext: str, logger: logging.Logger) -> str: if not file.readable(): - raise ValueError(f"read_file failed: {file.name} is not a file") + raise ValueError(f"{repr(file)} is not readable") - file_extension = os.path.splitext(file.name)[1].lower() - parser = extension_to_parser.get(file_extension) + parser = extension_to_parser.get(ext.lower()) if not parser: if is_file_binary_fn(file): - raise ValueError(f"Unsupported binary file format: {file_extension}") + raise ValueError(f"Unsupported binary file format: {ext}") # fallback to txt file parser (to support script and code files loading) parser = TXTParser() file_context = FileContext(parser, logger) diff --git a/autogpts/autogpt/autogpt/models/context_item.py b/autogpts/autogpt/autogpt/models/context_item.py index 0e5d9a373..a669bdcc8 100644 --- a/autogpts/autogpt/autogpt/models/context_item.py +++ b/autogpts/autogpt/autogpt/models/context_item.py @@ -1,4 +1,5 @@ import logging +import os.path from abc import ABC, abstractmethod from pathlib import Path from typing import Optional @@ -56,8 +57,9 @@ class FileContextItem(BaseModel, ContextItem): @property def content(self) -> str: + # TODO: use workspace.open_file() with open(self.file_path, "rb") as file: - return decode_textual_file(file, logger) + return decode_textual_file(file, os.path.splitext(file.name)[1], logger) class FolderContextItem(BaseModel, ContextItem): diff --git a/autogpts/autogpt/tests/unit/test_text_file_parsers.py b/autogpts/autogpt/tests/unit/test_text_file_parsers.py index d17de9c8c..c13241580 100644 --- a/autogpts/autogpt/tests/unit/test_text_file_parsers.py +++ b/autogpts/autogpt/tests/unit/test_text_file_parsers.py @@ -1,5 +1,6 @@ import json import logging +import os.path import tempfile from pathlib import Path from xml.etree import ElementTree @@ -159,7 +160,7 @@ binary_files_extensions = [".pdf", ".docx"] def test_parsers(file_extension, c_file_creator): created_file_path = Path(c_file_creator()) with open(created_file_path, "rb") as file: - loaded_text = decode_textual_file(file, logger) + loaded_text = decode_textual_file(file, os.path.splitext(file.name)[1], logger) assert plain_text_str in loaded_text -- cgit v1.2.3