1 files changed, 0 insertions, 161 deletions
diff --git a/autogpt/commands/file_operations_utils.py b/autogpt/commands/file_operations_utils.py
deleted file mode 100644
index b00779688..000000000
--- a/autogpt/commands/file_operations_utils.py
+++ /dev/null
@@ -1,161 +0,0 @@
-import json
-import os
-
-import charset_normalizer
-import docx
-import markdown
-import PyPDF2
-import yaml
-from bs4 import BeautifulSoup
-from pylatexenc.latex2text import LatexNodes2Text
-
-from autogpt import logs
-from autogpt.logs import logger
-
-
-class ParserStrategy:
-    def read(self, file_path: str) -> str:
-        raise NotImplementedError
-
-
-# Basic text file reading
-class TXTParser(ParserStrategy):
-    def read(self, file_path: str) -> str:
-        charset_match = charset_normalizer.from_path(file_path).best()
-        logger.debug(f"Reading '{file_path}' with encoding '{charset_match.encoding}'")
-        return str(charset_match)
-
-
-# Reading text from binary file using pdf parser
-class PDFParser(ParserStrategy):
-    def read(self, file_path: str) -> str:
-        parser = PyPDF2.PdfReader(file_path)
-        text = ""
-        for page_idx in range(len(parser.pages)):
-            text += parser.pages[page_idx].extract_text()
-        return text
-
-
-# Reading text from binary file using docs parser
-class DOCXParser(ParserStrategy):
-    def read(self, file_path: str) -> str:
-        doc_file = docx.Document(file_path)
-        text = ""
-        for para in doc_file.paragraphs:
-            text += para.text
-        return text
-
-
-# Reading as dictionary and returning string format
-class JSONParser(ParserStrategy):
-    def read(self, file_path: str) -> str:
-        with open(file_path, "r") as f:
-            data = json.load(f)
-            text = str(data)
-        return text
-
-
-class XMLParser(ParserStrategy):
-    def read(self, file_path: str) -> str:
-        with open(file_path, "r") as f:
-            soup = BeautifulSoup(f, "xml")
-            text = soup.get_text()
-        return text
-
-
-# Reading as dictionary and returning string format
-class YAMLParser(ParserStrategy):
-    def read(self, file_path: str) -> str:
-        with open(file_path, "r") as f:
-            data = yaml.load(f, Loader=yaml.FullLoader)
-            text = str(data)
-        return text
-
-
-class HTMLParser(ParserStrategy):
-    def read(self, file_path: str) -> str:
-        with open(file_path, "r") as f:
-            soup = BeautifulSoup(f, "html.parser")
-            text = soup.get_text()
-        return text
-
-
-class MarkdownParser(ParserStrategy):
-    def read(self, file_path: str) -> str:
-        with open(file_path, "r") as f:
-            html = markdown.markdown(f.read())
-            text = "".join(BeautifulSoup(html, "html.parser").findAll(string=True))
-        return text
-
-
-class LaTeXParser(ParserStrategy):
-    def read(self, file_path: str) -> str:
-        with open(file_path, "r") as f:
-            latex = f.read()
-        text = LatexNodes2Text().latex_to_text(latex)
-        return text
-
-
-class FileContext:
-    def __init__(self, parser: ParserStrategy, logger: logs.Logger):
-        self.parser = parser
-        self.logger = logger
-
-    def set_parser(self, parser: ParserStrategy) -> None:
-        self.logger.debug(f"Setting Context Parser to {parser}")
-        self.parser = parser
-
-    def read_file(self, file_path) -> str:
-        self.logger.debug(f"Reading file {file_path} with parser {self.parser}")
-        return self.parser.read(file_path)
-
-
-extension_to_parser = {
-    ".txt": TXTParser(),
-    ".csv": TXTParser(),
-    ".pdf": PDFParser(),
-    ".docx": DOCXParser(),
-    ".json": JSONParser(),
-    ".xml": XMLParser(),
-    ".yaml": YAMLParser(),
-    ".yml": YAMLParser(),
-    ".html": HTMLParser(),
-    ".htm": HTMLParser(),
-    ".xhtml": HTMLParser(),
-    ".md": MarkdownParser(),
-    ".markdown": MarkdownParser(),
-    ".tex": LaTeXParser(),
-}
-
-
-def is_file_binary_fn(file_path: str):
-    """Given a file path load all its content and checks if the null bytes is present
-
-    Args:
-        file_path (_type_): _description_
-
-    Returns:
-        bool: is_binary
-    """
-    with open(file_path, "rb") as f:
-        file_data = f.read()
-    if b"\x00" in file_data:
-        return True
-    return False
-
-
-def read_textual_file(file_path: str, logger: logs.Logger) -> str:
-    if not os.path.isfile(file_path):
-        raise FileNotFoundError(
-            f"read_file {file_path} failed: no such file or directory"
-        )
-    is_binary = is_file_binary_fn(file_path)
-    file_extension = os.path.splitext(file_path)[1].lower()
-    parser = extension_to_parser.get(file_extension)
-    if not parser:
-        if is_binary:
-            raise ValueError(f"Unsupported binary file format: {file_extension}")
-        # fallback to txt file parser (to support script and code files loading)
-        parser = TXTParser()
-    file_context = FileContext(parser, logger)
-    return file_context.read_file(file_path)