aboutsummaryrefslogtreecommitdiff
path: root/tests/unit/test_text_file_parsers.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/unit/test_text_file_parsers.py')
-rw-r--r--tests/unit/test_text_file_parsers.py150
1 files changed, 0 insertions, 150 deletions
diff --git a/tests/unit/test_text_file_parsers.py b/tests/unit/test_text_file_parsers.py
deleted file mode 100644
index 2e71aa69f..000000000
--- a/tests/unit/test_text_file_parsers.py
+++ /dev/null
@@ -1,150 +0,0 @@
-import json
-import tempfile
-from unittest import TestCase
-from xml.etree import ElementTree
-
-import docx
-import yaml
-from bs4 import BeautifulSoup
-
-from autogpt.commands.file_operations_utils import is_file_binary_fn, read_textual_file
-from autogpt.logs import logger
-
-plain_text_str = "Hello, world!"
-
-
-def mock_text_file():
- with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as f:
- f.write(plain_text_str)
- return f.name
-
-
-def mock_csv_file():
- with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv") as f:
- f.write(plain_text_str)
- return f.name
-
-
-def mock_pdf_file():
- with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".pdf") as f:
- # Create a new PDF and add a page with the text plain_text_str
- # Write the PDF header
- f.write(b"%PDF-1.7\n")
- # Write the document catalog
- f.write(b"1 0 obj\n")
- f.write(b"<< /Type /Catalog /Pages 2 0 R >>\n")
- f.write(b"endobj\n")
- # Write the page object
- f.write(b"2 0 obj\n")
- f.write(
- b"<< /Type /Page /Parent 1 0 R /Resources << /Font << /F1 3 0 R >> >> /MediaBox [0 0 612 792] /Contents 4 0 R >>\n"
- )
- f.write(b"endobj\n")
- # Write the font object
- f.write(b"3 0 obj\n")
- f.write(
- b"<< /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Helvetica-Bold >>\n"
- )
- f.write(b"endobj\n")
- # Write the page contents object
- f.write(b"4 0 obj\n")
- f.write(b"<< /Length 25 >>\n")
- f.write(b"stream\n")
- f.write(b"BT\n/F1 12 Tf\n72 720 Td\n(Hello, world!) Tj\nET\n")
- f.write(b"endstream\n")
- f.write(b"endobj\n")
- # Write the cross-reference table
- f.write(b"xref\n")
- f.write(b"0 5\n")
- f.write(b"0000000000 65535 f \n")
- f.write(b"0000000017 00000 n \n")
- f.write(b"0000000073 00000 n \n")
- f.write(b"0000000123 00000 n \n")
- f.write(b"0000000271 00000 n \n")
- f.write(b"trailer\n")
- f.write(b"<< /Size 5 /Root 1 0 R >>\n")
- f.write(b"startxref\n")
- f.write(b"380\n")
- f.write(b"%%EOF\n")
- f.write(b"\x00")
- return f.name
-
-
-def mock_docx_file():
- with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".docx") as f:
- document = docx.Document()
- document.add_paragraph(plain_text_str)
- document.save(f.name)
- return f.name
-
-
-def mock_json_file():
- with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".json") as f:
- json.dump({"text": plain_text_str}, f)
- return f.name
-
-
-def mock_xml_file():
- root = ElementTree.Element("text")
- root.text = plain_text_str
- tree = ElementTree.ElementTree(root)
- with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".xml") as f:
- tree.write(f)
- return f.name
-
-
-def mock_yaml_file():
- with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".yaml") as f:
- yaml.dump({"text": plain_text_str}, f)
- return f.name
-
-
-def mock_html_file():
- html = BeautifulSoup(
- f"<html><head><title>This is a test</title></head><body><p>{plain_text_str}</p></body></html>",
- "html.parser",
- )
- with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".html") as f:
- f.write(str(html))
- return f.name
-
-
-def mock_md_file():
- with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".md") as f:
- f.write(f"# {plain_text_str}!\n")
- return f.name
-
-
-def mock_latex_file():
- with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".tex") as f:
- latex_str = rf"\documentclass{{article}}\begin{{document}}{plain_text_str}\end{{document}}"
- f.write(latex_str)
- return f.name
-
-
-respective_file_creation_functions = {
- ".txt": mock_text_file,
- ".csv": mock_csv_file,
- ".pdf": mock_pdf_file,
- ".docx": mock_docx_file,
- ".json": mock_json_file,
- ".xml": mock_xml_file,
- ".yaml": mock_yaml_file,
- ".html": mock_html_file,
- ".md": mock_md_file,
- ".tex": mock_latex_file,
-}
-
-
-class TestConfig(TestCase):
- def test_parsers(self):
- binary_files_extensions = [".pdf", ".docx"]
- for (
- file_extension,
- c_file_creator,
- ) in respective_file_creation_functions.items():
- created_filepath = c_file_creator()
- loaded_text = read_textual_file(created_filepath, logger)
- self.assertIn(plain_text_str, loaded_text)
- should_be_binary = file_extension in binary_files_extensions
- self.assertEqual(should_be_binary, is_file_binary_fn(created_filepath))