diff options
author | Reinier van der Leer <pwuts@agpt.co> | 2023-12-12 15:29:25 +0100 |
---|---|---|
committer | Reinier van der Leer <pwuts@agpt.co> | 2023-12-12 15:29:25 +0100 |
commit | 198a0ecad61a4d43c64f815d758d14ffcd330ba2 (patch) | |
tree | 5291f4e94105a8ae8bc7d87f054233095d88b9fd | |
parent | refactor: Remove X- prefix from API request headers (diff) | |
download | Auto-GPT-198a0ecad61a4d43c64f815d758d14ffcd330ba2.tar.gz Auto-GPT-198a0ecad61a4d43c64f815d758d14ffcd330ba2.tar.bz2 Auto-GPT-198a0ecad61a4d43c64f815d758d14ffcd330ba2.zip |
fix(agent/file_operations): Fix path processing in file_operations.py and across workspace backend implementations
- Adjusted path processing and use of `agent.workspace` in the file_operations.py module to prevent double path resolution.
- Updated the `is_duplicate_operation` and `log_operation` functions in file_operations.py to use the `make_relative` argument of the `sanitize_path_arg` decorator.
- Refactored the `write_to_file`, `list_folder`, and `list_files` functions in file_operations.py to accept both string and Path objects as the path argument.
- Modified the GCSFileWorkspace and S3FileWorkspace classes in the file_workspace module to ensure that the root path is always an absolute path.
This commit addresses issues with path processing in the file_operations.py module and across different workspace backend implementations. The changes ensure that relative paths are correctly converted to absolute paths where necessary and that the file operations logic functions consistently handle path arguments as strings or Path objects. Additionally, the GCSFileWorkspace and S3FileWorkspace classes now enforce that the root path is always an absolute path.
-rw-r--r-- | autogpts/autogpt/autogpt/commands/file_operations.py | 47 | ||||
-rw-r--r-- | autogpts/autogpt/autogpt/file_workspace/__init__.py | 2 | ||||
-rw-r--r-- | autogpts/autogpt/autogpt/file_workspace/base.py | 4 | ||||
-rw-r--r-- | autogpts/autogpt/autogpt/file_workspace/gcs.py | 13 | ||||
-rw-r--r-- | autogpts/autogpt/autogpt/file_workspace/local.py | 8 | ||||
-rw-r--r-- | autogpts/autogpt/autogpt/file_workspace/s3.py | 17 | ||||
-rw-r--r-- | autogpts/autogpt/tests/unit/test_gcs_file_workspace.py | 28 | ||||
-rw-r--r-- | autogpts/autogpt/tests/unit/test_s3_file_workspace.py | 28 |
8 files changed, 81 insertions, 66 deletions
diff --git a/autogpts/autogpt/autogpt/commands/file_operations.py b/autogpts/autogpt/autogpt/commands/file_operations.py index ec61d5875..6f4986105 100644 --- a/autogpts/autogpt/autogpt/commands/file_operations.py +++ b/autogpts/autogpt/autogpt/commands/file_operations.py @@ -2,7 +2,6 @@ from __future__ import annotations -import contextlib import hashlib import logging import os @@ -84,7 +83,7 @@ def file_operations_state(log_path: str | Path) -> dict[str, str]: return state -@sanitize_path_arg("file_path") +@sanitize_path_arg("file_path", make_relative=True) def is_duplicate_operation( operation: Operation, file_path: Path, agent: Agent, checksum: str | None = None ) -> bool: @@ -99,10 +98,6 @@ def is_duplicate_operation( Returns: True if the operation has already been performed on the file """ - # Make the file path into a relative path if possible - with contextlib.suppress(ValueError): - file_path = file_path.relative_to(agent.workspace.root) - state = file_operations_state(agent.file_manager.file_ops_log_path) if operation == "delete" and str(file_path) not in state: return True @@ -111,9 +106,12 @@ def is_duplicate_operation( return False -@sanitize_path_arg("file_path") +@sanitize_path_arg("file_path", make_relative=True) def log_operation( - operation: Operation, file_path: Path, agent: Agent, checksum: str | None = None + operation: Operation, + file_path: str | Path, + agent: Agent, + checksum: str | None = None, ) -> None: """Log the file operation to the file_logger.log @@ -122,10 +120,6 @@ def log_operation( file_path: The name of the file the operation was performed on checksum: The checksum of the contents to be written """ - # Make the file path into a relative path if possible - with contextlib.suppress(ValueError): - file_path = file_path.relative_to(agent.workspace.root) - log_entry = f"{operation}: {file_path}" if checksum is not None: log_entry += f" #{checksum}" @@ -210,8 +204,7 @@ def ingest_file( }, aliases=["create_file"], ) 
-@sanitize_path_arg("filename") -async def write_to_file(filename: Path, contents: str, agent: Agent) -> str: +async def write_to_file(filename: str | Path, contents: str, agent: Agent) -> str: """Write contents to a file Args: @@ -222,14 +215,14 @@ async def write_to_file(filename: str | Path, contents: str, agent: Agent) -> str: str: A message indicating success or failure """ checksum = text_checksum(contents) - if is_duplicate_operation("write", filename, agent, checksum): - raise DuplicateOperationError(f"File {filename.name} has already been updated.") + if is_duplicate_operation("write", Path(filename), agent, checksum): + raise DuplicateOperationError(f"File {filename} has already been updated.") - directory = os.path.dirname(filename) - os.makedirs(directory, exist_ok=True) + if directory := os.path.dirname(filename): + agent.workspace.get_path(directory).mkdir(exist_ok=True) await agent.workspace.write_file(filename, contents) log_operation("write", filename, agent, checksum) - return f"File {filename.name} has been written successfully." + return f"File {filename} has been written successfully." 
def append_to_file( @@ -264,8 +257,7 @@ def append_to_file( ) }, ) -@sanitize_path_arg("folder") -def list_folder(folder: Path, agent: Agent) -> list[str]: +def list_folder(folder: str | Path, agent: Agent) -> list[str]: """Lists files in a folder recursively Args: @@ -274,15 +266,4 @@ def list_folder(folder: Path, agent: Agent) -> list[str]: Returns: list[str]: A list of files found in the folder """ - found_files = [] - - for root, _, files in os.walk(folder): - for file in files: - if file.startswith("."): - continue - relative_path = os.path.relpath( - os.path.join(root, file), agent.workspace.root - ) - found_files.append(relative_path) - - return found_files + return [str(p) for p in agent.workspace.list(folder)] diff --git a/autogpts/autogpt/autogpt/file_workspace/__init__.py b/autogpts/autogpt/autogpt/file_workspace/__init__.py index ebca767dd..a0e06e38f 100644 --- a/autogpts/autogpt/autogpt/file_workspace/__init__.py +++ b/autogpts/autogpt/autogpt/file_workspace/__init__.py @@ -16,7 +16,7 @@ def get_workspace( ) -> FileWorkspace: assert bool(root_path) != bool(id), "Specify root_path or id to get workspace" if root_path is None: - root_path = Path(f"workspaces/{id}") + root_path = Path(f"/workspaces/{id}") match backend: case FileWorkspaceBackendName.LOCAL: diff --git a/autogpts/autogpt/autogpt/file_workspace/base.py b/autogpts/autogpt/autogpt/file_workspace/base.py index c52162b29..2e4f43475 100644 --- a/autogpts/autogpt/autogpt/file_workspace/base.py +++ b/autogpts/autogpt/autogpt/file_workspace/base.py @@ -72,8 +72,8 @@ class FileWorkspace(ABC): """Write to a file in the workspace.""" @abstractmethod - def list_files(self, path: str | Path = ".") -> list[Path]: - """List all files in a directory in the workspace.""" + def list(self, path: str | Path = ".") -> list[Path]: + """List all files (recursively) in a directory in the workspace.""" @abstractmethod def delete_file(self, path: str | Path) -> None: diff --git 
a/autogpts/autogpt/autogpt/file_workspace/gcs.py b/autogpts/autogpt/autogpt/file_workspace/gcs.py index 1be240873..bca410451 100644 --- a/autogpts/autogpt/autogpt/file_workspace/gcs.py +++ b/autogpts/autogpt/autogpt/file_workspace/gcs.py @@ -29,6 +29,7 @@ class GCSFileWorkspace(FileWorkspace): def __init__(self, config: GCSFileWorkspaceConfiguration): self._bucket_name = config.bucket self._root = config.root + assert self._root.is_absolute() self._gcs = storage.Client() super().__init__() @@ -47,7 +48,7 @@ class GCSFileWorkspace(FileWorkspace): self._bucket = self._gcs.get_bucket(self._bucket_name) def get_path(self, relative_path: str | Path) -> Path: - return super().get_path(relative_path).relative_to(Path("/")) + return super().get_path(relative_path).relative_to("/") def open_file(self, path: str | Path, mode: str = "r"): """Open a file in the workspace.""" @@ -78,11 +79,13 @@ class GCSFileWorkspace(FileWorkspace): if inspect.isawaitable(res): await res - def list_files(self, path: str | Path = ".") -> list[Path]: - """List all files in a directory in the workspace.""" + def list(self, path: str | Path = ".") -> list[Path]: + """List all files (recursively) in a directory in the workspace.""" path = self.get_path(path) - blobs = self._bucket.list_blobs(prefix=str(path)) - return [Path(blob.name) for blob in blobs if not blob.name.endswith("/")] + blobs = self._bucket.list_blobs( + prefix=f"{path}/" if path != Path(".") else None + ) + return [Path(blob.name).relative_to(path) for blob in blobs] def delete_file(self, path: str | Path) -> None: """Delete a file in the workspace.""" diff --git a/autogpts/autogpt/autogpt/file_workspace/local.py b/autogpts/autogpt/autogpt/file_workspace/local.py index fd9fd3eb3..347e29667 100644 --- a/autogpts/autogpt/autogpt/file_workspace/local.py +++ b/autogpts/autogpt/autogpt/file_workspace/local.py @@ -56,10 +56,10 @@ class LocalFileWorkspace(FileWorkspace): if inspect.isawaitable(res): await res - def list_files(self, path: 
str | Path = "."): - """List all files in a directory in the workspace.""" - full_path = self.get_path(path) - return [str(file) for file in full_path.glob("*") if file.is_file()] + def list(self, path: str | Path = ".") -> list[Path]: + """List all files (recursively) in a directory in the workspace.""" + path = self.get_path(path) + return [file.relative_to(path) for file in path.rglob("*") if file.is_file()] def delete_file(self, path: str | Path): """Delete a file in the workspace.""" diff --git a/autogpts/autogpt/autogpt/file_workspace/s3.py b/autogpts/autogpt/autogpt/file_workspace/s3.py index 0b5fdb9c5..d52d9388f 100644 --- a/autogpts/autogpt/autogpt/file_workspace/s3.py +++ b/autogpts/autogpt/autogpt/file_workspace/s3.py @@ -40,6 +40,7 @@ class S3FileWorkspace(FileWorkspace): def __init__(self, config: S3FileWorkspaceConfiguration): self._bucket_name = config.bucket self._root = config.root + assert self._root.is_absolute() # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html self._s3 = boto3.resource( @@ -71,7 +72,7 @@ class S3FileWorkspace(FileWorkspace): self._bucket = self._s3.create_bucket(Bucket=self._bucket_name) def get_path(self, relative_path: str | Path) -> Path: - return super().get_path(relative_path).relative_to(Path("/")) + return super().get_path(relative_path).relative_to("/") def open_file(self, path: str | Path, mode: str = "r"): """Open a file in the workspace.""" @@ -99,20 +100,14 @@ class S3FileWorkspace(FileWorkspace): if inspect.isawaitable(res): await res - def list_files(self, path: str | Path = ".") -> list[Path]: - """List all files in a directory in the workspace.""" + def list(self, path: str | Path = ".") -> list[Path]: + """List all files (recursively) in a directory in the workspace.""" path = self.get_path(path) if path == Path("."): - return [ - Path(obj.key) - for obj in self._bucket.objects.all() - if not obj.key.endswith("/") - ] + return [Path(obj.key) for obj in 
self._bucket.objects.all()] else: return [ - Path(obj.key) - for obj in self._bucket.objects.filter(Prefix=str(path)) - if not obj.key.endswith("/") + Path(obj.key) for obj in self._bucket.objects.filter(Prefix=f"{path}/") ] def delete_file(self, path: str | Path) -> None: diff --git a/autogpts/autogpt/tests/unit/test_gcs_file_workspace.py b/autogpts/autogpt/tests/unit/test_gcs_file_workspace.py index 4fe441c23..7e23fc584 100644 --- a/autogpts/autogpt/tests/unit/test_gcs_file_workspace.py +++ b/autogpts/autogpt/tests/unit/test_gcs_file_workspace.py @@ -60,8 +60,8 @@ def gcs_workspace(gcs_workspace_uninitialized: GCSFileWorkspace) -> GCSFileWorks TEST_FILES: list[tuple[str | Path, str]] = [ ("existing_test_file_1", "test content 1"), - ("existing_test_file_2.txt", "test content 2"), - (Path("existing_test_file_3"), "test content 3"), + ("/existing_test_file_2.txt", "test content 2"), + (Path("/existing_test_file_3"), "test content 3"), (Path("existing/test/file/4"), "test content 4"), ] @@ -69,7 +69,9 @@ TEST_FILES: list[tuple[str | Path, str]] = [ @pytest_asyncio.fixture async def gcs_workspace_with_files(gcs_workspace: GCSFileWorkspace) -> GCSFileWorkspace: for file_name, file_content in TEST_FILES: - gcs_workspace._bucket.blob(str(file_name)).upload_from_string(file_content) + gcs_workspace._bucket.blob( + str(gcs_workspace.get_path(file_name)) + ).upload_from_string(file_content) yield gcs_workspace # type: ignore @@ -84,8 +86,24 @@ async def test_read_file(gcs_workspace_with_files: GCSFileWorkspace): def test_list_files(gcs_workspace_with_files: GCSFileWorkspace): - files = gcs_workspace_with_files.list_files() - assert set(files) == set(Path(file_name) for file_name, _ in TEST_FILES) + # List at root level + assert (files := gcs_workspace_with_files.list()) == gcs_workspace_with_files.list() + assert len(files) > 0 + assert set(files) == set( + p.relative_to("/") if (p := Path(file_name)).is_absolute() else p + for file_name, _ in TEST_FILES + ) + + # List at 
nested path + assert ( + nested_files := gcs_workspace_with_files.list("existing") + ) == gcs_workspace_with_files.list("existing") + assert len(nested_files) > 0 + assert set(nested_files) == set( + p + for file_name, _ in TEST_FILES + if (p := Path(file_name)).is_relative_to("existing") + ) @pytest.mark.asyncio diff --git a/autogpts/autogpt/tests/unit/test_s3_file_workspace.py b/autogpts/autogpt/tests/unit/test_s3_file_workspace.py index 8517e2749..218241260 100644 --- a/autogpts/autogpt/tests/unit/test_s3_file_workspace.py +++ b/autogpts/autogpt/tests/unit/test_s3_file_workspace.py @@ -58,8 +58,8 @@ def s3_workspace(s3_workspace_uninitialized: S3FileWorkspace) -> S3FileWorkspace TEST_FILES: list[tuple[str | Path, str]] = [ ("existing_test_file_1", "test content 1"), - ("existing_test_file_2.txt", "test content 2"), - (Path("existing_test_file_3"), "test content 3"), + ("/existing_test_file_2.txt", "test content 2"), + (Path("/existing_test_file_3"), "test content 3"), (Path("existing/test/file/4"), "test content 4"), ] @@ -67,7 +67,9 @@ TEST_FILES: list[tuple[str | Path, str]] = [ @pytest_asyncio.fixture async def s3_workspace_with_files(s3_workspace: S3FileWorkspace) -> S3FileWorkspace: for file_name, file_content in TEST_FILES: - s3_workspace._bucket.Object(str(file_name)).put(Body=file_content) + s3_workspace._bucket.Object(str(s3_workspace.get_path(file_name))).put( + Body=file_content + ) yield s3_workspace # type: ignore @@ -82,8 +84,24 @@ async def test_read_file(s3_workspace_with_files: S3FileWorkspace): def test_list_files(s3_workspace_with_files: S3FileWorkspace): - files = s3_workspace_with_files.list_files() - assert set(files) == set(Path(file_name) for file_name, _ in TEST_FILES) + # List at root level + assert (files := s3_workspace_with_files.list()) == s3_workspace_with_files.list() + assert len(files) > 0 + assert set(files) == set( + p.relative_to("/") if (p := Path(file_name)).is_absolute() else p + for file_name, _ in TEST_FILES + ) + + # 
List at nested path + assert ( + nested_files := s3_workspace_with_files.list("existing") + ) == s3_workspace_with_files.list("existing") + assert len(nested_files) > 0 + assert set(nested_files) == set( + p + for file_name, _ in TEST_FILES + if (p := Path(file_name)).is_relative_to("existing") + ) @pytest.mark.asyncio |