author      Reinier van der Leer <pwuts@agpt.co>    2023-12-13 18:07:25 +0100
committer   Reinier van der Leer <pwuts@agpt.co>    2023-12-13 18:07:25 +0100
commit      967338193e96b8960f76329b4af97d07193f0ddb (patch)
tree        61e741194421173f11ba7938d8643f6aa91867b3
parent      chore(autogpt): Update duckduckgo-search to v4.0.0 (diff)
download    Auto-GPT-967338193e96b8960f76329b4af97d07193f0ddb.tar.gz
            Auto-GPT-967338193e96b8960f76329b4af97d07193f0ddb.tar.bz2
            Auto-GPT-967338193e96b8960f76329b4af97d07193f0ddb.zip
fix(agent/file_workspace): Fix workspace initialization and listing behavior in GCS and S3 file workspaces
- Update GCSFileWorkspace.initialize() to handle cases where the bucket doesn't exist and create it if necessary
- Add logging to S3FileWorkspace.initialize() and GCSFileWorkspace.initialize()
- Update GCSFileWorkspace.list() and S3FileWorkspace.list() to correctly handle nested paths and return the relative paths of files (see the usage sketch after this list)
- Fix tests for GCSFileWorkspace and S3FileWorkspace to account for the changes in initialization and listing behavior
- Fix S3FileWorkspace.open_file() to correctly switch between binary and text mode
- Add tests to verify the fixes in workspace initialization and listing behavior
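
The usage sketch below is not part of the commit; it only illustrates the behavior described above: initialize() now creates the bucket when it is missing, and list() returns paths relative to the listed directory. The bucket name, root path, and file paths are made up, and the sketch assumes Google Cloud credentials are already configured and that write_file() is a coroutine (as in the S3 implementation and the pytest-asyncio tests).

    import asyncio
    import os
    from pathlib import Path

    from autogpt.file_workspace.gcs import GCSFileWorkspace, GCSFileWorkspaceConfiguration

    os.environ["WORKSPACE_STORAGE_BUCKET"] = "my-test-bucket"   # hypothetical bucket name
    config = GCSFileWorkspaceConfiguration.from_env()
    config.root = Path("/workspaces/some-task-id")              # illustrative per-task root

    workspace = GCSFileWorkspace(config)
    workspace.initialize()  # creates the bucket if it does not exist yet

    asyncio.run(workspace.write_file("existing/test/dir/file.txt", "hello"))

    print(workspace.list())                     # e.g. [Path('existing/test/dir/file.txt')]
    print(workspace.list("existing/test/dir"))  # e.g. [Path('file.txt')]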
-rw-r--r--   autogpts/autogpt/autogpt/file_workspace/gcs.py            21
-rw-r--r--   autogpts/autogpt/autogpt/file_workspace/s3.py             15
-rw-r--r--   autogpts/autogpt/tests/unit/test_gcs_file_workspace.py    53
-rw-r--r--   autogpts/autogpt/tests/unit/test_s3_file_workspace.py     21
4 files changed, 65 insertions, 45 deletions
diff --git a/autogpts/autogpt/autogpt/file_workspace/gcs.py b/autogpts/autogpt/autogpt/file_workspace/gcs.py
index 3a6b9772c..c6ff3fc97 100644
--- a/autogpts/autogpt/autogpt/file_workspace/gcs.py
+++ b/autogpts/autogpt/autogpt/file_workspace/gcs.py
@@ -10,6 +10,7 @@ from io import IOBase
 from pathlib import Path
 
 from google.cloud import storage
+from google.cloud.exceptions import NotFound
 
 from autogpt.core.configuration.schema import UserConfigurable
 
@@ -46,7 +47,12 @@ class GCSFileWorkspace(FileWorkspace):
         return True
 
     def initialize(self) -> None:
-        self._bucket = self._gcs.get_bucket(self._bucket_name)
+        logger.debug(f"Initializing {repr(self)}...")
+        try:
+            self._bucket = self._gcs.get_bucket(self._bucket_name)
+        except NotFound:
+            logger.info(f"Bucket '{self._bucket_name}' does not exist; creating it...")
+            self._bucket = self._gcs.create_bucket(self._bucket_name)
 
     def get_path(self, relative_path: str | Path) -> Path:
         return super().get_path(relative_path).relative_to("/")
@@ -85,13 +91,18 @@ class GCSFileWorkspace(FileWorkspace):
     def list(self, path: str | Path = ".") -> list[Path]:
         """List all files (recursively) in a directory in the workspace."""
         path = self.get_path(path)
-        blobs = self._bucket.list_blobs(
-            prefix=f"{path}/" if path != Path(".") else None
-        )
-        return [Path(blob.name).relative_to(path) for blob in blobs]
+        return [
+            Path(blob.name).relative_to(path)
+            for blob in self._bucket.list_blobs(
+                prefix=f"{path}/" if path != Path(".") else None
+            )
+        ]
 
     def delete_file(self, path: str | Path) -> None:
         """Delete a file in the workspace."""
         path = self.get_path(path)
         blob = self._bucket.blob(str(path))
         blob.delete()
+
+    def __repr__(self) -> str:
+        return f"{__class__.__name__}(bucket='{self._bucket_name}', root={self._root})"
diff --git a/autogpts/autogpt/autogpt/file_workspace/s3.py b/autogpts/autogpt/autogpt/file_workspace/s3.py
index 79f3ae74e..a60291374 100644
--- a/autogpts/autogpt/autogpt/file_workspace/s3.py
+++ b/autogpts/autogpt/autogpt/file_workspace/s3.py
@@ -64,12 +64,14 @@ class S3FileWorkspace(FileWorkspace):
         return True
 
     def initialize(self) -> None:
+        logger.debug(f"Initializing {repr(self)}...")
         try:
             self._s3.meta.client.head_bucket(Bucket=self._bucket_name)
             self._bucket = self._s3.Bucket(self._bucket_name)
         except botocore.exceptions.ClientError as e:
             if "(404)" not in str(e):
                 raise
+            logger.info(f"Bucket '{self._bucket_name}' does not exist; creating it...")
             self._bucket = self._s3.create_bucket(Bucket=self._bucket_name)
 
     def get_path(self, relative_path: str | Path) -> Path:
@@ -86,12 +88,11 @@
     def open_file(self, path: str | Path, binary: bool = False) -> IOBase:
         """Open a file in the workspace."""
         obj = self._get_obj(path)
-        return obj.get()["Body"] if not binary else TextIOWrapper(obj.get()["Body"])
+        return obj.get()["Body"] if binary else TextIOWrapper(obj.get()["Body"])
 
     def read_file(self, path: str | Path, binary: bool = False) -> str | bytes:
         """Read a file in the workspace."""
-        file_content = self.open_file(path, binary).read()
-        return file_content if binary else file_content.decode()
+        return self.open_file(path, binary).read()
 
     async def write_file(self, path: str | Path, content: str | bytes) -> None:
         """Write to a file in the workspace."""
@@ -109,11 +110,12 @@
     def list(self, path: str | Path = ".") -> list[Path]:
         """List all files (recursively) in a directory in the workspace."""
         path = self.get_path(path)
-        if path == Path("."):
+        if path == Path("."):  # root level of bucket
             return [Path(obj.key) for obj in self._bucket.objects.all()]
         else:
             return [
-                Path(obj.key) for obj in self._bucket.objects.filter(Prefix=f"{path}/")
+                Path(obj.key).relative_to(path)
+                for obj in self._bucket.objects.filter(Prefix=f"{path}/")
             ]
 
     def delete_file(self, path: str | Path) -> None:
@@ -121,3 +123,6 @@
         path = self.get_path(path)
         obj = self._s3.Object(self._bucket_name, str(path))
         obj.delete()
+
+    def __repr__(self) -> str:
+        return f"{__class__.__name__}(bucket='{self._bucket_name}', root={self._root})"
diff --git a/autogpts/autogpt/tests/unit/test_gcs_file_workspace.py b/autogpts/autogpt/tests/unit/test_gcs_file_workspace.py
index 7e23fc584..6372a91cd 100644
--- a/autogpts/autogpt/tests/unit/test_gcs_file_workspace.py
+++ b/autogpts/autogpt/tests/unit/test_gcs_file_workspace.py
@@ -4,12 +4,16 @@ from pathlib import Path
 
 import pytest
 import pytest_asyncio
+from google.auth.exceptions import GoogleAuthError
+from google.cloud import storage
 from google.cloud.exceptions import NotFound
 
 from autogpt.file_workspace.gcs import GCSFileWorkspace, GCSFileWorkspaceConfiguration
 
-if not os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
-    pytest.skip("GOOGLE_APPLICATION_CREDENTIALS are not set", allow_module_level=True)
+try:
+    storage.Client()
+except GoogleAuthError:
+    pytest.skip("Google Cloud Authentication not configured", allow_module_level=True)
 
 
 @pytest.fixture
@@ -21,6 +25,7 @@ def gcs_bucket_name() -> str:
 def gcs_workspace_uninitialized(gcs_bucket_name: str) -> GCSFileWorkspace:
     os.environ["WORKSPACE_STORAGE_BUCKET"] = gcs_bucket_name
     ws_config = GCSFileWorkspaceConfiguration.from_env()
+    ws_config.root = Path("/workspaces/AutoGPT-some-unique-task-id")
     workspace = GCSFileWorkspace(ws_config)
     yield workspace  # type: ignore
     del os.environ["WORKSPACE_STORAGE_BUCKET"]
@@ -29,23 +34,19 @@
 def test_initialize(
     gcs_bucket_name: str, gcs_workspace_uninitialized: GCSFileWorkspace
 ):
-    gcs = gcs_workspace_uninitialized._bucket
+    gcs = gcs_workspace_uninitialized._gcs
 
     # test that the bucket doesn't exist yet
     with pytest.raises(NotFound):
-        gcs.get_blob(gcs_bucket_name)
+        gcs.get_bucket(gcs_bucket_name)
 
     gcs_workspace_uninitialized.initialize()
 
     # test that the bucket has been created
-    gcs.get_blob(gcs_bucket_name)
+    bucket = gcs.get_bucket(gcs_bucket_name)
 
-
-def test_workspace_bucket_name(
-    gcs_workspace: GCSFileWorkspace,
-    gcs_bucket_name: str,
-):
-    assert gcs_workspace._bucket.name == gcs_bucket_name
+    # clean up
+    bucket.delete(force=True)
 
 
 @pytest.fixture
@@ -54,15 +55,22 @@ def gcs_workspace(gcs_workspace_uninitialized: GCSFileWorkspace) -> GCSFileWorks
     yield gcs_workspace  # type: ignore
 
     # Empty & delete the test bucket
-    gcs_workspace._bucket.delete_blobs(gcs_workspace._bucket.list_blobs())
-    gcs_workspace._bucket.delete()
+    gcs_workspace._bucket.delete(force=True)
 
 
+def test_workspace_bucket_name(
+    gcs_workspace: GCSFileWorkspace,
+    gcs_bucket_name: str,
+):
+    assert gcs_workspace._bucket.name == gcs_bucket_name
+
+
+NESTED_DIR = "existing/test/dir"
 TEST_FILES: list[tuple[str | Path, str]] = [
     ("existing_test_file_1", "test content 1"),
-    ("/existing_test_file_2.txt", "test content 2"),
-    (Path("/existing_test_file_3"), "test content 3"),
-    (Path("existing/test/file/4"), "test content 4"),
+    ("existing_test_file_2.txt", "test content 2"),
+    (Path("existing_test_file_3"), "test content 3"),
+    (Path(f"{NESTED_DIR}/test/file/4"), "test content 4"),
 ]
 
 
@@ -89,20 +97,17 @@ def test_list_files(gcs_workspace_with_files: GCSFileWorkspace):
     # List at root level
     assert (files := gcs_workspace_with_files.list()) == gcs_workspace_with_files.list()
     assert len(files) > 0
-    assert set(files) == set(
-        p.relative_to("/") if (p := Path(file_name)).is_absolute() else p
-        for file_name, _ in TEST_FILES
-    )
+    assert set(files) == set(Path(file_name) for file_name, _ in TEST_FILES)
 
     # List at nested path
     assert (
-        nested_files := gcs_workspace_with_files.list("existing")
-    ) == gcs_workspace_with_files.list("existing")
+        nested_files := gcs_workspace_with_files.list(NESTED_DIR)
+    ) == gcs_workspace_with_files.list(NESTED_DIR)
     assert len(nested_files) > 0
     assert set(nested_files) == set(
-        p
+        p.relative_to(NESTED_DIR)
         for file_name, _ in TEST_FILES
-        if (p := Path(file_name)).is_relative_to("existing")
+        if (p := Path(file_name)).is_relative_to(NESTED_DIR)
     )
diff --git a/autogpts/autogpt/tests/unit/test_s3_file_workspace.py b/autogpts/autogpt/tests/unit/test_s3_file_workspace.py
index 218241260..980d1e3b8 100644
--- a/autogpts/autogpt/tests/unit/test_s3_file_workspace.py
+++ b/autogpts/autogpt/tests/unit/test_s3_file_workspace.py
@@ -21,6 +21,7 @@ def s3_bucket_name() -> str:
 def s3_workspace_uninitialized(s3_bucket_name: str) -> S3FileWorkspace:
     os.environ["WORKSPACE_STORAGE_BUCKET"] = s3_bucket_name
     ws_config = S3FileWorkspaceConfiguration.from_env()
+    ws_config.root = Path("/workspaces/AutoGPT-some-unique-task-id")
     workspace = S3FileWorkspace(ws_config)
     yield workspace  # type: ignore
     del os.environ["WORKSPACE_STORAGE_BUCKET"]
@@ -56,11 +57,12 @@ def s3_workspace(s3_workspace_uninitialized: S3FileWorkspace) -> S3FileWorkspace
     s3_workspace._bucket.delete()
 
 
+NESTED_DIR = "existing/test/dir"
 TEST_FILES: list[tuple[str | Path, str]] = [
     ("existing_test_file_1", "test content 1"),
-    ("/existing_test_file_2.txt", "test content 2"),
-    (Path("/existing_test_file_3"), "test content 3"),
-    (Path("existing/test/file/4"), "test content 4"),
+    ("existing_test_file_2.txt", "test content 2"),
+    (Path("existing_test_file_3"), "test content 3"),
+    (Path(f"{NESTED_DIR}/test/file/4"), "test content 4"),
 ]
 
 
@@ -87,20 +89,17 @@ def test_list_files(s3_workspace_with_files: S3FileWorkspace):
     # List at root level
     assert (files := s3_workspace_with_files.list()) == s3_workspace_with_files.list()
     assert len(files) > 0
-    assert set(files) == set(
-        p.relative_to("/") if (p := Path(file_name)).is_absolute() else p
-        for file_name, _ in TEST_FILES
-    )
+    assert set(files) == set(Path(file_name) for file_name, _ in TEST_FILES)
 
     # List at nested path
     assert (
-        nested_files := s3_workspace_with_files.list("existing")
-    ) == s3_workspace_with_files.list("existing")
+        nested_files := s3_workspace_with_files.list(NESTED_DIR)
+    ) == s3_workspace_with_files.list(NESTED_DIR)
     assert len(nested_files) > 0
     assert set(nested_files) == set(
-        p
+        p.relative_to(NESTED_DIR)
        for file_name, _ in TEST_FILES
-        if (p := Path(file_name)).is_relative_to("existing")
+        if (p := Path(file_name)).is_relative_to(NESTED_DIR)
     )
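
One subtle change worth calling out from the diff above: S3FileWorkspace.open_file() previously had its binary/text condition inverted, and read_file() double-handled decoding. The sketch below is illustrative only (the `workspace` object and file name are hypothetical) and shows the intended semantics after this commit: binary=True yields raw bytes, binary=False yields decoded text.

    # Assumes an initialized S3FileWorkspace bound to `workspace`
    # and a previously written file "notes.txt".
    text_stream = workspace.open_file("notes.txt")               # wrapped in TextIOWrapper, reads str
    byte_stream = workspace.open_file("notes.txt", binary=True)  # raw response body, reads bytes

    assert isinstance(workspace.read_file("notes.txt"), str)
    assert isinstance(workspace.read_file("notes.txt", binary=True), bytes)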