Skip to content

Commit

Permalink
Better public artifact support
Browse files Browse the repository at this point in the history
  • Loading branch information
oeway committed Oct 10, 2024
1 parent 1288723 commit 880fb97
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 47 deletions.
49 changes: 32 additions & 17 deletions docs/artifact-manager.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ gallery_manifest = {
"collection": [],
}

await artifact_manager.create(prefix="collections/dataset-gallery", manifest=gallery_manifest)
await artifact_manager.create(prefix="collections/dataset-gallery", manifest=gallery_manifest, public=True)
print("Dataset Gallery created.")
```

Expand All @@ -56,7 +56,7 @@ dataset_manifest = {
"files": [],
}

await artifact_manager.create(prefix="collections/dataset-gallery/example-dataset", manifest=dataset_manifest, stage=True)
await artifact_manager.create(prefix="collections/dataset-gallery/example-dataset", manifest=dataset_manifest, stage=True) # no need to set public since it is already set in the collection, but you can make it private by setting public=False
print("Dataset added to the gallery.")
```

Expand Down Expand Up @@ -124,7 +124,7 @@ async def main():
"type": "collection",
"collection": [],
}
await artifact_manager.create(prefix="collections/dataset-gallery", manifest=gallery_manifest)
await artifact_manager.create(prefix="collections/dataset-gallery", manifest=gallery_manifest, public=True)
print("Dataset Gallery created.")

# Create a new dataset inside the Dataset Gallery
Expand Down Expand Up @@ -195,7 +195,7 @@ gallery_manifest = {
"collection": [],
}

await artifact_manager.create(prefix="collections/schema-dataset-gallery", manifest=gallery_manifest)
await artifact_manager.create(prefix="collections/schema-dataset-gallery", manifest=gallery_manifest, public=True)
print("Schema-based Dataset Gallery created.")
```

Expand Down Expand Up @@ -224,6 +224,25 @@ print("Valid dataset committed.")

## API References

### `create(prefix: str, manifest: dict, public: bool | None = None, stage: bool = False) -> None`

Creates a new artifact or collection with the specified manifest. If `stage=True`, the artifact is staged until committed. For collections, the `collection` field should be an empty list.

**Parameters:**

- `prefix`: The path of the new artifact or collection (e.g., `"collections/dataset-gallery/example-dataset"`).
- `manifest`: The manifest of the new artifact. Ensure the manifest follows the required schema if applicable (e.g., for collections).
- `public`: Optional. A boolean flag that controls whether the artifact is public. Default is `None`, in which case the artifact inherits the `public` setting of its parent artifact (e.g., the enclosing collection), if one exists. Set it to `True` or `False` to override the inherited value.
- `stage`: Optional. A boolean flag to stage the artifact. Default is `False`.

**Example:**

```python
await artifact_manager.create(prefix="collections/dataset-gallery/example-dataset", manifest=dataset_manifest, public=True, stage=True)
```

---

### `edit(prefix: str, manifest: dict) -> None`

Edits an existing artifact's manifest. The new manifest is staged until committed. The updated manifest is stored temporarily as `_manifest.yaml`.
Expand Down Expand Up @@ -332,13 +351,14 @@ get_url = await artifact_manager.get_file(prefix="collections/dataset-gallery/ex

---

### `list(prefix: str, stage: bool = False) -> list`
### `list(prefix: str, max_length: int = 1000, stage: bool = False) -> list`

Lists all artifacts or collections under the specified prefix. Returns either a list of artifact names or collection items, depending on whether the prefix points to a collection.

**Parameters:**

- `prefix`: The path under which the artifacts or collections are listed (e.g., `"collections/dataset-gallery"`).
- `max_length`: Optional. The maximum number of items to list. Default is `1000`.
- `stage`: Optional. If `True`, it lists the artifacts in staging mode. Default is `False`.

**Returns:** A list of artifact or collection item names.
Expand Down Expand Up @@ -424,7 +444,7 @@ gallery_manifest = {
"type": "collection",
"collection": [],
}
await artifact_manager.create(prefix="collections/dataset-gallery", manifest=gallery_manifest)
await artifact_manager.create(prefix="collections/dataset-gallery", manifest=gallery_manifest, public=True)

# Step 3: Add a dataset to the gallery
dataset_manifest = {
Expand All @@ -434,7 +454,7 @@ dataset_manifest = {
"type": "dataset",
"files": [],
}
await artifact_manager.create(prefix="collections/dataset-gallery/example-dataset", manifest=dataset_manifest, stage=True)
await artifact_manager.create(prefix="collections/dataset-gallery/example-dataset", manifest=dataset_manifest, stage=True, public=True)

# Step 4: Upload a file to the dataset
put_url = await artifact_manager.put_file(prefix="collections/dataset-gallery/example-dataset", file_path="data.csv")
Expand Down Expand Up @@ -467,26 +487,21 @@ The `Artifact Manager` provides an HTTP endpoint for retrieving artifact manifes
### Endpoint: `/{workspace}/artifact/{path:path}`

- **Workspace**: The workspace in which the artifact is stored.
- **Path**: The relative path to the artifact.
- For public artifacts, the path must begin with `public/`.
- For private artifacts, the path does not include the `public/` prefix and requires proper authentication.
- **Path**: The relative path to the artifact. Accessing a private artifact requires authentication: pass the user's token in the request headers.

### Request Format:

- **Method**: `GET`
- **Headers**:
- `Authorization`: Optional. The user's token for accessing private artifacts (obtained via the login logic or created by `api.generate_token()`). Not required for public artifacts under the `public/` prefix.
- `Authorization`: Optional. The user's token for accessing private artifacts (obtained via the login logic or created by `api.generate_token()`). Not required for public artifacts.
- **Parameters**:
- `workspace`: The workspace in which the artifact is stored.
- `path`:

The path to the artifact (e.g., `public/collections/dataset-gallery/example-dataset`).
- `path`: The path to the artifact (e.g., `collections/dataset-gallery/example-dataset`).
- `stage` (optional): A boolean flag to indicate whether to fetch the staged version of the manifest (`_manifest.yaml`). Default is `False`.

### Response:

- **For public artifacts**: Returns the artifact manifest if it exists under the `public/` prefix, including any download statistics in the `_stats` field.
- **For private artifacts**: Returns the artifact manifest if the user has the necessary permissions.
Returns the artifact manifest if it exists, including any download statistics in the `_stats` field. For private artifacts, the user must have the necessary permissions.


### Example: Fetching a public artifact with download statistics
Expand All @@ -496,7 +511,7 @@ import requests

SERVER_URL = "https://hypha.aicell.io"
workspace = "my-workspace"
response = requests.get(f"{SERVER_URL}/{workspace}/artifact/public/collections/dataset-gallery/example-dataset")
response = requests.get(f"{SERVER_URL}/{workspace}/artifact/collections/dataset-gallery/example-dataset")
if response.ok:
artifact = response.json()
print(artifact["name"]) # Output: Example Dataset
Expand Down
2 changes: 1 addition & 1 deletion hypha/VERSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"version": "0.20.38.post1"
"version": "0.20.38.post2"
}
87 changes: 64 additions & 23 deletions hypha/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Column,
String,
Integer,
Boolean,
Float,
JSON,
UniqueConstraint,
Expand Down Expand Up @@ -58,6 +59,9 @@ class ArtifactModel(Base):
) # Store the weights for counting downloads; a dictionary of file paths and their weights 0-1
download_count = Column(Float, nullable=False, default=0.0) # New counter field
view_count = Column(Float, nullable=False, default=0.0) # New counter field
public = Column(
Boolean, nullable=True
) # New field for public artifacts, None means it follows the parent artifact
__table_args__ = (
UniqueConstraint("workspace", "prefix", name="_workspace_prefix_uc"),
)
Expand Down Expand Up @@ -92,14 +96,15 @@ async def get_artifact(
):
"""Get artifact from the database."""
try:
if path.startswith("public/"):
return await self._read_manifest(workspace, path, stage=stage)
else:
artifact, manifest = await self._read_manifest(
workspace, path, stage=stage
)
if not artifact.public:
if not user_info.check_permission(workspace, UserPermission.read):
raise PermissionError(
"User does not have read permission to the workspace."
"User does not have read permission to the non-public artifact."
)
return await self._read_manifest(workspace, path, stage=stage)
return manifest
except KeyError as e:
raise HTTPException(status_code=404, detail=str(e))
except PermissionError as e:
Expand Down Expand Up @@ -226,7 +231,7 @@ async def _read_manifest(self, workspace, prefix, stage=False):
}
if manifest.get("type") == "collection":
manifest["_stats"]["child_count"] = len(collection)
return manifest
return artifact, manifest
except Exception as e:
raise e
finally:
Expand All @@ -246,6 +251,7 @@ async def create(
manifest: dict,
overwrite=False,
stage=False,
public=None,
context: dict = None,
):
"""Create a new artifact and store its manifest in the database."""
Expand Down Expand Up @@ -274,6 +280,16 @@ async def create(
session = await self._get_session()
try:
async with session.begin():
if public is None:
# if public is not set, try to use the parent artifact's public setting
parent_prefix = "/".join(prefix.split("/")[:-1])
if parent_prefix:
parent_artifact = await self._get_artifact(
session, ws, parent_prefix
)
if parent_artifact:
public = parent_artifact.public

existing_artifact = await self._get_artifact(session, ws, prefix)

if existing_artifact:
Expand All @@ -294,6 +310,7 @@ async def create(
stage_manifest=manifest if stage else None,
stage_files=[] if stage else None,
download_weights=None,
public=public,
type=manifest["type"],
)
session.add(new_artifact)
Expand Down Expand Up @@ -345,12 +362,15 @@ async def read(self, prefix, stage=False, context: dict = None):
ws = context["ws"]

user_info = UserInfo.model_validate(context["user"])
if not user_info.check_permission(ws, UserPermission.read):
artifact, manifest = await self._read_manifest(ws, prefix, stage=stage)

if not artifact.public and not user_info.check_permission(
ws, UserPermission.read
):
raise PermissionError(
"User does not have read permission to the workspace."
)

manifest = await self._read_manifest(ws, prefix, stage=stage)
return manifest

async def edit(self, prefix, manifest=None, context: dict = None):
Expand Down Expand Up @@ -505,13 +525,18 @@ async def _delete_s3_files(self, ws, prefix):
async with self.s3_controller.create_client_async() as s3_client:
await remove_objects_async(s3_client, self.workspace_bucket, artifact_path)

async def list_files(self, prefix, max_length=1000, context: dict = None):
async def list_files(
self, prefix, max_length=1000, stage=False, context: dict = None
):
"""List files in the specified S3 prefix."""
if context is None or "ws" not in context:
raise ValueError("Context must include 'ws' (workspace).")
ws = context["ws"]
user_info = UserInfo.model_validate(context["user"])
if not user_info.check_permission(ws, UserPermission.read):
artifact, _ = await self._read_manifest(ws, prefix, stage=stage)
if not artifact.public and not user_info.check_permission(
ws, UserPermission.read
):
raise PermissionError(
"User does not have read permission to the workspace."
)
Expand All @@ -520,19 +545,30 @@ async def list_files(self, prefix, max_length=1000, context: dict = None):
items = await list_objects_async(
s3_client, self.workspace_bucket, full_path, max_length=max_length
)
# TODO: If stage should we return only the staged files?
return items

async def list_artifacts(self, prefix="", stage=False, context: dict = None):
async def list_artifacts(self, prefix="", context: dict = None):
"""List all artifacts under a certain prefix."""
if context is None or "ws" not in context:
raise ValueError("Context must include 'ws' (workspace).")
ws = context["ws"]

user_info = UserInfo.model_validate(context["user"])
if not user_info.check_permission(ws, UserPermission.read):
raise PermissionError(
"User does not have read permission to the workspace."
)
try:
artifact, _ = await self._read_manifest(ws, prefix)
if not artifact.public and not user_info.check_permission(
ws, UserPermission.read
):
raise PermissionError(
"User does not have read permission to the workspace."
)
except KeyError:
# the prefix is not a valid artifact, check if the user has permission to list the artifacts
if not user_info.check_permission(ws, UserPermission.read):
raise PermissionError(
"User does not have read permission to the workspace."
)

session = await self._get_session()
try:
Expand All @@ -548,7 +584,6 @@ async def list_artifacts(self, prefix="", stage=False, context: dict = None):
for artifact in sub_artifacts:
if artifact.prefix == prefix:
continue
sub_manifest = artifact.manifest
name = artifact.prefix[len(prefix) + 1 :]
name = name.split("/")[0]
collection.append(name)
Expand All @@ -573,7 +608,10 @@ async def search(
ws = context["ws"]

user_info = UserInfo.model_validate(context["user"])
if not user_info.check_permission(ws, UserPermission.read):
artifact, _ = await self._read_manifest(ws, prefix)
if not artifact.public and not user_info.check_permission(
ws, UserPermission.read
):
raise PermissionError(
"User does not have read permission to the workspace."
)
Expand Down Expand Up @@ -705,7 +743,10 @@ async def get_file(self, prefix, path, options: dict = None, context: dict = Non
ws = context["ws"]

user_info = UserInfo.model_validate(context["user"])
if not user_info.check_permission(ws, UserPermission.read):
artifact, _ = await self._read_manifest(ws, prefix)
if not artifact.public and not user_info.check_permission(
ws, UserPermission.read
):
raise PermissionError(
"User does not have read permission to the workspace."
)
Expand Down Expand Up @@ -799,13 +840,13 @@ def get_artifact_service(self):
"create": self.create,
"reset_stats": self.reset_stats,
"edit": self.edit,
"read": self.read,
"read": self.read, # accessible to public if the artifact is public
"commit": self.commit,
"delete": self.delete,
"put_file": self.put_file,
"remove_file": self.remove_file,
"get_file": self.get_file,
"list": self.list_artifacts,
"search": self.search,
"list_files": self.list_files,
"get_file": self.get_file, # accessible to public if the artifact is public
"list": self.list_artifacts, # accessible to public if the artifact is public
"search": self.search, # accessible to public if the artifact is public
"list_files": self.list_files, # accessible to public if the artifact is public
}
Loading

0 comments on commit 880fb97

Please sign in to comment.