Merge pull request #371 from rabroldan/issue-366
Added an enhancement - issue 366 to add a download all if no argument…
kbolashev authored Oct 3, 2023
2 parents b03420f + 903429d commit 93d1a1a
Showing 1 changed file with 24 additions and 6 deletions.
dagshub/data_engine/model/query_result.py: 24 additions & 6 deletions
@@ -15,11 +15,15 @@
 from dagshub.common.helpers import sizeof_fmt, prompt_user
 from dagshub.common.rich_util import get_rich_progress
 from dagshub.common.util import lazy_load
-from dagshub.data_engine.annotation.voxel_conversion import add_voxel_annotations, add_ls_annotations
+from dagshub.data_engine.annotation.voxel_conversion import (
+    add_voxel_annotations,
+    add_ls_annotations,
+)
 from dagshub.data_engine.client.models import DatasourceType
 from dagshub.data_engine.model.datapoint import Datapoint, _get_blob
 from dagshub.data_engine.client.loaders.base import DagsHubDataset
 from dagshub.data_engine.voxel_plugin_server.utils import set_voxel_envvars
+from dagshub.data_engine.dtypes import MetadataFieldType

 if TYPE_CHECKING:
     from dagshub.data_engine.model.datasource import Datasource
@@ -141,7 +145,10 @@ def as_ml_dataloader(self, flavor, **kwargs):
             the shuffle order is determined for the first epoch; default: False
         """

-        send_analytics_event("Client_DataEngine_DataLoaderInitialized", repo=self.datasource.source.repoApi)
+        send_analytics_event(
+            "Client_DataEngine_DataLoaderInitialized",
+            repo=self.datasource.source.repoApi,
+        )

         def keypairs(keys):
             return {key: kwargs[key] for key in keys}
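
For context, a minimal usage sketch of the method this hunk touches (qr stands for a hypothetical QueryResult; the "torch" flavor string is an assumption about the supported dataloader flavors):

    # Assumption: "torch" is an accepted flavor; extra kwargs are forwarded
    # to the underlying dataset, as the keypairs helper above suggests.
    loader = qr.as_ml_dataloader("torch")
    for batch in loader:
        pass  # feed each batch to a training loop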
@@ -203,16 +210,21 @@ def get_blob_fields(
         Downloads data from blob fields

         Args:
-            fields: list of binary fields to download blobs for
+            fields: list of binary fields to download blobs for. If empty, download all blob fields.
             load_into_memory: Whether to load the blobs into the datapoints, or just store them on disk
-                If True : the datapoints' specified fields will contain the blob data
+                If True: the datapoints' specified fields will contain the blob data
                 If False: the datapoints' specified fields will contain Path objects to the file of the downloaded blob
             cache_on_disk: Whether to cache the blobs on disk or not (valid only if load_into_memory is set to True)
                 Cache location is `~/dagshub/datasets/<user>/<repo>/<datasource_id>/.metadata_blobs/`
         """
         send_analytics_event("Client_DataEngine_downloadBlobs", repo=self.datasource.source.repoApi)
         if not load_into_memory:
             assert cache_on_disk

+        # If no fields are specified, include all blob fields from self.datasource.fields
+        if not fields:
+            fields = [field.name for field in self.datasource.fields if field.valueType == MetadataFieldType.BLOB]
+
         for fld in fields:
             logger.info(f"Downloading metadata for field {fld} with {num_proc} processes")
             cache_location = self.datasource.default_dataset_location / ".metadata_blobs"
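
The behavioral change in this hunk, as a hedged usage sketch (qr is a hypothetical QueryResult, e.g. from a datasource query, and the field name "annotation" is invented for illustration):

    # Before this commit, blob fields had to be named explicitly:
    qr.get_blob_fields("annotation", load_into_memory=True)

    # After this commit, calling with no field names downloads every field
    # whose valueType is MetadataFieldType.BLOB on the datasource:
    qr.get_blob_fields(load_into_memory=True)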
@@ -277,9 +289,15 @@ def download_files(
         logger.warning(f"Downloading {len(self.entries)} files to {str(target_path)}")

         if self.datasource.source.source_type == DatasourceType.BUCKET:
-            send_analytics_event("Client_DataEngine_downloadDatapointsFromBucket", repo=self.datasource.source.repoApi)
+            send_analytics_event(
+                "Client_DataEngine_downloadDatapointsFromBucket",
+                repo=self.datasource.source.repoApi,
+            )
         else:
-            send_analytics_event("Client_DataEngine_downloadDatapoints", repo=self.datasource.source.repoApi)
+            send_analytics_event(
+                "Client_DataEngine_downloadDatapoints",
+                repo=self.datasource.source.repoApi,
+            )

         def dp_path(dp: Datapoint):
             if path_field is not None:
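
The truncated dp_path helper keys off path_field; here is a hedged sketch of how a caller might use it (the target_dir and path_field parameter names are assumptions about the full download_files signature, which this diff does not show):

    # Assumption: download_files accepts an optional target directory and an
    # optional path_field naming the metadata field used as each datapoint's
    # relative save path.
    qr.download_files()  # download to the datasource's default location
    qr.download_files(target_dir="data/", path_field="relative_path")  # hypothetical values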
