Skip to content

Commit

Permalink
Make all data imports lazy
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 691908578
  • Loading branch information
Conchylicultor authored and The kauldron Authors committed Nov 6, 2024
1 parent 00fd080 commit 61c94b9
Showing 1 changed file with 28 additions and 28 deletions.
56 changes: 28 additions & 28 deletions kauldron/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,43 +20,43 @@

import etils.epy as _epy


# TODO(epot): Somehow, importing `kauldron.data.py` inside `lazy_api_imports`
# creates an infinite recursion when the import is resolved, likely because
# there's some special handling of the suffix `py` to support
# `third_party.py`. I don't have time to investigate, so instead the module is
# imported in `lazy_imports` rather than `lazy_api_imports`.
with _epy.lazy_imports():
# PyGrain based data pipeline.
from kauldron.data import py

# tf.data based data pipeline.
from kauldron.data import tf


with _epy.lazy_api_imports(globals()):
# Top-level abstractions, independent of any specific backend (TF, PyGrain)
from kauldron.data.data_utils import IterableDataset
# Top-level pipelines
from kauldron.data.pipelines import Pipeline
# TODO(epot): Remove `InMemoryPipeline` in favor of `PyGrain` if PyGrain
# supports efficient batch-lookup.
from kauldron.data.in_memory import InMemoryPipeline
from kauldron.data.utils import BatchSize

# PyGrain based data pipeline.
# TODO(epot): Somehow, importing here creates an infinite recursion when the
# import is resolved, likely because there's some special handling of the
# suffix `py` to support `third_party.py`. I don't have time to investigate,
# so instead the module is imported below in `lazy_imports` rather than
# `lazy_api_imports`.
# from kauldron.data import py

# TODO(epot): Migrate all existing symbols to `kd.data.tf.`
# tf.data based data pipeline.
from kauldron.data import tf
# ****************************************************************************
# DO NOT ADD preprocessing ops here. Instead, add them to `kd.contrib.data`
# ****************************************************************************

# Users should inherit from these base classes to have transformations
# supported by both TfGrain (`kd.data.tf`) and PyGrain (`kd.data.py`)
from kauldron.data.transforms.abc import MapTransform
# from kauldron.data.transforms.abc import RandomMapTransform
from kauldron.data.transforms.abc import FilterTransform

from kauldron.data.utils import BatchSize

# ****************************************************************************
# DO NOT ADD preprocessing ops here. Instead, add them to `kd.contrib.data`
# ****************************************************************************

# TODO(epot): Should migrate all users to explicitly use `kd.data.tf`
from kauldron.data.transforms.base import Elements
from kauldron.data.transforms.base import ElementWiseTransform
from kauldron.data.transforms.base import TreeFlattenWithPath
from kauldron.data.transforms.map_transforms import Gather
from kauldron.data.transforms.map_transforms import Rearrange
from kauldron.data.transforms.map_transforms import ValueRange

with _epy.lazy_imports():
from kauldron.data import py
# Transformations can be used in both `kd.data.tf` and `kd.data.py`
from kauldron.data.transforms.base import Elements
from kauldron.data.transforms.base import ElementWiseTransform
from kauldron.data.transforms.base import TreeFlattenWithPath
from kauldron.data.transforms.map_transforms import Gather
from kauldron.data.transforms.map_transforms import Rearrange
from kauldron.data.transforms.map_transforms import ValueRange

0 comments on commit 61c94b9

Please sign in to comment.