diff --git a/assets/images/TorchTitan_logo_main.jpg b/assets/images/TorchTitan_logo_main.jpg new file mode 100644 index 00000000..8ebde433 Binary files /dev/null and b/assets/images/TorchTitan_logo_main.jpg differ diff --git a/torchtitan/datasets/hf_datasets.py b/torchtitan/datasets/hf_datasets.py index 9db036b0..c8dc2a6f 100644 --- a/torchtitan/datasets/hf_datasets.py +++ b/torchtitan/datasets/hf_datasets.py @@ -13,6 +13,7 @@ from torchdata.stateful_dataloader import StatefulDataLoader + from torchtitan.datasets.tokenizer import Tokenizer from torchtitan.logging import logger @@ -141,6 +142,7 @@ def _get_data_iter(self): if isinstance(self._data, Dataset) and self._sample_idx == len(self._data): return iter([]) + return iter(self._data.skip(self._sample_idx)) def load_state_dict(self, state_dict):