Skip to content

Commit

Permalink
added pad token to tokenizer
Browse files: browse the repository at this point in the history
  • Loading branch information
kshitijkg committed Oct 18, 2023
1 parent fbfb277 commit 6999b7c
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions megatron/tokenizer/tokenizer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -231,6 +231,8 @@ def __init__(self, vocab_file):
self.eod_id = self.tokenizer.token_to_id("<|endoftext|>")
self.pad_id = self.tokenizer.token_to_id("<|padding|>")
self.pad_token_id = self.pad_id
self._pad_token = self.pad_id
self.padding_side = "right"

@property
def vocab_size(self):
Expand Down

0 comments on commit 6999b7c

Please sign in to comment.