From 325000e6314dd1a4fa20bf076e873b81b0aa8e28 Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Fri, 6 Jan 2023 23:01:47 +0900 Subject: [PATCH] Fix typo in bpe.py occurence -> occurrence occurences -> occurrences --- mingpt/bpe.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mingpt/bpe.py b/mingpt/bpe.py index b8468ef9..fa8face2 100644 --- a/mingpt/bpe.py +++ b/mingpt/bpe.py @@ -118,13 +118,13 @@ def bpe(self, token): break # no more bigrams are eligible to be merged first, second = bigram - # we will now replace all occurences of (first, second) in the list of current + # we will now replace all occurrences of (first, second) in the list of current # words into one merged token first_second, in the output list new_words new_word = [] i = 0 while i < len(word): - # find the next occurence of first in the sequence of current words + # find the next occurrence of first in the sequence of current words try: j = word.index(first, i) new_word.extend(word[i:j]) @@ -133,7 +133,7 @@ def bpe(self, token): new_word.extend(word[i:]) break - # if this occurence is also followed by second, then merge them into one + # if this occurrence is also followed by second, then merge them into one if word[i] == first and i < len(word)-1 and word[i+1] == second: new_word.append(first+second) i += 2 @@ -141,7 +141,7 @@ def bpe(self, token): new_word.append(word[i]) i += 1 - # all occurences of (first, second) have been merged to first_second + # all occurrences of (first, second) have been merged to first_second new_word = tuple(new_word) word = new_word if len(word) == 1: