diff --git a/source/embed.py b/source/embed.py index c775ffce..b0a37221 100644 --- a/source/embed.py +++ b/source/embed.py @@ -364,15 +364,21 @@ def EmbedMmap(fname, dim=1024, dtype=np.float32, verbose=False): with tempfile.TemporaryDirectory() as tmpdir: ifname = '' # stdin will be used + tok_fname = os.path.join(tmpdir, 'tok') + if args.token_lang != '--': - tok_fname = os.path.join(tmpdir, 'tok') + Token(ifname, tok_fname, lang=args.token_lang, romanize=True if args.token_lang == 'el' else False, lower_case=True, gzip=False, verbose=args.verbose, over_write=False) - ifname = tok_fname + else: + with open(tok_fname, "w") as ofname: + for line in sys.stdin: + ofname.write(line) + ifname = tok_fname if args.bpe_codes: bpe_fname = os.path.join(tmpdir, 'bpe')