def from_file(vocab: str, **kwargs):
    """Build a ``BertWordPieceTokenizer`` from a vocabulary file.

    Args:
        vocab: Handed unchanged to ``WordPiece.read_file``; presumably the
            path of a WordPiece vocabulary file -- confirm against callers.
        **kwargs: Forwarded verbatim to the ``BertWordPieceTokenizer``
            constructor.

    Returns:
        The ``BertWordPieceTokenizer`` built from whatever
        ``WordPiece.read_file`` returned for ``vocab``.
    """
    # Keep the loaded vocabulary under its own name instead of rebinding the
    # ``str``-annotated ``vocab`` parameter to a different kind of object.
    loaded_vocab = WordPiece.read_file(vocab)
    return BertWordPieceTokenizer(loaded_vocab, **kwargs)
def load_vocab(self, vocab, merges):
    """Replace this object's tokenizer model with one loaded from files.

    Args:
        vocab: First argument passed to ``tok_model.read_file``; presumably
            a vocabulary file path -- confirm against callers.
        merges: Second argument passed to ``tok_model.read_file``;
            presumably a merges file path -- confirm against callers.

    Returns:
        None. The freshly built model is stored on ``self.tokenizer.model``.
    """
    # ``read_file`` must yield exactly two values; they are given their own
    # names so the incoming arguments are not shadowed.
    loaded_vocab, loaded_merges = tok_model.read_file(vocab, merges)
    self.tokenizer.model = tok_model(loaded_vocab, loaded_merges)