def __init__(self, bpe_dict, is_chinese):
    """Build the BPE segmenter from a merge-codes file.

    Args:
        bpe_dict: Path to the BPE codes file; passed to the subword-nmt
            ``apply_bpe`` argument parser as the value of its ``-c`` option.
        is_chinese: Flag stored unchanged on the instance as
            ``self.is_chinese``.
    """
    bpe_parser = subword_nmt.create_apply_bpe_parser()
    bpe_args = bpe_parser.parse_args(args=['-c', bpe_dict])
    # The parser returns ``codes`` as an already-open file object. Close it
    # once the BPE object exists so the descriptor is not leaked.
    # NOTE(review): relies on subword_nmt.BPE consuming the codes inside its
    # constructor -- confirm against the installed subword-nmt version.
    try:
        self.bpe = subword_nmt.BPE(
            bpe_args.codes,
            bpe_args.merges,
            bpe_args.separator,
            None,
            bpe_args.glossaries,
        )
    finally:
        bpe_args.codes.close()
    self.is_chinese = is_chinese
def __init__(self, args, is_chinese):
    """Build the BPE segmenter and load the source/target vocabularies.

    Args:
        args: Configuration object. This method reads ``src_bpe_dict``,
            ``src_vocab_fpath``, ``trg_vocab_fpath`` and ``special_token``
            (indexed as ``[0]`` bos, ``[1]`` eos, ``[2]`` unk) from it, and
            writes ``src_vocab_size`` and ``trg_vocab_size`` back onto it.
        is_chinese: Flag stored unchanged on the instance as
            ``self.is_chinese``.
    """
    bpe_parser = subword_nmt.create_apply_bpe_parser()
    bpe_args = bpe_parser.parse_args(args=['-c', args.src_bpe_dict])
    # The parser returns ``codes`` as an already-open file object. Close it
    # once the BPE object exists so the descriptor is not leaked.
    # NOTE(review): relies on subword_nmt.BPE consuming the codes inside its
    # constructor -- confirm against the installed subword-nmt version.
    try:
        self.bpe = subword_nmt.BPE(
            bpe_args.codes,
            bpe_args.merges,
            bpe_args.separator,
            None,
            bpe_args.glossaries,
        )
    finally:
        bpe_args.codes.close()
    self.is_chinese = is_chinese

    # Both vocabularies are loaded with the same special tokens.
    token_kwargs = {
        'bos_token': args.special_token[0],
        'eos_token': args.special_token[1],
        'unk_token': args.special_token[2],
    }
    self.src_vocab = Vocab.load_vocabulary(args.src_vocab_fpath, **token_kwargs)
    self.trg_vocab = Vocab.load_vocabulary(args.trg_vocab_fpath, **token_kwargs)

    # Publish the vocabulary sizes on the shared config object; the caller's
    # ``args`` is mutated in place and then kept as ``self.args``.
    args.src_vocab_size = len(self.src_vocab)
    args.trg_vocab_size = len(self.trg_vocab)
    self.args = args
def __init__(self,
             bpe_codes_fpath,
             src_vocab_fpath,
             trg_vocab_fpath,
             special_token=["<s>", "<e>", "<unk>"]):
    """Build the BPE segmenter and load the source/target vocabularies.

    Args:
        bpe_codes_fpath: Path to the BPE codes file; passed to the
            subword-nmt ``apply_bpe`` argument parser as the value of its
            ``-c`` option.
        src_vocab_fpath: Path given to ``Vocab.load_vocabulary`` for the
            source vocabulary.
        trg_vocab_fpath: Path given to ``Vocab.load_vocabulary`` for the
            target vocabulary.
        special_token: Sequence indexed as ``[0]`` bos, ``[1]`` eos and
            ``[2]`` unk token. The list default is only read here, never
            mutated, so sharing it across calls is harmless.
    """
    bpe_parser = subword_nmt.create_apply_bpe_parser()
    bpe_args = bpe_parser.parse_args(args=['-c', bpe_codes_fpath])
    # The parser returns ``codes`` as an already-open file object. Close it
    # once the BPE object exists so the descriptor is not leaked.
    # NOTE(review): relies on subword_nmt.BPE consuming the codes inside its
    # constructor -- confirm against the installed subword-nmt version.
    try:
        self.bpe = subword_nmt.BPE(
            bpe_args.codes,
            bpe_args.merges,
            bpe_args.separator,
            None,
            bpe_args.glossaries,
        )
    finally:
        bpe_args.codes.close()

    # Both vocabularies are loaded with the same special tokens.
    token_kwargs = {
        'bos_token': special_token[0],
        'eos_token': special_token[1],
        'unk_token': special_token[2],
    }
    self.src_vocab = Vocab.load_vocabulary(src_vocab_fpath, **token_kwargs)
    self.trg_vocab = Vocab.load_vocabulary(trg_vocab_fpath, **token_kwargs)
    self.src_vocab_size = len(self.src_vocab)
    self.trg_vocab_size = len(self.trg_vocab)