Пример #1
0
 def __init__(self, bpe_dict, is_chinese):
     """Build the BPE encoder from a codes file and record the language flag.

     Args:
         bpe_dict: Path to the BPE codes file; handed to the subword-nmt
             apply-BPE argument parser as the value of ``-c``.
         is_chinese: Stored unchanged on ``self.is_chinese``; how it is
             consumed is not visible from this method.
     """
     # Only ``-c`` is supplied, so merges, separator and glossaries take
     # whatever defaults the subword-nmt parser defines.
     bpe_parser = subword_nmt.create_apply_bpe_parser()
     bpe_args = bpe_parser.parse_args(args=['-c', bpe_dict])
     try:
         self.bpe = subword_nmt.BPE(bpe_args.codes, bpe_args.merges,
                                    bpe_args.separator, None,
                                    bpe_args.glossaries)
     finally:
         # The parser opened the codes file for us; BPE is expected to read
         # it fully during construction, so release the handle here rather
         # than leaking it for the lifetime of the process.
         bpe_args.codes.close()
     self.is_chinese = is_chinese
Пример #2
0
    def __init__(self, args, is_chinese):
        """Set up the BPE encoder and the source/target vocabularies.

        Args:
            args: Configuration object. This method reads
                ``src_bpe_dict``, ``src_vocab_fpath``, ``trg_vocab_fpath``
                and ``special_token`` (indexed 0..2 as bos, eos, unk), and
                writes ``src_vocab_size`` and ``trg_vocab_size`` back onto
                it before keeping a reference as ``self.args``.
            is_chinese: Stored unchanged on ``self.is_chinese``; how it is
                consumed is not visible from this method.
        """
        # Only ``-c`` is supplied, so merges, separator and glossaries take
        # whatever defaults the subword-nmt parser defines.
        bpe_parser = subword_nmt.create_apply_bpe_parser()
        bpe_args = bpe_parser.parse_args(args=['-c', args.src_bpe_dict])
        try:
            self.bpe = subword_nmt.BPE(bpe_args.codes, bpe_args.merges,
                                       bpe_args.separator, None,
                                       bpe_args.glossaries)
        finally:
            # The parser opened the codes file for us; BPE is expected to
            # read it fully during construction, so release the handle here
            # instead of leaking it.
            bpe_args.codes.close()
        self.is_chinese = is_chinese

        # Both vocabularies share the same special tokens.
        special_tokens = {
            'bos_token': args.special_token[0],
            'eos_token': args.special_token[1],
            'unk_token': args.special_token[2],
        }
        self.src_vocab = Vocab.load_vocabulary(args.src_vocab_fpath,
                                               **special_tokens)
        self.trg_vocab = Vocab.load_vocabulary(args.trg_vocab_fpath,
                                               **special_tokens)

        # Side effect on the caller's object: publish the vocabulary sizes.
        args.src_vocab_size = len(self.src_vocab)
        args.trg_vocab_size = len(self.trg_vocab)
        self.args = args
Пример #3
0
    def __init__(self,
                 bpe_codes_fpath,
                 src_vocab_fpath,
                 trg_vocab_fpath,
                 special_token=("<s>", "<e>", "<unk>")):
        """Set up the BPE encoder and the source/target vocabularies.

        Args:
            bpe_codes_fpath: Path to the BPE codes file; handed to the
                subword-nmt apply-BPE argument parser as the value of ``-c``.
            src_vocab_fpath: Path passed to ``Vocab.load_vocabulary`` for the
                source vocabulary.
            trg_vocab_fpath: Path passed to ``Vocab.load_vocabulary`` for the
                target vocabulary.
            special_token: Indexable of at least three strings, used in order
                as the bos, eos and unk tokens for both vocabularies. The
                default is an immutable tuple so it cannot be altered
                between calls; lists are still accepted.
        """
        # Only ``-c`` is supplied, so merges, separator and glossaries take
        # whatever defaults the subword-nmt parser defines.
        bpe_parser = subword_nmt.create_apply_bpe_parser()
        bpe_args = bpe_parser.parse_args(args=['-c', bpe_codes_fpath])
        try:
            self.bpe = subword_nmt.BPE(bpe_args.codes, bpe_args.merges,
                                       bpe_args.separator, None,
                                       bpe_args.glossaries)
        finally:
            # The parser opened the codes file for us; BPE is expected to
            # read it fully during construction, so release the handle here
            # instead of leaking it.
            bpe_args.codes.close()

        # Both vocabularies share the same special tokens.
        special_tokens = {
            'bos_token': special_token[0],
            'eos_token': special_token[1],
            'unk_token': special_token[2],
        }
        self.src_vocab = Vocab.load_vocabulary(src_vocab_fpath,
                                               **special_tokens)
        self.trg_vocab = Vocab.load_vocabulary(trg_vocab_fpath,
                                               **special_tokens)

        self.src_vocab_size = len(self.src_vocab)
        self.trg_vocab_size = len(self.trg_vocab)