def _get_parser():
    """Build the command-line argument parser for translate.py.

    Registers the standard OpenNMT config and translate option groups.
    """
    arg_parser = ArgumentParser(description='translate.py')
    opts.config_opts(arg_parser)
    opts.translate_opts(arg_parser)
    return arg_parser
def _get_parser():
    """Build the command-line argument parser for translate.py.

    Returns the parser unparsed; callers are responsible for calling
    ``parse_args`` themselves.
    """
    parser = ArgumentParser(description='translate.py')
    opts.config_opts(parser)
    opts.translate_opts(parser)
    # Removed leftover debug `print(parser.parse_args())`: a parser factory
    # must not consume sys.argv as a side effect, and the parsed result was
    # discarded anyway.
    return parser
def _get_opt(self, language, method): parser = ArgumentParser(description='summarizer.py') if language == 'en' and method == 'bert': config_file = 'en_bert_transformer.yml' elif language == 'en' and method == 'conv': config_file = 'en_conv_transformer.yml' elif language == 'de' and method == 'bert': config_file = 'de_bert_transformer.yml' elif language == 'de' and method == 'conv': config_file = 'de_conv_transformer.yml' else: sys.stderr.write( f"Method '{method}' for language '{language}' is not supported." ) #Hack to load parser arguments prec_argv = sys.argv sys.argv = [sys.argv[0]] sys.argv.extend(['-config', 'config/' + config_file]) opts.config_opts(parser) opts.translate_opts(parser) opt = parser.parse_args() sys.argv = prec_argv return opt
def translate_file(input_filename, output_filename):
    """Run shard-wise translation with a hard-coded model/IO configuration.

    NOTE(review): input_filename/output_filename are currently ignored — the
    source, model, and output paths are hard-coded below; confirm whether they
    should be honored.

    Returns:
        (scores, predictions) of the LAST shard processed, or ([], []) when
        the source yields no shards.
    """
    parser = ArgumentParser(description='translation')
    opts.config_opts(parser)
    opts.translate_opts(parser)
    # BUG FIX: parse_args expects a list of tokens; passing the raw string
    # would iterate it character-by-character. Split on whitespace.
    args = '''-model m16_step_44000.pt
              -src source_products_16.txt
              -output op_16x_4400_50_10.txt
              -batch_size 128 -replace_unk -max_length 200
              -verbose -beam_size 50 -n_best 10 -min_length 5'''.split()
    opt = parser.parse_args(args)
    translator = build_translator(opt, report_score=True)
    src_shards = split_corpus(opt.src, opt.shard_size)
    tgt_shards = repeat(None)
    shard_pairs = zip(src_shards, tgt_shards)
    # Initialize so an empty corpus returns empty results instead of raising
    # NameError on the return line.
    scores, predictions = [], []
    for i, (src_shard, tgt_shard) in enumerate(shard_pairs):
        scores, predictions = translator.translate(src=src_shard,
                                                   tgt=tgt_shard,
                                                   src_dir=opt.src_dir,
                                                   batch_size=opt.batch_size,
                                                   attn_debug=opt.attn_debug)
    return scores, predictions
def _get_parser():
    """Build the translate parser, pre-registering local model/data defaults.

    Re-registers --model/--src/--output with hard-coded Windows paths so the
    script can run with no command-line arguments.
    """
    parser = ArgumentParser(description='translate.py')
    opts.config_opts(parser)
    opts.translate_opts(parser)
    # NOTE(review): `parser.add` (rather than `add_argument`) implies this is
    # a configargparse-style parser — confirm against the ArgumentParser import.
    parser.add(
        '--model', '-model',
        dest='models',
        metavar='MODEL',
        nargs='+',
        type=str,
        default=[
            "F:/Project/Python/selfProject/translate_NMT/transflate_NMT/data_step_100.pt"
        ],
        required=False,
        help="模型使用得训练文件")
    parser.add(
        '--src', '-src',
        required=False,
        default=
        "F:/Project/Python/selfProject/translate_NMT/transflate_NMT/data/src-test.txt",
        help="自己写的测试文件在哪里????")
    parser.add(
        '--output', '-output',
        default=
        'F:/Project/Python/selfProject/translate_NMT/transflate_NMT/data/pred.txt',
        help="测试文件输出未知 改成自己得")
    return parser
def build_translator(model, fields, model_opt, beam_size=1, n_best=1):
    """Wrap an already-loaded model in a CPU Translator with penalties disabled.

    Args:
        model: the loaded translation model.
        fields: the torchtext fields loaded alongside the model.
        model_opt: the model's options namespace; its penalty settings are
            forcibly neutralized here (mutated in place).
        beam_size: beam width for decoding.
        n_best: number of hypotheses to return per input.

    Returns:
        A configured Translator instance (no GPU, unknown-word replacement on).
    """
    # model, fields, model_opt are all loaded from start of script.
    # Neutralize all scoring penalties on the shared options object.
    model_opt.beta = -0.0
    model_opt.coverage_penalty = "none"
    model_opt.length_penalty = "none"
    scorer = onmt.translate.GNMTGlobalScorer(alpha=0.0,
                                             beta=-0.0,
                                             cov_penalty="none",
                                             length_penalty="none")
    # Removed dead code: a dummy ArgumentParser and `dummy_translate_opt`
    # namespace were built and mutated but never used by anything below.
    translator = Translator(model,
                            fields,
                            beam_size,
                            n_best,
                            global_scorer=scorer,
                            out_file=None,
                            report_score=False,
                            gpu=False,
                            replace_unk=True)
    return translator
def _get_parser():
    """translate.py parser including multimodal fine-tune translate options."""
    p = ArgumentParser(description='translate.py')
    opts.config_opts(p)
    opts.translate_opts(p)
    opts.mmod_finetune_translate_opts(p)
    return p
def _get_parser():
    """Command-line argument parser for run_kp_eval.py."""
    kp_parser = ArgumentParser(description='run_kp_eval.py')
    opts.config_opts(kp_parser)
    opts.translate_opts(kp_parser)
    return kp_parser
def translate_file(input_filename, output_filename):
    """Translate MCTS_data/<input_filename>.txt into MCTS_data/<output_filename>.txt.

    Uses the retrosynthesis model checkpoint with beam search (beam 10,
    10-best) on GPU 0.

    Returns:
        (scores, predictions) of the LAST shard processed, or ([], []) when
        the source yields no shards.
    """
    parser = ArgumentParser(description='translation')
    opts.config_opts(parser)
    opts.translate_opts(parser)
    # BUG FIX: parse_args expects a list of tokens; passing the raw string
    # would iterate it character-by-character. Split on whitespace.
    # (Assumes the filenames contain no spaces.)
    args = f'''-model Experiments/Checkpoints/retrosynthesis_augmented_medium/retrosynthesis_aug_medium_model_step_100000.pt
               -src MCTS_data/{input_filename}.txt
               -output MCTS_data/{output_filename}.txt
               -batch_size 128 -replace_unk -max_length 200
               -verbose -beam_size 10 -n_best 10 -min_length 5 -gpu 0'''.split()
    opt = parser.parse_args(args)
    translator = build_translator(opt, report_score=True)
    src_shards = split_corpus(opt.src, opt.shard_size)
    tgt_shards = repeat(None)
    shard_pairs = zip(src_shards, tgt_shards)
    # Initialize so an empty corpus returns empty results instead of raising
    # NameError on the return line.
    scores, predictions = [], []
    for i, (src_shard, tgt_shard) in enumerate(shard_pairs):
        scores, predictions = translator.translate(src=src_shard,
                                                   tgt=tgt_shard,
                                                   src_dir=opt.src_dir,
                                                   batch_size=opt.batch_size,
                                                   attn_debug=opt.attn_debug)
    return scores, predictions
def _get_parser():
    """Parser for RL_train.py: config, model, and translate options.

    Train options are deliberately not registered here (RL setup, yida).
    """
    rl_parser = ArgumentParser(description='RL_train.py')
    opts.config_opts(rl_parser)
    opts.model_opts(rl_parser)
    opts.translate_opts(rl_parser)
    return rl_parser
def onmt_parser() -> ArgumentParser:
    """Build and return the OpenNMT translate argument parser.

    Adapted from the OpenNMT-Py repository's translate.py setup.
    """
    built = ArgumentParser(description='translate.py')
    opts.config_opts(built)
    opts.translate_opts(built)
    return built
def configure_opt(self, description):
    """Reconstruct an options namespace for loading a model.

    Starts from the default value of every option OpenNMT's translate
    parser would register, overlays the saved options in
    description['opt'], and pins the model path from description['model'].
    """
    parser = ArgumentParser(description='translation')
    opts.config_opts(parser)
    opts.translate_opts(parser)
    # Harvest each registered action's default (uses argparse's private
    # _actions list, as the original did).
    defaults = {action.dest: action.default for action in parser._actions}
    defaults.update(description['opt'])
    defaults['models'] = [description['model']]
    return types.SimpleNamespace(**defaults)
def __init__(self):
    # Boilerplate: build the OpenNMT translate parser and parse sys.argv.
    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.translate_opts(parser)
    self.opt = parser.parse_args()
    ArgumentParser.validate_translate_opts(self.opt)
    self.translator = build_translator(self.opt, report_score=True)
    # Use MeCab in wakati (space-separated) mode for word segmentation.
    self.mecab = MeCab.Tagger("-Owakati")
    # Priming call; presumably a workaround for a known MeCab binding
    # issue on the first parse — confirm.
    self.mecab.parse("")
def load_model(self, path=None):
    """Load a translator model from `path` into self.model.

    Prints a message and returns early — leaving self.model untouched —
    when `path` is None or no file exists at the resolved location.

    Args:
        path: filesystem path to the model checkpoint, relative to the
            current working directory (or absolute).
    """
    if path is None or not os.path.exists(
            os.path.abspath(os.path.join(os.getcwd(), path))):
        print("No model present at the specified path : {}".format(path))
        # BUG FIX: bail out here. The original fell through and attempted
        # to load the missing/None path anyway.
        return
    parser = ArgumentParser(description='translate.py')
    opts.config_opts(parser)
    opts.translate_opts(parser)
    opt = parser.parse_args(["--model", path])
    self.model = build_translator(opt, report_score=True)
    return
def __init__(self):
    # Load the model according to options given on the command line.
    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.translate_opts(parser)
    self.opt = parser.parse_args()
    ArgumentParser.validate_translate_opts(self.opt)
    self.translator = build_translator(self.opt, report_score=True)
    # Use MeCab in wakati (space-separated) mode for tokenization.
    self.mecab = MeCab.Tagger("-Owakati")
    # Priming call; presumably a workaround for a known MeCab binding
    # issue on the first parse — confirm.
    self.mecab.parse("")
    # Dictionary storing the previous response (per conversation key).
    self.prev_uttr_dict = {}
def __init__(self):
    # Boilerplate: build the translate parser and parse a hard-coded
    # argument list (model path, beam settings, repetition blocking).
    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.translate_opts(parser)
    self.opt = parser.parse_args(args=[
        "-model", "../models/model.pt", "-src", "None", "-replace_unk",
        "--beam_size", "10", "--min_length", "7", "--block_ngram_repeat",
        "2"
    ])
    ArgumentParser.validate_translate_opts(self.opt)
    self.translator = build_translator(self.opt, report_score=True)
    # Use MeCab in wakati (space-separated) mode for word segmentation.
    self.mecab = MeCab.Tagger("-Owakati")
    # Priming call; presumably a workaround for a known MeCab binding
    # issue on the first parse — confirm.
    self.mecab.parse("")
def _build_translator(args):
    """Initialize a seq2seq translator model from translate.py-style args.

    Returns:
        (translator, opt): the built translator and its parsed options.
    """
    # Imports are kept local, as in the original, so onmt is only required
    # when this helper is actually used.
    from onmt.utils.parse import ArgumentParser
    import onmt.opts as opts
    from onmt.translate.translator import build_translator

    cli = ArgumentParser()
    opts.config_opts(cli)
    opts.translate_opts(cli)
    parsed = cli.parse_args(args=args)
    ArgumentParser.validate_translate_opts(parsed)
    return build_translator(parsed, report_score=False), parsed
def trans(n_bset, content_src):
    """Translate `content_src` with the bundled zh2zh model on CPU.

    Args:
        n_bset: number of best hypotheses to return per input.
        content_src: iterable of source lines to translate.

    Returns:
        The translator's result for the given source data.
    """
    cli = argparse.ArgumentParser()
    translate_opts(cli)
    # NOTE: parses the process's own sys.argv, as the original did.
    options = cli.parse_args()
    models_dir = os.path.join(os.path.dirname(__file__), 'available_models')
    options.model = os.path.join(models_dir, 'zh2zh_model_2000000.pt')
    options.n_best = n_bset
    options.gpu = -1
    options.data_type = 'text'
    translator = build_translator(options, report_score=True)
    return translator.translate(src_path=options.src,
                                src_data_iter=content_src,
                                tgt_path=options.tgt,
                                src_dir=options.src_dir,
                                batch_size=options.batch_size,
                                attn_debug=options.attn_debug)
def __init__(self, gpu):
    """Build a Korean BERT summarization translator pinned to one GPU.

    Args:
        gpu: GPU index as a single-character string; longer input is
            rejected and terminates the process.
    """
    parser = configargparse.ArgumentParser(
        description='translate.py',
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter)
    opts.config_opts(parser)
    opts.add_md_help_argument(parser)
    opts.translate_opts(parser)
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # Only a single-character GPU index is accepted.
    if len(gpu) > 1:
        print('do not try hacking')
        exit()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    # Clear argv so the host process's arguments don't leak into parse_args.
    sys.argv = ["python"]
    opt = parser.parse_args()
    opt.models = [
        './modules/multi_summ/dataset_m2s2/korean_bert_8_single_new_economy_add_cls_sep_segment_eos_penalty_new_bloom_step_25000.pt'
    ]
    opt.segment = True
    opt.batch_size = 8
    opt.beam_size = 10
    # src/output are placeholder values; actual IO is driven elsewhere.
    opt.src = '.1'
    opt.output = '.1'
    opt.verbose = True
    opt.stepwise_penalty = True
    # BUG FIX: was the typo 'sumarry'; OpenNMT-Py's coverage penalty
    # choices are 'none', 'wu', or 'summary'.
    opt.coverage_penalty = 'summary'
    opt.beta = 5
    opt.length_penalty = 'wu'
    opt.alpha = 0.9
    opt.block_ngram_repeat = 3
    opt.ignore_when_blocking = [".", "</t", "<t>", ",_", "%"]
    opt.max_length = 300
    opt.min_length = 35
    opt.gpu = 0
    # (Removed a duplicate `opt.segment = True` assignment.)
    logger = init_logger(opt.log_file)
    self.translator = build_translator(opt, report_score=True)
def onmt_translator_builder(my_opts, s0_model_path, logger=None):
    """Construct an OpenNMT translator for the model at `s0_model_path`.

    Args:
        my_opts: extra options, converted to CLI tokens via opts_to_list.
        s0_model_path: path to the model checkpoint.
        logger: optional logger; progress messages fall back to print().

    Returns:
        The built translator.
    """
    def announce(message):
        # Route progress messages through the logger when one is given.
        if logger is not None:
            logger.info(message)
        else:
            print(message)

    announce('Building ONMT translator with model from ' + s0_model_path)
    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.translate_opts(parser)
    arglist = ['-model', s0_model_path] + opts_to_list(my_opts)
    print(arglist)
    opt = parser.parse_args(arglist)
    ArgumentParser.validate_translate_opts(opt)
    translator = build_translator(opt, report_score=True, logger=logger)
    announce('Finished building ONMT translator.')
    return translator
def translate_trained_model(n_latent, data_path, output_dir, trained_model_path,
                            use_segments=False, max_segments=10):
    """Translate the test split next to `data_path` with a trained model.

    Builds a default translate-options namespace, applies the project's
    default translate parameters plus the model/path/latent settings, and
    hands off to translate.main.
    """
    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.translate_opts(parser)
    # Parse with no tokens to obtain a namespace of pure defaults.
    opt = parser.parse_args('')
    # Project defaults first, then call-specific overrides (same net effect
    # as assigning them one by one).
    overrides = dict(translate.DEFAULT_TRANSLATE_PARAMS)
    overrides['models'] = [trained_model_path]
    overrides['src'] = '/'.join(data_path.split('/')[:-1]) + '/src-test.txt'
    overrides['output_dir'] = '/'.join(output_dir.split('/')[:-2]) + '/preds'
    overrides['n_latent'] = n_latent
    overrides['use_segments'] = use_segments
    overrides['max_segments'] = max_segments
    for key, value in overrides.items():
        setattr(opt, key, value)
    translate.main(opt)
def translate_esan(input_eng, model):
    """Translate English text to Esan via corpus files.

    Writes `input_eng` to corpus/input.txt, truncates corpus/output.txt,
    runs the translation with the given model checkpoint, and returns the
    translated text read back from the output file.

    Args:
        input_eng: English source text.
        model: path to the model checkpoint (assumed to contain no spaces).

    Returns:
        The translated text (empty string if nothing was produced).
    """
    input_file = "corpus/input.txt"
    output_file = "corpus/output.txt"
    # Use context managers so file handles are closed even on error.
    with open(input_file, "w", encoding="UTF-8") as f:
        f.write(input_eng)
    # Truncate any previous output.
    open(output_file, "w", encoding="UTF-8").close()
    parser = configargparse.ArgumentParser(
        description='translate.py',
        config_file_parser_class=configargparse.DefaultConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter,
        add_env_var_help=True)
    opts.config_opts(parser)
    opts.translate_opts(parser)
    s = "-model " + model + " -src " + input_file + " -output " + output_file
    # BUG FIX: parse_args expects a list of tokens; passing the raw string
    # would iterate it character-by-character.
    opt = parser.parse_args(s.split())
    main(opt)
    # (The original's `if f.mode == 'r'` guard was tautological for a file
    # opened with mode "r" and has been dropped.)
    with open(output_file, "r", encoding="Utf-8") as f:
        content = f.read()
    return content
# else: # tgt_iter = None translator.translate(src_data_iter=src_iter, tgt_data_iter=tgt_iter, batch_size=opt.batch_size, out_file=out_file) out_file.close() if __name__ == "__main__": parser = configargparse.ArgumentParser( description='translate.py', config_file_parser_class=configargparse.YAMLConfigFileParser, formatter_class=configargparse.ArgumentDefaultsHelpFormatter) opts.config_opts(parser) opts.translate_opts(parser) opt = parser.parse_args() logger = init_logger(opt.log_file) logger.info("Input args: %r", opt) path = 'rein_model/rein_model_step' for i in range(0, 25000, 10): current_path = path + '_' + str(i) + '.pt' if os.path.exists(current_path): model_path = current_path opt.output = 'rein_data/rein.tran' + '_' + str(i) main(opt, model_path) else: continue
def __build_translator(self, model_addr, src_addr):
    """Build a translator for the given model and source paths.

    Score reporting is disabled.
    """
    cli = ArgumentParser()
    opts.config_opts(cli)
    opts.translate_opts(cli)
    parsed = cli.parse_args(['-model', model_addr, '-src', src_addr])
    return build_translator(parsed, report_score=False)
def _get_parser():
    """translate.py parser extended with a --pivot_vocab option.

    The pivot-vocabulary option is registered before the standard OpenNMT
    option groups.
    """
    p = ArgumentParser(description='translate.py')
    # `store` is argparse's default action, so it need not be spelled out.
    p.add_argument('--pivot_vocab', dest='pivot_vocab')
    opts.config_opts(p)
    opts.translate_opts(p)
    return p
def _get_parser():
    """Command-line argument parser for kp_generate.py."""
    kp_gen_parser = ArgumentParser(description='kp_generate.py')
    opts.config_opts(kp_gen_parser)
    opts.translate_opts(kp_gen_parser)
    return kp_gen_parser
def _get_parser():
    """Parser for translate_dynamic.py (dynamic-data translate options)."""
    dyn_parser = ArgumentParser(description='translate_dynamic.py')
    opts.config_opts(dyn_parser)
    opts.translate_opts(dyn_parser, dynamic=True)
    return dyn_parser
def __init__(self, model_filename, cmdline_args):
    """Wrap an OpenNMT model for interactive, token-at-a-time decoding.

    Loads the model at `model_filename` (with optional extra translate.py
    style `cmdline_args`), then exposes cached encode / decoder-state /
    completion / log-prob closures as attributes on self.
    """
    parser = argparse.ArgumentParser(
        description='translate.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    opts.add_md_help_argument(parser)
    opts.translate_opts(parser)
    # -src is required by the registered options but unused here, hence ''.
    opt = parser.parse_args(['-model', model_filename, '-src', ''] +
                            (cmdline_args or []))
    translator = make_translator(opt)
    model = translator.model
    fields = translator.fields
    tgt_vocab = fields["tgt"].vocab

    def encode_from_src(src):
        # Run the encoder once and bundle everything the decoder needs.
        enc_states, memory_bank = model.encoder(src)
        return dict(enc_states=enc_states, memory_bank=memory_bank, src=src)

    @lru_cache(maxsize=32)
    def encode_text(in_text):
        # Preprocess and numericalize a source string, then encode it.
        text_preproc = fields['src'].preprocess(in_text)
        src, src_len = fields['src'].process([text_preproc],
                                             device=-1,
                                             train=False)
        src = src.unsqueeze(2)  # not sure why
        return encode_from_src(src)

    @lru_cache(maxsize=32)
    def encode_img(image_idx):
        # Encode an image referenced by its integer index (string indices
        # are coerced to int).
        if isinstance(image_idx, str):
            image_idx = int(image_idx)
        src = Variable(torch.IntTensor([image_idx]), volatile=True)
        return encode_from_src(src)

    def encode(inp):
        # Dispatch on the encoder type: VecsEncoder implies image-indexed
        # input, anything else is treated as text.
        if model.encoder.__class__.__name__ == 'VecsEncoder':
            return encode_img(inp)
        else:
            return encode_text(inp)

    @lru_cache(maxsize=128)
    def get_decoder_state(in_text, tokens_so_far):
        # Recursively build (and cache) the decoder state after feeding
        # `tokens_so_far`; returns (last decoder output, decoder state).
        # `tokens_so_far` must be hashable (a tuple) for the cache.
        encoder_out = encode(in_text)
        enc_states = encoder_out['enc_states']
        memory_bank = encoder_out['memory_bank']
        src = encoder_out['src']
        if len(tokens_so_far) == 0:
            # Base case: fresh decoder state straight from the encoder.
            return None, translator.model.decoder.init_decoder_state(
                src, memory_bank, enc_states)
        prev_out, prev_state = get_decoder_state(in_text, tokens_so_far[:-1])
        tgt_in = Variable(torch.LongTensor(
            [tgt_vocab.stoi[tokens_so_far[-1]]]),
                          volatile=True)  # [tgt_len]
        tgt_in = tgt_in.unsqueeze(1)  # [tgt_len x batch=1]
        tgt_in = tgt_in.unsqueeze(1)  # [tgt_len x batch=1 x nfeats=1]
        # Prepare to call the decoder. Unfortunately the decoder mutates the
        # state passed in, so deep-copy the cached objects before the call.
        memory_bank = copy.deepcopy(memory_bank)
        assert isinstance(prev_state.hidden, tuple)
        prev_state.hidden = tuple(v.detach() for v in prev_state.hidden)
        prev_state = copy.deepcopy(prev_state)
        assert memory_bank.size()[1] == 1
        dec_out, dec_states, attn = translator.model.decoder(
            tgt_in, memory_bank, prev_state)
        assert dec_out.shape[0] == 1
        return dec_out[0], dec_states

    def generate_completions(in_text, tokens_so_far):
        # Next-token logits over the target vocabulary given the prefix
        # `tokens_so_far`; returns (logits, vocab list).
        tokens_so_far = [onmt.io.BOS_WORD] + tokens_so_far
        tokens_so_far = tuple(tokens_so_far)  # Make it hashable
        dec_out, dec_states = get_decoder_state(in_text, tokens_so_far)
        logits = model.generator.forward(dec_out).data
        vocab = tgt_vocab.itos
        assert logits.shape[0] == 1
        logits = logits[0]
        return logits, vocab

    def eval_logprobs(in_text, tokens, *, use_eos):
        # Per-token negative log-likelihood of `tokens` given `in_text`,
        # as a numpy array (unreduced NLL).
        encoder_out = encode(in_text)
        enc_states = encoder_out['enc_states']
        memory_bank = encoder_out['memory_bank']
        src = encoder_out['src']
        tokens = [onmt.io.BOS_WORD] + tokens
        if use_eos:
            tokens = tokens + [onmt.io.EOS_WORD]
        decoder_state = model.decoder.init_decoder_state(
            src, memory_bank=memory_bank, encoder_final=enc_states)
        tgt = Variable(
            torch.LongTensor([tgt_vocab.stoi[tok] for tok in tokens
                              ]).unsqueeze(1).unsqueeze(1))
        # Feed tokens[:-1] and score tokens[1:] (standard teacher forcing).
        dec_out, dec_states, attn = model.decoder(tgt[:-1], memory_bank,
                                                  decoder_state)
        logits = model.generator(dec_out)
        return F.nll_loss(logits.squeeze(1),
                          tgt[1:].squeeze(1).squeeze(1),
                          reduce=False,
                          size_average=False).data.numpy()

    # Expose the loaded objects and closures as the public surface.
    self.model = model
    self.fields = fields
    self.translator = translator
    self.encode = encode
    self.get_decoder_state = get_decoder_state
    self.generate_completions = generate_completions
    self.eval_logprobs = eval_logprobs