Example #1
def run_translate(verbose, translate_flags=tuple()):
  t0 = time()

  #translate_flags = ['--keep-unreachable-facts']
  #translate_flags = ['--negative-axioms']

  fd_root = get_fd_root()
  translate_path = os.path.join(fd_root, FD_BIN, TRANSLATE_DIR)
  if translate_path not in sys.path:
    sys.path.append(translate_path)

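  # The Fast Downward translator reads sys.argv when the module is imported,
  # so swap in our flags and input paths just for the import, then restore argv.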
  temp_argv = sys.argv[:]
  sys.argv = sys.argv[:1] + list(translate_flags) + [DOMAIN_PATH, PROBLEM_PATH]
  import translate # NOTE - no longer causes any overhead
  sys.argv = temp_argv

  if verbose:
    print SEPARATOR
    translate.main()
    print
    print DOMAIN_PATH
    print PROBLEM_PATH
    print 'from translate import main', time() - t0
  else:
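    # Suppress the translator's console output by pointing stdout at devnull.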
    with open(os.devnull, 'w') as devnull:
      old_stdout = sys.stdout
      sys.stdout = devnull
      try:
        translate.main()
      finally:
        sys.stdout = old_stdout
Example #2
def run_translate(verbose, translate_flags=tuple()):
    t0 = time()

    fd_root = get_fd_root()
    translate_path = os.path.join(fd_root, FD_BIN, TRANSLATE_DIR)
    if translate_path not in sys.path:
        sys.path.append(translate_path)

    temp_argv = sys.argv[:]
    sys.argv = sys.argv[:1] + \
        list(translate_flags) + [DOMAIN_PATH, PROBLEM_PATH]
    import translate
    sys.argv = temp_argv

    if verbose:
        print SEPARATOR
        translate.main()
        print
        print 'from translate import main', time() - t0
    else:
        with open(os.devnull, 'w') as devnull:
            old_stdout = sys.stdout
            sys.stdout = devnull
            try:
                translate.main()
            finally:
                sys.stdout = old_stdout
Example #3
    def run(self, *args, **kwargs):
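        # Each step runs the translate script in-process on Python 3, or via
        # a python3 subprocess otherwise, then registers the rebuilt extension.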
        if self.translate is not None:
            # subprocess.check_call(["../translate.sh", "translate"])
            if IN_PYTHON3:
                translate.main(xcheck=False, snudown=SCRIPT_DIR)
            else:
                subprocess.check_call(
                    ["python3", "../translate.py", "translate", SCRIPT_DIR])
            extensions.append(self.build_extension())

        if self.rust_crosschecks is not None:
            # subprocess.check_call(["../translate.sh", "rustcheck"])
            if IN_PYTHON3:
                translate.main(xcheck=True, snudown=SCRIPT_DIR)
            else:
                subprocess.check_call(
                    ["python3", "../translate.py", "rustcheck", SCRIPT_DIR])
            extensions.append(self.build_extension())

        if self.clang_crosschecks is not None:
            # subprocess.check_call(["../translate.sh"])
            if IN_PYTHON3:
                translate.generate_html_entries_header(snudown=SCRIPT_DIR)
            else:
                subprocess.check_call([
                    "python3", "../translate.py", "html_entities", SCRIPT_DIR
                ])
            extensions.append(self.build_extension())

        distutils.command.build.build.run(self, *args, **kwargs)
Example #4
def main():
    if len(sys.argv) < 2:
        print sys.argv[0] + " start|stop|restart "
        sys.exit(1)
    cmd = sys.argv[1]
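    # Daemonize under a PID lock file so only one instance runs at a time.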
    context = daemon.DaemonContext(pidfile=PIDLockFile('/tmp/translate.pid'), 
                                   working_directory='/tmp')
    
    if cmd == "start":
        with context:
            translate.main()
            
    elif cmd == "stop":
        context.close()
        
    elif cmd == "restart":
        print "todo: implement"

    else:
        print "start, stop, restart"
Example #5
    def run_opennmt_translate(self, pointer_generator_folder):
        translate_args = list()
        translate_args.append(
            {"model": os.path.join("models", self.lang, "model_dsa_60_0_20K_2l_step_18000.pt"),
             "src_file": os.path.join(pointer_generator_folder, "src_sequences.txt"),
             "output_file": os.path.join(pointer_generator_folder, "predicted_by_model_1.txt"),
             "all_concepts_lowercase": False,
             "do_fix_sequences": True
             })
        translate_args.append(
            {"model": os.path.join("models", self.lang, "model_dsa_dbpedia_spotlight10_100K_3l_step_80000.pt"),
             "src_file": os.path.join(pointer_generator_folder, "src_sequences.txt"),
             "output_file": os.path.join(pointer_generator_folder, "predicted_by_model_2.txt"),
             "all_concepts_lowercase": False,
             "do_fix_sequences": True
             })
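        # Translate src_sequences.txt once per model; each run writes its own
        # predictions file.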
        for params in translate_args:
            opt = self.parser.parse_args(["-model", params["model"],
                                          "-src", params["src_file"],
                                          "-output", params["output_file"],
                                          "-replace_unk"])  # "-gpu", 0
            translate.main(opt)
        return translate_args
Example #6
def translate_trained_model(n_latent,
                            data_path,
                            output_dir,
                            trained_model_path,
                            use_segments=False,
                            max_segments=10):
    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.translate_opts(parser)
    opt = parser.parse_args('')

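    # Start from the module's default translate params, then override the
    # run-specific fields directly on the parsed namespace.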
    for k, v in translate.DEFAULT_TRANSLATE_PARAMS.items():
        vars(opt)[k] = v

    vars(opt)['models'] = [trained_model_path]
    src_path = '/'.join(data_path.split('/')[:-1]) + '/src-test.txt'
    vars(opt)['src'] = src_path
    output_path = '/'.join(output_dir.split('/')[:-2]) + '/preds'
    vars(opt)['output_dir'] = output_path
    vars(opt)['n_latent'] = n_latent
    vars(opt)['use_segments'] = use_segments
    vars(opt)['max_segments'] = max_segments
    translate.main(opt)
Example #7
def run_translate(verbose, temp_dir, use_negative=False):
    t0 = time()
    translate_path = os.path.join(get_fd_root(), FD_BIN, TRANSLATE_DIR)
    if translate_path not in sys.path:
        sys.path.append(translate_path)

    if use_negative and ('modified' in get_fd_root()):
        translate_flags = ['--negative-axioms']
    else:
        translate_flags = []

    temp_argv = sys.argv[:]

    sys.argv = sys.argv[:1] + translate_flags + [DOMAIN_INPUT, PROBLEM_INPUT]
    import translate
    sys.argv = temp_argv

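    # Run the translator from the temp directory so its output files land
    # there, restoring the original cwd afterwards.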
    old_cwd = os.getcwd()
    tmp_cwd = os.path.join(old_cwd, temp_dir)
    if verbose:
        print '\nTranslate command: import translate; translate.main()'
        os.chdir(tmp_cwd)
        translate.main()
        os.chdir(old_cwd)
        print 'Translate runtime:', time() - t0
        return

    with open(os.devnull, 'w') as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        os.chdir(tmp_cwd)
        try:
            translate.main()
        finally:
            sys.stdout = old_stdout
            os.chdir(old_cwd)
Example #8
def main(jobId, status, imgFile, transTo='en'):

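	# Recognize characters in the image, fetch a translation access token,
	# then translate the words; each stage returns an error tuple on failure.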
	if status == "failure":
		return (False, "character recognition api", "failure")

	words = recognition.main(jobId)
	if len(words) == 0:
		return (False, "character recognition api", "characters don't exist")

	access_token = translate.getAccessToken()
	if access_token is None:
		return (False, "translation api", "can't get access token")

	transWords = translate.main(access_token, words, _to=transTo)
	if transWords is None:
		return (False, "translation api", "can't translate words")

	return (True, transWords, imgFile)
Example #9
def make_summary(queue, keyword, article_list):

    # Sentences extracted for each sub-topic
    #    print("INPUT TEXT : " + str(morphemized_article))

    make_tmp_input(keyword, article_list)

    opt = make_tmp_opt(keyword)

    # Can be swapped with the commented-out code below
    attns_info, oov_info, copy_info, raw_attns_info = translate.main(opt)

    # 2018/11/22
    # GPU memory flush
    #print("parent pid", os.getpid())
    #ctx = mp.get_context('spawn')
    #queue = ctx.Queue()
    #p = ctx.Process(target=translate.sub_main, args=(queue, opt))
    #p.start()

    #p.join(3)  # never finishes unless a timeout is set

    #attns_info, oov_info, copy_info, raw_attns_info = queue.get()
    #    print("exit sub process")
    #

    res = read_result(keyword)

    # Model output for the input (the generated summary)
    #    print("OUTPUT TEXT : " + str(res))

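    # Bundle each article's summary and attention weights into demo JSON.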
    json_list = []
    for i in range(len(raw_attns_info)):
        json_obj = make_demo_attn_info(article_list[i], res[i],
                                       raw_attns_info[i],
                                       json_path.format(keyword), i)
        json_list.append(json_obj)

    gen_abs_list = [dic["gen_abstract"] for dic in json_list]
    normal_word_gen_abs_list = m_to_n_convert.convert(gen_abs_list, keyword)

    #    print("DEMO JSON : " + str(json_obj))
    queue.put((normal_word_gen_abs_list, json_list))
Example #10
def main():
    data = "../../data_%s/%s/%s-train.pt" % (opt.task, opt.data, opt.data)
    print("Loading data from '%s'" % data)
    if opt.label_smooth:
        assert opt.num_rb_bin == 2
    dataset = torch.load(data)
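    # Pick rb-bin thresholds: either separate src/tgt tables or one shared list.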
    if opt.separate_threshold:
        print dataset["src_threshold"]
        print dataset["tgt_threshold"]
        threshold = {
            "src": dataset["src_threshold"][opt.num_rb_bin],
            "tgt": dataset["tgt_threshold"][opt.num_rb_bin]
        }
    else:
        if opt.num_rb_bin > 0:
            single_threshold = dataset['all_threshold'][opt.num_rb_bin]
        else:
            single_threshold = [0]
        threshold = {"src": single_threshold, "tgt": single_threshold}
    print threshold
    dicts = dataset['dicts']
    ori_datasets = copy.deepcopy(dataset)
    if opt.parallel_ratio is not None:
        parallel_len = int(
            len(dataset['train']['src']) * opt.parallel_ratio)
        dataset['train']['src'] = dataset['train']['src'][:parallel_len]
        print dataset['train']['src'][-1]
        dataset['train']['tgt'] = dataset['train']['tgt'][:parallel_len]
        dataset['train']['src_rb'] = dataset['train']['src_rb'][:parallel_len]
        dataset['train']['tgt_rb'] = dataset['train']['tgt_rb'][:parallel_len]
    else:
        parallel_len = None
    if opt.separate_encoder == 0:
        forward_data = onmt.BucketIterator(dataset['train']['src'],
                                           dataset['train']['tgt'],
                                           dataset['train']['src_rb'],
                                           dataset['train']['tgt_rb'], opt,
                                           threshold)
        valid_data = onmt.BucketIterator(dataset['valid']['src'],
                                         dataset['valid']['tgt'],
                                         dataset['valid']['src_rb'],
                                         dataset['valid']['tgt_rb'], opt,
                                         threshold)
        valid_datas = [valid_data]
        valid_weight = [1.]
        valid_probability = [1.]
        train_datas = [forward_data]
        probability = [1.]
        weights = [1.]
        print len(forward_data)
    else:
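        # separate_encoder: build one train/valid iterator pair per
        # filter_src_rb value (0, then 1), then clear the filter.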
        opt.filter_src_rb = 0
        forward_data = onmt.BucketIterator(dataset['train']['src'],
                                           dataset['train']['tgt'],
                                           dataset['train']['src_rb'],
                                           dataset['train']['tgt_rb'], opt,
                                           threshold)
        #print len(forward_data)
        valid_data = onmt.BucketIterator(dataset['valid']['src'],
                                         dataset['valid']['tgt'],
                                         dataset['valid']['src_rb'],
                                         dataset['valid']['tgt_rb'], opt,
                                         threshold)
        valid_datas = [valid_data]
        valid_weight = [1.]
        valid_probability = [1.]
        train_datas = [forward_data]
        probability = [1.]
        weights = [1.]

        opt.filter_src_rb = 1
        forward_data = onmt.BucketIterator(dataset['train']['src'],
                                           dataset['train']['tgt'],
                                           dataset['train']['src_rb'],
                                           dataset['train']['tgt_rb'], opt,
                                           threshold)
        valid_data = onmt.BucketIterator(dataset['valid']['src'],
                                         dataset['valid']['tgt'],
                                         dataset['valid']['src_rb'],
                                         dataset['valid']['tgt_rb'], opt,
                                         threshold)
        valid_datas += [valid_data]
        valid_weight += [1.]
        valid_probability += [1.]
        train_datas += [forward_data]
        probability += [1.]
        weights += [1.]
        opt.filter_src_rb = None

    if not opt.no_tgt_to_src:
        backwardData = onmt.BucketIterator(dataset['train_bi']['src'],
                                           dataset['train_bi']['tgt'],
                                           dataset['train_bi']['src_rb'],
                                           dataset['train_bi']['tgt_rb'], opt,
                                           threshold)
        train_datas.append(backwardData)
        weights.append(1.)
        probability = [0.5, 0.5]
    trainData = onmt.mixed_iterator(train_datas, probability)
    validData = onmt.mixed_iterator(valid_datas, valid_probability)

    print(' * vocabulary size. source = %d; target = %d' %
          (dicts['src'].size(), dicts['tgt'].size()))
    print(' * number of training sentences. %d' % len(dataset['train']['src']))
    print(' * maximum batch size. %d' % opt.batch_size)

    print('Building model...')

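    # Build fresh model(s) (one encoder per rb bin when separate_encoder is
    # set) or resume everything from a checkpoint.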
    if opt.train_from is None:
        decoder = onmt.Models.Decoder(opt,
                                      dicts['tgt'],
                                      attn_type=opt.attn_type)
        generator = nn.Sequential(nn.Linear(opt.rnn_size, dicts['tgt'].size()),
                                  nn.LogSoftmax())
        if opt.cuda > 1:
            generator = nn.DataParallel(generator, device_ids=opt.gpus)
        discriminator = onmt.Models.Discriminator(opt)
        if not opt.separate_encoder:
            encoder = onmt.Models.Encoder(opt, dicts['src'])
            models = [
                onmt.Models.NMTModel(encoder, decoder, generator,
                                     discriminator, opt)
            ]
        else:
            models = []
            for i in range(opt.num_rb_bin):
                encoder = onmt.Models.Encoder(opt, dicts['src'])
                models += [
                    onmt.Models.NMTModel(encoder, decoder, generator,
                                         discriminator, opt)
                ]
        optims = []
        for model_single in models:
            if opt.cuda > 1:
                model_single = nn.DataParallel(model_single,
                                               device_ids=opt.gpus)
            if opt.cuda:
                model_single.cuda()
            else:
                model_single.cpu()
            model_single.generator = generator
            for p in model_single.get_seq2seq_parameters():
                p.data.uniform_(-opt.param_init, opt.param_init)
            for p in model_single.get_disc_parameters():
                if opt.non_linear == "relu":
                    opt.adv_param_init = 2. / opt.disc_size
                p.data.uniform_(-opt.adv_param_init, opt.adv_param_init)
            optim_single = onmt.Optim(
                model_single.parameters(),
                model_single.get_seq2seq_parameters(),
                model_single.get_disc_parameters(),
                model_single.get_encoder_parameters(),
                opt.optim,
                opt.learning_rate,
                opt.max_grad_norm,
                lr_decay=opt.learning_rate_decay,
                start_decay_at=opt.start_decay_at,
                adam_momentum=opt.adam_momentum,
            )
            optims += [optim_single]
    else:
        print('Loading from checkpoint at %s' % opt.train_from)
        checkpoint = torch.load(opt.train_from)
        model_single = checkpoint['model']
        if opt.cuda:
            model_single.cuda()
        else:
            model_single.cpu()
        optim_single = checkpoint['optim']
        opt.start_epoch = checkpoint['epoch'] + 1
        models = [model_single]
        optims = [optim_single]

    nParams = sum([
        p.nelement() for model_single in models
        for p in model_single.parameters()
    ])
    print('* number of parameters: %d' % nParams)

    trainModel(models, trainData, validData, dataset, optims, dicts, weights,
               valid_weight, threshold)
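    # After training, decode the test set(s) and score the outputs for the
    # current task.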
    if opt.task == "MT":
        translate.main([
            "-task", opt.task, "-data", opt.data, "-model",
            "%s/model.pt" % exp_path, "-replace_unk", "-gpus",
            str(opt.gpus[0]), "-output",
            "%s/test_no_unk.txt" % exp_path, "-verbose"
        ])
        evaluate_file.main([
            "-task", opt.task, "-data", opt.data, "-outputs",
            "%s/test_no_unk.txt" % exp_path
        ])
    elif opt.task == "Multi-MT":
        for test_set in ["test"]:
            for language_pair in dataset["language_pairs"]:
                line = language_pair.split("-")
                S_lang = line[0]
                T_lang = line[1]
                print "test_set", test_set + "_" + language_pair
                if opt.filter_src_rb is None or opt.filter_src_rb == dataset[
                        "src_language_mapping"][S_lang]:
                    translate.main([
                        "-task", opt.task, "-data", opt.data, "-model",
                        "%s/model.pt" % exp_path, "-replace_unk", "-gpus",
                        str(opt.gpus[0]), "-output",
                        "%s/%s_%s_no_unk.txt" %
                        (exp_path, test_set, language_pair), "-verbose",
                        "-language_pair", language_pair, "-test_set", test_set,
                        "-bpe"
                    ])

                    evaluate_file.main([
                        "-task", opt.task, "-data", opt.data, "-outputs",
                        "%s/%s_%s_no_unk.txt" %
                        (exp_path, test_set, language_pair), "-language_pair",
                        language_pair, "-test_set", test_set
                    ])
                else:
                    print "BLEU  0.0, SARI   0.00, R1   0.00, R2   0.00, RL   0.00, FK_O   0.0, acc   0.00"
    else:
        for i in range(opt.num_rb_bin):
            translate.main([
                "-task", opt.task, "-data", opt.data, "-model",
                "%s/model.pt" % exp_path, "-replace_unk", "-gpus",
                str(opt.gpus[0]), "-output",
                "%s/test_no_unk.txt" % exp_path, "-verbose", "-tgt_rb_all",
                str(i)
            ])
            evaluate_file.main([
                "-task", opt.task, "-data", opt.data, "-outputs",
                "%s/test_no_unk.txt" % exp_path, "-single_rb",
                str(i)
            ])
            print "all rb", i
        translate.main([
            "-task", opt.task, "-data", opt.data, "-model",
            "%s/model.pt" % exp_path, "-replace_unk", "-gpus",
            str(opt.gpus[0]), "-output",
            "%s/test_no_unk.txt" % exp_path, "-verbose"
        ])
        evaluate_file.main([
            "-task", opt.task, "-data", opt.data, "-outputs",
            "%s/test_no_unk.txt" % exp_path
        ])
Example #11
def translate_task(task_file):
    print('\nTranslating %s:' % get_task_name(task_file))
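    # The translator takes its arguments from sys.argv, so rewrite argv
    # before calling main().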
    sys.argv = [sys.argv[0], '--force-old-python', task_file]
    translate.main()
Example #12
import translate

if __name__ == '__main__':
    translate.main()