Exemplo n.º 1
0
        # NOTE(review): fragment — the enclosing function header and the first
        # branch of this if/elif chain (presumably `if args.evaluator == "bleu"`
        # or similar) are outside this view; verify against the full file.
        evaluator = WEREvaluator()
    elif args.evaluator == "cer":
        evaluator = CEREvaluator()
    else:
        # TODO(review): "Unkonwn" is a typo for "Unknown" in this user-facing
        # error message — fix in a behavior-changing pass.
        raise RuntimeError("Unkonwn evaluation metric {}".format(
            args.evaluator))

    # Load the reference and hypothesis corpora from their respective files.
    ref_corpus = read_data(args.ref_file)
    hyp_corpus = read_data(args.hyp_file)
    len_before = len(hyp_corpus)
    # Drop sentence pairs whose hypothesis contains the NO_DECODING_ATTEMPTED
    # marker (emitted when decoding was skipped), keeping ref/hyp aligned.
    ref_corpus, hyp_corpus = zip(
        *filter(lambda x: NO_DECODING_ATTEMPTED not in x[1],
                zip(ref_corpus, hyp_corpus)))
    if len(ref_corpus) < len_before:
        # Report how many test sentences were excluded from scoring.
        print("> ignoring %s out of %s test sentences." %
              (len_before - len(ref_corpus), len_before))

    # Score the filtered corpora with the selected evaluator.
    eval_score = evaluator.evaluate(ref_corpus, hyp_corpus)

    return eval_score


if __name__ == "__main__":
    # Script entry point: read the "evaluate" task options from the command
    # line, run the evaluation, and report the resulting score.
    option_parser = OptionParser()
    option_parser.add_task("evaluate", options)
    cli_args = option_parser.args_from_command_line("evaluate", sys.argv[1:])

    eval_score = xnmt_evaluate(cli_args)
    print("{} Score = {}".format(cli_args.evaluator, eval_score))
Exemplo n.º 2
0
  # Perform decoding
  # NOTE(review): fragment — the enclosing function header (presumably
  # `def xnmt_decode(args):`) and the setup of `translator`, `src_corpus`,
  # `search_strategy`, and `output_generator` are outside this view.

  # Disable training-only behavior (e.g. dropout) before decoding.
  translator.set_train(False)
  with open(args.trg_file, 'wb') as fp:  # Saving the translated output to a trg file
    for src in src_corpus:
      if args.max_src_len is not None and len(src) > args.max_src_len:
        # Source too long: emit the sentinel instead of decoding.
        trg_sent = NO_DECODING_ATTEMPTED
      else:
        # Fresh DyNet computation graph per sentence to free memory.
        dy.renew_cg()
        token_string = translator.translate(src, search_strategy)
        trg_sent = output_generator.process(token_string)[0]

      # NOTE(review): this branch uses the Python 2 `unicode` builtin and
      # writes raw bytes + '\n' to a file opened in 'wb' — this code is
      # Python-2-only as written; confirm the project's target version.
      if isinstance(trg_sent, unicode):
        trg_sent = trg_sent.encode('utf-8', errors='ignore')

      else:  # do bytestring -> unicode -> utf8 full circle, to ensure valid utf8
        #trg_sent = unicode(trg_sent, 'utf-8', errors='ignore').encode('utf-8', errors='ignore')
        trg_sent = trg_sent.decode('utf-8', errors='ignore').encode('utf-8', errors='ignore')

      fp.write(trg_sent + '\n')


if __name__ == "__main__":
  # Entry point: build the "decode" task options, read them from the command
  # line, then run decoding (model loading happens inside xnmt_decode).
  cmdline_parser = OptionParser()
  cmdline_parser.add_task("decode", options)
  decode_args = cmdline_parser.args_from_command_line("decode", sys.argv[1:])
  xnmt_decode(decode_args)

Exemplo n.º 3
0
                    # NOTE(review): fragment — the enclosing method, its loops,
                    # and the `if` whose then-branch this is (presumably "dev
                    # score improved") are outside this view; verify context.
                    # Persist the best model seen so far.
                    self.model_serializer.save_to_file(self.args.model_file,
                                                       self.model_params,
                                                       self.model)
                else:
                    # otherwise: learning rate decay / early stopping
                    if self.args.lr_decay < 1.0:
                        # Multiplicatively shrink the learning-rate scale.
                        self.learning_scale *= self.args.lr_decay
                        print('new learning rate: %s' %
                              (self.learning_scale * self.args.learning_rate))
                    # Stop training once the effective rate falls below threshold.
                    if self.learning_scale * self.args.learning_rate < self.args.lr_threshold:
                        print('Early stopping')
                        self.early_stopping_reached = True

                self.trainer.update_epoch()
                # Re-enable training-mode behavior for the next epoch.
                self.translator.set_train(True)

        # Return per-word perplexities (exp of average loss) on train and dev.
        return math.exp(self.logger.epoch_loss / self.logger.epoch_words), \
               math.exp(self.logger.dev_loss / self.logger.dev_words)


if __name__ == "__main__":
    # Entry point: parse "train" task options, echo them, and run training
    # epochs until the trainer signals early stopping.
    cli_parser = OptionParser()
    cli_parser.add_task("train", general_options + options)
    train_args = cli_parser.args_from_command_line("train", sys.argv[1:])
    print("Starting xnmt-train:\nArguments: %r" % (train_args))

    trainer = XnmtTrainer(train_args)
    while not trainer.early_stopping_reached:
        trainer.run_epoch()
Exemplo n.º 4
0
    # NOTE(review): fragment — the creation of `argparser` and the code that
    # consumes `experiment_options` are outside this view; the final
    # assignment below is cut off mid-statement.
    argparser.add_argument("--dynet-gpu",
                           action='store_true',
                           help="use GPU acceleration")
    argparser.add_argument("--dynet-gpu-ids", type=int)
    argparser.add_argument("--generate-doc",
                           action='store_true',
                           help="Do not run, output documentation instead")
    argparser.add_argument("experiments_file")
    # Zero or more experiment names; empty means "run all experiments".
    argparser.add_argument("experiment_name",
                           nargs='*',
                           help="Run only the specified experiments")
    argparser.set_defaults(generate_doc=False)
    args = argparser.parse_args()

    # Register one option namespace per pipeline stage in the config parser.
    config_parser = OptionParser()
    config_parser.add_task("preproc", xnmt_preproc.options)
    config_parser.add_task("train", xnmt_train.options)
    config_parser.add_task("decode", xnmt_decode.options)
    config_parser.add_task("evaluate", xnmt_evaluate.options)

    # Tweak the options to make config files less repetitive:
    # - Delete evaluate:evaluator, replace with exp:eval_metrics
    # - Delete decode:hyp_file, evaluate:hyp_file, replace with exp:hyp_file
    # - Delete train:model, decode:model_file, replace with exp:model_file
    config_parser.remove_option("evaluate", "evaluator")
    config_parser.remove_option("decode", "trg_file")
    config_parser.remove_option("evaluate", "hyp_file")
    config_parser.remove_option("train", "model_file")
    config_parser.remove_option("decode", "model_file")

    experiment_options = [