Example #1
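A unit test that builds a small DefaultTranslator (BiLSTM encoder, MLP attender, auto-regressive LSTM decoder), trains it for one epoch on the toy head.ja/head.en data with a DummyTrainer, and asserts that the per-word training loss matches the dev loss computed on the same data.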
 def test_train_dev_loss_equal(self):
   layer_dim = 512
   batcher = SrcBatcher(batch_size=5, break_ties_randomly=False)
   train_args = {}
   train_args['src_file'] = "examples/data/head.ja"
   train_args['trg_file'] = "examples/data/head.en"
   train_args['loss_calculator'] = AutoRegressiveMLELoss()
   train_args['model'] = DefaultTranslator(
     src_reader=PlainTextReader(),
     trg_reader=PlainTextReader(),
     src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
     encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
     attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
     trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
     decoder=AutoRegressiveDecoder(
       input_dim=layer_dim,
       trg_embed_dim=layer_dim,
       rnn=UniLSTMSeqTransducer(
         input_dim=layer_dim,
         hidden_dim=layer_dim,
         decoder_input_dim=layer_dim,
         yaml_path="model.decoder.rnn"),
       transform=NonLinear(input_dim=layer_dim*2, output_dim=layer_dim),
       scorer=Softmax(input_dim=layer_dim, vocab_size=100),
       bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)))
   train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'],
                                           src_file="examples/data/head.ja",
                                           ref_file="examples/data/head.en",
                                           batcher=batcher)]
   train_args['trainer'] = DummyTrainer()
   train_args['batcher'] = batcher
   train_args['run_for_epochs'] = 1
   training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(**train_args)
   training_regimen.run_training(save_fct=lambda: None)
   self.assertAlmostEqual(training_regimen.train_loss_tracker.epoch_loss.sum_factors() / training_regimen.train_loss_tracker.epoch_words,
                          training_regimen.dev_loss_tracker.dev_score.loss, places=5)
Example #2
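An overfitting test: a 16-dimensional model is trained with AdamTrainer(alpha=0.1) over 50 one-epoch runs on the toy data, after which the per-word training loss is expected to be close to zero.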
 def test_overfitting(self):
     layer_dim = 16
     batcher = SrcBatcher(batch_size=10, break_ties_randomly=False)
     train_args = {}
     train_args['src_file'] = "examples/data/head.ja"
     train_args['trg_file'] = "examples/data/head.en"
     train_args['loss_calculator'] = MLELoss()
     train_args['model'] = DefaultTranslator(
         src_reader=PlainTextReader(),
         trg_reader=PlainTextReader(),
         src_embedder=SimpleWordEmbedder(vocab_size=100, emb_dim=layer_dim),
         encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                     hidden_dim=layer_dim),
         attender=MlpAttender(input_dim=layer_dim,
                              state_dim=layer_dim,
                              hidden_dim=layer_dim),
         trg_embedder=SimpleWordEmbedder(vocab_size=100, emb_dim=layer_dim),
         decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                   trg_embed_dim=layer_dim,
                                   rnn_layer=UniLSTMSeqTransducer(
                                       input_dim=layer_dim,
                                       hidden_dim=layer_dim,
                                       decoder_input_dim=layer_dim,
                                       yaml_path="model.decoder.rnn_layer"),
                                   mlp_layer=MLP(
                                       input_dim=layer_dim,
                                       hidden_dim=layer_dim,
                                       decoder_rnn_dim=layer_dim,
                                       vocab_size=100,
                                       yaml_path="model.decoder.rnn_layer"),
                                   bridge=CopyBridge(dec_dim=layer_dim,
                                                     dec_layers=1)),
     )
     train_args['dev_tasks'] = [
         LossEvalTask(model=train_args['model'],
                      src_file="examples/data/head.ja",
                      ref_file="examples/data/head.en",
                      batcher=batcher)
     ]
     train_args['run_for_epochs'] = 1
     train_args['trainer'] = AdamTrainer(alpha=0.1)
     train_args['batcher'] = batcher
     training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(
         **train_args)
     for _ in range(50):
         training_regimen.run_training(save_fct=lambda: None,
                                       update_weights=True)
     self.assertAlmostEqual(
         0.0,
         training_regimen.train_loss_tracker.epoch_loss.sum() /
         training_regimen.train_loss_tracker.epoch_words,
         places=2)
Example #3
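The same train/dev loss equality check as Example #1, written against an xnmt API variant that uses MlpSoftmaxDecoder, LossCalculator and the regimen's logger, and run with update_weights=False so the parameters are never updated.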
 def test_train_dev_loss_equal(self):
     layer_dim = 512
     batcher = SrcBatcher(batch_size=5, break_ties_randomly=False)
     train_args = {}
     train_args['src_file'] = "examples/data/head.ja"
     train_args['trg_file'] = "examples/data/head.en"
     train_args['loss_calculator'] = LossCalculator()
     train_args['model'] = DefaultTranslator(
         src_reader=PlainTextReader(),
         trg_reader=PlainTextReader(),
         src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
         encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                     hidden_dim=layer_dim),
         attender=MlpAttender(input_dim=layer_dim,
                              state_dim=layer_dim,
                              hidden_dim=layer_dim),
         trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
         decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                   lstm_dim=layer_dim,
                                   mlp_hidden_dim=layer_dim,
                                   trg_embed_dim=layer_dim,
                                   vocab_size=100,
                                   bridge=CopyBridge(dec_layers=1,
                                                     dec_dim=layer_dim)),
     )
     train_args['dev_tasks'] = [
         LossEvalTask(model=train_args['model'],
                      src_file="examples/data/head.ja",
                      ref_file="examples/data/head.en",
                      batcher=batcher)
     ]
     train_args['trainer'] = None
     train_args['batcher'] = batcher
     train_args['run_for_epochs'] = 1
     training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(
         **train_args)
     training_regimen.run_training(save_fct=lambda: None,
                                   update_weights=False)
     self.assertAlmostEqual(training_regimen.logger.epoch_loss.sum() /
                            training_regimen.logger.epoch_words,
                            training_regimen.logger.dev_score.loss,
                            places=5)
Example #4
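An overfitting test in the style of Example #2, using an ExpGlobal object to supply the default layer dimension, dropout and DyNet parameter collection to all components.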
 def test_overfitting(self):
     self.exp_global = ExpGlobal(
         dynet_param_collection=NonPersistentParamCollection(), dropout=0.0)
     self.exp_global.default_layer_dim = 16
     batcher = SrcBatcher(batch_size=10, break_ties_randomly=False)
     train_args = {}
     train_args['src_file'] = "examples/data/head.ja"
     train_args['trg_file'] = "examples/data/head.en"
     train_args['loss_calculator'] = LossCalculator()
     train_args['model'] = DefaultTranslator(
         src_reader=PlainTextReader(),
         trg_reader=PlainTextReader(),
         src_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
         encoder=BiLSTMSeqTransducer(self.exp_global),
         attender=MlpAttender(self.exp_global),
         trg_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
         decoder=MlpSoftmaxDecoder(self.exp_global,
                                   vocab_size=100,
                                   bridge=CopyBridge(
                                       exp_global=self.exp_global,
                                       dec_layers=1)),
     )
     train_args['dev_tasks'] = [
         LossEvalTask(model=train_args['model'],
                      src_file="examples/data/head.ja",
                      ref_file="examples/data/head.en",
                      batcher=batcher)
     ]
     train_args['run_for_epochs'] = 1
     train_args['trainer'] = AdamTrainer(self.exp_global, alpha=0.1)
     train_args['batcher'] = batcher
     training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(
         exp_global=self.exp_global, **train_args)
     training_regimen.exp_global = self.exp_global
     for _ in range(50):
         training_regimen.run_training(save_fct=lambda: None,
                                       update_weights=True)
     self.assertAlmostEqual(0.0,
                            training_regimen.logger.epoch_loss.sum() /
                            training_regimen.logger.epoch_words,
                            places=2)
Example #5
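Another overfitting test, this time against an API variant that wires the data through BilingualTrainingCorpus and BilingualCorpusParser and shares defaults through a ModelContext.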
 def test_overfitting(self):
     self.model_context = ModelContext()
      self.model_context.dynet_param_collection = NonPersistentParamCollection()
     self.model_context.default_layer_dim = 16
     train_args = {}
     training_corpus = BilingualTrainingCorpus(
         train_src="examples/data/head.ja",
         train_trg="examples/data/head.en",
         dev_src="examples/data/head.ja",
         dev_trg="examples/data/head.en")
     train_args['corpus_parser'] = BilingualCorpusParser(
         training_corpus=training_corpus,
         src_reader=PlainTextReader(),
         trg_reader=PlainTextReader())
     train_args['loss_calculator'] = LossCalculator()
     train_args['model'] = DefaultTranslator(
         src_embedder=SimpleWordEmbedder(self.model_context,
                                         vocab_size=100),
         encoder=BiLSTMSeqTransducer(self.model_context),
         attender=MlpAttender(self.model_context),
         trg_embedder=SimpleWordEmbedder(self.model_context,
                                         vocab_size=100),
         decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100),
     )
     train_args['run_for_epochs'] = 1
     train_args['trainer'] = AdamTrainer(self.model_context, alpha=0.1)
     train_args['batcher'] = SrcBatcher(batch_size=10,
                                        break_ties_randomly=False)
     training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(
         yaml_context=self.model_context, **train_args)
     training_regimen.model_context = self.model_context
     for _ in range(50):
         training_regimen.run_training(update_weights=True)
     self.assertAlmostEqual(
         0.0,
         training_regimen.logger.epoch_loss.loss_values['loss'] /
         training_regimen.logger.epoch_words,
         places=2)
Example #6
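The command-line entry point: it parses DyNet and experiment options, reads the experiments file, and for each selected experiment runs preprocessing, training, inference and evaluation, then prints a summary table of final scores.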
def main(overwrite_args=None):
  argparser = argparse.ArgumentParser()
  argparser.add_argument("--dynet-mem", type=int)
  argparser.add_argument("--dynet-seed", type=int)
  argparser.add_argument("--dynet-autobatch", type=int)
  argparser.add_argument("--dynet-devices", type=str)
  argparser.add_argument("--dynet-viz", action='store_true', help="use visualization")
  argparser.add_argument("--dynet-gpu", action='store_true', help="use GPU acceleration")
  argparser.add_argument("--dynet-gpu-ids", type=int)
  argparser.add_argument("--dynet-gpus", type=int)
  argparser.add_argument("--dynet-weight-decay", type=float)
  argparser.add_argument("--dynet-profiling", type=int)
  argparser.add_argument("--generate-doc", action='store_true', help="Do not run, output documentation instead")
  argparser.add_argument("experiments_file")
  argparser.add_argument("experiment_name", nargs='*', help="Run only the specified experiments")
  argparser.set_defaults(generate_doc=False)
  args = argparser.parse_args(overwrite_args)

  config_parser = OptionParser()

  if args.generate_doc:
    print(config_parser.generate_options_table())
    exit(0)

  if args.dynet_seed:
    random.seed(args.dynet_seed)
    np.random.seed(args.dynet_seed)

  config_experiment_names = config_parser.experiment_names_from_file(args.experiments_file)

  results = []

  # Check ahead of time that all experiments exist, to avoid bad surprises
  experiment_names = args.experiment_name or config_experiment_names

  if args.experiment_name:
    nonexistent = set(experiment_names).difference(config_experiment_names)
    if len(nonexistent) != 0:
      raise Exception("Experiments {} do not exist".format(",".join(list(nonexistent))))

  for experiment_name in experiment_names:
    exp_tasks = config_parser.parse_experiment(args.experiments_file, experiment_name)

    print("=> Running {}".format(experiment_name))
    
    exp_args = exp_tasks.get("experiment", {})
    model_file = exp_args.pop("model_file", "<EXP>.mod")
    hyp_file = exp_args.pop("hyp_file", "<EXP>.hyp")
    out_file = exp_args.pop("out_file", "<EXP>.out")
    err_file = exp_args.pop("err_file", "<EXP>.err")
    eval_only = exp_args.pop("eval_only", False)
    eval_metrics = exp_args.pop("eval_metrics", "bleu")
    save_num_checkpoints = exp_args.pop("save_num_checkpoints", 1)
    cfg_file = exp_args.pop("cfg_file", None)
    if len(exp_args)>0:
      raise ValueError("unsupported experiment arguments: {}".format(str(exp_args)))
    if cfg_file:
      shutil.copyfile(args.experiments_file, cfg_file)

    preproc_args = exp_tasks.get("preproc", {})
    # Do preprocessing
    print("> Preprocessing")
    xnmt.xnmt_preproc.xnmt_preproc(**preproc_args)

    print("> Initializing TrainingRegimen")
    train_args = exp_tasks["train"]
    train_args.dynet_profiling = args.dynet_profiling
    model_context = ModelContext()
    model_context.dynet_param_collection = PersistentParamCollection(model_file, save_num_checkpoints)
    if hasattr(train_args, "glob"):
      for k in train_args.glob:
        setattr(model_context, k, train_args.glob[k])
    train_args = YamlSerializer().initialize_if_needed(UninitializedYamlObject(train_args), model_context)
    
    inference = exp_tasks.get("inference", {})
    inference.trg_file = hyp_file
    inference = YamlSerializer().initialize_if_needed(UninitializedYamlObject(inference), model_context)

    evaluate_args = exp_tasks.get("evaluate", {})
    evaluate_args["hyp_file"] = hyp_file
    evaluators = map(lambda s: s.lower(), eval_metrics.split(","))

    output = Tee(out_file, 3)
    err_output = Tee(err_file, 3, error=True)

    # Do training
    if "random_search_report" in exp_tasks:
      print("> instantiated random parameter search: %s" % exp_tasks["random_search_report"])

    print("> Training")
    training_regimen = train_args

    eval_scores = "Not evaluated"
    if not eval_only:
      training_regimen.run_training()

    if not eval_only:
      print('reverting learned weights to best checkpoint..')
      training_regimen.yaml_context.dynet_param_collection.revert_to_best_model()
    if evaluators:
      print("> Evaluating test set")
      output.indent += 2
      inference(training_regimen.corpus_parser, training_regimen.model, training_regimen.batcher)
      eval_scores = []
      for evaluator in evaluators:
        evaluate_args["evaluator"] = evaluator
        eval_score = xnmt.xnmt_evaluate.xnmt_evaluate(**evaluate_args)
        print(eval_score)
        eval_scores.append(eval_score)
      output.indent -= 2

    results.append((experiment_name, eval_scores))

    output.close()
    err_output.close()

  print("")
  print("{:<30}|{:<40}".format("Experiment", " Final Scores"))
  print("-" * (70 + 1))

  for line in results:
    experiment_name, eval_scores = line
    for i in range(len(eval_scores)):
      print("{:<30}| {:<40}".format((experiment_name if i==0 else ""), str(eval_scores[i])))