def test_train_dev_loss_equal(self):
  layer_dim = 512
  batcher = SrcBatcher(batch_size=5, break_ties_randomly=False)
  train_args = {}
  train_args['src_file'] = "examples/data/head.ja"
  train_args['trg_file'] = "examples/data/head.en"
  train_args['loss_calculator'] = AutoRegressiveMLELoss()
  train_args['model'] = DefaultTranslator(
    src_reader=PlainTextReader(),
    trg_reader=PlainTextReader(),
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                                  trg_embed_dim=layer_dim,
                                  rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                           hidden_dim=layer_dim,
                                                           decoder_input_dim=layer_dim,
                                                           yaml_path="model.decoder.rnn"),
                                  transform=NonLinear(input_dim=layer_dim*2, output_dim=layer_dim),
                                  scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                                  bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'],
                                          src_file="examples/data/head.ja",
                                          ref_file="examples/data/head.en",
                                          batcher=batcher)]
  train_args['trainer'] = DummyTrainer()
  train_args['batcher'] = batcher
  train_args['run_for_epochs'] = 1
  training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(**train_args)
  training_regimen.run_training(save_fct=lambda: None)
  self.assertAlmostEqual(training_regimen.train_loss_tracker.epoch_loss.sum_factors() / training_regimen.train_loss_tracker.epoch_words,
                         training_regimen.dev_loss_tracker.dev_score.loss,
                         places=5)
def test_overfitting(self):
  layer_dim = 16
  batcher = SrcBatcher(batch_size=10, break_ties_randomly=False)
  train_args = {}
  train_args['src_file'] = "examples/data/head.ja"
  train_args['trg_file'] = "examples/data/head.en"
  train_args['loss_calculator'] = MLELoss()
  train_args['model'] = DefaultTranslator(
    src_reader=PlainTextReader(),
    trg_reader=PlainTextReader(),
    src_embedder=SimpleWordEmbedder(vocab_size=100, emb_dim=layer_dim),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(vocab_size=100, emb_dim=layer_dim),
    decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                              trg_embed_dim=layer_dim,
                              rnn_layer=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                             hidden_dim=layer_dim,
                                                             decoder_input_dim=layer_dim,
                                                             yaml_path="model.decoder.rnn_layer"),
                              mlp_layer=MLP(input_dim=layer_dim,
                                            hidden_dim=layer_dim,
                                            decoder_rnn_dim=layer_dim,
                                            vocab_size=100,
                                            yaml_path="model.decoder.rnn_layer"),
                              bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'],
                                          src_file="examples/data/head.ja",
                                          ref_file="examples/data/head.en",
                                          batcher=batcher)]
  train_args['run_for_epochs'] = 1
  train_args['trainer'] = AdamTrainer(alpha=0.1)
  train_args['batcher'] = batcher
  training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(**train_args)
  for _ in range(50):
    training_regimen.run_training(save_fct=lambda: None, update_weights=True)
  self.assertAlmostEqual(0.0,
                         training_regimen.train_loss_tracker.epoch_loss.sum() / training_regimen.train_loss_tracker.epoch_words,
                         places=2)
def test_train_dev_loss_equal(self):
  layer_dim = 512
  batcher = SrcBatcher(batch_size=5, break_ties_randomly=False)
  train_args = {}
  train_args['src_file'] = "examples/data/head.ja"
  train_args['trg_file'] = "examples/data/head.en"
  train_args['loss_calculator'] = LossCalculator()
  train_args['model'] = DefaultTranslator(
    src_reader=PlainTextReader(),
    trg_reader=PlainTextReader(),
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                              lstm_dim=layer_dim,
                              mlp_hidden_dim=layer_dim,
                              trg_embed_dim=layer_dim,
                              vocab_size=100,
                              bridge=CopyBridge(dec_layers=1, dec_dim=layer_dim)),
  )
  train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'],
                                          src_file="examples/data/head.ja",
                                          ref_file="examples/data/head.en",
                                          batcher=batcher)]
  train_args['trainer'] = None
  train_args['batcher'] = batcher
  train_args['run_for_epochs'] = 1
  training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(**train_args)
  training_regimen.run_training(save_fct=lambda: None, update_weights=False)
  self.assertAlmostEqual(training_regimen.logger.epoch_loss.sum() / training_regimen.logger.epoch_words,
                         training_regimen.logger.dev_score.loss,
                         places=5)
def test_overfitting(self):
  self.exp_global = ExpGlobal(dynet_param_collection=NonPersistentParamCollection(),
                              dropout=0.0)
  self.exp_global.default_layer_dim = 16
  batcher = SrcBatcher(batch_size=10, break_ties_randomly=False)
  train_args = {}
  train_args['src_file'] = "examples/data/head.ja"
  train_args['trg_file'] = "examples/data/head.en"
  train_args['loss_calculator'] = LossCalculator()
  train_args['model'] = DefaultTranslator(
    src_reader=PlainTextReader(),
    trg_reader=PlainTextReader(),
    src_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
    encoder=BiLSTMSeqTransducer(self.exp_global),
    attender=MlpAttender(self.exp_global),
    trg_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
    decoder=MlpSoftmaxDecoder(self.exp_global,
                              vocab_size=100,
                              bridge=CopyBridge(exp_global=self.exp_global, dec_layers=1)),
  )
  train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'],
                                          src_file="examples/data/head.ja",
                                          ref_file="examples/data/head.en",
                                          batcher=batcher)]
  train_args['run_for_epochs'] = 1
  train_args['trainer'] = AdamTrainer(self.exp_global, alpha=0.1)
  train_args['batcher'] = batcher
  training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(exp_global=self.exp_global, **train_args)
  training_regimen.exp_global = self.exp_global
  for _ in range(50):
    training_regimen.run_training(save_fct=lambda: None, update_weights=True)
  self.assertAlmostEqual(0.0,
                         training_regimen.logger.epoch_loss.sum() / training_regimen.logger.epoch_words,
                         places=2)
def test_overfitting(self):
  self.model_context = ModelContext()
  self.model_context.dynet_param_collection = NonPersistentParamCollection()
  self.model_context.default_layer_dim = 16
  train_args = {}
  training_corpus = BilingualTrainingCorpus(train_src="examples/data/head.ja",
                                            train_trg="examples/data/head.en",
                                            dev_src="examples/data/head.ja",
                                            dev_trg="examples/data/head.en")
  train_args['corpus_parser'] = BilingualCorpusParser(training_corpus=training_corpus,
                                                      src_reader=PlainTextReader(),
                                                      trg_reader=PlainTextReader())
  train_args['loss_calculator'] = LossCalculator()
  train_args['model'] = DefaultTranslator(
    src_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
    encoder=BiLSTMSeqTransducer(self.model_context),
    attender=MlpAttender(self.model_context),
    trg_embedder=SimpleWordEmbedder(self.model_context, vocab_size=100),
    decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100),
  )
  train_args['run_for_epochs'] = 1
  train_args['trainer'] = AdamTrainer(self.model_context, alpha=0.1)
  train_args['batcher'] = SrcBatcher(batch_size=10, break_ties_randomly=False)
  training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(yaml_context=self.model_context, **train_args)
  training_regimen.model_context = self.model_context
  for _ in range(50):
    training_regimen.run_training(update_weights=True)
  self.assertAlmostEqual(0.0,
                         training_regimen.logger.epoch_loss.loss_values['loss'] / training_regimen.logger.epoch_words,
                         places=2)
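# A minimal sketch of running one of the tests above in isolation. It assumes the
# tests are methods of a unittest.TestCase subclass; the module and class names
# below (test_training.TestOverfitting) are hypothetical placeholders.
#
#   python -m unittest test_training.TestOverfitting.test_overfitting -v
#
# or, programmatically:
#
#   import unittest
#   suite = unittest.defaultTestLoader.loadTestsFromName(
#       "test_training.TestOverfitting.test_overfitting")
#   unittest.TextTestRunner(verbosity=2).run(suite)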
def main(overwrite_args=None):
  argparser = argparse.ArgumentParser()
  argparser.add_argument("--dynet-mem", type=int)
  argparser.add_argument("--dynet-seed", type=int)
  argparser.add_argument("--dynet-autobatch", type=int)
  argparser.add_argument("--dynet-devices", type=str)
  argparser.add_argument("--dynet-viz", action='store_true', help="use visualization")
  argparser.add_argument("--dynet-gpu", action='store_true', help="use GPU acceleration")
  argparser.add_argument("--dynet-gpu-ids", type=int)
  argparser.add_argument("--dynet-gpus", type=int)
  argparser.add_argument("--dynet-weight-decay", type=float)
  argparser.add_argument("--dynet-profiling", type=int)
  argparser.add_argument("--generate-doc", action='store_true', help="Do not run, output documentation instead")
  argparser.add_argument("experiments_file")
  argparser.add_argument("experiment_name", nargs='*', help="Run only the specified experiments")
  argparser.set_defaults(generate_doc=False)
  args = argparser.parse_args(overwrite_args)

  config_parser = OptionParser()

  if args.generate_doc:
    print(config_parser.generate_options_table())
    exit(0)

  if args.dynet_seed:
    random.seed(args.dynet_seed)
    np.random.seed(args.dynet_seed)

  config_experiment_names = config_parser.experiment_names_from_file(args.experiments_file)

  results = []

  # Check ahead of time that all experiments exist, to avoid bad surprises
  experiment_names = args.experiment_name or config_experiment_names
  if args.experiment_name:
    nonexistent = set(experiment_names).difference(config_experiment_names)
    if len(nonexistent) != 0:
      raise Exception("Experiments {} do not exist".format(",".join(list(nonexistent))))

  for experiment_name in experiment_names:
    exp_tasks = config_parser.parse_experiment(args.experiments_file, experiment_name)

    print("=> Running {}".format(experiment_name))

    exp_args = exp_tasks.get("experiment", {})
    model_file = exp_args.pop("model_file", "<EXP>.mod")
    hyp_file = exp_args.pop("hyp_file", "<EXP>.hyp")
    out_file = exp_args.pop("out_file", "<EXP>.out")
    err_file = exp_args.pop("err_file", "<EXP>.err")
    eval_only = exp_args.pop("eval_only", False)
    eval_metrics = exp_args.pop("eval_metrics", "bleu")
    save_num_checkpoints = exp_args.pop("save_num_checkpoints", 1)
    cfg_file = exp_args.pop("cfg_file", None)
    if len(exp_args) > 0:
      raise ValueError("unsupported experiment arguments: {}".format(str(exp_args)))
    if cfg_file:
      shutil.copyfile(args.experiments_file, cfg_file)

    preproc_args = exp_tasks.get("preproc", {})
    # Do preprocessing
    print("> Preprocessing")
    xnmt.xnmt_preproc.xnmt_preproc(**preproc_args)

    print("> Initializing TrainingRegimen")
    train_args = exp_tasks["train"]
    train_args.dynet_profiling = args.dynet_profiling

    model_context = ModelContext()
    model_context.dynet_param_collection = PersistentParamCollection(model_file, save_num_checkpoints)
    if hasattr(train_args, "glob"):
      for k in train_args.glob:
        setattr(model_context, k, train_args.glob[k])
    train_args = YamlSerializer().initialize_if_needed(UninitializedYamlObject(train_args), model_context)

    inference = exp_tasks.get("inference", {})
    inference.trg_file = hyp_file
    inference = YamlSerializer().initialize_if_needed(UninitializedYamlObject(inference), model_context)

    evaluate_args = exp_tasks.get("evaluate", {})
    evaluate_args["hyp_file"] = hyp_file
    evaluators = map(lambda s: s.lower(), eval_metrics.split(","))

    output = Tee(out_file, 3)
    err_output = Tee(err_file, 3, error=True)

    # Do training
    if "random_search_report" in exp_tasks:
      print("> instantiated random parameter search: %s" % exp_tasks["random_search_report"])

    print("> Training")
    training_regimen = train_args
    eval_scores = "Not evaluated"
    if not eval_only:
      training_regimen.run_training()
    if not eval_only:
      print('reverting learned weights to best checkpoint..')
      training_regimen.yaml_context.dynet_param_collection.revert_to_best_model()

    if evaluators:
      print("> Evaluating test set")
      output.indent += 2
      inference(training_regimen.corpus_parser, training_regimen.model, training_regimen.batcher)
      eval_scores = []
      for evaluator in evaluators:
        evaluate_args["evaluator"] = evaluator
        eval_score = xnmt.xnmt_evaluate.xnmt_evaluate(**evaluate_args)
        print(eval_score)
        eval_scores.append(eval_score)
      output.indent -= 2

    results.append((experiment_name, eval_scores))

    output.close()
    err_output.close()

  print("")
  print("{:<30}|{:<40}".format("Experiment", " Final Scores"))
  print("-" * (70 + 1))
  for line in results:
    experiment_name, eval_scores = line
    for i in range(len(eval_scores)):
      print("{:<30}| {:<40}".format((experiment_name if i == 0 else ""), str(eval_scores[i])))
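# Minimal command-line entry point sketch for main() above; a standard guard is
# assumed here (it is not shown in the listing), so the experiment runner can be
# invoked directly, e.g. `python xnmt_run_experiments.py my_experiments.yaml`.
if __name__ == "__main__":
  import sys
  sys.exit(main())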