def do_training(arguments, vocab):
    """Train a dependency parser on the CoNLL files named in *arguments*.

    :param arguments: parsed CLI namespace; reads epochs, batch_size, train,
        dev, dev_mode, upos_dim, word_dim, hidden_dim, tb_dest, model_file,
        patience.
    :param vocab: vocabulary object providing ``tokenize_conll``.
    :return: the trained ``ParserModel`` instance.
    """
    logging.debug("Init training")
    n_epochs = arguments.epochs
    batch_size = arguments.batch_size

    # prep data
    logging.info(">> Loading in data")
    logging.info("tokenizing train data ...")
    training_data = vocab.tokenize_conll(arguments.train)
    logging.info("... tokenized train data")

    # dev mode: train on a tiny slice for quick smoke-testing
    if arguments.dev_mode:
        training_data = training_data[:100]

    logging.info("tokenizing dev data ...")
    dev_data = vocab.tokenize_conll(arguments.dev)
    logging.info("... tokenized dev data")

    # instantiate model
    logging.info("creating model ...")
    model = DependencyParser(
        vocab, arguments.upos_dim, arguments.word_dim, arguments.hidden_dim
    )
    logging.info("... model created")

    callbacks = []
    tensorboard_logger = None
    if arguments.tb_dest:
        tensorboard_logger = TensorboardLoggerCallback(arguments.tb_dest)
        callbacks.append(tensorboard_logger)

    logging.info("creating ModelSaveCallback ...")
    save_callback = ModelSaveCallback(arguments.model_file)
    callbacks.append(save_callback)
    logging.info("... ModelSaveCallback created")

    # prep params
    logging.info("creating Model ...")
    parser = ParserModel(
        model,
        decoder="eisner",
        loss="kiperwasser",
        optimizer="adam",
        strategy="bucket",
        vocab=vocab,
    )
    logging.info("... Model created")

    logging.info("training Model ...")
    parser.train(
        training_data,
        arguments.dev,
        dev_data,
        epochs=n_epochs,
        batch_size=batch_size,
        callbacks=callbacks,
        patience=arguments.patience,
    )
    logging.info("...Model trained")

    # lazy %-args: interpolation happens only if the record is actually emitted
    logging.info("Model maxed on dev at epoch %s ", save_callback.best_epoch)

    return parser
# Build the parser and a checkpoint callback that tracks the best dev score.
model = DependencyParser(vocab)
save_callback = ModelSaveCallback(arguments.model)

# prep params
parser = ParserModel(
    model,
    decoder=arguments.decoder,
    loss="hinge",
    optimizer="adam",
    strategy="bucket",
    vocab=vocab,
)
parser.train(
    training_data,
    arguments.dev,
    dev_data,
    epochs=arguments.epochs,
    batch_size=arguments.batch_size,
    callbacks=[save_callback],
)

# load best model
model.load_from_file(arguments.model)

# Evaluate on the held-out test set; UAS/LAS exclude punctuation.
metrics = parser.parse_and_evaluate(
    arguments.test, test_data, batch_size=arguments.batch_size
)
test_UAS = metrics["nopunct_uas"]
test_LAS = metrics["nopunct_las"]

print(metrics)
# instantiate model (pretrained embeddings; optionally frozen)
model = DependencyParser(vocab, embs, arguments.no_update_pretrained_emb)

callbacks = []
tensorboard_logger = None
if arguments.tb_dest:
    tensorboard_logger = TensorboardLoggerCallback(arguments.tb_dest)
    callbacks.append(tensorboard_logger)

save_callback = ModelSaveCallback(arguments.model_dest)
callbacks.append(save_callback)

# prep params
parser = ParserModel(
    model,
    decoder="eisner",
    loss="kiperwasser",
    optimizer="adam",
    strategy="bucket",
    vocab=vocab,
)
parser.train(
    training_data,
    arguments.dev,
    dev_data,
    epochs=n_epochs,
    batch_size=32,
    callbacks=callbacks,
    patience=arguments.patience,
)

# NOTE(review): sibling code calls load_from_file on the model, not the
# parser — confirm which object owns checkpoint loading.
parser.load_from_file(arguments.model_dest)

metrics = parser.parse_and_evaluate(arguments.test, test_data, batch_size=32)
test_UAS = metrics["nopunct_uas"]
test_LAS = metrics["nopunct_las"]
print(metrics)

# Mirror the final test scores into TensorBoard when logging is enabled.
if arguments.tb_dest and tensorboard_logger:
    tensorboard_logger.raw_write("test_UAS", test_UAS)
    tensorboard_logger.raw_write("test_LAS", test_LAS)

print()
print(">>> Model maxed on dev at epoch", save_callback.best_epoch)
print(">>> Test score:", test_UAS, test_LAS)
# Training callbacks: LR schedule plus best-on-dev checkpointing.
callbacks = [custom_learning_update_callback, save_callback]

parser = ParserModel(
    model,
    decoder="cle",
    loss="crossentropy",
    optimizer=optimizer,
    strategy="scaled_batch",
    vocab=vocab,
)

""" Prep data """
training_data = vocab.tokenize_conll(arguments.train)
dev_data = vocab.tokenize_conll(arguments.dev)
test_data = vocab.tokenize_conll(arguments.test)

parser.train(
    training_data,
    dev_file,
    dev_data,
    epochs=n_epochs,
    batch_size=batch_scale,
    callbacks=callbacks,
)
parser.load_from_file(model_destination)

metrics = parser.parse_and_evaluate(arguments.test, test_data, batch_size=batch_scale)
test_UAS = metrics["nopunct_uas"]
test_LAS = metrics["nopunct_las"]

# NOTE(review): raw_write is unguarded here, unlike the tb_dest-gated variant
# elsewhere in this file — verify tensorboard_logger is always set on this path.
tensorboard_logger.raw_write("test_UAS", test_UAS)
tensorboard_logger.raw_write("test_LAS", test_LAS)

print()