Example #1
import logging

# DependencyParser, ParserModel, TensorboardLoggerCallback and ModelSaveCallback
# are assumed to be imported from the parser library these examples exercise.


def do_training(arguments, vocab):
    logging.debug("Init training")
    n_epochs = arguments.epochs
    batch_size = arguments.batch_size

    # prep data
    logging.info(">> Loading in data")

    logging.info("tokenizing train data ...")
    training_data = vocab.tokenize_conll(arguments.train)
    logging.info("... tokenized train data")

    if arguments.dev_mode:
        # dev mode: train on a small slice of the data for fast iteration
        training_data = training_data[:100]

    logging.info("tokenizing dev data ...")
    dev_data = vocab.tokenize_conll(arguments.dev)
    logging.info("... tokenized dev data")

    # instantiate model
    logging.info("creating model ...")
    model = DependencyParser(vocab, arguments.upos_dim, arguments.word_dim,
                             arguments.hidden_dim)
    logging.info("... model created")

    callbacks = []
    tensorboard_logger = None
    if arguments.tb_dest:
        tensorboard_logger = TensorboardLoggerCallback(arguments.tb_dest)
        callbacks.append(tensorboard_logger)

    logging.info("creating ModelSaveCallback ...")
    save_callback = ModelSaveCallback(arguments.model_file)
    callbacks.append(save_callback)
    logging.info("... ModelSaveCallback created")

    # prep params
    logging.info("creating Model ...")
    parser = ParserModel(model,
                         decoder="eisner",
                         loss="kiperwasser",
                         optimizer="adam",
                         strategy="bucket",
                         vocab=vocab)
    logging.info("... Model created")

    logging.info("training Model ...")
    parser.train(training_data,
                 arguments.dev,
                 dev_data,
                 epochs=n_epochs,
                 batch_size=batch_size,
                 callbacks=callbacks,
                 patience=arguments.patience)
    logging.info("... Model trained")

    logging.info("Model maxed on dev at epoch %s", save_callback.best_epoch)

    return parser
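
The function above assumes a parsed `arguments` namespace and a fitted `vocab`. A minimal sketch of that scaffolding is shown below; the argparse flags simply mirror the attributes `do_training` reads, the defaults are placeholders, and the `Vocabulary().fit(...)` call is an assumption about the surrounding library rather than an API confirmed by this example.

import argparse

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("--train", required=True)
arg_parser.add_argument("--dev", required=True)
arg_parser.add_argument("--model_file", required=True)
arg_parser.add_argument("--epochs", type=int, default=30)       # placeholder default
arg_parser.add_argument("--batch_size", type=int, default=32)   # placeholder default
arg_parser.add_argument("--upos_dim", type=int, default=25)     # placeholder default
arg_parser.add_argument("--word_dim", type=int, default=100)    # placeholder default
arg_parser.add_argument("--hidden_dim", type=int, default=100)  # placeholder default
arg_parser.add_argument("--patience", type=int, default=3)      # placeholder default
arg_parser.add_argument("--tb_dest", default=None)
arg_parser.add_argument("--dev_mode", action="store_true")
arguments = arg_parser.parse_args()

vocab = Vocabulary().fit(arguments.train)  # assumed vocabulary API, not shown in the example
parser = do_training(arguments, vocab)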
Example #2
# prep data
training_data = vocab.tokenize_conll(arguments.train)
dev_data = vocab.tokenize_conll(arguments.dev)
test_data = vocab.tokenize_conll(arguments.test)

# instantiate model
model = DependencyParser(vocab)

save_callback = ModelSaveCallback(arguments.model)

# prep params
parser = ParserModel(model,
                     decoder=arguments.decoder,
                     loss="hinge",
                     optimizer="adam",
                     strategy="bucket",
                     vocab=vocab)

parser.train(training_data,
             arguments.dev,
             dev_data,
             epochs=arguments.epochs,
             batch_size=arguments.batch_size,
             callbacks=[save_callback])

# load best model
model.load_from_file(arguments.model)

metrics = parser.parse_and_evaluate(arguments.test,
                                    test_data,
                                    batch_size=arguments.batch_size)
test_UAS = metrics["nopunct_uas"]
test_LAS = metrics["nopunct_las"]

print(metrics)
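
The `metrics` dict returned by `parse_and_evaluate` carries the attachment scores read above; only "nopunct_uas" and "nopunct_las" are used here. A small reporting helper, sketched with a hypothetical name, puts the two variables to work:

def report_scores(metrics):
    # keys as read in the examples in this set
    print("UAS (no punctuation): %.2f" % metrics["nopunct_uas"])
    print("LAS (no punctuation): %.2f" % metrics["nopunct_las"])

report_scores(metrics)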
Example #3
# instantiate model; `embs` holds pretrained word embeddings (a loading sketch
# follows this example)
model = DependencyParser(vocab, embs, arguments.no_update_pretrained_emb)

callbacks = []
tensorboard_logger = None
if arguments.tb_dest:
    tensorboard_logger = TensorboardLoggerCallback(arguments.tb_dest)
    callbacks.append(tensorboard_logger)

save_callback = ModelSaveCallback(arguments.model_dest)
callbacks.append(save_callback)

# prep data
n_epochs = arguments.epochs
training_data = vocab.tokenize_conll(arguments.train)
dev_data = vocab.tokenize_conll(arguments.dev)
test_data = vocab.tokenize_conll(arguments.test)

# prep params
parser = ParserModel(model,
                     decoder="eisner",
                     loss="kiperwasser",
                     optimizer="adam",
                     strategy="bucket",
                     vocab=vocab)
parser.train(training_data,
             arguments.dev,
             dev_data,
             epochs=n_epochs,
             batch_size=32,
             callbacks=callbacks,
             patience=arguments.patience)
parser.load_from_file(arguments.model_dest)

metrics = parser.parse_and_evaluate(arguments.test,
                                    test_data,
                                    batch_size=32)
test_UAS = metrics["nopunct_uas"]
test_LAS = metrics["nopunct_las"]

print(metrics)

if arguments.tb_dest and tensorboard_logger:
    tensorboard_logger.raw_write("test_UAS", test_UAS)
    tensorboard_logger.raw_write("test_LAS", test_LAS)

print()
print(">>> Model maxed on dev at epoch", save_callback.best_epoch)
print(">>> Test score:", test_UAS, test_LAS)
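
This example passes pretrained embeddings (`embs`) to `DependencyParser`, but their construction is not shown. A common approach, sketched here under assumptions (a text-format embedding file with one vector per line, and a `word2idx` mapping derived from the vocabulary; both the helper and its arguments are hypothetical), is:

import numpy as np

def load_embeddings(path, word2idx, dim):
    # words absent from the pretrained file keep a small random initialization
    embs = np.random.uniform(-0.1, 0.1, (len(word2idx), dim)).astype("float32")
    with open(path, encoding="utf-8") as fh:
        for line in fh:
            parts = line.rstrip().split(" ")
            if parts[0] in word2idx and len(parts) == dim + 1:
                embs[word2idx[parts[0]]] = np.asarray(parts[1:], dtype="float32")
    return embs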
Example #4
# both callbacks are defined earlier in the (elided) preamble; see the sketch
# after this example
callbacks = [custom_learning_update_callback, save_callback]

parser = ParserModel(model,
                     decoder="cle",
                     loss="crossentropy",
                     optimizer=optimizer,
                     strategy="scaled_batch",
                     vocab=vocab)

# prep data
training_data = vocab.tokenize_conll(arguments.train)
dev_data = vocab.tokenize_conll(arguments.dev)
test_data = vocab.tokenize_conll(arguments.test)

parser.train(training_data,
             dev_file,
             dev_data,
             epochs=n_epochs,
             batch_size=batch_scale,
             callbacks=callbacks)

parser.load_from_file(model_destination)

metrics = parser.parse_and_evaluate(arguments.test,
                                    test_data,
                                    batch_size=batch_scale)
test_UAS = metrics["nopunct_uas"]
test_LAS = metrics["nopunct_las"]

tensorboard_logger.raw_write("test_UAS", test_UAS)
tensorboard_logger.raw_write("test_LAS", test_LAS)

print()
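
Example #4 starts mid-script: `model`, `optimizer`, `dev_file`, `n_epochs`, `batch_scale`, `model_destination`, `save_callback`, `tensorboard_logger` and `custom_learning_update_callback` are all defined in elided earlier code. A hedged sketch of what that preamble might contain is below; every concrete value is a placeholder, and the custom callback is omitted because its interface does not appear anywhere in these examples:

n_epochs = 30                          # placeholder
batch_scale = 3000                     # assumed token budget for the "scaled_batch" strategy
optimizer = "adam"                     # other examples pass the optimizer as a string
dev_file = arguments.dev
model_destination = arguments.model_dest
tensorboard_logger = TensorboardLoggerCallback(arguments.tb_dest)
save_callback = ModelSaveCallback(model_destination)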