Example #1
0
# Seed all RNGs so runs are reproducible.
torch.manual_seed(config.seed)
np.random.seed(config.seed)

# Initialize model
model = LanguageModel(config=config)
print(model)

# Generate datasets
train_dataset, valid_dataset, test_dataset = get_text_datasets(config)

trainer = Trainer(model=model, config=config)
# Resume from the last saved checkpoint when restarting an interrupted run.
if restart:
    trainer.load_checkpoint()

# Train the final model
if train:
    for epoch in range(config.num_epochs):
        print("========= Epoch %d of %d =========" % (epoch + 1, config.num_epochs))
        train_loss = trainer.train(train_dataset)

        # Validation is run with the model on CPU (presumably to free GPU
        # memory — TODO confirm), then the model is moved back to GPU.
        model = model.cpu()
        valid_loss = trainer.test(valid_dataset, set="valid")
        if torch.cuda.is_available():
            model = model.cuda()

        print("========= Results: epoch %d of %d =========" % (epoch + 1, config.num_epochs))
        print("train loss: %.2f| valid loss: %.2f\n" % (train_loss, valid_loss))

        trainer.save_checkpoint()

    # Evaluate the best checkpoint on the held-out test set.
    trainer.load_best_model()
    test_loss = trainer.test(test_dataset, set="test")
    print("========= Test results =========")
    # BUG FIX: the original passed (test_WER, test_loss) to a format string
    # with a single %.2f placeholder (TypeError at runtime), and test_WER is
    # never defined in this script. Print only the test loss.
    print("test loss: %.2f \n" % test_loss)
trainer = Trainer(model=model, config=config)

# Train the model together with a sampling controller; metrics include FLOPs
# statistics in addition to WER and loss.
if train:
    print("Training the controller...")
    trainer.train_controller(train_dataset)
    print("Done.")
    for epoch in range(config.num_epochs):
        print("========= Epoch %d of %d =========" %
              (epoch + 1, config.num_epochs))
        # trainer.train reports word error rate, loss, and mean/std FLOPs.
        train_WER, train_loss, train_FLOPs_mean, train_FLOPs_std = trainer.train(
            train_dataset)
        # Validate only every `validation_period` epochs.
        if epoch % config.validation_period == 0:
            # Evaluate once with random sampling and checkpoint the result...
            model.sample_based_on_surprisal_during_testing = False
            valid_WER_random, valid_loss_random, valid_FLOPs_mean_random, valid_FLOPs_std_random = trainer.test(
                valid_dataset, set="valid")
            trainer.save_checkpoint(WER=valid_WER_random,
                                    sampling_method="random")

            # ...then once with surprisal-based sampling, checkpointed under
            # its own sampling_method tag.
            model.sample_based_on_surprisal_during_testing = True
            valid_WER_surprisal, valid_loss_surprisal, valid_FLOPs_mean_surprisal, valid_FLOPs_std_surprisal = trainer.test(
                valid_dataset, set="valid")
            trainer.save_checkpoint(WER=valid_WER_surprisal,
                                    sampling_method="surprisal")
        print("========= Results: epoch %d of %d =========" %
              (epoch + 1, config.num_epochs))
        print("train WER: %.2f| train loss: %.2f| train FLOPs: %d" %
              (train_WER * 100, train_loss, train_FLOPs_mean))
        print(
            "valid WER: %.2f| valid loss: %.2f| valid FLOPs: %d (random sampling)"
            % (valid_WER_random * 100, valid_loss_random,
               valid_FLOPs_mean_random))
        # NOTE(review): this example is truncated mid-statement in the source
        # it was scraped from; the final print is incomplete.
        print(
Example #3
0
File: main.py  Project: lorenlugosch/g2p
    # Standard loop: train each epoch, validate periodically (on CPU),
    # checkpoint on validation WER, then evaluate the best model on test.
    for epoch_idx in range(config.num_epochs):
        epoch_banner = "========= Epoch %d of %d =========" % (
            epoch_idx + 1, config.num_epochs)
        print(epoch_banner)
        train_WER, train_loss = trainer.train(train_dataset)

        if epoch_idx % config.validation_period == 0:
            # Run validation with the model on CPU, then restore it to GPU.
            model = model.cpu()
            valid_WER, valid_loss = trainer.test(valid_dataset, set="valid")
            if torch.cuda.is_available():
                model = model.cuda()

        results_banner = "========= Results: epoch %d of %d =========" % (
            epoch_idx + 1, config.num_epochs)
        print(results_banner)
        epoch_metrics = (train_WER, train_loss, valid_WER, valid_loss)
        print(
            "train WER: %.2f| train loss: %.2f| valid WER: %.2f| valid loss: %.2f\n"
            % epoch_metrics)

        trainer.save_checkpoint(WER=valid_WER)

    # Reload the best checkpoint and report held-out test performance.
    trainer.load_best_model()
    test_WER, test_loss = trainer.test(test_dataset, set="test")
    print("========= Test results =========")
    print("test WER: %.2f| test loss: %.2f \n" % (test_WER, test_loss))


def G2P(word):
    """Convert a single word to its phoneme sequence via the trained model.

    Relies on the module-level ``train_dataset`` (for its word and phoneme
    tokenizers) and ``model`` globals.
    """
    # Tokenize the word into ids, remembering the sequence length.
    token_ids = train_dataset.word_tokenizer.EncodeAsIds(word)
    lengths = [len(token_ids)]
    # Shape the ids as a (1, T) long tensor batch for inference.
    batch = torch.tensor(token_ids).unsqueeze(0).long()
    hypotheses = model.infer(batch, lengths)
    # Decode the first hypothesis back into phoneme text.
    return train_dataset.phoneme_tokenizer.DecodeIds(hypotheses[0])
Example #4
0
import torch
from models import HMM
from data import get_datasets, read_config
from training import Trainer

# Generate datasets from text file
path = "data"
N = 128
config = read_config(N, path)
train_dataset, valid_dataset = get_datasets(config)
checkpoint_path = "."

# Initialize model
model = HMM(config=config)

# Train the model, resuming from the checkpoint directory.
num_epochs = 10
trainer = Trainer(model, config, lr=0.003)
trainer.load_checkpoint(checkpoint_path)

for epoch in range(num_epochs):
    header = "========= Epoch %d of %d =========" % (epoch + 1, num_epochs)
    print(header)
    train_loss = trainer.train(train_dataset)
    valid_loss = trainer.test(valid_dataset)
    # Checkpoint after every epoch so training can be resumed.
    trainer.save_checkpoint(epoch, checkpoint_path)

    print("========= Results: epoch %d of %d =========" % (epoch + 1, num_epochs))
    print("train loss: %.2f| valid loss: %.2f\n" % (train_loss, valid_loss))