Example #1
def load_and_cache_examples(args, tokenizer, evaluate=False):
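    # Pick the dataset implementation based on args.not_pretrain; both branches
    # load the 'dev' split when evaluating and 'train' otherwise.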
    if args.not_pretrain:
        dataset = finetuneDataset(tokenizer,
                                  args,
                                  logger,
                                  file_type='dev' if evaluate else 'train',
                                  block_size=args.block_size)
    else:
        dataset = TextDataset(tokenizer,
                              args,
                              logger,
                              file_type='dev' if evaluate else 'train',
                              block_size=args.block_size)
    return dataset
Example #2
def generate(config):
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, drop_last=True)

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=86,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=config.device).to(device)
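    # Restore the trained weights before sampling from the model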
    model.load_state_dict(torch.load(config.model))

    for l in ["In 1776 ", "Liberty is ", "Democracy is "]:
        char_id = torch.tensor([dataset._char_to_ix[ch]
                                for ch in l]).reshape(-1, 1).to(device)
        hidden = (torch.zeros(
            (config.lstm_num_layers, 1, config.lstm_num_hidden)).to(device),
                  torch.zeros((config.lstm_num_layers, 1,
                               config.lstm_num_hidden)).to(device))
        sequence = sample(model=model,
                          dataset=dataset,
                          init_seq=char_id,
                          init_hidden=hidden,
                          seq_length=200,
                          device=device,
                          temp=config.temp)
        print(
            dataset.convert_to_string(char_id.cpu().numpy().reshape(-1)) +
            sequence)
Example #3
def build_text_graph_dataset(dataset, window_size):
    if "small" in dataset or "presplit" in dataset or 'sentiment' in dataset:
        dataset_name = "_".join(dataset.split("_")[:-1])
    else:
        dataset_name = dataset
    clean_text_path = join(get_corpus_path(),
                           dataset_name + '_sentences_clean.txt')
    labels_path = join(get_corpus_path(), dataset_name + '_labels.txt')
    labels = pd.read_csv(labels_path, header=None, sep='\t')
    doc_list = []
    f = open(clean_text_path, 'rb')
    for line in f.readlines():
        doc_list.append(line.strip().decode())
    f.close()
    assert len(labels) == len(doc_list)
    if 'presplit' not in dataset:
        labels_list = labels.iloc[0:, 0].tolist()
        split_dict = None
    else:
        labels_list = labels.iloc[0:, 2].tolist()
        split = labels.iloc[0:, 1].tolist()
        split_dict = {}
        for i, v in enumerate(split):
            split_dict[i] = v
    if "small" in dataset:
        doc_list = doc_list[:200]
        labels_list = labels_list[:200]

    word_freq = get_vocab(doc_list)
    vocab = list(word_freq.keys())
    if not exists(join(get_corpus_path(), dataset + '_vocab.txt')):
        vocab_str = '\n'.join(vocab)
        f = open(join(get_corpus_path(), dataset + '_vocab.txt'), 'w')
        f.write(vocab_str)
        f.close()
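    # Build word-document statistics and the sliding-window co-occurrence graph
    # used as the edges of the text graph.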
    words_in_docs, word_doc_freq = build_word_doc_edges(doc_list)
    word_id_map = {word: i for i, word in enumerate(vocab)}

    sparse_graph = build_edges(doc_list, word_id_map, vocab, word_doc_freq,
                               window_size)
    docs_dict = {i: doc for i, doc in enumerate(doc_list)}
    return TextDataset(dataset,
                       sparse_graph,
                       labels_list,
                       vocab,
                       word_id_map,
                       docs_dict,
                       None,
                       train_test_split=split_dict)
Example #4
def construct_dictionary(data_train, data_val, data_test):
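    # Optionally fold the validation (and test) data into the corpus used to
    # build the word/id dictionary.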
    if args.use_val:
        dataset = pd.concat([data_train, data_val], 0)
        if args.use_test:
            dataset = pd.concat([dataset, data_test], 0)
    else:
        dataset = data_train
    print("constructing doctionary...")
    dictionary = Dictionary()
    dictionary.word2idx, dictionary.idx2word = TextDataset.assign_word_ids(
        args.emsize, dataset)
    print("----processed {%d} word_2_id----" % (len(dictionary.word2idx)))
    with open(p.dict_path, 'wb') as f:
        pickle.dump(dictionary, f)
    return dictionary
Example #5
def get_prediction_slength(grouped):
    preds = []
    label_list = []
    slength_list = []
    # for every batch
    for name, group in grouped:
        s = np.sum(list(map(lambda x: len(x), group.tokens)))
        slength_list.append(s)
        tokens = TextDataset._text2idx(group.tokens, dictionary.word2idx)
        labels = np.array(group.label.values)
        tokens, labels = process_batch(tokens, labels)
        if config.pooling == 'attn':
            y_pred, _, _ = model.forward(tokens)
        else:
            y_pred = model.forward(tokens)
        _, y_pred = torch.max(y_pred, 1)
        preds.append(y_pred.item())
        label_list.append(labels[0].item())
    return preds, label_list, slength_list
def check_loss_and_accuracy(grouped):
    losses = []
    preds = []
    label_list = []
    for name, group in grouped:
        tokens = TextDataset._text2idx(group.tokens, dictionary.word2idx)
        labels = np.array(group.label.values)
        tokens, labels = process_batch(tokens, labels)
        y_pred = model.forward(tokens)

        loss = criterion(y_pred.cuda(), labels[0])
        losses.append(loss.item())
        _, y_pred = torch.max(y_pred, 1)
        preds.append(np.ndarray.flatten(y_pred.data.cpu().numpy()))
        label_list.append(np.ndarray.flatten(labels[0]))
    preds = np.array([item for sublist in preds for item in sublist])
    label_list = np.array([item for sublist in label_list for item in sublist])
    precision, recall, f1, _ = precision_recall_fscore_support(label_list, preds)
    return np.mean(np.array(losses)), accuracy_score(
        label_list, preds), precision, recall, f1, confusion_matrix(label_list, preds)
def check_loss_and_accuracy(grouped, model, dictionary):
    preds = []
    label_list = []
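    # Evaluate group by group: run the model with the configured pooling,
    # take the argmax prediction, and collect predicted vs. true labels.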
    for name, group in grouped:
        tokens = TextDataset._text2idx(group.tokens, dictionary.word2idx)
        labels = np.array(group.label.values)
        tokens, labels = process_batch(tokens, labels)
        if config.pooling == 'attn':
            y_pred, _, _ = model.forward(tokens)
        elif config.pooling == 'ensem':
            y_pred = model.forward(tokens)

        labels = labels.view(labels.shape[0], -1)

        _, y_pred = torch.max(y_pred, 1)
        preds.append(y_pred.item())
        label_list.append(labels[0].item())
    preds = np.array(preds)
    label_list = np.array(label_list)
    precision, recall, f1, _ = precision_recall_fscore_support(label_list, preds)
    return accuracy_score(label_list, preds), precision, recall, f1, confusion_matrix(label_list, preds)
def main():
    args = parse_args()

    print('BATCH_SIZE: {}'.format(args.batch_size))
    print('SEQ_LENGTH: {}'.format(args.seq_length))
    print('EMBEDDING_DIM: {}'.format(args.embedding_dim))
    print('HIDDEN_DIM: {}'.format(args.hidden_dim))
    print('LR: {}'.format(args.lr))
    print('DROPOUT: {}'.format(args.dropout))
    print('EPOCHS: {}'.format(args.epochs))
    print('LOG_INTERVAL: {}'.format(args.log_interval))
    print('----------------------------')

    # Prepare data & split
    dataset = TextDataset(args.corpus, seq_length=args.seq_length)
    train_set_size = int(len(dataset) * 0.8)
    train_set, test_set = random_split(
        dataset,
        [train_set_size, len(dataset) - train_set_size])
    train_dataloader = DataLoader(train_set,
                                  batch_size=args.batch_size,
                                  shuffle=True)
    test_dataloader = DataLoader(test_set, batch_size=args.batch_size)

    # Create model & optimizer
    model = Net(len(dataset.chars),
                args.embedding_dim,
                args.hidden_dim,
                dropout=args.dropout)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # Train
    train(model, optimizer, train_dataloader, args)

    # Save model
    torch.save(model.state_dict(), args.output_model)

    # Test
    test(model, test_dataloader, args)
Example #9
    id_str, tensor = vis_ds[i]

    with torch.no_grad():
        tensor = tensor.view([1, -1]).cuda()
        tensor = net.encode_visual(tensor)
        tensor = tensor.cpu().numpy()[0]

    vis_ids.append(id_str)
    vis_emb[i] = tensor


"""
ENCODE TEXT QUERIES
"""

txt_ds = TextDataset(args.text_ds)
bow_encoder = Text2BoWEncoder(args.bow_vocab)
w2v_encoder = Text2W2VEncoder(args.w2v_weights)
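# Each text query is encoded three ways: bag-of-words, word2vec, and RoBERTa.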

txt_ids = []
txt_emb = np.empty([len(txt_ds), 2048])
for i in tqdm.trange(len(txt_ds)):
    id_str, tensor = txt_ds[i]
    bow_tensor = bow_encoder.encode(tensor)
    w2v_tensor = w2v_encoder.encode(tensor)
    roberta_tensor = net.roberta.encode(tensor)

    with torch.no_grad():
        roberta_tensor_len = torch.LongTensor([len(roberta_tensor)]).cuda()
        roberta_tensor = roberta_tensor.view([1, -1]).cuda()
        static_tensor = torch.cat([bow_tensor, w2v_tensor], 0).view([1, -1]).cuda()
Example #10
def main(args):
    fix_seeds()
    # if os.path.exists('./logs'):
    #     shutil.rmtree('./logs')
    # os.mkdir('./logs')
    # writer = SummaryWriter(log_dir='./logs')
    vis = visdom.Visdom()
    val_avg_loss_window = create_plot_window(vis,
                                             '#Epochs',
                                             'Loss',
                                             'Average Loss',
                                             legend=['Train', 'Val'])
    val_avg_accuracy_window = create_plot_window(vis,
                                                 '#Epochs',
                                                 'Accuracy',
                                                 'Average Accuracy',
                                                 legend=['Val'])
    size = (args.height, args.width)
    train_transform = transforms.Compose([
        transforms.Resize(size),
        # transforms.RandomResizedCrop(size=size, scale=(0.5, 1)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomAffine(10,
                                translate=(0.1, 0.1),
                                scale=(0.8, 1.2),
                                resample=PIL.Image.BILINEAR),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    val_transform = transforms.Compose([
        transforms.Resize(size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    train_dataset = TextDataset(args.data_path,
                                'train.txt',
                                size=args.train_size,
                                transform=train_transform)
    val_dataset = TextDataset(args.data_path,
                              'val.txt',
                              size=args.val_size,
                              transform=val_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.workers,
                              shuffle=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            num_workers=args.workers,
                            shuffle=False)

    model = models.resnet18(pretrained=False)
    model.fc = nn.Linear(512, 16)
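    # Replace the ImageNet head with a 16-class linear layer; the fine-tuned
    # weights are loaded from args.resume_from below.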

    model.load_state_dict(torch.load(args.resume_from)['model'])

    device = 'cpu'
    if args.cuda:
        device = 'cuda'
    print(device)
    metrics = {'accuracy': Accuracy(), 'loss': Loss(criterion)}
    evaluator = create_supervised_evaluator(model, metrics, device=device)

    @trainer.on(Events.ITERATION_COMPLETED)
    def lr_step(engine):
        if model.training:
            scheduler.step()

    global pbar, desc
    pbar, desc = None, None

    @trainer.on(Events.EPOCH_STARTED)
    def create_train_pbar(engine):
        global desc, pbar
        if pbar is not None:
            pbar.close()
        desc = 'Train iteration - loss: {:.4f} - lr: {:.4f}'
        pbar = tqdm(initial=0,
                    leave=False,
                    total=len(train_loader),
                    desc=desc.format(0, lr))

    @trainer.on(Events.EPOCH_COMPLETED)
    def create_val_pbar(engine):
        global desc, pbar
        if pbar is not None:
            pbar.close()
        desc = 'Validation iteration - loss: {:.4f}'
        pbar = tqdm(initial=0,
                    leave=False,
                    total=len(val_loader),
                    desc=desc.format(0))

    # desc_val = 'Validation iteration - loss: {:.4f}'
    # pbar_val = tqdm(initial=0, leave=False, total=len(val_loader), desc=desc_val.format(0))

    log_interval = 1
    e = Events.ITERATION_COMPLETED(every=log_interval)

    train_losses = []

    @trainer.on(e)
    def log_training_loss(engine):
        lr = optimizer.param_groups[0]['lr']
        train_losses.append(engine.state.output)
        pbar.desc = desc.format(engine.state.output, lr)
        pbar.update(log_interval)
        # writer.add_scalar("training/loss", engine.state.output, engine.state.iteration)
        # writer.add_scalar("lr", lr, engine.state.iteration)

    @evaluator.on(e)
    def log_validation_loss(engine):
        label = engine.state.batch[1].to(device)
        output = engine.state.output[0]
        pbar.desc = desc.format(criterion(output, label))
        pbar.update(log_interval)

    # if args.resume_from is not None:
    #     @trainer.on(Events.STARTED)
    #     def _(engine):
    #         pbar.n = engine.state.iteration

    # @trainer.on(Events.EPOCH_COMPLETED(every=1))
    # def log_train_results(engine):
    #     evaluator.run(train_loader) # eval on train set to check for overfitting
    #     metrics = evaluator.state.metrics
    #     avg_accuracy = metrics['accuracy']
    #     avg_nll = metrics['loss']
    #     tqdm.write(
    #         "Train Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
    #         .format(engine.state.epoch, avg_accuracy, avg_nll))
    #     pbar.n = pbar.last_print_n = 0

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        pbar.refresh()
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['loss']
        tqdm.write(
            "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
            .format(engine.state.epoch, avg_accuracy, avg_nll))
        # pbar.n = pbar.last_print_n = 0

        # writer.add_scalars("avg losses", {"train": statistics.mean(train_losses),
        #                                   "valid": avg_nll}, engine.state.epoch)
        # # writer.add_scalar("valdation/avg_loss", avg_nll, engine.state.epoch)
        # writer.add_scalar("avg_accuracy", avg_accuracy, engine.state.epoch)
        vis.line(X=np.array([engine.state.epoch]),
                 Y=np.array([avg_accuracy]),
                 win=val_avg_accuracy_window,
                 update='append')
        vis.line(X=np.column_stack(
            (np.array([engine.state.epoch]), np.array([engine.state.epoch]))),
                 Y=np.column_stack((np.array([statistics.mean(train_losses)]),
                                    np.array([avg_nll]))),
                 win=val_avg_loss_window,
                 update='append',
                 opts=dict(legend=['Train', 'Val']))
        del train_losses[:]

    objects_to_checkpoint = {
        "trainer": trainer,
        "model": model,
        "optimizer": optimizer,
        "scheduler": scheduler
    }
    training_checkpoint = Checkpoint(to_save=objects_to_checkpoint,
                                     save_handler=DiskSaver(
                                         args.snapshot_dir,
                                         require_empty=False))
    trainer.add_event_handler(Events.EPOCH_COMPLETED(every=1),
                              training_checkpoint)
    if args.resume_from not in [None, '']:
        tqdm.write("Resume from a checkpoint: {}".format(args.resume_from))
        checkpoint = torch.load(args.resume_from)
        Checkpoint.load_objects(to_load=objects_to_checkpoint,
                                checkpoint=checkpoint)

    try:
        trainer.run(train_loader, max_epochs=args.epochs)
        pbar.close()
    except Exception as e:
        import traceback
        print(traceback.format_exc())
Example #11
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

from dataset import TextDataset
from model.seq2seq import AttnDecoderRNN, DecoderRNN, EncoderRNN

SOS_token = 0
EOS_token = 1
MAX_LENGTH = 10
lang_dataset = TextDataset()
# batch_size = 1
lang_dataloader = DataLoader(lang_dataset, shuffle=True)

# input words num
input_size = lang_dataset.input_lang_words
hidden_size = 256
# output words num
output_size = lang_dataset.output_lang_words
total_epoch = 20

encoder = EncoderRNN(input_size, hidden_size)
decoder = DecoderRNN(hidden_size, output_size, n_layers=2)
attn_decoder = AttnDecoderRNN(hidden_size, output_size, n_layers=2)
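# Toggle between the plain DecoderRNN and the attention-based decoder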
use_attn = True
Example #12
def train(config):
    
    
    # Initialize the device which to run the model on
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)   # fixme
    data_loader = DataLoader(dataset, batch_size = config.batch_size, shuffle=True, num_workers=1)
    vocab_size = dataset.vocab_size
    # char2i = dataset._char_to_ix
    # i2char = dataset._ix_to_char
    # ----------------------------------------
    
    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, vocab_size, \
                                config.lstm_num_hidden, config.lstm_num_layers, device)  # fixme
    model.to(device)

    # Setup the loss and optimizer
    criterion = nn.NLLLoss()  # fixme
    optimizer = optim.RMSprop(model.parameters(), lr = config.learning_rate)  # fixme
    logSoftmax = nn.LogSoftmax(dim=2)
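    # NLLLoss expects log-probabilities, so model outputs go through LogSoftmax
    # (scaled by the temperature when sampling is not greedy).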
    
    # Learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, \
                  step_size=config.learning_rate_step, gamma=config.learning_rate_decay)
    step = 1
    
    if config.resume:
        if os.path.isfile(config.resume):
            print("Loading checkpoint '{}'".format(config.resume))
            checkpoint = torch.load(config.resume)
            step = checkpoint['step']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            print("Checkpoint loaded '{}', steps {}".format(config.resume, checkpoint['step']))

    if not os.path.isdir(config.summary_path):
        os.makedirs(config.summary_path)

    if config.sampling =="greedy":
        
        f = open(os.path.join(config.summary_path,"sampled_"+config.sampling+".txt"), "w+")
    else:
        f = open(os.path.join(config.summary_path,"sampled_"+config.sampling+"_"+str(config.temp)+".txt"), "w+")



    
   
    best_accuracy = 0.0
    pl_loss =[]
    average_loss =[]
    acc =[]

    for epochs in range(30):

        if step == config.train_steps:
            print('Done training.')
            break

        for (batch_inputs, batch_targets) in data_loader:

            if config.batch_size!=batch_inputs.size()[0]:
                print("batch mismatch")
                break

            # Only for time measurement of step through network
            t1 = time.time()
            model.hidden = model.init_hidden(config.batch_size)

            model.zero_grad()
            #######################################################
            # Add more code here ...
            
            #convert batch inputs to one-hot vector
            batch_inputs= torch.zeros(config.batch_size, config.seq_length, vocab_size).scatter_(2,batch_inputs.unsqueeze(-1),1.0)
            
            batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)

            predictions, _ = model(batch_inputs)
            if config.sampling=="greedy":
                predictions = logSoftmax(predictions)
            else:
                predictions = logSoftmax(predictions/config.temp)

            loss = criterion(predictions.transpose(2,1), batch_targets)   # fixme

            _, predictions = torch.max(predictions, dim=2, keepdim=True)
            predictions = (predictions.squeeze(-1) == batch_targets).float()
            accuracy = torch.mean(predictions)
            
            
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
            
            optimizer.step()
            lr_scheduler.step()

            #######################################################

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)
            pl_loss.append(loss.item())
            average_loss.append(np.mean(pl_loss[:-100:-1]))
            acc.append(accuracy)


            if step % config.print_every == 0:

                print("[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        config.train_steps, config.batch_size, examples_per_second,
                        accuracy, loss.item()
                ))
                
                

            if step % config.sample_every == 0:
                               
                model.eval()
               
                with torch.no_grad():
                   char_ix = generate_sample(model, vocab_size, config.seq_length, device, config)
                   sentence = dataset.convert_to_string(char_ix) 
                           
            
                f.write("--------------"+str(step)+"----------------\n")
                f.write(sentence+"\n")
                print(sentence)
                print()
                model.train()
                # ###########################################################################
                # save training loss
                plt.plot(pl_loss,'r-', label="Batch loss", alpha=0.5)
                plt.plot(average_loss,'g-', label="Average loss", alpha=0.5)
                plt.legend()
                plt.xlabel("Iterations")
                plt.ylabel("Loss")  
                plt.title("Training Loss")
                plt.grid(True)
                # plt.show()
                if config.sampling == "greedy":
                    plt.savefig("loss_"+config.sampling+".png")
                else:
                    plt.savefig("loss_"+config.sampling+"_"+str(config.temp)+".png")

                plt.close()
                ################################training##################################################
                plt.plot(acc,'g-', alpha=0.5)
                plt.xlabel("Iterations")
                plt.ylabel("Accuracy")
                plt.title("Train Accuracy")
                plt.grid(True)
                if config.sampling == "greedy":
                    plt.savefig("accuracy_"+config.sampling+".png")
                else:
                    plt.savefig("accuracy_"+config.sampling+"_"+str(config.temp)+".png")
                plt.close()

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break
            
            step+=1
            
        save_checkpoint({
            'epoch': epochs + 1,
            'step': step,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler':lr_scheduler.state_dict(),
            'accuracy': accuracy
                }, config)
        
    f.close()
Example #13
def train(config):
    def acc(predictions, targets):
        hotvec = predictions.argmax(-2) == targets
        accuracy = torch.mean(hotvec.float())
        return accuracy

    # Initialize the device which to run the model on
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=0)
    print('batch', config.batch_size)

    vocabulary_size = dataset.vocab_size
    print('vocab', vocabulary_size)
    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                vocabulary_size=vocabulary_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                dropout=1 - config.dropout_keep_prob,
                                device=device)
    model = model.to(device)
    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.learning_rate,
                                 weight_decay=1e-5)
    gamma = 1 - config.learning_rate_decay
    lr_optim = torch.optim.lr_scheduler.StepLR(optimizer,
                                               config.learning_rate_step,
                                               gamma=gamma,
                                               last_epoch=-1)
    print('Hi')
    acc_list = []
    loss_list = []
    step_list = []
    text_list = []
    epoch = 100
    offset = 2380
    temperature = 1
    policy = 'greedy'
    for e in range(epoch):
        torch.save(model.state_dict(), str(e + 1) + 'tunedmodel.pt')
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            lr_optim.step()
            optimizer.zero_grad()
            t1 = time.time()
            inputs = torch.stack([*batch_inputs], dim=1)
            targets = torch.stack([*batch_targets], dim=1)
            inputs = inputs.to(device)
            targets = targets.to(device)
            out = model.forward(inputs)[0]
            out = out.permute(0, 2, 1)
            loss = criterion(out, targets)
            accuracy = acc(out, targets)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:

                print('accuracy, loss, step: \n',
                      np.around(accuracy.item(), 4), np.around(loss.item(),
                                                               4), step, '\n')
                acc_list.append(accuracy.item())
                loss_list.append(loss.item())

                step_list.append(step + offset * e)

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                generator = torch.randint(low=0,
                                          high=vocabulary_size,
                                          size=(1, 1)).to(device)
                hidden = None
                char_list = []
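                # Feed each prediction back in as the next input; the 'greedy'
                # policy always picks the argmax character.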
                for _ in range(config.seq_length):
                    generator, hidden = model.forward(generator, hidden)
                    if policy == 'greedy':
                        idx = torch.argmax(generator).item()
                    else:
                        pass
                    generator = torch.tensor([idx]).unsqueeze(-1)
                    generator = generator.to(device)
                    char_list.append(idx)
                char = dataset.convert_to_string(char_list)
                with open("MyTunedBook.txt", "a") as text_file:
                    print('Epoch. ',
                          e,
                          'Stahp: ',
                          step,
                          '\n Output: ',
                          char,
                          file=text_file)

                print('Epoch. ', e, 'Stahp: ', step, '\n Output: ', char)
                text_list.append((str((step + offset * e)) + '\n' + char))

                pass

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')

    with open('FinalTunedBook.txt', 'w+') as f:
        for item in text_list:
            f.write("%s\n" % item)

    # save with pandas
    header = ['accuracy', 'length', 'loss', 'step']
    savefiles = zip(acc_list, [config.seq_length] * len(acc_list), loss_list,
                    step_list)
    df = pd.DataFrame(list(savefiles), columns=header)
    df.to_csv('GEN' + str(config.seq_length) + 'tunedlstm.csv')

    print('I am Loaded')

    temp_list = [0., 0.5, 1., 2.]
    policy_list = ['greedy', 'temp']
    seq_length = 111
    alice_string = list('Alice')

    # Generate some sentences by sampling from the model
    for policy in policy_list:
        for temperature in temp_list:
            char_list = []
            hidden = None
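            # Warm up the hidden state on the prompt 'Alice', then generate
            # seq_length more characters with the current policy/temperature.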
            for alice in alice_string:
                idx = dataset.convert_to_idx(alice)
                char_list.append(idx)
                generator = torch.tensor([idx]).unsqueeze(-1)
                generator = generator.to(device)
                generator, hidden = model.forward(generator, hidden)

            for _ in range(seq_length):
                if policy == 'greedy':
                    idx = torch.argmax(generator).item()
                else:
                    temp = generator.squeeze() / temperature
                    soft = torch.softmax(temp, dim=0)
                    idx = torch.multinomial(soft, 1)[-1].item()
                generator = torch.tensor([idx]).unsqueeze(-1)
                generator = generator.to(device)
                generator, hidden = model.forward(generator, hidden)
                char_list.append(idx)
            char = dataset.convert_to_string(char_list)
            with open(
                    "BonusTemp" + str(int(np.floor(temperature))) + "Book.txt",
                    "w+") as text_file:
                print(policy + ': ',
                      temperature,
                      '\n Output: ',
                      char,
                      file=text_file)

            print(policy + ': ', temperature, '\n Output: ', char)
    print('Finito!')
Example #14
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file,
                          config.seq_length)  # should we do +1??
    torch.save(dataset, config.save_dataset)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers,
                                1 - config.dropout_keep_prob, device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    losses = []
    accuracies = []

    # run through the dataset several times until you reach max_steps
    step = 0
    while step < config.train_steps:
        for (batch_inputs, batch_targets) in data_loader:
            step += 1
            # Only for time measurement of step through network
            t1 = time.time()

            batch_inputs = torch.stack(batch_inputs).to(device)
            batch_targets = torch.stack(batch_targets, dim=1).to(
                device)  #dim=1 to avoid transposing

            batch_predictions, (_, _) = model.forward(batch_inputs)
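            # CrossEntropyLoss expects (batch, classes, seq_len), hence the permute below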
            batch_predictions = batch_predictions.permute(1, 2, 0)
            loss = criterion(batch_predictions, batch_targets)
            losses.append(loss.item())
            model.zero_grad()  # should we do this??
            loss.backward()

            torch.nn.utils.clip_grad_norm_(
                model.parameters(),
                max_norm=config.max_norm)  # prevents exploding gradients

            optimizer.step()

            accuracy = accuracy_(batch_predictions, batch_targets)
            accuracies.append(accuracy)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:

                print(
                    "[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), int(step),
                        int(config.train_steps), config.batch_size,
                        examples_per_second, accuracy, loss))

            if step % config.sample_every == 0:

                for temperature in [0]:
                    for length in [30, 60, 90, 120]:
                        sentence = generate_sentence(model, dataset,
                                                     temperature, length,
                                                     device)
                        with open(config.save_generated_text,
                                  'a',
                                  encoding='utf-8') as file:
                            file.write("{};{};{};{}\n".format(
                                step, temperature, length, sentence))

            if step % config.save_every == 0:
                torch.save(model.state_dict(), config.save_model)

            if step == config.train_steps:
                # save only the model parameters
                torch.save(model.state_dict(), config.save_model)
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    # revive the model
    # model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size(),
    #                                 config.lstm_num_hidden, config.lstm_num_layers, device)
    # model.load_state_dict(torch.load(config.save_model))

    print('Done training.')
Example #15
def train(config):

    if not os.path.isdir(CHECKPOINTS_FOLDER):
        os.mkdir(CHECKPOINTS_FOLDER)

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length,
                          config.batch_size, config.train_steps)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size).to(device=device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    generated_sentences = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        optimizer.zero_grad()

        batch_inputs = torch.unsqueeze(torch.stack(batch_inputs),
                                       2).float().to(device=device)
        batch_targets = torch.cat(batch_targets).to(device=device)

        predictions = model(batch_inputs, config.batch_size)

        loss = criterion(predictions, batch_targets)
        accuracy = get_accuracy(predictions, batch_targets)

        loss.backward()
        # Clip gradients after backward() so the clipping actually takes effect
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    int(config.train_steps), config.batch_size,
                    examples_per_second, accuracy, loss))

        if step % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            sentence = generate_sentence(model, dataset, config)
            generated_sentences.append(sentence)

    state = {
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }
    torch.save(
        state, 'checkpoints/{}'.format(
            config.txt_file.split("/", 1)[1].replace('.txt', '')))

    filename = config.txt_file.replace('.txt', '') + 'generated_sentences.txt'
    f = open(filename, 'w')
    output_string = '\n'.join(generated_sentences)
    f.write(output_string)
    f.close()

    print('Done training.')
Example #16
                t0 = time.time()
                cur_cost = self.train_model(b, lr, mb)
                print("Time calculating minibatch cost: {:.4f}. Cost: {}".format(time.time() - t0, cur_cost))
                if b % 20 == 0 and b != 0:
                    t0 = time.time()
                    r = np.random.randint(0, in_test.shape[0] - 1001)
                    err_test = self.error(in_test[r:r+1000], obs_test[r:r+1000])
                    err_train = self.error(in_train[r:r+1000], obs_train[r:r+1000])
                    print("Current cost: {}".format(cur_cost))
                    print("Current Test Error: {}".format(err_test))
                    print("Current Train Error: {}".format(err_train))
                    print("Time calculating errors: {:.4f}".format(time.time() - t0))

if __name__ == '__main__':
    
    dataset = TextDataset('shakespeare.hdf5')
    dataset.cut_by_sequence(10, classify=False)
    # x = T.matrix('x')
    # y = T.matrix('y')
    x = T.tensor3('x')
    y = T.tensor3('y')
    foo = np.random.rand(10, 50)  # random data
    nhid = 200
    rnn = RNN(x, dataset.seq_len, [dataset.char_len, dataset.char_len, nhid],
              mode='LSTM', bptt_truncate=-1)

    trainer = Trainer(rnn, dataset)

    trainer.compile_functions(x, y)
    trainer.gradient_descent(0.01, 200, 10)
    #print(trainer.feed_forward(foo).shape) 
Example #17
File: network.py  Project: MoDeep/TACO
    def __init__(self, K, hidden_size):
        self.K = K
        self.hidden_size = hidden_size
        pass

    def build(self):
        conv_bank = list()
        batch_norm_list = list()

        conv_bank.append(nn.Conv1d(1, self.hidden_size, 1))
        batch_norm_list.append(nn.BatchNorm1d(self.hidden_size))
        for k in range(2, self.K + 1):
            conv_bank.append(nn.Conv1d(self.hidden_size, self.hidden_size, k))
            batch_norm_list.append(nn.BatchNorm1d(self.hidden_size))

    def forward(self, x):
        pass


if __name__ == '__main__':
    transcript_path = 'kss/transcript.txt'

    txt_dataset = TextDataset(transcript_path)
    data_loader = DataLoader(dataset=txt_dataset,
                             batch_size=32,
                             shuffle=True,
                             num_workers=2)

    print('Dataset making and Loading Success')

    prenet = Prenet()
Example #18
File: train.py  Project: euyy/MirrorGAN-1
def main():
    #DataGenerator
    imsize = cfg.TREE.BASE_SIZE * (2**(cfg.TREE.BRANCH_NUM - 1))  #64, 3
    image_transform = transforms.Compose([
        transforms.Resize(int(imsize * 76 / 64)),
        transforms.RandomCrop(imsize),
        transforms.RandomHorizontalFlip()
    ])
    #cfg.DATA_DIR = "data/birds"
    dataset = TextDataset(cfg.DATA_DIR,
                          "train",
                          base_size=cfg.TREE.BASE_SIZE,
                          transform=image_transform)
    assert dataset

    traingenerator = DataGenerator(dataset, batchsize=cfg.TRAIN.BATCH_SIZE)

    ##Create model
    G_model, D_model, GRD_model, CR_model, RNN_model = model_create(dataset)
    print("loadmodel_completed")

    #Preparation for learning
    total_epoch = cfg.TRAIN.MAX_EPOCH
    batch_size = traingenerator.batchsize
    step_epoch = int(len(dataset) / batch_size)
    wrong_step = 3
    wrong_step_epoch = int(step_epoch / wrong_step)
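    # Every `wrong_step` batches the discriminator is also shown mismatched
    # image/caption pairs, labelled as fake.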

    image_list, captions_ar, captions_ar_prezeropad, \
        z_code, eps_code, mask, keys_list, captions_label, \
            real_label, fake_label = next(traingenerator)
    traingenerator.count = 0
    #for image plot
    test_noise = deepcopy(z_code[:20])
    test_eps = deepcopy(eps_code[:20])
    test_cap_pd = deepcopy(captions_ar_prezeropad[:20])
    test_cap = deepcopy(captions_ar[:20])
    test_mask = deepcopy(mask[:20])
    test_mask = np.where(test_mask == 1, -float("inf"), 0)

    #Start learning
    print("batch_size: {}  step_epoch : {} srong_step_epoch {}".format(
        batch_size, step_epoch, wrong_step_epoch))

    for epoch in range(total_epoch):
        total_D_loss = 0
        total_D_acc = 0
        total_D_wrong_loss = 0
        total_D_wrong_acc = 0
        total_G_loss = 0
        total_G_des_loss = 0
        total_G_enc_loss = 0

        print("----------------EPOCH: {} START----------------".format(epoch))

        for batch in tqdm(range(step_epoch)):

            image_list, captions_ar, captions_ar_prezeropad, \
                z_code, eps_code, mask, keys_list, captions_label, \
                    real_label, fake_label = next(traingenerator)

            mask = np.where(mask == 1, -float("inf"), 0)

            if cfg.TREE.BRANCH_NUM == 1:
                real_image = image_list[0]
            if cfg.TREE.BRANCH_NUM == 2:
                real_image = image_list[1]
            if cfg.TREE.BRANCH_NUM == 3:
                real_image = image_list[2]
            #D learning
            if cfg.TREE.BRANCH_NUM == 1:
                fake_image = G_model.predict(
                    [captions_ar_prezeropad, eps_code, z_code])
            else:  # 2 or 3
                fake_image = G_model.predict(
                    [captions_ar_prezeropad, eps_code, z_code, mask])

            if batch % 1 == 0:
                histDr = D_model.train_on_batch(
                    [real_image, captions_ar_prezeropad],
                    [real_label, real_label],
                )
                total_D_loss += histDr[0]
                total_D_acc += (histDr[3] + histDr[4]) / 2

                histDf = D_model.train_on_batch(
                    [fake_image, captions_ar_prezeropad],
                    [fake_label, fake_label],
                )
                total_D_loss += histDf[0]
                total_D_acc += (histDf[3] + histDf[4]) / 2

            if batch % wrong_step == 0:
                histDw = D_model.train_on_batch(
                    [real_image[:-1], captions_ar_prezeropad[1:]],
                    [fake_label[:-1], fake_label[:-1]],
                )
                total_D_wrong_loss += histDw[0]
                total_D_wrong_acc += (histDw[3] + histDw[4]) / 2

            #G learning
            if cfg.TREE.BRANCH_NUM == 1:
                histGRD = GRD_model.train_on_batch(
                    [captions_ar_prezeropad, eps_code, z_code, captions_ar],
                    [real_label, real_label, captions_label],
                )
            else:  # 2 or 3
                histGRD = GRD_model.train_on_batch(
                    [
                        captions_ar_prezeropad, eps_code, z_code, mask,
                        captions_ar
                    ],
                    [real_label, real_label, captions_label],
                )
            total_G_loss += histGRD[0]
            total_G_des_loss += (histGRD[1] + histGRD[2]) / 2
            total_G_enc_loss += histGRD[3]

        #Calculation of loss
        D_loss = total_D_loss / step_epoch / 2
        D_acc = total_D_acc / step_epoch / 2
        D_wrong_loss = total_D_wrong_loss / wrong_step_epoch
        D_wrong_acc = total_D_wrong_acc / wrong_step_epoch
        G_loss = total_G_loss / step_epoch
        G_des_loss = total_G_des_loss / step_epoch
        G_enc_loss = total_G_enc_loss / step_epoch

        print(
            "D_loss: {:.5f} D_wrong_loss: {:.5f} D_acc:  {:.5f} D_wrong_acc:  {:.5f}"
            .format(D_loss, D_wrong_loss, D_acc, D_wrong_acc))
        print(
            "G_loss:  {:.5f} G_discriminator_loss:  {:.5f} G_encoder_loss:  {:.5f}"
            .format(G_loss, G_des_loss, G_enc_loss))

        if epoch % 4 == 0:
            G_save_path = "model/G_epoch{}.h5".format(epoch)
            G_model.save_weights(G_save_path)
            D_save_path = "model/D_epoch{}.h5".format(epoch)
            D_model.save_weights(D_save_path)

        #Save image
        if epoch % 1 == 0:
            sample_images(epoch, test_noise, test_eps, test_cap_pd, test_mask,
                          G_model)
Example #19
def main():
    device = torch.device('cuda')

    embedding_vectors = torch.load(f'{EMBEDDINGS_DIR}/vectors.pkl')

    text_processor = TextProcessor(
        wti=pickle.load(open(f'{EMBEDDINGS_DIR}/wti.pkl', 'rb')),
        tokenizer=get_tokenizer('basic_english'),
        standardize=True,
        min_len=3,
    )

    dataset = TextDataset(CORPUS_DIR, text_processor)

    # split into training and test set; the two lengths must sum exactly to
    # len(dataset), otherwise random_split raises an error
    train_len = int(len(dataset) * DATA_SPLIT)
    train_set, test_set = torch.utils.data.random_split(
        dataset, [train_len, len(dataset) - train_len])

    # count number of samples in each class
    class_count = [0, 0]
    for data, label in dataset:
        class_count[int(label.item())] += 1

    # get relative weights for classes
    _sum = sum(class_count)
    class_count[0] /= _sum
    class_count[1] /= _sum

    # reverse the weights since we're getting the inverse for the sampler
    class_count = list(reversed(class_count))

    # set weight for every sample
    weights = [class_count[int(x[1].item())] for x in train_set]

    # weighted sampler
    sampler = torch.utils.data.WeightedRandomSampler(
        weights=weights, num_samples=len(train_set), replacement=True)
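    # The weighted sampler over-samples the minority class so batches are
    # roughly class-balanced.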

    train_loader = DataLoader(dataset=train_set,
                              batch_size=32,
                              collate_fn=Sequencer(SEQUENCE_LEN),
                              sampler=sampler)

    test_loader = DataLoader(dataset=test_set,
                             batch_size=32,
                             collate_fn=Sequencer(SEQUENCE_LEN))

    # number of filters in each convolutional filter
    N_FILTERS = 64

    # sizes and number of convolutional layers
    FILTER_SIZES = [2, 3]

    # dropout for between conv and dense layers
    DROPOUT = 0.5

    model = TextCNN(
        embeddings=embedding_vectors,
        n_filters=N_FILTERS,
        filter_sizes=FILTER_SIZES,
        dropout=DROPOUT,
    ).to(device)

    print(model)
    print('Trainable params:',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    EPOCHS = 12

    best_acc = 0.0

    # training loop
    for epoch in range(EPOCHS):
        print('Epoch', epoch + 1)

        for i, data in tqdm(enumerate(train_loader), total=len(train_loader)):
            # get word indices vector and corresponding labels
            x, labels = data

            # send to device
            x = x.to(device)
            labels = labels.to(device)

            # make predictions
            predictions = model(x).squeeze()

            # calculate loss
            loss = criterion(predictions, labels)

            # learning stuff...
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # evaluate
        with torch.no_grad():
            model.eval()

            correct = 0
            wrong = 0
            m = [[0, 0], [0, 0]]

            for data in test_loader:
                x, label = data
                x = x.to(device)

                predictions = model(x).squeeze()

                for truth, prediction in zip(label, predictions):
                    y = int(truth.item())
                    y_pred = 1 if prediction.item() > 0.5 else 0

                    m[y][y_pred] += 1

                    if y == y_pred:
                        correct += 1
                    else:
                        wrong += 1

            model.train()

            acc = correct / (correct + wrong)
            if acc > best_acc:
                best_acc = acc
                for file in glob.glob('models/model_*.pth'):
                    os.remove(file)
                torch.save(model.state_dict(), f'models/state_{epoch}.pth')

            print()
            print('Correct:', f'{correct}/{correct + wrong}', 'Accuracy:', acc)
            print('[[TN, FP], [FN, TP]]')
            print(m)
            print()

    # put into evaluation mode
    model.eval()

    text_processor.do_standardize = True

    with torch.no_grad():
        while True:
            text = input('Prompt: ')
            x = text_processor.process(text)
            x = torch.tensor(x).unsqueeze(dim=0)
            print(model(x.to(device)).squeeze())
Example #20
def train(config, CHOICES):
    
    # Initialize the device which to run the model on
    #device = torch.device(config.device)# fix this!
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    
    # Initialize the model that we are going to use

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length );  # fixme
    model = TextGenerationModel( config.batch_size, config.seq_length, dataset.vocab_size, config.temperature).cuda();
    if (CHOICES['LOAD_BEST_MODEL']):
        model.load_state_dict(torch.load('./model_parameter.txt'));
    #print(model.state_dict());
    
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss();
    optimizer = torch.optim.RMSprop(model.parameters(),lr=config.learning_rate);
    if (CHOICES['LOAD_BEST_MODEL']):
        optimizer.load_state_dict(torch.load('./model_optimizer.txt'));
    accuracy_list = [];
    loss_list = [];
    string_list = [];
    tmp_accuracy = 0;
    
    a = 76;
    while (tmp_accuracy == 0) or (accuracy_list[-1] >0.85): 
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()
            
            batch_inputs = torch.stack(batch_inputs)[:,:, None].view(config.seq_length, -1).to(device); # sequ_length * batch_size
            batch_targets = torch.stack(batch_targets)[:,:, None].view(config.seq_length, -1).to(device); # sequ_length * batch_size
            
            if not((int(batch_inputs.size()[1])) == config.batch_size):
                continue;
                
            #print(dataset.convert_to_string(batch_inputs[:, 0].cpu().numpy())); 
            
            batch_inputs_onehot = one_hot(batch_inputs, dataset.vocab_size); # seq_length * batch_size * vocab_size
            optimizer.zero_grad();
            out = model(batch_inputs_onehot);

            values, indices = torch.max(out, 1);

            loss_criterion = criterion(out, batch_targets);
            loss_criterion.backward();
            # clip gradients after backward() so the clipping has an effect
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm);
            optimizer.step();
            
            loss = loss_criterion.item() / config.seq_length;
            values, indices = torch.max(out, 1);
            
            accuracy = ((indices[indices == batch_targets].size())[0])/(config.batch_size*config.seq_length);

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)
            if step % config.print_every == 0:
                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                          "Accuracy = {:.2f}, Loss = {:.3f}".format(
                            datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                            int(config.train_steps), config.batch_size, examples_per_second,
                            accuracy, loss))
                            
                # generate sentences
                if step % 50000 == 0 and CHOICES['GENERATE_FIVE_SENTENCES']:                            
                    model.eval();                    
                    test_input = (torch.Tensor(batch_inputs.size())).type(torch.LongTensor).to(device);
                    a = a + 1;
                    test_input = test_input.fill_(a);
                    output_string = generate_new_stings(model, test_input, dataset.vocab_size, config.seq_length);  
                    tmp = dataset.convert_to_string(output_string.cpu().numpy().tolist());
                    string_list += [tmp];
                    print(tmp);
                    print('---')     
                    
                    model.train();
                # save parameter
                torch.save(model.state_dict(), './model_parameter{:d}.txt'.format(step));
                torch.save(optimizer.state_dict(), './model_optimizer{:d}.txt'.format(step));                    
                
                
                if (CHOICES['DRAW_ACCURACY_PLOT']):
                    accuracy_list += [accuracy];  
                    loss_list += [loss]; 
                

            if step == config.sample_every:
                # Generate some sentences by sampling from the model
                pass
            
            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break
                
            if (CHOICES['GENERATE_FIVE_SENTENCES']) and (len(string_list) == 5):
                break;
        
        if (CHOICES['GENERATE_FIVE_SENTENCES']) and (len(string_list) == 5):
                break;
                        
        
        print("============ finish {} epoch ============ ".format(len(accuracy_list)));
        
    torch.save(model.state_dict(), './model_parameter.txt');
    torch.save(optimizer.state_dict(), './model_optimizer.txt');
    print('Done training.');
    
    if (CHOICES['GENERATE_FIVE_SENTENCES']):
    
        if (CHOICES['DRAW_ACCURACY_PLOT']):
            fig, ax = plt.subplots();
            ax.plot(np.arange(len(accuracy_list)), accuracy_list, 'r', label = 'accuracy');
            ax.plot(np.arange(len(accuracy_list)), loss_list, 'b', label = 'loss');
            legend = ax.legend(loc='upper center');      
            plt.xlabel('Steps');
            plt.title('loss and accuracy of LSTM in 2000 steps');
            plt.show();
        
        for idx in range(5):
            print('====')
            print(string_list[idx]);
示例#21
0
def weights_init(m):
    classname = m.__class__.__name__
    if classname == 'LSTM':
        nn.init.orthogonal_(m.weight_ih_l0)
        nn.init.orthogonal_(m.weight_hh_l0)
        nn.init.orthogonal_(m.weight_ih_l1)
        nn.init.orthogonal_(m.weight_hh_l1)


label_size = 8
batch_size = 64
learning_rate = 0.001
epochs = 10
chapters = choose_chapters2()
cp = Corpus(chapters)
train_set = TextDataset(cp, train=True)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_set = TextDataset(cp, train=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)
rnn = 'gru'

if rnn == 'lstm':
    model = LSTMNet(512, 128, vocab_size=len(cp.vocab), label_size=label_size, batch_size=batch_size).cuda()
    model.apply(weights_init)
else:
    model = GRUNet(512, 128, vocab_size=len(cp.vocab), label_size=label_size, batch_size=batch_size).cuda()

optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
loss_function = nn.CrossEntropyLoss()

record = {}
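Note that weights_init above hard-codes the parameter names of a two-layer LSTM (weight_ih_l0 ... weight_hh_l1). A more general variant, shown here only as a sketch and not part of the original snippet, orthogonally initializes the recurrent weights for any number of layers:

import torch.nn as nn

def weights_init_general(m):
    # Hypothetical generalization: initialize every input-hidden and
    # hidden-hidden weight of an LSTM/GRU, regardless of layer count.
    if isinstance(m, (nn.LSTM, nn.GRU)):
        for name, param in m.named_parameters():
            if name.startswith('weight_ih') or name.startswith('weight_hh'):
                nn.init.orthogonal_(param)
            elif name.startswith('bias'):
                nn.init.zeros_(param)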
示例#22
0
def train(config):

    # Print all configs to confirm parameter settings
    print_flags()
    assert config.sampling_method in ('greedy', 'random')
    assert config.generate_mode in ('generate', 'finish')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(filename=config.txt_file,
                          seq_length=config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                dropout=1-config.dropout_keep_prob,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=device)
    model.to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    epoch = 10

    # Store some measures
    los = list()
    iteration = list()
    acc = list()
    max_step = 0

    for i in range(epoch):
      for step, (batch_inputs, batch_targets) in enumerate(data_loader):

          # Only for time measurement of step through network
          t1 = time.time()

          model.train()
          optimizer.zero_grad()

          batch_inputs = torch.stack(batch_inputs).to(device)
          batch_targets = torch.stack(batch_targets).to(device)

          h_0 = torch.zeros(config.lstm_num_layers, batch_inputs.shape[1], config.lstm_num_hidden).to(device)
          c_0 = torch.zeros(config.lstm_num_layers, batch_inputs.shape[1], config.lstm_num_hidden).to(device)

          pred, _, _ = model(batch_inputs, h_0, c_0)
          accuracy = compute_accuracy(pred, batch_targets)
          pred = pred.permute(1, 2, 0)
          batch_targets = batch_targets.permute(1, 0)
          loss = criterion(pred, batch_targets)
          loss.backward()
          torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
          optimizer.step()

          # Just for time measurement
          t2 = time.time()
          examples_per_second = config.batch_size/float(t2-t1)

          if (step + i * max_step) % config.print_every == 0:

              print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step + i * max_step,
                      int(config.train_steps), config.batch_size, examples_per_second,
                      accuracy, loss
              ))
              iteration.append(step + i * max_step)
              acc.append(accuracy)
              los.append(loss.item())
              if max_step < step:
                max_step = step

          if (step + i * max_step) % config.sample_every == 0:
              model.eval()
              batch_sample = 5
              if config.generate_mode == 'finish':
                generated = [dataset._char_to_ix[c] for c in config.input_seq]
                generated = torch.LongTensor(generated).view(-1, 1).to(device)
                for l in range(config.generate_length):
                  if l == 0:
                    h_s = torch.zeros(config.lstm_num_layers, 1, config.lstm_num_hidden).to(device)
                    c_s = torch.zeros(config.lstm_num_layers, 1, config.lstm_num_hidden).to(device)
                    gen, h_s, c_s = model(generated, h_s, c_s)
                    gen = torch.unsqueeze(gen[-1], 0)
                  else:
                    gen, h_s, c_s = model(gen, h_s, c_s)
                  if config.sampling_method == 'greedy':
                    gen = gen.argmax(dim=2)
                  else:
                    gen = nn.functional.softmax(gen/config.temperature, dim=2)
                    dist = torch.distributions.categorical.Categorical(gen)
                    gen = dist.sample()
                  generated = torch.cat((generated, gen))
              else:
                generated = [dataset._char_to_ix[random.choice(dataset._chars)] for c in range(batch_sample)]
                generated = torch.LongTensor(generated).view(-1, batch_sample).to(device)
                for l in range(config.generate_length - 1):
                  if l == 0:
                    h_s = torch.zeros(config.lstm_num_layers, batch_sample, config.lstm_num_hidden).to(device)
                    c_s = torch.zeros(config.lstm_num_layers, batch_sample, config.lstm_num_hidden).to(device)
                    gen, h_s, c_s = model(generated, h_s, c_s)
                  else:
                    gen, h_s, c_s = model(gen, h_s, c_s)
                  if config.sampling_method == 'greedy':
                    gen = gen.argmax(dim=2)
                  else:
                    gen = nn.functional.softmax(gen/config.temperature, dim=2)
                    dist = torch.distributions.categorical.Categorical(gen)
                    gen = dist.sample()
                  generated = torch.cat((generated, gen))
              generated = generated.t()
              sentence = [dataset.convert_to_string(idx) for idx in generated.tolist()]
              if config.sampling_method == 'random':
                with open('{}/{}_{}_{}_{}.txt'.format(config.summary_path, config.generate_mode, datetime.now().strftime("%Y-%m-%d"), config.sampling_method, config.temperature), 'a', encoding='utf-8') as file:
                  file.write('--------------\n')
                  file.write('Training Step: {}\n'.format(step + i * max_step))
                  file.write('--------------\n')
                  for sen in sentence:
                    file.write('{}\n'.format(sen))
                  file.write('\n')
                  file.close()   
              else:
                with open('{}/{}_{}_{}.txt'.format(config.summary_path, config.generate_mode, datetime.now().strftime("%Y-%m-%d"), config.sampling_method), 'a', encoding='utf-8') as file:
                  file.write('--------------\n')
                  file.write('Training Step: {}\n'.format(step + i * max_step))
                  file.write('--------------\n')
                  for sen in sentence:
                    file.write('{}\n'.format(sen))
                  file.write('\n')
                  file.close()

          if (step + i * max_step) == config.train_steps:
              # If you receive a PyTorch data-loader error, check this bug report:
              # https://github.com/pytorch/pytorch/pull/9655
              break

      if (step + i * max_step) == config.train_steps:
        break

    print('Done training.')
    fig, axs = plt.subplots(1, 2, figsize=(10,5))
    axs[0].plot(iteration, acc)
    axs[0].set_xlabel('Iteration')
    axs[0].set_ylabel('Accuracy')
    axs[1].plot(iteration, los)
    axs[1].set_xlabel('Iteration')
    axs[1].set_ylabel('Loss')
    fig.tight_layout()
    plt.show()
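The training loop above relies on compute_accuracy(pred, batch_targets), which is not included in the snippet. A plausible sketch, assuming pred holds per-character logits with the vocabulary on the last dimension and batch_targets the matching character indices:

import torch

def compute_accuracy(pred, targets):
    # Hypothetical helper: fraction of characters predicted correctly.
    # pred: (seq_length, batch_size, vocab_size) logits
    # targets: (seq_length, batch_size) character indices
    with torch.no_grad():
        return (pred.argmax(dim=-1) == targets).float().mean().item()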
label = torch.FloatTensor(opt.batchSize)
real_label = 1
fake_label = 0

if opt.cuda:
    netD.cuda()
    netG.cuda()
    criterion.cuda()
    input, label = input.cuda(), label.cuda()
    noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

fixed_noise = Variable(fixed_noise)

if not opt.eval:

    train_dataset = TextDataset(opt.dataroot, transform=image_transform)

    ## Completed - TODO: Make a new DataLoader and Dataset to include embeddings
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=opt.batchSize,
                                                   shuffle=True,
                                                   num_workers=int(
                                                       opt.workers))

    # setup optimizer
    optimizerD = optim.Adam(netD.parameters(),
                            lr=opt.lr,
                            betas=(opt.beta1, 0.999))
    optimizerG = optim.Adam(netG.parameters(),
                            lr=opt.lr,
                            betas=(opt.beta1, 0.999))
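The GAN setup fragment above passes an image_transform to TextDataset without defining it. A typical torchvision pipeline, shown purely as an assumed example, resizes, crops, and normalizes the images:

import torchvision.transforms as transforms

# Hypothetical transform assumed by TextDataset(opt.dataroot, transform=image_transform)
image_transform = transforms.Compose([
    transforms.Resize(64),
    transforms.CenterCrop(64),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])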
示例#24
0
def train(config):
    # determine the filename (to be used for saving results, checkpoints, models, etc.)
    filename = Path(config.txt_file).stem

    # Initialize the device which to run the model on
    if config.device == 'cuda':
        if torch.cuda.is_available():
            device = torch.device(config.device)
        else:
            device = torch.device('cpu')
    else:
        device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(
        filename=config.txt_file,
        seq_length=config.seq_length
    )
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # get the vocabulary size and int2char and char2int dictionaries for use later
    VOCAB_SIZE = dataset.vocab_size

    # Initialize the model that we are going to use
    model = TextGenerationModel(
        batch_size=config.batch_size,
        seq_length=config.seq_length,
        vocabulary_size=VOCAB_SIZE,
        lstm_num_hidden=config.lstm_num_hidden,
        lstm_num_layers=config.lstm_num_layers,
        device=device,
        batch_first=config.batch_first,
        dropout=1.0-config.dropout_keep_prob
    )

    # Setup the loss and optimizer and learning rate scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        model.parameters(),
        config.learning_rate
    )

    # Load the latest checkpoint, if any exist
    checkpoints = list(CHECKPOINTS_DIR.glob(f'{model.__class__.__name__}_{filename}_checkpoint_*.pt'))
    if len(checkpoints) > 0:
        # load the latest checkpoint
        checkpoints.sort(key=os.path.getctime)
        latest_checkpoint_path = checkpoints[-1]
        start_step, results, sequences = load_checkpoint(latest_checkpoint_path, model, optimizer)
    else:
        # initialize the start step, results and sequences
        start_step = 0
        results = {
            'step': [],
            'accuracy': [],
            'loss': [],
        }
        sequences = {
            'step': [],
            't': [],
            'temperature': [],
            'sequence': []
        }

    for step in range(start_step, int(config.train_steps)):
        # reinitialize the data_loader iterator if we have iterated over all available mini-batches
        if step % len(data_loader) == 0 or step == start_step:
            data_iter = iter(data_loader)
        
        # get the mini-batch
        batch_inputs, batch_targets = next(data_iter)

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################
        # Add more code here ...
        #######################################################

        # put the model in training mode
        model.train()

        # convert the data and send to device
        X = torch.stack(batch_inputs, dim=1)
        X = X.to(device)

        Y = torch.stack(batch_targets, dim=1)
        Y = Y.to(device)

        # forward pass the mini-batch
        Y_out, _ = model.forward(X)
        Y_pred = Y_out.argmax(dim=-1)

        # (re)set the optimizer gradient to 0
        optimizer.zero_grad()

        # compute the accuracy and the loss
        accuracy = get_accuracy(Y_pred, Y)
        loss = criterion.forward(Y_out.transpose(2, 1), Y)

        # backpropagate the loss
        loss.backward()

        # clip the gradients (to prevent them from exploding)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)

        # tune the model parameters
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % config.print_every == 0:
            print(f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}], Train Step {step:04d}/{int(config.train_steps):04d}, Batch Size = {config.batch_size}, Examples/Sec = {examples_per_second:.2f}, Accuracy = {accuracy:.2f}, Loss = {loss:.3f}')

            # append the accuracy and loss to the results
            results['step'].append(step)
            results['accuracy'].append(accuracy.item())
            results['loss'].append(loss.item())

        if step % config.sample_every == 0:
            for T in [20, 30, 60, 120]:
                for temperature in [0.0, 0.5, 1.0, 2.0]:
                    # Generate some sentences by sampling from the model
                    sequence = sample_sequence(
                        model=model,
                        vocab_size=VOCAB_SIZE,
                        T=T,
                        char=None,
                        temperature=temperature,
                        device=device
                    )
                    sequence_str = dataset.convert_to_string(sequence)
                    print(f'Generated sample sequence (T={T}, temp={temperature}): {sequence_str}')

                    # append the generated sequence to the sequences
                    sequences['step'].append(step)
                    sequences['t'].append(T)
                    sequences['temperature'].append(temperature)
                    sequences['sequence'].append(sequence_str)

        if step % config.checkpoint_every == 0:
            # create a checkpoint
            create_checkpoint(CHECKPOINTS_DIR, filename, step, model, optimizer, results, sequences)

            # save the results
            save_results(RESULTS_DIR, filename, results, sequences, model)

            # save the model
            save_model(MODELS_DIR, filename, model)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
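sample_sequence(...) used in the sampling branch above is not shown. The sketch below matches the call signature and assumes the model consumes a (seq_len, 1) index tensor, returns a (logits, hidden) pair, and that temperature 0.0 means greedy decoding; none of this is taken from the original code.

import torch

def sample_sequence(model, vocab_size, T, char=None, temperature=0.0, device='cpu'):
    # Hypothetical sampler: start from a given or random character index and
    # extend the sequence to length T, greedily or with temperature sampling.
    if char is None:
        char = torch.randint(vocab_size, (1,)).item()
    sequence = [char]
    model.eval()
    with torch.no_grad():
        for _ in range(T - 1):
            inputs = torch.tensor(sequence, dtype=torch.long, device=device).view(-1, 1)
            logits, _ = model(inputs)                          # assumed (logits, hidden) return
            last_logits = logits.reshape(-1, vocab_size)[-1]   # logits for the final step
            if temperature == 0.0:
                next_char = last_logits.argmax().item()
            else:
                probs = torch.softmax(last_logits / temperature, dim=-1)
                next_char = torch.multinomial(probs, 1).item()
            sequence.append(next_char)
    return sequence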
def train_early_stopping(epoch_number):
    global best_val_loss, best_acc

    loss_epoch = []
    i = 1
    batch_start = time.time()
    for name, group in train_grouped:
        # print(group.tokens.values)
        tokens = TextDataset._text2idx(group.tokens, dictionary.word2idx)
        labels = np.array(group.label.values)
        try:
            tokens, labels = process_batch(tokens, labels)
        except Exception:
            print(tokens)
            sys.exit(0)

        loss = train_data(tokens, labels)
        loss_epoch.append(loss)
        # print loss every n passes
        if i % (p.print_loss_every * 5) == 0:
            print('| epoch   %d | %d/%d batches | ms/batch (%s) | loss %f' %
                  (epoch_number, i % (num_batches + 1), num_batches,
                   time_since(batch_start), np.mean(loss_epoch)))
            batch_start = time.time()
        i += 1

    # word_encoder.eval()
    # sent_encoder.eval()
    model.eval()

    print('-' * 89)
    val_loss, val_acc, precision, recall, f1, conf_matrix = check_loss_and_accuracy(
        val_grouped)
    print(
        '| val set result | valid loss (pure) {:5.4f} | Acc {:8.4f} | Precision {:8.4f} | Recall {:8.4f} '
        '| F1-score {:8.4f}'.format(val_loss, val_acc, precision, recall, f1))
    print('The confusion matrix is: ')
    print(str(conf_matrix))
    print('-' * 89)

    test_loss, test_acc, precision, recall, f1, conf_matrix = check_loss_and_accuracy(
        test_grouped)
    print(
        '| test set result | test loss (pure) {:5.4f} | Acc {:8.4f} | Precision {:8.4f} | Recall {:8.4f} '
        '| F1-score {:8.4f}'.format(test_loss, test_acc, precision, recall,
                                    f1))
    print('The confusion matrix is: ')
    print(str(conf_matrix))
    print('-' * 89)

    directory = "./experiments/%s/models/" % config.exp_num

    if not os.path.exists(directory):
        os.makedirs(directory)

    if not best_val_loss or val_loss < best_val_loss:
        best_val_loss = val_loss
    else:  # if loss doesn't go down, divide the learning rate by 5.
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group['lr'] * 0.2
    if not best_acc or val_acc > best_acc:
        with open(
                directory + 'para_{}.best_acc.pt'.format(config.para_pooling),
                'wb') as f:
            torch.save(model, f)
        best_acc = val_acc
    with open(
            directory + 'para_{}.epoch-{:02d}.pt'.format(
                config.para_pooling, epoch_number), 'wb') as f:
        torch.save(model, f)

    with open("./experiments/{}/optimizer.pt".format(config.exp_num),
              'wb') as f:
        torch.save(optimizer.state_dict(), f)
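The manual decay above (multiply the learning rate by 0.2 whenever validation loss stops improving) can also be expressed with PyTorch's built-in ReduceLROnPlateau scheduler. A brief self-contained sketch of the same policy; the placeholder module stands in for the model trained above:

import torch
import torch.optim as optim

model = torch.nn.Linear(10, 2)                 # placeholder module for illustration
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Cut the learning rate by 5x whenever the monitored validation loss stops improving.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                 factor=0.2, patience=0)

# after each validation pass:
# scheduler.step(val_loss)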
示例#26
0
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Save the instantiated dataset.
    with open('model_ckpt/train.dataset', 'wb') as dataset_file:
        pickle.dump(dataset, dataset_file)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device, config.dropout_keep_prob)  # fixme

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()  # reduction='mean'(default) - average over all timesteps and all batches as they are merged.
    optimizer = optim.RMSprop(model.parameters(), config.learning_rate)  # fixme
    # optimizer = optim.Adam(model.parameters(), config.learning_rate)

    # Create a tensor to hold the one-hot encoding for the batch inputs.
    onehot_batch_inputs = torch.FloatTensor(config.seq_length, config.batch_size, dataset.vocab_size)
    onehot_batch_inputs = onehot_batch_inputs.to(device)

    h_init = torch.zeros(config.lstm_num_layers, config.batch_size, config.lstm_num_hidden, device=device)
    c_init = torch.zeros(config.lstm_num_layers, config.batch_size, config.lstm_num_hidden, device=device)

    # Record the learning rate steps individually for learning rate decay.
    lr_step = 0
    lr = 1
    for epoch in np.arange(config.epochs):
        losses = []
        accs = []
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...
            #######################################################
            model.train()
            # Convert the DataLoader output from list of tensors to tensors.
            batch_inputs = torch.stack(batch_inputs)
            batch_inputs = batch_inputs.to(device)

            # If the epoch is finished and there is not enough character to extract, break the loop
            if batch_inputs.shape[0] * batch_inputs.shape[1] != onehot_batch_inputs.shape[0] * onehot_batch_inputs.shape[1]:
                break

            # Zero the one-hot encoding and encode according to batch_inputs.
            onehot_batch_inputs.zero_()
            onehot_batch_inputs.scatter_(2, batch_inputs.unsqueeze_(-1), 1)

            # Convert the DataLoader output from list of tensors to tensors.
            batch_targets = torch.stack(batch_targets)
            batch_targets = batch_targets.to(device)

            # Learning rate decay.
            if lr_step % config.learning_rate_step == 0:
                optimizer = optim.RMSprop(model.parameters(), config.learning_rate * lr)
                lr *= config.learning_rate_decay

            optimizer.zero_grad()
            logits, _, _ = model(onehot_batch_inputs, h_init, c_init)
            # The seq_length dimension and batch_size dimension of the logits and batch_targets are merged together, and the mean is computed over this new dimension.
            loss = criterion(logits.view(-1, dataset.vocab_size), batch_targets.view(-1))   # fixme
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)

            accuracy = accuracy_fn(logits.view(-1, dataset.vocab_size), batch_targets.view(-1))  # fixme
            optimizer.step()

            losses.append(loss.item())
            accs.append(accuracy)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)

            if step % config.print_every == 0:
                print("[{}] Epoch {}/{}, Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), epoch + 1, config.epochs, step,
                          config.train_steps, config.batch_size, examples_per_second,
                          accuracy, loss
                      ))

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                model.eval()
                # Create tensor to hold the generated samples.
                samples = torch.zeros((5, config.sample_length), dtype=torch.int, device=device)
                # Initialize the first characters for the samples.
                start_chars = torch.randint(dataset.vocab_size, size=(1, 5, 1), dtype=torch.long, device=device)
                samples[:, 0] = start_chars.squeeze()
                # Create a tensor to hold the one-hot encoding for the output characters of the LSTM network (one per each time step).
                onehot_chars = torch.zeros((1, 5, dataset.vocab_size), device=device)
                onehot_chars.scatter_(2, start_chars, 1)

                last_h = torch.zeros(config.lstm_num_layers, 5, config.lstm_num_hidden, device=device)
                last_c = torch.zeros(config.lstm_num_layers, 5, config.lstm_num_hidden, device=device)
                for t in np.arange(config.sample_length - 1):
                    logits, last_h, last_c = model(onehot_chars, last_h, last_c)
                    next_chars = logits.squeeze().argmax(-1)
                    onehot_chars.zero_()
                    onehot_chars.scatter_(2, next_chars.view(1, 5, 1), 1)
                    samples[:, t + 1] = next_chars

                samples = samples.tolist()
                samples = [dataset.convert_to_string(sample) for sample in samples]
                # Output the samples into a text file.
                with open(config.summary_path + 'samples.txt', 'a') as txt_file:
                    txt_file.write('Epoch: {}\nStep: {}\n'.format(epoch + 1, step))
                    txt_file.writelines(map(lambda x: x + '\n', samples))

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

            lr_step += 1
        # After each training epoch, save the model and the training loss and accuracy.
        model.train()
        torch.save(model.state_dict(), 'model_ckpt/lstm_gen_epoch{}.ckpt'.format(epoch + 1))
        with open(config.summary_path + 'train_epoch{}.csv'.format(epoch + 1), 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(losses)
            csv_writer.writerow(accs)

    print('Done training.')
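accuracy_fn in example #26 is not part of the listing. Given that it receives flattened logits of shape (N, vocab_size) and flattened targets of shape (N,), a plausible sketch is:

import torch

def accuracy_fn(logits, targets):
    # Hypothetical helper: fraction of positions whose argmax matches the target.
    with torch.no_grad():
        return (logits.argmax(dim=-1) == targets).float().mean().item()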
示例#27
0
    start = time.time()
    if args.model == 'lstmcnn':
        vocab_list = list(
            """abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{} """
        )
    else:
        vocab_list = list(
            """abcdefghijklmnopqrstuvwxyz0123456789,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{} """
        )

    print('==> download dataset ' + args.dataset)
    download_dataset(args.data_path)

    print('==> make dataset')
    train_dataset = TextDataset(args.data_path,
                                args.seq_length,
                                vocab_list,
                                is_train=True)
    test_dataset = TextDataset(args.data_path,
                               args.seq_length,
                               vocab_list,
                               is_train=False)
    train_loader = data_utils.DataLoader(train_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=args.num_workers)
    test_loader = data_utils.DataLoader(test_dataset,
                                        batch_size=args.batch_size,
                                        shuffle=True,
                                        num_workers=args.num_workers)

    print('==> make model')
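The character vocabularies above are only useful once characters are mapped to indices. The snippet's TextDataset is not shown, so the following is merely an assumed illustration of how such a vocab_list is typically consumed:

import torch

def encode_characters(text, vocab_list, seq_length):
    # Hypothetical illustration: map characters to their vocabulary indices,
    # drop unknown characters, and pad/truncate to seq_length.
    char2idx = {ch: i for i, ch in enumerate(vocab_list)}
    idxs = [char2idx[ch] for ch in text if ch in char2idx]
    idxs = idxs[:seq_length] + [0] * max(0, seq_length - len(idxs))
    return torch.tensor(idxs, dtype=torch.long)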
示例#28
0
def train(config):
    def compute_accuracy(outputs, targets):
        """
        Compute the accuracy of the predictions.
        """
        outputs = torch.argmax(outputs, -1)

        return (outputs == targets).float().mean()

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=4)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device,
                                config.dropout_keep_prob).to(device)

    learning_rate = config.learning_rate

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()  # fixme
    optimizer = optim.Adam(model.parameters(), learning_rate)  # fixme

    x_onehot = torch.FloatTensor(config.seq_length, config.batch_size,
                                 dataset.vocab_size).to(device)
    y_onehot = torch.FloatTensor(config.seq_length, config.batch_size,
                                 dataset.vocab_size).to(device)

    # HACK: config.train_steps seems to be of type 'float' instead of 'int'.
    config.train_steps = int(config.train_steps)

    step = 0

    loss_list = []
    accuracy_list = []

    while step < config.train_steps:
        for batch_inputs, batch_targets in data_loader:

            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...
            #######################################################
            optimizer.zero_grad()

            batch_inputs = torch.stack(batch_inputs).to(device)
            batch_targets = torch.stack(batch_targets).to(device)
            # print(dataset.convert_to_string(batch_inputs.t()[0].cpu().numpy()))

            try:
                x_onehot.zero_()
                x_onehot.scatter_(2, batch_inputs.unsqueeze(-1), 1)
            except RuntimeError:
                continue

            y = model(x_onehot)

            loss = criterion(y.view(-1, dataset.vocab_size),
                             batch_targets.view(-1))

            loss.backward()
            optimizer.step()

            loss = loss.item()  # fixme
            accuracy = compute_accuracy(y, batch_targets)  # fixme

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            loss_list.append(loss)
            accuracy_list.append(accuracy)

            if step % config.learning_rate_step == 0:
                learning_rate = config.learning_rate_decay * learning_rate
                print(learning_rate)
                optimizer = optim.Adam(model.parameters(), learning_rate)

            if step % config.print_every == 0:

                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                      "Examples/Sec = {:.2f}, Accuracy = {:.2f}, "
                      "Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                          config.train_steps, config.batch_size,
                          examples_per_second, accuracy, loss))

                # Save an image of loss and accuracy during training.
                plt.figure()
                plt.subplot(121)
                plt.plot(loss_list)
                plt.xlabel("Steps")
                plt.ylabel("Loss")
                plt.subplot(122)
                plt.plot(accuracy_list)
                plt.xlabel("Steps")
                plt.ylabel("Accuracy")
                plt.tight_layout()
                plt.savefig('loss.png')
                plt.close()

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                inputs = sample_text(dataset, x_onehot)
                output = sample_text(dataset, y)
                sample = sample_text(dataset, model.sample())

                for idx in range(5):
                    print(f"{inputs[idx]} | {output[idx]} | {sample[idx]}")

                # Save some sampled sequences.
                with open('samples.csv', 'a') as file:
                    for line in sample[:5]:
                        file.write(f"{step};'{line}'\n")

                torch.save(
                    {
                        'step': step + 1,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(config.summary_path, f"model_{step}.pth.tar"))

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this
                # bug report: https://github.com/pytorch/pytorch/pull/9655
                break
            else:
                step += 1

    print('Done training.')
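sample_text in example #28 appears to turn a (seq_length, batch_size, vocab_size) tensor of one-hot vectors or logits back into strings. It is not defined in the snippet; a minimal sketch of that behavior:

import torch

def sample_text(dataset, tensor):
    # Hypothetical helper: take the argmax over the vocabulary dimension and
    # decode each column (one sequence per batch element) into a string.
    with torch.no_grad():
        indices = tensor.argmax(dim=-1)          # (seq_length, batch_size)
    return [dataset.convert_to_string(seq.tolist()) for seq in indices.t()]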
def train(config):

    # Initialize the device which to run the model on
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    #path to save the model
    path = "results/"

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    # print("Data file:", dataset._data[0:5])
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset,
                                config.lstm_num_hidden, config.lstm_num_layers,
                                device)
    # model = torch.load("results/book_EN_grimms_fairy_tails_final_model.pt")

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # Store Accuracy and losses:
    results = {'accuracy': [], 'loss': []}

    # Training:
    total_steps = 0
    while total_steps <= config.train_steps:

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()
            optimizer.zero_grad()

            # Stacking and One-hot encoding:
            batch_inputs = torch.stack(batch_inputs, dim=1).to(device)
            batch_targets = torch.stack(batch_targets, dim=1).to(device)
            # print("Inputs and targets:", x_onehot.size(), batch_targets.size())

            # forward inputs to the model:
            pred_targets, _ = model.forward(
                index_to_onehot(batch_inputs, dataset.vocab_size))
            # print("pred_targets trans shape:", pred_targets.transpose(2,1).size())
            loss = criterion(pred_targets.transpose(2, 1), batch_targets)

            #Backward pass
            loss.backward(retain_graph=True)
            optimizer.step()

            #Accuracy
            # argmax along the vocab dimension
            accuracy = (pred_targets.argmax(
                dim=2) == batch_targets).float().mean().item()

            #Update the accuracy and losses for visualization:
            results['accuracy'].append(accuracy)
            results['loss'].append(loss.item())

            # Just for time measurement
            t2 = time.time()
            # examples_per_second = config.batch_size/float(t2-t1)
            total_steps += 1

            if step % config.print_every == 0:

                # print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                #       "Accuracy = {:.2f}, Loss = {:.3f}".format(
                #         datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                #         config.train_steps, config.batch_size, examples_per_second,
                #         accuracy, loss
                # ))
                print("[{}] Train Step {:07d}/{:07d}, Batch Size = {}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                          total_steps, config.batch_size,
                          results['accuracy'][-1], results['loss'][-1]))

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                print('GENERATED NO TEMP:')
                print(model.generate_sentence(100))
                print('__________________')
                print('GENERATED 0.5 TEMP:')
                print(model.generate_sentence(100, 0.5))
                print('__________________')
                print('GENERATED 1 TEMP:')
                print(model.generate_sentence(100, 1))
                print('__________________')
                print('GENERATED 2 TEMP:')
                print(model.generate_sentence(100, 2))
                # save model for individual timesteps
                torch.save(
                    model, path + config.txt_file.split('/')[1].split('.')[0] +
                    str(step) + "_model.pt")

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

        print('Done training.')
        #Save the final model

        torch.save(
            model, path + config.txt_file.split('/')[1].split('.')[0] +
            "_final_model.pt")
        print("saving results in folder...")
        np.save(path + "loss_train", results['loss'])
        np.save(path + "accuracy_train", results['accuracy'])
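index_to_onehot is called above but not defined in the snippet. Assuming its input is a LongTensor of character indices, a likely implementation is a thin wrapper around F.one_hot:

import torch
import torch.nn.functional as F

def index_to_onehot(indices, vocab_size):
    # Hypothetical helper: (batch_size, seq_length) LongTensor of indices ->
    # (batch_size, seq_length, vocab_size) float one-hot tensor.
    return F.one_hot(indices, num_classes=vocab_size).float()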
示例#30
0
def train(config, lr):

    # Initialize the device which to run the model on
    #device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(
        batch_size=config.batch_size,
        seq_length=config.seq_length,
        vocabulary_size=dataset.vocab_size,
        lstm_num_hidden=config.lstm_num_hidden,
        lstm_num_layers=config.lstm_num_layers)  # fixme

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print('Currently using: ', device)

    model = model.to(device)
    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()  # fixme
    #optimizer = torch.optim.Adam(model.parameters(), lr = config.learning_rate, amsgrad = True)  # fixme
    #optimizer = torch.optim.Adam(model.parameters(), lr = lr, amsgrad = True)
    acc_list = []
    loss_list = []

    test_batches_in = []
    test_batches_ta = []

    test_acc = []

    best_accuracy = 0

    ### Flag for temperature
    temp = True
    temp_value = 2

    for runs in range(3):
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            if step % config.print_every != 0 or step == 0:

                t1 = time.time()
                #print(type(step))

                #model.train()

                #######################################################

                zerox = create_zerox(batch_inputs, dataset.vocab_size, device)

                output, _ = model.forward(zerox)  #.to(device)

                targets = torch.stack(batch_targets).to(device)

                output_indices = torch.argmax(output, dim=2).to(device)

                output = output.transpose(0, 1).transpose(1, 2).to(device)

                #print(output.shape, targets.shape)
                #return 'a'

                #print(output.transpose(0,2).shape, targets.t().shape)
                #return 'a'
                loss_for_backward = criterion(output.transpose(0, 2),
                                              targets.t()).to(device)

                optimizer.zero_grad()
                loss_for_backward.backward()
                # Clip gradients after backward(), once gradients actually exist
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               max_norm=config.max_norm)
                optimizer.step()

                correct_indices = output_indices == targets.transpose(
                    0, 1).to(device)

                #return correct_indices
                #######################################################

                #loss = criterion.forward(output, targets)

                #accuracy = int(sum(sum(correct_indices)))/int(correct_indices.shape[0]*
                #correct_indices.shape[1])
                #print(type(accuracy),type(loss))
                # Just for time measurement
                t2 = time.time()
                examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0 and step != 0:
                #model.eval()

                zerox = create_zerox(batch_inputs, dataset.vocab_size, device)

                output, _ = model.forward(zerox)

                output_indices = torch.argmax(output, dim=2).to(device)

                output = output.transpose(0, 1).transpose(1, 2).to(device)
                targets = torch.stack(batch_targets).to(device)

                #loss_for_backward = criterion(output,targets).to(device)
                loss_for_backward = criterion(output.transpose(0, 2),
                                              targets.t()).to(device)
                correct_indices = output_indices == targets.transpose(
                    0, 1)  #.to(device)
                #return output_indices, targets.transpose(0,1)

                #print(correct_indices.shape)
                #accuracy = sum(acc_list) / len(acc_list)
                #accuracy = int(sum(sum(correct_indices)))/int(correct_indices.numel())
                accuracy = np.array(correct_indices.detach().cpu()).mean()

                #print("[{}] Train Step {:04d}/{:f}, Batch Size = {}, Examples/Sec = {:.2f}, "
                #      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                #        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                #        config.train_steps, config.batch_size, examples_per_second,
                #        accuracy,
                #        loss_for_backward
                #))
                acc_list.append(accuracy)
                loss_list.append(float(loss_for_backward))

                if accuracy > best_accuracy:
                    torch.save(
                        {
                            'model_state_dict': model.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict()
                        }, 'model.pth')

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                ## Generate a good sample instead of the same one over and over again
                #model.eval()

                ### Append every modulo batch to a list of test batches and run
                ### over that list to test

                zerox = create_zerox(batch_inputs, dataset.vocab_size, device)

                test_batches_in.append(zerox)

                targets = torch.stack(batch_targets).to(device)

                test_batches_ta.append(targets)

                batch_inputz = torch.stack(batch_inputs).to(device)
                batch_input = batch_inputz.transpose(1, 0).to(device)

                output, _ = model.forward(zerox)  #.to(device)
                output_indices = torch.argmax(output, dim=2).to(device)
                output = output.transpose(0, 1).transpose(1, 2).to(device)

                loss_for_backward = criterion(output, targets).to(device)
                correct_indices = output_indices == targets.transpose(
                    0, 1).to(device)

                best_sample = np.argmax(
                    np.asarray(sum(correct_indices.t().detach().cpu())))
                print(
                    'Real: ',
                    dataset.convert_to_string(
                        np.asarray(batch_input[best_sample].cpu())))
                output, _ = model.forward(zerox)  #.to(device)
                output_indices = torch.argmax(output, dim=2).to(device)
                print(
                    'prediction: ',
                    dataset.convert_to_string(
                        np.asarray(output_indices[best_sample].cpu())))

                bc = int(sum(correct_indices.t().detach().cpu())
                         [best_sample]) / config.seq_length
                print('This sample had:', bc, 'characters right')

                output = np.random.randint(dataset.vocab_size)
                letters = [output]

                greedy_output = np.random.randint(dataset.vocab_size)
                greedy_letters = [greedy_output]

                Temperature_time(runs, step, dataset, device, model)
                for i in range(config.seq_length - 1):

                    #if temp:
                    # =============================================================================
                    #
                    #                         soft = torch.nn.Softmax(dim=2)
                    #
                    #
                    #
                    #
                    #                         zerol = torch.zeros([1,1,dataset.vocab_size])
                    #                         one_hot_letter = torch.tensor(output).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
                    #                         zerol.scatter_(2,one_hot_letter,1)
                    #                         zerol = zerol.to(device)
                    #                         if i == 0:
                    #                             output, h = model.forward(zerol)
                    #
                    #                         else:
                    #                             output, h = model.forward(zerol, h)
                    #
                    #                         tempered = soft(output/temp_value)
                    #                         #print(tempered)
                    #                         output = int(torch.multinomial(tempered[0][0],1).detach().cpu())
                    #                         #print(output)
                    #                         letters.append(output)
                    # =============================================================================

                    greedy_zerol = torch.zeros([1, 1, dataset.vocab_size])
                    greedy_one_hot_letter = torch.tensor(
                        greedy_output).unsqueeze(-1).unsqueeze(-1).unsqueeze(
                            -1)
                    greedy_zerol.scatter_(2, greedy_one_hot_letter, 1)
                    greedy_zerol = greedy_zerol.to(device)

                    if i == 0:
                        greedy_output, greedy_h = model.forward(greedy_zerol)
                    else:
                        greedy_output, greedy_h = model.forward(
                            greedy_zerol, greedy_h)

                    greedy_output = int(
                        torch.argmax(greedy_output, dim=2).detach().cpu())
                    greedy_letters.append(greedy_output)

                print('Greedy Generation ',
                      dataset.convert_to_string(greedy_letters))
                abs_step = (runs * 10000) + step
                line = ' '.join(('Step:', str(abs_step),
                                 dataset.convert_to_string(letters)))

                with open('GreedyGeneration.txt', 'a') as file:
                    file.write(line + '\n')

    # =============================================================================
    #         if step % (config.sample_every*1000) ==0:
    #             avg = []
    #             print('Testing over ', len(test_batches_in), 'batches')
    #             for z in range(len(test_batches_in)):
    #                 ##OUTPUT
    #                 output,_ = model.forward(test_batches_in[z])
    #                 output_indices = torch.argmax(output, dim=2).to(device)
    #                 output =  output.transpose(0,1).transpose(1,2).to(device)
    #
    #                 ##LOSS AND ACCURACY
    #                 loss_for_backward = criterion(output,targets).to(device)
    #                 correct_indices = output_indices == test_batches_ta[z].transpose(0,1).to(device)
    #
    #                 accuracy = int(sum(sum(correct_indices)))/int(correct_indices.shape[0]*
    #                               correct_indices.shape[1])
    #
    #                 avg.append(accuracy)
    #
    #             this_test_acc = sum(avg)/len(avg)
    #             print('The test accuracy over ',len(test_batches_in), 'is: ', this_test_acc)
    #             test_acc.append(this_test_acc)
    #             #if bc > 0.8:
    #             #    print(bc)
    #             #    #return correct_indices
    #
    # =============================================================================
            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break
        print('Done training.')
        line = ' '.join(
            ('Test accuracy:', str(test_acc), 'Learning rate:', str(lr),
             'Accuracy:', str(acc_list), 'Loss:', str(loss_list)))
        with open('textresults.txt', 'a') as file:
            file.write(line + '\n')

        #hiddenstates = [None]*30
        output = np.random.randint(dataset.vocab_size)
        letters = [output]
        for i in range(400):
            zerol = torch.zeros([1, 1, dataset.vocab_size])
            one_hot_letter = torch.tensor(output).unsqueeze(-1).unsqueeze(
                -1).unsqueeze(-1)
            zerol.scatter_(2, one_hot_letter, 1)
            zerol = zerol.to(device)
            if i == 0:
                output, h = model.forward(zerol)

                output = int(torch.argmax(output, dim=2).detach().cpu())

                letters.append(output)
                #hiddenstates[i] = h
            else:
                output, h = model.forward(zerol, h)

                output = int(torch.argmax(output, dim=2).detach().cpu())

                letters.append(output)
                #hiddenstates[i % 30] = h
        print('Final generation: ', dataset.convert_to_string(letters))
    line = ' '.join(('Accuracy:', str(acc_list), 'Loss', str(loss_list)))
    with open('PrideAndPrejudice2.txt', 'a') as file:
        file.write(line + '\n')
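Example #30 builds its one-hot inputs with create_zerox, which the listing never defines. A sketch of what it plausibly does, i.e. stack the per-step tensors the DataLoader yields, one-hot encode them, and move the result to the device:

import torch

def create_zerox(batch_inputs, vocab_size, device):
    # Hypothetical helper: batch_inputs is the list of LongTensors yielded by the
    # DataLoader (one tensor per time step). Stack to (batch_size, seq_length),
    # then scatter into a one-hot tensor of shape (batch_size, seq_length, vocab_size).
    indices = torch.stack(batch_inputs).t().to(device)
    zerox = torch.zeros(indices.size(0), indices.size(1), vocab_size, device=device)
    zerox.scatter_(2, indices.unsqueeze(-1), 1)
    return zerox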
示例#31
0
    model_config = yaml.load(open(args.model_config, "r"),
                             Loader=yaml.FullLoader)
    train_config = yaml.load(open(args.train_config, "r"),
                             Loader=yaml.FullLoader)
    configs = (preprocess_config, model_config, train_config)

    # Get model
    model = get_model(args, configs, device, train=False)

    # Load vocoder
    vocoder = get_vocoder(model_config, device)

    # Preprocess texts
    if args.mode == "batch":
        # Get dataset
        dataset = TextDataset(args.source, preprocess_config)
        batchs = DataLoader(
            dataset,
            batch_size=8,
            collate_fn=dataset.collate_fn,
        )
    if args.mode == "single":
        ids = raw_texts = [args.text[:100]]
        speakers = np.array([args.speaker_id])
        if preprocess_config["preprocessing"]["text"]["language"] == "en":
            texts = np.array(
                [preprocess_english(args.text, preprocess_config)])
        text_lens = np.array([len(texts[0])])
        batchs = [(ids, raw_texts, speakers, texts, text_lens, max(text_lens))]

    synthesize(model, args.restore_step, configs, vocoder, batchs)