Example No. 1
def train(train: Examples, model: RNN, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0
    count = 0
    model.train()
    for x, y, z in batch(train.shuffled(), config.batch_size):
        x, y, z = get_long_tensor(x), get_long_tensor(
            y).float(), get_long_tensor(z)

        optimizer.zero_grad()
        if config.setting == 'RNN':
            predictions = model(x).squeeze(1)
        else:
            predictions = model(x, z).squeeze(1)

        loss = criterion(predictions, y)

        acc = binary_accuracy(predictions, y)

        loss.backward()

        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()
        count += 1
    return epoch_loss / count, epoch_acc / count
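The binary_accuracy helper called above is not shown in this example. A minimal sketch of such a helper, assuming the model emits raw logits for a single binary output:

import torch

def binary_accuracy(predictions: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # threshold the sigmoid of the logits at 0.5 and compare with the float labels
    rounded = torch.round(torch.sigmoid(predictions))
    correct = (rounded == y).float()
    return correct.sum() / len(correct)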
Example No. 2
def main(_):
    # load data
    data, ix2word, word2ix = load_data()
    num_train = data.shape[0]
    vocab_size = len(ix2word)
    # variables for training
    X = tf.placeholder(tf.int32, [BATCH_SIZE, None])
    y = tf.placeholder(tf.int32, [BATCH_SIZE, None])
    rnn_model = RNN(model=model, batch_size=BATCH_SIZE, vocab_size=vocab_size,
                    embedding_dim=embedding_dim, n_neurons=n_neurons, n_layers=3,
                    lr=lr, keep_prob=keep_prob)
    loss, optimizer = rnn_model.train(X, y)

    # start training
    start_time = time.time()
    with tf.Session() as sess:
        # Visualize graph
        # write loss into logs
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter('./logs/', sess.graph)
        tf.global_variables_initializer().run()
        print("="*15+"start training"+"="*15)
        for epc in range(NUM_EPOCH):
            print("="*15, "epoch: %d" % epc, "="*15)
            for step in range(num_train//BATCH_SIZE):
                # get batch data
                idx_start = step * BATCH_SIZE
                idx_end = idx_start + BATCH_SIZE
                batch_data = data[idx_start:idx_end, ...]
                x_data = batch_data[:, :-1]
                y_data = batch_data[:, 1:]

                feed_dict = {X: x_data, y: y_data}
                sess.run(optimizer, feed_dict=feed_dict)
                
                # print evaluation results for every 100 steps
                if step % eval_frequence == 0:
                    l = sess.run(loss, feed_dict=feed_dict)
                    result = sess.run(merged, feed_dict=feed_dict)
                    writer.add_summary(result, (epc*num_train//BATCH_SIZE)+step)

                    input_seq = "湖光秋月两相和"
                    result = generate_poem(rnn_model=rnn_model, sess=sess, input_seqs=input_seq,
                                           ix2word=ix2word, word2ix=word2ix, max_len=125, prefix_words=None)
                    result_poem = ''.join(result)
                    
                    run_time = time.time() - start_time
                    start_time = time.time()
                    print("step: %d, run time: %.1f ms" % (step, run_time*1000/eval_frequence))
                    print("minibatch loss: %f" % l)
                    print("generated poem length: %d, poem is: %s" % (len(result_poem), result_poem))
                    sys.stdout.flush()
        # save model
        if SAVE:
            saver = tf.train.Saver()
            saver.save(sess, CKPT_PATH+'rnn_model.ckpt')
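The batch slicing above (x_data = batch_data[:, :-1], y_data = batch_data[:, 1:]) builds standard next-token language-model pairs. A toy illustration, not part of the original script:

import numpy as np

batch_data = np.array([[11, 12, 13, 14],
                       [21, 22, 23, 24]])
x_data = batch_data[:, :-1]   # inputs:  [[11 12 13], [21 22 23]]
y_data = batch_data[:, 1:]    # targets: [[12 13 14], [22 23 24]]
# each target token is the input token shifted one position to the left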
Example No. 3
    labels_acc = np.mean(np.logical_and(gold_arcs == pred_arcs, gold_labels == pred_labels))

    return arcs_acc, labels_acc


highestScore = 0
tsid = 0
name_model = 'parser_model2.pt'
path_save_model = os.path.join('gen', name_model)
for epoch in range(1, args.epochs+1):

    for i, (word_tensor, ext_word_ids, char_ids, pos_tensor, xpos_tensor, head_targets, rel_targets, seq_lengths, perm_idx) in enumerate(train_loader):

        start = time.time()
        # switch to train mode
        model.train()
        ts = ((epoch - 1) * train_loader.n_batches) + (i + 1)
        if ts % 5000 == 0:
            adjust_learning_rate(args.lr, optimizer, optimizer_sparse)

        if args.cuda:
            word_tensor = word_tensor.cuda()
            pos_tensor = pos_tensor.cuda()
            xpos_tensor = xpos_tensor.cuda()
            head_targets = head_targets.cuda()
            rel_targets = rel_targets.cuda()

        # compute output
        arc_logits, label_logits = model(word_tensor, ext_word_ids, char_ids, pos_tensor, xpos_tensor, seq_lengths)
        arc_logits = arc_logits[:, 1:, :]
        label_logits = label_logits[:, 1:, :, :]
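This snippet is truncated and starts mid-function; the first returned value, arcs_acc, is computed above the visible portion. A plausible reconstruction of the full accuracy helper, offered only as a sketch:

import numpy as np

def parse_accuracy(gold_arcs, pred_arcs, gold_labels, pred_labels):
    # unlabeled accuracy: predicted head matches the gold head
    arcs_acc = np.mean(gold_arcs == pred_arcs)
    # labeled accuracy: both the head and the dependency label must match
    labels_acc = np.mean(np.logical_and(gold_arcs == pred_arcs,
                                        gold_labels == pred_labels))
    return arcs_acc, labels_acc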
Example No. 4
                opt_fname = os.path.join(expt_dir, f"opt_epoch_{ep}")
                torch.save(model.state_dict(), model_fname)
                torch.save(optimizer.state_dict(), opt_fname)

        model_fname = os.path.join(expt_dir, f"model_final_{ep}")
        opt_fname = os.path.join(expt_dir, f"opt_final_{ep}")
        torch.save(model.state_dict(), model_fname)
        torch.save(optimizer.state_dict(), opt_fname)
    finally:
        log()


if __name__ == "__main__":

    fname = "_bios.json"
    bc = ByteCode("byte_values.txt")
    ds = ByteDataset(fname, bc, device=torch.device('cpu'))
    print(f"Loaded {len(ds)} samples")
    dl = ByteDataLoader(ds, batch_size=1)
    rnn = RNN(bc.num_codes)
    rnn.train()
    epochs = 1
    lr = 1e-3
    losses = []
    lossfn = nn.CrossEntropyLoss(reduction='none')

    optimizer = Adam(rnn.parameters(), lr=lr)

    train(dl, rnn, optimizer, dict(epochs=epochs,
                                   expt_dir="tst",
                                   sample_step=1), torch.device('cpu'), bc)
Example No. 5
def main():
    logging.basicConfig(filename='logs/train.log', level=logging.DEBUG)

    # saved model path
    save_path = 'history/trained_model'

    # input file
    #filename = 'data/train_and_test.csv'
    filename = 'data/golden_400.csv'

    embedding_size = 300  # 128 for torch embeddings, 300 for pre-trained
    hidden_size = 24
    batch_size = 64
    nb_epochs = 200
    lr = 1e-4
    max_norm = 5
    folds = 3

    # Dataset
    ds = ClaimsDataset(filename)
    vocab_size = ds.vocab.__len__()
    pad_id = ds.vocab.token2id.get('<pad>')

    test_len = val_len = math.ceil(ds.__len__() * .10)
    train_len = ds.__len__() - (val_len + test_len)
    print("\nTrain size: {}\tValidate size: {}\tTest Size: {}".format(
        train_len, val_len, test_len))

    # randomly split dataset into tr, te, & val sizes
    d_tr, d_val, d_te = torch.utils.data.dataset.random_split(
        ds, [train_len, val_len, test_len])

    # data loaders
    dl_tr = torch.utils.data.DataLoader(d_tr, batch_size=batch_size)
    dl_val = torch.utils.data.DataLoader(d_val, batch_size=batch_size)
    dl_test = torch.utils.data.DataLoader(d_te, batch_size=batch_size)

    model = RNN(vocab_size, embedding_size, hidden_size, pad_id, ds)
    model = utils.cuda(model)
    model.zero_grad()

    parameters = list([
        parameter for parameter in model.parameters()
        if parameter.requires_grad
    ])
    #parameters = list(model.parameters())   # comment out when using pre-trained embeddings

    optim = torch.optim.Adam(parameters,
                             lr=lr,
                             weight_decay=35e-3,
                             amsgrad=True)  # optimizer
    criterion = nn.NLLLoss(weight=torch.Tensor([1.0, 2.2]).cuda())
    losses = defaultdict(list)

    print("\nTraining started: {}\n".format(utils.get_time()))

    phases, loaders = ['train', 'val'], [dl_tr, dl_val]
    tr_acc, v_acc = [], []

    for epoch in range(nb_epochs):
        for phase, loader in zip(phases, loaders):
            if phase == 'train':
                model.train()
            else:
                model.eval()

            ep_loss, out_list, label_list = [], [], []
            for i, inputs in enumerate(loader):
                optim.zero_grad()

                claim, labels = inputs
                labels = utils.variable(labels)

                out = model(claim)

                out_list.append(utils.normalize_out(
                    out))  # collect output from every epoch
                label_list.append(labels)

                out = torch.log(out)

                # criterion.weight = get_weights(labels)
                loss = criterion(out, labels)

                # back propagate, for training only
                if phase == 'train':
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(
                        parameters,
                        max_norm=max_norm)  # exploding gradients? say no more!
                    optim.step()

                ep_loss.append(loss.item())

            losses[phase].append(
                np.mean(ep_loss)
            )  # record average losses from every phase at each epoch

            acc = utils.get_accuracy(label_list, out_list)
            if phase == 'train':
                tr_acc.append(acc)
            else:
                v_acc.append(acc)

            print("Epoch: {} \t Phase: {} \t Loss: {:.4f} \t Accuracy: {:.3f}".
                  format(epoch, phase, loss, acc))

    print("\nTime finished: {}\n".format(utils.get_time()))

    utils.plot_loss(losses['train'], losses['val'], tr_acc, v_acc, filename,
                    -1)

    logging.info("\nTrain file=> " + filename +
                 "\nParameters=> \nBatch size: " + str(batch_size) +
                 "\nHidden size: " + str(hidden_size) + "\nMax_norm: " +
                 str(max_norm) + "\nL2 Reg/weight decay: " +
                 str(optim.param_groups[0]['weight_decay']) +
                 "\nLoss function: \n" + str(criterion))
    logging.info('Final train accuracy: ' + str(tr_acc[-1]))
    logging.info('Final validation accuracy: ' + str(v_acc[-1]))

    # Save the model
    torch.save(model.state_dict(), save_path)

    #test(model, batch_size)

    # predict
    f1_test, acc_test = [], []
    for i, inputs in enumerate(dl_test):
        claim, label = inputs
        label = utils.variable(label.float())

        out = model(claim)
        y_pred = utils.normalize_out(out)

        #print("\n\t\tF1 score: {}\n\n".format(get_f1(label, y_pred)))   # f1 score
        f1_test.append(utils.get_f1(label, y_pred))
        acc_test.append(metrics.accuracy_score(label, y_pred))

    print("\t\tF1: {:.3f}\tAccuracy: {:.3f}".format(np.mean(f1_test),
                                                    np.mean(acc_test)))
    logging.info('\nTest f1: ' + str(np.mean(f1_test)) + '\nTest Accuracy: ' +
                 str(np.mean(acc_test)))
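utils.normalize_out and utils.get_accuracy are project helpers that are not shown here. A hypothetical sketch of normalize_out, assuming the model returns per-class probabilities and integer class ids are needed for the accuracy and F1 computations:

import torch

def normalize_out(out: torch.Tensor) -> torch.Tensor:
    # pick the most probable class per sample and move it to the CPU
    return torch.argmax(out, dim=1).cpu()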
Example No. 6
def main():

    # saved model path
    save_path = 'history/model_fold_'

    test_file = 'data/test120.csv'
    # create dataset
    #filename = 'data/golden_400.csv'
    #filename = 'data/golden_train_and_val.csv'
    filename = 'data/train_val120.csv'
    ds = ClaimsDataset(filename)
    vocab_size = ds.vocab.__len__()
    pad_id = ds.vocab.token2id.get('<pad>')

    embedding_size = 128  # 128 for torch embeddings, 300 for pre-trained
    hidden_size = 24
    batch_size = 64
    nb_epochs = 150
    lr = 1e-4
    max_norm = 5
    folds = 10
    criterion = nn.NLLLoss(weight=torch.Tensor([1.0, 2.2]).cuda())

    # For testing phase
    fold_scores = {}
    test_set = ClaimsDataset(test_file)
    dl_test = torch_data.DataLoader(test_set,
                                    batch_size=batch_size,
                                    shuffle=True)
    mean = []  # holds the mean validation accuracy of every fold
    print("\nTraining\n")
    logger.info(utils.get_time())

    for i in range(folds):
        print("\nFold: {}\n".format(i))

        losses = defaultdict(list)
        train, val = utils.split_dataset(ds, i)

        print("Train size: {} \t Validate size: {}".format(
            len(train), len(val)))

        dl_train = torch_data.DataLoader(train,
                                         batch_size=batch_size,
                                         shuffle=True)
        dl_val = torch_data.DataLoader(val,
                                       batch_size=batch_size,
                                       shuffle=True)

        model = RNN(vocab_size, embedding_size, hidden_size, pad_id, ds)
        model = utils.cuda(model)
        model.zero_grad()

        # When using pre-trained embeddings, uncomment below otherwise, use the second statement
        #parameters = list([parameter for parameter in model.parameters()
        #                   if parameter.requires_grad])
        parameters = list(model.parameters())

        optim = torch.optim.Adam(parameters,
                                 lr=lr,
                                 weight_decay=35e-3,
                                 amsgrad=True)

        phases, loaders = ['train', 'val'], [dl_train, dl_val]
        tr_acc, v_acc = [], []

        for epoch in range(nb_epochs):
            for p, loader in zip(phases, loaders):
                if p == 'train':
                    model.train()
                else:
                    model.eval()

                ep_loss, out_list, label_list = [], [], []
                for _, inputs in enumerate(loader):
                    optim.zero_grad()

                    claim, labels = inputs
                    labels = utils.variable(labels)

                    out = model(claim)

                    out_list.append(utils.normalize_out(out))
                    label_list.append(labels)

                    out = torch.log(out)
                    loss = criterion(out, labels)

                    if p == 'train':
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(parameters,
                                                       max_norm=max_norm)
                        optim.step()

                    ep_loss.append(loss.item())
                losses[p].append(np.mean(ep_loss))

                acc = utils.get_accuracy(label_list, out_list)
                if p == 'train':
                    tr_acc.append(acc)
                else:
                    v_acc.append(acc)
                print(
                    "Epoch: {} \t Phase: {} \t Loss: {:.4f} \t Accuracy: {:.3f}"
                    .format(epoch, p, loss, acc))

        utils.plot_loss(losses['train'], losses['val'], tr_acc, v_acc,
                        filename, i)
        mean.append(np.mean(v_acc))
        logger.info("\n Fold: " + str(i))
        logger.info("Train file=> " + filename +
                    "\nParameters=> \nBatch size: " + str(batch_size) +
                    "\nHidden size: " + str(hidden_size) + "\nMax_norm: " +
                    str(max_norm) + "\nL2 Reg/weight decay: " +
                    str(optim.param_groups[0]['weight_decay']) +
                    "\nLoss function: " + str(criterion))
        logger.info('Final train accuracy: ' + str(tr_acc[-1]))
        logger.info('Final validation accuracy: ' + str(v_acc[-1]))

        # Save model for current fold
        torch.save(model.state_dict(), save_path + str(i))

        test_f1, test_acc = [], []
        for _, inp in enumerate(dl_test):
            claim, label = inp
            label = utils.variable(label)

            model.eval()
            out = model(claim)
            y_pred = utils.normalize_out(out)

            test_f1.append(utils.get_f1(label, y_pred))
            test_acc.append(metrics.accuracy_score(label, y_pred))
        t_f1, t_acc = np.mean(test_f1), np.mean(test_acc)
        fold_scores[i] = dict([('F1', t_f1), ('Accuracy', t_acc)])
        print("\tf1: {:.3f} \t accuracy: {:.3f}".format(t_f1, t_acc))
        #logger.info('\nTest f1: '+str(t_f1)+'\nTest Accuracy: '+str(t_acc))

    logger.info('Mean accuracy over 10 folds: \t' + str(np.mean(mean)))
    logger.info(fold_scores)
Example No. 7
def train(cfg, datasets, dataloaders, device, save_model_path):
    model = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers,
                cfg.drop_p, cfg.output_dim, cfg.bi_dir)
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    criterion = torch.nn.CrossEntropyLoss()
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    best_metric = 0.0
    best_epoch = 0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(cfg.num_epochs):
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            # running_corrects = 0
            y_pred = []
            y_true = []

            # Iterate over data.
            for batch in dataloaders[phase]:
                inputs = batch['inputs'].to(device)
                targets = batch['targets'][cfg.task].to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs, hiddens = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, targets)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                # running_corrects += torch.sum(preds == targets.data)
                y_pred.extend(preds.tolist())
                y_true.extend(targets.tolist())

            # if phase == 'train':
            #     scheduler.step()

            # epoch_acc = running_corrects.double() / len(datasets[phase])
            epoch_loss = running_loss / len(datasets[phase])
            f1_ep = f1_score(y_true, y_pred, average='weighted')
            precision_ep = precision_score(y_true, y_pred, average='weighted')
            recall_ep = recall_score(y_true, y_pred, average='weighted')
            accuracy_ep = accuracy_score(y_true, y_pred)

            # print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            print(
                f'({phase} @ {epoch+1}): L: {epoch_loss:.3f}; A: {accuracy_ep:.3f}; R: {recall_ep:.3f}; '
                + f'P: {precision_ep:.3f}; F1: {f1_ep:.3f}')

            # deep copy the model
            if phase == 'valid' and f1_ep > best_metric:
                best_metric = f1_ep
                best_epoch = epoch
                best_model_wts = copy.deepcopy(model.state_dict())

    print(f'Best val Metric {best_metric:.3f} @ {best_epoch+1}\n')

    # load best model weights and saves it
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), save_model_path)
    print(f'model is saved @ {save_model_path}')
    return best_metric
Example No. 8
# init data
init_data()
# create model
rnn = RNN(len(all_categories), len(all_letters), n_hidden, len(all_letters))
# setup data
train_data = load_data()
train_dataset = NameDataset(train_data)
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
# setup optimizer and criterion
optimizer = optim.Adam(rnn.parameters(), lr=0.0005)
criterion = nn.NLLLoss()
# train
all_loss = []
for epoch in range(epochs):
    rnn = rnn.train()
    current_loss = 0
    for idx, (category, name) in enumerate(train_dataloader):
        category, name = category[0], name[0]
        category, name = Variable(category), Variable(name)
        hidden = rnn.initHidden()
        for i in range(name.size()[0] - 1):
            optimizer.zero_grad()
            output, hidden = rnn(category, name[i], hidden)
            loss = criterion(output, torch.argmax(name[i + 1], dim=1).long())
            loss.backward(retain_graph=True)
            optimizer.step()
            current_loss += loss.item()  # accumulate a Python float, not the graph-carrying tensor
        if idx >= plot_every and idx % plot_every == 0:
            all_loss.append(float(current_loss) / plot_every)
            current_loss = 0
Example No. 9
def main(args):
    print(sys.argv)

    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr

    data_loader = get_data_loader(args.gt_path,
                                  args.tensors_path,
                                  args.bs,
                                  args.json_labels_path,
                                  num_workers=8)
    model = RNN()
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    #optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    if args.rms:
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=args.lr,
                                  momentum=args.mm)
    else:
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model_loss = torch.nn.BCEWithLogitsLoss()

    losses = []
    p = 1
    try:
        for epoch in range(num_epochs):
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            if epoch < 3:
                p = 1.0
            elif epoch >= 3 and epoch < 6:
                p = 0.5
            elif epoch >= 6 and epoch < 9:
                p = 0.25
            else:
                p = 0.0

            loss_epoch = []
            for step, (feat_maps, gt) in enumerate(data_loader):
                if torch.cuda.is_available():
                    feat_maps = feat_maps.cuda()
                    gt = gt.cuda()
                model.zero_grad()

                out = model(feat_maps, gt, p)
                loss = model_loss(out, gt)
                loss.backward()
                optimizer.step()

                loss_step = loss.cpu().detach().numpy()
                loss_epoch.append(loss_step)

                print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) +
                      ' - Step ' + str(step + 1) + '/' +
                      str(len(data_loader)) + " - Loss: " + str(loss_step))

            loss_epoch_mean = np.mean(np.array(loss_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean))

            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models/', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass

    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)
    plt.plot(losses)
    plt.show()
Example No. 10
def train(args):
    if args.create_dataset:
        df = pd.read_csv("../data/endpoints_calculated_std.csv")
        smiles = df["smiles"].to_list()
        data = df[df.columns[3:]].to_numpy()
        print("Building LegoModel")
        legoModel = LegoGram(smiles=smiles, nworkers=8)
        torch.save(legoModel, "legoModel.pk")
        print("Building sampler")
        sampler = LegoGramRNNSampler(legoModel)
        torch.save(sampler, "sampler.pk")
        print("Constructing dataset")
        dataset = MolecularNotationDataset(smiles, sampler, data)
        torch.save(dataset, 'lg.bin')
    else:
        dataset = torch.load('lg.bin')

    train_loader = DataLoader(dataset, batch_size=args.batch_size, collate_fn=collect)
    device = torch.device('cpu')
    if args.cuda:
        device = torch.device('cuda')
    model = RNN(voc_size=dataset.vocsize, device=device)
    model.train()
    model.cuda()
    print(f"Model has been created on device {device}")
    smiles_dataset = dataset.smiles
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    loss_f = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
    writer = SummaryWriter(comment=args.name_task)
    losses = []
    out_counter = 0
    cnt = 0
    for epoch in range(args.num_epochs):
        loss_list = []
        for iteration, (batch, lengths) in enumerate(tqdm(train_loader)):
            batch = batch.cuda()
            logits, endp_model = model(batch, lengths)
            print(logits.shape)
            print(batch.shape)
            loss = loss_f(logits[:, :, :-1], batch[:, 1:])

            loss_list.append(loss.item())
            writer.add_scalar("CrossEntropyLoss", loss_list[-1], iteration+epoch*len(train_loader))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if iteration % args.print_every == 0 and iteration > 0:
                model.eval()
                number_generate = 100

                res = model.sample(number_generate, dataset.model)
                writer.add_text("Molecules after generator", json.dumps([res]))
                valid = len(res) * 100 / number_generate
                print(res)
                print("valid : {} %".format(valid))
                writer.add_scalar("Valid", valid, cnt)
                res = [robust_standardizer(mol) for mol in res]
                res = list(filter(lambda x: x is not None, res))
                unique = len([elem for elem in res if elem not in smiles_dataset])

                writer.add_text("Unique mols", json.dumps([res]))
                print(f"There are unique mols {unique}")
                print(res)
                writer.add_scalar("Unique", unique, cnt)
                cnt += 1
                model.train()
        writer.flush()
        epoch_loss = np.mean(loss_list)
        print(f"Loss on epoch {epoch} is {epoch_loss}")
        if out_counter < args.stop_after and epoch > 0:
            if losses[-1] <= epoch_loss:
                out_counter += 1
            else:
                out_counter = 0
                torch.save(model, "experiments/" + args.name_task + "/model.pt")
        if epoch == 0:
            torch.save(model, "experiments/" + args.name_task + "/model.pt")
        losses.append(epoch_loss)
    return losses
Example No. 11
print_every = 50

# training interface
step = 0
tracker = {'NLL': []}
start_time = time.time()
for ep in range(epoch):
    # learning rate decay
    if ep >= 10 and ep % 2 == 0:
        learning_rate = learning_rate * 0.5
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

    for split in splits:
        dataloader = dataloaders[split]
        model.train() if split == 'train' else model.eval()
        totals = {'NLL': 0., 'words': 0}

        for itr, (_, dec_inputs, targets, lengths) in enumerate(dataloader):
            bsize = dec_inputs.size(0)
            dec_inputs = dec_inputs.to(device)
            targets = targets.to(device)
            lengths = lengths.to(device)

            # forward
            logp = model(dec_inputs, lengths)

            # calculate loss
            NLL_loss = NLL(logp, targets, lengths + 1)
            loss = NLL_loss / bsize
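The NLL helper used above is not defined in the snippet. A minimal sketch of a length-masked, summed negative log-likelihood, assuming logp has shape (batch, max_len, vocab) and targets has shape (batch, max_len):

import torch
import torch.nn.functional as F

def NLL(logp, targets, lengths):
    batch, max_len, vocab = logp.shape
    # per-token negative log-likelihood, then mask out padded positions
    token_nll = F.nll_loss(logp.reshape(-1, vocab), targets.reshape(-1),
                           reduction='none').reshape(batch, max_len)
    mask = torch.arange(max_len, device=lengths.device)[None, :] < lengths[:, None]
    return (token_nll * mask).sum()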
Example No. 12
def main(args):
    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr

    data_loader = get_data_loader(args.gt_path, args.descriptors_path,
                                  args.json_labels_path, args.bs)
    model = RNN(num_descriptors=args.num_descriptors,
                hidden_size=args.hidden_size,
                lstm_in_size=args.input_size)
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # model_loss = torch.nn.BCEWithLogitsLoss()
    model_loss = Loss()

    losses = []
    try:
        for epoch in range(num_epochs):
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            loss_epoch = []
            for step, (descriptors, labels) in enumerate(data_loader):
                if torch.cuda.is_available():
                    descriptors = descriptors.cuda()
                    labels = labels.cuda()
                model.zero_grad()

                attention = model(descriptors)
                loss = model_loss(attention, labels)
                loss.backward()
                optimizer.step()

                loss_epoch.append(loss.cpu().detach().numpy())

                print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) +
                      ' - Step ' + str(step + 1) + '/' +
                      str(len(data_loader)) + ' - Loss: ' + str(float(loss)))
            loss_epoch_mean = np.mean(np.array(loss_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean))
            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models/models_361_dropout',
                                          filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass

    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)
    plt.plot(losses)
    plt.show()
Example No. 13
                  data_loader.unique_chars)

    # generate onehot representation of training data and label
    X_onehot = np.zeros([rnn_net.K, data_len])
    target_onehot = np.zeros([rnn_net.K, data_len])
    X_int = [char2int[ch] for ch in file_data]
    target_int = [char2int[ch] for ch in file_data[1:] + file_data[0]]
    X_onehot[X_int, range(data_len)] = 1
    target_onehot[target_int, range(data_len)] = 1
    del file_data, X_int, target_int

    # start training
    smooth_loss_acc = rnn_net.train(X_onehot,
                                    target_onehot,
                                    h_prev,
                                    int2char,
                                    char2int,
                                    epoch_num=cfg.EPOCH,
                                    batch_size=cfg.BATCH_SIZE)
    print("Smoothed loss:")
    print(smooth_loss_acc)

    # save results
    loss_save_path = os.path.join(cfg.SAVE_PATH, tag + '_loss.npy')
    fig_save_path = os.path.join(cfg.SAVE_PATH, tag + '_loss.png')
    np.save(loss_save_path, smooth_loss_acc)

    fig = plt.figure()
    plt.plot(range(len(smooth_loss_acc)), smooth_loss_acc)
    plt.xlabel("Iterations (x100)")
    plt.ylabel("Smoothed Loss")
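The fancy-indexing assignment above (X_onehot[X_int, range(data_len)] = 1) fills one column of the one-hot matrix per character. A toy illustration of the idiom:

import numpy as np

ids = [2, 0, 1]                    # integer ids for a 3-character string
onehot = np.zeros((3, len(ids)))   # shape (K, data_len)
onehot[ids, range(len(ids))] = 1   # column j gets a 1 in row ids[j]
# onehot is now:
# [[0. 1. 0.]
#  [0. 0. 1.]
#  [1. 0. 0.]]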
Example No. 14
def main(args):
    print("in main")
    #creating tensorboard object
    tb_writer = SummaryWriter(log_dir=os.path.join(args.outdir, "tb/"),
                              purge_step=0)

    #Loading data
    train_dl, val_dl, vocab, label_map = fetch_dataset(args.datapath)

    #Defining loss
    criterion = nn.CrossEntropyLoss()

    #Defining optimizer
    vocab_size = len(vocab)
    num_classes = len(label_map)
    model = RNN(vocab_size, num_classes, args.embed_dim, args.hidden_size)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    #Looping training data
    best_accuracy = 0
    all_train_loss = []
    all_test_loss = []
    for epoch in range(args.epochlen):
        running_loss, test_loss = 0.0, 0.0
        count = 0
        correct = 0
        total_labels = 0
        model.train()
        for i, batch in enumerate(train_dl):
            seqs, labels = batch

            #names = Vocab.get_string(batch)

            #zero the parameter gradients
            optimizer.zero_grad()

            #forward + backward + optimize
            pred_outputs = model(seqs)
            loss = criterion(pred_outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            count += 1

            correct += (torch.argmax(pred_outputs,
                                     dim=1) == labels).sum().item()
            total_labels += labels.size(0)
        total_loss = running_loss / count
        all_train_loss.append(total_loss)
        accuracy = (correct * 100) / total_labels
        tb_writer.add_scalar('Train_Loss', running_loss, epoch)
        tb_writer.add_scalar('Train_Accuracy', accuracy, epoch)

        # reset running counters before evaluating on the validation set
        count = 0
        correct = 0
        total_labels = 0
        model.eval()
        for batch in val_dl:
            seqs, labels = batch

            pred_outputs = model(seqs)
            loss = criterion(pred_outputs, labels)
            test_loss += loss.item()
            count += 1

            correct += (torch.argmax(pred_outputs,
                                     dim=1) == labels).sum().item()
            total_labels += labels.size(0)
        total_test_loss = test_loss / count
        all_test_loss.append(total_test_loss)
        test_accuracy = (correct * 100) / total_labels
        print(
            f"Epoch : {str(epoch).zfill(2)}, Training Loss : {round(total_loss, 4)}, Training Accuracy : {round(accuracy, 4)},"
            f" Test Loss : {round(total_test_loss, 4)}, Test Accuracy : {round(test_accuracy, 4)}"
        )
        tb_writer.add_scalar('Test_Loss', test_loss, epoch)
        tb_writer.add_scalar('Test_Accuracy', test_accuracy, epoch)

        if best_accuracy < test_accuracy:
            best_accuracy = test_accuracy
            torch.save(model.state_dict(),
                       args.outdir + args.modelname + str(epoch))

    # Plot confusion matrix
    y_true = []
    y_pred = []
    for data in val_dl:
        seq, labels = data
        outputs = model(seq)
        predicted = torch.argmax(outputs, dim=1)
        y_true += labels.tolist()
        y_pred += predicted.tolist()

    cm = confusion_matrix(np.array(y_true), np.array(y_pred))

    disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                                  display_labels=label_map.keys())
    disp.plot(include_values=True,
              cmap='viridis',
              ax=None,
              xticks_rotation='horizontal',
              values_format=None)
    plt.show()
Example No. 15
def main(args):
    print(sys.argv)

    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr

    data_loader = get_data_loader(args.gt_path, args.tensors_path,
                                  args.json_labels_path, args.bs)
    model = RNN(lstm_hidden_size=args.hidden_size)
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    #optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    if args.rms:
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=args.lr,
                                  momentum=args.mm)
    else:
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model_loss = torch.nn.BCEWithLogitsLoss()
    # model_loss = Loss()

    losses = []
    p = 1
    try:
        for epoch in range(num_epochs):
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            if epoch in (3, 7, 15):
                if epoch == 3:
                    p = 2 / 3
                if epoch == 7:
                    p = 1 / 3
                if epoch == 15:
                    p = 0

            loss_epoch = []
            loss1_epoch = []
            loss2_epoch = []
            for step, (tensors, masks, gt) in enumerate(data_loader):
                if torch.cuda.is_available():
                    tensors = tensors.cuda()
                    masks = masks.cuda()
                    gt = gt.cuda()
                model.zero_grad()

                out, att = model(tensors, masks, gt, p)
                loss1 = model_loss(out, gt)
                # att[:, :-1, :] -> attention produced (location in the next frame) until the last frame -1 (49)
                # gt[:, 1:, :] -> gt from the second frame until the last frame (49)
                loss2 = model_loss(att[:, :-1, :], gt[:, 1:, :])
                loss = loss1 + loss2
                loss.backward()
                optimizer.step()

                loss_epoch.append(loss.cpu().detach().numpy())
                loss1_epoch.append(loss1.cpu().detach().numpy())
                loss2_epoch.append(loss2.cpu().detach().numpy())

                #print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) + ' - Step ' + str(step + 1) + '/' +
                #      str(len(data_loader)) + ' - Loss: ' + str(float(loss)) + " (Loss1: " + str(float(loss1))
                #       + ", Loss2: " + str(float(loss2)) + ")")
            loss_epoch_mean = np.mean(np.array(loss_epoch))
            loss1_epoch_mean = np.mean(np.array(loss1_epoch))
            loss2_epoch_mean = np.mean(np.array(loss2_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean) + " (loss1: " +
                  str(loss1_epoch_mean) + ", loss2: " + str(loss2_epoch_mean) +
                  ")")
            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models/', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass

    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)
    plt.plot(losses)
    plt.show()
Example No. 16
def train(args, labeled, resume_from, ckpt_file):
    print("========== In the train step ==========")

    iterator, TEXT, LABEL, tabular_dataset = load_data(stage="train",
                                                       args=args,
                                                       indices=labeled)

    print("Created the iterators")
    INPUT_DIM = len(TEXT.vocab)
    OUTPUT_DIM = 1
    BIDIRECTIONAL = True

    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = RNN(
        INPUT_DIM,
        args["EMBEDDING_DIM"],
        args["HIDDEN_DIM"],
        OUTPUT_DIM,
        args["N_LAYERS"],
        BIDIRECTIONAL,
        args["DROPOUT"],
        PAD_IDX,
    )

    model = model.to(device=device)

    pretrained_embeddings = TEXT.vocab.vectors

    model.embedding.weight.data.copy_(pretrained_embeddings)

    unk_idx = TEXT.vocab.stoi["<unk>"]
    pad_idx = TEXT.vocab.stoi["<pad>"]

    model.embedding.weight.data[unk_idx] = torch.zeros(args["EMBEDDING_DIM"])
    model.embedding.weight.data[pad_idx] = torch.zeros(args["EMBEDDING_DIM"])

    optimizer = optim.Adam(model.parameters())

    criterion = nn.BCEWithLogitsLoss()

    model = model.to("cuda")

    criterion = criterion.to("cuda")

    if resume_from is not None:
        ckpt = torch.load(os.path.join(args["EXPT_DIR"], resume_from + ".pth"))
        model.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])
    else:
        getdatasetstate(args)

    model.train()  # turn on dropout, etc
    for epoch in tqdm(range(args["train_epochs"]), desc="Training"):

        running_loss = 0
        i = 0

        for batch in iterator:

            # print("Batch is", batch.review[0])

            text, text_length = batch.review

            labels = batch.sentiment

            text = text.cuda()
            text_length = text_length.cuda()

            optimizer.zero_grad()

            output = model(text, text_length)

            loss = criterion(torch.squeeze(output).float(), labels.float())
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if i % 10 == 0:
                print(
                    "epoch: {} batch: {} running-loss: {}".format(
                        epoch + 1, i + 1, running_loss / 1000),
                    end="\r",
                )
                running_loss = 0
            i += 1

    print("Finished Training. Saving the model as {}".format(ckpt_file))

    ckpt = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))

    return
Example No. 17
def main():

    parser = argparse.ArgumentParser(description="==========[RNN]==========")
    parser.add_argument("--mode",
                        default="train",
                        help="available modes: train, test, eval")
    parser.add_argument("--model",
                        default="rnn",
                        help="available models: rnn, lstm")
    parser.add_argument("--dataset",
                        default="all",
                        help="available datasets: all, MA, MI, TN")
    parser.add_argument("--rnn_layers",
                        default=3,
                        type=int,
                        help="number of stacked rnn layers")
    parser.add_argument("--hidden_dim",
                        default=16,
                        type=int,
                        help="number of hidden dimensions")
    parser.add_argument("--lin_layers",
                        default=1,
                        type=int,
                        help="number of linear layers before output")
    parser.add_argument("--epochs",
                        default=100,
                        type=int,
                        help="number of max training epochs")
    parser.add_argument("--dropout",
                        default=0.0,
                        type=float,
                        help="dropout probability")
    parser.add_argument("--learning_rate",
                        default=0.01,
                        type=float,
                        help="learning rate")
    parser.add_argument("--verbose",
                        default=2,
                        type=int,
                        help="how much training output?")

    options = parser.parse_args()
    verbose = options.verbose

    if torch.cuda.is_available():
        device = torch.device("cuda")
        if verbose > 0:
            print("GPU available, using cuda...")
            print()
    else:
        device = torch.device("cpu")
        if verbose > 0:
            print("No available GPU, using CPU...")
            print()

    params = {
        "MODE": options.mode,
        "MODEL": options.model,
        "DATASET": options.dataset,
        "RNN_LAYERS": options.rnn_layers,
        "HIDDEN_DIM": options.hidden_dim,
        "LIN_LAYERS": options.lin_layers,
        "EPOCHS": options.epochs,
        "DROPOUT_PROB": options.dropout,
        "LEARNING_RATE": options.learning_rate,
        "DEVICE": device,
        "OUTPUT_SIZE": 1
    }

    params["PATH"] = "models/" + params["MODEL"] + "_" + params[
        "DATASET"] + "_" + str(params["RNN_LAYERS"]) + "_" + str(
            params["HIDDEN_DIM"]) + "_" + str(
                params["LIN_LAYERS"]) + "_" + str(
                    params["LEARNING_RATE"]) + "_" + str(
                        params["DROPOUT_PROB"]) + "_" + str(
                            params["EPOCHS"]) + "_model.pt"

    #if options.mode == "train":
    #    print("training placeholder...")

    train_data = utils.DistrictData(params["DATASET"], "train")
    val_data = utils.DistrictData(params["DATASET"], "val")

    params["INPUT_SIZE"] = train_data[0]['sequence'].size()[1]

    if params["MODEL"] == "rnn":
        model = RNN(params)
    elif params["MODEL"] == "lstm":
        model = LSTM(params)
    model.to(params["DEVICE"])
    criterion = nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=params["LEARNING_RATE"])

    if verbose == 0:
        print(params["PATH"])
    else:
        utils.print_params(params)
        print("Beginning training...")
        print()
    since = time.time()
    best_val_loss = 10.0

    for e in range(params["EPOCHS"]):

        running_loss = 0.0
        #model.zero_grad()
        model.train()
        train_loader = DataLoader(train_data,
                                  batch_size=32,
                                  shuffle=True,
                                  num_workers=4)

        for batch in train_loader:
            x = batch['sequence'].to(device)
            y = batch['target'].to(device)
            seq_len = batch['size'].to(device)

            optimizer.zero_grad()
            y_hat, hidden = model(x, seq_len)
            loss = criterion(y_hat, y)

            running_loss += loss.item()

            loss.backward()
            optimizer.step()

        mean_loss = running_loss / len(train_data)
        val_loss = evaluate(val_data,
                            model,
                            params,
                            criterion,
                            validation=True)

        if verbose == 2 or (verbose == 1 and (e + 1) % 100 == 0):
            print('=' * 25 + ' EPOCH {}/{} '.format(e + 1, params["EPOCHS"]) +
                  '=' * 25)
            print('Training Loss: {}'.format(mean_loss))
            print('Validation Loss: {}'.format(val_loss))
            print()

        if e > params["EPOCHS"] / 3:
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = model.state_dict()
                torch.save(best_model, params["PATH"])

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Final Training Loss: {:4f}'.format(mean_loss))
    print('Best Validation Loss: {:4f}'.format(best_val_loss))

    test_data = utils.DistrictData(params["DATASET"], "test")
    test_loss = evaluate(test_data, model, params, criterion)
    print('Test Loss: {}'.format(test_loss))
    print()
Example No. 18
class dl_model():

	def __init__(self, mode):

		# Read config file which contains parameters
		self.config = config
		self.mode = mode

		# Architecture name decides prefix for storing models and plots
		feature_dim = self.config['vocab_size']
		self.arch_name = '_'.join(
			[self.config['rnn'], str(self.config['num_layers']), str(self.config['hidden_dim']), str(feature_dim)])

		print("Architecture:", self.arch_name)
		# Change paths for storing models
		self.config['models'] = self.config['models'].split('/')[0] + '_' + self.arch_name + '/'
		self.config['plots'] = self.config['plots'].split('/')[0] + '_' + self.arch_name + '/'

		# Make folders if DNE
		if not os.path.exists(self.config['models']):
			os.mkdir(self.config['models'])
		if not os.path.exists(self.config['plots']):
			os.mkdir(self.config['plots'])
		if not os.path.exists(self.config['pickle']):
			os.mkdir(self.config['pickle'])

		self.cuda = (self.config['cuda'] and torch.cuda.is_available())

		# load/initialise metrics to be stored and load model
		if mode == 'train' or mode == 'test':

			self.plots_dir = self.config['plots']
			# store hyperparameters
			self.total_epochs = self.config['epochs']
			self.test_every = self.config['test_every_epoch']
			self.test_per = self.config['test_per_epoch']
			self.print_per = self.config['print_per_epoch']
			self.save_every = self.config['save_every']
			self.plot_every = self.config['plot_every']

			# dataloader which returns batches of data
			self.train_loader = dataloader('train', self.config)
			self.test_loader = dataloader('test', self.config)
			#declare model
			self.model = RNN(self.config)

			self.start_epoch = 1
			self.edit_dist = []
			self.train_losses, self.test_losses = [], []

		else:

			self.model = RNN(self.config)

		if self.cuda:
			self.model.cuda()

		# resume training from some stored model
		if self.mode == 'train' and self.config['resume']:
			self.start_epoch, self.train_losses, self.test_losses = self.model.load_model(mode, self.model.rnn_name, self.model.num_layers, self.model.hidden_dim)
			self.start_epoch += 1

		# load best model for testing/inference
		elif self.mode == 'test' or mode == 'test_one':
			self.model.load_model(mode, self.config['rnn'], self.model.num_layers, self.model.hidden_dim)

		#whether using embeddings
		if self.config['use_embedding']:
			self.use_embedding = True
		else:
			self.use_embedding = False

	# Train the model
	def train(self):

		print("Starting training at t =", datetime.datetime.now())
		print('Batches per epoch:', len(self.train_loader))
		self.model.train()

		# when to print losses during the epoch
		print_range = list(np.linspace(0, len(self.train_loader), self.print_per + 2, dtype=np.uint32)[1:-1])
		if self.test_per == 0:
			test_range = []
		else:
			test_range = list(np.linspace(0, len(self.train_loader), self.test_per + 2, dtype=np.uint32)[1:-1])

		for epoch in range(self.start_epoch, self.total_epochs + 1):

			try:

				print("Epoch:", str(epoch))
				epoch_loss = 0.0
				# i used for monitoring batch and printing loss, etc.
				i = 0

				while True:

					i += 1

					# Get batch of inputs, labels, missed_chars and lengths along with status (when to end epoch)
					inputs, labels, miss_chars, input_lens, status = self.train_loader.return_batch()

					if self.use_embedding:
						inputs = torch.from_numpy(inputs).long() #embeddings should be of dtype long
					else:
						inputs = torch.from_numpy(inputs).float()

					#convert to torch tensors
					labels = torch.from_numpy(labels).float()
					miss_chars = torch.from_numpy(miss_chars).float()
					input_lens = torch.from_numpy(input_lens).long()

					if self.cuda:
						inputs = inputs.cuda()
						labels = labels.cuda()
						miss_chars = miss_chars.cuda()
						input_lens = input_lens.cuda()

					# zero the parameter gradients
					self.model.optimizer.zero_grad()
					# forward + backward + optimize
					outputs = self.model(inputs, input_lens, miss_chars)
					loss, miss_penalty = self.model.calculate_loss(outputs, labels, input_lens, miss_chars, self.cuda)
					loss.backward()

					# clip gradient
					# torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config['grad_clip'])
					self.model.optimizer.step()

					# store loss
					epoch_loss += loss.item()

					# print loss
					if i in print_range and epoch == 1:
						print('After %i batches, Current Loss = %.7f' % (i, epoch_loss / i))
					elif i in print_range and epoch > 1:
						print('After %i batches, Current Loss = %.7f, Avg. Loss = %.7f, Miss Loss = %.7f' % (
								i, epoch_loss / i, np.mean(np.array([x[0] for x in self.train_losses])), miss_penalty))

					# test model periodically
					if i in test_range:
						self.test(epoch)
						self.model.train()

					# Reached end of dataset
					if status == 1:
						break

				#refresh dataset i.e. generate a new dataset from corpus
				if epoch % self.config['reset_after'] == 0:
					self.train_loader.refresh_data(epoch)

				#take the last example from the epoch and print the incomplete word, target characters and missed characters
				random_eg = min(np.random.randint(self.train_loader.batch_size), inputs.shape[0]-1)
				encoded_to_string(inputs.cpu().numpy()[random_eg], labels.cpu().numpy()[random_eg], miss_chars.cpu().numpy()[random_eg],
								  input_lens.cpu().numpy()[random_eg], self.train_loader.char_to_id, self.use_embedding)

				# Store tuple of training loss and epoch number
				self.train_losses.append((epoch_loss / len(self.train_loader), epoch))

				# save model
				if epoch % self.save_every == 0:
					self.model.save_model(False, epoch, self.train_losses, self.test_losses,
										  self.model.rnn_name, self.model.num_layers, self.model.hidden_dim)

				# test every 5 epochs in the beginning and then every fixed no of epochs specified in config file
				# useful to see how loss stabilises in the beginning
				if epoch % 5 == 0 and epoch < self.test_every:
					self.test(epoch)
					self.model.train()
				elif epoch % self.test_every == 0:
					self.test(epoch)
					self.model.train()
				# plot loss and accuracy
				if epoch % self.plot_every == 0:
					self.plot_loss_acc(epoch)

			except KeyboardInterrupt:
				#save model before exiting
				print("Saving model before quitting")
				self.model.save_model(False, epoch-1, self.train_losses, self.test_losses,
									  self.model.rnn_name, self.model.num_layers, self.model.hidden_dim)
				exit(0)


	# test model
	def test(self, epoch=None):

		self.model.eval()

		print("Testing...")
		print('Total batches:', len(self.test_loader))
		test_loss = 0

		#generate a new dataset from corpus
		self.test_loader.refresh_data(epoch)

		with torch.no_grad():

			while True:

				# Get batch of input, labels, missed characters and lengths along with status (when to end epoch)
				inputs, labels, miss_chars, input_lens, status = self.test_loader.return_batch()
				
				if self.use_embedding:
					inputs = torch.from_numpy(inputs).long()
				else:
					inputs = torch.from_numpy(inputs).float()

				labels = torch.from_numpy(labels).float()
				miss_chars = torch.from_numpy(miss_chars).float()
				input_lens= torch.from_numpy(input_lens).long()

				if self.cuda:
					inputs = inputs.cuda()
					labels = labels.cuda()
					miss_chars = miss_chars.cuda()
					input_lens = input_lens.cuda()

				# zero the parameter gradients
				self.model.optimizer.zero_grad()
				# forward + backward + optimize
				outputs = self.model(inputs, input_lens, miss_chars)
				loss, miss_penalty = self.model.calculate_loss(outputs, labels, input_lens, miss_chars, self.cuda)
				test_loss += loss.item()

				# Reached end of dataset
				if status == 1:
					break

		#take a random example from the epoch and print the incomplete word, target characters and missed characters
		#min since the last batch may not be of length batch_size
		random_eg = min(np.random.randint(self.train_loader.batch_size), inputs.shape[0]-1)
		encoded_to_string(inputs.cpu().numpy()[random_eg], labels.cpu().numpy()[random_eg], miss_chars.cpu().numpy()[random_eg],
			input_lens.cpu().numpy()[random_eg], self.train_loader.char_to_id, self.use_embedding)

		# Average out the losses and edit distance
		test_loss /= len(self.test_loader)

		print("Test Loss: %.7f, Miss Penalty: %.7f" % (test_loss, miss_penalty))

		# Store in lists for keeping track of model performance
		self.test_losses.append((test_loss, epoch))

		# if testing loss is minimum, store it as the 'best.pth' model, which is used during inference
		# store only when doing train/test together i.e. mode is train
		if test_loss == min([x[0] for x in self.test_losses]) and self.mode == 'train':
			print("Best new model found!")
			self.model.save_model(True, epoch, self.train_losses, self.test_losses,
								  self.model.rnn_name, self.model.num_layers, self.model.hidden_dim)

		return test_loss

	def predict(self, string, misses, char_to_id):
		"""
		called during inference
		:param string: word with predicted characters and blanks at remaining places
		:param misses: list of characters which were predicted but game feedback indicated that they are not present
		:param char_to_id: mapping from characters to id
		"""

		id_to_char = {v:k for k,v in char_to_id.items()}

		#convert string into desired input tensor
		if self.use_embedding:
			encoded = np.zeros((len(char_to_id)))
			for i, c in enumerate(string):
				if c == '*':
					encoded[i] = len(id_to_char) - 1 
				else:
					encoded[i] = char_to_id[c]

			inputs = np.array(encoded)[None, :]
			inputs = torch.from_numpy(inputs).long()

		else:

			encoded = np.zeros((len(string), len(char_to_id)))
			for i, c in enumerate(string):
				if c == '*':
					encoded[i][len(id_to_char) - 1] = 1
				else:
					encoded[i][char_to_id[c]] = 1

			inputs = np.array(encoded)[None, :, :]
			inputs = torch.from_numpy(inputs).float()

		#encode the missed characters
		miss_encoded = np.zeros((len(char_to_id) - 1))
		for c in misses:
			miss_encoded[char_to_id[c]] = 1
		miss_encoded = np.array(miss_encoded)[None, :]
		miss_encoded = torch.from_numpy(miss_encoded).float()

		input_lens = np.array([len(string)])
		input_lens= torch.from_numpy(input_lens).long()	

		#pass through model
		output = self.model(inputs, input_lens, miss_encoded).detach().cpu().numpy()[0]

		#sort predictions
		sorted_predictions = np.argsort(output)[::-1]
		
		#we cannot consider only the argmax since a missed character may also get assigned a high probability
		#in case of a well-trained model, we shouldn't observe this
		return [id_to_char[x] for x in sorted_predictions]

	def plot_loss_acc(self, epoch):
		"""
		take train/test loss and test accuracy input and plot it over time
		:param epoch: to track performance across epochs
		"""

		plt.clf()
		fig, ax1 = plt.subplots()

		ax1.set_xlabel('Epoch')
		ax1.set_ylabel('Loss')
		ax1.plot([x[1] for x in self.train_losses], [x[0] for x in self.train_losses], color='r', label='Train Loss')
		ax1.plot([x[1] for x in self.test_losses], [x[0] for x in self.test_losses], color='b', label='Test Loss')
		ax1.tick_params(axis='y')
		ax1.legend(loc='upper left')

		fig.tight_layout()  # otherwise the right y-label is slightly clipped
		plt.grid(True)
		plt.legend()
		plt.title(self.arch_name)

		filename = self.plots_dir + 'plot_' + self.arch_name + '_' + str(epoch) + '.png'
		plt.savefig(filename)

		print("Saved plots")