Example no. 1
    if idx != 0:
        data = lines.split('\t')[2]
        data = normalizeString(data).strip()
        obj1.add_text(data)


print('read all the lines')

limitDict(vocabLimit,obj1)

if use_cuda:
    model = Model(50,100).cuda()
else:
    model = Model(50,100)

loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

epochs = 4

torch.save(model.state_dict(), 'model' + str(0)+'.pth')
print('starting training')

for i in range(epochs):
    avg_loss = 0.0
    for idx, lines in enumerate(f):
        if idx != 0:
            data = lines.split('\t')[2]
            data = normalizeString(data).strip()
            input_data = [obj1.word_to_idx[word] for word in data.split(' ')]
            #print("input data length ", len(input_data))
def main():
    #####################
    # Generate data
    #####################
    # data loader -
    if isServerRun:
        path = '/home/[email protected]/thesisML/'
    else:
        path = '/Users/chanaross/dev/Thesis/UberData/'
    fileName = '3D_allDataLatLonCorrected_20MultiClass_500gridpickle_30min.p'
    dataInput = np.load(path + fileName)

    flag_save_network = True

    xmin = 0
    xmax = dataInput.shape[0]
    ymin = 0
    ymax = dataInput.shape[1]
    zmin = 48
    zmax = np.floor(dataInput.shape[2]*0.7).astype(int)
    dataInput     = dataInput[xmin:xmax, ymin:ymax, zmin:zmax]  # shrink matrix size for fast training in order to test model
    dataInput     = dataInput[5:6, 10:11, :]
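    # (added note) the line above further narrows the data to a single grid cell
    # (x index 5, y index 10), presumably to debug the model on a 1x1 grid first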
    smoothParam   = [10, 30, 40]  #[10, 20, 30, 40]  #[10, 15, 30]

    testSize            = 0.2
    # define hyper parameters -
    hidden_sizeVec      = [64, 128, 256]     # [20, 64, 256, 512] #[20, 64, 264, 512]  # [20, 40, 64, 128]
    sequence_sizeVec    = [50, 60]  # [5, 10, 20]  # [5, 20, 30, 40]  # [5, 10, 15]  # length of sequence for lstm network
    batch_sizeVec       = [40]
    num_epochs          = 500

    # optimizer parameters -
    lrVec   = [0.05, 0.01]  #[0.05, 0.2, 0.5]  # [0.1, 0.5, 0.9] #[0.1, 0.5, 0.9]  # [0.1, 0.01, 0.001]
    otVec   = [1]  # [1, 2]
    dmp     = 0
    mm      = 0.9
    eps     = 1e-08
    wdVec   = [2e-3]

    # create case vectors
    networksDict = {}
    itr = itertools.product(smoothParam, sequence_sizeVec, batch_sizeVec, hidden_sizeVec, lrVec, otVec, wdVec)
    for i in itr:
        networkStr = 'smooth_{0}_seq_{1}_bs_{2}_hs_{3}_lr_{4}_ot_{5}_wd_{6}'.format(i[0], i[1], i[2], i[3], i[4], i[5], i[6])
        networksDict[networkStr] = {'seq': i[1], 'bs': i[2], 'hs': i[3], 'lr': i[4], 'ot': i[5], 'wd': i[6], 'sm': i[0]}
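    # (added note) each generated key encodes one hyperparameter combination,
    # e.g. 'smooth_10_seq_50_bs_40_hs_64_lr_0.05_ot_1_wd_0.002'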

    # results file - the header is written once here and one row per
    # configuration is appended inside the loop below
    outFile = open('LSTM_networksOutput.csv', 'w')
    outFile.write('Name;finalAcc;finalLoss;trainTime;numWeights;NumEpochs\n')

    for netConfig in networksDict:
        dataInputSmooth = moving_average(dataInput, networksDict[netConfig]['sm'])  # smoothing data so that results are more clear to network

        # dataInput[dataInput>1] = 1  # limit all events larger than 10 to be 10
        # define important sizes for network -
        x_size              = dataInputSmooth.shape[0]
        y_size              = dataInputSmooth.shape[1]
        dataSize            = dataInputSmooth.shape[2]
        class_size          = (np.max(np.unique(dataInputSmooth)) + 1).astype(int)

        num_train = int((1 - testSize) * dataSize)
        grid_size = x_size * y_size

        print('Net Parameters: ' + netConfig)

        # create network based on input parameters -
        hidden_size     = networksDict[netConfig]['hs']
        batch_size      = networksDict[netConfig]['bs']
        sequence_size   = networksDict[netConfig]['seq']
        lr              = networksDict[netConfig]['lr']
        ot              = networksDict[netConfig]['ot']
        wd              = networksDict[netConfig]['wd']

        my_net          = Model(grid_size, hidden_size, batch_size, sequence_size, class_size)
        my_net.lstm     = my_net.create_lstm(grid_size)  # lstm receives all grid points at each of the seq_size time steps
        my_net.fc_after_lstm = my_net.create_fc_after_lstm(my_net.hiddenSize, grid_size*class_size)
        my_net.to(device)
        print("model device is:")
        print(next(my_net.parameters()).device)
        numWeights = sum(param.numel() for param in my_net.parameters())
        print('number of parameters: ', numWeights)
        my_net.optimizer    = CreateOptimizer(my_net.parameters(), ot, lr, dmp, mm, eps, wd)
        my_net.lossCrit     = nn.NLLLoss(size_average=True)  # nn.BCELoss(size_average=True)

        my_net.maxEpochs = num_epochs
        my_net.lr        = lr
        my_net.wd        = wd
        my_net.smoothingParam = networksDict[netConfig]['sm']

        # network_path = '/Users/chanaross/dev/Thesis/MachineLearning/forGPU/GPU_results/limitedZero_500grid/'
        # network_name = 'gridSize11_epoch4_batch5_torch.pkl'
        # my_net = torch.load(network_path + network_name, map_location=lambda storage, loc: storage)

        # load data from data loader and create train and test sets
        data_train = dataInputSmooth[:, :, 0:num_train]
        data_test  = dataInputSmooth[:, :, num_train:]

        dataset_uber_train = DataSet_oneLSTM_allGrid(data_train, sequence_size)
        dataset_uber_test  = DataSet_oneLSTM_allGrid(data_test , sequence_size)

        # creating data loader
        dataloader_uber_train = data.DataLoader(dataset=dataset_uber_train, batch_size=batch_size, shuffle=False)
        dataloader_uber_test  = data.DataLoader(dataset=dataset_uber_test , batch_size=batch_size, shuffle=False)
        netOutDict = {}
        labelsOutDict = {}
        for numEpoch in range(num_epochs):
            my_net.loss = None
            # for each epoch, calculate loss for each batch -
            my_net.train()
            localLoss = [4]
            accTrain = [0]
            rmseTrain = [1]
            trainCorr = 0.0
            trainTot = 0.0
            if (1+numEpoch)%40 == 0:
                if my_net.optimizer.param_groups[0]['lr'] > 0.001:
                    my_net.optimizer.param_groups[0]['lr'] = my_net.optimizer.param_groups[0]['lr']/2
                else:
                    my_net.optimizer.param_groups[0]['lr'] = 0.001
            print('lr is: %.6f' % my_net.optimizer.param_groups[0]['lr'])
            netOutList   = []
            labelOutList = []
            for i, (input, labels) in enumerate(dataloader_uber_train):
                inputD = input.to(device)
                labelsD = labels.to(device)
                my_net.loss = None
                # create torch variables
                # input is of size [batch_size, grid_id, seq_size]
                inputVar = Variable(inputD).to(device)
                labVar   = Variable(labelsD).to(device)
                # if isServerRun:
                #     labVar   = labVar.type(torch.cuda.FloatTensor)
                # else:
                #     labVar   = labVar.type(torch.FloatTensor)
                # reset gradient
                my_net.optimizer.zero_grad()
                # forward
                grid_size        = labels.shape[1]
                local_batch_size = input.shape[0]
                # input to LSTM is [seq_size, batch_size, grid_size] , will be transferred as part of the forward
                netOut = my_net.forward(inputVar)
                netOut = netOut.view(local_batch_size, class_size, grid_size)
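                # (added note) the view to [batch, class, grid] puts the output in the
                # [N, C, d1, ...] layout that nn.NLLLoss expects, so the loss can be
                # evaluated independently for every grid cell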
                _, labTrain = torch.max(torch.exp(netOut.data), 1)
                my_net.calcLoss(netOut, labVar)
                # backwards
                my_net.backward()
                # optimizer step
                my_net.optimizer.step()
                # local loss function list
                localLoss.append(my_net.loss.item())
                # if isServerRun:
                #     labTrain = labTrain.cpu()
                if isServerRun:
                    labTrainNp = labTrain.type(torch.cuda.LongTensor).cpu().detach().numpy()
                    # print("number of net labels different from 0 is:" + str(np.sum(labTrainNp > 0)))
                    # print("number of net labels 0 is:"+str(np.sum(labTrainNp == 0)))
                    labelsNp = labels.cpu().detach().numpy()
                    # print("number of real labels different from 0 is:" + str(np.sum(labelsNp > 0)))
                    # print("number of real labels 0 is:" + str(np.sum(labelsNp == 0)))
                    trainCorr = torch.sum(labTrain.type(torch.cuda.LongTensor) == labels).cpu().detach().numpy() + trainCorr
                else:
                    labTrainNp = labTrain.long().detach().numpy()
                    labelsNp = labels.detach().numpy()
                    trainCorr = torch.sum(labTrain.long() == labels).detach().numpy() + trainCorr
                netOutList.append(labTrainNp)
                labelOutList.append(labelsNp)
                trainTot = labels.size(0) * labels.size(1) + trainTot
                rmse = sqrt(metrics.mean_squared_error(labTrainNp.reshape(-1), labelsNp.reshape(-1)))
                accTrain.append(100 * trainCorr / trainTot)
                rmseTrain.append(rmse)
                # output current state
                if (i + 1) % 2 == 0:
                    print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f, Acc: %.4f, RMSE: %.4f'
                          % (numEpoch + 1, my_net.maxEpochs, i + 1,
                             len(dataloader_uber_train),
                             my_net.loss.item(), accTrain[-1], rmseTrain[-1]))
                    # if (i+1) % 20 == 0:
                    #     if ((localLoss[-1] < np.max(np.array(localLoss[0:-1]))) or (accTrain[-1] > np.max(np.array(accTrain[0:-1])))) and flag_save_network:
                    #         # pickle.dump(my_net, open("gridSize" + str(xmax - xmin) + "_epoch" + str(numEpoch+1) + "_batch" + str(i+1) + ".pkl", 'wb'))
                    #         my_net.saveModel("gridSize" + str(xmax - xmin) + "_epoch" + str(numEpoch+1) + "_batch" + str(i+1) + "_torch.pkl")
                    #         # networkStr = "gridSize" + str(xmax - xmin) + "_epoch" + str(numEpoch+1) + "_batch" + str(i+1)
                    #         # outArray = np.stack([np.array(localLoss), np.array(accTrain)])
                    #         # np.save(networkStr + "_oArrBatch.npy", outArray)
            my_net.lossVecTrain.append(np.average(localLoss))
            my_net.accVecTrain.append(np.average(accTrain))
            my_net.rmseVecTrain.append(np.average(rmseTrain))
            # test network for each epoch stage
            accEpochTest, lossEpochTest, rmseEpochTest = my_net.test_spesific(testLoader=dataloader_uber_test)
            my_net.accVecTest.append(accEpochTest)
            my_net.lossVecTest.append(lossEpochTest)
            my_net.rmseVecTest.append(rmseEpochTest)
            netOutDict[numEpoch] = netOutList
            labelsOutDict[numEpoch] = labelOutList
            if (flag_save_network):
                my_net.saveModel(netConfig + "_torch.pkl")
                # outArray = np.stack([np.array(my_net.lossVecTest), np.array(my_net.lossVecTrain),
                #                      np.array(my_net.accVecTest), np.array(my_net.accVecTrain)])
                # np.save("gridSize" + str(xmax - xmin) + "_epoch" + str(numEpoch)  + "_oArrBatch.npy", outArray)
        my_net.finalAcc  = accEpochTest
        my_net.finalLoss = lossEpochTest
        my_net.finalRmse = rmseEpochTest
        # name, HyperPerams, accur, num total weights
        # err vs epoch, loss vs epoch,
        saveFile(netOutDict, 'netDict')
        saveFile(labelsOutDict, 'labelsDict')
        strWrite = '{0};{1};{2};{3};{4}\n'.format(netConfig, my_net.finalAcc, my_net.finalLoss, numWeights, my_net.maxEpochs)
        outFile.write(strWrite)

    outFile.close()

    return
Example no. 3
def train(args, model, logger, in_queue, out_queue):
    """Train the order embedding model.

    args: Commandline arguments
    logger: logger for logging progress
    in_queue: input queue to an intersection computation worker
    out_queue: output queue to an intersection computation worker
    """
    scheduler, opt = utils.build_optimizer(args, model.parameters())
    if args.method_type == "order":
        clf_opt = optim.Adam(model.clf_model.parameters(), lr=args.lr)

    done = False
    while not done:
        data_source = make_data_source(args)
        loaders = data_source.gen_data_loaders(args.eval_interval *
                                               args.batch_size,
                                               args.batch_size,
                                               train=True)
        for batch_target, batch_neg_target, batch_neg_query in zip(*loaders):
            msg, _ = in_queue.get()
            if msg == "done":
                done = True
                break
            # train
            model.train()
            model.zero_grad()
            pos_a, pos_b, neg_a, neg_b = data_source.gen_batch(
                batch_target, batch_neg_target, batch_neg_query, True)
            emb_pos_a, emb_pos_b = model.emb_model(pos_a), model.emb_model(
                pos_b)
            emb_neg_a, emb_neg_b = model.emb_model(neg_a), model.emb_model(
                neg_b)
            #print(emb_pos_a.shape, emb_neg_a.shape, emb_neg_b.shape)
            emb_as = torch.cat((emb_pos_a, emb_neg_a), dim=0)
            emb_bs = torch.cat((emb_pos_b, emb_neg_b), dim=0)
            labels = torch.tensor([1] * pos_a.num_graphs +
                                  [0] * neg_a.num_graphs).to(
                                      utils.get_device())
            intersect_embs = None
            pred = model(emb_as, emb_bs)
            loss = model.criterion(pred, intersect_embs, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()
            if scheduler:
                scheduler.step()

            if args.method_type == "order":
                with torch.no_grad():
                    pred = model.predict(pred)
                model.clf_model.zero_grad()
                pred = model.clf_model(pred.unsqueeze(1))
                criterion = nn.NLLLoss()
                clf_loss = criterion(pred, labels)
                clf_loss.backward()
                clf_opt.step()
            pred = pred.argmax(dim=-1)
            acc = torch.mean((pred == labels).type(torch.float))
            train_loss = loss.item()
            train_acc = acc.item()

            out_queue.put(("step", (loss.item(), acc)))
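
# --- Added illustration (not part of the original example) ---
# A minimal, hypothetical sketch of how the queue-driven train() above could be
# launched from a parent process. The message protocol ("step"/"done" on
# in_queue, ("step", (loss, acc)) on out_queue) follows the code above; the
# worker spawning, n_steps, and the args/model/logger objects are assumptions.
import torch.multiprocessing as mp

def run_training(args, model, logger, n_steps=1000):
    in_queue, out_queue = mp.Queue(), mp.Queue()
    worker = mp.Process(target=train,
                        args=(args, model, logger, in_queue, out_queue))
    worker.start()
    for step in range(n_steps):
        in_queue.put(("step", None))        # request one training step
        _, (loss, acc) = out_queue.get()    # collect that step's loss/accuracy
        print("step %d: loss %.4f, acc %.4f" % (step, loss, acc))
    in_queue.put(("done", None))            # tell the worker to stop
    worker.join()
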
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math

from transformer_prep_data import *
from transformer_voc import Voc
from transformer_global_variables import *


criterion = nn.NLLLoss(ignore_index=0)
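
# --- Added illustration (not part of the original example) ---
# Quick sanity check of what ignore_index does: NLLLoss expects log-probabilities
# (e.g. from log_softmax) and skips any target equal to the ignored index (here 0,
# the padding token), so padded positions contribute nothing to the loss.
# The tensors below are made up purely for the demonstration.
import torch
import torch.nn.functional as F

_log_probs = F.log_softmax(torch.randn(4, 10), dim=1)  # 4 tokens over a 10-word vocab
_targets = torch.tensor([3, 7, 0, 0])                  # the last two are padding
_demo_loss = criterion(_log_probs, _targets)           # averaged over the 2 real tokens only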


def train(input_variable, lengths, target_variable, mask, max_target_len, transformer, embedding,
          optimizer, ntokens, batch_size, clip, device, max_length=MAX_LENGTH):

    # Zero gradients
    optimizer.zero_grad()

    # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    # Forward pass through encoder
Example no. 5
    def __init__(self, generator, tgt_vocab):
        super(UnsupNMTLossCompute, self).__init__(generator, tgt_vocab)

        weight = torch.ones(len(tgt_vocab))
        weight[self.padding_idx] = 0
        self.criterion = nn.NLLLoss(weight, size_average=False)
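        # (added note) zeroing the class weight at padding_idx makes NLLLoss act
        # like ignore_index for that class: padded target positions add zero to
        # the summed loss (size_average=False keeps the sum un-normalised, so the
        # caller can normalise by the number of real tokens itself)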
Example no. 6
    def __init__(self, weight=None, size_average=True, ignore_index=255):
        super(CrossEntropyLoss2d, self).__init__()
        # NLLLoss2d was wrong here; changed to NLLLoss
        self.nll_loss = nn.NLLLoss(weight, size_average, ignore_index)
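
# --- Added illustration (not part of the original example) ---
# Background for the comment above: nn.NLLLoss itself handles spatial inputs
# (input of shape [N, C, H, W] with targets of shape [N, H, W]), which is why
# the deprecated NLLLoss2d can simply be replaced by NLLLoss. A standalone
# check with made-up sizes:
import torch
import torch.nn as nn
import torch.nn.functional as F

_nll = nn.NLLLoss(ignore_index=255)
_log_probs = F.log_softmax(torch.randn(2, 5, 8, 8), dim=1)  # [batch, classes, H, W]
_labels = torch.randint(0, 5, (2, 8, 8))                    # per-pixel class labels
_px_loss = _nll(_log_probs, _labels)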
Example no. 7
def train(data):
    print "Training model..."
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    model = SeqModel(data)
    loss_function = nn.NLLLoss()
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(0)
    best_dev = -10
    data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            sample_loss += loss.data[0]
            total_loss += loss.data[0]
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        # continue
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if data.seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            if data.seg:
                print "Exceed previous best f score:", best_dev
            else:
                print "Exceed previous best acc score:", best_dev
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print "Save current best model in file:", model_name
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # ## decode test
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        gc.collect()
print(ps.shape)

top_p, top_class = ps.topk(1, dim=1)
# Look at the most likely classes for the first 10 examples

print(top_class[:10,:])



equals = top_class == labels.view(*top_class.shape)

accuracy = torch.mean(equals.type(torch.FloatTensor))
print(f'Accuracy: {accuracy.item()*100}%')

model = Classifier()
criterian = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

epochs = 30
steps = 0


train_losses, test_losses = [], []
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        
        optimizer.zero_grad()
        
        log_ps = model(images)
        loss = criterian(log_ps, labels)
Example no. 9
def train(data, save_model_dir, seg=True):
    print "Training model..."
    data.show_data_summary()
    save_data_name = save_model_dir + ".dset"
    save_data_setting(data, save_data_name)
    loss_function = nn.NLLLoss()
    model = SeqModel(data)
    #model=copy.deepcopy(premodel)
    optimizer = optim.SGD(model.examiner.parameters(),
                          lr=data.HP_lr,
                          momentum=data.HP_momentum)
    best_dev = -1
    data.HP_iteration = 5
    USE_CRF = True
    ## start training
    acc_list = []
    p_list = []
    r_list = []
    f_list = []
    map_list = []
    #random.seed(2)
    print("total", )
    data.HP_lr = 0.1
    for idx in range(1):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        total_rl_loss = 0
        total_ml_loss = 0
        total_num = 0.0
        total_reward = 0.0
        right_token_reform = 0
        whole_token_reform = 0
        #random.seed(2)
        #random.shuffle(data.train_Ids)
        #random.seed(seed_num)
        ## set model in train model
        model.examiner.train()
        model.examiner.zero_grad()
        model.topk = 5
        model.examiner.topk = 5
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        gamma = 0
        cnt = 0
        click = 0
        sum_click = 0
        sum_p_at_5 = 0.0
        sum_p = 1.0
        #if idx==0:
        #    selected_data=[batch_id for batch_id in range(0,total_batch//1000)]
        tag_mask = None
        batch_ids = [i for i in range(total_batch)]
        for batch_idx in range(0, total_batch):
            # if end%500 == 0:
            #     temp_time = time.time()
            #     temp_cost = temp_time - temp_start
            #     temp_start = temp_time
            #     print("     Instance: %s; Time: %.2fs; loss: %.4f;"%(end, temp_cost, sample_loss))
            #     sys.stdout.flush()
            #     sample_loss = 0
            #updating the crf by selected position
            batch_id = batch_ids[batch_idx]

            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue

            update_once = False

            start_time = time.time()
            #selected_data.append(batch_id)

            if batch_id == 15:

                for j in range(0, 10):
                    __tot = 0.0
                    for i in range(5, 15):
                        model.sample_train(0, i)
                        batch_id_temp = batch_ids[i]
                        start = batch_id_temp * batch_size
                        end = (batch_id_temp + 1) * batch_size
                        instance = data.train_Ids[start:end]

                        batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                            instance, data.HP_gpu)
                        real_batch_label = batch_label
                        batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss(
                            batch_word, batch_wordlen, batch_char,
                            batch_charlen, batch_charrecover, batch_label,
                            mask)

                        #_pred_label, _gold_label = recover_label(Variable(tag_seq.cuda()), real_batch_label.cuda(),mask.cuda(), data.label_alphabet, batch_wordrecover)
                        _tag_mask = tag_mask

                        pos_mask, score = model.reinforment_supervised(
                            batch_word, batch_wordlen, batch_char,
                            batch_charlen, batch_charrecover, real_batch_label,
                            tag_seq, tag_prob, mask)
                        __tot += score.sum()

                        score.sum().backward()
                        optimizer.step()
                        model.examiner.zero_grad()

                    __tot = 0.0
                    for i in range(10, -1, -1):
                        print(i)
                        model.sample_train(i + 1, 15)
                        batch_id_temp = batch_ids[i]
                        start = batch_id_temp * batch_size
                        end = (batch_id_temp + 1) * batch_size
                        instance = data.train_Ids[start:end]

                        batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                            instance, data.HP_gpu)
                        real_batch_label = batch_label
                        batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss(
                            batch_word, batch_wordlen, batch_char,
                            batch_charlen, batch_charrecover, batch_label,
                            mask)

                        #_pred_label, _gold_label = recover_label(Variable(tag_seq.cuda()), real_batch_label.cuda(),mask.cuda(), data.label_alphabet, batch_wordrecover)
                        _tag_mask = tag_mask

                        pos_mask, score = model.reinforment_supervised(
                            batch_word, batch_wordlen, batch_char,
                            batch_charlen, batch_charrecover, real_batch_label,
                            tag_seq, tag_prob, mask)
                        __tot += score.sum()

                        score.sum().backward()
                        optimizer.step()
                        model.examiner.zero_grad()
                    print("score", __tot / 14)
                model.train()
            if batch_id >= 15:
                t = np.random.randint(0, len(model.X_train))
                if np.random.rand() > -1 or model.tag_mask_list[t].sum(
                ).data[0] <= 5:
                    t = np.random.randint(len(model.X_train), total_batch)
                    #This is for seq choosing
                    #if batch_id>total_batch//100+100:
                    #    batch_id=batch_ids[batch_idx]
                    # tmin=-1
                    # for i in range(len(model.X_train),total_batch):
                    #     batch_id=batch_ids[i]
                    #     start = batch_id*batch_size
                    #     end = (batch_id+1)*batch_size
                    #     if end >train_num:
                    #         end = train_num
                    #     instance = data.train_Ids[start:end]
                    #     if len(instance)==0:
                    #         continue
                    #     batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask  = batchify_with_label(instance, data.HP_gpu)
                    #     batch_label,tag_seq,tag_mask,score,indices,scores_ref=model.crf_loss(batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask)
                    #     if tmin==-1 or (scores_ref.cpu().data[0]>=tmin):
                    #         tmin=scores_ref.cpu().data[0]
                    #         t=i
                    # temp=batch_ids[batch_idx]
                    # batch_ids[batch_idx]=batch_ids[t]
                    # batch_ids[t]=temp

                    batch_id = batch_ids[batch_idx]
                    start = batch_id * batch_size
                    end = (batch_id + 1) * batch_size
                    if end > train_num:
                        end = train_num
                    instance = data.train_Ids[start:end]

                    batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                        instance, data.HP_gpu)
                    real_batch_label = batch_label
                    batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss(
                        batch_word, batch_wordlen, batch_char, batch_charlen,
                        batch_charrecover, batch_label, mask)
                    model.add_instance(batch_word, batch_label, tag_mask,
                                       instance, scores_ref.data[0])

                    #pred_label, gold_label = recover_label(Variable(tag_seq.cuda()), real_batch_label.cuda(),mask.cuda(), data.label_alphabet, batch_wordrecover)

                    # u=False
                    # for x in pred_label:
                    #     if not gold_label==pred_label:
                    #         u=True
                    #         break
                    # #if u==True:
                    # print "mask", tag_mask
                    # print "gold", gold_label
                    # print "pred", pred_label

                else:
                    # tmin=model.scores_refs[t]
                    # for i in range(len(model.X_train)):
                    #     if model.scores_refs[i]<=tmin:
                    #         tmin=model.scores_refs[i]
                    #         t=i

                    instance = model.instances[t]
                    batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                        instance, data.HP_gpu)
                    real_batch_label = batch_label
                    batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss(
                        batch_word,
                        batch_wordlen,
                        batch_char,
                        batch_charlen,
                        batch_charrecover,
                        batch_label,
                        mask,
                        t=t)
                    model.readd_instance(batch_label, mask, tag_mask, t,
                                         scores_ref.data[0])

                print("score", score)
                #sum_p_at_5+=score
                sum_p += 1.0

                end_time = time.time()
                if click + 5 >= 10:
                    print("time", end_time - start_time)
            else:
                batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                    instance, data.HP_gpu)
                model.add_instance(batch_word, batch_label, tag_mask, instance,
                                   -100000.0)

            #print("Y_train",model.Y_train[-1])
            # if batch_id>=total_batch//100+15:
            #     for i in range(15):
            #         model.train()
            #         model.reevaluate_instance(mask)
            #print("loss",loss)
            #print(batch_wordlen)
            if batch_id < 15:
                if batch_id == 14:
                    model.train()
                    #print("Y_train",model.Y_train)
                    print(batch_ids)
                    speed, acc, p, r, f, _ = evaluate(data, model, "test")
                    print(len(model.Y_train))
                    print("after", acc)
                    print("Check", f)
                    acc_list.append(acc)
                    p_list.append(p)
                    r_list.append(r)
                    f_list.append(sum_click)
                    sum_p_at_5 = 0.0
                    sum_p = 1.0
                continue
            if batch_id == 15:
                model.train()
                #print("Y_train",model.Y_train)
                print(batch_ids)
                speed, acc, p, r, f, _ = evaluate(data, model, "test")
                print(len(model.Y_train))
                print("after", acc)
                print("Check", f)
                acc_list.append(acc)
                p_list.append(p)
                r_list.append(r)
                f_list.append(sum_click)
                sum_p_at_5 = 0.0
                sum_p = 1.0

            click += model.topk
            sum_click += model.topk

            #click+=batch_wordlen[0]
            #sum_click+=batch_wordlen[0]

            if click >= 10:
                model.train()
                speed, acc, p, r, f, _ = evaluate(data, model, "test")
                print("Step:", len(model.Y_train))
                print("after", acc)
                acc_list.append(acc)
                p_list.append(p)
                r_list.append(r)
                f_list.append(sum_click)
                sum_p_at_5 = 0.0
                sum_p = 1.0

                click -= 10
            instance_count += 1

            pos_mask, selection_score, select_reward = model.reinforment_reward(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, real_batch_label, tag_seq, tag_prob, mask)
            if USE_CRF == True:
                start_time = time.time()
                t = np.random.randint(1, 10)
                #print("size",total_batch)
                speed, acc, p, r, f, _ = evaluate(data, model, "dev")
                end_time = time.time()
                if total_num != 0:
                    ave_scores = total_reward / total_num
                else:
                    ave_scores = 0.0
                total_reward += acc
                total_num += 1

                # print(batch_label)
                sample_scores = torch.from_numpy(np.asarray([acc])).float()
                ave_scores = torch.from_numpy(np.asarray([ave_scores])).float()
                if idx >= 0:
                    reward_diff = Variable(sample_scores - ave_scores,
                                           requires_grad=False)
                else:
                    reward_diff = select_reward
                reward_diff = reward_diff.cuda()
            rl_loss = -selection_score  # B

            #if idx>=10:
            #print("rl_loss",rl_loss)
            print("reward", reward_diff)
            rl_loss = torch.mul(rl_loss,
                                reward_diff.expand_as(rl_loss))  #b_size

            #print("reward",reward_diff)
            #rl_loss = rl_loss.sum()

            rl_loss.backward()
            optimizer.step()
            model.examiner.zero_grad()
            if len(p_list) >= 100:
                break
        if len(p_list) >= 100:
            break

        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("rl_loss", total_rl_loss)
        print("ml_loss", total_ml_loss)
        #print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        # continue
        speed, acc, p, r, f, _ = evaluate(data, model, "test")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            if seg:
                print "Exceed previous best f score:", best_dev
            else:
                print "Exceed previous best acc score:", best_dev
            model_name = save_model_dir + '.' + str(idx) + ".model"
            #torch.save(model.state_dict(), model_name)
            best_dev = current_score
        ## decode test

        speed, acc, p, r, f, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if best_dev == current_score:
            best_ = test_cost, speed, acc, p, r, f
        if seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        gc.collect()
    file_dump = open("exp_list.pkl", "wb")
    pickle.dump([acc_list, p_list, r_list, f_list, map_list], file_dump)
    file_dump.close()
    def train(self, data_dir, epochs, learning_rate):
        image_datasets, dataloaders, class_to_idx  = self.load_data(data_dir)
        criterion = nn.NLLLoss()
        optimizer = optim.Adam(self.model.classifier.parameters(), lr=learning_rate)
        
        # gpu or cpu
        self.model.to(self.device)
        
        # start training
        train_losses = []
        test_losses = []
        for e in range(epochs):
            running_train_loss = 0
            self.model.train()
            for images, labels in dataloaders['train']:
                images, labels = images.to(self.device), labels.to(self.device)

                optimizer.zero_grad()

                # get log probs
                log_ps = self.model.forward(images)

                # get loss
                loss = criterion(log_ps, labels)
                running_train_loss += loss.item()
        #         print(f'running_train_loss: {running_train_loss}')

                # back propagation
                loss.backward()

                # adjust weights
                optimizer.step()

            else:
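                # (added note) this `else` belongs to the `for` loop above: it runs
                # once the epoch's training batches are exhausted (no `break`), so
                # validation happens after each full pass over the training set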
                self.model.eval()
                running_test_loss = 0
                accuracy = 0
                with torch.no_grad():
                    for images, labels in dataloaders['test']:
                        images, labels = images.to(self.device), labels.to(self.device)

                        # get log probs
                        log_ps = self.model.forward(images)

                        # get loss
                        test_loss = criterion(log_ps, labels)
                        running_test_loss += test_loss.item()
        #                 print(f'running_test_loss: {running_test_loss}')

                        # turn log probs into real probs
                        ps = torch.exp(log_ps)

                        # calc accuracy
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

            n_test_batches = len(dataloaders['test'])
            n_train_batches = len(dataloaders['train'])

            epoch_train_loss = running_train_loss / n_train_batches
            epoch_test_loss  = running_test_loss / n_test_batches

            train_losses.append(epoch_train_loss)
            test_losses.append(epoch_test_loss)

            print(f'Epoch: {e+1}/{epochs}',
                  f'Training Loss {epoch_train_loss:{0}.{4}}',
                  f'Validation Loss {epoch_test_loss:{0}.{4}}',
                  f'Accuracy {(accuracy / n_test_batches):{0}.{4}}'
                 )
        
        #return e+1, train_losses, test_losses
        self.final_epoch = e+1
        self.train_losses = train_losses
        self.test_losses = test_losses
        self.class_to_idx = class_to_idx
def main(cfg, gpus):
    # Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)

    crit = nn.NLLLoss(ignore_index=-1)

    if cfg.MODEL.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit,
                                                 cfg.TRAIN.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit)

    # Dataset and Loader
    dataset_train = TrainDataset(cfg.DATASET.root_dataset,
                                 cfg.DATASET.list_train,
                                 cfg.DATASET,
                                 batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)

    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=len(gpus),  # we have modified data_parallel
        shuffle=False,  # we do not use this param
        collate_fn=user_scattered_collate,
        num_workers=cfg.TRAIN.workers,
        drop_last=True,
        pin_memory=True)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    # create loader iterator
    iterator_train = iter(loader_train)

    # load nets into gpu
    if len(gpus) > 1:
        segmentation_module = UserScatteredDataParallel(segmentation_module,
                                                        device_ids=gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)

    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}

    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        train(segmentation_module, iterator_train, optimizers, history,
              epoch + 1, cfg)

        # checkpointing
        checkpoint(nets, history, cfg, epoch + 1)

    print('Training Done!')
        def __init__(self, ignore_index):
            super(NLLLoss, self).__init__()
#             step_loss = F.nll_loss(log_probs, target, reduction="none", ignore_index=PAD)
            self.NLL = nn.NLLLoss(ignore_index=ignore_index, reduction='sum')
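            # (added note) reduction='sum' adds up the per-token losses instead of
            # averaging them; callers of a wrapper like this typically divide the
            # sum by the number of non-padding tokens themselves. The commented
            # F.nll_loss line above is the functional equivalent with per-token
            # (reduction="none") losses.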
Example no. 13
def train_model(image_dataloaders,
                arch='vgg16',
                num_outputs=102,
                hidden_units=4096,
                learning_rate=0.001,
                epochs=20,
                gpu=False,
                checkpoint=''):
    '''
        Trains the model on given dataloaders
    '''
    #load the model
    model = load_model(arch, num_outputs, hidden_units)
    #attach ancillary information about the nn to the model object
    model.arch = arch
    model.num_outputs = num_outputs
    model.hidden_units = hidden_units
    model.learning_rate = learning_rate
    model.epochs = epochs
    model.gpu = gpu
    model.checkpoint = checkpoint

    print('Architecture: ', arch, 'Hidden units: ', hidden_units)
    print('Training epochs: ', epochs, 'Learning rate: ', learning_rate)
    print(
        'Training data size: {} images, '.format(
            len(image_dataloaders['train'].dataset)),
        'validation data size: {} images'.format(
            len(image_dataloaders['valid'].dataset)))

    #use gpu if selected and available
    if gpu and torch.cuda.is_available():
        print('On GPU')
        device = torch.device("cuda:0")
        model.cuda()
    else:
        print('On CPU')
        device = torch.device("cpu")

    #setup the loss function
    if arch in ('inception', 'resnet'):
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.NLLLoss()

    #only the new or modified layers will get gradient updates
    print("Parameters to learn:")
    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t", name)
    #making sure only added parameters are being optimized with
    #gradient adjustments during training
    optimizer = optim.Adam(params_to_update, lr=learning_rate)

    #resetting accuracy and deep copying the model weights/biases
    best_accuracy = 0
    best_model_wts = copy.deepcopy(model.state_dict())

    #to keep track of the losses throughout training
    train_losses, valid_losses = [], []

    print_every = 100  #for debugging
    start_time = time.time()

    for e in range(epochs):
        epoch_accuracy = 0
        running_loss = 0
        steps = 0
        start_training_time_per_steps = time.time()

        for images, labels in image_dataloaders['train']:
            images, labels = images.to(device), labels.to(device)
            steps += 1
            optimizer.zero_grad()

            #run training data through the model
            if arch == 'inception':
                #From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                outputs, aux_outputs = model(images)
                loss1 = criterion(outputs, labels)
                loss2 = criterion(aux_outputs, labels)
                loss = loss1 + 0.4 * loss2
            else:
                output_logps = model(images)
                loss = criterion(output_logps, labels)

            running_loss += loss.item()
            loss.backward()
            optimizer.step()

            #perform validation at "print_every"
            if steps % print_every == 0:
                #calculate the training time per steps
                training_time_per_steps = time.time(
                ) - start_training_time_per_steps
                #reset the accuracy and validation loss
                accuracy, valid_loss = 0, 0
                #put the model in evaluation mode for quicker validation
                model.eval()
                #we're not doing any gradient related calculations when punching
                #through the validation data
                with torch.no_grad():
                    valid_start_time = time.time()
                    for images, labels in image_dataloaders['valid']:
                        images, labels = images.to(device), labels.to(device)
                        valid_logps = model(images)
                        #calculate the validation loss before taking the exp
                        valid_loss += criterion(valid_logps, labels)

                        valid_ps = torch.exp(valid_logps)
                        top_p, top_class = valid_ps.topk(1, dim=1)
                        equality = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equality.type(
                            torch.FloatTensor)).item()

                valid_time = time.time() - valid_start_time
                #keeping track of the losses to plot later in case we need to
                train_losses.append(running_loss / steps)
                valid_losses.append(valid_loss /
                                    len(image_dataloaders['valid']))
                epoch_accuracy = accuracy / len(image_dataloaders['valid'])

                #printing losses, accuracy, etc. as we train
                print(
                    'Epoch {}/{} '.format(e + 1,
                                          epochs), 'Step {} '.format(steps),
                    'Train loss: {:.3f} '.format(running_loss / steps),
                    'Valid loss: {:.3f} '.format(
                        valid_loss / len(image_dataloaders['valid'])),
                    'Accuracy: {:.2f}% '.format(epoch_accuracy * 100),
                    'Train dur: {:.1f}s '.format(training_time_per_steps),
                    'Valid dur: {:.1f}s'.format(valid_time))
                #reset the running loss to zero and put the model back into training mode
                running_loss = 0
                model.train()
                start_training_time_per_steps = time.time()

        #saving the best weights and biases based on best accuracy
        if (epoch_accuracy > best_accuracy):
            best_accuracy = epoch_accuracy
            best_model_wts = copy.deepcopy(model.state_dict())

    #loading model object with best weights
    model.load_state_dict(best_model_wts)

    #storing dir_to_cat into the model object - added this for easier lookup
    with open('dir_to_cat.json', 'r') as f:
        dir_to_cat = json.load(f)

    #saving train and valid losses to the model in case we need to access them
    model.train_losses = train_losses
    model.valid_losses = valid_losses

    #printing total training time and best accuracy
    total_time = time.time() - start_time
    print('Time for complete training {:.0f}m {:.0f}s'.format(
        total_time // 60, total_time % 60))
    print('Best accuracy: {:3f}%'.format(best_accuracy * 100))

    #saving checkpoint if requested
    if checkpoint:
        print('Checkpoint saved to:', checkpoint)
        checkpoint_dict = {
            'arch': arch,
            'dir_to_cat': dir_to_cat,
            'hidden_units': hidden_units,
            'best_accuracy': best_accuracy,
            'best_model_weights': best_model_wts,
            'train_losses': train_losses,
            'valid_losses': valid_losses
        }
        torch.save(checkpoint_dict, checkpoint)

    #return the model object with best weights and biases
    return model
Example no. 14
if gpu == True:
    using_gpu = torch.cuda.is_available()
    device = 'gpu'
    print('GPU On')
else:
    print('CPU ON')
    device = 'cpu'

# Loading Dataset
data_transforms, directories, dataloaders, dataset_sizes, image_datasets = script.loading_data(
    data_dir)
class_to_idx = image_datasets['training_transforms'].class_to_idx
print("cudaorcpu_3")
for i in dataloaders:
    print("dataloaders ", dataloaders[i])

# Network Setup
model, input_size = script.make_model(arch, hidden_units)
criteria = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr)
sched = lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)
epochs = epoch
model_ft = script.train_model(dataloaders, dataset_sizes, model, criteria,
                              optimizer, sched, epochs, device)

# Testing Model
script.check_accuracy_on_test(dataloaders, model, 'testing_transforms', True)

# Saving Checkpoint
script.save_checkpoints(model, arch, lr, epochs, input_size, hidden_units,
                        class_to_idx, save_dir)
Example no. 15
def main():
    data_holder, task2id, id2task, num_feat, num_voc, num_char, tgt_dict, embeddings = DataLoader_elmo.multitask_dataloader(
        pkl_path, num_task=num_task, batch_size=BATCH_SIZE)
    para = model_para
    #task2label = {"conll2000": "chunk", "unidep": "POS", "conll2003": "NER"}
    task2label = {"conll2000": "chunk", "wsjpos": "POS", "conll2003": "NER"}
    logger = Logger('./logs/' + str(args.gpu))
    para["id2task"] = id2task
    para["n_feats"] = num_feat
    para["n_vocs"] = num_voc
    para["n_tasks"] = num_task
    para["out_size"] = [
        len(tgt_dict[task2label[id2task[ids]]]) for ids in range(num_task)
    ]
    para["n_chars"] = num_char
    model = Model_s.build_model_cnn(para)
    model.Word_embeddings.apply_weights(embeddings)

    params = list(filter(lambda p: p.requires_grad, model.parameters()))
    num_params = sum(p.numel() for p in model.parameters())
    print(model)
    print("Num of paras:", num_params)
    print(model.concat_flag)

    def lr_decay(optimizer, epoch, decay_rate=0.05, init_lr=0.015):
        lr = init_lr / (1 + decay_rate * epoch)
        print(" Learning rate is set as:", lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return optimizer

    def exp_lr_decay(optimizer, epoch, decay_rate=0.05, init_lr=0.015):
        lr = init_lr * decay_rate**epoch
        print(" Learning rate is set as:", lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return optimizer
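
    # (added note) with the shared defaults (init_lr=0.015, decay_rate=0.05) the two
    # schedules behave very differently: lr_decay gives 0.015 / (1 + 0.05 * epoch),
    # e.g. 0.010 at epoch 10, while exp_lr_decay gives 0.015 * 0.05 ** epoch, which
    # already falls to 7.5e-4 at epoch 1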

    if args.optim == "noam":
        model_optim = optim_custorm.NoamOpt(
            para["d_hid"], 1, 1000,
            DenseSparseAdam(params, lr=0.0, betas=(0.9, 0.98), eps=1e-9))
        args.decay = None
    elif args.optim == "sgd":
        model_optim = optim.SGD(params,
                                lr=0.015,
                                momentum=args.momentum,
                                weight_decay=1e-8)
    elif args.optim == "adam":
        model_optim = optim.Adam(params,
                                 lr=0.0,
                                 betas=(0.9, 0.98),
                                 eps=1e-9,
                                 weight_decay=1e-8)
    if args.mode == "train":
        best_F1 = 0
        if not para["crf"]:
            calculate_loss = nn.NLLLoss()
        else:
            calculate_loss = None

        print("Start training...")
        print('-' * 60)
        KLLoss = None  #nn.KLDivLoss()
        start_point = time.time()
        for epoch_idx in range(NUM_EPOCH):
            if args.optim == "sgd":
                if args.decay == "exp":
                    model_optim = exp_lr_decay(model_optim, epoch_idx)
                elif args.decay == "normal":
                    model_optim = lr_decay(model_optim, epoch_idx)
            Pre, Rec, F1, loss_list = run_epoch(model, data_holder,
                                                model_optim, calculate_loss,
                                                KLLoss, para, epoch_idx,
                                                id2task, logger)

            use_time = time.time() - start_point
            print("Time using: %f mins" % (use_time / 60))
            if not best_F1 or best_F1 < F1:
                best_F1 = F1
                Model_s.save_model(model_path, model, para)
                print('*' * 60)
                print(
                    "Save model with average Pre: %f, Rec: %f, F1: %f on dev set."
                    % (Pre, Rec, F1))
                save_idx = epoch_idx
                print('*' * 60)
        print("save model at epoch:", save_idx)

    else:
        para_path = os.path.join(path, 'para.pkl')
        with open(para_path, "wb") as f:
            para_save = pickle.load(f)
        model = Model_s.build_model(para_save)
        model = Model_s.read_model(model_path, model)
        prec_list, rec_list, f1_list = infer(model, data_holder, "test")
Example no. 16
    def __init__(self, model, config):
        """
        Creates a new TrainManager for a model, specified as in configuration.

        :param model:
        :param config:
        """
        train_config = config["training"]
        self.model = model
        self.pad_index = self.model.pad_index
        self.bos_index = self.model.bos_index
        criterion = nn.NLLLoss(ignore_index=self.pad_index, reduction='sum')
        self.learning_rate_min = train_config.get("learning_rate_min", 1.0e-8)
        if train_config["loss"].lower() not in [
                "crossentropy", "xent", "mle", "cross-entropy"
        ]:
            raise NotImplementedError("Loss is not implemented. Only xent.")
        learning_rate = train_config.get("learning_rate", 3.0e-4)
        weight_decay = train_config.get("weight_decay", 0)
        if train_config["optimizer"].lower() == "adam":
            self.optimizer = torch.optim.Adam(model.parameters(),
                                              weight_decay=weight_decay,
                                              lr=learning_rate)
        else:
            # default
            self.optimizer = torch.optim.SGD(model.parameters(),
                                             weight_decay=weight_decay,
                                             lr=learning_rate)
        self.schedule_metric = train_config.get("schedule_metric",
                                                "eval_metric")
        self.ckpt_metric = train_config.get("ckpt_metric", "eval_metric")
        self.best_ckpt_iteration = 0
        # if we schedule after BLEU/chrf, we want to maximize it, else minimize
        scheduler_mode = "max" if self.schedule_metric == "eval_metric" \
            else "min"
        # the ckpt metric decides on how to find a good early stopping point:
        # ckpts are written when there's a new high/low score for this metric
        if self.ckpt_metric == "eval_metric":
            self.best_ckpt_score = -np.inf
            self.is_best = lambda x: x > self.best_ckpt_score
        else:
            self.best_ckpt_score = np.inf
            self.is_best = lambda x: x < self.best_ckpt_score
        self.scheduler = None
        if "scheduling" in train_config.keys() and \
                train_config["scheduling"]:
            if train_config["scheduling"].lower() == "plateau":
                # learning rate scheduler
                self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                    optimizer=self.optimizer,
                    mode=scheduler_mode,
                    verbose=False,
                    threshold_mode='abs',
                    factor=train_config.get("decrease_factor", 0.1),
                    patience=train_config.get("patience", 10))
            elif train_config["scheduling"].lower() == "decaying":
                self.scheduler = torch.optim.lr_scheduler.StepLR(
                    optimizer=self.optimizer,
                    step_size=train_config.get("decaying_step_size", 10))
            elif train_config["scheduling"].lower() == "exponential":
                self.scheduler = torch.optim.lr_scheduler.ExponentialLR(
                    optimizer=self.optimizer,
                    gamma=train_config.get("decrease_factor", 0.99))
        self.shuffle = train_config.get("shuffle", True)
        self.epochs = train_config["epochs"]
        self.batch_size = train_config["batch_size"]
        self.batch_multiplier = train_config.get("batch_multiplier", 1)
        self.criterion = criterion
        self.normalization = train_config.get("normalization", "batch")
        self.steps = 0
        # stop training if this flag is True by reaching learning rate minimum
        self.stop = False
        self.total_tokens = 0
        self.max_output_length = train_config.get("max_output_length", None)
        self.overwrite = train_config.get("overwrite", False)
        self.model_dir = self._make_model_dir(train_config["model_dir"])
        self.logger = self._make_logger()
        self.valid_report_file = "{}/validations.txt".format(self.model_dir)
        self.use_cuda = train_config["use_cuda"]
        if self.use_cuda:
            self.model.cuda()
        self.logging_freq = train_config.get("logging_freq", 100)
        self.validation_freq = train_config.get("validation_freq", 1000)
        self.eval_metric = train_config.get("eval_metric", "bleu")
        self.print_valid_sents = train_config["print_valid_sents"]
        self.level = config["data"]["level"]
        self.clip_grad_fun = None
        if "clip_grad_val" in train_config.keys():
            clip_value = train_config["clip_grad_val"]
            self.clip_grad_fun = lambda params:\
                nn.utils.clip_grad_value_(parameters=params,
                                          clip_value=clip_value)
        elif "clip_grad_norm" in train_config.keys():
            max_norm = train_config["clip_grad_norm"]
            self.clip_grad_fun = lambda params:\
                nn.utils.clip_grad_norm_(parameters=params, max_norm=max_norm)

        assert not ("clip_grad_val" in train_config.keys() and
                    "clip_grad_norm" in train_config.keys()), \
            "you can only specify either clip_grad_val or clip_grad_norm"

        if "load_model" in train_config.keys():
            model_load_path = train_config["load_model"]
            self.logger.info("Loading model from {}".format(model_load_path))
            self.load_checkpoint(model_load_path)

        trainable_params = [
            n for (n, p) in self.model.named_parameters() if p.requires_grad
        ]
        self.logger.info("Trainable parameters: {}".format(trainable_params))
        assert len(trainable_params) > 0
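
# Illustrative only: a minimal configuration dict (assumed shape, not taken from any
# particular project file) covering the keys this TrainManager reads. Optional keys
# such as "scheduling", "clip_grad_norm", "load_model" or "batch_multiplier" can be
# added following the .get() calls above.
example_config = {
    "data": {"level": "word"},
    "training": {
        "loss": "crossentropy",        # only xent-style losses are accepted
        "optimizer": "adam",           # anything else falls back to SGD
        "learning_rate": 3.0e-4,
        "weight_decay": 0,
        "epochs": 10,
        "batch_size": 64,
        "eval_metric": "bleu",
        "model_dir": "models/run1",
        "use_cuda": False,
        "print_valid_sents": [0, 1, 2],
    },
}
# trainer = TrainManager(model=my_model, config=example_config)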
    def __init__(self, input_dim, nclass):
        super(LINEAR_LOGSOFTMAX, self).__init__()
        self.fc = nn.Linear(input_dim, nclass)
        self.logic = nn.LogSoftmax(dim=1)
        self.lossfunction = nn.NLLLoss()
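
# A self-contained sketch of how the fragment above (presumably a LINEAR_LOGSOFTMAX
# classifier) is typically completed and used; the forward pass and the training step
# below are assumptions, not taken from the original class. LogSoftmax followed by
# NLLLoss is equivalent to CrossEntropyLoss on raw logits.
import torch
import torch.nn as nn


class LinearLogSoftmaxSketch(nn.Module):
    def __init__(self, input_dim, nclass):
        super().__init__()
        self.fc = nn.Linear(input_dim, nclass)
        self.logic = nn.LogSoftmax(dim=1)
        self.lossfunction = nn.NLLLoss()

    def forward(self, x):
        # return per-class log-probabilities, as expected by NLLLoss
        return self.logic(self.fc(x))


# usage sketch with random features and labels
clf = LinearLogSoftmaxSketch(input_dim=2048, nclass=50)
features = torch.randn(8, 2048)
labels = torch.randint(0, 50, (8,))
loss = clf.lossfunction(clf(features), labels)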
Exemplo n.º 18
0
def main():
    start = time.time()
    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))

    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # set device to GPU if available, otherwise fall back to CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))
    log.info("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)
    log.info("For reproducibility, the seed is set to {}.".format(parser.seed))

    # set file paths
    source_name = parser.source_name
    target_name = parser.target_name

    # get saved models dir
    base_saved_models_dir = parser.save_dir
    saved_models_dir = os.path.join(base_saved_models_dir,
                                    source_name + '2' + target_name)
    plots_dir = parser.plots_dir

    log.info("We will save the models in this directory: {}".format(
        saved_models_dir))
    log.info("We will save the plots in this directory: {}".format(plots_dir))

    # get data dir
    main_data_path = parser.data_dir
    path_to_train_data = {
        'source': main_data_path + 'train.tok.' + source_name,
        'target': main_data_path + 'train.tok.' + target_name
    }
    path_to_dev_data = {
        'source': main_data_path + 'dev.tok.' + source_name,
        'target': main_data_path + 'dev.tok.' + target_name
    }
    path_to_test_data = {
        'source': main_data_path + 'test.tok.' + source_name,
        'target': main_data_path + 'test.tok.' + target_name
    }

    # Configuration
    bs = parser.batch_size
    log.info("Batch size = {}.".format(bs))

    enc_emb = parser.enc_emb
    enc_hidden = parser.enc_hidden
    enc_layers = parser.enc_layers
    rnn_type = parser.rnn_type

    dec_emb = parser.dec_emb
    dec_hidden = parser.dec_hidden
    dec_layers = parser.dec_layers

    learning_rate = parser.learning_rate
    num_epochs = parser.epochs
    attn_flag = parser.attn
    log.info("The attention flag is set to {}.".format(attn_flag))
    beam_size = parser.beam_size
    log.info("We evaluate using beam size of {}.".format(beam_size))

    train, val, test, en_lang, vi_lang = dataset_helper.train_val_load(
        "", main_data_path)

    # get vocab sizes
    log.info('English has vocab size of: {} words.'.format(en_lang.n_words))
    log.info('Vietnamese has vocab size of: {} words.'.format(vi_lang.n_words))

    # get max sentence length by the 95th percentile
    MAX_LEN = int(train['en_len'].quantile(0.95))
    log.info(
        'We will have a max sentence length of {} (95th percentile).'.format(
            MAX_LEN))

    # set data loaders
    bs_dict = {'train': bs, 'validate': 1, 'test': 1}
    shuffle_dict = {'train': True, 'validate': False, 'test': False}

    train_used = train
    val_used = val

    collate_fn_dict = {
        'train': partial(dataset_helper.vocab_collate_func, MAX_LEN=MAX_LEN),
        'validate': dataset_helper.vocab_collate_func_val,
        'test': dataset_helper.vocab_collate_func_val
    }

    transformed_dataset = {
        'train': dataset_helper.Vietnamese(train_used),
        'validate': dataset_helper.Vietnamese(val_used, val=True),
        'test': dataset_helper.Vietnamese(test, val=True)
    }

    dataloader = {
        x: DataLoader(transformed_dataset[x],
                      batch_size=bs_dict[x],
                      collate_fn=collate_fn_dict[x],
                      shuffle=shuffle_dict[x],
                      num_workers=0)
        for x in ['train', 'validate', 'test']
    }

    # instantiate encoder/decoder
    encoder_w_att = nnet_models.EncoderRNN(input_size=vi_lang.n_words,
                                           embed_dim=enc_emb,
                                           hidden_size=enc_hidden,
                                           n_layers=enc_layers,
                                           rnn_type=rnn_type).to(device)
    decoder_w_att = nnet_models.AttentionDecoderRNN(
        output_size=en_lang.n_words,
        embed_dim=dec_emb,
        hidden_size=dec_hidden,
        n_layers=dec_layers,
        attention=attn_flag).to(device)

    # instantiate optimizer
    if parser.optimizer == 'sgd':
        encoder_optimizer = optim.SGD(encoder_w_att.parameters(),
                                      lr=learning_rate,
                                      nesterov=True,
                                      momentum=0.99)
        decoder_optimizer = optim.SGD(decoder_w_att.parameters(),
                                      lr=learning_rate,
                                      nesterov=True,
                                      momentum=0.99)
    elif parser.optimizer == 'adam':
        encoder_optimizer = optim.Adam(encoder_w_att.parameters(), lr=5e-3)
        decoder_optimizer = optim.Adam(decoder_w_att.parameters(), lr=5e-3)
    else:
        raise ValueError('Invalid optimizer!')

    # instantiate scheduler
    enc_scheduler = ReduceLROnPlateau(encoder_optimizer,
                                      min_lr=1e-4,
                                      factor=0.5,
                                      patience=0)
    dec_scheduler = ReduceLROnPlateau(decoder_optimizer,
                                      min_lr=1e-4,
                                      factor=0.5,
                                      patience=0)
    criterion = nn.NLLLoss(ignore_index=global_variables.PAD_IDX)

    log.info(
        "Seq2Seq Model with the following parameters: batch_size = {}, learning_rate = {}, rnn_type = {}, enc_emb = {}, enc_hidden = {}, enc_layers = {}, dec_emb = {}, dec_hidden = {}, dec_layers = {}, num_epochs = {}, source_name = {}, target_name = {}"
        .format(bs, learning_rate, rnn_type, enc_emb, enc_hidden, enc_layers,
                dec_emb, dec_hidden, dec_layers, num_epochs, source_name,
                target_name))

    # do we want to train again?
    train_again = False
    encoder_save = '{}_att_{}bs_{}hs_{}_{}beam_enc_{}_layer'.format(
        rnn_type, bs, enc_hidden, parser.optimizer, beam_size, enc_layers)
    decoder_save = '{}_att_{}bs_{}hs_{}_{}beam_dec_{}_layer'.format(
        rnn_type, bs, enc_hidden, parser.optimizer, beam_size, dec_layers)

    if os.path.exists(utils.get_full_filepath(
            saved_models_dir, encoder_save)) and os.path.exists(
                utils.get_full_filepath(saved_models_dir,
                                        decoder_save)) and (not train_again):
        log.info("Retrieving saved encoder from {}".format(
            utils.get_full_filepath(saved_models_dir, encoder_save)))
        log.info("Retrieving saved decoder from {}".format(
            utils.get_full_filepath(saved_models_dir, decoder_save)))
        encoder_w_att.load_state_dict(
            torch.load(utils.get_full_filepath(saved_models_dir,
                                               encoder_save)))
        decoder_w_att.load_state_dict(
            torch.load(utils.get_full_filepath(saved_models_dir,
                                               decoder_save)))
    else:
        log.info("Check if encoder path exists: {}".format(
            utils.get_full_filepath(saved_models_dir, encoder_save)))
        log.info("Check if decoder path exists: {}".format(
            utils.get_full_filepath(saved_models_dir, decoder_save)))
        log.info("Encoder and Decoder do not exist! Starting to train...")
        encoder_w_att, decoder_w_att, loss_hist, acc_hist = train_utilities.train_model(
            encoder_optimizer,
            decoder_optimizer,
            encoder_w_att,
            decoder_w_att,
            criterion,
            "attention",
            dataloader,
            en_lang,
            vi_lang,
            saved_models_dir,
            encoder_save,
            decoder_save,
            num_epochs=num_epochs,
            rm=0.95,
            enc_scheduler=enc_scheduler,
            dec_scheduler=dec_scheduler)
        log.info("Total time is: {} min : {} s".format(
            (time.time() - start) // 60, (time.time() - start) % 60))
        log.info(
            "We will save the encoder/decoder in this directory: {}".format(
                saved_models_dir))

    # BLEU with beam size
    bleu_no_unk, att_score_wo, pred_wo, src_wo = train_utilities.validation_beam_search(
        encoder_w_att,
        decoder_w_att,
        dataloader['validate'],
        en_lang,
        vi_lang,
        'attention',
        beam_size,
        verbose=False)

    log.info("Bleu-{} Score (No UNK): {}".format(beam_size, bleu_no_unk))
    print("Bleu-{} Score (No UNK): {}".format(beam_size, bleu_no_unk))

    bleu_unk, att_score_wo, pred_wo, src_wo = train_utilities.validation_beam_search(
        encoder_w_att,
        decoder_w_att,
        dataloader['validate'],
        en_lang,
        vi_lang,
        'attention',
        beam_size,
        verbose=False,
        replace_unk=True)

    log.info("Bleu-{} Score (UNK): {}".format(beam_size, bleu_unk))
    print("Bleu-{} Score (UNK): {}".format(beam_size, bleu_unk))

    # generate 5 random predictions
    indexes = range(len(pred_wo))
    for i in np.random.choice(indexes, 5):
        print('Source: {} \nPrediction: {}\n---'.format(src_wo[i], pred_wo[i]))
        log.info('Source: {} \nPrediction: {}\n---'.format(
            src_wo[i], pred_wo[i]))

    log.info("Exported Binned Bleu Score Plot to {}!".format(plots_dir))
    _, _, fig = utils.get_binned_bl_score(
        encoder=encoder_w_att,
        decoder=decoder_w_att,
        val_dataset=transformed_dataset['validate'],
        attn_flag=attn_flag,
        beam_size=beam_size,
        location=plots_dir,
        collate=collate_fn_dict['validate'],
        lang_en=en_lang,
        lang_vi=vi_lang)
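
# Small illustrative check (toy tensors, not from the experiment above) of what
# ignore_index=global_variables.PAD_IDX does in the criterion: padded target
# positions contribute nothing to the loss and are excluded from the average.
import torch
import torch.nn as nn

PAD = 0  # hypothetical padding index
logits = torch.randn(4, 7)                    # 4 target tokens, 7-word vocab
log_probs = torch.log_softmax(logits, dim=1)  # NLLLoss expects log-probabilities
targets = torch.tensor([3, 5, PAD, PAD])      # last two positions are padding

masked_loss = nn.NLLLoss(ignore_index=PAD)(log_probs, targets)
# masked_loss is averaged over the two real tokens only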
Exemplo n.º 19
0
    def __init__(self, opt, shared=None):
        """Set up model if shared params not set, otherwise no work to do."""
        super().__init__(opt, shared)
        opt = self.opt  # there is a deepcopy in the init

        # all instances may need some params
        self.truncate = opt['truncate'] if opt['truncate'] > 0 else None
        self.metrics = {'loss': 0.0, 'num_tokens': 0}
        self.history = {}
        self.report_freq = opt.get('report_freq', 0.001)
        states = {}

        # check for cuda
        self.use_cuda = not opt.get('no_cuda') and torch.cuda.is_available()
        if opt.get('numthreads', 1) > 1:
            torch.set_num_threads(1)

        if shared:
            # set up shared properties
            self.opt = shared['opt']
            opt = self.opt
            self.dict = shared['dict']
            self.START_IDX = shared['START_IDX']
            self.END_IDX = shared['END_IDX']
            self.NULL_IDX = shared['NULL_IDX']
            # answers contains a batch_size list of the last answer produced
            self.answers = shared['answers']

            if 'model' in shared:
                # model is shared during hogwild
                self.model = shared['model']
                self.metrics = shared['metrics']
                states = shared['states']
        else:
            # this is not a shared instance of this class, so do full init
            # answers contains a batch_size list of the last answer produced
            self.answers = [None] * opt['batchsize']

            if self.use_cuda:
                print('[ Using CUDA ]')
                torch.cuda.set_device(opt['gpu'])

            init_model = None
            # check first for 'init_model' for loading model from file
            if opt.get('init_model') and os.path.isfile(opt['init_model']):
                init_model = opt['init_model']
            # next check for 'model_file', this would override init_model
            if opt.get('model_file') and os.path.isfile(opt['model_file']):
                init_model = opt['model_file']

            if init_model is not None:
                # load model parameters if available
                print('[ Loading existing model params from {} ]'.format(
                    init_model))
                states = self.load(opt['model_file'])

            if ((init_model is not None
                 and os.path.isfile(init_model + '.dict'))
                    or opt['dict_file'] is None):
                opt['dict_file'] = init_model + '.dict'
            # load dictionary and basic tokens & vectors
            self.dict = DictionaryAgent(opt)
            self.id = 'Seq2Seq'
            # we use START markers to start our output
            self.START_IDX = self.dict[self.dict.start_token]
            # we use END markers to end our output
            self.END_IDX = self.dict[self.dict.end_token]
            # get index of null token from dictionary (probably 0)
            self.NULL_IDX = self.dict[self.dict.null_token]

            if not hasattr(self, 'model_class'):
                # this allows child classes to override this but inherit init
                self.model_class = Seq2seq
            self.model = self.model_class(opt,
                                          len(self.dict),
                                          padding_idx=self.NULL_IDX,
                                          start_idx=self.START_IDX,
                                          end_idx=self.END_IDX,
                                          longest_label=states.get(
                                              'longest_label', 1))

            if opt['embedding_type'] != 'random':
                # set up preinitialized embeddings
                try:
                    import torchtext.vocab as vocab
                except ModuleNotFoundError as ex:
                    print(
                        'Please install torchtext with `pip install torchtext`'
                    )
                    raise ex
                if opt['embedding_type'].startswith('glove'):
                    init = 'glove'
                    embs = vocab.GloVe(name='840B',
                                       dim=300,
                                       cache=os.path.join(
                                           opt['parlai_home'], 'data',
                                           'models', 'glove_vectors'))
                elif opt['embedding_type'].startswith('fasttext'):
                    init = 'fasttext'
                    embs = vocab.FastText(language='en',
                                          cache=os.path.join(
                                              opt['parlai_home'], 'data',
                                              'models', 'fasttext_vectors'))
                else:
                    raise RuntimeError('embedding type not implemented')

                if opt['embeddingsize'] != 300:
                    rp = torch.Tensor(300, opt['embeddingsize']).normal_()
                    t = lambda x: torch.mm(x.unsqueeze(0), rp)
                else:
                    t = lambda x: x
                cnt = 0
                for w, i in self.dict.tok2ind.items():
                    if w in embs.stoi:
                        vec = t(embs.vectors[embs.stoi[w]])
                        self.model.decoder.lt.weight.data[i] = vec
                        cnt += 1
                        if opt['lookuptable'] in ['unique', 'dec_out']:
                            # also set encoder lt, since it's not shared
                            self.model.encoder.lt.weight.data[i] = vec
                print('Seq2seq: initialized embeddings for {} tokens from {}.'
                      ''.format(cnt, init))

            if states:
                # set loaded states if applicable
                self.model.load_state_dict(states['model'])

            if self.use_cuda:
                self.model.cuda()

        if hasattr(self, 'model'):
            # if model was built, do more setup
            self.clip = opt.get('gradient_clip', -1)
            self.rank = opt['rank_candidates']

            # set up tensors once
            self.xs = torch.LongTensor(1, 1)
            self.ys = torch.LongTensor(1, 1)
            if self.rank:
                self.cands = torch.LongTensor(1, 1, 1)

            # set up criteria
            if opt.get('numsoftmax', 1) > 1:
                self.criterion = nn.NLLLoss(ignore_index=self.NULL_IDX,
                                            size_average=False)
            else:
                self.criterion = nn.CrossEntropyLoss(
                    ignore_index=self.NULL_IDX, size_average=False)

            if self.use_cuda:
                # push to cuda
                self.xs = self.xs.cuda()
                self.ys = self.ys.cuda()
                if self.rank:
                    self.cands = self.cands.cuda()
                self.criterion.cuda()

            # set up optimizer
            lr = opt['learningrate']
            optim_class = Seq2seqAgent.OPTIM_OPTS[opt['optimizer']]
            kwargs = {'lr': lr}
            if opt.get('momentum') > 0 and opt['optimizer'] in [
                    'sgd', 'rmsprop'
            ]:
                kwargs['momentum'] = opt['momentum']
                if opt['optimizer'] == 'sgd':
                    kwargs['nesterov'] = True
            if opt['optimizer'] == 'adam':
                # https://openreview.net/forum?id=ryQu7f-RZ
                kwargs['amsgrad'] = True

            if opt['embedding_type'].endswith('fixed'):
                print('Seq2seq: fixing embedding weights.')
                self.model.decoder.lt.weight.requires_grad = False
                self.model.encoder.lt.weight.requires_grad = False
                if opt['lookuptable'] in ['dec_out', 'all']:
                    self.model.decoder.e2s.weight.requires_grad = False
            self.optimizer = optim_class(
                [p for p in self.model.parameters() if p.requires_grad],
                **kwargs)
            if states.get('optimizer'):
                if states['optimizer_type'] != opt['optimizer']:
                    print('WARNING: not loading optim state since optim class '
                          'changed.')
                else:
                    self.optimizer.load_state_dict(states['optimizer'])
                    if self.use_cuda:
                        for state in self.optimizer.state.values():
                            for k, v in state.items():
                                if isinstance(v, torch.Tensor):
                                    state[k] = v.cuda()
            self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer, 'min', factor=0.5, patience=3, verbose=True)

        self.reset()
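
# A minimal sketch of the embedding-initialisation trick used above: when the
# model's embedding size differs from the 300-d pretrained vectors, every GloVe /
# fastText vector is pushed through one fixed random projection so the dimensions
# match. Names here (emb_dim, pretrained_vec) are illustrative.
import torch

emb_dim = 128
pretrained_vec = torch.randn(300)          # stand-in for embs.vectors[embs.stoi[w]]
rp = torch.Tensor(300, emb_dim).normal_()  # one shared random projection matrix
projected = torch.mm(pretrained_vec.unsqueeze(0), rp)  # shape (1, emb_dim)
# self.model.decoder.lt.weight.data[i] = projected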
Exemplo n.º 20
0
    def __init__(self,
                 emb_dim,
                 hidden_dim,
                 num_layers,
                 heads,
                 depth_size,
                 filter_size,
                 tokenizer,
                 pretrained_file,
                 pointer_gen,
                 logger,
                 weight_sharing=True,
                 model_file_path=None,
                 is_eval=False,
                 load_optim=False,
                 label_smoothing=False,
                 multi_input=False,
                 context_size=2,
                 attention_fusion_type='mean'):
        super(TransformerSeq2Seq, self).__init__()
        self.tokenizer = tokenizer
        self.vocab_size = tokenizer.n_words

        self.embed_obj = Embedding(tokenizer, emb_dim, pretrained_file, logger)

        self.embedding = self.embed_obj.get_embedding()
        self.encoder = Encoder(emb_dim,
                               hidden_dim,
                               num_layers=num_layers,
                               num_heads=heads,
                               total_key_depth=depth_size,
                               total_value_depth=depth_size,
                               filter_size=filter_size)

        self.decoder = Decoder(emb_dim,
                               hidden_dim,
                               num_layers=num_layers,
                               num_heads=heads,
                               total_key_depth=depth_size,
                               total_value_depth=depth_size,
                               filter_size=filter_size,
                               multi_input=multi_input,
                               context_size=context_size,
                               attention_fusion_type=attention_fusion_type)
        self.generator = Generator(hidden_dim, self.vocab_size, pointer_gen)
        self.pad_id = tokenizer.pad_id
        self.n_embeddings = tokenizer.n_words
        self.embeddings_size = emb_dim
        self.multi_input = multi_input

        if weight_sharing:
            # Share the weight matrix between target word embedding & the final logit dense layer
            self.generator.proj.weight = self.embedding.weight

        self.criterion = nn.NLLLoss(ignore_index=self.pad_id)
        if label_smoothing:
            self.criterion = LabelSmoothing(size=self.vocab_size,
                                            padding_idx=self.pad_id,
                                            smoothing=0.1)
            self.criterion_ppl = nn.NLLLoss(ignore_index=self.pad_id)
        if is_eval:
            self.encoder = self.encoder.eval()
            self.decoder = self.decoder.eval()
            self.generator = self.generator.eval()
            self.embedding = self.embedding.eval()
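
# Clarifying sketch (toy tensors, not from the model above): with label smoothing
# enabled, the smoothed criterion drives the gradients while the plain NLLLoss copy
# (criterion_ppl) is kept only to report an unsmoothed loss / perplexity on the same
# log-probabilities.
import torch
import torch.nn as nn

pad_id = 0
log_probs = torch.log_softmax(torch.randn(6, 100), dim=1)  # 6 tokens, 100-word vocab
targets = torch.randint(1, 100, (6,))

nll = nn.NLLLoss(ignore_index=pad_id)
perplexity = torch.exp(nll(log_probs, targets))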
Exemplo n.º 21
0
itos_total = ["EOS", "EOW", "SOS"] + itos_pos_fine + itos[:vocab_size]
assert len(itos_total) == outVocabSize

initrange = 0.1
crossEntropy = 10.0

import torch.nn.functional

counter = 0

lastDevLoss = None
failedDevRuns = 0
devLosses = []

lossModule = nn.NLLLoss()
lossModuleTest = nn.NLLLoss(size_average=False, reduce=False, ignore_index=2)
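# Note (version-dependent): size_average/reduce are deprecated in newer PyTorch
# releases; the combination size_average=False, reduce=False corresponds to a
# per-element loss. An equivalent modern construction would be:
#   lossModuleTest = nn.NLLLoss(reduction='none', ignore_index=2)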

corpusBase = corpus_cached["train"]
corpus = corpusBase.iterator()

# get the initial grammar

# perform splits on the grammar

# run EM

unary_rules = {}

binary_rules = {}
def hyperparameters_tuning_LBFGS_minibatch(trainset, valset, batchsize_grid,
                                           history_size_grid, epochs,
                                           model_NN):

    training_loss = []
    test_loss = []
    training_accuracy = []
    test_accuracy = []
    times = []

    for bs in batchsize_grid:
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=bs,
                                                  shuffle=True)
        valloader = torch.utils.data.DataLoader(valset,
                                                batch_size=bs,
                                                shuffle=True)
        dataiter = iter(trainloader)
        images, _ = next(dataiter)
        image_size = images[0].shape[1]
        input_size = int(image_size**2)
        output_size = 10
        for hs in history_size_grid:
            print("Minibatch size: ", bs)
            print("History size: ", hs)

            if model_NN == "FCNN":
                sizes = [input_size, 128, 64, output_size]
                model = fully_connected_NN(sizes)
                criterion = nn.NLLLoss()
                optimizer = optim.LBFGS(model.parameters(),
                                        max_iter=hs,
                                        history_size=hs,
                                        line_search_fn='strong_wolfe')

            elif model_NN == "CNN":
                model = ConvNet(image_size)
                criterion = nn.CrossEntropyLoss()
                optimizer = optim.LBFGS(model.parameters(),
                                        max_iter=hs,
                                        history_size=hs,
                                        line_search_fn='strong_wolfe')

            if model_NN == "FCNN":
                train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize(
                    optimizer,
                    epochs,
                    trainloader,
                    valloader,
                    model,
                    criterion,
                    method="LBFGS")
            elif model_NN == "CNN":
                train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize_CNN(
                    optimizer,
                    epochs,
                    trainloader,
                    valloader,
                    model,
                    criterion,
                    method="LBFGS")

            times.append(train_time)
            training_loss.append(train_losses)
            test_loss.append(test_losses)
            training_accuracy.append(train_accuracies)
            test_accuracy.append(test_accuracies)

    return training_loss, test_loss, training_accuracy, test_accuracy, times
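
# Hypothetical usage sketch for the grid search above: tiny random stand-ins for
# MNIST-like data (1x28x28 images, 10 classes); the grids and dataset names are
# placeholders, and fully_connected_NN / optimize come from the surrounding project.
import torch
from torch.utils.data import TensorDataset

toy_train = TensorDataset(torch.randn(256, 1, 28, 28), torch.randint(0, 10, (256,)))
toy_val = TensorDataset(torch.randn(64, 1, 28, 28), torch.randint(0, 10, (64,)))
# training_loss, test_loss, training_accuracy, test_accuracy, times = \
#     hyperparameters_tuning_LBFGS_minibatch(toy_train, toy_val,
#                                            batchsize_grid=[32, 64],
#                                            history_size_grid=[5, 10],
#                                            epochs=1, model_NN="FCNN")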
Exemplo n.º 23
0
    def __setup_model(self, **kwargs):
        """Helper to Classifier.__init__()
        Setup the Classifier's model using checkpoint information or the
        information to load a new model and classifier for training.

        Keyword Args:
        Will always be called with the following, which is enough information
        to load a new model and add a classifier to be trained:
        - model_architecture
        - output_size
        - hidden_layers
        - learn_rate
        - drop_p
        - class_to_idx
        If the following are passed to this function, the checkpoint state will
        be loaded so the model can be used to classify images or so training
        can continue.
        - input_size
        - current_epoch
        - model_state_dict
        - optimizer_state_dict
        """
        self.model_architecture = kwargs['model_architecture'].upper()
        self.model = Classifier.IMAGENET_MODELS[self.model_architecture](
            pretrained=True)

        if 'input_size' in kwargs:  # Loading from a checkpoint
            self.input_size = kwargs['input_size']
            self.model.current_epoch = kwargs['current_epoch']

        else:  # No checkpoint, will be creating a new classifier for the model
            # The number of features coming from the feature detector CNN
            if 'ALEXNET' in self.model_architecture:
                self.input_size = self.model.classifier[1].in_features
            elif 'VGG' in self.model_architecture:
                self.input_size = self.model.classifier[0].in_features
            elif 'DENSENET' in self.model_architecture:
                self.input_size = self.model.classifier.in_features

            # Freeze the feature detector parameters to prevent backpropagating
            # through them.
            for param in self.model.parameters():
                param.requires_grad = False

            self.model.current_epoch = 1

        self.output_size = kwargs['output_size']
        self.hidden_layers = kwargs['hidden_layers']
        self.learn_rate = kwargs['learn_rate']
        self.drop_p = kwargs['drop_p']

        self.model.class_to_idx = kwargs['class_to_idx']
        self.model.classifier = Network(self.input_size, self.output_size,
                                        self.hidden_layers, self.drop_p)

        if 'model_state_dict' in kwargs:  # load the state from checkpoint
            self.model.load_state_dict(kwargs['model_state_dict'])

        self.criterion = nn.NLLLoss()
        self.optimizer = optim.Adam(self.model.classifier.parameters(),
                                    lr=self.learn_rate)

        if 'optimizer_state_dict' in kwargs:  # load the state from checkpoint
            self.optimizer.load_state_dict(kwargs['optimizer_state_dict'])
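
# Illustrative checkpoint dictionary (key names follow the docstring above; the
# values and the architecture string are placeholders) that would switch
# __setup_model into "load" mode instead of creating a fresh classifier.
example_checkpoint_kwargs = {
    'model_architecture': 'vgg16',
    'output_size': 102,
    'hidden_layers': [512, 256],
    'learn_rate': 0.001,
    'drop_p': 0.5,
    'class_to_idx': {'daisy': 0, 'rose': 1},
    # the keys below are only present when restoring from a checkpoint
    'input_size': 25088,
    'current_epoch': 4,
    # 'model_state_dict': model.state_dict(),
    # 'optimizer_state_dict': optimizer.state_dict(),
}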
def hyperparameters_tuning_LBFGS_new_minibatch(trainset, valset,
                                               batchsize_grid, max_iter_grid,
                                               epochs, model_NN):

    training_loss = []
    test_loss = []
    training_accuracy = []
    test_accuracy = []
    times = []
    parameters = []
    results = []
    Names = [
        "training_loss", "training_accuracy", "test_loss", "test_accuracy",
        "times", "parameters: batch iter"
    ]
    results.append(Names)

    for bs in batchsize_grid:

        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=bs,
                                                  shuffle=True)
        valloader = torch.utils.data.DataLoader(valset,
                                                batch_size=bs,
                                                shuffle=True)
        dataiter = iter(trainloader)
        images, _ = next(dataiter)
        image_size = images[0].shape[1]
        input_size = int(image_size**2)
        output_size = 10

        for max_iter_ in max_iter_grid:
            print("Minibatch size: ", bs)
            print("History size: ", max_iter_)
            parameter = []
            if model_NN == "FCNN":
                sizes = [input_size, 128, 64, output_size]
                model = fully_connected_NN(sizes)
                criterion = nn.NLLLoss()
                optimizer = LBFGSNew(model.parameters(),
                                     max_iter=max_iter_,
                                     history_size=max_iter_,
                                     line_search_fn=True,
                                     batch_mode=True)

            elif model_NN == "CNN":
                model = ConvNet(image_size)
                criterion = nn.CrossEntropyLoss()
                optimizer = LBFGSNew(model.parameters(),
                                     max_iter=max_iter_,
                                     history_size=max_iter_,
                                     line_search_fn=True,
                                     batch_mode=True)
            elif model_NN == "CNN_BN":
                model = ConvNet_BN(image_size)
                criterion = nn.CrossEntropyLoss()
                optimizer = LBFGSNew(model.parameters(),
                                     max_iter=max_iter_,
                                     history_size=max_iter_,
                                     line_search_fn=True,
                                     batch_mode=True)

            if model_NN == "FCNN":
                train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize(
                    optimizer,
                    epochs,
                    trainloader,
                    valloader,
                    model,
                    criterion,
                    method="LBFGS")

            elif model_NN == "CNN":
                train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize_CNN(
                    optimizer,
                    epochs,
                    trainloader,
                    valloader,
                    model,
                    criterion,
                    method="LBFGS")

            elif model_NN == "CNN_BN":
                train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize_CNN(
                    optimizer,
                    epochs,
                    trainloader,
                    valloader,
                    model,
                    criterion,
                    method="LBFGS")

            # save the parameters
            parameter = []
            parameter.append(bs)
            parameter.append(max_iter_)

            parameters.append(parameter)
            times.append(train_time)
            training_loss.append(train_losses)
            test_loss.append(test_losses)
            training_accuracy.append(train_accuracies)
            test_accuracy.append(test_accuracies)

    results.append(training_loss)
    results.append(training_accuracy)
    results.append(test_loss)
    results.append(test_accuracy)
    results.append(times)
    results.append(parameters)

    return results
Exemplo n.º 25
0
    def __init__(self, weight=None):
        super(CrossEntropyLoss2d, self).__init__()

        self.loss = nn.NLLLoss(weight)
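
# A sketch of how such a 2D cross-entropy wrapper is usually completed and called
# for semantic segmentation: log-softmax over the channel dimension, then NLLLoss
# against an (N, H, W) map of class indices. The forward below is an assumption,
# not taken from the snippet above.
import torch
import torch.nn as nn
import torch.nn.functional as F


class CrossEntropyLoss2dSketch(nn.Module):
    def __init__(self, weight=None):
        super().__init__()
        self.loss = nn.NLLLoss(weight)

    def forward(self, outputs, targets):
        # outputs: (N, C, H, W) raw scores; targets: (N, H, W) class indices
        return self.loss(F.log_softmax(outputs, dim=1), targets)


# usage sketch
criterion2d = CrossEntropyLoss2dSketch()
scores = torch.randn(2, 5, 8, 8)            # 2 images, 5 classes, 8x8 maps
labels = torch.randint(0, 5, (2, 8, 8))
seg_loss = criterion2d(scores, labels)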
def hyperparameters_tuning_Curveball_minibatch(trainset, valset,
                                               batchsize_grid, epochs,
                                               model_NN):

    training_loss = []
    test_loss = []
    training_accuracy = []
    test_accuracy = []
    times = []
    parameters = []
    results = []
    Names = [
        "training_loss", "training_accuracy", "test_loss", "test_accuracy",
        "times", "parameters: batch iter"
    ]
    results.append(Names)

    for bs in batchsize_grid:

        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=bs,
                                                  shuffle=True)
        valloader = torch.utils.data.DataLoader(valset,
                                                batch_size=bs,
                                                shuffle=True)
        dataiter = iter(trainloader)
        images, _ = next(dataiter)
        image_size = images[0].shape[1]
        input_size = int(image_size**2)
        output_size = 10

        print("Minibatch size: ", bs)

        parameter = []
        if model_NN == "FCNN":
            sizes = [input_size, 128, 64, output_size]
            model = fully_connected_NN(sizes)
            criterion = nn.NLLLoss()
            optimizer = CurveBall(model.parameters(), lr=0.1, momentum=0.9)

        elif model_NN == "CNN":
            model = ConvNet(image_size)
            criterion = nn.CrossEntropyLoss()
            optimizer = CurveBall(model.parameters(), lr=-1, momentum=-1)
        elif model_NN == "CNN_BN":
            model = ConvNet_BN(image_size)
            criterion = nn.CrossEntropyLoss()
            optimizer = CurveBall(model.parameters(), lr=-1, momentum=-1)

        if model_NN == "FCNN":
            train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize(
                optimizer,
                epochs,
                trainloader,
                valloader,
                model,
                criterion,
                method="CurveBall")

        elif model_NN == "CNN":
            train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize_CNN(
                optimizer,
                epochs,
                trainloader,
                valloader,
                model,
                criterion,
                method="CurveBall")

        elif model_NN == "CNN_BN":
            train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize_CNN(
                optimizer,
                epochs,
                trainloader,
                valloader,
                model,
                criterion,
                method="CurveBall")

        # save the parameters
        parameter = []
        parameter.append(bs)

        parameters.append(parameter)
        times.append(train_time)
        training_loss.append(train_losses)
        test_loss.append(test_losses)
        training_accuracy.append(train_accuracies)
        test_accuracy.append(test_accuracies)

    results.append(training_loss)
    results.append(training_accuracy)
    results.append(test_loss)
    results.append(test_accuracy)
    results.append(times)
    results.append(parameters)

    return results
Exemplo n.º 27
0
def main():
    epochs = 10
    batchSize = 64
    lr = 0.00001

    #writer = SummaryWriter('./logs')
    #train = pd.read_csv(f'train_sam.csv')
    #train.columns = ["article", "title"]
    TEXT = data.Field(tokenize=data.get_tokenizer('spacy'),
                      lower=True,
                      eos_token='_eos_')
    trn_data_fields = [("original", TEXT), ("summary", TEXT)]
    train, valid = data.TabularDataset.splits(path=f'',
                                              train='train_sam.csv',
                                              validation='valid_sam.csv',
                                              format='csv',
                                              skip_header=True,
                                              fields=trn_data_fields)

    TEXT.build_vocab(train, vectors=GloVe(name='6B', dim=200))

    train_iter, val_iter = data.BucketIterator.splits(
        (train, valid),
        batch_sizes=(batchSize, batchSize),
        sort_key=lambda x: len(x.original),
        sort_within_batch=False,
        repeat=False)

    input_size = len(TEXT.vocab)
    hidden_size = 128 * 2
    dropout = 0.5
    num_layers = 1

    bidirectional = True

    encoder = Encoder(input_size, hidden_size, num_layers, batchSize,
                      bidirectional)
    decoder = Decoder(input_size,
                      num_layers * hidden_size * (1 + int(bidirectional)),
                      num_layers, dropout, batchSize)

    # define your LSTM loss function here
    #loss_func = F.cross_entropy()

    # define optimizer for lstm model
    #optim = Adam(model.parameters(), lr=lr)
    encoder_optimizer = Adam(encoder.parameters(), lr=lr)
    decoder_optimizer = Adam(decoder.parameters(), lr=lr)
    losses = []
    valLosses = []
    originals = []
    summaries = []

    criterion = nn.NLLLoss()

    for epoch in range(epochs):
        originals = []
        genSumms = []
        origSumms = []
        batchNum = 0
        step = 0
        valBatchNum = 0
        for batch in train_iter:
            batchS = len(batch)
            batchNum += 1
            loss = 0
            orig = batch.original
            summ = batch.summary

            encoder_outputs = torch.zeros(
                80, encoder.hidden_size * (1 + int(bidirectional)))
            encoder_hidden = encoder.initHidden(batchS)
            for ei in range(len(orig)):

                encoder_output, encoder_hidden = encoder.forward(
                    orig[ei], encoder_hidden, batchS)
                #print(encoder_outputs[ei].shape)
                #print(encoder_output[0,0].shape)
                encoder_outputs[ei] = encoder_output[0, 0]

            decoder_hidden = encoder_hidden
            decoder_input = torch.zeros(batchS).long()

            genSumm = []
            origSumm = []
            for di in range(len(summ)):
                decoder_output, decoder_hidden, decoder_attention = decoder.forward(
                    decoder_hidden, encoder_outputs, decoder_input, batchS)
                loss += criterion(decoder_output, summ[di])
                #print(decoder_output)
                #print(summ[di])
                decoder_input = summ[di]
                DO = decoder_output.detach().numpy()
                genSumm.append(np.argmax(DO[5]))
                origSumm.append(summ[di][5])
                #print(np.argmax(DO[5]))

                lossAvg = loss.item() / len(summ)

            encoder_optimizer.zero_grad()

            decoder_optimizer.zero_grad()

            loss.backward()

            #writer.add_scalar('training loss', loss.item(), step+1)
            step += 1

            encoder_optimizer.step()
            decoder_optimizer.step()
            genSumms.append(genSumm)
            origSumms.append(origSumm)

            originals.append(orig[:, 5])
            genTensorO = torch.IntTensor(origSumms[0])
            genTensor = torch.IntTensor(genSumms[0])

            if (batchNum % 25 == 0):
                losses.append(lossAvg)
                print("Epoch: [{}/{}], Batch:[{}/{}], Loss: {}".format(
                    epoch, epochs, batchNum, len(train_iter), lossAvg))

                translatedOrig = word_ids_to_sentence(originals[0],
                                                      TEXT.vocab,
                                                      join=' ')
                print(translatedOrig)
                translatedSummO = word_ids_to_sentence(genTensorO,
                                                       TEXT.vocab,
                                                       join=' ')
                print(translatedSummO)
                translatedSumm = word_ids_to_sentence(genTensor,
                                                      TEXT.vocab,
                                                      join=' ')
                print(translatedSumm)

            #genSumms = []

            if (batchNum % 25 == 0):
                for batchVal in val_iter:
                    valBatchNum += 1
                    valLoss = 0
                    batchS = len(batch)
                    valOrig = batchVal.original
                    valSumm = batchVal.summary
                    encoder_outputs = torch.zeros(
                        80, encoder.hidden_size * (1 + int(bidirectional)))
                    encoder_hidden = encoder.initHidden(batchS)
                    for ei in range(len(valOrig)):
                        encoder_output, encoder_hidden = encoder.forward(
                            valOrig[ei], encoder_hidden, batchS)
                        encoder_outputs[ei] = encoder_output[0, 0]

                    decoder_hidden = encoder_hidden
                    decoder_input = torch.zeros(batchS).long()

                    #genSumm = []
                    for di in range(len(valSumm)):
                        decoder_output, decoder_hidden, decoder_attention = decoder.forward(
                            decoder_hidden, encoder_outputs, decoder_input,
                            batchS)
                        valLoss += criterion(decoder_output, valSumm[di])
                        decoder_input = valSumm[di]
                        #DO = decoder_output.detach().numpy()
                        #genSumm.append(np.argmax(DO[5]))

                    valLossAvg = valLoss.item() / len(valSumm)

                    valLosses.append(valLossAvg)
                    print("VALLoss: {}".format(valLossAvg))
                    break

    plt.figure()
    plt.plot(losses)
    plt.plot(valLosses)
    plt.ylabel('Loss')
    plt.show()
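
# Reminder sketch (toy tensors, not from the training loop above): nn.NLLLoss
# expects log-probabilities, so the decoder's output layer is assumed to end in
# LogSoftmax; feeding raw logits would produce a misleading loss.
import torch
import torch.nn as nn

vocab = 11
raw_logits = torch.randn(3, vocab)               # 3 decoder steps from one batch
log_probs = nn.LogSoftmax(dim=1)(raw_logits)
targets = torch.randint(0, vocab, (3,))
step_loss = nn.NLLLoss()(log_probs, targets)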
Exemplo n.º 28
0
def main(to_csv_path, train_obj, training_iter=10, sample_size=None):
    """Main function."""
    training_iter = int(training_iter)

    def _generate(start_letter):
        sample_char_idx = [char2idx[start_letter]]
        logger.debug('sample_char_idx: %s', sample_char_idx)
        input_ = input_tensor(sample_char_idx)
        hidden = model.init_hidden()
        output_name = start_letter

        for i in range(MAX_LENGTH):
            output, hidden = model(input_, hidden)
            _, topi = output.data.topk(1)
            logger.debug('topi before: %s', topi)
            topi = topi.item()
            logger.debug('topi: %s', topi)
            logger.debug('EOS index in char2idx: %s', char2idx['EOS'])
            if topi == char2idx['EOS']:
                break
            else:
                letter = idx2char[topi]
                output_name += letter
            input_ = input_tensor([topi])

        return output_name

    def generate(start_letters):
        for start_letter in start_letters:
            print(_generate(start_letter))

    df = pd.read_csv(to_csv_path)
    text_for_train = df[train_obj].unique()
    all_char_set = set(
        [chr(i) for i in range(ord('a'),
                               ord('z') + 1)] +
        [chr(i) for i in range(0x30a1, 0x30f5)] + [
            '0', '@', '!', '%', '?', '、', '。', '・', '.', 'ー', '/', '【', '】',
            '+', '-', '{', '}', '=', '(', ')', ':'
        ])

    print(all_char_set)
    char2idx = {char: i for i, char in enumerate(all_char_set)}
    char2idx['EOS'] = len(char2idx)

    idx2char = {v: k for k, v in char2idx.items()}

    if sample_size is None:
        names_idxs = [[char2idx[char] for char in name_str]
                      for name_str in text_for_train]
    else:
        names_idxs = [[char2idx[char] for char in name_str]
                      for name_str in text_for_train[:int(sample_size)]]

    print(len(names_idxs))

    # build model
    model = LSTM(input_dim=len(char2idx), embed_dim=100, hidden_dim=128)

    criterion = nn.NLLLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=0.001)

    for itr in range(training_iter + 1):
        random.shuffle(names_idxs)
        total_loss = 0

        for i, name_idxs in enumerate(names_idxs):
            input_ = input_tensor(name_idxs)
            target = target_tensor(name_idxs[1:], char2idx['EOS'])

            loss = train(model, criterion, input_, target)
            total_loss += loss

            if not (i % 100):
                print('step: {}'.format(i))

            optimizer.step()

        print(itr, '/', training_iter)
        print('loss {:.4f}'.format(float(total_loss / len(names_idxs))))

    generate([chr(i) for i in range(0x30a1, 0x30f5)])
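
# The train() helper called above is not shown; a minimal sketch of what it is
# assumed to do for this char-level LSTM: forward the whole name, compute NLLLoss
# against the shifted targets, backpropagate, and return the loss value
# (the outer loop then applies optimizer.step()).
def train_sketch(model, criterion, input_, target):
    model.zero_grad()
    hidden = model.init_hidden()
    output, hidden = model(input_, hidden)  # output: (seq_len, vocab) log-probs
    loss = criterion(output, target)
    loss.backward()
    return loss.item()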