if idx != 0:
    data = lines.split('\t')[2]
    data = normalizeString(data).strip()
    obj1.add_text(data)

print('read all the lines')
limitDict(vocabLimit, obj1)

if use_cuda:
    model = Model(50, 100).cuda()
else:
    model = Model(50, 100)

loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
epochs = 4

torch.save(model.state_dict(), 'model' + str(0) + '.pth')
print('starting training')

for i in range(epochs):
    avg_loss = 0.0
    for idx, lines in enumerate(f):
        if idx != 0:
            data = lines.split('\t')[2]
            data = normalizeString(data).strip()
            input_data = [obj1.word_to_idx[word] for word in data.split(' ')]
            # print("input data length ", len(input_data))
def main(): ##################### # Generate data ##################### # data loader - if isServerRun: path = '/home/[email protected]/thesisML/' else: path = '/Users/chanaross/dev/Thesis/UberData/' fileName = '3D_allDataLatLonCorrected_20MultiClass_500gridpickle_30min.p' dataInput = np.load(path + fileName) flag_save_network = True xmin = 0 xmax = dataInput.shape[0] ymin = 0 ymax = dataInput.shape[1] zmin = 48 zmax = np.floor(dataInput.shape[2]*0.7).astype(int) dataInput = dataInput[xmin:xmax, ymin:ymax, zmin:zmax] # shrink matrix size for fast training in order to test model dataInput = dataInput[5:6, 10:11, :] smoothParam = [10, 30, 40] #[10, 20, 30, 40] #[10, 15, 30] testSize = 0.2 # define hyper parameters - hidden_sizeVec = [64, 128, 256] # [20, 64, 256, 512] #[20, 64, 264, 512] # [20, 40, 64, 128] sequence_sizeVec = [50, 60] # [5, 10, 20] # [5, 20, 30, 40] # [5, 10, 15] # length of sequence for lstm network batch_sizeVec = [40] num_epochs = 500 # optimizer parameters - lrVec = [0.05, 0.01] #[0.05, 0.2, 0.5] # [0.1, 0.5, 0.9] #[0.1, 0.5, 0.9] # [0.1, 0.01, 0.001] otVec = [1] # [1, 2] dmp = 0 mm = 0.9 eps = 1e-08 wdVec = [2e-3] # create case vectors networksDict = {} itr = itertools.product(smoothParam, sequence_sizeVec, batch_sizeVec, hidden_sizeVec, lrVec, otVec, wdVec) for i in itr: networkStr = 'smooth_{0}_seq_{1}_bs_{2}_hs_{3}_lr_{4}_ot_{5}_wd_{6}'.format(i[0], i[1], i[2], i[3], i[4], i[5], i[6]) networksDict[networkStr] = {'seq': i[1], 'bs': i[2], 'hs': i[3], 'lr': i[4], 'ot': i[5], 'wd': i[6], 'sm': i[0]} for netConfig in networksDict: dataInputSmooth = moving_average(dataInput, networksDict[netConfig]['sm']) # smoothing data so that results are more clear to network # dataInput[dataInput>1] = 1 # limit all events larger than 10 to be 10 # define important sizes for network - x_size = dataInputSmooth.shape[0] y_size = dataInputSmooth.shape[1] dataSize = dataInputSmooth.shape[2] class_size = (np.max(np.unique(dataInputSmooth)) + 1).astype(int) num_train = int((1 - testSize) * dataSize) grid_size = x_size * y_size # output file outFile = open('LSTM_networksOutput.csv', 'w') outFile.write('Name;finalAcc;finalLoss;trainTime;numWeights;NumEpochs\n') print('Net Parameters: ' + netConfig) # create network based on input parameter's - hidden_size = networksDict[netConfig]['hs'] batch_size = networksDict[netConfig]['bs'] sequence_size = networksDict[netConfig]['seq'] lr = networksDict[netConfig]['lr'] ot = networksDict[netConfig]['ot'] wd = networksDict[netConfig]['wd'] my_net = Model(grid_size, hidden_size, batch_size, sequence_size, class_size) my_net.lstm = my_net.create_lstm(grid_size) # lstm receives all grid points and seq length of my_net.fc_after_lstm = my_net.create_fc_after_lstm(my_net.hiddenSize, grid_size*class_size) my_net.to(device) print("model device is:") print(next(my_net.parameters()).device) numWeights = sum(param.numel() for param in my_net.parameters()) print('number of parameters: ', numWeights) my_net.optimizer = CreateOptimizer(my_net.parameters(), ot, lr, dmp, mm, eps, wd) my_net.lossCrit = nn.NLLLoss(size_average=True) # nn.BCELoss(size_average=True) my_net.maxEpochs = num_epochs my_net.lr = lr my_net.wd = wd my_net.smoothingParam = networksDict[netConfig]['sm'] # network_path = '/Users/chanaross/dev/Thesis/MachineLearning/forGPU/GPU_results/limitedZero_500grid/' # network_name = 'gridSize11_epoch4_batch5_torch.pkl' # my_net = torch.load(network_path + network_name, map_location=lambda storage, loc: storage) # load data from data loader and create train and 
test sets data_train = dataInputSmooth[:, :, 0:num_train] data_test = dataInputSmooth[:, :, num_train:] dataset_uber_train = DataSet_oneLSTM_allGrid(data_train, sequence_size) dataset_uber_test = DataSet_oneLSTM_allGrid(data_test , sequence_size) # creating data loader dataloader_uber_train = data.DataLoader(dataset=dataset_uber_train, batch_size=batch_size, shuffle=False) dataloader_uber_test = data.DataLoader(dataset=dataset_uber_test , batch_size=batch_size, shuffle=False) netOutDict = {} labelsOutDict = {} for numEpoch in range(num_epochs): my_net.loss = None # for each epoch, calculate loss for each batch - my_net.train() localLoss = [4] accTrain = [0] rmseTrain = [1] trainCorr = 0.0 trainTot = 0.0 if (1+numEpoch)%40 == 0: if my_net.optimizer.param_groups[0]['lr'] > 0.001: my_net.optimizer.param_groups[0]['lr'] = my_net.optimizer.param_groups[0]['lr']/2 else: my_net.optimizer.param_groups[0]['lr'] = 0.001 print('lr is: %.6f' % my_net.optimizer.param_groups[0]['lr']) netOutList = [] labelOutList = [] for i, (input, labels) in enumerate(dataloader_uber_train): inputD = input.to(device) labelsD = labels.to(device) my_net.loss = None # create torch variables # input is of size [batch_size, grid_id, seq_size] inputVar = Variable(inputD).to(device) labVar = Variable(labelsD).to(device) # if isServerRun: # labVar = labVar.type(torch.cuda.FloatTensor) # else: # labVar = labVar.type(torch.FloatTensor) # reset gradient my_net.optimizer.zero_grad() # forward grid_size = labels.shape[1] local_batch_size = input.shape[0] # input to LSTM is [seq_size, batch_size, grid_size] , will be transferred as part of the forward netOut = my_net.forward(inputVar) netOut = netOut.view(local_batch_size, class_size, grid_size) _, labTrain = torch.max(torch.exp(netOut.data), 1) my_net.calcLoss(netOut, labVar) # backwards my_net.backward() # optimizer step my_net.optimizer.step() # local loss function list localLoss.append(my_net.loss.item()) # if isServerRun: # labTrain = labTrain.cpu() if isServerRun: labTrainNp = labTrain.type(torch.cuda.LongTensor).cpu().detach().numpy() # print("number of net labels different from 0 is:" + str(np.sum(labTrainNp > 0))) # print("number of net labels 0 is:"+str(np.sum(labTrainNp == 0))) labelsNp = labels.cpu().detach().numpy() # print("number of real labels different from 0 is:" + str(np.sum(labelsNp > 0))) # print("number of real labels 0 is:" + str(np.sum(labelsNp == 0))) trainCorr = torch.sum(labTrain.type(torch.cuda.LongTensor) == labels).cpu().detach().numpy() + trainCorr else: labTrainNp = labTrain.long().detach().numpy() labelsNp = labels.detach().numpy() trainCorr = torch.sum(labTrain.long() == labels).detach().numpy() + trainCorr netOutList.append(labTrainNp) labelOutList.append(labelsNp) trainTot = labels.size(0) * labels.size(1) + trainTot rmse = sqrt(metrics.mean_squared_error(labTrainNp.reshape(-1), labelsNp.reshape(-1))) accTrain.append(100 * trainCorr / trainTot) rmseTrain.append(rmse) # output current state if (i + 1) % 2 == 0: print('Epoch: [%d/%d1 ], Step: [%d/%d], Loss: %.4f, Acc: %.4f, RMSE: %.4f' % (numEpoch + 1, my_net.maxEpochs, i + 1, dataloader_uber_train.batch_size, my_net.loss.item(), accTrain[-1], rmseTrain[-1])) # if (i+1) % 20 == 0: # if ((localLoss[-1] < np.max(np.array(localLoss[0:-1]))) or (accTrain[-1] > np.max(np.array(accTrain[0:-1])))) and flag_save_network: # # pickle.dump(my_net, open("gridSize" + str(xmax - xmin) + "_epoch" + str(numEpoch+1) + "_batch" + str(i+1) + ".pkl", 'wb')) # my_net.saveModel("gridSize" + str(xmax - xmin) + "_epoch" + 
str(numEpoch+1) + "_batch" + str(i+1) + "_torch.pkl") # # networkStr = "gridSize" + str(xmax - xmin) + "_epoch" + str(numEpoch+1) + "_batch" + str(i+1) # # outArray = np.stack([np.array(localLoss), np.array(accTrain)]) # # np.save(networkStr + "_oArrBatch.npy", outArray) my_net.lossVecTrain.append(np.average(localLoss)) my_net.accVecTrain.append(np.average(accTrain)) my_net.rmseVecTrain.append(np.average(rmseTrain)) # test network for each epoch stage accEpochTest, lossEpochTest, rmseEpochTest = my_net.test_spesific(testLoader=dataloader_uber_test) my_net.accVecTest.append(accEpochTest) my_net.lossVecTest.append(lossEpochTest) my_net.rmseVecTest.append(rmseEpochTest) netOutDict[numEpoch] = netOutList labelsOutDict[numEpoch] = labelOutList if (flag_save_network): my_net.saveModel(netConfig + "_torch.pkl") # outArray = np.stack([np.array(my_net.lossVecTest), np.array(my_net.lossVecTrain), # np.array(my_net.accVecTest), np.array(my_net.accVecTrain)]) # np.save("gridSize" + str(xmax - xmin) + "_epoch" + str(numEpoch) + "_oArrBatch.npy", outArray) my_net.finalAcc = accEpochTest my_net.finalLoss = lossEpochTest my_net.finalRmse = rmseEpochTest # name, HyperPerams, accur, num total weights # err vs epoch, loss vs epoch, saveFile(netOutDict, 'netDict') saveFile(labelsOutDict, 'labelsDict') strWrite = '{0};{1};{2};{3};{4}\n'.format(netConfig, my_net.finalAcc, my_net.finalLoss, numWeights, my_net.maxEpochs) outFile.write(strWrite) outFile.close() return
def train(args, model, logger, in_queue, out_queue):
    """Train the order embedding model.

    args: Commandline arguments
    logger: logger for logging progress
    in_queue: input queue to an intersection computation worker
    out_queue: output queue to an intersection computation worker
    """
    scheduler, opt = utils.build_optimizer(args, model.parameters())
    if args.method_type == "order":
        clf_opt = optim.Adam(model.clf_model.parameters(), lr=args.lr)

    done = False
    while not done:
        data_source = make_data_source(args)
        loaders = data_source.gen_data_loaders(
            args.eval_interval * args.batch_size, args.batch_size, train=True)
        for batch_target, batch_neg_target, batch_neg_query in zip(*loaders):
            msg, _ = in_queue.get()
            if msg == "done":
                done = True
                break
            # train
            model.train()
            model.zero_grad()
            pos_a, pos_b, neg_a, neg_b = data_source.gen_batch(
                batch_target, batch_neg_target, batch_neg_query, True)
            emb_pos_a, emb_pos_b = model.emb_model(pos_a), model.emb_model(pos_b)
            emb_neg_a, emb_neg_b = model.emb_model(neg_a), model.emb_model(neg_b)
            # print(emb_pos_a.shape, emb_neg_a.shape, emb_neg_b.shape)
            emb_as = torch.cat((emb_pos_a, emb_neg_a), dim=0)
            emb_bs = torch.cat((emb_pos_b, emb_neg_b), dim=0)
            labels = torch.tensor([1] * pos_a.num_graphs +
                                  [0] * neg_a.num_graphs).to(utils.get_device())
            intersect_embs = None
            pred = model(emb_as, emb_bs)
            loss = model.criterion(pred, intersect_embs, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()
            if scheduler:
                scheduler.step()

            if args.method_type == "order":
                with torch.no_grad():
                    pred = model.predict(pred)
                model.clf_model.zero_grad()
                pred = model.clf_model(pred.unsqueeze(1))
                criterion = nn.NLLLoss()
                clf_loss = criterion(pred, labels)
                clf_loss.backward()
                clf_opt.step()

            pred = pred.argmax(dim=-1)
            acc = torch.mean((pred == labels).type(torch.float))
            train_loss = loss.item()
            train_acc = acc.item()

            out_queue.put(("step", (loss.item(), acc)))
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math

import torch.nn as nn

from transformer_prep_data import *
from transformer_voc import Voc
from transformer_global_variables import *

criterion = nn.NLLLoss(ignore_index=0)


def train(input_variable, lengths, target_variable, mask, max_target_len,
          transformer, embedding, optimizer, ntokens, batch_size, clip,
          device, max_length=MAX_LENGTH):
    # Zero gradients
    optimizer.zero_grad()

    # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    # Forward pass through encoder
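# --- Side note (not part of the snippet above): a minimal, self-contained
# sketch of what criterion = nn.NLLLoss(ignore_index=0) does during training.
# The shapes and the pad id of 0 are assumptions chosen only for illustration.
import torch
import torch.nn as nn

_criterion = nn.NLLLoss(ignore_index=0)

_batch, _seq_len, _vocab = 2, 5, 11
_log_probs = torch.log_softmax(torch.randn(_batch, _seq_len, _vocab), dim=-1)
_targets = torch.randint(1, _vocab, (_batch, _seq_len))
_targets[:, -2:] = 0  # pretend the last two positions of every sequence are padding

# NLLLoss expects (N, C) log-probabilities against (N,) targets, so the batch
# and time dimensions are flattened; positions whose target is 0 are skipped.
_loss = _criterion(_log_probs.view(-1, _vocab), _targets.view(-1))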
    def __init__(self, generator, tgt_vocab):
        super(UnsupNMTLossCompute, self).__init__(generator, tgt_vocab)
        weight = torch.ones(len(tgt_vocab))
        weight[self.padding_idx] = 0
        # size_average=False sums the per-token losses (reduction='sum' in
        # current PyTorch); the zeroed weight keeps padding out of the loss.
        self.criterion = nn.NLLLoss(weight, size_average=False)
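# --- Side note (illustration only, not taken from UnsupNMTLossCompute): zeroing
# the padding class weight, as above, has the same effect on a summed NLLLoss as
# using ignore_index — padded target positions contribute nothing to the loss.
import torch
import torch.nn as nn

_vocab_size, _padding_idx = 7, 0
_weight = torch.ones(_vocab_size)
_weight[_padding_idx] = 0

_log_probs = torch.log_softmax(torch.randn(4, _vocab_size), dim=-1)
_targets = torch.tensor([0, 3, 5, 0])  # two padded positions

_loss_weighted = nn.NLLLoss(_weight, reduction='sum')(_log_probs, _targets)
_loss_ignored = nn.NLLLoss(ignore_index=_padding_idx, reduction='sum')(_log_probs, _targets)
assert torch.allclose(_loss_weighted, _loss_ignored)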
    def __init__(self, weight=None, size_average=True, ignore_index=255):
        super(CrossEntropyLoss2d, self).__init__()
        # NLLLoss2d is deprecated; replaced with NLLLoss
        self.nll_loss = nn.NLLLoss(weight, size_average, ignore_index)
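# --- Side note (assumed toy shapes, illustration only): nn.NLLLoss accepts
# K-dimensional input directly in current PyTorch, which is why it can stand in
# for the deprecated nn.NLLLoss2d on (N, C, H, W) segmentation log-probabilities
# against (N, H, W) integer masks, as in CrossEntropyLoss2d above.
import torch
import torch.nn as nn

_n, _c, _h, _w = 2, 3, 4, 4
_log_probs = torch.log_softmax(torch.randn(_n, _c, _h, _w), dim=1)
_target = torch.randint(0, _c, (_n, _h, _w))
_loss = nn.NLLLoss(ignore_index=255)(_log_probs, _target)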
def train(data): print "Training model..." data.show_data_summary() save_data_name = data.model_dir + ".dset" data.save(save_data_name) model = SeqModel(data) loss_function = nn.NLLLoss() if data.optimizer.lower() == "sgd": optimizer = optim.SGD(model.parameters(), lr=data.HP_lr, momentum=data.HP_momentum, weight_decay=data.HP_l2) elif data.optimizer.lower() == "adagrad": optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2) elif data.optimizer.lower() == "adadelta": optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2) elif data.optimizer.lower() == "rmsprop": optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2) elif data.optimizer.lower() == "adam": optimizer = optim.Adam(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2) else: print("Optimizer illegal: %s" % (data.optimizer)) exit(0) best_dev = -10 data.HP_iteration = 1 ## start training for idx in range(data.HP_iteration): epoch_start = time.time() temp_start = epoch_start print("Epoch: %s/%s" % (idx, data.HP_iteration)) if data.optimizer == "SGD": optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr) instance_count = 0 sample_id = 0 sample_loss = 0 total_loss = 0 right_token = 0 whole_token = 0 random.shuffle(data.train_Ids) ## set model in train model model.train() model.zero_grad() batch_size = data.HP_batch_size batch_id = 0 train_num = len(data.train_Ids) total_batch = train_num // batch_size + 1 for batch_id in range(total_batch): start = batch_id * batch_size end = (batch_id + 1) * batch_size if end > train_num: end = train_num instance = data.train_Ids[start:end] if not instance: continue batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label( instance, data.HP_gpu) instance_count += 1 loss, tag_seq = model.neg_log_likelihood_loss( batch_word, batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask) right, whole = predict_check(tag_seq, batch_label, mask) right_token += right whole_token += whole sample_loss += loss.data[0] total_loss += loss.data[0] if end % 500 == 0: temp_time = time.time() temp_cost = temp_time - temp_start temp_start = temp_time print( " Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token)) sys.stdout.flush() sample_loss = 0 loss.backward() optimizer.step() model.zero_grad() temp_time = time.time() temp_cost = temp_time - temp_start print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token)) epoch_finish = time.time() epoch_cost = epoch_finish - epoch_start print( "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % (idx, epoch_cost, train_num / epoch_cost, total_loss)) # continue speed, acc, p, r, f, _, _ = evaluate(data, model, "dev") dev_finish = time.time() dev_cost = dev_finish - epoch_finish if data.seg: current_score = f print( "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc, p, r, f)) else: current_score = acc print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc)) if current_score > best_dev: if data.seg: print "Exceed previous best f score:", best_dev else: print "Exceed previous best acc score:", best_dev model_name = data.model_dir + '.' 
+ str(idx) + ".model" print "Save current best model in file:", model_name torch.save(model.state_dict(), model_name) best_dev = current_score # ## decode test speed, acc, p, r, f, _, _ = evaluate(data, model, "test") test_finish = time.time() test_cost = test_finish - dev_finish if data.seg: print( "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (test_cost, speed, acc, p, r, f)) else: print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc)) gc.collect()
print(ps.shape)
top_p, top_class = ps.topk(1, dim=1)
# Look at the most likely classes for the first 10 examples
print(top_class[:10, :])
equals = top_class == labels.view(*top_class.shape)
accuracy = torch.mean(equals.type(torch.FloatTensor))
print(f'Accuracy: {accuracy.item()*100}%')

model = Classifier()
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

epochs = 30
steps = 0
train_losses, test_losses = [], []
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        optimizer.zero_grad()
        log_ps = model(images)
        loss = criterion(log_ps, labels)
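# --- Side note: the loop above stops right after computing the loss. A hedged,
# self-contained sketch of the steps that usually follow (toy model and data,
# not the Classifier used above):
import torch
import torch.nn as nn
import torch.optim as optim

_toy_model = nn.Sequential(nn.Flatten(), nn.Linear(784, 10), nn.LogSoftmax(dim=1))
_toy_opt = optim.Adam(_toy_model.parameters(), lr=0.003)
_toy_criterion = nn.NLLLoss()

_images, _labels = torch.randn(4, 1, 28, 28), torch.randint(0, 10, (4,))
_toy_opt.zero_grad()
_loss = _toy_criterion(_toy_model(_images), _labels)
_loss.backward()              # backpropagate
_toy_opt.step()               # update weights
_running_loss = _loss.item()  # accumulate for per-epoch reporting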
def train(data, save_model_dir, seg=True): print "Training model..." data.show_data_summary() save_data_name = save_model_dir + ".dset" save_data_setting(data, save_data_name) loss_function = nn.NLLLoss() model = SeqModel(data) #model=copy.deepcopy(premodel) optimizer = optim.SGD(model.examiner.parameters(), lr=data.HP_lr, momentum=data.HP_momentum) best_dev = -1 data.HP_iteration = 5 USE_CRF = True ## start training acc_list = [] p_list = [] r_list = [] f_list = [] map_list = [] #random.seed(2) print("total", ) data.HP_lr = 0.1 for idx in range(1): epoch_start = time.time() temp_start = epoch_start print("Epoch: %s/%s" % (idx, data.HP_iteration)) optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr) instance_count = 0 sample_id = 0 sample_loss = 0 total_loss = 0 total_rl_loss = 0 total_ml_loss = 0 total_num = 0.0 total_reward = 0.0 right_token_reform = 0 whole_token_reform = 0 #random.seed(2) #random.shuffle(data.train_Ids) #random.seed(seed_num) ## set model in train model model.examiner.train() model.examiner.zero_grad() model.topk = 5 model.examiner.topk = 5 batch_size = data.HP_batch_size batch_id = 0 train_num = len(data.train_Ids) total_batch = train_num // batch_size + 1 gamma = 0 cnt = 0 click = 0 sum_click = 0 sum_p_at_5 = 0.0 sum_p = 1.0 #if idx==0: # selected_data=[batch_id for batch_id in range(0,total_batch//1000)] tag_mask = None batch_ids = [i for i in range(total_batch)] for batch_idx in range(0, total_batch): # if end%500 == 0: # temp_time = time.time() # temp_cost = temp_time - temp_start # temp_start = temp_time # print(" Instance: %s; Time: %.2fs; loss: %.4f;"%(end, temp_cost, sample_loss)) # sys.stdout.flush() # sample_loss = 0 #updating the crf by selected position batch_id = batch_ids[batch_idx] start = batch_id * batch_size end = (batch_id + 1) * batch_size if end > train_num: end = train_num instance = data.train_Ids[start:end] if not instance: continue update_once = False start_time = time.time() #selected_data.append(batch_id) if batch_id == 15: for j in range(0, 10): __tot = 0.0 for i in range(5, 15): model.sample_train(0, i) batch_id_temp = batch_ids[i] start = batch_id_temp * batch_size end = (batch_id_temp + 1) * batch_size instance = data.train_Ids[start:end] batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label( instance, data.HP_gpu) real_batch_label = batch_label batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask) #_pred_label, _gold_label = recover_label(Variable(tag_seq.cuda()), real_batch_label.cuda(),mask.cuda(), data.label_alphabet, batch_wordrecover) _tag_mask = tag_mask pos_mask, score = model.reinforment_supervised( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, real_batch_label, tag_seq, tag_prob, mask) __tot += score.sum() score.sum().backward() optimizer.step() model.examiner.zero_grad() __tot = 0.0 for i in range(10, -1, -1): print(i) model.sample_train(i + 1, 15) batch_id_temp = batch_ids[i] start = batch_id_temp * batch_size end = (batch_id_temp + 1) * batch_size instance = data.train_Ids[start:end] batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label( instance, data.HP_gpu) real_batch_label = batch_label batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss( batch_word, batch_wordlen, 
batch_char, batch_charlen, batch_charrecover, batch_label, mask) #_pred_label, _gold_label = recover_label(Variable(tag_seq.cuda()), real_batch_label.cuda(),mask.cuda(), data.label_alphabet, batch_wordrecover) _tag_mask = tag_mask pos_mask, score = model.reinforment_supervised( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, real_batch_label, tag_seq, tag_prob, mask) __tot += score.sum() score.sum().backward() optimizer.step() model.examiner.zero_grad() print("score", __tot / 14) model.train() if batch_id >= 15: t = np.random.randint(0, len(model.X_train)) if np.random.rand() > -1 or model.tag_mask_list[t].sum( ).data[0] <= 5: t = np.random.randint(len(model.X_train), total_batch) #This is for seq choosing #if batch_id>total_batch//100+100: # batch_id=batch_ids[batch_idx] # tmin=-1 # for i in range(len(model.X_train),total_batch): # batch_id=batch_ids[i] # start = batch_id*batch_size # end = (batch_id+1)*batch_size # if end >train_num: # end = train_num # instance = data.train_Ids[start:end] # if len(instance)==0: # continue # batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(instance, data.HP_gpu) # batch_label,tag_seq,tag_mask,score,indices,scores_ref=model.crf_loss(batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask) # if tmin==-1 or (scores_ref.cpu().data[0]》=tmin): # tmin=scores_ref.cpu().data[0] # t=i # temp=batch_ids[batch_idx] # batch_ids[batch_idx]=batch_ids[t] # batch_ids[t]=temp batch_id = batch_ids[batch_idx] start = batch_id * batch_size end = (batch_id + 1) * batch_size if end > train_num: end = train_num instance = data.train_Ids[start:end] batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label( instance, data.HP_gpu) real_batch_label = batch_label batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask) model.add_instance(batch_word, batch_label, tag_mask, instance, scores_ref.data[0]) #pred_label, gold_label = recover_label(Variable(tag_seq.cuda()), real_batch_label.cuda(),mask.cuda(), data.label_alphabet, batch_wordrecover) # u=False # for x in pred_label: # if not gold_label==pred_label: # u=True # break # #if u==True: # print "mask", tag_mask # print "gold", gold_label # print "pred", pred_label else: # tmin=model.scores_refs[t] # for i in range(len(model.X_train)): # if model.scores_refs[i]<=tmin: # tmin=model.scores_refs[i] # t=i instance = model.instances[t] batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label( instance, data.HP_gpu) real_batch_label = batch_label batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask, t=t) model.readd_instance(batch_label, mask, tag_mask, t, scores_ref.data[0]) print("score", score) #sum_p_at_5+=score sum_p += 1.0 end_time = time.time() if click + 5 >= 10: print("time", end_time - start_time) else: batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label( instance, data.HP_gpu) model.add_instance(batch_word, batch_label, tag_mask, instance, -100000.0) #print("Y_train",model.Y_train[-1]) # if batch_id>=total_batch//100+15: 
# for i in range(15): # model.train() # model.reevaluate_instance(mask) #print("loss",loss) #print(batch_wordlen) if batch_id < 15: if batch_id == 14: model.train() #print("Y_train",model.Y_train) print(batch_ids) speed, acc, p, r, f, _ = evaluate(data, model, "test") print(len(model.Y_train)) print("after", acc) print("Check", f) acc_list.append(acc) p_list.append(p) r_list.append(r) f_list.append(sum_click) sum_p_at_5 = 0.0 sum_p = 1.0 continue if batch_id == 15: model.train() #print("Y_train",model.Y_train) print(batch_ids) speed, acc, p, r, f, _ = evaluate(data, model, "test") print(len(model.Y_train)) print("after", acc) print("Check", f) acc_list.append(acc) p_list.append(p) r_list.append(r) f_list.append(sum_click) sum_p_at_5 = 0.0 sum_p = 1.0 click += model.topk sum_click += model.topk #click+=batch_wordlen[0] #sum_click+=batch_wordlen[0] if click >= 10: model.train() speed, acc, p, r, f, _ = evaluate(data, model, "test") print("Step:", len(model.Y_train)) print("after", acc) acc_list.append(acc) p_list.append(p) r_list.append(r) f_list.append(sum_click) sum_p_at_5 = 0.0 sum_p = 1.0 click -= 10 instance_count += 1 pos_mask, selection_score, select_reward = model.reinforment_reward( batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, real_batch_label, tag_seq, tag_prob, mask) if USE_CRF == True: start_time = time.time() t = np.random.randint(1, 10) #print("size",total_batch) speed, acc, p, r, f, _ = evaluate(data, model, "dev") end_time = time.time() if total_num != 0: ave_scores = total_reward / total_num else: ave_scores = 0.0 total_reward += acc total_num += 1 # print(batch_label) sample_scores = torch.from_numpy(np.asarray([acc])).float() ave_scores = torch.from_numpy(np.asarray([ave_scores])).float() if idx >= 0: reward_diff = Variable(sample_scores - ave_scores, requires_grad=False) else: reward_diff = select_reward reward_diff = reward_diff.cuda() rl_loss = -selection_score # B #if idx>=10: #print("rl_loss",rl_loss) print("reward", reward_diff) rl_loss = torch.mul(rl_loss, reward_diff.expand_as(rl_loss)) #b_size #print("reward",reward_diff) #rl_loss = rl_loss.sum() rl_loss.backward() optimizer.step() model.examiner.zero_grad() if len(p_list) >= 100: break if len(p_list) >= 100: break temp_time = time.time() temp_cost = temp_time - temp_start print("rl_loss", total_rl_loss) print("ml_loss", total_ml_loss) #print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token)) epoch_finish = time.time() epoch_cost = epoch_finish - epoch_start print( "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % (idx, epoch_cost, train_num / epoch_cost, total_loss)) # continue speed, acc, p, r, f, _ = evaluate(data, model, "test") dev_finish = time.time() dev_cost = dev_finish - epoch_finish if seg: current_score = f print( "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc, p, r, f)) else: current_score = acc print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc)) if current_score > best_dev: if seg: print "Exceed previous best f score:", best_dev else: print "Exceed previous best acc score:", best_dev model_name = save_model_dir + '.' 
+ str(idx) + ".model" #torch.save(model.state_dict(), model_name) best_dev = current_score ## decode test speed, acc, p, r, f, _ = evaluate(data, model, "test") test_finish = time.time() test_cost = test_finish - dev_finish if best_dev == current_score: best_ = test_cost, speed, acc, p, r, f if seg: print( "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (test_cost, speed, acc, p, r, f)) else: print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc)) gc.collect() file_dump = open("exp_list.pkl", "w") pickle.dump([acc_list, p_list, r_list, f_list, map_list], file_dump) file_dump.close()
def train(self, data_dir, epochs, learning_rate): image_datasets, dataloaders, class_to_idx = self.load_data(data_dir) criterion = nn.NLLLoss() optimizer = optim.Adam(self.model.classifier.parameters(), lr=learning_rate) # gpu or cpu self.model.to(self.device) # start training train_losses = [] test_losses = [] for e in range(epochs): running_train_loss = 0 self.model.train() for images, labels in dataloaders['train']: images, labels = images.to(self.device), labels.to(self.device) optimizer.zero_grad() # get log probs log_ps = self.model.forward(images) # get loss loss = criterion(log_ps, labels) running_train_loss += loss.item() # print(f'running_train_loss: {running_train_loss}') # back propagation loss.backward() # adjust weights optimizer.step() else: self.model.eval() running_test_loss = 0 accuracy = 0 with torch.no_grad(): for images, labels in dataloaders['test']: images, labels = images.to(self.device), labels.to(self.device) # get log probs log_ps = self.model.forward(images) # get loss test_loss = criterion(log_ps, labels) running_test_loss += test_loss.item() # print(f'running_test_loss: {running_test_loss}') # turn log probs into real probs ps = torch.exp(log_ps) # calc accuracy top_p, top_class = ps.topk(1, dim=1) equals = top_class == labels.view(*top_class.shape) accuracy += torch.mean(equals.type(torch.FloatTensor)).item() n_test_batches = len(dataloaders['test']) n_train_batches = len(dataloaders['train']) epoch_train_loss = running_train_loss / n_train_batches epoch_test_loss = running_test_loss / n_test_batches train_losses.append(epoch_train_loss) test_losses.append(epoch_test_loss) print(f'Epoch: {e+1}/{epochs}', f'Training Loss {epoch_train_loss:{0}.{4}}', f'Validation Loss {epoch_test_loss:{0}.{4}}', f'Accuracy {(accuracy / n_test_batches):{0}.{4}}' ) #return e+1, train_losses, test_losses self.final_epoch = e+1 self.train_losses = train_losses self.test_losses = test_losses self.class_to_idx = class_to_idx
def main(cfg, gpus):
    # Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)

    crit = nn.NLLLoss(ignore_index=-1)

    if cfg.MODEL.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(
            net_encoder, net_decoder, crit, cfg.TRAIN.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)

    # Dataset and Loader
    dataset_train = TrainDataset(cfg.DATASET.root_dataset,
                                 cfg.DATASET.list_train,
                                 cfg.DATASET,
                                 batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=len(gpus),  # we have modified data_parallel
        shuffle=False,  # we do not use this param
        collate_fn=user_scattered_collate,
        num_workers=cfg.TRAIN.workers,
        drop_last=True,
        pin_memory=True)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    # create loader iterator
    iterator_train = iter(loader_train)

    # load nets into gpu
    if len(gpus) > 1:
        segmentation_module = UserScatteredDataParallel(segmentation_module,
                                                        device_ids=gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)

    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}

    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        train(segmentation_module, iterator_train, optimizers, history, epoch + 1, cfg)

        # checkpointing
        checkpoint(nets, history, cfg, epoch + 1)

    print('Training Done!')
    def __init__(self, ignore_index):
        super(NLLLoss, self).__init__()
        # step_loss = F.nll_loss(log_probs, target, reduction="none", ignore_index=PAD)
        self.NLL = nn.NLLLoss(ignore_index=ignore_index, reduction='sum')
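# --- Side note (toy shapes, illustration only): the summed criterion above is
# equivalent to the commented-out per-token F.nll_loss followed by masking out
# PAD positions and summing.
import torch
import torch.nn.functional as F

_PAD = 0
_log_probs = torch.log_softmax(torch.randn(6, 10), dim=-1)
_target = torch.tensor([3, 0, 7, 1, 0, 2])

_summed = F.nll_loss(_log_probs, _target, ignore_index=_PAD, reduction='sum')
_per_token = F.nll_loss(_log_probs, _target, reduction='none')
assert torch.allclose(_summed, _per_token[_target != _PAD].sum())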
def train_model(image_dataloaders, arch='vgg16', num_outputs=102, hidden_units=4096, learning_rate=0.001, epochs=20, gpu=False, checkpoint=''): ''' Trains the model on given dataloaders ''' #load the model model = load_model(arch, num_outputs, hidden_units) #attach ancillary information about the nn to the model object model.arch = arch model.num_outputs = num_outputs model.hidden_units = hidden_units model.learning_rate = learning_rate model.epochs = epochs model.gpu = gpu model.checkpoint = checkpoint print('Architecture: ', arch, 'Hidden units: ', hidden_units) print('Training epochs: ', epochs, 'Learning rate: ', learning_rate) print( 'Trianing data size: {} images, '.format( len(image_dataloaders['train'].dataset)), 'validation data size: {} images'.format( len(image_dataloaders['valid'].dataset))) #use gpu if selected and available if gpu and torch.cuda.is_available(): print('On GPU') device = torch.device("cuda:0") model.cuda() else: print('On CPU') device = torch.device("cpu") #setup the loss function if arch == 'inception' or 'resnet': criterion = nn.CrossEntropyLoss() else: criterion = nn.NLLLoss() #only the new or modified layers will get gradient updates print("Parameters to learn:") params_to_update = [] for name, param in model.named_parameters(): if param.requires_grad == True: params_to_update.append(param) print("\t", name) #making sure only added parameters are being optimized with #gradient adjustments during training optimizer = optim.Adam(params_to_update, lr=learning_rate) #resetting accuracy and deep copying the model weights/biases best_accuracy = 0 best_model_weights = copy.deepcopy(model.state_dict()) #to keep track of the losses throughout training train_losses, valid_losses = [], [] print_every = 100 #for debugging start_time = time.time() for e in range(epochs): epoch_accuracy = 0 running_loss = 0 steps = 0 start_training_time_per_steps = time.time() for images, labels in image_dataloaders['train']: images, labels = images.to(device), labels.to(device) steps += 1 optimizer.zero_grad() #run training data through the model if arch == 'inception': #From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958 outputs, aux_outputs = model(images) loss1 = criterion(outputs, labels) loss2 = criterion(aux_outputs, labels) loss = loss1 + 0.4 * loss2 else: output_logps = model(images) loss = criterion(output_logps, labels) running_loss += loss.item() loss.backward() optimizer.step() #perform validation at "print_every" if steps % print_every == 0: #calculate the training time per steps training_time_per_steps = time.time( ) - start_training_time_per_steps #reset the accuracy and validation loss accuracy, valid_loss = 0, 0 #put the model in evaluation mode for quicker validation model.eval() #we're not doing any gradient related calculations when punching #through the validation data with torch.no_grad(): valid_start_time = time.time() for images, labels in image_dataloaders['valid']: images, labels = images.to(device), labels.to(device) valid_logps = model(images) #calculate the validation loss before taking the exp valid_loss += criterion(valid_logps, labels) valid_ps = torch.exp(valid_logps) top_p, top_class = valid_ps.topk(1, dim=1) equality = top_class == labels.view(*top_class.shape) accuracy += torch.mean(equality.type( torch.FloatTensor)).item() valid_time = time.time() - valid_start_time #keeping track of the losses to plot later in case we need to train_losses.append(running_loss / steps) valid_losses.append(valid_loss / 
len(image_dataloaders['valid'])) epoch_accuracy = accuracy / len(image_dataloaders['valid']) #printing losses, accuracy, etc. as we train print( 'Epoch {}/{} '.format(e + 1, epochs), 'Step {} '.format(steps), 'Train loss: {:.3f} '.format(running_loss / steps), 'Valid loss: {:.3f} '.format( valid_loss / len(image_dataloaders['valid'])), 'Accuracy: {:.2f}% '.format(epoch_accuracy * 100), 'Train dur: {:.1f}s '.format(training_time_per_steps), 'Valid dur: {:.1f}s'.format(valid_time)) #reset the running loss to zero and put the model back into training mode running_loss = 0 model.train() start_training_time_per_steps = time.time() #saving the best weights and biases based on best accuracy if (epoch_accuracy > best_accuracy): best_accuracy = epoch_accuracy best_model_wts = copy.deepcopy(model.state_dict()) #loading model object with best weights model.load_state_dict(best_model_wts) #storing dir_to_cat into the model object - added this for easier lookup with open('dir_to_cat.json', 'r') as f: dir_to_cat = json.load(f) #saving train and valid losses to the model in case we need to access them model.train_losses = train_losses model.valid_losses = valid_losses #printing total training time and best accuracy total_time = time.time() - start_time print('Time for complete training {:.0f}m {:.0f}s'.format( total_time // 60, total_time % 60)) print('Best accuracy: {:3f}%'.format(best_accuracy * 100)) #saving checkpoint if requested if checkpoint: print('Checkpoint saved to:', checkpoint) checkpoint_dict = { 'arch': arch, 'dir_to_cat': dir_to_cat, 'hidden_units': hidden_units, 'best_accuracy': best_accuracy, 'best_model_weights': best_model_wts, 'train_losses': train_losses, 'valid_losses': valid_losses } torch.save(checkpoint_dict, checkpoint) #return the model object with best weights and biases return model
if gpu == True:
    using_gpu = torch.cuda.is_available()
    device = 'gpu'
    print('GPU On')
else:
    print('CPU ON')
    device = 'cpu'

# Loading Dataset
data_transforms, directories, dataloaders, dataset_sizes, image_datasets = \
    script.loading_data(data_dir)
class_to_idx = image_datasets['training_transforms'].class_to_idx
print("cudaorcpu_3")
for i in dataloaders:
    print("dataloaders ", dataloaders[i])

# Network Setup
model, input_size = script.make_model(arch, hidden_units)
criteria = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr)
sched = lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)
epochs = epoch
model_ft = script.train_model(dataloaders, dataset_sizes, model, criteria,
                              optimizer, sched, epochs, device)

# Testing Model
script.check_accuracy_on_test(dataloaders, model, 'testing_transforms', True)

# Saving Checkpoint
script.save_checkpoints(model, arch, lr, epochs, input_size, hidden_units,
                        class_to_idx, save_dir)
def main(): data_holder, task2id, id2task, num_feat, num_voc, num_char, tgt_dict, embeddings = DataLoader_elmo.multitask_dataloader( pkl_path, num_task=num_task, batch_size=BATCH_SIZE) para = model_para #task2label = {"conll2000": "chunk", "unidep": "POS", "conll2003": "NER"} task2label = {"conll2000": "chunk", "wsjpos": "POS", "conll2003": "NER"} logger = Logger('./logs/' + str(args.gpu)) para["id2task"] = id2task para["n_feats"] = num_feat para["n_vocs"] = num_voc para["n_tasks"] = num_task para["out_size"] = [ len(tgt_dict[task2label[id2task[ids]]]) for ids in range(num_task) ] para["n_chars"] = num_char model = Model_s.build_model_cnn(para) model.Word_embeddings.apply_weights(embeddings) params = list(filter(lambda p: p.requires_grad, model.parameters())) num_params = sum(p.numel() for p in model.parameters()) print(model) print("Num of paras:", num_params) print(model.concat_flag) def lr_decay(optimizer, epoch, decay_rate=0.05, init_lr=0.015): lr = init_lr / (1 + decay_rate * epoch) print(" Learning rate is set as:", lr) for param_group in optimizer.param_groups: param_group['lr'] = lr return optimizer def exp_lr_decay(optimizer, epoch, decay_rate=0.05, init_lr=0.015): lr = init_lr * decay_rate**epoch print(" Learning rate is set as:", lr) for param_group in optimizer.param_groups: param_group['lr'] = lr return optimizer if args.optim == "noam": model_optim = optim_custorm.NoamOpt( para["d_hid"], 1, 1000, DenseSparseAdam(params, lr=0.0, betas=(0.9, 0.98), eps=1e-9)) args.decay = None elif args.optim == "sgd": model_optim = optim.SGD(params, lr=0.015, momentum=args.momentum, weight_decay=1e-8) elif args.optim == "adam": model_optim = optim.Adam(params, lr=0.0, betas=(0.9, 0.98), eps=1e-9, weight_decay=1e-8) if args.mode == "train": best_F1 = 0 if not para["crf"]: calculate_loss = nn.NLLLoss() else: calculate_loss = None print("Start training...") print('-' * 60) KLLoss = None #nn.KLDivLoss() start_point = time.time() for epoch_idx in range(NUM_EPOCH): if args.optim == "sgd": if args.decay == "exp": model_optim = exp_lr_decay(model_optim, epoch_idx) elif args.decay == "normal": model_optim = lr_decay(model_optim, epoch_idx) Pre, Rec, F1, loss_list = run_epoch(model, data_holder, model_optim, calculate_loss, KLLoss, para, epoch_idx, id2task, logger) use_time = time.time() - start_point print("Time using: %f mins" % (use_time / 60)) if not best_F1 or best_F1 < F1: best_F1 = F1 Model_s.save_model(model_path, model, para) print('*' * 60) print( "Save model with average Pre: %f, Rec: %f, F1: %f on dev set." % (Pre, Rec, F1)) save_idx = epoch_idx print('*' * 60) print("save model at epoch:", save_idx) else: para_path = os.path.join(path, 'para.pkl') with open(para_path, "wb") as f: para_save = pickle.load(f) model = Model_s.build_model(para_save) model = Model_s.read_model(model_path, model) prec_list, rec_list, f1_list = infer(model, data_holder, "test")
def __init__(self, model, config): """ Creates a new TrainManager for a model, specified as in configuration. :param model: :param config: """ train_config = config["training"] self.model = model self.pad_index = self.model.pad_index self.bos_index = self.model.bos_index criterion = nn.NLLLoss(ignore_index=self.pad_index, reduction='sum') self.learning_rate_min = train_config.get("learning_rate_min", 1.0e-8) if train_config["loss"].lower() not in [ "crossentropy", "xent", "mle", "cross-entropy" ]: raise NotImplementedError("Loss is not implemented. Only xent.") learning_rate = train_config.get("learning_rate", 3.0e-4) weight_decay = train_config.get("weight_decay", 0) if train_config["optimizer"].lower() == "adam": self.optimizer = torch.optim.Adam(model.parameters(), weight_decay=weight_decay, lr=learning_rate) else: # default self.optimizer = torch.optim.SGD(model.parameters(), weight_decay=weight_decay, lr=learning_rate) self.schedule_metric = train_config.get("schedule_metric", "eval_metric") self.ckpt_metric = train_config.get("ckpt_metric", "eval_metric") self.best_ckpt_iteration = 0 # if we schedule after BLEU/chrf, we want to maximize it, else minimize scheduler_mode = "max" if self.schedule_metric == "eval_metric" \ else "min" # the ckpt metric decides on how to find a good early stopping point: # ckpts are written when there's a new high/low score for this metric if self.ckpt_metric == "eval_metric": self.best_ckpt_score = -np.inf self.is_best = lambda x: x > self.best_ckpt_score else: self.best_ckpt_score = np.inf self.is_best = lambda x: x < self.best_ckpt_score self.scheduler = None if "scheduling" in train_config.keys() and \ train_config["scheduling"]: if train_config["scheduling"].lower() == "plateau": # learning rate scheduler self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer=self.optimizer, mode=scheduler_mode, verbose=False, threshold_mode='abs', factor=train_config.get("decrease_factor", 0.1), patience=train_config.get("patience", 10)) elif train_config["scheduling"].lower() == "decaying": self.scheduler = torch.optim.lr_scheduler.StepLR( optimizer=self.optimizer, step_size=train_config.get("decaying_step_size", 10)) elif train_config["scheduling"].lower() == "exponential": self.scheduler = torch.optim.lr_scheduler.ExponentialLR( optimizer=self.optimizer, gamma=train_config.get("decrease_factor", 0.99)) self.shuffle = train_config.get("shuffle", True) self.epochs = train_config["epochs"] self.batch_size = train_config["batch_size"] self.batch_multiplier = train_config.get("batch_multiplier", 1) self.criterion = criterion self.normalization = train_config.get("normalization", "batch") self.steps = 0 # stop training if this flag is True by reaching learning rate minimum self.stop = False self.total_tokens = 0 self.max_output_length = train_config.get("max_output_length", None) self.overwrite = train_config.get("overwrite", False) self.model_dir = self._make_model_dir(train_config["model_dir"]) self.logger = self._make_logger() self.valid_report_file = "{}/validations.txt".format(self.model_dir) self.use_cuda = train_config["use_cuda"] if self.use_cuda: self.model.cuda() self.logging_freq = train_config.get("logging_freq", 100) self.validation_freq = train_config.get("validation_freq", 1000) self.eval_metric = train_config.get("eval_metric", "bleu") self.print_valid_sents = train_config["print_valid_sents"] self.level = config["data"]["level"] self.clip_grad_fun = None if "clip_grad_val" in train_config.keys(): clip_value = 
train_config["clip_grad_val"] self.clip_grad_fun = lambda params:\ nn.utils.clip_grad_value_(parameters=params, clip_value=clip_value) elif "clip_grad_norm" in train_config.keys(): max_norm = train_config["clip_grad_norm"] self.clip_grad_fun = lambda params:\ nn.utils.clip_grad_norm_(parameters=params, max_norm=max_norm) assert not ("clip_grad_val" in train_config.keys() and "clip_grad_norm" in train_config.keys()), \ "you can only specify either clip_grad_val or clip_grad_norm" if "load_model" in train_config.keys(): model_load_path = train_config["load_model"] self.logger.info("Loading model from {}".format(model_load_path)) self.load_checkpoint(model_load_path) trainable_params = [ n for (n, p) in self.model.named_parameters() if p.requires_grad ] self.logger.info("Trainable parameters: {}".format(trainable_params)) assert len(trainable_params) > 0
    def __init__(self, input_dim, nclass):
        super(LINEAR_LOGSOFTMAX, self).__init__()
        self.fc = nn.Linear(input_dim, nclass)
        self.logic = nn.LogSoftmax(dim=1)
        self.lossfunction = nn.NLLLoss()
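# --- Side note: the forward method of LINEAR_LOGSOFTMAX is not shown above.
# A hedged usage sketch, assuming the head applies the Linear layer followed by
# the LogSoftmax (the usual pairing that the stored nn.NLLLoss expects); the
# feature size and class count below are arbitrary.
import torch

_head = LINEAR_LOGSOFTMAX(input_dim=2048, nclass=50)
_features = torch.randn(8, 2048)
_log_probs = _head.logic(_head.fc(_features))  # explicit calls, no reliance on forward
_loss = _head.lossfunction(_log_probs, torch.randint(0, 50, (8,)))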
def main(): start = time.time() parser = args.parse_args() # run some checks on arguments check_args(parser) # format logging log_name = os.path.join( parser.run_log, '{}_run_log_{}.log'.format(parser.experiment, dt.now().strftime("%Y%m%d_%H%M"))) log.basicConfig(filename=log_name, format='%(asctime)s | %(name)s -- %(message)s', level=log.INFO) os.chmod(log_name, parser.access_mode) # set device to CPU if available device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print("Starting experiment {} VN -> EN NMT on {}.".format( parser.experiment, device)) log.info("Starting experiment {} VN -> EN NMT on {}.".format( parser.experiment, device)) # set seed for replication random.seed(parser.seed) np.random.seed(parser.seed) torch.manual_seed(parser.seed) if torch.cuda.is_available(): torch.cuda.manual_seed_all(parser.seed) log.info("For reproducibility, the seed is set to {}.".format(parser.seed)) # set file paths source_name = parser.source_name target_name = parser.target_name # get saved models dir base_saved_models_dir = parser.save_dir saved_models_dir = os.path.join(base_saved_models_dir, source_name + '2' + target_name) plots_dir = parser.plots_dir log.info("We will save the models in this directory: {}".format( saved_models_dir)) log.info("We will save the plots in this directory: {}".format(plots_dir)) # get data dir main_data_path = parser.data_dir path_to_train_data = { 'source': main_data_path + 'train.tok.' + source_name, 'target': main_data_path + 'train.tok.' + target_name } path_to_dev_data = { 'source': main_data_path + 'dev.tok.' + source_name, 'target': main_data_path + 'dev.tok.' + target_name } path_to_test_data = { 'source': main_data_path + 'test.tok.' + source_name, 'target': main_data_path + 'test.tok.' + target_name } # Configuration bs = parser.batch_size log.info("Batch size = {}.".format(bs)) enc_emb = parser.enc_emb enc_hidden = parser.enc_hidden enc_layers = parser.enc_layers rnn_type = parser.rnn_type dec_emb = parser.dec_emb dec_hidden = parser.dec_hidden dec_layers = parser.dec_layers learning_rate = parser.learning_rate num_epochs = parser.epochs attn_flag = parser.attn log.info("The attention flag is set to {}.".format(attn_flag)) beam_size = parser.beam_size log.info("We evaluate using beam size of {}.".format(beam_size)) train, val, test, en_lang, vi_lang = dataset_helper.train_val_load( "", main_data_path) # get vocab sizes log.info('English has vocab size of: {} words.'.format(en_lang.n_words)) log.info('Vietnamese has vocab size of: {} words.'.format(vi_lang.n_words)) # get max sentence length by 95% percentile MAX_LEN = int(train['en_len'].quantile(0.95)) log.info( 'We will have a max sentence length of {} (95 percentile).'.format( MAX_LEN)) # set data loaders bs_dict = {'train': bs, 'validate': 1, 'test': 1} shuffle_dict = {'train': True, 'validate': False, 'test': False} train_used = train val_used = val collate_fn_dict = { 'train': partial(dataset_helper.vocab_collate_func, MAX_LEN=MAX_LEN), 'validate': dataset_helper.vocab_collate_func_val, 'test': dataset_helper.vocab_collate_func_val } transformed_dataset = { 'train': dataset_helper.Vietnamese(train_used), 'validate': dataset_helper.Vietnamese(val_used, val=True), 'test': dataset_helper.Vietnamese(test, val=True) } dataloader = { x: DataLoader(transformed_dataset[x], batch_size=bs_dict[x], collate_fn=collate_fn_dict[x], shuffle=shuffle_dict[x], num_workers=0) for x in ['train', 'validate', 'test'] } # instantiate encoder/decoder encoder_w_att = 
nnet_models.EncoderRNN(input_size=vi_lang.n_words, embed_dim=enc_emb, hidden_size=enc_hidden, n_layers=enc_layers, rnn_type=rnn_type).to(device) decoder_w_att = nnet_models.AttentionDecoderRNN( output_size=en_lang.n_words, embed_dim=dec_emb, hidden_size=dec_hidden, n_layers=dec_layers, attention=attn_flag).to(device) # instantiate optimizer if parser.optimizer == 'sgd': encoder_optimizer = optim.SGD(encoder_w_att.parameters(), lr=learning_rate, nesterov=True, momentum=0.99) decoder_optimizer = optim.SGD(decoder_w_att.parameters(), lr=learning_rate, nesterov=True, momentum=0.99) elif parser.optimizer == 'adam': encoder_optimizer = optim.Adam(encoder_w_att.parameters(), lr=5e-3) decoder_optimizer = optim.Adam(decoder_w_att.parameters(), lr=5e-3) else: raise ValueError('Invalid optimizer!') # instantiate scheduler enc_scheduler = ReduceLROnPlateau(encoder_optimizer, min_lr=1e-4, factor=0.5, patience=0) dec_scheduler = ReduceLROnPlateau(decoder_optimizer, min_lr=1e-4, factor=0.5, patience=0) criterion = nn.NLLLoss(ignore_index=global_variables.PAD_IDX) log.info( "Seq2Seq Model with the following parameters: batch_size = {}, learning_rate = {}, rnn_type = {}, enc_emb = {}, enc_hidden = {}, enc_layers = {}, dec_emb = {}, dec_hidden = {}, dec_layers = {}, num_epochs = {}, source_name = {}, target_name = {}" .format(bs, learning_rate, rnn_type, enc_emb, enc_hidden, enc_layers, dec_emb, dec_hidden, dec_layers, num_epochs, source_name, target_name)) # do we want to train again? train_again = False encoder_save = '{}_att_{}bs_{}hs_{}_{}beam_enc_{}_layer'.format( rnn_type, bs, enc_hidden, parser.optimizer, beam_size, enc_layers) decoder_save = '{}_att_{}bs_{}hs_{}_{}beam_dec_{}_layer'.format( rnn_type, bs, enc_hidden, parser.optimizer, beam_size, dec_layers) if os.path.exists(utils.get_full_filepath( saved_models_dir, encoder_save)) and os.path.exists( utils.get_full_filepath(saved_models_dir, decoder_save)) and (not train_again): log.info("Retrieving saved encoder from {}".format( utils.get_full_filepath(saved_models_dir, encoder_save))) log.info("Retrieving saved decoder from {}".format( utils.get_full_filepath(saved_models_dir, decoder_save))) encoder_w_att.load_state_dict( torch.load(utils.get_full_filepath(saved_models_dir, encoder_save))) decoder_w_att.load_state_dict( torch.load(utils.get_full_filepath(saved_models_dir, decoder_save))) else: log.info("Check if encoder path exists: {}".format( utils.get_full_filepath(saved_models_dir, encoder_save))) log.info("Check if decoder path exists: {}".format( utils.get_full_filepath(saved_models_dir, decoder_save))) log.info("Encoder and Decoder do not exist! 
Starting to train...") encoder_w_att, decoder_w_att, loss_hist, acc_hist = train_utilities.train_model( encoder_optimizer, decoder_optimizer, encoder_w_att, decoder_w_att, criterion, "attention", dataloader, en_lang, vi_lang, saved_models_dir, encoder_save, decoder_save, num_epochs=num_epochs, rm=0.95, enc_scheduler=enc_scheduler, dec_scheduler=dec_scheduler) log.info("Total time is: {} min : {} s".format( (time.time() - start) // 60, (time.time() - start) % 60)) log.info( "We will save the encoder/decoder in this directory: {}".format( saved_models_dir)) # BLEU with beam size bleu_no_unk, att_score_wo, pred_wo, src_wo = train_utilities.validation_beam_search( encoder_w_att, decoder_w_att, dataloader['validate'], en_lang, vi_lang, 'attention', beam_size, verbose=False) log.info("Bleu-{} Score (No UNK): {}".format(beam_size, bleu_no_unk)) print("Bleu-{} Score (No UNK): {}".format(beam_size, bleu_no_unk)) bleu_unk, att_score_wo, pred_wo, src_wo = train_utilities.validation_beam_search( encoder_w_att, decoder_w_att, dataloader['validate'], en_lang, vi_lang, 'attention', beam_size, verbose=False, replace_unk=True) log.info("Bleu-{} Score (UNK): {}".format(beam_size, bleu_unk)) print("Bleu-{} Score (UNK): {}".format(beam_size, bleu_unk)) # generate 5 random predictions indexes = range(len(pred_wo)) for i in np.random.choice(indexes, 5): print('Source: {} \nPrediction: {}\n---'.format(src_wo[i], pred_wo[i])) log.info('Source: {} \nPrediction: {}\n---'.format( src_wo[i], pred_wo[i])) log.info("Exported Binned Bleu Score Plot to {}!".format(plots_dir)) _, _, fig = utils.get_binned_bl_score( encoder=encoder_w_att, decoder=decoder_w_att, val_dataset=transformed_dataset['validate'], attn_flag=attn_flag, beam_size=beam_size, location=plots_dir, collate=collate_fn_dict['validate'], lang_en=en_lang, lang_vi=vi_lang)
def __init__(self, opt, shared=None): """Set up model if shared params not set, otherwise no work to do.""" super().__init__(opt, shared) opt = self.opt # there is a deepcopy in the init # all instances may need some params self.truncate = opt['truncate'] if opt['truncate'] > 0 else None self.metrics = {'loss': 0.0, 'num_tokens': 0} self.history = {} self.report_freq = opt.get('report_freq', 0.001) states = {} # check for cuda self.use_cuda = not opt.get('no_cuda') and torch.cuda.is_available() if opt.get('numthreads', 1) > 1: torch.set_num_threads(1) if shared: # set up shared properties self.opt = shared['opt'] opt = self.opt self.dict = shared['dict'] self.START_IDX = shared['START_IDX'] self.END_IDX = shared['END_IDX'] self.NULL_IDX = shared['NULL_IDX'] # answers contains a batch_size list of the last answer produced self.answers = shared['answers'] if 'model' in shared: # model is shared during hogwild self.model = shared['model'] self.metrics = shared['metrics'] states = shared['states'] else: # this is not a shared instance of this class, so do full init # answers contains a batch_size list of the last answer produced self.answers = [None] * opt['batchsize'] if self.use_cuda: print('[ Using CUDA ]') torch.cuda.set_device(opt['gpu']) init_model = None # check first for 'init_model' for loading model from file if opt.get('init_model') and os.path.isfile(opt['init_model']): init_model = opt['init_model'] # next check for 'model_file', this would override init_model if opt.get('model_file') and os.path.isfile(opt['model_file']): init_model = opt['model_file'] if init_model is not None: # load model parameters if available print('[ Loading existing model params from {} ]'.format( init_model)) states = self.load(opt['model_file']) if ((init_model is not None and os.path.isfile(init_model + '.dict')) or opt['dict_file'] is None): opt['dict_file'] = init_model + '.dict' # load dictionary and basic tokens & vectors self.dict = DictionaryAgent(opt) self.id = 'Seq2Seq' # we use START markers to start our output self.START_IDX = self.dict[self.dict.start_token] # we use END markers to end our output self.END_IDX = self.dict[self.dict.end_token] # get index of null token from dictionary (probably 0) self.NULL_IDX = self.dict[self.dict.null_token] if not hasattr(self, 'model_class'): # this allows child classes to override this but inherit init self.model_class = Seq2seq self.model = self.model_class(opt, len(self.dict), padding_idx=self.NULL_IDX, start_idx=self.START_IDX, end_idx=self.END_IDX, longest_label=states.get( 'longest_label', 1)) if opt['embedding_type'] != 'random': # set up preinitialized embeddings try: import torchtext.vocab as vocab except ModuleNotFoundError as ex: print( 'Please install torch text with `pip install torchtext`' ) raise ex if opt['embedding_type'].startswith('glove'): init = 'glove' embs = vocab.GloVe(name='840B', dim=300, cache=os.path.join( opt['parlai_home'], 'data', 'models', 'glove_vectors')) elif opt['embedding_type'].startswith('fasttext'): init = 'fasttext' embs = vocab.FastText(language='en', cache=os.path.join( opt['parlai_home'], 'data', 'models', 'fasttext_vectors')) else: raise RuntimeError('embedding type not implemented') if opt['embeddingsize'] != 300: rp = torch.Tensor(300, opt['embeddingsize']).normal_() t = lambda x: torch.mm(x.unsqueeze(0), rp) else: t = lambda x: x cnt = 0 for w, i in self.dict.tok2ind.items(): if w in embs.stoi: vec = t(embs.vectors[embs.stoi[w]]) self.model.decoder.lt.weight.data[i] = vec cnt += 1 if opt['lookuptable'] in 
['unique', 'dec_out']: # also set encoder lt, since it's not shared self.model.encoder.lt.weight.data[i] = vec print('Seq2seq: initialized embeddings for {} tokens from {}.' ''.format(cnt, init)) if states: # set loaded states if applicable self.model.load_state_dict(states['model']) if self.use_cuda: self.model.cuda() if hasattr(self, 'model'): # if model was built, do more setup self.clip = opt.get('gradient_clip', -1) self.rank = opt['rank_candidates'] # set up tensors once self.xs = torch.LongTensor(1, 1) self.ys = torch.LongTensor(1, 1) if self.rank: self.cands = torch.LongTensor(1, 1, 1) # set up criteria if opt.get('numsoftmax', 1) > 1: self.criterion = nn.NLLLoss(ignore_index=self.NULL_IDX, size_average=False) else: self.criterion = nn.CrossEntropyLoss( ignore_index=self.NULL_IDX, size_average=False) if self.use_cuda: # push to cuda self.xs = self.xs.cuda() self.ys = self.ys.cuda() if self.rank: self.cands = self.cands.cuda() self.criterion.cuda() # set up optimizer lr = opt['learningrate'] optim_class = Seq2seqAgent.OPTIM_OPTS[opt['optimizer']] kwargs = {'lr': lr} if opt.get('momentum') > 0 and opt['optimizer'] in [ 'sgd', 'rmsprop' ]: kwargs['momentum'] = opt['momentum'] if opt['optimizer'] == 'sgd': kwargs['nesterov'] = True if opt['optimizer'] == 'adam': # https://openreview.net/forum?id=ryQu7f-RZ kwargs['amsgrad'] = True if opt['embedding_type'].endswith('fixed'): print('Seq2seq: fixing embedding weights.') self.model.decoder.lt.weight.requires_grad = False self.model.encoder.lt.weight.requires_grad = False if opt['lookuptable'] in ['dec_out', 'all']: self.model.decoder.e2s.weight.requires_grad = False self.optimizer = optim_class( [p for p in self.model.parameters() if p.requires_grad], **kwargs) if states.get('optimizer'): if states['optimizer_type'] != opt['optimizer']: print('WARNING: not loading optim state since optim class ' 'changed.') else: self.optimizer.load_state_dict(states['optimizer']) if self.use_cuda: for state in self.optimizer.state.values(): for k, v in state.items(): if isinstance(v, torch.Tensor): state[k] = v.cuda() self.scheduler = optim.lr_scheduler.ReduceLROnPlateau( self.optimizer, 'min', factor=0.5, patience=3, verbose=True) self.reset()
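# Hedged sketch of the embedding-initialization branch above: copy GloVe
# vectors into the lookup table, passing them through a fixed random
# projection when the model's embedding size is not 300. The function name
# `build_embedding` and the cache directory are illustrative, not ParlAI API.
import torch
import torch.nn as nn
import torchtext.vocab as vocab

def build_embedding(tok2ind, emb_size, cache_dir='glove_vectors'):
    """Initialize an nn.Embedding from GloVe, projecting 300-d vectors
    down to emb_size with a random projection when the sizes differ."""
    embs = vocab.GloVe(name='840B', dim=300, cache=cache_dir)
    lt = nn.Embedding(len(tok2ind), emb_size)
    if emb_size != 300:
        rp = torch.Tensor(300, emb_size).normal_()   # fixed random projection
        project = lambda x: torch.mm(x.unsqueeze(0), rp).squeeze(0)
    else:
        project = lambda x: x
    cnt = 0
    with torch.no_grad():
        for w, i in tok2ind.items():
            if w in embs.stoi:
                lt.weight.data[i] = project(embs.vectors[embs.stoi[w]])
                cnt += 1
    print('initialized embeddings for {} tokens'.format(cnt))
    return lt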
def __init__(self, emb_dim, hidden_dim, num_layers, heads, depth_size, filter_size,
             tokenizer, pretrained_file, pointer_gen, logger, weight_sharing=True,
             model_file_path=None, is_eval=False, load_optim=False,
             label_smoothing=False, multi_input=False, context_size=2,
             attention_fusion_type='mean'):
    super(TransformerSeq2Seq, self).__init__()
    self.tokenizer = tokenizer
    self.vocab_size = tokenizer.n_words
    self.embed_obj = Embedding(tokenizer, emb_dim, pretrained_file, logger)
    self.embedding = self.embed_obj.get_embedding()
    self.encoder = Encoder(emb_dim, hidden_dim, num_layers=num_layers,
                           num_heads=heads, total_key_depth=depth_size,
                           total_value_depth=depth_size, filter_size=filter_size)
    self.decoder = Decoder(emb_dim, hidden_dim, num_layers=num_layers,
                           num_heads=heads, total_key_depth=depth_size,
                           total_value_depth=depth_size, filter_size=filter_size,
                           multi_input=multi_input, context_size=context_size,
                           attention_fusion_type=attention_fusion_type)
    self.generator = Generator(hidden_dim, self.vocab_size, pointer_gen)
    self.pad_id = tokenizer.pad_id
    self.n_embeddings = tokenizer.n_words
    self.embeddings_size = emb_dim
    self.multi_input = multi_input
    if weight_sharing:
        # Share the weight matrix between the target word embedding
        # and the final logit dense layer.
        self.generator.proj.weight = self.embedding.weight
    self.criterion = nn.NLLLoss(ignore_index=self.pad_id)
    if label_smoothing:
        self.criterion = LabelSmoothing(size=self.vocab_size,
                                        padding_idx=self.pad_id, smoothing=0.1)
        self.criterion_ppl = nn.NLLLoss(ignore_index=self.pad_id)
    if is_eval:
        self.encoder = self.encoder.eval()
        self.decoder = self.decoder.eval()
        self.generator = self.generator.eval()
        self.embedding = self.embedding.eval()
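# The `weight_sharing` branch above ties the output projection to the input
# embedding, so a single matrix maps token ids to vectors and hidden states
# back to vocabulary logits. A minimal sketch of the same tying trick on a
# toy decoder head (class name and sizes are illustrative, not the repo's):
import torch
import torch.nn as nn
import torch.nn.functional as F

class TiedDecoderHead(nn.Module):
    def __init__(self, vocab_size, emb_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        self.proj = nn.Linear(emb_dim, vocab_size, bias=False)
        # nn.Linear stores weight as (out, in) = (vocab, emb), matching
        # nn.Embedding's (vocab, emb), so sharing is a direct assignment.
        self.proj.weight = self.embedding.weight

    def forward(self, hidden):
        return F.log_softmax(self.proj(hidden), dim=-1)

head = TiedDecoderHead(vocab_size=1000, emb_dim=64)
# both modules now point at the same storage
assert head.proj.weight.data_ptr() == head.embedding.weight.data_ptr()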
itos_total = ["EOS", "EOW", "SOS"] + itos_pos_fine + itos[:vocab_size] assert len(itos_total) == outVocabSize initrange = 0.1 crossEntropy = 10.0 import torch.nn.functional counter = 0 lastDevLoss = None failedDevRuns = 0 devLosses = [] lossModule = nn.NLLLoss() lossModuleTest = nn.NLLLoss(size_average=False, reduce=False, ignore_index=2) corpusBase = corpus_cached["train"] corpus = corpusBase.iterator() # get the initial grammar # perform splits on the grammar # run EM unary_rules = {} binary_rules = {}
def hyperparameters_tuning_LBFGS_minibatch(trainset, valset, batchsize_grid,
                                           history_size_grid, epochs, model_NN):
    training_loss = []
    test_loss = []
    training_accuracy = []
    test_accuracy = []
    times = []
    for bs in batchsize_grid:
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=bs, shuffle=True)
        valloader = torch.utils.data.DataLoader(valset, batch_size=bs, shuffle=True)
        dataiter = iter(trainloader)
        images, _ = next(dataiter)  # the .next() method is gone; use the built-in next()
        image_size = images[0].shape[1]
        input_size = int(image_size**2)
        output_size = 10
        for hs in history_size_grid:
            print("Minibatch size: ", bs)
            print("History size: ", hs)
            if model_NN == "FCNN":
                sizes = [input_size, 128, 64, output_size]
                model = fully_connected_NN(sizes)
                criterion = nn.NLLLoss()
                optimizer = optim.LBFGS(model.parameters(), max_iter=hs,
                                        history_size=hs,
                                        line_search_fn='strong_wolfe')
            elif model_NN == "CNN":
                model = ConvNet(image_size)
                criterion = nn.CrossEntropyLoss()
                optimizer = optim.LBFGS(model.parameters(), max_iter=hs,
                                        history_size=hs,
                                        line_search_fn='strong_wolfe')
            if model_NN == "FCNN":
                train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize(
                    optimizer, epochs, trainloader, valloader, model, criterion,
                    method="LBFGS")
            elif model_NN == "CNN":
                train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize_CNN(
                    optimizer, epochs, trainloader, valloader, model, criterion,
                    method="LBFGS")
            times.append(train_time)
            training_loss.append(train_losses)
            test_loss.append(test_losses)
            training_accuracy.append(train_accuracies)
            test_accuracy.append(test_accuracies)
    return training_loss, test_loss, training_accuracy, test_accuracy, times
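# Unlike first-order optimizers, optim.LBFGS re-evaluates the loss several
# times per step, so optimizer.step() must be given a closure that zeroes
# gradients, recomputes the loss, and calls backward(). A minimal sketch of
# the training step the `optimize` helper above presumably performs (the
# helper's internals are not shown here, so this is an assumption):
import torch
import torch.nn as nn
import torch.optim as optim

def lbfgs_epoch(model, loader, criterion, optimizer):
    """One epoch of minibatch L-BFGS using the required closure pattern."""
    model.train()
    running_loss = 0.0
    for images, labels in loader:
        inputs = images.view(images.shape[0], -1)   # flatten for the FCNN case

        def closure():
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            return loss

        loss = optimizer.step(closure)               # LBFGS calls the closure internally
        running_loss += loss.item()
    return running_loss / len(loader)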
def __setup_model(self, **kwargs):
    """Helper to Classifier.__init__()

    Set up the Classifier's model from checkpoint information, or from the
    information needed to load a new model and attach a classifier for
    training.

    Keyword Args:
        Will always be called with the following, which is enough information
        to load a new model and add a classifier to be trained:
            - model_architecture
            - output_size
            - hidden_layers
            - learn_rate
            - drop_p
            - class_to_idx
        If the following are also passed, the checkpoint state is loaded so
        the model can be used to classify images or so training can continue:
            - input_size
            - current_epoch
            - model_state_dict
            - optimizer_state_dict
    """
    self.model_architecture = kwargs['model_architecture'].upper()
    self.model = Classifier.IMAGENET_MODELS[self.model_architecture](pretrained=True)

    if 'input_size' in kwargs:
        # Loading from a checkpoint
        self.input_size = kwargs['input_size']
        self.model.current_epoch = kwargs['current_epoch']
    else:
        # No checkpoint, so create a new classifier for the model.
        # The number of features coming from the feature-detector CNN:
        if 'ALEXNET' in self.model_architecture:
            self.input_size = self.model.classifier[1].in_features
        elif 'VGG' in self.model_architecture:
            self.input_size = self.model.classifier[0].in_features
        elif 'DENSENET' in self.model_architecture:
            self.input_size = self.model.classifier.in_features
        # Freeze the feature-detector parameters to prevent backpropagating
        # through them.
        for param in self.model.parameters():
            param.requires_grad = False
        self.model.current_epoch = 1

    self.output_size = kwargs['output_size']
    self.hidden_layers = kwargs['hidden_layers']
    self.learn_rate = kwargs['learn_rate']
    self.drop_p = kwargs['drop_p']
    self.model.class_to_idx = kwargs['class_to_idx']
    self.model.classifier = Network(self.input_size, self.output_size,
                                    self.hidden_layers, self.drop_p)
    if 'model_state_dict' in kwargs:
        # load the state from checkpoint
        self.model.load_state_dict(kwargs['model_state_dict'])

    self.criterion = nn.NLLLoss()
    self.optimizer = optim.Adam(self.model.classifier.parameters(), lr=self.learn_rate)
    if 'optimizer_state_dict' in kwargs:
        # load the state from checkpoint
        self.optimizer.load_state_dict(kwargs['optimizer_state_dict'])
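# The setup above is the standard transfer-learning recipe: load a pretrained
# torchvision backbone, freeze its parameters, and train only a new classifier
# head with NLLLoss (so the head must end in LogSoftmax). A compact sketch of
# that recipe with a VGG16 backbone; the layer sizes and class count are
# illustrative, not the values used by this Classifier.
import torch.nn as nn
import torch.optim as optim
from torchvision import models

model = models.vgg16(pretrained=True)
for param in model.parameters():
    param.requires_grad = False                     # freeze the feature detector

in_features = model.classifier[0].in_features       # 25088 for VGG16
model.classifier = nn.Sequential(
    nn.Linear(in_features, 512),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(512, 102),
    nn.LogSoftmax(dim=1),                           # NLLLoss expects log-probabilities
)

criterion = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.003)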
def hyperparameters_tuning_LBFGS_new_minibatch(trainset, valset, batchsize_grid, max_iter_grid, epochs, model_NN): training_loss = [] test_loss = [] training_accuracy = [] test_accuracy = [] times = [] parameters = [] results = [] Names = [ "training_loss", "training_accuracy", "test_loss", "test_accuracy", "times", "parameters: batch iter" ] results.append(Names) for bs in batchsize_grid: trainloader = torch.utils.data.DataLoader(trainset, batch_size=bs, shuffle=True) valloader = torch.utils.data.DataLoader(valset, batch_size=bs, shuffle=True) dataiter = iter(trainloader) images, _ = dataiter.next() image_size = images[0].shape[1] input_size = int(image_size**2) output_size = 10 for max_iter_ in max_iter_grid: print("Minibatch size: ", bs) print("History size: ", max_iter_) parameter = [] if model_NN == "FCNN": sizes = [input_size, 128, 64, output_size] model = fully_connected_NN(sizes) criterion = nn.NLLLoss() optimizer = LBFGSNew(model.parameters(), max_iter=max_iter_, history_size=max_iter_, line_search_fn=True, batch_mode=True) elif model_NN == "CNN": model = ConvNet(image_size) criterion = nn.CrossEntropyLoss() optimizer = LBFGSNew(model.parameters(), max_iter=max_iter_, history_size=max_iter_, line_search_fn=True, batch_mode=True) elif model_NN == "CNN_BN": model = ConvNet_BN(image_size) criterion = nn.CrossEntropyLoss() optimizer = LBFGSNew(model.parameters(), max_iter=max_iter_, history_size=max_iter_, line_search_fn=True, batch_mode=True) if model_NN == "FCNN": train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize( optimizer, epochs, trainloader, valloader, model, criterion, method="LBFGS") elif model_NN == "CNN": train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize_CNN( optimizer, epochs, trainloader, valloader, model, criterion, method="LBFGS") elif model_NN == "CNN_BN": train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize_CNN( optimizer, epochs, trainloader, valloader, model, criterion, method="LBFGS") # save the parameters parameter = [] parameter.append(bs) parameter.append(max_iter_) parameters.append(parameter) times.append(train_time) training_loss.append(train_losses) test_loss.append(test_losses) training_accuracy.append(train_accuracies) test_accuracy.append(test_accuracies) results.append(training_loss) results.append(training_accuracy) results.append(test_loss) results.append(test_accuracy) results.append(times) results.append(parameters) return results
def __init__(self, weight=None):
    super(CrossEntropyLoss2d, self).__init__()
    self.loss = nn.NLLLoss(weight)
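# This wrapper is the common segmentation trick of pairing an explicit
# log_softmax over the channel dimension with NLLLoss, which accepts
# (N, C, H, W) log-probabilities and (N, H, W) integer targets. The forward
# method is not shown in this excerpt; a plausible sketch of it:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CrossEntropyLoss2d(nn.Module):
    def __init__(self, weight=None):
        super(CrossEntropyLoss2d, self).__init__()
        self.loss = nn.NLLLoss(weight)

    def forward(self, outputs, targets):
        # outputs: (N, C, H, W) raw scores; targets: (N, H, W) class indices
        return self.loss(F.log_softmax(outputs, dim=1), targets)

# quick shape check
crit = CrossEntropyLoss2d()
scores = torch.randn(2, 5, 8, 8)
labels = torch.randint(0, 5, (2, 8, 8))
print(crit(scores, labels))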
def hyperparameters_tuning_Curveball_minibatch(trainset, valset, batchsize_grid, epochs, model_NN): training_loss = [] test_loss = [] training_accuracy = [] test_accuracy = [] times = [] parameters = [] results = [] Names = [ "training_loss", "training_accuracy", "test_loss", "test_accuracy", "times", "parameters: batch iter" ] results.append(Names) for bs in batchsize_grid: trainloader = torch.utils.data.DataLoader(trainset, batch_size=bs, shuffle=True) valloader = torch.utils.data.DataLoader(valset, batch_size=bs, shuffle=True) dataiter = iter(trainloader) images, _ = dataiter.next() image_size = images[0].shape[1] input_size = int(image_size**2) output_size = 10 print("Minibatch size: ", bs) parameter = [] if model_NN == "FCNN": sizes = [input_size, 128, 64, output_size] model = fully_connected_NN(sizes) criterion = nn.NLLLoss() optimizer = CurveBall(model.parameters(), lr=0.1, momentum=0.9) elif model_NN == "CNN": model = ConvNet(image_size) criterion = nn.CrossEntropyLoss() optimizer = CurveBall(model.parameters(), lr=-1, momentum=-1) elif model_NN == "CNN_BN": model = ConvNet_BN(image_size) criterion = nn.CrossEntropyLoss() optimizer = CurveBall(model.parameters(), lr=-1, momentum=-1) if model_NN == "FCNN": train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize( optimizer, epochs, trainloader, valloader, model, criterion, method="CurveBall") elif model_NN == "CNN": train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize_CNN( optimizer, epochs, trainloader, valloader, model, criterion, method="CurveBall") elif model_NN == "CNN_BN": train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize_CNN( optimizer, epochs, trainloader, valloader, model, criterion, method="CurveBall") # save the parameters parameter = [] parameter.append(bs) parameters.append(parameter) times.append(train_time) training_loss.append(train_losses) test_loss.append(test_losses) training_accuracy.append(train_accuracies) test_accuracy.append(test_accuracies) results.append(training_loss) results.append(training_accuracy) results.append(test_loss) results.append(test_accuracy) results.append(times) results.append(parameters) return results
def main(): epochs = 10 batchSize = 64 lr = 0.00001 #writer = SummaryWriter('./logs') #train = pd.read_csv(f'train_sam.csv') #train.columns = ["article", "title"] TEXT = data.Field(tokenize=data.get_tokenizer('spacy'), lower=True, eos_token='_eos_') trn_data_fields = [("original", TEXT), ("summary", TEXT)] train, valid = data.TabularDataset.splits(path=f'', train='train_sam.csv', validation='valid_sam.csv', format='csv', skip_header=True, fields=trn_data_fields) TEXT.build_vocab(train, vectors=GloVe(name='6B', dim=200)) train_iter, val_iter = data.BucketIterator.splits( (train, valid), batch_sizes=(batchSize, batchSize), sort_key=lambda x: len(x.original), sort_within_batch=False, repeat=False) input_size = len(TEXT.vocab) hidden_size = 128 * 2 dropout = 0.5 num_layers = 1 bidirectional = True encoder = Encoder(input_size, hidden_size, num_layers, batchSize, bidirectional) decoder = Decoder(input_size, num_layers * hidden_size * (1 + int(bidirectional)), num_layers, dropout, batchSize) # define your LSTM loss function here #loss_func = F.cross_entropy() # define optimizer for lstm model #optim = Adam(model.parameters(), lr=lr) encoder_optimizer = Adam(encoder.parameters(), lr=lr) decoder_optimizer = Adam(decoder.parameters(), lr=lr) losses = [] valLosses = [] originals = [] summaries = [] criterion = nn.NLLLoss() for epoch in range(epochs): originals = [] genSumms = [] origSumms = [] batchNum = 0 step = 0 valBatchNum = 0 for batch in train_iter: batchS = len(batch) batchNum += 1 loss = 0 orig = batch.original summ = batch.summary encoder_outputs = torch.zeros( 80, encoder.hidden_size * (1 + int(bidirectional))) encoder_hidden = encoder.initHidden(batchS) for ei in range(len(orig)): encoder_output, encoder_hidden = encoder.forward( orig[ei], encoder_hidden, batchS) #print(encoder_outputs[ei].shape) #print(encoder_output[0,0].shape) encoder_outputs[ei] = encoder_output[0, 0] decoder_hidden = encoder_hidden decoder_input = torch.zeros(batchS).long() genSumm = [] origSumm = [] for di in range(len(summ)): decoder_output, decoder_hidden, decoder_attention = decoder.forward( decoder_hidden, encoder_outputs, decoder_input, batchS) loss += criterion(decoder_output, summ[di]) #print(decoder_output) #print(summ[di]) decoder_input = summ[di] DO = decoder_output.detach().numpy() genSumm.append(np.argmax(DO[5])) origSumm.append(summ[di][5]) #print(np.argmax(DO[5])) lossAvg = loss.item() / len(summ) encoder_optimizer.zero_grad() decoder_optimizer.zero_grad() loss.backward() #writer.add_scalar('training loss', loss.item(), step+1) step += 1 encoder_optimizer.step() decoder_optimizer.step() genSumms.append(genSumm) origSumms.append(origSumm) originals.append(orig[:, 5]) genTensorO = torch.IntTensor(origSumms[0]) genTensor = torch.IntTensor(genSumms[0]) if (batchNum % 25 == 0): losses.append(lossAvg) print("Epoch: [{}/{}], Batch:[{}/{}], Loss: {}".format( epoch, epochs, batchNum, len(train_iter), lossAvg)) translatedOrig = word_ids_to_sentence(originals[0], TEXT.vocab, join=' ') print(translatedOrig) translatedSummO = word_ids_to_sentence(genTensorO, TEXT.vocab, join=' ') print(translatedSummO) translatedSumm = word_ids_to_sentence(genTensor, TEXT.vocab, join=' ') print(translatedSumm) #genSumms = [] if (batchNum % 25 == 0): for batchVal in val_iter: valBatchNum += 1 valLoss = 0 batchS = len(batch) valOrig = batchVal.original valSumm = batchVal.summary encoder_outputs = torch.zeros( 80, encoder.hidden_size * (1 + int(bidirectional))) encoder_hidden = encoder.initHidden(batchS) for ei in range(len(valOrig)): 
encoder_output, encoder_hidden = encoder.forward( valOrig[ei], encoder_hidden, batchS) encoder_outputs[ei] = encoder_output[0, 0] decoder_hidden = encoder_hidden decoder_input = torch.zeros(batchS).long() #genSumm = [] for di in range(len(valSumm)): decoder_output, decoder_hidden, decoder_attention = decoder.forward( decoder_hidden, encoder_outputs, decoder_input, batchS) valLoss += criterion(decoder_output, valSumm[di]) decoder_input = valSumm[di] #DO = decoder_output.detach().numpy() #genSumm.append(np.argmax(DO[5])) valLossAvg = valLoss.item() / len(valSumm) valLosses.append(valLossAvg) print("VALLoss: {}".format(valLossAvg)) break plt.figure() plt.plot(losses) plt.plot(valLosses) plt.ylabel('Loss') plt.show()
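# The logging in the training loop above relies on a word_ids_to_sentence
# helper to map index tensors back into text via TEXT.vocab. That helper is
# not shown in this excerpt; a minimal version consistent with how it is
# called might look like this (stopping at the EOS token is my assumption):
def word_ids_to_sentence(id_tensor, vocab, join=None, eos_token='_eos_'):
    """Convert a 1-D tensor (or list) of vocabulary indices back to tokens."""
    ids = id_tensor.tolist() if hasattr(id_tensor, 'tolist') else list(id_tensor)
    tokens = []
    for idx in ids:
        token = vocab.itos[int(idx)]
        if token == eos_token:
            break
        tokens.append(token)
    return tokens if join is None else join.join(tokens)

# Example usage matching the calls above:
# word_ids_to_sentence(genTensor, TEXT.vocab, join=' ')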
def main(to_csv_path, train_obj, training_iter=10, sample_size=None):
    """Main function."""
    training_iter = int(training_iter)

    def _generate(start_letter):
        sample_char_idx = [char2idx[start_letter]]
        logger.debug('sample_char_idx: %s', sample_char_idx)
        input_ = input_tensor(sample_char_idx)
        hidden = model.init_hidden()
        output_name = start_letter
        for i in range(MAX_LENGTH):
            output, hidden = model(input_, hidden)
            _, topi = output.data.topk(1)
            logger.debug('topi before: %s', topi)
            topi = topi.item()
            logger.debug('topi: %s', topi)
            logger.debug('char2idx: %s', char2idx['EOS'])
            if topi == char2idx['EOS']:
                break
            else:
                letter = idx2char[topi]
                output_name += letter
            input_ = input_tensor([topi])
        return output_name

    def generate(start_letters):
        for start_letter in start_letters:
            print(_generate(start_letter))

    df = pd.read_csv(to_csv_path)
    text_for_train = df[train_obj].unique()
    all_char_set = set(
        [chr(i) for i in range(ord('a'), ord('z') + 1)] +
        [chr(i) for i in range(0x30a1, 0x30f5)] + [
            '0', '@', '!', '%', '?', '、', '。', '・', '.', 'ー', '/', '【', '】',
            '+', '-', '{', '}', '=', '(', ')', ':'
        ])
    print(all_char_set)
    char2idx = {char: i for i, char in enumerate(all_char_set)}
    char2idx['EOS'] = len(char2idx)
    idx2char = {v: k for k, v in char2idx.items()}

    if sample_size is None:
        names_idxs = [[char2idx[char] for char in name_str]
                      for name_str in text_for_train]
    else:
        names_idxs = [[char2idx[char] for char in name_str]
                      for name_str in text_for_train[:int(sample_size)]]
    print(len(names_idxs))

    # build model
    model = LSTM(input_dim=len(char2idx), embed_dim=100, hidden_dim=128)
    criterion = nn.NLLLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=0.001)

    for itr in range(training_iter + 1):
        random.shuffle(names_idxs)
        total_loss = 0
        for i, name_idxs in enumerate(names_idxs):
            input_ = input_tensor(name_idxs)
            target = target_tensor(name_idxs[1:], char2idx['EOS'])
            loss = train(model, criterion, input_, target)
            total_loss += loss
            if not (i % 100):
                print('step: {}'.format(i))
                optimizer.step()
        print(itr, '/', training_iter)
        print('loss {:.4f}'.format(float(total_loss / len(names_idxs))))

    generate([chr(i) for i in range(0x30a1, 0x30f5)])
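# input_tensor and target_tensor are used above but not defined in this
# excerpt. Following the usual char-RNN convention (the input is the sequence
# of character ids, the target is the same sequence shifted by one and
# terminated with EOS), they might look like this; the exact shapes the LSTM
# model expects are an assumption.
import torch

def input_tensor(char_idxs):
    """Sequence of character ids as a (seq_len,) LongTensor."""
    return torch.tensor(char_idxs, dtype=torch.long)

def target_tensor(shifted_char_idxs, eos_idx):
    """Targets are the input shifted left by one position, ending with EOS."""
    return torch.tensor(list(shifted_char_idxs) + [eos_idx], dtype=torch.long)

# With name_idxs = [c0, c1, ..., cn]:
#   input_tensor(name_idxs)                        -> [c0, c1, ..., cn]
#   target_tensor(name_idxs[1:], char2idx['EOS'])  -> [c1, ..., cn, EOS]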