import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets, transforms


def load_mnist_data(mnist_data_file_path='./mnist_data/', random_seed=1234,
                    validation_ratio=1 / 6):
    mnist_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307,), (0.3081,))])
    mnist_train_vali = datasets.MNIST(root=mnist_data_file_path, train=True,
                                      transform=mnist_transform,
                                      target_transform=None, download=True)
    mnist_test = datasets.MNIST(root=mnist_data_file_path, train=False,
                                transform=mnist_transform,
                                target_transform=None, download=True)

    # shuffle the indices once, then split into training and validation parts
    num_train_vali = len(mnist_train_vali)
    indices = list(range(num_train_vali))
    num_vali = int(validation_ratio * num_train_vali)
    np.random.seed(random_seed)
    np.random.shuffle(indices)
    train_sampler = SubsetRandomSampler(indices[num_vali:])
    valid_sampler = SubsetRandomSampler(indices[:num_vali])

    # materialise each split as a single batch and wrap it in a TensorDataset
    mnist_train = TensorDataset(*list(
        DataLoader(mnist_train_vali, num_train_vali - num_vali,
                   sampler=train_sampler))[0])
    mnist_vali = TensorDataset(*list(
        DataLoader(mnist_train_vali, num_vali, sampler=valid_sampler))[0])
    mnist_test = TensorDataset(
        *list(DataLoader(mnist_test, len(mnist_test)))[0])
    return mnist_train, mnist_vali, mnist_test
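# Usage sketch (not part of the source snippet; the batch size of 64 is an
# arbitrary example): wrap the returned TensorDatasets in DataLoaders.
mnist_train, mnist_vali, mnist_test = load_mnist_data()
train_loader = DataLoader(mnist_train, batch_size=64, shuffle=True)
vali_loader = DataLoader(mnist_vali, batch_size=len(mnist_vali))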
def load_learned_aae_and_clf(aaepath, optspath, clfpath, training_data_path):
    # load training and validation data
    data = joblib.load(training_data_path)
    # load opts dictionary
    opts = joblib.load(optspath)['opts']

    # setup validation dataset and dataloader (training is same as before)
    val_dataset = TensorDataset(torch.from_numpy(data['Xval']).float(),
                                torch.from_numpy(data['yval']).long())
    del data
    val_dataloader = DataLoader(val_dataset, batch_size=opts['batch_size'],
                                num_workers=0, drop_last=True)

    # load aae model
    aae = adverserial_autoencoder(val_dataloader, len(val_dataset),
                                  model_savepath='',
                                  downsample_to=(opts['downsample_to_H'],
                                                 opts['downsample_to_W']),
                                  nz=opts['nz'], ngf=opts['ngf'],
                                  ncrit_steps=opts['ncrit_steps'],
                                  grad_penalty=opts['grad_penalty'],
                                  batch_size=opts['batch_size'],
                                  lr=opts['learning_rate'],
                                  disc_weight_decay=opts['Disc_weight_decay'],
                                  WGAN_loss_lambda=opts['WGAN_loss_lambda'])
    aae.load_state_dict(torch.load(aaepath))
    aae.eval()

    # now load clf
    latent_clf = latent_classifier(nz=opts['nz'],
                                   nclasses=len(opts['label_types']))
    latent_clf.load_state_dict(torch.load(clfpath))
    latent_clf.eval()

    return aae, latent_clf, val_dataloader
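# Usage sketch (the paths below are hypothetical placeholders, not from the
# source): restore the trained AAE and latent classifier from disk.
aae, latent_clf, val_dataloader = load_learned_aae_and_clf(
    'checkpoints/aae.pt', 'checkpoints/opts.joblib',
    'checkpoints/latent_clf.pt', 'data/training_data.joblib')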
def tool_hyperparams(train_val_folds, labelled_data, labels, unlabelled_data,
                     output_folder, device):
    input_size = labelled_data.size(1)
    num_classes = labels.unique().size(0)
    state_path = '{}/state'.format(output_folder)
    hidden_layer_size = min(500, (input_size + num_classes) // 2)
    hidden_layers = range(1, 5)
    lr = 1e-3
    best_accuracies = [0, 0]
    best_params = None

    # normalise labelled and unlabelled data together
    normalizer = StandardScaler()
    all_data = torch.tensor(
        normalizer.fit_transform(
            torch.cat((labelled_data, unlabelled_data)).numpy())).float()
    labelled_data = all_data[:len(labels)]

    for h in hidden_layers:
        print('Ladder params {}'.format(h))
        model_name = '{}'.format(h)
        denoising_cost = [1000.0, 10.0] + ([0.1] * h)
        params = {
            'model name': model_name,
            'input size': input_size,
            'hidden layers': h * [hidden_layer_size],
            'denoising cost': denoising_cost,
            'num classes': num_classes,
        }
        accuracies = []
        for train_ind, val_ind in train_val_folds:
            s_d = TensorDataset(labelled_data[train_ind], labels[train_ind])
            unlabelled_data = torch.cat(
                (all_data[len(labels):], labelled_data[train_ind]))
            u_d = TensorDataset(unlabelled_data,
                                -1 * torch.ones(unlabelled_data.size(0)))
            v_d = TensorDataset(labelled_data[val_ind], labels[val_ind])

            s_dl = DataLoader(s_d, batch_size=100, shuffle=True)
            u_dl = DataLoader(u_d, batch_size=100, shuffle=True)
            v_dl = DataLoader(v_d, batch_size=len(v_d))

            model = LadderNetwork(input_size, [hidden_layer_size] * h,
                                  num_classes, denoising_cost, lr, device,
                                  model_name, state_path)
            model.train_model(100, (u_dl, s_dl, v_dl))
            validation_result = model.test_model(v_dl)
            print('Validation accuracy: {}'.format(validation_result))
            accuracies.append(validation_result)

            if device == 'cuda':
                torch.cuda.empty_cache()

        if mean(accuracies) > mean(best_accuracies):
            best_accuracies = accuracies
            best_params = params

    # retrain the best configuration on all data
    s_d = TensorDataset(labelled_data, labels)
    unlabelled_data = all_data
    u_d = TensorDataset(unlabelled_data,
                        -1 * torch.ones(unlabelled_data.size(0)))
    s_dl = DataLoader(s_d, batch_size=100, shuffle=True)
    u_dl = DataLoader(u_d, batch_size=100, shuffle=True)

    final_model = LadderNetwork(best_params['input size'],
                                best_params['hidden layers'],
                                best_params['num classes'],
                                best_params['denoising cost'], lr, device,
                                'ladder', state_path)
    final_model.train_model(100, (u_dl, s_dl, None))

    return final_model, normalizer, best_accuracies
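# Hypothetical call (not from the source): the toy tensors, the use of
# sklearn's StratifiedKFold to build train_val_folds, and the 'cpu' device
# string are all assumptions; LadderNetwork, StandardScaler and mean are
# assumed to be importable from the surrounding project.
import torch
from sklearn.model_selection import StratifiedKFold

labelled_data = torch.randn(200, 20)
labels = torch.randint(0, 2, (200,))
unlabelled_data = torch.randn(500, 20)
folds = list(StratifiedKFold(n_splits=3).split(labelled_data.numpy(),
                                               labels.numpy()))
model, normalizer, accs = tool_hyperparams(folds, labelled_data, labels,
                                           unlabelled_data,
                                           './outputs/ladder', 'cpu')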
        # end of the training loop: progress-bar print for the current batch
        print('[%03d/%03d] %2.2f sec(s) | %s |' % (epoch + 1, num_epoch,
              (time.time() - epoch_start_time), progress), end='\r', flush=True)

    # validation pass for this epoch
    model.eval()
    for i, data in enumerate(val_loader):
        val_pred = model(data[0].cuda())
        batch_loss = loss(val_pred, data[1].cuda())
        val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        val_loss += batch_loss.item()
        progress = ('#' * int(float(i) / len(val_loader) * 40)).ljust(40)
        print('[%03d/%03d] %2.2f sec(s) | %s |' % (epoch + 1, num_epoch,
              (time.time() - epoch_start_time), progress), end='\r', flush=True)

    val_acc = val_acc / len(val_set)
    train_acc = train_acc / len(train_set)
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' %
          (epoch + 1, num_epoch, time.time() - epoch_start_time,
           train_acc, train_loss, val_acc, val_loss))
    train_acc_.append(train_acc)
    val_acc_.append(val_acc)

    # checkpoint whenever the validation accuracy improves
    if val_acc > best_acc:
        with open('./acc.txt', 'w') as f:
            f.write(str(epoch) + '\t' + str(val_acc) + '\n')
        torch.save(model.state_dict(), model_path + 'model_cnn')
        best_acc = val_acc
        print('Model Saved!')
def main():
    parser = argparse.ArgumentParser(
        description='PyTorch graph convolutional neural net for whole-graph classification')
    parser.add_argument('--dataset', type=str, default="dataset/AEF_V_0.mat",
                        help='path of the dataset (default: dataset/AEF_V_0.mat)')
    parser.add_argument('--node_number', type=int, default=256,
                        help='node number of graph (default: 256)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='batch size (default: 32)')
    parser.add_argument('--k_hop', type=int, default=4,
                        help='number of aggregation hops (default: 4)')
    args = parser.parse_args()

    x_train, x_label, val_data, val_label = readfile(args.dataset)
    x_train = x_train.permute(2, 0, 1)
    x_label = torch.squeeze(x_label, dim=1).long()
    val_data = val_data.permute(2, 0, 1)
    val_label = torch.squeeze(val_label, dim=1).long()

    train_set = TensorDataset(x_train, x_label)
    val_set = TensorDataset(val_data, val_label)
    train_loader = DataLoader(train_set, batch_size=args.batch_size,
                              shuffle=True, num_workers=0)
    val_loader = DataLoader(val_set, batch_size=args.batch_size,
                            shuffle=True, num_workers=0)

    model = CNNnet(args.node_number, args.batch_size, args.k_hop)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # optimize all cnn parameters

    best_acc = 0.0
    num_epoch = 100

    for epoch in range(num_epoch):
        epoch_start_time = time.time()
        train_acc = 0.0
        train_loss = 0.0
        val_acc = 0.0
        val_loss = 0.0

        model.train()
        for i, data in enumerate(train_loader):
            optimizer.zero_grad()
            train_pred = model(data[0])
            batch_loss = loss(train_pred, data[1])
            batch_loss.backward()
            optimizer.step()
            train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            train_loss += batch_loss.item()

        model.eval()
        predict_total = []
        label_total = []
        for i, data in enumerate(val_loader):
            val_pred = model(data[0])
            batch_loss = loss(val_pred, data[1])
            predict_val = np.argmax(val_pred.cpu().data.numpy(), axis=1)
            predict_total = np.append(predict_total, predict_val)
            label_val = data[1].numpy()
            label_total = np.append(label_total, label_val)
            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

        # confusion-matrix counts over the whole validation set
        val_TP = ((predict_total == 1) & (label_total == 1)).sum().item()
        val_TN = ((predict_total == 0) & (label_total == 0)).sum().item()
        val_FN = ((predict_total == 0) & (label_total == 1)).sum().item()
        val_FP = ((predict_total == 1) & (label_total == 0)).sum().item()
        val_spe = val_TN / (val_FP + val_TN + 0.001)
        val_rec = val_TP / (val_TP + val_FN + 0.001)
        test_acc = (val_TP + val_TN) / (val_FP + val_TN + val_TP + val_FN + 0.001)
        val_acc = val_acc / len(val_set)

        print('%3.6f %3.6f %3.6f %3.6f' %
              (train_acc / len(train_set), train_loss, val_acc, val_loss))

        if val_acc > best_acc:
            with open('save/AET_V_0.txt', 'w') as f:
                f.write(str(epoch) + '\t' + str(val_acc) + '\t' +
                        str(val_spe) + '\t' + str(val_rec) + '\n')
            torch.save(model.state_dict(), 'save/model.pth')
            best_acc = val_acc

    for name, param in model.named_parameters():
        if param.requires_grad:
            print(param[0])
def main():
    # Alternative loaders (readfile, readfile_from_np, fixed Drive paths) were
    # used in earlier runs; this version reads the training csv from argv.
    x_train, x_label, val_data, val_label = readfile_from_csv(sys.argv[1], val_num=0.2)

    train_set = MyDataset(x_train, x_label, augmentation=8)
    val_set = TensorDataset(val_data, val_label)

    num_epoch = 50
    batch_size = 256

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=8)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=8)

    model = Classifier().cuda()

    train_loss_log = []
    train_acc_log = []
    val_loss_log = []
    val_acc_log = []

    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    best_acc = 0.0

    for epoch in range(num_epoch):
        epoch_start_time = time.time()
        train_acc = 0.0
        train_loss = 0.0
        val_acc = 0.0
        val_loss = 0.0
        print('Epoch [%03d/%03d]' % (epoch + 1, num_epoch))

        model.train()
        batch_num = len(train_loader)
        print('...........')
        for i, data in enumerate(train_loader):
            optimizer.zero_grad()
            train_pred = model(data[0].cuda())
            batch_loss = loss(train_pred, data[1].cuda())
            batch_loss.backward()
            optimizer.step()
            train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            train_loss += batch_loss.item()

        model.eval()
        val_batch_num = len(val_loader)
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())
            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

        val_acc = val_acc / len(val_set)
        train_acc = train_acc / len(train_set)
        print('[%03d/%03d] %2.2f sec(s) train_acc: %3.6f Loss: %3.6f | val_acc: %3.6f val_loss: %3.6f' %
              (batch_num, batch_num, time.time() - epoch_start_time,
               train_acc, train_loss / batch_num, val_acc, val_loss / val_batch_num))
        print('')

        # log
        train_loss_log.append(train_loss / batch_num)
        train_acc_log.append(train_acc)
        val_loss_log.append(val_loss / val_batch_num)
        val_acc_log.append(val_acc)
def main(isplot=False):
    """
    Main function
    - Load data
    - Create model
    - Train and evaluate
    :return:
    """
    # ----- PARAMETERS --------------------
    nb_pair = 1000
    batch_size = [100]
    nb_epochs = [100]
    learning_rate = [5e-3]
    nb_iteration = 10

    for ep in nb_epochs:
        for bs in batch_size:
            for lr in learning_rate:
                saved_train_accuracy = []
                saved_test_accuracy = []

                for i in range(nb_iteration):
                    print('\n------- ITERATION - %d -------' % (i + 1))

                    # ----- DATASET --------------------
                    train_input, train_target, _, test_input, test_target, _ = \
                        prologue.generate_pair_sets(nb_pair)

                    # Normalize by dividing by the max RGB value
                    train_input /= 255
                    test_input /= 255

                    # Split between training (80%) and validation (20%)
                    train_dataset = TensorDataset(train_input, train_target)
                    train_len = int(0.8 * len(train_dataset))
                    validation_len = len(train_dataset) - train_len
                    train_data, validation_data = random_split(
                        train_dataset, lengths=[train_len, validation_len])
                    train_loader = DataLoader(train_data, batch_size=bs,
                                              shuffle=False, num_workers=2)
                    validation_loader = DataLoader(validation_data, batch_size=bs,
                                                   shuffle=False, num_workers=2)

                    # Test
                    test_dataset = TensorDataset(test_input, test_target)
                    test_loader = DataLoader(test_dataset, batch_size=bs,
                                             shuffle=False, num_workers=2)

                    # ----- MODEL --------------------
                    # - Fully connected model
                    model = FCModel()
                    # - Small cnn
                    # model = ConvModel()
                    # - Deeper cnn
                    # model = DeepConvModel()

                    # Optimizer
                    optimizer = optim.Adam(model.parameters(), lr=lr)
                    # Loss function
                    criterion = nn.CrossEntropyLoss()

                    # ----- TRAINING + VALIDATION --------------------
                    nb_batch_train = train_len // bs
                    nb_batch_validation = validation_len // bs
                    train_losses = []
                    train_accuracies = []
                    validation_losses = []
                    validation_accuracies = []

                    for epoch in range(ep):
                        # TRAIN
                        train_loss, train_accuracy = train(
                            train_loader, model, criterion, optimizer, nb_batch_train)
                        train_losses.append(train_loss)
                        train_accuracies.append(train_accuracy)

                        # VALIDATION
                        validation_loss, validation_accuracy = validation(
                            validation_loader, model, criterion, nb_batch_validation)
                        validation_losses.append(validation_loss)
                        validation_accuracies.append(validation_accuracy)

                        # Print progress
                        if (epoch + 1) % (ep / 10) == 0:
                            print('Epoch [%d/%d] --- TRAIN: Loss: %.4f - Accuracy: %d%% --- '
                                  'VALIDATION: Loss: %.4f - Accuracy: %d%%'
                                  % (epoch + 1, ep, train_loss, train_accuracy,
                                     validation_loss, validation_accuracy))

                    # ----- PLOT --------------------
                    if isplot:
                        plt.figure()
                        plt.subplot(1, 2, 1)
                        plt.plot(train_losses, label='Train loss')
                        plt.plot(validation_losses, label='Validation loss')
                        plt.ylabel('Loss')
                        plt.xlabel('Epoch')
                        plt.legend(frameon=False)
                        plt.subplot(1, 2, 2)
                        plt.plot(train_accuracies, label='Train accuracy')
                        plt.plot(validation_accuracies, label='Validation accuracy')
                        plt.ylabel('Accuracy')
                        plt.xlabel('Epoch')
                        plt.legend(frameon=False)

                    # ----- TEST --------------------
                    train_accuracy = test(train_loader, model)
                    saved_train_accuracy.append(train_accuracy)
                    test_accuracy = test(test_loader, model)
                    saved_test_accuracy.append(test_accuracy)
                    print('Accuracy on train set: %d %%' % train_accuracy)
                    print('Accuracy on test set: %d %%' % test_accuracy)

                # ----- MEAN + STD OVER ITERATIONS --------------------
                print('\n------- NB EPOCHS - %d -------' % ep)
                print('------- LEARNING RATE - %f -------' % lr)
                print('------- BATCH SIZE - %d -------' % bs)
                print('Mean train accuracy {:.02f} --- Std train accuracy {:.02f} '
                      '\nMean test accuracy {:.02f} --- Std test accuracy {:.02f}'
                      .format(torch.FloatTensor(saved_train_accuracy).mean(),
                              torch.FloatTensor(saved_train_accuracy).std(),
                              torch.FloatTensor(saved_test_accuracy).mean(),
                              torch.FloatTensor(saved_test_accuracy).std()))
def __main__():
    (data, labels), (t, l) = load_MNIST_data()
    model = SimpleNetwork(784, [400, 400], 10, 1e-3, device,
                          'simple_saliency', './outputs/saliency')

    indices = stratified_k_fold(data, labels, 2)
    train, val = next(indices)
    train_dataset = TensorDataset(data[train], labels[train])
    val_dataset = TensorDataset(data[val], labels[val])
    t_dl = DataLoader(train_dataset, batch_size=100, shuffle=True)
    v_dl = DataLoader(val_dataset, batch_size=len(val_dataset))

    model.train_model(100, (None, t_dl, v_dl))

    # classify a single test image and generate saliency maps for the prediction
    input = t[12].unsqueeze(0)
    output = model.classify(input)
    _, prediction = output.max(1)
    print(prediction)

    vanilla_saliency = VanillaSaliency(model.Classifier, device).generate_saliency(input, prediction)
    guided_saliency = GuidedSaliency(model.Classifier, device).generate_saliency(input, prediction)

    if device.type == 'cuda':
        vanilla_saliency = vanilla_saliency.cpu()
        guided_saliency = guided_saliency.cpu()

    for s in [(vanilla_saliency, 'vanilla'), (guided_saliency, 'guided')]:
        saliency, string = s
        pos_map = saliency.clamp(min=0)
        pos_map = pos_map / pos_map.max()
        neg_map = -saliency.clamp(max=0)
        neg_map = neg_map / neg_map.max()
        abs_map = saliency.abs()
        abs_map = abs_map / abs_map.max()

        input = input.detach()
        input = input.view(28, 28)
        pos_map = pos_map.view(28, 28)
        neg_map = neg_map.view(28, 28)
        abs_map = abs_map.view(28, 28)

        figure = plt.figure(figsize=(8, 8), facecolor='w')
        plt.subplot(2, 2, 1)
        plt.title("Original Image")
        plt.imshow(input, cmap="gray", interpolation=None)
        plt.subplot(2, 2, 2)
        plt.title("Positive Saliency")
        plt.imshow(pos_map, cmap='gray', interpolation=None)
        plt.subplot(2, 2, 3)
        plt.title("Negative Saliency")
        plt.imshow(neg_map, cmap='gray', interpolation=None)
        plt.subplot(2, 2, 4)
        plt.title("Absolute Saliency")
        plt.imshow(abs_map, cmap='gray', interpolation=None)
        plt.savefig('./outputs/saliency/{}_saliency_maps.pdf'.format(string))
def tool_hyperparams(train_val_folds, labelled_data, labels, unlabelled_data,
                     output_folder, device):
    input_size = labelled_data.size(1)
    num_classes = labels.unique().size(0)
    state_path = '{}/state'.format(output_folder)
    hidden_layer_size = min(500, (input_size + num_classes) // 2)
    hidden_layers_vae = range(1, 3)
    hidden_layers_classifier = range(1, 3)
    z_size = [200, 100, 50]
    param_combinations = [(i, j, k) for i in hidden_layers_vae
                          for j in hidden_layers_classifier for k in z_size]
    lr = 1e-3
    best_accuracies = [0, 0]
    best_params = None

    # normalise labelled and unlabelled data together
    normalizer = MinMaxScaler()
    data = torch.tensor(
        normalizer.fit_transform(
            torch.cat((labelled_data, unlabelled_data)).numpy())).float()
    labelled_data = data[:len(labels)]
    unlabelled_data = data[len(labels):]

    for p in param_combinations:
        print('M2 params {}'.format(p))
        h_v, h_c, z = p
        model_name = '{}_{}_{}'.format(h_v, h_c, z)
        params = {
            'model name': model_name,
            'input size': input_size,
            'hidden layers vae': h_v * [hidden_layer_size],
            'hidden layers classifier': h_c * [hidden_layer_size],
            'latent dim': z,
            'num classes': num_classes,
        }
        accuracies = []
        for train_ind, val_ind in train_val_folds:
            s_d = TensorDataset(labelled_data[train_ind], labels[train_ind])
            v_d = TensorDataset(labelled_data[val_ind], labels[val_ind])
            s_dl = DataLoader(s_d, batch_size=100, shuffle=True)
            v_dl = DataLoader(v_d, batch_size=len(v_d))

            if len(unlabelled_data) == 0:
                u_dl = None
            else:
                u_d = TensorDataset(unlabelled_data,
                                    -1 * torch.ones(unlabelled_data.size(0)))
                u_dl = DataLoader(u_d, batch_size=100, shuffle=True)

            model = M2Runner(input_size, [hidden_layer_size] * h_v,
                             [hidden_layer_size] * h_c, z, num_classes,
                             nn.Sigmoid(), lr, device, model_name, state_path)
            model.train_model(100, (u_dl, s_dl, v_dl))
            validation_result = model.test_model(v_dl)
            print('Validation accuracy: {}'.format(validation_result))
            accuracies.append(validation_result)

            if device == 'cuda':
                torch.cuda.empty_cache()

        if mean(accuracies) > mean(best_accuracies):
            best_accuracies = accuracies
            best_params = params

    # retrain the best configuration on all data
    s_d = TensorDataset(labelled_data, labels)
    s_dl = DataLoader(s_d, batch_size=100, shuffle=True)
    if len(unlabelled_data) == 0:
        u_dl = None
    else:
        u_d = TensorDataset(unlabelled_data,
                            -1 * torch.ones(unlabelled_data.size(0)))
        u_dl = DataLoader(u_d, batch_size=100, shuffle=True)

    final_model = M2Runner(best_params['input size'],
                           best_params['hidden layers vae'],
                           best_params['hidden layers classifier'],
                           best_params['latent dim'],
                           best_params['num classes'], nn.Sigmoid(), lr,
                           device, 'm2', state_path)
    final_model.train_model(100, (u_dl, s_dl, None))

    return final_model, normalizer, best_accuracies
        train_loss += batch_loss.item()
        progress = ('#' * int(float(i) / len(train_loader) * 40)).ljust(40)
        print('[%03d/%03d] %2.2f sec(s) | %s |' % (epoch + 1, num_epoch,
              (time.time() - epoch_start_time), progress), end='\r', flush=True)

    model.eval()
    for i, data in enumerate(val_loader):
        val_pred = model(data[0].cuda())
        batch_loss = loss(val_pred, data[1].cuda())
        val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        val_loss += batch_loss.item()
        progress = ('#' * int(float(i) / len(val_loader) * 40)).ljust(40)
        print('[%03d/%03d] %2.2f sec(s) | %s |' % (epoch + 1, num_epoch,
              (time.time() - epoch_start_time), progress), end='\r', flush=True)

    val_acc = val_acc / len(val_set)
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' %
          (epoch + 1, num_epoch, time.time() - epoch_start_time,
           train_acc / len(train_set), train_loss, val_acc, val_loss))

    if val_acc > best_acc:
        with open('acc.txt', 'w') as f:
            f.write(str(epoch) + '\t' + str(val_acc) + '\n')
        torch.save(model.state_dict(), 'model.pth')
        best_acc = val_acc
        print('Model Saved!')
def main():
    # bash hw6_train.sh <train_x file> <train_y file> <test_x file> <dict.txt.big file>
    weight, train_x, train_y, val_x, val_y = readfile_from_csv(
        sys.argv[1], sys.argv[3], sys.argv[2], 'word2vec_noHMM.wv', val_num=0.2)

    train_set = TensorDataset(train_x, train_y)
    val_set = TensorDataset(val_x, val_y)

    num_epoch = 20
    batch_size = 1024

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=8)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=8)

    model = my_RNN_Net(weight).cuda()
    model.init_weights()

    train_loss_log = []
    train_acc_log = []
    val_loss_log = []
    val_acc_log = []

    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    best_acc = 0.0

    for epoch in range(num_epoch):
        epoch_start_time = time.time()
        train_acc = 0.0
        train_loss = 0.0
        val_acc = 0.0
        val_loss = 0.0
        print('Epoch [%03d/%03d]' % (epoch + 1, num_epoch))

        model.train()
        batch_num = len(train_loader)
        print('...........')
        for i, data in enumerate(train_loader):
            optimizer.zero_grad()
            train_pred = model(data[0].cuda())
            batch_loss = loss(train_pred, data[1].cuda())
            batch_loss.backward()
            # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
            optimizer.step()
            train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            train_loss += batch_loss.item()

        model.eval()
        val_batch_num = len(val_loader)
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())
            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

        val_acc = val_acc / len(val_set)
        train_acc = train_acc / len(train_set)
        print('[%03d/%03d] %2.2f sec(s) train_acc: %3.6f Loss: %3.6f | val_acc: %3.6f val_loss: %3.6f' %
              (batch_num, batch_num, time.time() - epoch_start_time,
               train_acc, train_loss / batch_num, val_acc, val_loss / val_batch_num))
        print('')

        # log
        train_loss_log.append(train_loss / batch_num)
        train_acc_log.append(train_acc)
        val_loss_log.append(val_loss / val_batch_num)
        val_acc_log.append(val_acc)

        if val_acc > best_acc:
            with open('models/acc.txt', 'w') as f:
                f.write('-BEST MODEL -\nepoch: ' + str(epoch) + '/' + str(num_epoch) +
                        '\t' + 'val_acc: ' + str(val_acc) + '\n')
            torch.save(model, 'models/best_model.pth')
            best_acc = val_acc
            print('** Best Model Updated! ***\n')

    torch.save(model, 'models/final_model.pth')

    # log
    train_loss_log = np.array(train_loss_log)
    train_acc_log = np.array(train_acc_log)
    val_loss_log = np.array(val_loss_log)
    val_acc_log = np.array(val_acc_log)