def cnn_seq2seq(): # Settings args = get_parser() utils.ensure_dir(args.results_dir) # Get CNN models information # Load json file and get model's information cnn_params = load_json(args.cnn_model_path, 'parameters.json') # Seq2Seq params seq2seq_params = load_json(args.cnnseq2seq_model_path, 'parameters.json') device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Load pre-trained CNN and CNN-Seq2Seq cnn_model = load_model_cnn(cnn_params, seq2seq_params['cnn_model_path'], 'cnn_model.pth')
data_type=data_type, emotion_res=emotion_res, do_normalize_data=do_normalize_data) return att_train, label_train, att_test, label_test if __name__ == '__main__': from timeit import default_timer as timer start_time = timer() # Settings args = get_parser() utils.ensure_dir(args.results_dir) if args.mode is 'test': # Load json file and get model's information json_path = os.path.join(args.results_dir, args.saved_model_parameters) with open(json_path, 'r') as fp: print("Loading JSON file for testing: " + json_path) params = json.load(fp) args.target_bin = params['target_bin'] args.data_type = params['data_type'] args.emotion_res = params['emotion_res'] args.batch_size = params['batch_size'] args.data_filename = params['data_filename'] args.data_dir = params['data_dir'] args.num_classes = params['num_classes']
def train(model, dataloader_train, dataloader_label_train, args, device):
    """Train a seq2seq model mapping image-derived sequences to audio sequences.

    Trains with L1 loss, logs per-step losses, periodically dumps the input
    image plus target/generated audio (16 kHz WAV), checkpoints the model,
    and finally plots the per-epoch loss curve and saves the run's args.

    Args:
        model: seq2seq network called as ``model(images, target)``; returns
            ``(outputs, _)``.
        dataloader_train: dict-like mapping keys -> input batches.
        dataloader_label_train: dict-like mapping keys -> target batches,
            iterated in lockstep with ``dataloader_train`` (assumes matching
            key order — TODO confirm both preserve insertion order).
        args: parsed arguments; also mutated: ``args.results_dir`` is replaced
            by the run-specific results directory.
        device: torch device for tensors.

    Returns:
        str: the run-specific results directory path.
    """
    # Loss and optimizer
    criterion = nn.L1Loss()  # SmoothL1Loss, NLLLoss(), CrossEntropyLoss()
    optimizer, partial_name = set_optimizer(model, args)
    # New results dir based on model's parameters
    res_dir = args.results_dir + '{}_trainSize_{}_testSize_{}/'.format(
        partial_name, args.train_samples_size, args.test_samples_size)
    args.results_dir = res_dir  # NOTE: mutates caller-visible args
    utils.ensure_dir(res_dir)
    # print("res_dir: {}".format(res_dir))
    log_file = open(res_dir + 'log.txt', 'w')
    # Reduce LR when the (last-step) loss plateaus.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=args.scheduler_factor, verbose=True)
    total_step = len(dataloader_train.keys())
    loss_arr = []   # per-epoch loss (last step of each epoch)
    epoch_arr = []  # epoch indices for plotting
    for epoch in range(args.num_epochs):
        for i, (im, la) in enumerate(
                zip(dataloader_train.keys(), dataloader_label_train.keys())):
            labels = dataloader_label_train[la]
            images = dataloader_train[im]
            # print("Shape images: {}, labels: {}".format(np.shape(images), np.shape(labels)))
            # Reshape to (batch, frames, feature_size) — assumes last dim of the
            # stored batch is the frame count; TODO confirm layout.
            images = images.reshape(-1, np.shape(images)[-1], args.input_size).to(device)  # bsx28x28
            labels = labels.reshape(-1, np.shape(labels)[-1], args.output_size).to(
                device)  # labels.to(device)
            # print("Shape after reshape images: {}, labels: {}".format(np.shape(images), np.shape(labels)))
            # Forward pass — the label sequence itself is fed as decoder target
            # (teacher forcing; presumably — verify against the model).
            target = labels  # images # 1-images
            outputs, _ = model(images, target)  # (2, 96, 90), (2, 6000, 8)
            loss = criterion(outputs, target)
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (i + 1) % 50 == 0:
                log_str = 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch + 1, args.num_epochs, i + 1, total_step, loss.item())
                print(log_str)
                log_file.write(log_str + '\n')
        # Scheduler/plot bookkeeping use the LAST step's loss of the epoch.
        scheduler.step(loss)
        loss_arr.append(loss.item())
        epoch_arr.append(epoch + 1)
        if (epoch + 1) % args.save_step == 0:
            # Input images (first sample of the last batch), transposed for saving.
            input_data = images.cpu().data.numpy()[0]
            input_reshaped = np.reshape(
                input_data, [np.shape(input_data)[1], np.shape(input_data)[0]])
            # Target audio
            images_white_data = target.cpu().data.numpy()[0]
            im_reshaped = np.reshape(images_white_data, [
                np.shape(images_white_data)[1], np.shape(images_white_data)[0]
            ])
            im_reshaped = flatten_audio(im_reshaped, args)
            # Generated audio
            outputs_data = outputs.cpu().data.numpy()[0]
            out_reshaped = np.reshape(
                outputs_data, [np.shape(outputs_data)[1], np.shape(outputs_data)[0]])
            out_reshaped = flatten_audio(out_reshaped, args)
            # Save audio, 16KHz
            from scipy.io.wavfile import write
            # Min-max rescale into [-1, 1] (NOTE(review): divides by max-min —
            # a constant input would divide by zero; confirm inputs vary).
            scaled = -1.0 + (1.0 - (-1.0)) * (
                input_reshaped - np.min(input_reshaped)) / (
                np.max(input_reshaped) - np.min(input_reshaped))
            imsave('{}{}_input.jpg'.format(res_dir, epoch + 1), scaled)
            scaled = -1.0 + (1.0 - (-1.0)) * (im_reshaped - np.min(im_reshaped)) / (
                np.max(im_reshaped) - np.min(im_reshaped))
            # Convert to 16-bit PCM range for WAV output.
            scaled = np.int16(scaled / np.max(np.abs(scaled)) * 32767)
            write('{}{}_target.wav'.format(res_dir, epoch + 1), 16000, scaled[0])
            scaled2 = np.int16(out_reshaped / np.max(np.abs(out_reshaped)) * 32767)
            write('{}{}_gen.wav'.format(res_dir, epoch + 1), 16000, scaled2[0])
            # imsave('{}{}_target.jpg'.format(res_dir, epoch + 1), images_white_data[0])
            # imsave('{}{}_gen.jpg'.format(res_dir, epoch + 1), outputs_data[0])
            # Save the model checkpoint
            torch.save(model.state_dict(), res_dir + args.saved_model)
    # Plot loss_epochs.svg file
    import matplotlib.pyplot as plt
    plt.figure(figsize=[6, 6])
    plt.plot(epoch_arr, loss_arr, '*-')
    plt.title('Training loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.grid('on')
    # plt.gca().set_position([0, 0, 1, 1])
    plt.savefig("{}loss_epochs.svg".format(res_dir))
    plt.cla()
    # Save args in json file so model can be fully loaded independently
    with open(os.path.join(res_dir, args.saved_model_parameters), 'w') as fp:
        json.dump(vars(args), fp, sort_keys=True, indent=4)
    log_file.close()
    return res_dir
def train(model, params, dataloader_label_train, dataloader_train,
          dataloader_audio_train, dataloader_label_test, dataloader_test,
          dataloader_audio_test, target_bin=True):
    """Jointly train the CNN-Seq2Seq model on audio and emotion-label targets.

    The combined model returns both an audio sequence and a CNN emotion
    prediction; the loss is the audio L1 loss, the emotion MSE loss, or
    their sum, selected by ``params['cost_function_type']``
    ('audio' / 'emotion' / anything else = both).

    Args:
        model: combined network called as ``model(img, target)`` returning
            ``(out, cnn_model, cnn_out, hidden_enc)``.
        params: dict of hyper-parameters/paths; mutated:
            ``params['results_dir']`` is replaced by the run directory.
        dataloader_label_train / dataloader_train / dataloader_audio_train:
            dict-likes iterated in lockstep (assumes matching key order —
            TODO confirm).
        dataloader_label_test / dataloader_test / dataloader_audio_test:
            test-set dict-likes forwarded to ``test_with_model``.
        target_bin: when False, the CNN output is squeezed before the loss.

    Returns:
        tuple: ``(lab, img, res_dir)`` — last label/image batch (on GPU)
        and the run's results directory.
    """
    cost_function_type = params['cost_function_type']
    criterion = nn.MSELoss()   # emotion-label loss
    criterion2 = nn.L1Loss()   # audio reconstruction loss
    optimizer, partial_name = set_optimizer_parameters(model.parameters(), params)
    # optimizer, partial_name = set_optimizer_parameters(cnn_model.parameters() + seq2seq_model.parameters())
    res_dir = params['results_dir'] + '{}_{}_{}_{}_trainSize_{}_testSize_{}_cost_{}/'.\
        format(params['data_type'], params['target_bin'], params['emotion_res'],
               partial_name, params['train_samples_size'],
               params['test_samples_size'], params['cost_function_type'])
    params['results_dir'] = res_dir  # NOTE: mutates caller-visible params
    utils.ensure_dir(params['results_dir'])
    # Save args in json file so model can be fully loaded independently
    save_json_with_params(params)
    log_file = open(res_dir + params['log_filename'], 'w')
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=params['scheduler_factor'], verbose=True)
    # Train
    ep_min_loss = 0
    # ep_max_acc = 0
    first_iter = True
    loss_arr = []        # per-epoch loss (last step of each epoch)
    epoch_arr = []
    max_num_epochs = params['num_epochs']
    step_loss_arr = []   # per-step loss, for the fine-grained plot
    step_arr = []
    step = 1
    for epoch in range(max_num_epochs):
        for d, l, a in zip(dataloader_train.keys(), dataloader_label_train.keys(),
                           dataloader_audio_train.keys()):
            label = dataloader_label_train[l]
            data = dataloader_train[d]
            audio = dataloader_audio_train[a]
            img = data
            lab = label
            lab_audio = audio
            # Audio reshaped to (batch, frames, output_size) on the first GPU.
            target = audio.reshape(
                -1, np.shape(audio)[-1], params['output_size']).to(
                params['gpu_num'][0])  # labels.to(device)
            with torch.no_grad():
                img = Variable(img).cuda(params['gpu_num'][0])
                lab = Variable(lab).cuda(params['gpu_num'][0])
            # print(np.shape(img))
            # ===================forward=====================
            # out, cnn_model, cnn_out, hidden_enc = model(img, [[SOS_token]]) # target
            out, cnn_model, cnn_out, hidden_enc = model(
                img, target)  # np.array([[SOS_token]])
            if not target_bin:
                cnn_out = cnn_out.squeeze()
            # Invert hidden dimensions (bs, 90, 96) -> (bs, 96, 90), where 90 is number of frames in 30 fps
            # images = hidden.reshape(-1, np.shape(hidden)[-1], args.input_size).cuda(args.gpu_num[0]) # bsx28x28
            # output = seq2seq_model(images, target)
            # print(np.shape(out))
            loss1 = criterion2(out, target)   # audio loss
            loss2 = criterion(cnn_out, lab)   # emotion loss
            if cost_function_type == 'audio':
                loss = loss1
            elif cost_function_type == 'emotion':
                loss = loss2
            else:
                loss = loss1 + loss2
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            # scheduler.step(loss)  # Test use of scheduler to change lr after it plateaus
            optimizer.step()
            step_loss_arr.append(loss.item())
            step_arr.append(step)
            step += 1
        # Per-epoch bookkeeping uses the LAST step's loss.
        scheduler.step(loss)
        loss_arr.append(loss.item())
        epoch_arr.append(epoch + 1)
        # Save steps
        if epoch % params['save_step'] == 0:
            # ===================log========================
            # Save model
            torch.save(model.state_dict(), res_dir + params['saved_model'])
            torch.save(cnn_model.state_dict(), res_dir + params['saved_model_cnn'])
            # last_test_label = {'0': lab.cpu()}
            # last_test_img = {'0': img.cpu()}
            # last_test_audio = {'0': lab_audio.cpu()}
            # acc, euc_dist_mean = test_with_model(model, last_test_label, last_test_img, last_test_audio, args,
            #                                      target_bin=target_bin, epoch=epoch+1)
            acc, euc_dist_mean = test_with_model(model, dataloader_label_test,
                                                 dataloader_test,
                                                 dataloader_audio_test, params,
                                                 target_bin=target_bin,
                                                 epoch=epoch + 1)
            if first_iter:
                # Initialize the "best so far" trackers on the first eval.
                min_loss = loss.item()
                max_acc = acc
                torch.save(model.state_dict(), res_dir + params['saved_model_best'])
                torch.save(cnn_model.state_dict(), res_dir + params['saved_model_cnn_best'])
                first_iter = False
            if loss.item() < min_loss:  # early stop (best loss), should be with validation set
                torch.save(model.state_dict(), res_dir + params['saved_model_best'])
                min_loss = loss.item()
                ep_min_loss = epoch + 1
                # if acc > max_acc:
                # NOTE(review): best-CNN checkpoint is keyed on loss, not on
                # accuracy — the accuracy criterion is commented out.
                torch.save(cnn_model.state_dict(), res_dir + params['saved_model_cnn_best'])
                # max_acc = acc
                # ep_max_acc = epoch + 1
            log_str = 'Epoch [{:.2f}%][{}/{}], Total loss: {:.4f}, Seq2Seq loss: {:.4f}, CNN [loss: {}, acc: {}, euc: {}]'.\
                format(100*(epoch + 1)/max_num_epochs, epoch + 1, max_num_epochs,
                       loss.item(), loss1.item(), loss2.item(), acc, euc_dist_mean)
            print(log_str)
            log_file.write(log_str + '\n')
    # Save model
    torch.save(model.state_dict(), res_dir + params['saved_model'])
    torch.save(cnn_model.state_dict(), res_dir + params['saved_model_cnn'])
    # Save log file
    best_model_str = "\nModel: ep {}, min loss: {}, CNN acc: {:.2f}\n".format(
        ep_min_loss, min_loss, acc)
    log_file.write(best_model_str)
    log_file.close()
    # Save training loss
    plt.figure(figsize=[6, 6])
    plt.plot(epoch_arr, loss_arr, '*-')
    plt.title('Training loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.grid('on')
    plt.savefig("{}training_loss.svg".format(res_dir))
    plt.cla()
    # Save training step loss
    plt.figure(figsize=[6, 6])
    plt.plot(step_arr, step_loss_arr, '*-')
    plt.title('Training step loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.grid('on')
    plt.savefig("{}training_step_loss.svg".format(res_dir))
    plt.cla()
    # Get weights
    # print("\nModel keys: {}".format(model.state_dict().keys()))
    return lab, img, res_dir
def __init__(self, data_filename, cnn_pth, cnn_seq2seq_pth, results_dir=''):
    """Prepare the results directory, load the dataset, and restore the
    pretrained CNN-Seq2Seq model with its saved parameters.

    Args:
        data_filename: path passed to ``self.load_data``.
        cnn_pth: path to the pretrained CNN weights.
        cnn_seq2seq_pth: path to the pretrained CNN-Seq2Seq weights.
        results_dir: output directory (created if missing).
    """
    # Output directory for results.
    self.results_dir = results_dir
    ensure_dir(self.results_dir)
    # Dataset loaded from the given file.
    self.data = self.load_data(data_filename)
    # Restore model and its hyper-parameters from the checkpoint paths.
    restored = self.load_model(cnn_pth, cnn_seq2seq_pth)
    self.cnnseq2seq_model, self.cnnseq2seq_params = restored
def train(model, args, dataloader_label_train, dataloader_train,
          dataloader_label_test, dataloader_test, bs=1, target_bin=True,
          criterion_type='MSE'):
    """Train the (optionally fuzzy) CNN classifier and track best train/test accuracy.

    Trains with MSE or cross-entropy loss, keeps a second Adam optimizer over
    the fuzzy learning-rate variables, periodically evaluates on both splits,
    checkpoints best-train/best-test models, and saves loss/accuracy plots.

    Fixes vs. previous revision:
      * The "Model Test Performance" plot (``test_acc.svg``/``.png``) now
        plots the TEST accuracy arrays; it previously re-plotted the train
        accuracy arrays.
      * The fuzzy-parameter log format string had six placeholders
        (``... defuzz, fc``) but only five arguments, raising ``IndexError``
        whenever ``args.model_type != 'vanilla'``; the dangling ``fc``
        placeholder is removed (and the "Fyzzy" typo fixed).

    Args:
        model: network called as ``model(img)`` returning ``(output, _)``.
        args: parsed arguments; also mutated: ``args.results_dir`` gains the
            run-specific subdirectory.
        dataloader_label_train / dataloader_train: dict-likes iterated in
            lockstep (assumes matching key order — TODO confirm).
        dataloader_label_test / dataloader_test: test-set dict-likes.
        bs: unused; kept for interface compatibility.
        target_bin: when False, the model output is squeezed before the loss.
        criterion_type: 'MSE' selects MSELoss, anything else CrossEntropyLoss.

    Returns:
        tuple: ``(lab, img, args.results_dir, max_train_acc)`` — last batch
        (on GPU), results directory, and best train accuracy seen.
    """
    if criterion_type == 'MSE':
        criterion = nn.MSELoss()  # .cuda()
    else:
        # size_average is a legacy kwarg (mean reduction) — kept for the
        # torch version this project targets.
        criterion = nn.CrossEntropyLoss(size_average=True)  # .cuda()
    partial_name = 'res_{}_{}_{}_{}_{}_ep_{}_bs_{}_lr_{}'.format(
        args.model_type, args.data_type, args.target_bin, args.emotion_res,
        criterion_type, str(args.num_epochs), str(args.batch_size),
        str(args.learning_rate))
    params = vars(args)
    # Per-layer fuzzy learning rates, themselves trainable tensors.
    mfc_lr = float(params['LR']["FUZZ"])
    wc_lr = params['LR']["CONV"]
    mfdc_lr = float(params['LR']["DEFUZZ"])
    lr_fuzz = Variable(torch.tensor(mfc_lr).float(), requires_grad=True)
    lr_conv1 = Variable(torch.tensor(wc_lr['conv1']).float(), requires_grad=True)
    lr_conv2 = Variable(torch.tensor(wc_lr['conv2']).float(), requires_grad=True)
    lr_conv3 = Variable(torch.tensor(wc_lr['conv3']).float(), requires_grad=True)
    lr_defuzz = Variable(torch.tensor(mfdc_lr).float(), requires_grad=True)
    # optimizer, partial_name = set_optimizer_parameters([model.parameters(), {lr_fuzz, lr_conv1, lr_conv2, lr_conv3, lr_defuzz}],
    #                                                    params, partial_name=partial_name)
    optimizer, partial_name = set_optimizer_parameters(model.parameters(), params,
                                                       partial_name=partial_name)
    optimizer_fuzz = torch.optim.Adam({lr_fuzz, lr_conv1, lr_conv2, lr_conv3, lr_defuzz},
                                      lr=params['learning_rate'],
                                      weight_decay=params['weight_decay'])
    args.results_dir = args.results_dir + partial_name + '/'
    utils.ensure_dir(args.results_dir)
    # Save args in json file so model can be fully loaded independently
    save_json(args)
    log_file = open(args.results_dir + args.log_filename, 'w')
    log_file_best_train = open(args.results_dir + args.log_filename_best_train, 'w')
    log_file_best_test = open(args.results_dir + args.log_filename_best_test, 'w')
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=args.scheduler_factor, verbose=True)
    scheduler_fuzz = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer_fuzz, mode='min', factor=args.scheduler_factor, verbose=True)
    # Train
    loss_arr = []
    epoch_arr = []
    first_iter = True
    train_acc_arr = []
    train_acc_ep_arr = []
    test_acc_arr = []
    test_acc_ep_arr = []
    max_train_acc = 0
    for epoch in range(args.num_epochs):
        for d, l in zip(dataloader_train.keys(), dataloader_label_train.keys()):
            label = dataloader_label_train[l]
            data = dataloader_train[d]
            img = data
            lab = label
            with torch.no_grad():
                img = Variable(img, requires_grad=False).cuda(args.gpu_num[0])
                lab = Variable(lab, requires_grad=False).cuda(args.gpu_num[0])
            # ===================forward=====================
            output, _ = model(img)
            if not target_bin:
                output = output.squeeze()
            loss = criterion(output, lab)
            # ===================backward====================
            optimizer.zero_grad()
            optimizer_fuzz.zero_grad()
            loss.backward()
            optimizer.step()
            optimizer_fuzz.step()
            # If fuzzy, update fuzzy variables with the learned rates.
            if args.model_type == 'fuzzy':
                fuzzy_update(model, params, mfc_lr=lr_fuzz,
                             wc_lr={'conv1': lr_conv1, 'conv2': lr_conv2,
                                    'conv3': lr_conv3},
                             mfdc_lr=lr_defuzz)
        # Per-epoch bookkeeping uses the LAST step's loss.
        scheduler.step(loss)
        scheduler_fuzz.step(loss)
        loss_arr.append(loss.item())
        epoch_arr.append(epoch + 1)
        # Save steps
        if epoch % args.save_step == 0:
            # ===================log========================
            # Save model
            torch.save(model.state_dict(), args.results_dir + args.saved_model)
            # Save best model
            if criterion_type == 'MSE':
                train_acc, _ = test_with_model(model, dataloader_label_train,
                                               dataloader_train, target_bin=target_bin)
                test_acc, _ = test_with_model(model, dataloader_label_test,
                                              dataloader_test, target_bin=target_bin)
            else:
                train_acc = test_cnfn(model, dataloader_label_train, dataloader_train)
                test_acc = test_cnfn(model, dataloader_label_test, dataloader_test)
            if first_iter:
                # Initialize "best so far" trackers on the first evaluation.
                max_train_acc = train_acc
                train_acc_arr.append(max_train_acc)
                max_test_acc = test_acc
                test_acc_arr.append(max_test_acc)
                max_train_acc_ep = epoch + 1
                train_acc_ep_arr.append(max_train_acc_ep)
                max_test_acc_ep = epoch + 1
                test_acc_ep_arr.append(max_test_acc_ep)
                torch.save(model.state_dict(), args.results_dir + args.saved_model_best_train)
                torch.save(model.state_dict(), args.results_dir + args.saved_model_best_test)
                first_iter = False
                log_str_best_train = 'Train: epoch [{}/{}], loss {:.4f}, acc {:.4f}'.\
                    format(epoch + 1, args.num_epochs, loss.item(), max_train_acc)
                log_str_best_test = 'Test: epoch [{}/{}], loss {:.4f}, acc {:.4f}'.\
                    format(epoch + 1, args.num_epochs, loss.item(), max_test_acc)
            else:
                # Best-test checkpoint only when test improves AND does not
                # exceed train (guards against eval noise/overfit artifacts).
                if (max_test_acc < test_acc) and (test_acc <= train_acc):
                    max_test_acc = test_acc
                    test_acc_arr.append(max_test_acc)
                    max_test_acc_ep = epoch + 1
                    test_acc_ep_arr.append(max_test_acc_ep)
                    torch.save(model.state_dict(), args.results_dir + args.saved_model_best_test)
                    log_str_best_test = 'Test: epoch [{}/{}], loss {:.4f}, acc {:.4f}'.\
                        format(epoch + 1, args.num_epochs, loss.item(), max_test_acc)
                if max_train_acc < train_acc:
                    max_train_acc = train_acc
                    train_acc_arr.append(max_train_acc)
                    max_train_acc_ep = epoch + 1
                    train_acc_ep_arr.append(max_train_acc_ep)
                    torch.save(model.state_dict(), args.results_dir + args.saved_model_best_train)
                    log_str_best_train = 'Train: epoch [{}/{}], loss {:.4f}, acc {:.4f}'.\
                        format(epoch + 1, args.num_epochs, loss.item(), max_train_acc)
            log_str = 'Training {}%, epoch [{}/{}], loss: {:.4f}, train_acc {:.4f} at {} ep, test_acc: {:.4f} at {} ep,' \
                      'lr: {}'.\
                format(100 * (epoch + 1) / args.num_epochs, epoch + 1, args.num_epochs,
                       loss.item(), max_train_acc, max_train_acc_ep, max_test_acc,
                       max_test_acc_ep, args.learning_rate)
            if not args.model_type == 'vanilla':
                # FIX: previous format string had a sixth 'fc' placeholder
                # with no matching argument -> IndexError.
                log_str += ', Fuzzy params: [fuzz {:.4f}, conv1 {:.4f}, conv2 {:.4f}, conv3 {:.4f}, defuzz {:.4f}]'.\
                    format(lr_fuzz, lr_conv1, lr_conv2, lr_conv3, lr_defuzz)
            print(log_str)
            log_file.write(log_str + '\n')
            log_file_best_train.write(log_str_best_train + '\n')
            log_file_best_test.write(log_str_best_test + '\n')
    # Save model
    torch.save(model.state_dict(), args.results_dir + args.saved_model)
    # Save training loss
    plt.figure(figsize=[6, 6])
    plt.plot(epoch_arr, loss_arr, '*-')
    plt.title('Training loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.grid('on')
    plt.savefig("{}training_loss.svg".format(args.results_dir))
    plt.savefig("{}training_loss.png".format(args.results_dir))
    plt.cla()
    # Train-accuracy curve.
    plt.figure(figsize=[6, 6])
    plt.plot(train_acc_ep_arr, train_acc_arr, '*-')
    plt.title('Model Train Performance')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.grid('on')
    plt.savefig("{}train_acc.svg".format(args.results_dir))
    plt.savefig("{}train_acc.png".format(args.results_dir))
    plt.cla()
    # Test-accuracy curve (FIX: previously plotted the train arrays).
    plt.figure(figsize=[6, 6])
    plt.plot(test_acc_ep_arr, test_acc_arr, '*-')
    plt.title('Model Test Performance')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.grid('on')
    plt.savefig("{}test_acc.svg".format(args.results_dir))
    plt.savefig("{}test_acc.png".format(args.results_dir))
    plt.cla()
    # Get weights
    # print("\nModel keys: {}".format(model.state_dict().keys()))
    return lab, img, args.results_dir, max_train_acc