def test(self, epoch):
    """Evaluate the network on the validation loader and update early stopping.

    The loss of a batch is the mean, over samples, of the squared distance
    between each embedding and the entry of ``self.c`` selected by its label.

    Args:
        epoch: Epoch number; only used in the progress message.

    Returns:
        Tuple ``(loss, stop)``: the loss averaged over the validation batches
        and whatever ``self.es.count`` returns for it.
    """
    self.net.eval()
    batch_losses = []
    with torch.no_grad():
        for inputs, labels, _ in Bar(self.dataloader_val):
            inputs = inputs.float().to(self.device)
            labels = labels.long()
            embeddings = self.net(inputs)
            sq_dist = torch.sum((embeddings - self.c[labels]) ** 2, dim=1)
            batch_losses.append(torch.mean(sq_dist).item())

    loss = sum(batch_losses) / len(self.dataloader_val)
    print('Testing ClasSVDD... Epoch: {}, Loss: {:.3}'.format(epoch, loss))
    stop = self.es.count(loss, self.net, self.c, self.args)
    return loss, stop
def save_alignments_as_fertilities(model, dataloader, folder, durations_filename):
    """Write the alignments produced by ``model`` to a text file as durations.

    Intended to be run with the duration_Extraction model checkpoint. Every
    batch is passed through ``model.generate`` and the resulting alignment is
    converted with ``get_fertilities``; each returned row becomes one line of
    comma-separated values in ``folder/durations_filename``.

    For dataloader, use get_dataloader(64, 'cuda', start_idx=0, end_idx=13099,
    shuffle=False, sampler=SequentialSampler)
    """
    out_path = os.path.join(folder, durations_filename)
    with open(out_path, 'w') as durations_file:
        for batch in Bar(dataloader):
            spectrs, slen, phonemes, plen, _text = batch
            # Feed the ground-truth spectrograms (supervised generation) to
            # get more reliable alignments.
            _out, alignment = model.generate(
                phonemes, plen, window=1, spectrograms=spectrs)
            fertilities = get_fertilities(alignment.cpu(), plen, slen)
            durations_file.writelines(
                ', '.join(str(x) for x in row) + '\n' for row in fertilities)
def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
    """Run one optimisation pass over ``data_loader``.

    Each batch is a mapping with ``"input_ids"``, ``"attention_mask"`` and
    ``"targets"`` entries. Gradients are clipped to a norm of 1.0 and both the
    optimizer and the scheduler are stepped once per batch.

    Returns:
        Tuple ``(accuracy, mean_loss)``: the number of correct predictions
        divided by ``n_examples`` and the mean of the per-batch losses.
    """
    model = model.train()
    batch_losses = []
    n_correct = 0

    for batch in Bar(data_loader):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        targets = batch["targets"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        preds = torch.max(outputs, dim=1).indices
        loss = loss_fn(outputs, targets)

        n_correct = n_correct + (preds == targets).sum()
        batch_losses.append(loss.item())

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    accuracy = n_correct.double() / n_examples
    return accuracy, np.mean(batch_losses)
def test_model(dataloader, model, device):
    """Evaluate ``model`` and collect labels, predictions and sample indices.

    Args:
        dataloader: Yields ``(inputs, labels, index)`` batches of tensors and
            exposes a sized ``sampler``.
        model: Network whose output is reduced with ``torch.max(outputs, 1)``
            to obtain the predicted class.
        device: Device the inputs and labels are moved to.

    Returns:
        dict with flat numpy arrays ``'GT'``, ``'Predictions'`` and
        ``'Index'`` plus ``'test_acc'``, the accuracy in percent rounded to
        two decimals.
    """
    # Per-batch chunks are gathered in lists and concatenated once at the
    # end; growing a numpy array with np.concatenate inside the loop copies
    # all previously collected data on every batch.
    gt_chunks, pred_chunks, index_chunks = [], [], []
    # A 0-d tensor (rather than the int 0) keeps ``.double()`` valid even
    # when the loader yields no batch at all.
    running_corrects = torch.zeros((), dtype=torch.long)

    model.eval()
    print('Testing Model...')
    with torch.no_grad():
        for inputs, labels, index in Bar(dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            gt_chunks.append(labels.cpu().numpy().ravel())
            pred_chunks.append(preds.cpu().numpy().ravel())
            index_chunks.append(index.cpu().numpy().ravel())
            running_corrects += torch.sum(preds == labels).cpu()

    def _flatten(chunks):
        # np.concatenate rejects an empty list, so fall back to an empty array.
        return np.concatenate(chunks) if chunks else np.empty(0, dtype=np.int64)

    test_acc = running_corrects.double() / (len(dataloader.sampler))
    print('Test Accuracy: {:4f}'.format(test_acc))

    test_dict = {
        'GT': _flatten(gt_chunks),
        'Predictions': _flatten(pred_chunks),
        'Index': _flatten(index_chunks),
        'test_acc': np.round(test_acc.cpu().numpy() * 100, 2)
    }
    return test_dict
def train(self):
    """Train the ClasSVDD model, validating after every epoch.

    Sets ``self.es``, ``self.loss`` (per-epoch training loss) and
    ``self.loss_t`` (per-epoch validation loss). Training stops early when
    ``self.test`` reports ``stop``; ``self.load_weights()`` is called at the
    end in either case.
    """
    # NOTE(review): indentation was lost in this copy of the file; the random
    # initialisation of ``self.c`` is assumed to belong to the ``else``
    # branch -- confirm against the original source.
    if self.args.pretrain == True:
        self.load_pretrained_weights()
    else:
        self.net.apply(weights_init_normal)
        self.c = torch.randn(self.args.latent_dim).to(self.device)

    self.es = EarlyStopping(patience=self.args.patience)
    optimizer = optim.Adam(
        self.net.parameters(),
        lr=self.args.lr,
        weight_decay=self.args.weight_decay,
    )
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=self.args.lr_milestones, gamma=0.1)

    self.loss = []
    self.loss_t = []
    for epoch in range(self.args.num_epochs):
        running = 0
        self.net.train()
        for inputs, labels, _ in Bar(self.dataloader_train):
            inputs = inputs.float().to(self.device)
            labels = labels.long()

            optimizer.zero_grad()
            embeddings = self.net(inputs)
            # Squared distance of every embedding to the entry of self.c
            # selected by its label.
            sq_dist = torch.sum((embeddings - self.c[labels]) ** 2, dim=1)
            loss = torch.mean(sq_dist)
            loss.backward()
            optimizer.step()
            running += loss.item()
        scheduler.step()

        epoch_loss = running / len(self.dataloader_train)
        print('Training ClasSVDD... Epoch: {}, Loss: {:.3f}'.format(
            epoch, epoch_loss))
        self.loss.append(epoch_loss)

        loss_test, stop = self.test(epoch)
        self.loss_t.append(loss_test)
        if stop:
            break
    self.load_weights()
def test_VaDE(self, epoch):
    """Evaluate VaDE on the test loader and query the early-stopping helper.

    Appends the per-batch averages of accuracy, KL divergence and
    reconstruction loss to ``self.acc_t``, ``self.dkl_t`` and ``self.rec_t``.

    Returns:
        The value of ``self.es.count`` called with the average
        reconstruction loss.
    """
    self.VaDE.eval()
    loss_sum = acc_sum = dkl_sum = rec_sum = 0
    with torch.no_grad():
        for x, y, _ in Bar(self.dataloader_test):
            x = x.to(self.device)
            y = y.to(self.device).long()
            loss, reconst_loss, kl_div, acc = self.forward_step.forward(
                'test', x, y, epoch)
            loss_sum += loss.item()
            acc_sum += acc.item()
            dkl_sum += kl_div.item()
            rec_sum += reconst_loss.item()

    n_batches = len(self.dataloader_test)
    mean_acc = acc_sum / n_batches
    mean_rec = rec_sum / n_batches
    self.acc_t.append(mean_acc)
    self.dkl_t.append(dkl_sum / n_batches)
    self.rec_t.append(mean_rec)
    print('Testing VaDE... Epoch: {}, Loss: {:.3f}, Acc: {:.3f}'.format(
        epoch, loss_sum / n_batches, mean_acc))
    stop = self.es.count(mean_rec, self.VaDE)
    return stop
def train_VaDE(self, epoch):
    """Train VaDE for one epoch and record the per-batch averages.

    Appends the averaged accuracy, KL divergence and reconstruction loss to
    ``self.acc``, ``self.dkl`` and ``self.rec`` and prints a summary line.
    """
    self.VaDE.train()
    loss_sum = acc_sum = dkl_sum = rec_sum = 0
    for x, y, _ in Bar(self.dataloader_train):
        self.optimizer.zero_grad()
        x = x.to(self.device)
        y = y.to(self.device).long()
        loss, reconst_loss, kl_div, acc = self.forward_step.forward(
            'train', x, y, epoch)
        loss.backward()
        self.optimizer.step()

        loss_sum += loss.item()
        # Unlike the other metrics, the accuracy is accumulated as returned
        # (no ``.item()``).
        acc_sum += acc
        dkl_sum += kl_div.item()
        rec_sum += reconst_loss.item()

    n_batches = len(self.dataloader_train)
    mean_acc = acc_sum / n_batches
    self.acc.append(mean_acc)
    self.dkl.append(dkl_sum / n_batches)
    self.rec.append(rec_sum / n_batches)
    print('Training VaDE... Epoch: {}, Loss: {:.3f}, Acc: {:.3f}'.format(
        epoch, loss_sum / n_batches, mean_acc))
def train_epoch(model: nn.Module, train_gen: DataLoader, optimizer: optim.Optimizer, criterion: nn.Module, verbose: bool = False):
    """Train ``model`` for one pass over ``train_gen``.

    Batches are moved to the GPU whenever CUDA is available. With
    ``verbose`` set, the loss of every batch is printed.
    """
    model.train()
    for images, target in Bar(train_gen):
        if torch.cuda.is_available():
            images, target = images.cuda(), target.cuda()

        optimizer.zero_grad()
        loss = criterion(model(images), target)
        loss.backward()
        optimizer.step()

        if not verbose:
            continue
        print("\t", "loss: ", loss.item())
def _train_epoch(self, dataloader):
    """Train for one epoch and log the average losses.

    Every batch is degraded (random noise, ``degrade_some``, frame dropout)
    before the forward pass; the total loss is the L1 loss plus the guided
    attention loss. Gradients are clipped to ``self.grad_clip``.

    Args:
        dataloader: Yields ``(spectrs, slen, phonemes, plen, text)`` batches.

    Returns:
        Tuple ``(mean_l1, mean_att)`` averaged over the batches of this
        epoch; both are 0 when the loader yields no batch.
    """
    self.train()
    t_l1, t_att = 0, 0
    n_batches = 0
    for batch in Bar(dataloader):
        n_batches += 1
        self.optimizer.zero_grad()
        spectrs, slen, phonemes, plen, text = batch

        s = add_random_noise(spectrs, hp.noise)
        s = degrade_some(self, s, phonemes, plen, hp.feed_ratio, repeat=hp.feed_repeat)
        s = frame_dropout(s, hp.replace_ratio)

        out, att_weights = self.forward(phonemes, s, plen)
        l1 = self.loss_l1(out, spectrs, slen)
        l_att = self.loss_att(att_weights, slen, plen)
        loss = l1 + l_att

        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.parameters(), self.grad_clip)
        self.optimizer.step()
        self.step += 1

        t_l1 += l1.item()
        t_att += l_att.item()
        self.logger.add_scalar('batch/total', loss.item(), self.step)

    # report average cost per batch; an explicit counter is used because the
    # loop variable of ``enumerate`` is never bound for an empty loader.
    denom = max(n_batches, 1)
    self.logger.add_scalar('train/l1', t_l1 / denom, self.epoch)
    self.logger.add_scalar('train/guided_att', t_att / denom, self.epoch)
    return t_l1 / denom, t_att / denom
def __compute_dev_loss(self, dataloader):
    '''
    Compute a single score for the trainer to return.

    The score is the macro-averaged Jaccard index between the flattened
    arg-max predictions and the flattened labels, averaged over the batches
    of ``dataloader``. The network is switched to evaluation mode for the
    computation and back to training mode afterwards, even if a batch fails.
    '''
    # Set the network to evaluation mode
    self.configured_network.eval()

    # Accumulates the per-batch Jaccard scores
    temp_iou = 0

    try:
        # Stop the accumulation of gradients
        with torch.no_grad():
            for images, labels in Bar(dataloader):
                # Tensors can be moved directly; the ``Variable`` wrapper is
                # deprecated and no longer needed.
                images = images.cuda()

                # Get the prediction
                outputs = self.configured_network(images)
                predictions = torch.argmax(outputs, 1)

                # Reshape, move to the CPU, and convert to numpy. The labels
                # are only consumed on the CPU, so they never visit the GPU.
                predictions = predictions.reshape(-1).cpu().numpy()
                labels = labels.reshape(-1).cpu().numpy()

                temp_iou += sk_metrics.jaccard_score(labels, predictions, average='macro')
    finally:
        # Reset the network to training mode
        self.configured_network.train()

    return temp_iou / len(dataloader)
def eval_uncertainty(model, test_loader, args):
    """Score the model's accuracy and how well its uncertainty tracks it.

    ``model(x)`` must return ``(outputs, uncertainty)``. Predictions are the
    arg-max of the outputs; ``convert_predict_and_true_to_binary`` turns
    predictions and labels into a binary vector which is used both for the
    accuracy and as the target of the ROC curve over the uncertainties.

    Returns:
        Tuple ``(accuracy, roc_auc_conf)``.
    """
    # eval mode (batchnorm uses moving mean/variance instead of mini-batch
    # mean/variance)
    model.eval()

    output_chunks, label_chunks, uncertainty_chunks = [], [], []
    with torch.no_grad():
        for x, y in Bar(test_loader):
            x = x.to(args.device)
            y = y.to(args.device, dtype=torch.long)
            outputs, uncertainty = model(x)
            output_chunks.append(outputs)
            label_chunks.append(y)
            uncertainty_chunks.append(uncertainty)

        pred = torch.cat(output_chunks).cpu().detach().numpy()
        groundtruth = torch.cat(label_chunks).cpu().detach().numpy()
        uncertainties = torch.cat(uncertainty_chunks).cpu().detach().numpy().flatten()

    predict_label = np.argmax(pred, axis=1)
    binary_predicted_true = convert_predict_and_true_to_binary(
        predicted=predict_label, true=groundtruth)
    accuracy = sum(binary_predicted_true) / len(pred)

    fpr, tpr, _ = roc_curve(binary_predicted_true, uncertainties)
    roc_auc_conf = auc(fpr, tpr)
    return accuracy, roc_auc_conf
def compute_stats(args, images, sample_size=10000):
    """Compute Mean and Std. Dev. of sample of dataset

    A sample of ``images`` is loaded through ``StatsDataset`` and, for every
    image, the mean and standard deviation over all positions after the
    second dimension are taken. The returned values are the averages of
    those per-image statistics over the sample (so ``std`` is the mean of
    the per-image deviations, not a pooled deviation).

    Args:
        args: Provides ``data_dir`` and ``resize_dim`` for ``StatsDataset``.
        images: Sized collection of images to sample from.
        sample_size: Upper bound on the number of sampled images.

    Returns:
        Tuple ``(mean, std)``.
    """
    # Never ask for more items than exist.
    # NOTE(review): assumes ``sample`` is ``random.sample``, which raises
    # ValueError when the requested size exceeds the population -- confirm.
    sample_size = min(sample_size, len(images))
    sample_images = sample(images, sample_size)
    sample_data = StatsDataset(sample_images, args.data_dir, args.resize_dim)
    sample_loader = DataLoader(sample_data, batch_size=32, shuffle=True,
                               num_workers=8)

    mean = 0.0
    std = 0.0
    for data in Bar(sample_loader):
        batch = data.get('image')
        batch_samples = batch.size(0)
        # Flatten everything after the second dimension.
        batch = batch.view(batch_samples, batch.size(1), -1)
        mean += batch.mean(2).sum(0)
        std += batch.std(2).sum(0)

    # Sums were taken over images, so divide by the number of images rather
    # than the number of batches (the last batch may be smaller).
    mean /= len(sample_loader.dataset)
    std /= len(sample_loader.dataset)
    return mean, std
def train_step(model, gpu, optimizer, dataloader, reg_margin):
    """Train ``model`` for one pass over ``dataloader`` and print loss and MAP.

    The margin handed to ``run_network`` decays exponentially with the
    iteration count and is floored at 0.4. Average precision is tracked
    with an ``APMeter`` fed with the flattened probabilities and
    ``data[2]``.
    """
    model.train(True)
    loss_total = 0.0
    error = 0.0
    step = 0
    tr_apm = APMeter()

    # Iterate over data.
    for data in Bar(dataloader):
        optimizer.zero_grad()
        step += 1
        decayed = 0.05 * np.exp(-1 * step / 500.) + (reg_margin - 0.05)
        reg = max(0.4, decayed)

        outputs, loss, probs, err = run_network(model, data, gpu, reg_margin=reg)
        error += err.item()
        loss_total += loss.item()
        loss.backward()
        optimizer.step()

        flat_probs = probs.view(-1, probs.shape[-1]).detach().cpu().numpy()
        flat_targets = data[2].view(-1, data[2].shape[-1]).cpu().numpy()
        tr_apm.add(flat_probs, flat_targets)

    epoch_loss = loss_total / step
    error = error / step
    split_name = dataloader.root.split('/')[-1]
    print('train-{} Loss: {:.4f} MAP: {:.4f}'.format(
        split_name, epoch_loss, tr_apm.value().mean()))
    tr_apm.reset()
def train(self):
    """Training the Deep SVDD model

    Builds the network, initialises it either from
    ``weights/pretrained_parameters.pth`` (network weights and center) or
    with ``weights_init_normal`` and a random center, and minimises the mean
    squared distance of the embeddings to the center. The trained network
    and the center are stored in ``self.net`` and ``self.c``.
    """
    net = network().to(self.device)

    if self.args.pretrain == True:
        # Load onto the CPU first so a checkpoint written on a GPU machine
        # can also be read on a CPU-only host; ``load_state_dict`` and the
        # ``.to(self.device)`` below place everything on the target device.
        state_dict = torch.load('weights/pretrained_parameters.pth',
                                map_location='cpu')
        net.load_state_dict(state_dict['net_dict'])
        c = torch.Tensor(state_dict['center']).to(self.device)
    else:
        net.apply(weights_init_normal)
        c = torch.randn(self.args.latent_dim).to(self.device)

    optimizer = optim.Adam(net.parameters(), lr=self.args.lr,
                           weight_decay=self.args.weight_decay)
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=self.args.lr_milestones, gamma=0.1)

    net.train()
    for epoch in range(self.args.num_epochs):
        total_loss = 0
        for x, _ in Bar(self.train_loader):
            x = x.float().to(self.device)

            optimizer.zero_grad()
            z = net(x)
            # Mean over the batch of the squared distance to the center.
            loss = torch.mean(torch.sum((z - c)**2, dim=1))
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        # The learning-rate schedule advances once per epoch.
        scheduler.step()
        print('Training Deep SVDD... Epoch: {}, Loss: {:.3f}'.format(
            epoch, total_loss / len(self.train_loader)))
    self.net = net
    self.c = c
def __call__(self, net, dloader, n_classes):
    """Return the mean IoU of ``net`` over ``dloader`` with multi-scale evaluation.

    For every batch the outputs of ``self.scale_crop_eval`` are summed over
    ``self.scales`` and the arg-max is taken as the prediction. Pixels whose
    label equals ``self.ignore_label`` are excluded from the confusion matrix.
    """
    confusion = torch.zeros(n_classes, n_classes).cuda().detach()
    confusion.requires_grad_(False)

    for imgs, label in Bar(dloader):
        imgs = imgs.cuda()
        label = label.squeeze(1).cuda()
        N, H, W = label.shape

        probs = torch.zeros((N, n_classes, H, W)).cuda()
        probs.requires_grad_(False)
        for sc in self.scales:
            probs += self.scale_crop_eval(net, imgs, sc, n_classes)
        # NOTE(review): indentation was lost in this copy of the file; the
        # cache is assumed to be released once per batch -- confirm.
        torch.cuda.empty_cache()

        preds = torch.argmax(probs, dim=1)
        keep = label != self.ignore_label
        # Encode each (label, prediction) pair as one index so a single
        # bincount yields the flattened confusion matrix.
        pair_index = label[keep] * n_classes + preds[keep]
        counts = torch.bincount(pair_index, minlength=n_classes**2)
        confusion += counts.view(n_classes, n_classes)

    intersection = confusion.diag()
    union = confusion.sum(dim=0) + confusion.sum(dim=1) - confusion.diag()
    ious = intersection / union
    miou = ious.mean()
    return miou.item()
def predict(dataloader, model, device, ITL=False):
    """Run ``model`` over ``dataloader`` and return labels and predictions.

    The model is moved to ``device`` and wrapped with a softmax over
    dimension 1 before evaluation; the test accuracy is printed.

    Args:
        dataloader: Yields ``(inputs, labels)`` batches of tensors and
            exposes a sized ``sampler``.
        model: Network to evaluate.
        device: Device used for the model and the batches.
        ITL: When true, ``labels`` is reduced with ``torch.max(labels, 1)``
            to class indices before it is compared and stored.

    Returns:
        Tuple ``(GT, Predictions)`` of flat numpy arrays.
    """
    # Per-batch chunks are gathered in lists and concatenated once at the
    # end; growing a numpy array inside the loop copies all previously
    # collected data on every batch.
    gt_chunks, pred_chunks = [], []
    # A 0-d tensor (rather than the int 0) keeps ``.double()`` valid even
    # when the loader yields no batch at all.
    running_corrects = torch.zeros((), dtype=torch.long)

    model = model.to(device)
    model = nn.Sequential(model, nn.Softmax(dim=1))
    model.eval()

    # Iterate over data.
    with torch.no_grad():
        for inputs, labels in Bar(dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # forward
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            if ITL:
                _, labels = torch.max(labels, 1)

            gt_chunks.append(labels.cpu().numpy().ravel())
            pred_chunks.append(preds.cpu().numpy().ravel())
            running_corrects += torch.sum(preds == labels).cpu()

    test_acc = running_corrects.double() / len(dataloader.sampler)
    print('Test Accuracy: {:4f}'.format(test_acc))

    # np.concatenate rejects an empty list, so fall back to empty arrays.
    empty = np.empty(0, dtype=np.int64)
    GT = np.concatenate(gt_chunks) if gt_chunks else empty
    Predictions = np.concatenate(pred_chunks) if pred_chunks else empty
    return GT, Predictions
def confidnet_score(model, test_loader):
    """Collect the model's uncertainties and a per-sample correctness flag.

    ``model(x)`` must return ``(outputs, uncertainty)``. Batches are moved
    to the module-level ``device``.

    Returns:
        Tuple ``(uncertainties, binary_predicted_true)``: a flat numpy array
        and a list of Python booleans derived from
        ``convert_predict_and_true_to_binary``.
    """
    # eval mode (batchnorm uses moving mean/variance instead of mini-batch
    # mean/variance)
    model.eval()

    output_chunks, label_chunks, uncertainty_chunks = [], [], []
    with torch.no_grad():
        for x, y in Bar(test_loader):
            x = x.to(device)
            y = y.to(device, dtype=torch.long)
            outputs, uncertainty = model(x)
            output_chunks.append(outputs)
            label_chunks.append(y)
            uncertainty_chunks.append(uncertainty)

        pred = torch.cat(output_chunks).cpu().detach().numpy()
        groundtruth = torch.cat(label_chunks).cpu().detach().numpy()
        uncertainties = torch.cat(
            uncertainty_chunks).cpu().detach().numpy().flatten()

    predict_label = np.argmax(pred, axis=1)
    flags = convert_predict_and_true_to_binary(
        predicted=predict_label, true=groundtruth)
    binary_predicted_true = [bool(flag == 1) for flag in flags]
    return uncertainties, binary_predicted_true
def train_model(model, dataloaders, criterion, optimizer, device,
                saved_bins=None, saved_widths=None, histogram=True,
                num_epochs=25, scheduler=None, dim_reduced=True):
    """Train ``model`` and keep the weights with the best test accuracy.

    Every epoch runs a ``'train'`` phase followed by a ``'test'`` phase. The
    weights of the epoch with the highest test accuracy are restored into
    ``model`` before returning.

    Args:
        model: Network to train.
        dataloaders: Mapping with ``'train'`` and ``'test'`` loaders that
            yield ``(inputs, labels, index)`` and expose a sized ``sampler``.
        criterion: Loss applied to ``(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        device: Device the batches are moved to.
        saved_bins, saved_widths: Arrays receiving the histogram-layer
            centers and widths at row ``epoch + 1``; must be provided when
            ``histogram`` is true.
        histogram: Whether to record the histogram-layer parameters.
        num_epochs: Number of epochs.
        scheduler: Optional learning-rate scheduler stepped once per epoch.
        dim_reduced: Selects ``model.histogram_layer[-1]`` (true) or
            ``model.histogram_layer`` (false) as the layer to record.

    Returns:
        dict with the best weights, the accuracy and error histories of both
        phases, ``best_epoch`` and the ``saved_bins``/``saved_widths`` arrays.
    """
    since = time.time()

    test_acc_history = []
    train_acc_history = []
    train_error_history = []
    test_error_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    # Bound up front: if no test epoch ever exceeds ``best_acc`` the result
    # dict below would otherwise reference an unassigned name.
    best_epoch = 0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a test phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for idx, (inputs, labels, index) in enumerate(Bar(dataloaders[phase])):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the train phase
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: the batch loss is weighted by the batch size so
                # the epoch loss is a per-sample average
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / (len(dataloaders[phase].sampler))
            epoch_acc = running_corrects.double() / (len(
                dataloaders[phase].sampler))

            if phase == 'train':
                if scheduler is not None:
                    scheduler.step()
                train_error_history.append(epoch_loss)
                train_acc_history.append(epoch_acc)
                if (histogram):
                    if dim_reduced:
                        # save bins and widths
                        saved_bins[epoch + 1, :] = model.histogram_layer[
                            -1].centers.detach().cpu().numpy()
                        saved_widths[epoch + 1, :] = model.histogram_layer[
                            -1].widths.reshape(-1).detach().cpu().numpy()
                    else:
                        # save bins and widths
                        saved_bins[
                            epoch +
                            1, :] = model.histogram_layer.centers.detach().cpu(
                            ).numpy()
                        saved_widths[
                            epoch +
                            1, :] = model.histogram_layer.widths.reshape(
                                -1).detach().cpu().numpy()

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_epoch = epoch
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'test':
                test_error_history.append(epoch_loss)
                test_acc_history.append(epoch_acc)

            print()
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_acc))
            print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best test Acc: {:4f}'.format(best_acc))
    print()

    # load best model weights
    model.load_state_dict(best_model_wts)

    train_dict = {
        'best_model_wts': best_model_wts,
        'test_acc_track': test_acc_history,
        'test_error_track': test_error_history,
        'train_acc_track': train_acc_history,
        'train_error_track': train_error_history,
        'best_epoch': best_epoch,
        'saved_bins': saved_bins,
        'saved_widths': saved_widths
    }
    return train_dict
def train_dnn(train_data_file='/workspace/app/triplet_v2/sd/train_data.npz', val_folder='/workspace/app/triplet_v2/val_data', col_name_file='/workspace/app/triplet_v2/sd/col_names.npz', dnn_file='/workspace/app/triplet_v2/sd/dnn_model.pth', c_dnn_file='/workspace/app/triplet_v2/sd/c_dnn_model.pth', out_file='/workspace/app/triplet_v2/results_cm.txt', num_epochs=250, lr=0.00001):
    """Train the DNN classifier and keep the model that validates best.

    The training arrays ``X`` and ``y`` are read from ``train_data_file``.
    After every epoch the current weights are written to ``c_dnn_file``;
    when validation files exist and the first value returned by
    ``proc_validation`` is at least as high as the best seen so far, the
    weights are also written to ``dnn_file``. Training resumes from
    ``c_dnn_file`` when that file exists and loads successfully.

    NOTE(review): indentation was lost in this copy of the file; the nesting
    below is reconstructed from the statements themselves -- confirm against
    the original source.

    Args:
        train_data_file: ``.npz`` archive with arrays ``'X'`` and ``'y'``.
        val_folder: Folder whose ``*.csv`` files are used for validation.
        col_name_file: Passed through to ``proc_validation``.
        dnn_file: Destination of the best model's state dict.
        c_dnn_file: Destination of the current model's state dict.
        out_file: Passed through to ``proc_validation``.
        num_epochs: Number of training epochs.
        lr: Learning rate of the Adamax optimizer.

    Returns:
        ``True`` once training has finished.
    """
    print("The training DNN has been started...")
    # load list of validation files
    list_val_files = glob.glob(val_folder+'/*.csv')
    s_data = np.load(train_data_file, allow_pickle = True)
    # split into X and y
    X=s_data['X']
    y=s_data['y']
    # convert to tensor
    X_train=torch.tensor(X).float()
    y_train=torch.tensor(y).long().unsqueeze(1)
    # count unique values
    #unique_y_train, counts_y_train = np.unique(y_train, return_counts=True)
    # create train data set
    train_data = data_utils.TensorDataset(X_train, y_train)
    train_loader = data_utils.DataLoader(train_data, batch_size=16,shuffle=True)#, sampler=sm)
    # set model; the number of columns of X is handed to the classifier
    model =dnn.DNN_ClassifierM(X.shape[1])
    # decision of training process: resume from the current checkpoint when
    # it loads; otherwise delete it and start from a fresh network
    if os.path.exists(c_dnn_file):
        try:
            model.load_state_dict(torch.load(c_dnn_file))
            print("The pre-trained network has been loaded!")
        except Exception:
            os.remove(c_dnn_file)
            print("New network has been started to train (maybe something was wrong with source data set)!")
    # set optimizer
    optimizer = torch.optim.Adamax(model.parameters(), lr=lr)
    # set loss function
    #class_weights = torch.FloatTensor(np.sum(counts_y_train)/counts_y_train)
    #class_weights = torch.FloatTensor(counts_y_train)
    #class_weights = class_weights / torch.sum(class_weights)
    class_weights = dnn.get_weights(y)
    print("The class weights:",class_weights)
    criterion = torch.nn.NLLLoss(weight=class_weights) # set weights
    #criterion = dnn.WBCELoss
    # set the train mode
    model.train()
    # Baseline to beat: the scores of an already saved best model, if any
    min_prec=0
    val_ac=0
    if os.path.exists(dnn_file):
        val_res = proc_validation(list_val_files,col_name_file,dnn_file, out_file, verb=False)
        min_prec = val_res[0]
        val_ac = val_res[1]
        print("The best (current) minimal validation precision:",min_prec,"%")
    best_epoch=1
    for i in range(num_epochs):
        #model.train()
        print('Epoch: {}'.format(i+1))
        # set the list of true values and predictions
        all_y=np.array([])
        all_pr=np.array([])
        for k, (sX_train, sy_train) in enumerate(Bar(train_loader)):
            # Set gradients to zero
            optimizer.zero_grad()
            # Forward pass
            spr_train = model(sX_train) # get log prob
            # Compute Loss; the targets are flattened to one dimension
            train_loss = criterion(spr_train,sy_train.view(-1)) # loss function
            #print(sy_train.view(-1))
            # Backward pass
            train_loss.backward() # estimate gradient for training loss
            # Update gradients
            optimizer.step() # parameters update
            # get classes: the index of the largest value per row
            spr_train = spr_train.exp().detach()
            _, spr_train = torch.max(spr_train,1)
            # Add to list true values and predictions
            all_y=np.concatenate([all_y,sy_train.detach().view(-1).numpy()])
            all_pr=np.concatenate([all_pr,spr_train.numpy()])
            #print(sy_train.detach().view(-1).numpy().shape)
        # Print base results
        # Create confusion matrix for training results; it is normalised by
        # its column sums (1e-3 avoids a division by zero) and expressed in
        # percent
        cm_train=confusion_matrix(all_y, all_pr)
        pcm_train=np.round(100*cm_train/(1e-3+np.sum(cm_train,axis=0)),2)
        #print(pcm_train)
        #print(np.diagonal(pcm_train))
        print("The minimal training precision for one class",np.min(np.diagonal(pcm_train)),"%")
        print("The training accuracy",np.round(accuracy_score(all_y, all_pr)*100,2),"%")
        # Save current model
        torch.save(obj=model.state_dict(),f=c_dnn_file)
        # Validation of the checkpoint that was just written
        if len(list_val_files)>0:
            val_res = proc_validation(list_val_files,col_name_file,c_dnn_file, out_file, verb=False)
            c_min_prec = val_res[0]
            c_ac_val = val_res[1]
            # ``>=`` means a tie also replaces the stored best model
            if c_min_prec>=min_prec:
                min_prec=c_min_prec
                val_ac=c_ac_val
                torch.save(obj=model.state_dict(),f=dnn_file)
                best_epoch=i+1
            print("The best minimal validation precision for one class:",min_prec,"%; Epoch:",best_epoch)
            print("The best validation accuracy:",val_ac,"%")
    # Final report on the best saved model
    val_res = proc_validation(list_val_files,col_name_file,dnn_file, out_file, verb=False)
    min_prec = val_res[0]
    val_ac = val_res[1]
    print("The best minimal validation precision for one class:",min_prec,"%; Epoch:",best_epoch)
    print("The best validation accuracy:",val_ac,"%")
    print("The work has been finished! You can run again this program for continue training (press F5)")
    return True
help="Path to dataset") args = parser.parse_args() collate = Collate('cuda' if torch.cuda.is_available() else 'cpu', standardize=False) dl = DataLoader(AudioDataset(HPText.dataset, alignments=True, end_idx=args.num_samples), collate_fn=collate, batch_size=args.batch_size, shuffle=False) maxi = float('-inf') mini = float('inf') mean = 0 std = 0 w = 0 for i, b in enumerate(Bar(dl), 1): s, slen, _, plen, _, _ = b m = mask(s, slen, dim=1) maxi = max(maxi, masked_max(s, m)) mini = min(mini, masked_min(s, m)) mean = mean + (masked_mean(s, m) - mean) / i std = std + (masked_std(s, m) - std) / i ww = sum([sl / pl for sl, pl in zip(slen, plen)]) / len(slen) w = w + (ww - w) / i print('min: ', mini.item(), '\nmax: ', maxi.item(), '\nmean: ', mean.item(), '\nstd: ', std.item(), '\nw: ', w)
def train_step(x, t):
    """Run one optimisation step on the batch ``(x, t)``.

    Uses the module-level ``model``, ``criterion`` and ``optimizer``.

    Returns:
        Tuple ``(loss, preds)`` with the batch loss and the model outputs.
    """
    model.train()
    optimizer.zero_grad()
    preds = model(x)
    loss = criterion(preds, t)
    loss.backward()
    optimizer.step()
    return loss, preds


epochs = 30

for epoch in range(epochs):
    print(f'Epoch: {epoch + 1}')

    train_loss = 0.
    train_acc = 0.
    for idx, (x, t) in enumerate(Bar(train_dataloader)):
        x = x.to(device)
        t = t.to(device)
        loss, preds = train_step(x, t)
        train_loss += loss.item()
        # Accuracy of the batch: arg-max over the last dimension vs. targets
        predicted = preds.argmax(dim=-1).tolist()
        train_acc += accuracy_score(t.tolist(), predicted)

    # Both metrics are averaged over the number of batches
    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader)

    print(f'loss: {train_loss:.3}, acc: {train_acc:.3f}')
def main():
    """Fine-tune a BERT question-answering model on BioASQ factoid questions.

    Command-line arguments:
        --model_name: Pretrained model/tokenizer name or path (required).
        --checkpoint_input_path: Optional state dict loaded into the model
            before training; skipped when omitted.
        --checkpoint_output_path: Folder receiving ``checkpoint_factoid.pt``
            (required).
        --bioasq_path: BioASQ JSON file with a ``'questions'`` list
            (required).
        --seed, --learning_rate, --batch_size, --epochs: Training settings.

    Every snippet of a factoid question becomes one training example when
    the (lower-cased) first exact answer occurs in the (lower-cased) snippet.

    NOTE(review): indentation was lost in this copy of the file; saving is
    assumed to happen once, after the last epoch -- confirm against the
    original source.
    """
    print('Start')
    parser = argparse.ArgumentParser()

    # Add the arguments to the parser
    parser.add_argument("--model_name", required= True)
    parser.add_argument("--checkpoint_input_path", required= False)
    parser.add_argument("--checkpoint_output_path", required= True)
    parser.add_argument("--bioasq_path", required= True)
    # NOTE(review): no ``type`` is given, so a seed passed on the command
    # line reaches random.seed as a string while the default is an int.
    parser.add_argument("--seed", default = 1995)
    parser.add_argument("--learning_rate", default = 5e-5, type = float)
    parser.add_argument("--batch_size", default = 16, type = int)
    parser.add_argument("--epochs", default = 3, type=int)

    args = vars(parser.parse_args())

    random.seed(args['seed'])

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    with open(args['bioasq_path'], 'rb') as f:
        bio_factoid_raw = json.load(f)['questions']

    # Keep factoid questions only
    bio_factoid_raw = [question for question in bio_factoid_raw if question['type'] == 'factoid']
    bio_factoid_questions = [question['body'] for question in bio_factoid_raw]
    bio_factoid_ids = [question['id'] for question in bio_factoid_raw]
    bio_factoid_answers = [question['exact_answer'][0] for question in bio_factoid_raw]

    bio_snippets = {question['id'] : [snippet['text'] for snippet in question['snippets']] for question in bio_factoid_raw}

    # One row per (question id, snippet)
    ids = []
    snippets = []
    for key, value in bio_snippets.items():
        for snippet in value:
            ids.append(key)
            snippets.append(snippet)

    snippets_df = pd.DataFrame({'id': ids,'snippet': snippets})
    questions_df = pd.DataFrame({'id': bio_factoid_ids, 'question': bio_factoid_questions, 'label': bio_factoid_answers})
    val_df = pd.merge(snippets_df,questions_df, how = 'left', on = 'id')
    #val_df = val_df.sample(16)

    def get_start_answer(row):
        """Return where the answer starts in the snippet, or None if absent."""
        label = row['label'].lower()
        context = row['snippet'].lower()
        if label in context:
            return context.index(label)
        return None

    val_df['answer_start'] = val_df.apply(get_start_answer, axis = 1)
    # Drop the snippets that do not contain the answer
    clean_df = val_df[~val_df.answer_start.isnull()]

    bio_factoid_questions = list(clean_df.question)
    bio_factoid_contexts = list(clean_df.snippet)
    bio_factoid_answers = [{'text': row['label'], 'answer_start': int(row['answer_start'])} for index, row in clean_df.iterrows()]

    from transformers import BertTokenizer, BertTokenizerFast
    tokenizer_fast = BertTokenizerFast.from_pretrained(args['model_name'],
        do_lower_case=True,
        padding = True,
        truncation=True,
        add_special_tokens = True,
        model_max_length = 1000000000)

    from squad_processing import add_end_idx, add_token_positions

    add_end_idx(bio_factoid_answers,bio_factoid_contexts)

    bio_factoid_encodings = tokenizer_fast(bio_factoid_contexts, bio_factoid_questions,
        add_special_tokens=True,
        truncation=True,
        padding=True,
        max_length=500)

    # Processing of token positions
    add_token_positions(bio_factoid_encodings, bio_factoid_answers,tokenizer_fast)

    from torch.utils.data import Dataset

    class SquadDataset(Dataset):
        """Serves the tokenized examples as dictionaries of long tensors."""

        def __init__(self, encodings):
            self.encodings = encodings

        def __getitem__(self, idx):
            #print(self.encodings['start_positions'][idx])
            #{key: torch.tensor(val[idx], dtype = torch.long) for key, val in self.encodings.items()}
            return {'input_ids':torch.tensor(self.encodings['input_ids'][idx],dtype = torch.long),
                    'attention_mask':torch.tensor(self.encodings['attention_mask'][idx],dtype = torch.long),
                    'start_positions':torch.tensor(self.encodings['start_positions'][idx],dtype = torch.long),
                    'end_positions':torch.tensor(self.encodings['end_positions'][idx],dtype = torch.long)}

        def __len__(self):
            return len(self.encodings.input_ids)

    train_bio_factoid = SquadDataset(bio_factoid_encodings)

    from transformers import BertPreTrainedModel, BertModel
    from torch import nn
    from torch.nn import CrossEntropyLoss
    from torch.nn import DataParallel
    from torch.utils.data import DataLoader
    from transformers import AdamW
    from transformers.modeling_outputs import QuestionAnsweringModelOutput

    class BertForQuestionAnswering(BertPreTrainedModel):
        """BERT encoder (no pooling layer) with a linear span-prediction head."""

        _keys_to_ignore_on_load_unexpected = [r"pooler"]

        def __init__(self, config):
            super().__init__(config)
            self.num_labels = config.num_labels

            self.bert = BertModel(config, add_pooling_layer=False)
            self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

            self.init_weights()

        def forward(
            self,
            input_ids=None,
            attention_mask=None,
            token_type_ids=None,
            position_ids=None,
            head_mask=None,
            inputs_embeds=None,
            start_positions=None,
            end_positions=None,
            output_attentions=None,
            output_hidden_states=None,
            return_dict=None,
        ):
            r"""
            start_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
                Labels for position (index) of the start of the labelled span for computing the token classification loss.
                Positions are clamped to the length of the sequence (:obj:`sequence_length`). Position outside of the
                sequence are not taken into account for computing the loss.
            end_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
                Labels for position (index) of the end of the labelled span for computing the token classification loss.
                Positions are clamped to the length of the sequence (:obj:`sequence_length`). Position outside of the
                sequence are not taken into account for computing the loss.
            """
            return_dict = return_dict if return_dict is not None else self.config.use_return_dict

            outputs = self.bert(
                input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                position_ids=position_ids,
                head_mask=head_mask,
                inputs_embeds=inputs_embeds,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
            )

            sequence_output = outputs[0]

            # The head emits two values per token: start and end logits
            logits = self.qa_outputs(sequence_output)
            start_logits, end_logits = logits.split(1, dim=-1)
            start_logits = start_logits.squeeze(-1)
            end_logits = end_logits.squeeze(-1)

            total_loss = None
            if start_positions is not None and end_positions is not None:
                # If we are on multi-GPU, split add a dimension
                if len(start_positions.size()) > 1:
                    start_positions = start_positions.squeeze(-1)
                if len(end_positions.size()) > 1:
                    end_positions = end_positions.squeeze(-1)
                # sometimes the start/end positions are outside our model inputs, we ignore these terms
                ignored_index = start_logits.size(1)
                start_positions.clamp_(0, ignored_index)
                end_positions.clamp_(0, ignored_index)

                loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
                start_loss = loss_fct(start_logits, start_positions)
                end_loss = loss_fct(end_logits, end_positions)
                total_loss = (start_loss + end_loss) / 2

            if not return_dict:
                output = (start_logits, end_logits) + outputs[2:]
                return ((total_loss,) + output) if total_loss is not None else output

            return QuestionAnsweringModelOutput(
                loss=total_loss,
                start_logits=start_logits,
                end_logits=end_logits,
                hidden_states=outputs.hidden_states,
                attentions=outputs.attentions,
            )

    train_loader_factoid = DataLoader(train_bio_factoid, batch_size=args['batch_size'], shuffle=True)

    squad_model = BertForQuestionAnswering.from_pretrained(args['model_name'])

    # The input checkpoint is optional on the command line, so only load it
    # when a path was actually given.
    if args['checkpoint_input_path']:
        checkpoint = torch.load(args['checkpoint_input_path'], map_location = device)
        # Checkpoints written from a DataParallel model prefix every key
        # with 'module.'; strip it so the keys match the bare model.
        squad_model.load_state_dict({key.replace('module.',''): value for key,value in checkpoint.items()})

    squad_model = DataParallel(squad_model)
    squad_model.to(device)
    squad_model.train()

    optim = AdamW(squad_model.parameters(), lr=args['learning_rate'])

    from barbar import Bar

    # Bound up front so the checkpoint can be written even if no batch ran
    loss = None
    for epoch in range(args['epochs']):
        for i,batch in enumerate(Bar(train_loader_factoid)):
            optim.zero_grad()
            input_ids = batch['input_ids'].to(device, dtype = torch.long)
            attention_mask = batch['attention_mask'].to(device, dtype = torch.long)
            start_positions = batch['start_positions'].to(device, dtype = torch.long)
            end_positions = batch['end_positions'].to(device, dtype = torch.long)
            outputs = squad_model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
            loss = outputs[0].to(device)
            # DataParallel returns one loss per replica; sum them before
            # back-propagating
            loss.sum().backward()
            optim.step()

    squad_model.eval()
    print('Saving...')
    torch.save({
        'epoch': args['epochs'],
        'model_state_dict': squad_model.state_dict(),
        'optimizer_state_dict': optim.state_dict(),
        'loss': loss,
        },#'checkpoint_mnli_squad_factoid_3epochs_seed_2.pt')
        args['checkpoint_output_path'] + '/checkpoint_factoid.pt')
def train():
    """Train the segmentation network and keep the best-validation weights.

    Runs ``cfg.n_epochs`` passes over the training loader. After every pass
    the mean validation loss is computed; whenever it improves, the weights
    are written to ``<cfg.respth>/best_validation.pth``. When all epochs are
    done the final weights go to ``<cfg.respth>/model_final.pth`` and
    ``eval_model`` is run on ``cfg.test_im_anns``.

    NOTE(review): the original indentation of this function was lost; the
    per-epoch log message and the validation averaging are assumed to sit at
    epoch level (outside the batch loops) -- confirm against the repository.
    """
    logger = logging.getLogger()
    is_dist = False

    ## dataset
    dl = get_data_loader(
        cfg.im_root, cfg.train_im_anns,
        cfg.ims_per_gpu, cfg.scales, cfg.cropsize,
        cfg.max_iter, mode='train', distributed=is_dist)
    valid = get_data_loader(
        cfg.im_root, cfg.val_im_anns,
        cfg.ims_per_gpu, cfg.scales, cfg.cropsize,
        cfg.max_iter, mode='val', distributed=is_dist)

    ## model
    net, criteria_pre, criteria_aux = set_model()
    print(net)
    print(f'n_parameters: {sum(p.numel() for p in net.parameters())}')

    ## optimizer
    optim = set_optimizer(net)

    ## fp16
    if has_apex:
        opt_level = 'O1' if cfg.use_fp16 else 'O0'
        net, optim = amp.initialize(net, optim, opt_level=opt_level)

    ## meters
    time_meter, loss_meter, loss_pre_meter, loss_aux_meters = set_meters()

    ## lr scheduler
    lr_schdr = WarmupPolyLrScheduler(
        optim, power=0.9,
        max_iter=cfg.max_iter, warmup_iter=cfg.warmup_iters,
        warmup_ratio=0.1, warmup='exp', last_epoch=-1,)

    best_validation = np.inf
    for i in range(cfg.n_epochs):
        ## train loop
        # The mode switch is loop-invariant, so it is done once per epoch.
        net.train()
        for im, lb in Bar(dl):
            im = im.cuda()
            lb = lb.cuda()

            lb = torch.squeeze(lb, 1)

            optim.zero_grad()
            logits, *logits_aux = net(im)
            loss_pre = criteria_pre(logits, lb)
            loss_aux = [crit(lgt, lb)
                        for crit, lgt in zip(criteria_aux, logits_aux)]
            loss = loss_pre + sum(loss_aux)
            if has_apex:
                with amp.scale_loss(loss, optim) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optim.step()
            torch.cuda.synchronize()
            lr_schdr.step()

            time_meter.update()
            loss_meter.update(loss.item())
            loss_pre_meter.update(loss_pre.item())
            for mter, lss in zip(loss_aux_meters, loss_aux):
                mter.update(lss.item())
            del im
            del lb

        ## print training log message
        lr = lr_schdr.get_lr()
        lr = sum(lr) / len(lr)
        print_log_msg(
            i, cfg.max_iter, lr, time_meter, loss_meter,
            loss_pre_meter, loss_aux_meters)

        ## validation loop
        validation_loss = []
        net.eval()
        with torch.no_grad():
            for im, lb in Bar(valid):
                im = im.cuda()
                lb = lb.cuda()

                lb = torch.squeeze(lb, 1)

                logits, *logits_aux = net(im)
                loss_pre = criteria_pre(logits, lb)
                loss_aux = [crit(lgt, lb)
                            for crit, lgt in zip(criteria_aux, logits_aux)]
                loss = loss_pre + sum(loss_aux)
                validation_loss.append(loss.item())
                del im
                del lb

        if validation_loss:
            validation_loss = sum(validation_loss) / len(validation_loss)
        else:
            # An empty validation loader would otherwise divide by zero;
            # infinity can never count as a new best.
            validation_loss = np.inf
        print(f'Validation loss: {validation_loss}')
        if best_validation > validation_loss:
            print('new best performance, storing model')
            best_validation = validation_loss
            state = net.state_dict()
            torch.save(state, osp.join(cfg.respth, 'best_validation.pth'))

    ## dump the final model and evaluate the result
    save_pth = osp.join(cfg.respth, 'model_final.pth')
    logger.info('\nsave models to {}'.format(save_pth))
    state = net.state_dict()
    torch.save(state, save_pth)

    logger.info('\nevaluating the final model')
    torch.cuda.empty_cache()
    heads, mious = eval_model(net, 2, cfg.im_root, cfg.test_im_anns)
    logger.info(tabulate([mious, ], headers=heads, tablefmt='orgtbl'))

    return
def train_model(model, criterion, optimizer, num_epochs):
    """Train a reconstruction model and restore its best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders``; the criterion compares the model output
    with the input batch itself. The weights with the lowest validation
    loss are kept and loaded back into ``model`` before returning.

    Args:
        model: network to optimise (trained in place).
        criterion: callable ``criterion(outputs, inputs)`` returning a loss.
        optimizer: optimizer stepping ``model``'s parameters.
        num_epochs: number of train/val epochs to run.

    Returns:
        ``(model, optimizer, epoch_loss)`` where ``epoch_loss`` is the loss
        of the last phase that ran, or ``None`` when ``num_epochs`` is 0.

    NOTE(review): the original indentation was lost; the checkpoint is
    assumed to be written only when the validation loss improves -- confirm.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = np.inf
    # Bound up front so the final return works even if no epoch runs.
    epoch_loss = None

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0

            # Iterate over data.
            for inputs in Bar(dataloaders[phase]):
                inputs = inputs.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, inputs)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics (weight the batch mean by the batch size)
                running_loss += loss.item() * inputs.size(0)

            epoch_loss = running_loss / dataset_sizes[phase]

            print('{} Loss: {:.4f}'.format(
                phase, epoch_loss))

            # deep copy the model
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
                save_checkpoint(state={
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'optimizer_state_dict': optimizer.state_dict()
                }, filename='ckpt_epoch_{}.pt'.format(epoch))

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Loss: {:4f}'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, optimizer, epoch_loss
def __eval_net(self, dataloader, dataset_type, epoch):
    '''
    Evaluate the configured network on a dataloader with scikit-learn metrics.

    For every batch the arg-max prediction is flattened and compared with the
    flattened labels; IoU (Jaccard), accuracy, F1, precision and recall are
    computed per batch and then averaged over the number of batches. The
    network is switched back to training mode afterwards.

    dataloader   -- iterable yielding (images, labels) batches
    dataset_type -- 'train' or 'dev'; selects which log list receives the
                    metrics. Any other value is evaluated (and printed) but
                    not stored.
    epoch        -- zero-based epoch index; logged as epoch + 1
    '''
    # Set the network to evaluation mode
    self.configured_network.eval()

    # Keep the data on the same device the network was moved to
    device = torch.device(self.device)

    # Running sums of the per-batch scores
    totals = {'iou': 0, 'accuracy': 0, 'f1': 0, 'precision': 0, 'recall': 0}

    # Stop the accumulation of gradients
    with torch.no_grad():
        for images, labels in Bar(dataloader):
            images = images.to(device)
            labels = labels.to(device)

            # Get the prediction and the most likely class
            outputs = self.configured_network(images)
            predictions = torch.argmax(outputs, 1)

            # Reshape, move to the CPU, and convert to numpy
            predictions = predictions.reshape(-1).cpu().numpy()
            labels = labels.reshape(-1).cpu().numpy()

            # Accumulate the different scores
            totals['iou'] += sk_metrics.jaccard_score(
                labels, predictions, average='macro')
            totals['accuracy'] += sk_metrics.accuracy_score(
                labels, predictions)
            totals['f1'] += sk_metrics.f1_score(
                labels, predictions, average='macro')
            totals['precision'] += sk_metrics.precision_score(
                labels, predictions, average='macro')
            totals['recall'] += sk_metrics.recall_score(
                labels, predictions, average='macro')

    # Average the per-batch scores across the dataset
    n_batches = len(dataloader)
    temp_iou = totals['iou'] / n_batches
    temp_accuracy = totals['accuracy'] / n_batches
    temp_f1_score = totals['f1'] / n_batches
    temp_precision = totals['precision'] / n_batches
    temp_recall = totals['recall'] / n_batches

    # Reset the network to training mode
    self.configured_network.train()

    # Display the evaluation information
    if self.print_error and not self.silent:
        print('IoU', temp_iou)
        print('Accuracy', temp_accuracy)
        print('F1 Score', temp_f1_score)
        print('Precision', temp_precision)
        print('Recall', temp_recall)

    # Log the information
    metrics_list = [epoch+1, temp_accuracy, temp_iou, temp_f1_score,
                    temp_precision, temp_recall]
    if dataset_type == 'train':
        self.training_lists.append(metrics_list)
    elif dataset_type == 'dev':
        self.dev_lists.append(metrics_list)

    return None
def _prepare_targets(ref_volume, current_volume_list):
    """Move target volumes to the device and compute ``vm`` against the reference.

    Returns ``(targets, dvf)``: the target volumes with an added channel
    dimension on ``device``, and the output of ``vm(ref_volume, target)``
    for each of them.
    """
    targets, dvf = [], []
    for volume in current_volume_list:
        volume = volume.unsqueeze(1).to(device)  # For > 1 future volume
        targets.append(volume)
        dvf.append(vm(ref_volume, volume))
    return targets, dvf


def _build_condition_slices(ref_volume, input_volume_list):
    """Build the conditioning slices ``(c1, c2)`` for one batch.

    With ``opt.condi_type == "1"`` the sagittal slice (``opt.sag_index``) of
    every input volume is paired with the reference slice and returned as
    ``c1`` (``c2`` is None); otherwise the coronal slice (``opt.cor_index``)
    is used and returned as ``c2`` (``c1`` is None).
    """
    sagittal = opt.condi_type == "1"

    def take_slice(volume):
        if sagittal:
            return volume[:, :, opt.sag_index, :, :]
        return volume[:, :, :, opt.cor_index, :]

    ref_slice = take_slice(ref_volume)
    pairs = []
    for q in range(opt.nb_inputs):
        input_slice = take_slice(input_volume_list[q].unsqueeze(1))
        pairs.append(
            torch.cat([input_slice.to(device), ref_slice.to(device)], dim=1))
    stacked = torch.stack(pairs, dim=2).to(device)
    if sagittal:
        return stacked, None
    return None, stacked


def train(folds=None, fold_idx="0", model=None, dir_name=None):
    """Train ``model`` on one cross-validation fold with early stopping.

    Args:
        folds: pair ``(train_sequences, valid_sequences)`` passed to
            ``Dataset_4D_multitime`` as ``sequence_list``.
        fold_idx: fold identifier (string) used in the log/run directories.
        model: network to train; optionally initialised from
            ``opt.checkpoint``.
        dir_name: sub-directory name under ``opt.logging_dir``.

    Side effects: writes ``params.txt`` and ``model_best.pth`` (on every
    validation improvement) into the log directory and TensorBoard scalars
    into the run directory. Training stops after ``opt.max_epoch`` epochs or
    when the early stopper fires.
    """
    if opt.checkpoint:
        custom_load(model, opt.checkpoint, device)

    if opt.VM_checkpoint:
        custom_load(vm, opt.VM_checkpoint, device)

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    scheduler = get_scheduler(optimizer, opt)
    earlyStopper_recon = EarlyStopping(patience=6, verbose=True, delta=0.01)

    log_dir = os.path.join(opt.logging_dir, 'logs', dir_name,
                           "fold_" + fold_idx)
    run_dir = os.path.join(opt.logging_dir, 'runs', dir_name,
                           "fold_" + fold_idx)
    cond_mkdir(log_dir)
    cond_mkdir(run_dir)

    # Save all command line arguments into a txt file in the logging
    # directory for later reference.
    with open(os.path.join(log_dir, "params.txt"), "w") as out_file:
        out_file.write('\n'.join(
            ["%s: %s" % (key, value) for key, value in vars(opt).items()]))

    writer = SummaryWriter(run_dir)
    try:
        train_set = Dataset_4D_multitime(opt.data_dir,
                                         sequence_list=folds[0],
                                         nb_inputs=opt.nb_inputs,
                                         horizon=opt.horizon)
        valid_set = Dataset_4D_multitime(opt.data_dir,
                                         sequence_list=folds[1],
                                         nb_inputs=opt.nb_inputs,
                                         horizon=opt.horizon,
                                         valid=True)
        train_loader = DataLoader(train_set, batch_size=opt.batch_size,
                                  shuffle=True, num_workers=4)
        valid_loader = DataLoader(valid_set, batch_size=1,
                                  shuffle=False, num_workers=4)

        # Global optimisation step, used as the TensorBoard x-axis.
        global_step = 0
        best_val_loss = np.inf

        print('Begin training...')
        for epoch in range(opt.max_epoch):
            model.train()
            print('Epoch: {}'.format(epoch))
            for ref_volume, input_volume_list, current_volume_list in Bar(
                    train_loader):
                optimizer.zero_grad()

                ref_volume = ref_volume.unsqueeze(1).to(device)
                current_volume_list, dvf = _prepare_targets(
                    ref_volume, current_volume_list)
                c1, c2 = _build_condition_slices(ref_volume,
                                                 input_volume_list)

                (vmorph_current_volume, kl_loss, generated_dvf,
                 generated_current_volume) = model(
                    ref_volume, current_volume_list, c1, c2, dvf=dvf)

                kl_loss = opt.KL_weight * kl_loss
                vmorph_recon_loss, recon_loss = 0, 0
                for tt in range(opt.horizon):
                    vmorph_recon_loss += criterion(vmorph_current_volume[tt],
                                                   current_volume_list[tt])
                    recon_loss += criterion(generated_current_volume[tt],
                                            current_volume_list[tt])

                # Only the generated-volume reconstruction and the KL term
                # are optimised; the vmorph loss is logged for reference.
                loss = recon_loss + kl_loss
                loss.backward()
                optimizer.step()

                writer.add_scalar("vmorph_recon_loss",
                                  vmorph_recon_loss / opt.horizon,
                                  global_step)
                writer.add_scalar("recon_loss", recon_loss / opt.horizon,
                                  global_step)
                writer.add_scalar("kl_loss", kl_loss / opt.horizon,
                                  global_step)
                writer.add_scalar("total_loss", loss, global_step)

                global_step += 1

            # Validate model
            optimizer.zero_grad()
            with torch.no_grad():
                model.eval()
                val_loss = 0
                for ref_volume, input_volume_list, current_volume_list in Bar(
                        valid_loader):
                    ref_volume = ref_volume.unsqueeze(1).to(device)
                    # The deformation fields are not passed to the model
                    # here; the vm call is kept as in the original code.
                    current_volume_list, _ = _prepare_targets(
                        ref_volume, current_volume_list)
                    c1, c2 = _build_condition_slices(ref_volume,
                                                     input_volume_list)

                    # Sample the latent code from a standard normal
                    latent = var_or_cuda(torch.as_tensor(np.random.randn(
                        opt.latent_size), dtype=torch.float), device=device)
                    latent = var_or_cuda(latent,
                                         device=device).unsqueeze(dim=0)
                    generated_dvf, generated_current_volume = model(
                        ref_volume, None, c1, c2, None,
                        prior_post_latent=latent)

                    avg_rec_loss = 0
                    for tt in range(opt.horizon):
                        avg_rec_loss += criterion(
                            generated_current_volume[tt],
                            current_volume_list[tt]).item()
                    val_loss += avg_rec_loss / opt.horizon

                val_loss /= (len(valid_set))

                if val_loss < best_val_loss:
                    print("val_loss improved from %0.4f to %0.4f \n" %
                          (best_val_loss, val_loss))
                    best_val_loss = val_loss
                    custom_save(model,
                                os.path.join(log_dir, 'model_best.pth'))
                else:
                    print("val_loss did not improve from %0.4f \n" %
                          (best_val_loss))

                writer.add_scalar("val_loss", val_loss, global_step)
                scheduler.step(val_loss)

                earlyStopper_recon(val_loss)
                if earlyStopper_recon.early_stop:
                    print("Early stopping")
                    break
    finally:
        # Flush pending events and release the event file, even on errors.
        writer.close()
def test(fold=None, fold_idx="0", dir_name=None):
    """Evaluate the best checkpoint of one fold and dump volumes and metrics.

    Loads ``model_best.pth`` for ``fold_idx`` from ``opt.checkpoint``, runs
    the model on every test sample with a randomly drawn latent code, saves
    the ``vm``-warped and the generated volumes as NIfTI files, and stores
    the per-sample NCC / MSE / SSIM values (averaged over ``opt.horizon``)
    as ``.npy`` files under ``<opt.logging_dir>/test/<dir_name>/fold_<idx>``.
    """

    def _save_volume(volume, prefix, step, sample_idx):
        # A fresh affine is built per call, exactly as the inline literals did.
        save_tensor_as_nifti(volume[0, 0, :, :, :],
                             prefix + str(step),
                             vol_dir,
                             iter=sample_idx,
                             aff=[[3.5, 0, 0, 0], [0, 1.70 * 2, 0, 0],
                                  [0, 0, 1.70 * 2, 0], [0, 0, 0, 1]])

    with torch.no_grad():
        best_weights = os.path.join(opt.checkpoint, "fold_" + fold_idx,
                                    "model_best.pth")
        custom_load(model, best_weights, device)
        custom_load(vm, opt.VM_checkpoint, device)
        model.eval()

        test_set = Dataset_4D_multitime(opt.data_dir,
                                        sequence_list=fold,
                                        nb_inputs=opt.nb_inputs,
                                        horizon=opt.horizon,
                                        test=True)
        test_loader = DataLoader(test_set, batch_size=1, shuffle=False,
                                 num_workers=4)

        vol_dir = os.path.join(opt.logging_dir, "test", dir_name,
                               "fold_" + fold_idx, "volumes")
        cond_mkdir(vol_dir)

        MSE_loss, NCC_loss, SSIM_loss = [], [], []
        mse = nn.MSELoss(reduction='mean').to(device)

        for idx, (ref_volume, input_volume_list,
                  current_volume_list) in enumerate(Bar(test_loader)):
            ref_volume = ref_volume.unsqueeze(1).to(device)

            # Register every target volume and warp the reference with it
            vmorph_volume, dvf = [], []
            for vol, target in enumerate(current_volume_list):
                target = target.unsqueeze(1).to(device)
                current_volume_list[vol] = target
                field = vm(ref_volume, target)
                dvf.append(field)
                vmorph_volume.append(stn(ref_volume, field))

            # Conditioning slices: sagittal (c1) or coronal (c2)
            if opt.condi_type == "1":
                ref_slice = ref_volume[:, :, opt.sag_index, :, :]
                slices = []
                for q in range(opt.nb_inputs):
                    in_slice = input_volume_list[q].unsqueeze(
                        1)[:, :, opt.sag_index, :, :]
                    slices.append(torch.cat(
                        [in_slice.to(device), ref_slice.to(device)], dim=1))
                c1 = torch.stack(slices, dim=2).to(device)  # sagittal
                c2 = None  # coronal
            else:
                ref_slice = ref_volume[:, :, :, opt.cor_index, :]
                slices = []
                for q in range(opt.nb_inputs):
                    in_slice = input_volume_list[q].unsqueeze(
                        1)[:, :, :, opt.cor_index, :]
                    slices.append(torch.cat(
                        [in_slice.to(device), ref_slice.to(device)], dim=1))
                c1 = None  # sagittal
                c2 = torch.stack(slices, dim=2).to(device)  # coronal

            # Inference
            noise = torch.as_tensor(np.random.randn(opt.latent_size),
                                    dtype=torch.float)
            latent = var_or_cuda(noise, device=device)
            latent = var_or_cuda(latent, device=device).unsqueeze(dim=0)
            generated_dvf, generated_current_volume = model(
                ref_volume, None, c1, c2, dvf=None, prior_post_latent=latent)

            avg_ncc, avg_mse, avg_ssim = 0, 0, 0
            for tp in range(opt.horizon):
                warped = vmorph_volume[tp]
                generated = generated_current_volume[tp]

                _save_volume(warped, "vm_volume_t", tp, idx)
                _save_volume(generated, "generated_volume_t", tp, idx)

                avg_ncc += ncc_loss(generated, warped, device=device).item()
                avg_mse += mse(generated, warped).item()
                avg_ssim += ss(
                    generated[0, 0, :, :, :].detach().cpu().numpy(),
                    warped[0, 0, :, :, :].detach().cpu().numpy())

            NCC_loss.append(avg_ncc / opt.horizon)
            MSE_loss.append(avg_mse / opt.horizon)
            SSIM_loss.append(avg_ssim / opt.horizon)

        NCC_loss = np.asarray(NCC_loss)
        MSE_loss = np.asarray(MSE_loss)
        SSIM_loss = np.asarray(SSIM_loss)

        dir_name = os.path.join(dir_name, "fold_" + fold_idx)
        for file_name, values in (("NCC_loss.npy", NCC_loss),
                                  ("MSE_loss.npy", MSE_loss),
                                  ("SSIM_loss.npy", SSIM_loss)):
            np.save(
                os.path.join(opt.logging_dir, "test", dir_name, file_name),
                values)

        print("\nTest set average loss NCC: %0.4f, MSE: %0.4f, SSIM: %0.4f" %
              (np.mean(NCC_loss), np.mean(MSE_loss), np.mean(SSIM_loss)))
def train(self):
    """Training the AGBAD

    Jointly trains the generator ``self.G`` and encoder ``self.E`` (one Adam
    optimizer) against the discriminator ``self.D`` (a second Adam optimizer)
    with a binary cross-entropy objective for ``self.args.num_epochs + 1``
    epochs. Gaussian noise whose standard deviation decays linearly to zero
    over the epochs is added to the discriminator's image inputs. Every 10th
    epoch a grid generated from a fixed latent batch is written to
    ``./images/<epoch>_fake.png``; the weights are saved once at the end.
    """
    import os

    if self.args.pretrained:
        self.load_weights()

    optimizer_ge = optim.Adam(list(self.G.parameters()) +
                              list(self.E.parameters()), lr=self.args.lr)
    optimizer_d = optim.Adam(self.D.parameters(), lr=self.args.lr)

    fixed_z = torch.randn((16, self.args.latent_dim, 1, 1)).to(self.device)
    criterion = nn.BCELoss()

    # save_image does not create missing directories.
    os.makedirs('./images', exist_ok=True)

    for epoch in range(self.args.num_epochs + 1):
        ge_losses = 0
        d_losses = 0
        # Noise level is constant within an epoch and decays linearly.
        noise_std = (0.1 * (self.args.num_epochs - epoch)
                     / self.args.num_epochs)
        for x, _ in Bar(self.train_loader):
            batch_size = x.size(0)

            #Defining labels
            y_true = torch.ones((batch_size, 1)).to(self.device)
            y_fake = torch.zeros((batch_size, 1)).to(self.device)

            #Noise for improving training.
            noise1 = torch.Tensor(x.size()).normal_(
                0, noise_std).to(self.device)
            noise2 = torch.Tensor(x.size()).normal_(
                0, noise_std).to(self.device)

            #Cleaning gradients.
            optimizer_d.zero_grad()
            optimizer_ge.zero_grad()

            #Generator:
            z_fake = torch.randn(
                (batch_size, self.args.latent_dim, 1, 1)).to(self.device)
            x_fake = self.G(z_fake)

            #Encoder:
            x_true = x.float().to(self.device)
            z_true = self.E(x_true)

            #Discriminator
            out_true = self.D(x_true + noise1, z_true)
            out_fake = self.D(x_fake + noise2, z_fake)

            #Losses
            loss_d = criterion(out_true, y_true) + criterion(
                out_fake, y_fake)
            loss_ge = criterion(out_fake, y_true) + criterion(
                out_true, y_fake)

            #Computing gradients and backpropagate.
            # NOTE(review): optimizer_d.step() updates D's weights in place
            # between the two backward passes over the same graph; recent
            # PyTorch releases may reject the second backward with an
            # in-place modification error -- confirm on the target version.
            loss_d.backward(retain_graph=True)
            optimizer_d.step()

            loss_ge.backward()
            optimizer_ge.step()

            ge_losses += loss_ge.item()
            d_losses += loss_d.item()

        if epoch % 10 == 0:
            vutils.save_image((self.G(fixed_z).data + 1) / 2.,
                              './images/{}_fake.png'.format(epoch))

        print(
            "Training... Epoch: {}, Discrimiantor Loss: {:.3f}, Generator Loss: {:.3f}"
            .format(epoch, d_losses / len(self.train_loader),
                    ge_losses / len(self.train_loader)))
    self.save_weights()
def train(self):
    """Training the ALAD

    Trains the generator ``self.G`` and encoder ``self.E`` (one Adam
    optimizer) against three discriminators -- ``self.Dxz``, ``self.Dzz``
    and ``self.Dxx`` (a second, shared Adam optimizer) -- with a binary
    cross-entropy objective for ``self.args.num_epochs + 1`` epochs. Every
    10th epoch a grid generated from a fixed latent batch is written to
    ``./images/<epoch>_fake.png``; the weights are saved once at the end.
    """
    import os

    if self.args.pretrained:
        self.load_weights()

    optimizer_ge = optim.Adam(list(self.G.parameters()) +
                              list(self.E.parameters()),
                              lr=self.args.lr, betas=(0.5, 0.999))
    params_ = list(self.Dxz.parameters()) \
            + list(self.Dzz.parameters()) \
            + list(self.Dxx.parameters())
    optimizer_d = optim.Adam(params_, lr=self.args.lr, betas=(0.5, 0.999))

    fixed_z = torch.randn((16, self.args.latent_dim, 1, 1)).to(self.device)
    criterion = nn.BCELoss()

    # save_image does not create missing directories.
    os.makedirs('./images', exist_ok=True)

    for epoch in range(self.args.num_epochs + 1):
        ge_losses = 0
        d_losses = 0
        for x, _ in Bar(self.train_loader):
            batch_size = x.size(0)

            #Defining labels
            y_true = torch.ones((batch_size, 1)).to(self.device)
            y_fake = torch.zeros((batch_size, 1)).to(self.device)

            #Cleaning gradients.
            optimizer_d.zero_grad()
            optimizer_ge.zero_grad()

            #Generator:
            z_real = torch.randn(
                (batch_size, self.args.latent_dim, 1, 1)).to(self.device)
            x_gen = self.G(z_real)

            #Encoder:
            x_real = x.float().to(self.device)
            z_gen = self.E(x_real)

            #Discriminatorxz
            out_truexz, _ = self.Dxz(x_real, z_gen)
            out_fakexz, _ = self.Dxz(x_gen, z_real)

            #Discriminatorzz
            out_truezz, _ = self.Dzz(z_real, z_real)
            out_fakezz, _ = self.Dzz(z_real, self.E(self.G(z_real)))

            #Discriminatorxx
            out_truexx, _ = self.Dxx(x_real, x_real)
            out_fakexx, _ = self.Dxx(x_real, self.G(self.E(x_real)))

            #Losses
            loss_dxz = criterion(out_truexz, y_true) + criterion(
                out_fakexz, y_fake)
            loss_dzz = criterion(out_truezz, y_true) + criterion(
                out_fakezz, y_fake)
            loss_dxx = criterion(out_truexx, y_true) + criterion(
                out_fakexx, y_fake)
            loss_d = loss_dxz + loss_dzz + loss_dxx

            # Generator/encoder losses use the swapped labels.
            loss_gexz = criterion(out_fakexz, y_true) + criterion(
                out_truexz, y_fake)
            loss_gezz = criterion(out_fakezz, y_true) + criterion(
                out_truezz, y_fake)
            loss_gexx = criterion(out_fakexx, y_true) + criterion(
                out_truexx, y_fake)
            loss_ge = loss_gexz + loss_gezz + loss_gexx

            #Computing gradients and backpropagate.
            # Both backward passes run before either optimizer steps, so
            # the shared graph is not modified in between.
            loss_d.backward(retain_graph=True)
            loss_ge.backward()
            optimizer_d.step()
            optimizer_ge.step()

            d_losses += loss_d.item()
            ge_losses += loss_ge.item()

        if epoch % 10 == 0:
            vutils.save_image((self.G(fixed_z).data + 1) / 2.,
                              './images/{}_fake.png'.format(epoch))

        print(
            "Training... Epoch: {}, Discrimiantor Loss: {:.3f}, Generator Loss: {:.3f}"
            .format(epoch, d_losses / len(self.train_loader),
                    ge_losses / len(self.train_loader)))
    self.save_weights()
def train_model(self, learning_rate=0.01):
    '''
    Trains a model with the given hyperparameters.

    The input network is moved to ``self.device``, optionally initialised
    from ``self.model_weights``, and optimised with Adam on the Jaccard loss
    for ``self.max_epochs`` epochs; the learning rate is multiplied by 0.1
    every ``self.drop_lr_n_epochs`` epochs. For every iteration the loss and
    gradient norm are appended to ``self.training_loss``. Every
    ``self.log_error_every_n_epochs`` epochs the train, dev and test loaders
    are evaluated. Logs are exported and the weights saved at the end.

    learning_rate -- initial Adam learning rate

    NOTE(review): the original indentation was lost; exporting the logs and
    saving the weights are assumed to happen once, after the last epoch --
    confirm against the repository.
    '''
    # Get the data loaders
    train_loader, dev_loader, test_loader = self.__get_data_loaders()

    # Instantiate the network -- note that every time the function is called
    # self.configured_network is overridden
    if not self.silent:
        print('Building model...')
    device = torch.device(self.device)
    self.configured_network = self.input_network.to(device)

    # Load the weights if given (mapped onto the configured device)
    if self.model_weights is not None:
        self.configured_network.load_state_dict(
            torch.load(self.model_weights, map_location=device))

    # Set the network to training mode
    self.configured_network.train()

    # Load the Optimizer
    optimizer = torch.optim.Adam(self.configured_network.parameters(),
                                 lr=learning_rate, betas=(0.9, 0.999))

    # Load in the learning rate adjuster
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=self.drop_lr_n_epochs, gamma=0.1)

    # Loss to minimise
    criterion = losses.jaccard_loss

    if not self.silent:
        print('Starting training...')
        print('\nTraining for {epochs} epochs.'.format(epochs=self.max_epochs))

    # Initialize the iteration counter
    iteration_counter = 0

    # Loop through the training dataset self.max_epochs number of times
    for epoch in range(self.max_epochs):
        # Print out information about the current epoch
        if not self.silent:
            print('\nEpoch {} start:'.format(epoch + 1))

        # Ensure the gradients are set to zero before the epoch starts
        optimizer.zero_grad()

        for i, data in enumerate(Bar(train_loader)):
            # Get the inputs and put them on the same device as the network
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device).long()

            # Forward pass to get outputs
            outputs = self.configured_network(inputs)

            # Compute the loss
            loss = criterion(outputs, labels)

            # Create the compute graph
            loss.backward()

            # Computed once and reused for both printing and logging
            grad_norm = self.__compute_gradient_norm()

            # Print out the score every 100 iterations -- for debugging
            if i % 100 == 0:
                print('\nJaccard Score: ',
                      losses.jaccard_score(outputs, labels).item(),
                      "\nGradient: ", grad_norm)

            # Log the loss for the current iteration; criterion is
            # losses.jaccard_loss, so the value computed above is reused
            iteration_counter += 1
            self.training_loss.append(
                [iteration_counter, loss.item(), grad_norm])

            # Optimize weights
            optimizer.step()

            # Reset the accumulated gradients
            optimizer.zero_grad()

        # Step the learning rate (this happens in the epoch loop, not the
        # batch loop)
        scheduler.step()

        # Print out training information
        if ((epoch+1) % self.log_error_every_n_epochs) == 0:
            if self.print_error and not self.silent:
                print('\n--- Computing Training Error ---')
            self.__eval_net(train_loader, 'train', epoch)

            if self.print_error and not self.silent:
                print('\n--- Computing Dev Error ---')
            self.__eval_net(dev_loader, 'dev', epoch)

            if self.print_error and not self.silent:
                print('\n--- Computing Test Error ---')
            self.__eval_net(test_loader, 'Test', epoch)

    # Write out the error to a directory
    self.__format_and_export_logs(learning_rate)

    # Save the model
    self.__save_model_weights(self.configured_network, learning_rate)

    # The dev loss (IoU / Jaccard) is currently not computed or returned.
    return