def define_criterion(self, name):
    """Select and store the training criterion on ``self.criterion`` by name.

    Args:
        name: case-insensitive loss identifier; one of 'bce+dice', 'dice',
            'bce', 'robustfocal', 'lovasz-hinge' (alias 'lovasz'),
            'bce+lovasz'.

    Raises:
        NotImplementedError: if ``name`` matches no known loss.
    """
    # Normalize once instead of calling .lower() in every branch.
    key = name.lower()
    if key == 'bce+dice':
        self.criterion = Loss.BCE_Dice()
    elif key == 'dice':
        self.criterion = Loss.DiceLoss()
    elif key == 'bce':
        self.criterion = nn.BCEWithLogitsLoss()
    elif key == 'robustfocal':
        self.criterion = Loss.RobustFocalLoss2d()
    elif key in ('lovasz-hinge', 'lovasz'):
        self.criterion = Loss.Lovasz_Hinge(per_image=True)
    elif key == 'bce+lovasz':
        self.criterion = Loss.BCE_Lovasz(per_image=True)
    else:
        raise NotImplementedError('Loss {} is not implemented'.format(name))
def ce_loss_with_abstain(
    logits: torch.Tensor,
    labels: torch.Tensor,
    abstain: bool = False,
    abstain_cost: float = 0,
    reduction='mean',
) -> torch.Tensor:
    """
    If abstain is False, calculates the normal cross-entropy loss between
    the logits and the labels.

    If abstain is True, then multiplies this element-wise with the
    cross-entropy loss between the logits and the abstain class (the last
    one). In this way, the loss function is lower when the classifier
    either outputs the correct class OR the abstain class.

    Args:
        logits: unnormalized class scores; dim 1 indexes classes and the
            last class is the abstain class.
        labels: integer class targets for ``nn.CrossEntropyLoss``.
        abstain: enable the abstain term.
        abstain_cost: fixed cost blended into the abstain term.
        reduction: forwarded to ``Loss.reduce`` (e.g. 'mean').

    Returns:
        The reduced loss tensor.
    """
    ce_loss = nn.CrossEntropyLoss(reduction='none')
    loss = ce_loss(logits, labels)
    if abstain:
        # Target the abstain class (last index) for every element.
        # The original `ones_like(labels) * logits.size()[1] - 1` only
        # worked via precedence (ones * C, then - 1 == C - 1); full_like
        # states the intent explicitly.
        abstain_labels = torch.full_like(labels, logits.size(1) - 1)
        abstain_loss = ce_loss(logits, abstain_labels)
        # Blend a constant abstain cost with the abstain CE term.
        abstain_loss = abstain_cost + (1 - abstain_cost) * abstain_loss
        loss *= abstain_loss
    from losses import Loss  # local import kept as in the original
    return Loss.reduce(loss, reduction)
def check(self, x, tfidf, y):
    '''Debug-only pass over the data: runs the model without gradients,
    prints hidden representations and reconstruction losses, and returns
    the mean total / hidden / autoencoder losses.

    Args:
        x: input tensor, indexed by row (batch dimension first).
        tfidf: per-sample tf-idf tensor, indexed in lockstep with x.
        y: target tensor, indexed in lockstep with x.

    Returns:
        (mean total loss, mean hidden loss, mean AE loss) over all batches.
    '''
    with torch.no_grad():
        self.model.eval()
        data_size = x.shape[0]
        # Sequential (non-shuffled) batching so printed output is reproducible.
        batch_idxs = list(BatchSampler(SequentialSampler(
            range(data_size)), batch_size=self.batch_size, drop_last=False))
        losses = []
        loss_hidden_list = []
        loss_ae_list = []
        for batch_idx in batch_idxs:
            batch_x1 = x[batch_idx, :].to(self.device)
            batch_tfidf = tfidf[batch_idx].to(self.device)
            batch_y = y[batch_idx].to(self.device)
            x_hidden, y_hidden, y_predicted = self.model(
                batch_x1, batch_tfidf, batch_y)
            print("########################################################")
            print("X_HIDDEN: ", x_hidden)
            print("Y_HIDDEN: ", y_hidden)
            # NOTE(review): label says Y_HIDDEN but the value printed is
            # y_predicted — looks like a copy-paste slip in the debug label.
            print("Y_HIDDEN: ", y_predicted)
            # A fresh Loss object is built per batch purely to call its
            # reconstruction term; r1/m are hard-coded here for debugging.
            print("Reconstruction loss from model call: ", Loss(outdim_size=self.outdim_size, use_all_singular_values=False, device=self.device, r1=0.7, m=10).reconstructingLoss(y_predicted, batch_y).item())
            # Compare against the inference path of the model.
            x_hidden1, y_predicted1 = self.model.get_values(
                batch_x1, batch_tfidf)
            print("Reconstruction loss from model predict: ", Loss(outdim_size=self.outdim_size, use_all_singular_values=False, device=self.device, r1=0.7, m=10).reconstructingLoss(y_predicted1, batch_y).item())
            print("X_HIDDEN: ", x_hidden)
            print("Y_HIDDEN: ", y_hidden)
            print("Y_PRED_fromY: ", y_predicted)
            print("Y_PRED_fromX: ", y_predicted1)
            # Sanity check: training and inference paths should agree on x_hidden.
            if(not torch.equal(x_hidden, x_hidden1)):
                print("INEQUAL XHIDDEN")
            loss_hidden, loss_ae = self.loss(x_hidden, y_hidden, y_predicted, batch_y)
            print("Loss AE: ", loss_ae.item())
            print("Loss HIDDEN {0} lossae: {1}".format(
                loss_hidden.item(), loss_ae.item()))
            # Total objective: hidden loss plus lambda-weighted AE loss.
            loss = loss_hidden+self.lamda*loss_ae
            print("TOT LOSS {0}".format(loss.item()))
            losses.append(loss.item())
            loss_hidden_list.append(loss_hidden.item())
            loss_ae_list.append(loss_ae.item())
    return np.mean(losses), np.mean(loss_hidden_list), np.mean(loss_ae_list)
def __init__(self, batch_size=6, n_epochs=50, device=torch.device('cuda'),
             lr_g=0.0001, lr_d=0.0004, betas=(0., 0.9), load_weights='',
             loss_type="hinge_loss", folder_save="/var/tmp/stu04",
             img_size=128):
    """Set up the trainer: data loaders, networks, optimizers, and loss.

    Args:
        batch_size: samples per batch (batches of a different size are
            handled by the training loop).
        n_epochs: number of training epochs.
        device: torch device the networks are moved to.
        lr_g / lr_d: learning rates for generator / discriminator.
        betas: Adam beta coefficients shared by both optimizers.
        load_weights: optional checkpoint path; empty string means none.
        loss_type: identifier forwarded to the Loss factory.
        folder_save: output directory for images and checkpoints.
        img_size: square image side length for the STL10 loaders.
    """
    # Plain configuration first.
    self.device = device
    self.folder_save = folder_save
    self.batch_size = batch_size
    self.n_epochs = n_epochs
    # Data, models, optimizers, loss — in that order, as the later steps
    # (model placement, optimizer construction) depend on the earlier ones.
    loader_pair = get_loadersSTL10(batch_size, img_size)
    self.train_loader_c, self.train_loader_g = loader_pair
    self._init_models(load_weights)
    self._init_optimizers(lr_g, lr_d, betas)
    self.loss = Loss(loss_type)
    print("All packages loaded correctly.")
def runEpoch(br, dataset, model, device, output_path, t, config):
    """Run one training or evaluation epoch over ``dataset``.

    Whether this trains or evaluates is decided by ``model.training``
    (set by the caller). Predicts poses per batch, computes the rendering
    loss, optionally backpropagates, optionally saves visualization
    images, and (in training) checkpoints model/optimizer/scheduler state.

    Relies on module-level globals: ``optimizer``, ``lr_reducer``,
    ``epoch``, ``pipeline``, ``views``, ``dbg_memory``.

    Args:
        br: batch renderer / provider; ``br.batch_size`` is read here.
        dataset: iterable of batches with "images", "Rs", "ids" keys.
        model: the network; its .training flag selects the mode.
        device: torch device used for tensors and memory reporting.
        output_path: root directory for images and model checkpoints.
        t: per-id translation table; ``t[curr_id]`` yields a translation.
        config: ConfigParser-like object with Training/Loss_parameters.

    Returns:
        Mean per-sample loss over the epoch (numpy float).
    """
    global optimizer, lr_reducer
    dbg("Before train memory: {}".format(torch.cuda.memory_summary(device=device, abbreviated=False)), dbg_memory)
    if(model.training):
        print("Current mode: train!")
        print("Epoch: {0} - current learning rate: {1}".format(epoch, lr_reducer.get_lr()))
        dataset.hard_samples = [] # Reset hard samples
        torch.set_grad_enabled(True)
    else:
        print("Current mode: eval!")
        torch.set_grad_enabled(False)
    losses = []
    batch_size = br.batch_size
    hard_indeces = []  # stays empty while hard mining below is commented out
    for i,curr_batch in enumerate(dataset):
        if(model.training):
            optimizer.zero_grad()
        # Fetch images
        input_images = curr_batch["images"]
        # Predict poses
        predicted_poses = pipeline.process(input_images)
        # Prepare ground truth poses for the loss function
        T = np.array(t, dtype=np.float32)
        Rs = curr_batch["Rs"]
        ids = curr_batch["ids"]
        ts = [np.array(t[curr_id], dtype=np.float32) for curr_id in ids]
        # Calculate the loss
        loss, batch_loss, gt_images, predicted_images = Loss(predicted_poses, Rs, br, ts, ids=ids, views=views, config=config)
        Rs = torch.tensor(np.stack(Rs), device=device, dtype=torch.float32)
        print("Grad: ", loss.requires_grad)
        if(model.training):
            loss.backward()
            optimizer.step()
        # Disabled hard-sample mining: select the k highest-loss samples,
        # dump them to CSV, and feed them back via dataset.hard_samples.
        # # #Save difficult samples
        # k = int(len(curr_batch["images"])*(dataset.hard_mining_ratio))
        # batch_loss = batch_loss.squeeze()
        # top_val, top_ind = torch.topk(batch_loss, k)
        # hard_samples = Rs[top_ind]
        # hard_indeces = list(top_ind)
        # # Dump hard samples to file
        # hard_dump_dir= os.path.join(output_path, "images/epoch{0}/hard".format(epoch))
        # prepareDir(hard_dump_dir)
        # hard_dict = {"Rs":hard_samples,
        #              "losses":list(top_val)}
        # csv_file = os.path.join(hard_dump_dir,"hard_epoch{0}-batch{1}.csv".format(epoch,i))
        # with open(csv_file, "w") as outfile:
        #     writer = csv.writer(outfile)
        #     writer.writerow(hard_dict.keys())
        #     writer.writerows(zip(*hard_dict.values()))
        # # Convert hard samples to a list
        # hard_list = []
        # for h in np.arange(hard_samples.shape[0]):
        #     hard_list.append(hard_samples[h])
        # dataset.hard_samples = hard_list
        #detach all from gpu
        # NOTE(review): these three calls discard their results; they do not
        # free the original tensors — presumably intended as a GPU-memory
        # hint. Confirm whether they can be removed.
        loss.detach().cpu().numpy()
        gt_images.detach().cpu().numpy()
        predicted_images.detach().cpu().numpy()
        if(model.training):
            print("Batch: {0}/{1} (size: {2}) - loss: {3}".format(i+1,round(dataset.max_samples/batch_size), len(Rs),torch.mean(batch_loss)))
        else:
            print("Test batch: {0}/{1} (size: {2}) - loss: {3}".format(i+1,len(dataset), len(Rs),torch.mean(batch_loss)))
            #print("Test batch: {0}/{1} (size: {2}) - loss: {3}".format(i+1, round(dataset.max_samples/batch_size), len(Rs),torch.mean(batch_loss)))
        losses = losses + batch_loss.data.detach().cpu().numpy().tolist()
        if(config.getboolean('Training', 'SAVE_IMAGES')):
            # Visualize hard samples
            if(model.training):
                hard_img_dir = os.path.join(output_path, "images/epoch{0}/hard".format(epoch))
                prepareDir(hard_img_dir)
                # hard_indeces is only populated by the commented-out mining
                # block above, so this loop is currently a no-op.
                for h in hard_indeces[:1]:
                    gt_img = (gt_images[h]).detach().cpu().numpy()
                    predicted_img = (predicted_images[h]).detach().cpu().numpy()
                    vmin = np.linalg.norm(T)*0.9
                    vmax = max(np.max(gt_img), np.max(predicted_img))
                    fig = plt.figure(figsize=(12,3+len(views)*2))
                    plotView(0, len(views), vmin, vmax, input_images, gt_images, predicted_images, predicted_poses, batch_loss, batch_size, threshold=config['Loss_parameters'].getfloat('DEPTH_MAX'), img_num=h)
                    fig.tight_layout()
                    fig.savefig(os.path.join(hard_img_dir, "epoch{0}-batch{1}-sample{2}.png".format(epoch,i,h)), dpi=fig.dpi)
                    plt.close()
            # Plot the first sample of every batch (train and eval go to
            # different directories).
            if(model.training):
                batch_img_dir = os.path.join(output_path, "images/epoch{0}".format(epoch))
            else:
                batch_img_dir = os.path.join(output_path, "val-images/epoch{0}".format(epoch))
            prepareDir(batch_img_dir)
            gt_img = (gt_images[0]).detach().cpu().numpy()
            predicted_img = (predicted_images[0]).detach().cpu().numpy()
            vmin = np.linalg.norm(T)*0.9
            vmax = max(np.max(gt_img), np.max(predicted_img))
            fig = plt.figure(figsize=(12,3+len(views)*2))
            #for viewNum in np.arange(len(views)):
            plotView(0, len(views), vmin, vmax, input_images, gt_images,
                     predicted_images, predicted_poses, batch_loss, batch_size,
                     threshold=config['Loss_parameters'].getfloat('DEPTH_MAX'))
            fig.tight_layout()
            fig.savefig(os.path.join(batch_img_dir, "epoch{0}-batch{1}.png".format(epoch,i)), dpi=fig.dpi)
            plt.close()
    if(model.training):
        # Save current model
        model_dir = os.path.join(output_path, "models/")
        prepareDir(model_dir)
        state = {'model': model.state_dict(),
                 'optimizer': optimizer.state_dict(),
                 'lr_reducer': lr_reducer.state_dict(),
                 'epoch': epoch}
        torch.save(state, os.path.join(model_dir,"model-epoch{0}.pt".format(epoch)))
        # Advance the LR schedule only after a training epoch.
        lr_reducer.step()
    # Memory management
    dbg("After train memory: {}".format(torch.cuda.memory_summary(device=device, abbreviated=False)), dbg_memory)
    gc.collect()
    return np.mean(losses)
# Build the attention model + CCA-style loss + solver from the dataset
# configuration, then either fit or (in the truncated else-branch below)
# run inference.
embedding_weights = load_embeddings(dataset_conf['embedding_path'])
check_path = dataset_conf['check_path']
print_scores_every = int(dataset_conf['print_scores_every'])
load_checkpoint = dataset_conf.getboolean('load_checkpoint')
### Common code from here #########
X_train, train_tfidf, Y_train = prepare_tensors_from_data(X_train, Y_train)
X_test, test_tfidf, Y_test = prepare_tensors_from_data(X_test, Y_test)
# Carve a validation split out of the test tensors; the remaining three
# return values are discarded here.
X_val, tfidf_val, Y_val, _, _, _ = split_train_val(
    X_test, test_tfidf, Y_test)
model = AttentionModel(input_size=input_size, embedding_size=embedding_size,
                       attention_layer_size=attention_layer_size,
                       encoder_layer_size=encoder_layer_size,
                       hidden_layer_size=hidden_layer_size,
                       output_size=output_size,
                       embedding_weight_matrix=embedding_weights).to(device)
# Only the .loss callable of the Loss object is kept.
loss_func = Loss(outdim_size=hidden_layer_size,
                 use_all_singular_values=use_all_singular_values,
                 device=device, r1=r1, m=m).loss
solver = Solver(model=model, loss=loss_func, outdim_size=output_size,
                params=params, lamda=lamda, dataset_name=dataset,
                device=device)
if(is_inference and is_training_inference):
    # Prefix the checkpoint filename with "train_" when running inference
    # on the training checkpoint.
    path_strings = check_path.split('/')
    path_strings[-1] = "train_"+path_strings[-1]
    check_path = '/'.join(path_strings)
if(not is_inference):
    solver.fit(X_train, train_tfidf, Y_train, X_val, tfidf_val, Y_val,
               checkpoint=check_path, load_checkpoint=load_checkpoint,
               print_scores_every=print_scores_every)
else:
    # NOTE(review): the inference branch continues beyond this chunk.
# CenterNet training script: 2-class detector trained with Adam on a
# custom dataset of images + (center, offset, size) target masks.
from torch import optim
from torch.utils.data import DataLoader
from data.base import CustomDataset
from model.CenterNet import CenterNet
import torch
from losses import Loss
from collections import defaultdict

num_class = 2
cnet = CenterNet(num_class=num_class).to('cuda')
#cnet.load_state_dict(torch.load('model_final.pth'))
optimizer = optim.Adam(cnet.parameters(), lr=2e-4)
criterion = Loss().to('cuda')
ds = CustomDataset('../train/images', '../train/labels', num_class=num_class)
sample_loader = DataLoader(ds, batch_size=32, pin_memory=True, shuffle=True)
EPOCH = 100
# Per-component loss history, keyed by loss name.
losses = defaultdict(list)
for e in range(EPOCH):
    running_loss, wh_loss, hm_loss, off_loss = 0, 0, 0, 0
    for idx, (img, center_mask, offset_mask, size_mask) in enumerate(sample_loader):
        predictions = cnet(img.to('cuda'))
        # Channel layout of the network output: num_class heatmap channels,
        # then 2 offset channels, then 2 size channels.
        center_predict, offset_predict, size_predict = torch.split(
            predictions, [num_class, 2, 2], 1)
        center_mask, offset_mask, size_mask = \
            center_mask.to('cuda'), offset_mask.to(
                'cuda'), size_mask.to('cuda')
        # print(centers[0])
        #assert False
        prediction = [center_predict, offset_predict, size_predict]
        target = [center_mask, offset_mask, size_mask]
        # NOTE(review): the loss computation / backward step continues
        # beyond this chunk.
# NOTE(review): this span begins in the middle of an if/else whose first
# branch lies outside this chunk.
else:
    # HACK: eval() on a config-derived string to read the warm-start
    # attribute dynamically — getattr(p, p.warm_start) would be the safe
    # equivalent; eval on untrusted config is a security risk.
    if eval('p.' + p.warm_start) > p.seq_init:
        model = warm_start(p, out_dir)
    else:
        model = init_specific_model(orig_dim=p.orig_dim,
                                    latent_dim=p.latent_dim,
                                    hidden_dim=p.hidden_dim).to(device)
# train
optimizer = torch.optim.Adam(model.parameters(), lr=p.lr)
# Multi-term VAE-style objective; all weights come from the config p.
loss_f = Loss(beta=p.beta, mu=p.mu, lamPT=p.lamPT, lamCI=p.lamCI,
              lamNN=p.lamNN, lamH=p.lamH, lamSP=p.lamSP, alpha=p.alpha,
              gamma=p.gamma, tc=p.tc, is_mss=True, decoder=model.decoder)
trainer = Trainer(model, optimizer, loss_f, device=device)
trainer(train_loader, test_loader, epochs=p.num_epochs)

# calculate losses
print('calculating losses and metric...')
rec_loss, kl_loss, mu_loss, mi_loss, tc_loss, dw_kl_loss, \
    pt_loss, ci_loss, nearest_neighbor_loss, hessian_loss, sparsity_loss = calc_losses(model, test_loader, loss_f)
# Record the individual loss components on the results object s.
s.reconstruction_loss = rec_loss
s.kl_normal_loss = kl_loss
s.mu_squared_loss = mu_loss
class Trainer():
    """GAN trainer (STL10 colorization-style setup): alternates
    discriminator and generator updates, periodically saving sample
    images, checkpoints, and a tab-separated loss log."""

    def __init__(self, batch_size=6, n_epochs=50, device=torch.device('cuda'),
                 lr_g=0.0001, lr_d=0.0004, betas=(0., 0.9), load_weights='',
                 loss_type="hinge_loss", folder_save="/var/tmp/stu04",
                 img_size=128):
        """Set up loaders, networks, optimizers, and the loss.

        Args:
            batch_size: batch size; smaller trailing batches are skipped
                in train().
            n_epochs: number of epochs.
            device: torch device for both networks.
            lr_g / lr_d: generator / discriminator learning rates.
            betas: Adam betas shared by both optimizers.
            load_weights: optional checkpoint path ('' = train from scratch).
            loss_type: forwarded to the Loss factory.
            folder_save: output directory for images and weights.
            img_size: image side length for the STL10 loaders.
        """
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.device = device
        self.folder_save = folder_save
        self.train_loader_c, self.train_loader_g = get_loadersSTL10(
            batch_size, img_size)
        self._init_models(load_weights)
        self._init_optimizers(lr_g, lr_d, betas)
        self.loss = Loss(loss_type)
        print("All packages loaded correctly.")

    def _init_models(self, load_weights):
        # Build generator/discriminator, optionally restore weights,
        # then move both to the target device.
        self.netG = GeneratorUNet()
        self.netD = SADiscriminator()
        if load_weights:
            checkpoint = torch.load(load_weights)
            self.netG.load_state_dict(checkpoint['generator_state_dict'])
            self.netD.load_state_dict(checkpoint['discriminator_state_dict'])
        else:
            # NOTE(review): only the discriminator gets Xavier init here;
            # the generator keeps its default init — confirm intentional.
            self.netD.apply(xavier_init_weights)
        self.netG.to(self.device)
        self.netD.to(self.device)

    def _init_optimizers(self, lr_g, lr_d, betas):
        # Separate Adam optimizers; betas (0., 0.9) follow the common
        # GAN/hinge-loss setup.
        self.optimizer_g = Adam(self.netG.parameters(), lr=lr_g, betas=betas)
        self.optimizer_d = Adam(self.netD.parameters(), lr=lr_d, betas=betas)

    def _save_images(self, fakes, epoch, counter_iter, val="fakes"):
        # Write a normalized image grid of generated samples to folder_save.
        vutils.save_image(
            fakes,
            f"{self.folder_save}/"
            f"X_{val}Q_epoch_{epoch}_iteration_{counter_iter}.png",
            nrow=6,
            normalize=True)

    def _save_models(self, epoch, counter_iter):
        # Checkpoint both networks (weights only, no optimizer state).
        torch.save(
            {
                'generator_state_dict': self.netG.state_dict(),
                'discriminator_state_dict': self.netD.state_dict(),
            },
            f'{self.folder_save}/S_weights_{epoch}_iteration_{counter_iter}.pth'
        )

    def train(self):
        """Run the alternating D/G training loop for n_epochs.

        Writes per-iteration losses to all_losses.txt, saves sample
        images every 500 iterations and weights every 5000.
        """
        counter_iter = 0
        loaders = (self.train_loader_c, self.train_loader_g)
        # Truncate/create the loss log and write the header once.
        with open("all_losses.txt", "w+") as file:
            file.write("iteration\tlossD\tlossG\n")
        losses_d = []
        losses_g = []
        for epoch in range(self.n_epochs):
            print("epoch :", epoch)
            # for idx, (img_g, img_c) in enumerate(train_loader):
            # Iterate the color and grayscale loaders in lockstep.
            for idx, ((img_c, _), (img_g, _)) in enumerate(zip(*loaders)):
                img_g = img_g.to(self.device)
                img_c = img_c.to(self.device)
                # The last batch hasn't the same batch size so skip it
                bs, *_ = img_g.shape
                if bs != self.batch_size:
                    continue
                #######################
                # Train Discriminator #
                #######################
                # Create fake colors
                fakes = self.netG(img_g)
                # detach() so the D step doesn't backprop into G.
                d_loss = self.loss.disc_loss(self.netD, img_c, fakes.detach())
                m_d_loss = d_loss.item()
                # Backward and optimize
                self.netD.zero_grad()
                d_loss.backward()
                self.optimizer_d.step()
                # Release the gpu memory
                del d_loss
                ###################
                # Train Generator #
                ###################
                g_loss = self.loss.gen_loss(self.netD, fakes)
                # Backward and optimize
                self.netG.zero_grad()
                g_loss.backward()
                self.optimizer_g.step()
                m_g_loss = g_loss.item()
                if counter_iter % 10 == 0:
                    print(f"Epoch [{epoch}/{self.n_epochs}], "
                          f"iter[{idx}/{len(self.train_loader_g)}], "
                          f"d_out_real: {m_d_loss}, "
                          f"g_out_fake: {m_g_loss}")
                if counter_iter % 500 == 0:
                    self._save_images(fakes.detach(), epoch, counter_iter)
                if counter_iter % 5000 == 0:
                    self._save_models(epoch, counter_iter)
                    print(">plotted and saved weights")
                # Release the gpu memory
                del fakes, g_loss
                losses_d.append(m_d_loss)
                losses_g.append(m_g_loss)
                torch.cuda.empty_cache()
                # Append this iteration's losses to the log; the file is
                # reopened per iteration so progress survives a crash.
                with open("all_losses.txt", "a+") as file:
                    file.write(
                        str(counter_iter) + "\t" +
                        str(round(losses_d[-1], 3)) + "\t" +
                        str(round(losses_g[-1], 3)) + "\n")
                counter_iter += 1
def __init__(self, conf: Config, loss: Loss, **kwargs):
    """Build the graph-classification module from the config.

    Loads datasets, handles the brain-net split special case, seeds all
    RNGs, constructs the backbone model, optionally wraps it with a
    classifier head (FinalLayers), and sets up train/val/test metrics.

    Args:
        conf: experiment configuration (dataset, model, classifier
            hyperparameters, seed, ...).
        loss: loss object; its on_epoch_start hook is invoked once here.
        **kwargs: forwarded to the superclass constructor.
    """
    super().__init__(**kwargs)
    self.loss = loss
    # Log the config as hyperparameters (dataclass -> OmegaConf).
    self.save_hyperparameters(OmegaConf.create(asdict(conf)))
    self.conf = conf
    self.train_ds, self.val_ds_list, self.test_ds_list, self.id2graphlet_list = load_data(self.conf)
    if conf.dataset_name == "brain-net":
        # brain-net ships no predefined splits: either carve a stratified
        # k-fold validation split out of train, or validate on train itself.
        if self.conf.num_splits > 1:
            self.train_ds, val_ds = stratified_ksplit(
                self.train_ds, self.conf.num_splits, self.conf.split
            )
            self.val_ds_list = [val_ds]
            self.test_ds_list = []
        else:
            self.val_ds_list = [self.train_ds]
    if self.conf.model in [ModelName.RPGNN, ModelName.GNN]:
        # Plain GNN variants do not use graphlet dictionaries.
        self.train_id2graphlet = None
        self.id2graphlet_list = None
    else:
        self.train_id2graphlet = self.id2graphlet_list[0]
    # Seed every RNG source for reproducibility.
    seed_everything(self.conf.seed)
    np.random.seed(self.conf.seed)
    torch.manual_seed(self.conf.seed)
    model = self.build_model(conf, (self.train_ds, self.train_id2graphlet))
    # Wrap with a classifier head unless it's a single-layer
    # GraphletCounting model (which already outputs predictions).
    if not (conf.model == ModelName.GraphletCounting and conf.num_layers == 1):
        h_dim, act = None, None
        if conf.classifier_num_hidden > 0:
            h_dim = conf.classifier_h_dim
            act = nn.ReLU
        batch_norm = None
        if conf.batch_norm.presence and conf.model not in [ModelName.GNN, ModelName.RPGNN]:
            batch_norm = nn.BatchNorm1d(model.out_dim, affine=conf.batch_norm.affine)
        if conf.model is ModelName.GraphletCounting:
            # Hidden classifier layers must account for the backbone depth.
            assert conf.classifier_num_hidden == conf.num_layers - 2
        model = FinalLayers(
            model,
            num_out=conf.num_out,
            h_dim=h_dim,
            act=act,
            n_hidden_layers=conf.classifier_num_hidden,
            batch_norm=batch_norm,
            dropout=conf.classifier_dropout,
        )
    self.model = model
    loss.on_epoch_start(epoch=0, model=model)
    # Matthews correlation coefficient for the molecular/protein
    # benchmarks, plain accuracy otherwise.
    with_mcc = conf.dataset_name in ["NCI1", "NCI109", "PROTEINS", "DD"]
    self.train_acc = MatthewsCoef(num_classes=conf.num_out) if with_mcc else pl.metrics.Accuracy()
    self.val_acc = MatthewsCoef(num_classes=conf.num_out) if with_mcc else pl.metrics.Accuracy()
    self.test_acc = MatthewsCoef(num_classes=conf.num_out) if with_mcc else pl.metrics.Accuracy()
def runEpoch(br, dataset, model, device, output_path, t, config):
    """Run a (truncated) training epoch: predict poses, compute the
    rendering loss, backprop, plot the first sample, then stop.

    Relies on module-level globals: ``optimizer``, ``lr_reducer``,
    ``epoch``, ``pipeline``, ``views``, ``dbg_memory``.

    Args:
        br: batch renderer / provider; ``br.batch_size`` is read here.
        dataset: iterable of batches with "images", "Rs", "ids" keys.
        model: the network; only its .training flag is consulted.
        device: torch device for tensors and memory reporting.
        output_path: root directory for the per-epoch images.
        t: per-id translation table; ``t[curr_id]`` yields a translation.
        config: ConfigParser-like object with a Loss_parameters section.

    Returns:
        Mean per-sample loss over the processed batch(es).
    """
    global optimizer, lr_reducer
    dbg(
        "Before train memory: {}".format(
            torch.cuda.memory_summary(device=device, abbreviated=False)),
        dbg_memory)
    print("Epoch: {0} - current learning rate: {1}".format(
        epoch, lr_reducer.get_lr()))
    dataset.hard_samples = []  # Reset hard samples
    torch.set_grad_enabled(True)
    losses = []
    batch_size = br.batch_size
    hard_indeces = []  # unused in this variant
    for i, curr_batch in enumerate(dataset):
        if (model.training):
            optimizer.zero_grad()
        # Fetch images
        input_images = curr_batch["images"]
        # Predict poses
        predicted_poses = pipeline.process(input_images)
        # Prepare ground truth poses for the loss function
        T = np.array(t, dtype=np.float32)
        Rs = curr_batch["Rs"]
        ids = curr_batch["ids"]
        ts = [np.array(t[curr_id], dtype=np.float32) for curr_id in ids]
        # Calculate the loss
        loss, batch_loss, gt_images, predicted_images = Loss(predicted_poses, Rs, br, ts, ids=ids, views=views, config=config)
        Rs = torch.tensor(np.stack(Rs), device=device, dtype=torch.float32)
        print("Grad: ", loss.requires_grad)
        if (model.training):
            loss.backward()
            optimizer.step()
        #detach all from gpu
        # NOTE(review): results discarded; these calls do not free the
        # original tensors — confirm whether they can be removed.
        loss.detach().cpu().numpy()
        gt_images.detach().cpu().numpy()
        predicted_images.detach().cpu().numpy()
        losses = losses + batch_loss.data.detach().cpu().numpy().tolist()
        # Plot the first sample of the batch.
        batch_img_dir = os.path.join(output_path,
                                     "images/epoch{0}".format(epoch))
        prepareDir(batch_img_dir)
        gt_img = (gt_images[0]).detach().cpu().numpy()
        predicted_img = (predicted_images[0]).detach().cpu().numpy()
        vmin = np.linalg.norm(T) * 0.9
        vmax = max(np.max(gt_img), np.max(predicted_img))
        fig = plt.figure(figsize=(12, 3 + len(views) * 2))
        plotView(0, len(views), vmin, vmax, input_images, gt_images,
                 predicted_images, predicted_poses, batch_loss, batch_size,
                 threshold=config['Loss_parameters'].getfloat('DEPTH_MAX'))
        fig.tight_layout()
        fig.savefig(os.path.join(batch_img_dir,
                                 "epoch{0}-batch{1}.png".format(epoch, i)),
                    dpi=fig.dpi)
        plt.close()
        lr_reducer.step()
        # NOTE(review): unconditional break — only the first batch is ever
        # processed each call. Looks like a debugging leftover; confirm.
        break
    # Memory management
    dbg(
        "After train memory: {}".format(
            torch.cuda.memory_summary(device=device, abbreviated=False)),
        dbg_memory)
    gc.collect()
    return np.mean(losses)