def compute_loss(self, model_out, x, target_label):
    em = model_out['em']
    ev = model_out['ev']
    z = model_out['z']
    dm = model_out['x_pred']
    mc_samples = model_out['mc_samples']

    # KL Divergence
    kl_divergence = 0.5 * torch.mean(em**2 + ev - torch.log(ev) - 1, axis=1)

    # Reconstruction Term
    # Proximity: L1 Loss
    x_pred = dm[0]
    s = self.cf_vae.encoded_start_cat
    recon_err = -torch.sum(torch.abs(x[:, s:-1] - x_pred[:, s:-1]), axis=1)
    for key in self.normalise_weights.keys():
        recon_err += -(self.normalise_weights[key][1] - self.normalise_weights[key][0]) * torch.abs(x[:, key] - x_pred[:, key])

    # Sum to 1 over the categorical indexes of a feature
    for v in self.cf_vae.encoded_categorical_feature_indexes:
        temp = -torch.abs(1.0 - torch.sum(x_pred[:, v[0]:v[-1] + 1], axis=1))
        recon_err += temp

    # Validity
    temp_logits = self.pred_model(x_pred)
    validity_loss = torch.zeros(1)
    temp_1 = temp_logits[target_label == 1, :]
    temp_0 = temp_logits[target_label == 0, :]
    validity_loss += F.hinge_embedding_loss(
        torch.sigmoid(temp_1[:, 1]) - torch.sigmoid(temp_1[:, 0]),
        torch.tensor(-1), self.margin, reduction='mean')
    validity_loss += F.hinge_embedding_loss(
        torch.sigmoid(temp_0[:, 0]) - torch.sigmoid(temp_0[:, 1]),
        torch.tensor(-1), self.margin, reduction='mean')

    for i in range(1, mc_samples):
        x_pred = dm[i]
        recon_err += -torch.sum(torch.abs(x[:, s:-1] - x_pred[:, s:-1]), axis=1)
        for key in self.normalise_weights.keys():
            recon_err += -(self.normalise_weights[key][1] - self.normalise_weights[key][0]) * torch.abs(x[:, key] - x_pred[:, key])

        # Sum to 1 over the categorical indexes of a feature
        for v in self.cf_vae.encoded_categorical_feature_indexes:
            temp = -torch.abs(1.0 - torch.sum(x_pred[:, v[0]:v[-1] + 1], axis=1))
            recon_err += temp

        # Validity
        temp_logits = self.pred_model(x_pred)
        temp_1 = temp_logits[target_label == 1, :]
        temp_0 = temp_logits[target_label == 0, :]
        validity_loss += F.hinge_embedding_loss(
            torch.sigmoid(temp_1[:, 1]) - torch.sigmoid(temp_1[:, 0]),
            torch.tensor(-1), self.margin, reduction='mean')
        validity_loss += F.hinge_embedding_loss(
            torch.sigmoid(temp_0[:, 0]) - torch.sigmoid(temp_0[:, 1]),
            torch.tensor(-1), self.margin, reduction='mean')

    recon_err = recon_err / mc_samples
    validity_loss = -1 * self.validity_reg * validity_loss / mc_samples

    print('recon: ', -torch.mean(recon_err), ' KL: ', torch.mean(kl_divergence), ' Validity: ', -validity_loss)
    return -torch.mean(recon_err - kl_divergence) - validity_loss
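# Note on the validity term above: with a target of -1, F.hinge_embedding_loss(d, -1, margin)
# reduces to max(0, margin - d), so the term pushes the sigmoid score of the target class
# above the other class by at least `margin`. A minimal, self-contained sketch of that
# identity (the score gaps below are made up):
import torch
import torch.nn.functional as F

d = torch.tensor([0.4, -0.2, 0.05])  # hypothetical sigma(s_target) - sigma(s_other)
margin = 0.165

hinge = F.hinge_embedding_loss(d, torch.tensor(-1), margin, reduction='mean')
manual = torch.clamp(margin - d, min=0).mean()
assert torch.allclose(hinge, manual)  # max(0, margin - d), averaged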
def forward(self):
    a = torch.randn(3, 2)
    b = torch.rand(3, 2)
    c = torch.rand(3)
    log_probs = torch.randn(50, 16, 20).log_softmax(2).detach()
    targets = torch.randint(1, 20, (16, 30), dtype=torch.long)
    input_lengths = torch.full((16,), 50, dtype=torch.long)
    target_lengths = torch.randint(10, 30, (16,), dtype=torch.long)
    # len() takes a single argument, so the losses are collected into one tuple
    return len((
        F.binary_cross_entropy(torch.sigmoid(a), b),
        F.binary_cross_entropy_with_logits(torch.sigmoid(a), b),
        F.poisson_nll_loss(a, b),
        F.cosine_embedding_loss(a, b, c),
        F.cross_entropy(a, b),
        F.ctc_loss(log_probs, targets, input_lengths, target_lengths),
        # F.gaussian_nll_loss(a, b, torch.ones(5, 1)),  # ENTER is not supported in mobile module
        F.hinge_embedding_loss(a, b),
        F.kl_div(a, b),
        F.l1_loss(a, b),
        F.mse_loss(a, b),
        F.margin_ranking_loss(c, c, c),
        F.multilabel_margin_loss(self.x, self.y),
        F.multilabel_soft_margin_loss(self.x, self.y),
        F.multi_margin_loss(self.x, torch.tensor([3])),
        F.nll_loss(a, torch.tensor([1, 0, 1])),
        F.huber_loss(a, b),
        F.smooth_l1_loss(a, b),
        F.soft_margin_loss(a, b),
        F.triplet_margin_loss(a, b, -b),
        # F.triplet_margin_with_distance_loss(a, b, -b),  # can't take variable number of arguments
    ))
def get_loss(loss_function, output, label, use_gpu):
    '''
    Get the objective loss of the model and backpropagate to compute gradients.
    Some loss functions are not implemented.
    '''
    if not isinstance(loss_function, str):
        raise TypeError('loss_function should be str object')
    label = np.asarray(label)
    if loss_function == 'binary_cross_entropy':
        loss = F.binary_cross_entropy(output, label)
    elif loss_function == 'poisson_nll_loss':
        loss = F.poisson_nll_loss(output, label)
    elif loss_function == 'cross_entropy':
        loss = F.cross_entropy(output, label)
    elif loss_function == 'hinge_embedding_loss':
        loss = F.hinge_embedding_loss(output, label)
    elif loss_function == 'margin_ranking_loss':
        loss = F.margin_ranking_loss(output, label)
    elif loss_function == 'multilabel_soft_margin_loss':
        loss = F.multilabel_soft_margin_loss(output, label)
    elif loss_function == 'multi_margin_loss':
        loss = F.multi_margin_loss(output, label)
    elif loss_function == 'nll_loss':
        if use_gpu:
            label = Variable(torch.LongTensor(label).cuda())
        else:  # without this else, the GPU label above was overwritten
            label = Variable(torch.LongTensor(label))
        loss = F.nll_loss(output, label)
    elif loss_function == 'binary_cross_entropy_with_logits':
        loss = F.binary_cross_entropy_with_logits(output, label)
    return loss
def test_loss(epoch):
    n = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        print('Loss this is {}_epoch: {}_th batch'.format(epoch, batch_idx))
        data, target = Variable(data, requires_grad=True).cuda(), Variable(target).cuda()
        data = Variable(data.data, requires_grad=True)
        '''
        if batch_idx % 10 == 0:  # show the img generated by source_m
            tmp_img = model.source_m(data)
            tmp_img = tmp_img.cpu().data.numpy()[0]
            tmp_img = np.reshape(tmp_img, (28, 28))
            cv2.imshow('kk', tmp_img)
            cv2.waitKey(200)
            cv2.destroyAllWindows()
        '''
        x0, output = model1(data)
        # print(x0.sum(0).size())
        # print(data.sum(0).size())
        loss1 = torch.norm((x0.sum(0) - data.sum(0)) / (data.size()[0]), 2)
        loss2 = torch.norm(x0 - data, 2)
        loss = loss2 + F.nll_loss(output, target) + F.hinge_embedding_loss(data, target)
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        n += pred.eq(target.data).cpu().sum()
        optimizer1.zero_grad()
        loss.backward()
        # print(data.grad)
        optimizer1.step()
    print('Train loss accuracy is {}'.format(n / len(test_loader.dataset)))
    return n / len(test_loader.dataset)
def compute_loss(self, outputs, targets):
    if next(self.parameters()).is_cuda:
        targets = targets.cuda()
    # L1 distance between the two embeddings
    L1_dist = torch.sum(torch.abs(outputs['image_embedding'] - outputs['chem_embedding']), dim=1)
    # Map {0, 1} labels to the {-1, 1} convention expected by hinge_embedding_loss
    targets = (targets * 2) - 1
    return F.hinge_embedding_loss(L1_dist, targets, margin=1, reduction='mean')
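# The snippet above maps {0, 1} labels to the {-1, 1} convention hinge_embedding_loss
# expects: similar pairs (target 1) are penalised by their distance, dissimilar pairs
# (target -1) only when closer than the margin. A self-contained sketch with made-up
# distances that checks this against the documented piecewise definition:
import torch
import torch.nn.functional as F

dist = torch.tensor([0.3, 2.5, 0.1, 1.7])  # hypothetical L1 embedding distances
raw = torch.tensor([1, 0, 1, 0])           # 1 = matching pair, 0 = non-matching
target = (raw * 2) - 1                     # {0, 1} -> {-1, 1}

loss = F.hinge_embedding_loss(dist, target, margin=1.0, reduction='mean')
# per element: dist if target == 1, else max(0, margin - dist)
manual = torch.where(target == 1, dist, torch.clamp(1.0 - dist, min=0.0)).mean()
assert torch.allclose(loss, manual)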
def test_hinge_embedding_loss(self):
    inp = torch.randn(128, 32, device='cuda', dtype=self.dtype)
    # torch.randint(0, 1, ...) only yields zeros, so every target is -1 (the "dissimilar" case)
    target = torch.randint(0, 1, (32,), device='cuda') - 1
    output = F.hinge_embedding_loss(inp, target, margin=1.0,
                                    size_average=None, reduce=None,
                                    reduction='mean')
def train_constraint_loss(model, train_dataset, optimizer, normalise_weights,
                          validity_reg, constraint_reg, margin,
                          epochs=1000, batch_size=1024):
    batch_num = 0
    train_loss = 0.0
    train_size = 0

    # pred_model, compute_loss and cuda are module-level globals here
    train_dataset = torch.tensor(train_dataset).float().to(cuda)
    train_dataset = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    good_cf_count = 0
    for train_x in train_dataset:
        optimizer.zero_grad()
        train_y = 1.0 - torch.argmax(pred_model(train_x), dim=1)
        train_size += train_x.shape[0]

        out = model(train_x, train_y)
        loss = compute_loss(model, out, train_x, train_y, normalise_weights, validity_reg, margin)

        dm = out['x_pred']
        mc_samples = out['mc_samples']
        # Monotonicity constraint on feature 0: penalise counterfactuals where x_pred < train_x
        x_pred = dm[0]
        constraint_loss = F.hinge_embedding_loss(
            x_pred[:, 0] - train_x[:, 0], torch.tensor(-1).to(cuda), 0).to(cuda)
        for j in range(1, mc_samples):
            x_pred = dm[j]
            constraint_loss += F.hinge_embedding_loss(
                x_pred[:, 0] - train_x[:, 0], torch.tensor(-1).to(cuda), 0).to(cuda)

        constraint_loss = constraint_loss / mc_samples
        constraint_loss = constraint_reg * constraint_loss
        print('Constraint: ', constraint_loss, torch.mean(constraint_loss))
        loss += torch.mean(constraint_loss)

        train_loss += loss.item()
        batch_num += 1

        loss.backward()
        optimizer.step()

    ret = train_loss
    print('Train Avg Loss: ', ret, train_size)
    return ret
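# The constraint term above is the zero-margin special case of the hinge: with target -1
# and margin 0, F.hinge_embedding_loss(delta, -1, 0) equals mean(max(0, -delta)), i.e. it
# only penalises feature decreases. A quick sketch of that equivalence on made-up deltas:
import torch
import torch.nn.functional as F

delta = torch.tensor([0.2, -0.5, 0.0, -1.3])  # stand-in for x_pred[:, 0] - train_x[:, 0]
constraint = F.hinge_embedding_loss(delta, torch.tensor(-1), 0)
assert torch.allclose(constraint, F.relu(-delta).mean())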
def linear_model_feature_approx(train_dataset, constrained_feature_indices, param_tensor):
    # Linear model parameters: [effect, cause_1, cause_2, cause_3, ...]
    # batch_size, normalise_weights, de_normalise and cuda are module-level globals here
    num_params = len(constrained_feature_indices)
    wm = 1e-2
    learning_rate = 1e-3
    optimizer = optim.Adam(
        [{'params': filter(lambda p: p.requires_grad, param_tensor), 'weight_decay': wm}],
        lr=learning_rate)

    batch_num = 0
    train_loss = 0.0
    train_size = 0

    train_dataset = np.array_split(train_dataset, train_dataset.shape[0] // batch_size, axis=0)
    for i in range(len(train_dataset)):
        optimizer.zero_grad()
        train_x = torch.tensor(train_dataset[i]).float().to(cuda)
        train_size += train_x.shape[0]

        # Forward pass of the model
        model_loss = torch.zeros(train_x.shape[0]).to(cuda) + param_tensor[0]
        for j in range(0, len(constrained_feature_indices)):  # j, not i: avoid shadowing the batch index
            idx = constrained_feature_indices[j]
            if j == 0:
                model_loss += de_normalise(train_x[:, idx], normalise_weights[idx])
            else:
                model_loss += -1 * param_tensor[j] * de_normalise(train_x[:, idx], normalise_weights[idx])
        model_loss = torch.sum(model_loss**2, axis=0)
        model_loss = model_loss.view(1)
        # print('Model Loss: ', model_loss)

        # Constraint implications on model parameters: keep each cause weight non-negative
        for j in range(1, len(constrained_feature_indices)):
            idx = constrained_feature_indices[j]
            reg = 5
            hinge_loss = F.hinge_embedding_loss(param_tensor[j], torch.tensor(-1).to(cuda), 0).to(cuda)
            # hinge_loss.data[hinge_loss > 0.1] = 0
            # print('Hinge Loss: ', param_tensor[idx], hinge_loss)
            model_loss += reg * hinge_loss

        # Backward pass
        train_loss += model_loss
        model_loss.backward()
        batch_num += 1
        optimizer.step()

    ret = train_loss
    print('Train Avg Loss: ', ret, train_size)
    print('Param: ', param_tensor)
    return param_tensor
def contrastive_loss(distance, labels):
    margin = 3.0
    is_diff = (labels).float()
    # Alternative formulations that were tried:
    # loss = torch.mean(((1 - is_diff) * torch.pow(distance, 2)) +
    #                   ((is_diff) * torch.pow(torch.abs(labels - distance), 2)))
    # loss = torch.mean((1 - is_diff) * torch.pow(distance, 2) +
    #                   (is_diff) * torch.pow(torch.clamp(margin - distance, min=0.0), 2))
    # assert distance.shape[1] == 1
    assert distance.shape[0] == is_diff.shape[0]
    # NB: hinge_embedding_loss documents targets in {1, -1}; the 0/1 labels passed here do
    # not follow that convention, so the 0-labelled pairs are not handled as the standard
    # "dissimilar pair" case.
    loss = F.hinge_embedding_loss(distance, target=is_diff, margin=1.0)
    return loss
def configure_criterion(self, y, t):
    if self.hparams.criterion == "cross_entropy":
        criterion = F.cross_entropy(y, t)
    elif self.hparams.criterion == "binary_cross_entropy":
        criterion = F.binary_cross_entropy(y, t)
    elif self.hparams.criterion == "binary_cross_entropy_with_logits":
        criterion = F.binary_cross_entropy_with_logits(y, t)
    elif self.hparams.criterion == "poisson_nll_loss":
        criterion = F.poisson_nll_loss(y, t)
    elif self.hparams.criterion == "hinge_embedding_loss":
        criterion = F.hinge_embedding_loss(y, t)
    elif self.hparams.criterion == "kl_div":
        criterion = F.kl_div(y, t)
    elif self.hparams.criterion == "l1_loss":
        criterion = F.l1_loss(y, t)
    elif self.hparams.criterion == "mse_loss":
        criterion = F.mse_loss(y, t)
    elif self.hparams.criterion == "margin_ranking_loss":
        criterion = F.margin_ranking_loss(y, t)
    elif self.hparams.criterion == "multilabel_margin_loss":
        criterion = F.multilabel_margin_loss(y, t)
    elif self.hparams.criterion == "multilabel_soft_margin_loss":
        criterion = F.multilabel_soft_margin_loss(y, t)
    elif self.hparams.criterion == "multi_margin_loss":
        criterion = F.multi_margin_loss(y, t)
    elif self.hparams.criterion == "nll_loss":
        criterion = F.nll_loss(y, t)
    elif self.hparams.criterion == "smooth_l1_loss":
        criterion = F.smooth_l1_loss(y, t)
    elif self.hparams.criterion == "soft_margin_loss":
        criterion = F.soft_margin_loss(y, t)
    else:
        # Fall back to cross entropy for unrecognised criterion names,
        # without evaluating it eagerly before the dispatch
        criterion = F.cross_entropy(y, t)
    return criterion
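# The chain above evaluates one branch per call; an equivalent, table-driven sketch
# (names and the cross-entropy fallback are assumed from the snippet above) keeps the
# mapping in a single dict. Some entries, e.g. margin_ranking_loss, would need different
# arguments in practice, exactly as in the original chain.
import torch.nn.functional as F

CRITERIA = {
    "cross_entropy": F.cross_entropy,
    "binary_cross_entropy": F.binary_cross_entropy,
    "binary_cross_entropy_with_logits": F.binary_cross_entropy_with_logits,
    "poisson_nll_loss": F.poisson_nll_loss,
    "hinge_embedding_loss": F.hinge_embedding_loss,
    "kl_div": F.kl_div,
    "l1_loss": F.l1_loss,
    "mse_loss": F.mse_loss,
    "multilabel_soft_margin_loss": F.multilabel_soft_margin_loss,
    "nll_loss": F.nll_loss,
    "smooth_l1_loss": F.smooth_l1_loss,
    "soft_margin_loss": F.soft_margin_loss,
}

def configure_criterion(y, t, name):
    # Unknown names fall back to cross entropy, matching the chain above
    return CRITERIA.get(name, F.cross_entropy)(y, t)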
def hinge_loss(self, prediction, label):
    # HingeEmbeddingLoss
    return F.hinge_embedding_loss(prediction, label, margin=1.0)
def compute_root_node_loss(model, model_out, x, target_label, normalise_weights,
                           validity_reg, margin, constraint_nodes):
    em = model_out['em']
    ev = model_out['ev']
    z = model_out['z']
    dm = model_out['x_pred']
    mc_samples = model_out['mc_samples']

    # KL Divergence
    kl_divergence = 0.5 * torch.mean(em**2 + ev - torch.log(ev) - 1, axis=1)

    # Reconstruction Term
    # Proximity: L1 Loss
    x_pred = dm[0]
    s = model.encoded_start_cat
    recon_err = -torch.sum(torch.abs(x[:, s:-1] - x_pred[:, s:-1]), axis=1)
    for key in normalise_weights.keys():
        if int(key) not in constraint_nodes:
            # recon_err += -(1/mad_feature_weights[d.encoded_feature_names[int(key)]])*(normalise_weights[key][1] - normalise_weights[key][0])*torch.abs(x[:,key] - x_pred[:,key])
            recon_err += -(normalise_weights[key][1] - normalise_weights[key][0]) * torch.abs(x[:, key] - x_pred[:, key])

    # Sum to 1 over the categorical indexes of a feature
    for v in model.encoded_categorical_feature_indexes:
        temp = -torch.abs(1.0 - torch.sum(x_pred[:, v[0]:v[-1] + 1], axis=1))
        recon_err += temp

    # Count of continuous dimensions outside [0, 1]
    count = 0
    count += torch.sum(x_pred[:, :s] < 0, axis=1).float()
    count += torch.sum(x_pred[:, :s] > 1, axis=1).float()

    # Validity
    temp_logits = pred_model(x_pred)
    # validity_loss = -F.cross_entropy(temp_logits, target_label)
    validity_loss = torch.zeros(1).to(cuda)
    temp_1 = temp_logits[target_label == 1, :]
    temp_0 = temp_logits[target_label == 0, :]
    validity_loss += F.hinge_embedding_loss(
        torch.sigmoid(temp_1[:, 1]).to(cuda) - torch.sigmoid(temp_1[:, 0]).to(cuda),
        torch.tensor(-1).to(cuda), margin, reduction='mean')
    validity_loss += F.hinge_embedding_loss(
        torch.sigmoid(temp_0[:, 0]).to(cuda) - torch.sigmoid(temp_0[:, 1]).to(cuda),
        torch.tensor(-1).to(cuda), margin, reduction='mean')

    for i in range(1, mc_samples):
        x_pred = dm[i]
        recon_err += -torch.sum(torch.abs(x[:, s:-1] - x_pred[:, s:-1]), axis=1)
        for key in normalise_weights.keys():
            if int(key) not in constraint_nodes:
                recon_err += -(normalise_weights[key][1] - normalise_weights[key][0]) * torch.abs(x[:, key] - x_pred[:, key])

        # Sum to 1 over the categorical indexes of a feature
        for v in model.encoded_categorical_feature_indexes:
            temp = -torch.abs(1.0 - torch.sum(x_pred[:, v[0]:v[-1] + 1], axis=1))
            recon_err += temp

        count += torch.sum(x_pred[:, :s] < 0, axis=1).float()
        count += torch.sum(x_pred[:, :s] > 1, axis=1).float()

        # Validity
        temp_logits = pred_model(x_pred)
        temp_1 = temp_logits[target_label == 1, :]
        temp_0 = temp_logits[target_label == 0, :]
        validity_loss += F.hinge_embedding_loss(
            torch.sigmoid(temp_1[:, 1]).to(cuda) - torch.sigmoid(temp_1[:, 0]).to(cuda),
            torch.tensor(-1).to(cuda), margin, reduction='mean')
        validity_loss += F.hinge_embedding_loss(
            torch.sigmoid(temp_0[:, 0]).to(cuda) - torch.sigmoid(temp_0[:, 1]).to(cuda),
            torch.tensor(-1).to(cuda), margin, reduction='mean')

    recon_err = recon_err / mc_samples
    validity_loss = -1 * validity_reg * validity_loss / mc_samples

    print('Avg wrong cont dim: ', torch.mean(count) / mc_samples)
    print('recon: ', -torch.mean(recon_err), ' KL: ', torch.mean(kl_divergence), ' Validity: ', -validity_loss)
    return -torch.mean(recon_err - kl_divergence) - validity_loss
def hinge_embedding(y_pred, y_true):
    return F.hinge_embedding_loss(y_pred, y_true)
def compute_loss(model, model_out, x, target_label, validity_reg, margin):
    em = model_out['em']
    ev = model_out['ev']
    z = model_out['z']
    dm = model_out['x_pred']
    mc_samples = model_out['mc_samples']

    # KL Divergence
    kl_divergence = 0.5 * torch.mean(em**2 + ev - torch.log(ev) - 1, axis=1)

    # Reconstruction Term
    # Proximity: L1 Loss
    x_pred = dm[0]
    # s is zero here, so the slice covers every dimension and this reduces to a plain proximity term
    s = model.encoded_start_cat
    recon_err = -torch.sum(torch.abs(x[:, s:-1] - x_pred[:, s:-1]), axis=1)

    # Count of continuous dimensions outside [0, 1]
    count = 0
    count += torch.sum(x_pred[:, :s] < 0, axis=1).float()
    count += torch.sum(x_pred[:, :s] > 1, axis=1).float()

    # Validity
    temp_logits = pred_model(x_pred)
    # validity_loss = -F.cross_entropy(temp_logits, target_label)
    validity_loss = torch.zeros(1).to(cuda)

    # Loop over all classes to compute the hinge loss
    num_classes = 10
    for t_c in range(num_classes):
        # Validity loss for the data points in the batch whose target class is t_c
        temp = temp_logits[target_label == t_c, :]
        if temp.shape[0] == 0:
            # No data point in this batch with the target class t_c
            continue
        target_class_batch_score = temp[:, t_c]
        # Best score among the remaining classes; torch.max over dim=1 (the class axis)
        # returns (values, indices), and the first element gives the values
        if t_c == 0:
            other_class_batch_score = torch.max(temp[:, t_c + 1:], dim=1)[0]
        elif t_c == num_classes - 1:
            other_class_batch_score = torch.max(temp[:, :t_c], dim=1)[0]
        else:
            # Concatenate along the non-batch axis, skipping column t_c
            temp = torch.cat((temp[:, :t_c], temp[:, t_c + 1:]), dim=1)
            other_class_batch_score = torch.max(temp, dim=1)[0]
        validity_loss += F.hinge_embedding_loss(
            torch.sigmoid(target_class_batch_score).to(cuda) - torch.sigmoid(other_class_batch_score).to(cuda),
            torch.tensor(-1).to(cuda), margin, reduction='mean')

    for i in range(1, mc_samples):
        x_pred = dm[i]
        recon_err += -torch.sum(torch.abs(x[:, s:-1] - x_pred[:, s:-1]), axis=1)

        count += torch.sum(x_pred[:, :s] < 0, axis=1).float()
        count += torch.sum(x_pred[:, :s] > 1, axis=1).float()

        # Validity
        temp_logits = pred_model(x_pred)
        # validity_loss += -F.cross_entropy(temp_logits, target_label)
        for t_c in range(num_classes):
            temp = temp_logits[target_label == t_c, :]
            if temp.shape[0] == 0:
                continue
            target_class_batch_score = temp[:, t_c]
            if t_c == 0:
                other_class_batch_score = torch.max(temp[:, t_c + 1:], dim=1)[0]
            elif t_c == num_classes - 1:
                other_class_batch_score = torch.max(temp[:, :t_c], dim=1)[0]
            else:
                temp = torch.cat((temp[:, :t_c], temp[:, t_c + 1:]), dim=1)
                other_class_batch_score = torch.max(temp, dim=1)[0]
            validity_loss += F.hinge_embedding_loss(
                torch.sigmoid(target_class_batch_score).to(cuda) - torch.sigmoid(other_class_batch_score).to(cuda),
                torch.tensor(-1).to(cuda), margin, reduction='mean')

    recon_err = recon_err / mc_samples
    validity_loss = -1 * validity_reg * validity_loss / mc_samples

    print('Avg wrong cont dim: ', torch.mean(count) / mc_samples)
    print('recon: ', -torch.mean(recon_err), ' KL: ', torch.mean(kl_divergence), ' Validity: ', -validity_loss)
    return -torch.mean(recon_err - kl_divergence) - validity_loss
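# The multi-class validity term above compares the target class's sigmoid score against
# the best non-target class. A compact sketch of that per-class hinge on a toy batch
# (logits, t_c and margin below are made up; sigmoid after max is equivalent to max of
# sigmoids because sigmoid is monotonic):
import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)  # toy batch of classifier logits, 10 classes
t_c, margin = 3, 0.5         # assumed target class and margin

target_score = torch.sigmoid(logits[:, t_c])
others = torch.cat((logits[:, :t_c], logits[:, t_c + 1:]), dim=1)
other_score = torch.sigmoid(others.max(dim=1)[0])

# max(0, margin - (target - other)): zero once the target class wins by >= margin
v = F.hinge_embedding_loss(target_score - other_score,
                           torch.tensor(-1), margin, reduction='mean')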
def train(self, constraint_type, constraint_variables, constraint_direction, constraint_reg, pre_trained=False):
    '''
    pre_trained: bool, whether a pre-trained model exists, to avoid training again
    constraint_type: binary, currently (1) unary / (0) monotonic
    constraint_variables: list of lists: [[Effect, Cause1, Cause2, ...]]
    constraint_direction: -1 negative, 1 positive (has to be 1 for monotonic constraints)
    constraint_reg: tunable hyperparameter
    '''
    if pre_trained:
        self.cf_vae.load_state_dict(torch.load(self.save_path))
        self.cf_vae.eval()
        return

    ## TODO: Handle such dataset-specific constraints in a more general way
    # CF generation only for low-to-high income data points
    self.vae_train_dataset = self.vae_train_dataset[self.vae_train_dataset[:, -1] == 0, :]
    self.vae_val_dataset = self.vae_val_dataset[self.vae_val_dataset[:, -1] == 0, :]

    # Remove the outcome variable from the datasets
    self.vae_train_feat = self.vae_train_dataset[:, :-1]
    self.vae_val_feat = self.vae_val_dataset[:, :-1]

    for epoch in range(self.epochs):
        batch_num = 0
        train_loss = 0.0
        train_size = 0

        train_dataset = torch.tensor(self.vae_train_feat).float()
        train_dataset = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
        for train_x in train_dataset:
            self.cf_vae_optimizer.zero_grad()
            train_y = 1.0 - torch.argmax(self.pred_model(train_x), dim=1)
            train_size += train_x.shape[0]

            out = self.cf_vae(train_x, train_y)
            loss = self.compute_loss(out, train_x, train_y)

            # Unary case
            if constraint_type:
                for const in constraint_variables:
                    # Get the index from the feature name
                    # Handle the categorical variable case here too
                    const_idx = const[0]
                    dm = out['x_pred']
                    mc_samples = out['mc_samples']
                    x_pred = dm[0]
                    constraint_loss = F.hinge_embedding_loss(
                        constraint_direction * (x_pred[:, const_idx] - train_x[:, const_idx]),
                        torch.tensor(-1), 0)
                    for j in range(1, mc_samples):
                        x_pred = dm[j]
                        constraint_loss += F.hinge_embedding_loss(
                            constraint_direction * (x_pred[:, const_idx] - train_x[:, const_idx]),
                            torch.tensor(-1), 0)
                    constraint_loss = constraint_loss / mc_samples
                    constraint_loss = constraint_reg * constraint_loss
                    loss += constraint_loss
                    print('Constraint: ', constraint_loss, torch.mean(constraint_loss))
            else:
                # Train the regression model
                print('Yet to implement')

            loss.backward()
            train_loss += loss.item()
            self.cf_vae_optimizer.step()
            batch_num += 1

        ret = train_loss / batch_num
        print('Train Avg Loss: ', ret, train_size)

        # Save the model after training every 10 epochs and at the last epoch
        if (epoch != 0 and epoch % 10 == 0) or epoch == self.epochs - 1:
            torch.save(self.cf_vae.state_dict(), self.save_path)
def testing(data, model):
    model.eval()
    combo_loss_avg = []
    sep_loss_avg = []
    pred_cluster_properties = []
    edge_acc_track = np.zeros(config.test_samples, dtype=float)

    color_cycle = plt.cm.coolwarm(np.linspace(
        0.1, 0.9, (config.input_classes + config.input_class_delta) * config.k))
    marker_hits = ['^', 'v', 's', 'h', '<', '>']
    marker_centers = ['+', '1', 'x', '3', '2', '4']

    '''Input Class +- Range'''
    # input_classes_rand = torch.randint(low=config.input_classes - config.input_class_delta,
    #                                    high=config.input_classes + config.input_class_delta + 1,
    #                                    size=(config.train_samples,), device=torch.device('cuda'))
    # should be mean number of tracks + maximum variation
    input_classes_rand = (config.input_classes + config.input_class_delta) * torch.ones(
        size=(config.train_samples,), device=torch.device('cuda'), dtype=torch.int)

    print('\n[TEST]:')
    t1 = timer()
    epoch = 0

    with torch.no_grad():
        '''book-keeping'''
        sep_loss_track = np.zeros((config.test_samples, 3), dtype=float)
        avg_loss_track = np.zeros(config.test_samples, dtype=float)
        edge_acc_track = np.zeros(config.test_samples, dtype=float)
        edge_acc_conf = np.zeros((config.test_samples, config.ncats_out, config.ncats_out), dtype=int)
        pred_cluster_properties = []
        avg_loss = 0

        if (config.make_test_efficiency_plots == True):
            pt_true = []
            matched_pt_true = []
            pred_pt = []
            eta_true = []
            matched_eta_true = []
            pred_eta = []
            phi_true = []
            matched_phi_true = []
            pred_phi = []
            nhits_true = []
            matched_nhits_true = []

        for idata, d in enumerate(data[config.train_samples:config.train_samples + config.test_samples]):
            d_gpu = d.to('cuda')
            y_orig = d_gpu.y

            d_gpu.x = d_gpu.x[d_gpu.y < input_classes_rand[idata]]  # keep only the first input_classes_rand[idata] tracks
            d_gpu.x = (d_gpu.x - torch.min(d_gpu.x, axis=0).values) / (
                torch.max(d_gpu.x, axis=0).values - torch.min(d_gpu.x, axis=0).values)  # normalise
            d_gpu.y_particle_barcodes = d_gpu.y_particle_barcodes[d_gpu.y < input_classes_rand[idata]]
            d_gpu.y = d_gpu.y[d_gpu.y < input_classes_rand[idata]]

            '''
            Project data to an nd plane where it is separable using the deep model;
            compute edge net scores and separated cluster properties in that latent space.
            '''
            coords, edge_scores, edges, cluster_map, cluster_props, cluster_batch = model(d_gpu.x)

            '''Compute latent space distances'''
            d_hinge, y_hinge = center_embedding_truth(coords, d_gpu.y, device='cuda')
            # multi_simple_hinge += simple_embedding_truth(coords_interm, d_gpu.y, device='cuda')

            '''Compute centers in latent space'''
            centers = scatter_mean(coords, d_gpu.y, dim=0, dim_size=(torch.max(d_gpu.y).item() + 1))

            '''Compute Losses'''
            # Hinge: embedding-distance-based loss
            loss_hinge = F.hinge_embedding_loss(
                torch.where(y_hinge == 1, d_hinge**2, d_hinge),
                y_hinge, margin=2.0, reduction='mean')

            # Cross entropy: edge categories loss
            y_edgecat = (d_gpu.y[edges[0]] == d_gpu.y[edges[1]]).long()
            loss_ce = F.cross_entropy(edge_scores, y_edgecat, reduction='mean')

            # MSE: cluster loss
            pred_cluster_match, y_properties = match_cluster_targets(cluster_map, d_gpu.y, d_gpu)
            mapped_props = cluster_props[pred_cluster_match].squeeze()
            props_pt = F.softplus(mapped_props[:, 0])
            props_eta = 5.0 * (2 * torch.sigmoid(mapped_props[:, 1]) - 1)
            props_phi = math.pi * (2 * torch.sigmoid(mapped_props[:, 2]) - 1)
            loss_mse = (F.mse_loss(props_pt, y_properties[:, 0], reduction='mean') +
                        F.mse_loss(props_eta, y_properties[:, 1], reduction='mean') +
                        F.mse_loss(props_phi, y_properties[:, 2], reduction='mean')) / model.nprops_out

            # Combined loss
            loss = (loss_hinge + loss_ce + loss_mse) / config.batch_size

            avg_loss_track[idata] = loss.item()
            avg_loss += loss.item()

            '''Track losses, accuracies and properties'''
            sep_loss_track[idata, 0] = loss_hinge.detach().cpu().numpy() / config.batch_size
            sep_loss_track[idata, 1] = loss_ce.detach().cpu().numpy() / config.batch_size
            sep_loss_track[idata, 2] = loss_mse.detach().cpu().numpy() / config.batch_size

            true_edges = y_edgecat.sum().item()
            edge_accuracy = (torch.argmax(edge_scores, dim=1) == y_edgecat).sum().item() / (y_edgecat.size()[0])
            edge_acc_track[idata] = edge_accuracy
            edge_acc_conf[idata, :, :] = confusion_matrix(
                y_edgecat.detach().cpu().numpy(),
                torch.argmax(edge_scores, dim=1).detach().cpu().numpy())

            true_prop = y_properties.detach().cpu().numpy()
            pred_prop = cluster_props[pred_cluster_match].squeeze().detach().cpu().numpy()
            pred_cluster_properties.append([
                (1. / y_properties[:, 0], 1. / y_properties[:, 1], 1. / y_properties[:, 2]),
                (1. / props_pt), (1. / props_eta), (1. / props_phi)])

            '''Plot test clusters'''
            if (config.make_test_plots == True and idata % (config.test_samples / 10) == 0):
                fig = plt.figure(figsize=(8, 8))
                if config.output_dim == 3:
                    ax = fig.add_subplot(111, projection='3d')
                    for i in range(centers.size()[0]):
                        ax.scatter(coords[d_gpu.y == i, 0].detach().cpu().numpy(),
                                   coords[d_gpu.y == i, 1].detach().cpu().numpy(),
                                   coords[d_gpu.y == i, 2].detach().cpu().numpy(),
                                   color=color_cycle[(i * config.k) % ((config.input_classes + config.input_class_delta) * config.k - 1)],
                                   marker=marker_hits[i % 6], s=100)
                        ax.scatter(centers[i, 0].detach().cpu().numpy(),
                                   centers[i, 1].detach().cpu().numpy(),
                                   centers[i, 2].detach().cpu().numpy(),
                                   marker=marker_centers[i % 6],
                                   color=color_cycle[(i * config.k) % ((config.input_classes + config.input_class_delta) * config.k - 1)],
                                   s=100)
                elif config.output_dim == 2:
                    for i in range(int(centers.size()[0])):
                        plt.scatter(coords[d_gpu.y == i, 0].detach().cpu().numpy(),
                                    coords[d_gpu.y == i, 1].detach().cpu().numpy(),
                                    color=color_cycle[(i * config.k) % ((config.input_classes + config.input_class_delta) * config.k - 1)],
                                    marker=marker_hits[i % 6])
                        plt.scatter(centers[i, 0].detach().cpu().numpy(),
                                    centers[i, 1].detach().cpu().numpy(),
                                    color=color_cycle[(i * config.k) % ((config.input_classes + config.input_class_delta) * config.k - 1)],
                                    edgecolors='b',
                                    marker=marker_centers[i % 6])
                plt.title('test_plot_' + '_ex_' + str(idata) + '_EdgeAcc_' + str('{:.5e}'.format(edge_accuracy)))
                plt.savefig(config.plot_path + 'test_plot_' + '_ex_' + str(idata) + '.pdf')
                plt.close(fig)

            '''Plot properties' efficiency'''
            if (config.make_test_efficiency_plots == True):
                first_n_tracks = d.y < input_classes_rand[idata]
                n_clusters = d.y[first_n_tracks].max().item() + 1
                track_lengths = []
                true_lengths = []
                for i in range(n_clusters):
                    mapped_i = pred_cluster_match[i].item()
                    r = d_gpu.x[cluster_map == mapped_i, 0].cpu().detach().numpy()
                    r_true = d_gpu.x[d_gpu.y == i, 0].cpu().detach().numpy()
                    phi = d_gpu.x[cluster_map == mapped_i, 1].cpu().detach().numpy()
                    z = d_gpu.x[cluster_map == mapped_i, 2].cpu().detach().numpy()
                    track_lengths.append(r.shape[0])
                    true_lengths.append(r_true.shape[0])
                    if r_true.shape[0] > 1:
                        pt_true.append(1 / y_properties[i, 0].item())
                        eta_true.append(y_properties[i, 1].item())
                        phi_true.append(y_properties[i, 2].item())
                        nhits_true.append(r_true.shape[0])
                    if r.shape[0] > 1:
                        matched_pt_true.append(1 / y_properties[i, 0].item())
                        pred_pt.append(1. / F.softplus(cluster_props[mapped_i, 0]).item())
                        matched_eta_true.append(y_properties[i, 1].item())
                        pred_eta.append(5.0 * (2 * torch.sigmoid(cluster_props[mapped_i, 1]) - 1))
                        matched_phi_true.append(y_properties[i, 2].item())
                        pred_phi.append(math.pi * (2 * torch.sigmoid(cluster_props[mapped_i, 2]) - 1))
                        matched_nhits_true.append(r_true.shape[0])

        if (config.make_test_efficiency_plots == True):
            plot_properties_efficiency(pt_true, matched_pt_true, pred_pt,
                                       eta_true, matched_eta_true, pred_eta,
                                       phi_true, matched_phi_true, pred_phi,
                                       nhits_true, matched_nhits_true)

        '''Track test updates'''
        combo_loss_avg.append(avg_loss_track.mean())
        sep_loss_avg.append([sep_loss_track[:, 0].mean(),
                             sep_loss_track[:, 1].mean(),
                             sep_loss_track[:, 2].mean()])
        true_0_1 = edge_acc_conf.sum(axis=2)
        pred_0_1 = edge_acc_conf.sum(axis=1)
        total_true_0_1 = true_0_1.sum(axis=0)
        total_pred_0_1 = pred_0_1.sum(axis=0)

        '''Test Stats'''
        print('--------------------')
        print("Losses:\nCombined: {:.5e}\nHinge_distance: {:.5e}\nCrossEntr_Edges: {:.5e}\nMSE_centers: {:.5e}".format(
            combo_loss_avg[epoch], sep_loss_avg[epoch][0], sep_loss_avg[epoch][1], sep_loss_avg[epoch][2]))
        print("Track/Class Count Variation per event: {} +/- {}".format(
            config.input_classes, config.input_class_delta))
        print("[TEST] Average Edge Accuracies over {} events: {:.5e}".format(
            config.test_samples, edge_acc_track.mean()))
        print("Total true edges [class_0: {:6d}] [class_1: {:6d}]".format(total_true_0_1[0], total_true_0_1[1]))
        print("Total pred edges [class_0: {:6d}] [class_1: {:6d}]".format(total_pred_0_1[0], total_pred_0_1[1]))

        logtofile(config.plot_path, config.logfile_name, '\nTEST:')
        logtofile(config.plot_path, config.logfile_name,
                  "Losses:\nCombined: {:.5e}\nHinge_distance: {:.5e}\nCrossEntr_Edges: {:.5e}\nMSE_centers: {:.5e}".format(
                      combo_loss_avg[epoch], sep_loss_avg[epoch][0], sep_loss_avg[epoch][1], sep_loss_avg[epoch][2]))
        logtofile(config.plot_path, config.logfile_name,
                  "Track/Class Count Variation per event: {} +/- {}".format(
                      config.input_classes, config.input_class_delta))
        logtofile(config.plot_path, config.logfile_name,
                  "Average Edge Accuracies over {} events, {} Tracks: {:.5e}".format(
                      config.test_samples, config.input_classes, edge_acc_track.mean()))
        logtofile(config.plot_path, config.logfile_name,
                  "Total true edges [class_0: {:6d}] [class_1: {:6d}]".format(total_true_0_1[0], total_true_0_1[1]))
        logtofile(config.plot_path, config.logfile_name,
                  "Total pred edges [class_0: {:6d}] [class_1: {:6d}]".format(total_pred_0_1[0], total_pred_0_1[1]))
        logtofile(config.plot_path, config.logfile_name, '--------------------------')

    t2 = timer()
    print("Testing Completed in {:.5f}mins.\n".format((t2 - t1) / 60.0))
    return combo_loss_avg, sep_loss_avg, edge_acc_track, pred_cluster_properties, edge_acc_conf
def training(data, model, opt, sched, lr_param_gp_1, lr_param_gp_2, lr_param_gp_3,
             lr_threshold_1, lr_threshold_2, converged_embedding, converged_categorizer,
             start_epoch, best_loss, input_classes_rand=None):
    model.train()
    combo_loss_avg = []
    sep_loss_avg = []
    pred_cluster_properties = []
    edge_acc_track = np.zeros(config.train_samples, dtype=float)

    color_cycle = plt.cm.coolwarm(np.linspace(
        0.1, 0.9, (config.input_classes + config.input_class_delta) * config.k))
    marker_hits = ['^', 'v', 's', 'h', '<', '>']
    marker_centers = ['+', '1', 'x', '3', '2', '4']

    if input_classes_rand is None:
        '''Input Class +- Range'''
        input_classes_rand = torch.randint(low=config.input_classes - config.input_class_delta,
                                           high=config.input_classes + config.input_class_delta + 1,
                                           size=(config.train_samples,),
                                           device=torch.device('cuda'))

    print('\n[TRAIN]:')
    t1 = timer()

    for epoch in range(start_epoch, start_epoch + config.total_epochs):
        '''book-keeping'''
        sep_loss_track = np.zeros((config.train_samples, 3), dtype=float)
        avg_loss_track = np.zeros(config.train_samples, dtype=float)
        edge_acc_track = np.zeros(config.train_samples, dtype=float)
        edge_acc_conf = np.zeros((config.train_samples, config.ncats_out, config.ncats_out), dtype=int)
        pred_cluster_properties = []
        avg_loss = 0

        opt.zero_grad()

        # if opt.param_groups[0]['lr'] < lr_threshold_1 and not converged_embedding:
        #     converged_embedding = True
        #     opt.param_groups[1]['lr'] = lr_threshold_1
        #     opt.param_groups[2]['lr'] = lr_threshold_2
        # if opt.param_groups[1]['lr'] < lr_threshold_1 and not converged_categorizer and converged_embedding:
        #     converged_categorizer = True
        #     opt.param_groups[2]['lr'] = lr_threshold_2

        for idata, d in enumerate(data[0:config.train_samples]):
            d_gpu = d.to('cuda')
            y_orig = d_gpu.y

            d_gpu.x = d_gpu.x[d_gpu.y < input_classes_rand[idata]]
            d_gpu.x = (d_gpu.x - torch.min(d_gpu.x, axis=0).values) / (
                torch.max(d_gpu.x, axis=0).values - torch.min(d_gpu.x, axis=0).values)  # normalise
            d_gpu.y_particle_barcodes = d_gpu.y_particle_barcodes[d_gpu.y < input_classes_rand[idata]]
            d_gpu.y = d_gpu.y[d_gpu.y < input_classes_rand[idata]]
            # plot_event(d_gpu.x.detach().cpu().numpy(), d_gpu.y.detach().cpu().numpy())

            '''
            Project the embedding to an nd latent space where it is separable using the deep model;
            compute edge net scores and separated cluster properties with the embedding.
            '''
            coords, edge_scores, edges, cluster_map, cluster_props, cluster_batch = model(d_gpu.x)

            # -------------- LINDSEY TRAINING VERSION ------------------
            '''Compute latent space distances'''
            d_hinge, y_hinge = center_embedding_truth(coords, d_gpu.y, device='cuda')

            '''Compute centers in latent space'''
            centers = scatter_mean(coords, d_gpu.y, dim=0, dim_size=(torch.max(d_gpu.y).item() + 1))

            '''Compute Losses'''
            # Hinge: embedding-distance-based loss
            loss_hinge = F.hinge_embedding_loss(
                torch.where(y_hinge == 1, d_hinge**2, d_hinge),
                y_hinge, margin=2.0, reduction='mean')

            # Cross entropy: edge categories loss
            y_edgecat = (d_gpu.y[edges[0]] == d_gpu.y[edges[1]]).long()
            loss_ce = F.cross_entropy(edge_scores, y_edgecat, reduction='mean')

            # MSE: cluster loss
            pred_cluster_match, y_properties = match_cluster_targets(cluster_map, d_gpu.y, d_gpu)
            mapped_props = cluster_props[pred_cluster_match].squeeze()
            props_pt = F.softplus(mapped_props[:, 0])
            props_eta = 5.0 * (2 * torch.sigmoid(mapped_props[:, 1]) - 1)
            props_phi = math.pi * (2 * torch.sigmoid(mapped_props[:, 2]) - 1)
            loss_mse = (F.mse_loss(props_pt, y_properties[:, 0], reduction='mean') +
                        F.mse_loss(props_eta, y_properties[:, 1], reduction='mean') +
                        F.mse_loss(props_phi, y_properties[:, 2], reduction='mean')) / model.nprops_out

            # Combined loss
            loss = (loss_hinge + loss_ce + loss_mse) / config.batch_size

            avg_loss_track[idata] = loss.item()
            avg_loss += loss.item()

            '''Track losses, accuracies and properties'''
            sep_loss_track[idata, 0] = loss_hinge.detach().cpu().numpy() / config.batch_size
            sep_loss_track[idata, 1] = loss_ce.detach().cpu().numpy() / config.batch_size
            sep_loss_track[idata, 2] = loss_mse.detach().cpu().numpy() / config.batch_size

            true_edges = y_edgecat.sum().item()
            edge_accuracy = (torch.argmax(edge_scores, dim=1) == y_edgecat).sum().item() / (y_edgecat.size()[0])
            edge_acc_track[idata] = edge_accuracy
            edge_acc_conf[idata, :, :] = confusion_matrix(
                y_edgecat.detach().cpu().numpy(),
                torch.argmax(edge_scores, dim=1).detach().cpu().numpy())

            true_prop = y_properties.detach().cpu().numpy()
            pred_prop = cluster_props[pred_cluster_match].squeeze().detach().cpu().numpy()
            pred_cluster_properties.append([
                (1. / y_properties[:, 0], 1. / y_properties[:, 1], 1. / y_properties[:, 2]),
                (1. / props_pt), (1. / props_eta), (1. / props_phi)])

            '''Plot training clusters'''
            if (config.make_train_plots == True
                    and (epoch == 0 or epoch == start_epoch + config.total_epochs - 1)
                    and idata % (config.train_samples / 10) == 0):
                fig = plt.figure(figsize=(8, 8))
                if config.output_dim == 3:
                    ax = fig.add_subplot(111, projection='3d')
                    for i in range(centers.size()[0]):
                        ax.scatter(coords[d_gpu.y == i, 0].detach().cpu().numpy(),
                                   coords[d_gpu.y == i, 1].detach().cpu().numpy(),
                                   coords[d_gpu.y == i, 2].detach().cpu().numpy(),
                                   color=color_cycle[(i * config.k) % ((config.input_classes + config.input_class_delta) * config.k - 1)],
                                   marker=marker_hits[i % 6], s=100)
                        ax.scatter(centers[i, 0].detach().cpu().numpy(),
                                   centers[i, 1].detach().cpu().numpy(),
                                   centers[i, 2].detach().cpu().numpy(),
                                   marker=marker_centers[i % 6],
                                   color=color_cycle[(i * config.k) % ((config.input_classes + config.input_class_delta) * config.k - 1)],
                                   s=100)
                elif config.output_dim == 2:
                    for i in range(int(centers.size()[0])):
                        plt.scatter(coords[d_gpu.y == i, 0].detach().cpu().numpy(),
                                    coords[d_gpu.y == i, 1].detach().cpu().numpy(),
                                    color=color_cycle[(i * config.k) % ((config.input_classes + config.input_class_delta) * config.k - 1)],
                                    marker=marker_hits[i % 6])
                        plt.scatter(centers[i, 0].detach().cpu().numpy(),
                                    centers[i, 1].detach().cpu().numpy(),
                                    color=color_cycle[(i * config.k) % ((config.input_classes + config.input_class_delta) * config.k - 1)],
                                    edgecolors='b',
                                    marker=marker_centers[i % 6])
                plt.title('train_plot_epoch_' + str(epoch) + '_ex_' + str(idata) + '_EdgeAcc_' + str('{:.5e}'.format(edge_accuracy)))
                plt.savefig(config.plot_path + 'train_plot_epoch_' + str(epoch) + '_ex_' + str(idata) + '.pdf')
                plt.close(fig)

            '''Loss backward'''
            loss.backward()

            '''Update weights'''
            if (((idata + 1) % config.batch_size == 0) or ((idata + 1) == config.train_samples)):
                opt.step()
                if (config.schedLR):
                    sched.step(avg_loss)

        '''Track epoch updates'''
        combo_loss_avg.append(avg_loss_track.mean())
        sep_loss_avg.append([sep_loss_track[:, 0].mean(),
                             sep_loss_track[:, 1].mean(),
                             sep_loss_track[:, 2].mean()])
        true_0_1 = edge_acc_conf.sum(axis=2)
        pred_0_1 = edge_acc_conf.sum(axis=1)
        total_true_0_1 = true_0_1.sum(axis=0)
        total_pred_0_1 = pred_0_1.sum(axis=0)
        # print('true_0_1:', true_0_1)
        # pdb.set_trace()

        if (epoch % 10 == 0 or epoch == start_epoch or epoch == start_epoch + config.total_epochs - 1):
            '''Per-epoch stats'''
            print('--------------------')
            print("Epoch: {}\nLosses:\nCombined: {:.5e}\nHinge_distance: {:.5e}\nCrossEntr_Edges: {:.5e}\nMSE_centers: {:.5e}".format(
                epoch, combo_loss_avg[epoch - start_epoch],
                sep_loss_avg[epoch - start_epoch][0],
                sep_loss_avg[epoch - start_epoch][1],
                sep_loss_avg[epoch - start_epoch][2]))
            print("LR: opt.param_groups \n[0]: {:.9e} \n[1]: {:.9e} \n[2]: {:.9e}".format(
                opt.param_groups[0]['lr'], opt.param_groups[1]['lr'], opt.param_groups[2]['lr']))
            print("Track/Class Count Variation per event: {} +/- {}".format(
                config.input_classes, config.input_class_delta))
            print("[TRAIN] Average Edge Accuracies over {} events: {:.5e}".format(
                config.train_samples, edge_acc_track.mean()))
            print("Total true edges [class_0: {:6d}] [class_1: {:6d}]".format(total_true_0_1[0], total_true_0_1[1]))
            print("Total pred edges [class_0: {:6d}] [class_1: {:6d}]".format(total_pred_0_1[0], total_pred_0_1[1]))

        if (epoch == start_epoch + config.total_epochs - 1 or epoch == start_epoch):
            logtofile(config.plot_path, config.logfile_name,
                      "Epoch: {}\nLosses:\nCombined: {:.5e}\nHinge_distance: {:.5e}\nCrossEntr_Edges: {:.5e}\nMSE_centers: {:.5e}".format(
                          epoch, combo_loss_avg[epoch - start_epoch],
                          sep_loss_avg[epoch - start_epoch][0],
                          sep_loss_avg[epoch - start_epoch][1],
                          sep_loss_avg[epoch - start_epoch][2]))
            logtofile(config.plot_path, config.logfile_name,
                      "LR: opt.param_groups \n[0]: {:.9e} \n[1]: {:.9e} \n[2]: {:.9e}".format(
                          opt.param_groups[0]['lr'], opt.param_groups[1]['lr'], opt.param_groups[2]['lr']))
            logtofile(config.plot_path, config.logfile_name,
                      "Track/Class Count Variation per event: {} +/- {}".format(
                          config.input_classes, config.input_class_delta))
            logtofile(config.plot_path, config.logfile_name,
                      "Average Edge Accuracies over {} events, {} Tracks: {:.5e}".format(
                          config.train_samples, config.input_classes, edge_acc_track.mean()))
            logtofile(config.plot_path, config.logfile_name,
                      "Total true edges [class_0: {:6d}] [class_1: {:6d}]".format(total_true_0_1[0], total_true_0_1[1]))
            logtofile(config.plot_path, config.logfile_name,
                      "Total pred edges [class_0: {:6d}] [class_1: {:6d}]".format(total_pred_0_1[0], total_pred_0_1[1]))
            logtofile(config.plot_path, config.logfile_name, '--------------------------')

        if (combo_loss_avg[epoch - start_epoch] < best_loss):
            best_loss = combo_loss_avg[epoch - start_epoch]
            is_best = True
            checkpoint = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': opt.state_dict(),
                'scheduler': sched.state_dict(),
                'converged_embedding': False,
                'converged_categorizer': False,
                'best_loss': best_loss,
                'input_classes_rand': input_classes_rand
            }
            checkpoint_name = ('event' + str(config.train_samples)
                               + '_classes' + str(config.input_classes)
                               + '_epoch' + str(epoch)
                               + '_loss' + '{:.5e}'.format(combo_loss_avg[epoch - start_epoch])
                               + '_edgeAcc' + '{:.5e}'.format(edge_acc_track.mean()))
            save_checkpoint(checkpoint, is_best, config.checkpoint_path, checkpoint_name)

    t2 = timer()
    print('--------------------')
    print("Training Completed in {:.5f}mins.".format((t2 - t1) / 60.0))
    # print('1/properties: ', 1/y_properties)
    # print('pred cluster matches: ', pred_cluster_match)
    # print('1/cluster_prop[cluster_match]: ', 1/cluster_props[pred_cluster_match].squeeze())
    return combo_loss_avg, sep_loss_avg, edge_acc_track, pred_cluster_properties, edge_acc_conf
def testing(data, model):
    model.eval()
    combo_loss_avg = []
    sep_loss_avg = []
    pred_cluster_properties = []
    edge_acc_track = np.zeros(test_samples, dtype=float)

    color_cycle = plt.cm.coolwarm(np.linspace(0.1, 0.9, input_classes * k))
    marker_hits = ['^', 'v', 's', 'h', '<', '>']
    marker_centers = ['+', '1', 'x', '3', '2', '4']

    print('\n[TEST]:')
    t1 = timer()
    epoch = 0

    with torch.no_grad():
        '''book-keeping'''
        sep_loss_track = np.zeros((test_samples, 3), dtype=float)
        avg_loss_track = np.zeros(test_samples, dtype=float)
        edge_acc_track = np.zeros(test_samples, dtype=float)
        edge_acc_conf = np.zeros((test_samples, ncats_out, ncats_out), dtype=int)
        pred_cluster_properties = []
        avg_loss = 0

        if make_plots:
            plt.clf()

        for idata, d in enumerate(data[train_samples:train_samples + test_samples]):
            d_gpu = d.to('cuda')
            y_orig = d_gpu.y

            d_gpu.x = d_gpu.x[d_gpu.y < input_classes]  # keep only the first input_classes tracks
            d_gpu.x = (d_gpu.x - torch.min(d_gpu.x, axis=0).values) / (
                torch.max(d_gpu.x, axis=0).values - torch.min(d_gpu.x, axis=0).values)  # normalise
            d_gpu.y_particle_barcodes = d_gpu.y_particle_barcodes[d_gpu.y < input_classes]
            d_gpu.y = d_gpu.y[d_gpu.y < input_classes]

            '''
            Project data to a 2d plane where it is separable using the deep model;
            compute edge net scores and separated cluster properties in that latent space.
            '''
            coords, edge_scores, edges, cluster_map, cluster_props, cluster_batch = model(d_gpu.x)

            '''Compute latent space distances'''
            multi_simple_hinge = simple_embedding_truth(coords, d_gpu.y, device='cuda')
            # multi_simple_hinge += simple_embedding_truth(coords_interm, d_gpu.y, device='cuda')

            '''Predict centers in latent space'''
            centers = scatter_mean(coords, d_gpu.y, dim=0, dim_size=(torch.max(d_gpu.y).item() + 1))

            '''LOSSES'''
            '''Hinge: embedding-distance-based loss'''
            hinges = torch.cat([
                F.hinge_embedding_loss(dis**2, y, margin=1.0, reduction='mean')[None]
                for dis, y in multi_simple_hinge], dim=0)

            '''Cross entropy: edge categories loss'''
            y_edgecat = (d_gpu.y[edges[0]] == d_gpu.y[edges[1]]).long()
            loss_ce = F.cross_entropy(edge_scores, y_edgecat, reduction='mean')

            '''MSE: cluster loss'''
            pred_cluster_match, y_properties = match_cluster_targets(cluster_map, d_gpu.y, d_gpu)
            loss_mse = F.mse_loss(cluster_props[pred_cluster_match].squeeze(), y_properties, reduction='mean')

            '''Combined loss'''
            loss = hinges.mean() + loss_ce + loss_mse
            avg_loss_track[idata] = loss.item()
            avg_loss += loss.item()

            '''Track losses, accuracies and properties'''
            sep_loss_track[idata, 0] = hinges.mean().detach().cpu().numpy()
            sep_loss_track[idata, 1] = loss_ce.detach().cpu().numpy()
            sep_loss_track[idata, 2] = loss_mse.detach().cpu().numpy()

            true_edges = y_edgecat.sum().item()
            edge_accuracy = (torch.argmax(edge_scores, dim=1) == y_edgecat).sum().item() / (y_edgecat.size()[0])
            edge_acc_track[idata] = edge_accuracy
            edge_acc_conf[idata, :, :] = confusion_matrix(
                y_edgecat.detach().cpu().numpy(),
                torch.argmax(edge_scores, dim=1).detach().cpu().numpy())

            true_prop = y_properties.detach().cpu().numpy()
            pred_prop = cluster_props[pred_cluster_match].squeeze().detach().cpu().numpy()
            pred_cluster_properties.append([1 / true_prop, 1 / pred_prop])

            '''Plot test clusters'''
            if (make_test_plots == True):
                fig = plt.figure(figsize=(8, 8))
                if output_dim == 3:
                    ax = fig.add_subplot(111, projection='3d')
                    for i in range(centers.size()[0]):
                        ax.scatter(coords[d_gpu.y == i, 0].detach().cpu().numpy(),
                                   coords[d_gpu.y == i, 1].detach().cpu().numpy(),
                                   coords[d_gpu.y == i, 2].detach().cpu().numpy(),
                                   color=color_cycle[(i * k) % (test_samples * k - 1)],
                                   marker=marker_hits[i % 6], s=100)
                        ax.scatter(centers[i, 0].detach().cpu().numpy(),
                                   centers[i, 1].detach().cpu().numpy(),
                                   centers[i, 2].detach().cpu().numpy(),
                                   marker=marker_centers[i % 6],
                                   color=color_cycle[(i * k) % (test_samples * k - 1)],
                                   s=100)
                elif output_dim == 2:
                    for i in range(int(centers.size()[0])):
                        plt.scatter(coords[d_gpu.y == i, 0].detach().cpu().numpy(),
                                    coords[d_gpu.y == i, 1].detach().cpu().numpy(),
                                    color=color_cycle[(i * k) % (test_samples * k - 1)],
                                    marker=marker_hits[i % 6])
                        plt.scatter(centers[i, 0].detach().cpu().numpy(),
                                    centers[i, 1].detach().cpu().numpy(),
                                    color=color_cycle[(i * k) % (test_samples * k - 1)],
                                    edgecolors='b',
                                    marker=marker_centers[i % 6])
                plt.title('test_plot_' + '_ex_' + str(idata) + '_EdgeAcc_' + str('{:.5e}'.format(edge_accuracy)))
                plt.savefig(plot_path + 'test_plot_' + '_ex_' + str(idata) + '.pdf')
                plt.close(fig)

        '''Track test updates'''
        combo_loss_avg.append(avg_loss_track.mean())
        sep_loss_avg.append([sep_loss_track[:, 0].mean(),
                             sep_loss_track[:, 1].mean(),
                             sep_loss_track[:, 2].mean()])
        true_0_1 = edge_acc_conf.sum(axis=2)
        pred_0_1 = edge_acc_conf.sum(axis=1)
        total_true_0_1 = true_0_1.sum(axis=0)
        total_pred_0_1 = pred_0_1.sum(axis=0)

        '''Test Stats'''
        print('--------------------')
        print("Losses:\nCombined: {:.5e}\nHinge_distance: {:.5e}\nCrossEntr_Edges: {:.5e}\nMSE_centers: {:.5e}".format(
            combo_loss_avg[epoch], sep_loss_avg[epoch][0], sep_loss_avg[epoch][1], sep_loss_avg[epoch][2]))
        print("[TEST] Average Edge Accuracies over {} events: {:.5e}".format(test_samples, edge_acc_track.mean()))
        print("Total true edges [class_0: {:6d}] [class_1: {:6d}]".format(total_true_0_1[0], total_true_0_1[1]))
        print("Total pred edges [class_0: {:6d}] [class_1: {:6d}]".format(total_pred_0_1[0], total_pred_0_1[1]))

        logtofile(plot_path, logfile_name, '\nTEST:')
        logtofile(plot_path, logfile_name,
                  "Losses:\nCombined: {:.5e}\nHinge_distance: {:.5e}\nCrossEntr_Edges: {:.5e}\nMSE_centers: {:.5e}".format(
                      combo_loss_avg[epoch], sep_loss_avg[epoch][0], sep_loss_avg[epoch][1], sep_loss_avg[epoch][2]))
        logtofile(plot_path, logfile_name,
                  "Average Edge Accuracies over {} events, {} Tracks: {:.5e}".format(
                      test_samples, input_classes, edge_acc_track.mean()))
        logtofile(plot_path, logfile_name,
                  "Total true edges [class_0: {:6d}] [class_1: {:6d}]".format(total_true_0_1[0], total_true_0_1[1]))
        logtofile(plot_path, logfile_name,
                  "Total pred edges [class_0: {:6d}] [class_1: {:6d}]".format(total_pred_0_1[0], total_pred_0_1[1]))
        logtofile(plot_path, logfile_name, '\nProperties:')
        logtofile(plot_path, logfile_name, str(pred_cluster_properties))
        logtofile(plot_path, logfile_name, '--------------------------')

    t2 = timer()
    print("Testing Completed in {:.5f}mins.\n".format((t2 - t1) / 60.0))
    return combo_loss_avg, sep_loss_avg, edge_acc_track, pred_cluster_properties, edge_acc_conf
def main():
    patchsize = 56
    features = 64
    out_features = features * 4
    image_channels = 3
    epochs = 10000

    # TODO: Big control.
    # Random guess correct ratio
    # correct_ratio = 0.5
    # while correct_ratio < 0.8:  # keep training

    judge = Judge(image_channels, out_features).cuda()
    # judge._initialize_weights()
    # judge = DataParallel(judge.cuda(), device_ids=gpus)

    train_logger = Logger("./log_t/train/")
    # test_logger = Logger("./log/test/")
    optimizer = optim.Adam(judge.parameters(), lr=0.0001)

    patch_set = KITTIPatchesDataset()
    patch_set.load_data("./data/test_patches.npy")
    # train_loader = DataLoader(patch_set, batch_size=64, num_workers=4, pin_memory=True, drop_last=True)
    # test_patch_set = KITTIPatchesDataset()
    # test_patch_set.load_data("./data/test_patches.npy")
    # test_loader = DataLoader(test_patch_set, batch_size=64, num_workers=4, pin_memory=True, drop_last=True)

    margin = 1.
    threshold = 0.3

    # Fixed batch of four hand-picked pairs for overfitting/debugging
    pairs_d = torch.FloatTensor(4, 2, 3, 56, 56)
    labels_d = torch.FloatTensor(4, 1)
    pairs_d[0], labels_d[0] = patch_set[7]
    pairs_d[1], labels_d[1] = patch_set[6]
    pairs_d[2], labels_d[2] = patch_set[866]
    pairs_d[3], labels_d[3] = patch_set[867]

    for e in range(epochs):
        # patch_set.newData()
        step = e
        pairs = Variable(pairs_d.cuda(), requires_grad=False)
        # print("Input pairs shape (should be [batch size, 2, 3, 56, 56])", pairs.shape)
        labels = Variable(labels_d.cuda(), requires_grad=False)

        preds = judge(pairs)
        loss = F.hinge_embedding_loss(preds, labels)
        # final_loss = torch.mean(loss)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 10 == 0:
            train_acc = accuracy(preds, labels, margin, threshold)
            train_confuse_rate = get_confuse_rate(preds)
            train_logger.log_scalar("accuracy", train_acc, step)
            train_logger.log_scalar("confuse_rate", train_confuse_rate, step)
            train_logger.log_histogram("preds", to_np(preds), step)
            print("Step %d \tloss is %f" % (step, to_np(loss)))
            print("Preds : ", to_np(preds))
            for tag, value in judge.named_parameters():
                tag = tag.replace('.', '/')
                train_logger.log_histogram(tag, to_np(value), step)
                train_logger.log_histogram(tag + '/grad', to_np(value.grad), step)
            pos_image_pairs = np.random.choice(np.arange(0, 2, 2), 1, replace=False)
            for idx in pos_image_pairs:
                train_logger.log_images("pos", [to_RGB(pairs_d[idx][0]), to_RGB(pairs_d[idx][1])], step)
                train_logger.log_images("neg", [to_RGB(pairs_d[idx + 1][0]), to_RGB(pairs_d[idx + 1][1])], step)

        if step % 100 == 0:
            train_logger.log_scalar("loss", to_np(loss), step)
            # the commented-out evaluation pass over test_loader lived here

        if step % 1000 == 0:
            torch.save(judge.state_dict(), "./log/check_" + str(step))
def forward(self, input, target):
    return F.hinge_embedding_loss(input, target, margin=self.margin, reduction=self.reduction)
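# This forward mirrors torch.nn.HingeEmbeddingLoss. A quick sketch checking that the
# functional call above agrees with the built-in module (random toy inputs; the module
# is assumed to store margin=1.0 and reduction='mean' as in the defaults):
import torch
import torch.nn as nn
import torch.nn.functional as F

inp = torch.randn(8)
tgt = torch.randint(0, 2, (8,)) * 2 - 1  # random targets in {-1, 1}

module = nn.HingeEmbeddingLoss(margin=1.0, reduction='mean')
assert torch.allclose(module(inp, tgt),
                      F.hinge_embedding_loss(inp, tgt, margin=1.0, reduction='mean'))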
def hinge_embedding_loss(input, target, *args, **kwargs):
    # input.F is the dense feature matrix of a sparse tensor (MinkowskiEngine-style convention)
    return F.hinge_embedding_loss(input.F, target, *args, **kwargs)
def train(model, loader, robust=False, adv_loader=None, lamb=0):
    """
    Train GC_Net.

    Parameters
    ----------
    model : GC_NET instance
    loader : torch.utils.data.DataLoader
        DataLoader with each item in torch Data format.
    robust : bool
        Flag for robust training. Default: False.
    adv_loader : torch.utils.data.DataLoader, optional
        Adversarial examples used in place of `loader` when `robust` is set.
    lamb : float
        Weight of the robust hinge term.

    Returns
    -------
    loss : float
        Averaged loss on loader.
    """
    model.train()
    _device = next(model.parameters()).device
    optimizer = Adam(model.parameters(), lr=0.001)
    loss_all = 0
    loader = loader if not robust else adv_loader
    for idx, data in enumerate(loader):
        data = data.to(_device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, data.y)
        if robust:
            '''robust training with greedy attack'''
            # _W = model.conv.weight.detach().cpu().numpy()
            # _U = model.lin.weight.detach().cpu().numpy()
            # for _ in range(20):
            #     idx = np.random.randint(len(loader.dataset))
            #     _g_data = loader.dataset[idx]
            #     A, X, y = process_data(_g_data)
            #     deg = A.sum(1)
            #     # local budget
            #     delta_l = np.minimum(np.maximum(deg - np.max(deg) + 2, 0), data.x.shape[0] - 1).astype(int)
            #     # global budget
            #     delta_g = 4
            #     fc_vals_greedy = []
            #     for c in range(model.n_classes):
            #         if c != y:
            #             u = _U[y] - _U[c]
            #             attack = Greedy_Attack(A, X @ _W, u.T / data.x.shape[0], delta_l, delta_g,
            #                                    activation=model.act)
            #             greedy_sol = attack.attack(A)
            #             fc_vals_greedy.append(-greedy_sol['opt_f'])
            #     loss += max(max(fc_vals_greedy) + 1, 0) / 20
            # for adv in adv_loader:
            #     adv = adv.to(_device)
            #     output = model(adv)
            #     loss = F.hinge_embedding_loss(output, torch.eye(output.shape[1])[data.y].to(_device), margin=0.5)
            # NB: the one-hot targets below are {0, 1}, not the {1, -1} hinge_embedding_loss documents
            loss += lamb * F.hinge_embedding_loss(
                output, torch.eye(output.shape[1])[data.y].to(_device))
            # loss = F.multilabel_margin_loss(output.argmax(1), data.y).to(_device)
        loss.backward()
        optimizer.step()
        loss_all += data.num_graphs * loss.item()
    return loss_all / len(loader.dataset)