def forward(self, anchor, pos, neg, triplet_pos_label, triplet_neg_label):
    if self.use_sigmoid:
        anchor, pos, neg = F.sigmoid(anchor), F.sigmoid(pos), F.sigmoid(neg)
    if self.method == 'cosine':
        # cosine similarity loss
        loss_pos = F.cosine_embedding_loss(anchor, pos, triplet_pos_label,
                                           margin=self.margin,
                                           reduction=self.reduction)
        loss_neg = F.cosine_embedding_loss(anchor, neg, triplet_neg_label,
                                           margin=self.margin,
                                           reduction=self.reduction)
        losses = loss_pos + loss_neg
    else:
        # L2 loss
        dist_pos = (anchor - pos).pow(2).sum(1)
        dist_neg = (anchor - neg).pow(2).sum(1)
        losses = self.ratio * F.relu(dist_pos - dist_neg + self.margin)
    if self.size_average:
        losses = losses.mean()
    else:
        losses = losses.sum()
    return losses
def forward_lipschitz_loss_hook_fn(self, module, X, y):
    if not self.model.training or not self.args.lipschitz_regularization or (
            self.args.level == "layer" and not hasattr(module, 'weight')):
        return
    module.eval()
    module.forward_handle.remove()
    X = X[0]
    y = module(X)
    noise = self.args.lipschitz_noise_factor * torch.std(X, dim=0) * torch.randn(
        X.size(), device=self.device)
    X = X + noise
    X = module(X)
    if self.args.distance_function == "cosine_loss":
        if self.lipschitz_loss is None:
            self.lipschitz_loss = F.cosine_embedding_loss(X, y, self.aux_y)
        else:
            self.lipschitz_loss += F.cosine_embedding_loss(X, y, self.aux_y)
    elif self.args.distance_function == "mse":
        if self.lipschitz_loss is None:
            self.lipschitz_loss = F.mse_loss(X, y)
        else:
            self.lipschitz_loss += F.mse_loss(X, y)
    elif self.args.distance_function == "nll":
        if self.lipschitz_loss is None:
            self.lipschitz_loss = (-F.softmax(y) + F.softmax(X).exp().sum(0).log()).mean()
        else:
            self.lipschitz_loss += (-F.softmax(y) + F.softmax(X).exp().sum(0).log()).mean()
    else:
        print("lipschitz distance function not implemented")
        exit()
    module.forward_handle = module.register_forward_hook(
        self.forward_lipschitz_loss_hook_fn)
def sim_step(self, stats):
    """
    Train the similarity between mapped src and tgt
    """
    if self.discriminator:
        self.discriminator.eval()
    # loss
    ids = random.sample(self.train_idx, self.params.batch_size)
    x, y = self.get_sim_xy(ids)
    ycos = torch.Tensor([1.] * self.params.batch_size)
    ycos = ycos.cuda() if self.params.cuda else ycos
    if self.params.sim_loss == "mse":
        loss = F.cosine_embedding_loss(x, y, Variable(ycos))
    elif self.params.sim_loss == "max_margin":
        loss = self.max_margin(x, y)
    else:
        raise Exception('Unknown similarity loss: "%s"' % self.params.sim_loss)
    loss = self.params.sim_lambda * loss
    stats['SIM_COSTS'].append(loss.item())
    # check NaN
    if (loss != loss).data.any():
        logging.error("NaN detected (fool discriminator)")
        exit()
    # optim
    self.sim_optimizer.zero_grad()
    loss.backward()
    self.sim_optimizer.step()
    return 2 * self.params.batch_size
def forward(self, input, target, reduction="mean"):
    cosine_loss = F.cosine_embedding_loss(
        input,
        F.one_hot(target.long(), num_classes=input.size(-1)),
        self.y,
        reduction=reduction)
    # print(target.size())
    # print(input.size())
    # cosine_loss = F.cosine_embedding_loss(input, target, self.y, reduction=reduction)
    cent_loss = F.cross_entropy(F.normalize(input), target.long(), reduce=False)
    # cosine_loss = F.cosine_embedding_loss(F.normalize(input), F.one_hot(target, num_classes=input.size(-1)), self.y, reduction=reduction)
    # cent_loss = F.cross_entropy(input, target, reduce=False)
    pt = torch.exp(-cent_loss)
    focal_loss = self.alpha * (1 - pt)**self.gamma * cent_loss
    if reduction == "mean":
        focal_loss = torch.mean(focal_loss)
    return cosine_loss + self.xent * focal_loss
class CosineEmbeddingLoss(Module):
    r"""Creates a criterion that measures the loss given input tensors x1, x2
    and a `Tensor` label `y` with values 1 or -1.
    This is used for measuring whether two inputs are similar or dissimilar,
    using the cosine distance, and is typically used for learning nonlinear
    embeddings or semi-supervised learning.

    `margin` should be a number from `-1` to `1`, `0` to `0.5` is suggested.
    If `margin` is missing, the default value is `0.5`.

    The loss function for each sample is::

                     { 1 - cos(x1, x2),              if y ==  1
        loss(x, y) = {
                     { max(0, cos(x1, x2) - margin), if y == -1

    If the internal variable `size_average` is equal to ``True``, the loss
    function averages the loss over the batch samples; if `size_average` is
    ``False``, then the loss function sums over the batch samples.
    By default, `size_average = False`.
    """

    def __init__(self, margin=0.5, size_average=False):
        super(CosineEmbeddingLoss, self).__init__()
        self.margin = margin
        self.size_average = size_average

    def forward(self, inputs, target):
        # Python 3 does not allow tuple parameters, so unpack explicitly.
        input1, input2 = inputs
        return F.cosine_embedding_loss(input1, input2, target,
                                       self.margin, self.size_average)
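# Minimal sketch (not part of the original snippets): checks that
# F.cosine_embedding_loss matches the formula documented above, i.e.
# 1 - cos(x1, x2) for y == 1 and max(0, cos(x1, x2) - margin) for y == -1.
# Tensor shapes and the margin value are illustrative assumptions only.
import torch
import torch.nn.functional as F

x1 = torch.randn(4, 8)
x2 = torch.randn(4, 8)
y = torch.tensor([1, -1, 1, -1])
margin = 0.2

cos = F.cosine_similarity(x1, x2, dim=1)
manual = torch.where(y == 1, 1 - cos, torch.clamp(cos - margin, min=0))
builtin = F.cosine_embedding_loss(x1, x2, y, margin=margin, reduction='none')
assert torch.allclose(manual, builtin)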
def forward(self, embeddings, target):
    positive_pairs, negative_pairs = self.pair_selector.get_pairs(
        embeddings, target)
    if embeddings.is_cuda:
        positive_pairs = positive_pairs.cuda()
        negative_pairs = negative_pairs.cuda()
    output1 = torch.cat([
        embeddings[positive_pairs[:, 0]],
        embeddings[negative_pairs[:, 0]]
    ], dim=0).cuda()
    output2 = torch.cat([
        embeddings[positive_pairs[:, 1]],
        embeddings[negative_pairs[:, 1]]
    ], dim=0).cuda()
    label = Variable(
        torch.cat([
            torch.ones(positive_pairs.shape[0]),
            torch.zeros(negative_pairs.shape[0])
        ], dim=0).cuda())
    label[label == 0] = -1
    return F.cosine_embedding_loss(output1, output2, label, self.margin,
                                   self.size_average)
def forward(self, input, target):
    cosine_loss = F.cosine_embedding_loss(
        input,
        F.one_hot(target, num_classes=input.size(-1)),
        self.y,
        reduction=self.reduction)
    cent_loss = F.cross_entropy(F.normalize(input), target,
                                reduction=self.reduction)
    return cosine_loss + self.xent * cent_loss
def forward(self):
    a = torch.randn(3, 2)
    b = torch.rand(3, 2)
    c = torch.rand(3)
    log_probs = torch.randn(50, 16, 20).log_softmax(2).detach()
    targets = torch.randint(1, 20, (16, 30), dtype=torch.long)
    input_lengths = torch.full((16, ), 50, dtype=torch.long)
    target_lengths = torch.randint(10, 30, (16, ), dtype=torch.long)
    return len(
        F.binary_cross_entropy(torch.sigmoid(a), b),
        F.binary_cross_entropy_with_logits(torch.sigmoid(a), b),
        F.poisson_nll_loss(a, b),
        F.cosine_embedding_loss(a, b, c),
        F.cross_entropy(a, b),
        F.ctc_loss(log_probs, targets, input_lengths, target_lengths),
        # F.gaussian_nll_loss(a, b, torch.ones(5, 1)),  # ENTER is not supported in mobile module
        F.hinge_embedding_loss(a, b),
        F.kl_div(a, b),
        F.l1_loss(a, b),
        F.mse_loss(a, b),
        F.margin_ranking_loss(c, c, c),
        F.multilabel_margin_loss(self.x, self.y),
        F.multilabel_soft_margin_loss(self.x, self.y),
        F.multi_margin_loss(self.x, torch.tensor([3])),
        F.nll_loss(a, torch.tensor([1, 0, 1])),
        F.huber_loss(a, b),
        F.smooth_l1_loss(a, b),
        F.soft_margin_loss(a, b),
        F.triplet_margin_loss(a, b, -b),
        # F.triplet_margin_with_distance_loss(a, b, -b),  # can't take variable number of arguments
    )
def forward(
        self, subject_tokens: Dict[str, torch.LongTensor],
        object_tokens: Dict[str, torch.LongTensor],
        predicate_tokens: Dict[str, torch.LongTensor] = None
) -> Dict[str, torch.Tensor]:
    # Embed entities
    subject_embedding = self._entity_seq2vec(
        self._entity_embedder(subject_tokens),
        mask=util.get_text_field_mask(subject_tokens).float())
    object_embedding = self._entity_seq2vec(
        self._entity_embedder(object_tokens),
        mask=util.get_text_field_mask(object_tokens).float())

    # Concatenate the entity embeddings and forward pass
    entities_cat = torch.cat([subject_embedding, object_embedding], dim=1)
    out_embedding = self._entity_output_layer(entities_cat)

    # Calculate the loss and other metrics
    output_dict = {'embedding': out_embedding}
    if predicate_tokens:
        gold_embedding = self.embed_predicate(predicate_tokens)
        # Compute cosine loss between gold embedding and outputted embedding
        cosine_loss_label = torch.tensor([1], dtype=out_embedding.dtype,
                                         device=out_embedding.device)
        output_dict["loss"] = F.cosine_embedding_loss(
            out_embedding, gold_embedding, cosine_loss_label)

    return output_dict
def forward(self, state_S, state_T, mask=None):
    '''
    This is the loss used in DistilBERT

    :param state_S: Tensor of shape (batch_size, length, hidden_size)
    :param state_T: Tensor of shape (batch_size, length, hidden_size)
    :param mask:    Tensor of shape (batch_size, length)
    '''
    if mask is None:
        state_S = state_S.view(-1, state_S.size(-1))
        state_T = state_T.view(-1, state_T.size(-1))
    else:
        mask = mask.to(state_S).unsqueeze(-1).expand_as(state_S).to(
            torch.uint8)  # (bs, len, dim)
        state_S = torch.masked_select(state_S, mask).view(
            -1, mask.size(-1))  # (bs * select, dim)
        state_T = torch.masked_select(state_T, mask).view(
            -1, mask.size(-1))  # (bs * select, dim)

    target = state_S.new(state_S.size(0)).fill_(1)
    loss = F.cosine_embedding_loss(state_S, state_T, target, reduction='mean')
    return loss
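# Hedged usage sketch (not from the original source): the shapes follow the
# docstring above, with a padding mask selecting valid token positions before
# the cosine loss, mirroring the masked branch of the forward method.
import torch
import torch.nn.functional as F

batch_size, length, hidden_size = 2, 5, 16
state_S = torch.randn(batch_size, length, hidden_size)  # student hidden states
state_T = torch.randn(batch_size, length, hidden_size)  # teacher hidden states
mask = torch.tensor([[1, 1, 1, 0, 0],
                     [1, 1, 1, 1, 0]])                   # (batch_size, length)

# Keep only unmasked positions, then pull each toward its teacher counterpart.
valid = mask.bool().unsqueeze(-1).expand_as(state_S)
s = torch.masked_select(state_S, valid).view(-1, hidden_size)
t = torch.masked_select(state_T, valid).view(-1, hidden_size)
target = s.new_ones(s.size(0))
loss = F.cosine_embedding_loss(s, t, target, reduction='mean')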
def _run(self, train_loader, test_loader, optimizer, scheduler, epk):
    for epoch in range(1, epk + 1):
        self._network.train()
        lsc_losses = 0.  # CE loss
        spatial_losses = 0.  # width + height
        flat_losses = 0.  # embedding
        correct, total = 0, 0
        for i, (_, inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(self._device), targets.to(self._device)
            outputs = self._network(inputs)
            logits = outputs['logits']
            features = outputs['features']
            fmaps = outputs['fmaps']
            # lsc_loss = F.cross_entropy(logits, targets)
            lsc_loss = nca(logits, targets)

            spatial_loss = 0.
            flat_loss = 0.
            if self._old_network is not None:
                with torch.no_grad():
                    old_outputs = self._old_network(inputs)
                    old_features = old_outputs['features']
                    old_fmaps = old_outputs['fmaps']
                flat_loss = F.cosine_embedding_loss(
                    features, old_features.detach(),
                    torch.ones(inputs.shape[0]).to(
                        self._device)) * self.factor * lambda_f_base
                spatial_loss = pod_spatial_loss(
                    fmaps, old_fmaps) * self.factor * lambda_c_base

            loss = lsc_loss + flat_loss + spatial_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # record
            lsc_losses += lsc_loss.item()
            spatial_losses += spatial_loss.item() if self._cur_task != 0 else spatial_loss
            flat_losses += flat_loss.item() if self._cur_task != 0 else flat_loss

            # acc
            _, preds = torch.max(logits, dim=1)
            correct += preds.eq(targets.expand_as(preds)).cpu().sum()
            total += len(targets)

        if scheduler is not None:
            scheduler.step()
        train_acc = np.around(tensor2numpy(correct) * 100 / total, decimals=2)
        test_acc = self._compute_accuracy(self._network, test_loader)
        info1 = 'Task {}, Epoch {}/{} (LR {:.5f}) => '.format(
            self._cur_task, epoch, epk, optimizer.param_groups[0]['lr'])
        info2 = 'LSC_loss {:.2f}, Spatial_loss {:.2f}, Flat_loss {:.2f}, Train_acc {:.2f}, Test_acc {:.2f}'.format(
            lsc_losses / (i + 1), spatial_losses / (i + 1),
            flat_losses / (i + 1), train_acc, test_acc)
        logging.info(info1 + info2)
def loss_fn(self, prediction, target):
    """
    Cosine loss
    :param prediction:
    :param target:
    :return:
    """
    return F.cosine_embedding_loss(*prediction, target)
def compute(self, in_dict, out_dict):
    """
    TODO
    """
    y1 = out_dict[self.output1_name]
    y2 = out_dict[self.output2_name]
    labels = in_dict[self.labels_name]
    return self.weight * F.cosine_embedding_loss(y1, y2, labels)
def test():
    weight_alpha = 0.
    weight_beta = 1.

    print("Loading pre-trained model: ", k_opt.current_model)
    dgcnn = DGCNN(8, 17, 1024, 0.5)
    # dgcnn = torch.nn.DataParallel(dgcnn)
    dgcnn.load_state_dict(torch.load(k_opt.current_model))
    print("Load ", k_opt.current_model, " Success!")
    dgcnn.cuda()
    dgcnn.eval()

    cos_target = torch.tensor(np.ones((k_opt.batch_size, 1)))
    cos_target = cos_target.type(torch.FloatTensor).cuda()

    # all_file_list = getTestList(True)
    data_path_file = k_opt.data_path_file
    data_path = h5py.File(data_path_file, 'r')
    data_path = np.array(data_path["data_path"])
    print("Resplit Data Success!")

    test_dataset = MatrixDataset(k_opt, data_path, k_opt.num_neighbors, is_train=False)
    test_data_loader = test_dataset.getDataloader()

    val_cos_loss = []
    val_value_loss = []
    val_loss = []
    for i_test, data in enumerate(test_data_loader, 0):
        inputs, gt_res, gt_norm, center_norm = data
        inputs = inputs.type(torch.FloatTensor)
        inputs = inputs.permute(0, 2, 1)
        gt_res = gt_res.type(torch.FloatTensor)
        gt_norm = gt_norm.type(torch.FloatTensor)
        center_norm = center_norm.type(torch.FloatTensor)

        inputs = inputs.cuda()
        gt_res = gt_res.cuda()
        gt_norm = gt_norm.cuda()
        center_norm = center_norm.cuda()

        output = dgcnn(inputs)

        cos_loss = F.cosine_embedding_loss(output, gt_norm, cos_target)
        value_loss = F.mse_loss(output, gt_norm)
        loss = weight_alpha * cos_loss + weight_beta * value_loss

        val_loss.append(loss.data.item())
        val_cos_loss.append(cos_loss.data.item())
        val_value_loss.append(value_loss.data.item())

        print("Val Batch: %d/%d, || cos loss: %.7f, || value loss: %.7f" %
              (i_test + 1, k_opt.num_val_batch, cos_loss.data.item(), value_loss.data.item()))
def validation_step(self, batch, batch_idx): x, y, label = batch["phrase"]["input_ids"], batch["target"], batch[ "label"] y_hat = self.forward(x) loss = F.cosine_embedding_loss(y_hat, y, label) self.log('val_loss', loss) return loss
def forward(self, input1, input2, batch_size):
    if batch_size == self.batch_size:
        y = self.y
    else:
        y = Variable(torch.ones(batch_size))
        if self.use_gpu:
            y = y.cuda()
    return F.cosine_embedding_loss(input1, input2, y, self.margin,
                                   self.size_average)
def loss_function(x, recon_x, z_e, emb):
    vq_coef = 0.2
    comit_coef = 0.4
    ce_loss = F.cosine_embedding_loss(recon_x, x, torch.tensor(0.5))
    vq_loss = F.mse_loss(emb, z_e.detach())
    commit_loss = F.mse_loss(z_e, emb.detach())
    return ce_loss + vq_coef * vq_loss + comit_coef * commit_loss
def train(args, epoch, net, trainLoader, optimizer, trainF, ranks, tboard_writer):
    net.train()
    nProcessed = 0
    nTrain = len(trainLoader.dataset)
    ts0 = time.perf_counter()
    for batch_idx, (img, (caption, lengths), labels, img_indices) in enumerate(trainLoader):
        ts0_batch = time.perf_counter()
        labels = labels.float()
        if args.cuda:
            img, caption, labels, lengths = img.cuda(), caption.cuda(), labels.cuda(), lengths.cuda()
        img, caption, labels = Variable(img), Variable(caption), Variable(labels)
        optimizer.zero_grad()
        output = net((img, caption, lengths))
        rankings, rank_count, similarity, mean_rank, rank_vals = compute_ranking(
            *output[::-1], labels, img_indices, tboard_writer, ranks)
        loss = F.cosine_embedding_loss(*output, labels)
        # pred = output.data.max(1)[1]  # get the index of the max log-probability
        # incorrect = pred.ne(target.data).cpu().sum()
        err = 0  # 100. * incorrect / len(data)
        del output
        loss.backward()
        optimizer.step()
        nProcessed += len(labels)
        if rank_count:
            expected_ranks = [min(100., r / rank_count * 100) for r in ranks]
            mean_rank_prop = mean_rank / rank_count * 100
        else:
            expected_ranks = [0, ] * len(ranks)
            mean_rank_prop = 0
        partialEpoch = epoch + batch_idx / len(trainLoader) - 1
        te = time.perf_counter()
        s = 'Train Epoch: {:.2f} [{}/{} ({:.0f}%)]\tTime: [{:.2f}s/{:.2f}s]\tLoss: {:.6f}'.format(
            partialEpoch, nProcessed, nTrain, 100. * batch_idx / len(trainLoader),
            te - ts0_batch, te - ts0, loss.data[0], err)
        s_rank = '\t{:.2f}\t'.format(mean_rank)
        s_rank += '\t'.join((['{:.2f}', ] * len(rankings))).format(*rankings)
        print(s + '\tRanks: ' + s_rank)
        _rankings = list(itertools.chain(*zip(rankings, expected_ranks)))
        trainF.write('{},{},{},{},{},{}\n'.format(
            partialEpoch, loss.data[0], err, mean_rank, mean_rank_prop,
            ','.join((['{}', ] * len(_rankings))).format(*_rankings)))
        trainF.flush()

        global_step = epoch * (batch_idx + 1) * partialEpoch * len(trainLoader)
        tboard_writer.add_scalar('train/loss', loss.data[0], global_step)
        for rank, n in zip(rankings, ranks):
            tboard_writer.add_scalar('train/Percent Accuracy (top {})'.format(n), rank, global_step)
        tboard_writer.add_scalar('train/Mean rank', mean_rank, global_step)
        tboard_writer.add_scalar('train/Mean rank percent', mean_rank_prop, global_step)
def forward_homomorphic_loss_hook_fn(self, module, X, y):
    if not self.model.training or not self.args.homomorphic_regularization:
        return
    module.forward_handle.remove()
    X = X[0]
    shuffled_idxs = torch.randperm(y.size(0), device=self.device, dtype=torch.long)
    shuffled_idxs = shuffled_idxs[:y.size(0) - y.size(0) % self.args.homomorphic_k_inputs]
    mini_batches_idxs = shuffled_idxs.split(y.size(0) // self.args.homomorphic_k_inputs)
    to_sum_groups = []
    to_sum_targets = []
    for mbi in mini_batches_idxs:
        to_sum_groups.append(X[mbi].unsqueeze(0))
        to_sum_targets.append(y[mbi].unsqueeze(0))
    k_weights = torch.full((1, self.args.homomorphic_k_inputs),
                           1 / self.args.homomorphic_k_inputs, device=self.device)
    # data = (torch.cat(to_sum_groups, dim=0).T * k_weights[:, :self.args.homomorphic_k_inputs]).T.sum(0)
    data = (torch.cat(to_sum_groups, dim=0).T).T.sum(0)
    data = module(data)
    # targets = (torch.cat(to_sum_targets, dim=0).T * k_weights[:, :self.args.homomorphic_k_inputs]).T.sum(0)
    targets = (torch.cat(to_sum_targets, dim=0).T).T.sum(0)
    if self.args.distance_function == "cosine_loss":
        if self.homomorphic_loss is None:
            self.homomorphic_loss = F.cosine_embedding_loss(data, targets, self.aux_y)
        else:
            self.homomorphic_loss += F.cosine_embedding_loss(data, targets, self.aux_y)
    elif self.args.distance_function == "mse":
        if self.homomorphic_loss is None:
            self.homomorphic_loss = F.mse_loss(data, targets)
        else:
            self.homomorphic_loss += F.mse_loss(data, targets)
    elif self.args.distance_function == "nll":
        if self.homomorphic_loss is None:
            self.homomorphic_loss = (-F.softmax(targets) + F.softmax(data).exp().sum(0).log()).mean()
        else:
            self.homomorphic_loss += (-F.softmax(targets) + F.softmax(data).exp().sum(0).log()).mean()
    else:
        print("Homomorphic distance function not implemented")
        exit()
    module.forward_handle = module.register_forward_hook(self.forward_homomorphic_loss_hook_fn)
def val(args, epoch, net, valLoader, optimizer, testF, ranks, tboard_writer):
    net.eval()
    test_loss = 0
    incorrect = 0
    rank_values = [0, ] * (2 * len(ranks))
    num_ranks = 0
    mean_rank_total = 0
    rank_vals_all = []
    ts0 = time.perf_counter()
    for batch_idx, (img, (caption, lengths), labels, img_indices) in enumerate(valLoader):
        labels = labels.float()
        if args.cuda:
            img, caption, labels, lengths = img.cuda(), caption.cuda(), labels.cuda(), lengths.cuda()
        img, caption, labels = Variable(img), Variable(caption), Variable(labels)
        output = net((img, caption, lengths))
        rankings, rank_count, similarity, mean_rank, rank_vals = compute_ranking(
            *output[::-1], labels, img_indices, tboard_writer, ranks)
        if rank_count:
            rank_vals_all.append(rank_vals)
            mean_rank_total += mean_rank
            for i, value in enumerate(rankings):
                rank_values[2 * i] += value
                rank_values[2 * i + 1] += min(100., ranks[i] / rank_count * 100)
            num_ranks += 1
        test_loss += F.cosine_embedding_loss(*output, labels).data[0]
        # pred = output.data.max(1)[1]  # get the index of the max log-probability
        # incorrect += pred.ne(target.data).cpu().sum()

    test_loss /= len(valLoader)  # loss function already averages over batch size
    nTotal = len(valLoader.dataset)
    err = 100. * incorrect / nTotal
    s = '\nTest set: Time: {:.2f}s\tAverage loss: {:.4f}\tError: {}/{} ({:.0f}%)'.format(
        time.perf_counter() - ts0, test_loss, incorrect, nTotal, err)
    rankings = [r / num_ranks for r in rank_values]
    s_rank = '\t{:.2f}\t'.format(mean_rank_total / num_ranks)
    s_rank += '\t'.join((['{:.2f}', ] * len(rankings))).format(*rankings)
    print(s + '\tRanks: ' + s_rank + '\n')
    testF.write('{},{},{},{},{}\n'.format(
        epoch, test_loss, err, mean_rank_total / num_ranks,
        ','.join((['{}', ] * len(rankings))).format(*rankings)))
    testF.flush()
    if tboard_writer is not None:
        tboard_writer.add_scalar('val/loss', test_loss, epoch)
        for rank, n in zip(rankings[::2], ranks):
            tboard_writer.add_scalar('val/Percent Accuracy (top {})'.format(n), rank, epoch)
        tboard_writer.add_scalar('val/Mean rank', mean_rank_total / num_ranks, epoch)
        # if rank_vals_all:
        #     tboard_writer.add_histogram("val/rank_vals", torch.cat(rank_vals_all).numpy(), epoch, bins="auto")
    return err, rank_vals_all
def training_step(self, batch, batch_idx):
    x, y, label = batch["phrase"]["input_ids"], batch["target"], batch["label"]
    y_hat = self.forward(x)
    loss = F.cosine_embedding_loss(y_hat, y, label)
    self.log('train_loss', loss)
    return {'loss': loss, "emb_loss": loss}
def _cosine_loss(self, runner):
    if all(n in runner.outputs for n in self.cosine_inputs):
        cosine_inputs = [runner.outputs[n] for n in self.cosine_inputs]
        cosine_weight = runner.named_vars[self.cosine_loss_weight_name]
        cosine_loss = F.cosine_embedding_loss(
            *cosine_inputs,
            torch.tensor(1., device=cosine_inputs[0].device))
    else:
        cosine_loss = cosine_weight = 0.
    return cosine_loss, cosine_weight
def FocalCosineLoss(output, target):
    reduction = "mean"
    cosine_loss = F.cosine_embedding_loss(
        output,
        F.one_hot(target, num_classes=output.size(-1)),
        torch.Tensor([1]).cuda(),
        reduction=reduction)
    cent_loss = F.cross_entropy(F.normalize(output), target, reduce=False)
    pt = torch.exp(-cent_loss)
    focal_loss = 1 * (1 - pt) ** 2 * cent_loss
    if reduction == "mean":
        focal_loss = torch.mean(focal_loss)
    return cosine_loss + 0.1 * focal_loss
def get_mapping_accuracy(mapping, src_loader, tgt_emb, eval_few=False):
    """ Evaluation on contextual word embedding -> definition translation. """
    mapping.eval()
    torch.set_grad_enabled(False)

    result = {1: 0, 5: 0, 10: 0}
    num = 0
    eval_loss, cos_dis = 0, 0
    for i, (defID, wordID, x, y) in enumerate(src_loader):
        num += len(defID)
        defID = defID.to(device)
        wordID = wordID.to(device)
        x = x.to(device)
        y = y.to(device)
        query = mapping(x, wordID)

        # Find KNN
        similarity = query.mm(tgt_emb)  # calculate a batch of queries
        topIDs = similarity.topk(10, dim=1, largest=True)[1]  # (BS, 10)
        defID = defID.unsqueeze(1).expand_as(topIDs)  # gold

        # Calculate P@K
        for k in [1, 5, 10]:
            is_match = torch.sum(defID[:, :k] == topIDs[:, :k], 1).cpu().numpy()  # (batch,)
            result[k] += sum(is_match)

        if not eval_few:
            eval_loss += F.mse_loss(query, y, reduction='sum')
            cos_dis += F.cosine_embedding_loss(
                query, y, torch.ones(y.size(0)).to(device), reduction='sum')
        elif i == 2:
            # only evaluate on 3 batches; use when evaluating training data
            break

    for k in [1, 5, 10]:
        result[k] = round(result[k] * 100 / num, 2)
    if not eval_few:
        result['eval_loss'] = round((eval_loss / num).item(), 3)
        result['cos_dist'] = round((cos_dis / num).item(), 3)

    mapping.train()
    torch.set_grad_enabled(True)
    return result
def __call__(self, embeddings, targets, reduce=True):
    if len(embeddings) != 2:
        raise ValueError(
            f"Number of embeddings must be 2. Found {len(embeddings)} embeddings."
        )
    return F.cosine_embedding_loss(
        embeddings[0],
        embeddings[1],
        targets,
        margin=self.margin,
        reduction="mean" if reduce else "none",
    )
def validation_step(self, batch, batch_idx): x, y, label = batch["phrase"], batch["target"], batch["label"] outputs = self.encoder(**x) #sequence_outputs = outputs[2]#.last_hidden_state #sequence_outputs = torch.cat(sequence_outputs, dim = 0) #sequence_outputs = torch.mean(sequence_outputs, 0).unsqueeze(0) #sequence_embedding = torch.mean(sequence_outputs, 1) sequence_embedding = outputs[1] y_hat = self.activation(self.map(sequence_embedding)) loss = F.cosine_embedding_loss(y_hat, y, label) self.log('val_loss', loss) return loss
def _loss_fn(class_outputs, sp_outputs, labels, sp_labels, att_features, corr_features):
    # import ipdb; ipdb.set_trace()
    loss_target = torch.ones(att_features.shape[0]).to(device)
    BCE_loss = F.binary_cross_entropy_with_logits(sp_outputs, sp_labels)
    CEL_loss = F.cross_entropy(class_outputs, labels)
    Feature_loss = F.cosine_embedding_loss(att_features, corr_features, loss_target)
    combo_loss = CEL_loss * alpha + BCE_loss * beta + Feature_loss * gamma * scaler
    return combo_loss
def forward(self, vec1, vec2, y):
    assert vec1.size(0) == vec2.size(0)
    ones = Variable(torch.ones(vec1.size(0), 1))
    if USE_CUDA:
        ones = ones.cuda()
    # l2_1 = torch.clamp(torch.abs(ones - vec1.norm(p=2, dim=1)), max=1.0)
    # l2_2 = torch.clamp(torch.abs(ones - vec2.norm(p=2, dim=1)), max=1.0)
    # l2_1 = l2_1.mean()
    # l2_2 = l2_2.mean()
    l2_1 = F.l1_loss(ones, vec1.norm(p=2, dim=1))
    l2_2 = F.l1_loss(ones, vec2.norm(p=2, dim=1))
    loss = F.cosine_embedding_loss(vec1, vec2, y)
    return loss + self.alpha * (l2_1 + l2_2)
def forward(self, input, target, reduction="mean"):
    cosine_loss = F.cosine_embedding_loss(input, target, self.y, reduction=reduction)
    cent_loss = F.cross_entropy(F.normalize(input), target, reduce=False)
    pt = torch.exp(-cent_loss)
    focal_loss = self.alpha * (1 - pt)**self.gamma * cent_loss
    if reduction == "mean":
        focal_loss = torch.mean(focal_loss)
    return cosine_loss + self.xent * focal_loss
def triplet_loss(self, embeddings, labels):
    """For a given tensor of embeddings and corresponding labels, returns a
    triplet loss maximizing distance between negative examples and minimizing
    distance between positive examples

    Args
    ----
    embeddings : pytorch tensor
        torch.float32 embeddings to be trained
    labels : numpy array
        Class labels of each node, labelsnp[i] = class of node with intid i"""

    batch_relevant_nodes = [i for i, l in enumerate(labels) if not pd.isna(l)]
    embeddings = embeddings[batch_relevant_nodes]
    labels = labels[batch_relevant_nodes]
    idx1, idx2, target = self.setup_pairwise_loss_tensors(labels)

    losstarget = th.tensor(target).to(self.device)

    if self.distance_metric == 'cosine':
        input1 = embeddings[idx1]
        input2 = embeddings[idx2]
        loss = F.cosine_embedding_loss(input1, input2, losstarget, margin=0.5)
    elif self.distance_metric == 'l2':
        idx1_pos = [idx for i, idx in enumerate(idx1) if target[i] == 1]
        idx1_neg = [idx for i, idx in enumerate(idx1) if target[i] == -1]
        idx2_pos = [idx for i, idx in enumerate(idx2) if target[i] == 1]
        idx2_neg = [idx for i, idx in enumerate(idx2) if target[i] == -1]

        input1_pos = embeddings[idx1_pos]
        input2_pos = embeddings[idx2_pos]
        input1_neg = embeddings[idx1_neg]
        input2_neg = embeddings[idx2_neg]

        loss_pos = F.mse_loss(input1_pos, input2_pos)
        loss_neg = th.mean(
            th.max(
                th.zeros(input1_neg.shape[0]).to(self.device),
                0.25 - th.sum(F.mse_loss(input1_neg, input2_neg, reduce=False), dim=1)))
        loss = loss_pos + loss_neg
    else:
        raise ValueError('distance {} is not implemented'.format(self.distance_metric))
    return loss
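# Hedged sketch (not from the original class): setup_pairwise_loss_tensors is
# referenced but not shown above, so this is only one plausible way to build
# (idx1, idx2, target), pairing every node with every other and labeling
# same-class pairs +1 and different-class pairs -1 for the cosine branch.
import itertools
import torch as th
import torch.nn.functional as F

def setup_pairwise_loss_tensors_sketch(labels):
    idx1, idx2, target = [], [], []
    for i, j in itertools.combinations(range(len(labels)), 2):
        idx1.append(i)
        idx2.append(j)
        target.append(1 if labels[i] == labels[j] else -1)
    return idx1, idx2, target

labels = ['a', 'a', 'b', 'b']
embeddings = th.randn(4, 16)
idx1, idx2, target = setup_pairwise_loss_tensors_sketch(labels)
loss = F.cosine_embedding_loss(embeddings[idx1], embeddings[idx2],
                               th.tensor(target), margin=0.5)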