示例#1
0
    def __init__(self, config, model, train_loader, val_loader):
        """Set up training state: loaders, loss functions, SGD optimizer and
        a multi-step LR scheduler.

        Args:
            config: options object; attributes read here are epoch,
                check_point, save_point and base_lr.
            model: the network to train.
            train_loader: training DataLoader (batch_size and __len__ read).
            val_loader: validation DataLoader.
        """
        super(Trainer, self).__init__()

        self.epoch = config.epoch
        self.train_loader = train_loader
        self.val_loader = val_loader

        self.train_batch_size = train_loader.batch_size
        self.train_epoch_step = train_loader.__len__()

        # Clamp checkpoint/save intervals to at most one epoch's step count.
        self.check_point = config.check_point if config.check_point < self.train_epoch_step else self.train_epoch_step
        self.save_point = config.save_point if config.save_point < self.train_epoch_step else self.train_epoch_step

        # Criteria used during training (reduced and unreduced variants).
        self.get_ce_loss = nn.CrossEntropyLoss()
        self.get_nonreduce_celoss = nn.CrossEntropyLoss(reduction='none')
        self.get_kld_loss = nn.KLDivLoss(reduction='batchmean')
        self.get_l1_loss = nn.L1Loss()
        self.get_smooth_l1_loss = nn.SmoothL1Loss()
        self.get_sim_loss_noreduce = nn.CosineEmbeddingLoss(reduction='none')
        self.get_sim_loss = nn.CosineEmbeddingLoss()
        self.get_sim = nn.CosineSimilarity()

        self.model = model
        self.optim = optim.SGD(self.model.parameters(),
                               lr=config.base_lr,
                               momentum=0.9)
        # LR decays by 10x at each listed epoch milestone.
        self.scheduler = lr_scheduler.MultiStepLR(self.optim,
                                                  milestones=[20, 40, 60, 80],
                                                  gamma=0.1)

        # Running counters advanced by the training loop.
        self.epoch_num = 0
        self.step = 0
示例#2
0
def compute_cycle_loss(feat1, feat2, paired=True, device='cuda'):
    """Cosine-embedding cycle loss between two feature batches.

    Pulls feat1 and feat2 together when ``paired`` is True, otherwise pushes
    them apart with a margin of 0.3.

    Args:
        feat1, feat2: (N, D) feature tensors.
        paired: whether the rows form positive pairs.
        device: device on which the target tensor is placed.

    Returns:
        Scalar cosine-embedding loss.
    """
    criterion = nn.CosineEmbeddingLoss(0.3)
    sign = 1.0 if paired else -1.0
    target = sign * torch.ones(feat1.shape[0])
    return criterion(feat1, feat2, target.to(device))
 def __init__(self, margin=None, dist="euc"):
     """Configure the ranking loss.

     Args:
         margin: if given, use a margin-based loss; otherwise use a
             soft-margin variant (margin 0 for the cosine case).
         dist: "euc" for Euclidean ranking loss, "cos" for cosine embedding.

     Note: ``self.ranking_loss`` is left unset for any other ``dist`` value.
     """
     self.margin = margin
     if margin is not None:
         if dist == "euc":
             self.ranking_loss = nn.MarginRankingLoss(margin=margin)
         elif dist == "cos":
             self.ranking_loss = nn.CosineEmbeddingLoss(margin=margin)
     else:
         if dist == "euc":
             # Fixed: was a redundant double assignment
             # (self.ranking_loss = self.ranking_loss = ...).
             self.ranking_loss = nn.SoftMarginLoss()
         elif dist == "cos":
             self.ranking_loss = nn.CosineEmbeddingLoss(margin=0)
示例#4
0
 def __init__(self, d_dim, margin, lamb, dim1, dim2, dim_label, dim_domain, num_epochs, batch_size, model_path,exp_id, use_gpu=True, validation=True):
     """Build the scDGN model plus its losses and SGD optimizer.

     Args:
         d_dim, dim1, dim2, dim_label: network dimensions (also forwarded to
             the parent trainer).
         margin: margin for the domain contrastive loss.
         lamb: weight of the adversarial term (stored for later use).
         dim_domain: number of domains for the domain head.
         num_epochs, batch_size, model_path, exp_id: forwarded to the parent.
         use_gpu: place the model on CUDA when True.
         validation: forwarded to the parent trainer.
     """
     #Setup network
     super(ADGTrainer, self).__init__(d_dim, dim1, dim2, dim_label, num_epochs, batch_size, model_path, exp_id,use_gpu,  validation)
     self.lamb = lamb
     self.dim_domain = dim_domain
     # NOTE(review): self.d_dim/self.dim1/... are presumably set by the
     # parent __init__ above — confirm against the base trainer.
     if(use_gpu):
         self.D = scDGN(self.d_dim, self.dim1, self.dim2, self.dim_label, self.dim_domain).cuda()
     else:
         self.D = scDGN(self.d_dim, self.dim1, self.dim2, self.dim_label, self.dim_domain)
     # NOTE(review): the losses below are moved to CUDA even when
     # use_gpu=False — confirm the CPU path is ever exercised.
     self.L_L = nn.CrossEntropyLoss().cuda()
     self.decoder_loss1 = nn.CosineEmbeddingLoss().cuda()
     self.decoder_loss2 = nn.CosineEmbeddingLoss().cuda()
     self.L_D = ContrastiveLoss(margin=margin).cuda()
     self.optimizer = optim.SGD([{'params':self.D.parameters()}], lr=1e-3, momentum=0.9, weight_decay=1e-6, nesterov=True)
示例#5
0
    def __init__(self, config: Dict):
        """Build a DistilBERT encoder from configuration.

        Args:
            config: dict with a "model" sub-dict of DistilBertConfig kwargs.
        """
        super().__init__()

        self.config = config
        self.model_config = DistilBertConfig(**self.config["model"])
        self.model = DistilBertModel(self.model_config)
        # Cosine-embedding criterion (margin 0, mean reduction) for training.
        self.criterion = nn.CosineEmbeddingLoss(margin=0.0, reduction='mean')
示例#6
0
def loss_func(data, decoded):
    """Cosine-embedding reconstruction loss between inputs and decodings.

    Args:
        data: (N, D) tensor of original inputs.
        decoded: (N, D) tensor of reconstructions.

    Returns:
        Scalar tensor: mean of 1 - cos(data_i, decoded_i).
    """
    cossim_loss = nn.CosineEmbeddingLoss()  # Pytorch built-in cosine loss
    # Target must be a 1-D tensor of +1s (one per row); the original built an
    # (N, 1) numpy array, which newer torch rejects. An unused MSELoss local
    # was also removed.
    y = torch.ones(data.shape[0])
    loss = cossim_loss(data, decoded, y)

    return loss
def cosine_embedding_loss(device, probe_embeddings, labels, gallery_loader,
                          train_idx_to_class):
    """Cosine-embedding loss of probe embeddings against a gallery.

    For each probe, the positive term is its loss against a same-class
    gallery embedding; negative losses are collected (up to 10 per probe)
    and averaged.

    Args:
        device: device for gallery embeddings and targets.
        probe_embeddings: iterable of 512-d probe embeddings.
        labels: per-probe class-index tensors.
        gallery_loader: yields (embedding, [class_name]) pairs.
        train_idx_to_class: class-index -> class-name mapping.

    Returns:
        loss1 + mean(loss2); just loss1 when no negatives were collected.
    """
    criterion = nn.CosineEmbeddingLoss()
    y1 = torch.ones(1).to(device)
    y2 = -torch.ones(1).to(device)
    # NOTE(review): loss1 keeps only the most recent positive pair across all
    # probes — confirm this is intended.
    loss1 = 0
    loss2 = []
    flag = False
    for idx, probe_embd in enumerate(probe_embeddings):
        probe_target = train_idx_to_class[labels[idx].item()]
        for gallery_embedding, gallery_target in gallery_loader:
            if probe_target == gallery_target[0]:
                loss1 = criterion(probe_embd.reshape([1, 512]),
                                  gallery_embedding.to(device), y1)
                flag = True
            else:
                if len(loss2) >= 10 and flag:
                    break
                elif len(loss2) >= 10 and not flag:
                    continue
                else:
                    tmp_loss = criterion(probe_embd.reshape([1, 512]),
                                         gallery_embedding.to(device), y2)
                    loss2.append(tmp_loss)

    if not loss2:
        # Fixed: previously divided by len(loss2) unconditionally, raising
        # ZeroDivisionError when no negative pairs were found.
        return loss1
    return loss1 + (sum(loss2) / len(loss2))
示例#8
0
def embedding_expander(source, target, logger):
    """Learn a transform from source to target embedding space and use it to
    synthesize target-space vectors for words present only in source.

    Args:
        source, target: keyed-vector objects (need .vocab, .vector_size and
            .get_vector).
        logger: logger for progress messages.

    Returns:
        dict mapping each source-only word to its predicted target-space
        vector (numpy array).
    """
    source_words = set(source.vocab.keys())
    target_words = set(target.vocab.keys())
    intersection = source_words.intersection(target_words)
    logger.info(f"Intersection words: {len(intersection)}")

    # Fixed: this set is loop-invariant; it was recomputed on every epoch
    # inside the training loop (and would be undefined if the loop ran zero
    # times).
    source_only_words = source_words - intersection

    logger.info(f"Creating loader...")
    loader = create_loader(intersection, source, target, "cpu")
    model = VectorTransformer(source.vector_size, target.vector_size)
    model.to("cpu")
    optimizer = optim.Adam(model.parameters())
    loss_fn = nn.CosineEmbeddingLoss()

    logger.info(f"Training Vector Transformer...")
    for i in range(20):
        model.train()
        avg_loss = 0.
        for (x_batch, y_batch) in loader:
            y_pred = model(x_batch)
            # Every (source, target) pair is a positive for the cosine loss.
            dummy = torch.ones((y_batch.size(0), ))
            loss = loss_fn(y_pred, y_batch, dummy)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            avg_loss += loss.item() / len(loader)
        logger.info(f"Epoch {i + 1} avg_loss: {avg_loss:.4f}")

    expanded_embedding = dict()
    for word in source_only_words:
        emb = source.get_vector(word)
        tensor = torch.tensor(emb, dtype=torch.float32).to("cpu")
        pred = model(tensor).detach().numpy()
        expanded_embedding[word] = pred
    return expanded_embedding
示例#9
0
def train(sen, optimizer, train_set, nspeakers, batch_size, alpha=0.5):
    """One training epoch for the siamese speaker-embedding network.

    Combines a cosine-embedding loss between paired utterance embeddings
    with per-utterance classification losses, weighted by ``alpha``.

    Args:
        sen: siamese network; returns ((pred1, embed1), (pred2, embed2)).
        optimizer: optimizer over sen's parameters.
        train_set: dataset exposing ``.labels``.
        nspeakers: number of speakers for the contrastive sampler.
        batch_size: sampler batch size.
        alpha: weight of the classification term; (1 - alpha) weights the
            embedding term.

    Returns:
        Sum of batch losses over the epoch.
    """
    criterion = nn.CosineEmbeddingLoss()
    cuda = torch.cuda.is_available()
    print("CUDA", cuda)

    sampler = ContrastiveBatchSampler(train_set.labels, nspeakers, batch_size)
    loader = DataLoader(train_set,
                        batch_sampler=sampler,
                        num_workers=20,
                        pin_memory=True)

    epoch_loss = 0
    for utterance_batch1, utterance_batch2, label_batch1, label_batch2 in loader:
        optimizer.zero_grad()
        if cuda:
            utterance_batch1, utterance_batch2, label_batch1, label_batch2 = utterance_batch1.cuda(
            ), utterance_batch2.cuda(), label_batch1.cuda(), label_batch2.cuda(
            )
        out1, out2 = sen(utterance_batch1, utterance_batch2)
        pred1, embed1 = out1
        pred2, embed2 = out2
        # +1 when both utterances share a speaker, -1 otherwise.
        # NOTE(review): the torch.cuda.FloatTensor cast assumes CUDA is
        # available — confirm the CPU path.
        embed_labels = 2 * (label_batch1 == label_batch2).type(
            torch.cuda.FloatTensor) - 1
        embed_loss = criterion(embed1, embed2, embed_labels)
        closs = classification_loss(pred1, label_batch1) + classification_loss(
            pred2, label_batch2)
        loss = alpha * closs + (1 - alpha) * embed_loss
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()

    return epoch_loss
示例#10
0
    def __init__(self, n_cla_per_tsk: Union[np.ndarray, List[int]], class_names_to_idx: Dict[str, int], config: Dict):
        """LUCIR-style incremental learner: cosine output layer, a frozen
        teacher copy for distillation, and margin-ranking losses.

        Args:
            n_cla_per_tsk: number of classes per task.
            class_names_to_idx: class-name -> index mapping.
            config: expects batch_size, lucir_lambda, lucir_margin_1 and
                lucir_margin_2.
        """
        super(Model, self).__init__(n_cla_per_tsk, class_names_to_idx, config)

        self.sigma = True
        device = next(self.net.parameters()).device
        # Replace the plain output layer with a cosine-similarity classifier.
        self.net.model.output_layer = cosine_linear.CosineLinear(in_features=self.latent_dim,
                                                                 out_features=n_cla_per_tsk[0],
                                                                 sigma=self.sigma).to(device)
        self.reset_optimizer_and_scheduler()
        # Frozen copy of the network serves as the distillation teacher.
        self.old_net = copy_freeze(self.net)  # type: Union[ResNet, ResNetCIFAR]

        self.batch_size = config["batch_size"]

        self.lambda_base = config["lucir_lambda"]
        self.lambda_cur = self.lambda_base
        self.K = 2
        self.margin_1 = config["lucir_margin_1"]
        self.margin_2 = config["lucir_margin_2"]

        # setup losses
        # self.loss_classification = nn.CrossEntropyLoss(reduction="mean")
        self.loss_classification = nn.BCEWithLogitsLoss(reduction="mean")
        self.loss_distill = nn.CosineEmbeddingLoss(reduction="mean")
        # several losses to allow for the use of different margins
        self.loss_mr_1 = nn.MarginRankingLoss(margin=self.margin_1, reduction="mean")
        self.loss_mr_2 = nn.MarginRankingLoss(margin=self.margin_2, reduction="mean")

        self.method_variables.extend(["lambda_base", "lambda_cur", "K", "margin_1", "margin_2", "sigma"])
示例#11
0
def train_caltech(n_epoch=500, dataset_cls=Caltech10):
    """Train a kWTA ResNet-18 on a Caltech dataset.

    Args:
        n_epoch: number of training epochs.
        dataset_cls: dataset class; the numeric suffix of its name (e.g.
            "Caltech10") sets the softmax width.
    """
    dataset_name = dataset_cls.__name__
    models.caltech.set_out_features(key='softmax',
                                    value=int(dataset_name.lstrip("Caltech")))
    kwta = KWinnersTakeAllSoft(sparsity=0.05)
    model = models.caltech.resnet18(kwta=kwta)
    data_loader = DataLoader(dataset_cls)
    # NOTE(review): kwta is always a (truthy) module instance here, so the
    # else branch below is unreachable as written — confirm whether kwta was
    # meant to be optional.
    if kwta:
        criterion = ContrastiveLossSampler(nn.CosineEmbeddingLoss(margin=0.5))
        optimizer, scheduler = get_optimizer_scheduler(model)
        # Gradually tighten sparsity and raise hardness of the kWTA layer.
        kwta_scheduler = KWTAScheduler(model=model,
                                       step_size=10,
                                       gamma_sparsity=0.7,
                                       min_sparsity=0.05,
                                       gamma_hardness=2,
                                       max_hardness=20)
        trainer = TrainerEmbeddingKWTA(model=model,
                                       criterion=criterion,
                                       data_loader=data_loader,
                                       optimizer=optimizer,
                                       scheduler=scheduler,
                                       kwta_scheduler=kwta_scheduler)
    else:
        criterion = nn.CrossEntropyLoss()
        optimizer, scheduler = get_optimizer_scheduler(model)
        trainer = TrainerGrad(model=model,
                              criterion=criterion,
                              data_loader=data_loader,
                              optimizer=optimizer,
                              scheduler=scheduler)
    trainer.train(n_epochs=n_epoch)
示例#12
0
def Cosloss(inputs1, inputs2, targets, size_avarage=False):
    """Masked cosine-embedding loss for rating vectors.

    Entries where ``targets`` is 0 are zeroed out of both inputs before the
    cosine loss; ``targets`` also serves as the +1/-1 similarity label.

    Args:
        inputs1, inputs2: input tensors, masked column-wise by targets != 0.
        targets: rating/label tensor; zeros mark missing entries.
        size_avarage: mean-reduce when True, sum-reduce when False.
            (Parameter name kept for interface compatibility.)

    Returns:
        (loss, normalizer): normalizer is 1.0 for mean reduction, else the
        number of non-zero ratings.
    """
    mask = targets != 0
    num_ratings = torch.sum(mask.float())
    # reduction= replaces the deprecated size_average= constructor argument.
    criterion = nn.CosineEmbeddingLoss(
        reduction='mean' if size_avarage else 'sum')
    # torch.tensor replaces the deprecated Variable wrapper (a no-op since
    # torch 0.4).
    normalizer = torch.tensor([1.0]) if size_avarage else num_ratings
    return criterion(inputs1 * mask.float(),
                     inputs2 * mask.float(), targets), normalizer
示例#13
0
def align_loss(otmap_gather_list, pred_gather_list, ctbank_gather_list,
               err_ctbank_gather_list, use_structure, use_context,
               structure_max):
    """Structure and context alignment losses.

    Returns:
        (entropy_val, map_loss, ct_cor_loss, ct_err_loss), all moved to CUDA.
        The structure terms are zero unless use_structure and OT maps exist;
        the context terms are zero unless use_context and predictions exist.
    """
    get_entropy = Entropy()
    get_sim_loss = nn.CosineEmbeddingLoss()
    get_smooth_l1_loss = nn.SmoothL1Loss()

    entropy_val = torch.tensor(0.0)
    map_loss = torch.tensor(0.0)
    otmap_len = len(otmap_gather_list)
    if otmap_len > 0 and use_structure:
        otmap_gather_stack = torch.stack(otmap_gather_list)
        # Ideal transport map: identity alignment for every sample.
        otmap_best_label = [torch.eye(structure_max) for x in range(otmap_len)]
        otmap_best_label = torch.stack(otmap_best_label).cuda()
        otmap_best_label = Variable(otmap_best_label)

        entropy_val = get_entropy(otmap_gather_stack)
        map_loss = get_smooth_l1_loss(otmap_gather_stack, otmap_best_label)
        #map_loss = get_ce_loss(otmap_gather_stack.view(

    ct_cor_loss = torch.tensor(0.0)
    ct_err_loss = torch.tensor(0.0)
    if len(pred_gather_list) > 0 and use_context:
        pred_gather_stack = torch.stack(pred_gather_list)
        ctbank_gather_stack = torch.stack(ctbank_gather_list)
        err_ctbank_gather_stack = torch.stack(err_ctbank_gather_list)

        # NOTE(review): CosineEmbeddingLoss targets are normally 1-D with
        # values in {+1, -1}; the (N, 1) zeros below act like the dissimilar
        # branch and the extra dim may be rejected by newer torch — confirm.
        ct_cor_loss = get_sim_loss(pred_gather_stack, ctbank_gather_stack,
                                   torch.ones(len(pred_gather_list), 1).cuda())
        ct_err_loss = get_sim_loss(
            pred_gather_stack, err_ctbank_gather_stack,
            torch.zeros(len(pred_gather_list), 1).cuda())

    return entropy_val.cuda(), map_loss.cuda(), ct_cor_loss.cuda(
    ), ct_err_loss.cuda()
示例#14
0
def main():
    """Evaluate an im2recipe checkpoint on the test partition.

    Loads the checkpoint (CPU or GPU), builds the test image loader and runs
    test() with a cosine-embedding criterion, optionally paired with a
    semantic-class CrossEntropyLoss when opts.semantic_reg is set.

    Relies on module-level ``opts``, ``device`` and ``data_path``.
    """

    model = im2recipe(inst_emb=opts.inst_emb)
    # Data-parallelism only for the vision tower.
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    model.to(device)

    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    if opts.semantic_reg:
        weights_class = torch.Tensor(opts.numClasses).fill_(1)
        weights_class[0] = 0  # the background class is set to 0, i.e. ignore
        # CrossEntropyLoss combines LogSoftMax and NLLLoss in one single class
        class_crit = nn.CrossEntropyLoss(weight=weights_class).to(device)
        # we will use two different criteria
        criterion = [cosine_crit, class_crit]
    else:
        criterion = cosine_crit

    print("=> loading checkpoint '{}'".format(opts.model_path))
    if device.type == 'cpu':
        # latin1 encoding needed for checkpoints pickled under Python 2.
        checkpoint = torch.load(opts.model_path,
                                encoding='latin1',
                                map_location='cpu')
    else:
        checkpoint = torch.load(opts.model_path, encoding='latin1')
    opts.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        opts.model_path, checkpoint['epoch']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # preparing test loader
    test_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(
                    256
                ),  # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(
                    224),  # we get only the center of that rescaled
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=data_path,
            sem_reg=opts.semantic_reg,
            partition='test',
            n_samples=opts.n_samples),
        batch_size=opts.batch_size,
        shuffle=False,
        num_workers=opts.workers,
        pin_memory=True)
    print('Test loader prepared.')

    # run test
    test(test_loader, model, criterion)
示例#15
0
def main(args):
    """Train the title/description matching network with a cosine loss.

    Args:
        args: namespace with vocab_size, hidden_size, embed_size, lr,
            batch_size, seq_len and max_epochs.
    """
    net = Net(args.vocab_size, args.hidden_size, args.embed_size)
    data = load_data()

    critic = nn.CosineEmbeddingLoss()
    # Only optimize trainable parameters (frozen ones are skipped).
    params = filter(lambda p: p.requires_grad, net.parameters())
    optimizer = Adam(params, lr=args.lr)

    max_words = args.vocab_size

    vocab, _ = make_vocab(data[2], max_words)

    train_loader, test_loader = get_loaders(data, args.batch_size, 0.1, False)
    sent2num_func = make_sent2num(vocab, args.seq_len)

    step = 0
    for e in range(args.max_epochs):
        for i, batch_data in enumerate(train_loader):
            step += 1
            optimizer.zero_grad()
            description_tensor = batch2nums(batch_data["description"],
                                            sent2num_func)
            title_tensor = batch2nums(batch_data["title"], sent2num_func)
            # NOTE(review): CosineEmbeddingLoss expects targets in {+1, -1};
            # "related" looks like it may be 0/1 — confirm upstream encoding.
            y = Variable(torch.LongTensor(batch_data["related"]))
            context_vec, title_vec, att = net(description_tensor, title_tensor)
            loss = critic(context_vec, title_vec, y)
            loss.backward()
            optimizer.step()

            # Fixed: loss.data[0] raises on 0-dim tensors in modern PyTorch;
            # .item() is the supported accessor.
            log_value("loss", loss.item(), step=step)
示例#16
0
File: model.py  Project: v1xerunt/COMPOSE
def get_loss(criteria, criteria_mask, ehr, ehr_mask, demo, label,
             query_network, ehr_network, ec_network, device):
    """Classification plus query/response similarity loss for trial matching.

    Label encoding: 0 -> match (similar pair, included), 1 -> mismatch
    (dissimilar pair, included), 2 -> excluded from the similarity term.

    Returns:
        (ce_loss, masked mean similarity, softmax predictions, attention,
        response, query).
    """

    memory = ehr_network(ehr, demo, ehr_mask)  # batch_size, class_num
    criteria_embd = ec_network(criteria, criteria_mask)  #ec_num, mem_dim

    # Convert labels to cosine targets (+1/-1) and an inclusion mask.
    similarity_label = []
    label_mask = []
    for i in range(len(label)):
        if label[i] == 0:
            similarity_label.append(1)
            label_mask.append(1)
        elif label[i] == 1:
            similarity_label.append(-1)
            label_mask.append(1)
        elif label[i] == 2:
            similarity_label.append(1)
            label_mask.append(0)

    similarity_label = torch.tensor(similarity_label,
                                    dtype=torch.long).to(device)
    label_mask = torch.tensor(label_mask, dtype=torch.float32).to(device)

    ce_loss = nn.CrossEntropyLoss()
    # Unreduced so masked entries can be dropped before averaging.
    sm_loss = nn.CosineEmbeddingLoss(margin=0.3, reduction='none')

    output, response, query, attention = query_network(memory,
                                                       criteria_embd)  #bs, 3
    pred = torch.softmax(output, dim=-1)
    loss = ce_loss(output, label)
    similarity = sm_loss(response, query, similarity_label)
    similarity = similarity * label_mask
    # Mean over the included (mask == 1) samples only.
    similarity = torch.sum(similarity) / torch.sum(label_mask)

    return loss, similarity, pred, attention, response, query
示例#17
0
    def __init__(self, caption_vec_size, image_vec_size, n_keys, cm_val):
        """Caption/image alignment head.

        Captions pass through unchanged; images go through a key-value
        mapping into caption space. Trained with a cosine-embedding loss.

        Args:
            caption_vec_size: dimensionality of caption vectors.
            image_vec_size: dimensionality of image vectors.
            n_keys: number of keys for the KVMapping.
            cm_val: stored as-is; semantics defined by the caller.
        """
        super().__init__()
        self.save_hyperparameters("caption_vec_size", "image_vec_size",
                                  "n_keys")
        self.cm_val = cm_val

        #         cap_hsize = 2*caption_vec_size

        #         self.cap_translator = nn.Sequential(
        #             nn.Linear(caption_vec_size, cap_hsize),
        #             nn.Dropout(p=.3),
        #             nn.Sigmoid(),
        #             nn.Linear(cap_hsize, image_vec_size),
        #             nn.Dropout(p=.3)
        #         )
        # Identity: the caption side is not transformed.
        self.cap_translator = lambda x: x

        # self.img_translator =  #nn.Sequential(#nn.LayerNorm(torch.Size([image_vec_size])),
        self.img_translator = KVMapping(
            k_size=image_vec_size,
            n_keys=n_keys,  # similar to kd tree values
            v_size=caption_vec_size,
        )
        # )

        #  self.img_translator = lambda x : x# worked better than training both at the same time
        ## may need to tune them separately
        self.loss = nn.CosineEmbeddingLoss()
示例#18
0
    def __init__(self, config: dict):
        """Configure TSNet training from a config dict.

        Config keys (defaults in parentheses): epoch (30), lr_init (3e-4),
        batch_size (10), data_root ('data_in'), data_name (required),
        sample_length (100).
        """
        super().__init__(epoch=config.get('epoch', 30))
        self.lr_init = config.get('lr_init', 3e-4)
        self.batch_size = config.get('batch_size', 10)
        data_root = config.get('data_root', 'data_in')
        data_name = config.get('data_name', None)  # must be specified
        self.sample_length = config.get('sample_length', 100)

        # Single-channel 1-D input of sample_length timesteps.
        self.input_size = (1, self.sample_length)

        model = TSNet(in_channel=1,
                      middle_channel=50,
                      out_channel=10,
                      num_layers=5)
        self.add_model('tsnet', model, input_size=self.input_size)
        self.add_optimizer(
            'adam',
            optim.Adam(model.parameters(), lr=self.lr_init, weight_decay=0.01))
        self.add_criterion('cosine_embedding_loss',
                           nn.CosineEmbeddingLoss(margin=0.3))

        self.dataloader_maker = UCRDataLoaderBuilder(
            data_root,
            data_name,
            self.batch_size,
            sample_length=self.sample_length)
        self.set_dataloaders(self.dataloader_maker)
    def __init__(self, discriminator, generator, utils, embedder):
        """Text CycleGAN: generator G, reverse generator R (deep copy of G)
        and discriminator D, with Noam-scheduled Adam for G and R.

        Args:
            discriminator, generator: the adversarial pair.
            utils: provides emb_mat (embedding matrix) whose shape sets the
                Noam model size and the label-smoothing vocabulary size.
            embedder: token embedding module.
        """
        super(CycleGAN, self).__init__()
        self.D = discriminator
        self.G = generator
        self.R = copy.deepcopy(generator)
        self.D_opt = torch.optim.Adam(self.D.parameters())
        # self.G_opt = torch.optim.Adam(self.G.parameters())
        # Noam schedule: lr is managed by the wrapper, so Adam starts at 0.
        self.G_opt = NoamOpt(
            utils.emb_mat.shape[1], 1, 4000,
            torch.optim.Adam(self.G.parameters(),
                             lr=0,
                             betas=(0.9, 0.98),
                             eps=1e-9))
        # self.R_opt = torch.optim.Adam(self.R.parameters())
        self.R_opt = NoamOpt(
            utils.emb_mat.shape[1], 1, 4000,
            torch.optim.Adam(self.R.parameters(),
                             lr=0,
                             betas=(0.9, 0.98),
                             eps=1e-9))
        self.embed = embedder

        self.utils = utils
        self.criterion = nn.CrossEntropyLoss(ignore_index=-1)
        self.mse = nn.MSELoss()
        self.cos = nn.CosineSimilarity(dim=-1)
        self.cosloss = nn.CosineEmbeddingLoss()
        self.r_criterion = LabelSmoothing(size=utils.emb_mat.shape[0],
                                          padding_idx=0,
                                          smoothing=0.0)
        self.r_loss_compute = SimpleLossCompute(self.R.generator,
                                                self.r_criterion, self.R_opt)
示例#20
0
 def __init__(self,
              criterion: str = None,
              temperature: float = 1.,
              metric_key: str = "diff_loss"):
     """
     KL Div loss on output callback.
     Args:
         criterion: criterion for loss on outputs.
         Can be kl, mse or cos.
         temperature: temperature for logits.
         metric_key: key for metric in batch_metrics dict.
     Raises:
         TypeError: if criterion is not correct.
     """
     import warnings  # local import keeps the fix self-contained

     super().__init__(CallbackOrder.Metric)
     if criterion is None:
         criterion = "kl"
     self.criterion = criterion
     # Fixed: temperature is now stored unconditionally; previously it was
     # only assigned in the "kl" branch, so the check below raised
     # AttributeError for "mse"/"cos".
     self.temperature = temperature
     if criterion == "kl":
         self.criterion_fn = nn.KLDivLoss()
     elif criterion == "mse":
         self.criterion_fn = nn.MSELoss(reduction="sum")
     elif criterion == "cos":
         self.criterion_fn = nn.CosineEmbeddingLoss(reduction="mean")
     else:
         raise TypeError(
             f"Criterion should be string one of the kl, mse or cos")
     if not (self.temperature == 1. or self.criterion == "kl"):
         # Fixed: Warning(...) only constructed an exception object and
         # discarded it; actually emit the warning.
         warnings.warn("Temperature affects only if criterion is kl")
     self.metric_key = metric_key
示例#21
0
def nomal_loss(pred, targetN, params, depthI, depthJ):
    """Surface-normal consistency loss between predicted depth and targets.

    Normals are estimated from depth differences along the two image axes
    and compared to targetN with a cosine-embedding loss using all-positive
    targets (i.e. mean of 1 - cos).

    Args:
        pred: predicted depth map.
        targetN: ground-truth normals, last dim of size 3.
        params: per-pixel intrinsics; last dim holds (f, cx, cy).
        depthI, depthJ: depth maps shifted along the two axes, (B, 1, H, W).

    NOTE(review): relies on module-level pixel-coordinate grids MatI/MatJ
    and on CUDA being available — confirm both at the call site.
    """
    depthI = depthI.permute(0, 2, 3, 1)
    depthJ = depthJ.permute(0, 2, 3, 1)

    predN_1 = torch.zeros_like(targetN)
    predN_2 = torch.zeros_like(targetN)

    f = params[:, :, :, 0]
    cx = params[:, :, :, 1]
    cy = params[:, :, :, 2]

    # Tangent vector along the J axis, back-projected via the intrinsics.
    z1 = depthJ - pred
    z1 = torch.squeeze(z1)
    depthJ = torch.squeeze(depthJ)
    predN_1[:, :, :, 0] = ((MatJ - cx) * z1 + depthJ) * 1.0 / f
    predN_1[:, :, :, 1] = (MatI - cy) * z1 * 1.0 / f
    predN_1[:, :, :, 2] = z1

    # Tangent vector along the I axis.
    z2 = depthI - pred
    z2 = torch.squeeze(z2)
    depthI = torch.squeeze(depthI)
    predN_2[:, :, :, 0] = (MatJ - cx) * z2 * 1.0 / f
    predN_2[:, :, :, 1] = ((MatI - cy) * z2 + depthI) * 1.0 / f
    predN_2[:, :, :, 2] = z2

    # Normal = cross product of the two tangents, then unit-normalize.
    predN = torch.cross(predN_1, predN_2)
    pred_n = F.normalize(predN)
    pred_n = pred_n.contiguous().view(-1, 3)
    target_n = targetN.contiguous().view(-1, 3)

    loss_function = nn.CosineEmbeddingLoss()
    loss = loss_function(
        pred_n, target_n,
        Variable(torch.Tensor(pred_n.size(0)).cuda().fill_(1.0)))
    return loss
示例#22
0
    def train(self, dataloader, training=True, device='cpu'):
        """One pass of teacher-forced seq2seq training (or evaluation).

        Args:
            dataloader: yields (x, lengths) batches of token-id tensors.
            training: when True, backprop and step the optimizer.
            device: device for the <bos> start tokens.
        """

        total_loss = 0
        # Auxiliary embedding loss for the (currently disabled) path that
        # feeds decoder output vectors back in as inputs.
        aug_loss_fn = nn.CosineEmbeddingLoss(reduction='sum')
        for i, (x, lengths) in enumerate(dataloader):
            loss = 0
            aug_loss = 0
            self.optim.zero_grad()
            B, S = x.size()
            state = self.encoder(x, lengths) # (B, 2H)
            inp = th.LongTensor([[self.vocab("<bos>")]]*B).view(B, 1).to(device) # (B, 1)
            inp = self.encoder.embedding(inp).view(B, -1) # (B, E)
            for t in range(S):
                vec , logit, state = self.decoder(inp, state)
                loss += self.loss_fn(logit, x[:,t])
                # Normal seq2seq
                inp = self.encoder.embedding(x[:, t])
                # Training using embedding
                #inp = vec.view(B, -1)
                #aug_loss += aug_loss_fn(vec, self.encoder.embedding(x[:, t]).data, th.ones((B,1)))

            #loss += aug_loss
            # Normalize by the total number of tokens in the batch.
            loss /= lengths.sum().item()
            if training:
                loss.backward()
                self.optim.step()


            total_loss += loss.item()
            print("\rbatch:{}/{}, loss:{}, total loss:{}".format(i, len(dataloader), loss.item(), total_loss / (i + 1)), end='')
示例#23
0
 def __init__(self,
              predictor: TracePredictor,
              vocab_train: ScreenVocab,
              vocab_test: ScreenVocab,
              dataloader_train,
              dataloader_test,
              l_rate: float,
              neg_samp: int,
              loss_type='cel'):
     """
     predictor: TracePredictor module
     vocab_train: a ScreenVocab from which to find a negative sample for the training data
     vocab_test: a ScreenVocab from which to find a negative sample for the testing data
     dataloader_train, dataloader_test: dataloaders
     l_rate: learning rate for optimizer
     neg_samp: number of negative samples to compare against for training data
     loss_type: 'cel' for sum-reduced CrossEntropyLoss or 'cossim' for
         sum-reduced CosineEmbeddingLoss; any other value leaves self.loss
         unset
     """
     self.predictor = predictor
     self.loss_type = loss_type
     if self.loss_type == 'cel':
         self.loss = nn.CrossEntropyLoss(reduction='sum')
     elif self.loss_type == 'cossim':
         self.loss = nn.CosineEmbeddingLoss(reduction='sum')
     self.optimizer = Adam(self.predictor.parameters(), lr=l_rate)
     self.vocab_train = vocab_train
     self.vocab_test = vocab_test
     self.train_data = dataloader_train
     self.test_data = dataloader_test
     self.neg_sample_num = neg_samp
示例#24
0
    def __init__(self, config):
        '''
        Expected config keys (example values):
        seqlen = 16
        person_num = 150
        rnn_type = 'RNN'
        learning_rate = 0.001
        lr_decay_epoch = 300
        cuda = True
        margin (for the hinge loss)
        '''

        self.config = config
        # Only enable CUDA when it is both requested and available.
        self.config['cuda'] = torch.cuda.is_available() and self.config['cuda']

        self.classify_loss = nn.NLLLoss()
        self.hinge_loss = nn.HingeEmbeddingLoss(self.config['margin'])
        self.cos_loss = nn.CosineEmbeddingLoss(0.1)

        self.model = full_model(self.config)
        if self.config['cuda'] is True:
            self.model.cuda()

        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=self.config['learning_rate'],
                                   momentum=0.9)
        # self.optimizer = optim.Adam(self.model.parameters(), lr=self.config['learning_rate'])

        # Tensor constructors matching the selected device.
        self.FloatTensor = torch.cuda.FloatTensor if self.config[
            'cuda'] else torch.Tensor
        self.LongTensor = torch.cuda.LongTensor if self.config[
            'cuda'] else torch.LongTensor
示例#25
0
def cosine_loss(
    s_hidden_states: FloatTensor,
    t_hidden_states: FloatTensor,
    attention_mask: LongTensor = None,
) -> FloatTensor:
    """Cosine loss between hidden states.

    Args:
        s_hidden_states (FloatTensor): student hiddens
        t_hidden_states (FloatTensor): teacher hiddens
        attention_mask (LongTensor, optional): attention mask if you are using transformers.
            Defaults to None.

    Returns:
        FloatTensor: scalar mean of 1 - cos(student_i, teacher_i).
    """
    if attention_mask is not None:
        # HF transformers case
        return _cosine_loss_hf(
            s_hidden_states=s_hidden_states,
            t_hidden_states=t_hidden_states,
            attention_mask=attention_mask,
        )

    loss_fn = nn.CosineEmbeddingLoss()
    hidden_dim = s_hidden_states.size(-1)
    s_hidden_states = s_hidden_states.reshape(-1, hidden_dim)
    t_hidden_states = t_hidden_states.reshape(-1, hidden_dim)
    assert s_hidden_states.shape == t_hidden_states.shape
    # Fixed: create the all-ones target on the inputs' device; previously it
    # was always allocated on CPU, which crashes for CUDA hidden states.
    target = torch.ones(t_hidden_states.size(0),
                        device=t_hidden_states.device)
    return loss_fn(s_hidden_states, t_hidden_states, target)
    def __init__(self, args, Y, dicts):
        """Multi-filter residual CNN for multi-label text classification.

        Builds one channel per filter size — a base Conv1d followed by
        args.conv_layer residual blocks — then a shared mapper and output
        layer over the concatenated channel features.

        Args:
            args: hyperparameters (filter_size csv string, conv_layer,
                num_filter_maps, dropout, ...).
            Y: number of output labels.
            dicts: vocabulary/code dictionaries for WordRep and OutputLayer.
        """
        super(MultiResCNN, self).__init__()

        self.word_rep = WordRep(args, Y, dicts)

        self.conv = nn.ModuleList()
        filter_sizes = args.filter_size.split(',')

        self.filter_num = len(filter_sizes)
        for filter_size in filter_sizes:
            filter_size = int(filter_size)
            one_channel = nn.ModuleList()
            # 'same'-style padding keeps the sequence length unchanged.
            tmp = nn.Conv1d(self.word_rep.feature_size,
                            self.word_rep.feature_size,
                            kernel_size=filter_size,
                            padding=int(floor(filter_size / 2)))
            # NOTE(review): xavier_uniform is the deprecated alias of
            # xavier_uniform_ — confirm the torch version in use.
            xavier_uniform(tmp.weight)
            one_channel.add_module('baseconv', tmp)

            conv_dimension = self.word_rep.conv_dict[args.conv_layer]
            for idx in range(args.conv_layer):
                tmp = ResidualBlock(conv_dimension[idx],
                                    conv_dimension[idx + 1], filter_size, 1,
                                    True, args.dropout)
                one_channel.add_module('resconv-{}'.format(idx), tmp)

            self.conv.add_module('channel-{}'.format(filter_size), one_channel)
        self.mapper = Mapper(self.filter_num * args.num_filter_maps, Y)
        self.output_layer = OutputLayer(args, Y, dicts,
                                        self.filter_num * args.num_filter_maps)

        # Cosine-embedding loss kept as a module attribute for training code.
        self.sim = nn.CosineEmbeddingLoss()
示例#27
0
    def forward(self, new_outputs, new_targets, old_features, new_features,
                num_classes):
        """LUCIR-style loss: BCE classification plus cosine distillation.

        Args:
            new_outputs: (B, num_classes) logits.
            new_targets: (B, num_classes) binary targets.
            old_features, new_features: (B, D) features from the old/new
                networks; unused on the first task (num_classes == 10).
            num_classes: total classes seen so far (10 per task).

        Returns:
            Scalar loss tensor.
        """
        lambda_base = 5  # from paper
        # Fixed precedence bug: the original computed
        # num_classes - (10 / num_classes) instead of the intended ratio
        # (num_classes - 10) / num_classes.
        cur_lambda = lambda_base * sqrt(
            (num_classes - 10) / num_classes)  # from paper

        clf_criterion = nn.BCEWithLogitsLoss()
        clf_loss = clf_criterion(new_outputs, new_targets)

        # First task: there is no old model to distill from.
        if num_classes == 10:
            return clf_loss

        dist_criterion = nn.CosineEmbeddingLoss()
        # Target built from the actual batch size and on the features'
        # device (was a hard-coded 64 and an unconditional .cuda()).
        ones = torch.ones(new_features.size(0), device=new_features.device)
        dist_loss = dist_criterion(new_features, old_features, ones)

        # Weight classification vs. distillation by the old/new class ratio.
        dist = (num_classes - 10) / num_classes
        clf = 10 / num_classes

        loss = clf * clf_loss + dist * dist_loss * cur_lambda

        return loss
示例#28
0
 def __init__(self, margin=0.0):
     """Cosine-embedding criterion with sum reduction.

     Args:
         margin: margin forwarded to nn.CosineEmbeddingLoss.
     """
     super(CosineSIM, self).__init__()
     # size_average=None and reduce=None were deprecated no-ops;
     # reduction='sum' alone is equivalent.
     self.criterion = nn.CosineEmbeddingLoss(margin=margin,
                                             reduction='sum')
     logging.info('built criterion (cosine)')
示例#29
0
def cosine2by2(representation1, representation2, device='cpu'):
    """2x2 contrastive cosine loss between two 2-row representation batches.

    Row i of each batch forms a positive pair; the row-flipped pairings are
    negatives.

    Args:
        representation1, representation2: (2, D) tensors.
        device: device on which the loss is evaluated.

    Returns:
        Scalar mean cosine-embedding loss over the 4 pairs.
    """
    loss_func = nn.CosineEmbeddingLoss()
    x1 = torch.cat([representation1, representation1])
    x2 = torch.cat([representation2, torch.flipud(representation2)])
    y = torch.tensor([1, 1, -1, -1])
    # Fixed: the original randomly permuted the 4 pairs first, but the mean
    # reduction is permutation-invariant, so the shuffle only added
    # nondeterministic work without changing the result.
    return loss_func(x1.to(device), x2.to(device), y.to(device))
示例#30
0
File: model.py  Project: shiv-gpt/CSE_523
    def forward(self, outputs):
        # Triplet-style cosine-embedding loss over (anchor=0, positive=1,
        # negative=2) branches plus an MSE term on outputs[0][2]/[3].
        # outputs[k][slot]: slots 0/1 are two feature views, slots 2/3 are a
        # prediction/target pair.
        # NOTE(review): requires CUDA (the target Variables are built with
        # .cuda()) — confirm there is no CPU path.
        #import math
        """oss = torch.max(F.pairwise_distance(outputs[0][0], outputs[1][0]))
		print("Loss shape = " + str(loss.size(0)))
		print("Loss value = " + str(loss.data))
		
		loss = torch.max(nn.PairwiseDistance(outputs[0][0], outputs[1][0]) -nn.PairwiseDistance(outputs[0][0], outputs[2][0]) + self.margin, 0) \
								+ torch.max(nn.PairwiseDistance(outputs[0][1], outputs[2][1]) -nn.PairwiseDistance(outputs[0][1], outputs[1][1]) + self.margin, 0) \
								+ nn.BCELoss(outputs[0][2], outputs[0][3]) + nn.BCELoss(outputs[1][2], outputs[1][3]) + nn.BCELoss(outputs[2][2], outputs[2][3])
		"""
        loss_func = nn.MSELoss()
        # cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        cos = nn.CosineEmbeddingLoss(margin=0.3)
        # loss = torch.mean(cos(outputs[0][0], outputs[1][0]) - cos(outputs[0][0], outputs[2][0])) + torch.mean(cos(outputs[0][1], outputs[2][1]) - cos(outputs[0][1], outputs[1][1])) + loss_func(outputs[0][2], outputs[0][3]) + loss_func(outputs[1][2], outputs[1][3]) + loss_func(outputs[2][2], outputs[2][3])
        N, L = outputs[0][0].size()
        # print(outputs[0][0].size())
        Z = Variable(torch.zeros(N).cuda())
        # loss = torch.sum(torch.max(cos(outputs[0][0], outputs[1][0]) - cos(outputs[0][0], outputs[2][0]), Z)) + torch.sum(torch.max(cos(outputs[0][1], outputs[2][1]) - cos(outputs[0][1], outputs[1][1]), Z)) + loss_func(outputs[0][2], outputs[0][3]) + loss_func(outputs[1][2], outputs[1][3]) + loss_func(outputs[2][2], outputs[2][3])
        # loss = torch.sum(torch.max(torch.abs(cos(outputs[0][0], outputs[1][0])) - torch.abs(cos(outputs[0][0], outputs[2][0])) + self.margin, Z)) + torch.sum(torch.max(torch.abs(cos(outputs[0][1], outputs[2][1])) - torch.abs(cos(outputs[0][1], outputs[1][1])) + self.margin, Z)) + loss_func(outputs[0][2], outputs[0][3]) + loss_func(outputs[1][2], outputs[1][3]) + loss_func(outputs[2][2], outputs[2][3])
        # Anchor pulled toward positive on slot 0 / negative-pushed on slot 1,
        # and the mirrored pairings, plus the reconstruction MSE.
        loss = cos(outputs[0][0], outputs[1][0], Variable(
            torch.ones(N).cuda())) + cos(
                outputs[0][1], outputs[1][1],
                Variable(-1 * torch.ones(N).cuda())) + cos(
                    outputs[0][1], outputs[2][1], Variable(
                        torch.ones(N).cuda())) + cos(
                            outputs[0][0], outputs[2][0],
                            Variable(-1 * torch.ones(N).cuda())) + loss_func(
                                outputs[0][2], outputs[0][3])
        """
		loss = cos(outputs[0][0], outputs[1][0]) - cos(outputs[0][0], outputs[2][0]) + cos(outputs[0][1], outputs[2][1]) - cos(outputs[0][1], outputs[1][1]) + nn.BCELoss(outputs[0][2], outputs[0][3]) + nn.BCELoss(outputs[1][2], outputs[1][3]) + nn.BCELoss(outputs[2][2], outputs[2][3]) """
        return loss