def __init__(self, margin=0.1):
     super(DistWeightContrastiveLoss, self).__init__()
     self.margin = margin
     self.ranking_loss = nn.MarginRankingLoss(margin=self.margin)
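
All of the snippets on this page wrap the same primitive, so a minimal standalone sketch of its semantics may be useful up front: nn.MarginRankingLoss(margin)(x1, x2, y) computes mean(max(0, -y * (x1 - x2) + margin)), so y = 1 asks for x1 to outrank x2 by at least margin, and y = -1 asks for the reverse. The tensors below are purely illustrative.

import torch
import torch.nn as nn

# illustrative scores for two competing items per pair
x1 = torch.tensor([0.9, 0.2, 0.7])
x2 = torch.tensor([0.1, 0.8, 0.6])
y = torch.tensor([1.0, 1.0, 1.0])  # y = 1: x1 should outrank x2

loss_fn = nn.MarginRankingLoss(margin=0.3)
loss = loss_fn(x1, x2, y)

# identical to the closed form, averaged over the batch
manual = torch.clamp(-y * (x1 - x2) + 0.3, min=0).mean()
assert torch.allclose(loss, manual)
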
Example #2
best_correct_pairs = 0
best_loss = 1000

train_iters = 0
val_iters = 0

# Optimizer (SGD)
lr = 0.0001
momentum = 0.9
weight_decay = 1e-4

variance = 0
variance_step = 0.001

# Loss
criterion = nn.MarginRankingLoss(margin=margin).cuda(gpu)
# Model
model = model.Model_Multiple_Negatives().cuda(gpu)
model = torch.nn.DataParallel(model, device_ids=gpus)

optimizer = torch.optim.SGD(model.parameters(),
                            lr=lr,
                            momentum=momentum,
                            weight_decay=weight_decay)

# Optionally resume from a checkpoint
if resume:
    print("Loading pretrained model")
    print("=> loading checkpoint '{}'".format(resume))
    checkpoint = torch.load(resume,
                            map_location={
Example #3
##############################################


def save_model(model, name, epoch, folder_name):
    print("Saving Model")
    torch.save(model.state_dict(),
               (folder_name + "trained_{}.pth").format(epoch))
    print("Done saving Model")


gat_loss_func = nn.MarginRankingLoss(margin=0.5)


def GAT_Loss(train_indices, valid_invalid_ratio):
    len_pos_triples = train_indices.shape[0] // (int(valid_invalid_ratio) + 1)

    pos_triples = train_indices[:len_pos_triples]
    neg_triples = train_indices[len_pos_triples:]

    pos_triples = pos_triples.repeat(int(valid_invalid_ratio), 1)

    source_embeds = entity_embed[pos_triples[:, 0]]
    relation_embeds = relation_embed[pos_triples[:, 1]]
    tail_embeds = entity_embed[pos_triples[:, 2]]

    x = source_embeds + relation_embeds - tail_embeds
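    # --- the source snippet is truncated here; a plausible continuation, ---
    # --- mirroring the batch_gat_loss pattern used in Example #7 (the    ---
    # --- variable names below are assumptions)                           ---
    pos_norm = torch.norm(x, p=1, dim=1)  # TransE-style distance for positives

    source_embeds = entity_embed[neg_triples[:, 0]]
    relation_embeds = relation_embed[neg_triples[:, 1]]
    tail_embeds = entity_embed[neg_triples[:, 2]]
    x = source_embeds + relation_embeds - tail_embeds
    neg_norm = torch.norm(x, p=1, dim=1)  # distance for negatives

    # y = -1 tells MarginRankingLoss to push pos_norm below neg_norm by the margin
    y = -torch.ones(pos_norm.size(0), device=pos_norm.device)
    return gat_loss_func(pos_norm, neg_norm, y)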
Example #4
 def __init__(self, margin=0, num_instances=None):
     super(TripletLoss, self).__init__()
     self.margin = margin
     self.ranking_loss = nn.MarginRankingLoss(margin=margin)
Example #5
 def __init__(self, margin=0):
     super(CenterLoss, self).__init__()
     self.margin = margin
     self.ranking_loss_center = nn.MarginRankingLoss(margin=self.margin)
     # wrap .cuda() inside nn.Parameter so centers remains a leaf parameter
     # (nn.Parameter(...).cuda() returns a plain, non-leaf tensor)
     self.centers = nn.Parameter(torch.randn(767, 2048).cuda())  # for ModelNet40
Example #6
 def loss_func(self, p_score, n_score):
     criterion = nn.MarginRankingLoss(self.config.margin, False).cuda()
     y = Variable(torch.Tensor([-1])).cuda()
     loss = criterion(p_score, n_score, y)
     return loss
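
Note: the one-element target broadcasts across the batch here, and with y = -1 the criterion reduces per pair to max(0, p_score - n_score + margin); the positional False sets the deprecated size_average flag, i.e. reduction='sum' in current PyTorch.
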
Example #7
def train_gat(args):
    # Creating the gat model here.
    ####################################

    print("Defining model")

    print(
        "\nModel type -> GAT layer with {} heads, initial embeddings training"
        .format(args.nheads_GAT[0]))
    model_gat = SpKBGATModified(entity_embeddings, relation_embeddings,
                                args.entity_out_dim, args.entity_out_dim,
                                args.drop_GAT, args.alpha, args.nheads_GAT,
                                args.use_simple_layer)
    wandb.watch(model_gat, log="all")

    if CUDA:
        model_gat.cuda()

    optimizer = torch.optim.Adam(model_gat.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay_gat)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=500,
                                                gamma=0.5,
                                                last_epoch=-1)
    # clip_grad_norm is deprecated in favor of the in-place clip_grad_norm_;
    # note that clipping here, before any backward pass, has no lasting effect
    torch.nn.utils.clip_grad_norm_(model_gat.parameters(), 0.1)

    gat_loss_func = nn.MarginRankingLoss(margin=args.margin)
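    # batch_gat_loss, called in the loop below, is not defined in this snippet;
    # it presumably follows the same pattern as GAT_Loss in Example #3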

    current_batch_2hop_indices = None
    if args.use_2hop:
        current_batch_2hop_indices = Corpus_.get_batch_nhop_neighbors_all(
            args, Corpus_.unique_entities_train, node_neighbors_2hop)
        current_batch_2hop_indices = Variable(
            torch.LongTensor(current_batch_2hop_indices))
        if CUDA:
            current_batch_2hop_indices = current_batch_2hop_indices.cuda()

    epoch_losses = []  # losses of all epochs
    print("Number of epochs {}".format(args.epochs_gat))

    for epoch in range(args.epochs_gat):
        print("\nepoch-> ", epoch)
        random.shuffle(Corpus_.train_triples)
        Corpus_.train_indices = np.array(list(Corpus_.train_triples)).astype(
            np.int32)

        model_gat.train()  # switch to training mode
        start_time = time.time()
        epoch_loss = []

        if len(Corpus_.train_indices) % args.batch_size_gat == 0:
            num_iters_per_epoch = len(
                Corpus_.train_indices) // args.batch_size_gat
        else:
            num_iters_per_epoch = (len(Corpus_.train_indices) //
                                   args.batch_size_gat) + 1

        for iters in range(num_iters_per_epoch):
            start_time_iter = time.time()
            train_indices, train_values = Corpus_.get_iteration_batch(iters)

            if CUDA:
                train_indices = Variable(
                    torch.LongTensor(train_indices)).cuda()
                train_values = Variable(torch.FloatTensor(train_values)).cuda()

            else:
                train_indices = Variable(torch.LongTensor(train_indices))
                train_values = Variable(torch.FloatTensor(train_values))

            # forward pass
            entity_embed, relation_embed = model_gat(
                Corpus_, Corpus_.train_adj_matrix, train_indices,
                current_batch_2hop_indices)

            optimizer.zero_grad()

            loss = batch_gat_loss(gat_loss_func, train_indices, entity_embed,
                                  relation_embed)

            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.data.item())

            end_time_iter = time.time()

            print(
                "Iteration-> {0}  , Iteration_time-> {1:.4f} , Iteration_loss {2:.4f}"
                .format(iters, end_time_iter - start_time_iter,
                        loss.data.item()))

        scheduler.step()
        print("Epoch {} , average loss {} , epoch_time {}".format(
            epoch,
            sum(epoch_loss) / len(epoch_loss),
            time.time() - start_time))
        epoch_losses.append(sum(epoch_loss) / len(epoch_loss))
        wandb.log({'epoch_loss': epoch_losses[-1]})
        if (epoch + 1) % 200 == 0 or (epoch + 1) == args.epochs_gat:
            save_model(model_gat, args.data, epoch, args.output_folder,
                       args.use_2hop)
        if (epoch + 1) == args.epochs_gat:
            save_final(model_gat, 'encoder', wandb.run.dir, args.use_2hop)
Example #8
savefile = '_'.join([
    'Aggregator_model',
    str(batch_size),
    str(num_epochs),
    str(num_features),
    str(num_hidden_units)
])

model = aggregator_model(num_features, num_hidden_units)
model = model.to(device)
model.train()

# Dataset and loader
train_dataset = triplettrainDataset_aggregator(x_train, x_train_names)

criterion = nn.MarginRankingLoss(margin=1.0)
criterion = criterion.to(device)
if (optimi == 'ADAM'):
    optimizer = optim.Adam(model.parameters(), lr=lr)
elif (optimi == 'SGD'):
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

L_train = len(x_train_names)
total_step = int(L_train / batch_size)
training_loss = []

for epoch in range(num_epochs):
    running_loss = 0.0
    epoch_counter = 0

    I_permutation = np.random.permutation(L_train)
Example #9
 def __init__(self, batch_size, margin=0.3):
     super(OriTripletLoss, self).__init__()
     self.margin = margin
     self.ranking_loss = nn.MarginRankingLoss(margin=margin)
Example #10
    def train(self):
        trainData = LoadTrainData(
            self.entity2id, self.id2entity, self.relation2id, self.id2relation,
            self.train_triples, self.valid_triples, self.test_triples,
            self.headRelation2Tail, self.tailRelation2Head, self.left_entity,
            self.right_entity, self.left_num, self.right_num)

        (self.entityTotal, self.relationTotal, self.trainTotal,
         self.validTotal, self.testTotal) = trainData.get_total()

        self.model = TransE(self.entityTotal,
                            self.relationTotal,
                            dim=100,
                            batch_size=self.batch_size)

        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=self.learning_rate)

        self.criterion = nn.MarginRankingLoss(margin=5.0)
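        # note: this criterion appears unused; the training loop below
        # computes the margin loss manually from p_score and n_score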
        self.margin = torch.Tensor([self.margin])
        self.margin.requires_grad = False
        if self.use_gpu:
            self.model = self.model.cuda()
            self.margin = self.margin.cuda()

        prob = 500
        index_loader = DataLoader(dataset=TrainDataset(self.trainTotal),
                                  batch_size=self.batch_size,
                                  shuffle=True)
        training_range = tqdm(range(self.train_times))  # progress bar
        for epoch in training_range:  # one epoch takes roughly 51 seconds
            running_loss = 0.0

            for batch in index_loader:
                # start = time.time()
                # print(len(batch)*26)
                self.data['h'] = [0] * self.batch_size * (1 + self.neg)
                self.data['r'] = [0] * self.batch_size * (1 + self.neg)
                self.data['t'] = [0] * self.batch_size * (1 + self.neg)
                self.data['y'] = [0] * self.batch_size * (1 + self.neg)
                # collect the data for this batch
                i = 0

                for index in batch:
                    # print("----------")
                    # print(index)
                    # print("----------")
                    # print(type(index))
                    # collect the positive sample
                    head = self.train_triples[index][0]
                    rel = self.train_triples[index][1]
                    tail = self.train_triples[index][2]
                    self.data['h'][i] = head
                    self.data['r'][i] = rel
                    self.data['t'][i] = tail
                    self.data['y'][i] = 1
                    # print(self.data['h'][i], self.data['r'][i], self.data['t'][i], self.data['y'][i])
                    last = self.batch_size

                    for neg in range(self.neg):
                        self.data['h'][last + i] = head
                        self.data['r'][last + i] = rel
                        self.data['t'][last + i] = tail
                        self.data['y'][last + i] = -1

                        if self.bern:
                            prob = 1000 * self.left_num[rel] / (
                                self.left_num[rel] + self.right_num[rel])
                        rmd = random.random() * 1000
                        # print("rmd:", rmd, "prob:", prob)
                        if rmd < prob:
                            while True:
                                corrupt_head = random.randint(
                                    0, self.entityTotal - 1)
                                if corrupt_head not in self.left_entity[rel]:
                                    self.data['h'][last + i] = corrupt_head
                                    break
                        else:
                            while True:
                                corrupt_tail = random.randint(
                                    0, self.entityTotal - 1)
                                if corrupt_tail not in self.right_entity[rel]:
                                    self.data['t'][last + i] = corrupt_tail
                                    break
                        # print(self.data['h'][i + last], self.data['r'][i + last], self.data['t'][i + last],
                        #       self.data['y'][i + last])
                        last += self.batch_size
                    # print("---------------------")
                    i += 1

                # finished collecting the batch data

                # print(self.data['h'])
                # print(self.data['r'])
                # print(self.data['t'])
                # print(self.data['y'])
                # convert the lists to tensors
                for key in self.data:
                    self.data[key] = self.to_var(self.data[key])

                p_score, n_score = self.model(self.data)
                # print(p_score.size())
                # print(n_score.size())
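                # the line below is algebraically the standard margin ranking
                # loss, mean(max(p_score - n_score + margin, 0)), because
                # max(a, -m) + m == max(a + m, 0)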
                loss = (torch.max(p_score - n_score,
                                  -self.margin)).mean() + self.margin
                running_loss += loss.item()  # .item() avoids retaining the autograd graph
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                # end = time.time()
                # print('Running time: %s Seconds' % (end - start))
                # after processing
            training_range.set_description("Epoch %d | loss: %f" %
                                           (epoch, loss))  # progress-bar text for this epoch

        cur_time = datetime.now().strftime('%Y-%m-%d')
        self.model.save_checkpoint('.', 'model_params' + cur_time + '.pkl')
Example #11
#-----------------------------------------
# Setting up the model:
model.to(device)

if args.model_type in ['bert', 'albert', 'roberta']:
    model_dim = config.hidden_size
elif args.model_type in ['gpt2']:
    model_dim = config.n_embd

mlp = Context_MLP(in_size=model_dim)
mlp = mlp.to(device)
#-----------------------------------------

#-----------------------------------------
# The loss function:
criterion = nn.MarginRankingLoss(margin=args.loss_margin, reduction='none')
#-----------------------------------------

#-----------------------------------------
# Creating the data loaders:
train_dataloader, test_dataloader = create_loaders(args, Ranking_Dataset,
                                                   tokenizer)
#-----------------------------------------

#-----------------------------------------
# Tensorboard writer:
tb_writer = SummaryWriter(
    log_dir=f'{logs_path}/{datetime.now().strftime("%d%m%Y-%H_%M_%S")}/')
#-----------------------------------------

#-----------------------------------------
Example #12
def incremental_train_and_eval_MR_LF_TDE(epochs, tg_model, ref_model, tg_optimizer, tg_lr_scheduler, \
            trainloader, testloader, \
            iteration, start_iteration, \
            lamda, \
            dist, K, lw_mr, causal_embed=None, \
            fix_bn=False, weight_per_class=None, device=None):
    if device is None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    mu = 0.9  # causal embed momentum
    if causal_embed is None:
        if iteration > start_iteration:
            causal_embed = torch.FloatTensor(1, ref_model.fc.in_features).zero_().to(device)
        else:
            causal_embed = torch.FloatTensor(1, tg_model.fc.in_features).zero_().to(device)

    if iteration > start_iteration:
        ref_model.eval()
        num_old_classes = ref_model.fc.out_features
        handle_ref_features = ref_model.fc.register_forward_hook(get_ref_features)
        handle_cur_features = tg_model.fc.register_forward_hook(get_cur_features)
        handle_old_scores_bs = tg_model.fc.fc1.register_forward_hook(get_old_scores_before_scale)
        handle_new_scores_bs = tg_model.fc.fc2.register_forward_hook(get_new_scores_before_scale)

    for epoch in range(epochs):
        #train
        tg_model.train()
        if fix_bn:
            for m in tg_model.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()
                    #m.weight.requires_grad = False
                    #m.bias.requires_grad = False
        train_loss = 0
        train_loss1 = 0
        train_loss2 = 0
        train_loss3 = 0
        correct = 0
        total = 0
        tg_lr_scheduler.step()
        print('\nEpoch: %d, LR: ' % epoch, end='')
        print(tg_lr_scheduler.get_lr())
        for batch_idx, (inputs, targets) in enumerate(tqdm(trainloader)):
            inputs, targets = inputs.to(device), targets.to(device)
            tg_optimizer.zero_grad()
            outputs = tg_model(inputs)
            if iteration == start_iteration:
                loss = nn.CrossEntropyLoss(weight_per_class)(outputs, targets)
            else:
                ref_outputs = ref_model(inputs)
                loss1 = nn.CosineEmbeddingLoss()(cur_features, ref_features.detach(), \
                    torch.ones(inputs.shape[0]).to(device)) * lamda
                # update causal_embed
                with torch.no_grad():
                    cur_features_mean = cur_features.detach().mean(0, keepdim=True)
                    causal_embed = mu * causal_embed + cur_features_mean
                loss2 = nn.CrossEntropyLoss(weight_per_class)(outputs, targets)
                #################################################
                #scores before scale, [-1, 1]
                outputs_bs = torch.cat((old_scores, new_scores), dim=1)

                assert (outputs_bs.size() == outputs.size())
                #get ground-truth scores
                gt_index = torch.zeros(outputs_bs.size()).to(device)
                gt_index = gt_index.scatter(1, targets.view(-1, 1), 1).ge(0.5)
                gt_scores = outputs_bs.masked_select(gt_index)
                #get top-K scores on novel classes
                max_novel_scores = outputs_bs[:, num_old_classes:].topk(K, dim=1)[0]
                #the index of hard samples, i.e., samples of old classes
                hard_index = targets.lt(num_old_classes)
                hard_num = torch.nonzero(hard_index).size(0)
                #print("hard examples size: ", hard_num)
                if hard_num > 0:
                    gt_scores = gt_scores[hard_index].view(-1, 1).repeat(1, K)
                    max_novel_scores = max_novel_scores[hard_index]
                    assert (gt_scores.size() == max_novel_scores.size())
                    assert (gt_scores.size(0) == hard_num)
                    #print("hard example gt scores: ", gt_scores.size(), gt_scores)
                    #print("hard example max novel scores: ", max_novel_scores.size(), max_novel_scores)
                    loss3 = nn.MarginRankingLoss(margin=dist)(gt_scores.view(-1, 1), \
                        max_novel_scores.view(-1, 1), torch.ones(hard_num*K).to(device)) * lw_mr
                else:
                    loss3 = torch.zeros(1).to(device)
                #################################################
                loss = loss1 + loss2 + loss3
            loss.backward()
            tg_optimizer.step()

            train_loss += loss.item()
            if iteration > start_iteration:
                train_loss1 += loss1.item()
                train_loss2 += loss2.item()
                train_loss3 += loss3.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        if iteration == start_iteration:
            print('Train set: {}, Train Loss: {:.4f} Acc: {:.4f}'.format(\
                len(trainloader), train_loss/(batch_idx+1), 100.*correct/total))
        else:
            print('Train set: {}, Train Loss1: {:.4f}, Train Loss2: {:.4f}, '
                  'Train Loss3: {:.4f}, Train Loss: {:.4f} Acc: {:.4f}'.format(
                      len(trainloader), train_loss1/(batch_idx+1),
                      train_loss2/(batch_idx+1), train_loss3/(batch_idx+1),
                      train_loss/(batch_idx+1), 100.*correct/total))

        #eval
        tg_model.eval()
        test_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(testloader):
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = tg_model(inputs)
                loss = nn.CrossEntropyLoss(weight_per_class)(outputs, targets)

                test_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
        print('Test set: {} Test Loss: {:.4f} Acc: {:.4f}'.format(\
            len(testloader), test_loss/(batch_idx+1), 100.*correct/total))

    if iteration > start_iteration:
        print("Removing register_forward_hook")
        handle_ref_features.remove()
        handle_cur_features.remove()
        handle_old_scores_bs.remove()
        handle_new_scores_bs.remove()
    return tg_model, causal_embed
Example #13
 def loss(self, positive_score, negative_score):
     """graph embedding loss function"""
     target = torch.tensor([-1], dtype=torch.long)
     loss_func = nn.MarginRankingLoss(margin=self.margin, reduction='none')
     return loss_func(positive_score, negative_score, target)
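
With a target of -1 this reduces, per pair, to max(0, positive_score - negative_score + margin): the positive (distance-style) score is pushed at least margin below the negative one, and reduction='none' returns one loss value per pair.
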
Example #14
 def __init__(self, margin=0, num_instances=0, use_semi=True):
     super(TripletLoss, self).__init__()
     self.margin = margin
     self.ranking_loss = nn.MarginRankingLoss(margin=self.margin)
     self.K = num_instances
     self.use_semi = use_semi
Example #15
 def __init__(self, margin=0, mode='hard'):
     super(MatrixLoss, self).__init__()
     self.margin = margin
     self.ranking_loss = nn.MarginRankingLoss(margin=margin)
     self.mode = mode
Example #16
 def __init__(self, batch_size, margin=0.5):
     super(TripletLoss, self).__init__()
     self.margin = margin
     self.ranking_loss = nn.MarginRankingLoss(margin=margin)
     self.batch_size = batch_size
     self.mask = torch.eye(batch_size)
Example #17
    in_size, out_size = ([x.size() for x in in_params],
                         [x.size() for x in out_params])
    in_sum, out_sum = (sum([np.prod(x) for x in in_size]),
                       sum([np.prod(x) for x in out_size]))

    print "IN    : {} params".format(in_sum)
    #print print_params(in_names, in_size)
    print "OUT   : {} params".format(out_sum)
    #print print_params(out_names, out_size)
    print "TOTAL : {} params".format(in_sum + out_sum)

    loss_fn = {
        'xent': nn.CrossEntropyLoss(),
        'mse': nn.MSELoss(),
        'mrl': nn.MarginRankingLoss(),
        'mlml': nn.MultiLabelMarginLoss(),
        'mml': nn.MultiMarginLoss()
    }
    tt = torch
    if not args.cpu:
        loss_fn = {k: v.cuda() for (k, v) in loss_fn.items()}
        tt = torch.cuda

    optimizer = torch.optim.Adam(in_params, lr=args.lr)

    out_data = {'train':{'x':[], 'y':[] }, \
                'valid':{'x':[], 'y':[] }, \
                'bleu':{'x':[], 'y':[] }, \
                'best_valid':{'x':[], 'y':[] } }
Example #18
 def __init__(self, margin=0.3):
     super(TripletLoss, self).__init__()
     self.margin = margin
     # https://pytorch.org/docs/1.2.0/nn.html?highlight=marginrankingloss#torch.nn.MarginRankingLoss
     self.ranking_loss = nn.MarginRankingLoss(margin=margin)
Example #19
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-task', required=True)
    parser.add_argument('-model', required=True)
    parser.add_argument('-eval_step', type=int, default=10)
    parser.add_argument('-epoch', type=int, default=400)
    parser.add_argument('-d_word_vec', type=int, default=300)
    parser.add_argument('-batch_size', type=int, default=100)
    parser.add_argument('-save_model', default=None)
    parser.add_argument('-save_mode',
                        type=str,
                        choices=['all', 'best'],
                        default='best')
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-lr', type=float, default=0.001)
    parser.add_argument('-n_bins', type=float, default=21)

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda
    opt.mu = kernal_mus(opt.n_bins)
    opt.sigma = kernel_sigmas(opt.n_bins)
    print(opt)

    # ========= Preparing DataLoader =========#
    if opt.task == "wikiqa":
        train_filename = "./data/wikiqa/wiki_train_pair.pkl"
        test_filename = "./data/wikiqa/wiki_test.pkl"
        dev_filename = "./data/wikiqa/wiki_dev.pkl"
        train_data = pickle.load(open(train_filename, 'rb'))
        test_data = pickle.load(open(test_filename, 'rb'))
        dev_data = pickle.load(open(dev_filename, 'rb'))
        weights = np.load("./data/wikiqa/embed.txt")

    elif opt.task == "trecqa-clean":
        train_filename = "./data/trecqa/trec_train_pair.pkl"
        test_filename = "./data/trecqa/trec_test_clean.pkl"
        dev_filename = "./data/trecqa/trec_dev_clean.pkl"
        train_data = pickle.load(open(train_filename, 'rb'))
        test_data = pickle.load(open(test_filename, 'rb'))
        dev_data = pickle.load(open(dev_filename, 'rb'))
        weights = np.load("./data/trecqa/embed.txt")
    elif opt.task == "trecqa-all":
        train_filename = "./data/trecqa/trec_train_pair.pkl"
        test_filename = "./data/trecqa/trec_test_all.pkl"
        dev_filename = "./data/trecqa/trec_dev_all.pkl"
        train_data = pickle.load(open(train_filename, 'rb'))
        test_data = pickle.load(open(test_filename, 'rb'))
        dev_data = pickle.load(open(dev_filename, 'rb'))
        weights = np.load("./data/trecqa/embed.txt")
    else:
        raise NotImplementedError("Task not implemented!")
    train_data = Dataloader(data=train_data, opt=opt, shuffle=True)
    test_data = DataloaderTest(data=test_data, opt=opt)
    dev_data = DataloaderTest(data=dev_data, opt=opt)
    if opt.model == "knrm":
        model = KNRM.knrm(opt, weights)
    elif opt.model == "cknrm":
        model = CKNRM.knrm(opt, weights)
    else:
        raise ValueError("No such model!")
    crit = nn.MarginRankingLoss(margin=1, reduction='mean')  # size_average=True in older PyTorch

    if opt.cuda:
        model = model.cuda()
        crit = crit.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    train(model, opt, crit, optimizer, train_data, dev_data, test_data)
Example #20
 def __init__(self, p1, p2, input_var=None, var=["z"], margin=0):
     super().__init__(p1, p2, input_var)
     self.var = var
     self.loss = nn.MarginRankingLoss(margin=margin, reduction='none')  # reduce=False is the deprecated spelling
Example #21
def train(device, net, dataloader, val_loader, args, logger, experiment):
    def update(engine, data):
        input_left, input_right, label = data['left_image'], data['right_image'], data['winner']
        input_left, input_right, label = input_left.to(device), input_right.to(device), label.to(device)
        rank_label = label.clone()
        inverse_label = label.clone()
        label[label==-1] = 0
        # zero the parameter gradients
        optimizer.zero_grad()
        rank_label = rank_label.float()

        start = timer()
        output_clf,output_rank_left, output_rank_right = net(input_left,input_right)
        end = timer()
        logger.info(f'FORWARD,{end-start:.4f}')

        #compute clf loss
        start = timer()
        loss_clf = clf_crit(output_clf,label)

        #compute ranking loss
        loss_rank = compute_ranking_loss(output_rank_left, output_rank_right, label, rank_crit)
        loss = loss_clf + loss_rank

        end = timer()
        logger.info(f'LOSS,{end-start:.4f}')

        #compute ranking accuracy
        start = timer()
        rank_acc = compute_ranking_accuracy(output_rank_left, output_rank_right, label)
        end = timer()
        logger.info(f'RANK-ACC,{end-start:.4f}')

        # backward step
        start = timer()
        loss.backward()
        optimizer.step()
        end = timer()
        logger.info(f'BACKWARD,{end-start:.4f}')

        #swapped forward
        start = timer()
        inverse_label*=-1 #swap label
        inverse_rank_label = inverse_label.clone()
        inverse_rank_label = inverse_rank_label.float()
        inverse_label[inverse_label==-1] = 0
        end = timer()
        logger.info(f'SWAPPED-SETUP,{end-start:.4f}')
        start = timer()
        outputs, output_rank_left, output_rank_right = net(input_right,input_left) #pass swapped input
        end = timer()
        logger.info(f'SWAPPED-FORWARD,{end-start:.4f}')
        start = timer()
        inverse_loss_clf = clf_crit(outputs, inverse_label)
        #compute ranking loss
        inverse_loss_rank = compute_ranking_loss(output_rank_left, output_rank_right, label, rank_crit)
        #swapped backward
        inverse_loss = inverse_loss_clf + inverse_loss_rank
        end = timer()
        logger.info(f'SWAPPED-LOSS,{end-start:.4f}')
        start = timer()
        inverse_loss.backward()
        optimizer.step()
        end = timer()
        logger.info(f'SWAPPED-BACKWARD,{end-start:.4f}')

        return  { 'loss':loss.item(),
                'loss_clf':loss_clf.item(),
                'loss_rank':loss_rank.item(),
                'y':label,
                'y_pred': output_clf,
                'rank_acc': rank_acc
                }

    def inference(engine,data):
        with torch.no_grad():
            start = timer()
            input_left, input_right, label = data['left_image'], data['right_image'], data['winner']
            input_left, input_right, label = input_left.to(device), input_right.to(device), label.to(device)
            rank_label = label.clone()
            label[label==-1] = 0
            rank_label = rank_label.float()
            # forward
            output_clf,output_rank_left, output_rank_right = net(input_left,input_right)
            loss_clf = clf_crit(output_clf,label)
            loss_rank = compute_ranking_loss(output_rank_left, output_rank_right, label, rank_crit)
            rank_acc = compute_ranking_accuracy(output_rank_left, output_rank_right, label)
            loss = loss_clf + loss_rank
            end = timer()
            logger.info(f'INFERENCE,{end-start:.4f}')
            return  { 'loss':loss.item(),
                'loss_clf':loss_clf.item(),
                'loss_rank':loss_rank.item(),
                'y':label,
                'y_pred': output_clf,
                'rank_acc': rank_acc
                }
    net = net.to(device)

    clf_crit = nn.NLLLoss()
    rank_crit = nn.MarginRankingLoss(reduction='mean', margin=1)
    optimizer = optim.SGD(net.parameters(), lr=args.lr, weight_decay=args.wd, momentum=0.9)
    lamb = Variable(torch.FloatTensor([1]),requires_grad = False).cuda()[0]

    trainer = Engine(update)
    evaluator = Engine(inference)

    writer = SummaryWriter()
    RunningAverage(output_transform=lambda x: x['loss']).attach(trainer, 'loss')
    RunningAverage(output_transform=lambda x: x['loss_clf']).attach(trainer, 'loss_clf')
    RunningAverage(output_transform=lambda x: x['loss_rank']).attach(trainer, 'loss_rank')
    RunningAverage(output_transform=lambda x: x['rank_acc']).attach(trainer, 'rank_acc')
    RunningAverage(Accuracy(output_transform=lambda x: (x['y_pred'],x['y']))).attach(trainer,'avg_acc')

    RunningAverage(output_transform=lambda x: x['loss']).attach(evaluator, 'loss')
    RunningAverage(output_transform=lambda x: x['loss_clf']).attach(evaluator, 'loss_clf')
    RunningAverage(output_transform=lambda x: x['loss_rank']).attach(evaluator, 'loss_rank')
    RunningAverage(output_transform=lambda x: x['rank_acc']).attach(evaluator, 'rank_acc')
    RunningAverage(Accuracy(output_transform=lambda x: (x['y_pred'],x['y']))).attach(evaluator,'avg_acc')

    if args.pbar:
        pbar = ProgressBar(persist=False)
        pbar.attach(trainer,['loss','avg_acc', 'rank_acc'])

        pbar = ProgressBar(persist=False)
        pbar.attach(evaluator,['loss','loss_clf', 'loss_rank','avg_acc'])

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer):
        net.eval()
        evaluator.run(val_loader)
        trainer.state.metrics['val_acc'] = evaluator.state.metrics['rank_acc']
        net.train()

        tb_log(
            {
                "accuracy":{
                    'accuracy':trainer.state.metrics['avg_acc'],
                    'rank_accuracy':trainer.state.metrics['rank_acc']
                },
                "loss": {
                    'total':trainer.state.metrics['loss'],
                    'clf':trainer.state.metrics['loss_clf'],
                    'rank':trainer.state.metrics['loss_rank']
                }
            },
            {
                "accuracy":{
                    'accuracy':evaluator.state.metrics['avg_acc'],
                    'rank_accuracy':evaluator.state.metrics['rank_acc']
                },
                "loss": {
                    'total':evaluator.state.metrics['loss'],
                    'clf':evaluator.state.metrics['loss_clf'],
                    'rank':evaluator.state.metrics['loss_rank']
                }
            },
            writer,
            args.attribute,
            trainer.state.epoch
        )

    handler = ModelCheckpoint(args.model_dir, '{}_{}_{}'.format(args.model, args.premodel, args.attribute),
                                n_saved=1,
                                create_dir=True,
                                save_as_state_dict=True,
                                require_empty=False,
                                score_function=lambda engine: engine.state.metrics['val_acc'])
    trainer.add_event_handler(Events.EPOCH_COMPLETED, handler, {
                'model': net
                })

    if (args.resume):
        def start_epoch(engine):
            engine.state.epoch = args.epoch
        trainer.add_event_handler(Events.STARTED, start_epoch)
        evaluator.add_event_handler(Events.STARTED, start_epoch)

    trainer.run(dataloader,max_epochs=args.max_epochs)
Example #22
 def __init__(self, p1, p2, input_var=None, margin=0.5):
     super().__init__(p1, p2, input_var)
     self.loss = nn.MarginRankingLoss(margin=margin)
Example #23
 def __init__(self, margin=0.3, mutual_flag=False):
     super(TripletLoss, self).__init__()
     self.margin = margin
     self.ranking_loss = nn.MarginRankingLoss(margin=margin)
     self.mutual = mutual_flag
Example #24
from __future__ import absolute_import
Example #25
    ['softsign', nn.Softsign()],
    ['softmin', nn.Softmin()],
    ['tanhshrink', nn.Tanhshrink()],
    ['rrelu', nn.RReLU()],
    ['glu', nn.GLU()],
])

loss = nn.ModuleDict([
    ['l1', nn.L1Loss()],
    ['nll', nn.NLLLoss()],
    ['kldiv', nn.KLDivLoss()],
    ['mse', nn.MSELoss()],
    ['bce', nn.BCELoss()],
    ['bce_with_logits', nn.BCEWithLogitsLoss()],
    ['cosine_embedding', nn.CosineEmbeddingLoss()],
    ['ctc', nn.CTCLoss()],
    ['hinge_embedding', nn.HingeEmbeddingLoss()],
    ['margin_ranking', nn.MarginRankingLoss()],
    ['multi_label_margin', nn.MultiLabelMarginLoss()],
    ['multi_label_soft_margin', nn.MultiLabelSoftMarginLoss()],
    ['multi_margin', nn.MultiMarginLoss()],
    ['smooth_l1', nn.SmoothL1Loss()],
    ['soft_margin', nn.SoftMarginLoss()],
    ['cross_entropy', nn.CrossEntropyLoss()],
    ['triplet_margin', nn.TripletMarginLoss()],
    ['poisson_nll', nn.PoissonNLLLoss()]])

optimizer = dict({
    'adadelta': optim.Adadelta,
    'adagrad': optim.Adagrad,
    'adam': optim.Adam,
    'sparse_adam': optim.SparseAdam,
Example #26
 def __init__(self, margin=0.3, lamb=10.0, same_margin=1.0):
     super(weightedContrastiveLoss, self).__init__()
     self.margin = margin
     #self.margin_pos = same_margin  # Modified by Sun 2019.1.21
     self.lamb = lamb
     self.ranking_loss = nn.MarginRankingLoss(margin=margin)
Example #27
 def __init__(self, margin=None):
     self.margin = margin
     if margin is not None:
         self.ranking_loss = nn.MarginRankingLoss(margin=margin)
     else:
         self.ranking_loss = nn.SoftMarginLoss()
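
For context, a forward pass commonly paired with this dual-mode init looks like the sketch below, assuming dist_ap and dist_an hold the hardest positive and negative distances per anchor (the names are illustrative):

 def __call__(self, dist_ap, dist_an):
     y = torch.ones_like(dist_an)
     if self.margin is not None:
         # hinge form: require dist_an to exceed dist_ap by the margin
         loss = self.ranking_loss(dist_an, dist_ap, y)
     else:
         # soft-margin form: log(1 + exp(dist_ap - dist_an))
         loss = self.ranking_loss(dist_an - dist_ap, y)
     return loss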
Example #28
    def forward(self, h_batch, r_batch, t_batch, h_neg_batch, r_neg_batch,
                t_neg_batch):
        '''
        :param h_batch: variable containing tensor of head entity
        :param r_batch: variable containing tensor of relation
        :param t_batch: variable containing tensor of tail entity
        :return:
        '''
        embed_h_batch = self.embed_entity(h_batch)  # size_batch * dim
        embed_r_batch = self.embed_relation(r_batch)  # size_batch * dim
        embed_t_batch = self.embed_entity(t_batch)  # size_batch * dim
        embed_h_neg_batch = self.embed_entity(h_neg_batch)  # size_batch * dim
        embed_r_neg_batch = self.embed_relation(r_neg_batch)  # size_batch * dim (relations come from embed_relation, not embed_entity)
        embed_t_neg_batch = self.embed_entity(t_neg_batch)  # size_batch * dim

        # get neighbor context
        neighbor_context = torch.LongTensor(
            get_neighbor_context_batch(h_batch)).type(dtype_LongTensor_cuda)
        embed_neighbor_context_r = self.embed_relation(
            Variable(neighbor_context[:, :, 0])
        )  # size_batch * size_neighbor_context * dim
        embed_neighbor_context_t = self.embed_entity(
            Variable(neighbor_context[:, :, 1])
        )  # size_batch * size_neighbor_context * dim
        neighbor_tmp = -(embed_neighbor_context_r - embed_neighbor_context_t
                         )  # size_batch * size_neighbor_context * dim
        a = torch.norm(-neighbor_tmp + embed_r_batch[:, np.newaxis, :] -
                       embed_t_batch[:, np.newaxis, :],
                       p=self.args.p,
                       dim=2,
                       keepdim=False)  # size_batch * size_neighbor_context
        alpha = F.softmin(a, dim=1)  # size_batch * size_neighbor_context
        # g_n_pos = - torch.sum(alpha * torch.norm(embed_h_batch[:,np.newaxis,:] + neighbor_tmp, p=self.args.p, dim=2, keepdim=False)) # size_batch
        # g_n_neg = - torch.sum(alpha * torch.norm(embed_h_neg_batch[:,np.newaxis,:] + neighbor_tmp, p=self.args.p, dim=2, keepdim=False)) # size_batch
        g_n_pos = -torch.norm(torch.sum(alpha[:, :, np.newaxis] * neighbor_tmp,
                                        dim=1) - embed_h_batch,
                              p=self.args.p,
                              dim=1,
                              keepdim=False)  # size_batch
        g_n_neg = -torch.norm(torch.sum(alpha[:, :, np.newaxis] * neighbor_tmp,
                                        dim=1) - embed_h_neg_batch,
                              p=self.args.p,
                              dim=1,
                              keepdim=False)  # size_batch
        # get path context
        path_context = torch.LongTensor(
            get_path_context_batch(h_batch,
                                   t_batch)).type(dtype_LongTensor_cuda)
        rel_sign = torch.sign(path_context.type(dtype_FloatTensor_cuda)
                              )  # size_batch * size_path_context * length_path
        embed_path_list = []
        for i in range(len(path_context)
                       ):  # because the indices of embedding should be <= 2
            embed_path_list.append(
                self.embed_relation(Variable(torch.abs(path_context[i]))))
        embed_path_context = torch.cat(
            [torch.unsqueeze(embed, 0) for embed in embed_path_list],
            0)  # size_batch * size_path_context * length_path * dim
        embed_path = torch.sum(
            Variable(rel_sign[:, :, :, np.newaxis], requires_grad=True) *
            embed_path_context,
            dim=2,
            keepdim=False)  # size_batch * size_path_context * dim
        b = torch.norm(embed_h_batch[:, np.newaxis, :] + embed_path -
                       embed_t_batch[:, np.newaxis, :],
                       p=self.args.p,
                       dim=2,
                       keepdim=False)  # size_batch * size_path_context
        beta = F.softmin(b, dim=1)  # size_batch * size_path_context
        g_p_pos = -torch.norm(torch.sum(beta[:, :, np.newaxis] * embed_path,
                                        dim=1) - embed_r_batch,
                              p=self.args.p,
                              dim=1,
                              keepdim=False)
        g_p_neg = -torch.norm(torch.sum(beta[:, :, np.newaxis] * embed_path,
                                        dim=1) - embed_r_neg_batch,
                              p=self.args.p,
                              dim=1,
                              keepdim=False)

        # g_t
        g_t_pos = -torch.norm(embed_h_batch + embed_r_batch - embed_t_batch,
                              p=self.args.p,
                              dim=1,
                              keepdim=False)
        g_t_neg = -torch.norm(
            embed_h_neg_batch + embed_r_neg_batch - embed_t_neg_batch,
            p=self.args.p,
            dim=1,
            keepdim=False)

        # loss_g_n_pos = - torch.sum(F.logsigmoid(g_n_pos))
        # loss_g_n_neg = - torch.sum(F.logsigmoid(- g_n_neg))
        # loss_g_p_pos = - torch.sum(F.logsigmoid(g_p_pos))
        # loss_g_p_neg = - torch.sum(F.logsigmoid(- g_p_neg))
        # loss_g_t_pos = - torch.sum(F.logsigmoid(g_t_pos))
        # loss_g_t_neg = - torch.sum(F.logsigmoid(- g_t_neg))
        # loss = loss_g_n_pos + loss_g_n_neg + loss_g_p_pos + loss_g_p_neg + loss_g_t_pos + loss_g_t_neg

        loss_function = nn.MarginRankingLoss(margin=1, reduction='sum')  # size_average=False in older PyTorch
        target = Variable(torch.FloatTensor([1] * len(h_batch)),
                          requires_grad=False).type(dtype_FloatTensor_cuda)
        # loss = loss_function(F.sigmoid(g_n_pos) + F.sigmoid(g_p_pos) + F.sigmoid(g_t_pos), F.sigmoid(g_n_neg) + F.sigmoid(g_p_neg) + F.sigmoid(g_t_neg), target.type(dtype_FloatTensor_cuda))
        loss = loss_function(g_n_pos, g_n_neg, target) + loss_function(
            g_p_pos, g_p_neg, target) + loss_function(g_t_pos, g_t_neg, target)
        return loss
Example #29
 def __init__(self, margin=0.3):
     super(TripletLoss_out, self).__init__()
     self.margin = margin
     self.ranking_loss = nn.MarginRankingLoss(margin=self.margin)
Example #30
    loss_1 = target[idx, idx] * (torch.log(target[idx, idx]) -
                                 inputs[idx, idx])

    print("loss of the first element:", loss_1)

# ---------------------------------------------- 10 Margin Ranking Loss --------------------------------------------
flag = 0
# flag = 1
if flag:

    x1 = torch.tensor([[1], [2], [3]], dtype=torch.float)
    x2 = torch.tensor([[2], [2], [2]], dtype=torch.float)

    target = torch.tensor([1, 1, -1], dtype=torch.float)

    loss_f_none = nn.MarginRankingLoss(margin=0, reduction='none')

    loss = loss_f_none(x1, x2, target)

    print(loss)
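    # broadcasting pitfall: x1 and x2 have shape (3, 1) while target is (3,),
    # so -target * (x1 - x2) broadcasts to a (3, 3) matrix and this prints
    # tensor([[1., 1., 0.],
    #         [0., 0., 0.],
    #         [0., 0., 1.]])
    # give the inputs and target matching shapes to get one loss per pair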

# ---------------------------------------------- 11 Multi Label Margin Loss -----------------------------------------
flag = 0
# flag = 1
if flag:

    x = torch.tensor([[0.1, 0.2, 0.4, 0.8]])
    y = torch.tensor([[0, 3, -1, -1]], dtype=torch.long)

    loss_f = nn.MultiLabelMarginLoss(reduction='none')