    def get_losses_for_batch(self, batch):
        indices, inputs1, inputs2, _ = batch
        outputs1 = self.forward(inputs1)

        with torch.no_grad():
            self._momentum_update_key_encoder()
            if self.use_ddp or self.use_ddp2:
                inputs2, idx_unshuffle = self._batch_shuffle_ddp(inputs2)
            outputs2 = self.model_k(inputs2)
            if self.use_ddp or self.use_ddp2:
                outputs2 = self._batch_unshuffle_ddp(outputs2, idx_unshuffle)

        loss_fn = MoCo(outputs1,
                       outputs2,
                       self.moco_queue.clone().detach(),
                       t=self.config.loss_params.t)
        loss = loss_fn.get_loss()

        with torch.no_grad():
            outputs2 = l2_normalize(outputs2, dim=1)
            self._dequeue_and_enqueue(outputs2)

            outputs1 = l2_normalize(outputs1, dim=1)
            self.memory_bank.update(indices, outputs1)

        return loss
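The l2_normalize helper called throughout these examples is not shown on this page. A minimal sketch, assuming it simply rescales along the chosen dimension to unit Euclidean norm:

import torch

def l2_normalize(x, dim=1, eps=1e-8):
    # Divide by the L2 norm along `dim` so every slice has unit length.
    # eps guards against division by zero for all-zero rows.
    return x / (x.norm(p=2, dim=dim, keepdim=True) + eps)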
    def __init__(self, outputs1, outputs2, queue, t=0.07):
        super().__init__()
        self.outputs1 = l2_normalize(outputs1, dim=1)
        self.outputs2 = l2_normalize(outputs2, dim=1)
        self.queue = queue.detach()
        self.t = t
        self.k = queue.size(0)
        self.device = self.outputs1.device
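The get_loss method of this loss object is not reproduced on this page. A plausible sketch of the standard MoCo InfoNCE objective, consistent with the attributes set above (queue stored as k x out_dim, temperature t) and assuming torch.nn.functional is imported as F:

    def get_loss(self):
        # Sketch only -- the real implementation is not shown here.
        # Positive logit: agreement between each query and its momentum key.
        l_pos = torch.sum(self.outputs1 * self.outputs2, dim=1, keepdim=True)  # (N, 1)
        # Negative logits: similarity of each query to every entry in the queue.
        l_neg = torch.matmul(self.outputs1, self.queue.t())                    # (N, k)
        logits = torch.cat([l_pos, l_neg], dim=1) / self.t
        # The positive is always in column 0.
        labels = torch.zeros(logits.size(0), dtype=torch.long, device=self.device)
        return F.cross_entropy(logits, labels)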
    def get_losses_for_batch(self, batch):
        indices, inputs1, inputs2, _ = batch
        outputs1 = self.forward(inputs1)
        outputs2 = self.forward(inputs2)
        loss_fn = SimCLR(outputs1, outputs2, t=self.config.loss_params.t)
        loss = loss_fn.get_loss()

        with torch.no_grad():  # for nearest neighbor
            new_data_memory = (l2_normalize(outputs1, dim=1) +
                               l2_normalize(outputs2, dim=1)) / 2.
            self.memory_bank.update(indices, new_data_memory)

        return loss
    def _create(self):
        # initialize random weights uniformly in [-std_dev, std_dev]
        mb_init = torch.rand(self.size, self.dim, device=self.device)
        std_dev = 1. / np.sqrt(self.dim / 3)
        mb_init = mb_init * (2 * std_dev) - std_dev
        # L2 normalise so that each row has unit norm
        mb_init = l2_normalize(mb_init, dim=1)
        return mb_init.detach()  # detach so it's not trainable
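The memory bank's at_idxs and update methods, which the other snippets call, are not shown. A minimal sketch under the assumption that the tensor returned by _create is stored in a hypothetical attribute self._bank:

    def at_idxs(self, idxs):
        # Sketch: look up the stored (unit-norm) embeddings for the given dataset indices.
        return torch.index_select(self._bank, 0, idxs)

    def update(self, idxs, new_memory):
        # Sketch: overwrite the rows at idxs with the freshly computed embeddings.
        self._bank[idxs] = new_memory.detach()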
    def __init__(self, indices, outputs, memory_bank, k=4096, t=0.07, m=0.5):
        super().__init__()
        self.k, self.t, self.m = k, t, m

        self.indices = indices.detach()
        self.outputs = l2_normalize(outputs, dim=1)

        self.memory_bank = memory_bank
        self.device = outputs.device
        self.data_len = memory_bank.size
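get_loss is not shown for this memory-bank loss either. A loose sketch of a full-softmax instance-discrimination objective (the real code presumably draws the k sampled negatives instead, and applies m in updated_new_data_memory); memory_bank.bank is an assumed attribute holding the full size x dim tensor, and F is torch.nn.functional:

    def get_loss(self):
        # Sketch only: every memory-bank row acts as a "class" and each sample's
        # own row is the correct one.
        all_logits = torch.matmul(self.outputs, self.memory_bank.bank.t()) / self.t  # (N, data_len)
        return F.cross_entropy(all_logits, self.indices.to(self.device))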
Example #6
    def __init__(self, config):
        super().__init__(config)

        self.model_k = self.create_encoder()

        for param_q, param_k in zip(self.model.parameters(), self.model_k.parameters()):
            param_k.data.copy_(param_q.data)  # initialize
            param_k.requires_grad = False     # do not update

        # create queue (k x out_dim)
        moco_queue = torch.randn(
            self.config.loss_params.k,
            self.config.model_params.out_dim, 
        )
        self.register_buffer("moco_queue", moco_queue)
        self.moco_queue = l2_normalize(moco_queue, dim=1)
        self.register_buffer("moco_queue_ptr", torch.zeros(1, dtype=torch.long))
Example #7
if __name__ == "__main__":
    if len(sys.argv) > 1:
        checkpoint_dir = sys.argv[1]
        print('loading from checkpoint in {}'.format(constant.SAVE_DIR+'/'+checkpoint_dir))
        checkpoint = load_checkpoint(checkpoint=checkpoint_dir)
        args = checkpoint['args']

    args.embedding_size = args.glove_embedding_size + args.other_embedding_size
    state = {k: v for k, v in args.items()}
    print(args)

    dm = datamanager.TextDataManager(args)
    args.n_embed = dm.vocab.n_words
    model = text_model.TextClassifier(config=args)

    model.glove_embed.weight.data = l2_normalize(torch.Tensor(dm.vocab.get_glove_embed_vectors()))
    model.other_embed.weight.data = l2_normalize(torch.Tensor(dm.vocab.get_medw2v_embed_vectors()))

    if args.cuda:
        model.cuda()

    # Numbers of parameters
    print("number of trainable parameters found {}".format(sum(
        param.nelement() for param in model.parameters()
        if param.requires_grad)))

    pos_weight = torch.sum(1-dm.train_labels, dim=0)/torch.sum(dm.train_labels, dim=0)
    pos_weight = torch.clamp(pos_weight, min=0.1, max=10)
    if state['balance_loss']:
        criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    else:
    def __init__(self, outputs1, outputs2, t=0.07):
        super().__init__()
        self.outputs1 = l2_normalize(outputs1, dim=1)
        self.outputs2 = l2_normalize(outputs2, dim=1)
        self.t = t
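get_loss for this SimCLR loss is not shown. A compact NT-Xent sketch consistent with the normalized outputs1/outputs2 and temperature t, assuming torch.nn.functional is imported as F:

    def get_loss(self):
        # Sketch of NT-Xent: each view's positive is the other view of the same image;
        # the remaining 2N - 2 samples in the batch act as negatives.
        n = self.outputs1.size(0)
        z = torch.cat([self.outputs1, self.outputs2], dim=0)   # (2N, d), already unit norm
        sim = torch.matmul(z, z.t()) / self.t                   # (2N, 2N) scaled cosine similarities
        sim.fill_diagonal_(-float('inf'))                       # exclude self-similarity
        # The positive for row i is row i + n (and vice versa).
        targets = torch.cat([torch.arange(n, 2 * n), torch.arange(0, n)]).to(z.device)
        return F.cross_entropy(sim, targets)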
    def updated_new_data_memory(self):
        data_memory = self.memory_bank.at_idxs(self.indices)
        new_data_memory = data_memory * self.m + (1 - self.m) * self.outputs
        return l2_normalize(new_data_memory, dim=1)
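A hypothetical call site, showing how the EMA-smoothed embeddings from updated_new_data_memory would typically be written back into the bank (the class name NCELoss and the surrounding attributes are assumptions, not from this page):

        loss_fn = NCELoss(indices, outputs, self.memory_bank,  # hypothetical class name
                          k=4096, t=0.07, m=0.5)
        loss = loss_fn.get_loss()
        with torch.no_grad():
            # write the momentum-averaged embeddings back into the bank
            self.memory_bank.update(indices, loss_fn.updated_new_data_memory())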