Example #1
    def __call__(self, image_batch, theta_aff, theta_aff_tps, use_cuda=True):
        
        sampling_grid_aff = self.affTnf(image_batch=None,
                                        theta_batch=theta_aff.view(-1,2,3),
                                        return_sampling_grid=True,
                                        return_warped_image=False)
      
        sampling_grid_aff_tps = self.tpsTnf(image_batch=None,
                                       theta_batch=theta_aff_tps,
                                       return_sampling_grid=True,
                                       return_warped_image=False)
        
        if self.padding_crop_factor is not None:
            sampling_grid_aff_tps = sampling_grid_aff_tps*self.padding_crop_factor

        # put 1e10 value in region out of bounds of sampling_grid_aff
        in_bound_mask_aff = ((sampling_grid_aff[:,:,:,0]>-1) * (sampling_grid_aff[:,:,:,0]<1) * (sampling_grid_aff[:,:,:,1]>-1) * (sampling_grid_aff[:,:,:,1]<1)).unsqueeze(3)
        in_bound_mask_aff = in_bound_mask_aff.expand_as(sampling_grid_aff)
        sampling_grid_aff = torch.mul(in_bound_mask_aff.float(),sampling_grid_aff)
        sampling_grid_aff = torch.add((in_bound_mask_aff.float()-1)*(1e10),sampling_grid_aff)       
        
        # compose transformations
        sampling_grid_aff_tps_comp = F.grid_sample(sampling_grid_aff.transpose(2,3).transpose(1,2), sampling_grid_aff_tps).transpose(1,2).transpose(2,3)
            
        # put 1e10 value in region out of bounds of sampling_grid_aff_tps_comp
        in_bound_mask_aff_tps=((sampling_grid_aff_tps[:,:,:,0]>-1) * (sampling_grid_aff_tps[:,:,:,0]<1) * (sampling_grid_aff_tps[:,:,:,1]>-1) * (sampling_grid_aff_tps[:,:,:,1]<1)).unsqueeze(3)
        in_bound_mask_aff_tps=in_bound_mask_aff_tps.expand_as(sampling_grid_aff_tps_comp)
        sampling_grid_aff_tps_comp=torch.mul(in_bound_mask_aff_tps.float(),sampling_grid_aff_tps_comp)
        sampling_grid_aff_tps_comp = torch.add((in_bound_mask_aff_tps.float()-1)*(1e10),sampling_grid_aff_tps_comp)       

        # sample transformed image
        warped_image_batch = F.grid_sample(image_batch, sampling_grid_aff_tps_comp)
        
        return warped_image_batch
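The masking above relies on torch.add to push out-of-bounds grid coordinates to a huge constant so that the composed F.grid_sample treats them as falling outside the image. A minimal standalone sketch of that idiom (toy grid, not from the original repo):

import torch

grid = torch.tensor([[[[0.5, -0.2], [1.3, 0.0]]]])   # shape (1, 1, 2, 2): one row of two (x, y) points
in_bounds = ((grid > -1) & (grid < 1)).all(dim=3, keepdim=True).expand_as(grid).float()
masked = torch.add((in_bounds - 1) * 1e10, torch.mul(in_bounds, grid))
print(masked)   # the out-of-range point (1.3, 0.0) becomes (-1e10, -1e10)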
Example #2
 def forward(self, x):
     x0 = self.conv.forward(x.float())
     x = self.pool_mil(x0)
     x = x.squeeze(2).squeeze(2)
     x1 = torch.add(torch.mul(x0.view(x.size(0), 1000, -1), -1), 1)
     cumprod = torch.cumprod(x1, 2)
     out = torch.max(x, torch.add(torch.mul(cumprod[:, :, -1], -1), 1))
     #out = F.softmax(out)
     return out
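The cumprod/max lines implement noisy-OR MIL pooling: for per-region probabilities p_i the image-level score is 1 - prod_i(1 - p_i), combined with the max-pooled score via torch.max. A standalone illustration with hypothetical values:

import torch

p = torch.tensor([[0.1, 0.2, 0.05]])               # per-region probabilities for one class
noisy_or = 1 - torch.cumprod(1 - p, dim=1)[:, -1]  # 1 - (0.9 * 0.8 * 0.95)
print(noisy_or)                                    # tensor([0.3160])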
Example #3
 def forward(self, img, att_size=14):
     x0 = self.conv(img)
     x = self.pool_mil(x0)
     x = x.squeeze(2).squeeze(2)
     x = self.l1(x)
     x1 = torch.add(torch.mul(x.view(x.size(0), 1000, -1), -1), 1)
     cumprod = torch.cumprod(x1, 2)
     out = torch.max(x, torch.add(torch.mul(cumprod[:, :, -1], -1), 1))
     return out
    def match(self, passage_encoders, question_encoders, wq_matrix, wp_matrix, fw = True):
        
        '''
        passage_encoders (pn_steps, batch, hidden_size)
        question_encoders (qn_steps, batch, hidden_size)
        wq_matrix (qn_steps, batch, hidden_size)
        wp_matrix (pn_steps, batch, hidden_size)
        '''
        if fw:
            match_lstm = self.fw_match_lstm
            start = 0
            end = passage_encoders.size(0)
            stride = 1
        else:
            match_lstm = self.bw_match_lstm
            start = passage_encoders.size(0) - 1
            end = -1
            stride = -1
        
        hx = Variable(torch.zeros(passage_encoders.size(1), self.hidden_size)).cuda()
        cx = Variable(torch.zeros(passage_encoders.size(1), self.hidden_size)).cuda()
        
        match_encoders = [0 for i in range(passage_encoders.size(0))]
        
        for i in range(start, end, stride):
            
            wphp = wp_matrix[i]
            wrhr = self.whr_net(hx)

            _sum = torch.add(wphp, wrhr) # batch, hidden_size
            _sum = _sum.expand(wq_matrix.size(0), wq_matrix.size(1), self.hidden_size) # qn_steps, batch, hidden_size
            
            g = self.tanh(torch.add(wq_matrix, _sum)) # qn_steps, batch, hidden_size

            g = torch.transpose(g, 0, 1)# batch, qn_steps, hidden_size
            
            wg = self.w_net(g) # batch, qn_steps, 1
            wg = wg.squeeze(-1) # batch, qn_steps
            alpha = wg # batch, qn_steps
            alpha = self.softmax(alpha).view(alpha.size(0), 1, alpha.size(1)) # batch, 1, qn_steps


            attentionv = torch.bmm(alpha, question_encoders.transpose(0, 1)) # batch, 1, hidden_size
            attentionv = attentionv.squeeze(1) # batch, hidden_size
            
            inp = torch.cat([passage_encoders[i], attentionv], -1)
                        
            hx, cx = match_lstm(inp, (hx, cx)) # batch, hidden_size
            
            match_encoders[i] = hx.view(1, hx.size(0), -1)
            
        match_encoders = torch.cat(match_encoders)
        
        return match_encoders
Example #5
    def updateOutput(self, input):
        self.output.resize_(1)
        assert input[0].dim() == 2

        if self.diff is None:
            self.diff = input[0].new()

        torch.add(input[0], -1, input[1], out=self.diff).abs_()

        self.output.resize_(input[0].size(0))
        self.output.zero_()
        self.output.add_(self.diff.pow_(self.norm).sum(1, keepdim=False))
        self.output.pow_(1. / self.norm)

        return self.output
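Note that torch.add(input[0], -1, input[1], out=self.diff) uses the legacy torch.add(input, value, other) overload, i.e. input[0] + (-1) * input[1]. In current PyTorch the same difference would be written with the alpha keyword (a sketch):

import torch

a = torch.tensor([1.0, 2.0, 3.0])
b = torch.tensor([0.5, 0.5, 0.5])
diff = torch.add(a, b, alpha=-1)   # a + (-1) * b, i.e. a - b
print(diff)                        # tensor([0.5000, 1.5000, 2.5000])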
Example #6
    def forward(self, context_ids, doc_ids, target_noise_ids):
        """Sparse computation of scores (unnormalized log probabilities)
        that should be passed to the negative sampling loss.

        Parameters
        ----------
        context_ids: torch.Tensor of size (batch_size, num_context_words)
            Vocabulary indices of context words.

        doc_ids: torch.Tensor of size (batch_size,)
            Document indices of paragraphs.

        target_noise_ids: torch.Tensor of size (batch_size, num_noise_words + 1)
            Vocabulary indices of target and noise words. The first element in
            each row is the ground truth index (i.e. the target), other
            elements are indices of samples from the noise distribution.

        Returns
        -------
            autograd.Variable of size (batch_size, num_noise_words + 1)
        """
        # combine a paragraph vector with word vectors of
        # input (context) words
        x = torch.add(
            self._D[doc_ids, :], torch.sum(self._W[context_ids, :], dim=1))

        # sparse computation of scores (unnormalized log probabilities)
        # for negative sampling
        return torch.bmm(
            x.unsqueeze(1),
            self._O[:, target_noise_ids].permute(1, 0, 2)).squeeze()
    def get_loss(self, image_a_pred, image_b_pred, mask_a, mask_b):
        loss = 0

        # get the nonzero indices
        mask_a_indices_flat = torch.nonzero(mask_a)
        mask_b_indices_flat = torch.nonzero(mask_b)
        if len(mask_a_indices_flat) == 0:
            return Variable(torch.cuda.LongTensor([0]), requires_grad=True)
        if len(mask_b_indices_flat) == 0:
            return Variable(torch.cuda.LongTensor([0]), requires_grad=True)

        # take num_samples random pixel samples of the object, using the mask
        num_samples = 10000

        rand_numbers_a = (torch.rand(num_samples)*len(mask_a_indices_flat)).cuda()
        rand_indices_a = Variable(torch.floor(rand_numbers_a).type(torch.cuda.LongTensor), requires_grad=False)
        randomized_mask_a_indices_flat = torch.index_select(mask_a_indices_flat, 0, rand_indices_a).squeeze(1)

        rand_numbers_b = (torch.rand(num_samples)*len(mask_b_indices_flat)).cuda()
        rand_indices_b = Variable(torch.floor(rand_numbers_b).type(torch.cuda.LongTensor), requires_grad=False)
        randomized_mask_b_indices_flat = torch.index_select(mask_b_indices_flat, 0, rand_indices_b).squeeze(1)

        # index into the image and get descriptors
        M_margin = 0.5 # margin parameter
        random_img_a_object_descriptors = torch.index_select(image_a_pred, 1, randomized_mask_a_indices_flat)
        random_img_b_object_descriptors = torch.index_select(image_b_pred, 1, randomized_mask_b_indices_flat)
        pixel_wise_loss = (random_img_a_object_descriptors - random_img_b_object_descriptors).pow(2).sum(dim=2)
        pixel_wise_loss = torch.add(pixel_wise_loss, -2*M_margin)
        zeros_vec = torch.zeros_like(pixel_wise_loss)
        loss += torch.max(zeros_vec, pixel_wise_loss).sum()

        return loss
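The pattern torch.add(pixel_wise_loss, -2*M_margin) followed by torch.max(zeros_vec, ...) is a hinge: only the part of the squared distance that exceeds the threshold contributes to the loss. A scalar illustration of the same pattern (hypothetical distances, margin of 1.0):

import torch

d = torch.tensor([0.2, 1.5, 3.0])                           # hypothetical squared distances
hinge = torch.max(torch.zeros_like(d), torch.add(d, -1.0))  # max(0, d - margin)
print(hinge)                                                # tensor([0.0000, 0.5000, 2.0000])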
Example #8
File: ptt.py  Project: sprax/python
def unit_test(args):
    ''' test different (kinds of) predicate detectors '''
    print("Torch uninitialized 5x3 matrix:")
    x_t = torch.Tensor(5, 3)
    print(x_t)

    print("Torch randomly initialized 5x3 matrix X:")
    x_t = torch.rand(5, 3)
    if args.verbose:
        print(x_t)
        print("size:", x_t.size())

    print("Torch randomly initialized 5x3 matrix Y:")
    y_t = torch.rand(5, 3)
    if args.verbose:
        print(y_t)
    print("X + Y:")
    z_t = torch.add(x_t, y_t)
    print(z_t)


    print("slice (X + Y)[:, 1]:")
    print(z_t[:, 1])

    num_wrong = 0
    print("unit_test:  num_tests:", 1,
          " num_wrong:", num_wrong, " -- ", "FAIL" if num_wrong else "PASS")
    def forward(self, title, pg):

        r_gate = F.sigmoid(self.wrx(title) + self.wrh(pg))
        i_gate = F.sigmoid(self.wix(title) + self.wih(pg))
        n_gate = F.tanh(self.wnx(title) + torch.mul(r_gate, self.wnh(pg)))
        result =  torch.mul(i_gate, pg) + torch.mul(torch.add(-i_gate, 1), n_gate)
        return result
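Here torch.add(-i_gate, 1) is simply 1 - i_gate, so the result is the convex combination i_gate * pg + (1 - i_gate) * n_gate, a GRU-style gated update. A toy check of that combination:

import torch

i_gate = torch.tensor([0.25, 0.75])
pg     = torch.tensor([1.0, 1.0])
n_gate = torch.tensor([0.0, 0.0])
out = torch.mul(i_gate, pg) + torch.mul(torch.add(-i_gate, 1), n_gate)
print(out)   # tensor([0.2500, 0.7500])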
Example #10
    def test_train(self):
        self._metric.train()
        calls = [[torch.FloatTensor([0.0]), torch.LongTensor([0])],
                 [torch.FloatTensor([0.0, 0.1, 0.2, 0.3]), torch.LongTensor([0, 1, 2, 3])]]
        for i in range(len(self._states)):
            self._metric.process(self._states[i])
        self.assertEqual(2, len(self._metric_function.call_args_list))
        for i in range(len(self._metric_function.call_args_list)):
            self.assertTrue(torch.eq(self._metric_function.call_args_list[i][0][0], calls[i][0]).all)
            self.assertTrue(torch.lt(torch.abs(torch.add(self._metric_function.call_args_list[i][0][1], -calls[i][1])), 1e-12).all)
        self._metric_function.reset_mock()
        self._metric.process_final({})

        self._metric_function.assert_called_once()
        self.assertTrue(torch.eq(self._metric_function.call_args_list[0][0][1], torch.LongTensor([0, 1, 2, 3, 4])).all)
        self.assertTrue(torch.lt(torch.abs(torch.add(self._metric_function.call_args_list[0][0][0], -torch.FloatTensor([0.0, 0.1, 0.2, 0.3, 0.4]))), 1e-12).all)
	def fade_in_layer(self,x,alpha):
		for l in self.layers:
			x = l(x)
		x_new = self.next_block(x)
		x = self.toRGB(x)
		x_new = self.new_toRGB(x_new)
		return torch.add(x.mul(1.0-alpha),x_new.mul(alpha))
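The returned blend torch.add(x.mul(1.0-alpha), x_new.mul(alpha)) is the progressive-growing fade-in, a linear interpolation between the old and new output branches; torch.lerp expresses the same math (a sketch):

import torch

x, x_new, alpha = torch.zeros(2), torch.ones(2), 0.3
blended = torch.lerp(x, x_new, alpha)   # x * (1 - alpha) + x_new * alpha
print(blended)                          # tensor([0.3000, 0.3000])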
    def forward(self, x):
        x = self.embed(x)
        x = self.dropout(x)
        # x = x.view(len(x), x.size(1), -1)
        # x = embed.view(len(x), embed.size(1), -1)
        bilstm_out, self.hidden = self.bilstm(x, self.hidden)

        bilstm_out = torch.transpose(bilstm_out, 0, 1)
        bilstm_out = torch.transpose(bilstm_out, 1, 2)
        # bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
        bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2))
        bilstm_out = bilstm_out.squeeze(2)

        hidden2lable = self.hidden2label1(F.tanh(bilstm_out))

        gate_layer = F.sigmoid(self.gate_layer(bilstm_out))
        # calculate highway layer values
        gate_hidden_layer = torch.mul(hidden2lable, gate_layer)
        # written as below it runs, but it does not match the Highway Networks formula:
        # gate_input = torch.mul((1 - gate_layer), hidden2lable)
        gate_input = torch.mul((1 - gate_layer), bilstm_out)
        highway_output = torch.add(gate_hidden_layer, gate_input)

        logit = self.logit_layer(highway_output)

        return logit
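The intended highway formula is y = g * H(x) + (1 - g) * x with transform gate g; a standalone sketch of that combination using torch.mul / torch.add (hypothetical tensors, independent of the model above):

import torch

x  = torch.tensor([0.5, -1.0])   # carried input
Hx = torch.tensor([2.0,  3.0])   # transformed input
g  = torch.tensor([0.9,  0.1])   # transform gate
y = torch.add(torch.mul(g, Hx), torch.mul(1 - g, x))
print(y)   # tensor([1.8500, -0.6000])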
Example #13
 def test_local_var_binary_methods(self):
     ''' Unit tests for methods mentioned on issue 1385
         https://github.com/OpenMined/PySyft/issues/1385'''
     x = torch.FloatTensor([1, 2, 3, 4])
     y = torch.FloatTensor([[1, 2, 3, 4]])
     z = torch.matmul(x, y.t())
     assert (torch.equal(z, torch.FloatTensor([30])))
     z = torch.add(x, y)
     assert (torch.equal(z, torch.FloatTensor([[2, 4, 6, 8]])))
     x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
     y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
     z = torch.cross(x, y, dim=1)
     assert (torch.equal(z, torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]])))
     x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
     y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
     z = torch.dist(x, y)
     t = torch.FloatTensor([z])
     assert (torch.equal(t, torch.FloatTensor([0.])))
     x = torch.FloatTensor([1, 2, 3])
     y = torch.FloatTensor([1, 2, 3])
     z = torch.dot(x, y)
     t = torch.FloatTensor([z])
     assert torch.equal(t, torch.FloatTensor([14]))
     z = torch.eq(x, y)
     assert (torch.equal(z, torch.ByteTensor([1, 1, 1])))
     z = torch.ge(x, y)
     assert (torch.equal(z, torch.ByteTensor([1, 1, 1])))
Example #14
    def test_remote_var_binary_methods(self):
        ''' Unit tests for methods mentioned on issue 1385
            https://github.com/OpenMined/PySyft/issues/1385'''
        hook = TorchHook(verbose=False)
        local = hook.local_worker
        remote = VirtualWorker(hook, 1)
        local.add_worker(remote)

        x = Var(torch.FloatTensor([1, 2, 3, 4])).send(remote)
        y = Var(torch.FloatTensor([[1, 2, 3, 4]])).send(remote)
        z = torch.matmul(x, y.t())
        assert (torch.equal(z.get(), Var(torch.FloatTensor([30]))))
        z = torch.add(x, y)
        assert (torch.equal(z.get(), Var(torch.FloatTensor([[2, 4, 6, 8]]))))
        x = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
        y = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
        z = torch.cross(x, y, dim=1)
        assert (torch.equal(z.get(), Var(torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]]))))
        x = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
        y = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
        z = torch.dist(x, y)
        assert (torch.equal(z.get(), Var(torch.FloatTensor([0.]))))
        x = Var(torch.FloatTensor([1, 2, 3])).send(remote)
        y = Var(torch.FloatTensor([1, 2, 3])).send(remote)
        z = torch.dot(x, y)
        print(torch.equal(z.get(), Var(torch.FloatTensor([14]))))
        z = torch.eq(x, y)
        assert (torch.equal(z.get(), Var(torch.ByteTensor([1, 1, 1]))))
        z = torch.ge(x, y)
        assert (torch.equal(z.get(), Var(torch.ByteTensor([1, 1, 1]))))
Example #15
 def forward(self, x):
     if not self.equalInOut: x   = self.relu1(self.bn1(x))
     else:                   out = self.relu1(self.bn1(x))
     out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x)))
     if self.droprate > 0:
         out = F.dropout(out, p=self.droprate, training=self.training)
     out = self.conv2(out)
     return torch.add(x if self.equalInOut else self.convShortcut(x), out)
Example #16
    def forward(self, lvec, rvec):
        mult_dist = torch.mul(lvec, rvec)
        abs_dist = torch.abs(torch.add(lvec, -rvec))
        vec_dist = torch.cat((mult_dist, abs_dist), 1)

        out = F.sigmoid(self.wh(vec_dist))
        out = F.log_softmax(self.wp(out))
        return out
Example #17
 def forward(self, x):
     out1 = self.conv1(x)
     out = self.res_blocks(out1)
     out2 = self.conv2(out)
     out = torch.add(out1, out2)
     out = self.upsampling(out)
     out = self.conv3(out)
     return out
	def fade_in_layer(self,x,alpha):
		x_new = self.new_fromRGB(x)
		x_new = self.next_block(x_new)
		x = self.avg_pool(x)
		x = self.fromRGB(x)
		x = torch.add(x.mul(1.0-alpha),x_new.mul(alpha))
		for l in self.layers:
			x = l(x)
		return self.toOut(x.view(x.size(0),-1))
Example #19
    def test_lambda(self):
        trans = transforms.Lambda(lambda x: x.add(10))
        x = torch.randn(10)
        y = trans(x)
        assert (y.equal(torch.add(x, 10)))

        trans = transforms.Lambda(lambda x: x.add_(10))
        x = torch.randn(10)
        y = trans(x)
        assert (y.equal(x))
Example #20
    def test_validate(self):
        self._metric.eval()
        for i in range(len(self._states)):
            self._metric.process(self._states[i])
        self._metric_function.assert_not_called()
        self._metric.process_final_validate({})

        self._metric_function.assert_called_once()
        self.assertTrue(torch.eq(self._metric_function.call_args_list[0][0][1], torch.LongTensor([0, 1, 2, 3, 4])).all)
        self.assertTrue(torch.lt(torch.abs(torch.add(self._metric_function.call_args_list[0][0][0], -torch.FloatTensor([0.0, 0.1, 0.2, 0.3, 0.4]))), 1e-12).all)
    def get_loss_original(self, image_a_pred, image_b_pred, matches_a,
                          matches_b, non_matches_a, non_matches_b,
                          M_margin=0.5, non_match_loss_weight=1.0):

        # this is pegged to its implementation at sha 87abdb63bb5b99d9632f5c4360b5f6f1cf54245f
        """
        Computes the loss function
        DCN = Dense Correspondence Network
        num_images = number of images in this batch
        num_matches = number of matches
        num_non_matches = number of non-matches
        W = image width
        H = image height
        D = descriptor dimension
        match_loss = 1/num_matches \sum_{num_matches} ||descriptor_a - descriptor_b||_2^2
        non_match_loss = 1/num_non_matches \sum_{num_non_matches} max(0, M_margin - ||descriptor_a - descriptor_b||_2^2 )
        loss = match_loss + non_match_loss
        :param image_a_pred: Output of DCN network on image A.
        :type image_a_pred: torch.Variable(torch.FloatTensor) shape [1, W * H, D]
        :param image_b_pred: same as image_a_pred
        :type image_b_pred:
        :param matches_a: torch.Variable(torch.LongTensor) has shape [num_matches,],  a (u,v) pair is mapped
        to (u,v) ---> image_width * v + u, this matches the shape of one dimension of image_a_pred
        :type matches_a: torch.Variable(torch.FloatTensor)
        :param matches_b: same as matches_a
        :type matches_b:
        :param non_matches_a: torch.Variable(torch.FloatTensor) has shape [num_non_matches,],  a (u,v) pair is mapped
        to (u,v) ---> image_width * v + u, this matches the shape of image_a_pred
        :type non_matches_a: torch.Variable(torch.FloatTensor)
        :param non_matches_b: same as non_matches_a
        :type non_matches_b:
        :return: loss, match_loss, non_match_loss
        :rtype: torch.Variable(torch.FloatTensor) each of shape torch.Size([1])
        """

        num_matches = matches_a.size()[0]
        num_non_matches = non_matches_a.size()[0]


        matches_a_descriptors = torch.index_select(image_a_pred, 1, matches_a)
        matches_b_descriptors = torch.index_select(image_b_pred, 1, matches_b)

        match_loss = 1.0/num_matches * (matches_a_descriptors - matches_b_descriptors).pow(2).sum()

        # add loss via non_matches
        non_matches_a_descriptors = torch.index_select(image_a_pred, 1, non_matches_a)
        non_matches_b_descriptors = torch.index_select(image_b_pred, 1, non_matches_b)
        pixel_wise_loss = (non_matches_a_descriptors - non_matches_b_descriptors).pow(2).sum(dim=2)
        pixel_wise_loss = torch.add(torch.neg(pixel_wise_loss), M_margin)
        zeros_vec = torch.zeros_like(pixel_wise_loss)
        non_match_loss = non_match_loss_weight * 1.0/num_non_matches * torch.max(zeros_vec, pixel_wise_loss).sum()

        loss = match_loss + non_match_loss

        return loss, match_loss, non_match_loss
Example #22
    def forward(self, x):
        bahs, chs, _, _ = x.size()

        # Returns a new tensor with the same data as the self tensor but of a different size.
        chn_se = self.avg_pool(x).view(bahs, chs)
        chn_se = self.channel_excitation(chn_se).view(bahs, chs, 1, 1)
        chn_se = torch.mul(x, chn_se)

        spa_se = self.spatial_se(x)
        spa_se = torch.mul(x, spa_se)
        return torch.add(chn_se, 1, spa_se)
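torch.add(chn_se, 1, spa_se) is the legacy torch.add(input, value, other) form with value = 1, i.e. a plain element-wise sum of the channel and spatial excitation maps; in current PyTorch it would simply be (a sketch):

import torch

chn_se = torch.rand(2, 3, 4, 4)
spa_se = torch.rand(2, 3, 4, 4)
fused = torch.add(chn_se, spa_se)   # same result as the legacy torch.add(chn_se, 1, spa_se)
print(fused.shape)                  # torch.Size([2, 3, 4, 4])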
Example #23
    def updateOutput(self, input, target):
        # - log(input) * target - log(1 - input) * (1 - target)
        if input.nelement() != target.nelement():
            raise RuntimeError("input and target size mismatch")

        if self.buffer is None:
            self.buffer = input.new()

        buffer = self.buffer
        weights = self.weights

        buffer.resize_as_(input)

        if weights is not None and target.dim() != 1:
            weights = self.weights.view(1, target.size(1)).expand_as(target)

        # log(input) * target
        torch.add(input, self.eps, out=buffer).log_()
        if weights is not None:
            buffer.mul_(weights)

        target_1d = target.contiguous().view(-1)
        # don't save a 1-d view of buffer: it should already be contiguous, and it's
        # used as non-1d tensor later.
        output = torch.dot(target_1d, buffer.contiguous().view(-1))

        # log(1 - input) * (1 - target)
        torch.mul(input, -1, out=buffer).add_(1 + self.eps).log_()
        if weights is not None:
            buffer.mul_(weights)

        output = output + torch.sum(buffer)
        output = output - torch.dot(target_1d, buffer.contiguous().view(-1))

        if self.sizeAverage:
            output = output / input.nelement()

        self.output = - output.item()

        return self.output
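torch.add(input, self.eps, out=buffer).log_() adds a small eps before taking the log so that log(0) never occurs; the same idea in out-of-place style (a sketch):

import torch

p = torch.tensor([0.0, 0.5, 1.0])
eps = 1e-12
log_p = torch.add(p, eps).log()
print(log_p)   # tensor([-27.6310,  -0.6931,   0.0000])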
Example #24
def theta_to_sampling_grid(out_h,out_w,theta_aff=None,theta_tps=None,theta_aff_tps=None,use_cuda=True,tps_reg_factor=0):
    affTnf = GeometricTnf(out_h=out_h,out_w=out_w,geometric_model='affine',use_cuda=use_cuda)
    tpsTnf = GeometricTnf(out_h=out_h,out_w=out_w,geometric_model='tps',use_cuda=use_cuda,tps_reg_factor=tps_reg_factor)

    if theta_aff is not None:
        sampling_grid_aff = affTnf(image_batch=None,
                                               theta_batch=theta_aff.view(1,2,3),
                                               return_sampling_grid=True,
                                               return_warped_image=False)
    else:
        sampling_grid_aff=None
    
    if theta_tps is not None:
        sampling_grid_tps = tpsTnf(image_batch=None,
                                               theta_batch=theta_tps.view(1,-1),
                                               return_sampling_grid=True,
                                               return_warped_image=False)
    else:
        sampling_grid_tps=None
        
    if theta_aff is not None and theta_aff_tps is not None:
        sampling_grid_aff_tps = tpsTnf(image_batch=None,
                                   theta_batch=theta_aff_tps.view(1,-1),
                                   return_sampling_grid=True,
                                   return_warped_image=False)
        
        # put 1e10 value in region out of bounds of sampling_grid_aff
        sampling_grid_aff = sampling_grid_aff.clone()
        in_bound_mask_aff=Variable((sampling_grid_aff.data[:,:,:,0]>-1) & (sampling_grid_aff.data[:,:,:,0]<1) & (sampling_grid_aff.data[:,:,:,1]>-1) & (sampling_grid_aff.data[:,:,:,1]<1)).unsqueeze(3)
        in_bound_mask_aff=in_bound_mask_aff.expand_as(sampling_grid_aff)
        sampling_grid_aff = torch.add((in_bound_mask_aff.float()-1)*(1e10),torch.mul(in_bound_mask_aff.float(),sampling_grid_aff))       
        # put 1e10 value in region out of bounds of sampling_grid_aff_tps_comp
        sampling_grid_aff_tps_comp = F.grid_sample(sampling_grid_aff.transpose(2,3).transpose(1,2), sampling_grid_aff_tps).transpose(1,2).transpose(2,3)
        in_bound_mask_aff_tps=Variable((sampling_grid_aff_tps.data[:,:,:,0]>-1) & (sampling_grid_aff_tps.data[:,:,:,0]<1) & (sampling_grid_aff_tps.data[:,:,:,1]>-1) & (sampling_grid_aff_tps.data[:,:,:,1]<1)).unsqueeze(3)
        in_bound_mask_aff_tps=in_bound_mask_aff_tps.expand_as(sampling_grid_aff_tps_comp)
        sampling_grid_aff_tps_comp = torch.add((in_bound_mask_aff_tps.float()-1)*(1e10),torch.mul(in_bound_mask_aff_tps.float(),sampling_grid_aff_tps_comp))       
    else:
        sampling_grid_aff_tps_comp = None

    return (sampling_grid_aff,sampling_grid_tps,sampling_grid_aff_tps_comp) 
Example #25
    def test_lambda(self):
        trans = transforms.Lambda(lambda x: x.add(10))
        x = torch.randn(10)
        y = trans(x)
        assert (y.equal(torch.add(x, 10)))

        trans = transforms.Lambda(lambda x: x.add_(10))
        x = torch.randn(10)
        y = trans(x)
        assert (y.equal(x))

        # Checking if Lambda can be printed as string
        trans.__repr__()
Example #26
 def forward(self, x):
     x1 = self.Conv1(x)
     x2 = self.BN1(x1)
     x3 = self.Relu(x2)
     x4 = self.Conv2(x3)
     x5 = self.BN2(x4)
     x6 = self.Relu(x5)
     x7 = self.Conv3(x6)
     x8 = self.BN3(x7)
     x9 = torch.add(x8,x1)
     x10 = self.Relu(x9)
     
     return x10
Example #27
    def _test_spadd_shape(self, shape_i, shape_v=None):
        shape = shape_i + (shape_v or [])
        x, _, _ = self._gen_sparse(len(shape_i), 10, shape)
        y = self.randn(*shape)
        r = random.random()

        res = torch.add(y, r, x)
        expected = y + r * self.safeToDense(x)

        self.assertEqual(res, expected)

        # Non contiguous dense tensor
        s = list(shape)
        s[0] = shape[-1]
        s[-1] = shape[0]
        y = self.randn(*s)
        y.transpose_(0, len(s) - 1)
        r = random.random()

        res = torch.add(y, r, x)
        expected = y + r * self.safeToDense(x)

        self.assertEqual(res, expected)
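torch.add(y, r, x) in this test is the legacy scalar-weighted form y + r * x, where x may be sparse and the result is dense; with the current signature the same expression reads (a sketch):

import torch

y = torch.randn(3, 3)
x = torch.eye(3).to_sparse()
r = 0.5
res = torch.add(y, x, alpha=r)                      # dense + alpha * sparse -> dense
print(torch.allclose(res, y + r * x.to_dense()))    # True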
Example #28
    def updateGradInput(self, input, target):
        # - (target - input) / ( input (1 - input) )
        # The gradient is slightly incorrect:
        # It should have be divided by (input + self.eps) (1 - input + self.eps)
        # but it is divided by input (1 - input + self.eps) + self.eps
        # This modification requires less memory to be computed.
        if input.nelement() != target.nelement():
            raise RuntimeError("input and target size mismatch")

        if self.buffer is None:
            self.buffer = input.new()

        buffer = self.buffer
        weights = self.weights
        gradInput = self.gradInput

        if weights is not None and target.dim() != 1:
            weights = self.weights.view(1, target.size(1)).expand_as(target)

        buffer.resize_as_(input)
        # - x ( 1 + self.eps -x ) + self.eps
        torch.add(input, -1, out=buffer).add_(-self.eps).mul_(input).add_(-self.eps)

        gradInput.resize_as_(input)
        # y - x
        torch.add(target, -1, input, out=gradInput)
        # - (y - x) / ( x ( 1 + self.eps -x ) + self.eps )
        gradInput.div_(buffer)

        if weights is not None:
            gradInput.mul_(weights)

        if self.sizeAverage:
            gradInput.div_(target.nelement())

        return gradInput
Example #29
    def step(self, step, lprobs, scores):
        super()._init_buffers(lprobs)
        bsz, beam_size, vocab_size = lprobs.size()
        if beam_size % self.num_groups != 0:
            raise ValueError(
                'DiverseBeamSearch requires --beam to be divisible by the number of groups'
            )
        group_size = beam_size // self.num_groups

        # initialize diversity penalty
        if self.diversity_buf is None:
            self.diversity_buf = lprobs.new()
        torch.zeros(lprobs[:, 0, :].size(), out=self.diversity_buf)

        scores_G, indices_G, beams_G = [], [], []
        for g in range(self.num_groups):
            lprobs_g = lprobs[:, g::self.num_groups, :]
            scores_g = scores[:, g::self.num_groups, :] if step > 0 else None

            # apply diversity penalty
            if g > 0:
                lprobs_g = torch.add(lprobs_g, self.diversity_strength, self.diversity_buf.unsqueeze(1))
            else:
                lprobs_g = lprobs_g.contiguous()

            scores_buf, indices_buf, beams_buf = self.beam.step(step, lprobs_g, scores_g)
            beams_buf.mul_(self.num_groups).add_(g)

            scores_G.append(scores_buf.clone())
            indices_G.append(indices_buf.clone())
            beams_G.append(beams_buf.clone())

            # update diversity penalty
            self.diversity_buf.scatter_add_(
                1,
                indices_buf,
                self.diversity_buf.new_ones(indices_buf.size())
            )

        # interleave results from different groups
        self.scores_buf = torch.stack(scores_G, dim=2, out=self.scores_buf).view(bsz, -1)
        self.indices_buf = torch.stack(indices_G, dim=2, out=self.indices_buf).view(bsz, -1)
        self.beams_buf = torch.stack(beams_G, dim=2, out=self.beams_buf).view(bsz, -1)
        return self.scores_buf, self.indices_buf, self.beams_buf
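The diversity penalty uses the legacy three-argument form torch.add(input, value, other): lprobs_g + diversity_strength * diversity_buf. A toy version of that single step with the current keyword form (values are made up):

import torch

lprobs_g = torch.log(torch.tensor([[0.5, 0.3, 0.2]]))
diversity_buf = torch.tensor([[1.0, 0.0, 2.0]])   # how often each token was already chosen by earlier groups
strength = -0.5                                   # negative strength penalizes reuse
penalized = torch.add(lprobs_g, diversity_buf, alpha=strength)
print(penalized)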
Example #30
    def test_remote_tensor_binary_methods(self):

        hook = TorchHook(verbose = False)
        local = hook.local_worker
        remote = VirtualWorker(hook, 0)
        local.add_worker(remote)

        x = torch.FloatTensor([1, 2, 3, 4, 5]).send(remote)
        y = torch.FloatTensor([1, 2, 3, 4, 5]).send(remote)
        assert (x.add_(y).get() == torch.FloatTensor([2,4,6,8,10])).all()

        x = torch.FloatTensor([1, 2, 3, 4]).send(remote)
        y = torch.FloatTensor([[1, 2, 3, 4]]).send(remote)
        z = torch.matmul(x, y.t())
        assert (torch.equal(z.get(), torch.FloatTensor([30])))

        z = torch.add(x, y)
        assert (torch.equal(z.get(), torch.FloatTensor([[2, 4, 6, 8]])))

        x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]]).send(remote)
        y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]]).send(remote)
        z = torch.cross(x, y, dim=1)
        assert (torch.equal(z.get(), torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]])))

        x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]]).send(remote)
        y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]]).send(remote)
        z = torch.dist(x, y)
        t = torch.FloatTensor([z])
        assert (torch.equal(t, torch.FloatTensor([0.])))

        x = torch.FloatTensor([1, 2, 3]).send(remote)
        y = torch.FloatTensor([1, 2, 3]).send(remote)
        z = torch.dot(x, y)
        t = torch.FloatTensor([z])
        assert torch.equal(t, torch.FloatTensor([14]))

        z = torch.eq(x, y)
        assert (torch.equal(z.get(), torch.ByteTensor([1, 1, 1])))

        z = torch.ge(x, y)
        assert (torch.equal(z.get(), torch.ByteTensor([1, 1, 1])))
def add_mask(net, mask):
    for child in net.children():
        for param in child.conv1[0].parameters():
            param.data = torch.add(param.data, mask[0])
    for child in net.children():
        for param in child.conv2[0].parameters():
            param.data = torch.add(param.data, mask[1])
    for child in net.children():
        for param in child.conv3[0].parameters():
            param.data = torch.add(param.data, mask[2])
    for child in net.children():
        for param in child.conv4[0].parameters():
            param.data = torch.add(param.data, mask[3])
    for child in net.children():
        for param in child.conv5[0].parameters():
            param.data = torch.add(param.data, mask[4])
    for child in net.children():
        for param in child.conv6[0].parameters():
            param.data = torch.add(param.data, mask[5])
    for child in net.children():
        for param in child.conv7[0].parameters():
            param.data = torch.add(param.data, mask[6])
    for child in net.children():
        for param in child.conv8[0].parameters():
            param.data = torch.add(param.data, mask[7])
    for child in net.children():
        for param in child.conv9[0].parameters():
            param.data = torch.add(param.data, mask[8])
    for child in net.children():
        for param in child.conv10[0].parameters():
            param.data = torch.add(param.data, mask[9])
    for child in net.children():
        for param in child.conv11[0].parameters():
            param.data = torch.add(param.data, mask[10])
    for child in net.children():
        for param in child.conv12[0].parameters():
            param.data = torch.add(param.data, mask[11])
    for child in net.children():
        for param in child.conv13[0].parameters():
            param.data = torch.add(param.data, mask[12])

    for child in net.children():
        for param in child.fc1[1].parameters():
            param.data = torch.add(param.data, mask[13])
    for child in net.children():
        for param in child.fc2[1].parameters():
            param.data = torch.add(param.data, mask[14])
    for child in net.children():
        for param in child.fc3[0].parameters():
            param.data = torch.add(param.data, mask[15])
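add_mask applies the same pattern to every layer; a more compact, behavior-equivalent sketch, assuming the same conv1..conv13 / fc1..fc3 attribute layout and mask ordering as above:

import torch

def add_mask_compact(net, mask):
    # conv1..conv13 keep the masked module at index 0; fc1/fc2 at index 1, fc3 at index 0
    specs = [(f'conv{i}', 0) for i in range(1, 14)] + [('fc1', 1), ('fc2', 1), ('fc3', 0)]
    for child in net.children():
        for idx, (name, sub) in enumerate(specs):
            for param in getattr(child, name)[sub].parameters():
                param.data = torch.add(param.data, mask[idx])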
    def forward(self, img):
        # encoder
        e1 = self.conv1_bn(self.conv1(img))
        e10 = F.relu(e1)
        e1a = F.relu(torch.add(self.layere1a(e10), 1, e10))
        e1b = F.relu(torch.add(self.layere1b(e1a), 1, e1a))
        e1c = F.relu(torch.add(self.layere1c(e10), 1, e1b))

        e2 = self.conv2_bn(self.conv2(e1c))
        e20 = F.relu(e2)
        e2a = F.relu(torch.add(self.layere2a(e20), 1, e20))
        e2b = F.relu(torch.add(self.layere2b(e2a), 1, e2a))
        e2c = F.relu(torch.add(self.layere2c(e2b), 1, e2b))

        e3 = self.conv3_bn(self.conv3(e2c))
        e30 = F.relu(e3)
        e3a = F.relu(torch.add(self.layere3a(e30), 1, e30))
        e3b = F.relu(torch.add(self.layere3b(e3a), 1, e3a))
        e3c = F.relu(torch.add(self.layere3c(e3b), 1, e3b))

        e4 = self.conv4_bn(self.conv4(e3c))
        e40 = F.relu(e4)
        e4a = F.relu(torch.add(self.layere4a(e40), 1, e40))
        e4b = F.relu(torch.add(self.layere4b(e4a), 1, e4a))
        e4c = F.relu(torch.add(self.layere4c(e4b), 1, e4b))

        e5 = self.conv5_bn(self.conv5(e4c))
        e50 = F.relu(e5)
        e5a = F.relu(torch.add(self.layere5a(e50), 1, e50))
        e5b = F.relu(torch.add(self.layere5b(e5a), 1, e5a))
        # e5c = Identity_block_for_G(e5b, 512)

        e6 = self.conv6_bn(self.conv6(e5b))
        e60 = F.relu(e6)

        # decoder

        d10 = F.relu(torch.add(self.conv7_bn(self.conv7(e60)), 1, e5))
        d1a = F.relu(torch.add(self.layerd1a(d10), 1, d10))
        d1b = F.relu(torch.add(self.layerd1b(d1a), 1, d1a))
        # d1c = Identity_block_for_G(d1b, 512)

        d20 = F.relu(torch.add(self.conv8_bn(self.conv8(d1b)), 1, e4))
        d2a = F.relu(torch.add(self.layerd2a(d20), 1, d20))
        d2b = F.relu(torch.add(self.layerd2b(d2a), 1, d2a))
        d2c = F.relu(torch.add(self.layerd2c(d2b), 1, d2b))

        d30 = F.relu(torch.add(self.conv9_bn(self.conv9(d2c)), 1, e3))
        d3a = F.relu(torch.add(self.layerd3a(d30), 1, d30))
        d3b = F.relu(torch.add(self.layerd3b(d3a), 1, d3a))
        d3c = F.relu(torch.add(self.layerd3c(d3b), 1, d3b))

        d40 = F.relu(torch.add(self.conv10_bn(self.conv10(d3c)), 1, e2))
        d4a = F.relu(torch.add(self.layerd4a(d40), 1, d40))
        d4b = F.relu(torch.add(self.layerd4b(d4a), 1, d4a))
        d4c = F.relu(torch.add(self.layerd4c(d4b), 1, d4b))

        d50 = F.relu(torch.add(self.conv11_bn(self.conv11(d4c)), 1, e1))
        d5a = F.relu(torch.add(self.layerd5a(d50), 1, d50))
        d5b = F.relu(torch.add(self.layerd5b(d5a), 1, d5a))
        d5c = F.relu(torch.add(self.layerd5c(d5b), 1, d5b))

        d60 = self.conv12_bn(self.conv12(d5c))
        return F.tanh(d60)
Example #33
    def forward(self,
                query,
                key: Optional[Tensor],
                value: Optional[Tensor],
                key_padding_mask: Optional[Tensor] = None,
                incremental_state: Optional[Dict[str, Dict[
                    str, Optional[Tensor]]]] = None,
                need_weights: bool = True,
                static_kv: bool = False,
                attn_mask: Optional[Tensor] = None,
                before_softmax: bool = False,
                need_head_weights: bool = False,
                mask=None,
                loss_type: str = 'nmt') -> Tuple[Tensor, Optional[Tensor]]:

        if need_head_weights:
            need_weights = True

        tgt_len, bsz, embed_dim = query.size()
        assert embed_dim == self.embed_dim
        assert list(query.size()) == [tgt_len, bsz, embed_dim]

        if (not self.onnx_trace
                and not self.tpu  # don't use PyTorch version on TPUs
                and incremental_state is None and not static_kv
                # A workaround for quantization to work. Otherwise JIT compilation
                # treats bias in linear module as method.
                and not torch.jit.is_scripting()):
            assert key is not None and value is not None
            return F.multi_head_attention_forward(
                query,
                key,
                value,
                self.embed_dim,
                self.num_heads,
                torch.empty([0]),
                torch.cat(
                    (self.q_proj.bias, self.k_proj.bias, self.v_proj.bias)),
                self.bias_k,
                self.bias_v,
                self.add_zero_attn,
                self.dropout_module.p,
                self.out_proj.weight,
                self.out_proj.bias,
                self.training or self.dropout_module.apply_during_inference,
                key_padding_mask,
                need_weights,
                attn_mask,
                use_separate_proj_weight=True,
                q_proj_weight=self.q_proj.weight,
                k_proj_weight=self.k_proj.weight,
                v_proj_weight=self.v_proj.weight,
            )

        if incremental_state is not None:
            saved_state = self._get_input_buffer(incremental_state)
            if saved_state is not None and "prev_key" in saved_state:
                # previous time steps are cached - no need to recompute
                # key and value if they are static
                if static_kv:
                    assert self.encoder_decoder_attention and not self.self_attention
                    key = value = None
        else:
            saved_state = None

        if self.self_attention:
            q = self.q_proj(query)
            k = self.k_proj(query)
            v = self.v_proj(query)
        elif self.encoder_decoder_attention:
            # encoder-decoder attention
            q = self.q_proj(query)
            if key is None:
                assert value is None
                k = v = None
            else:
                k = self.k_proj(key)
                v = self.v_proj(key)

        else:
            assert key is not None and value is not None
            q = self.q_proj(query)
            k = self.k_proj(key)
            v = self.v_proj(value)
        q *= self.scaling

        if self.bias_k is not None:
            assert self.bias_v is not None
            k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)])
            v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)])
            if attn_mask is not None:
                attn_mask = torch.cat(
                    [attn_mask,
                     attn_mask.new_zeros(attn_mask.size(0), 1)],
                    dim=1)
            if key_padding_mask is not None:
                key_padding_mask = torch.cat(
                    [
                        key_padding_mask,
                        key_padding_mask.new_zeros(key_padding_mask.size(0),
                                                   1),
                    ],
                    dim=1,
                )

        q = (q.contiguous().view(tgt_len, bsz * self.num_heads,
                                 self.head_dim).transpose(0, 1))
        if k is not None:
            k = (k.contiguous().view(-1, bsz * self.num_heads,
                                     self.head_dim).transpose(0, 1))
        if v is not None:
            v = (v.contiguous().view(-1, bsz * self.num_heads,
                                     self.head_dim).transpose(0, 1))

        if saved_state is not None:
            # saved states are stored with shape (bsz, num_heads, seq_len, head_dim)
            if "prev_key" in saved_state:
                _prev_key = saved_state["prev_key"]
                assert _prev_key is not None
                prev_key = _prev_key.view(bsz * self.num_heads, -1,
                                          self.head_dim)
                if static_kv:
                    k = prev_key
                else:
                    assert k is not None
                    k = torch.cat([prev_key, k], dim=1)
            if "prev_value" in saved_state:
                _prev_value = saved_state["prev_value"]
                assert _prev_value is not None
                prev_value = _prev_value.view(bsz * self.num_heads, -1,
                                              self.head_dim)
                if static_kv:
                    v = prev_value
                else:
                    assert v is not None
                    v = torch.cat([prev_value, v], dim=1)
            prev_key_padding_mask: Optional[Tensor] = None
            if "prev_key_padding_mask" in saved_state:
                prev_key_padding_mask = saved_state["prev_key_padding_mask"]
            assert k is not None and v is not None
            key_padding_mask = MultiheadAttention._append_prev_key_padding_mask(
                key_padding_mask=key_padding_mask,
                prev_key_padding_mask=prev_key_padding_mask,
                batch_size=bsz,
                src_len=k.size(1),
                static_kv=static_kv,
            )

            saved_state["prev_key"] = k.view(bsz, self.num_heads, -1,
                                             self.head_dim)
            saved_state["prev_value"] = v.view(bsz, self.num_heads, -1,
                                               self.head_dim)
            saved_state["prev_key_padding_mask"] = key_padding_mask
            # In this branch incremental_state is never None
            assert incremental_state is not None
            incremental_state = self._set_input_buffer(incremental_state,
                                                       saved_state)
        assert k is not None
        src_len = k.size(1)

        # This is part of a workaround to get around fork/join parallelism
        # not supporting Optional types.
        if key_padding_mask is not None and key_padding_mask.dim() == 0:
            key_padding_mask = None

        if key_padding_mask is not None:
            assert key_padding_mask.size(0) == bsz
            assert key_padding_mask.size(1) == src_len

        if self.add_zero_attn:
            assert v is not None
            src_len += 1
            k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])],
                          dim=1)
            v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])],
                          dim=1)
            if attn_mask is not None:
                attn_mask = torch.cat(
                    [attn_mask,
                     attn_mask.new_zeros(attn_mask.size(0), 1)],
                    dim=1)
            if key_padding_mask is not None:
                key_padding_mask = torch.cat(
                    [
                        key_padding_mask,
                        torch.zeros(key_padding_mask.size(0),
                                    1).type_as(key_padding_mask),
                    ],
                    dim=1,
                )

        attn_weights = torch.bmm(q, k.transpose(1, 2))
        attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len,
                                              bsz)

        assert list(
            attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]

        if attn_mask is not None:
            attn_mask = attn_mask.unsqueeze(0)
            if self.onnx_trace:
                attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1)
            attn_weights += attn_mask

        if key_padding_mask is not None:
            # don't attend to padding symbols
            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len,
                                             src_len)
            if not self.tpu:
                attn_weights = attn_weights.masked_fill(
                    key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool),
                    float("-inf"))
            else:
                attn_weights = attn_weights.transpose(0, 2)
                attn_weights = attn_weights.masked_fill(
                    key_padding_mask, float('-inf'))
                attn_weights = attn_weights.transpose(0, 2)
            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len,
                                             src_len)

        if before_softmax:
            return attn_weights, v

        attn_weights_float = utils.softmax(attn_weights,
                                           dim=-1,
                                           onnx_trace=self.onnx_trace)
        tmp = attn_weights_float

        g = torch.sigmoid(self.gate(q))
        if loss_type == 'nmt':
            attn_weights_float = g * attn_weights_float + (1 - g) * torch.mul(
                attn_weights_float, torch.exp(1 - mask))
        elif loss_type == 'mask':
            # attn_weights_float = torch.mul(attn_weights, mask)
            attn_weights_float = torch.add(
                torch.mul(attn_weights_float, mask),
                torch.mul(torch.mean(attn_weights_float, -1, True), 1 - mask))

        # tmp=attn_weights_float
        # if key_padding_mask is not None:
        #     attn_weights_float = attn_weights_float.view(bsz, self.num_heads, tgt_len, src_len)
        #     attn_weights_float = attn_weights_float.masked_fill(
        #             key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool),
        #             float("-inf")
        #         )
        #     attn_weights_float = attn_weights_float.view(bsz * self.num_heads, tgt_len, src_len)

        # attn_weights_float = utils.softmax(attn_weights_float, dim=-1, onnx_trace=self.onnx_trace)
        attn_weights = attn_weights_float.type_as(attn_weights)

        attn_probs = self.dropout_module(attn_weights)

        assert v is not None
        attn = torch.bmm(attn_probs, v)
        assert list(
            attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
        if self.onnx_trace and attn.size(1) == 1:
            # when ONNX tracing a single decoder step (sequence length == 1)
            # the transpose is a no-op copy before view, thus unnecessary
            attn = attn.contiguous().view(tgt_len, bsz, embed_dim)
        else:
            attn = attn.transpose(0,
                                  1).contiguous().view(tgt_len, bsz, embed_dim)
        attn = self.out_proj(attn)
        attn_weights: Optional[Tensor] = None
        if need_weights:
            attn_weights = attn_weights_float.view(bsz, self.num_heads,
                                                   tgt_len,
                                                   src_len).transpose(1, 0)
            if not need_head_weights:
                # average attention weights over heads
                attn_weights = attn_weights.mean(dim=0)
                # attn_weights = attn_weights[0]
        attn_weights = attn_weights_float.view(bsz, self.num_heads, tgt_len,
                                               src_len).transpose(1, 0)

        return attn, attn_weights, g
Example #34
def bleu_score(translate_corpus: Sequence[str],
               reference_corpus: Sequence[str],
               n_gram: int = 4,
               smooth: bool = False) -> torch.Tensor:
    """Calculate BLEU score of machine translated text with one or more references.

    Args:
        translate_corpus: An iterable of machine translated corpus
        reference_corpus: An iterable of iterables of reference corpus
        n_gram: Gram value ranged from 1 to 4 (Default 4)
        smooth: Whether or not to apply smoothing – Lin et al. 2004

    Return:
        A Tensor with BLEU Score

    Example:

        >>> translate_corpus = ['the cat is on the mat'.split()]
        >>> reference_corpus = [['there is a cat on the mat'.split(), 'a cat is on the mat'.split()]]
        >>> bleu_score(translate_corpus, reference_corpus)
        tensor(0.7598)
    """

    assert len(translate_corpus) == len(reference_corpus)
    numerator = torch.zeros(n_gram)
    denominator = torch.zeros(n_gram)
    precision_scores = torch.zeros(n_gram)
    c = 0.0
    r = 0.0
    for (translation, references) in zip(translate_corpus, reference_corpus):
        c += len(translation)
        ref_len_list = [len(ref) for ref in references]
        ref_len_diff = [abs(len(translation) - x) for x in ref_len_list]
        r += ref_len_list[ref_len_diff.index(min(ref_len_diff))]
        translation_counter = _count_ngram(translation, n_gram)
        reference_counter = Counter()
        for ref in references:
            reference_counter |= _count_ngram(ref, n_gram)

        ngram_counter_clip = translation_counter & reference_counter
        for counter_clip in ngram_counter_clip:
            numerator[len(counter_clip) -
                      1] += ngram_counter_clip[counter_clip]

        for counter in translation_counter:
            denominator[len(counter) - 1] += translation_counter[counter]

    trans_len = torch.tensor(c)
    ref_len = torch.tensor(r)
    if min(numerator) == 0.0:
        return torch.tensor(0.0)

    if smooth:
        precision_scores = torch.add(
            numerator, torch.ones(n_gram)) / torch.add(denominator,
                                                       torch.ones(n_gram))
    else:
        precision_scores = numerator / denominator
    log_precision_scores = torch.tensor(
        [1.0 / n_gram] * n_gram) * torch.log(precision_scores)
    geometric_mean = torch.exp(torch.sum(log_precision_scores))
    brevity_penalty = torch.tensor(1.0) if c > r else torch.exp(1 -
                                                                (ref_len /
                                                                 trans_len))
    bleu = brevity_penalty * geometric_mean

    return bleu
Example #35
 def forward(self, enc_w2v, w2v_lens, enc_kb_emb, key_emb, key_target_emb, mem_weights):
         '''
         (9,274,339, 100) - ent_embed.pkl.npy
         (569, 100) - rel_embed.pkl.npy
         (3,000,000, 300) - w2v
         
         '''
         
         embed_q = self.embed_A(enc_w2v) #seq_len*bs*w2v_emb
         embed_q = torch.cat((embed_q, enc_kb_emb), 2) #seq_len*bs*(w2v_emb + wiki_emb)
         
         packed_q = nn.utils.rnn.pack_padded_sequence(embed_q, w2v_lens, enforce_sorted=False)
         #  pass through GRU
         _, q_state = self.gru(packed_q) #bs*cell_size
         q_state = self.dropout(q_state)
         q_state = q_state.squeeze() # from the encoder [1, hid_s, cell_s]
         
         #TODO: one hop is enough for our experiments
         for hop in range(self.config["hops"]):
             
             # --memory addressing--
             
             q_last = self.C(q_state) # batch_size * (2*wikidata_embed_size)
             #q_last = q_state.mm(self.C).clamp(min=0) # batch_size * (2*wikidata_embed_size)
             q_temp1 = q_last.unsqueeze(1) # batch_size * 1 * (2*wikidata_embed_size)
             
             #q_temp1 = q_temp1/q_temp1.norm(dim=2)[:,:,None] # bs*1*wiki*2  #L2 normalized
             #q_temp1[q_temp1 != q_temp1] = 0
             
             #key_emb #batch_size * size_memory * (2*wikidata_embed_size)
             
             #key_emb = key_emb/key_emb.norm(dim=2)[:,:,None] #bs*sm*wiki*2
             #key_emb[key_emb != key_emb] = 0
             
             #prod = key_emb * q_temp1
             #dotted_1 = torch.sum(prod, 2) #bs * ms
             #same as
             dotted = torch.bmm(q_temp1, key_emb.transpose(2,1)) #bs*1*ms
             dotted = dotted.squeeze(1) #bs*ms
             
             probs = F.softmax(dotted, dim=1) * mem_weights # bs * ms
             probs = torch.unsqueeze(probs, 1) # bs * 1 * ms
             
             # --value reading--
             
             #key_target_emb #bs * ms * wikidata_embed_size
             #values_emb = key_target_emb.transpose(2,1) #bs * wikidata_embed_size * ms, needs this shape when values_emb * probs
             #TODO: confirm, should be a weighted sum over value entries (e.g. dim 1), not of embedding dimension.
             #o_k = torch.sum(values_emb * probs, 2) #bs * wikidata_embed_size
             o_k = torch.bmm(probs, key_target_emb) #bs * 1 * wiki_size
             o_k = o_k.squeeze(1)
            
             #o_k = o_k.mm(self.R_1).clamp(min=0) #bs * cell_size
             o_k = self.R_1(o_k) #bs * cell_size
             
             q_state = torch.add(q_state, o_k)
             
             
         # find candidates, candidates are the value cells. (there is no other candidates in the data)
         
         #temp_1 = q_state.mm(self.B).clamp(min=0) #bs * wiki_embed
         temp_1 = self.B(q_state) #bs * wiki_embed
         temp_1 = temp_1.unsqueeze(1) # bs * 1 * wiki_embed
         
         #key_target_emb #bs * ms * wikidata_embed_size
         
         prob_mem = torch.sum(temp_1 * key_target_emb, 2) # batch_size * size_memory
         #prob_mem = F.log_softmax(prob_mem, dim=1) #NOTE: do not pass trough softmax if sigmoid is used
         #prob_mem = F.softmax(prob_mem, dim=1)
         
         mem_output = torch.sigmoid(prob_mem) * mem_weights
         
         return mem_output
Example #36
def compute_loss_with_gradnorm(batch_X,
                               batch_y_segmt,
                               batch_y_depth,
                               batch_mask_segmt,
                               batch_mask_depth,
                               model,
                               task_weights=None,
                               l01=None,
                               l02=None,
                               criterion=None,
                               criterion2=None,
                               optimizer=None,
                               optimizer2=None,
                               is_train=True,
                               epoch=1):

    model.train(is_train)

    batch_X = batch_X.to(device, non_blocking=True)
    batch_y_segmt = batch_y_segmt.to(device, non_blocking=True)
    batch_y_depth = batch_y_depth.to(device, non_blocking=True)
    batch_mask_segmt = batch_mask_segmt.to(device, non_blocking=True)
    batch_mask_depth = batch_mask_depth.to(device, non_blocking=True)

    output = model(batch_X)
    image_loss, label_loss = criterion(output,
                                       batch_y_segmt,
                                       batch_y_depth,
                                       batch_mask_segmt,
                                       batch_mask_depth,
                                       task_weights=task_weights)

    if is_train:

        alpha = 0.16

        l1 = task_weights[0] * image_loss * 0.5
        l2 = task_weights[1] * label_loss * 0.5

        if epoch == 1:
            l01 = l1.data
            l02 = l2.data

        optimizer.zero_grad()
        l1.backward(retain_graph=True)
        l2.backward(retain_graph=True)

        param = list(model.pretrained_encoder.layer4[-1].conv2.parameters())
        G1R = torch.autograd.grad(l1,
                                  param[0],
                                  retain_graph=True,
                                  create_graph=True)
        G1 = torch.norm(G1R[0], 2)
        G2R = torch.autograd.grad(l2,
                                  param[0],
                                  retain_graph=True,
                                  create_graph=True)
        G2 = torch.norm(G2R[0], 2)
        G_avg = (G1 + G2) / 2

        # Calculating relative losses
        lhat1 = torch.div(l1, l01)
        lhat2 = torch.div(l2, l02)
        lhat_avg = (lhat1 + lhat2) / 2

        # Calculating relative inverse training rates for tasks
        inv_rate1 = torch.div(lhat1, lhat_avg)
        inv_rate2 = torch.div(lhat2, lhat_avg)

        # Calculating the constant target for Eq. 2 in the GradNorm paper
        C1 = G_avg * inv_rate1**alpha
        C2 = G_avg * inv_rate2**alpha
        C1 = C1.detach()
        C2 = C2.detach()

        optimizer2.zero_grad()
        # Calculating the gradient loss according to Eq. 2 in the GradNorm paper
        Lgrad = torch.add(criterion2(G1, C1), criterion2(G2, C2))
        Lgrad.backward()

        # Updating loss weights
        optimizer2.step()

        optimizer.step()

    return (task_weights[0] * image_loss).item() + (
        task_weights[1] * label_loss).item(), l01, l02
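A compact, self-contained sketch of the GradNorm weight update that the function above performs. The single shared parameter, the two toy losses, and the L1 criterion standing in for criterion2 are assumptions for illustration only.

import torch

# toy setup: one shared parameter, two scalar task losses, learnable task weights
shared = torch.nn.Parameter(torch.randn(4))
w = torch.nn.Parameter(torch.ones(2))        # task weights, updated by GradNorm
opt_w = torch.optim.Adam([w], lr=1e-2)
alpha = 0.16

def gradnorm_step(losses, l0):
    """One GradNorm update of the task weights.

    losses: list of two scalar losses that depend on `shared`
    l0:     list of the initial (detached) losses, used for relative rates
    """
    weighted = [w[i] * losses[i] for i in range(2)]

    # gradient norm of each weighted loss w.r.t. the shared parameter
    g = [torch.norm(torch.autograd.grad(weighted[i], shared,
                                        retain_graph=True,
                                        create_graph=True)[0])
         for i in range(2)]
    g_avg = (g[0] + g[1]) / 2

    # relative inverse training rates
    lhat = [losses[i].detach() / l0[i] for i in range(2)]
    lhat_avg = (lhat[0] + lhat[1]) / 2
    inv_rate = [lhat[i] / lhat_avg for i in range(2)]

    # constant targets for the gradient norms (Eq. 2 of the GradNorm paper)
    targets = [(g_avg * inv_rate[i] ** alpha).detach() for i in range(2)]

    # L1 distance between gradient norms and their targets
    lgrad = torch.abs(g[0] - targets[0]) + torch.abs(g[1] - targets[1])
    opt_w.zero_grad()
    lgrad.backward()
    opt_w.step()

# toy losses that both depend on the shared parameter
l1 = (shared ** 2).sum()
l2 = (shared - 1.0).abs().sum()
gradnorm_step([l1, l2], l0=[l1.detach(), l2.detach()])
print(w.data)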
Exemplo n.º 37
0
    def _generate(self, src_tokens, beam_size=None, maxlen=None):
        bsz, srclen = src_tokens.size()
        maxlen = min(maxlen,
                     self.maxlen) if maxlen is not None else self.maxlen

        # the max beam size is the dictionary size - 1, since we never select pad
        beam_size = beam_size if beam_size is not None else self.beam_size
        beam_size = min(beam_size, self.vocab_size - 1)

        encoder_outs = []
        for model in self.models:
            if not self.retain_dropout:
                model.eval()
            if isinstance(model.decoder, FairseqIncrementalDecoder):
                model.decoder.set_beam_size(beam_size)

            # compute the encoder output for each beam
            encoder_out = model.encoder(
                src_tokens.repeat(1, beam_size).view(-1, srclen))
            encoder_outs.append(encoder_out)

        # initialize buffers
        scores = encoder_outs[0][0].data.new(bsz * beam_size).fill_(0)
        tokens = src_tokens.data.new(bsz * beam_size,
                                     maxlen + 2).fill_(self.pad)
        tokens_buf = tokens.clone()
        tokens[:, 0] = self.eos
        attn = scores.new(bsz * beam_size, src_tokens.size(1), maxlen + 2)
        attn_buf = attn.clone()

        # list of completed sentences
        finalized = [[] for i in range(bsz)]
        finished = [False for i in range(bsz)]
        worst_finalized = [{
            'idx': None,
            'score': float('Inf')
        } for i in range(bsz)]
        num_remaining_sent = bsz

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = (torch.arange(0, bsz) *
                        beam_size).unsqueeze(1).type_as(tokens)
        cand_offsets = torch.arange(0, cand_size).type_as(tokens)

        # helper function for allocating buffers on the fly
        buffers = {}

        def buffer(name, type_of=tokens):  # noqa
            if name not in buffers:
                buffers[name] = type_of.new()
            return buffers[name]

        def is_finished(sent):
            """
            Check whether we've finished generation for a given sentence, by
            comparing the worst score among finalized hypotheses to the best
            possible score among unfinalized hypotheses.
            """
            assert len(finalized[sent]) <= beam_size
            if len(finalized[sent]) == beam_size:
                if self.stop_early:
                    return True
                # stop if the best unfinalized score is worse than the worst
                # finalized one
                bbsz = sent * beam_size
                best_unfinalized_score = scores[bbsz:bbsz + beam_size].max()
                if self.normalize_scores:
                    best_unfinalized_score /= maxlen
                if worst_finalized[sent]['score'] >= best_unfinalized_score:
                    return True
            return False

        def finalize_hypos(step, bbsz_idx, scores):
            """
            Finalize the given hypotheses at this step, while keeping the total
            number of finalized hypotheses per sentence <= beam_size.

            Note: the input must be in the desired finalization order, so that
            hypotheses that appear earlier in the input are preferred to those
            that appear later.

            Args:
                step: current time step
                bbsz_idx: A vector of indices in the range [0, bsz*beam_size),
                    indicating which hypotheses to finalize
                scores: A vector of the same size as bbsz_idx containing scores
                    for each hypothesis
            """
            assert bbsz_idx.numel() == scores.numel()
            norm_scores = scores / math.pow(
                step +
                1, self.len_penalty) if self.normalize_scores else scores
            sents_seen = set()
            for idx, score in zip(bbsz_idx.cpu(), norm_scores.cpu()):
                sent = idx // beam_size
                sents_seen.add(sent)

                def get_hypo():
                    hypo = tokens[
                        idx, 1:step +
                        2].clone()  # skip the first index, which is EOS
                    hypo[step] = self.eos
                    attention = attn[idx, :, 1:step + 2].clone()
                    _, alignment = attention.max(dim=0)
                    return {
                        'tokens': hypo,
                        'score': score,
                        'attention': attention,
                        'alignment': alignment,
                    }

                if len(finalized[sent]) < beam_size:
                    finalized[sent].append(get_hypo())
                elif score > worst_finalized[sent]['score']:
                    # replace worst hypo for this sentence with new/better one
                    worst_idx = worst_finalized[sent]['idx']
                    finalized[sent][worst_idx] = get_hypo()

                    # find new worst finalized hypo for this sentence
                    idx, s = min(enumerate(finalized[sent]),
                                 key=lambda r: r[1]['score'])
                    worst_finalized[sent] = {
                        'score': s['score'],
                        'idx': idx,
                    }

            # return number of hypotheses finished this step
            num_finished = 0
            for sent in sents_seen:
                # check termination conditions for this sentence
                if not finished[sent] and is_finished(sent):
                    finished[sent] = True
                    num_finished += 1
            return num_finished

        reorder_state = None
        for step in range(maxlen + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                for model in self.models:
                    if isinstance(model.decoder, FairseqIncrementalDecoder):
                        model.decoder.reorder_incremental_state(reorder_state)

            probs, avg_attn_scores = self._decode(tokens[:, :step + 1],
                                                  encoder_outs)
            if step == 0:
                # at the first step all hypotheses are equally likely, so use
                # only the first beam
                probs = probs.unfold(0, 1, beam_size).squeeze(2).contiguous()
            else:
                # make probs contain cumulative scores for each hypothesis
                probs.add_(scores.view(-1, 1))
            probs[:, self.pad] = -math.inf  # never select pad
            probs[:, self.unk] -= self.unk_penalty  # apply unk penalty

            # Record attention scores
            attn[:, :, step + 1].copy_(avg_attn_scores)

            # take the best 2 x beam_size predictions. We'll choose the first
            # beam_size of these which don't predict eos to continue with.
            cand_scores = buffer('cand_scores', type_of=scores)
            cand_indices = buffer('cand_indices')
            cand_beams = buffer('cand_beams')
            probs.view(bsz, -1).topk(
                min(cand_size,
                    probs.view(bsz, -1).size(1) -
                    1),  # -1 so we never select pad
                out=(cand_scores, cand_indices))
            torch.div(cand_indices, self.vocab_size, out=cand_beams)
            cand_indices.fmod_(self.vocab_size)

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add_(bbsz_offsets)

            # finalize hypotheses that end in eos
            eos_mask = cand_indices.eq(self.eos)
            if step >= self.minlen:
                eos_bbsz_idx = buffer('eos_bbsz_idx')
                # only consider eos when it's among the top beam_size indices
                cand_bbsz_idx[:, :beam_size].masked_select(
                    eos_mask[:, :beam_size], out=eos_bbsz_idx)
                if eos_bbsz_idx.numel() > 0:
                    eos_scores = buffer('eos_scores', type_of=scores)
                    cand_scores[:, :beam_size].masked_select(
                        eos_mask[:, :beam_size], out=eos_scores)
                    num_remaining_sent -= finalize_hypos(
                        step, eos_bbsz_idx, eos_scores)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break

            # set active_mask so that values > cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos.
            # After, the min values per row are the top candidate active hypos
            active_mask = buffer('active_mask')
            torch.add(eos_mask.type_as(cand_offsets) * cand_size,
                      cand_offsets[:eos_mask.size(1)],
                      out=active_mask)

            # get the top beam_size active hypotheses, which are just the hypos
            # with the smallest values in active_mask
            active_hypos, _ignore = buffer('active_hypos'), buffer('_ignore')
            active_mask.topk(beam_size,
                             1,
                             largest=False,
                             out=(_ignore, active_hypos))
            active_bbsz_idx = buffer('active_bbsz_idx')
            cand_bbsz_idx.gather(1, active_hypos, out=active_bbsz_idx)
            active_scores = cand_scores.gather(1,
                                               active_hypos,
                                               out=scores.view(bsz, beam_size))

            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # finalize all active hypotheses once we hit maxlen
            # finalize_hypos will take care of adding the EOS markers
            if step == maxlen:
                num_remaining_sent -= finalize_hypos(step, active_bbsz_idx,
                                                     active_scores)
                assert num_remaining_sent == 0
                break

            # copy tokens for active hypotheses
            torch.index_select(tokens[:, :step + 1],
                               dim=0,
                               index=active_bbsz_idx,
                               out=tokens_buf[:, :step + 1])
            cand_indices.gather(1,
                                active_hypos,
                                out=tokens_buf.view(bsz, beam_size,
                                                    -1)[:, :, step + 1])

            # copy attention for active hypotheses
            torch.index_select(attn[:, :, :step + 2],
                               dim=0,
                               index=active_bbsz_idx,
                               out=attn_buf[:, :, :step + 2])

            # swap buffers
            old_tokens = tokens
            tokens = tokens_buf
            tokens_buf = old_tokens
            old_attn = attn
            attn = attn_buf
            attn_buf = old_attn

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(bsz):
            finalized[sent] = sorted(finalized[sent],
                                     key=lambda r: r['score'],
                                     reverse=True)

        return finalized
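The active_mask construction above relies on a small indexing trick: EOS candidates are pushed up by cand_size, so the beam_size smallest values per row are exactly the top non-EOS candidates. A tiny numeric sketch with made-up values:

import torch

beam_size, cand_size = 2, 4
# one sentence, candidates already sorted by score; True where the candidate is EOS
eos_mask = torch.tensor([[False, True, False, False]])
cand_offsets = torch.arange(0, cand_size)

# EOS candidates get an offset of cand_size, so they can never be among the
# beam_size smallest values of the row
active_mask = eos_mask.long() * cand_size + cand_offsets

_, active_hypos = active_mask.topk(beam_size, dim=1, largest=False)
print(active_mask)    # tensor([[0, 5, 2, 3]])
print(active_hypos)   # tensor([[0, 2]]) -> candidates 0 and 2 stay active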
Exemplo n.º 38
0
    def _decode_target(
        self,
        encoder_input,
        encoder_outs,
        incremental_states,
        diversity_sibling_gamma=0.0,
        beam_size=None,
        maxlen=None,
        prefix_tokens=None,
    ):
        src_tokens_tensor = pytorch_translate_utils.get_source_tokens_tensor(
            encoder_input["src_tokens"])
        beam_size = beam_size if beam_size is not None else self.beam_size
        bsz = src_tokens_tensor.size(0)
        reorder_indices = (torch.arange(bsz).view(-1, 1).repeat(
            1, beam_size).view(-1).long())
        for i, model in enumerate(self.models):
            encoder_outs[i] = model.encoder.reorder_encoder_out(
                encoder_out=encoder_outs[i],
                new_order=reorder_indices.type_as(src_tokens_tensor),
            )
        maxlen = min(maxlen,
                     self.maxlen) if maxlen is not None else self.maxlen
        # initialize buffers
        scores = src_tokens_tensor.new(bsz * beam_size,
                                       maxlen + 1).float().fill_(0)
        scores_buf = scores.clone()
        tokens = src_tokens_tensor.new(bsz * beam_size,
                                       maxlen + 2).fill_(self.pad)
        tokens_buf = tokens.clone()
        tokens[:, 0] = self.eos

        # may differ from input length
        if isinstance(encoder_outs[0], (list, tuple)):
            src_encoding_len = encoder_outs[0][0].size(0)
        elif isinstance(encoder_outs[0], dict):
            if isinstance(encoder_outs[0]["encoder_out"], tuple):
                # Fairseq compatibility
                src_encoding_len = encoder_outs[0]["encoder_out"][0].size(1)
            else:
                src_encoding_len = encoder_outs[0]["encoder_out"].size(0)

        attn = scores.new(bsz * beam_size, src_encoding_len, maxlen + 2)
        attn_buf = attn.clone()

        # list of completed sentences
        finalized = [[] for i in range(bsz)]
        finished = [False for i in range(bsz)]
        worst_finalized = [{
            "idx": None,
            "score": -math.inf
        } for i in range(bsz)]
        num_remaining_sent = bsz

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = (torch.arange(0, bsz) *
                        beam_size).unsqueeze(1).type_as(tokens)
        cand_offsets = torch.arange(0, cand_size).type_as(tokens)

        # helper function for allocating buffers on the fly
        buffers = {}

        # init constraints
        constraints = self._build_constraints(src_tokens_tensor, beam_size)

        def buffer(name, type_of=tokens):  # noqa
            if name not in buffers:
                buffers[name] = type_of.new()
            return buffers[name]

        def is_finished(sent, step, unfinalized_scores=None):
            """
            Check whether we've finished generation for a given sentence, by
            comparing the worst score among finalized hypotheses to the best
            possible score among unfinalized hypotheses.
            """
            assert len(finalized[sent]) <= beam_size
            if len(finalized[sent]) == beam_size:
                if self.stop_early or step == maxlen or unfinalized_scores is None:
                    return True
                # stop if the best unfinalized score is worse than the worst
                # finalized one
                best_unfinalized_score = unfinalized_scores[sent].max()
                if self.normalize_scores:
                    best_unfinalized_score /= (maxlen + 1)**self.len_penalty
                if worst_finalized[sent]["score"] >= best_unfinalized_score:
                    return True
            return False

        def finalize_hypos(step,
                           bbsz_idx,
                           eos_scores,
                           unfinalized_scores=None):
            """
            Finalize the given hypotheses at this step, while keeping the total
            number of finalized hypotheses per sentence <= beam_size.

            Note: the input must be in the desired finalization order, so that
            hypotheses that appear earlier in the input are preferred to those
            that appear later.

            Args:
                step: current time step
                bbsz_idx: A vector of indices in the range [0, bsz*beam_size),
                    indicating which hypotheses to finalize
                eos_scores: A vector of the same size as bbsz_idx containing
                    scores for each hypothesis
                unfinalized_scores: A vector containing scores for all
                    unfinalized hypotheses
            """
            assert bbsz_idx.numel() == eos_scores.numel()

            # clone relevant token and attention tensors
            tokens_clone = tokens.index_select(0, bbsz_idx)
            tokens_clone = tokens_clone[:, 1:step +
                                        2]  # skip the first index, which is EOS
            tokens_clone[:, step] = self.eos
            attn_clone = attn.index_select(0, bbsz_idx)[:, :, 1:step + 2]

            # compute scores per token position
            pos_scores = scores.index_select(0, bbsz_idx)[:, :step + 1]
            pos_scores[:, step] = eos_scores
            # convert from cumulative to per-position scores
            pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1]

            # normalize sentence-level scores
            if self.normalize_scores:
                eos_scores /= (step + 1)**self.len_penalty

            sents_seen = set()
            for i, (idx, score) in enumerate(
                    zip(bbsz_idx.tolist(), eos_scores.tolist())):
                sent = idx // beam_size
                sents_seen.add(sent)

                def get_hypo():
                    _, alignment = attn_clone[i].max(dim=0)
                    return {
                        "tokens": tokens_clone[i],
                        "score": score,
                        "attention": attn_clone[i],  # src_len x tgt_len
                        "alignment": alignment,
                        "positional_scores": pos_scores[i],
                    }

                if len(finalized[sent]) < beam_size:
                    finalized[sent].append(get_hypo())
                elif not self.stop_early and score > worst_finalized[sent][
                        "score"]:
                    # replace worst hypo for this sentence with new/better one
                    worst_idx = worst_finalized[sent]["idx"]
                    if worst_idx is not None:
                        finalized[sent][worst_idx] = get_hypo()

                    # find new worst finalized hypo for this sentence
                    idx, s = min(enumerate(finalized[sent]),
                                 key=lambda r: r[1]["score"])
                    worst_finalized[sent] = {"score": s["score"], "idx": idx}

            # return number of hypotheses finished this step
            num_finished = 0
            for sent in sents_seen:
                # check termination conditions for this sentence
                if not finished[sent] and is_finished(sent, step,
                                                      unfinalized_scores):
                    finished[sent] = True
                    num_finished += 1
            return num_finished

        reorder_state = None
        for step in range(maxlen + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                for model in self.models:
                    if isinstance(model.decoder, FairseqIncrementalDecoder):
                        model.decoder.reorder_incremental_state(
                            incremental_states[model], reorder_state)
            # Run decoder for one step
            logprobs, avg_attn, possible_translation_tokens = self._decode(
                tokens[:, :step + 1], encoder_outs, incremental_states)

            logprobs[:, self.pad] = -math.inf  # never select pad
            # apply unk reward
            if possible_translation_tokens is None:
                # No vocab reduction, so unk is represented by self.unk at
                # position self.unk
                unk_index = self.unk
                logprobs[:, unk_index] += self.unk_reward
            else:
                # When we use vocab reduction, the token value self.unk may not
                # be at the position self.unk, but somewhere else in the list
                # of possible_translation_tokens. It's also possible not to
                # show up in possible_translation_tokens at all, meaning we
                # can't generate an unk.
                unk_pos = torch.nonzero(
                    possible_translation_tokens == self.unk)
                if unk_pos.size()[0] != 0:
                    # only add unk_reward if unk index appears in
                    # possible_translation_tokens
                    unk_index = unk_pos[0][0]
                    logprobs[:, unk_index] += self.unk_reward
            # external lexicon reward
            logprobs[:, self.lexicon_indices] += self.lexicon_reward

            logprobs += self.word_reward
            logprobs[:, self.eos] -= self.word_reward
            # Record attention scores
            attn[:, :, step + 1].copy_(avg_attn)

            cand_scores = buffer("cand_scores", type_of=scores)
            cand_indices = buffer("cand_indices")
            cand_beams = buffer("cand_beams")
            eos_bbsz_idx = buffer("eos_bbsz_idx")
            eos_scores = buffer("eos_scores", type_of=scores)
            scores = scores.type_as(logprobs)
            scores_buf = scores_buf.type_as(logprobs)

            if step < maxlen:
                self._apply_constraint_penalty(scores)  # stub call
                if prefix_tokens is not None and step < prefix_tokens.size(1):
                    logprobs_slice = logprobs.view(bsz, -1,
                                                   logprobs.size(-1))[:, 0, :]
                    cand_scores = torch.gather(
                        logprobs_slice,
                        dim=1,
                        index=prefix_tokens[:, step].view(-1, 1)).expand(
                            -1, cand_size)
                    cand_indices = (prefix_tokens[:, step].view(-1, 1).expand(
                        bsz, cand_size))
                    cand_beams.resize_as_(cand_indices).fill_(0)
                else:
                    possible_tokens_size = self.vocab_size
                    if possible_translation_tokens is not None:
                        possible_tokens_size = possible_translation_tokens.size(
                            0)
                    if diversity_sibling_gamma > 0:
                        logprobs = self.diversity_sibling_rank(
                            logprobs.view(bsz, -1, possible_tokens_size),
                            diversity_sibling_gamma,
                        )
                    cand_scores, cand_indices, cand_beams = self.search.step(
                        step,
                        logprobs.view(bsz, -1, possible_tokens_size),
                        scores.view(bsz, beam_size, -1)[:, :, :step],
                    )
                    # vocabulary reduction
                    if possible_translation_tokens is not None:
                        possible_translation_tokens = possible_translation_tokens.view(
                            1, possible_tokens_size).expand(
                                cand_indices.size(0), possible_tokens_size)
                        cand_indices = torch.gather(
                            possible_translation_tokens,
                            dim=1,
                            index=cand_indices,
                            out=cand_indices,
                        )
            else:
                # finalize all active hypotheses once we hit maxlen
                # pick the hypothesis with the highest log prob of EOS right now
                logprobs.add_(scores[:, step - 1].view(-1, 1))
                torch.sort(
                    logprobs[:, self.eos],
                    descending=True,
                    out=(eos_scores, eos_bbsz_idx),
                )
                num_remaining_sent -= finalize_hypos(step, eos_bbsz_idx,
                                                     eos_scores)
                assert num_remaining_sent == 0
                break

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add_(bbsz_offsets)

            # finalize hypotheses that end in eos
            eos_mask = cand_indices.eq(self.eos)
            if step >= self.minlen:
                # only consider eos when it's among the top beam_size indices
                torch.masked_select(
                    cand_bbsz_idx[:, :beam_size],
                    mask=eos_mask[:, :beam_size],
                    out=eos_bbsz_idx,
                )
                if eos_bbsz_idx.numel() > 0:
                    torch.masked_select(
                        cand_scores[:, :beam_size],
                        mask=eos_mask[:, :beam_size],
                        out=eos_scores,
                    )
                    self._apply_eos_constraints(constraints, eos_bbsz_idx,
                                                eos_scores)
                    num_remaining_sent -= finalize_hypos(
                        step, eos_bbsz_idx, eos_scores, cand_scores)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            assert step < maxlen

            # set active_mask so that values > cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos.
            # After, the min values per row are the top candidate active hypos
            active_mask = buffer("active_mask")
            torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[:eos_mask.size(1)],
                out=active_mask,
            )

            # get the top beam_size active hypotheses, which are just the hypos
            # with the smallest values in active_mask
            active_hypos, _ignore = buffer("active_hypos"), buffer("_ignore")
            torch.topk(
                active_mask,
                k=beam_size,
                dim=1,
                largest=False,
                out=(_ignore, active_hypos),
            )
            active_bbsz_idx = buffer("active_bbsz_idx")
            torch.gather(cand_bbsz_idx,
                         dim=1,
                         index=active_hypos,
                         out=active_bbsz_idx)
            active_scores = torch.gather(
                cand_scores,
                dim=1,
                index=active_hypos,
                out=scores[:, step].view(bsz, beam_size),
            )
            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses
            torch.index_select(
                tokens[:, :step + 1],
                dim=0,
                index=active_bbsz_idx,
                out=tokens_buf[:, :step + 1],
            )
            torch.gather(
                cand_indices,
                dim=1,
                index=active_hypos,
                out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1],
            )
            # update constraints for next step
            constraints = self._reorder_constraints(constraints,
                                                    active_bbsz_idx)
            self._update_constraints(constraints, tokens_buf[:, step + 1],
                                     step)
            if step > 0:
                torch.index_select(
                    scores[:, :step],
                    dim=0,
                    index=active_bbsz_idx,
                    out=scores_buf[:, :step],
                )
            torch.gather(
                cand_scores,
                dim=1,
                index=active_hypos,
                out=scores_buf.view(bsz, beam_size, -1)[:, :, step],
            )

            # copy attention for active hypotheses
            torch.index_select(
                attn[:, :, :step + 2],
                dim=0,
                index=active_bbsz_idx,
                out=attn_buf[:, :, :step + 2],
            )

            # swap buffers
            tokens, tokens_buf = tokens_buf, tokens
            scores, scores_buf = scores_buf, scores
            attn, attn_buf = attn_buf, attn

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(bsz):
            finalized[sent] = sorted(finalized[sent],
                                     key=lambda r: r["score"],
                                     reverse=True)
        self._finalize_constrained_results(finalized, scores.device)
        return finalized
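finalize_hypos above recovers per-token scores from the cumulative beam scores by differencing along the time axis; a minimal sketch with made-up numbers:

import torch

# cumulative log-prob after each generated token for one hypothesis
cum_scores = torch.tensor([[-0.5, -1.2, -2.0, -2.3]])

pos_scores = cum_scores.clone()
# score of token t = cumulative score at t minus cumulative score at t-1
pos_scores[:, 1:] = cum_scores[:, 1:] - cum_scores[:, :-1]
print(pos_scores)  # tensor([[-0.5000, -0.7000, -0.8000, -0.3000]])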
Exemplo n.º 39
0
 def forward(self, inputs):
     return torch.add(*inputs)
 def discriminator_loss(gen_images, real_images):
     real = real_images.new_full((real_images.shape[0], 1), real_label)
     gen = gen_images.new_full((gen_images.shape[0], 1), fake_label)
     realloss = disc_loss_criterion(disc_net(real_images), real)
     genloss = disc_loss_criterion(disc_net(gen_images.detach()), gen)
     return torch.div(torch.add(realloss, genloss), 2)
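discriminator_loss above depends on module-level globals that are not shown (disc_net, disc_loss_criterion, real_label, fake_label). A self-contained sketch that fills them in with toy stand-ins so the function runs end to end; the tiny linear discriminator and BCE criterion are assumptions, not the original setup.

import torch
import torch.nn as nn

real_label, fake_label = 1.0, 0.0
disc_net = nn.Sequential(nn.Flatten(), nn.Linear(8, 1), nn.Sigmoid())
disc_loss_criterion = nn.BCELoss()

def discriminator_loss(gen_images, real_images):
    real = real_images.new_full((real_images.shape[0], 1), real_label)
    gen = gen_images.new_full((gen_images.shape[0], 1), fake_label)
    realloss = disc_loss_criterion(disc_net(real_images), real)
    # detach so no gradients flow back into the generator
    genloss = disc_loss_criterion(disc_net(gen_images.detach()), gen)
    return torch.div(torch.add(realloss, genloss), 2)

print(discriminator_loss(torch.randn(4, 8), torch.randn(4, 8)))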
Exemplo n.º 41
0
def compute_unary_term(heatmap, grid, bbox2D, cam, imgSize, **kwargs):
    """
    Args:
        heatmap: array of size (n * k * h * w)
                -n: number of views,  -k: number of joints
                -h: heatmap height,   -w: heatmap width
        grid: list of k ndarrays of size (nbins * 3)
                    -k: number of joints; 1 when the grid is shared in PSM
                    -nbins: number of bins in the grid
        bbox2D: bounding box on which heatmap is computed
    Returns:
        unary_tensor: tensor of shape (k, nbins) with per-joint unary terms summed over all views
    """

    n, k = heatmap.shape[0], heatmap.shape[1]
    h, w = heatmap.shape[2], heatmap.shape[3]
    nbins = grid[0].shape[0]
    current_device = torch.device('cuda:{}'.format(heatmap.get_device()))

    # unary_of_all_joints = []
    # for j in range(k):
    #     unary = np.zeros(nbins, dtype=np.float32)
    #     for c in range(n):
    #
    #         grid_id = 0 if len(grid) == 1 else j
    #         xy = cameras.project_pose(grid[grid_id], cam[c])
    #         trans = get_affine_transform(bbox2D[c]['center'],
    #                                      bbox2D[c]['scale'], 0, imgSize)
    #
    #         xy = affine_transform_pts(xy, trans) * np.array([w, h]) / imgSize
    #         # for i in range(nbins):
    #         #     xy[i] = affine_transform(xy[i], trans) * np.array([w, h]) / imgSize
    #
    #         hmap = heatmap[c, j, :, :]
    #         point_x, point_y = np.arange(hmap.shape[0]), np.arange(
    #             hmap.shape[1])
    #         rgi = RegularGridInterpolator(
    #             points=[point_x, point_y],
    #             values=hmap.transpose(),
    #             bounds_error=False,
    #             fill_value=0)
    #         score = rgi(xy)
    #         unary = unary + np.reshape(score, newshape=unary.shape)
    #     unary_of_all_joints.append(unary)
    # return unary_of_all_joints

    # torch version
    # heatmaps = torch.tensor(heatmap, dtype=torch.float32)
    heatmaps = heatmap
    grid_cords = np.zeros([n, k, nbins, 2], dtype=np.float32)
    for c in range(n):
        for j in range(k):
            grid_id = 0 if len(grid) == 1 else j
            xy = cameras.project_pose(grid[grid_id], cam[c])
            trans = get_affine_transform(bbox2D[c]['center'],
                                         bbox2D[c]['scale'], 0, imgSize)
            xy = affine_transform_pts(xy, trans) * np.array([w, h]) / imgSize
            # xy has shape (nbins, 2), e.g. (4096, 2)
            # xy holds the projected grid coordinates for this view and joint
            if len(grid) == 1:  # PSM: a single grid (e.g. 4096 bins) shared by all joints
                grid_cords[c, 0, :, :] = xy / np.array(
                    [h - 1, w - 1], dtype=np.float32) * 2.0 - 1.0
                for j in range(1, k):
                    grid_cords[c, j, :, :] = grid_cords[c, 0, :, :]
                break  # all joints share the same grid, so compute it once and copy it to the rest
            else:
                grid_cords[c, j, :, :] = xy / np.array(
                    [h - 1, w - 1], dtype=np.float32) * 2.0 - 1.0

    grid_cords_tensor = torch.as_tensor(grid_cords).to(current_device)
    unary_all_views_joints = grid_sample(heatmaps, grid_cords_tensor)
    # unary_all_views_joints -> shape (n, k, k, nbins), e.g. (4, 16, 16, 4096)
    unary_all_views = torch.zeros(n, k, nbins).to(current_device)
    for j in range(k):
        unary_all_views[:, j, :] = unary_all_views_joints[:, j, j, :]
    unary_tensor = torch.zeros(k, nbins).to(current_device)
    for una in unary_all_views:
        unary_tensor = torch.add(unary_tensor, una)

    return unary_tensor
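The key step above is mapping projected pixel coordinates into the [-1, 1] range expected by F.grid_sample before interpolating heatmap values. A minimal sketch of that normalization, using grid_sample's (x, y) ordering and align_corners=True; shapes and values are illustrative.

import torch
import torch.nn.functional as F

def sample_heatmap(heatmap, xy):
    """Bilinearly sample a heatmap at pixel coordinates.

    heatmap: (h, w) tensor
    xy:      (nbins, 2) pixel coordinates, columns are (x, y)
    """
    h, w = heatmap.shape
    # normalize x by (w - 1) and y by (h - 1) into [-1, 1]
    norm = xy / torch.tensor([w - 1, h - 1], dtype=torch.float32) * 2.0 - 1.0
    # grid_sample expects (N, C, H, W) input and (N, H_out, W_out, 2) grid
    grid = norm.view(1, 1, -1, 2)
    sampled = F.grid_sample(heatmap.view(1, 1, h, w), grid, align_corners=True)
    return sampled.view(-1)  # (nbins,)

hm = torch.zeros(4, 4)
hm[1, 2] = 1.0                       # peak at row 1 (y), column 2 (x)
print(sample_heatmap(hm, torch.tensor([[2.0, 1.0]])))  # tensor([1.])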
Exemplo n.º 42
0
 def forward(self, x, i):
     mm0 = torch.mul(x, self.w0)
     mm1 = torch.add(mm0, self.w1)
     return mm1
Exemplo n.º 43
0
def local_add(t1, t2):
    return torch.add(t1, t2)
Exemplo n.º 44
0
 def forward(self, x):
     down = self.relu1(self.bn1(self.down_conv(x)))
     out = self.do1(down)
     out = self.ops(out)
     out = self.relu2(torch.add(out, down))
     return out
Exemplo n.º 45
0
def equal(x, y, prec=1e-4):
    return torch.all(torch.lt(torch.abs(torch.add(x, -y)), prec))
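A quick usage note for the helper above: it is an elementwise absolute-difference check, and torch.allclose provides a comparable built-in tolerance test.

import torch

x = torch.tensor([1.00000, 2.00000])
y = torch.tensor([1.00002, 1.99999])
print(equal(x, y))                      # tensor(True): all diffs < 1e-4
print(torch.allclose(x, y, atol=1e-4))  # True (built-in alternative)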
Exemplo n.º 46
0
 def additionalTermsLoss(self):
     return torch.add(
         self.hparams[1] * torch.dot(self.flat_wdot, self.flat_wdot),
         self.hparams[2] * torch.dot(self.flat_w, self.flat_w))
Exemplo n.º 47
0
    def forward(self,
                sentence,
                p_sentence,
                pos_tags,
                lengths,
                target_idx_in,
                region_marks,
                local_roles_voc,
                frames,
                local_roles_mask,
                sent_pred_lemmas_idx,
                dep_tags,
                dep_heads,
                targets,
                test=False):

        embeds = self.word_embeddings(sentence)
        embeds = embeds.view(self.batch_size, len(sentence[0]),
                             self.word_emb_dim)
        pos_embeds = self.pos_embeddings(pos_tags)
        fixed_embeds = self.word_fixed_embeddings(p_sentence)
        fixed_embeds = fixed_embeds.view(self.batch_size, len(sentence[0]),
                                         self.word_emb_dim)
        sent_pred_lemmas_embeds = self.p_lemma_embeddings(sent_pred_lemmas_idx)

        region_marks = region_marks.view(self.batch_size, len(sentence[0]), 1)
        embeds = torch.cat((embeds, fixed_embeds, pos_embeds,
                            sent_pred_lemmas_embeds, region_marks), 2)
        #embeds = torch.cat((embeds, fixed_embeds, pos_embeds, region_marks), 2)

        # share_layer
        embeds_sort, lengths_sort, unsort_idx = self.sort_batch(
            embeds, lengths)

        embeds_sort = rnn.pack_padded_sequence(embeds_sort,
                                               lengths_sort,
                                               batch_first=True)
        # hidden states [time_steps * batch_size * hidden_units]
        hidden_states, self.hidden = self.BiLSTM_share(embeds_sort,
                                                       self.hidden)
        # it seems the hidden states are already batch-first, so we don't need to swap the dims
        # hidden_states = hidden_states.permute(1, 2, 0).contiguous().view(self.batch_size, -1, )
        hidden_states, lens = rnn.pad_packed_sequence(hidden_states,
                                                      batch_first=True)
        #hidden_states = hidden_states.transpose(0, 1)
        hidden_states = hidden_states[unsort_idx]

        bf_e = torch.tensor(hidden_states.data.numpy())

        concat_embeds = torch.zeros(bf_e.size()[0],
                                    bf_e.size()[1],
                                    bf_e.size()[2])

        for i in range(bf_e.size()[0]):
            for j in range(bf_e.size()[1]):
                if dep_heads[i][j] > 0:
                    concat_embeds[i, j] = bf_e[i, dep_heads[i][j] - 1]

        head_features = torch.tensor(
            F.tanh(self.hidden2tag_M(bf_e)).data.numpy())
        head_features.requires_grad_(False)

        dep_tag_space = self.MLP(
            F.tanh(self.hidden2tag_M(bf_e) +
                   self.hidden2tag_H(concat_embeds))).view(
                       len(sentence[0]) * self.batch_size, -1)

        hidden_states = torch.cat((hidden_states, head_features), 2)
        # SRL layer
        embeds_sort, lengths_sort, unsort_idx = self.sort_batch(
            hidden_states, lengths)
        embeds_sort = rnn.pack_padded_sequence(embeds_sort,
                                               lengths_sort.cpu().numpy(),
                                               batch_first=True)
        # hidden states [time_steps * batch_size * hidden_units]
        hidden_states, self.hidden_2 = self.BiLSTM_SRL(embeds_sort,
                                                       self.hidden_2)
        # it seems the hidden states are already batch-first, so we don't need to swap the dims
        # hidden_states = hidden_states.permute(1, 2, 0).contiguous().view(self.batch_size, -1, )
        hidden_states, lens = rnn.pad_packed_sequence(hidden_states,
                                                      batch_first=True)
        # hidden_states = hidden_states.transpose(0, 1)
        hidden_states = hidden_states[unsort_idx]

        # B * H
        hidden_states_3 = hidden_states
        predicate_embeds = hidden_states_3[
            np.arange(0,
                      hidden_states_3.size()[0]), target_idx_in]
        # T * B * H
        added_embeds = Variable(
            torch.zeros(hidden_states_3.size()[1],
                        hidden_states_3.size()[0],
                        hidden_states_3.size()[2]))
        predicate_embeds = added_embeds + predicate_embeds
        # B * T * H
        predicate_embeds = predicate_embeds.transpose(0, 1)
        hidden_states = torch.cat((hidden_states_3, predicate_embeds), 2)
        # print(hidden_states)
        # non-linear map and rectify the roles' embeddings
        # roles = Variable(torch.from_numpy(np.arange(0, self.tagset_size)))

        # B * roles
        # log(local_roles_voc)
        # log(frames)

        # B * roles * h
        role_embeds = self.role_embeddings(local_roles_voc)
        frame_embeds = self.frame_embeddings(frames)

        role_embeds = torch.cat((role_embeds, frame_embeds), 2)
        mapped_roles = F.relu(self.role_map(role_embeds))
        mapped_roles = torch.transpose(mapped_roles, 1, 2)

        # b, times, roles
        tag_space = torch.matmul(hidden_states, mapped_roles)
        #tag_space = hidden_states.mm(mapped_roles)

        # b, roles
        #sub = torch.div(torch.add(local_roles_mask, -1.0), _BIG_NUMBER)
        sub = torch.add(local_roles_mask, -1.0) * _BIG_NUMBER
        sub = torch.FloatTensor(sub.numpy())
        # b, roles, times
        tag_space = torch.transpose(tag_space, 0, 1)
        tag_space += sub
        # b, T, roles
        tag_space = torch.transpose(tag_space, 0, 1)

        tag_space = tag_space.view(len(sentence[0]) * self.batch_size, -1)

        SRLprobs = F.softmax(tag_space, dim=1)

        wrong_l_nums = 0.0
        all_l_nums = 0.0
        dep_labels = np.argmax(dep_tag_space.data.numpy(), axis=1)

        for predict_l, gold_l in zip(dep_labels,
                                     dep_tags.view(-1).data.numpy()):

            if gold_l != 0:
                all_l_nums += 1
            if predict_l != gold_l and gold_l != 0:
                wrong_l_nums += 1

        #loss_function = nn.NLLLoss(ignore_index=0)
        targets = targets.view(-1)
        #tag_scores = F.log_softmax(tag_space)
        #loss = loss_function(tag_scores, targets)
        loss_function = nn.CrossEntropyLoss(ignore_index=0)

        SRLloss = loss_function(tag_space, targets)
        DEPloss = loss_function(dep_tag_space, dep_tags.view(-1))
        loss = SRLloss + DEPloss
        return SRLloss, DEPloss, loss, SRLprobs, wrong_l_nums, all_l_nums
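The role-mask handling above (adding (mask - 1) * _BIG_NUMBER to the logits) is the usual additive-mask trick: valid positions receive 0 and padded positions a very large negative value, so softmax drives them to roughly zero. A small sketch with made-up logits:

import torch
import torch.nn.functional as F

_BIG_NUMBER = 1e10
logits = torch.tensor([[2.0, 1.0, 0.5, 0.3]])
mask = torch.tensor([[1.0, 1.0, 0.0, 0.0]])   # last two roles are padding

# valid positions: (1 - 1) * BIG = 0; padded positions: (0 - 1) * BIG = -BIG
masked_logits = logits + (mask - 1.0) * _BIG_NUMBER
print(F.softmax(masked_logits, dim=1))
# tensor([[0.7311, 0.2689, 0.0000, 0.0000]])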
Exemplo n.º 48
0
 def forward(self, data):
     conv1_7x7_s2 = self.conv1_7x7_s2(data)
     conv1_7x7_s2_bn = self.conv1_7x7_s2_bn(conv1_7x7_s2)
     conv1_7x7_s2_bnxx = self.conv1_relu_7x7_s2(conv1_7x7_s2_bn)
     pool1_3x3_s2 = self.pool1_3x3_s2(conv1_7x7_s2_bnxx)
     conv2_1_1x1_reduce = self.conv2_1_1x1_reduce(pool1_3x3_s2)
     conv2_1_1x1_reduce_bn = self.conv2_1_1x1_reduce_bn(conv2_1_1x1_reduce)
     conv2_1_1x1_reduce_bnxx = self.conv2_1_1x1_reduce_relu(conv2_1_1x1_reduce_bn)
     conv2_1_3x3 = self.conv2_1_3x3(conv2_1_1x1_reduce_bnxx)
     conv2_1_3x3_bn = self.conv2_1_3x3_bn(conv2_1_3x3)
     conv2_1_3x3_bnxx = self.conv2_1_3x3_relu(conv2_1_3x3_bn)
     conv2_1_1x1_increase = self.conv2_1_1x1_increase(conv2_1_3x3_bnxx)
     conv2_1_1x1_increase_bn = self.conv2_1_1x1_increase_bn(conv2_1_1x1_increase)
     conv2_1_1x1_proj = self.conv2_1_1x1_proj(pool1_3x3_s2)
     conv2_1_1x1_proj_bn = self.conv2_1_1x1_proj_bn(conv2_1_1x1_proj)
     conv2_1 = torch.add(conv2_1_1x1_proj_bn, 1, conv2_1_1x1_increase_bn)
     conv2_1x = self.conv2_1_relu(conv2_1)
     conv2_2_1x1_reduce = self.conv2_2_1x1_reduce(conv2_1x)
     conv2_2_1x1_reduce_bn = self.conv2_2_1x1_reduce_bn(conv2_2_1x1_reduce)
     conv2_2_1x1_reduce_bnxx = self.conv2_2_1x1_reduce_relu(conv2_2_1x1_reduce_bn)
     conv2_2_3x3 = self.conv2_2_3x3(conv2_2_1x1_reduce_bnxx)
     conv2_2_3x3_bn = self.conv2_2_3x3_bn(conv2_2_3x3)
     conv2_2_3x3_bnxx = self.conv2_2_3x3_relu(conv2_2_3x3_bn)
     conv2_2_1x1_increase = self.conv2_2_1x1_increase(conv2_2_3x3_bnxx)
     conv2_2_1x1_increase_bn = self.conv2_2_1x1_increase_bn(conv2_2_1x1_increase)
     conv2_2 = torch.add(conv2_1x, 1, conv2_2_1x1_increase_bn)
     conv2_2x = self.conv2_2_relu(conv2_2)
     conv2_3_1x1_reduce = self.conv2_3_1x1_reduce(conv2_2x)
     conv2_3_1x1_reduce_bn = self.conv2_3_1x1_reduce_bn(conv2_3_1x1_reduce)
     conv2_3_1x1_reduce_bnxx = self.conv2_3_1x1_reduce_relu(conv2_3_1x1_reduce_bn)
     conv2_3_3x3 = self.conv2_3_3x3(conv2_3_1x1_reduce_bnxx)
     conv2_3_3x3_bn = self.conv2_3_3x3_bn(conv2_3_3x3)
     conv2_3_3x3_bnxx = self.conv2_3_3x3_relu(conv2_3_3x3_bn)
     conv2_3_1x1_increase = self.conv2_3_1x1_increase(conv2_3_3x3_bnxx)
     conv2_3_1x1_increase_bn = self.conv2_3_1x1_increase_bn(conv2_3_1x1_increase)
     conv2_3 = torch.add(conv2_2x, 1, conv2_3_1x1_increase_bn)
     conv2_3x = self.conv2_3_relu(conv2_3)
     conv3_1_1x1_reduce = self.conv3_1_1x1_reduce(conv2_3x)
     conv3_1_1x1_reduce_bn = self.conv3_1_1x1_reduce_bn(conv3_1_1x1_reduce)
     conv3_1_1x1_reduce_bnxx = self.conv3_1_1x1_reduce_relu(conv3_1_1x1_reduce_bn)
     conv3_1_3x3 = self.conv3_1_3x3(conv3_1_1x1_reduce_bnxx)
     conv3_1_3x3_bn = self.conv3_1_3x3_bn(conv3_1_3x3)
     conv3_1_3x3_bnxx = self.conv3_1_3x3_relu(conv3_1_3x3_bn)
     conv3_1_1x1_increase = self.conv3_1_1x1_increase(conv3_1_3x3_bnxx)
     conv3_1_1x1_increase_bn = self.conv3_1_1x1_increase_bn(conv3_1_1x1_increase)
     conv3_1_1x1_proj = self.conv3_1_1x1_proj(conv2_3x)
     conv3_1_1x1_proj_bn = self.conv3_1_1x1_proj_bn(conv3_1_1x1_proj)
     conv3_1 = torch.add(conv3_1_1x1_proj_bn, 1, conv3_1_1x1_increase_bn)
     conv3_1x = self.conv3_1_relu(conv3_1)
     conv3_2_1x1_reduce = self.conv3_2_1x1_reduce(conv3_1x)
     conv3_2_1x1_reduce_bn = self.conv3_2_1x1_reduce_bn(conv3_2_1x1_reduce)
     conv3_2_1x1_reduce_bnxx = self.conv3_2_1x1_reduce_relu(conv3_2_1x1_reduce_bn)
     conv3_2_3x3 = self.conv3_2_3x3(conv3_2_1x1_reduce_bnxx)
     conv3_2_3x3_bn = self.conv3_2_3x3_bn(conv3_2_3x3)
     conv3_2_3x3_bnxx = self.conv3_2_3x3_relu(conv3_2_3x3_bn)
     conv3_2_1x1_increase = self.conv3_2_1x1_increase(conv3_2_3x3_bnxx)
     conv3_2_1x1_increase_bn = self.conv3_2_1x1_increase_bn(conv3_2_1x1_increase)
     conv3_2 = torch.add(conv3_1x, 1, conv3_2_1x1_increase_bn)
     conv3_2x = self.conv3_2_relu(conv3_2)
     conv3_3_1x1_reduce = self.conv3_3_1x1_reduce(conv3_2x)
     conv3_3_1x1_reduce_bn = self.conv3_3_1x1_reduce_bn(conv3_3_1x1_reduce)
     conv3_3_1x1_reduce_bnxx = self.conv3_3_1x1_reduce_relu(conv3_3_1x1_reduce_bn)
     conv3_3_3x3 = self.conv3_3_3x3(conv3_3_1x1_reduce_bnxx)
     conv3_3_3x3_bn = self.conv3_3_3x3_bn(conv3_3_3x3)
     conv3_3_3x3_bnxx = self.conv3_3_3x3_relu(conv3_3_3x3_bn)
     conv3_3_1x1_increase = self.conv3_3_1x1_increase(conv3_3_3x3_bnxx)
     conv3_3_1x1_increase_bn = self.conv3_3_1x1_increase_bn(conv3_3_1x1_increase)
     conv3_3 = torch.add(conv3_2x, 1, conv3_3_1x1_increase_bn)
     conv3_3x = self.conv3_3_relu(conv3_3)
     conv3_4_1x1_reduce = self.conv3_4_1x1_reduce(conv3_3x)
     conv3_4_1x1_reduce_bn = self.conv3_4_1x1_reduce_bn(conv3_4_1x1_reduce)
     conv3_4_1x1_reduce_bnxx = self.conv3_4_1x1_reduce_relu(conv3_4_1x1_reduce_bn)
     conv3_4_3x3 = self.conv3_4_3x3(conv3_4_1x1_reduce_bnxx)
     conv3_4_3x3_bn = self.conv3_4_3x3_bn(conv3_4_3x3)
     conv3_4_3x3_bnxx = self.conv3_4_3x3_relu(conv3_4_3x3_bn)
     conv3_4_1x1_increase = self.conv3_4_1x1_increase(conv3_4_3x3_bnxx)
     conv3_4_1x1_increase_bn = self.conv3_4_1x1_increase_bn(conv3_4_1x1_increase)
     conv3_4 = torch.add(conv3_3x, 1, conv3_4_1x1_increase_bn)
     conv3_4x = self.conv3_4_relu(conv3_4)
     conv4_1_1x1_reduce = self.conv4_1_1x1_reduce(conv3_4x)
     conv4_1_1x1_reduce_bn = self.conv4_1_1x1_reduce_bn(conv4_1_1x1_reduce)
     conv4_1_1x1_reduce_bnxx = self.conv4_1_1x1_reduce_relu(conv4_1_1x1_reduce_bn)
     conv4_1_3x3 = self.conv4_1_3x3(conv4_1_1x1_reduce_bnxx)
     conv4_1_3x3_bn = self.conv4_1_3x3_bn(conv4_1_3x3)
     conv4_1_3x3_bnxx = self.conv4_1_3x3_relu(conv4_1_3x3_bn)
     conv4_1_1x1_increase = self.conv4_1_1x1_increase(conv4_1_3x3_bnxx)
     conv4_1_1x1_increase_bn = self.conv4_1_1x1_increase_bn(conv4_1_1x1_increase)
     conv4_1_1x1_proj = self.conv4_1_1x1_proj(conv3_4x)
     conv4_1_1x1_proj_bn = self.conv4_1_1x1_proj_bn(conv4_1_1x1_proj)
     conv4_1 = torch.add(conv4_1_1x1_proj_bn, 1, conv4_1_1x1_increase_bn)
     conv4_1x = self.conv4_1_relu(conv4_1)
     conv4_2_1x1_reduce = self.conv4_2_1x1_reduce(conv4_1x)
     conv4_2_1x1_reduce_bn = self.conv4_2_1x1_reduce_bn(conv4_2_1x1_reduce)
     conv4_2_1x1_reduce_bnxx = self.conv4_2_1x1_reduce_relu(conv4_2_1x1_reduce_bn)
     conv4_2_3x3 = self.conv4_2_3x3(conv4_2_1x1_reduce_bnxx)
     conv4_2_3x3_bn = self.conv4_2_3x3_bn(conv4_2_3x3)
     conv4_2_3x3_bnxx = self.conv4_2_3x3_relu(conv4_2_3x3_bn)
     conv4_2_1x1_increase = self.conv4_2_1x1_increase(conv4_2_3x3_bnxx)
     conv4_2_1x1_increase_bn = self.conv4_2_1x1_increase_bn(conv4_2_1x1_increase)
     conv4_2 = torch.add(conv4_1x, 1, conv4_2_1x1_increase_bn)
     conv4_2x = self.conv4_2_relu(conv4_2)
     conv4_3_1x1_reduce = self.conv4_3_1x1_reduce(conv4_2x)
     conv4_3_1x1_reduce_bn = self.conv4_3_1x1_reduce_bn(conv4_3_1x1_reduce)
     conv4_3_1x1_reduce_bnxx = self.conv4_3_1x1_reduce_relu(conv4_3_1x1_reduce_bn)
     conv4_3_3x3 = self.conv4_3_3x3(conv4_3_1x1_reduce_bnxx)
     conv4_3_3x3_bn = self.conv4_3_3x3_bn(conv4_3_3x3)
     conv4_3_3x3_bnxx = self.conv4_3_3x3_relu(conv4_3_3x3_bn)
     conv4_3_1x1_increase = self.conv4_3_1x1_increase(conv4_3_3x3_bnxx)
     conv4_3_1x1_increase_bn = self.conv4_3_1x1_increase_bn(conv4_3_1x1_increase)
     conv4_3 = torch.add(conv4_2x, 1, conv4_3_1x1_increase_bn)
     conv4_3x = self.conv4_3_relu(conv4_3)
     conv4_4_1x1_reduce = self.conv4_4_1x1_reduce(conv4_3x)
     conv4_4_1x1_reduce_bn = self.conv4_4_1x1_reduce_bn(conv4_4_1x1_reduce)
     conv4_4_1x1_reduce_bnxx = self.conv4_4_1x1_reduce_relu(conv4_4_1x1_reduce_bn)
     conv4_4_3x3 = self.conv4_4_3x3(conv4_4_1x1_reduce_bnxx)
     conv4_4_3x3_bn = self.conv4_4_3x3_bn(conv4_4_3x3)
     conv4_4_3x3_bnxx = self.conv4_4_3x3_relu(conv4_4_3x3_bn)
     conv4_4_1x1_increase = self.conv4_4_1x1_increase(conv4_4_3x3_bnxx)
     conv4_4_1x1_increase_bn = self.conv4_4_1x1_increase_bn(conv4_4_1x1_increase)
     conv4_4 = torch.add(conv4_3x, 1, conv4_4_1x1_increase_bn)
     conv4_4x = self.conv4_4_relu(conv4_4)
     conv4_5_1x1_reduce = self.conv4_5_1x1_reduce(conv4_4x)
     conv4_5_1x1_reduce_bn = self.conv4_5_1x1_reduce_bn(conv4_5_1x1_reduce)
     conv4_5_1x1_reduce_bnxx = self.conv4_5_1x1_reduce_relu(conv4_5_1x1_reduce_bn)
     conv4_5_3x3 = self.conv4_5_3x3(conv4_5_1x1_reduce_bnxx)
     conv4_5_3x3_bn = self.conv4_5_3x3_bn(conv4_5_3x3)
     conv4_5_3x3_bnxx = self.conv4_5_3x3_relu(conv4_5_3x3_bn)
     conv4_5_1x1_increase = self.conv4_5_1x1_increase(conv4_5_3x3_bnxx)
     conv4_5_1x1_increase_bn = self.conv4_5_1x1_increase_bn(conv4_5_1x1_increase)
     conv4_5 = torch.add(conv4_4x, 1, conv4_5_1x1_increase_bn)
     conv4_5x = self.conv4_5_relu(conv4_5)
     conv4_6_1x1_reduce = self.conv4_6_1x1_reduce(conv4_5x)
     conv4_6_1x1_reduce_bn = self.conv4_6_1x1_reduce_bn(conv4_6_1x1_reduce)
     conv4_6_1x1_reduce_bnxx = self.conv4_6_1x1_reduce_relu(conv4_6_1x1_reduce_bn)
     conv4_6_3x3 = self.conv4_6_3x3(conv4_6_1x1_reduce_bnxx)
     conv4_6_3x3_bn = self.conv4_6_3x3_bn(conv4_6_3x3)
     conv4_6_3x3_bnxx = self.conv4_6_3x3_relu(conv4_6_3x3_bn)
     conv4_6_1x1_increase = self.conv4_6_1x1_increase(conv4_6_3x3_bnxx)
     conv4_6_1x1_increase_bn = self.conv4_6_1x1_increase_bn(conv4_6_1x1_increase)
     conv4_6 = torch.add(conv4_5x, 1, conv4_6_1x1_increase_bn)
     conv4_6x = self.conv4_6_relu(conv4_6)
     conv5_1_1x1_reduce = self.conv5_1_1x1_reduce(conv4_6x)
     conv5_1_1x1_reduce_bn = self.conv5_1_1x1_reduce_bn(conv5_1_1x1_reduce)
     conv5_1_1x1_reduce_bnxx = self.conv5_1_1x1_reduce_relu(conv5_1_1x1_reduce_bn)
     conv5_1_3x3 = self.conv5_1_3x3(conv5_1_1x1_reduce_bnxx)
     conv5_1_3x3_bn = self.conv5_1_3x3_bn(conv5_1_3x3)
     conv5_1_3x3_bnxx = self.conv5_1_3x3_relu(conv5_1_3x3_bn)
     conv5_1_1x1_increase = self.conv5_1_1x1_increase(conv5_1_3x3_bnxx)
     conv5_1_1x1_increase_bn = self.conv5_1_1x1_increase_bn(conv5_1_1x1_increase)
     conv5_1_1x1_proj = self.conv5_1_1x1_proj(conv4_6x)
     conv5_1_1x1_proj_bn = self.conv5_1_1x1_proj_bn(conv5_1_1x1_proj)
     conv5_1 = torch.add(conv5_1_1x1_proj_bn, 1, conv5_1_1x1_increase_bn)
     conv5_1x = self.conv5_1_relu(conv5_1)
     conv5_2_1x1_reduce = self.conv5_2_1x1_reduce(conv5_1x)
     conv5_2_1x1_reduce_bn = self.conv5_2_1x1_reduce_bn(conv5_2_1x1_reduce)
     conv5_2_1x1_reduce_bnxx = self.conv5_2_1x1_reduce_relu(conv5_2_1x1_reduce_bn)
     conv5_2_3x3 = self.conv5_2_3x3(conv5_2_1x1_reduce_bnxx)
     conv5_2_3x3_bn = self.conv5_2_3x3_bn(conv5_2_3x3)
     conv5_2_3x3_bnxx = self.conv5_2_3x3_relu(conv5_2_3x3_bn)
     conv5_2_1x1_increase = self.conv5_2_1x1_increase(conv5_2_3x3_bnxx)
     conv5_2_1x1_increase_bn = self.conv5_2_1x1_increase_bn(conv5_2_1x1_increase)
     conv5_2 = torch.add(conv5_1x, 1, conv5_2_1x1_increase_bn)
     conv5_2x = self.conv5_2_relu(conv5_2)
     conv5_3_1x1_reduce = self.conv5_3_1x1_reduce(conv5_2x)
     conv5_3_1x1_reduce_bn = self.conv5_3_1x1_reduce_bn(conv5_3_1x1_reduce)
     conv5_3_1x1_reduce_bnxx = self.conv5_3_1x1_reduce_relu(conv5_3_1x1_reduce_bn)
     conv5_3_3x3 = self.conv5_3_3x3(conv5_3_1x1_reduce_bnxx)
     conv5_3_3x3_bn = self.conv5_3_3x3_bn(conv5_3_3x3)
     conv5_3_3x3_bnxx = self.conv5_3_3x3_relu(conv5_3_3x3_bn)
     conv5_3_1x1_increase = self.conv5_3_1x1_increase(conv5_3_3x3_bnxx)
     conv5_3_1x1_increase_bn = self.conv5_3_1x1_increase_bn(conv5_3_1x1_increase)
     conv5_3 = torch.add(conv5_2x, 1, conv5_3_1x1_increase_bn)
     conv5_3x = self.conv5_3_relu(conv5_3)
     pool5_7x7_s1 = self.pool5_7x7_s1(conv5_3x)
     classifier_preflatten = self.classifier(pool5_7x7_s1)
     classifier = classifier_preflatten.view(classifier_preflatten.size(0), -1)
     
     return classifier, pool5_7x7_s1
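The forward pass above repeats one bottleneck pattern many times: 1x1 reduce, 3x3, 1x1 increase, an optional 1x1 projection on the shortcut, an elementwise add, and a ReLU. A compact nn.Module sketch of a single such block, with hypothetical channel sizes:

import torch
import torch.nn as nn

class Bottleneck(nn.Module):
    def __init__(self, in_ch, mid_ch, out_ch, stride=1):
        super().__init__()
        self.reduce = nn.Sequential(
            nn.Conv2d(in_ch, mid_ch, 1, bias=False),
            nn.BatchNorm2d(mid_ch), nn.ReLU(inplace=True))
        self.conv3x3 = nn.Sequential(
            nn.Conv2d(mid_ch, mid_ch, 3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(mid_ch), nn.ReLU(inplace=True))
        self.increase = nn.Sequential(
            nn.Conv2d(mid_ch, out_ch, 1, bias=False),
            nn.BatchNorm2d(out_ch))
        # projection shortcut when the shape changes, identity otherwise
        self.proj = (nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 1, stride=stride, bias=False),
            nn.BatchNorm2d(out_ch))
            if (in_ch != out_ch or stride != 1) else nn.Identity())
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        residual = self.proj(x)
        out = self.increase(self.conv3x3(self.reduce(x)))
        return self.relu(torch.add(residual, out))

block = Bottleneck(64, 64, 256)
print(block(torch.randn(1, 64, 56, 56)).shape)  # torch.Size([1, 256, 56, 56])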
Exemplo n.º 49
0
    def compute(self, config, budget, working_directory, *args, **kwargs):
        model_cnt = 10  # number of models to create

        feature_encoder = []
        relation_network = []
        feature_encoder_optim = []
        relation_network_optim = []

        for i in range(model_cnt):
            feature_encoder.insert(
                i,
                embedding_function(
                    num_embedding_layers=config['num_embedding_layers'],
                    num_filters_1=config['num_filters_1'],
                    num_filters_2=config['num_filters_2']
                    if 'num_filters_2' in config else None,
                    num_filters_3=config['num_filters_3']
                    if 'num_filters_3' in config else None,
                    num_filters_4=config['num_filters_4']
                    if 'num_filters_4' in config else None,
                    dropout_rate=config['dropout_rate'],
                    kernel_size=3))

            relation_network.insert(
                i,
                relation_function(
                    num_relation_layers=config['num_relation_layers'],
                    embedding_output_filter=embedding_output_filter * 2,
                    input_length=input_length,
                    num_rela_filters_1=config['num_rela_filters_1']
                    if 'num_rela_filters_1' in config else None,
                    num_rela_filters_2=config['num_rela_filters_2']
                    if 'num_rela_filters_2' in config else None,
                    num_rela_filters_3=config['num_rela_filters_3']
                    if 'num_rela_filters_3' in config else None,
                    num_rela_filters_4=config['num_rela_filters_4']
                    if 'num_rela_filters_4' in config else None,
                    rela_dropout_rate=config['rela_dropout_rate'],
                    kernel_size=3,
                    num_fc_units=config['num_fc_units']))

            feature_encoder[i].apply(weights_init)
            relation_network[i].apply(weights_init)

            if config['optimizer'] == 'Adam':
                feature_encoder_optim.insert(
                    i,
                    torch.optim.Adam(feature_encoder[i].parameters(),
                                     lr=config['lr']))
                relation_network_optim.insert(
                    i,
                    torch.optim.Adam(relation_network[i].parameters(),
                                     lr=config['lr']))
            else:
                feature_encoder_optim.insert(
                    i,
                    torch.optim.SGD(feature_encoder[i].parameters(),
                                    lr=config['lr'],
                                    momentum=config['sgd_momentum']))
                relation_network_optim.insert(
                    i,
                    torch.optim.SGD(relation_network[i].parameters(),
                                    lr=config['lr'],
                                    momentum=config['sgd_momentum']))

        # model for the first person
        for i in range(model_cnt):
            relation = []
            for episode in range(int(budget)):
                for j in range(model_cnt):

                    support_feature1 = feature_encoder[i](
                        Support[j][:, :, 0:10].float())
                    support_feature2 = feature_encoder[i](
                        Support[j][:, :, 10:20].float())
                    support_feature3 = feature_encoder[i](
                        Support[j][:, :, 20:30].float())

                    feature = torch.add(support_feature1, support_feature2)
                    support_feature = torch.add(feature, support_feature3)

                    query_feature = feature_encoder[i](Query[i].float())

                    # concatenate the feature maps
                    feature_pair = torch.cat((support_feature, query_feature),
                                             dim=1)

                    # feed the pair into the relation function
                    relation.insert(j, relation_network[i](feature_pair))

                    mse = nn.MSELoss()

                    if i == j:
                        label = torch.tensor(1, dtype=torch.float32)
                    else:
                        label = torch.tensor(0, dtype=torch.float32)

                    loss = mse(relation[j], label)  # the correct answer should be 1

                    feature_encoder[i].zero_grad()
                    relation_network[i].zero_grad()

                    loss.backward()

                    feature_encoder_optim[i].step()
                    relation_network_optim[i].step()

                    if (episode + 1) == budget:
                        predict_label = torch.max(relation[j].data)

                        print("sub:", j, "최대 예측값:", predict_label, "loss",
                              loss.item())

                        if (j + 1) == model_cnt:
                            print(
                                "\t---------------------------------------------------------"
                            )
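A minimal standalone sketch of the pairwise supervision used in the loop above, assuming torch and torch.nn are already imported; the names and shapes are illustrative, not the project's actual API:

mse = nn.MSELoss()
relation_score = torch.sigmoid(torch.randn(1, 1))  # stand-in for relation_network[i](feature_pair)
label = torch.tensor([[1.0]])                      # 1.0 when support and query share a subject, else 0.0
loss = mse(relation_score, label)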
Exemplo n.º 50
0
def simple_addition(x, y):
    """
    Simple addition function that accepts two tensors and returns their element-wise sum.
    """
    return torch.add(x, y)
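A quick usage sketch for the helper above (values are illustrative):

a = torch.tensor([1.0, 2.0])
b = torch.tensor([3.0, 4.0])
print(simple_addition(a, b))  # tensor([4., 6.])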
Exemplo n.º 51
0
def from_importance_weights(target_policy_log_probs,
                            behavior_policy_log_probs,
                            log_rhos,
                            discounts,
                            rewards,
                            values,
                            bootstrap_value,
                            clip_rho_threshold=1.0,
                            clip_pg_rho_threshold=1.0,
                            behavior_relevance_threshold=1.0):
    """V-trace from log importance weights."""
    with torch.no_grad():
        kl_div = (behavior_policy_log_probs.exp() *
                  (behavior_policy_log_probs -
                   target_policy_log_probs)).sum(-1).unsqueeze(-1)
        per_step_behavioral = (kl_div < behavior_relevance_threshold).float()

        output_list = []
        prev_val = torch.ones((1, 1)).to(kl_div.device)
        for threshold_val_t in per_step_behavioral.unbind():
            threshold_val_t = threshold_val_t.view(1, -1)
            threshold_val_t *= prev_val
            prev_val = threshold_val_t
            output_list.append(threshold_val_t)
        mask = torch.cat(output_list)
        # mask = torch.cumprod(per_step_behavioral, dim=0).squeeze(-1)

        rhos = torch.exp(log_rhos)
        if clip_rho_threshold is not None:
            clipped_rhos = torch.clamp(rhos, max=clip_rho_threshold)
        else:
            clipped_rhos = rhos

        cs = torch.clamp(rhos, max=1.0)
        # Append bootstrapped value to get [v1, ..., v_t+1]
        values_t_plus_1 = torch.cat(
            [values[1:], torch.unsqueeze(bootstrap_value, 0)], dim=0)
        deltas = clipped_rhos * (rewards + discounts * values_t_plus_1 -
                                 values)

        acc = torch.zeros_like(bootstrap_value)
        result = []
        for t in range(discounts.shape[0] - 1, -1, -1):
            acc = deltas[t] + discounts[t] * cs[t] * acc * mask[t]
            result.append(acc)
        result.reverse()
        vs_minus_v_xs = torch.stack(result)
        # Add V(x_s) to get v_s.
        vs = torch.add(vs_minus_v_xs, values)

        # Advantage for policy gradient.
        broadcasted_bootstrap_values = torch.ones_like(vs[0]) * bootstrap_value
        vs_t_plus_1 = torch.cat(
            [vs[1:], broadcasted_bootstrap_values.unsqueeze(0)], dim=0)
        if clip_pg_rho_threshold is not None:
            clipped_pg_rhos = torch.clamp(rhos, max=clip_pg_rho_threshold)
        else:
            clipped_pg_rhos = rhos
        pg_advantages = clipped_pg_rhos * (rewards + discounts * vs_t_plus_1 -
                                           values) * mask

        # Make sure no gradients are backpropagated through the returned values.
        return VTraceReturns(vs=vs, pg_advantages=pg_advantages, mask=mask)
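A minimal usage sketch for the V-trace function above; the shapes (T time steps, B batch entries, A actions) are illustrative, and VTraceReturns is assumed to be the namedtuple with fields vs, pg_advantages and mask that the surrounding module defines:

T, B, A = 5, 2, 3
target_lp = torch.log_softmax(torch.randn(T, B, A), dim=-1)
behavior_lp = torch.log_softmax(torch.randn(T, B, A), dim=-1)
actions = torch.randint(A, (T, B, 1))
log_rhos = (target_lp - behavior_lp).gather(-1, actions).squeeze(-1)
out = from_importance_weights(
    target_policy_log_probs=target_lp,
    behavior_policy_log_probs=behavior_lp,
    log_rhos=log_rhos,
    discounts=torch.full((T, B), 0.99),
    rewards=torch.randn(T, B),
    values=torch.randn(T, B),
    bootstrap_value=torch.randn(B))
print(out.vs.shape, out.pg_advantages.shape)  # both torch.Size([T, B])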
Exemplo n.º 52
0
def getting_started():
    print(util.Section('Getting Started'))

    # construction
    print(util.SubSection('Construction'))
    xa1 = torch.empty(5, 3)  # uninitialized
    xa2 = torch.rand(5, 3)  # randomly initialized matrix
    xa3 = torch.zeros(5, 3, dtype=torch.long)  # filled zeros and of dtype long
    xa4 = torch.tensor([5.5, 3])  # directly from data
    xa5 = xa3.new_ones(5, 3, dtype=torch.double)  # new_* methods take in sizes
    xa6 = torch.randn_like(xa5,
                           dtype=torch.float)  # override dtype with same size
    print(f'x size = {xa6.size()}')

    # operations
    xb1 = torch.rand(5, 3)
    yb1 = torch.rand(5, 3)

    # operation: add
    print(util.SubSection('Operations: Add'))
    print(f'xb1 + yb1 = {xb1 + yb1}')
    print(f'xb1 + yb1 = {torch.add(xb1, yb1)}')
    # with output argument
    rb1 = torch.empty(5, 3)
    torch.add(xb1, yb1, out=rb1)
    print(f'rb1 = {rb1}')
    # add in place
    yb1.add_(xb1)
    print(f'yb1 = {yb1}')
    # index
    print(f'xb1[:,1] = {xb1[:, 1]}')

    # operation: resize
    print(util.SubSection('Operations: Resize'))
    xb2 = torch.randn(4, 4)
    yb2 = xb2.view(16)
    zb2 = xb2.view(-1, 8)
    print(f'xb2 = {xb2}')
    print(f'yb2 = {yb2}')
    print(f'zb2 = {zb2}')
    print(
        f'xb2.size = {xb2.size()}, yb2.size = {yb2.size()}, zb2.size = {zb2.size()}'
    )
    # if the tensor has only one element, use .item() to get the value as a Python number
    xb3 = torch.randn(1)
    print(f'xb3 = {xb3}')
    print(f'xb3.item() = {xb3.item()}')

    # NumPy bridge: changing one will change the other
    print(util.SubSection('NumPy Bridge'))
    # torch => numpy
    xc1 = torch.ones(5)
    print(f'xc1 = {xc1}')
    yc1 = xc1.numpy()
    print(f'yc1 = {yc1}')
    # in-place add; y will also change
    xc1.add_(1)
    print(f'xc1 = {xc1}')
    print(f'yc1 = {yc1}')
    # numpy => torch
    xc2 = np.ones(5)
    yc2 = torch.from_numpy(xc2)
    np.add(xc2, 1, out=xc2)
    print(f'xc2 = {xc2}')
    print(f'yc2 = {yc2}')

    # CUDA tensors
    print(util.SubSection('CUDA Tensors'))
    xd1 = torch.rand((3, 2))
    if torch.cuda.is_available():
        print('use CUDA')
        device = torch.device('cuda')
        yd1 = torch.ones_like(xd1,
                              device=device)  # directly create a tensor on GPU
        xd2 = xd1.to(device)
        zd1 = xd2 + yd1
        print(f'zd1 = {zd1}')
        print(f'to CPU, zd1 = {zd1.to("cpu", torch.double)}'
              )  # .to() can also change the dtype at the same time
Exemplo n.º 53
0
    def _generate(self,
                  model,
                  sample,
                  prefix_tokens=None,
                  bos_token=None,
                  **kwargs):
        if not self.retain_dropout:
            model.eval()

        # model.forward normally channels prev_output_tokens into the decoder
        # separately, but SequenceGenerator directly calls model.encoder
        encoder_input = {
            k: v
            for k, v in sample['net_input'].items()
            if k != 'prev_output_tokens'
        }

        src_tokens = encoder_input['src_tokens']
        if src_tokens.dim() > 2:
            src_lengths = encoder_input['src_lengths']
        else:
            src_lengths = (src_tokens.ne(self.eos)
                           & src_tokens.ne(self.pad)).long().sum(dim=1)
        input_size = src_tokens.size()
        # batch dimension goes first followed by source lengths
        bsz = input_size[0]
        src_len = input_size[1]
        beam_size = self.beam_size

        if self.match_source_len:
            max_len = src_lengths.max().item()
        else:
            max_len = min(
                int(self.max_len_a * src_len + self.max_len_b),
                # exclude the EOS marker
                model.max_decoder_positions() - 1,
            )
        assert self.min_len <= max_len, 'min_len cannot be larger than max_len, please adjust these!'

        # compute the encoder output for each beam
        encoder_outs = model.forward_encoder(encoder_input)
        new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1)
        new_order = new_order.to(src_tokens.device).long()
        encoder_outs = model.reorder_encoder_out(encoder_outs, new_order)

        # initialize buffers
        scores = src_tokens.new(bsz * beam_size, max_len + 1).float().fill_(0)
        scores_buf = scores.clone()
        tokens = src_tokens.new(bsz * beam_size,
                                max_len + 2).long().fill_(self.pad)
        tokens_buf = tokens.clone()
        tokens[:, 0] = self.eos if bos_token is None else bos_token
        attn, attn_buf = None, None

        # The blacklist indicates candidates that should be ignored.
        # For example, suppose we're sampling and have already finalized 2/5
        # samples. Then the blacklist would mark 2 positions as being ignored,
        # so that we only finalize the remaining 3 samples.
        blacklist = src_tokens.new_zeros(bsz, beam_size).eq(
            -1)  # forward and backward-compatible False mask

        # list of completed sentences
        finalized = [[] for i in range(bsz)]
        finished = [False for i in range(bsz)]
        num_remaining_sent = bsz

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = (torch.arange(0, bsz) *
                        beam_size).unsqueeze(1).type_as(tokens)
        cand_offsets = torch.arange(0, cand_size).type_as(tokens)

        # helper function for allocating buffers on the fly
        buffers = {}

        def buffer(name, type_of=tokens):  # noqa
            if name not in buffers:
                buffers[name] = type_of.new()
            return buffers[name]

        def is_finished(sent, step, unfin_idx):
            """
            Check whether we've finished generation for a given sentence, by
            comparing the worst score among finalized hypotheses to the best
            possible score among unfinalized hypotheses.
            """
            assert len(finalized[sent]) <= beam_size
            if len(finalized[sent]) == beam_size or step == max_len:
                return True
            return False

        def finalize_hypos(step, bbsz_idx, eos_scores):
            """
            Finalize the given hypotheses at this step, while keeping the total
            number of finalized hypotheses per sentence <= beam_size.

            Note: the input must be in the desired finalization order, so that
            hypotheses that appear earlier in the input are preferred to those
            that appear later.

            Args:
                step: current time step
                bbsz_idx: A vector of indices in the range [0, bsz*beam_size),
                    indicating which hypotheses to finalize
                eos_scores: A vector of the same size as bbsz_idx containing
                    scores for each hypothesis
            """
            assert bbsz_idx.numel() == eos_scores.numel()

            # clone relevant token and attention tensors
            tokens_clone = tokens.index_select(0, bbsz_idx)
            tokens_clone = tokens_clone[:, 1:step +
                                        2]  # skip the first index, which is EOS
            assert not tokens_clone.eq(self.eos).any()
            tokens_clone[:, step] = self.eos
            attn_clone = attn.index_select(
                0, bbsz_idx)[:, :, 1:step + 2] if attn is not None else None

            # compute scores per token position
            pos_scores = scores.index_select(0, bbsz_idx)[:, :step + 1]
            pos_scores[:, step] = eos_scores
            # convert from cumulative to per-position scores
            pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1]

            # normalize sentence-level scores
            if self.normalize_scores:
                eos_scores /= (step + 1)**self.len_penalty

            cum_unfin = []
            prev = 0
            for f in finished:
                if f:
                    prev += 1
                else:
                    cum_unfin.append(prev)

            sents_seen = set()
            for i, (idx, score) in enumerate(
                    zip(bbsz_idx.tolist(), eos_scores.tolist())):
                unfin_idx = idx // beam_size
                sent = unfin_idx + cum_unfin[unfin_idx]

                sents_seen.add((sent, unfin_idx))

                if self.match_source_len and step > src_lengths[unfin_idx]:
                    score = -math.inf

                def get_hypo():

                    if attn_clone is not None:
                        # remove padding tokens from attn scores
                        hypo_attn = attn_clone[i]
                    else:
                        hypo_attn = None

                    return {
                        'tokens': tokens_clone[i],
                        'score': score,
                        'attention': hypo_attn,  # src_len x tgt_len
                        'alignment': None,
                        'positional_scores': pos_scores[i],
                    }

                if len(finalized[sent]) < beam_size:
                    finalized[sent].append(get_hypo())

            newly_finished = []
            for sent, unfin_idx in sents_seen:
                # check termination conditions for this sentence
                if not finished[sent] and is_finished(sent, step, unfin_idx):
                    finished[sent] = True
                    newly_finished.append(unfin_idx)
            return newly_finished

        reorder_state = None
        batch_idxs = None
        for step in range(max_len + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                if batch_idxs is not None:
                    # update beam indices to take into account removed sentences
                    corr = batch_idxs - torch.arange(
                        batch_idxs.numel()).type_as(batch_idxs)
                    reorder_state.view(-1, beam_size).add_(
                        corr.unsqueeze(-1) * beam_size)
                model.reorder_incremental_state(reorder_state)
                encoder_outs = model.reorder_encoder_out(
                    encoder_outs, reorder_state)

            lprobs, avg_attn_scores = model.forward_decoder(
                tokens[:, :step + 1],
                encoder_outs,
                temperature=self.temperature,
            )
            lprobs[lprobs != lprobs] = -math.inf

            lprobs[:, self.pad] = -math.inf  # never select pad
            lprobs[:, self.unk] -= self.unk_penalty  # apply unk penalty

            # handle max length constraint
            if step >= max_len:
                lprobs[:, :self.eos] = -math.inf
                lprobs[:, self.eos + 1:] = -math.inf
            elif self.eos_factor is not None:
                # only consider EOS if its score is no less than a specified
                # factor of the best candidate score
                disallow_eos_mask = lprobs[:, self.
                                           eos] < self.eos_factor * lprobs.max(
                                               dim=1)[0]
                lprobs[disallow_eos_mask, self.eos] = -math.inf

            # handle prefix tokens (possibly with different lengths)
            if prefix_tokens is not None and step < prefix_tokens.size(
                    1) and step < max_len:
                prefix_toks = prefix_tokens[:, step].unsqueeze(-1).repeat(
                    1, beam_size).view(-1)
                prefix_lprobs = lprobs.gather(-1, prefix_toks.unsqueeze(-1))
                prefix_mask = prefix_toks.ne(self.pad)
                lprobs[prefix_mask] = -math.inf
                lprobs[prefix_mask] = lprobs[prefix_mask].scatter_(
                    -1, prefix_toks[prefix_mask].unsqueeze(-1),
                    prefix_lprobs[prefix_mask])
                # if prefix includes eos, then we should make sure tokens and
                # scores are the same across all beams
                eos_mask = prefix_toks.eq(self.eos)
                if eos_mask.any():
                    # validate that the first beam matches the prefix
                    first_beam = tokens[eos_mask].view(
                        -1, beam_size, tokens.size(-1))[:, 0, 1:step + 1]
                    eos_mask_batch_dim = eos_mask.view(-1, beam_size)[:, 0]
                    target_prefix = prefix_tokens[eos_mask_batch_dim][:, :step]
                    assert (first_beam == target_prefix).all()

                    def replicate_first_beam(tensor, mask):
                        tensor = tensor.view(-1, beam_size, tensor.size(-1))
                        tensor[mask] = tensor[mask][:, :1, :]
                        return tensor.view(-1, tensor.size(-1))

                    # copy tokens, scores and lprobs from the first beam to all beams
                    tokens = replicate_first_beam(tokens, eos_mask_batch_dim)
                    scores = replicate_first_beam(scores, eos_mask_batch_dim)
                    lprobs = replicate_first_beam(lprobs, eos_mask_batch_dim)
            elif step < self.min_len:
                # minimum length constraint (does not apply if using prefix_tokens)
                lprobs[:, self.eos] = -math.inf

            if self.no_repeat_ngram_size > 0:
                # for each beam and batch sentence, generate a list of previous ngrams
                gen_ngrams = [{} for bbsz_idx in range(bsz * beam_size)]
                for bbsz_idx in range(bsz * beam_size):
                    gen_tokens = tokens[bbsz_idx].tolist()
                    for ngram in zip(*[
                            gen_tokens[i:]
                            for i in range(self.no_repeat_ngram_size)
                    ]):
                        gen_ngrams[bbsz_idx][tuple(ngram[:-1])] = \
                                gen_ngrams[bbsz_idx].get(tuple(ngram[:-1]), []) + [ngram[-1]]

            # Record attention scores
            if type(avg_attn_scores) is list:
                avg_attn_scores = avg_attn_scores[0]
            if avg_attn_scores is not None:
                if attn is None:
                    if src_tokens.dim() > 2:
                        attn = scores.new(
                            bsz * beam_size,
                            encoder_outs[0]["encoder_out"][0].size(0),
                            max_len + 2,
                        )
                    else:
                        attn = scores.new(bsz * beam_size, src_tokens.size(1),
                                          max_len + 2)
                    attn_buf = attn.clone()
                attn[:, :, step + 1].copy_(avg_attn_scores)

            scores = scores.type_as(lprobs)
            scores_buf = scores_buf.type_as(lprobs)
            eos_bbsz_idx = buffer('eos_bbsz_idx')
            eos_scores = buffer('eos_scores', type_of=scores)

            self.search.set_src_lengths(src_lengths)

            if self.no_repeat_ngram_size > 0:

                def calculate_banned_tokens(bbsz_idx):
                    # before decoding the next token, prevent decoding of ngrams that have already appeared
                    ngram_index = tuple(
                        tokens[bbsz_idx, step + 2 -
                               self.no_repeat_ngram_size:step + 1].tolist())
                    return gen_ngrams[bbsz_idx].get(ngram_index, [])

                if step + 2 - self.no_repeat_ngram_size >= 0:
                    # no banned tokens if we haven't generated no_repeat_ngram_size tokens yet
                    banned_tokens = [
                        calculate_banned_tokens(bbsz_idx)
                        for bbsz_idx in range(bsz * beam_size)
                    ]
                else:
                    banned_tokens = [[] for bbsz_idx in range(bsz * beam_size)]

                for bbsz_idx in range(bsz * beam_size):
                    lprobs[bbsz_idx, banned_tokens[bbsz_idx]] = -math.inf

            cand_scores, cand_indices, cand_beams = self.search.step(
                step,
                lprobs.view(bsz, -1, self.vocab_size),
                scores.view(bsz, beam_size, -1)[:, :, :step],
            )

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)

            # finalize hypotheses that end in eos, except for blacklisted ones
            # or candidates with a score of -inf
            eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf)
            eos_mask[:, :beam_size][blacklist] = 0

            # only consider eos when it's among the top beam_size indices
            torch.masked_select(
                cand_bbsz_idx[:, :beam_size],
                mask=eos_mask[:, :beam_size],
                out=eos_bbsz_idx,
            )

            finalized_sents = set()
            if eos_bbsz_idx.numel() > 0:
                torch.masked_select(
                    cand_scores[:, :beam_size],
                    mask=eos_mask[:, :beam_size],
                    out=eos_scores,
                )
                finalized_sents = finalize_hypos(step, eos_bbsz_idx,
                                                 eos_scores)
                num_remaining_sent -= len(finalized_sents)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            assert step < max_len

            if len(finalized_sents) > 0:
                new_bsz = bsz - len(finalized_sents)

                # construct batch_idxs which holds indices of batches to keep for the next pass
                batch_mask = cand_indices.new_ones(bsz)
                batch_mask[cand_indices.new(finalized_sents)] = 0
                batch_idxs = batch_mask.nonzero().squeeze(-1)

                eos_mask = eos_mask[batch_idxs]
                cand_beams = cand_beams[batch_idxs]
                bbsz_offsets.resize_(new_bsz, 1)
                cand_bbsz_idx = cand_beams.add(bbsz_offsets)
                cand_scores = cand_scores[batch_idxs]
                cand_indices = cand_indices[batch_idxs]
                if prefix_tokens is not None:
                    prefix_tokens = prefix_tokens[batch_idxs]
                src_lengths = src_lengths[batch_idxs]
                blacklist = blacklist[batch_idxs]

                scores = scores.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1)
                scores_buf.resize_as_(scores)
                tokens = tokens.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1)
                tokens_buf.resize_as_(tokens)
                if attn is not None:
                    attn = attn.view(bsz, -1)[batch_idxs].view(
                        new_bsz * beam_size, attn.size(1), -1)
                    attn_buf.resize_as_(attn)
                bsz = new_bsz
            else:
                batch_idxs = None

            # Set active_mask so that values > cand_size indicate eos or
            # blacklisted hypos and values < cand_size indicate candidate
            # active hypos. After this, the min values per row are the top
            # candidate active hypos.
            active_mask = buffer('active_mask')
            eos_mask[:, :beam_size] |= blacklist
            torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[:eos_mask.size(1)],
                out=active_mask,
            )

            # get the top beam_size active hypotheses, which are just the hypos
            # with the smallest values in active_mask
            active_hypos, new_blacklist = buffer('active_hypos'), buffer(
                'new_blacklist')
            torch.topk(active_mask,
                       k=beam_size,
                       dim=1,
                       largest=False,
                       out=(new_blacklist, active_hypos))

            # update blacklist to ignore any finalized hypos
            blacklist = new_blacklist.ge(cand_size)[:, :beam_size]
            assert (~blacklist).any(dim=1).all()

            active_bbsz_idx = buffer('active_bbsz_idx')
            torch.gather(
                cand_bbsz_idx,
                dim=1,
                index=active_hypos,
                out=active_bbsz_idx,
            )
            active_scores = torch.gather(
                cand_scores,
                dim=1,
                index=active_hypos,
                out=scores[:, step].view(bsz, beam_size),
            )

            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses
            torch.index_select(
                tokens[:, :step + 1],
                dim=0,
                index=active_bbsz_idx,
                out=tokens_buf[:, :step + 1],
            )
            torch.gather(
                cand_indices,
                dim=1,
                index=active_hypos,
                out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1],
            )
            if step > 0:
                torch.index_select(
                    scores[:, :step],
                    dim=0,
                    index=active_bbsz_idx,
                    out=scores_buf[:, :step],
                )
            torch.gather(
                cand_scores,
                dim=1,
                index=active_hypos,
                out=scores_buf.view(bsz, beam_size, -1)[:, :, step],
            )

            # copy attention for active hypotheses
            if attn is not None:
                torch.index_select(
                    attn[:, :, :step + 2],
                    dim=0,
                    index=active_bbsz_idx,
                    out=attn_buf[:, :, :step + 2],
                )

            # swap buffers
            tokens, tokens_buf = tokens_buf, tokens
            scores, scores_buf = scores_buf, scores
            if attn is not None:
                attn, attn_buf = attn_buf, attn

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(len(finalized)):
            finalized[sent] = sorted(finalized[sent],
                                     key=lambda r: r['score'],
                                     reverse=True)
        return finalized
Exemplo n.º 54
0
	def forward(self, rgb_inputs, depth_inputs):

		########  DEPTH ENCODER  ########
		# Stage 1
		#x = self.conv11d(depth_inputs)
		x_1 = self.CBR1_DEPTH_ENC(depth_inputs)
		x, id1_d = self.pool1_d(x_1)

		# Stage 2
		x_2 = self.CBR2_DEPTH_ENC(x)
		x, id2_d = self.pool2_d(x_2)

		# Stage 3
		x_3 = self.CBR3_DEPTH_ENC(x)
		x, id3_d = self.pool4_d(x_3)
		x = self.dropout3_d(x)

		# Stage 4
		x_4 = self.CBR4_DEPTH_ENC(x)
		x, id4_d = self.pool4_d(x_4)
		x = self.dropout4_d(x)

		# Stage 5
		x_5 = self.CBR5_DEPTH_ENC(x)

		########  RGB ENCODER  ########

		# Stage 1
		y = self.CBR1_RGB_ENC(rgb_inputs)
		y = torch.add(y,x_1)
		y = torch.div(y,2)
		y, id1 = self.pool1(y)

		# Stage 2
		y = self.CBR2_RGB_ENC(y)
		y = torch.add(y,x_2)
		y = torch.div(y,2)
		y, id2 = self.pool2(y)

		# Stage 3
		y = self.CBR3_RGB_ENC(y)
		y = torch.add(y,x_3)
		y = torch.div(y,2)
		y, id3 = self.pool3(y)
		y = self.dropout3(y)

		# Stage 4
		y = self.CBR4_RGB_ENC(y)
		y = torch.add(y,x_4)
		y = torch.div(y,2)
		y, id4 = self.pool4(y)
		y = self.dropout4(y)

		# Stage 5
		y = self.CBR5_RGB_ENC(y)
		y = torch.add(y,x_5)
		y = torch.div(y,2)
		y_size = y.size()

		y, id5 = self.pool5(y)
		y = self.dropout5(y)

		########  DECODER  ########

		# Stage 5 dec
		y = self.unpool5(y, id5,output_size=y_size)
		y = self.CBR5_RGB_DEC(y)
		y = self.dropout5_dec(y)

		# Stage 4 dec
		y = self.unpool4(y, id4)
		y = self.CBR4_RGB_DEC(y)
		y = self.dropout4_dec(y)

		# Stage 3 dec
		y = self.unpool3(y, id3)
		y = self.CBR3_RGB_DEC(y)
		y = self.dropout3_dec(y)

		# Stage 2 dec
		y = self.unpool2(y, id2)
		y = self.CBR2_RGB_DEC(y)

		# Stage 1 dec
		y = self.unpool1(y, id1)
		y = self.CBR1_RGB_DEC(y)

		return y
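The RGB/depth fusion in the encoder above is an element-wise average of the two feature maps; a minimal standalone sketch of that operation (shapes are illustrative):

rgb_feat = torch.randn(1, 64, 32, 32)
depth_feat = torch.randn(1, 64, 32, 32)
fused = torch.div(torch.add(rgb_feat, depth_feat), 2)  # same as (rgb_feat + depth_feat) / 2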
Exemplo n.º 55
0
    def forward(self, x):

        #print('x',x.size())
        e1 = self.encoder1(x)  #; print('e1',e1.size())
        e2 = self.encoder2(e1)  #; print('e2',e2.size())
        e3 = self.encoder3(e2)  #; print('e3',e3.size())
        e4 = self.encoder4(e3)  #; print('e4',e4.size())
        e5 = self.encoder5(e4)  #; print('e5',e5.size())
        f = self.center(e5)  #; print('center',f.size())
        if self.dilation:
            f1 = self.center1(f)  #; print('center',f1.size())
            f2 = self.center2(f1)  #; print('center',f2.size())
            # f3=self.center3(f2); print('center',f3.size())
            # f4=self.center4(f3); print('center',f4.size())
            #f5=self.center5(f4)
            f = torch.add(f, f1)  # same as the deprecated torch.add(f, 1, f1)
            f = torch.add(f, f2)
        # f=torch.cat((
        #     f,
        #     f1,
        # f2,
        # f3,
        # f4,
        # ),1)
        f = F.interpolate(f,
                          scale_factor=2,
                          mode='bilinear',
                          align_corners=True)
        d5 = self.decoder5(torch.cat([f, e5], 1))  #; print('d5',d5.size())
        d5 = F.interpolate(d5,
                           scale_factor=2,
                           mode='bilinear',
                           align_corners=True)
        d4 = self.decoder4(torch.cat([d5, e4], 1))  #; print('d4',d4.size())
        d4 = F.interpolate(d4,
                           scale_factor=2,
                           mode='bilinear',
                           align_corners=True)
        d3 = self.decoder3(torch.cat([d4, e3], 1))  #; print('d3',d3.size())
        d3 = F.interpolate(d3,
                           scale_factor=2,
                           mode='bilinear',
                           align_corners=True)
        d2 = self.decoder2(torch.cat([d3, e2], 1))  #; print('d2',d2.size())
        d2 = F.interpolate(d2,
                           scale_factor=2,
                           mode='bilinear',
                           align_corners=True)
        d1 = self.decoder1(d2)  #; print('d1',d1.size())

        f = torch.cat(
            (d1,
             F.interpolate(
                 d2, scale_factor=1, mode='bilinear', align_corners=False),
             F.interpolate(
                 d3, scale_factor=2, mode='bilinear', align_corners=False),
             F.interpolate(
                 d4, scale_factor=4, mode='bilinear', align_corners=False),
             F.interpolate(
                 d5, scale_factor=8, mode='bilinear', align_corners=False)), 1)
        f = F.dropout2d(f, p=0.20)
        logit = self.logit(f)  #; print('logit',logit.size())
        return logit
Exemplo n.º 56
0
    def forward(self, x, state=None):
        h = x[:, :3]
        f = x[:, 3:]

        h = self.relu1_1(self.conv1_1(h))
        h = self.relu1_2(self.conv1_2(h))
        h = self.pool1(h)

        h = self.relu2_1(self.conv2_1(h))
        h = self.relu2_2(self.conv2_2(h))
        h = self.pool2(h)

        h = self.relu3_1(self.conv3_1(h))
        h = self.relu3_2(self.conv3_2(h))
        h = self.relu3_3(self.conv3_3(h))

        #Flow forward-pass here
        f = self.relu_flow1_1(self.conv_flow1_1(f))
        f = self.relu_flow1_2(self.conv_flow1_2(f))
        f = self.pool_flow1(f)

        f = self.relu_flow2_1(self.conv_flow2_1(f))
        f = self.relu_flow2_2(self.conv_flow2_2(f))
        f = self.pool_flow2(f)

        f = self.relu_flow3_1(self.conv_flow3_1(f))
        f = self.relu_flow3_2(self.conv_flow3_2(f))
        f = self.relu_flow3_3(self.conv_flow3_3(f))

        #Sum flow and RGB features (could concatenate instead)
        h = torch.add(h, f)
        h = self.pool3(h)
        pool3 = h  # 1/8

        h = self.relu4_1(self.conv4_1(h))
        h = self.relu4_2(self.conv4_2(h))
        h = self.relu4_3(self.conv4_3(h))
        h = self.pool4(h)
        pool4 = h  # 1/16

        h = self.relu5_1(self.conv5_1(h))
        h = self.relu5_2(self.conv5_2(h))
        h = self.relu5_3(self.conv5_3(h))
        h = self.pool5(h)

        h = self.relu6(self.fc6(h))
        h = self.drop6(h)
        #h, _ = self.fc7(h, None)
        h = self.fc7(h)  #This is the ConvLSTM Block

        h = self.relu7(h)
        h = self.drop7(h)

        h = self.score_fr(h)
        h = self.upscore2(h)
        upscore2 = h  # 1/16

        h = self.score_pool4(pool4)
        h = h[:, :, 5:5 + upscore2.size()[2], 5:5 + upscore2.size()[3]]
        score_pool4c = h  # 1/16

        h = upscore2 + score_pool4c  # 1/16
        h = self.upscore_pool4(h)
        upscore_pool4 = h  # 1/8

        h = self.score_pool3(pool3)
        h = h[:, :, 9:9 + upscore_pool4.size()[2],
              9:9 + upscore_pool4.size()[3]]
        score_pool3c = h  # 1/8

        h = upscore_pool4 + score_pool3c  # 1/8

        h = self.upscore8(h)
        h = h[:, :, 31:31 + x.size()[2], 31:31 + x.size()[3]].contiguous()
        #print(h[:,1].shape)
        h[:, 0] = torch.sigmoid(h[:, 0].clone())
        return h
Exemplo n.º 57
0
    def forward(self,
                x,
                lengths,
                volatile=False,
                target_align=None,
                length_whole=None):
        """ sample a tree for each sentence """
        lengths = length_whole
        max_select_cnt = int(lengths.max(dim=0)[0].item()) - 1

        tree_indices = list()
        tree_probs = list()
        span_bounds = list()
        features = list()
        left_span_features = list()
        right_span_features = list()

        # closed range: [left_bounds[i], right_bounds[i]]
        left_bounds = utils.add_dim(
            torch.arange(0,
                         max_select_cnt + 1,
                         dtype=torch.long,
                         device=x.device), 0, x.size(0))
        right_bounds = left_bounds
        # use the embedding layer to generate the embeddings for the whole sentence
        # * is correct?
        # TODO use more hidden layers from bert model
        if self.embedding_type == 'bert':
            #! segment_ids
            segment_ids = torch.ones_like(x)
            #import ipdb; ipdb.set_trace()
            # TODO get whole word embedding here
            sem_embeddings, _ = self.sem_embedding(x, segment_ids)
            #! ADD embeddings together
            ind = target_align
            #import ipdb; ipdb.set_trace()
            # import ipdb; ipdb.set_trace()
            sem_embeddings = torch.matmul(ind.permute(0, 2, 1), sem_embeddings)
            sem_embeddings = sem_embeddings / sem_embeddings.max(
                dim=2, keepdim=True)[0]
            # print("bert go!")
        else:
            sem_embeddings = self.sem_embedding(x)

        syn_embeddings = sem_embeddings

        output_word_embeddings = sem_embeddings * \
            sequence_mask(lengths, max_length=lengths.max()).unsqueeze(-1).float()

        valid_bs = lengths.size(0)
        for i in range(max_select_cnt):
            seq_length = sem_embeddings.size(1)
            # set invalid positions to 0 prob
            # [0, 0, ..., 1, 1, ...]
            length_mask = 1 - sequence_mask((lengths - 1 - i).clamp(min=0),
                                            max_length=seq_length - 1).float()
            # 0 = done
            undone_mask = 1 - length_mask[:, 0]

            syn_feats = torch.cat((l2norm(
                syn_embeddings[:, 1:]), l2norm(syn_embeddings[:, :-1])),
                                  dim=2)
            prob_logits = self.syn_score(syn_feats).squeeze(-1)

            prob_logits = prob_logits - 1e10 * length_mask
            probs = F.softmax(prob_logits, dim=1)

            if not volatile:
                sampler = Categorical(probs)
                indices = sampler.sample()
            else:
                indices = probs.max(1)[1]
            tree_indices.append(indices)
            tree_probs.append(index_one_hot_ellipsis(probs, 1, indices))

            this_spans = torch.stack([
                index_one_hot_ellipsis(left_bounds, 1, indices),
                index_one_hot_ellipsis(right_bounds, 1, indices + 1)
            ],
                                     dim=1)
            this_features = torch.add(
                index_one_hot_ellipsis(sem_embeddings, 1, indices),
                index_one_hot_ellipsis(sem_embeddings, 1, indices + 1))
            this_left_features = index_one_hot_ellipsis(
                sem_embeddings, 1, indices)
            this_right_features = index_one_hot_ellipsis(
                sem_embeddings, 1, indices + 1)
            this_features = l2norm(this_features)
            this_left_features = l2norm(this_left_features)
            this_right_features = l2norm(this_right_features)

            span_bounds.append(this_spans)
            features.append(
                l2norm(this_features) * undone_mask.unsqueeze(-1).float())
            left_span_features.append(this_left_features *
                                      undone_mask.unsqueeze(-1).float())
            right_span_features.append(this_right_features *
                                       undone_mask.unsqueeze(-1).float())

            # update word embeddings
            left_mask = sequence_mask(indices, max_length=seq_length).float()
            right_mask = 1 - sequence_mask(indices + 2,
                                           max_length=seq_length).float()
            center_mask = index_mask(indices, max_length=seq_length).float()
            update_masks = (left_mask, right_mask, center_mask)

            this_features_syn = torch.add(
                index_one_hot_ellipsis(syn_embeddings, 1, indices),
                index_one_hot_ellipsis(syn_embeddings, 1, indices + 1))
            this_features_syn = l2norm(this_features_syn)
            syn_embeddings = self.update_with_mask(syn_embeddings,
                                                   syn_embeddings,
                                                   this_features_syn,
                                                   *update_masks)

            sem_embeddings = self.update_with_mask(sem_embeddings,
                                                   sem_embeddings,
                                                   this_features,
                                                   *update_masks)
            left_bounds = self.update_with_mask(left_bounds, left_bounds,
                                                this_spans[:,
                                                           0], *update_masks)
            right_bounds = self.update_with_mask(right_bounds, right_bounds,
                                                 this_spans[:,
                                                            1], *update_masks)

        return features, left_span_features, right_span_features, output_word_embeddings, tree_indices, \
               tree_probs, span_bounds
Exemplo n.º 58
0
    def forward(self, x):
        out = self.conv_block(x)
        repeat_num = 16 // self.in_channels
        x16 = x.repeat([1, repeat_num, 1, 1, 1][: self.spatial_dims + 2])
        out = self.act_function(torch.add(out, x16))
        return out
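The skip connection above repeats the input along the channel axis so it matches the block's 16 output channels before the addition; a standalone sketch of that trick with illustrative shapes (3 spatial dims, 1 input channel):

x = torch.randn(2, 1, 8, 8, 8)            # (batch, in_channels, D, H, W)
out = torch.randn(2, 16, 8, 8, 8)         # stand-in for conv_block(x)
repeat_num = 16 // x.shape[1]
x16 = x.repeat([1, repeat_num, 1, 1, 1])  # -> (2, 16, 8, 8, 8)
res = torch.relu(torch.add(out, x16))     # stand-in for act_function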
Exemplo n.º 59
0
    def se(self,
           X,
           Y,
           Re=None,
           batch_size=25,
           parallel=0,
           sampling=100,
           each_species=True):
        dataLoader = self._get_DataLoader(X,
                                          Y,
                                          Re,
                                          batch_size=batch_size,
                                          shuffle=False)
        loss_func = self.__build_loss_function(train=True)
        se = []
        y_dim = np.size(self.weights_numpy[0][0], 1)
        weights = self.weights[0][0]
        zero = torch.tensor(0.0, dtype=self.dtype).to(self.device)
        one = torch.tensor(1.0, dtype=self.dtype).to(self.device)
        re_loss = lambda value: torch.distributions.Normal(zero, one).log_prob(
            value)

        _ = sys.stdout.write("\nCalculating standard errors...\n")

        if each_species:
            for i in range(y_dim):
                _ = sys.stdout.write("\rSpecies: {}/{} ".format(i + 1, y_dim))
                sys.stdout.flush()
                weights = torch.tensor(self.weights_numpy[0][0][:, i].reshape(
                    [-1, 1]),
                                       device=self.device,
                                       dtype=self.dtype,
                                       requires_grad=True).to(self.device)
                if i == 0:
                    constants = torch.tensor(self.weights_numpy[0][0][:, (i +
                                                                          1):],
                                             device=self.device,
                                             dtype=self.dtype).to(self.device)
                    w = torch.cat([weights, constants], dim=1)
                elif i < y_dim:
                    w = torch.cat([
                        torch.tensor(self.weights_numpy[0][0][:, 0:i],
                                     device=self.device,
                                     dtype=self.dtype).to(self.device),
                        weights,
                        torch.tensor(self.weights_numpy[0][0][:, (i + 1):],
                                     device=self.device,
                                     dtype=self.dtype).to(self.device)
                    ],
                                  dim=1)
                else:
                    constants = torch.tensor(self.weights_numpy[0][0][:, 0:i],
                                             device=self.device,
                                             dtype=self.dtype).to(self.device)
                    w = torch.cat([constants, weights], dim=1)
                for step, (x, y, re) in enumerate(dataLoader):
                    x = x.to(self.device, non_blocking=True)
                    y = y.to(self.device, non_blocking=True)
                    spatial_re = self.re.gather(
                        0, re.to(self.device, non_blocking=True))
                    mu = torch.nn.functional.linear(x, w.t())
                    loss = loss_func(mu, y, spatial_re, x.shape[0],
                                     sampling).sum().add(
                                         re_loss(spatial_re).sum())
                    #loss = torch.add(torch.sum(loss), torch.sum(re_loss(spatial_re)))
                    first_gradients = torch.autograd.grad(loss,
                                                          weights,
                                                          retain_graph=True,
                                                          create_graph=True,
                                                          allow_unused=True)
                    second = []
                    for j in range(self.input_shape):
                        second.append(
                            torch.autograd.grad(first_gradients[0][j, 0],
                                                inputs=weights,
                                                retain_graph=True,
                                                create_graph=False,
                                                allow_unused=False)[0])
                        hessian = torch.cat(second, dim=1)
                    if step < 1:
                        hessian_out = hessian
                    else:
                        hessian_out += hessian
                se.append(
                    torch.sqrt(torch.diag(
                        torch.inverse(hessian_out))).data.cpu().numpy())
            return se
        else:
            for step, (x, y, re) in enumerate(dataLoader):
                x = x.to(self.device, non_blocking=True)
                y = y.to(self.device, non_blocking=True)
                spatial_re = self.re.gather(
                    0, re.to(self.device, non_blocking=True))
                mu = self.layers[0](x)
                loss = torch.add(
                    torch.sum(
                        loss_func(mu, y, spatial_re, x.shape[0], sampling)),
                    torch.sum(re_loss(spatial_re)))
                first_gradients = torch.autograd.grad(
                    loss,
                    weights,
                    retain_graph=True,
                    create_graph=True,
                    allow_unused=True)[0].reshape([-1])
                hessian = []
                for j in range(first_gradients.shape[0]):
                    hessian.append(
                        torch.autograd.grad(
                            first_gradients[j],
                            inputs=weights,
                            retain_graph=True,
                            create_graph=False,
                            allow_unused=False)[0].reshape([-1]).reshape(
                                [y_dim * self.input_shape, 1]))
                hessian = torch.cat(hessian, dim=1)
                if step < 1:
                    hessian_out = hessian
                else:
                    hessian_out += hessian
            return hessian_out.data.cpu().numpy()
    def forward(self, x):
        r"""
        The :func:`~gpytorch.variational.VariationalStrategy.forward` method determines how to marginalize out the
        inducing point function values. Specifically, forward defines how to transform a variational distribution
        over the inducing point values, :math:`q(u)`, in to a variational distribution over the function values at
        specified locations x, :math:`q(f|x)`, by integrating :math:`\int p(f|x, u)q(u)du`

        :param torch.Tensor x: Locations x to get the variational posterior of the function values at.
        :rtype: ~gpytorch.distributions.MultivariateNormal
        :return: The distribution :math:`q(f|x)`
        """
        variational_dist = self.variational_distribution
        inducing_points = self.inducing_points
        if inducing_points.dim() < x.dim():
            inducing_points = inducing_points.expand(*x.shape[:-2], *inducing_points.shape[-2:])
        if len(variational_dist.batch_shape) < x.dim() - 2:
            variational_dist = variational_dist.expand(x.shape[:-2])

        # If our points equal the inducing points, we're done
        if torch.equal(x, inducing_points):
            # De-whiten the prior covar
            prior_covar = self.prior_distribution.lazy_covariance_matrix
            if isinstance(variational_dist.lazy_covariance_matrix, RootLazyTensor):
                predictive_covar = RootLazyTensor(prior_covar @ variational_dist.lazy_covariance_matrix.root.evaluate())
            else:
                predictive_covar = MatmulLazyTensor(prior_covar @ variational_dist.covariance_matrix, prior_covar)

            # Cache some values for the KL divergence
            if self.training:
                self._mean_diff_inv_quad_memo, self._logdet_memo = prior_covar.inv_quad_logdet(
                    (variational_dist.mean - self.prior_distribution.mean), logdet=True
                )

            return MultivariateNormal(variational_dist.mean, predictive_covar)

        # Otherwise, we have to marginalize
        else:
            num_induc = inducing_points.size(-2)
            full_inputs = torch.cat([inducing_points, x], dim=-2)
            full_output = self.model.forward(full_inputs)
            full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix

            # Mean terms
            test_mean = full_mean[..., num_induc:]
            induc_mean = full_mean[..., :num_induc]
            mean_diff = (variational_dist.mean - induc_mean).unsqueeze(-1)

            # Covariance terms
            induc_induc_covar = full_covar[..., :num_induc, :num_induc].add_jitter()
            induc_data_covar = full_covar[..., :num_induc, num_induc:].evaluate()
            data_data_covar = full_covar[..., num_induc:, num_induc:]

            # If we're less than a certain size, we'll compute the Cholesky decomposition of induc_induc_covar
            cholesky = False
            if settings.fast_computations.log_prob.off() or (num_induc <= settings.max_cholesky_size.value()):
                induc_induc_covar = CholLazyTensor(induc_induc_covar.cholesky())
                cholesky = True

            # Cache the CG results
            # Do not use preconditioning for whitened VI, as it does not seem to improve performance.
            with settings.max_preconditioner_size(0):
                with torch.no_grad():
                    eager_rhs = torch.cat([induc_data_covar, mean_diff], -1)
                    solve, probe_vecs, probe_vec_norms, probe_vec_solves, tmats = CachedCGLazyTensor.precompute_terms(
                        induc_induc_covar,
                        eager_rhs.detach(),
                        logdet_terms=(not cholesky),
                        include_tmats=(not settings.skip_logdet_forward.on() and not cholesky),
                    )
                    eager_rhss = [eager_rhs.detach()]
                    solves = [solve.detach()]
                    if settings.skip_logdet_forward.on() and self.training:
                        eager_rhss.append(torch.cat([probe_vecs, eager_rhs], -1))
                        solves.append(torch.cat([probe_vec_solves, solve[..., : eager_rhs.size(-1)]], -1))
                    elif not self.training:
                        eager_rhss.append(eager_rhs[..., :-1])
                        solves.append(solve[..., :-1])

                induc_induc_covar = CachedCGLazyTensor(
                    induc_induc_covar,
                    eager_rhss=eager_rhss,
                    solves=solves,
                    probe_vectors=probe_vecs,
                    probe_vector_norms=probe_vec_norms,
                    probe_vector_solves=probe_vec_solves,
                    probe_vector_tmats=tmats,
                )

            # Compute some terms that will be necessary for the predictive covariance and KL divergence
            if self.training:
                interp_data_data_var_plus_mean_diff_inv_quad, logdet = induc_induc_covar.inv_quad_logdet(
                    torch.cat([induc_data_covar, mean_diff], -1), logdet=True, reduce_inv_quad=False
                )
                interp_data_data_var = interp_data_data_var_plus_mean_diff_inv_quad[..., :-1]
                mean_diff_inv_quad = interp_data_data_var_plus_mean_diff_inv_quad[..., -1]

            # Compute predictive mean
            predictive_mean = torch.add(
                test_mean,
                induc_induc_covar.inv_matmul(mean_diff, left_tensor=induc_data_covar.transpose(-1, -2)).squeeze(-1),
            )

            # Compute the predictive covariance
            is_root_lt = isinstance(variational_dist.lazy_covariance_matrix, RootLazyTensor)
            is_repeated_root_lt = isinstance(
                variational_dist.lazy_covariance_matrix, BatchRepeatLazyTensor
            ) and isinstance(variational_dist.lazy_covariance_matrix.base_lazy_tensor, RootLazyTensor)
            if is_root_lt:
                predictive_covar = RootLazyTensor(
                    induc_data_covar.transpose(-1, -2) @ variational_dist.lazy_covariance_matrix.root.evaluate()
                )
            elif is_repeated_root_lt:
                predictive_covar = RootLazyTensor(
                    induc_data_covar.transpose(-1, -2)
                    @ variational_dist.lazy_covariance_matrix.root_decomposition().root.evaluate()
                )
            else:
                # K_xz @ S @ K_zx, where S is the variational covariance
                predictive_covar = MatmulLazyTensor(
                    induc_data_covar.transpose(-1, -2),
                    variational_dist.lazy_covariance_matrix @ induc_data_covar,
                )

            if self.training:
                data_covariance = DiagLazyTensor((data_data_covar.diag() - interp_data_data_var).clamp(0, math.inf))
            else:
                neg_induc_data_data_covar = torch.matmul(
                    induc_data_covar.transpose(-1, -2).mul(-1), induc_induc_covar.inv_matmul(induc_data_covar)
                )
                data_covariance = data_data_covar + neg_induc_data_data_covar
            predictive_covar = PsdSumLazyTensor(predictive_covar, data_covariance)

            # Save the logdet, mean_diff_inv_quad, prior distribution for the ELBO
            if self.training:
                self._memoize_cache["prior_distribution_memo"] = MultivariateNormal(induc_mean, induc_induc_covar)
                self._memoize_cache["logdet_memo"] = -logdet
                self._memoize_cache["mean_diff_inv_quad_memo"] = mean_diff_inv_quad

            return MultivariateNormal(predictive_mean, predictive_covar)