Example #1
    def _forward_alg(self, feats):
        # Do the forward algorithm to compute the partition function
        init_alphas = torch.Tensor(1, self.tagset_size).fill_(-10000.)
        # START_TAG has all of the score.
        init_alphas[0][self.tag_to_ix[START_TAG]] = 0.

        # Wrap in a variable so that we will get automatic backprop
        forward_var = autograd.Variable(init_alphas)

        # Iterate through the sentence
        for feat in feats:
            alphas_t = []  # The forward variables at this timestep
            for next_tag in range(self.tagset_size):
                # broadcast the emission score: it is the same regardless of
                # the previous tag
                emit_score = feat[next_tag].view(1, -1).expand(
                    1, self.tagset_size)
                # the ith entry of trans_score is the score of transitioning to
                # next_tag from i
                trans_score = self.transitions[next_tag].view(1, -1)
                # The ith entry of next_tag_var is the value for the
                # edge (i -> next_tag) before we do log-sum-exp
                next_tag_var = forward_var + trans_score + emit_score
                # The forward variable for this tag is log-sum-exp of all the
                # scores.
                alphas_t.append(log_sum_exp(next_tag_var))
            forward_var = torch.cat(alphas_t).view(1, -1)
        terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]
        alpha = log_sum_exp(terminal_var)
        return alpha
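The log_sum_exp helper called throughout these snippets is not included in the collection; a minimal, numerically stable sketch for the 1 x tagset_size score rows used in Example #1 (only the function name is taken from the snippets, the body is illustrative) could look like this:

import torch

def log_sum_exp(vec):
    # assumed helper, not shown in the source snippets
    # vec: (1, tagset_size); subtract the max before exponentiating for stability
    max_score = vec.max()
    return max_score + torch.log(torch.sum(torch.exp(vec - max_score)))

Some of the later examples call .view(1) on the result before concatenating the per-tag values back into a row.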
Example #2
    def _forward_alg(self, fts):
        bsz, tag_size = fts.shape[1], fts.shape[2]
        # init_alphas: (bsz, tag_size)
        init_alphas = self.dummy.new(bsz, self.tag_size).fill_(NINF)
        init_alphas[:, self.tag2idx[TAG_BOS]] = 0.

        # forward_var: (bsz, tag_size)
        forward_var = init_alphas

        # fts: (seq_len, bsz, tag_size)
        # ft: (bsz, tag_size)
        # trans: (tag_size, tag_size)
        for ft in fts:
            alphas_t = []
            for next_tag in range(self.tag_size):
                # emit_score: (bsz, 1)
                emit_score = ft[:, next_tag].unsqueeze(-1)
                # trans_score: (bsz, tag_size)
                trans_score = self.trans[next_tag].expand(bsz, tag_size)
                # next_tag_var: (bsz, tag_size)
                next_tag_var = forward_var + trans_score + emit_score

                alphas_t.append(utils.log_sum_exp(next_tag_var))
            # alphas_t(list): tag_size * (bsz, 1)
            # forward_var: (bsz, tag_size)
            forward_var = torch.cat(alphas_t, dim=1)

        terminal_var = forward_var + self.trans[self.tag2idx[TAG_EOS]]
        alpha = utils.log_sum_exp(terminal_var)
        return alpha
Example #3
 def _forward_alg(self, feats):
     # Do the forward algorithm to compute the partition function
     if self.gpu:
         init_alphas = torch.full((1, self.tagset_size), -10000., device='cuda:0')
     else:
         init_alphas = torch.full((1, self.tagset_size), -10000.)
     # START_TAG has all of the score.
     init_alphas[0][self.tag_to_ix[START_TAG]] = 0.
     forward_var = init_alphas
     for feat in feats:
         alphas_t = []  # The forward tensors at this timestep
         for next_tag in range(self.tagset_size):
             # broadcast the emission score
             emit_score = feat[next_tag].view(1, 1).expand(1, self.tagset_size)
             # the ith entry of trans_score is the score of transitioning to next_tag from i
             trans_score = self.transitions[next_tag].view(1, -1)
             # The ith entry of next_tag_var is the value for the
             # edge (i -> next_tag) before we do log-sum-exp
             next_tag_var = forward_var + trans_score + emit_score
             # The forward variable for this tag is log-sum-exp of all the
             # scores.
             alphas_t.append(log_sum_exp(next_tag_var).view(1))
         forward_var = torch.cat(alphas_t).view(1, -1)
     terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]
     alpha = log_sum_exp(terminal_var)
     return alpha
Example #4
    def _e_step(self):

        # compute alphas and betas
        self._forward()
        # print 'alphas: {}'.format(self.alphas)
        self._backward()
        # print 'betas: {}'.format(self.betas)

        # gammas
        self.gammas = self.alphas + self.betas
        for tidx in range(self.T):
            self.gammas[tidx, :] -= utils.log_sum_exp(self.gammas[tidx, :])
        self.gammas = np.exp(self.gammas)
        # print 'gammas: {}'.format(self.gammas)

        # etas
        for tidx in range(self.T - 1):
            for i in range(self.k):
                for j in range(self.k):
                    a = self.alphas[tidx, i]
                    b = self.betas[tidx + 1, j]
                    transition_prob = np.log(self.A[i, j])
                    emission_prob = log_poisson_density(self.data[tidx + 1], self.B[j])
                    self.etas[tidx, i, j] = a + transition_prob + emission_prob + b

        self.etas -= utils.log_sum_exp(self.alphas[-1, :])
        self.etas = np.exp(self.etas)
        # print 'etas: {}'.format(self.etas)
        # raw_input()

        return np.random.rand() * 1000
Example #5
    def _forward_alg_pp(self, feats):
        # Do the forward algorithm to compute the partition function
        init_alphas = torch.full((1, self.tagset_size),
                                 -100,
                                 dtype=torch.float,
                                 requires_grad=True).to(device=self.device)
        # START_TAG has all of the score.
        init_alphas[0][self.tag_to_ix[DatasetPreprosessed.__START_TAG__]] = 0.

        # Wrap in a variable so that we will get automatic backprop
        forward_var = init_alphas

        # Iterate through the sentence
        for feat in feats[:-1]:
            alphas_t = []  # The forward tensors at this timestep
            for next_tag in range(self.tagset_size):
                # the ith entry of trans_score is the score of transitioning to
                # next_tag from i
                trans_score = feat.view(self.tagset_size,
                                        self.tagset_size)[next_tag].view(
                                            1, -1).expand(1, self.tagset_size)
                # The ith entry of next_tag_var is the value for the
                # edge (i -> next_tag) before we do log-sum-exp
                next_tag_var = forward_var + trans_score
                # The forward variable for this tag is log-sum-exp of all the
                # scores.
                alphas_t.append(utils.log_sum_exp(next_tag_var).view(1))
            forward_var = torch.cat(alphas_t).view(1, -1)
        terminal_var = forward_var + feats[-1].view(
            self.tagset_size,
            self.tagset_size)[self.tag_to_ix[DatasetPreprosessed.__STOP_TAG__]]
        alpha = utils.log_sum_exp(terminal_var)
        return alpha
Example #6
def U_z(z, uid=0):
    eps = 1e-8
    z1 = z[:, 0]
    z2 = z[:, 1]
    w1z = torch.sin(math.pi / 2 * z1)
    w2z = 3.0 * torch.exp(-0.5 * ((z1 - 1) / 0.6)**2)
    w3z = 3.0 * F.sigmoid((z1 - 1) / 0.3)

    if uid == 1:  # Potential 1 in NF paper
        tmp = torch.cat(((-0.5 * ((z1 - 2) / 0.6)**2).view(-1, 1),
                         (-0.5 * ((z1 + 2) / 0.6)**2).view(-1, 1)), 1)
        return 0.5 * (
            (z.norm(p=2, dim=1) - 2) / 0.4)**2 - log_sum_exp(tmp, dim=1)
    elif uid == 2:  # Potential 2 in NF paper
        return 0.5 * ((z2 - w1z) / 0.4)**2
    elif uid == 3:  # Potential 3 in NF paper
        tmp = torch.cat(((-0.5 * ((z2 - w1z) / 0.35)**2).view(-1, 1),
                         (-0.5 * ((z2 - w1z + w2z) / 0.35)**2).view(-1, 1)), 1)
        return -log_sum_exp(tmp, dim=1)
    elif uid == 4:  # Potential 4 in NF paper
        tmp = torch.cat(((-0.5 * ((z2 - w1z) / 0.4)**2).view(-1, 1),
                         (-0.5 * ((z2 - w1z + w3z) / 0.35)**2).view(-1, 1)), 1)
        return -log_sum_exp(tmp, dim=1)
    else:
        return 1
Example #7
    def train_c(self, labeled_loader, unlabeled_loader):
        args = self.args
        set_require_grad(self.classifier, requires_grad=True)
        # standard classification loss
        lab_data, lab_labels = labeled_loader.next()
        lab_data, lab_labels = tensor2Var(lab_data), tensor2Var(lab_labels)

        lab_labels = lab_labels.view(-1)
        unl_data, _ = unlabeled_loader.next()
        unl_data = tensor2Var(unl_data)

        noise = create_noise(unl_data.size(0), args.noise_size)
        noise = tensor2Var(noise)

        gen_data = self.gen(noise).detach()

        lab_logits = self.classifier(lab_data, 'class')
        unl_logits = self.classifier(unl_data, 'class')
        gen_logits = self.classifier(gen_data, 'class')

        lab_loss = F.cross_entropy(lab_logits, lab_labels)

        unl_logsumexp = log_sum_exp(unl_logits)
        gen_logsumexp = log_sum_exp(gen_logits)

        unl_acc = torch.mean(torch.sigmoid(unl_logsumexp.detach()).gt(0.5).float())
        gen_acc = torch.mean(torch.sigmoid(gen_logsumexp.detach()).lt(0.5).float())

        # This is the typical GAN cost, where logsumexp(logits) is treated as the input to the sigmoid
        true_loss = - 0.5 * torch.mean(unl_logsumexp) + 0.5 * torch.mean(F.softplus(unl_logsumexp))
        fake_loss = 0.5 * torch.mean(F.softplus(gen_logsumexp))

        # max_unl_acc = torch.mean(unl_logits.max(1)[0].detach().gt(0.0).float())
        # max_gen_acc = torch.mean(gen_logits.max(1)[0].detach().gt(0.0).float())

        unl_prob = F.softmax(unl_logits, dim=1)

        entropy = -(unl_prob * torch.log(unl_prob + 1e-8)).sum(1).mean()

        unl_loss = true_loss + fake_loss
         
        c_loss = lab_loss + args.lambda_gan * unl_loss + args.lambda_e * entropy

        if args.lambda_consistency > 0:
            unl_logits_2 = self.classifier(unl_data, 'class')
            unl_prob_2 = F.softmax(unl_logits_2, dim=1)
            consistency_loss = ((unl_prob - unl_prob_2) ** 2).mean()

            c_loss += args.lambda_consistency * consistency_loss

            if self.total_iter % 1000 == 0:
                print(consistency_loss)

        self.classifier_opt.zero_grad()
        c_loss.backward()
        self.classifier_opt.step()

        return lab_loss.cpu().item(), unl_loss.cpu().item(), entropy.cpu().item()
Example #8
    def _forward_alg(self, feats, sentence_masks, device):
        """
        Get alpha values for CRF
        :param feats: LSTM output, batch x max_seq x tag
        :param sentence_masks: binary (0,1) int matrix, batch x max_seq
        :param device: device info
        :return: alpha values for each sentence
        """

        batch_size, max_seq_length, tag_num = feats.size()
        sentence_lengths = torch.sum(sentence_masks, 1)

        # initialize alpha with a Tensor with values all equal to Constants.Invalid_Transition, 1 x tag
        init_alphas = torch.Tensor(1, self.tag_set_size).fill_(
            Constants.Invalid_Transition)
        init_alphas[0][self.tag_to_id[Constants.Tag_Start]] = 0.

        # batch x 1 x tag
        forward_var = init_alphas.view(1, 1,
                                       tag_num).expand(batch_size, 1, tag_num)

        all_alphas = torch.zeros((max_seq_length, batch_size, tag_num),
                                 dtype=torch.float)
        if self.use_gpu:
            forward_var = forward_var.to(device)
            all_alphas = all_alphas.to(device)

        for i in range(max_seq_length):
            # batch x tag
            feat = feats[:, i, :]
            # batch x tag x 1
            emit_score = feat.view(batch_size, tag_num, 1)
            # batch x tag x tag
            transition_expanded = self.transitions.view(
                1, tag_num, tag_num).expand(batch_size, tag_num, tag_num)
            # batch x tag x tag
            tag_var = forward_var + transition_expanded + emit_score
            # batch x tag --> batch x 1 x tag
            new_forward_var = log_sum_exp(tag_var, dim=2)
            forward_var = new_forward_var.unsqueeze(1)
            all_alphas[i] = new_forward_var

        # max_seq x batch x tag
        forward_var_selection = (sentence_lengths - 1).view(1, -1, 1).expand(
            1, -1, tag_num)
        # batch x tag
        forward_var_last = torch.gather(all_alphas, 0,
                                        forward_var_selection).squeeze(0)

        terminal_var = forward_var_last + self.transitions[
            self.tag_to_id[Constants.Tag_End], :].view(1, -1)
        # batch
        Z = log_sum_exp(terminal_var, dim=1)

        return Z
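Several of the batched snippets (this one and those that follow) pass an explicit reduction dimension to the helper; a plausible sketch, equivalent to torch.logsumexp(tensor, dim), is:

import torch

def log_sum_exp(tensor, dim=-1):
    # assumed helper, not shown in the source snippets
    # numerically stable log-sum-exp along `dim`; the reduced dimension is dropped
    max_score, _ = tensor.max(dim=dim, keepdim=True)
    return max_score.squeeze(dim) + torch.log(
        torch.sum(torch.exp(tensor - max_score), dim=dim))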
Example #9
 def forward(self, h, mask):  # forward algorithm
     # initialize forward variables in log space
     score = Tensor(BATCH_SIZE, self.num_tags).fill_(-10000.)  # [B, C]
     score[:, SOS_IDX] = 0.
     trans = self.trans.unsqueeze(0)  # [1, C, C]
     for t in range(h.size(1)):  # recursion through the sequence
         mask_t = mask[:, t].unsqueeze(1)
         emit_t = h[:, t].unsqueeze(2)  # [B, C, 1]
         score_t = score.unsqueeze(
             1) + emit_t + trans  # [B, 1, C] -> [B, C, C]
         score_t = log_sum_exp(score_t)  # [B, C, C] -> [B, C]
         score = score_t * mask_t + score * (1 - mask_t)
     score = log_sum_exp(score + self.trans[EOS_IDX])
     return score  # partition function
Example #10
 def partition(self, unary_pot):
     score = utils.Tensor(unary_pot.size(0), self.total_labels).fill_(LOW_POT)
     score[:, self.START_IDX] = 0.0
     score = Variable(score)
     for t in range(unary_pot.size(1)):  # iterate through the sequence
         score_t = score.unsqueeze(-1).expand(-1, -1, self.total_labels)
         emit = unary_pot[:, t, :].unsqueeze(-1).expand(-1, -1, self.total_labels).transpose(1, 2)
         trans = self.transition_table.unsqueeze(0).expand(unary_pot.size(0), -1, -1).transpose(1, 2)
         score = utils.log_sum_exp(score_t + emit + trans, 1)
     # take care of the transition to self.END_IDX
     score = score + self.transition_table[self.END_IDX].unsqueeze(0).expand_as(score)
     score = utils.log_sum_exp(score)
     return score  # partition function
Example #11
    def iwae(nll, p_nu, q_nu, p_z, q_z, p_a, q_a, batch_sz, sz,
             num_importance_samples):
        kl_divergence = distributions.kl_divergence

        # the global variables are repeated (because they are sampled once per batch); the local ones are not rescaled
        logK = np.log(num_importance_samples)
        components = (
            -log_sum_exp(-nll.sum(1).view(num_importance_samples, batch_sz), 0)
            + logK, kl_divergence(q_nu, p_nu).sum().repeat(batch_sz) / sz,
            -log_sum_exp(
                -kl_divergence(q_z, p_z).sum(1).view(num_importance_samples,
                                                     batch_sz), 0) + logK,
            kl_divergence(q_a, p_a).sum().repeat(batch_sz) / sz)
        return components
Example #12
    def calc_weights(self, beta=1.0):
        """
        Calculate the canonical weights to be in a canonical average
        for a list of energies.
        """

        if (numpy.any(self.support)):
            # Calculate the probability of each bin according to the
            # canonical ensemble within the common support
            lnGs = self.lnG[self.support]
            Es = self.bin_centers[self.support]

            lnZ = log_sum_exp(lnGs - beta*Es)

            P = numpy.zeros(self.support.shape)
            P[self.support] = exp(lnGs - beta*Es - lnZ)

            # Normalize the probabilities by the counts in the histogram
            P[self.support] /=  self.histogram[self.support]

            # Calculate the weight for each energy
            weights = []

            for bin_number in self.bin_number_for_energies:
                if 0 <= bin_number:
                    weights.append(P[bin_number])
                else:
                    weights.append(0.0)

            return weights
        else:
            return [0.0 for energy in self.energies]
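The NumPy-based examples apply log_sum_exp to plain 1-D arrays of log-domain terms; a minimal sketch of such a helper (an assumption here, equivalent to scipy.special.logsumexp for 1-D input) is:

import numpy as np

def log_sum_exp(values):
    # assumed helper, not shown in the source snippets
    # values: 1-D array of log-domain terms; returns log(sum(exp(values))) stably
    m = np.max(values)
    return m + np.log(np.sum(np.exp(values - m)))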
Example #13
    def get_loss(self, scores, target, mask):
        """
        calculate viterbi loss

        args:
            scores (seq_len, bat_size, target_size_from, target_size_to) : class score for CRF
            target (seq_len, bat_size, 1) : crf label
            mask   (seq_len, bat_size) : mask for crf label

        """

        seq_len = scores.size(0)
        bat_size = scores.size(1)

        tg_energy = torch.gather(scores.view(seq_len, bat_size, -1), 2,
                                 target).view(seq_len,
                                              bat_size)  # seq_len * bat_size
        tg_energy = tg_energy.masked_select(mask).sum()

        seq_iter = enumerate(scores)
        _, inivalues = next(seq_iter)
        partition = inivalues[:, self.start_tag, :].clone()
        for idx, cur_values in seq_iter:
            cur_values = cur_values + partition.contiguous().view(bat_size, self.tagset_size, 1).\
                expand(bat_size, self.tagset_size, self.tagset_size)
            cur_partition = utils.log_sum_exp(cur_values, self.tagset_size)
            mask_idx = mask[idx, :].view(bat_size,
                                         1).expand(bat_size, self.tagset_size)
            partition.masked_scatter_(mask_idx,
                                      cur_partition.masked_select(mask_idx))

        partition = partition[:, self.end_tag].sum()
        loss = (partition - tg_energy) / bat_size

        return loss
Example #14
    def forward(self, x, logdet, dsparams, mollify=0.0, delta=nn_.delta):

        ndim = self.num_ds_dim
        a_ = self.act_a(dsparams[:, :, 0 * ndim:1 * ndim])
        b_ = self.act_b(dsparams[:, :, 1 * ndim:2 * ndim])
        w = self.act_w(dsparams[:, :, 2 * ndim:3 * ndim])

        a = a_ * (1 - mollify) + 1.0 * mollify
        b = b_ * (1 - mollify) + 0.0 * mollify

        pre_sigm = a * x[:, :, None] + b
        sigm = torch.sigmoid(pre_sigm)
        x_pre = torch.sum(w * sigm, dim=2)
        x_pre_clipped = x_pre * (1 - delta) + delta * 0.5
        x_ = log(x_pre_clipped) - log(1 - x_pre_clipped)
        xnew = x_

        logj = F.log_softmax(dsparams[:,:,2*ndim:3*ndim], dim=2) + \
            nn_.logsigmoid(pre_sigm) + \
            nn_.logsigmoid(-pre_sigm) + log(a)

        logj = utils.log_sum_exp(logj, 2).sum(2)
        logdet_ = logj + np.log(1 - delta) - \
            (log(x_pre_clipped) + log(-x_pre_clipped + 1))
        logdet = logdet_.sum(1) + logdet

        return xnew, logdet
Example #15
    def _backward(self):
        # initialize first timestep value of beta to zero
        # and then iterate backward starting from the end
        # zero because operating in log space
        self.betas[-1, :] = 0

        # allocate a buffer to reuse in inner loop
        timestep_values = np.empty(self.k)

        # start from second to last
        for tidx in range(self.T - 2, -1, -1):

            # iterate over k values to fill (timestep t)
            # note that i and j are flipped from forward pass
            for i in range(self.k):

                # iterate over next k values (timestep t + 1)
                for j in range(self.k):
                    emission_prob = log_poisson_density(self.data[tidx + 1], self.B[j])
                    transition_prob = np.log(self.A[i, j])
                    beta_prob = self.betas[tidx + 1, j]
                    timestep_values[j] = emission_prob + transition_prob + beta_prob

                # numerically stable sum 
                timestep_total = utils.log_sum_exp(timestep_values)

                # set value for the ith class at time t
                self.betas[tidx, i] = timestep_total
Example #16
    def forward_unlabeled(self, features):
        init_alphas = [-1e10] * self.num_labels
        init_alphas[self.label2idx[START]] = 0

        for_expr = dy.inputVector(init_alphas)
        for obs in features:
            alphas_t = []
            for next_tag in range(self.num_labels):
                obs_broadcast = dy.concatenate([dy.pick(obs, next_tag)] *
                                               self.num_labels)
                next_tag_expr = for_expr + self.transition[
                    next_tag] + obs_broadcast
                alphas_t.append(log_sum_exp(next_tag_expr, self.num_labels))
            for_expr = dy.concatenate(alphas_t)
        terminal_expr = for_expr + self.transition[self.label2idx[STOP]]
        alpha = log_sum_exp(terminal_expr, self.num_labels)
        return alpha
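The DyNet snippets call log_sum_exp(expr, num_labels) with an explicit label count; one common way to write such a helper (a sketch under the assumption that the expression is a vector of length num_labels, not code from the source) is:

import numpy as np
import dynet as dy

def log_sum_exp(scores, num_labels):
    # assumed helper, not shown in the source snippets
    # pick the max element, broadcast it, and reduce the shifted scores
    max_idx = int(np.argmax(scores.npvalue()))
    max_score = dy.pick(scores, max_idx)
    max_broadcast = dy.concatenate([max_score] * num_labels)
    return max_score + dy.log(dy.sum_elems(dy.exp(scores - max_broadcast)))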
Example #17
def latent_loss(outputs, target, device):
    """Numerically stable implementation of the language modeling loss

    """
    #target dim # btchsize x numtags x sentLen
    tag_logits = outputs[0]  #btchsize x sentlen x numtags
    word_dist_logits = outputs[
        1]  #list #for jth tag -> batch_size, sent_len, j_vocab_size

    numtags = len(word_dist_logits)
    btchSize = tag_logits.shape[0]
    sentLen = tag_logits.shape[1]

    #calculate loss for tags
    crossEntropy_tag = nn.CrossEntropyLoss(reduction='none')
    taglogitloss = [
        -crossEntropy_tag(
            tag_logits.transpose(1, 2),
            torch.zeros(
                (btchSize, sentLen), dtype=torch.long, device=device) + j)
        for j in range(numtags)
    ]

    #calculate loss for words
    ignore_mask = ((target == Vocabulary.TOKEN_NOT_IN_TAGVOCAB) |
                   (target == Vocabulary.PADTOKEN_FOR_TAGVOCAB))
    target_with_ignore = target.clone()
    target_with_ignore[ignore_mask] = -100
    crossEntropy_word = nn.CrossEntropyLoss(reduction='none',
                                            ignore_index=-100)
    wordlogitloss = [
        -crossEntropy_word(word_logit.transpose(1, 2),
                           target_with_ignore[:, j, :])
        for j, word_logit in enumerate(word_dist_logits)
    ]

    taglogitloss = torch.stack(taglogitloss)
    wordlogitloss = torch.stack(wordlogitloss)
    totalloss = taglogitloss + wordlogitloss

    #0 loss for a tag if output word is not present in tag's vocab
    outofvocab_mask = (torch.transpose(target, 0,
                                       1) == Vocabulary.TOKEN_NOT_IN_TAGVOCAB)
    totalloss[outofvocab_mask] = float('-inf')

    finalLoss = -log_sum_exp(totalloss, dim=0)

    #mask the loss from tokens, if the output token is not present in even single tag category
    presentInZeroTagMask = torch.all(
        (torch.transpose(target, 1, 2) == Vocabulary.TOKEN_NOT_IN_TAGVOCAB),
        dim=-1)
    #mask the loss of padding tokens
    paddingMask = (target[:, 0, :] == Vocabulary.PADTOKEN_FOR_TAGVOCAB)
    tokenContributingToZeroLoss = (presentInZeroTagMask | paddingMask)
    num_useful_tokens = (~tokenContributingToZeroLoss).sum().item()

    return torch.sum(
        finalLoss[~tokenContributingToZeroLoss]), num_useful_tokens
Example #18
    def forward(self, scores, target, mask):
        """
        args:
            scores (seq_len, bat_size, target_size_from, target_size_to) : crf scores
            target (seq_len, bat_size, 1) : golden state
            mask (size seq_len, bat_size) : mask for padding
        return:
            loss
        """

        # calculate batch size and seq len
        seq_len = scores.size(0)
        bat_size = scores.size(1)

        # calculate sentence score
        tg_energy = torch.gather(scores.view(seq_len, bat_size, -1), 2,
                                 target).view(seq_len,
                                              bat_size)  # seq_len * bat_size
        tg_energy = tg_energy.masked_select(mask).sum()

        # calculate forward partition score

        # build iter
        seq_iter = enumerate(scores)
        # the first score should start with <start>
        _, inivalues = seq_iter.__next__(
        )  # bat_size * from_target_size * to_target_size
        # only need start from start_tag
        partition = inivalues[:, self.start_tag, :].clone(
        )  # bat_size * to_target_size
        # iter over last scores
        for idx, cur_values in seq_iter:
            # previous to_target is current from_target
            # partition: previous results log(exp(from_target)), #(batch_size * from_target)
            # cur_values: bat_size * from_target * to_target
            cur_values = cur_values + partition.contiguous().view(
                bat_size, self.tagset_size, 1).expand(
                    bat_size, self.tagset_size, self.tagset_size)
            cur_partition = utils.log_sum_exp(cur_values, self.tagset_size)
            # (bat_size * from_target * to_target) -> (bat_size * to_target)
            # partition = utils.switch(partition, cur_partition, mask[idx].view(bat_size, 1).expand(bat_size, self.tagset_size)).view(bat_size, -1)
            mask_idx = mask[idx, :].view(bat_size,
                                         1).expand(bat_size, self.tagset_size)
            partition.masked_scatter_(
                mask_idx, cur_partition.masked_select(
                    mask_idx))  #0 for partition, 1 for cur_partition

        #only need end at end_tag
        partition = partition[:, self.end_tag].sum()
        # average = mask.sum()

        # average_batch
        if self.average_batch:
            loss = (partition - tg_energy) / bat_size
        else:
            loss = (partition - tg_energy)

        return loss
Example #19
    def forward_alg_pairwise(self, feats):
        init_alphas = torch.full((1, self.tagset_size),
                                 0,
                                 dtype=torch.float,
                                 requires_grad=True).to(device=self.device)
        forward_var = init_alphas

        for feat in feats:
            alphas_t = []
            for next_tag in range(self.tagset_size):
                trans_score = feat.view(self.tagset_size,
                                        self.tagset_size)[:, next_tag].view(
                                            1, -1)
                next_tag_var = forward_var + trans_score
                alphas_t.append(utils.log_sum_exp(next_tag_var).view(1))
            forward_var = torch.cat(alphas_t).view(1, -1)
            terminal_var = forward_var
        alpha = utils.log_sum_exp(terminal_var)
        return alpha
Example #20
    def _log_p(self, data, params):
        ll = []

        for cn_n, cn_r, cn_v, mu_n, mu_r, mu_v, log_pi in zip(
                data.cn_n, data.cn_r, data.cn_v, data.mu_n, data.mu_r,
                data.mu_v, data.log_pi):
            temp = log_pi + self._log_binomial_likelihood(
                data.b, data.d, cn_n, cn_r, cn_v, mu_n, mu_r, mu_v, params.x)

            ll.append(temp)

        return log_sum_exp(ll)
Example #21
    def forward(self, x, logdet, dsparams):
        inv = np.log(np.exp(1 - nn_.delta) - 1)
        ndim = self.hidden_dim
        pre_u = self.u_[None, None, :, :] + dsparams[:, :,
                                                     -self.in_dim:][:, :,
                                                                    None, :]
        pre_w = self.w_[None, None, :, :] + dsparams[:, :, 2 * ndim:3 *
                                                     ndim][:, :, None, :]
        a = self.act_a(dsparams[:, :, 0 * ndim:1 * ndim] + inv)
        b = self.act_b(dsparams[:, :, 1 * ndim:2 * ndim])
        w = self.act_w(pre_w)
        u = self.act_u(pre_u)

        pre_sigm = torch.sum(u * a[:, :, :, None] * x[:, :, None, :], 3) + b
        sigm = torch.sigmoid(pre_sigm)
        x_pre = torch.sum(w * sigm[:, :, None, :], dim=3)
        x_pre_clipped = x_pre * (1 - nn_.delta) + nn_.delta * 0.5
        x_ = log(x_pre_clipped) - log(1 - x_pre_clipped)
        xnew = x_

        logj = F.log_softmax(pre_w, dim=3) + \
            nn_.logsigmoid(pre_sigm[:,:,None,:]) + \
            nn_.logsigmoid(-pre_sigm[:,:,None,:]) + log(a[:,:,None,:])
        # n, d, d2, dh

        logj = logj[:, :, :, :, None] + F.log_softmax(pre_u, dim=3)[:, :,
                                                                    None, :, :]
        # n, d, d2, dh, d1

        logj = utils.log_sum_exp(logj, 3).sum(3)
        # n, d, d2, d1

        logdet_ = logj + np.log(1-nn_.delta) - \
            (log(x_pre_clipped) + log(-x_pre_clipped+1))[:,:,:,None]

        logdet = utils.log_sum_exp(
            logdet_[:, :, :, :, None] + logdet[:, :, None, :, :], 3).sum(3)
        # n, d, d2, d1, d0 -> n, d, d2, d0

        return xnew, logdet
Example #22
    def _forward_alg(self, feats):
        # Do the forward algorithm to compute the partition function
        #         init_alphas = torch.randn(1, self.tagset_size, dtype=torch.float, requires_grad=True).to(device=self.device)
        init_alphas = torch.full((1, self.tagset_size),
                                 -100,
                                 dtype=torch.float,
                                 requires_grad=True).to(device=self.device)
        # START_TAG has all of the score.
        #         init_alphas = feats[0].view(1, -1).expand(1, self.tagset_size) + self.transitions[self.tag_to_ix[START_TAG]]
        init_alphas[0][self.tag_to_ix[DatasetPreprosessed.__START_TAG__]] = 0.

        # Wrap in a variable so that we will get automatic backprop
        forward_var = init_alphas

        # Iterate through the sentence
        for feat in feats:
            alphas_t = []  # The forward tensors at this timestep
            for next_tag in range(self.tagset_size):
                # broadcast the emission score: it is the same regardless of
                # the previous tag
                emit_score = feat[next_tag].view(1, -1).expand(
                    1, self.tagset_size)
                # the ith entry of trans_score is the score of transitioning to
                # next_tag from i
                trans_score = self.transitions[next_tag].view(1, -1).expand(
                    1, self.tagset_size)
                assert emit_score.size() == trans_score.size()
                # The ith entry of next_tag_var is the value for the
                # edge (i -> next_tag) before we do log-sum-exp
                next_tag_var = forward_var + trans_score + emit_score
                # The forward variable for this tag is log-sum-exp of all the
                # scores.
                #                 print(next_tag_var, next_tag_var.size())
                #                 print(utils.log_sum_exp(next_tag_var).view(1))
                alphas_t.append(utils.log_sum_exp(next_tag_var).view(1))
            forward_var = torch.cat(alphas_t).view(1, -1)
        terminal_var = forward_var + self.transitions[self.tag_to_ix[
            DatasetPreprosessed.__STOP_TAG__]]
        alpha = utils.log_sum_exp(terminal_var)
        return alpha
Example #23
    def fit(self, x, max_iter=1):
        # Initialize parameters
        params = self.params
        n_comp = params['n_components']
        n_states = params['n_states']
        
        transition_matrix = normalize(np.random.rand(n_comp, n_states, n_states), axis=1)
        self.params['transition_matrix'] = transition_matrix

        init_probs = normalize(np.random.rand(n_comp, n_states), axis=1)
        self.params['initial_probs'] = init_probs
        
        n_seq, n_t = x.shape
        comp_post = normalize(np.random.rand(n_comp, n_seq))
                               
        comp_probs = normalize(np.random.rand(n_comp))
        self.params['component_probs'] = comp_probs
        
        transition_counts = np.zeros((n_seq, n_states, n_states))
        init_states_dummy = np.zeros((n_seq, n_states))
        for n, seq in enumerate(x):
            init_states_dummy[n, seq[0]] = 1
            for i in range(1, n_t):
                transition_counts[n, seq[i], seq[i-1]] += 1
        transition_counts = transition_counts.reshape(-1, n_states**2)
        
        # Fit
        iters = 0
        for i in range(max_iter):
            log_transition_matrix = log_clip(transition_matrix).reshape(n_comp, -1)
            log_init_probs = log_clip(init_probs)
            log_comp_probs = log_clip(comp_probs)
            
            # E-Step
            comp_loglikes = ((log_init_probs @ init_states_dummy.T)
                             + (log_transition_matrix @ transition_counts.T)
                             + log_comp_probs.reshape(-1, 1))
            
            comp_post = exp_normalize(comp_loglikes)
            self.history['train_loglike'].append(log_sum_exp(comp_loglikes))
            
            # M-Step
            init_probs = normalize(comp_post @ init_states_dummy, axis=1)
            
            transition_matrix = normalize((comp_post @ transition_counts).reshape(-1, n_states, n_states), axis=1)
            
            comp_probs = normalize(comp_post.sum(axis=1, keepdims=True)).reshape(-1)
        
        # Update
        self.params['transition_matrix'] = transition_matrix
        self.params['initial_probs'] = init_probs
        self.params['component_probs'] = comp_probs
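The mixture fit above leans on small helpers (log_clip, exp_normalize) that are not included in the snippet; plausible NumPy sketches, under the assumption that exp_normalize normalizes log-weights over the component axis, are:

import numpy as np

def log_clip(x, eps=1e-12):
    # assumed helper: guard against log(0) by clipping probabilities from below
    return np.log(np.clip(x, eps, None))

def exp_normalize(log_w):
    # assumed helper: softmax over the component axis (axis 0), shifted by the max for stability
    w = np.exp(log_w - log_w.max(axis=0, keepdims=True))
    return w / w.sum(axis=0, keepdims=True)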
Example #24
    def forward_alg_unary(self, feats):
        init_alphas = torch.full((1, self.tagset_size),
                                 -100.,
                                 dtype=torch.float,
                                 requires_grad=True).to(device=self.device)
        init_alphas[0][self.__start__] = 0.

        forward_var = init_alphas

        for i, feat in enumerate(feats):
            alphas_t = []
            for next_tag in range(self.tagset_size):
                emit_score = feat[next_tag].view(1, -1).expand(
                    1, self.tagset_size)
                trans_score = self.transitions[:, next_tag].view(1, -1)
                assert emit_score.size() == trans_score.size()
                next_tag_var = forward_var + emit_score + trans_score
                alphas_t.append(utils.log_sum_exp(next_tag_var).view(1))
            forward_var = torch.cat(alphas_t).view(1, -1)
        terminal_var = forward_var + self.transitions[:, self.__stop__].view(
            1, -1)
        alpha = utils.log_sum_exp(terminal_var)
        return alpha
Example #25
def estimate_agg_posterior(z: MultiGaussian, z_samples=None
                           ) -> torch.Tensor:
    batch_size, zdim = z.mu.size()
    if z_samples is None:
        z_samples = z.sample()
    log_qzx = MultiGaussian(
        mu=(z.mu.unsqueeze(0).expand(batch_size, batch_size, -1)
            .reshape(-1, zdim)),
        var_logit=(z.var_logit.unsqueeze(0).expand(batch_size, batch_size, -1)
                   .reshape(-1, zdim))
    ).log_prob(z_samples.unsqueeze(1).expand(batch_size, batch_size, -1)
               .reshape(-1, zdim))
    return (utils.log_sum_exp(log_qzx.reshape(batch_size, batch_size), 1)
            - math.log(batch_size))
Example #26
    def _get_useful_funcs(self):
        super(MLPWeightNorm_BHN_dais, self)._get_useful_funcs()

        self.project = theano.function([self.input_var], self.hs)

        input2 = T.matrix('input2')
        h2 = get_output(self.hiddens, input2)
        y2 = get_output(self.p_net, input2)
        self.dais_ = theano.function([
            self.input_var, self.target_var, input2, self.weight,
            self.dataset_size
        ], [self.loss, y2] + h2)

        imps_ = T.vector('imps_')
        logsoftmax_exp = theano.function([imps_],
                                         T.exp(imps_ - log_sum_exp(imps_)))

        def dais_y(refx, refy, newx, n_iw, n=None):

            if n is None:
                n = refx.shape[0]
            imps = np.zeros(n_iw).astype('float32')
            ys = np.zeros(
                (n_iw, newx.shape[0], self.n_classes)).astype('float32')
            for i in range(n_iw):
                outs = self.dais_(refx, refy, newx, 1.0, n)
                imps[i] = outs[0]
                ys[i] = outs[1]

            imps = logsoftmax_exp(imps)
            return (ys * imps[:, None, None]).sum(0)

        def dais_h(refx, refy, newx, n_iw, n=None):

            if n is None:
                n = refx.shape[0]
            imps = np.zeros(n_iw).astype('float32')
            hs = list()
            for i in range(n_iw):
                outs = self.dais_(refx, refy, newx, 1.0, n)
                imps[i] = outs[0]
                hs.append(outs[2:])

            imps = logsoftmax_exp(imps)
            ind = np.random.multinomial(1, imps).argmax()
            return hs[ind]

        self.dais_y = dais_y
        self.dais_h = dais_h
Example #27
    def forward_labeled(self, id, features, marginals):
        init_alphas = [-1e10] * self.num_labels
        init_alphas[self.label2idx[START]] = 0
        for_expr = dy.inputVector(init_alphas)
        # print(id)
        # print(len(features))
        # print(self.mask_tensor[id].dim())
        marginal = dy.inputTensor(marginals)
        for pos, obs in enumerate(features):

            alphas_t = []
            for next_tag in range(self.num_labels):
                obs_broadcast = dy.concatenate([dy.pick(obs, next_tag)] *
                                               self.num_labels)
                next_tag_expr = for_expr + self.transition[
                    next_tag] + obs_broadcast
                score = log_sum_exp(next_tag_expr, self.num_labels)
                alphas_t.append(score)
                # print(self.transition[next_tag].value())
                # print(" pos is %d,  tag is %s, label score is %.2f "% ( pos, self.labels[next_tag],score.value()) )
            for_expr = dy.concatenate(alphas_t) + marginal[pos]
        terminal_expr = for_expr + self.transition[self.label2idx[STOP]]
        alpha = log_sum_exp(terminal_expr, self.num_labels)
        return alpha
Example #28
 def _calc_log_probs(self, labels, q):
     if self._dist_type == 'logistic':
         return discretized_mix_logistic_log_probs_nd(
             labels, q, nr_mix=self._num_components, ndims=self._ndims)
     if self._dist_type == 'gaussian':
         if len(self._num_classes) == 1:
             pi = tf.nn.softmax(q[:, :self._num_components])
             mus = q[:, self._num_components:2 * self._num_components]
             sigmas = tf.nn.softplus(q[:, self._num_components * 2:])
             return log_sum_exp(
                 tf.log(pi) + tf.contrib.distributions.Normal(
                     mus, sigmas).log_prob(labels))
         else:
             return mvn_mix_log_probs(labels, q, self._ndims,
                                      self._num_components)
Example #29
    def elbo(self,
             logits,
             targets,
             criterion,
             means,
             logvars,
             args,
             iwae=False,
             num_importance_samples=3,
             prior_means=None):
        """
        If iwae == False, then this returns (elbo, elbo, ...), otherwise it returns (iwae, elbo, ...)
        """
        seq_len, batch_size, ntokens = logits.size()

        # compute NLL
        NLL = criterion(logits.view(-1, ntokens),
                        targets.view(-1))  # takes the sum, not the mean
        if iwae:
            NLL = torch.stack(
                torch.chunk(
                    NLL.view(seq_len, batch_size).sum(0),
                    num_importance_samples, 0))

        # compute KL
        KL = 0
        if prior_means is None:
            for mean, logvar in zip(means, logvars):
                KL += -0.5 * torch.sum(1 + logvar - mean.pow(2) - logvar.exp(),
                                       -1)
        else:
            for mean, prior_mean, logvar in zip(means, prior_means, logvars):
                # KL += -0.5 * torch.sum(1 + logvar - mean.pow(2) - logvar.exp())
                KL += -0.5 * torch.sum(
                    1 + logvar - (mean - prior_mean).pow(2) - logvar.exp(), -1)

        if iwae:
            KL = torch.stack(torch.chunk(KL, num_importance_samples, 0))
            assert args.anneal == 1, "can't mix annealing and IWAE"
            iwae_loss = (-(log_sum_exp(-(NLL + KL), dim=0)) +
                         math.log(num_importance_samples)).sum()
            elbo_loss = (NLL + KL).mean(0).sum()
            return iwae_loss, elbo_loss, NLL.mean(0).sum(), KL.mean(0).sum(), (
                (seq_len * batch_size) / num_importance_samples)
        else:
            elbo_loss = NLL.sum() + args.anneal * KL.sum()
            return elbo_loss, elbo_loss, NLL.sum(), KL.sum(
            ), seq_len * batch_size
Example #30
    def hard_mining(self, conf_data, conf_t, pos, num):
        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)

        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.long().view(-1, 1))


        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        return neg
Example #31
    def _forward_alg(self, feats):
        '''
        This function performs the forward algorithm explained above
        '''
        # calculate in log domain
        # feats is len(sentence) * tagset_size
        # initialize alpha with a Tensor with values all equal to -10000.

        # Do the forward algorithm to compute the partition function
        init_alphas = torch.Tensor(1, self.tagset_size).fill_(-10000.)

        # START_TAG has all of the score.
        init_alphas[0][self.tag_to_ix[START_TAG]] = 0.

        # Wrap in a variable so that we will get automatic backprop
        forward_var = autograd.Variable(init_alphas)
        if self.use_gpu:
            forward_var = forward_var.cuda()

        # Iterate through the sentence
        for feat in feats:
            # broadcast the emission score: it is the same regardless of
            # the previous tag
            emit_score = feat.view(-1, 1)

            # the ith entry of trans_score is the score of transitioning to
            # next_tag from i
            tag_var = forward_var + self.transitions + emit_score

            # The ith entry of next_tag_var is the value for the
            # edge (i -> next_tag) before we do log-sum-exp
            max_tag_var, _ = torch.max(tag_var, dim=1)

            # The forward variable for this tag is log-sum-exp of all the
            # scores.
            tag_var = tag_var - max_tag_var.view(-1, 1)

            # Compute log sum exp in a numerically stable way for the forward algorithm
            forward_var = max_tag_var + \
                torch.log(torch.sum(torch.exp(tag_var), dim=1)
                          ).view(1, -1)  # ).view(1, -1)
        terminal_var = (forward_var +
                        self.transitions[self.tag_to_ix[STOP_TAG]]).view(
                            1, -1)
        alpha = log_sum_exp(terminal_var)
        # Z(x)
        return alpha
Example #32
    def _log_p(self, data, params):
        ll = []

        for cn_n, cn_r, cn_v, mu_n, mu_r, mu_v, log_pi  in zip(data.cn_n, data.cn_r, data.cn_v, data.mu_n, data.mu_r, data.mu_v, data.log_pi):
            temp = log_pi + self._log_binomial_likelihood(data.b,
                                                          data.d,
                                                          cn_n,
                                                          cn_r,
                                                          cn_v,
                                                          mu_n,
                                                          mu_r,
                                                          mu_v,
                                                          params.x)

            ll.append(temp)

        return log_sum_exp(ll)
Example #33
    def calc_weights(self, energies, beta=1.0):
        """
        Calculate the canonical weights to be in a canonical average
        for a list of energies.
        """

        # Make a histogram of the energies
        histogram = numpy.histogram(energies, bins=self.binning)[0]

        # In some versions of numpy, there will be an extra bin, with
        # values falling outside the histogram
        if len(histogram)==len(self.lnG_support)+1:
            histogram = histogram[:-1]

        # Calculate the support between lnG and the histogram
        support = self.lnG_support & (histogram>0)

        if (numpy.any(support)):
            # Calculate the probability of each bin according to the
            # canonical ensemble within the common support
            lnGs = self.lnG[support]
            Es = self.bin_centers[support]

            lnZ = log_sum_exp(lnGs - beta*Es)

            P = numpy.zeros(support.shape)
            P[support] = exp(lnGs - beta*Es - lnZ)

            # Normalize the probabilities by the counts in the histogram
            P[support] /=  histogram[support]

            # Calculate the weight for each energy
            weights = []

            for energy in energies:
                bin_number = self.calc_bin(energy)

                if 0 <= bin_number < len(self.bin_centers):
                    weights.append(P[bin_number])
                else:
                    weights.append(0.0)

            return weights
        else:
            return [0.0 for energy in energies]
Example #34
    def _forward(self):
        """
        The forward pass computes for each sample, for each timestep, 
        and for each latent class, the probability that the latent state
        was the latent class, and stores these values in self.alphas.
        
        This is accomplished using a dynamic programming approach that takes
        advantage of the assumption that the future depends only upon the previous 
        timestep. Specifically, it iterates through each sequence keeping track
        of the probability of each class up until that timestep. Then, to compute
        the probability of each time step at t + 1, it sums over a set of 
        probabilities where each is the probability of transitioning from a 
        previous class times the probability of the current class given the 
        observation times the probability of the previous class. This sum gives
        the total probability of being in a certain class at timestep t + 1.
        """
        # initialize first timestep value of alpha for each sample 
        # to the start probability of the corresponding latent class in pi
        self.alphas[0, :] = np.log(self.pi)
        for i in range(self.k):
            self.alphas[0, i] += log_poisson_density(self.data[0], self.B[i])

        # allocate a buffer to reuse in inner loop
        timestep_values = np.empty(self.k)

        # tidx starts at 1 since zeroth timestep 
        # of alphas has already been initialized
        for tidx, value in enumerate(self.data[1:], 1):

            # iterate over k values to fill
            for j in range(self.k):

                # iterate over previous k values
                for i in range(self.k):
                    timestep_values[i] = np.log(self.A[i, j]) + self.alphas[tidx - 1, i]

                # numerically stable sum over timestep_values
                timestep_total = utils.log_sum_exp(timestep_values)

                # probability of emitting value
                emission_prob = log_poisson_density(value, self.B[j])

                # set value for jth class at time t
                self.alphas[tidx, j] = timestep_total + emission_prob
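The HMM examples score emissions with log_poisson_density, which is not defined in these snippets; a standard log-PMF sketch (the count-then-rate argument order is an assumption) is:

import numpy as np
from scipy.special import gammaln

def log_poisson_density(x, rate):
    # assumed helper, not shown in the source snippets
    # log P(x | rate) for a Poisson distribution: x*log(rate) - rate - log(x!)
    return x * np.log(rate) - rate - gammaln(x + 1)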
Example #35
    def e_step(self):
        # # assert valid tau
        # assert np.all([abs(v - 1) < 1e-5 for v in np.sum(self.taus, axis=1)])
        # assert not np.any([v < 0 for v in self.taus.flatten()])

        # # assert valid pi
        # assert abs(np.sum(self.pis) - 1) < 1e-5 and not np.any([v < 0 for v in self.pis])

        # # assert valid gammas
        # assert not np.any([v < 0 for v in self.gammas.flatten()])

        # use deepcopy
        # start total at a value
        for idx in range(self.e_iterations):
            tau_copy = copy.deepcopy(self.taus)
            for i in range(self.N):
                for k in range(self.k):
                    total = np.log(self.pis[k])
                    for j in range(self.N):
                        if i == j: continue
                        for l in range(self.k):
                            edge_prob = log_poisson_density(self.data[i,j], self.gammas[k,l])
                            total += tau_copy[j,l] * edge_prob

                    self.taus[i,k] = total

            # normalize
            for i in range(self.N):
                self.taus[i, :] -= utils.log_sum_exp(self.taus[i, :]) 

            # exponentiate
            self.taus = np.exp(self.taus)

            # find residual
            residual = np.max(np.abs(self.taus - tau_copy))

            if residual < 1:
                break
Example #36
 def log_likelihood(self):
     return utils.log_sum_exp(self.log_c())
Example #37
 def lnZ(self, beta):
     """
     Calculates the logarithm to partition function at beta
     """
     return log_sum_exp(self.lnGs - beta*self.Es)