示例#1
0
def shrink_M_to_1(output, loss_type, N, num_proto, multi_policy_proto):
    """Collapse every column after the first ``N`` into a single column.

    Args:
        output: 2-D tensor of raw network scores, one row per sample.
        loss_type: container of loss names; only membership of ``'max_out'``
            is checked.
        N: number of leading columns that are kept as they are.
        num_proto: number of prototype columns per kept class. When greater
            than 1 the first ``N * num_proto`` columns are reduced to ``N``
            columns before the softmax.
        multi_policy_proto: reduction policy used when ``num_proto > 1``;
            only ``'max_softmax'`` is supported.

    Returns:
        Tensor with ``N + 1`` columns. With ``'max_out'`` these are raw scores
        (kept columns plus the maximum of the rest); otherwise they are
        softmax probabilities (kept columns plus the sum of the rest).

    Raises:
        AssertionError: if ``num_proto > 1`` with an unsupported policy.
    """
    batch_size = output.shape[0]

    if 'max_out' in loss_type:
        # Raw scores: keep the first N columns and the largest remaining one.
        first_pos = output[:, :N]
        second_max_neg, _ = torch.max(output[:, N:], dim=1, keepdim=True)
        return torch.cat((first_pos, second_max_neg), dim=1)

    if num_proto > 1:
        if multi_policy_proto != 'max_softmax':
            # Raised explicitly (instead of `assert False`) so the check
            # survives `python -O`.
            raise AssertionError(
                'unsupported multi_policy_proto: {!r}'.format(
                    multi_policy_proto))
        first = output[:, :(N * num_proto)].contiguous().view(
            batch_size, N, num_proto)
        first_max, _ = torch.max(first, dim=2)
        second = output[:, (N * num_proto):]
        # Keep logits here: the single softmax below must see the reduced
        # scores, not probabilities, and must use the reduced column count.
        output = torch.cat((first_max, second), dim=1)

    output = F.softmax(output, dim=1)
    first_pos = output[:, :N]
    prob_neg = torch.sum(output[:, N:], 1, keepdim=True)
    return torch.cat((first_pos, prob_neg), dim=1)
示例#2
0
文件: sampler.py 项目: neonnnnn/ml
def categorical(mean, temp):
    """Draw a relaxed categorical sample using Gumbel noise and a softmax.

    Args:
        mean: tensor of category probabilities; the categories are taken to
            lie on the last axis.
        temp: temperature dividing the perturbed log-probabilities.

    Returns:
        Tensor shaped like ``mean`` whose last axis sums to 1.
    """
    eps = 1e-10  # keeps both logarithms finite
    uniform = torch.rand_like(mean)
    gumbel = -torch.log(eps - torch.log(eps + uniform))
    # An explicit last-axis softmax covers every rank, so the former 3-D
    # flatten/restore workaround for the implicit softmax axis is not needed.
    return F.softmax((torch.log(mean + eps) + gumbel) / temp, dim=-1)
示例#3
0
文件: linalg.py 项目: TYSSSY/Apb-gcn
def masked_softmax(x, valid_len):
    """Softmax over the last axis of ``x`` after masking via ``sequence_mask``.

    ``x`` is expected to be 3-D and ``valid_len`` 1-D or 2-D (per the original
    comment). When ``valid_len`` is ``None`` a plain softmax is returned.
    """
    if valid_len is None:
        return fn.softmax(x, dim=-1)

    original_shape = x.shape
    if valid_len.dim() != 1:
        flat_len = valid_len.reshape(-1)
    else:
        # One length per leading entry: repeat it for every row on axis 1.
        flat_len = torch.repeat_interleave(valid_len,
                                           repeats=original_shape[1],
                                           dim=0)

    flat_x = x.reshape(-1, original_shape[-1])
    # Masked entries get a large negative value so their exp is ~0.
    masked = sequence_mask(flat_x, flat_len, value=-1e6)
    return fn.softmax(masked.reshape(original_shape), dim=-1)
示例#4
0
def main():
    """Train a small two-matrix word model on ``words2.txt`` and print results.

    Reads sentences with ``readFile``, builds the vocabulary in first-seen
    order, obtains ``(x, y)`` training pairs from ``trainGenerator`` and runs
    plain gradient descent on ``W1``/``W2`` with a log-softmax / NLL loss.
    Afterwards it prints, for every vocabulary word, the probability predicted
    from the ``W1`` column of the word ``'paris'``.
    """
    # Read sentences
    sentences = readFile("words2.txt")
    print(sentences)

    # Make uniq words list (first-seen order; the set makes membership O(1))
    uniqWords = []
    seen = set()
    for sentence in sentences:
        for word in sentence:
            if word not in seen:
                seen.add(word)
                uniqWords.append(word)
    print(uniqWords)
    uniqWordSize = len(uniqWords)

    # Make trainPairs
    trainPairs = trainGenerator(sentences, uniqWords)

    dims = 5
    learning_rate = 0.002
    W1 = torch.randn(dims, uniqWordSize).float().requires_grad_()
    W2 = torch.randn(uniqWordSize, dims).float().requires_grad_()

    epo = 1001

    for i in range(epo):
        avg_loss = 0
        samples = 0
        for x, y in trainPairs:
            x = torch.from_numpy(x).float()
            y = torch.from_numpy(np.array([y])).long()

            samples += len(y)

            a1 = torch.matmul(W1, x)
            a2 = torch.matmul(W2, a1)

            logSoftmax = F.log_softmax(a2, dim=0)
            loss = F.nll_loss(logSoftmax.view(1, -1), y)
            loss.backward()

            avg_loss += loss.item()

            # Manual SGD step; no_grad keeps the update out of the graph.
            with torch.no_grad():
                W1 -= learning_rate * W1.grad
                W2 -= learning_rate * W2.grad

            W1.grad.zero_()
            W2.grad.zero_()

        # Report the epoch average once per reporting epoch (every 100th
        # epoch after epoch 100) instead of once per training pair.
        if i > 100 and i % 100 == 0 and samples:
            print(avg_loss / samples)

    hidden = W1[:, uniqWords.index('paris')].detach()
    a = torch.matmul(W2, hidden)
    probs = F.softmax(a, dim=0).detach().numpy()
    for context, prob in zip(uniqWords, probs):
        print(f'{context}: {prob:.2f}')
示例#5
0
 def forward(self, hidden, encoder_outputs, src_len=None):
     '''
     Compute normalized attention weights over the encoder time steps.

     :param hidden:
         previous hidden state of the decoder, in shape (layers*directions,B,H)
     :param encoder_outputs:
         encoder outputs from Encoder, in shape (T,B,H)
     :param src_len:
         used for masking. NoneType or tensor in shape (B) indicating sequence length
     :return
         softmax over the last (time) axis of ``self.score(...)``, with an
         extra axis inserted at position 1
     '''
     max_len = encoder_outputs.size(0)
     H = hidden.repeat(max_len, 1, 1).transpose(0, 1)
     encoder_outputs = encoder_outputs.transpose(0, 1)  # [B*T*H]
     attn_energies = self.score(H, encoder_outputs)  # compute attention score

     if src_len is not None:
         # True marks padded positions (index >= sequence length).
         mask = []
         for b in range(src_len.size(0)):
             valid = src_len[b].item()
             mask.append([False] * valid + [True] * (max_len - valid))
         # Boolean mask: uint8 masks are deprecated for masked_fill.
         mask = cuda_(torch.tensor(mask, dtype=torch.bool).unsqueeze(1))  # [B,1,T]
         attn_energies = attn_energies.masked_fill(mask, -1e18)

     # Normalize over the time axis explicitly; the implicit softmax axis
     # would be the batch axis for 3-D energies.
     return F.softmax(attn_energies, dim=-1).unsqueeze(1)
示例#6
0
 def on_epoch_end(self, last_target, last_output, **kwargs):
     """Print the AUC over everything collected in ``self.output``/``self.target``.

     Does nothing when ``self.output`` is empty. ``last_target``,
     ``last_output`` and ``kwargs`` are accepted but not used.
     """
     if len(self.output) == 0:
         return
     all_logits = torch.cat(self.output)
     all_targets = torch.cat(self.target)
     probabilities = F.softmax(all_logits, dim=1)
     auc = auroc_score(probabilities, all_targets)
     print(f'AUC: {auc:.5f}')
示例#7
0
    def forward(self, outputs, target_sizes):
        """
        Turn raw model outputs into per-image score/label/box dictionaries.

        Parameters:
            outputs: raw outputs of the model; the entries 'pred_logits' and
                     'pred_boxes' are read
            target_sizes: tensor of dimension [batch_size x 2] containing the size of each images of the batch
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augment, but before padding

        Returns:
            list with one dict per image, holding 'scores', 'labels', 'boxes'
        """
        out_logits = outputs['pred_logits']
        out_bbox = outputs['pred_boxes']

        assert len(out_logits) == len(target_sizes)
        assert target_sizes.shape[1] == 2

        # Best class per prediction; the last logit column is left out.
        class_probs = F.softmax(out_logits, -1)
        scores, labels = class_probs[..., :-1].max(-1)

        # cxcywh -> [x0, y0, x1, y1]
        boxes_xyxy = box_convert(out_bbox, in_fmt="cxcywh", out_fmt="xyxy")
        # relative [0, 1] -> absolute coordinates
        heights, widths = target_sizes.unbind(1)
        scale = torch.stack([widths, heights, widths, heights], dim=1)
        boxes_abs = boxes_xyxy * scale[:, None, :]

        results = []
        for score, label, box in zip(scores, labels, boxes_abs):
            results.append({'scores': score, 'labels': label, 'boxes': box})
        return results
 def select_action(self, observation):
     """Sample an action from the policy and remember its log-probability.

     The observation is wrapped into a tensor on ``self.policy.device`` and
     passed through ``self.policy.forward``; the output is normalized with a
     softmax over its last axis. The log-probability of the sampled action is
     appended to ``self.action_memory``.

     Returns:
         The sampled action as a Python number.
     """
     state = T.Tensor([observation]).to(self.policy.device)
     logits = self.policy.forward(state[0])
     # Explicit axis: the implicit softmax axis is deprecated.
     probabilities = F.softmax(logits, dim=-1)
     action_probs = T.distributions.Categorical(probabilities)
     action = action_probs.sample()
     log_probs = action_probs.log_prob(action)
     self.action_memory.append(log_probs)
     return action.item()
示例#9
0
 def _forward_test(self, cls_logits, bbox_pred):
     """Decode predictions into detections via ``self.post_processor``.

     ``self.priors`` is built once from ``self.cfg`` on first use and moved to
     the device of ``bbox_pred``.

     Returns:
         ``(detections, {})`` - the post-processed detections and an empty dict.
     """
     if self.priors is None:
         self.priors = PriorBox(self.cfg)().to(bbox_pred.device)

     class_scores = F.softmax(cls_logits, dim=2)
     decoded = box_utils.convert_locations_to_boxes(
         bbox_pred,
         self.priors,
         self.cfg.MODEL.CENTER_VARIANCE,
         self.cfg.MODEL.SIZE_VARIANCE,
     )
     corner_boxes = box_utils.center_form_to_corner_form(decoded)
     processed = self.post_processor((class_scores, corner_boxes))
     return processed, {}
示例#10
0
    def forward(self, x, sampling=True):
        """Compute category probabilities and optionally a relaxed sample.

        Args:
            x: input tensor; passed through ``self.network`` first when that
                is not ``None``.
            sampling: when false only the probabilities are returned.

        Returns:
            ``mean`` when ``sampling`` is false, otherwise ``(mean, z)`` where
            ``z`` comes from ``categorical(mean, temp=self.temp)``.
        """
        if self.network is not None:
            nn_output = self.network.forward(x)
        else:
            nn_output = x

        # Normalize over the last axis explicitly; the implicit softmax axis
        # is deprecated and is axis 0 for 3-D inputs.
        mean = F.softmax(self.mean_layer.forward(nn_output), dim=-1)
        if not sampling:
            return mean

        z = categorical(mean, temp=self.temp)
        return mean, z
示例#11
0
    def get_prediction(self, x_input, hard=True):
        """Run ``self.forward`` over ``x_input`` in chunks of 128 rows.

        Args:
            x_input: array-like with a ``shape``; sliced along its first axis.
            hard: when true return the argmax over axis 1 of each output,
                otherwise the softmax over axis 1.

        Returns:
            NumPy array with one entry per input row.
        """
        chunk = 128
        total = x_input.shape[0]
        collected = []
        for start in range(0, total, chunk):
            batch = torch.tensor(x_input[start:start + chunk]).float()
            batch = lib.cuda(batch)
            result = self.forward(batch)
            if hard:
                result = torch.argmax(result, dim=1)
            else:
                result = F.softmax(result, dim=1)
            collected.extend(result.data.cpu().numpy())

        return np.array(collected)
示例#12
0
def train(epoch):
    """Run one optimisation step over the module-level models.

    Uses the module globals ``models``, ``optimizer``, ``epochs``, ``encode``,
    ``cls_model``, ``domain_model``, ``loss_func``, ``source_data``,
    ``target_data``, ``use_UDAGCN`` and ``device``, and updates the global
    ``rate``.
    """
    global rate

    for net in models:
        net.train()
    optimizer.zero_grad()

    rate = min((epoch + 1) / epochs, 0.05)

    src_embedding = encode(source_data, "source")
    tgt_embedding = encode(target_data, "target")
    src_logits = cls_model(src_embedding)

    # Source classification loss plus a small penalty on the mean of every
    # parameter whose name contains "weight".
    cls_loss = loss_func(src_logits, source_data.y)
    for net in models:
        for param_name, weight in net.named_parameters():
            if "weight" in param_name:
                cls_loss = cls_loss + weight.mean() * 3e-3

    loss = cls_loss
    if use_UDAGCN:
        # Domain classifier loss: source rows are labelled 0, target rows 1.
        src_domain_out = domain_model(src_embedding)
        tgt_domain_out = domain_model(tgt_embedding)

        src_domain_labels = torch.zeros(src_domain_out.size(0)).type(
            torch.LongTensor).to(device)
        src_domain_loss = loss_func(src_domain_out, src_domain_labels)
        tgt_domain_labels = torch.ones(tgt_domain_out.size(0)).type(
            torch.LongTensor).to(device)
        tgt_domain_loss = loss_func(tgt_domain_out, tgt_domain_labels)
        loss = cls_loss + (src_domain_loss + tgt_domain_loss)

        # Entropy of the target predictions, weighted by training progress.
        tgt_logits = cls_model(tgt_embedding)
        tgt_probs = torch.clamp(F.softmax(tgt_logits, dim=-1),
                                min=1e-9,
                                max=1.0)
        entropy = torch.mean(
            torch.sum(-tgt_probs * torch.log(tgt_probs), dim=-1))
        loss = loss + entropy * (epoch / epochs * 0.01)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
示例#13
0
    def scaled_dot_product(self, x, Q, K, V):
        """Return ``softmax(Q @ K / sqrt(d_k)) @ V``.

        ``K`` is multiplied as given, so the caller must supply it already
        laid out for ``Q @ K``. ``x`` is accepted but not used.
        """
        # bmm is a batch-wise matmul and does not support broadcasting.
        # (Original author's note: unsure how exactly it differs from matmul.)
        scores = torch.matmul(Q, K)
        # `** 0.5` works whether self.d_k is a plain number or a tensor;
        # torch.sqrt only accepts tensors.
        scores = scores / (self.d_k ** 0.5)

        if self.mask:
            # NOTE(review): masking is not implemented; the flag has no effect.
            pass

        # Normalize over the key axis; the implicit softmax axis would be the
        # batch axis for 3-D inputs.
        weights = F.softmax(scores, dim=-1)
        return torch.matmul(weights, V)
示例#14
0
 def log_rank_loss(self, y_pos, y_neg, temp=0):
     """Softplus ranking loss over positive scores and grouped negatives.

     Args:
         y_pos: 1-D tensor of M positive scores.
         y_neg: 1-D tensor of N negative scores, N being a multiple of M;
             it is reshaped to (M, N // M).
         temp: temperature of the softmax weighting over each row of
             negatives; 0 gives uniform weights.

     Returns:
         Scalar loss tensor, moved to the GPU when ``self.gpu`` is set.
     """
     M = y_pos.size(0)
     N = y_neg.size(0)
     y_pos = self.gamma - y_pos
     y_neg = self.gamma - y_neg
     C = N // M
     y_neg = y_neg.view(C, -1).transpose(0, 1)
     # y_neg is 2-D here, so dim=1 is what the implicit (deprecated) axis
     # selection picked as well.
     p = F.softmax(temp * y_neg, dim=1)
     loss_pos = torch.sum(F.softplus(-1 * y_pos))
     loss_neg = torch.sum(p * F.softplus(y_neg))
     loss = (loss_pos + loss_neg) / 2 / M
     if self.gpu:
         loss = loss.cuda()
     return loss
示例#15
0
def attention(query, key, value, mask=None, dropout=None):
    """Compute 'Scaled Dot Product Attention'.

    Returns a pair: the attention-weighted ``value`` and the attention
    weights. Positions where ``mask == 0`` receive a score of -1e9 before the
    softmax; ``dropout``, when given, is applied to the weights.
    """
    scale = math.sqrt(query.size(-1))
    logits = torch.matmul(query, key.transpose(-2, -1)) / scale
    if mask is not None:
        logits = logits.masked_fill(mask == 0, -1e9)

    weights = F.softmax(logits, dim=-1)
    if dropout is not None:
        weights = dropout(weights)

    attended = torch.matmul(weights, value)
    return attended, weights
示例#16
0
    def eval_probs_on_grid(self, extent, res=400):
        """
        Evaluate the model on the grid returned by ``get_grid`` for the range
        [-extent, extent] on both axes at resolution ``res``.

        :return: Numpy array holding the softmax (over axis 1) of the model
        output at every grid point. The original documentation gives the shape
        as [num_eval_points, num_classes, num_models] - TODO confirm.
        """
        x_bounds = (-extent, extent)
        y_bounds = (-extent, extent)
        xx, yy = get_grid(x_bounds, y_bounds, res)

        # One (x, y) row per grid point.
        coords = np.column_stack((xx.ravel(), yy.ravel()))
        eval_points = torch.from_numpy(coords)

        with torch.no_grad():
            logits = self(eval_points)
            probs = F.softmax(logits, dim=1).cpu().numpy()
        return probs
示例#17
0
    def forward(self, x_level_0, x_level_1, x_level_2):
        """Fuse three feature maps with learned per-pixel softmax weights.

        Every input is first brought to the resolution of ``self.level``
        (0, 1 or 2), then the three maps are blended with weights produced by
        ``self.weight_level_*`` / ``self.weight_levels`` and the result is
        passed through ``self.expand``.

        Raises:
            ValueError: if ``self.level`` is not 0, 1 or 2.
        """
        # Feature resizing step
        if self.level == 0:
            level_0_resized = x_level_0
            level_1_resized = self.stride_level_1(x_level_1)
            level_2_downsampled_inter = F.max_pool2d(x_level_2,
                                                     3,
                                                     stride=2,
                                                     padding=1)
            level_2_resized = self.stride_level_2(level_2_downsampled_inter)
        elif self.level == 1:
            level_0_compressed = self.compress_level_0(x_level_0)
            # scale_factor must be passed by keyword: the second positional
            # argument of F.interpolate is the absolute output `size`.
            level_0_resized = F.interpolate(level_0_compressed,
                                            scale_factor=2,
                                            mode='nearest')
            level_1_resized = x_level_1
            level_2_resized = self.stride_level_2(x_level_2)
        elif self.level == 2:
            level_0_compressed = self.compress_level_0(x_level_0)
            level_0_resized = F.interpolate(level_0_compressed,
                                            scale_factor=4,
                                            mode='nearest')
            if self.dim[1] != self.dim[2]:
                level_1_compressed = self.compress_level_1(x_level_1)
                level_1_resized = F.interpolate(level_1_compressed,
                                                scale_factor=2,
                                                mode='nearest')
            else:
                level_1_resized = F.interpolate(x_level_1,
                                                scale_factor=2,
                                                mode='nearest')
            level_2_resized = x_level_2
        else:
            raise ValueError(
                'unsupported level: {!r} (expected 0, 1 or 2)'.format(
                    self.level))

        # The fusion weights are also learned by the network
        level_0_weight_v = self.weight_level_0(level_0_resized)
        level_1_weight_v = self.weight_level_1(level_1_resized)
        level_2_weight_v = self.weight_level_2(level_2_resized)
        levels_weight_v = torch.cat(
            (level_0_weight_v, level_1_weight_v, level_2_weight_v), 1)
        levels_weight = self.weight_levels(levels_weight_v)
        levels_weight = F.softmax(levels_weight, dim=1)  # alpha

        # Adaptive fusion
        fused_out_reduced = level_0_resized * levels_weight[:, 0:1, :, :] +\
                            level_1_resized * levels_weight[:, 1:2, :, :] +\
                            level_2_resized * levels_weight[:, 2:, :, :]

        out = self.expand(fused_out_reduced)
        return out
示例#18
0
    def forward(self, output, target):
        """Compute box, IoU and class losses from the raw network output.

        Args:
            output: tensor of shape (B, C, H, W); it is rearranged to
                (B, H * W * 5, 25), so C must equal 5 * 25 (presumably 5
                anchors with 4 box terms, 1 confidence and 20 class scores
                each - verify against the model definition).
            target: ground truth, forwarded unchanged to ``self.build_target``.

        Returns:
            ``(box_loss, iou_loss, class_loss)`` as returned by
            ``self.cal_loss``.
        """
        B, C, H, W = output.size()
        out = output.permute(0, 2, 3,
                             1).contiguous().view(B, H * W * 5, 5 + 20)

        xy_pred = torch.sigmoid(out[:, :, 0:2])
        conf_pred = torch.sigmoid(out[:, :, 4:5])
        hw_pred = torch.exp(out[:, :, 2:4])
        # Raw class scores are handed on as they are; the softmax that used
        # to be computed here was never read.
        class_score = out[:, :, 5:]
        delta_pred = torch.cat([xy_pred, hw_pred], dim=-1)

        output_var = (delta_pred, conf_pred, class_score)
        # Targets are built from the underlying data, outside the graph.
        output_data = [e.data for e in output_var]
        target_var = self.build_target(output_data, target, H, W)
        box_loss, iou_loss, class_loss = self.cal_loss(output_var, target_var)
        return box_loss, iou_loss, class_loss
示例#19
0
def model_test(test_loader, net, num_pos_classes, loss_type=()):
    """Return the classification accuracy of ``net`` over ``test_loader``.

    When the network emits more columns than ``num_pos_classes``, every column
    after the first ``num_pos_classes`` is merged into one extra column before
    the argmax: the maximum raw score when ``'max_out'`` is in ``loss_type``,
    otherwise the summed softmax probability.

    Args:
        test_loader: iterable of ``(inputs, labels)`` batches with a
            ``dataset`` attribute supporting ``len``.
        net: the model; put into eval mode here.
        num_pos_classes: number of leading output columns kept as they are.
        loss_type: container of loss names; only ``'max_out'`` is checked.

    Returns:
        Number of correct predictions divided by ``len(test_loader.dataset)``.
    """
    net.eval()
    # Evaluate on the device the model lives on; fall back to CUDA when it is
    # available for parameter-free models.
    first_param = next(net.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    correct = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            output = net(inputs)
            output_dim = output.shape[1]
            if output_dim != num_pos_classes:
                # NOTE: this duplicates shrink_M_to_1 (marked deprecated by
                # the original author); kept inline so the function stays
                # self-contained.
                if 'max_out' not in loss_type:
                    output = F.softmax(output, dim=1)
                first_pos = output[:, :num_pos_classes]
                second_neg = output[:, num_pos_classes:]
                if 'max_out' in loss_type:
                    merged_neg, _ = torch.max(second_neg, dim=1, keepdim=True)
                else:
                    merged_neg = torch.sum(second_neg, 1, keepdim=True)
                output = torch.cat((first_pos, merged_neg), dim=1)

            pred_idx = output.max(1, keepdim=True)[1]
            correct += pred_idx.eq(
                labels.view_as(pred_idx)).long().cpu().sum()

    return 1. * correct / len(test_loader.dataset)
示例#20
0
def attention(query, key, value, mask=None, dropout=None):
    """Compute 'Scaled Dot Product Attention'.

    Additive attention scores compatibility with a single-hidden-layer
    feed-forward network, while dot-product (multiplicative) attention uses a
    matrix product. The two have similar theoretical complexity, but the
    dot-product form is faster and more space-efficient in practice because
    it maps onto highly optimized matrix multiplication code. This function
    uses the dot-product form with a 1/sqrt(d_k) scaling factor.

    Returns the attended values together with the attention weights.
    """
    key_t = key.transpose(-2, -1)
    scores = torch.matmul(query, key_t) / math.sqrt(query.size(-1))
    # Masked-out positions (mask == 0) get a very negative score.
    scores = scores if mask is None else scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    p_attn = p_attn if dropout is None else dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn
示例#21
0
文件: util.py 项目: gansem/Thesis
def top_k_top_p_filtering(logits,
                          top_k=100,
                          top_p=0.95,
                          filter_value=-float('Inf')):
    """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering

        The filtering works along the last axis, so both a single
        distribution of shape (vocabulary size) and a batch of shape
        (batch, vocabulary size) are accepted. ``logits`` is modified in
        place and also returned.

        Args:
            logits: logits distribution, vocabulary on the last axis
            top_k > 0: keep only top k tokens with highest probability (top-k filtering).
            top_p > 0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
                Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
            filter_value: value written over every removed logit
        From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
    """
    top_k = min(top_k, logits.size(-1))  # Safety check
    if top_k > 0:
        # Remove all tokens with a probability less than the last token of the top-k
        kth_best = torch.topk(logits, top_k)[0][..., -1, None]
        logits[logits < kth_best] = filter_value

    if top_p > 0.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1),
                                        dim=-1)

        # Remove tokens with cumulative probability above the threshold
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift the indices to the right to keep also the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[
            ..., :-1].clone()
        sorted_indices_to_remove[..., 0] = False

        # Scatter the sorted mask back to the original indexing. The last
        # axis is used so that 1-D input works as well as batched input.
        indices_to_remove = sorted_indices_to_remove.scatter(
            dim=-1, index=sorted_indices, src=sorted_indices_to_remove)
        logits[indices_to_remove] = filter_value

    return logits
示例#22
0
    def activation(self, x):
        """Return ``ToXYXY`` of ``x[0]`` and the last-axis softmax of ``x[1]``."""
        converted_boxes = ToXYXY(x[0])
        class_probs = F.softmax(x[1], dim=-1)
        return (converted_boxes, class_probs)

    def decodes(self, x, pad=True):
示例#23
0
 def predict(self, x):
     """Softmax the model output over axis 1, then average over axis 2."""
     logits = self.forward(x)
     class_probs = F.softmax(logits, dim=1)
     return torch.mean(class_probs, dim=2)
示例#24
0
                                              state,
                                              new_episode=True)
 state = stacked_state
 log_probs = []
 values = []
 advantages = []
 rewards = []
 entropys = []
 critics = []
 actors = []
 overall_entropy = 0
 for t in count():
     state = np.reshape(state, (1, 4, 84, 84))
     policy, value = actor_critic.forward(
         torch.from_numpy(state).float().to(device))
     probs = Categorical(F.softmax(policy, dim=1))
     # value = critic.forward(torch.from_numpy(state).float().to(device))
     action = probs.sample()
     log_prob = probs.log_prob(action)
     entropy = probs.entropy()
     log_probs.append(log_prob)
     values.append(value)
     entropys.append(entropy)
     # print("entropy {} and probs {}".format(entropy, probs))
     overall_entropy += entropy.item()
     # step in environment
     next_state, reward, done, _ = env.step(action)
     stacked_next_state, stacked_frames = stack_frames(
         stacked_frames, next_state, new_episode=False)
     next_state = stacked_next_state
     next_state = next_state.reshape((1, 4, 84, 84))
示例#25
0
    def forward(self, pred, target):
        '''
        Combine the loss terms configured in ``self.loss_type``.

        pred should be the linear output. softmax will be calculated here.
        It is viewed as (batch, M + num_proto * N). With more than one
        prototype the first N * num_proto columns are reduced to N columns,
        either by a max over the logits ('max_softmax') or by a sum over the
        probabilities ('softmax_sum').

        Supported keys of ``self.loss_type`` (values are weights, default 1):
        'cross_entropy', 'entropy_loss', 'uniform_loss'; 'max_out' adds
        ``self._ce`` unweighted. ``self._iter`` is incremented on every call.

        Raises ValueError for an unknown ``self._multi_policy_proto`` when
        more than one prototype is used.
        '''
        batch_size = pred.size(0)
        pred = pred.view(batch_size, self.M + self._num_proto * self.N)
        if self._num_proto > 1:
            proto_cols = self.N * self._num_proto
            if self._multi_policy_proto == 'max_softmax':
                first = pred[:, :proto_cols].contiguous().view(
                    batch_size, self.N, self._num_proto)
                first_max, _ = torch.max(first, dim=2)
                second = pred[:, proto_cols:]
                pred = torch.cat((first_max, second), dim=1)
                prediction = F.softmax(pred, dim=1)
            elif self._multi_policy_proto == 'softmax_sum':
                prediction = F.softmax(pred, dim=1)
                first = prediction[:, :proto_cols].contiguous().view(
                    batch_size, self.N, self._num_proto)
                first_sum = torch.sum(first, dim=2)
                second = prediction[:, proto_cols:]
                # Use the summed prototype probabilities computed just above.
                prediction = torch.cat((first_sum, second), dim=1)
            else:
                raise ValueError(
                    'unsupported multi_policy_proto: {!r}'.format(
                        self._multi_policy_proto))
        else:
            prediction = F.softmax(pred, dim=1)

        # Column indices live on the same device as the predictions.
        idx_N = torch.arange(0, self.N, device=pred.device)
        idx_M = torch.arange(self.N, self.N + self.M, device=pred.device)

        use_ce = 'cross_entropy' in self.loss_type
        use_entropy = 'entropy_loss' in self.loss_type
        use_uniform = 'uniform_loss' in self.loss_type
        if use_ce or use_uniform:
            # Needed by the uniform term too, even without cross entropy.
            prob_M = prediction.index_select(1, idx_M)

        loss = 0

        # cross entropy loss
        loss_ce = 0
        if use_ce:
            prob_N = prediction.index_select(1, idx_N)
            prob_sM = torch.sum(prob_M, 1, keepdim=True)
            prob_N1 = torch.cat((prob_N, prob_sM), dim=1)
            log_prob_N1 = torch.log(prob_N1 + self.eps)
            loss_ce = F.nll_loss(log_prob_N1, target)
            loss += loss_ce * self.loss_type.get('cross_entropy', 1)

        # entropy loss
        loss_en = 0
        if use_entropy:
            loss_en = -torch.mean(
                torch.sum(prediction * torch.log(prediction + self.eps),
                          dim=1))
            loss += loss_en * self.loss_type.get('entropy_loss', 1)

        # uniform loss over the rows whose target is the extra class N
        loss_uniform = 0
        if use_uniform:
            negative_rows = (target.data == self.N).nonzero().squeeze(1)
            negative_prob_M = prob_M[negative_rows, :]
            norm_neg_prob_M = negative_prob_M / (
                torch.sum(negative_prob_M, dim=1) + self.eps).view(
                    -1, 1).expand_as(negative_prob_M)
            avg_norm_neg_prob_M = torch.mean(norm_neg_prob_M, dim=0)
            loss_uniform = -torch.mean(
                torch.log(avg_norm_neg_prob_M + self.eps)) - torch.log(
                    pred.new_tensor([float(self.M)]))
            loss += loss_uniform * self.loss_type.get('uniform_loss', 1)
            if (self._iter % 100) == 0:
                # float() handles both the int placeholder 0 and
                # single-element tensors.
                logging.info(
                    'loss ce = {}; loss en = {}; loss uniform = {}'.format(
                        float(loss_ce),
                        float(loss_en),
                        float(loss_uniform)))

        if 'max_out' in self.loss_type:
            # NOTE(review): with 'softmax_sum' pred still holds the unreduced
            # prototype columns here - confirm that combination is intended.
            pred_N = pred.index_select(1, idx_N)
            pred_M = pred.index_select(1, idx_M)
            pred_maxM, _ = torch.max(pred_M, dim=1, keepdim=True)
            pred_NmaxM = torch.cat((pred_N, pred_maxM), dim=1)
            loss += self._ce(pred_NmaxM, target)

        self._iter = self._iter + 1
        return loss
示例#26
0
# Excerpt of a flat demo script: x, y and z are defined before this point.
print(z.grad_fn)

# Switch on gradient tracking for both operands.
x = x.requires_grad_()
y = y.requires_grad_()

# Recompute the sum now that both operands track gradients.
z = x + y

print(z)
print(z.grad_fn)
print(z.requires_grad)

print("===========")

# detach() gives a tensor that is cut off from the autograd graph.
new_z = z.detach()

print(new_z)

print(new_z.grad_fn)

# Softmax / log-softmax on a random 1-D tensor.
data = torch.randn(5)
print(data)

print(F.softmax(data, dim=0))
print(F.softmax(data, dim=0).sum())

# NOTE(review): same call as the line above; likely an accidental duplicate.
print(F.softmax(data, dim=0).sum())

print(F.log_softmax(data, dim=0))
1
示例#27
0
 def forward(self, inputs):
     """Blend the tensors in ``inputs`` with softmax weights.

     The inputs are stacked on a new axis 1, scored by ``self.dense_weight``,
     normalized over that axis and summed over it.
     """
     stacked_inputs = torch.stack(inputs, dim=1)
     raw_scores = self.dense_weight(stacked_inputs)
     mix = F.softmax(raw_scores, dim=1)
     return (stacked_inputs * mix).sum(dim=1)
示例#28
0
 def forward(self, x):
     """Apply three ReLU layers and return softmax probabilities.

     ``x`` goes through ``hidden1``..``hidden3`` with a ReLU after each, then
     through ``hidden4``; the result is normalized over its last axis.
     """
     x = F.relu(self.hidden1(x))
     x = F.relu(self.hidden2(x))
     x = F.relu(self.hidden3(x))
     # Explicit axis: the implicit softmax axis is deprecated and is axis 0
     # for 3-D inputs.
     return F.softmax(self.hidden4(x), dim=-1)