Example #1
    def forward(self, input):
        x = self.model(input)

        gap = adaptive_pool2d(x, 1, pool_type='avg')
        gap_logit = self.gap_fc(reshape(gap, shape=[x.shape[0], -1]))
        gap_weight = list(self.gap_fc.parameters())[0]
        gap_weight = transpose(gap_weight, perm=[1, 0])
        gap = x * unsqueeze(unsqueeze(gap_weight, 2), 3)

        gmp = adaptive_pool2d(x, 1, pool_type='max')
        gmp_logit = self.gmp_fc(reshape(gmp, shape=[x.shape[0], -1]))
        gmp_weight = list(self.gmp_fc.parameters())[0]
        gmp_weight = transpose(gmp_weight, perm=[1, 0])
        gmp = x * unsqueeze(unsqueeze(gmp_weight, 2), 3)

        cam_logit = concat([gap_logit, gmp_logit], 1)
        x = concat([gap, gmp], 1)
        x = self.leaky_relu(self.conv1x1(x))

        heatmap = reduce_sum(x, dim=1, keep_dim=True)

        x = self.pad(x)
        out = self.conv(x)

        return out, cam_logit, heatmap
Example #2
def masks_to_boxes(masks):
    """
    Compute the bounding boxes around the provided masks

    The masks should be in format [N, H, W] where N is the number
    of masks, (H, W) are the spatial dimensions.

    Returns an [N, 4] tensor, with the boxes in xyxy format
    """
    if np.sum(masks.shape) == 0:
        return dg.to_variable(np.zeros((0, 4)))

    h, w = masks.shape[-2:]
    y = dg.to_variable(np.arange(0, h, 1, dtype="float32"))
    x = dg.to_variable(np.arange(0, w, 1, dtype="float32"))
    y, x = T.meshgrid([y, x])  # [h, w]

    x_mask = (masks * L.unsqueeze(x, [0]))  # [N, H, W]
    x_max = L.reduce_max(L.flatten(x_mask, axis=1), dim=-1)
    non_mask = dg.to_variable(~masks.numpy())
    x_mask[non_mask] = 1e8
    x_min = L.reduce_min(L.flatten(x_mask, axis=1), dim=-1)

    y_mask = (masks * L.unsqueeze(y, [0]))  # [N, H, W]
    y_max = L.reduce_max(L.flatten(y_mask, axis=1), dim=-1)
    y_mask[non_mask] = 1e8
    y_min = L.reduce_min(L.flatten(y_mask, axis=1), dim=-1)

    return L.stack([x_min, y_min, x_max, y_max], 1)
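For reference, a minimal numpy sketch (illustrative only; the mask values are hypothetical) of the box extraction above, applied to a single mask:

import numpy as np

# a single 4x5 boolean mask with a 2x2 foreground region
mask = np.zeros((4, 5), dtype=bool)
mask[1:3, 2:4] = True
ys, xs = np.nonzero(mask)
box = [xs.min(), ys.min(), xs.max(), ys.max()]  # xyxy format -> [2, 1, 3, 2]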
Example #3
    def forward(self, input):
        x = self.DownBlock(input)

        gap = L.adaptive_pool2d(x, 1, pool_type='avg')
        gap_logit = self.gap_fc(L.reshape(gap, (x.shape[0], -1)))
        gap_weight = self.gap_fc.weight
        gap = x * L.unsqueeze(gap_weight, (2, 3))

        gmp = L.adaptive_pool2d(x, 1, pool_type='max')
        gmp_logit = self.gmp_fc(L.reshape(gmp, (x.shape[0], -1)))
        gmp_weight = self.gmp_fc.weight
        gmp = x * L.unsqueeze(gmp_weight, (2, 3))

        cam_logit = L.concat([gap_logit, gmp_logit], 1)
        x = L.concat([gap, gmp], 1)
        x = self.relu(self.conv1x1(x))

        heatmap = L.reduce_sum(x, dim=1, keep_dim=True)

        if self.light:
            x_ = L.adaptive_pool2d(x, 1, pool_type='avg')
            x_ = self.FC(L.reshape(x_, (x_.shape[0], -1)))
        else:
            x_ = self.FC(L.reshape(x, (x.shape[0], -1)))
        gamma, beta = self.gamma(x_), self.beta(x_)

        for i in range(self.n_blocks):
            x = getattr(self, 'UpBlock1_' + str(i + 1))(x, gamma, beta)
        out = self.UpBlock2(x)

        return out, cam_logit, heatmap
Example #4
def generalized_box_iou(boxes1, boxes2):
    """
    Generalized IoU from https://giou.stanford.edu/

    The boxes should be in [x0, y0, x1, y1] format

    Returns a [N, M] pairwise matrix, where N = len(boxes1)
    and M = len(boxes2)
    """
    # degenerate boxes give inf / nan results
    # so do an early check
    assert L.reduce_all(boxes1[:, 2:] >= boxes1[:, :2])
    assert L.reduce_all(boxes2[:, 2:] >= boxes2[:, :2])
    iou, union = box_iou(boxes1, boxes2)

    N, M = boxes1.shape[0], boxes2.shape[0]
    boxes1 = L.unsqueeze(boxes1, axes=[1])  # [N, 1, 4]
    boxes1 = L.expand(boxes1, [1, M, 1])  # [N, M, 4]
    boxes2 = L.unsqueeze(boxes2, axes=[0])  # [1, M, 4]
    boxes2 = L.expand(boxes2, [N, 1, 1])  # [N, M, 4]
    lt = L.elementwise_min(boxes1[:, :, :2], boxes2[:, :, :2])  # [N, M, 2]
    rb = L.elementwise_max(boxes1[:, :, 2:], boxes2[:, :, 2:])  # [N, M, 2]

    wh = L.clip(rb - lt, min=0, max=1e8)  # [N, M, 2]
    area = wh[:, :, 0] * wh[:, :, 1] + 1e-4  # prevent division by zero

    return iou - (area - union) / area
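A small numpy sketch (illustrative only; the box coordinates are hypothetical) of the same generalized IoU formula for a single pair of xyxy boxes:

import numpy as np

b1 = np.array([0., 0., 2., 2.])
b2 = np.array([1., 1., 3., 3.])
inter = max(0., min(b1[2], b2[2]) - max(b1[0], b2[0])) * \
        max(0., min(b1[3], b2[3]) - max(b1[1], b2[1]))          # 1.0
union = (b1[2] - b1[0]) * (b1[3] - b1[1]) + \
        (b2[2] - b2[0]) * (b2[3] - b2[1]) - inter               # 7.0
iou = inter / union                                             # ~0.143
lt = np.minimum(b1[:2], b2[:2])
rb = np.maximum(b1[2:], b2[2:])
enclose = (rb - lt).prod()                                      # 9.0 = enclosing-box area
giou = iou - (enclose - union) / enclose                        # ~ -0.079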
Example #5
    def get_embedding(self, num_embeddings,
                      embedding_dim, padding_idx=None):
        """
        Build sinusoidal embeddings.
        This matches the implementation in tensor2tensor,
        but differs slightly from the description
        in Section 3.5 of "Attention Is All You Need".
        """
        half_dim = embedding_dim // 2
        emb = layers.log(float(10000)) / (half_dim - 1)
        emb = layers.exp(layers.arange(
            start=0, end=half_dim, dtype='float32') * -emb)

        # [num_embeddings, embedding_dim // 2]
        emb = layers.unsqueeze(layers.arange(-num_embeddings // 2,
                                             num_embeddings // 2, dtype='float32'), axis=1) *\
            layers.unsqueeze(emb, axis=0)

        emb = layers.concat([layers.sin(emb), layers.cos(emb)], dim=1)
        # [num_embeddings, embedding_dim]
        if embedding_dim % 2 == 1:
            emb = layers.concat(
                [emb, layers.zeros(shape=(num_embeddings, 1))], dim=1)
        if padding_idx is not None:
            emb[padding_idx, :] = 0
        self.origin_shift = num_embeddings // 2
        return emb
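A numpy sketch (illustrative only; num_embeddings=4 and embedding_dim=6 are hypothetical) of the sinusoidal table built above:

import numpy as np

num_embeddings, embedding_dim = 4, 6
half_dim = embedding_dim // 2
scale = np.log(10000.0) / (half_dim - 1)
freqs = np.exp(np.arange(half_dim, dtype="float32") * -scale)       # [half_dim]
pos = np.arange(-num_embeddings // 2, num_embeddings // 2, dtype="float32")
emb = pos[:, None] * freqs[None, :]                                 # [num_embeddings, half_dim]
emb = np.concatenate([np.sin(emb), np.cos(emb)], axis=1)            # [num_embeddings, embedding_dim]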
Example #6
def no_nms(bboxes,
           scores,
           score_threshold,
           keep_top_k):
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if keep_top_k > 0 and len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)

    return pred
Example #7
    def forward(self, input, gamma, beta):

        in_mean, in_var = reduce_mean(input, dim=[2, 3],
                                      keep_dim=True), my_var(input,
                                                             dim=[2, 3],
                                                             keep_dim=True)

        out_in = (input - in_mean) / sqrt(in_var + self.eps)

        ln_mean, ln_var = reduce_mean(input, dim=[1, 2, 3],
                                      keep_dim=True), my_var(input,
                                                             dim=[1, 2, 3],
                                                             keep_dim=True)

        out_ln = (input - ln_mean) / sqrt(ln_var + self.eps)

        ex_rho = expand(self.rho, (input.shape[0], 1, 1, 1))

        out = ex_rho * out_in + (1 - ex_rho) * out_ln

        gamma = unsqueeze(gamma, axes=2)
        gamma = unsqueeze(gamma, axes=3)
        beta = unsqueeze(beta, axes=2)
        beta = unsqueeze(beta, axes=3)
        out = out * gamma + beta

        return out
Example #8
    def forward(self, *args, **kwargs):
        """
        Args:
            start_pos (optional, `Variable` of shape [batch_size]): 
                token index of start of answer span in `context`
            end_pos (optional, `Variable` of shape [batch_size]): 
                token index of end of answer span in `context`
        Returns:
            loss (`Variable` of shape []):
                Cross entropy loss mean over batch and time, ignore positions where label == -100
                if labels not set, returns None
            start_logits (`Variable` of shape [batch_size, seq_len]):
                output logits of start position, use argmax(start_logit) to get start index
            end_logits (`Variable` of shape [batch_size, seq_len]):
                output logits of end position, use argmax(end_logit) to get end index
        """

        start_pos = kwargs.pop('start_pos', None)
        end_pos = kwargs.pop('end_pos', None)
        pooled, encoded = super(ErnieModelForQuestionAnswering,
                                self).forward(*args, **kwargs)
        encoded = self.dropout(encoded)
        encoded = self.classifier(encoded)
        start_logit, end_logits = L.unstack(encoded, axis=-1)
        if start_pos is not None and end_pos is not None:
            if len(start_pos.shape) == 1:
                start_pos = L.unsqueeze(start_pos, axes=[-1])
            if len(end_pos.shape) == 1:
                end_pos = L.unsqueeze(end_pos, axes=[-1])
            start_loss = L.softmax_with_cross_entropy(start_logit, start_pos)
            end_loss = L.softmax_with_cross_entropy(end_logits, end_pos)
            loss = (L.reduce_mean(start_loss) + L.reduce_mean(end_loss)) / 2.
        else:
            loss = None
        return loss, start_logit, end_logits
Example #9
    def forward(self, input):
        x = self.DownBlock(input)
        print('x: '+str(x.shape))
        gap = layers.adaptive_pool2d(x, 1, pool_type='avg')
        gap_logit = self.gap_fc(layers.reshape(gap, [x.shape[0], -1]))
        gap_weight = list(self.gap_fc.parameters())[0]
        gap = x * layers.unsqueeze(layers.unsqueeze(gap_weight, 2), 3)

        gmp = layers.adaptive_pool2d(x, 1, pool_type='max')
        gmp_logit = self.gmp_fc(layers.reshape(gmp, [x.shape[0], -1]))
        gmp_weight = list(self.gmp_fc.parameters())[0]
        gmp = x * layers.unsqueeze(layers.unsqueeze(gmp_weight, 2), 3)
        
        cam_logit = layers.concat([gap_logit, gmp_logit], 1)
        x = layers.concat([gap, gmp], 1)
        x = self.relu(self.conv1x1(x))

        heatmap = layers.reduce_sum(x, dim=1, keep_dim=True)

        if self.light:
            x_ = layers.adaptive_pool2d(x, 1, pool_type='avg')
            x_ = self.FC(layers.reshape(x_, [x_.shape[0], -1]))
        else:
            x_ = self.FC(layers.reshape(x, [x.shape[0], -1]))
        gamma, beta = self.gamma(x_), self.beta(x_)


        for i in range(self.n_blocks):
            x = getattr(self, 'UpBlock1_' + str(i+1))(x, gamma, beta)
        out = self.UpBlock2(x)

        return out, cam_logit, heatmap
Example #10
    def forward(self, input):
        x = self.DownBlock(input)

        # gap = torch.nn.functional.adaptive_avg_pool2d(x, 1)
        # gap_logit = self.gap_fc(gap.view(x.shape[0], -1))
        # gap_weight = list(self.gap_fc.parameters())[0]
        # gap = x * gap_weight.unsqueeze(2).unsqueeze(3)
        # adaptive_avg_pool2d_1 = dygraph.Pool2D(pool_size=x.shape[-2:], pool_type='avg') # pool into 1x1 feature map
        # gap = adaptive_avg_pool2d_1(x)
        # print('x', x.shape)
        gap = layers.adaptive_pool2d(x, 1, pool_type='avg')
        # print('gap', gap.shape)
        gap_logit = self.gap_fc(layers.reshape(gap, shape=(x.shape[0], -1)))
        # print('gap_logit', gap_logit.shape)
        gap_weight = self.gap_fc.parameters()[0]
        gap_weight = layers.reshape(gap_weight, shape=(1, -1))
        # print('gap_weight', gap_weight.shape)
        gap = x * layers.unsqueeze(layers.unsqueeze(gap_weight, 2), 3)
        # print('gap', gap.shape)

        # gmp = torch.nn.functional.adaptive_max_pool2d(x, 1)
        # gmp_logit = self.gmp_fc(gmp.view(x.shape[0], -1))
        # gmp_weight = list(self.gmp_fc.parameters())[0]
        # gmp = x * gmp_weight.unsqueeze(2).unsqueeze(3)
        # adaptive_max_pool2d_1 = dygraph.Pool2D(pool_size=x.shape[-2:], pool_type='max') # pool into 1x1 feature map
        # gmp = adaptive_max_pool2d_1(x)
        gmp = layers.adaptive_pool2d(x, 1, pool_type='max')
        gmp_logit = self.gmp_fc(layers.reshape(gmp, shape=(x.shape[0], -1)))
        gmp_weight = self.gmp_fc.parameters()[0]
        gmp_weight = layers.reshape(gmp_weight, shape=(1, -1))
        gmp = x * layers.unsqueeze(layers.unsqueeze(gmp_weight, 2), 3)

        # cam_logit = torch.cat([gap_logit, gmp_logit], 1)
        # x = torch.cat([gap, gmp], 1)
        # x = self.relu(self.conv1x1(x))
        cam_logit = layers.concat([gap_logit, gmp_logit], 1)
        x = layers.concat([gap, gmp], 1)
        x = self.relu(self.conv1x1(x))

        # heatmap = torch.sum(x, dim=1, keepdim=True)
        heatmap = layers.reduce_sum(x, dim=1, keep_dim=True)

        if self.light:
            # x_ = torch.nn.functional.adaptive_avg_pool2d(x, 1)
            # x_ = self.FC(x_.view(x_.shape[0], -1))
            # adaptive_avg_pool2d_1 = dygraph.Pool2D(pool_size=x.shape[-2:], pool_type='avg')
            # x_ = adaptive_avg_pool2d_1(x)
            x_ = layers.adaptive_pool2d(x, 1, pool_type='avg')
            x_ = self.FC(layers.reshape(x_, shape=(x_.shape[0], -1)))
        else:
            # x_ = self.FC(x.view(x.shape[0], -1))
            x_ = self.FC(layers.reshape(x, shape=(x.shape[0], -1)))

        gamma, beta = self.gamma(x_), self.beta(x_)

        for i in range(self.n_blocks):
            x = getattr(self, 'UpBlock1_' + str(i + 1))(x, gamma, beta)
        out = self.UpBlock2(x)

        return out, cam_logit, heatmap
Example #11
def decoder_step(gru_unit,
                 cue_gru_unit,
                 step_in,
                 hidden,
                 input_size,
                 hidden_size,
                 memory,
                 memory_mask,
                 knowledge,
                 mask=None):
    """ decoder step """
    # get attention out
    # get hidden top layers
    top_hidden = layers.slice(hidden, axes=[0], starts=[0], ends=[1])
    top_hidden = layers.squeeze(top_hidden, axes=[0])
    top_hidden = layers.unsqueeze(top_hidden, axes=[1])

    weight_memory, attn = dot_attention(top_hidden, memory, memory_mask)

    step_in = layers.unsqueeze(step_in, axes=[1])
    rnn_input_list = [step_in, weight_memory]
    if weight_memory.shape[0] == -1:
        knowledge_1 = layers.reshape(knowledge, shape=weight_memory.shape)
    else:
        knowledge_1 = knowledge
    cue_input_list = [knowledge_1, weight_memory]
    output_list = [weight_memory]

    rnn_input = layers.concat(rnn_input_list, axis=2)

    rnn_input = layers.squeeze(rnn_input, axes=[1])
    rnn_output, rnn_last_hidden = gru_unit(rnn_input, hidden, mask)

    cue_input = layers.concat(cue_input_list, axis=2)
    cue_input = layers.squeeze(cue_input, axes=[1])
    cue_rnn_out, cue_rnn_last_hidden = cue_gru_unit(cue_input, hidden, mask)

    h_y = layers.tanh(
        fc(rnn_last_hidden, hidden_size, hidden_size, name="dec_fc1"))
    h_cue = layers.tanh(
        fc(cue_rnn_last_hidden, hidden_size, hidden_size, name="dec_fc2"))

    concate_y_cue = layers.concat([h_y, h_cue], axis=2)
    k = layers.sigmoid(fc(concate_y_cue, hidden_size * 2, 1, name='dec_fc3'))

    new_hidden = h_y * k - h_cue * (k - 1.0)

    new_hidden_tmp = layers.transpose(new_hidden, perm=[1, 0, 2])
    output_list.append(new_hidden_tmp)

    real_out = layers.concat(output_list, axis=2)

    if mask:
        mask_tmp = layers.unsqueeze(mask, axes=[0])
        new_hidden = layers.elementwise_mul((new_hidden - hidden),
                                            mask_tmp,
                                            axis=0)
        new_hidden += hidden

    return real_out, new_hidden
Example #12
    def forward(self, x, y, **kargs):
        """
        Adaptive Normalization forward.

        Args:
            x (N x C1 x *): Input.
            y (N x C2): Conditional information.
        Returns:
            out (N x C1 x *): Output.
        """
        residual_dim = len(x.shape) - len(y.shape)
        if self.projection:
            if self.separate_projection:
                gamma = self.fc_gamma(y)
                beta = self.fc_beta(y)
                for _ in range(residual_dim):
                    gamma = L.unsqueeze(gamma, -1)
                    beta = L.unsqueeze(beta, -1)
            else:
                y = self.fc(y)
                for _ in range(residual_dim):
                    y = L.unsqueeze(y, -1)
                gamma, beta = L.split(y, num_or_sections=2, dim=1)
        else:
            for _ in range(residual_dim):
                y = L.unsqueeze(y, -1)
            gamma, beta = L.split(y, 2, 1)
        
        x = self.norm(x) if self.norm is not None else x
        out = x * (1 + gamma) + beta
        return out
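A numpy sketch (illustrative only; shapes are hypothetical and the projection/normalization steps above are skipped) of the final modulation, with gamma and beta broadcast over the trailing spatial dimensions:

import numpy as np

x = np.random.rand(2, 8, 4, 4).astype("float32")   # N x C1 x H x W, assumed already normalized
gamma = np.random.rand(2, 8).astype("float32")     # projected from the conditional input y
beta = np.random.rand(2, 8).astype("float32")
gamma = gamma[:, :, None, None]                    # unsqueeze the residual dims -> N x C1 x 1 x 1
beta = beta[:, :, None, None]
out = x * (1 + gamma) + beta                       # broadcast over H x W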
Example #13
def get_enc_bias(source_inputs):
    """
        get_enc_bias
    """
    source_inputs = layers.cast(source_inputs, 'float32')
    emb_sum = layers.reduce_sum(layers.abs(source_inputs), dim=-1)
    zero = layers.fill_constant([1], 'float32', value=0) 
    bias = layers.cast(layers.equal(emb_sum, zero), 'float32') * -1e9
    return layers.unsqueeze(layers.unsqueeze(bias, axes=[1]), axes=[1])
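A numpy sketch (illustrative only; shapes are hypothetical) of what get_enc_bias computes: positions whose embedding sums to zero, i.e. padding, receive a -1e9 bias that broadcasts over heads and query positions:

import numpy as np

src = np.zeros((2, 3, 4), dtype="float32")      # [batch, seq_len, emb_dim]
src[0, :2] = 1.0                                # sequence 0: two real tokens, one pad
src[1, :1] = 1.0                                # sequence 1: one real token, two pads
emb_sum = np.abs(src).sum(-1)                   # [batch, seq_len]
bias = (emb_sum == 0).astype("float32") * -1e9  # -1e9 at padded positions
bias = bias[:, None, None, :]                   # [batch, 1, 1, seq_len]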
Example #14
    def forward(self, input, gamma, beta):
        in_mean, in_var = reduce_mean(input, dim=[2, 3], keep_dim=True), var(input, axis=[2, 3], keepdim=True)
        out_in = (input - in_mean) / layers.sqrt(in_var + self.eps)
        ln_mean, ln_var = reduce_mean(input, dim=[1, 2, 3], keep_dim=True), var(input, axis=[1, 2, 3], keepdim=True)
        out_ln = (input - ln_mean) / layers.sqrt(ln_var + self.eps)
        rho = layers.expand(self.rho, [input.shape[0], 1, 1, 1])
        out = rho * out_in + (1 - rho) * out_ln
        out = out * layers.unsqueeze(layers.unsqueeze(gamma, 2), 3) + layers.unsqueeze(layers.unsqueeze(beta, 2), 3)

        return out
Example #15
        def erniesage_v3_aggregator(gw, feature, hidden_size, act, initializer, learning_rate, name):
            msg = gw.send(copy_send, nfeat_list=[("h", feature)])
            neigh_feature = gw.recv(msg, ernie_recv)
            neigh_feature = L.cast(L.unsqueeze(neigh_feature, [-1]), "int64")

            feature = L.unsqueeze(feature, [-1])
            cls = L.fill_constant_batch_size_like(feature, [-1, 1, 1], "int64", 1)
            term_ids = L.concat([cls, feature[:, :-1], neigh_feature], 1)
            term_ids.stop_gradient = True
            return term_ids
Example #16
    def forward(self,
                tgt,
                memory,
                tgt_mask=None,
                memory_mask=None,
                pos=None,
                query_pos=None):
        output = tgt

        intermediate = []

        assert tgt_mask is None, "Computing an attn_mask from tgt_mask is not implemented."

        if memory_mask is not None:
            bs, tgt_length = tgt.shape[:2]
            memory_length = memory.shape[1]
            attn_mask = L.zeros([bs, tgt_length, memory_length],
                                dtype="float32")
            memory_mask = L.expand(
                L.unsqueeze(memory_mask, [1]),
                (1, tgt_length, 1))  # [bs, tgt_length, memory_length]
            attn_mask = attn_mask.numpy()
            memory_mask = memory_mask.numpy()
            attn_mask[memory_mask] = -1e8
            attn_mask = dg.to_variable(attn_mask)
            attn_mask = L.expand(L.unsqueeze(attn_mask, [1]),
                                 (1, self.nhead, 1,
                                  1))  # [bs, nhead, tgt_length, memory_length]
            memory_mask = attn_mask

        attention_weight = []
        for layer in self.layers:
            output, self_attn_weights, multihead_attn_weights = layer(
                output,
                memory,
                tgt_mask=tgt_mask,
                memory_mask=memory_mask,
                pos=pos,
                query_pos=query_pos)

            attention_weight.append(
                (self_attn_weights, multihead_attn_weights))
            if self.return_intermediate:
                intermediate.append(self.norm(output))

        if self.norm is not None:
            output = self.norm(output)
            if self.return_intermediate:
                intermediate.pop()
                intermediate.append(output)

        if self.return_intermediate:
            return L.stack(intermediate), attention_weight

        return L.unsqueeze(output, [0]), attention_weight
Example #17
    def forward(self, features):
        src_ids, sent_ids = features
        dtype = 'float16' if self.hparam['fp16'] else 'float32'
        zero = L.fill_constant([1], dtype='int64', value=0)
        input_mask = L.cast(L.logical_not(L.equal(src_ids, zero)), dtype) # assume pad id == 0
        #input_mask = L.unsqueeze(input_mask, axes=[2])
        d_shape = L.shape(src_ids)
        seqlen = d_shape[1]
        batch_size = d_shape[0]
        pos_ids = L.unsqueeze(L.range(0, seqlen, 1, dtype='int32'), axes=[0])
        pos_ids = L.expand(pos_ids, [batch_size, 1])
        pos_ids = L.unsqueeze(pos_ids, axes=[2])
        pos_ids = L.cast(pos_ids, 'int64')
        pos_ids.stop_gradient = True
        input_mask.stop_gradient = True
        task_ids = L.zeros_like(src_ids) + self.hparam.task_id  # task_id is not actually used at the moment
        task_ids.stop_gradient = True

        bert = ErnieModel(
            src_ids=src_ids,
            position_ids=pos_ids,
            sentence_ids=sent_ids,
            task_ids=task_ids,
            input_mask=input_mask,
            config=self.hparam,
            use_fp16=self.hparam['fp16']
        )

        cls_feats = bert.get_pooled_output()

        cls_feats = L.dropout(
            x=cls_feats,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train"
        )

        logits = L.fc(
            input=cls_feats,
            size=self.hparam['num_label'],
            param_attr=F.ParamAttr(
                name="cls_out_w",
                initializer=F.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=F.ParamAttr(
                name="cls_out_b", initializer=F.initializer.Constant(0.))
        )

        propeller.summary.histogram('pred', logits)

        if self.mode is propeller.RunMode.PREDICT:
            probs = L.softmax(logits)
            return probs
        else:
            return logits
Example #18
    def forward(self, q, k, v, lengths, speaker_embed, start_index, 
                force_monotonic=False, prev_coeffs=None, window=None):
        # add position encoding as an inductive bias 
        if self.has_bias: # multi-speaker model
            omega_q = 2 * F.sigmoid(
                F.squeeze(self.q_pos_affine(speaker_embed), axes=[-1]))
            omega_k = 2 * self.omega_initial * F.sigmoid(F.squeeze(
                self.k_pos_affine(speaker_embed), axes=[-1]))
        else: # single-speaker case
            batch_size = q.shape[0]
            omega_q = F.ones((batch_size, ), dtype="float32")
            omega_k = F.ones((batch_size, ), dtype="float32") * self.omega_default
        q += self.position_encoding_weight * positional_encoding(q, start_index, omega_q)
        k += self.position_encoding_weight * positional_encoding(k, 0, omega_k)

        q, k, v = self.q_affine(q), self.k_affine(k), self.v_affine(v)
        activations = F.matmul(q, k, transpose_y=True)
        activations /= np.sqrt(self.attention_dim)

        if self.training:
            # mask the <pad> parts from the encoder
            mask = F.sequence_mask(lengths, dtype="float32")
            attn_bias = F.scale(1. - mask, -1000)
            activations += F.unsqueeze(attn_bias, [1])
        elif force_monotonic:
            assert window is not None
            backward_step, forward_step = window
            T_enc = k.shape[1]
            batch_size, T_dec, _ = q.shape

            # actually T_dec = 1 here
            alpha = F.fill_constant((batch_size, T_dec), value=0, dtype="int64") \
                   if prev_coeffs is None \
                   else F.argmax(prev_coeffs, axis=-1)
            backward = F.sequence_mask(alpha - backward_step, maxlen=T_enc, dtype="bool")
            forward = F.sequence_mask(alpha + forward_step, maxlen=T_enc, dtype="bool")
            mask = F.cast(F.logical_xor(backward, forward), "float32")
            # print("mask's shape:", mask.shape)
            attn_bias = F.scale(1. - mask, -1000)
            activations += attn_bias

        # softmax
        coefficients = F.softmax(activations, axis=-1)
        # context vector
        coefficients = F.dropout(coefficients, 1. - self.keep_prob,
                                 dropout_implementation='upscale_in_train')
        contexts = F.matmul(coefficients, v)
        # context normalization
        enc_lengths = F.cast(F.unsqueeze(lengths, axes=[1, 2]), "float32")
        contexts *= F.sqrt(enc_lengths)
        # out affine
        contexts = self.out_affine(contexts)
        return contexts, coefficients
Example #19
    def forward(self, input):

        x = self.DownBlock(input)

        gap = adaptive_pool2d(x, pool_size=[1, 1], pool_type='avg')

        gap_ = reshape(x=gap, shape=(x.shape[0], -1))

        gap_logit = self.gap_fc(gap_)

        gap_weight = self.gap_fc.parameters()[0]
        gap_weight = transpose(gap_weight, perm=[1, 0])
        gap_weight = unsqueeze(gap_weight, axes=2)
        gap_weight = unsqueeze(gap_weight, axes=3)

        gap = x * gap_weight

        gmp = adaptive_pool2d(x, pool_size=[1, 1], pool_type='max')

        gmp_ = reshape(x=gmp, shape=(x.shape[0], -1))

        gmp_logit = self.gmp_fc(gmp_)

        gmp_weight = self.gmp_fc.parameters()[0]
        gmp_weight = transpose(gmp_weight, perm=[1, 0])
        gmp_weight = unsqueeze(gmp_weight, axes=2)
        gmp_weight = unsqueeze(gmp_weight, axes=3)

        gmp = x * gmp_weight

        cam_logit = concat(input=[gap_logit, gmp_logit], axis=1)

        x = concat(input=[gap, gmp], axis=1)

        x = self.relu(self.conv1x1(x))

        heatmap = reduce_sum(x, dim=1, keep_dim=True)

        if self.light:
            x_ = adaptive_pool2d(x, pool_size=[1, 1], pool_type='avg')
            x_ = reshape(x=x_, shape=(x_.shape[0], -1))
            x_ = self.FC(x_)
        else:
            x_ = reshape(x, shape=(x.shape[0], -1))
            x_ = self.FC(x_)

        gamma, beta = self.gamma(x_), self.beta(x_)

        for i in range(self.n_blocks):
            x = getattr(self, 'UpBlock1_' + str(i + 1))(x, gamma, beta)
        out = self.UpBlock2(x)

        return out, cam_logit, heatmap
Example #20
def get_attention_mask(mask, nhead):
    # mask: [bs, L] -> attn_mask: [bs, nhead, L, L]
    bs, l = mask.shape
    row_mask = L.expand(L.unsqueeze(mask, [2]), (1, 1, l)) # [bs, L, L]
    col_mask = L.expand(L.unsqueeze(mask, [1]), (1, l, 1)) # [bs, L, L]
    mask = L.logical_or(row_mask, col_mask)
    attn_mask = L.zeros([bs, l, l], dtype="float32")
    attn_mask = attn_mask.numpy()
    mask = mask.numpy()
    attn_mask[mask] = -1e8
    attn_mask = dg.to_variable(attn_mask)
    attn_mask = L.expand(L.unsqueeze(attn_mask, [1]), (1, nhead, 1, 1)) # [bs, nhead, L1, L2]
    return attn_mask
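A numpy sketch (illustrative only; bs=2, L=3, nhead=8 are hypothetical) of the mask expansion above, turning a [bs, L] padding mask (True marks padding) into a [bs, nhead, L, L] additive attention mask:

import numpy as np

pad = np.array([[False, False, True],
                [False, True, True]])                  # [bs, L]
row = np.repeat(pad[:, :, None], 3, axis=2)            # [bs, L, L]
col = np.repeat(pad[:, None, :], 3, axis=1)            # [bs, L, L]
attn_mask = np.where(row | col, -1e8, 0.).astype("float32")
attn_mask = np.repeat(attn_mask[:, None], 8, axis=1)   # [bs, nhead, L, L]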
Example #21
    def forward(self, input, gamma, beta):
        rho_ = L.clip(self.rho, min=0, max=1)
        in_mean = L.reduce_mean(input, dim=[2, 3], keep_dim=True)
        in_var = var(input, dim=[2, 3], keepdim=True)
        out_in = (input - in_mean) / L.sqrt(in_var + self.eps)
        ln_mean = L.reduce_mean(input, dim=[1, 2, 3], keep_dim=True)
        ln_var = var(input, dim=[1, 2, 3], keepdim=True)
        out_ln = (input - ln_mean) / L.sqrt(ln_var + self.eps)
        out = rho_ * out_in + (1 - rho_) * out_ln
        out = out * L.unsqueeze(gamma, axes=[2, 3]) + L.unsqueeze(beta,
                                                                  axes=[2, 3])

        return out
Example #22
def matrix_nms(bboxes,
               scores,
               score_threshold,
               post_threshold,
               nms_top_k,
               keep_top_k,
               use_gaussian=False,
               gaussian_sigma=2.):
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top nms_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if nms_top_k > 0 and len(sort_inds) > nms_top_k:
        sort_inds = sort_inds[:nms_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    # Matrix NMS
    kernel = 'gaussian' if use_gaussian else 'linear'
    cate_scores = _matrix_nms(bboxes, cate_labels, cate_scores, kernel=kernel, sigma=gaussian_sigma)

    # filter.
    keep = L.where(cate_scores >= post_threshold)
    if len(keep) == 0:
        return L.zeros((0, 6), 'float32') - 1.0
    bboxes = L.gather(bboxes, keep)
    cate_scores = L.gather(cate_scores, keep)
    cate_labels = L.gather(cate_labels, keep)

    # sort and keep keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)

    return pred
Example #23
    def _decode(self,
                x,
                y,
                w,
                h,
                anchors,
                stride,
                scale_x_y,
                eps,
                is_gt=False):
        conv_shape = x.shape  # (8, 13, 13, 3)
        batch_size = conv_shape[0]
        n_grid = conv_shape[1]
        anchor_per_scale = conv_shape[3]

        _x = L.unsqueeze(x, 4)
        _y = L.unsqueeze(y, 4)
        conv_raw_dxdy = L.concat([_x, _y], -1)  # (8, 13, 13, 3, 2)
        _w = L.unsqueeze(w, 4)
        _h = L.unsqueeze(h, 4)
        conv_raw_dwdh = L.concat([_w, _h], -1)  # (8, 13, 13, 3, 2)

        rows = L.range(0, n_grid, 1, 'float32')
        cols = L.range(0, n_grid, 1, 'float32')
        rows = L.expand(L.reshape(rows, (1, -1, 1)), [n_grid, 1, 1])
        cols = L.expand(L.reshape(cols, (-1, 1, 1)), [1, n_grid, 1])
        offset = L.concat([rows, cols], axis=-1)
        offset = L.reshape(offset, (1, n_grid, n_grid, 1, 2))
        offset = L.expand(offset, [batch_size, 1, 1, anchor_per_scale, 1])

        if is_gt:
            decode_xy = (conv_raw_dxdy + offset) / n_grid
        else:
            if (abs(scale_x_y - 1.0) < eps):
                decode_xy = L.sigmoid(conv_raw_dxdy)
                decode_xy = (decode_xy + offset) / n_grid
            else:
                # Grid Sensitive
                decode_xy = scale_x_y * L.sigmoid(conv_raw_dxdy) - 0.5 * (
                    scale_x_y - 1.0)
                decode_xy = (decode_xy + offset) / n_grid
        anchor_t = fluid.layers.assign(np.copy(anchors).astype(np.float32))
        decode_wh = (L.exp(conv_raw_dwdh) * anchor_t) / (n_grid * stride)
        decode_xywh = L.concat([decode_xy, decode_wh], axis=-1)
        if is_gt:
            decode_xywh.stop_gradient = True

        return decode_xywh  # (8, 13, 13, 3, 4)
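A numpy sketch (illustrative only; n_grid=3 is hypothetical) of the grid offset tensor built above; each cell (i, j) stores its own (x, y) grid coordinate before being broadcast over batch and anchors:

import numpy as np

n_grid = 3
rows = np.tile(np.arange(n_grid, dtype="float32").reshape(1, -1, 1), (n_grid, 1, 1))
cols = np.tile(np.arange(n_grid, dtype="float32").reshape(-1, 1, 1), (1, n_grid, 1))
offset = np.concatenate([rows, cols], axis=-1)    # offset[i, j] == (j, i)
offset = offset.reshape(1, n_grid, n_grid, 1, 2)  # ready to broadcast over batch and anchors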
Example #24
def extract_valid_pose_labels(pose_map,
                              pose_type,
                              remove_face_labels,
                              do_remove=True):
    """
    Remove some labels (e.g. face regions) in the pose map if necessary.

    Args:
        pose_map (3D, 4D or 5D tensor): input pose map.
        pose_type (str): 'both' or 'open'
        remove_face_labels (bool): Whether to remove labels for the face region.
        do_remove (bool): Do remove face labels.
    
    Returns:
        pose_map (3D, 4D or 5D tensor): Output pose map.
    """
    if pose_map is None:
        return pose_map

    if type(pose_map) == list:
        return [
            extract_valid_pose_labels(p, pose_type, remove_face_labels,
                                      do_remove) for p in pose_map
        ]

    orig_dim = len(pose_map.shape)
    assert (orig_dim >= 3 and orig_dim <= 5)
    if orig_dim == 3:
        pose_map = L.unsqueeze(pose_map, axes=[0, 1])
    elif orig_dim == 4:
        pose_map = L.unsqueeze(pose_map, [0])

    if pose_type == 'open':
        # If input is only openpose, remove densepose part.
        pose_map = pose_map[:, :, 3:]
    elif remove_face_labels and do_remove:
        # Remove face part for densepose input.
        densepose, openpose = pose_map[:, :, :3], pose_map[:, :, 3:]
        face_mask = get_face_mask(pose_map[:, :, 2])
        face_mask = L.unsqueeze(face_mask, [2])
        pose_map = L.concat(
            [densepose * (1 - face_mask) - face_mask, openpose], axis=2)

    if orig_dim == 3:
        pose_map = pose_map[0, 0]
    elif orig_dim == 4:
        pose_map = pose_map[0]
    return pose_map
Example #25
    def test_sequence_unsqueeze(self):
        program = Program()
        with program_guard(program):
            x = layers.data(name='x', shape=[8, 2], dtype='float32')
            out = layers.unsqueeze(input=x, axes=[1])
            self.assertIsNotNone(out)
        print(str(program))
Example #26
    def add_input(self, x, condition=None):
        """compute the output distribution (represented by its parameters) for a step. It works similarily with the `forward` method but in a `step-in-step-out` fashion.

        Args:
            x (Variable): shape(B, T=1), dtype float32, a step of the input waveform.
            condition (Variable, optional): shape(B, C_cond, T=1), dtype float32, a step of the upsampled condition. Defaults to None.

        Returns:
            Variable: shape(B, T=1, C_output), dtype float32, the parameter of the output distributions.
        """
        # Causal Conv
        if self.loss_type == "softmax":
            x = F.clip(x, min=-1., max=0.99999)
            x = quantize(x, self.output_dim)
            x = self.embed(x)  # (B, T, C), T=1
        else:
            x = F.unsqueeze(x, axes=[-1])  # (B, T, 1), T=1
            x = self.embed(x)  # (B, T, C)
        x = F.transpose(x, perm=[0, 2, 1])

        # Residual & Skip-connection & linears
        z = self.resnet.add_input(x, condition)
        z = F.transpose(z, [0, 2, 1])
        z = F.relu(self.proj2(F.relu(self.proj1(z))))  # (B, T, C)

        # Output
        y = self.proj3(z)
        return y
Example #27
    def forward(self, x, condition=None):
        """compute the output distribution (represented by its parameters).

        Args:
            x (Variable): shape(B, T), dtype float32, the input waveform.
            condition (Variable, optional): shape(B, C_cond, T), dtype float32, the upsampled condition. Defaults to None.

        Returns:
            Variable: shape(B, T, C_output), dtype float32, the parameter of the output distributions.
        """

        # Causal Conv
        if self.loss_type == "softmax":
            x = F.clip(x, min=-1., max=0.99999)
            x = quantize(x, self.output_dim)
            x = self.embed(x)  # (B, T, C)
        else:
            x = F.unsqueeze(x, axes=[-1])  # (B, T, 1)
            x = self.embed(x)  # (B, T, C)
        x = F.transpose(x, perm=[0, 2, 1])  # (B, C, T)

        # Residual & Skip-connection & linears
        z = self.resnet(x, condition)

        z = F.transpose(z, [0, 2, 1])
        z = F.relu(self.proj2(F.relu(self.proj1(z))))

        y = self.proj3(z)
        return y
Example #28
        def erniesage_v2_aggregator(gw, feature, hidden_size, act, initializer,
                                    learning_rate, name):
            feature = L.unsqueeze(feature, [-1])
            msg = gw.send(ernie_send, nfeat_list=[("term_ids", feature)])
            neigh_feature = gw.recv(
                msg,
                lambda feat: F.layers.sequence_pool(feat, pool_type="sum"))

            term_ids = feature
            cls = L.fill_constant_batch_size_like(term_ids, [-1, 1, 1],
                                                  "int64", 1)
            term_ids = L.concat([cls, term_ids], 1)
            term_ids.stop_gradient = True
            ernie = ErnieModel(term_ids,
                               L.zeros_like(term_ids),
                               config=self.config.ernie_config)
            self_feature = ernie.get_pooled_output()

            self_feature = L.fc(
                self_feature,
                hidden_size,
                act=act,
                param_attr=F.ParamAttr(name=name + "_l",
                                       learning_rate=learning_rate),
            )
            neigh_feature = L.fc(
                neigh_feature,
                hidden_size,
                act=act,
                param_attr=F.ParamAttr(name=name + "_r",
                                       learning_rate=learning_rate),
            )
            output = L.concat([self_feature, neigh_feature], axis=1)
            output = L.l2_normalize(output, axis=1)
            return output
Example #29
    def forward(self, *args, **kwargs):
        """
        Args:
            labels (optional, `Variable` of shape [batch_size, seq_len]): 
                ground truth label id for each token
        Returns:
            loss (`Variable` of shape []):
                Cross entropy loss mean over batch and time, ignore positions where label == -100
                if labels not set, returns None
            logits (`Variable` of shape [batch_size, seq_len, hidden_size]):
                output logits of classifier
        """

        labels = kwargs.pop('labels', None)
        pooled, encoded = super(ErnieModelForTokenClassification, self).forward(*args, **kwargs)
        hidden = self.dropout(encoded) # maybe not?
        logits = self.classifier(hidden)

        if labels is not None:
            if len(labels.shape) == 2:
                labels = L.unsqueeze(labels, axes=[-1])
            loss = L.softmax_with_cross_entropy(logits, labels)
            loss = L.reduce_mean(loss)
        else:
            loss = None
        return loss, logits
Example #30
    def forward(self, src, src_length):
        # encoding
        encoder_output, encoder_final_state = self.encoder(src, src_length)

        # decoder initial states
        decoder_initial_states = [
            encoder_final_state,
            self.decoder.lstm_attention.cell.get_initial_states(
                batch_ref=encoder_output, shape=[self.hidden_size])
        ]
        # attention mask to avoid paying attention to paddings
        src_mask = layers.sequence_mask(
            src_length,
            maxlen=layers.shape(src)[1],
            dtype=encoder_output.dtype)
        encoder_padding_mask = (src_mask - 1.0) * 1e9
        encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1])

        # Tile the batch dimension with beam_size
        encoder_output = BeamSearchDecoder.tile_beam_merge_with_batch(
            encoder_output, self.beam_size)
        encoder_padding_mask = BeamSearchDecoder.tile_beam_merge_with_batch(
            encoder_padding_mask, self.beam_size)

        # dynamic decoding with beam search
        rs, _ = self.beam_search_decoder(
            inits=decoder_initial_states,
            encoder_output=encoder_output,
            encoder_padding_mask=encoder_padding_mask)
        return rs