def forward_grad(self, x):
    # horizontal gradient: pad one column on the right, then zero the last column
    x1 = fluid.layers.pad2d(x, paddings=[0, 0, 0, 1])
    grad_x = self.conv2df_grad(x1)
    temp = unstack(grad_x, axis=3)
    temp[-1] = temp[-1] * 0  # equivalent to grad_x[:, :, :, -1] = 0
    grad_x = stack(temp, axis=3)

    # vertical gradient: pad one row at the bottom, then zero the last row
    x2 = fluid.layers.pad2d(x, paddings=[0, 1, 0, 0])
    grad_y = self.conv2df_grad2(x2)
    temp = unstack(grad_y, axis=2)
    temp[-1] = temp[-1] * 0  # equivalent to grad_y[:, :, -1, :] = 0
    grad_y = stack(temp, axis=2)

    bt, c, h, w = grad_x.shape
    grad_x = fluid.layers.reshape(grad_x, [-1, c, h, w])
    grad_y = fluid.layers.reshape(grad_y, [-1, c, h, w])
    return grad_x, grad_y
def greedy_search_infilling(model,
                            q_ids,
                            q_sids,
                            sos_id,
                            eos_id,
                            attn_id,
                            max_encode_len=640,
                            max_decode_len=100,
                            tgt_type_id=3):
    model.eval()
    _, logits, info = model(q_ids, q_sids)
    gen_ids = L.argmax(logits, -1)
    d_batch, d_seqlen = q_ids.shape
    seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True)
    has_stopped = np.zeros([d_batch], dtype=np.bool_)
    gen_seq_len = np.zeros([d_batch], dtype=np.int64)
    output_ids = []

    past_cache = info['caches']

    cls_ids = L.ones([d_batch], dtype='int64') * sos_id
    attn_ids = L.ones([d_batch], dtype='int64') * attn_id
    ids = L.stack([cls_ids, attn_ids], -1)
    for step in range(max_decode_len):
        bias = gen_bias(q_ids, ids, step)
        pos_ids = D.to_variable(
            np.tile(np.array([[step, step + 1]], dtype=np.int64),
                    [d_batch, 1]))
        pos_ids += seqlen
        _, logits, info = model(ids,
                                L.ones_like(ids) * tgt_type_id,
                                pos_ids=pos_ids,
                                attn_bias=bias,
                                past_cache=past_cache)
        gen_ids = L.argmax(logits, -1)

        # concat newly generated key/value slices onto the cached ones
        past_cached_k, past_cached_v = past_cache
        cached_k, cached_v = info['caches']
        cached_k = [
            L.concat([pk, k[:, :1, :]], 1)
            for pk, k in zip(past_cached_k, cached_k)
        ]
        cached_v = [
            L.concat([pv, v[:, :1, :]], 1)
            for pv, v in zip(past_cached_v, cached_v)
        ]
        past_cache = (cached_k, cached_v)

        gen_ids = gen_ids[:, 1]
        ids = L.stack([gen_ids, attn_ids], 1)

        gen_ids = gen_ids.numpy()
        has_stopped |= (gen_ids == eos_id).astype(np.bool_)
        gen_seq_len += (1 - has_stopped.astype(np.int64))
        output_ids.append(gen_ids.tolist())
        if has_stopped.all():
            break
    output_ids = np.array(output_ids).transpose([1, 0])
    return output_ids
def seq2seq_api_rnn(input_embedding, len=3, init_hiddens=None, init_cells=None): class EncoderCell(layers.RNNCell): def __init__(self, num_layers, hidden_size, dropout_prob=0., forget_bias=0.): self.num_layers = num_layers self.hidden_size = hidden_size self.dropout_prob = dropout_prob self.lstm_cells = [] for i in range(num_layers): self.lstm_cells.append( layers.LSTMCell( hidden_size, forget_bias=forget_bias, param_attr=fluid.ParamAttr( initializer=fluid.initializer. UniformInitializer(low=-init_scale, high=init_scale)))) def call(self, step_input, states): new_states = [] for i in range(self.num_layers): out, new_state = self.lstm_cells[i](step_input, states[i]) step_input = layers.dropout( out, self.dropout_prob, dropout_implementation='upscale_in_train' ) if self.dropout_prob > 0 else out new_states.append(new_state) return step_input, new_states cell = EncoderCell(num_layers, hidden_size, dropout) output, new_states = layers.rnn( cell, inputs=input_embedding, initial_states=[[hidden, cell] for hidden, cell in zip([ layers.reshape(init_hidden, shape=[-1, hidden_size]) for init_hidden in layers.split( init_hiddens, num_or_sections=num_layers, dim=0) ], [ layers.reshape(init_cell, shape=[-1, hidden_size]) for init_cell in layers.split( init_cells, num_or_sections=num_layers, dim=0) ])], time_major=False) last_hidden = layers.stack([hidden for hidden, _ in new_states], 0) last_cell = layers.stack([cell for _, cell in new_states], 0) return output, last_hidden, last_cell
def forward_grad(self, x):
    grad_x = self.conv4u(layers.pad(x, (0, 0, 0, 0, 0, 0, 0, 1)))
    tmp = layers.unstack(grad_x, axis=2)
    tmp[-1] = tmp[-1] - tmp[-1]  # tmp[-1] = 0
    grad_x = layers.stack(tmp, axis=2)

    grad_y = self.conv4v(layers.pad(x, (0, 0, 0, 0, 0, 1, 0, 0)))
    tmp = layers.unstack(grad_y, axis=2)
    tmp[-1] = tmp[-1] - tmp[-1]  # tmp[-1] = 0
    grad_y = layers.stack(tmp, axis=2)
    return grad_x, grad_y
def loop_body(i,
              mel_input,
              outputs,
              hiddens,
              attentions,
              state=None,
              coeffs=None):
    # state is None and coeffs is None for the first step
    decoded, hidden, new_coeffs, new_state = self.decoder(
        mel_input, keys, values, text_lengths, i, speaker_embed, state,
        force_monotonic_attention, coeffs, window)
    new_coeffs = F.stack(new_coeffs)  # (N, B, T_dec=1, T_enc)
    attentions.append(new_coeffs)  # (N, B, T_dec=1, T_enc)
    outputs.append(decoded)  # (B, T_dec=1, r * C_mel)
    hiddens.append(hidden)  # (B, T_dec=1, C_dec)

    # slice the last frame out of the r generated frames to be used as the
    # input for the next step
    batch_size = mel_input.shape[0]
    frames = F.reshape(decoded, [
        batch_size, -1, self.decoder.reduction_factor,
        self.decoder.in_channels
    ])
    input_frame = frames[:, :, -1, :]
    return (i + 1, input_frame, outputs, hiddens, attentions, new_state,
            new_coeffs)
def masks_to_boxes(masks):
    """Compute the bounding boxes around the provided masks.

    The masks should be in format [N, H, W] where N is the number of masks
    and (H, W) are the spatial dimensions.

    Returns a [N, 4] tensor, with the boxes in xyxy format.
    """
    if np.sum(masks.shape) == 0:
        return dg.to_variable(np.zeros((0, 4)))

    h, w = masks.shape[-2:]
    y = dg.to_variable(np.arange(0, h, 1, dtype="float32"))
    x = dg.to_variable(np.arange(0, w, 1, dtype="float32"))
    y, x = T.meshgrid([y, x])  # [h, w]

    x_mask = (masks * L.unsqueeze(x, [0]))  # [N, H, W]
    x_max = L.reduce_max(L.flatten(x_mask, axis=1), dim=-1)
    non_mask = dg.to_variable(~masks.numpy())
    x_mask[non_mask] = 1e8
    x_min = L.reduce_min(L.flatten(x_mask, axis=1), dim=-1)

    y_mask = (masks * L.unsqueeze(y, [0]))  # [N, H, W]
    y_max = L.reduce_max(L.flatten(y_mask, axis=1), dim=-1)
    y_mask[non_mask] = 1e8
    y_min = L.reduce_min(L.flatten(y_mask, axis=1), dim=-1)

    return L.stack([x_min, y_min, x_max, y_max], 1)
def __call__(self, msg):
    alpha = msg["alpha"]  # lod-tensor (batch_size, num_heads)
    if attn_drop:
        old_h = alpha
        dropout = F.data(name='attn_drop', shape=[1], dtype="int64")
        u = L.uniform_random(shape=L.cast(L.shape(alpha)[:1], 'int64'),
                             min=0.,
                             max=1.)
        keeped = L.cast(u > dropout, dtype="float32")
        self_attn_mask = L.scale(x=keeped,
                                 scale=10000.0,
                                 bias=-1.0,
                                 bias_after_scale=False)
        n_head_self_attn_mask = L.stack(x=[self_attn_mask] * num_heads,
                                        axis=1)
        n_head_self_attn_mask.stop_gradient = True
        alpha = n_head_self_attn_mask + alpha
        alpha = L.lod_reset(alpha, old_h)

    h = msg["v"]
    alpha = paddle_helper.sequence_softmax(alpha)
    self.alpha = alpha
    old_h = h
    h = h * alpha
    h = L.lod_reset(h, old_h)
    h = L.sequence_pool(h, "sum")
    if concat:
        h = L.reshape(h, [-1, num_heads * hidden_size])
    else:
        h = L.reduce_mean(h, dim=1)
    return h
def build_graph_attn_bias(input_mask, n_head, dtype, slot_seqlen):
    input_shape = L.shape(input_mask)
    input_batch = input_shape[0]
    input_seqlen = input_shape[1]
    num_slot = input_seqlen / slot_seqlen
    num_b = num_slot - 1
    ones = L.ones([num_b], dtype="float32")  # [num_b]
    diag_ones = L.diag(ones)  # [num_b, num_b]
    diag_ones = L.unsqueeze(diag_ones, [1, -1])  # [num_b, 1, num_b, 1]
    diag_ones = L.expand(
        diag_ones,
        [1, slot_seqlen, 1, slot_seqlen])  # [num_b, seqlen, num_b, seqlen]
    diag_ones = L.reshape(
        diag_ones,
        [1, num_b * slot_seqlen,
         num_b * slot_seqlen])  # [1, num_b*seqlen, num_b*seqlen]

    graph_attn_bias = L.concat([
        L.ones([1, num_b * slot_seqlen, slot_seqlen], dtype="float32"),
        diag_ones
    ], 2)
    graph_attn_bias = L.concat([
        L.ones([1, slot_seqlen, num_slot * slot_seqlen], dtype="float32"),
        graph_attn_bias
    ], 1)  # [1, seq, seq]

    pad_attn_bias = L.matmul(input_mask, input_mask,
                             transpose_y=True)  # [batch, seq, seq]
    attn_bias = graph_attn_bias * pad_attn_bias
    attn_bias = (1. - attn_bias) * -10000.
    attn_bias = L.stack([attn_bias] * n_head, 1)  # [batch, n_head, seq, seq]
    if attn_bias.dtype != dtype:
        attn_bias = L.cast(attn_bias, dtype)
    return attn_bias
def build_attn_bias(input_mask, n_head, dtype):
    attn_bias = L.matmul(input_mask, input_mask,
                         transpose_y=True)  # [batch, seq, seq]
    attn_bias = (1. - attn_bias) * -10000.
    attn_bias = L.stack([attn_bias] * n_head, 1)  # [batch, n_head, seq, seq]
    if attn_bias.dtype != dtype:
        attn_bias = L.cast(attn_bias, dtype)
    return attn_bias
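# A minimal usage sketch for build_attn_bias (hypothetical shapes; assumes L is an
# alias for paddle.fluid.layers, as in the snippet above). The input mask is
# [batch, seq, 1] with 1.0 at real tokens, so an all-ones mask yields an
# all-zero bias of shape [batch, n_head, seq, seq].
input_mask = L.ones([2, 5, 1], dtype="float32")
attn_bias = build_attn_bias(input_mask, n_head=4, dtype="float32")  # [2, 4, 5, 5]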
def forward(self, tensor_list: NestedTensor):
    x = tensor_list.tensors
    mask = tensor_list.mask
    assert mask is not None
    bs, h, w = mask.shape
    mask = mask.numpy()
    not_mask = ~mask
    not_mask = dg.to_variable(not_mask).astype('float32')
    y_embed = L.cumsum(not_mask, axis=1)  # [batch_size, h, w]
    x_embed = L.cumsum(not_mask, axis=2)  # [batch_size, h, w]
    if self.normalize:
        eps = 1e-6
        y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
        x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale

    dim_t = (np.arange(0, self.num_pos_feats, 1,
                       dtype="float32"))  # [num_pos_feats]
    dim_t = self.temperature**(2 * (dim_t // 2) /
                               self.num_pos_feats)  # [num_pos_feats]
    dim_t = dg.to_variable(dim_t)

    x_embed = L.unsqueeze(x_embed, 3)  # [batch_size, h, w, 1]
    y_embed = L.unsqueeze(y_embed, 3)  # [batch_size, h, w, 1]
    pos_x = x_embed / dim_t  # [batch_size, h, w, num_pos_feats]
    pos_y = y_embed / dim_t  # [batch_size, h, w, num_pos_feats]
    pos_x_1 = L.sin(pos_x[:, :, :, 0::2])  # [batch_size, h, w, num_pos_feats / 2]
    pos_x_2 = L.cos(pos_x[:, :, :, 1::2])  # [batch_size, h, w, num_pos_feats / 2]
    pos_y_1 = L.sin(pos_y[:, :, :, 0::2])  # [batch_size, h, w, num_pos_feats / 2]
    pos_y_2 = L.cos(pos_y[:, :, :, 1::2])  # [batch_size, h, w, num_pos_feats / 2]
    pos_x = L.reshape(L.stack([pos_x_1, pos_x_2], axis=4),
                      (bs, h, w, -1))  # [batch_size, h, w, num_pos_feats]
    pos_y = L.reshape(L.stack([pos_y_1, pos_y_2], axis=4),
                      (bs, h, w, -1))  # [batch_size, h, w, num_pos_feats]

    pos = L.concat((pos_y, pos_x), axis=3)  # [batch_size, h, w, num_pos_feats * 2]
    pos = L.transpose(pos, perm=(0, 3, 1, 2))  # [batch_size, num_pos_feats * 2, h, w]
    return pos
def update_loss_scale(grads):
    state = mixed_precision_global_state()
    if state is None or not state.dynamic_scaling:
        return
    per_grad_check = layers.stack([layers.reduce_sum(g) for g in grads])
    grad_valid = layers.isfinite(per_grad_check)
    layers.cond(grad_valid, lambda: state.increment(),
                lambda: state.decrement())
    return grad_valid
def _gen_input(self, token_ids, type_ids, pos_ids, input_mask, aux_emb=None): token_emb_out = layers.embedding( input=token_ids, size=[self.vocab_size, self.emb_size], dtype=self.dtype, param_attr=fluid.ParamAttr(name=self.token_emb_name, initializer=self.param_initializer)) type_emb_out = layers.embedding( input=type_ids, size=[self.type_size, self.emb_size], dtype=self.dtype, param_attr=fluid.ParamAttr(name=self.type_emb_name, initializer=self.param_initializer)) pos_emb_out = layers.embedding( input=pos_ids, size=[self.max_position_seq_len, self.emb_size], dtype=self.dtype, param_attr=fluid.ParamAttr(name=self.pos_emb_name, initializer=self.param_initializer)) emb_out = token_emb_out + type_emb_out + pos_emb_out # auxiliary memory embeddings if aux_emb is not None: emb_out = layers.concat([aux_emb, emb_out], axis=1) # post process of embedding emb_out = pre_process_layer(emb_out, self.pre_encoder_cmd, self.prepostprocess_dropout, name="pre_encoder", epsilon=self.epsilon) if self.emb_mapping_in: emb_out = layers.fc(input=emb_out, num_flatten_dims=2, size=self.hidden_size, param_attr=fluid.ParamAttr( name="emb_hidden_mapping", initializer=self.param_initializer), bias_attr="emb_hidden_mapping_bias") # generate n-head self-attention mask self_attn_mask = input_mask self_attn_mask = layers.scale(x=self_attn_mask, scale=1e4, bias=-1.0, bias_after_scale=False) n_head_self_attn_mask = layers.stack(x=[self_attn_mask] * self.n_head, axis=1) n_head_self_attn_mask.stop_gradient = True return emb_out, n_head_self_attn_mask
def pad(self, input_ele):
    max_len = max([input_ele[i].shape[0] for i in range(len(input_ele))])
    out_list = []
    for i in range(len(input_ele)):
        pad_len = max_len - input_ele[i].shape[0]
        one_batch_padded = layers.pad(input_ele[i], [0, pad_len, 0, 0],
                                      pad_value=0.0)
        out_list.append(one_batch_padded)
    out_padded = layers.stack(out_list)
    return out_padded
def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, pos=None, query_pos=None): output = tgt intermediate = [] assert tgt_mask is None, "Not implement compute tgt_mask's attn_mask." if memory_mask is not None: bs, tgt_length = tgt.shape[:2] memory_length = memory.shape[1] attn_mask = L.zeros([bs, tgt_length, memory_length], dtype="float32") memory_mask = L.expand( L.unsqueeze(memory_mask, [1]), (1, tgt_length, 1)) # [bs, tgt_length, memory_length] attn_mask = attn_mask.numpy() memory_mask = memory_mask.numpy() attn_mask[memory_mask] = -1e8 attn_mask = dg.to_variable(attn_mask) attn_mask = L.expand(L.unsqueeze(attn_mask, [1]), (1, self.nhead, 1, 1)) # [bs, nhead, tgt_length, memory_length] memory_mask = attn_mask attention_weight = [] for layer in self.layers: output, self_attn_weights, multihead_attn_weights = layer( output, memory, tgt_mask=tgt_mask, memory_mask=memory_mask, pos=pos, query_pos=query_pos) attention_weight.append( (self_attn_weights, multihead_attn_weights)) if self.return_intermediate: intermediate.append(self.norm(output)) if self.norm is not None: output = self.norm(output) if self.return_intermediate: intermediate.pop() intermediate.append(output) if self.return_intermediate: return L.stack(intermediate), attention_weight return L.unsqueeze(output, [0]), attention_weight
def forward(self, x, seq_mask, pad_index, hx=None): """Forward network""" x, batch_sizes, sorted_indices = self.pack_padded_sequence( x, seq_mask, pad_index) _, unsorted_indices = layers.argsort(sorted_indices) batch_size = batch_sizes[0] h_n, c_n = [], [] if hx is None: ih = layers.zeros(shape=(self.num_layers * 2, batch_size, self.hidden_size), dtype=x[0].dtype) h, c = ih, ih else: h, c = self.permute_hidden(hx, sorted_indices) h = layers.reshape(h, shape=(self.num_layers, 2, -1, self.hidden_size)) c = layers.reshape(c, shape=(self.num_layers, 2, -1, self.hidden_size)) for i in range(self.num_layers): x = layers.split(x, batch_sizes, dim=0) if self.training and self.dropout > 0: mask = SharedDropout.get_mask(x[0], self.dropout) x = [j * mask[:len(j)] for j in x] x_f, (h_f, c_f) = self.layer_forward(x=x, hx=(h[i, 0], c[i, 0]), cell=self.f_cells[i], batch_sizes=batch_sizes) x_b, (h_b, c_b) = self.layer_forward(x=x, hx=(h[i, 1], c[i, 1]), cell=self.b_cells[i], batch_sizes=batch_sizes, reverse=True) x = layers.concat((x_f, x_b), axis=-1) h_n.append(layers.stack((h_f, h_b))) c_n.append(layers.stack((c_f, c_b))) x = self.pad_packed_sequence(x, batch_sizes, unsorted_indices) hx = layers.concat(h_n, axis=0), layers.concat(c_n, axis=0) hx = self.permute_hidden(hx, unsorted_indices) return x, hx
def update_loss_scale(grads):
    state = mixed_precision_global_state()
    if state is None or not state.dynamic_scaling:
        return
    per_grad_check = layers.stack([layers.reduce_sum(g) for g in grads])
    grad_valid = layers.isfinite(per_grad_check)
    with layers.Switch() as switch:
        with switch.case(grad_valid):
            state.increment()
        with switch.default():
            state.decrement()
    return grad_valid
def _ranking(self, inputs, predictions): """ Reranking generated responses. """ src_token = inputs["src_token"] src_mask = inputs["src_mask"] src_pos = inputs["src_pos"] src_type = inputs["src_type"] src_turn = inputs["src_turn"] src_embed = self.embedder(src_token, src_pos, src_type, src_turn) batch_size, num_latent, tgt_seq_len = predictions.shape # shape: [batch_size, num_latent, seq_len, 1] preds_token = F.unsqueeze(predictions, [3]) preds_mask = F.not_equal(preds_token, self.padding_idx, "int64") preds_pos = layers.range(0, tgt_seq_len, 1, dtype="float32") preds_pos = F.unsqueeze(preds_pos, [0, 0, 1]) preds_pos = layers.expand(preds_pos, [batch_size, num_latent, 1, 1]) preds_pos = layers.cast(preds_pos, "int64") preds_type = layers.zeros_like(preds_token) preds_turn = layers.zeros_like(preds_token) scores = [] for i in range(num_latent): pred_token = preds_token[:, i] pred_mask = preds_mask[:, i] pred_pos = preds_pos[:, i] pred_type = preds_type[:, i] pred_turn = preds_turn[:, i] input_mask = layers.concat([src_mask, pred_mask], axis=1) input_mask.stop_gradient = True pred_embed = self.embedder(pred_token, pred_pos, pred_type, pred_turn) embed = layers.concat([src_embed, pred_embed], axis=1) embed = self.embed_layer_norm(embed) mask_embed = self.mask_embed mask_embed = layers.expand(mask_embed, [batch_size, 1, 1]) mask_embed = self.embed_layer_norm(mask_embed) out = layers.concat([mask_embed, embed], axis=1) mask = self._create_mask(input_mask, append_head=True) for layer in self.layers: out = layer(out, mask, None) mask_embed = out[:, 0] score = self.discriminator(mask_embed) scores.append(score[:, 0]) scores = layers.stack(scores, axis=1) return scores
def pad_packed_sequence(self, x, batch_sizes, unsorted_indices):
    """Pads a packed sequence."""
    h_size = x.shape[1]
    split_x = layers.split(x, batch_sizes, dim=0)
    max_bs = batch_sizes[0]
    step_embs = []
    for step, cur_bs in enumerate(batch_sizes):
        pad_emb = layers.zeros(shape=(max_bs - cur_bs, h_size),
                               dtype=x.dtype)
        step_emb = layers.concat(input=(split_x[step], pad_emb))
        step_embs.append(step_emb)
    new_x = layers.stack(step_embs, axis=1)
    new_x = layers.index_select(new_x, unsorted_indices)
    return new_x
def forward(self, x, adj):
    """Forward network"""
    x = layers.dropout(x, self.dropout)
    if self.layer == 1:
        x = layers.stack([att.forward(x, adj) for att in self.attentions],
                         dim=2)
        x = layers.reduce_sum(x, 2)
        x = layers.dropout(x, self.dropout)
        return layers.log_softmax(x, axis=2)
    else:
        x = layers.concat([att.forward(x, adj) for att in self.attentions],
                          axis=2)
        x = layers.dropout(x, self.dropout)
        return self.out_att.forward(x, adj)
def pad_sequence_paddle(sequences, padding_value=0):
    """Fill sequences (variable) into a fixed-length matrix."""
    max_size = sequences[0].shape
    trailing_dims = max_size[1:]
    max_len = max([s.shape[0] for s in sequences])
    out_tensor = []
    for tensor in sequences:
        length = tensor.shape[0]
        pad_tensor = layers.concat(
            (tensor,
             layers.fill_constant((max_len - length, *trailing_dims),
                                  dtype=tensor.dtype,
                                  value=padding_value)))
        out_tensor.append(pad_tensor)
    out_tensor = layers.stack(out_tensor)
    return out_tensor
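# A minimal usage sketch for pad_sequence_paddle (assumes dygraph mode and the
# np / dg (paddle.fluid.dygraph) / layers aliases used above; shapes are
# illustrative only). Shorter sequences are padded with the padding value.
seqs = [
    dg.to_variable(np.ones((3, 4), dtype="float32")),
    dg.to_variable(np.ones((5, 4), dtype="float32")),
]
padded = pad_sequence_paddle(seqs)  # shape: [2, 5, 4]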
def teacher_forced_train(self, keys, values, text_lengths, speaker_embed,
                         mel):
    # build decoder inputs by shifting over by one frame and adding an
    # all-zero <start> frame; the mel input is downsampled by a reduction factor
    batch_size = mel.shape[0]
    mel_input = F.reshape(mel, (batch_size, -1,
                                self.decoder.reduction_factor,
                                self.decoder.in_channels))
    zero_frame = F.zeros((batch_size, 1, self.decoder.in_channels),
                         dtype="float32")
    # downsample mel input as a regularization
    mel_input = F.concat([zero_frame, mel_input[:, :-1, -1, :]], axis=1)

    # decoder
    decoded, hidden, attentions, final_state = self.decoder(
        mel_input, keys, values, text_lengths, 0, speaker_embed)
    attentions = F.stack(attentions)  # (N, B, T_dec, T_enc)
    # unfold frames
    decoded = F.reshape(decoded,
                        (batch_size, -1, self.decoder.in_channels))
    # postnet
    refined = self.postnet(hidden, speaker_embed)
    return decoded, refined, attentions, final_state
def pick_image(images, idx):
    """Pick the image among images according to idx.

    Args:
        images: (B x N x C x H x W), N images.
        idx: (B, ) indices to select.
    """
    if type(images) == list:
        return [pick_image(r, idx) for r in images]
    if idx is None:
        return images[:, 0]
    elif type(idx) == int:
        return images[:, idx]

    idx = idx.astype('long').numpy()
    images = L.stack(
        [images[i][int(idx[i])] for i in range(images.shape[0])])
    return images
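# A minimal usage sketch for pick_image (assumes the np / dg aliases as above;
# here a fixed image index is picked for every item in the batch).
images = dg.to_variable(np.random.rand(2, 3, 3, 8, 8).astype("float32"))  # B=2, N=3
first = pick_image(images, None)  # [2, 3, 8, 8], image 0 of every batch item
second = pick_image(images, 1)    # [2, 3, 8, 8], image 1 of every batch item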
def build_and_run_program(place, batch_size, beam_size, stop_gradient=False): fluid.default_startup_program().random_seed = 1 fluid.default_main_program().random_seed = 1 np.random.seed(2) x = layers.assign( np.random.rand(batch_size, beam_size, 32).astype("float32")) indices = fluid.data(shape=[None, beam_size], dtype="int64", name="indices") step_idx = layers.fill_constant( shape=[1], dtype="int64", value=0, force_cpu=True) max_len = layers.fill_constant( shape=[1], dtype="int64", value=10, force_cpu=True) cond = layers.less_than(x=step_idx, y=max_len) while_op = layers.While(cond) scores = layers.array_write(x, step_idx) with while_op.block(): bs = layers.cast(layers.shape(x)[0], "int64") for _ in range(20): bs = layers.cast(bs, 'int64') bs.stop_gradient = stop_gradient batch_pos = layers.expand( layers.unsqueeze( layers.range( 0, bs, 1, dtype=bs.dtype), [1]), [1, beam_size]) topk_coordinates = layers.stack([batch_pos, indices], axis=2) topk_coordinates.stop_gradient = stop_gradient score = layers.gather_nd(x, topk_coordinates) layers.increment(x=step_idx, value=1.0, in_place=True) layers.array_write(score, i=step_idx, array=scores) length_cond = layers.less_than(x=step_idx, y=max_len) layers.assign(length_cond, cond) out = layers.tensor_array_to_tensor(scores, axis=0, use_stack=True)[0] loss = layers.reduce_mean(out) opt = fluid.optimizer.Adam(0.01) opt.minimize(loss) exe = fluid.Executor(place) data = np.random.random_integers( low=0, high=beam_size - 1, size=(batch_size, beam_size)).astype("int64") loss_val, = exe.run(feed={"indices": data}, fetch_list=[loss]) return loss_val
def forward(self, indices, speaker_position_rate=None):
    """
    Args:
        indices (Variable): shape (B, T), dtype: int64, position indices,
            where B means the batch size, T means the time steps.
        speaker_position_rate (Variable | float, optional): position rate.
            It can be a floating point number or a Variable with shape (1,),
            then this speaker_position_rate is used for every example. It
            can also be a Variable with shape (B, ), which contains a
            speaker position rate for each utterance.
    Returns:
        out (Variable): shape (B, T, C_pos), dtype float32, position
            embedding, where C_pos means position embedding size.
    """
    batch_size, time_steps = indices.shape

    # convert speaker_position_rate to a Variable with shape (B, )
    if isinstance(speaker_position_rate, float):
        speaker_position_rate = dg.to_variable(
            np.array([speaker_position_rate]).astype("float32"))
        speaker_position_rate = F.expand(speaker_position_rate,
                                         [batch_size])
    elif isinstance(speaker_position_rate, fluid.framework.Variable) \
            and list(speaker_position_rate.shape) == [1]:
        speaker_position_rate = F.expand(speaker_position_rate,
                                         [batch_size])
    assert len(speaker_position_rate.shape) == 1 and \
        list(speaker_position_rate.shape) == [batch_size]

    weight = compute_position_embedding(self.weight,
                                        speaker_position_rate)  # (B, V, C)
    # make indices for gather_nd
    batch_id = F.expand(
        F.unsqueeze(F.range(0, batch_size, 1, dtype="int64"), [1]),
        [1, time_steps])
    # (B, T, 2)
    gather_nd_id = F.stack([batch_id, indices], -1)
    out = F.gather_nd(weight, gather_nd_id)
    return out
def crop(x, audio_start, audio_length):
    """Crop the upsampled condition to match audio_length.

    The upsampled condition has the same time steps as the whole audio does.
    But since audios are sliced to 0.5 seconds randomly while conditions are
    not, upsampled conditions should also be sliced to exactly match the time
    steps of the audio slice.

    Args:
        x (Variable): shape(B, C, T), dtype float32, the upsampled condition.
        audio_start (Variable): shape(B, ), dtype: int64, the index of the
            starting point.
        audio_length (int): the length of the audio (number of samples it
            contains).

    Returns:
        Variable: shape(B, C, audio_length), cropped condition.
    """
    # crop each example in the batch
    slices = []
    starts = audio_start.numpy()
    for i in range(x.shape[0]):
        start = starts[i]
        end = start + audio_length
        slice = F.slice(x[i], axes=[1], starts=[start], ends=[end])
        slices.append(slice)
    out = F.stack(slices)
    return out
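# A minimal usage sketch for crop (assumes the np / dg / F aliases as above).
# Each example keeps audio_length condition frames starting at its own offset.
condition = dg.to_variable(np.random.rand(2, 80, 100).astype("float32"))  # (B, C, T)
audio_start = dg.to_variable(np.array([10, 20], dtype="int64"))
cropped = crop(condition, audio_start, audio_length=32)  # shape: [2, 80, 32]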
def forward(self, outputs, target_sizes):
    """Perform the computation.

    Parameters:
        outputs: raw outputs of the model
        target_sizes: tensor of dimension [batch_size x 2] containing the
            size of each image. For evaluation, this must be the original
            image size (before any data augmentation). For visualization,
            this should be the image size after data augmentation, but
            before padding.
    """
    out_logits, out_bbox = outputs["pred_logits"], outputs["pred_boxes"]

    assert len(out_logits) == len(target_sizes)
    assert target_sizes.shape[1] == 2

    prob = L.softmax(out_logits, -1)  # [bs, num_queries, num_classes + 1]
    labels = L.argmax(prob[:, :, :], axis=-1)  # [bs, num_queries]
    scores = L.reduce_max(prob, dim=-1)  # [bs, num_queries]

    # convert to [x0, y0, x1, y1] format
    bs, num_queries, _ = out_bbox.shape
    out_bbox = L.reshape(out_bbox, (-1, 4))
    boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
    boxes = L.reshape(boxes, (bs, num_queries, 4))
    # and from relative [0, 1] to absolute [0, height] coordinates
    img_h, img_w = target_sizes[:, 0], target_sizes[:, 1]
    scale_fct = L.stack([img_w, img_h, img_w, img_h], 1)  # [bs, 4]
    scale_fct = L.expand(L.unsqueeze(scale_fct, [1]), (1, num_queries, 1))
    boxes = boxes * scale_fct

    results = [{
        'scores': s,
        'labels': l,
        'boxes': b
    } for s, l, b in zip(scores.numpy(), labels.numpy(), boxes.numpy())]
    return results
def beam_search_infilling(model, q_ids, q_sids, sos_id, eos_id, attn_id, max_encode_len=640, max_decode_len=100, beam_width=5, tgt_type_id=3, length_penalty=1.0): model.eval() _, __, info = model(q_ids, q_sids) d_batch, d_seqlen = q_ids.shape state = BeamSearchState(log_probs=L.zeros([d_batch, beam_width], 'float32'), lengths=L.zeros([d_batch, beam_width], 'int64'), finished=L.zeros([d_batch, beam_width], 'int64')) outputs = [] def reorder_(t, parent_id): """reorder cache according to parent beam id""" gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape( parent_id, [-1]) t = L.gather(t, gather_idx) return t def tile_(t, times): _shapes = list(t.shape[1:]) ret = L.reshape( L.expand(L.unsqueeze(t, [1]), [ 1, times, ] + [ 1, ] * len(_shapes)), [ -1, ] + _shapes) return ret cached_k, cached_v = info['caches'] cached_k = [tile_(k, beam_width) for k in cached_k] cached_v = [tile_(v, beam_width) for v in cached_v] past_cache = (cached_k, cached_v) q_ids = tile_(q_ids, beam_width) seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS ids = L.stack([cls_ids, attn_ids], -1) for step in range(max_decode_len): bias = gen_bias(q_ids, ids, step) pos_ids = D.to_variable( np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch * beam_width, 1])) pos_ids += seqlen _, logits, info = model(ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) output, state = beam_search_step(state, logits[:, 1], eos_id=eos_id, beam_width=beam_width, is_first_step=(step == 0), length_penalty=length_penalty) outputs.append(output) past_cached_k, past_cached_v = past_cache cached_k, cached_v = info['caches'] cached_k = [ reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) for pk, k in zip(past_cached_k, cached_k) ] # concat cached cached_v = [ reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) for pv, v in zip(past_cached_v, cached_v) ] past_cache = (cached_k, cached_v) pred_ids_flatten = L.reshape(output.predicted_ids, [d_batch * beam_width]) ids = L.stack([pred_ids_flatten, attn_ids], 1) if state.finished.numpy().all(): break final_ids = L.stack([o.predicted_ids for o in outputs], 0) final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0) final_ids = L.gather_tree(final_ids, final_parent_ids)[:, :, 0] # pick best beam final_ids = L.transpose(L.reshape(final_ids, [-1, d_batch * 1]), [1, 0]) return final_ids
def beam_search(self, src_word, src_pos, src_slf_attn_bias, trg_word, trg_src_attn_bias, bos_id=0, eos_id=1, beam_size=4, max_len=256): def expand_to_beam_size(tensor, beam_size): tensor = layers.reshape(tensor, [tensor.shape[0], 1] + tensor.shape[1:]) tile_dims = [1] * len(tensor.shape) tile_dims[1] = beam_size return layers.expand(tensor, tile_dims) def merge_batch_beams(tensor): return layers.reshape(tensor, [tensor.shape[0] * tensor.shape[1]] + tensor.shape[2:]) def split_batch_beams(tensor): return fluid.layers.reshape(tensor, shape=[-1, beam_size] + list(tensor.shape[1:])) def mask_probs(probs, finished, noend_mask_tensor): # TODO: use where_op finished = layers.cast(finished, dtype=probs.dtype) probs = layers.elementwise_mul(layers.expand( layers.unsqueeze(finished, [2]), [1, 1, self.trg_vocab_size]), noend_mask_tensor, axis=-1) - layers.elementwise_mul( probs, (finished - 1), axis=0) return probs def gather(x, indices, batch_pos): topk_coordinates = fluid.layers.stack([batch_pos, indices], axis=2) return layers.gather_nd(x, topk_coordinates) # run encoder enc_output = self.encoder(src_word, src_pos, src_slf_attn_bias) # constant number inf = float(1. * 1e7) batch_size = enc_output.shape[0] max_len = (enc_output.shape[1] + 20) if max_len is None else max_len vocab_size_tensor = layers.fill_constant(shape=[1], dtype="int64", value=self.trg_vocab_size) end_token_tensor = to_variable( np.full([batch_size, beam_size], eos_id, dtype="int64")) noend_array = [-inf] * self.trg_vocab_size noend_array[eos_id] = 0 noend_mask_tensor = to_variable(np.array(noend_array, dtype="float32")) batch_pos = layers.expand( layers.unsqueeze( to_variable(np.arange(0, batch_size, 1, dtype="int64")), [1]), [1, beam_size]) predict_ids = [] parent_ids = [] ### initialize states of beam search ### log_probs = to_variable( np.array([[0.] 
+ [-inf] * (beam_size - 1)] * batch_size, dtype="float32")) finished = to_variable( np.full([batch_size, beam_size], 0, dtype="bool")) ### initialize inputs and states of transformer decoder ### ## init inputs for decoder, shaped `[batch_size*beam_size, ...]` trg_word = layers.fill_constant(shape=[batch_size * beam_size, 1], dtype="int64", value=bos_id) trg_pos = layers.zeros_like(trg_word) trg_src_attn_bias = merge_batch_beams( expand_to_beam_size(trg_src_attn_bias, beam_size)) enc_output = merge_batch_beams( expand_to_beam_size(enc_output, beam_size)) ## init states (caches) for transformer, need to be updated according to selected beam caches = [{ "k": layers.fill_constant( shape=[batch_size * beam_size, self.n_head, 0, self.d_key], dtype=enc_output.dtype, value=0), "v": layers.fill_constant( shape=[batch_size * beam_size, self.n_head, 0, self.d_value], dtype=enc_output.dtype, value=0), } for i in range(self.n_layer)] for i in range(max_len): trg_pos = layers.fill_constant(shape=trg_word.shape, dtype="int64", value=i) caches = map_structure( # can not be reshaped since the 0 size lambda x: x if i == 0 else merge_batch_beams(x), caches) logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias, enc_output, caches) caches = map_structure(split_batch_beams, caches) step_log_probs = split_batch_beams( fluid.layers.log(fluid.layers.softmax(logits))) step_log_probs = mask_probs(step_log_probs, finished, noend_mask_tensor) log_probs = layers.elementwise_add(x=step_log_probs, y=log_probs, axis=0) log_probs = layers.reshape(log_probs, [-1, beam_size * self.trg_vocab_size]) scores = log_probs topk_scores, topk_indices = fluid.layers.topk(input=scores, k=beam_size) beam_indices = fluid.layers.elementwise_floordiv( topk_indices, vocab_size_tensor) token_indices = fluid.layers.elementwise_mod( topk_indices, vocab_size_tensor) # update states caches = map_structure( lambda x: gather(x, beam_indices, batch_pos), caches) log_probs = gather(log_probs, topk_indices, batch_pos) finished = gather(finished, beam_indices, batch_pos) finished = layers.logical_or( finished, layers.equal(token_indices, end_token_tensor)) trg_word = layers.reshape(token_indices, [-1, 1]) predict_ids.append(token_indices) parent_ids.append(beam_indices) if layers.reduce_all(finished).numpy(): break predict_ids = layers.stack(predict_ids, axis=0) parent_ids = layers.stack(parent_ids, axis=0) finished_seq = layers.transpose( layers.gather_tree(predict_ids, parent_ids), [1, 2, 0]) finished_scores = topk_scores return finished_seq, finished_scores
def forward(self): """Build the GATNE net. """ param_attr_init = fluid.initializer.Uniform( low=-1.0, high=1.0, seed=np.random.randint(100)) embed_param_attrs = fluid.ParamAttr(name='Base_node_embed', initializer=param_attr_init) # node_embeddings base_node_embed = fl.embedding( input=fl.reshape(self.train_inputs, shape=[-1, 1]), size=[self.num_nodes, self.embedding_size], param_attr=embed_param_attrs) node_features = [] for edge_type in self.edge_types: param_attr_init = fluid.initializer.Uniform( low=-1.0, high=1.0, seed=np.random.randint(100)) embed_param_attrs = fluid.ParamAttr(name='%s_node_embed' % edge_type, initializer=param_attr_init) features = fl.embedding( input=self.gw[edge_type].node_feat['index'], size=[self.num_nodes, self.embedding_u_size], param_attr=embed_param_attrs) node_features.append(features) # mp_output: list of embedding(self.num_nodes, dim) mp_output = self.message_passing(self.gw, self.edge_types, node_features) # U : (num_type[m], num_nodes, dim[s]) node_type_embed = fl.stack(mp_output, axis=0) # U : (num_nodes, num_type[m], dim[s]) node_type_embed = fl.transpose(node_type_embed, perm=[1, 0, 2]) #gather node_type_embed from train_inputs node_type_embed = fl.gather(node_type_embed, self.train_inputs) # M_r trans_weights = fl.create_parameter( shape=[ self.edge_type_count, self.embedding_u_size, self.embedding_size // self.att_head ], attr=fluid.initializer.TruncatedNormalInitializer( loc=0.0, scale=1.0 / math.sqrt(self.embedding_size)), dtype='float32', name='trans_w') # W_r trans_weights_s1 = fl.create_parameter( shape=[self.edge_type_count, self.embedding_u_size, self.dim_a], attr=fluid.initializer.TruncatedNormalInitializer( loc=0.0, scale=1.0 / math.sqrt(self.embedding_size)), dtype='float32', name='trans_w_s1') # w_r trans_weights_s2 = fl.create_parameter( shape=[self.edge_type_count, self.dim_a, self.att_head], attr=fluid.initializer.TruncatedNormalInitializer( loc=0.0, scale=1.0 / math.sqrt(self.embedding_size)), dtype='float32', name='trans_w_s2') trans_w = fl.gather(trans_weights, self.train_types) trans_w_s1 = fl.gather(trans_weights_s1, self.train_types) trans_w_s2 = fl.gather(trans_weights_s2, self.train_types) attention = self.attention(node_type_embed, trans_w_s1, trans_w_s2) node_type_embed = fl.matmul(attention, node_type_embed) node_embed = base_node_embed + fl.reshape( fl.matmul(node_type_embed, trans_w), [-1, self.embedding_size]) self.last_node_embed = fl.l2_normalize(node_embed, axis=1) nce_weight_initializer = fluid.initializer.TruncatedNormalInitializer( loc=0.0, scale=1.0 / math.sqrt(self.embedding_size)) nce_weight_attrs = fluid.ParamAttr(name='nce_weight', initializer=nce_weight_initializer) weight_pos = fl.embedding(input=self.train_labels, size=[self.num_nodes, self.embedding_size], param_attr=nce_weight_attrs) weight_neg = fl.embedding(input=self.train_negs, size=[self.num_nodes, self.embedding_size], param_attr=nce_weight_attrs) tmp_node_embed = fl.unsqueeze(self.last_node_embed, axes=[1]) pos_logits = fl.matmul(tmp_node_embed, weight_pos, transpose_y=True) # [B, 1, 1] neg_logits = fl.matmul(tmp_node_embed, weight_neg, transpose_y=True) # [B, 1, neg_num] pos_score = fl.squeeze(pos_logits, axes=[1]) pos_score = fl.clip(pos_score, min=-10, max=10) pos_score = -1.0 * fl.logsigmoid(pos_score) neg_score = fl.squeeze(neg_logits, axes=[1]) neg_score = fl.clip(neg_score, min=-10, max=10) neg_score = -1.0 * fl.logsigmoid(-1.0 * neg_score) neg_score = fl.reduce_sum(neg_score, dim=1, keep_dim=True) self.loss = fl.reduce_mean(pos_score + 
neg_score)
def plot_results(self, samples, outputs, targets): # samples: [batch_size, 3, H, W] samples = [sample.numpy() for sample in samples] target_sizes = L.stack([t["size"] for t in targets], 0) results = self.postprocessor(outputs, target_sizes) for i, item in enumerate(zip(samples, results, targets)): image, result, target = item image = np.transpose(image, (1, 2, 0)) std = np.array([0.229, 0.224, 0.225]) mean = np.array([0.485, 0.456, 0.406]) image = (image * std + mean) * 255 image = image.astype(np.uint8)[:, :, ::-1] # RGB -> BGR targ_image = image.copy() pred_img = image.copy() colors = [ (0, 0, 255), (0, 255, 0), (255, 0, 0), (255, 255, 0), (255, 0, 255), (0, 255, 255), (0, 0, 128), (0, 128, 0), (128, 0, 0), (128, 128, 0), (128, 0, 128), (0, 128, 128), ] rect_num = len(target["boxes"]) colors = colors * math.ceil(rect_num / 12) h, w = target["size"].numpy() for i, item in enumerate(zip(target["labels"], target["boxes"])): l, box = item color = colors[i] box = L.unsqueeze(box, [0]) box = box_cxcywh_to_xyxy(box) # [1, 4] box = L.squeeze(box, [0]) # [4] box = (box.numpy() * np.array([w, h, w, h])).astype(np.int) left_top, bottom_down = (box[0], box[1]), (box[2], box[3]) cv2.rectangle(targ_image, left_top, bottom_down, color, 2) l = l.numpy()[0] if isinstance(self.label_to_text, dict): label_name = self.label_to_text.get(str(l), str(l)) else: if l < len(self.label_to_text): label_name = self.label_to_text[l] else: label_name = str(l) cv2.putText(targ_image, label_name, left_top, cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2) rect_num = len(result["labels"]) colors = colors * math.ceil(rect_num / 12) for i, item in enumerate( zip(result["scores"], result["labels"], result["boxes"])): s, l, box = item if l == self.background: continue color = colors[i] left_top, bottom_down = (box[0], box[1]), (box[2], box[3]) cv2.rectangle(pred_img, left_top, bottom_down, color, 2) if isinstance(self.label_to_text, dict): label_name = self.label_to_text.get(str(l), str(l)) else: if l < len(self.label_to_text): label_name = self.label_to_text[l] else: label_name = str(l) cv2.putText(pred_img, label_name + " [" + str(s)[:4] + "]", left_top, cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2) show_image = np.concatenate((targ_image, pred_img), 1) cv2.imwrite( os.path.join(self.output_dir, str(self.index) + ".jpg"), show_image) self.index = (self.index + 1) % self.pool_size