Example #1
def elmo_encoder(word_ids, elmo_l2_coef):
    """
    param:word_ids
    param:elmo_l2_coef
    """
    x_emb = layers.embedding(input=word_ids,
                             size=[vocab_size, emb_size],
                             dtype='float32',
                             is_sparse=False,
                             param_attr=fluid.ParamAttr(name='embedding_para'))

    x_emb_r = fluid.layers.sequence_reverse(x_emb, name=None)
    fw_hiddens, fw_hiddens_ori = encoder_wrapper(x_emb,
                                                 vocab_size,
                                                 emb_size,
                                                 para_name='fw_',
                                                 args=None)
    bw_hiddens, bw_hiddens_ori = encoder_wrapper(x_emb_r,
                                                 vocab_size,
                                                 emb_size,
                                                 para_name='bw_',
                                                 args=None)

    num_layers = len(fw_hiddens_ori)
    token_embeddings = layers.concat(input=[x_emb, x_emb], axis=1)
    token_embeddings.stop_gradient = True
    concate_embeddings = [token_embeddings]
    for index in range(num_layers):
        embedding = layers.concat(
            input=[fw_hiddens_ori[index], bw_hiddens_ori[index]], axis=1)
        embedding = dropout(embedding)
        embedding.stop_gradient = True
        concate_embeddings.append(embedding)
    weighted_emb = weight_layers(concate_embeddings, l2_coef=elmo_l2_coef)
    return weighted_emb
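
The weight_layers call above is not shown in this example; in the standard ELMo formulation it collapses the per-layer representations into one weighted sum using softmax-normalized scalars plus a global scale. A minimal NumPy sketch of that idea (the names s and gamma are illustrative, not taken from the source):

import numpy as np

def elmo_scalar_mix(layer_reprs, s, gamma):
    """layer_reprs: list of [batch, dim] arrays, one per layer."""
    w = np.exp(s) / np.exp(s).sum()           # softmax-normalize the layer weights
    mixed = sum(w_i * h for w_i, h in zip(w, layer_reprs))
    return gamma * mixed                      # global task-specific scale

reprs = [np.random.randn(2, 8) for _ in range(3)]
print(elmo_scalar_mix(reprs, s=np.zeros(3), gamma=1.0).shape)  # (2, 8)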
Example #2
    def __split_heads_qkv(queries, keys, values, n_head, d_key, d_value):
        """
        Reshape input tensors at the last dimension to split multi-heads 
        and then transpose. Specifically, transform the input tensor with shape
        [bs, max_sequence_length, n_head * hidden_dim] to the output tensor
        with shape [bs, n_head, max_sequence_length, hidden_dim].
        """
        # The value 0 in shape attr means copying the corresponding dimension
        # size of the input as the output dimension size.
        reshaped_q = layers.reshape(x=queries,
                                    shape=[0, 0, n_head, d_key],
                                    inplace=True)
        # permute the dimensions into:
        # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
        q = layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
        # For encoder-decoder attention in inference, insert the ops and vars
        # into global block to use as cache among beam search.
        reshape_layer = _wrap_layer_with_block(
            layers.reshape,
            fluid.default_main_program().current_block().parent_idx
        ) if cache is not None and static_kv else layers.reshape
        transpose_layer = _wrap_layer_with_block(
            layers.transpose,
            fluid.default_main_program().current_block().parent_idx
        ) if cache is not None and static_kv else layers.transpose
        reshaped_k = reshape_layer(x=keys,
                                   shape=[0, 0, n_head, d_key],
                                   inplace=True)
        k = transpose_layer(x=reshaped_k, perm=[0, 2, 1, 3])
        reshaped_v = reshape_layer(x=values,
                                   shape=[0, 0, n_head, d_value],
                                   inplace=True)
        v = transpose_layer(x=reshaped_v, perm=[0, 2, 1, 3])

        if cache is not None:  # only for faster inference
            if static_kv:  # For encoder-decoder attention in inference
                cache_k, cache_v = cache["static_k"], cache["static_v"]
                # To init the static_k and static_v in cache.
                # These could instead be initialized with a conditional op
                # (if_else) at the first step of the while loop, but that
                # would likely be less efficient.
                static_cache_init = _wrap_layer_with_block(
                    layers.assign,
                    fluid.default_main_program().current_block().parent_idx)
                static_cache_init(k, cache_k)
                static_cache_init(v, cache_v)
            else:  # For decoder self-attention in inference
                cache_k, cache_v = cache["k"], cache["v"]
            # gather cell states corresponding to selected parent
            select_k = layers.gather(cache_k, index=gather_idx)
            select_v = layers.gather(cache_v, index=gather_idx)
            if not static_kv:
                # For self attention in inference, use cache and concat time steps.
                select_k = layers.concat([select_k, k], axis=2)
                select_v = layers.concat([select_v, v], axis=2)
            # update cell states(caches) cached in global block
            layers.assign(select_k, cache_k)
            layers.assign(select_v, cache_v)
            return q, select_k, select_v
        return q, k, v
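
The shape=[0, 0, n_head, d_key] trick keeps the batch and sequence dimensions of the input. A NumPy illustration of the same split-and-transpose (shapes chosen arbitrarily for the demo):

import numpy as np

bs, seq_len, n_head, d_key = 2, 4, 3, 5
queries = np.random.randn(bs, seq_len, n_head * d_key)
reshaped_q = queries.reshape(bs, seq_len, n_head, d_key)  # 0 in the Paddle shape = copy input dim
q = reshaped_q.transpose(0, 2, 1, 3)                      # -> [bs, n_head, seq_len, d_key]
print(q.shape)  # (2, 3, 4, 5)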
Example #3
def greedy_search_infilling(model,
                            q_ids,
                            q_sids,
                            sos_id,
                            eos_id,
                            attn_id,
                            max_encode_len=640,
                            max_decode_len=100,
                            tgt_type_id=3):
    model.eval()
    _, logits, info = model(q_ids, q_sids)
    gen_ids = L.argmax(logits, -1)
    d_batch, d_seqlen = q_ids.shape
    seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True)
    has_stopped = np.zeros([d_batch], dtype=bool)
    gen_seq_len = np.zeros([d_batch], dtype=np.int64)
    output_ids = []

    past_cache = info['caches']

    cls_ids = L.ones([d_batch], dtype='int64') * sos_id
    attn_ids = L.ones([d_batch], dtype='int64') * attn_id
    ids = L.stack([cls_ids, attn_ids], -1)
    for step in range(max_decode_len):
        bias = gen_bias(q_ids, ids, step)
        pos_ids = D.to_variable(
            np.tile(np.array([[step, step + 1]], dtype=np.int64),
                    [d_batch, 1]))
        pos_ids += seqlen
        _, logits, info = model(ids,
                                L.ones_like(ids) * tgt_type_id,
                                pos_ids=pos_ids,
                                attn_bias=bias,
                                past_cache=past_cache)
        gen_ids = L.argmax(logits, -1)

        past_cached_k, past_cached_v = past_cache
        cached_k, cached_v = info['caches']
        cached_k = [
            L.concat([pk, k[:, :1, :]], 1)
            for pk, k in zip(past_cached_k, cached_k)
        ]  # concat cached
        cached_v = [
            L.concat([pv, v[:, :1, :]], 1)
            for pv, v in zip(past_cached_v, cached_v)
        ]
        past_cache = (cached_k, cached_v)

        gen_ids = gen_ids[:, 1]
        ids = L.stack([gen_ids, attn_ids], 1)

        gen_ids = gen_ids.numpy()
        has_stopped |= (gen_ids == eos_id).astype(bool)
        gen_seq_len += (1 - has_stopped.astype(np.int64))
        output_ids.append(gen_ids.tolist())
        if has_stopped.all():
            break
    output_ids = np.array(output_ids).transpose([1, 0])
    return output_ids
Example #4
    def bbox_ciou(self, boxes1_x0y0x1y1, boxes2_x0y0x1y1):
        '''
        Compute CIoU = IoU - p2/c2 - a*v
        :param boxes1_x0y0x1y1: (batch_size, num_priors, 4)   pred_x0y0x1y1
        :param boxes2_x0y0x1y1: (batch_size, num_priors, 4)   label_x0y0x1y1
        :return: ciou
        '''

        # convert to center coordinates and width/height
        boxes1 = P.concat(
            [(boxes1_x0y0x1y1[:, :, :2] + boxes1_x0y0x1y1[:, :, 2:]) * 0.5,
             boxes1_x0y0x1y1[:, :, 2:] - boxes1_x0y0x1y1[:, :, :2]],
            axis=-1)
        boxes2 = P.concat(
            [(boxes2_x0y0x1y1[:, :, :2] + boxes2_x0y0x1y1[:, :, 2:]) * 0.5,
             boxes2_x0y0x1y1[:, :, 2:] - boxes2_x0y0x1y1[:, :, :2]],
            axis=-1)

        # areas of the two boxes
        boxes1_area = (boxes1_x0y0x1y1[:, :, 2] - boxes1_x0y0x1y1[:, :, 0]) * (
            boxes1_x0y0x1y1[:, :, 3] - boxes1_x0y0x1y1[:, :, 1])
        boxes2_area = (boxes2_x0y0x1y1[:, :, 2] - boxes2_x0y0x1y1[:, :, 0]) * (
            boxes2_x0y0x1y1[:, :, 3] - boxes2_x0y0x1y1[:, :, 1])

        # top-left and bottom-right corners of the intersection
        left_up = P.elementwise_max(boxes1_x0y0x1y1[:, :, :2],
                                    boxes2_x0y0x1y1[:, :, :2])
        right_down = P.elementwise_min(boxes1_x0y0x1y1[:, :, 2:],
                                       boxes2_x0y0x1y1[:, :, 2:])

        # intersection area inter_area, then IoU
        inter_section = P.relu(right_down - left_up)
        inter_area = inter_section[:, :, 0] * inter_section[:, :, 1]
        union_area = boxes1_area + boxes2_area - inter_area
        iou = inter_area / union_area

        # top-left and bottom-right corners of the enclosing box
        enclose_left_up = P.elementwise_min(boxes1_x0y0x1y1[:, :, :2],
                                            boxes2_x0y0x1y1[:, :, :2])
        enclose_right_down = P.elementwise_max(boxes1_x0y0x1y1[:, :, 2:],
                                               boxes2_x0y0x1y1[:, :, 2:])

        # squared diagonal of the enclosing box
        enclose_wh = enclose_right_down - enclose_left_up
        enclose_c2 = P.pow(enclose_wh[:, :, 0], 2) + P.pow(
            enclose_wh[:, :, 1], 2)

        # squared distance between the two box centers
        p2 = P.pow(boxes1[:, :, 0] - boxes2[:, :, 0], 2) + P.pow(
            boxes1[:, :, 1] - boxes2[:, :, 1], 2)

        # add the a*v term; the denominator boxes2[:, :, 3] can be 0, so a tiny constant is added to avoid NaN
        atan1 = P.atan(boxes1[:, :, 2] / (boxes1[:, :, 3] + 1e-9))
        atan2 = P.atan(boxes2[:, :, 2] / (boxes2[:, :, 3] + 1e-9))
        v = 4.0 * P.pow(atan1 - atan2, 2) / (math.pi**2)
        a = v / (1 - iou + v)

        ciou = iou - 1.0 * p2 / enclose_c2 - 1.0 * a * v
        return ciou
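
A plain-Python check of the same CIoU arithmetic for a single pair of corner-format boxes; this only mirrors the tensor math above as a sanity-check sketch, it is not part of the original example:

import math

def ciou_single(b1, b2):
    """b1, b2: [x0, y0, x1, y1]."""
    inter_w = max(0.0, min(b1[2], b2[2]) - max(b1[0], b2[0]))
    inter_h = max(0.0, min(b1[3], b2[3]) - max(b1[1], b2[1]))
    inter = inter_w * inter_h
    area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
    area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
    iou = inter / (area1 + area2 - inter)
    # squared diagonal of the enclosing box
    c2 = (max(b1[2], b2[2]) - min(b1[0], b2[0])) ** 2 \
       + (max(b1[3], b2[3]) - min(b1[1], b2[1])) ** 2
    # squared distance between the two box centers
    p2 = ((b1[0] + b1[2]) / 2 - (b2[0] + b2[2]) / 2) ** 2 \
       + ((b1[1] + b1[3]) / 2 - (b2[1] + b2[3]) / 2) ** 2
    w1, h1 = b1[2] - b1[0], b1[3] - b1[1]
    w2, h2 = b2[2] - b2[0], b2[3] - b2[1]
    v = 4.0 * (math.atan(w1 / (h1 + 1e-9)) - math.atan(w2 / (h2 + 1e-9))) ** 2 / math.pi ** 2
    a = v / (1 - iou + v)
    return iou - p2 / c2 - a * v

print(ciou_single([0, 0, 2, 2], [1, 1, 3, 3]))  # ~0.0317 (IoU 1/7, minus the center-distance penalty)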
Example #5
def std_gen_interpolate(batch_size=8, seed=None, out_path='data/out',
                        levels=None, interpolate_mode=0):
    default_levels = ("y;z0;z11;z12;z21;z22;z31;z32;z41;z42;z51;z52;z61;z62")
    if levels is None:
        levels = default_levels
    default_levels = default_levels.split(';')

    img_save_dir = os.path.join('/tmp', out_path+'.dir')
    os.system(f'rm -rf {img_save_dir}')
    os.system(f'mkdir {img_save_dir} -p')

    with dg.no_grad():
        model_cache.train_mode = False
        model_cache.initialized = False
        if seed is not None:
            rds.rng = np.random.RandomState(seed)
        elif rds.rng is None:
            rds.rng = np.random
        G = model_cache.G
        x_np = rds.rng.randn(batch_size,140).astype('float32')
        y_np = rds.rng.randint(0,1000,size=[batch_size]).astype('int64')
        x = dg.to_variable(x_np)
        y_cls = dg.to_variable(y_np)
        y_hot = layers.one_hot(layers.unsqueeze(y_cls,[1]), depth=1000)
        y_embed = G.embed_y(y_hot)
        x = layers.concat([x, x[:1]], 0)
        y_embed = layers.concat([y_embed, y_embed[:1]], 0)
        levels = levels.split(';')
        # use an explicit dict instead of writing into locals(), which does not
        # reliably rebind local variables inside a function
        latents = {}
        for level in default_levels:
            if len(level) == 1:
                latents[level] = y_embed
                latents['_'+level] = y_embed[:1]
            if len(level) >= 2:
                idx = int(level[1])*20
                latents[level] = x[:,idx:idx+20]
                latents['_'+level] = x[:1,idx:idx+20]
        imgs = []
        for i in range(batch_size):
            for j in range(40):
                alpha = j / 40
                if interpolate_mode == 1:
                    alpha = alpha**2 * (3 - 2 * alpha)  # smoothstep easing
                for level in levels:
                    latents['_'+level] = (1 - alpha) * latents[level][i:i+1] + alpha * latents[level][i+1:i+2]
                inputs = []
                for level in default_levels[1:]:
                    inputs.append(latents['_'+level])
                img_pd = G(inputs, latents['_'+default_levels[0]], True)
                img = np.uint8(img_pd.numpy().clip(0,1)*255)[0].transpose([1,2,0])
                imgs.append(Image.fromarray(img))
                stdout.write(f'{i*40+j+1}/{40*batch_size}\r')
                stdout.flush()
        print('')
        for i, img in enumerate(imgs):
            img.save(os.path.join(img_save_dir, str(i).zfill(5)+'.png'))
        imgs[0].save(out_path+'.gif', save_all=True, append_images=imgs[1:], duration=40, loop=0)
        out_path = out_path + '.mp4'
        os.system(f'ffmpeg -r 40 -i {img_save_dir}/%05d.png -hide_banner -loglevel warning -nostats -c:v libx264 -crf 23 -y {out_path}')
        os.system(f'rm -rf {img_save_dir}')
Example #6
    def __split_heads_qkv(queries, keys, values, n_head, d_key, d_value):
        """
        Reshape input tensors at the last dimension to split multi-heads
        and then transpose. Specifically, transform the input tensor with shape
        [bs, max_sequence_length, n_head * hidden_dim] to the output tensor
        with shape [bs, n_head, max_sequence_length, hidden_dim].
        """
        # The value 0 in shape attr means copying the corresponding dimension
        # size of the input as the output dimension size.
        reshaped_q = layers.reshape(x=queries,
                                    shape=[0, 0, n_head, d_key],
                                    inplace=True)
        # permute the dimensions into:
        # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
        q = layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
        # For encoder-decoder attention in inference, insert the ops and vars
        # into global block to use as cache among beam search.
        reshape_layer = wrap_layer_with_block(
            layers.reshape,
            fluid.default_main_program().current_block().parent_idx
        ) if cache is not None and static_kv else layers.reshape
        transpose_layer = wrap_layer_with_block(
            layers.transpose,
            fluid.default_main_program().current_block().parent_idx
        ) if cache is not None and static_kv else layers.transpose
        reshaped_k = reshape_layer(x=keys,
                                   shape=[0, 0, n_head, d_key],
                                   inplace=True)
        k = transpose_layer(x=reshaped_k, perm=[0, 2, 1, 3])
        reshaped_v = reshape_layer(x=values,
                                   shape=[0, 0, n_head, d_value],
                                   inplace=True)
        v = transpose_layer(x=reshaped_v, perm=[0, 2, 1, 3])

        if cache is not None:  # only for faster inference
            cache_, i = cache
            if static_kv:  # For encoder-decoder attention in inference
                cache_k, cache_v = cache_["static_k"], cache_["static_v"]
                # To init the static_k and static_v in global block.
                static_cache_init = wrap_layer_with_block(
                    layers.assign,
                    fluid.default_main_program().current_block().parent_idx)
                static_cache_init(
                    k,
                    fluid.default_main_program().global_block().var(
                        "static_k_%d" % i))
                static_cache_init(
                    v,
                    fluid.default_main_program().global_block().var(
                        "static_v_%d" % i))
                k, v = cache_k, cache_v
            else:  # For decoder self-attention in inference
                # use cache and concat time steps.
                cache_k, cache_v = cache_["k"], cache_["v"]
                k = layers.concat([cache_k, k], axis=2)
                v = layers.concat([cache_v, v], axis=2)
                cache_["k"], cache_["v"] = (k, v)
        return q, k, v
Example #7
def bbox_iou(boxes1, boxes2):
    '''
    Predicted boxes:       boxes1 (?, grid_h, grid_w, 3,   1, 4), the (bx, by, bw, bh)
                           post-processed from the network outputs (tx, ty, tw, th)
    All gt boxes in image: boxes2 (?,      1,      1, 1, 150, 4)
    Paddle does not support the ellipsis (boxes1_area = boxes1[..., 2] * boxes1[..., 3]),
    so all the colons have to be written out.
    '''
    # areas of the 3 predicted boxes in every grid cell
    boxes1_area = boxes1[:, :, :, :, :, 2] * boxes1[:, :, :, :, :, 3]
    # areas of all ground-truth boxes
    boxes2_area = boxes2[:, :, :, :, :, 2] * boxes2[:, :, :, :, :, 3]

    # convert (x, y, w, h) to (x0, y0, x1, y1)
    boxes1 = P.concat([
        boxes1[:, :, :, :, :, :2] - boxes1[:, :, :, :, :, 2:] * 0.5,
        boxes1[:, :, :, :, :, :2] + boxes1[:, :, :, :, :, 2:] * 0.5
    ],
                      axis=-1)
    boxes2 = P.concat([
        boxes2[:, :, :, :, :, :2] - boxes2[:, :, :, :, :, 2:] * 0.5,
        boxes2[:, :, :, :, :, :2] + boxes2[:, :, :, :, :, 2:] * 0.5
    ],
                      axis=-1)

    # IoU is computed between each of the 3 predicted boxes per grid cell and all
    # 150 ground truths, so left_up and right_down have shape (?, grid_h, grid_w, 3, 150, 2).
    # Unlike PyTorch and TF, both boxes1 and boxes2 must be expanded to the same shape.
    expand_boxes1 = P.expand(boxes1,
                             [1, 1, 1, 1, P.shape(boxes2)[4], 1])
    expand_boxes2 = P.expand(boxes2,
                             [1,
                              P.shape(boxes1)[1],
                              P.shape(boxes1)[2],
                              P.shape(boxes1)[3], 1, 1])
    # top-left and bottom-right corners of the intersection
    left_up = P.elementwise_max(expand_boxes1[:, :, :, :, :, :2],
                                expand_boxes2[:, :, :, :, :, :2])
    right_down = P.elementwise_min(expand_boxes1[:, :, :, :, :, 2:],
                                   expand_boxes2[:, :, :, :, :, 2:])

    # w and h of the intersection, clipped to 0 when negative  (?, grid_h, grid_w, 3, 150, 2)
    inter_section = P.relu(right_down - left_up)
    # intersection area  (?, grid_h, grid_w, 3, 150)
    inter_area = inter_section[:, :, :, :, :, 0] * inter_section[:, :, :, :, :, 1]
    expand_boxes1_area = P.expand(boxes1_area,
                                  [1, 1, 1, 1, P.shape(boxes2)[4]])
    expand_boxes2_area = P.expand(boxes2_area, [
        1,
        P.shape(expand_boxes1_area)[1],
        P.shape(expand_boxes1_area)[2],
        P.shape(expand_boxes1_area)[3], 1
    ])
    union_area = expand_boxes1_area + expand_boxes2_area - inter_area  # union_area                (?, grid_h, grid_w, 3, 150)
    iou = 1.0 * inter_area / union_area  # iou                       (?, grid_h, grid_w, 3, 150)

    return iou
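
The explicit P.expand calls are needed because this version of Paddle did not broadcast elementwise_max / elementwise_min over mismatched shapes. In NumPy the same pairwise-IoU layout falls out of broadcasting directly, as this sketch (with the boxes already in corner format) shows:

import numpy as np

boxes1 = np.random.rand(2, 13, 13, 3, 1, 4)   # predictions
boxes2 = np.random.rand(2, 1, 1, 1, 150, 4)   # ground truths
left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])      # (2, 13, 13, 3, 150, 2)
right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
inter_wh = np.clip(right_down - left_up, 0, None)
print(inter_wh.shape)  # (2, 13, 13, 3, 150, 2)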
Example #8
 def forward(self, input_tensor, cur_state):
     h_cur = cur_state
     x_in = concat([input_tensor, h_cur], axis=1)
     update = sigmoid(self.update_gate(x_in))
     reset = sigmoid(self.reset_gate(x_in))
     x_out = tanh(
         self.out_gate(concat([input_tensor, h_cur * reset], axis=1)))
     h_new = h_cur * (1 - update) + x_out * update
     return h_new
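
The cell above is a standard GRU update with learned gates. A self-contained NumPy sketch of one step, with plain matmuls standing in for self.update_gate / self.reset_gate / self.out_gate:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(x, h, W_z, W_r, W_o):
    xh = np.concatenate([x, h])
    update = sigmoid(W_z @ xh)                                 # how much of the candidate to take
    reset = sigmoid(W_r @ xh)                                  # how much history feeds the candidate
    candidate = np.tanh(W_o @ np.concatenate([x, h * reset]))
    return h * (1 - update) + candidate * update

dim = 4
rng = np.random.default_rng(0)
x, h = rng.standard_normal(dim), rng.standard_normal(dim)
W = lambda: rng.standard_normal((dim, 2 * dim)) * 0.1
print(gru_step(x, h, W(), W(), W()))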
Example #9
def beam_search_step(state, logits, eos_id, beam_width, is_first_step,
                     length_penalty):
    """logits.shape == [B*W, V]"""
    _, vocab_size = logits.shape

    bsz, beam_width = state.log_probs.shape
    onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size),
                        'int64')  #[1, V]

    probs = L.log(L.softmax(logits))  #[B*W, V]
    probs = mask_prob(probs, onehot_eos, state.finished)  #[B*W, V]
    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  #[B*W, V]

    not_finished = 1 - L.reshape(state.finished, [-1, 1])  #[B*W,1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  #[B*W,V]
    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add

    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)
    if is_first_step:
        allscore = L.reshape(
            allscore,
            [bsz, beam_width, -1])[:, 0, :]  # on the first step, only consider beam 0
    scores, idx = L.topk(allscore, k=beam_width)  #[B, W]
    next_beam_id = idx // vocab_size  #[B, W]
    next_word_id = idx % vocab_size

    gather_idx = L.concat([L.where(idx != -1)[:, :1],
                           L.reshape(idx, [-1, 1])], 1)
    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)

    gather_idx = L.concat(
        [L.where(next_beam_id != -1)[:, :1],
         L.reshape(next_beam_id, [-1, 1])], 1)
    next_finished = L.reshape(
        L.gather_nd(state.finished, gather_idx), state.finished.shape
    )  # gather the new beam state according to the new beam id
    #log.debug(gather_idx.numpy())
    #log.debug(state.finished.numpy())
    #log.debug(next_finished.numpy())

    next_finished += L.cast(next_word_id == eos_id, 'int64')
    next_finished = L.cast(next_finished > 0, 'int64')

    #log.debug(next_word_id.numpy())
    #log.debug(next_beam_id.numpy())
    next_state = BeamSearchState(log_probs=next_probs,
                                 lengths=next_len,
                                 finished=next_finished)
    output = BeamSearchOutput(scores=scores,
                              predicted_ids=next_word_id,
                              beam_parent_ids=next_beam_id)

    return output, next_state
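
The flat top-k over beam_width * vocab_size scores is what lets a single topk pick both the parent beam and the next token: integer division recovers the beam, the remainder recovers the word. A NumPy sketch:

import numpy as np

beam_width, vocab_size = 2, 5
allscore = np.array([[0.1, 0.9, 0.2, 0.3, 0.0,     # beam 0
                      0.4, 0.8, 0.7, 0.1, 0.2]])   # beam 1, flattened to [B, W*V]
idx = np.argsort(-allscore, axis=-1)[:, :beam_width]
next_beam_id = idx // vocab_size   # which beam each survivor extends
next_word_id = idx % vocab_size    # which token it appends
print(next_beam_id, next_word_id)  # [[0 1]] [[1 1]]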
Example #10
def beam_search_step(state, logits, eos_id, beam_width, is_first_step,
                     length_penalty):
    """logits.shape == [B*W, V]"""
    beam_size, vocab_size = logits.shape  # batch size is 1 in this hub module, so the first dim (bsz * beam_width) equals beam_size
    logits_np = logits.numpy()
    for i in range(beam_size):
        logits_np[i][17963] = 0  # suppress the [UNK] token (id 17963)
    logits = D.to_variable(logits_np)

    bsz, beam_width = state.log_probs.shape
    onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size),
                        'int64')  #[1, V]

    probs = L.log(L.softmax(logits))  #[B*W, V]
    probs = mask_prob(probs, onehot_eos, state.finished)  #[B*W, V]
    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  #[B*W, V]

    not_finished = 1 - L.reshape(state.finished, [-1, 1])  #[B*W,1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  #[B*W,V]
    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add

    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)
    if is_first_step:
        allscore = L.reshape(
            allscore,
            [bsz, beam_width, -1])[:, 0, :]  # on the first step, only consider beam 0
    scores, idx = L.topk(allscore, k=beam_width)  #[B, W]
    next_beam_id = idx // vocab_size  #[B, W]
    next_word_id = idx % vocab_size

    gather_idx = L.concat([L.where(idx != -1)[:, :1],
                           L.reshape(idx, [-1, 1])], 1)
    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)

    gather_idx = L.concat(
        [L.where(next_beam_id != -1)[:, :1],
         L.reshape(next_beam_id, [-1, 1])], 1)
    next_finished = L.reshape(
        L.gather_nd(state.finished, gather_idx), state.finished.shape
    )  # gather the new beam state according to the new beam id

    next_finished += L.cast(next_word_id == eos_id, 'int64')
    next_finished = L.cast(next_finished > 0, 'int64')

    next_state = BeamSearchState(log_probs=next_probs,
                                 lengths=next_len,
                                 finished=next_finished)
    output = BeamSearchOutput(scores=scores,
                              predicted_ids=next_word_id,
                              beam_parent_ids=next_beam_id)

    return output, next_state
Example #11
    def get_single_direction_output(rnn_input,
                                    encode_hidden,
                                    unit_list,
                                    mask=None,
                                    direc_index=0):
        rnn = StaticRNN()
        #print(rnn_input.shape)
        with rnn.step():
            step_input = rnn.step_input(rnn_input)

            if mask:
                step_mask = rnn.step_input(mask)

            for i in range(num_layers):
                if init_hidden:
                    pre_hidden = rnn.memory(init=init_hidden[i, direc_index])
                else:
                    pre_hidden = rnn.memory(batch_ref=rnn_input,
                                            shape=[-1, hidden_size],
                                            ref_batch_dim_idx=1)
                encode_h = encode_hidden[i]
                pre_encode_hidden = layers.concat([pre_hidden, encode_h], axis=1)
                new_hidden = unit_list[i](step_input, pre_encode_hidden)

                if mask:
                    new_hidden = layers.elementwise_mul(
                        new_hidden, step_mask, axis=0) - layers.elementwise_mul(
                        pre_hidden, (step_mask - 1), axis=0)
                rnn.update_memory(pre_hidden, new_hidden)

                rnn.step_output(new_hidden)

                step_input = new_hidden
                if dropout_prob is not None and dropout_prob > 0.0:
                    step_input = layers.dropout(step_input, dropout_prob=dropout_prob, )

            rnn.step_output(step_input)

        rnn_out = rnn()

        last_hidden_array = []
        all_hidden_array = []  # also collect the hidden states of every layer
        rnn_output = rnn_out[-1]

        for i in range(num_layers):
            last_hidden = rnn_out[i]
            all_hidden_array.append(last_hidden)
            last_hidden = last_hidden[-1]
            last_hidden_array.append(last_hidden)

        all_hidden_array = layers.concat(all_hidden_array, axis=0)
        all_hidden_array = layers.reshape(all_hidden_array, shape=[num_layers, input.shape[0], -1, hidden_size])
        last_hidden_output = layers.concat(last_hidden_array, axis=0)
        last_hidden_output = layers.reshape(last_hidden_output, shape=[num_layers, -1, hidden_size])

        return rnn_output, last_hidden_output, all_hidden_array
Example #12
    def flow_generation(self, label, ref_labels, ref_images, prev_labels,
                        prev_images, ref_idx):
        """
        Generates flows and masks for warping reference / previous images.

        Args:
            label (NxCxHxW): Target label map. 
            ref_labels (NxKxCxHxW): Reference label maps.
            ref_images (NxKx3xHxW): Reference images.
            prev_labels (NxTxCxHxW): Previous label maps.
            prev_images (NxTx3xHxW): Previous images.
            ref_idx (Nx1): index for which image to use from the reference images.

        Returns:
            - flow (list of Nx2xHxW): Optical flows.
            - occ_mask (list of Nx1xHxW): Occlusion masks.
            - img_warp (list of Nx3xHxW): Warped reference /previous images.
            - cond_inputs (list of Nx4xHxW): conditional inputs for SPADE combination
        """
        # Pick an image from the reference images using ref_idx.
        ref_label, ref_image = pick_image([ref_labels, ref_images], ref_idx)

        # Only start using prev frames when enough prev frames are generated.
        has_prev = prev_labels is not None and prev_labels.shape[
            1] == self.num_frames_G - 1

        flow, occ_mask, img_warp, cond_inputs = \
            [None] * 2, [None] * 2, [None] * 2, [None] * 2

        if self.warp_ref:
            # Generate flows / masks for warping the reference image.
            flow_ref, occ_mask_ref = self.flow_network_ref(
                label, ref_label, ref_image)

            ref_image_warp = resample(ref_image, flow_ref)
            flow[0], occ_mask[0], img_warp[
                0] = flow_ref, occ_mask_ref, ref_image_warp[:, :3]

            # Concat warped image and occlusion mask to form the conditional input.
            cond_inputs[0] = L.concat([img_warp[0], occ_mask[0]], axis=1)

        if self.temporal_initialized and has_prev:
            # Generate flows / masks for warping the previous image.
            b, t, c, h, w = prev_labels.shape
            prev_labels_concat = L.reshape(prev_labels, (b, -1, h, w))
            prev_images_concat = L.reshape(prev_images, (b, -1, h, w))
            flow_prev, occ_mask_prev = self.flow_network_temp(
                label, prev_labels_concat, prev_images_concat)

            img_prev_warp = resample(prev_images[:, -1], flow_prev)
            flow[1], occ_mask[1], img_warp[
                1] = flow_prev, occ_mask_prev, img_prev_warp
            cond_inputs[1] = L.concat([img_warp[1], occ_mask[1]], axis=1)

        return flow, occ_mask, img_warp, cond_inputs
Example #13
 def __call__(self, x):
     x_1 = x
     x_2 = self.max_pool1(x)
     x_3 = self.max_pool2(x)
     x_4 = self.max_pool3(x)
     if self.seq == 'desc':
         out = L.concat([x_4, x_3, x_2, x_1], axis=1)
     else:
         out = L.concat([x_1, x_2, x_3, x_4], axis=1)
     return out
Example #14
 def _build_distribution(self, enc_final_state=None):
     enc_hidden = [
         layers.concat(state, axis=-1) for state in enc_final_state
     ]
     enc_hidden = layers.concat(enc_hidden, axis=-1)
     z_mean_log_var = layers.fc(input=enc_hidden,
                                size=self.latent_size * 2,
                                name='fc_dist')
     z_mean, z_log_var = layers.split(z_mean_log_var, 2, -1)
     return z_mean, z_log_var
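
The mean / log-variance pair returned here is typically consumed by the reparameterization trick when sampling the latent code. A NumPy sketch, assuming a diagonal Gaussian posterior (the sampling step itself is not shown in the source):

import numpy as np

def reparameterize(z_mean, z_log_var, rng=np.random.default_rng()):
    """z = mean + sigma * eps, with eps ~ N(0, I)."""
    eps = rng.standard_normal(z_mean.shape)
    return z_mean + np.exp(0.5 * z_log_var) * eps

z = reparameterize(np.zeros((2, 8)), np.zeros((2, 8)))
print(z.shape)  # (2, 8)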
Example #15
    def build_model(self):
        node_features = self.graph_wrapper.node_feat["feat"]

        output = self.gcn(gw=self.graph_wrapper,
                          feature=node_features,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name="gcn_layer_1")
        output1 = output
        output = self.gcn(gw=self.graph_wrapper,
                          feature=output,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name="gcn_layer_2")
        output2 = output
        output = self.gcn(gw=self.graph_wrapper,
                          feature=output,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name="gcn_layer_3")

        output = L.concat(input=[output1, output2, output], axis=-1)

        output, ratio_length = sag_pool(gw=self.graph_wrapper,
                                        feature=output,
                                        ratio=self.pooling_ratio,
                                        graph_id=self.graph_id,
                                        dataset=self.args.dataset_name,
                                        name="sag_pool_1")
        output = L.lod_reset(output, self.graph_wrapper.graph_lod)
        cat1 = L.sequence_pool(output, "sum")
        ratio_length = L.cast(ratio_length, dtype="float32")
        cat1 = L.elementwise_div(cat1, ratio_length, axis=-1)
        cat2 = L.sequence_pool(output, "max")
        output = L.concat(input=[cat2, cat1], axis=-1)

        output = L.fc(output, size=self.hidden_size, act="relu")
        output = L.dropout(output, dropout_prob=self.dropout_ratio)
        output = L.fc(output, size=self.hidden_size // 2, act="relu")
        output = L.fc(output,
                      size=self.num_classes,
                      act=None,
                      param_attr=fluid.ParamAttr(name="final_fc"))

        self.labels = L.cast(self.labels, dtype="float32")
        loss = L.sigmoid_cross_entropy_with_logits(x=output, label=self.labels)
        self.loss = L.mean(loss)
        pred = L.sigmoid(output)
        self.pred = L.argmax(x=pred, axis=-1)
        correct = L.equal(self.pred, self.labels_1dim)
        correct = L.cast(correct, dtype="int32")
        self.correct = L.reduce_sum(correct)
Example #16
    def forward(self):
        """ forward
        """
        src, dst = L.read_file(self.pyreader)

        if self.is_sparse:
            # sparse mode uses 2-D input.
            src = L.reshape(src, [-1, 1])
            dst = L.reshape(dst, [-1, 1])

        src_embed = split_embedding(src, self.num_nodes, self.hidden_size,
                                    self.embed_init, "weight", self.num_part,
                                    self.is_sparse)

        dst_embed = split_embedding(dst, self.num_nodes, self.hidden_size,
                                    self.embed_init, "weight", self.num_part,
                                    self.is_sparse)

        if self.is_sparse:
            src_embed = L.reshape(src_embed,
                                  [-1, 1, self.num_featuers, self.hidden_size])
            dst_embed = L.reshape(
                dst_embed,
                [-1, self.neg_num + 1, self.num_featuers, self.hidden_size])

        src_embed = L.reduce_mean(src_embed, 2)
        dst_embed = L.reduce_mean(dst_embed, 2)

        logits = L.matmul(src_embed, dst_embed,
                          transpose_y=True)  # [batch_size, 1, neg_num+1]

        pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                    "float32", 1)
        neg_label = L.fill_constant_batch_size_like(logits,
                                                    [-1, 1, self.neg_num],
                                                    "float32", 0)
        label = L.concat([pos_label, neg_label], -1)

        pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                     "float32", self.neg_num)
        neg_weight = L.fill_constant_batch_size_like(logits,
                                                     [-1, 1, self.neg_num],
                                                     "float32", 1)
        weight = L.concat([pos_weight, neg_weight], -1)

        weight.stop_gradient = True
        label.stop_gradient = True

        loss = L.sigmoid_cross_entropy_with_logits(logits, label)
        loss = loss * weight
        loss = L.reduce_mean(loss)
        loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
        loss.persistable = True
        self.loss = loss
        return loss
Example #17
    def forward(self, input):

        x = self.DownBlock(input)

        gap = adaptive_pool2d(x, pool_size=[1, 1], pool_type='avg')

        gap_ = reshape(x=gap, shape=(x.shape[0], -1))

        gap_logit = self.gap_fc(gap_)

        gap_weight = self.gap_fc.parameters()[0]
        gap_weight = transpose(gap_weight, perm=[1, 0])
        gap_weight = unsqueeze(gap_weight, axes=2)
        gap_weight = unsqueeze(gap_weight, axes=3)

        gap = x * gap_weight

        gmp = adaptive_pool2d(x, pool_size=[1, 1], pool_type='max')

        gmp_ = reshape(x=gmp, shape=(x.shape[0], -1))

        gmp_logit = self.gmp_fc(gmp_)

        gmp_weight = self.gmp_fc.parameters()[0]
        gmp_weight = transpose(gmp_weight, perm=[1, 0])
        gmp_weight = unsqueeze(gmp_weight, axes=2)
        gmp_weight = unsqueeze(gmp_weight, axes=3)

        gmp = x * gmp_weight

        cam_logit = concat(input=[gap_logit, gmp_logit], axis=1)

        x = concat(input=[gap, gmp], axis=1)

        x = self.relu(self.conv1x1(x))

        heatmap = reduce_sum(x, dim=1, keep_dim=True)

        if self.light:
            x_ = adaptive_pool2d(x, pool_size=[1, 1], pool_type='avg')
            x_ = reshape(x=x_, shape=(x_.shape[0], -1))
            x_ = self.FC(x_)
        else:
            x_ = reshape(x, shape=(x.shape[0], -1))
            x_ = self.FC(x_)

        gamma, beta = self.gamma(x_), self.beta(x_)

        for i in range(self.n_blocks):
            x = getattr(self, 'UpBlock1_' + str(i + 1))(x, gamma, beta)
        out = self.UpBlock2(x)

        return out, cam_logit, heatmap
Example #18
def gen_bias(encoder_inputs, decoder_inputs, step):
    decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2]
    attn_bias = L.reshape(L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1])
    decoder_bias = L.cast((L.matmul(attn_bias, 1. / attn_bias, transpose_y=True) >= 1.),
                          'float32')  #[1, 1, decoderlen, decoderlen]
    encoder_bias = L.unsqueeze(L.cast(L.ones_like(encoder_inputs), 'float32'), [1])  #[bsz, 1, encoderlen]
    encoder_bias = L.expand(encoder_bias, [1, decoder_seqlen, 1])  #[bsz,decoderlen, encoderlen]
    decoder_bias = L.expand(decoder_bias, [decoder_bsz, 1, 1])  #[bsz, decoderlen, decoderlen]
    if step > 0:
        bias = L.concat([encoder_bias, L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias], -1)
    else:
        bias = L.concat([encoder_bias, decoder_bias], -1)
    return bias
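
The matmul in gen_bias is a compact way to build the causal (lower-triangular) part of the mask: entry (i, j) of attn_bias @ (1 / attn_bias)^T is (i + 1) / (j + 1), which is >= 1 exactly when i >= j. A NumPy demonstration:

import numpy as np

decoder_seqlen = 4
a = (np.arange(decoder_seqlen, dtype='float32') + 1).reshape(-1, 1)
causal = (a @ (1.0 / a).T >= 1.0).astype('float32')
print(causal)
# [[1. 0. 0. 0.]
#  [1. 1. 0. 0.]
#  [1. 1. 1. 0.]
#  [1. 1. 1. 1.]]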
Example #19
def rotation_z(theta):
    """
    :param theta: must be a scale, shape [1], 'float32'
    :return:
    """

    cos_value = cos(theta/2)
    sin_value = sin(theta/2)
    zero_pd = pp_zeros([1], "float32")
    rz_re = concat([cos_value, zero_pd, zero_pd, cos_value], axis=0)
    rz_im = concat([-sin_value, zero_pd, zero_pd, sin_value], axis=0)

    return ComplexVariable(reshape(rz_re, [2, 2]), reshape(rz_im, [2, 2]))
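
As a sanity check, the real and imaginary parts assembled above match the textbook single-qubit gate Rz(theta) = diag(e^{-i theta/2}, e^{i theta/2}); a NumPy verification:

import numpy as np

theta = 0.7
rz = np.diag([np.exp(-1j * theta / 2), np.exp(1j * theta / 2)])
re = np.diag([np.cos(theta / 2), np.cos(theta / 2)])
im = np.diag([-np.sin(theta / 2), np.sin(theta / 2)])
print(np.allclose(rz, re + 1j * im))  # True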
Example #20
    def _ranking(self, inputs, predictions):
        """ Reranking generated responses. """
        src_token = inputs["src_token"]
        src_mask = inputs["src_mask"]
        src_pos = inputs["src_pos"]
        src_type = inputs["src_type"]
        src_turn = inputs["src_turn"]
        src_embed = self.embedder(src_token, src_pos, src_type, src_turn)

        batch_size, num_latent, tgt_seq_len = predictions.shape

        # shape: [batch_size, num_latent, seq_len, 1]
        preds_token = F.unsqueeze(predictions, [3])
        preds_mask = F.not_equal(preds_token, self.padding_idx, "int64")
        preds_pos = layers.range(0, tgt_seq_len, 1, dtype="float32")
        preds_pos = F.unsqueeze(preds_pos, [0, 0, 1])
        preds_pos = layers.expand(preds_pos, [batch_size, num_latent, 1, 1])
        preds_pos = layers.cast(preds_pos, "int64")
        preds_type = layers.zeros_like(preds_token)
        preds_turn = layers.zeros_like(preds_token)

        scores = []
        for i in range(num_latent):
            pred_token = preds_token[:, i]
            pred_mask = preds_mask[:, i]
            pred_pos = preds_pos[:, i]
            pred_type = preds_type[:, i]
            pred_turn = preds_turn[:, i]

            input_mask = layers.concat([src_mask, pred_mask], axis=1)
            input_mask.stop_gradient = True
            pred_embed = self.embedder(pred_token, pred_pos, pred_type,
                                       pred_turn)
            embed = layers.concat([src_embed, pred_embed], axis=1)
            embed = self.embed_layer_norm(embed)

            mask_embed = self.mask_embed
            mask_embed = layers.expand(mask_embed, [batch_size, 1, 1])
            mask_embed = self.embed_layer_norm(mask_embed)

            out = layers.concat([mask_embed, embed], axis=1)
            mask = self._create_mask(input_mask, append_head=True)

            for layer in self.layers:
                out = layer(out, mask, None)

            mask_embed = out[:, 0]
            score = self.discriminator(mask_embed)
            scores.append(score[:, 0])
        scores = layers.stack(scores, axis=1)
        return scores
Example #21
def _attn_forward(self,
                  queries,
                  keys,
                  values,
                  attn_bias,
                  past_cache,
                  head_mask=None):
    assert len(queries.shape) == len(keys.shape) == len(values.shape) == 3

    q = self.q(queries)
    k = self.k(keys)
    v = self.v(values)

    cache = (k, v)
    if past_cache is not None:
        cached_k, cached_v = past_cache
        k = L.concat([cached_k, k], 1)
        v = L.concat([cached_v, v], 1)

    if hasattr(self.q, 'fn') and self.q.fn.cur_config['expand_ratio'] is not None:
        n_head = int(self.n_head * self.q.fn.cur_config['expand_ratio'])
    else:
        n_head = self.n_head

    q = L.transpose(
        L.reshape(q, [0, 0, n_head, q.shape[-1] // n_head]),
        [0, 2, 1, 3])  #[batch, head, seq, dim]
    k = L.transpose(
        L.reshape(k, [0, 0, n_head, k.shape[-1] // n_head]),
        [0, 2, 1, 3])  #[batch, head, seq, dim]
    v = L.transpose(
        L.reshape(v, [0, 0, n_head, v.shape[-1] // n_head]),
        [0, 2, 1, 3])  #[batch, head, seq, dim]

    q = L.scale(q, scale=self.d_key**-0.5)
    score = L.matmul(q, k, transpose_y=True)
    if attn_bias is not None:
        score += attn_bias

    score = L.softmax(score, use_cudnn=True)
    score = self.dropout(score)
    if head_mask is not None:
        score = score * head_mask

    out = L.matmul(score, v)
    out = L.transpose(out, [0, 2, 1, 3])
    out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]])

    out = self.o(out)
    return out, cache
Example #22
    def get_l2_norm_pow(params_grads, sum_dtype=None):
        sum_square_list = []
        sum_square_list_fp16 = []
        sum_square_list_fp32 = []
        for p, g in params_grads:
            if g is None:
                continue
            if getattr(p, 'need_clip', True) is False:
                continue
            merge_grad = g
            if g.type == core.VarDesc.VarType.SELECTED_ROWS:
                merge_grad = layers.merge_selected_rows(g)
                merge_grad = layers.get_tensor_from_selected_rows(merge_grad)
            sum_square = _squared_l2_norm(merge_grad)
            if sum_square.dtype == core.VarDesc.VarType.FP16:
                sum_square_list_fp16.append(sum_square)
            elif sum_square.dtype == core.VarDesc.VarType.FP32:
                sum_square_list_fp32.append(sum_square)
            else:
                sum_square_list.append(sum_square)

        # all parameters have been filtered out
        if len(sum_square_list) + len(sum_square_list_fp16) + len(
                sum_square_list_fp32) == 0:
            return None, None
        assert sum_dtype in ["float64", "float32", None], \
            "sum's type must be float64 / float32 / None"
        if sum_dtype != "float64":
            sum_dtype = 'float64' if len(sum_square_list) > 0 else "float32"

        global_norm_var = []
        if len(sum_square_list_fp16) > 0:
            global_norm_var_fp16 = layers.concat(sum_square_list_fp16)
            global_norm_var_fp16 = layers.reduce_sum(global_norm_var_fp16)
            global_norm_var.append(global_norm_var_fp16.astype(sum_dtype))
        if len(sum_square_list_fp32) > 0:
            global_norm_var_fp32 = layers.concat(sum_square_list_fp32)
            global_norm_var_fp32 = layers.reduce_sum(global_norm_var_fp32)
            if sum_dtype == 'float32':
                global_norm_var.append(global_norm_var_fp32)
            else:
                global_norm_var.append(global_norm_var_fp32.astype(sum_dtype))
        if len(sum_square_list) > 0:
            global_norm_var_fp64 = layers.concat(sum_square_list)
            global_norm_var_fp64 = layers.reduce_sum(global_norm_var_fp64)
            global_norm_var.append(global_norm_var_fp64)
        global_norm_var = layers.concat(global_norm_var)
        global_norm_var = layers.reduce_sum(global_norm_var)
        return global_norm_var, sum_dtype
Example #23
    def forward(self, x):
        features = self.feature(x)
        x1 = self.stage1(features)
        x2 = L.concat([x1, features], 1)
        x2 = self.stage2(x2)
        x3 = L.concat([x2, features], 1)
        x3 = self.stage3(x3)
        x4 = L.concat([x3, features], 1)
        x4 = self.stage4(x4)
        x5 = L.concat([x4, features], 1)
        x5 = self.stage5(x5)
        x6 = L.concat([x5, features], 1)
        x6 = self.stage6(x6)

        return [x1, x2, x3, x4, x5, x6]
Example #24
    def forward(self, gw):
        x = self._atom_encoder(gw)
        patch_repr = []
        for i in range(self.num_layers):
            e = self._bond_encoder(gw, name='l%d'%i)
            x = gin_layer(gw, x, e, 'gin_%s' % i)
            x = L.batch_norm(
                x, param_attr=F.ParamAttr(name='batchnorm_%s' % i))
            patch_repr.append(x)  # $h_i^{(k)}$

        patch_summary = L.concat(patch_repr, axis=1)  # $h_{\phi}^i$
        patch_pool = [pgl.layers.graph_pooling(gw, x, 'sum')
                      for x in patch_repr]
        global_repr = L.concat(patch_pool, axis=1)
        return global_repr, patch_summary
Example #25
    def _decode(self,
                x,
                y,
                w,
                h,
                anchors,
                stride,
                scale_x_y,
                eps,
                is_gt=False):
        conv_shape = x.shape  # (8, 13, 13, 3)
        batch_size = conv_shape[0]
        n_grid = conv_shape[1]
        anchor_per_scale = conv_shape[3]

        _x = L.unsqueeze(x, 4)
        _y = L.unsqueeze(y, 4)
        conv_raw_dxdy = L.concat([_x, _y], -1)  # (8, 13, 13, 3, 2)
        _w = L.unsqueeze(w, 4)
        _h = L.unsqueeze(h, 4)
        conv_raw_dwdh = L.concat([_w, _h], -1)  # (8, 13, 13, 3, 2)

        rows = L.range(0, n_grid, 1, 'float32')
        cols = L.range(0, n_grid, 1, 'float32')
        rows = L.expand(L.reshape(rows, (1, -1, 1)), [n_grid, 1, 1])
        cols = L.expand(L.reshape(cols, (-1, 1, 1)), [1, n_grid, 1])
        offset = L.concat([rows, cols], axis=-1)
        offset = L.reshape(offset, (1, n_grid, n_grid, 1, 2))
        offset = L.expand(offset, [batch_size, 1, 1, anchor_per_scale, 1])

        if is_gt:
            decode_xy = (conv_raw_dxdy + offset) / n_grid
        else:
            if (abs(scale_x_y - 1.0) < eps):
                decode_xy = L.sigmoid(conv_raw_dxdy)
                decode_xy = (decode_xy + offset) / n_grid
            else:
                # Grid Sensitive
                decode_xy = scale_x_y * L.sigmoid(conv_raw_dxdy) - 0.5 * (
                    scale_x_y - 1.0)
                decode_xy = (decode_xy + offset) / n_grid
        anchor_t = fluid.layers.assign(np.copy(anchors).astype(np.float32))
        decode_wh = (L.exp(conv_raw_dwdh) * anchor_t) / (n_grid * stride)
        decode_xywh = L.concat([decode_xy, decode_wh], axis=-1)
        if is_gt:
            decode_xywh.stop_gradient = True

        return decode_xywh  # (8, 13, 13, 3, 4)
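
The rows/cols expand-and-concat builds a per-cell (x, y) grid offset that is added to the sigmoid-decoded centers. The same table in NumPy, with np.tile standing in for L.expand:

import numpy as np

n_grid = 3
rows = np.tile(np.arange(n_grid, dtype='float32').reshape(1, -1, 1), [n_grid, 1, 1])
cols = np.tile(np.arange(n_grid, dtype='float32').reshape(-1, 1, 1), [1, n_grid, 1])
offset = np.concatenate([rows, cols], axis=-1)    # (n_grid, n_grid, 2)
offset = offset.reshape(1, n_grid, n_grid, 1, 2)  # broadcasts over batch and anchors
print(offset[0, 1, 2, 0])  # [2. 1.]: x offset 2, y offset 1 for cell (row=1, col=2)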
Example #26
    def get_prediction(self, body_feats, im_size):
        """
        Get prediction result of YOLOv3 network

        Args:
            body_feats (list): List of Variables, output of backbone stages
            im_size (Variable): Variable of size([h, w]) of each image

        Returns:
            pred (Variable): shape = [bs, keep_top_k, 6]

        """
        # outputs holds the heads for the large, medium and small receptive fields
        outputs = self._get_outputs(body_feats)

        boxes = []
        scores = []
        for i, output in enumerate(outputs):
            if self.iou_aware:
                output = get_iou_aware_score(output,
                                             len(self.anchor_masks[i]),
                                             self.num_classes,
                                             self.iou_aware_factor)
            box, score = fluid.layers.yolo_box(
                x=output,
                img_size=im_size,
                anchors=self.mask_anchors[i],
                class_num=self.num_classes,
                conf_thresh=self.nms_cfg['score_threshold'],
                downsample_ratio=self.downsample[i],
                name="yolo_box" + str(i),
                clip_bbox=self.clip_bbox,
                scale_x_y=self.scale_x_y)
            boxes.append(box)
            scores.append(fluid.layers.transpose(score, perm=[0, 2, 1]))
        yolo_boxes = L.concat(boxes, axis=1)
        yolo_scores = L.concat(scores, axis=2)


        # nms
        nms_cfg = copy.deepcopy(self.nms_cfg)
        nms_type = nms_cfg.pop('nms_type')
        batch_size = 1
        if nms_type == 'matrix_nms':
            pred = fluid.layers.matrix_nms(yolo_boxes, yolo_scores, background_label=-1, **nms_cfg)
        elif nms_type == 'multiclass_nms':
            pred = fluid.layers.multiclass_nms(yolo_boxes, yolo_scores, background_label=-1, **nms_cfg)
        return pred
Example #27
    def test_nce(self):
        window_size = 5
        words = []
        for i in range(window_size):
            words.append(
                layers.data(name='word_{0}'.format(i),
                            shape=[1],
                            dtype='int64'))

        dict_size = 10000
        label_word = int(window_size // 2) + 1

        embs = []
        for i in range(window_size):
            if i == label_word:
                continue

            emb = layers.embedding(input=words[i],
                                   size=[dict_size, 32],
                                   param_attr='emb.w',
                                   is_sparse=True)

            embs.append(emb)

        embs = layers.concat(input=embs, axis=1)
        loss = layers.nce(input=embs,
                          label=words[label_word],
                          num_total_classes=dict_size,
                          param_attr='nce.w',
                          bias_attr='nce.b')
        avg_loss = layers.mean(loss)
        self.assertIsNotNone(avg_loss)
        print(str(default_main_program()))
Example #28
    def loss_boxes(self, outputs, targets, indices, num_boxes):
        """
        Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
        targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
        The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
        """
        assert "pred_boxes" in outputs
        idx = self._get_src_permutation_idx(indices)
        src_boxes = outputs["pred_boxes"].numpy()[
            idx[0].numpy(), idx[1].numpy(), :]  # [num_objects, 4]
        src_boxes = dg.to_variable(src_boxes)

        target_boxes = [
            t["boxes"].numpy()[i.numpy()]
            for t, (_, i) in zip(targets, indices)
        ]
        target_boxes = [dg.to_variable(t) for t in target_boxes]
        target_boxes = L.concat(target_boxes,
                                0).astype("float32")  # [num_objects, 4]
        loss_bbox = F.loss.l1_loss(src_boxes, target_boxes, reduction="sum")

        losses = {}
        losses["loss_bbox"] = loss_bbox / num_boxes

        num_boxes = src_boxes.shape[0]
        mask = T.creation.diag(dg.to_variable(
            np.ones(num_boxes)))  # mask out non-diag element
        loss_giou = (1 - box_ops.generalied_box_iou(
            box_ops.box_cxcywh_to_xyxy(src_boxes),
            box_ops.box_cxcywh_to_xyxy(target_boxes))) * mask
        losses["loss_giou"] = L.reduce_sum(loss_giou) / num_boxes
        return losses
Example #29
def distributed_embedding(input,
                          dict_size,
                          hidden_size,
                          initializer,
                          name,
                          num_part=16,
                          is_sparse=False,
                          learning_rate=1.0):
    _part_size = hidden_size // num_part
    if hidden_size % num_part != 0:
        _part_size += 1
    output_embedding = []
    p_num = 0
    while hidden_size > 0:
        _part_size = min(_part_size, hidden_size)
        hidden_size -= _part_size
        print("part", p_num, "size=", (dict_size, _part_size))
        part_embedding = L.embedding(input=input,
                                     size=(dict_size, int(_part_size)),
                                     is_sparse=is_sparse,
                                     is_distributed=False,
                                     param_attr=F.ParamAttr(
                                         name=name + '_part%s' % p_num,
                                         initializer=initializer,
                                         learning_rate=learning_rate))
        p_num += 1
        output_embedding.append(part_embedding)
    return L.concat(output_embedding, -1)
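
The while loop splits hidden_size into num_part nearly equal slices, with the last slice absorbing the remainder. The partition logic in isolation (a sketch, minus the embedding lookups):

def part_sizes(hidden_size, num_part=16):
    part = hidden_size // num_part
    if hidden_size % num_part != 0:
        part += 1
    sizes = []
    while hidden_size > 0:
        cur = min(part, hidden_size)
        hidden_size -= cur
        sizes.append(cur)
    return sizes

print(part_sizes(100, 16))  # fourteen 7s and a final 2 (sums to 100)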
Example #30
    def test_nce(self):
        window_size = 5
        words = []
        for i in range(window_size):
            words.append(
                layers.data(
                    name='word_{0}'.format(i), shape=[1], dtype='int64'))

        dict_size = 10000
        label_word = int(window_size / 2) + 1

        embs = []
        for i in range(window_size):
            if i == label_word:
                continue

            emb = layers.embedding(
                input=words[i],
                size=[dict_size, 32],
                param_attr='emb.w',
                is_sparse=True)

            embs.append(emb)

        embs = layers.concat(input=embs, axis=1)
        loss = layers.nce(input=embs,
                          label=words[label_word],
                          num_total_classes=dict_size,
                          param_attr='nce.w',
                          bias_attr='nce.b')
        avg_loss = layers.mean(loss)
        self.assertIsNotNone(avg_loss)
        print(str(default_main_program()))
Example #31
def get_activations(data_loader,
                    key_real,
                    key_fake,
                    generator=None,
                    sample_size=None,
                    preprocess=None):
    inception = build_inception()
    inception.eval()
    batch_y = []
    for it, data in enumerate(data_loader.batch_reader(2)()):
        if preprocess is not None:
            data = preprocess(data)
        if generator is None:
            images = data[key_real]
        else:
            net_G_output = generator(data)
            images = net_G_output[key_fake]
        # Clamp the image for models that do not bound the output to
        # [-1, 1]. For models that employ tanh, this has no effect.
        # images = L.clip(images, -1, 1)
        images = apply_image_net_normalization(images)
        images = nn.functional.interpolate(images,
                                           size=(299, 299),
                                           mode='bilinear',
                                           align_corners=True)
        y = inception(images)
        batch_y += [y]
    batch_y = L.concat(batch_y).numpy()
    if sample_size is not None:
        batch_y = batch_y[:sample_size]
    # print(batch_y.shape)
    return batch_y
Example #32
def get_usr_combined_features():
    # FIXME(dzh) : old API integer_value(10) may have range check.
    # currently we don't have a user-configured check.

    USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1

    uid = layers.data(name='user_id', shape=[1], dtype='int64')

    usr_emb = layers.embedding(
        input=uid,
        dtype='float32',
        size=[USR_DICT_SIZE, 32],
        param_attr='user_table',
        is_sparse=IS_SPARSE)

    usr_fc = layers.fc(input=usr_emb, size=32)

    USR_GENDER_DICT_SIZE = 2

    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')

    usr_gender_emb = layers.embedding(
        input=usr_gender_id,
        size=[USR_GENDER_DICT_SIZE, 16],
        param_attr='gender_table',
        is_sparse=IS_SPARSE)

    usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)

    USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")

    usr_age_emb = layers.embedding(
        input=usr_age_id,
        size=[USR_AGE_DICT_SIZE, 16],
        is_sparse=IS_SPARSE,
        param_attr='age_table')

    usr_age_fc = layers.fc(input=usr_age_emb, size=16)

    USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")

    usr_job_emb = layers.embedding(
        input=usr_job_id,
        size=[USR_JOB_DICT_SIZE, 16],
        param_attr='job_table',
        is_sparse=IS_SPARSE)

    usr_job_fc = layers.fc(input=usr_job_emb, size=16)

    concat_embed = layers.concat(
        input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1)

    usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")

    return usr_combined_features
Example #33
def get_mov_combined_features():

    MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1

    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')

    mov_emb = layers.embedding(
        input=mov_id,
        dtype='float32',
        size=[MOV_DICT_SIZE, 32],
        param_attr='movie_table',
        is_sparse=IS_SPARSE)

    mov_fc = layers.fc(input=mov_emb, size=32)

    CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())

    category_id = layers.data(
        name='category_id', shape=[1], dtype='int64', lod_level=1)

    mov_categories_emb = layers.embedding(
        input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)

    mov_categories_hidden = layers.sequence_pool(
        input=mov_categories_emb, pool_type="sum")

    MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())

    mov_title_id = layers.data(
        name='movie_title', shape=[1], dtype='int64', lod_level=1)

    mov_title_emb = layers.embedding(
        input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)

    mov_title_conv = nets.sequence_conv_pool(
        input=mov_title_emb,
        num_filters=32,
        filter_size=3,
        act="tanh",
        pool_type="sum")

    concat_embed = layers.concat(
        input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1)

    # FIXME(dzh) : need tanh operator
    mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")

    return mov_combined_features
Example #34
    def test_word_embedding(self):
        program = Program()
        with program_guard(program, startup_program=Program()):
            dict_size = 10000
            embed_size = 32
            first_word = layers.data(name='firstw', shape=[1], dtype='int64')
            second_word = layers.data(name='secondw', shape=[1], dtype='int64')
            third_word = layers.data(name='thirdw', shape=[1], dtype='int64')
            forth_word = layers.data(name='forthw', shape=[1], dtype='int64')
            next_word = layers.data(name='nextw', shape=[1], dtype='int64')

            embed_first = layers.embedding(
                input=first_word,
                size=[dict_size, embed_size],
                dtype='float32',
                param_attr='shared_w')
            embed_second = layers.embedding(
                input=second_word,
                size=[dict_size, embed_size],
                dtype='float32',
                param_attr='shared_w')

            embed_third = layers.embedding(
                input=third_word,
                size=[dict_size, embed_size],
                dtype='float32',
                param_attr='shared_w')
            embed_forth = layers.embedding(
                input=forth_word,
                size=[dict_size, embed_size],
                dtype='float32',
                param_attr='shared_w')

            concat_embed = layers.concat(
                input=[embed_first, embed_second, embed_third, embed_forth],
                axis=1)

            hidden1 = layers.fc(input=concat_embed, size=256, act='sigmoid')
            predict_word = layers.fc(input=hidden1,
                                     size=dict_size,
                                     act='softmax')
            cost = layers.cross_entropy(input=predict_word, label=next_word)
            avg_cost = layers.mean(cost)
            self.assertIsNotNone(avg_cost)

        print(str(program))