Example #1
def attn_flow(q_enc, p_enc, p_ids_name, args):
    """Bidirectional Attention layer"""
    tag = p_ids_name + "__"
    drnn = layers.DynamicRNN()
    with drnn.block():
        # iterate over passage steps; the whole question is a static input
        h_cur = drnn.step_input(p_enc)
        u_all = drnn.static_input(q_enc)
        h_expd = layers.sequence_expand(x=h_cur, y=u_all)
        # dot-product similarity between the current passage step and
        # every question step, normalized over the question
        s_t_mul = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
        s_t_sum = layers.reduce_sum(input=s_t_mul, dim=1, keep_dim=True)
        s_t_re = layers.reshape(s_t_sum, shape=[-1, 0])
        s_t = layers.sequence_softmax(input=s_t_re)
        # passage-to-question attention: weighted sum of question states
        u_expr = layers.elementwise_mul(x=u_all, y=s_t, axis=0)
        u_expr = layers.sequence_pool(input=u_expr, pool_type='sum')

        # max similarity per passage step, reused below for
        # question-to-passage attention
        b_t = layers.sequence_pool(input=s_t_sum, pool_type='max')
        drnn.output(u_expr, b_t)
    U_expr, b = drnn()
    b_norm = layers.sequence_softmax(input=b)
    h_expr = layers.elementwise_mul(x=p_enc, y=b_norm, axis=0)
    h_expr = layers.sequence_pool(input=h_expr, pool_type='sum')

    H_expr = layers.sequence_expand(x=h_expr, y=p_enc)
    H_expr = layers.lod_reset(x=H_expr, y=p_enc)
    h_u = layers.elementwise_mul(x=p_enc, y=U_expr, axis=0)
    h_h = layers.elementwise_mul(x=p_enc, y=H_expr, axis=0)

    # concat [P, U~, P*U~, P*H~] along the feature dimension
    g = layers.concat(input=[p_enc, U_expr, h_u, h_h], axis=1)
    return dropout(g, args)
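A note on the step computation above: inside the DynamicRNN block, each passage step takes a dot product with every question step, the scores are normalized with a sequence softmax, and the question states are summed under those weights. A minimal NumPy sketch of that per-step attention (the names here are illustrative, not from the original):

import numpy as np

def attend_one_step(h, U):
    """h: (d,) current passage step; U: (m, d) all question steps."""
    s = U @ h                            # similarity per question step, (m,)
    e = np.exp(s - s.max())
    w = e / e.sum()                      # softmax over the question
    return (U * w[:, None]).sum(axis=0)  # attended question vector, (d,)

h = np.random.rand(4)
U = np.random.rand(3, 4)
print(attend_one_step(h, U))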
Example #2
def custom_dynamic_rnn(p_vec, init_state, decoder_size):
    context = layers.fc(input=p_vec, size=decoder_size, act=None)

    drnn = layers.DynamicRNN()
    with drnn.block():
        H_s = drnn.step_input(p_vec)  # advances the RNN one step per position
        ctx = drnn.static_input(context)

        c_prev = drnn.memory(init=init_state, need_reorder=True)
        m_prev = drnn.memory(init=init_state, need_reorder=True)
        m_prev1 = layers.fc(input=m_prev, size=decoder_size, act=None)
        m_prev1 = layers.sequence_expand(x=m_prev1, y=ctx)

        Fk = ctx + m_prev1
        Fk = layers.fc(input=Fk, size=decoder_size, act='tanh')
        logits = layers.fc(input=Fk, size=1, act=None)

        scores = layers.sequence_softmax(input=logits)
        attn_ctx = layers.elementwise_mul(x=ctx, y=scores, axis=0)
        attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')
        hidden_t, cell_t = lstm_step(
            attn_ctx,
            hidden_t_prev=m_prev1,
            cell_t_prev=c_prev,
            size=decoder_size)

        drnn.update_memory(ex_mem=m_prev, new_mem=hidden_t)
        drnn.update_memory(ex_mem=c_prev, new_mem=cell_t)

        drnn.output(scores)
    beta = drnn()
    return beta
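For orientation, DynamicRNN runs the block once per step of p_vec while m_prev/c_prev persist across steps via memory/update_memory, and every drnn.output value is collected into the returned sequence. A rough plain-Python analogue, with step_fn as a hypothetical stand-in for the attention-plus-LSTM body:

def dynamic_rnn(steps, init_state, step_fn):
    """steps: per-step inputs; step_fn(x, h, c) -> (out, h, c)."""
    h = c = init_state
    outputs = []
    for x in steps:
        out, h, c = step_fn(x, h, c)   # update_memory
        outputs.append(out)            # drnn.output
    return outputs

print(dynamic_rnn([1, 2, 3], 0, lambda x, h, c: (x + h, x + h, c)))  # [1, 3, 6]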
Example #3
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import layers


def exp_sequence_expand():
    # x = fluid.data(name='x', shape=[1], dtype='float32')
    # y = fluid.data(name='y', shape=[1], dtype='float32', lod_level=1)
    # out = layers.sequence_expand(x=x, y=y, ref_level=0)

    x = fluid.data(name='x', shape=[4, 1], dtype='float32')
    y = fluid.data(name='y', shape=[8, 1], dtype='float32', lod_level=1)
    out = layers.sequence_expand(x=x, y=y, ref_level=0)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    np_data = np.array([[1], [2], [3], [4]]).astype('float32')
    x_lod_tensor = fluid.create_lod_tensor(np_data, [[2, 2]], place)
    print(x_lod_tensor)

    y_lod_tensor = fluid.create_random_int_lodtensor([[2, 2], [3, 3, 1, 1]], [1],
                                                     place, low=0, high=1)
    y_lod_tensor2 = fluid.create_random_int_lodtensor([[2, 2], [3, 3, 1, 1]], [9, 16], place, low=0, high=1)
    print(y_lod_tensor)
    print(y_lod_tensor2)
    # lod: [[0, 2, 4], [0, 3, 6, 7, 8]]
    #    dim: 8, 1
    #    layout: NCHW
    #    dtype: int64_t
    #    data: [0 0 1 1 1 1 1 0]

    out_main = exe.run(fluid.default_main_program(),
                       feed={'x': x_lod_tensor, 'y': y_lod_tensor},
                       fetch_list=[out], return_numpy=False)
    print(out_main[0])
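For reference, the documented semantics of sequence_expand with ref_level=0 are that the i-th level-0 sequence of x is repeated once for each corresponding level-0 sequence of y. The hand computation below, assuming those semantics, shows what the run above should fetch:

import numpy as np

x_data = np.array([[1], [2], [3], [4]], dtype='float32')
x_lens = [2, 2]   # x LoD: [[2, 2]]
y_lens = [2, 2]   # y LoD at ref_level 0

out, offset = [], 0
for seq_len, repeat in zip(x_lens, y_lens):
    seq = x_data[offset:offset + seq_len]
    out.extend([seq] * repeat)       # tile the i-th sequence of x
    offset += seq_len
print(np.concatenate(out))           # [[1] [2] [1] [2] [3] [4] [3] [4]]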
Example #4
def test_sequence_expand(self):
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[10], dtype='float32')
        y = layers.data(
            name='y', shape=[10, 20], dtype='float32', lod_level=2)
        self.assertIsNotNone(layers.sequence_expand(x=x, y=y, ref_level=1))
    print(str(program))
Example #6
        def static_rnn(step,
                       p_vec=p_vec,
                       init_state=None,
                       para_name='',
                       args=args):
            tag = para_name + "static_rnn_"
            ctx = layers.fc(
                input=p_vec,
                param_attr=fluid.ParamAttr(name=tag + 'context_fc_w'),
                bias_attr=fluid.ParamAttr(name=tag + 'context_fc_b'),
                size=hidden_size,
                act=None)

            beta = []
            c_prev = init_state
            m_prev = init_state
            for i in range(step):
                m_prev0 = layers.fc(
                    input=m_prev,
                    size=hidden_size,
                    act=None,
                    param_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_w'),
                    bias_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_b'))
                m_prev1 = layers.sequence_expand(x=m_prev0, y=ctx)

                Fk = ctx + m_prev1
                Fk = layers.tanh(Fk)
                logits = layers.fc(
                    input=Fk,
                    size=1,
                    act=None,
                    param_attr=fluid.ParamAttr(name=tag + 'logits_fc_w'),
                    bias_attr=fluid.ParamAttr(name=tag + 'logits_fc_b'))

                scores = layers.sequence_softmax(input=logits)
                attn_ctx = layers.elementwise_mul(x=p_vec, y=scores, axis=0)
                attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')

                hidden_t, cell_t = lstm_step(
                    attn_ctx,
                    hidden_t_prev=m_prev,
                    cell_t_prev=c_prev,
                    size=hidden_size,
                    para_name=tag,
                    args=args)
                m_prev = hidden_t
                c_prev = cell_t
                beta.append(scores)
            return beta
Example #7
def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
                     decoder_size):
    decoder_state_proj = layers.fc(input=decoder_state,
                                   size=decoder_size,
                                   bias_attr=False,
                                   name="decoder_state_proj_fc")
    decoder_state_expand = layers.sequence_expand(
        x=decoder_state_proj, y=encoder_proj)
    concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
    concated = layers.tanh(x=concated)
    attention_weights = layers.fc(input=concated,
                                  size=1,
                                  act=None,
                                  bias_attr=False,
                                  name="attention_weights_fc")
    attention_weights = layers.sequence_softmax(input=attention_weights)
    weights_reshape = layers.reshape(x=attention_weights, shape=[-1])
    scaled = layers.elementwise_mul(
        x=encoder_vec, y=weights_reshape, axis=0)
    context = layers.sequence_pool(input=scaled, pool_type='sum')
    return context
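simple_attention is additive (Bahdanau-style) attention: project the decoder state, add the precomputed encoder projection, squash with tanh, score with a size-1 FC, and softmax over the source positions. A minimal NumPy sketch under those assumptions (the weights are random stand-ins, not trained parameters):

import numpy as np

def additive_attention(enc_vec, enc_proj, dec_state, W_s, v):
    """enc_vec: (m, d); enc_proj: (m, a); dec_state: (h,); W_s: (h, a); v: (a,)."""
    e = np.tanh(enc_proj + dec_state @ W_s)    # project state, add, tanh
    w = e @ v                                  # unnormalized scores, (m,)
    w = np.exp(w - w.max()); w /= w.sum()      # softmax over source positions
    return (enc_vec * w[:, None]).sum(axis=0)  # context vector, (d,)

m, d, a, h = 5, 8, 6, 7
ctx = additive_attention(np.random.rand(m, d), np.random.rand(m, a),
                         np.random.rand(h), np.random.rand(h, a),
                         np.random.rand(a))
print(ctx.shape)  # (8,)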
def decoder_decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(name="init_scores",
                          shape=[1],
                          dtype="float32",
                          lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)

    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the LoD of pre_state to be the same as pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(input=pre_ids,
                                   size=[dict_size, word_dim],
                                   dtype='float32',
                                   is_sparse=is_sparse)

        # use an RNN unit to update the decoder state
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=50)
        selected_ids, selected_scores = pd.beam_search(pre_ids,
                                                       topk_indices,
                                                       topk_scores,
                                                       beam_size,
                                                       end_id=10,
                                                       level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        pd.less_than(x=counter, y=array_len, cond=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array)

    # return init_ids, init_scores

    return translation_ids, translation_scores
def decoder_decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)

    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the recursive_sequence_lengths of pre_state to be the same as pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(
            input=pre_ids,
            size=[dict_size, word_dim],
            dtype='float32',
            is_sparse=is_sparse)

        # use an RNN unit to update the decoder state
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
        # calculate accumulated scores after topk to reduce computation cost
        accu_scores = pd.elementwise_add(
            x=pd.log(topk_scores), y=pd.reshape(
                pre_score, shape=[-1]), axis=0)
        selected_ids, selected_scores = pd.beam_search(
            pre_ids,
            pre_score,
            topk_indices,
            accu_scores,
            beam_size,
            end_id=10,
            level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        # update the break condition: stop at the max length or when all
        # candidates of the source sentences have ended.
        length_cond = pd.less_than(x=counter, y=array_len)
        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

    # return init_ids, init_scores

    return translation_ids, translation_scores
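This version differs from the previous one mainly in feeding beam_search accumulated log-probabilities, i.e. the log of each top-k step score plus its parent beam's running score, and in stopping early once every beam has emitted the end token. A small NumPy sketch of the accumulation step, on toy numbers:

import numpy as np

pre_score = np.log(np.array([0.6, 0.4]))      # running log-prob per live beam
step_prob = np.array([[0.7, 0.2, 0.1],        # this step's softmax per beam
                      [0.1, 0.3, 0.6]])

k = 2
topk_idx = np.argsort(step_prob, axis=1)[:, ::-1][:, :k]
topk_scores = np.take_along_axis(step_prob, topk_idx, axis=1)
accu_scores = np.log(topk_scores) + pre_score[:, None]   # accumulated log-probs
print(topk_idx)
print(accu_scores)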
Example #10
    def beam_search():
        max_len = layers.fill_constant(
            shape=[1], dtype=start_tokens.dtype, value=max_out_len)
        step_idx = layers.fill_constant(
            shape=[1], dtype=start_tokens.dtype, value=0)
        cond = layers.less_than(x=step_idx, y=max_len)
        while_op = layers.While(cond)
        # array states will be stored for each step.
        ids = layers.array_write(start_tokens, step_idx)
        scores = layers.array_write(init_scores, step_idx)
        # cell states will be overwritten at each step.
        # caches contain states of history steps to reduce redundant
        # computation in the decoder.
        caches = [{
            "k": layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, 0, d_model],
                dtype=enc_output.dtype,
                value=0),
            "v": layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, 0, d_model],
                dtype=enc_output.dtype,
                value=0)
        } for i in range(n_layer)]
        with while_op.block():
            pre_ids = layers.array_read(array=ids, i=step_idx)
            pre_scores = layers.array_read(array=scores, i=step_idx)
            # sequence_expand can gather sequences according to LoD and thus
            # can be used in beam search to select the states that correspond
            # to the selected ids.
            pre_src_attn_bias = layers.sequence_expand(
                x=trg_src_attn_bias, y=pre_scores)
            pre_enc_output = layers.sequence_expand(x=enc_output, y=pre_scores)
            pre_caches = [{
                "k": layers.sequence_expand(
                    x=cache["k"], y=pre_scores),
                "v": layers.sequence_expand(
                    x=cache["v"], y=pre_scores),
            } for cache in caches]
            pre_pos = layers.elementwise_mul(
                x=layers.fill_constant_batch_size_like(
                    input=pre_enc_output,  # can't use pre_ids here since it has LoD
                    value=1,
                    shape=[-1, 1],
                    dtype=pre_ids.dtype),
                y=layers.increment(
                    x=step_idx, value=1.0, in_place=False),
                axis=0)
            logits = wrap_decoder(
                trg_vocab_size,
                max_in_len,
                n_layer,
                n_head,
                d_key,
                d_value,
                d_model,
                d_inner_hid,
                dropout_rate,
                weight_sharing,
                dec_inputs=(
                    pre_ids, pre_pos, None, pre_src_attn_bias, trg_data_shape,
                    slf_attn_pre_softmax_shape, slf_attn_post_softmax_shape,
                    src_attn_pre_softmax_shape, src_attn_post_softmax_shape),
                enc_output=pre_enc_output,
                caches=pre_caches)
            topk_scores, topk_indices = layers.topk(
                input=layers.softmax(logits), k=beam_size)
            accu_scores = layers.elementwise_add(
                x=layers.log(topk_scores),
                y=layers.reshape(
                    pre_scores, shape=[-1]),
                axis=0)
            # beam_search op uses lod to distinguish branches.
            topk_indices = layers.lod_reset(topk_indices, pre_ids)
            selected_ids, selected_scores = layers.beam_search(
                pre_ids=pre_ids,
                pre_scores=pre_scores,
                ids=topk_indices,
                scores=accu_scores,
                beam_size=beam_size,
                end_id=eos_idx)
            layers.increment(x=step_idx, value=1.0, in_place=True)
            # update states
            layers.array_write(selected_ids, i=step_idx, array=ids)
            layers.array_write(selected_scores, i=step_idx, array=scores)
            layers.assign(pre_src_attn_bias, trg_src_attn_bias)
            layers.assign(pre_enc_output, enc_output)
            for i in range(n_layer):
                layers.assign(pre_caches[i]["k"], caches[i]["k"])
                layers.assign(pre_caches[i]["v"], caches[i]["v"])
            layers.assign(
                layers.elementwise_add(
                    x=slf_attn_pre_softmax_shape,
                    y=attn_pre_softmax_shape_delta),
                slf_attn_pre_softmax_shape)
            layers.assign(
                layers.elementwise_add(
                    x=slf_attn_post_softmax_shape,
                    y=attn_post_softmax_shape_delta),
                slf_attn_post_softmax_shape)

            length_cond = layers.less_than(x=step_idx, y=max_len)
            finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)

        finished_ids, finished_scores = layers.beam_search_decode(
            ids, scores, beam_size=beam_size, end_id=eos_idx)
        return finished_ids, finished_scores
Example #11
    def gru_attention_infer(self, decoder_boot, max_length, char_num,
                            word_vector_dim, encoded_vector, encoded_proj,
                            decoder_size):
        init_state = decoder_boot
        beam_size = 1
        array_len = layers.fill_constant(
            shape=[1], dtype='int64', value=max_length)
        counter = layers.zeros(shape=[1], dtype='int64', force_cpu=True)

        # fill the first element with init_state
        state_array = layers.create_array('float32')
        layers.array_write(init_state, array=state_array, i=counter)

        # ids, scores as memory
        ids_array = layers.create_array('int64')
        scores_array = layers.create_array('float32')
        rois_shape = layers.shape(init_state)
        batch_size = layers.slice(
            rois_shape, axes=[0], starts=[0], ends=[1]) + 1
        lod_level = layers.range(
            start=0, end=batch_size, step=1, dtype=batch_size.dtype)

        init_ids = layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], value=0, dtype='int64')
        init_ids = layers.lod_reset(init_ids, lod_level)
        init_ids = layers.lod_append(init_ids, lod_level)

        init_scores = layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], value=1, dtype='float32')
        init_scores = layers.lod_reset(init_scores, init_ids)
        layers.array_write(init_ids, array=ids_array, i=counter)
        layers.array_write(init_scores, array=scores_array, i=counter)

        full_ids = fluid.layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], dtype='int64', value=1)

        cond = layers.less_than(x=counter, y=array_len)
        while_op = layers.While(cond=cond)
        with while_op.block():
            pre_ids = layers.array_read(array=ids_array, i=counter)
            pre_state = layers.array_read(array=state_array, i=counter)
            pre_score = layers.array_read(array=scores_array, i=counter)
            pre_ids_emb = layers.embedding(
                input=pre_ids,
                size=[char_num, word_vector_dim],
                dtype='float32')

            context = self.simple_attention(encoded_vector, encoded_proj,
                                            pre_state, decoder_size)

            # expand the recursive_sequence_lengths of pre_state
            # to be the same as pre_score
            pre_state_expanded = layers.sequence_expand(pre_state, pre_score)
            context_expanded = layers.sequence_expand(context, pre_score)

            fc_1 = layers.fc(input=context_expanded,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc1")

            fc_2 = layers.fc(input=pre_ids_emb,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc2")

            decoder_inputs = fc_1 + fc_2
            current_state, _, _ = layers.gru_unit(
                input=decoder_inputs,
                hidden=pre_state_expanded,
                size=decoder_size * 3)
            current_state_with_lod = layers.lod_reset(
                x=current_state, y=pre_score)
            # use score to do beam search
            current_score = layers.fc(input=current_state_with_lod,
                                      size=char_num,
                                      bias_attr=True,
                                      act='softmax',
                                      name="rnn_out_fc")
            topk_scores, topk_indices = layers.topk(current_score, k=beam_size)

            new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
            fluid.layers.assign(new_ids, full_ids)

            layers.increment(x=counter, value=1, in_place=True)

            # update the memories
            layers.array_write(current_state, array=state_array, i=counter)
            layers.array_write(topk_indices, array=ids_array, i=counter)
            layers.array_write(topk_scores, array=scores_array, i=counter)

            # update the break condition:
            # stop at the max length or when all candidates of
            # the source sentences have ended.
            length_cond = layers.less_than(x=counter, y=array_len)
            finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)
        return full_ids
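Since beam_size is 1 here, the topk call reduces to an argmax and the loop amounts to greedy decoding, with each chosen id concatenated into full_ids. A minimal plain-Python sketch of that control flow; step is a hypothetical stand-in for the GRU-plus-attention body:

import numpy as np

def greedy_decode(step, state, max_length, start_id=1):
    """step(state, token) -> (next_state, vocab_probs); illustrative only."""
    ids, token = [start_id], start_id
    for _ in range(max_length):
        state, probs = step(state, token)
        token = int(np.argmax(probs))   # topk with k=1
        ids.append(token)
    return ids

rng = np.random.default_rng(0)
print(greedy_decode(lambda s, t: (s, rng.random(10)), None, max_length=5))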
Example #12
def attn_flow(q_enc, p_enc, p_ids_name):
    tag = p_ids_name + "::"
    drnn = layers.DynamicRNN()
    with drnn.block():
        h_cur = drnn.step_input(p_enc)
        u_all = drnn.static_input(q_enc)
        h_expd = layers.sequence_expand(x=h_cur, y=u_all)
        s_t_ = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
        s_t1 = layers.reduce_sum(input=s_t_, dim=1)
        s_t = layers.sequence_softmax(input=s_t1)
        u_expr = layers.elementwise_mul(x=u_all, y=s_t, axis=0)
        u_expr = layers.sequence_pool(input=u_expr, pool_type='sum')

        # debug prints, disabled:
        # layers.Print(h_expd, message='h_expd')
        # layers.Print(h_cur, message='h_cur')
        # layers.Print(u_all, message='u_all')
        # layers.Print(s_t, message='s_t')
        # layers.Print(s_t_, message='s_t_')
        # layers.Print(u_expr, message='u_expr')
        drnn.output(u_expr)

    U_expr = drnn()

    drnn2 = layers.DynamicRNN()
    with drnn2.block():
        h_cur = drnn2.step_input(p_enc)
        u_all = drnn2.static_input(q_enc)
        h_expd = layers.sequence_expand(x=h_cur, y=u_all)
        s_t_ = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
        s_t2 = layers.reduce_sum(input=s_t_, dim=1, keep_dim=True)
        b_t = layers.sequence_pool(input=s_t2, pool_type='max')

        # debug prints, disabled:
        # layers.Print(s_t2, message='s_t2')
        # layers.Print(b_t, message='b_t')
        drnn2.output(b_t)
    b = drnn2()
    b_norm = layers.sequence_softmax(input=b)
    h_expr = layers.elementwise_mul(x=p_enc, y=b_norm, axis=0)
    h_expr = layers.sequence_pool(input=h_expr, pool_type='sum')

    H_expr = layers.sequence_expand(x=h_expr, y=p_enc)
    H_expr = layers.lod_reset(x=H_expr, y=p_enc)
    h_u = layers.elementwise_mul(x=H_expr, y=U_expr, axis=0)
    h_h = layers.elementwise_mul(x=H_expr, y=p_enc, axis=0)

    g = layers.concat(input=[H_expr, U_expr, h_u, h_h], axis=1)

    # fusion
    m = bi_lstm_encoder(input_seq=g, gate_size=embedding_dim)
    if args.debug:
        layers.Print(U_expr, message=tag + 'U_expr')
        layers.Print(H_expr, message=tag + 'H_expr')
        layers.Print(b, message=tag + 'b')
        layers.Print(b_norm, message=tag + 'b_norm')
        layers.Print(g, message=tag + 'g')
        layers.Print(m, message=tag + 'm')
        layers.Print(h_h, message=tag + 'h_h')
        layers.Print(q_enc, message=tag + 'q_enc')
        layers.Print(p_enc, message=tag + 'p_enc')

    return m, g
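The match layer above concatenates the expanded summaries with their elementwise interactions along the feature dimension before the fusion BiLSTM. A NumPy sketch of that concatenation on toy (n, d) matrices:

import numpy as np

n, d = 6, 4
P = np.random.rand(n, d)   # passage encoding (p_enc)
H = np.random.rand(n, d)   # expanded passage summary (H_expr)
U = np.random.rand(n, d)   # per-step attended question vectors (U_expr)
g = np.concatenate([H, U, H * U, H * P], axis=1)
print(g.shape)             # (6, 16)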
Example #13
def point_network_decoder(p_vec, q_vec, decoder_size):
    random_attn = layers.gaussian_random(shape=[1, decoder_size])
    random_attn = layers.sequence_expand(x=random_attn, y=q_vec)
    random_attn = layers.fc(input=random_attn, size=decoder_size, act=None)
    U = layers.fc(input=q_vec, size=decoder_size, act=None) + random_attn
    U = layers.tanh(U)

    logits = layers.fc(input=U, size=1, act=None)
    scores = layers.sequence_softmax(input=logits)
    pooled_vec = layers.elementwise_mul(x=q_vec, y=scores, axis=0)
    pooled_vec = layers.sequence_pool(input=pooled_vec, pool_type='sum')

    init_state = layers.fc(input=pooled_vec, size=decoder_size, act=None)

    def custom_dynamic_rnn(p_vec, init_state, decoder_size):
        context = layers.fc(input=p_vec, size=decoder_size, act=None)

        drnn = layers.DynamicRNN()
        with drnn.block():
            H_s = drnn.step_input(p_vec)
            ctx = drnn.static_input(context)

            c_prev = drnn.memory(init=init_state, need_reorder=True)
            m_prev = drnn.memory(init=init_state, need_reorder=True)
            m_prev1 = layers.fc(input=m_prev, size=decoder_size, act=None)
            m_prev1 = layers.sequence_expand(x=m_prev1, y=ctx)

            Fk = ctx + m_prev1
            Fk = layers.fc(input=Fk, size=decoder_size, act='tanh')
            logits = layers.fc(input=Fk, size=1, act=None)

            scores = layers.sequence_softmax(input=logits)
            attn_ctx = layers.elementwise_mul(x=ctx, y=scores, axis=0)
            attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')
            hidden_t, cell_t = lstm_step(
                attn_ctx,
                hidden_t_prev=m_prev1,
                cell_t_prev=c_prev,
                size=decoder_size)

            drnn.update_memory(ex_mem=m_prev, new_mem=hidden_t)
            drnn.update_memory(ex_mem=c_prev, new_mem=cell_t)

            drnn.output(scores)
        beta = drnn()
        return beta

    fw_outputs = custom_dynamic_rnn(p_vec, init_state, decoder_size)
    bw_outputs = custom_dynamic_rnn(p_vec, init_state, decoder_size)

    def sequence_slice(x, index):
        # offset = layers.fill_constant(shape=[1, args.batch_size], value=index, dtype='float32')
        # length = layers.fill_constant(shape=[1, args.batch_size], value=1, dtype='float32')
        # return layers.sequence_slice(x, offset, length)
        idx = layers.fill_constant(shape=[1], value=index, dtype='int32')
        idx.stop_gradient = True
        from paddle.fluid.layers.control_flow import lod_rank_table
        from paddle.fluid.layers.control_flow import lod_tensor_to_array
        from paddle.fluid.layers.control_flow import array_read
        from paddle.fluid.layers.control_flow import array_to_lod_tensor
        table = lod_rank_table(x, level=0)
        table.stop_gradient = True
        array = lod_tensor_to_array(x, table)
        slice_array = array_read(array=array, i=idx)
        return array_to_lod_tensor(slice_array, table)

    start_prob = layers.elementwise_mul(
        x=sequence_slice(fw_outputs, 0), y=sequence_slice(bw_outputs, 1),
        axis=0) / 2
    end_prob = layers.elementwise_mul(
        x=sequence_slice(fw_outputs, 1), y=sequence_slice(bw_outputs, 0),
        axis=0) / 2
    return start_prob, end_prob
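The decoder runs the same attention RNN twice and combines the passes: the start distribution is half the elementwise product of the forward pass's step 0 and the backward pass's step 1, and the end distribution swaps the steps. A toy NumPy sketch of that combination:

import numpy as np

fw = [np.array([0.7, 0.2, 0.1]), np.array([0.1, 0.1, 0.8])]  # fw steps 0, 1
bw = [np.array([0.2, 0.2, 0.6]), np.array([0.5, 0.3, 0.2])]  # bw steps 0, 1

start_prob = fw[0] * bw[1] / 2
end_prob = fw[1] * bw[0] / 2
print(start_prob, end_prob)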
Example #14
def decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)

    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the LoD of pre_state to be the same as pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(
            input=pre_ids,
            size=[dict_size, word_dim],
            dtype='float32',
            is_sparse=is_sparse)

        # use an RNN unit to update the decoder state
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
        selected_ids, selected_scores = pd.beam_search(
            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        pd.less_than(x=counter, y=array_len, cond=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array)

    # return init_ids, init_scores

    return translation_ids, translation_scores