def rc_model(hidden_size, vocab, args):
    emb_shape = [vocab.size(), vocab.embed_dim]
    start_labels = layers.data(
        name="start_lables", shape=[1], dtype='float32', lod_level=1)
    end_labels = layers.data(
        name="end_lables", shape=[1], dtype='float32', lod_level=1)

    # stage 1: encode
    q_id0 = get_data('q_id0', 1, args)
    q_ids = get_data('q_ids', 2, args)
    p_ids_name = 'p_ids'
    p_ids = get_data('p_ids', 2, args)
    p_embs = embedding(p_ids, emb_shape, args)
    q_embs = embedding(q_ids, emb_shape, args)

    drnn = layers.DynamicRNN()
    with drnn.block():
        p_emb = drnn.step_input(p_embs)
        q_emb = drnn.step_input(q_embs)
        p_enc = encoder(p_emb, 'p_enc', hidden_size, args)
        q_enc = encoder(q_emb, 'q_enc', hidden_size, args)

        # stage 2: match
        g_i = attn_flow(q_enc, p_enc, p_ids_name, args)

        # stage 3: fusion
        m_i = fusion(g_i, args)
        drnn.output(m_i, q_enc)

    ms, q_encs = drnn()
    p_vec = layers.lod_reset(x=ms, y=start_labels)
    q_vec = layers.lod_reset(x=q_encs, y=q_id0)

    # stage 4: decode
    start_probs, end_probs = point_network_decoder(
        p_vec=p_vec, q_vec=q_vec, hidden_size=hidden_size, args=args)

    cost0 = layers.sequence_pool(
        layers.cross_entropy(
            input=start_probs, label=start_labels, soft_label=True),
        'sum')
    cost1 = layers.sequence_pool(
        layers.cross_entropy(
            input=end_probs, label=end_labels, soft_label=True),
        'sum')

    cost0 = layers.mean(cost0)
    cost1 = layers.mean(cost1)
    cost = cost0 + cost1
    cost.persistable = True

    feeding_list = ["q_ids", "start_lables", "end_lables", "p_ids", "q_id0"]
    return cost, start_probs, end_probs, ms, feeding_list
def attn_flow(q_enc, p_enc, p_ids_name, args):
    """Bidirectional Attention layer"""
    tag = p_ids_name + "__"
    drnn = layers.DynamicRNN()
    with drnn.block():
        h_cur = drnn.step_input(p_enc)
        u_all = drnn.static_input(q_enc)
        h_expd = layers.sequence_expand(x=h_cur, y=u_all)
        s_t_mul = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
        s_t_sum = layers.reduce_sum(input=s_t_mul, dim=1, keep_dim=True)
        s_t_re = layers.reshape(s_t_sum, shape=[-1, 0])
        s_t = layers.sequence_softmax(input=s_t_re)
        u_expr = layers.elementwise_mul(x=u_all, y=s_t, axis=0)
        u_expr = layers.sequence_pool(input=u_expr, pool_type='sum')
        b_t = layers.sequence_pool(input=s_t_sum, pool_type='max')
        drnn.output(u_expr, b_t)

    U_expr, b = drnn()
    b_norm = layers.sequence_softmax(input=b)
    h_expr = layers.elementwise_mul(x=p_enc, y=b_norm, axis=0)
    h_expr = layers.sequence_pool(input=h_expr, pool_type='sum')

    H_expr = layers.sequence_expand(x=h_expr, y=p_enc)
    H_expr = layers.lod_reset(x=H_expr, y=p_enc)
    h_u = layers.elementwise_mul(x=p_enc, y=U_expr, axis=0)
    h_h = layers.elementwise_mul(x=p_enc, y=H_expr, axis=0)

    g = layers.concat(input=[p_enc, U_expr, h_u, h_h], axis=1)
    return dropout(g, args)
def sequence_softmax(x, beta=None):
    """Compute sequence softmax over a paddle LoDTensor.

    This function computes the softmax normalization along the length of each
    sequence. It is an extension of :code:`L.sequence_softmax`, which can only
    deal with LoDTensors whose last dimension is 1.

    Args:
        x: The input variable, which is a LoDTensor.
        beta: Inverse temperature.

    Return:
        Output of sequence_softmax
    """
    if beta is not None:
        x = x * beta
    x_max = L.sequence_pool(x, "max")
    x_max = L.sequence_expand_as(x_max, x)
    x = x - x_max
    exp_x = L.exp(x)
    sum_exp_x = L.sequence_pool(exp_x, "sum")
    sum_exp_x = L.sequence_expand_as(sum_exp_x, exp_x)
    return exp_x / sum_exp_x
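# A minimal, hedged usage sketch for the sequence_softmax helper above. It
# assumes the usual fluid executor workflow shown in the later snippets in
# this file and the alias L = paddle.fluid.layers; the names 'scores' and
# lod_scores are illustrative only, not part of any existing API.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as L

scores = fluid.data('scores', [-1, 1], dtype='float32', lod_level=1)
probs = sequence_softmax(scores)

exe = fluid.Executor(fluid.CPUPlace())
# Two sequences of lengths 2 and 3; softmax is normalized within each sequence.
lod_scores = fluid.create_lod_tensor(
    np.array([[1.], [2.], [0.], [0.], [0.]], dtype='float32'),
    [[2, 3]], fluid.CPUPlace())
out, = exe.run(feed={'scores': lod_scores},
               fetch_list=[probs],
               return_numpy=False)
print(np.array(out))  # rows 0-1 sum to 1, rows 2-4 sum to 1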
def build_model(self):
    node_features = self.graph_wrapper.node_feat["feat"]

    output = self.gcn(gw=self.graph_wrapper,
                      feature=node_features,
                      hidden_size=self.hidden_size,
                      activation="relu",
                      norm=self.graph_wrapper.node_feat["norm"],
                      name="gcn_layer_1")
    output1 = output
    output = self.gcn(gw=self.graph_wrapper,
                      feature=output,
                      hidden_size=self.hidden_size,
                      activation="relu",
                      norm=self.graph_wrapper.node_feat["norm"],
                      name="gcn_layer_2")
    output2 = output
    output = self.gcn(gw=self.graph_wrapper,
                      feature=output,
                      hidden_size=self.hidden_size,
                      activation="relu",
                      norm=self.graph_wrapper.node_feat["norm"],
                      name="gcn_layer_3")

    output = L.concat(input=[output1, output2, output], axis=-1)

    output, ratio_length = sag_pool(gw=self.graph_wrapper,
                                    feature=output,
                                    ratio=self.pooling_ratio,
                                    graph_id=self.graph_id,
                                    dataset=self.args.dataset_name,
                                    name="sag_pool_1")
    output = L.lod_reset(output, self.graph_wrapper.graph_lod)
    cat1 = L.sequence_pool(output, "sum")
    ratio_length = L.cast(ratio_length, dtype="float32")
    cat1 = L.elementwise_div(cat1, ratio_length, axis=-1)
    cat2 = L.sequence_pool(output, "max")
    output = L.concat(input=[cat2, cat1], axis=-1)

    output = L.fc(output, size=self.hidden_size, act="relu")
    output = L.dropout(output, dropout_prob=self.dropout_ratio)
    output = L.fc(output, size=self.hidden_size // 2, act="relu")
    output = L.fc(output, size=self.num_classes, act=None,
                  param_attr=fluid.ParamAttr(name="final_fc"))

    self.labels = L.cast(self.labels, dtype="float32")
    loss = L.sigmoid_cross_entropy_with_logits(x=output, label=self.labels)
    self.loss = L.mean(loss)
    pred = L.sigmoid(output)
    self.pred = L.argmax(x=pred, axis=-1)
    correct = L.equal(self.pred, self.labels_1dim)
    correct = L.cast(correct, dtype="int32")
    self.correct = L.reduce_sum(correct)
def __call__(self, msg):
    alpha = msg["alpha"]  # lod-tensor (batch_size, num_heads)
    if attn_drop:
        old_h = alpha
        dropout = F.data(name='attn_drop', shape=[1], dtype="int64")
        u = L.uniform_random(shape=L.cast(L.shape(alpha)[:1], 'int64'),
                             min=0., max=1.)
        keeped = L.cast(u > dropout, dtype="float32")
        self_attn_mask = L.scale(x=keeped,
                                 scale=10000.0,
                                 bias=-1.0,
                                 bias_after_scale=False)
        n_head_self_attn_mask = L.stack(x=[self_attn_mask] * num_heads,
                                        axis=1)
        n_head_self_attn_mask.stop_gradient = True
        alpha = n_head_self_attn_mask + alpha
        alpha = L.lod_reset(alpha, old_h)

    h = msg["v"]
    alpha = paddle_helper.sequence_softmax(alpha)
    self.alpha = alpha
    old_h = h
    h = h * alpha
    h = L.lod_reset(h, old_h)
    h = L.sequence_pool(h, "sum")
    if concat:
        h = L.reshape(h, [-1, num_heads * hidden_size])
    else:
        h = L.reduce_mean(h, dim=1)
    return h
def custom_dynamic_rnn(p_vec, init_state, decoder_size):
    context = layers.fc(input=p_vec, size=decoder_size, act=None)
    drnn = layers.DynamicRNN()
    with drnn.block():
        H_s = drnn.step_input(p_vec)
        ctx = drnn.static_input(context)
        c_prev = drnn.memory(init=init_state, need_reorder=True)
        m_prev = drnn.memory(init=init_state, need_reorder=True)
        m_prev1 = layers.fc(input=m_prev, size=decoder_size, act=None)
        m_prev1 = layers.sequence_expand(x=m_prev1, y=ctx)

        Fk = ctx + m_prev1
        Fk = layers.fc(input=Fk, size=decoder_size, act='tanh')
        logits = layers.fc(input=Fk, size=1, act=None)

        scores = layers.sequence_softmax(input=logits)
        attn_ctx = layers.elementwise_mul(x=ctx, y=scores, axis=0)
        attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')

        hidden_t, cell_t = lstm_step(attn_ctx,
                                     hidden_t_prev=m_prev1,
                                     cell_t_prev=c_prev,
                                     size=decoder_size)
        drnn.update_memory(ex_mem=m_prev, new_mem=hidden_t)
        drnn.update_memory(ex_mem=c_prev, new_mem=cell_t)
        drnn.output(scores)

    beta = drnn()
    return beta
def forward(self, is_test=False):
    """
    Build the network.
    """
    substruct_graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])

    context_graph_wrapper = GraphWrapper(
        name="context_graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])

    substruct_center_idx = layers.data(name="substruct_center_idx",
                                       shape=[-1, 1], dtype="int64")
    context_overlap_idx = layers.data(name="context_overlap_idx",
                                      shape=[-1, 1], dtype="int64")
    context_overlap_lod = layers.data(name="context_overlap_lod",
                                      shape=[1, -1], dtype="int32")
    context_cycle_index = layers.data(name="context_cycle_index",
                                      shape=[-1, 1], dtype="int64")

    substruct_node_repr = self.substruct_model.forward(
        substruct_graph_wrapper, is_test=is_test)
    substruct_repr = layers.gather(substruct_node_repr, substruct_center_idx)

    context_node_repr = self.context_model.forward(context_graph_wrapper,
                                                   is_test=is_test)
    context_overlap_repr = layers.gather(context_node_repr,
                                         context_overlap_idx)
    context_repr = layers.sequence_pool(
        layers.lod_reset(context_overlap_repr, context_overlap_lod),
        self.context_pooling)
    neg_context_repr = layers.gather(context_repr, context_cycle_index)

    pred_pos = layers.reduce_sum(substruct_repr * context_repr, 1)
    pred_neg = layers.reduce_sum(substruct_repr * neg_context_repr, 1)

    label_pos = pred_pos * 0.0 + 1.0
    label_pos.stop_gradient = True
    label_neg = pred_neg * 0.0
    label_neg.stop_gradient = True

    loss = layers.sigmoid_cross_entropy_with_logits(x=pred_pos, label=label_pos) \
         + layers.sigmoid_cross_entropy_with_logits(x=pred_neg, label=label_neg)
    loss = layers.reduce_mean(loss)

    self.substruct_graph_wrapper = substruct_graph_wrapper
    self.context_graph_wrapper = context_graph_wrapper
    self.loss = loss
def _build_net(self):
    self.pool1 = layers.sequence_pool(input=self.src_emb, pool_type="average")
    self.output = layers.fc(self.pool1, 2)
    self.output = layers.softmax(self.output)
    return self.output
def graphsage_sum(feature, gw, hidden_size, name, act):
    msg = gw.send(lambda s, d, e: s["h"], nfeat_list=[("h", feature)])
    neigh_feature = gw.recv(
        msg, lambda feat: L.sequence_pool(feat, pool_type="sum"))

    self_feature = linear(feature, hidden_size, name + "_l", act)
    neigh_feature = linear(neigh_feature, hidden_size, name + "_r", act)
    output = L.concat([self_feature, neigh_feature], axis=1)
    output = L.l2_normalize(output, axis=1)
    return output
def get_mov_combined_features():
    MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1

    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
    mov_emb = layers.embedding(input=mov_id,
                               dtype='float32',
                               size=[MOV_DICT_SIZE, 32],
                               param_attr='movie_table',
                               is_sparse=IS_SPARSE)
    mov_fc = layers.fc(input=mov_emb, size=32)

    CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
    category_id = layers.data(name='category_id',
                              shape=[1],
                              dtype='int64',
                              lod_level=1)
    mov_categories_emb = layers.embedding(input=category_id,
                                          size=[CATEGORY_DICT_SIZE, 32],
                                          is_sparse=IS_SPARSE)
    mov_categories_hidden = layers.sequence_pool(input=mov_categories_emb,
                                                 pool_type="sum")

    MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
    mov_title_id = layers.data(name='movie_title',
                               shape=[1],
                               dtype='int64',
                               lod_level=1)
    mov_title_emb = layers.embedding(input=mov_title_id,
                                     size=[MOV_TITLE_DICT_SIZE, 32],
                                     is_sparse=IS_SPARSE)
    # The movie title is a sequence of integers, where each integer is the
    # index of the word in the title dictionary. The sequence is fed into a
    # sequence_conv_pool layer, which applies convolution and pooling along
    # the time dimension. Because of this, the output has a fixed length even
    # though the input sequences vary in length.
    mov_title_conv = nets.sequence_conv_pool(input=mov_title_emb,
                                             num_filters=32,
                                             filter_size=3,
                                             act="tanh",
                                             pool_type="sum")

    concat_embed = layers.concat(
        input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1)

    mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
    return mov_combined_features
def get_mov_combined_features():
    MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1

    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
    mov_emb = layers.embedding(input=mov_id,
                               dtype='float32',
                               size=[MOV_DICT_SIZE, 32],
                               param_attr='movie_table',
                               is_sparse=IS_SPARSE)
    mov_fc = layers.fc(input=mov_emb, size=32)

    CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
    category_id = layers.data(name='category_id',
                              shape=[1],
                              dtype='int64',
                              lod_level=1)
    mov_categories_emb = layers.embedding(input=category_id,
                                          size=[CATEGORY_DICT_SIZE, 32],
                                          is_sparse=IS_SPARSE)
    mov_categories_hidden = layers.sequence_pool(input=mov_categories_emb,
                                                 pool_type="sum")

    MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
    mov_title_id = layers.data(name='movie_title',
                               shape=[1],
                               dtype='int64',
                               lod_level=1)
    mov_title_emb = layers.embedding(input=mov_title_id,
                                     size=[MOV_TITLE_DICT_SIZE, 32],
                                     is_sparse=IS_SPARSE)
    mov_title_conv = nets.sequence_conv_pool(input=mov_title_emb,
                                             num_filters=32,
                                             filter_size=3,
                                             act="tanh",
                                             pool_type="sum")

    concat_embed = layers.concat(
        input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1)

    # FIXME(dzh) : need tanh operator
    mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
    return mov_combined_features
def recv_func(message):
    nt = message["nt"]
    att = message["att"]
    h = message["h"]
    output_h = []
    for i in range(2):
        mask = L.cast(nt == i, dtype="float32")
        rel_att = att[:, i:i + 1] + (1 - mask) * -10000
        rel_att = paddle_helper.sequence_softmax(rel_att)
        rel_h = L.sequence_pool(h * rel_att * mask, "sum")
        output_h.append(rel_h)
    output_h = L.concat(output_h, -1)
    return output_h
def static_rnn(step, p_vec=p_vec, init_state=None, para_name='', args=args):
    tag = para_name + "static_rnn_"
    ctx = layers.fc(
        input=p_vec,
        param_attr=fluid.ParamAttr(name=tag + 'context_fc_w'),
        bias_attr=fluid.ParamAttr(name=tag + 'context_fc_b'),
        size=hidden_size,
        act=None)

    beta = []
    c_prev = init_state
    m_prev = init_state
    for i in range(step):
        m_prev0 = layers.fc(
            input=m_prev,
            size=hidden_size,
            act=None,
            param_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_w'),
            bias_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_b'))
        m_prev1 = layers.sequence_expand(x=m_prev0, y=ctx)

        Fk = ctx + m_prev1
        Fk = layers.tanh(Fk)
        logits = layers.fc(
            input=Fk,
            size=1,
            act=None,
            param_attr=fluid.ParamAttr(name=tag + 'logits_fc_w'),
            bias_attr=fluid.ParamAttr(name=tag + 'logits_fc_b'))

        scores = layers.sequence_softmax(input=logits)
        attn_ctx = layers.elementwise_mul(x=p_vec, y=scores, axis=0)
        attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')

        hidden_t, cell_t = lstm_step(
            attn_ctx,
            hidden_t_prev=m_prev,
            cell_t_prev=c_prev,
            size=hidden_size,
            para_name=tag,
            args=args)
        m_prev = hidden_t
        c_prev = cell_t
        beta.append(scores)

    return beta
def recv_func(message):
    # feature of src and dst node on each edge
    dst_feat = message['dst_node_feat']
    src_feat = message['src_node_feat']
    # feature of center node
    x = L.sequence_pool(dst_feat, 'average')
    # feature of neighbors of center node
    z = L.sequence_pool(src_feat, 'average')

    # compute gate
    feat_gate = message['feat_gate']
    g_max = L.sequence_pool(feat_gate, 'max')
    g = L.concat([x, g_max, z], axis=1)
    g = L.fc(g, heads, bias_attr=False, act="sigmoid")

    # softmax
    alpha = message['alpha']
    alpha = paddle_helper.sequence_softmax(alpha)  # E * M

    feat_value = message['feat_value']  # E * (M * D2)
    old = feat_value
    feat_value = L.reshape(feat_value, [-1, heads, hidden_size_v])  # E * M * D2
    feat_value = L.elementwise_mul(feat_value, alpha, axis=0)
    feat_value = L.reshape(feat_value, [-1, heads * hidden_size_v])  # E * (M * D2)
    feat_value = L.lod_reset(feat_value, old)

    feat_value = L.sequence_pool(feat_value, 'sum')  # N * (M * D2)
    feat_value = L.reshape(feat_value, [-1, heads, hidden_size_v])  # N * M * D2

    output = L.elementwise_mul(feat_value, g, axis=0)
    output = L.reshape(output, [-1, heads * hidden_size_v])  # N * (M * D2)

    output = L.concat([x, output], axis=1)
    return output
def reduce_attention(msg):
    alpha = msg["alpha"]  # lod-tensor (batch_size, seq_len, num_heads)
    h = msg["h"]
    alpha = paddle_helper.sequence_softmax(alpha)
    old_h = h
    h = L.reshape(h, [-1, num_heads, hidden_size])
    alpha = L.reshape(alpha, [-1, num_heads, 1])
    if attn_drop > 1e-15:
        alpha = L.dropout(
            alpha,
            dropout_prob=attn_drop,
            is_test=is_test,
            dropout_implementation="upscale_in_train")
    h = h * alpha
    h = L.reshape(h, [-1, num_heads * hidden_size])
    h = L.lod_reset(h, old_h)
    return L.sequence_pool(h, "sum")
def recv_score_v_spmm(msg):
    score = msg["score"]
    score = paddle_helper.sequence_softmax(score)
    score = layers.dropout(
        score,
        dropout_prob=dropout_rate,
        dropout_implementation="upscale_in_train",
        is_test=False)
    score = L.reshape(score, [-1, n_head, 1])

    _v = msg["value"]
    _new_v = L.reshape(_v, [-1, n_head, _v.shape[-1] // n_head])
    _new_v = _new_v * score
    _new_v = L.reshape(_new_v, [-1, _v.shape[-1]])
    _new_v = L.lod_reset(_new_v, _v)
    return L.sequence_pool(_new_v, "sum")
def graph_pooling(gw, node_feat, pool_type):
    """Implementation of graph pooling

    This is an implementation of graph pooling.

    Args:
        gw: Graph wrapper object (:code:`StaticGraphWrapper` or :code:`GraphWrapper`)
        node_feat: A tensor with shape (num_nodes, feature_size).
        pool_type: The type of pooling ("sum", "average", "min")

    Return:
        A tensor with shape (num_graph, hidden_size)
    """
    graph_feat = op.nested_lod_reset(node_feat, gw.graph_lod)
    graph_feat = L.sequence_pool(graph_feat, pool_type)
    return graph_feat
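# A minimal, hedged sketch of what graph_pooling above reduces to with plain
# fluid ops, assuming graph_lod is an int32 offset tensor such as [0, 3, 5]
# (two graphs with 3 and 2 nodes). The PGL helper op.nested_lod_reset is
# replaced here by the plain L.lod_reset used elsewhere in this file, and the
# names node_feat_var / graph_lod_var are illustrative only.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as L

node_feat_var = fluid.data('node_feat', [-1, 4], dtype='float32')
graph_lod_var = fluid.data('graph_lod', [-1], dtype='int32')

# Attach the per-graph LoD to the node features, then pool per graph.
pooled = L.sequence_pool(L.lod_reset(node_feat_var, graph_lod_var), 'sum')

exe = fluid.Executor(fluid.CPUPlace())
out = exe.run(
    feed={
        'node_feat': np.ones([5, 4], dtype='float32'),
        'graph_lod': np.array([0, 3, 5], dtype='int32'),
    },
    fetch_list=[pooled])
print(out[0])  # shape (2, 4): per-graph sums over 3 and 2 nodes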
def __call__(self, inputs, labels=None, mode=None):
    encoder_features = self.encoder(inputs)
    char_num = self.char_num
    word_vector_dim = self.word_vector_dim
    decoder_size = self.decoder_size

    if self.encoder_type == "reshape":
        encoder_input = encoder_features
        encoded_vector = encoder_features
    else:
        encoder_input = encoder_features[1]
        encoded_vector = layers.concat(encoder_features, axis=1)
    encoded_proj = layers.fc(input=encoded_vector,
                             size=decoder_size,
                             bias_attr=False,
                             name="encoded_proj_fc")
    backward_first = layers.sequence_pool(input=encoder_input,
                                          pool_type='first')
    decoder_boot = layers.fc(input=backward_first,
                             size=decoder_size,
                             bias_attr=False,
                             act="relu",
                             name='decoder_boot')

    if mode == "train":
        label_in = labels['label_in']
        label_out = labels['label_out']
        label_in = layers.cast(x=label_in, dtype='int64')
        trg_embedding = layers.embedding(
            input=label_in,
            size=[char_num, word_vector_dim],
            dtype='float32')
        predict = self.gru_decoder_with_attention(
            trg_embedding, encoded_vector, encoded_proj, decoder_boot,
            decoder_size, char_num)
        _, decoded_out = layers.topk(input=predict, k=1)
        decoded_out = layers.lod_reset(decoded_out, y=label_out)
        predicts = {'predict': predict, 'decoded_out': decoded_out}
    else:
        ids = self.gru_attention_infer(
            decoder_boot, self.max_length, char_num, word_vector_dim,
            encoded_vector, encoded_proj, decoder_size)
        predicts = {'decoded_out': ids}

    return predicts
def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
                     decoder_size):
    decoder_state_proj = layers.fc(input=decoder_state,
                                   size=decoder_size,
                                   bias_attr=False,
                                   name="decoder_state_proj_fc")
    decoder_state_expand = layers.sequence_expand(
        x=decoder_state_proj, y=encoder_proj)
    concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
    concated = layers.tanh(x=concated)
    attention_weights = layers.fc(input=concated,
                                  size=1,
                                  act=None,
                                  bias_attr=False,
                                  name="attention_weights_fc")
    attention_weights = layers.sequence_softmax(input=attention_weights)
    weights_reshape = layers.reshape(x=attention_weights, shape=[-1])
    scaled = layers.elementwise_mul(
        x=encoder_vec, y=weights_reshape, axis=0)
    context = layers.sequence_pool(input=scaled, pool_type='sum')
    return context
def max_recv(feat):
    """Aggregate received messages by max pooling over each destination node."""
    return layers.sequence_pool(feat, pool_type="max")
def sum_recv(feat):
    """Aggregate received messages by sum pooling over each destination node."""
    return layers.sequence_pool(feat, pool_type="sum")
def mean_recv(feat):
    """Aggregate received messages by mean pooling over each destination node."""
    return layers.sequence_pool(feat, pool_type="average")
def max_recv(feat):
    return L.sequence_pool(feat, pool_type="max")
def point_network_decoder(p_vec, q_vec, hidden_size, args):
    """Output layer - pointer network"""
    tag = 'pn_decoder_'
    init_random = fluid.initializer.Normal(loc=0.0, scale=1.0)

    random_attn = layers.create_parameter(
        shape=[1, hidden_size],
        dtype='float32',
        default_initializer=init_random)
    random_attn = layers.fc(
        input=random_attn,
        size=hidden_size,
        act=None,
        param_attr=fluid.ParamAttr(name=tag + 'random_attn_fc_w'),
        bias_attr=fluid.ParamAttr(name=tag + 'random_attn_fc_b'))
    random_attn = layers.reshape(random_attn, shape=[-1])
    U = layers.fc(input=q_vec,
                  param_attr=fluid.ParamAttr(name=tag + 'q_vec_fc_w'),
                  bias_attr=False,
                  size=hidden_size,
                  act=None) + random_attn
    U = layers.tanh(U)
    logits = layers.fc(input=U,
                       param_attr=fluid.ParamAttr(name=tag + 'logits_fc_w'),
                       bias_attr=fluid.ParamAttr(name=tag + 'logits_fc_b'),
                       size=1,
                       act=None)
    scores = layers.sequence_softmax(input=logits)
    pooled_vec = layers.elementwise_mul(x=q_vec, y=scores, axis=0)
    pooled_vec = layers.sequence_pool(input=pooled_vec, pool_type='sum')
    init_state = layers.fc(
        input=pooled_vec,
        param_attr=fluid.ParamAttr(name=tag + 'init_state_fc_w'),
        bias_attr=fluid.ParamAttr(name=tag + 'init_state_fc_b'),
        size=hidden_size,
        act=None)

    def custom_dynamic_rnn(p_vec, init_state, hidden_size, para_name, args):
        tag = para_name + "custom_dynamic_rnn_"

        def static_rnn(step,
                       p_vec=p_vec,
                       init_state=None,
                       para_name='',
                       args=args):
            tag = para_name + "static_rnn_"
            ctx = layers.fc(
                input=p_vec,
                param_attr=fluid.ParamAttr(name=tag + 'context_fc_w'),
                bias_attr=fluid.ParamAttr(name=tag + 'context_fc_b'),
                size=hidden_size,
                act=None)
            beta = []
            c_prev = init_state
            m_prev = init_state
            for i in range(step):
                m_prev0 = layers.fc(
                    input=m_prev,
                    size=hidden_size,
                    act=None,
                    param_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_w'),
                    bias_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_b'))
                m_prev1 = layers.sequence_expand(x=m_prev0, y=ctx)

                Fk = ctx + m_prev1
                Fk = layers.tanh(Fk)
                logits = layers.fc(
                    input=Fk,
                    size=1,
                    act=None,
                    param_attr=fluid.ParamAttr(name=tag + 'logits_fc_w'),
                    bias_attr=fluid.ParamAttr(name=tag + 'logits_fc_b'))

                scores = layers.sequence_softmax(input=logits)
                attn_ctx = layers.elementwise_mul(x=p_vec, y=scores, axis=0)
                attn_ctx = layers.sequence_pool(input=attn_ctx,
                                                pool_type='sum')

                hidden_t, cell_t = lstm_step(
                    attn_ctx,
                    hidden_t_prev=m_prev,
                    cell_t_prev=c_prev,
                    size=hidden_size,
                    para_name=tag,
                    args=args)
                m_prev = hidden_t
                c_prev = cell_t
                beta.append(scores)
            return beta

        return static_rnn(
            2, p_vec=p_vec, init_state=init_state, para_name=para_name)

    fw_outputs = custom_dynamic_rnn(p_vec, init_state, hidden_size,
                                    tag + "fw_", args)
    bw_outputs = custom_dynamic_rnn(p_vec, init_state, hidden_size,
                                    tag + "bw_", args)

    start_prob = layers.elementwise_add(
        x=fw_outputs[0], y=bw_outputs[1], axis=0) / 2
    end_prob = layers.elementwise_add(
        x=fw_outputs[1], y=bw_outputs[0], axis=0) / 2

    return start_prob, end_prob
from __future__ import print_function
import paddle.fluid as fluid
import paddle.fluid.layers as layers

####################
# original program #
####################
main_prog = fluid.Program()
start_prog = fluid.Program()
with fluid.program_guard(main_prog, start_prog):
    slot = fluid.data('slot', [-1, 1], dtype='int64', lod_level=1)
    label = fluid.data('label', [-1, 1])
    emb = layers.embedding(slot, [4, 12],
                           param_attr=fluid.ParamAttr(name="emb"))
    pool = layers.sequence_pool(emb, 'sum')
    fc = layers.fc(pool, 12, act='relu')
    logit = layers.fc(fc, 1)
    loss = layers.sigmoid_cross_entropy_with_logits(logit, label)

exe = fluid.Executor(fluid.CUDAPlace(0))
# if no GPU is available, use the statement below instead:
# exe = fluid.Executor(fluid.CPUPlace())

# initialize all parameters
exe.run(start_prog)
fluid.io.save_persistables(exe, dirname="model", main_program=main_prog)

# show all parameters in the original model
param_names = {var.name for var in main_prog.list_vars() if var.persistable}
print(param_names)
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

slot = fluid.data('slot', [-1, 1], dtype='int64', lod_level=1)
ones = layers.ones_like(slot)
float_ones = layers.cast(ones, dtype='float32')
value = layers.sequence_pool(float_ones, pool_type='sum')

feed_list = {
    'slot': fluid.create_lod_tensor(
        np.array([[0], [1], [2], [3], [4]], dtype='int64'),
        [[3, 2]], fluid.CPUPlace())
}
fetch_list = [value]

exe = fluid.Executor(fluid.CPUPlace())
result = exe.run(fluid.default_main_program(),
                 feed=feed_list,
                 fetch_list=fetch_list)
print('sequence length:', result)
def mean_recv(feat):
    return L.sequence_pool(feat, pool_type="average")
def _is_backward_op(op, op_role_key):
    return op_role_key in op.attr_names and \
        int(op.all_attrs()[op_role_key]) & int(OpRole.Backward)


avgw_list = []

# define custom main program and startup program
main_program = fluid.Program()
start_program = fluid.Program()
with fluid.program_guard(main_program, start_program):
    # build the network
    slot = fluid.data('slot', [-1, 1], dtype='int64', lod_level=1)
    label = fluid.data('label', [-1, 1])
    emb = layers.embedding(slot, [5, 12])
    pool = layers.sequence_pool(emb, 'sum')
    logit = layers.fc(pool, 1)
    loss = layers.sigmoid_cross_entropy_with_logits(logit, label)
    avg_cost = layers.mean(loss)

    # define the optimizer
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
    sgd_optimizer.minimize(avg_cost)

    decay_var = layers.fill_constant(shape=[1], value=0.9, dtype='float32')
    rev_decay_var = layers.fill_constant(shape=[1], value=0.1, dtype='float32')

block = main_program.global_block()
op_maker = core.op_proto_and_checker_maker
op_role_key = op_maker.kOpRoleAttrName()          # "op_role"
op_role_var_key = op_maker.kOpRoleVarAttrName()   # "op_role_var"
def sum_recv(feat):
    return L.sequence_pool(feat, pool_type="sum")