def inference_program(words, is_sparse):
    embed_first = fluid.embedding(input=words[0],
                                  size=[dict_size, EMBED_SIZE],
                                  dtype='float32',
                                  is_sparse=is_sparse,
                                  param_attr='shared_w')
    embed_second = fluid.embedding(input=words[1],
                                   size=[dict_size, EMBED_SIZE],
                                   dtype='float32',
                                   is_sparse=is_sparse,
                                   param_attr='shared_w')
    embed_third = fluid.embedding(input=words[2],
                                  size=[dict_size, EMBED_SIZE],
                                  dtype='float32',
                                  is_sparse=is_sparse,
                                  param_attr='shared_w')
    embed_fourth = fluid.embedding(input=words[3],
                                   size=[dict_size, EMBED_SIZE],
                                   dtype='float32',
                                   is_sparse=is_sparse,
                                   param_attr='shared_w')

    concat_embed = fluid.layers.concat(
        input=[embed_first, embed_second, embed_third, embed_fourth], axis=1)
    hidden1 = fluid.layers.fc(input=concat_embed,
                              size=HIDDEN_SIZE,
                              act='sigmoid')
    predict_word = fluid.layers.fc(input=hidden1,
                                   size=dict_size,
                                   act='softmax')
    return predict_word

def sparse_fm_layer(input, emb_dict_size, factor_size, fm_param_attr):
    """sparse_fm_layer"""
    # first-order term: per-feature scalar weights, summed over the sequence
    first_embeddings = fluid.embedding(
        input=input, dtype='float32', size=[emb_dict_size, 1], is_sparse=True)
    first_embeddings = fluid.layers.squeeze(
        input=first_embeddings, axes=[1])
    first_order = fluid.layers.sequence_pool(
        input=first_embeddings, pool_type='sum')

    # second-order term: 0.5 * ((sum of embeddings)^2 - sum of squared embeddings)
    nonzero_embeddings = fluid.embedding(
        input=input,
        dtype='float32',
        size=[emb_dict_size, factor_size],
        param_attr=fm_param_attr,
        is_sparse=True)
    nonzero_embeddings = fluid.layers.squeeze(
        input=nonzero_embeddings, axes=[1])
    summed_features_emb = fluid.layers.sequence_pool(
        input=nonzero_embeddings, pool_type='sum')
    summed_features_emb_square = fluid.layers.square(summed_features_emb)
    squared_features_emb = fluid.layers.square(nonzero_embeddings)
    squared_sum_features_emb = fluid.layers.sequence_pool(
        input=squared_features_emb, pool_type='sum')
    second_order = 0.5 * (
        summed_features_emb_square - squared_sum_features_emb)
    return first_order, second_order

def prepare_encoder_decoder(src_word,
                            src_pos,
                            src_vocab_size,
                            src_emb_dim,
                            src_max_len,
                            dropout_rate=0.,
                            bos_idx=0,
                            word_emb_param_name=None,
                            pos_enc_param_name=None):
    """Add word embeddings and position encodings.
    The output tensor has a shape of:
    [batch_size, max_src_length_in_batch, d_model].
    This module is used at the bottom of the encoder stacks.
    """
    src_word_emb = fluid.embedding(
        src_word,
        size=[src_vocab_size, src_emb_dim],
        padding_idx=bos_idx,  # set embedding of bos to 0
        param_attr=fluid.ParamAttr(
            name=word_emb_param_name,
            initializer=fluid.initializer.Normal(0., src_emb_dim**-0.5)))
    src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim**0.5)
    src_pos_enc = fluid.embedding(
        src_pos,
        size=[src_max_len, src_emb_dim],
        param_attr=fluid.ParamAttr(name=pos_enc_param_name, trainable=False))
    src_pos_enc.stop_gradient = True
    enc_input = src_word_emb + src_pos_enc
    return layers.dropout(
        enc_input,
        dropout_prob=dropout_rate,
        seed=dropout_seed,
        is_test=False) if dropout_rate else enc_input

def _infer_net(self, inputs):
    user_data = inputs[0]
    all_item_data = inputs[1]
    pos_label = inputs[2]

    user_emb = fluid.embedding(input=user_data,
                               size=[self.vocab_size, self.emb_dim],
                               param_attr="emb.item")
    all_item_emb = fluid.embedding(input=all_item_data,
                                   size=[self.vocab_size, self.emb_dim],
                                   param_attr="emb.item")
    all_item_emb_re = fluid.layers.reshape(x=all_item_emb,
                                           shape=[-1, self.emb_dim])

    user_encoder = GrnnEncoder()
    user_enc = user_encoder.forward(user_emb)
    user_hid = fluid.layers.fc(input=user_enc,
                               size=self.hidden_size,
                               param_attr='user.w',
                               bias_attr="user.b")
    user_exp = fluid.layers.expand(x=user_hid,
                                   expand_times=[1, self.vocab_size])
    user_re = fluid.layers.reshape(x=user_exp, shape=[-1, self.hidden_size])

    all_item_hid = fluid.layers.fc(input=all_item_emb_re,
                                   size=self.hidden_size,
                                   param_attr='item.w',
                                   bias_attr="item.b")
    cos_item = fluid.layers.cos_sim(X=all_item_hid, Y=user_re)
    all_pre_ = fluid.layers.reshape(x=cos_item, shape=[-1, self.vocab_size])
    acc = fluid.layers.accuracy(input=all_pre_, label=pos_label, k=20)
    self._infer_results['recall20'] = acc

def infer_net(self):
    self.infer_input()
    # lookup embedding for each slot
    q_embs = [
        fluid.embedding(input=query, size=self.emb_shape, param_attr="emb")
        for query in self.q_slots
    ]
    pt_embs = [
        fluid.embedding(input=title, size=self.emb_shape, param_attr="emb")
        for title in self.pt_slots
    ]
    # encode each embedding field with encoder
    q_encodes = [
        self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
    ]
    pt_encodes = [
        self.title_encoders[i].forward(emb) for i, emb in enumerate(pt_embs)
    ]
    # concat multi view for query, pos_title, neg_title
    q_concat = fluid.layers.concat(q_encodes)
    pt_concat = fluid.layers.concat(pt_encodes)
    # projection of hidden layer
    q_hid = fluid.layers.fc(q_concat,
                            size=self.hidden_size,
                            param_attr='q_fc.w',
                            bias_attr='q_fc.b')
    pt_hid = fluid.layers.fc(pt_concat,
                             size=self.hidden_size,
                             param_attr='t_fc.w',
                             bias_attr='t_fc.b')
    # cosine of hidden layers
    cos = fluid.layers.cos_sim(q_hid, pt_hid)
    self._infer_results['query_pt_sim'] = cos

def __init__(self,
             num_layers,
             hidden_size,
             dropout_prob,
             src_vocab_size,
             trg_vocab_size,
             start_token,
             end_token,
             decoding_strategy="infer_sample",
             max_decoding_length=20,
             beam_size=4):
    self.start_token, self.end_token = start_token, end_token
    self.max_decoding_length, self.beam_size = max_decoding_length, beam_size
    self.src_embeder = lambda x: fluid.embedding(
        input=x,
        size=[src_vocab_size, hidden_size],
        dtype="float32",
        param_attr=fluid.ParamAttr(name="source_embedding"))
    self.trg_embeder = lambda x: fluid.embedding(
        input=x,
        size=[trg_vocab_size, hidden_size],
        dtype="float32",
        param_attr=fluid.ParamAttr(name="target_embedding"))
    self.encoder = Encoder(num_layers, hidden_size, dropout_prob)
    self.decoder = Decoder(num_layers, hidden_size, dropout_prob,
                           decoding_strategy, max_decoding_length)
    self.output_layer = lambda x: layers.fc(
        x,
        size=trg_vocab_size,
        num_flatten_dims=len(x.shape) - 1,
        param_attr=fluid.ParamAttr(name="output_w"),
        bias_attr=False)

def test_errors(self):
    with program_guard(Program(), Program()):
        input_data = np.random.randint(0, 10, (4, 6)).astype("int64")

        def test_Variable():
            # the input type must be Variable
            fluid.embedding(input=input_data, size=(10, 64))

        self.assertRaises(TypeError, test_Variable)

        def test_input_dtype():
            # the input dtype must be int64
            input = fluid.data(name='x1', shape=[4, 6], dtype='float32')
            fluid.embedding(input=input, size=(10, 64))

        self.assertRaises(TypeError, test_input_dtype)

        def test_param_dtype():
            # dtype must be float32 or float64
            input2 = fluid.data(name='x2', shape=[4, 6], dtype='int64')
            fluid.embedding(input=input2, size=(10, 64), dtype='int64')

        self.assertRaises(TypeError, test_param_dtype)

        # float16 is an accepted parameter dtype and should not raise
        input3 = fluid.data(name='x3', shape=[4, 6], dtype='int64')
        fluid.embedding(input=input3, size=(10, 64), dtype='float16')

def train(self):
    vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None,
                                     self._namespace)
    emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None,
                                  self._namespace)
    hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None,
                                      self._namespace)
    emb_shape = [vocab_size, emb_dim]

    self.user_encoder = GrnnEncoder()
    self.item_encoder = BowEncoder()
    self.pairwise_hinge_loss = PairwiseHingeLoss()

    user_data = fluid.data(name="user",
                           shape=[None, 1],
                           dtype="int64",
                           lod_level=1)
    pos_item_data = fluid.data(name="p_item",
                               shape=[None, 1],
                               dtype="int64",
                               lod_level=1)
    neg_item_data = fluid.data(name="n_item",
                               shape=[None, 1],
                               dtype="int64",
                               lod_level=1)
    self._data_var.extend([user_data, pos_item_data, neg_item_data])

    user_emb = fluid.embedding(input=user_data,
                               size=emb_shape,
                               param_attr="emb.item")
    pos_item_emb = fluid.embedding(input=pos_item_data,
                                   size=emb_shape,
                                   param_attr="emb.item")
    neg_item_emb = fluid.embedding(input=neg_item_data,
                                   size=emb_shape,
                                   param_attr="emb.item")

    user_enc = self.user_encoder.forward(user_emb)
    pos_item_enc = self.item_encoder.forward(pos_item_emb)
    neg_item_enc = self.item_encoder.forward(neg_item_emb)

    user_hid = fluid.layers.fc(input=user_enc,
                               size=hidden_size,
                               param_attr='user.w',
                               bias_attr="user.b")
    pos_item_hid = fluid.layers.fc(input=pos_item_enc,
                                   size=hidden_size,
                                   param_attr='item.w',
                                   bias_attr="item.b")
    neg_item_hid = fluid.layers.fc(input=neg_item_enc,
                                   size=hidden_size,
                                   param_attr='item.w',
                                   bias_attr="item.b")

    cos_pos = fluid.layers.cos_sim(user_hid, pos_item_hid)
    cos_neg = fluid.layers.cos_sim(user_hid, neg_item_hid)
    hinge_loss = self.pairwise_hinge_loss.forward(cos_pos, cos_neg)
    avg_cost = fluid.layers.mean(hinge_loss)
    correct = self.get_correct(cos_neg, cos_pos)

    self._cost = avg_cost
    self._metrics["correct"] = correct
    self._metrics["hinge_loss"] = hinge_loss

def infer_network(vocab_size, batch_size, hid_size, dropout=0.2):
    src = fluid.data(name="src", shape=[None, 1], dtype="int64", lod_level=1)
    emb_src = fluid.embedding(
        input=src, size=[vocab_size, hid_size], param_attr="emb")
    emb_src_drop = fluid.layers.dropout(
        emb_src, dropout_prob=dropout, is_test=True)

    fc0 = fluid.layers.fc(input=emb_src_drop,
                          size=hid_size * 3,
                          param_attr="gru_fc",
                          bias_attr=False)
    gru_h0 = fluid.layers.dynamic_gru(input=fc0,
                                      size=hid_size,
                                      param_attr="dy_gru.param",
                                      bias_attr="dy_gru.bias")
    gru_h0_drop = fluid.layers.dropout(
        gru_h0, dropout_prob=dropout, is_test=True)

    all_label = fluid.data(
        name="all_label", shape=[vocab_size, 1], dtype="int64")
    emb_all_label = fluid.embedding(
        input=all_label, size=[vocab_size, hid_size], param_attr="emb")
    emb_all_label = fluid.layers.squeeze(input=emb_all_label, axes=[1])
    emb_all_label_drop = fluid.layers.dropout(
        emb_all_label, dropout_prob=dropout, is_test=True)

    all_pre = fluid.layers.matmul(
        gru_h0_drop, emb_all_label_drop, transpose_y=True)

    pos_label = fluid.data(
        name="pos_label", shape=[None, 1], dtype="int64", lod_level=1)
    acc = fluid.layers.accuracy(input=all_pre, label=pos_label, k=20)
    return acc

def net(self, input, is_infer=False):
    """network"""
    text = input[0]
    pos_tag = input[1]
    neg_tag = input[2]

    text_emb = fluid.embedding(input=text,
                               size=[self.vocab_text_size, self.emb_dim],
                               param_attr="text_emb")
    text_emb = fluid.layers.squeeze(input=text_emb, axes=[1])
    pos_tag_emb = fluid.embedding(input=pos_tag,
                                  size=[self.vocab_tag_size, self.emb_dim],
                                  param_attr="tag_emb")
    pos_tag_emb = fluid.layers.squeeze(input=pos_tag_emb, axes=[1])
    neg_tag_emb = fluid.embedding(input=neg_tag,
                                  size=[self.vocab_tag_size, self.emb_dim],
                                  param_attr="tag_emb")
    neg_tag_emb = fluid.layers.squeeze(input=neg_tag_emb, axes=[1])

    conv_1d = fluid.nets.sequence_conv_pool(input=text_emb,
                                            num_filters=self.hid_dim,
                                            filter_size=self.win_size,
                                            act="tanh",
                                            pool_type="max",
                                            param_attr="cnn")
    text_hid = fluid.layers.fc(input=conv_1d,
                               size=self.emb_dim,
                               param_attr="text_hid")
    cos_pos = nn.cos_sim(pos_tag_emb, text_hid)
    mul_text_hid = fluid.layers.sequence_expand_as(x=text_hid, y=neg_tag_emb)
    mul_cos_neg = nn.cos_sim(neg_tag_emb, mul_text_hid)
    cos_neg_all = fluid.layers.sequence_reshape(input=mul_cos_neg,
                                                new_dim=self.neg_size)
    # choose the max negative cosine
    cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)
    # calculate hinge loss: max(0, margin - cos_pos + cos_neg)
    loss_part1 = nn.elementwise_sub(
        tensor.fill_constant_batch_size_like(input=cos_pos,
                                             shape=[-1, 1],
                                             value=self.margin,
                                             dtype='float32'),
        cos_pos)
    loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
    loss_part3 = nn.elementwise_max(
        tensor.fill_constant_batch_size_like(input=loss_part2,
                                             shape=[-1, 1],
                                             value=0.0,
                                             dtype='float32'),
        loss_part2)
    avg_cost = nn.mean(loss_part3)
    less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
    correct = nn.reduce_sum(less)
    self._cost = avg_cost

    if is_infer:
        self._infer_results["correct"] = correct
        self._infer_results["cos_pos"] = cos_pos
    else:
        self._metrics["correct"] = correct
        self._metrics["cos_pos"] = cos_pos

def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
    # padding id in vocabulary must be set to 0
    emb_out = fluid.embedding(
        input=src_ids,
        size=[self._voc_size, self._emb_size],
        dtype=self._dtype,
        param_attr=fluid.ParamAttr(
            name=self._word_emb_name, initializer=self._param_initializer),
        is_sparse=False)
    position_emb_out = fluid.embedding(
        input=position_ids,
        size=[self._max_position_seq_len, self._emb_size],
        dtype=self._dtype,
        param_attr=fluid.ParamAttr(
            name=self._pos_emb_name, initializer=self._param_initializer))
    sent_emb_out = fluid.embedding(
        sentence_ids,
        size=[self._sent_types, self._emb_size],
        dtype=self._dtype,
        param_attr=fluid.ParamAttr(
            name=self._sent_emb_name, initializer=self._param_initializer))

    # sum the word, position and sentence embeddings, then apply
    # the pre-encoder processing (normalization + dropout)
    emb_out = emb_out + position_emb_out
    emb_out = emb_out + sent_emb_out
    emb_out = pre_process_layer(
        emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')

    if self._dtype == "float16":
        input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype)

    # build the additive self-attention mask: padded positions get a
    # large negative bias; the same mask is stacked for every head
    self_attn_mask = fluid.layers.matmul(
        x=input_mask, y=input_mask, transpose_y=True)
    self_attn_mask = fluid.layers.scale(
        x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
    n_head_self_attn_mask = fluid.layers.stack(
        x=[self_attn_mask] * self._n_head, axis=1)
    n_head_self_attn_mask.stop_gradient = True

    self._enc_out = encoder(
        enc_input=emb_out,
        attn_bias=n_head_self_attn_mask,
        n_layer=self._n_layer,
        n_head=self._n_head,
        d_key=self._emb_size // self._n_head,
        d_value=self._emb_size // self._n_head,
        d_model=self._emb_size,
        d_inner_hid=self._emb_size * 4,
        prepostprocess_dropout=self._prepostprocess_dropout,
        attention_dropout=self._attention_dropout,
        relu_dropout=0,
        hidden_act=self._hidden_act,
        preprocess_cmd="",
        postprocess_cmd="dan",
        param_initializer=self._param_initializer,
        name='encoder')

def __init__(self,
             hidden_size,
             latent_size,
             src_vocab_size,
             tar_vocab_size,
             batch_size,
             num_layers=1,
             init_scale=0.1,
             dec_dropout_in=0.5,
             dec_dropout_out=0.5,
             enc_dropout_in=0.,
             enc_dropout_out=0.,
             word_keep_prob=0.5,
             batch_first=True,
             attr_init="normal_initializer"):
    self.hidden_size = hidden_size
    self.latent_size = latent_size
    self.src_vocab_size = src_vocab_size
    self.tar_vocab_size = tar_vocab_size
    self.batch_size = batch_size
    self.num_layers = num_layers
    self.init_scale = init_scale
    self.dec_dropout_in = dec_dropout_in
    self.dec_dropout_out = dec_dropout_out
    self.enc_dropout_in = enc_dropout_in
    self.enc_dropout_out = enc_dropout_out
    self.word_keep_prob = word_keep_prob
    self.batch_first = batch_first

    if attr_init == "normal_initializer":
        self.param_attr_initializer = normal_initializer
        self.param_attr_scale = hidden_size
    elif attr_init == "uniform_initializer":
        self.param_attr_initializer = uniform_initializer
        self.param_attr_scale = init_scale
    else:
        raise ValueError(
            "The value of 'attr_init' is not supported: %s" % attr_init)

    self.src_embeder = lambda x: fluid.embedding(
        input=x,
        size=[self.src_vocab_size, self.hidden_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(
            name='source_embedding',
            initializer=self.param_attr_initializer(self.param_attr_scale)))
    self.tar_embeder = lambda x: fluid.embedding(
        input=x,
        size=[self.tar_vocab_size, self.hidden_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(
            name='target_embedding',
            initializer=self.param_attr_initializer(self.param_attr_scale)))

def a_star(sequence, current, goal, config):
    """a_star"""
    input_dim = config.INPUT_SIZE
    class_dim = config.CLASS_SIZE
    embed_dim = config.EMBED_SIZE
    hidden_dim = config.HIDDEN_SIZE
    stacked_num = config.STACKED_NUM

    # the three inputs share one embedding table
    weight_data = np.random.random(size=(input_dim, embed_dim))
    my_param_attrs = fluid.ParamAttr(
        name="embedding",
        learning_rate=config.LEARNING_RATE,
        initializer=fluid.initializer.NumpyArrayInitializer(weight_data),
        trainable=True)
    seq_embed = fluid.embedding(input=sequence,
                                size=[input_dim, embed_dim],
                                param_attr=my_param_attrs)
    curr_embed = fluid.embedding(input=current,
                                 size=[input_dim, embed_dim],
                                 param_attr=my_param_attrs)
    goal_embed = fluid.embedding(input=goal,
                                 size=[input_dim, embed_dim],
                                 param_attr=my_param_attrs)

    # stacked LSTM over the sequence embedding
    fc1 = fluid.layers.fc(input=seq_embed, size=hidden_dim)
    lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hidden_dim)
    inputs = [fc1, lstm1]
    for i in range(2, stacked_num + 1):
        fc = fluid.layers.fc(input=inputs, size=hidden_dim)
        lstm, cell = fluid.layers.dynamic_lstm(input=fc,
                                               size=hidden_dim,
                                               is_reverse=(i % 2) == 0)
        inputs = [fc, lstm]
    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
    lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')

    current_cost_embed = [fc_last, lstm_last, curr_embed]
    remain_cost_embed = [fc_last, lstm_last, goal_embed]
    pred_curr_fc1 = fluid.layers.fc(input=current_cost_embed,
                                    size=64,
                                    act="relu")
    current_cost = fluid.layers.fc(input=pred_curr_fc1, size=1, act="sigmoid")
    pred_goal_fc1 = fluid.layers.fc(input=remain_cost_embed,
                                    size=64,
                                    act="relu")
    remain_cost = fluid.layers.fc(input=pred_goal_fc1, size=1, act="sigmoid")
    # A*-style estimate: blend the cost so far with the estimated remaining cost
    prediction = 0.5 * current_cost + 0.5 * remain_cost
    return prediction

def infer(self):
    vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None,
                                     self._namespace)
    emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None,
                                  self._namespace)
    hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None,
                                      self._namespace)

    user_data = fluid.data(name="user",
                           shape=[None, 1],
                           dtype="int64",
                           lod_level=1)
    all_item_data = fluid.data(name="all_item",
                               shape=[None, vocab_size],
                               dtype="int64")
    pos_label = fluid.data(name="pos_label", shape=[None, 1], dtype="int64")
    self._infer_data_var = [user_data, all_item_data, pos_label]
    self._infer_data_loader = fluid.io.DataLoader.from_generator(
        feed_list=self._infer_data_var,
        capacity=64,
        use_double_buffer=False,
        iterable=False)

    user_emb = fluid.embedding(input=user_data,
                               size=[vocab_size, emb_dim],
                               param_attr="emb.item")
    all_item_emb = fluid.embedding(input=all_item_data,
                                   size=[vocab_size, emb_dim],
                                   param_attr="emb.item")
    all_item_emb_re = fluid.layers.reshape(x=all_item_emb,
                                           shape=[-1, emb_dim])

    user_encoder = GrnnEncoder()
    user_enc = user_encoder.forward(user_emb)
    user_hid = fluid.layers.fc(input=user_enc,
                               size=hidden_size,
                               param_attr='user.w',
                               bias_attr="user.b")
    user_exp = fluid.layers.expand(x=user_hid,
                                   expand_times=[1, vocab_size])
    user_re = fluid.layers.reshape(x=user_exp, shape=[-1, hidden_size])

    all_item_hid = fluid.layers.fc(input=all_item_emb_re,
                                   size=hidden_size,
                                   param_attr='item.w',
                                   bias_attr="item.b")
    cos_item = fluid.layers.cos_sim(X=all_item_hid, Y=user_re)
    all_pre_ = fluid.layers.reshape(x=cos_item, shape=[-1, vocab_size])
    acc = fluid.layers.accuracy(input=all_pre_, label=pos_label, k=20)
    self._infer_results['recall20'] = acc

def def_seq2seq_model(num_layers, hidden_size, dropout_prob, src_vocab_size,
                      trg_vocab_size):
    """vanilla seq2seq model"""
    # data
    source = fluid.data(name="src", shape=[None, None], dtype="int64")
    source_length = fluid.data(name="src_sequence_length",
                               shape=[None],
                               dtype="int64")
    target = fluid.data(name="trg", shape=[None, None], dtype="int64")
    target_length = fluid.data(name="trg_sequence_length",
                               shape=[None],
                               dtype="int64")
    label = fluid.data(name="label", shape=[None, None, 1], dtype="int64")

    # embedding (the target side embeds with the target vocabulary)
    src_emb = fluid.embedding(source, (src_vocab_size, hidden_size))
    tar_emb = fluid.embedding(target, (trg_vocab_size, hidden_size))

    # encoder
    enc_cell = EncoderCell(num_layers, hidden_size, dropout_prob)
    enc_output, enc_final_state = dynamic_rnn(cell=enc_cell,
                                              inputs=src_emb,
                                              sequence_length=source_length)

    # decoder
    dec_cell = DecoderCell(num_layers, hidden_size, dropout_prob)
    dec_output, dec_final_state = dynamic_rnn(cell=dec_cell,
                                              inputs=tar_emb,
                                              initial_states=enc_final_state)
    logits = layers.fc(dec_output,
                       size=trg_vocab_size,
                       num_flatten_dims=len(dec_output.shape) - 1,
                       bias_attr=False)

    # loss
    loss = layers.softmax_with_cross_entropy(logits=logits,
                                             label=label,
                                             soft_label=False)
    # drop the trailing singleton dim so the loss matches the mask shape
    loss = layers.squeeze(loss, axes=[2])
    max_tar_seq_len = layers.shape(target)[1]
    tar_mask = layers.sequence_mask(target_length,
                                    maxlen=max_tar_seq_len,
                                    dtype="float32")
    loss = loss * tar_mask
    loss = layers.reduce_mean(loss, dim=[0])
    loss = layers.reduce_sum(loss)

    # optimizer
    optimizer = fluid.optimizer.Adam(0.001)
    optimizer.minimize(loss)
    return loss

def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size):
    if args.embedding_type == "dense":
        emb = fluid.embedding(input=data,
                              size=[input_dim, emb_dim],
                              is_sparse=False,
                              dtype="float32")
    elif args.embedding_type == "sparse":
        emb = fluid.embedding(input=data,
                              size=[input_dim, emb_dim],
                              is_sparse=True,
                              dtype="float32")
    else:
        print("not a valid embedding type:", args.embedding_type)
        exit()

    sentence = fluid.layers.fc(input=emb, size=lstm_size * 4, act='tanh')
    lstm, _ = fluid.layers.dynamic_lstm(sentence,
                                        size=lstm_size * 4,
                                        dtype="float32")
    last = fluid.layers.sequence_last_step(lstm)
    prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax")
    return prediction

def net(self, input, is_infer=False):
    """network definition"""
    data = input[0]
    label = input[1]
    seq_len = input[2]

    # embedding layer
    emb = fluid.embedding(input=data,
                          size=[self.dict_dim, self.emb_dim],
                          is_sparse=self.is_sparse)
    emb = fluid.layers.sequence_unpad(emb, length=seq_len)
    # convolution layer
    conv = fluid.nets.sequence_conv_pool(input=emb,
                                         num_filters=self.cnn_dim,
                                         filter_size=self.cnn_filter_size,
                                         act="tanh",
                                         pool_type="max")
    # fully connected layer
    fc_1 = fluid.layers.fc(input=[conv], size=self.hid_dim)
    # softmax layer
    prediction = fluid.layers.fc(input=[fc_1],
                                 size=self.class_dim,
                                 act="softmax")

    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc = fluid.layers.accuracy(input=prediction, label=label)

    self._cost = avg_cost
    if is_infer:
        self._infer_results["acc"] = acc
    else:
        self._metrics["acc"] = acc

def train_net(self):
    """network definition"""
    data = fluid.data(name="input",
                      shape=[None, self.max_len],
                      dtype='int64')
    label = fluid.data(name="label", shape=[None, 1], dtype='int64')
    seq_len = fluid.data(name="seq_len", shape=[None], dtype='int64')
    self._data_var = [data, label, seq_len]

    # embedding layer
    emb = fluid.embedding(input=data, size=[self.dict_dim, self.emb_dim])
    emb = fluid.layers.sequence_unpad(emb, length=seq_len)
    # convolution layer
    conv = fluid.nets.sequence_conv_pool(input=emb,
                                         num_filters=self.cnn_dim,
                                         filter_size=self.cnn_filter_size,
                                         act="tanh",
                                         pool_type="max")
    # fully connected layer
    fc_1 = fluid.layers.fc(input=[conv], size=self.hid_dim)
    # softmax layer
    prediction = fluid.layers.fc(input=[fc_1],
                                 size=self.class_dim,
                                 act="softmax")

    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc = fluid.layers.accuracy(input=prediction, label=label)

    self._cost = avg_cost
    self._metrics["acc"] = acc

def forward(self, word):
    word_input = [word]
    emb_layers = [
        fluid.embedding(size=[self.word_dict_len, self.word_dim],
                        input=x,
                        param_attr=fluid.ParamAttr(
                            name='emb',
                            learning_rate=self.hidden_lr,
                            trainable=True)) for x in word_input
    ]
    hidden_0_layers = [
        fluid.layers.fc(input=emb, size=self.hidden_size, act='tanh')
        for emb in emb_layers
    ]
    hidden_0 = fluid.layers.sums(input=hidden_0_layers)
    lstm_0 = fluid.layers.dynamic_lstm(input=hidden_0,
                                       size=self.hidden_size,
                                       candidate_activation='relu',
                                       gate_activation='sigmoid',
                                       cell_activation='sigmoid')

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]
    for i in range(1, self.depth):
        mix_hidden = fluid.layers.sums(input=[
            fluid.layers.fc(
                input=input_tmp[0], size=self.hidden_size, act='tanh'),
            fluid.layers.fc(
                input=input_tmp[1], size=self.hidden_size, act='tanh')
        ])
        lstm = fluid.layers.dynamic_lstm(input=mix_hidden,
                                         size=self.hidden_size,
                                         candidate_activation='relu',
                                         gate_activation='sigmoid',
                                         cell_activation='sigmoid',
                                         is_reverse=((i % 2) == 1))
        input_tmp = [mix_hidden, lstm]

    feature_out = fluid.layers.sums(input=[
        fluid.layers.fc(
            input=input_tmp[0], size=self.label_dict_len, act='tanh'),
        fluid.layers.fc(
            input=input_tmp[1], size=self.label_dict_len, act='tanh')
    ])
    crf_cost = fluid.layers.linear_chain_crf(
        input=feature_out,
        label=self.target,
        param_attr=fluid.ParamAttr(name='crfw',
                                   learning_rate=self.mix_hidden_lr))
    avg_cost = fluid.layers.mean(crf_cost)
    self.backward(avg_cost)
    return avg_cost, feature_out

def model_func(inputs, is_train=True):
    src = inputs[0]
    src_sequence_length = inputs[1]
    # source embedding
    src_embeder = lambda x: fluid.embedding(
        input=x,
        size=[source_dict_size, hidden_dim],
        dtype="float32",
        param_attr=fluid.ParamAttr(name="src_emb_table"))
    src_embedding = src_embeder(src)

    # encoder
    encoder_output, encoder_state = encoder(src_embedding,
                                            src_sequence_length)
    encoder_output_proj = layers.fc(input=encoder_output,
                                    size=decoder_size,
                                    num_flatten_dims=2,
                                    bias_attr=False)
    src_mask = layers.sequence_mask(src_sequence_length,
                                    maxlen=layers.shape(src)[1],
                                    dtype="float32")
    encoder_padding_mask = (src_mask - 1.0) * 1e9

    trg = inputs[2] if is_train else None

    # decoder
    output = decoder(encoder_output=encoder_output,
                     encoder_output_proj=encoder_output_proj,
                     encoder_state=encoder_state,
                     encoder_padding_mask=encoder_padding_mask,
                     trg=trg,
                     is_train=is_train)
    return output

def bow_net(data,
            seq_len,
            label,
            dict_dim,
            emb_dim=128,
            hid_dim=128,
            hid_dim2=96,
            class_dim=2,
            is_prediction=False):
    """Bow net"""
    # embedding layer
    emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
    emb = fluid.layers.sequence_unpad(emb, length=seq_len)
    # bow layer
    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
    bow_tanh = fluid.layers.tanh(bow)
    # fully connected layers
    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
    fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
    # softmax layer
    prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
    if is_prediction:
        return prediction
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc = fluid.layers.accuracy(input=prediction, label=label)
    return avg_cost, prediction

def cnn_net(data,
            seq_len,
            label,
            dict_dim,
            emb_dim=128,
            hid_dim=128,
            hid_dim2=96,
            class_dim=2,
            win_size=3,
            is_prediction=False):
    """Conv net"""
    # embedding layer
    emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
    emb = fluid.layers.sequence_unpad(emb, length=seq_len)
    # convolution layer
    conv_3 = fluid.nets.sequence_conv_pool(input=emb,
                                           num_filters=hid_dim,
                                           filter_size=win_size,
                                           act="tanh",
                                           pool_type="max")
    # fully connected layer
    fc_1 = fluid.layers.fc(input=[conv_3], size=hid_dim2)
    # softmax layer
    prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax")
    if is_prediction:
        return prediction
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc = fluid.layers.accuracy(input=prediction, label=label)
    return avg_cost, prediction

def gru_net(data,
            seq_len,
            label,
            dict_dim,
            emb_dim=128,
            hid_dim=128,
            hid_dim2=96,
            class_dim=2,
            emb_lr=30.0,
            is_prediction=False):
    """gru net"""
    emb = fluid.embedding(input=data,
                          size=[dict_dim, emb_dim],
                          param_attr=fluid.ParamAttr(learning_rate=emb_lr))
    emb = fluid.layers.sequence_unpad(emb, length=seq_len)
    fc0 = fluid.layers.fc(input=emb, size=hid_dim * 3)
    gru_h = fluid.layers.dynamic_gru(input=fc0,
                                     size=hid_dim,
                                     is_reverse=False)
    gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max')
    gru_max_tanh = fluid.layers.tanh(gru_max)
    fc1 = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh')
    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
    if is_prediction:
        return prediction
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc = fluid.layers.accuracy(input=prediction, label=label)
    return avg_cost, prediction

def cnn_pd(inputs,
           output_dim,
           kernel_size=5,
           dimension=100,
           conv_filters=40,
           stride=2,
           act='relu',
           words_num=10000,
           use_bias=True,
           padding_id=0):
    emb = fluid.embedding(inputs,
                          size=[words_num, dimension],
                          is_sparse=True,
                          padding_idx=padding_id,
                          param_attr="shared_w")
    print('emb', emb.shape)
    # add a channel dim so the sequence can be convolved as a 2-D "image"
    emb = fluid.layers.reshape(
        emb, shape=[emb.shape[0], 1, emb.shape[1], emb.shape[2]])
    print('emb', emb.shape)
    conv_out = fluid.layers.conv2d(emb,
                                   num_filters=conv_filters,
                                   stride=(stride, 1),
                                   filter_size=(kernel_size, dimension),
                                   act=act,
                                   bias_attr=use_bias)
    print('conv_out', conv_out.shape)
    pool = fluid.layers.pool2d(conv_out, pool_size=(2, 1))
    print('pool', pool.shape)
    pred = fluid.layers.fc([pool], size=output_dim, act='softmax')
    return pred

def _create_embedding_input(self, data_dict):
    # sparse embedding
    sparse_emb_dict = OrderedDict(
        (name,
         fluid.embedding(
             input=fluid.layers.cast(data_dict[name], dtype='int64'),
             size=[
                 self.feat_dims_dict[name] + 1,
                 6 * int(pow(self.feat_dims_dict[name], 0.25))
             ],
             is_sparse=self.is_sparse)) for name in self.sparse_feat_names)

    # combine dense and sparse_emb
    dense_input_list = [
        data_dict[name] for name in data_dict if name.startswith('I')
    ]
    sparse_emb_list = list(sparse_emb_dict.values())

    sparse_input = fluid.layers.concat(sparse_emb_list, axis=-1)
    sparse_input = fluid.layers.flatten(sparse_input)

    dense_input = fluid.layers.concat(dense_input_list, axis=-1)
    dense_input = fluid.layers.flatten(dense_input)
    dense_input = fluid.layers.cast(dense_input, 'float32')

    net_input = fluid.layers.concat([dense_input, sparse_input], axis=-1)
    return net_input

def model_func(inputs, is_train=True):
    # inputs = [src, src_sequence_length, trg, trg_sequence_length, label]
    # src = fluid.data(name="src", shape=[None, None], dtype="int64")
    # source language input
    src = inputs[0]
    src_sequence_length = inputs[1]
    src_embedding = fluid.embedding(
        input=src,
        size=[source_dict_size, hidden_dim],
        dtype="float32",
        param_attr=fluid.ParamAttr(name="src_emb_table"))

    # encoder
    encoder_output, encoder_state = encoder(src_embedding,
                                            src_sequence_length)
    encoder_output_proj = layers.fc(input=encoder_output,
                                    size=decoder_size,
                                    num_flatten_dims=2,
                                    bias_attr=False)
    src_mask = layers.sequence_mask(src_sequence_length,
                                    maxlen=layers.shape(src)[1],
                                    dtype="float32")
    encoder_padding_mask = (src_mask - 1.0) * 1e9

    # target language input: present during training, absent at inference time
    trg = inputs[2] if is_train else None

    # decoder
    output = decoder(encoder_output=encoder_output,
                     encoder_output_proj=encoder_output_proj,
                     encoder_state=encoder_state,
                     encoder_padding_mask=encoder_padding_mask,
                     trg=trg,
                     is_train=is_train)
    return output

def _create_embedding_input(self):
    # sparse embedding
    sparse_emb_dict = OrderedDict()
    for var in self.sparse_inputs:
        sparse_emb_dict[var.name] = fluid.embedding(
            input=var,
            size=[
                self.feat_dims_dict[var.name] + 1,
                6 * int(pow(self.feat_dims_dict[var.name], 0.25))
            ],
            is_sparse=self.is_sparse)

    # combine dense and sparse_emb
    dense_input_list = self.dense_inputs
    sparse_emb_list = list(sparse_emb_dict.values())

    sparse_input = fluid.layers.concat(sparse_emb_list, axis=-1)
    sparse_input = fluid.layers.flatten(sparse_input)

    dense_input = fluid.layers.concat(dense_input_list, axis=-1)
    dense_input = fluid.layers.flatten(dense_input)
    dense_input = fluid.layers.cast(dense_input, 'float32')

    net_input = fluid.layers.concat([dense_input, sparse_input], axis=-1)
    return net_input

def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim,
                     stacked_num):
    # compute the word embeddings
    emb = fluid.embedding(input=data,
                          size=[input_dim, emb_dim],
                          is_sparse=True)

    # first stacked layer
    # fully connected layer
    fc1 = fluid.layers.fc(input=emb, size=hid_dim)
    # LSTM layer
    lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim)

    inputs = [fc1, lstm1]

    # all remaining stacked layers
    for i in range(2, stacked_num + 1):
        fc = fluid.layers.fc(input=inputs, size=hid_dim)
        lstm, cell = fluid.layers.dynamic_lstm(input=fc,
                                               size=hid_dim,
                                               is_reverse=(i % 2) == 0)
        inputs = [fc, lstm]

    # pooling layer
    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
    lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')

    # fully connected layer with softmax prediction
    prediction = fluid.layers.fc(input=[fc_last, lstm_last],
                                 size=class_dim,
                                 act='softmax')
    return prediction

def label_embed_input(self, feature):
    label = F.data(name="label", shape=[None, 1], dtype="int64")
    label_idx = F.data(name='label_idx', shape=[None], dtype="int64")
    label = L.reshape(label, shape=[-1])
    label = L.gather(label, label_idx, overwrite=False)

    lay_norm_attr = F.ParamAttr(
        initializer=F.initializer.ConstantInitializer(value=1))
    lay_norm_bias = F.ParamAttr(
        initializer=F.initializer.ConstantInitializer(value=0))
    feature = L.layer_norm(feature,
                           name='layer_norm_feature_input1',
                           param_attr=lay_norm_attr,
                           bias_attr=lay_norm_bias)

    embed_attr = F.ParamAttr(
        initializer=F.initializer.NormalInitializer(loc=0.0, scale=1.0))
    embed = F.embedding(input=label,
                        size=(self.out_size, self.embed_size),
                        param_attr=embed_attr)
    lay_norm_attr = F.ParamAttr(
        initializer=F.initializer.ConstantInitializer(value=1))
    lay_norm_bias = F.ParamAttr(
        initializer=F.initializer.ConstantInitializer(value=0))
    embed = L.layer_norm(embed,
                         name='layer_norm_feature_input2',
                         param_attr=lay_norm_attr,
                         bias_attr=lay_norm_bias)
    embed = L.relu(embed)

    # add the label embedding onto the features of the labeled nodes only
    feature_label = L.gather(feature, label_idx, overwrite=False)
    feature_label = feature_label + embed
    feature = L.scatter(feature, label_idx, feature_label, overwrite=True)
    return feature

def bilstm_net(data,
               dict_dim,
               class_dim,
               emb_dim=128,
               hid_dim=128,
               hid_dim2=96,
               emb_lr=30.0):
    # embedding layer
    emb = fluid.embedding(input=data,
                          size=[dict_dim, emb_dim],
                          param_attr=fluid.ParamAttr(learning_rate=emb_lr))

    # bi-lstm layer
    fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
    rfc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
    lstm_h, c = fluid.layers.dynamic_lstm(input=fc0,
                                          size=hid_dim * 4,
                                          is_reverse=False)
    rlstm_h, c = fluid.layers.dynamic_lstm(input=rfc0,
                                           size=hid_dim * 4,
                                           is_reverse=True)

    # extract the last step of each direction
    lstm_last = fluid.layers.sequence_last_step(input=lstm_h)
    rlstm_last = fluid.layers.sequence_last_step(input=rlstm_h)

    # concat layer
    lstm_concat = fluid.layers.concat(input=[lstm_last, rlstm_last], axis=1)

    # fully connected layer
    fc1 = fluid.layers.fc(input=lstm_concat, size=hid_dim2, act='tanh')
    # softmax layer
    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
    return prediction