def _net_conf(word_ids, target):
    """
    Configure the network
    """
    word_embedding = fluid.layers.embedding(
        input=word_ids,
        size=[word_dict_len, word_emb_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr=fluid.ParamAttr(
            learning_rate=emb_lr,
            name="word_emb",
            initializer=fluid.initializer.Uniform(
                low=-init_bound, high=init_bound)))

    # add elmo embedding
    elmo_emb = elmo_encoder(word_ids, args.elmo_l2_coef)
    input_feature = layers.concat(input=[elmo_emb, word_embedding], axis=1)

    # stack bigru_num bidirectional GRU layers
    for i in range(bigru_num):
        bigru_output = _bigru_layer(input_feature)
        input_feature = bigru_output

    # project the final BiGRU output to per-label emission scores
    emission = fluid.layers.fc(
        size=label_dict_len,
        input=bigru_output,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(
                low=-init_bound, high=init_bound),
            regularizer=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=1e-4)))

    # linear-chain CRF loss over the emissions; decoding shares the 'crfw'
    # transition parameters learned by the loss
    crf_cost = fluid.layers.linear_chain_crf(
        input=emission,
        label=target,
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=crf_lr))
    crf_decode = fluid.layers.crf_decoding(
        input=emission, param_attr=fluid.ParamAttr(name='crfw'))
    avg_cost = fluid.layers.mean(x=crf_cost)
    return avg_cost, crf_decode
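# A minimal sketch of how _net_conf might be wired into a training program.
# It assumes the module-level hyperparameters the function closes over
# (word_dict_len, word_emb_dim, IS_SPARSE, emb_lr, init_bound, bigru_num,
# label_dict_len, crf_lr, args) are already defined; the helper name and
# 'base_lr' are hypothetical, and the lod_level=1 int64 sequence shapes
# follow the usual convention for linear_chain_crf inputs.
def _example_build_train_net(base_lr=0.001):
    # word ids and gold labels arrive as variable-length token sequences
    word_ids = fluid.layers.data(
        name='word', shape=[1], dtype='int64', lod_level=1)
    target = fluid.layers.data(
        name='target', shape=[1], dtype='int64', lod_level=1)
    avg_cost, crf_decode = _net_conf(word_ids, target)
    sgd = fluid.optimizer.SGD(learning_rate=base_lr)
    sgd.minimize(avg_cost)
    return avg_cost, crf_decode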
def rc_model(hidden_size, vocab, args):
    emb_shape = [vocab.size(), vocab.embed_dim]
    start_labels = layers.data(
        name="start_lables", shape=[1], dtype='float32', lod_level=1)
    end_labels = layers.data(
        name="end_lables", shape=[1], dtype='float32', lod_level=1)

    # stage 1: encode
    q_id0 = get_data('q_id0', 1, args)
    q_ids = get_data('q_ids', 2, args)
    p_ids_name = 'p_ids'
    p_ids = get_data('p_ids', 2, args)
    q_ids_elmo = get_data('q_ids_elmo', 2, args)
    p_ids_elmo = get_data('p_ids_elmo', 2, args)

    p_embs = embedding(p_ids, emb_shape, args)
    q_embs = embedding(q_ids, emb_shape, args)
    if args.elmo:
        q_embs_elmo = emb(q_ids_elmo)
        p_embs_elmo = emb(p_ids_elmo)

    drnn = layers.DynamicRNN()
    with drnn.block():
        p_emb = drnn.step_input(p_embs)
        q_emb = drnn.step_input(q_embs)
        if args.elmo:
            q_emb_elmo = drnn.step_input(q_embs_elmo)
            p_emb_elmo = drnn.step_input(p_embs_elmo)
            p_encs_elmo = elmo_encoder(p_emb_elmo)
            q_encs_elmo = elmo_encoder(q_emb_elmo)
            # concatenate the ELMo-encoded features with the word embeddings
            p_emb = layers.concat(input=[p_emb, p_encs_elmo], axis=1)
            q_emb = layers.concat(input=[q_emb, q_encs_elmo], axis=1)

        p_enc = encoder(p_emb, 'p_enc', hidden_size, args)
        q_enc = encoder(q_emb, 'q_enc', hidden_size, args)

        # stage 2: match
        g_i = attn_flow(q_enc, p_enc, p_ids_name, args)
        # stage 3: fusion
        m_i = fusion(g_i, args)
        drnn.output(m_i, q_enc)

    ms, q_encs = drnn()
    p_vec = layers.lod_reset(x=ms, y=start_labels)
    q_vec = layers.lod_reset(x=q_encs, y=q_id0)

    # stage 4: decode
    start_probs, end_probs = point_network_decoder(
        p_vec=p_vec, q_vec=q_vec, hidden_size=hidden_size, args=args)

    # sum the cross entropy over each sequence, then average across the batch
    cost0 = layers.sequence_pool(
        layers.cross_entropy(
            input=start_probs, label=start_labels, soft_label=True), 'sum')
    cost1 = layers.sequence_pool(
        layers.cross_entropy(
            input=end_probs, label=end_labels, soft_label=True), 'sum')
    cost0 = layers.mean(cost0)
    cost1 = layers.mean(cost1)
    cost = cost0 + cost1
    cost.persistable = True

    # feed names must match the layers.data names above
    # (including the 'lables' spelling)
    if args.elmo:
        feeding_list = [
            "q_ids", "start_lables", "end_lables", "p_ids", "q_id0",
            "q_ids_elmo", "p_ids_elmo"
        ]
    else:
        feeding_list = ["q_ids", "start_lables", "end_lables", "p_ids", "q_id0"]
    return cost, start_probs, end_probs, ms, feeding_list
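# A minimal sketch of how rc_model might be assembled into a training
# program. rc_model declares its own input layers internally, so calling it
# is enough to build the graph; the helper name and 'base_lr' are
# hypothetical, and 'vocab' stands for the project's vocabulary object
# (anything exposing size() and embed_dim).
def _example_build_rc_train_net(vocab, args, hidden_size=150, base_lr=0.001):
    cost, start_probs, end_probs, ms, feeding_list = rc_model(
        hidden_size, vocab, args)
    optimizer = fluid.optimizer.Adam(learning_rate=base_lr)
    optimizer.minimize(cost)
    # feeding_list gives the feed order the data reader must follow
    return cost, start_probs, end_probs, feeding_list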