def __init__(self, config, sess):
    """Build the IRN training graph and initialise all variables.

    Two training pipelines share one Adam optimiser:

    * a knowledge-base (KB) pre-training step whose loss comes from
      ``self._pretranse()`` and which updates ``EE``, ``RE`` and ``Mse``;
    * a question-answering (QA) step whose loss and prediction come from
      ``self._inference()`` and which updates ``QE``, ``Mrq`` and ``Mrs``.

    Args:
        config: settings object. Attributes read here: ``data_file``,
            ``batch_size``, ``nwords``, ``nrels``, ``nents``, ``query_size``,
            ``edim``, ``path_size``, ``nhop``, ``max_grad_norm`` and
            ``checkpoint_dir``.
        sess: TensorFlow session used to run the variable initialiser; it is
            stored as ``self._sess``.
    """
    # ---- hyper-parameters copied from the config ----
    self._data_file = config.data_file
    self._margin = 4
    self._batch_size = config.batch_size
    self._vocab_size = config.nwords
    self._rel_size = config.nrels
    self._ent_size = config.nents
    self._sentence_size = config.query_size
    self._embedding_size = config.edim
    self._path_size = config.path_size
    self._memory_size = config.nrels
    self._hops = config.nhop
    self._max_grad_norm = config.max_grad_norm
    # Alternative kept by the original author:
    #   tf.random_normal_initializer(stddev=config.init_std)
    self._init = tf.contrib.layers.xavier_initializer()
    self._opt = tf.train.AdamOptimizer()
    self._name = "IRN"

    self._checkpoint_dir = config.checkpoint_dir + '/' + self._name
    if not os.path.exists(self._checkpoint_dir):
        os.makedirs(self._checkpoint_dir)

    self._build_inputs()
    self._build_vars()
    # The saver is deliberately created here, before any optimiser op exists.
    # NOTE(review): presumably so that it only tracks the model variables and
    # not the Adam slots created further down -- confirm.
    self._saver = tf.train.Saver(max_to_keep=1)
    self._encoding = tf.constant(
        position_encoding(self._sentence_size, self._embedding_size),
        name="encoding")

    # ---- KB pre-training pipeline ----
    KB_batch_loss = self._pretranse()
    KB_loss_op = tf.reduce_sum(KB_batch_loss, name="KB_loss_op")
    KB_grads_and_vars = self._opt.compute_gradients(
        KB_loss_op, [self.EE, self.RE, self.Mse])
    # Variables listed in self._nil_vars get their gradient passed through
    # zero_nil_slot() before the update.
    KB_nil_grads_and_vars = []
    for g, v in KB_grads_and_vars:
        if v.name in self._nil_vars:
            KB_nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            KB_nil_grads_and_vars.append((g, v))
    print("KB_grads_and_vars")
    for g, v in KB_nil_grads_and_vars:
        # Single formatted argument keeps the output identical on Python 2 and 3.
        print("%s %s" % (g, v.name))
    # Fix: apply the nil-zeroed gradients. The original applied the raw
    # KB_grads_and_vars, so the zero_nil_slot() result was only ever printed.
    KB_train_op = self._opt.apply_gradients(KB_nil_grads_and_vars,
                                            name="KB_train_op")

    # ---- QA pipeline (cross entropy as loss) ----
    # Shapes noted by the original author: (b, 1), (batch_size, 5).
    batch_loss, p = self._inference()
    QA_loss_op = tf.reduce_sum(batch_loss, name="QA_loss_op")
    QA_params = [self.QE, self.Mrq, self.Mrs]
    QA_grads_and_vars = self._opt.compute_gradients(QA_loss_op, QA_params)
    # Per-tensor norm clipping; variables without a gradient are dropped.
    QA_grads_and_vars = [(tf.clip_by_norm(g, self._max_grad_norm), v)
                         for g, v in QA_grads_and_vars if g is not None]
    QA_grads_and_vars = [(add_gradient_noise(g), v)
                         for g, v in QA_grads_and_vars]
    QA_nil_grads_and_vars = []
    for g, v in QA_grads_and_vars:
        if v.name in self._nil_vars:
            QA_nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            QA_nil_grads_and_vars.append((g, v))
    print("QA_grads_and_vars")
    for g, v in QA_nil_grads_and_vars:
        print("%s %s" % (g, v.name))
    QA_train_op = self._opt.apply_gradients(QA_nil_grads_and_vars,
                                            name="QA_train_op")

    # predict ops
    QA_predict_op = p

    # ---- expose the ops on the instance ----
    self.KB_loss_op = KB_loss_op
    self.KB_train_op = KB_train_op
    self.QA_loss_op = QA_loss_op
    self.QA_predict_op = QA_predict_op
    self.QA_train_op = QA_train_op

    init_op = tf.global_variables_initializer()
    self._sess = sess
    self._sess.run(init_op)
def build_model(self):
    """Assemble the IDCNN tagging graph, its train op, and an initialised session.

    Steps, in graph-construction order:

    1. create one int32 input placeholder, one dropout placeholder and one
       embedding table per entry of ``self.feature_names``;
    2. look up, drop out and concatenate the feature embeddings;
    3. feed them through ``self.IDCNN_layer`` and a softmax projection;
    4. build the loss (``self.compute_loss()`` plus L2 on the projection),
       an Adam train op with optional global-norm clipping;
    5. open a ``tf.Session`` on GPU ``'0'`` and run the global initialiser.

    Returns nothing; everything is stored on ``self``.
    """
    # Containers for placeholders, dropout rates and embedding tables.
    self.input_feature_ph_dict = {}
    self.weight_dropout_ph_dict = {}
    self.feature_weight_dict = {}
    self.nil_vars = set()

    self.dropout_rate_ph = tf.placeholder(tf.float32, name='dropout_rate_ph')

    # Label placeholder.
    self.input_label_ph = tf.placeholder(
        dtype=tf.int32, shape=[None, self.sequence_length],
        name='input_label_ph')

    # Read in the features and build the per-feature structure.
    for name in self.feature_names:
        # Input placeholder.
        self.input_feature_ph_dict[name] = tf.placeholder(
            dtype=tf.int32, shape=[None, self.sequence_length],
            name='input_feature_ph_%s' % name)

        # Dropout-rate placeholder.
        self.weight_dropout_ph_dict[name] = tf.placeholder(
            tf.float32, name='dropout_ph_%s' % name)

        # Embedding table: supplied initial weights if present, otherwise a
        # tensor produced by uniform_tensor().
        if name in self.feature_init_weight_dict:
            table = tf.Variable(
                initial_value=self.feature_init_weight_dict[name],
                name='feature_weight_%s' % name)
            self.feature_weight_dict[name] = table
            # NOTE(review): the flattened source does not show whether this
            # registration sits in this branch or at loop level; kept in the
            # supplied-weights branch -- confirm.
            self.nil_vars.add(table.name)
        else:
            initial = uniform_tensor(
                shape=self.feature_weight_shape_dict[name],
                name='f_w_%s' % name)
            self.feature_weight_dict[name] = tf.Variable(
                initial_value=initial, name='feature_weigth_%s' % name)

        # Default the dropout rate of features that did not specify one.
        self.feature_weight_dropout_dict.setdefault(name, 0.)

    # Encode every feature through its embedding table, with dropout.
    self.embedding_features = []
    for name in self.feature_names:
        looked_up = tf.nn.embedding_lookup(
            self.feature_weight_dict[name],
            ids=self.input_feature_ph_dict[name],
            name='embedding_feature_%s' % name)
        dropped = tf.nn.dropout(
            looked_up,
            keep_prob=1.-self.weight_dropout_ph_dict[name],
            name='embedding_feature_dropout_%s' % name)
        self.embedding_features.append(dropped)

    # Concatenate all feature embeddings (a single feature is used as is).
    if len(self.embedding_features) == 1:
        input_features = self.embedding_features[0]
    else:
        input_features = tf.concat(
            values=self.embedding_features, axis=2, name='input_features')

    # CNN encoder. An earlier bi-LSTM/GRU encoder built with
    # tf.nn.bidirectional_dynamic_rnn used to live here and is disabled.
    cnn_output = self.IDCNN_layer(input_features)

    # Actual length of every sentence, computed from the first feature
    # (0 is the padding value).
    first_feature_ph = self.input_feature_ph_dict[self.feature_names[0]]
    self.sequence_actual_length = get_sequence_actual_length(first_feature_ph)

    # Softmax projection.
    self.softmax_w = tf.get_variable(
        'softmax_w', [self.cnn_output_width, self.nb_classes])
    self.softmax_b = tf.get_variable('softmax_b', [self.nb_classes])
    projected = tf.matmul(cnn_output, self.softmax_w) + self.softmax_b
    self.logits = tf.reshape(
        projected,
        shape=[-1, self.sequence_length, self.nb_classes],
        name='logits')

    # Loss.
    self.loss = self.compute_loss()
    self.l2_loss = self.l2_rate * (
        tf.nn.l2_loss(self.softmax_w) + tf.nn.l2_loss(self.softmax_b))
    self.total_loss = self.loss + self.l2_loss

    # Train op: gradients of variables registered in nil_vars go through
    # zero_nil_slot() first.
    optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    grads_and_vars = optimizer.compute_gradients(self.total_loss)
    nil_grads_and_vars = [
        (zero_nil_slot(grad), var) if var.name in self.nil_vars else (grad, var)
        for grad, var in grads_and_vars
    ]

    global_step = tf.Variable(0, name='global_step', trainable=False)
    if self.clip:
        # Clip by global norm.
        gradients, variables = zip(*nil_grads_and_vars)
        gradients, _ = tf.clip_by_global_norm(gradients, self.clip)
        update_pairs = zip(gradients, variables)
    else:
        update_pairs = nil_grads_and_vars
    self.train_op = optimizer.apply_gradients(
        update_pairs, name='train_op', global_step=global_step)

    # TODO: session setup / visible_device_list still to be revised.
    gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # Initialise all variables.
    self.sess.run(tf.global_variables_initializer())
def __init__(self, config, sess):
    """Build the IRN_C training graph and initialise all variables.

    The graph has a knowledge-base (KB) pre-training pipeline over ``EE``,
    ``RE`` and ``Mse`` and a QA pipeline that runs ``self._inference`` on the
    two slices ``self._paths[:, 0, :]`` and ``self._paths[:, 1, :]``, sums
    their losses and combines their answers into one prediction tensor.

    Args:
        config: settings object. Attributes read here: ``data_file``,
            ``batch_size``, ``nwords``, ``nrels``, ``nents``, ``query_size``,
            ``edim``, ``path_size``, ``nhop``, ``max_grad_norm`` and
            ``checkpoint_dir``.
        sess: TensorFlow session used to run the variable initialiser; it is
            stored as ``self._sess``.
    """
    # ---- hyper-parameters copied from the config ----
    self._data_file = config.data_file
    self._margin = 2
    self._batch_size = config.batch_size
    self._vocab_size = config.nwords
    self._rel_size = config.nrels
    self._ent_size = config.nents
    self._sentence_size = config.query_size
    self._embedding_size = config.edim
    self._path_size = config.path_size
    self._memory_size = config.nrels
    self._hops = config.nhop
    self._max_grad_norm = config.max_grad_norm
    # Alternative kept by the original author:
    #   tf.random_normal_initializer(stddev=config.init_std)
    self._init = tf.contrib.layers.xavier_initializer()
    self._opt = tf.train.AdamOptimizer()
    self._name = "IRN_C"

    self._checkpoint_dir = config.checkpoint_dir + '/' + self._name
    if not os.path.exists(self._checkpoint_dir):
        os.makedirs(self._checkpoint_dir)

    self._build_inputs()
    self._build_vars()
    # Created before any optimiser op exists.
    # NOTE(review): presumably so that only model variables are tracked and
    # not the Adam slots created further down -- confirm.
    self._saver = tf.train.Saver(max_to_keep=10)
    self._encoding = tf.constant(
        position_encoding(self._sentence_size, self._embedding_size),
        name="encoding")

    # ---- KB pre-training pipeline ----
    KB_batch_loss = self._pretranse()
    KB_loss_op = tf.reduce_sum(KB_batch_loss, name="KB_loss_op")
    KB_grads_and_vars = self._opt.compute_gradients(
        KB_loss_op, [self.EE, self.RE, self.Mse])
    # Variables listed in self._nil_vars get their gradient passed through
    # zero_nil_slot() before the update.
    KB_nil_grads_and_vars = []
    for g, v in KB_grads_and_vars:
        if v.name in self._nil_vars:
            KB_nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            KB_nil_grads_and_vars.append((g, v))
    print("KB_grads_and_vars")
    for g, v in KB_nil_grads_and_vars:
        # Single formatted argument keeps the output identical on Python 2 and 3.
        print("%s %s" % (g, v.name))
    # Fix: apply the nil-zeroed gradients. The original applied the raw
    # KB_grads_and_vars, so the zero_nil_slot() result was only ever printed.
    KB_train_op = self._opt.apply_gradients(KB_nil_grads_and_vars,
                                            name="KB_train_op")
    # Row-wise (axis 1) L2-normalised views of the entity/relation tables.
    KBE_norm_op = tf.nn.l2_normalize(self.EE, 1)
    KBR_norm_op = tf.nn.l2_normalize(self.RE, 1)

    # ---- QA pipeline (cross entropy as loss) ----
    batch_loss_1, p_1, ans_1 = self._inference(self._paths[:, 0, :])
    batch_loss_2, p_2, ans_2 = self._inference(self._paths[:, 1, :])
    QA_loss_op = tf.reduce_sum(batch_loss_1 + batch_loss_2, name="QA_loss_op")

    # Gradient pipeline over every trainable variable (no var_list given).
    # Original author's remark: "seem not affect much".
    QA_grads_and_vars = self._opt.compute_gradients(QA_loss_op)
    # Per-tensor norm clipping; variables without a gradient are dropped.
    QA_grads_and_vars = [(tf.clip_by_norm(g, self._max_grad_norm), v)
                         for g, v in QA_grads_and_vars if g is not None]
    QA_grads_and_vars = [(add_gradient_noise(g), v)
                         for g, v in QA_grads_and_vars]
    QA_nil_grads_and_vars = []
    for g, v in QA_grads_and_vars:
        if v.name in self._nil_vars:
            QA_nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            QA_nil_grads_and_vars.append((g, v))
    print("QA_grads_and_vars")
    for g, v in QA_nil_grads_and_vars:
        print("%s %s" % (g, v.name))
    QA_train_op = self._opt.apply_gradients(QA_nil_grads_and_vars,
                                            name="QA_train_op")

    # Final answer: argmax over the summed answer scores of both branches,
    # as an int32 column.
    fans = ans_1 + ans_2
    final_ans = tf.reshape(tf.cast(tf.argmax(fans, 1), tf.int32), [-1, 1])

    # predict ops; shape noted by the original author: (none, 11)
    QA_predict_op = tf.concat(axis=1, values=[p_1, p_2, final_ans])

    # ---- expose the ops on the instance ----
    self.KB_loss_op = KB_loss_op
    self.KB_train_op = KB_train_op
    self.KBE_norm_op = KBE_norm_op
    self.KBR_norm_op = KBR_norm_op
    self.QA_loss_op = QA_loss_op
    self.QA_predict_op = QA_predict_op
    self.QA_train_op = QA_train_op

    init_op = tf.global_variables_initializer()
    self._sess = sess
    self._sess.run(init_op)
def build_model(self):
    """Assemble the multi-layer bidirectional-RNN tagging graph and a session.

    Steps, in graph-construction order:

    1. per entry of ``self._feature_names``: an int32 input placeholder, a
       dropout placeholder and an embedding table;
    2. optionally (``self._use_char_feature``) a character embedding table,
       a character input placeholder and a ``MultiConvolutional3D`` encoder;
    3. embedding lookup with dropout, concatenation of all features;
    4. ``self._num_layers`` stacked forward/backward cells wrapped in
       dropout, run through ``tf.nn.bidirectional_dynamic_rnn``;
    5. softmax projection, loss (``self.compute_loss()`` plus L2 on the
       projection) and an Adam train op with optional global-norm clipping;
    6. a ``tf.Session`` on GPU ``'0'`` with all variables initialised.

    Returns nothing; everything is stored on ``self``.
    """
    for name in self._feature_names:
        # Input placeholder.
        self.input_feature_ph_dict[name] = tf.placeholder(
            dtype=tf.int32, shape=[None, self._sequence_length],
            name='input_feature_ph_%s' % name)

        # Dropout-rate placeholder.
        self.weight_dropout_ph_dict[name] = tf.placeholder(
            tf.float32, name='dropout_ph_%s' % name)

        # Embedding table: supplied initial weights if present, otherwise a
        # tensor produced by uniform_tensor().
        if name in self._feature_init_weight_dict:
            table = tf.Variable(
                initial_value=self._feature_init_weight_dict[name],
                name='feature_weight_%s' % name)
            self.feature_weight_dict[name] = table
            # NOTE(review): the flattened source does not show whether this
            # registration sits in this branch or at loop level; kept in the
            # supplied-weights branch -- confirm.
            self.nil_vars.add(table.name)
        else:
            initial = uniform_tensor(
                shape=self._feature_weight_shape_dict[name],
                name='f_w_%s' % name)
            self.feature_weight_dict[name] = tf.Variable(
                initial_value=initial, name='feature_weigth_%s' % name)

        # Default the dropout rate of features that did not specify one.
        self._feature_weight_dropout_dict.setdefault(name, 0.)

    # Character feature: embedding table and input placeholder.
    if self._use_char_feature:
        char_initial = uniform_tensor(
            shape=self._feature_weight_shape_dict['char'],
            name='f_w_%s' % 'char')
        char_table = tf.Variable(
            initial_value=char_initial, name='feature_weigth_%s' % 'char')
        self.feature_weight_dict['char'] = char_table
        # The original registered this name twice; nil_vars is used as a
        # set, so one add has the same effect.
        self.nil_vars.add(char_table.name)
        self.input_feature_ph_dict['char'] = tf.placeholder(
            dtype=tf.int32,
            shape=[None, self._sequence_length, self._word_length],
            name='input_feature_ph_%s' % 'char')

    # Embedding lookup + dropout for every word-level feature.
    self.embedding_features = []
    for name in self._feature_names:
        looked_up = tf.nn.embedding_lookup(
            self.feature_weight_dict[name],
            ids=self.input_feature_ph_dict[name],
            name='embedding_feature_%s' % name)
        dropped = tf.nn.dropout(
            looked_up,
            keep_prob=1.-self.weight_dropout_ph_dict[name],
            name='embedding_feature_dropout_%s' % name)
        self.embedding_features.append(dropped)

    # Character embedding followed by convolution and dropout.
    if self._use_char_feature:
        char_embedding_feature = tf.nn.embedding_lookup(
            self.feature_weight_dict['char'],
            ids=self.input_feature_ph_dict['char'],
            name='embedding_feature_%s' % 'char')
        char_conv = MultiConvolutional3D(
            char_embedding_feature,
            filter_length_list=self._conv_filter_len_list,
            nb_filter_list=self._conv_filter_size_list)
        couv_feature_char = tf.nn.dropout(
            char_conv.output, keep_prob=1-self.cnn_dropout_rate_ph)

    # Concatenate all feature embeddings (a single feature is used as is).
    if len(self.embedding_features) == 1:
        input_features = self.embedding_features[0]
    else:
        input_features = tf.concat(
            values=self.embedding_features, axis=2, name='input_features')
    if self._use_char_feature:
        input_features = tf.concat(
            [input_features, couv_feature_char], axis=-1)

    # Stacked bidirectional RNN; each layer's cells are wrapped in dropout.
    forward_layers, backward_layers = [], []
    for _ in range(self._num_layers):
        fw, bw = self._get_rnn_unit(self._rnn_unit)
        forward_layers.append(tf.nn.rnn_cell.DropoutWrapper(
            fw, output_keep_prob=1-self.rnn_dropout_rate_ph))
        backward_layers.append(tf.nn.rnn_cell.DropoutWrapper(
            bw, output_keep_prob=1-self.rnn_dropout_rate_ph))
    fw_cell = tf.nn.rnn_cell.MultiRNNCell(forward_layers)
    bw_cell = tf.nn.rnn_cell.MultiRNNCell(backward_layers)

    # Actual length of every sentence, computed from the first feature
    # (0 is the padding value).
    first_feature_ph = self.input_feature_ph_dict[self._feature_names[0]]
    self.sequence_actual_length = get_sequence_actual_length(first_feature_ph)

    rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        fw_cell, bw_cell, input_features, scope='bi-lstm',
        dtype=tf.float32, sequence_length=self.sequence_actual_length)

    # Original author's shape note: [batch_size, max_len, nb_hidden*2].
    merged_outputs = tf.concat(rnn_outputs, axis=2, name='lstm_output')
    lstm_output = tf.nn.dropout(
        merged_outputs,
        keep_prob=1.-self.dropout_rate_ph, name='lstm_output_dropout')

    # Softmax projection over the flattened RNN outputs.
    hidden_size = int(lstm_output.shape[-1])
    self.outputs = tf.reshape(lstm_output, [-1, hidden_size], name='outputs')
    self.softmax_w = tf.get_variable(
        'softmax_w', [hidden_size, self._nb_classes])
    self.softmax_b = tf.get_variable('softmax_b', [self._nb_classes])
    projected = tf.matmul(self.outputs, self.softmax_w) + self.softmax_b
    self.logits = tf.reshape(
        projected,
        shape=[-1, self._sequence_length, self._nb_classes],
        name='logits')

    # Loss.
    self.loss = self.compute_loss()
    self.l2_loss = self._l2_rate * (
        tf.nn.l2_loss(self.softmax_w) + tf.nn.l2_loss(self.softmax_b))
    self.total_loss = self.loss + self.l2_loss

    # Train op: gradients of variables registered in nil_vars go through
    # zero_nil_slot() first.
    optimizer = tf.train.AdamOptimizer(learning_rate=self._learning_rate)
    grads_and_vars = optimizer.compute_gradients(self.total_loss)
    nil_grads_and_vars = [
        (zero_nil_slot(grad), var) if var.name in self.nil_vars else (grad, var)
        for grad, var in grads_and_vars
    ]

    global_step = tf.Variable(0, name='global_step', trainable=False)
    if self._clip:
        # Clip by global norm.
        gradients, variables = zip(*nil_grads_and_vars)
        gradients, _ = tf.clip_by_global_norm(gradients, self._clip)
        update_pairs = zip(gradients, variables)
    else:
        update_pairs = nil_grads_and_vars
    self.train_op = optimizer.apply_gradients(
        update_pairs, name='train_op', global_step=global_step)

    # TODO: session setup / visible_device_list still to be revised.
    gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # Initialise all variables.
    self.sess.run(tf.global_variables_initializer())
def build_model(self):
    """Build the joint intent-classification / slot-tagging graph and a session.

    Steps, in graph-construction order:

    1. per entry of ``self._feature_names``: an int32 input placeholder, a
       dropout placeholder and an embedding table; optionally a character
       table, placeholder and ``MultiConvolutional3D`` encoder;
    2. embedding lookup with dropout and concatenation of all features;
    3. ``self._num_layers`` stacked bidirectional RNN layers;
    4. intent head: dense layer on the merged final state, masked
       cross-entropy loss and accuracy;
    5. slot head: dense layer on the RNN outputs scored with a CRF
       log-likelihood;
    6. Adam train op on ``slot_loss + intent_loss * intent_weight_ph`` with
       optional global-norm clipping;
    7. a ``tf.Session`` on GPU ``'0'`` with all variables initialised.

    Changes relative to the previous revision:

    * features are concatenated on axis 2 instead of
      ``len(self._feature_names)``, which only happened to be right for
      exactly two features;
    * ``batch_size`` is defined before the slot-logit reshape that uses it;
    * the ``optimizer.minimize`` call whose result was immediately
      overwritten is gone, so the gradient subgraph is built only once.

    Returns nothing; everything is stored on ``self``. The ``print`` calls
    are shape diagnostics kept from the original.
    """
    for feature_name in self._feature_names:
        # Input placeholder.
        self.input_feature_ph_dict[feature_name] = tf.placeholder(
            dtype=tf.int32, shape=[None, self._sequence_length],
            name='input_feature_ph_%s' % feature_name)

        # Dropout-rate placeholder.
        self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
            tf.float32, name='dropout_ph_%s' % feature_name)

        # Embedding table: uniform_tensor() output unless initial weights
        # were supplied for this feature.
        if feature_name not in self._feature_init_weight_dict:
            feature_weight = uniform_tensor(
                shape=self._feature_weight_shape_dict[feature_name],
                name='f_w_%s' % feature_name)
            self.feature_weight_dict[feature_name] = tf.Variable(
                initial_value=feature_weight,
                name='feature_weigth_%s' % feature_name)
        else:
            self.feature_weight_dict[feature_name] = tf.Variable(
                initial_value=self._feature_init_weight_dict[feature_name],
                name='feature_weight_%s' % feature_name)
            # NOTE(review): the flattened source does not show whether this
            # registration sits in this branch or at loop level; kept in the
            # supplied-weights branch -- confirm.
            self.nil_vars.add(self.feature_weight_dict[feature_name].name)

        # Default the dropout rate of features that did not specify one.
        if feature_name not in self._feature_weight_dropout_dict:
            self._feature_weight_dropout_dict[feature_name] = 0.

    # Character feature: embedding table and input placeholder.
    if self._use_char_feature:
        feature_weight = uniform_tensor(
            shape=self._feature_weight_shape_dict['char'],
            name='f_w_%s' % 'char')
        self.feature_weight_dict['char'] = tf.Variable(
            initial_value=feature_weight, name='feature_weigth_%s' % 'char')
        # Registered once; the original repeated this line, which is a no-op
        # on a set.
        self.nil_vars.add(self.feature_weight_dict['char'].name)
        self.input_feature_ph_dict['char'] = tf.placeholder(
            dtype=tf.int32,
            shape=[None, self._sequence_length, self._word_length],
            name='input_feature_ph_%s' % 'char')

    # Embedding lookup + dropout for every word-level feature.
    self.embedding_features = []
    for feature_name in self._feature_names:
        print(self.input_feature_ph_dict[feature_name].shape)
        embedding_feature = tf.nn.dropout(
            tf.nn.embedding_lookup(
                self.feature_weight_dict[feature_name],
                ids=self.input_feature_ph_dict[feature_name],
                name='embedding_feature_%s' % feature_name),
            keep_prob=1. - self.weight_dropout_ph_dict[feature_name],
            name='embedding_feature_dropout_%s' % feature_name)
        self.embedding_features.append(embedding_feature)
        print(embedding_feature.shape)

    # Character embedding followed by convolution and dropout.
    if self._use_char_feature:
        char_embedding_feature = tf.nn.embedding_lookup(
            self.feature_weight_dict['char'],
            ids=self.input_feature_ph_dict['char'],
            name='embedding_feature_%s' % 'char')
        couv_feature_char = MultiConvolutional3D(
            char_embedding_feature,
            filter_length_list=self._conv_filter_len_list,
            nb_filter_list=self._conv_filter_size_list).output
        couv_feature_char = tf.nn.dropout(
            couv_feature_char, keep_prob=1 - self.cnn_dropout_rate_ph)

    # Concatenate all feature embeddings on the embedding axis. The inputs
    # are [None, sequence_length] id tensors, so each lookup result is
    # rank 3 and axis 2 is its last axis. The old
    # axis=len(self._feature_names) was out of range for 3+ features.
    if len(self.embedding_features) == 1:
        input_features = self.embedding_features[0]
    else:
        input_features = tf.concat(
            values=self.embedding_features, axis=2, name='input_features')
    print('input features shape', input_features.shape)
    if self._use_char_feature:
        input_features = tf.concat(
            [input_features, couv_feature_char], axis=-1)

    # Stacked bidirectional RNN; each layer's cells are wrapped in dropout.
    _fw_cells = []
    _bw_cells = []
    for _ in range(self._num_layers):
        fw, bw = self._get_rnn_unit(self._rnn_unit)
        _fw_cells.append(tf.nn.rnn_cell.DropoutWrapper(
            fw, output_keep_prob=1 - self.rnn_dropout_rate_ph))
        _bw_cells.append(tf.nn.rnn_cell.DropoutWrapper(
            bw, output_keep_prob=1 - self.rnn_dropout_rate_ph))
    fw_cell = tf.nn.rnn_cell.MultiRNNCell(_fw_cells)
    bw_cell = tf.nn.rnn_cell.MultiRNNCell(_bw_cells)

    # Actual length of every sentence, computed from the first feature
    # (0 is the padding value).
    self.sequence_actual_length = get_sequence_actual_length(
        self.input_feature_ph_dict[self._feature_names[0]], dim=1)
    print(self.sequence_actual_length.shape)

    input_size = input_features.shape[-1]
    print('input_features shape ', input_features.shape)
    rnn_inputs = tf.reshape(
        input_features, [-1, self._sequence_length, input_size])
    print('rnn inputs shape ', rnn_inputs.shape)
    rnn_lengths = tf.reshape(self.sequence_actual_length, [-1])

    # todo: add encoder output
    rnn_outputs, rnn_state = tf.nn.bidirectional_dynamic_rnn(
        fw_cell, bw_cell, rnn_inputs, scope='bi-lstm',
        dtype=tf.float32, sequence_length=rnn_lengths)

    # Original author's shape note: [batch_size, max_len, nb_hidden*2].
    rnn_outputs = tf.concat(rnn_outputs, axis=2, name='lstm_output')
    rnn_outputs = tf.nn.dropout(
        rnn_outputs, keep_prob=1. - self.dropout_rate_ph,
        name='lstm_output_dropout')
    rnn_hidden = self.merge_bi_rnn_state(rnn_state).h

    # Dynamic batch dimension, needed by the slot-logit reshape below.
    batch_size = tf.shape(input_features)[0]
    print('rnn hidden shape', rnn_hidden.shape)

    # A per-turn context RNN over rnn_hidden was prototyped here and is
    # disabled.

    # ---- intent head ----
    # NOTE(review): the number of intent classes is hard-coded to 24 (the
    # original author flagged this line with '!!!!').
    intent_logits = tf.layers.dense(rnn_hidden, 24, activation=None)
    label_intents = tf.reshape(self.input_label_intent, [-1])
    print('intent_logits shape', intent_logits.shape)
    print('input_label_intent shape', self.input_label_intent.shape)
    intent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=label_intents, logits=intent_logits)
    # 1.0 for examples with a non-zero actual length, 0.0 otherwise.
    intent_mask = tf.reshape(tf.sign(self.sequence_actual_length), [-1])
    intent_mask = tf.cast(intent_mask, dtype=tf.float32)
    print('intent_mask shape', intent_mask.shape)
    self.intent_loss = tf.reduce_sum(
        intent_loss * intent_mask) / tf.reduce_sum(intent_mask)

    pred_intents = tf.argmax(intent_logits, axis=1)
    self.pred_intents = tf.reshape(pred_intents, [-1])
    print('pred_intents shape', self.pred_intents.shape)
    correct_preds = tf.equal(tf.cast(pred_intents, dtype=tf.int32),
                             tf.cast(label_intents, dtype=tf.int32))
    self.intent_accuracy = (
        tf.reduce_sum(tf.cast(correct_preds, tf.float32) * intent_mask)
        / tf.reduce_sum(intent_mask))
    self.intent_count = tf.cast(tf.reduce_sum(intent_mask), tf.int32)
    self.intent_logits = intent_logits

    # ---- slot head ----
    # A variant that fed the context-RNN state into a second bi-RNN pass
    # ("run the rnn again with init state") was prototyped here and is
    # disabled; slots are predicted directly from rnn_outputs.
    slot_logits = tf.layers.dense(
        rnn_outputs, self._nb_classes, activation=None)
    self.slot_logits = tf.reshape(
        slot_logits, [batch_size, self._sequence_length, self._nb_classes])
    print('slot logits shape', self.slot_logits.shape)

    slot_labels = tf.reshape(self.input_label_ph, [-1, self._sequence_length])
    slot_logits = tf.reshape(
        self.slot_logits, [-1, self._sequence_length, self._nb_classes])
    slot_lengths = tf.reshape(self.sequence_actual_length, [-1])
    log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
        slot_logits, slot_labels, slot_lengths)
    print('transition params shape', self.transition_params.shape)
    print('log likelihood loss', log_likelihood.shape)
    self.slot_loss = tf.reduce_sum(
        -log_likelihood * intent_mask) / tf.reduce_sum(intent_mask)

    self.total_loss = self.intent_loss + self.slot_loss
    self.train_loss = self.slot_loss + self.intent_loss * self.intent_weight_ph

    # ---- train op ----
    # Gradients of variables registered in nil_vars go through
    # zero_nil_slot() first.
    optimizer = tf.train.AdamOptimizer(learning_rate=self._learning_rate)
    grads_and_vars = optimizer.compute_gradients(self.train_loss)
    nil_grads_and_vars = []
    for g, v in grads_and_vars:
        if v.name in self.nil_vars:
            nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            nil_grads_and_vars.append((g, v))

    global_step = tf.Variable(0, name='global_step', trainable=False)
    if self._clip:
        # Clip by global norm.
        gradients, variables = zip(*nil_grads_and_vars)
        gradients, _ = tf.clip_by_global_norm(gradients, self._clip)
        self.train_op = optimizer.apply_gradients(
            zip(gradients, variables), name='train_op',
            global_step=global_step)
    else:
        self.train_op = optimizer.apply_gradients(
            nil_grads_and_vars, name='train_op', global_step=global_step)

    # TODO: session setup / visible_device_list still to be revised.
    gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # Initialise all variables.
    init = tf.global_variables_initializer()
    self.sess.run(init)
def __init__(self, config, multi_kb: MultiKnowledgeBase, sess):
    """Build the M_IRN training graph over two knowledge bases and initialise it.

    Three training pipelines share one Adam optimiser:

    * KG1 training (loss from ``self._kg1_to_train()``) over
      ``_kg1_ent_emb``, ``_kg1_rel_emb`` and ``_kg1_Mse``;
    * KG2 training (loss from ``self._kg2_to_train()``) over
      ``_kg2_ent_emb``, ``_kg2_rel_emb`` and ``_kg2_Mse``;
    * inference (loss and path from ``self._inference()``) over
      ``_que_emb``, ``_kg1_Mrq``, ``_kg1_Mrs``, ``_kg2_Mrq`` and
      ``_kg2_Mrs``.

    Alignment training (``_align_to_train`` / ``_align_kNN`` with a separate
    optimiser) is currently disabled.

    Args:
        config: settings object. Attributes read here: ``dataset``,
            ``batch_size``, ``sentence_size``, ``embedding_dimension``,
            ``max_grad_norm``, ``question_words``, ``direct_align``,
            ``dual_matrices``, ``hops``, ``steps``, ``lan_labels``,
            ``lan_que``, ``lr``, ``epsilon``, ``checkpoint_dir``, ``kb_dir``
            and, when ``direct_align`` is false, ``this_k_1_2`` /
            ``this_k_2_1``.
        multi_kb: pair of knowledge bases; ``kb1`` / ``kb2`` supply
            ``n_relations`` and ``n_entities``.
        sess: TensorFlow session used to run the variable and table
            initialisers; it is stored as ``self._sess``.
    """
    # ---- hyper-parameters copied from the config ----
    self._margin = 4
    self._dataset = config.dataset
    self._batch_size = config.batch_size
    self._sentence_size = config.sentence_size
    self._embedding_size = config.embedding_dimension
    self._max_grad_norm = config.max_grad_norm
    self._multi_kb = multi_kb
    self._vocab_size = config.question_words
    self._is_direct_align = config.direct_align
    self._is_dual_matrices = config.dual_matrices
    self._hops = config.hops
    self._steps = config.steps
    self._lan_labels = config.lan_labels
    self._lan_que = config.lan_que

    # ---- sizes taken from the two knowledge bases ----
    self._rel_size_1 = self._multi_kb.kb1.n_relations
    self._rel_size_2 = self._multi_kb.kb2.n_relations
    self._ent_size_1 = self._multi_kb.kb1.n_entities
    self._ent_size_2 = self._multi_kb.kb2.n_entities

    self._init = tf.contrib.layers.xavier_initializer()
    self._orthogonal_init = orthogonal_initializer()
    self._opt = tf.train.AdamOptimizer(
        learning_rate=config.lr, epsilon=config.epsilon, name="opt")

    self._name = "M_IRN"
    self._checkpoint_dir = (
        config.checkpoint_dir + '/' + config.kb_dir + '/' + self._name)

    if not self._is_direct_align:
        self.this_k_1_2 = config.this_k_1_2
        self.this_k_2_1 = config.this_k_2_1

    if not os.path.exists(self._checkpoint_dir):
        os.makedirs(self._checkpoint_dir)

    self._build_inputs()
    self._build_vars()
    # Created before any optimiser op exists.
    # NOTE(review): presumably so that only model variables are tracked and
    # not the Adam slots created further down -- confirm.
    self._saver = tf.train.Saver(max_to_keep=1)

    # ---- KG1 training ----
    kg1_batch_loss = self._kg1_to_train()
    kg1_loss_op = tf.reduce_sum(kg1_batch_loss, name="KG1_loss_op")
    kg1_grads_and_vars = self._opt.compute_gradients(
        kg1_loss_op, [self._kg1_ent_emb, self._kg1_rel_emb, self._kg1_Mse])
    kg1_nil_grads_and_vars = []
    for g, v in kg1_grads_and_vars:
        if v.name in self._nil_vars:  # not _kg1_Mse
            kg1_nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            kg1_nil_grads_and_vars.append((g, v))
    print("Gradients and Variables for KG1:")
    for g, v in kg1_nil_grads_and_vars:
        print(g, v.name)
    # Fix: apply the nil-zeroed gradients. The original applied the raw
    # kg1_grads_and_vars, so the zero_nil_slot() result was only ever printed.
    kg1_train_op = self._opt.apply_gradients(kg1_nil_grads_and_vars,
                                             name="kg1_train_op")

    # ---- KG2 training ----
    kg2_batch_loss = self._kg2_to_train()
    kg2_loss_op = tf.reduce_sum(kg2_batch_loss, name="kg2_loss_op")
    kg2_grads_and_vars = self._opt.compute_gradients(
        kg2_loss_op, [self._kg2_ent_emb, self._kg2_rel_emb, self._kg2_Mse])
    kg2_nil_grads_and_vars = []
    for g, v in kg2_grads_and_vars:
        if v.name in self._nil_vars:  # not _kg2_Mse
            kg2_nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            kg2_nil_grads_and_vars.append((g, v))
    print("Gradients and Variables for KG2:")
    for g, v in kg2_nil_grads_and_vars:
        print(g, v.name)
    # Same fix as for KG1: use the nil-zeroed gradient list.
    kg2_train_op = self._opt.apply_gradients(kg2_nil_grads_and_vars,
                                             name="kg2_train_op")

    # ---- inference (cross entropy as loss) ----
    # Shapes noted by the original author: (batch_size, 1), (batch_size, 6).
    batch_loss, inference_path = self._inference()
    inference_loss_op = tf.reduce_sum(batch_loss, name="inference_loss_op")
    inference_params = [
        self._que_emb, self._kg1_Mrq, self._kg1_Mrs,
        self._kg2_Mrq, self._kg2_Mrs,
    ]
    inference_grads_and_vars = self._opt.compute_gradients(
        inference_loss_op, inference_params)
    # Per-tensor norm clipping; variables without a gradient are dropped.
    inference_grads_and_vars = [
        (tf.clip_by_norm(g, self._max_grad_norm), v)
        for g, v in inference_grads_and_vars if g is not None
    ]
    inference_grads_and_vars = [
        (add_gradient_noise(g), v) for g, v in inference_grads_and_vars
    ]
    inference_nil_grads_and_vars = []
    for g, v in inference_grads_and_vars:
        if v.name in self._nil_vars:
            inference_nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            inference_nil_grads_and_vars.append((g, v))
    print("Gradients and variables for inference:")
    for g, v in inference_nil_grads_and_vars:
        print(g, v.name)
    inference_train_op = self._opt.apply_gradients(
        inference_nil_grads_and_vars, name="inference_train_op")

    # batch_predict ops
    inference_predict_op = inference_path

    # ---- expose the ops on the instance ----
    self.kg1_loss_op = kg1_loss_op
    self.kg1_train_op = kg1_train_op
    self.kg2_loss_op = kg2_loss_op
    self.kg2_train_op = kg2_train_op
    self.inference_loss_op = inference_loss_op
    self.inference_predict_op = inference_predict_op
    self.inference_train_op = inference_train_op

    init_op = tf.global_variables_initializer()
    table_op = tf.tables_initializer()
    self._sess = sess
    self._sess.run(init_op)
    self._sess.run(table_op)
def build_model(self):
    """Build the TensorFlow graph for the tagger and start a session.

    In order, this method:

    1. creates the placeholders (per-feature ids, char ids, labels and the
       dropout rates) and the per-feature embedding variables;
    2. builds a char-level branch: embedding lookup -> dropout -> conv1d ->
       max-pool -> reshape to ``[-1, sequence_length, num_filter]``;
    3. looks up and concatenates the feature embeddings, appends the char
       branch output, and feeds the result to a bidirectional dynamic RNN
       built from LSTM or GRU cells;
    4. projects the RNN outputs to ``nb_classes`` logits, obtains the loss
       from ``self.compute_loss()`` and adds an L2 term on the projection
       weights and bias;
    5. builds an Adam ``train_op`` (optionally clipped by global norm),
       creates ``self.sess`` and runs the global variable initializer.

    Attributes read: ``feature_names``, ``sequence_length``, ``word_length``,
    ``feature_init_weight_dict``, ``feature_weight_shape_dict``,
    ``feature_weight_dropout_dict``, ``char_init_weight_dict``,
    ``filter_size``, ``char_embedding_size``, ``num_filter``, ``rnn_unit``,
    ``nb_hidden``, ``nb_classes``, ``l2_rate``, ``learning_rate``, ``clip``.

    Side effects: sets many ``self.*`` tensors/variables, fills in missing
    entries of ``self.feature_weight_dropout_dict`` with ``0.``, and opens a
    ``tf.Session`` stored in ``self.sess``.

    Raises:
        ValueError: if ``self.rnn_unit`` is neither ``'lstm'`` nor ``'gru'``.
    """
    # init ph, weights and dropout rate
    self.input_feature_ph_dict = dict()
    self.input_char_ph_dict=dict()
    self.char_weight_dict=dict()  # build the feature-weight dictionary
    self.weight_dropout_ph_dict = dict()
    self.weight_dropout_ph_dict['char']=tf.placeholder(tf.float32,name='dropout_char')
    self.feature_weight_dict = dict()
    # Names of variables whose gradients are passed through zero_nil_slot
    # when the train op is built further down.
    self.nil_vars = set()
    self.dropout_rate_ph = tf.placeholder(tf.float32, name='dropout_rate_ph')

    # label ph
    self.input_label_ph = tf.placeholder(
        dtype=tf.int32, shape=[None, self.sequence_length], name='input_label_ph')

    # read in the features and build the per-feature placeholders
    for feature_name in self.feature_names:

        # input ph: one placeholder is created per feature
        self.input_feature_ph_dict[feature_name] = tf.placeholder(
            dtype=tf.int32, shape=[None, self.sequence_length],
            name='input_feature_ph_%s' % feature_name)

        # dropout rate ph
        self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
            tf.float32, name='dropout_ph_%s' % feature_name)

        # init feature weights: randomly initialise those without given weights
        if feature_name not in self.feature_init_weight_dict:
            feature_weight = uniform_tensor(
                shape=self.feature_weight_shape_dict[feature_name],
                name='f_w_%s' % feature_name)
            # NOTE(review): 'feature_weigth_' is misspelt and differs from the
            # 'feature_weight_' prefix used in the else branch. It is a graph
            # variable name, so renaming it presumably breaks restoring
            # existing checkpoints -- confirm before fixing.
            self.feature_weight_dict[feature_name] = tf.Variable(
                initial_value=feature_weight, name='feature_weigth_%s' % feature_name)
        else:
            self.feature_weight_dict[feature_name] = tf.Variable(
                initial_value=self.feature_init_weight_dict[feature_name],
                name='feature_weight_%s' % feature_name)
            # NOTE(review): the nesting of this statement is assumed (only
            # pre-initialised embeddings are registered) -- confirm whether it
            # should run for every feature instead.
            self.nil_vars.add(self.feature_weight_dict[feature_name].name)

        # init dropout rate: default to 0. where none was specified
        if feature_name not in self.feature_weight_dropout_dict:
            self.feature_weight_dropout_dict[feature_name] = 0.

    # initialise the character embedding space
    self.input_char_ph_dict['char']=tf.placeholder(
        dtype=tf.int32,shape=[None,self.sequence_length,self.word_length], name='char_input')
    # Flatten the (sequence, word) axes into one axis so the chars of a
    # sentence form a single sequence for conv1d.
    self.input_char_flat=tf.reshape(
        self.input_char_ph_dict['char'],[-1,self.word_length*self.sequence_length],
        name='input_x_char_flat')
    self.char_weight_dict['char']=tf.Variable(
        initial_value=self.char_init_weight_dict['char'],name='char_init')
    # keep_prob is 1 - placeholder, i.e. the placeholder carries the drop rate.
    self.char_embedding_init=tf.nn.dropout(tf.nn.embedding_lookup(
        self.char_weight_dict['char'],
        ids=self.input_char_flat,
        name='char_embedding',),
        keep_prob=1.-self.weight_dropout_ph_dict['char'],
        name='char_dropout')
    # NOTE(review): the extent of this name scope is assumed to cover
    # everything up to char_pool_flat -- confirm.
    with tf.name_scope('char_conv'):
        self.filter_shape=[self.filter_size,self.char_embedding_size,self.num_filter]
        self.W_conv=tf.Variable(tf.truncated_normal(self.filter_shape,stddev=0.1),name='W_conv')
        # NOTE(review): b_conv is created but not used anywhere else in this
        # method (no bias is added to self.conv).
        self.b_conv=tf.Variable(tf.constant(0.1,shape=[self.num_filter]),name='b_conv')
        self.conv=tf.nn.conv1d(self.char_embedding_init,self.W_conv,stride=1,padding='SAME',name='conv')
        # Add a trailing axis so the 4-D max_pool op can be applied.
        self.h_expand=tf.expand_dims(self.conv,-1)
        # NOTE(review): the pooling window is sequence_length*word_length wide
        # while the stride is word_length -- confirm the window was not meant
        # to be word_length (one window per word).
        self.pooled=tf.nn.max_pool(
            self.h_expand,ksize=[1,self.sequence_length*self.word_length,1,1],
            strides=[1,self.word_length,1,1],padding='SAME',name='pooled')
        self.char_pool_flat=tf.reshape(
            self.pooled,[-1,self.sequence_length,self.num_filter],name='char_pool_flat')

    # init embeddings
    # embed each feature and collect the results for concatenation
    self.embedding_features = []
    for feature_name in self.feature_names:
        embedding_feature = tf.nn.dropout(tf.nn.embedding_lookup(
            self.feature_weight_dict[feature_name],
            ids=self.input_feature_ph_dict[feature_name],
            name='embedding_feature_%s' % feature_name),
            keep_prob=1. - self.weight_dropout_ph_dict[feature_name],
            name='embedding_feature_dropout_%s' % feature_name)
        self.embedding_features.append(embedding_feature)

    # concat all features
    # (original note: join multiple words into one sentence)
    input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
        else tf.concat(values=self.embedding_features, axis=2, name='input_features')
    # Append the char-level branch output along the last axis.
    input_features=tf.concat([input_features,self.char_pool_flat],axis=2)

    # bi-lstm
    if self.rnn_unit == 'lstm':
        fw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
        bw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
    elif self.rnn_unit == 'gru':
        fw_cell = rnn.GRUCell(self.nb_hidden)
        bw_cell = rnn.GRUCell(self.nb_hidden)
    else:
        raise ValueError('rnn_unit must in (lstm, gru)!')

    # compute the actual length of self.input_features[feature_names[0]]
    # (0 is the padding value)
    self.sequence_actual_length = get_sequence_actual_length(  # actual length of each sentence
        self.input_feature_ph_dict[self.feature_names[0]])
    # print(input_features)
    rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        fw_cell, bw_cell, input_features, scope='bi-lstm',
        dtype=tf.float32, sequence_length=self.sequence_actual_length)

    # shape = [batch_size, max_len, nb_hidden*2]
    # (original note, meaning unclear: after dropout "[m,n] becomes [1,1]";
    # input and output dimensions stay unchanged)
    lstm_output = tf.nn.dropout(
        tf.concat(rnn_outputs, axis=2, name='lstm_output'),
        keep_prob=1. - self.dropout_rate_ph, name='lstm_output_dropout')

    # softmax
    # reshape the output to 2-D so a single matmul projects every time step
    self.outputs = tf.reshape(lstm_output, [-1, self.nb_hidden * 2], name='outputs')
    self.softmax_w = tf.get_variable('softmax_w', [self.nb_hidden * 2, self.nb_classes])
    self.softmax_b = tf.get_variable('softmax_b', [self.nb_classes])
    self.logits = tf.reshape(
        tf.matmul(self.outputs, self.softmax_w) + self.softmax_b,
        shape=[-1, self.sequence_length, self.nb_classes], name='logits')

    # compute loss
    self.loss = self.compute_loss()
    # The L2 penalty covers only the projection weights and bias.
    self.l2_loss = self.l2_rate * (tf.nn.l2_loss(self.softmax_w) + tf.nn.l2_loss(self.softmax_b))
    self.total_loss = self.loss + self.l2_loss

    # train op
    optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    grads_and_vars = optimizer.compute_gradients(self.total_loss)
    nil_grads_and_vars = []
    for g, v in grads_and_vars:
        if v.name in self.nil_vars:
            # presumably zeroes the gradient of the padding slot so that row
            # is never updated -- verify against zero_nil_slot
            nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            nil_grads_and_vars.append((g, v))

    global_step = tf.Variable(0, name='global_step', trainable=False)
    if self.clip:
        # clip by global norm
        gradients, variables = zip(*nil_grads_and_vars)
        gradients, _ = tf.clip_by_global_norm(gradients, self.clip)
        self.train_op = optimizer.apply_gradients(
            zip(gradients, variables), name='train_op', global_step=global_step)
    else:
        self.train_op = optimizer.apply_gradients(
            nil_grads_and_vars, name='train_op', global_step=global_step)

    # TODO sess, visible_device_list still to be revised (device '0' is hard-coded)
    gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # init all variable
    init = tf.global_variables_initializer()
    self.sess.run(init)