def build_model(self):
    # init placeholders, weights and dropout rates
    self.input_feature_ph_dict = dict()
    self.weight_dropout_ph_dict = dict()
    self.feature_weight_dict = dict()  # dict of feature embedding weights
    self.nil_vars = set()
    self.dropout_rate_ph = tf.placeholder(tf.float32, name='dropout_rate_ph')
    # label placeholder
    self.input_label_ph = tf.placeholder(
        dtype=tf.int32, shape=[None, self.sequence_length], name='input_label_ph')

    # read in the features and build the per-feature graph
    for feature_name in self.feature_names:
        # input placeholder
        self.input_feature_ph_dict[feature_name] = tf.placeholder(
            dtype=tf.int32, shape=[None, self.sequence_length],
            name='input_feature_ph_%s' % feature_name)
        # dropout rate placeholder
        self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
            tf.float32, name='dropout_ph_%s' % feature_name)
        # init feature weights; randomly initialize those not specified
        if feature_name not in self.feature_init_weight_dict:
            feature_weight = uniform_tensor(
                shape=self.feature_weight_shape_dict[feature_name],
                name='f_w_%s' % feature_name)
            self.feature_weight_dict[feature_name] = tf.Variable(
                initial_value=feature_weight,
                name='feature_weight_%s' % feature_name)
        else:
            self.feature_weight_dict[feature_name] = tf.Variable(
                initial_value=self.feature_init_weight_dict[feature_name],
                name='feature_weight_%s' % feature_name)
            self.nil_vars.add(self.feature_weight_dict[feature_name].name)
        # init dropout rate; default to 0 when not specified
        if feature_name not in self.feature_weight_dropout_dict:
            self.feature_weight_dropout_dict[feature_name] = 0.

    # init embeddings: embed each feature, then apply dropout
    self.embedding_features = []
    for feature_name in self.feature_names:
        embedding_feature = tf.nn.dropout(
            tf.nn.embedding_lookup(
                self.feature_weight_dict[feature_name],
                ids=self.input_feature_ph_dict[feature_name],
                name='embedding_feature_%s' % feature_name),
            keep_prob=1. - self.weight_dropout_ph_dict[feature_name],
            name='embedding_feature_dropout_%s' % feature_name)
        self.embedding_features.append(embedding_feature)

    # concat all features along the embedding dimension
    input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
        else tf.concat(values=self.embedding_features, axis=2, name='input_features')

    # cnn (the IDCNN encoder replaces the former bi-lstm, kept below for reference)
    cnn_output = self.IDCNN_layer(input_features)

    # # bi-lstm
    # if self.rnn_unit == 'lstm':
    #     fw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
    #     bw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
    # elif self.rnn_unit == 'gru':
    #     fw_cell = rnn.GRUCell(self.nb_hidden)
    #     bw_cell = rnn.GRUCell(self.nb_hidden)
    # else:
    #     raise ValueError('rnn_unit must be in (lstm, gru)!')

    # actual length of each sequence in input_feature_ph_dict[feature_names[0]]
    # (0 is the padding value)
    self.sequence_actual_length = get_sequence_actual_length(
        self.input_feature_ph_dict[self.feature_names[0]])

    # rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
    #     fw_cell, bw_cell, input_features, scope='bi-lstm',
    #     dtype=tf.float32, sequence_length=self.sequence_actual_length)
    # # shape = [batch_size, max_len, nb_hidden*2];
    # # dropout keeps the input/output dimensions unchanged
    # lstm_output = tf.nn.dropout(
    #     tf.concat(rnn_outputs, axis=2, name='lstm_output'),
    #     keep_prob=1.-self.dropout_rate_ph, name='lstm_output_dropout')
    # # softmax: flatten the encoder output
    # self.outputs = tf.reshape(lstm_output, [-1, self.nb_hidden*2], name='outputs')

    self.softmax_w = tf.get_variable('softmax_w', [self.cnn_output_width, self.nb_classes])
    self.softmax_b = tf.get_variable('softmax_b', [self.nb_classes])
    self.logits = tf.reshape(
        tf.matmul(cnn_output, self.softmax_w) + self.softmax_b,
        shape=[-1, self.sequence_length, self.nb_classes], name='logits')

    # compute loss
    self.loss = self.compute_loss()
    self.l2_loss = self.l2_rate * (tf.nn.l2_loss(self.softmax_w) + tf.nn.l2_loss(self.softmax_b))
    self.total_loss = self.loss + self.l2_loss

    # train op
    optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    grads_and_vars = optimizer.compute_gradients(self.total_loss)
    nil_grads_and_vars = []
    for g, v in grads_and_vars:
        if v.name in self.nil_vars:
            nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            nil_grads_and_vars.append((g, v))

    global_step = tf.Variable(0, name='global_step', trainable=False)
    if self.clip:
        # clip by global norm
        gradients, variables = zip(*nil_grads_and_vars)
        gradients, _ = tf.clip_by_global_norm(gradients, self.clip)
        self.train_op = optimizer.apply_gradients(
            zip(gradients, variables), name='train_op', global_step=global_step)
    else:
        self.train_op = optimizer.apply_gradients(
            nil_grads_and_vars, name='train_op', global_step=global_step)

    # TODO: sess / visible_device_list still need revisiting
    gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    # init all variables
    init = tf.global_variables_initializer()
    self.sess.run(init)
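# --- sketch: helpers assumed by build_model (defined elsewhere in the repo) ---
# `uniform_tensor`, `get_sequence_actual_length` and `zero_nil_slot` (like the
# IDCNN_layer / compute_loss methods) are not defined in this file. Below is a
# minimal sketch of what the three helpers are assumed to do, inferred purely
# from their call sites; assumes `import tensorflow as tf` as in the rest of
# this file.
def uniform_tensor(shape, name, dtype=tf.float32):
    # uniformly initialized weights; the [-0.01, 0.01] range is an assumption
    return tf.random_uniform(shape=shape, minval=-0.01, maxval=0.01,
                             dtype=dtype, name=name)

def get_sequence_actual_length(ids, dim=1):
    # count the non-zero (non-padding) ids along `dim` to get true lengths
    used = tf.sign(tf.abs(ids))
    return tf.cast(tf.reduce_sum(used, axis=dim), tf.int32)

def zero_nil_slot(t):
    # zero out the gradient row of index 0 (the padding/NIL embedding);
    # assumes a dense gradient tensor of shape [vocab_size, embedding_dim]
    shape = tf.shape(t)
    nil_row = tf.zeros(tf.stack([1, shape[1]]), dtype=t.dtype)
    return tf.concat([nil_row, t[1:, :]], axis=0)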
def build_model(self):
    for feature_name in self._feature_names:
        # input placeholder
        self.input_feature_ph_dict[feature_name] = tf.placeholder(
            dtype=tf.int32, shape=[None, self._sequence_length],
            name='input_feature_ph_%s' % feature_name)
        # dropout rate placeholder
        self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
            tf.float32, name='dropout_ph_%s' % feature_name)
        # init feature weights; randomly initialize those not specified
        if feature_name not in self._feature_init_weight_dict:
            feature_weight = uniform_tensor(
                shape=self._feature_weight_shape_dict[feature_name],
                name='f_w_%s' % feature_name)
            self.feature_weight_dict[feature_name] = tf.Variable(
                initial_value=feature_weight,
                name='feature_weight_%s' % feature_name)
        else:
            self.feature_weight_dict[feature_name] = tf.Variable(
                initial_value=self._feature_init_weight_dict[feature_name],
                name='feature_weight_%s' % feature_name)
            self.nil_vars.add(self.feature_weight_dict[feature_name].name)
        # init dropout rate; default to 0 when not specified
        if feature_name not in self._feature_weight_dropout_dict:
            self._feature_weight_dropout_dict[feature_name] = 0.

    # char feature
    if self._use_char_feature:
        # char feature weights
        feature_weight = uniform_tensor(
            shape=self._feature_weight_shape_dict['char'], name='f_w_%s' % 'char')
        self.feature_weight_dict['char'] = tf.Variable(
            initial_value=feature_weight, name='feature_weight_%s' % 'char')
        self.nil_vars.add(self.feature_weight_dict['char'].name)
        self.input_feature_ph_dict['char'] = tf.placeholder(
            dtype=tf.int32,
            shape=[None, self._sequence_length, self._word_length],
            name='input_feature_ph_%s' % 'char')

    # init embeddings
    self.embedding_features = []
    for feature_name in self._feature_names:
        embedding_feature = tf.nn.dropout(
            tf.nn.embedding_lookup(
                self.feature_weight_dict[feature_name],
                ids=self.input_feature_ph_dict[feature_name],
                name='embedding_feature_%s' % feature_name),
            keep_prob=1. - self.weight_dropout_ph_dict[feature_name],
            name='embedding_feature_dropout_%s' % feature_name)
        self.embedding_features.append(embedding_feature)
    # char embedding
    if self._use_char_feature:
        char_embedding_feature = tf.nn.embedding_lookup(
            self.feature_weight_dict['char'],
            ids=self.input_feature_ph_dict['char'],
            name='embedding_feature_%s' % 'char')
        # conv over the char dimension
        conv_feature_char = MultiConvolutional3D(
            char_embedding_feature,
            filter_length_list=self._conv_filter_len_list,
            nb_filter_list=self._conv_filter_size_list).output
        conv_feature_char = tf.nn.dropout(
            conv_feature_char, keep_prob=1 - self.cnn_dropout_rate_ph)

    # concat all features
    input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
        else tf.concat(values=self.embedding_features, axis=2, name='input_features')
    if self._use_char_feature:
        input_features = tf.concat([input_features, conv_feature_char], axis=-1)

    # multi-layer bi-lstm
    _fw_cells = []
    _bw_cells = []
    for _ in range(self._num_layers):
        fw, bw = self._get_rnn_unit(self._rnn_unit)
        _fw_cells.append(tf.nn.rnn_cell.DropoutWrapper(
            fw, output_keep_prob=1 - self.rnn_dropout_rate_ph))
        _bw_cells.append(tf.nn.rnn_cell.DropoutWrapper(
            bw, output_keep_prob=1 - self.rnn_dropout_rate_ph))
    fw_cell = tf.nn.rnn_cell.MultiRNNCell(_fw_cells)
    bw_cell = tf.nn.rnn_cell.MultiRNNCell(_bw_cells)

    # actual length of each sequence (0 is the padding value)
    self.sequence_actual_length = get_sequence_actual_length(
        self.input_feature_ph_dict[self._feature_names[0]])

    rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        fw_cell, bw_cell, input_features, scope='bi-lstm',
        dtype=tf.float32, sequence_length=self.sequence_actual_length)
    # shape = [batch_size, max_len, nb_hidden*2]
    lstm_output = tf.nn.dropout(
        tf.concat(rnn_outputs, axis=2, name='lstm_output'),
        keep_prob=1. - self.dropout_rate_ph, name='lstm_output_dropout')

    # softmax
    hidden_size = int(lstm_output.shape[-1])
    self.outputs = tf.reshape(lstm_output, [-1, hidden_size], name='outputs')
    self.softmax_w = tf.get_variable('softmax_w', [hidden_size, self._nb_classes])
    self.softmax_b = tf.get_variable('softmax_b', [self._nb_classes])
    self.logits = tf.reshape(
        tf.matmul(self.outputs, self.softmax_w) + self.softmax_b,
        shape=[-1, self._sequence_length, self._nb_classes], name='logits')

    # compute loss
    self.loss = self.compute_loss()
    self.l2_loss = self._l2_rate * (tf.nn.l2_loss(self.softmax_w) + tf.nn.l2_loss(self.softmax_b))
    self.total_loss = self.loss + self.l2_loss

    # train op
    optimizer = tf.train.AdamOptimizer(learning_rate=self._learning_rate)
    grads_and_vars = optimizer.compute_gradients(self.total_loss)
    nil_grads_and_vars = []
    for g, v in grads_and_vars:
        if v.name in self.nil_vars:
            nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            nil_grads_and_vars.append((g, v))

    global_step = tf.Variable(0, name='global_step', trainable=False)
    if self._clip:
        # clip by global norm
        gradients, variables = zip(*nil_grads_and_vars)
        gradients, _ = tf.clip_by_global_norm(gradients, self._clip)
        self.train_op = optimizer.apply_gradients(
            zip(gradients, variables), name='train_op', global_step=global_step)
    else:
        self.train_op = optimizer.apply_gradients(
            nil_grads_and_vars, name='train_op', global_step=global_step)

    # TODO: sess / visible_device_list still need revisiting
    gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    # init all variables
    init = tf.global_variables_initializer()
    self.sess.run(init)
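# --- sketch: the _get_rnn_unit / compute_loss methods used above ---
# Neither method is shown in this file. The cell factory below follows the
# (lstm, gru) convention used elsewhere in this code; the masked cross-entropy
# form of compute_loss is an assumption (the repo may use a CRF loss instead),
# and it assumes self.input_label_ph is created elsewhere in the class.
def _get_rnn_unit(self, rnn_unit):
    # return a fresh (forward, backward) cell pair for one bi-lstm layer
    if rnn_unit == 'lstm':
        fw = tf.nn.rnn_cell.BasicLSTMCell(self._nb_hidden, forget_bias=1.)
        bw = tf.nn.rnn_cell.BasicLSTMCell(self._nb_hidden, forget_bias=1.)
    elif rnn_unit == 'gru':
        fw = tf.nn.rnn_cell.GRUCell(self._nb_hidden)
        bw = tf.nn.rnn_cell.GRUCell(self._nb_hidden)
    else:
        raise ValueError('rnn_unit must be in (lstm, gru)!')
    return fw, bw

def compute_loss(self):
    # one-hot gold labels: [batch, max_len, nb_classes]
    labels = tf.one_hot(self.input_label_ph, self._nb_classes)
    # per-token cross-entropy: [batch, max_len]
    cross_entropy = -tf.reduce_sum(
        labels * tf.log(tf.nn.softmax(self.logits) + 1e-9), axis=2)
    # zero out the padded positions before averaging
    mask = tf.sequence_mask(self.sequence_actual_length,
                            maxlen=self._sequence_length, dtype=tf.float32)
    return tf.reduce_sum(cross_entropy * mask) / tf.reduce_sum(mask)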
def build(self):
    """Build the bi-RNN tagger together with an adversarial generator branch."""
    self.input_ph = tf.placeholder(
        dtype=tf.int32, shape=[None, self.sequence_length], name='input')
    self.sequence_actual_lengths_ph = tf.placeholder(
        dtype=tf.int32, shape=[None], name='sequence_actual_lengths')
    self.sequence_last_index_ph = tf.placeholder(
        dtype=tf.int32, name='sequence_last_index')
    self.dropout_rate_ph = tf.placeholder(tf.float32, name='dropout_rate')
    self.label_ph = tf.placeholder(
        tf.int32, shape=[None, self.sequence_length], name='label')
    self.weight_dropout_ph = tf.placeholder(dtype=tf.float32, name='weight_dropout')

    if self.feature_init_weight is None:
        self.feature_weight = tf.Variable(
            initial_value=uniform_tensor(
                shape=self.feature_weight_shape, name='f_W', dtype=tf.float32),
            name='feature_W')
    else:
        self.feature_weight = tf.Variable(
            initial_value=self.feature_init_weight, name='feature_W',
            dtype=tf.float32, trainable=True)
    self.feature_embedding = tf.nn.dropout(
        x=tf.nn.embedding_lookup(self.feature_weight, self.input_ph,
                                 name='feature_embedding'),
        keep_prob=1.0 - self.weight_dropout_ph, name='feature_embedding_dropout')

    # define rnn cell
    def cell():
        if self.rnn_unit == 'lstm':
            return rnn.BasicLSTMCell(self.hidden_size, state_is_tuple=True)
        elif self.rnn_unit == 'gru':
            return rnn.GRUCell(self.hidden_size)
        else:
            raise ValueError('rnn_unit must be in (lstm, gru)')

    def attn_cell():
        # output dropout is currently disabled (keep_prob=1.0)
        return rnn.DropoutWrapper(cell(), output_keep_prob=1.0)

    cell_fw = rnn.MultiRNNCell(
        [attn_cell() for _ in range(self.num_layers)], state_is_tuple=True)
    cell_bw = rnn.MultiRNNCell(
        [attn_cell() for _ in range(self.num_layers)], state_is_tuple=True)
    # create the gan's generator cells
    g_cell_fw = rnn.MultiRNNCell(
        [attn_cell() for _ in range(self.num_layers)], state_is_tuple=True)
    g_cell_bw = rnn.MultiRNNCell(
        [attn_cell() for _ in range(self.num_layers)], state_is_tuple=True)

    self.feature_entity_embedding = self.feature_embedding
    self.bi_rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, inputs=self.feature_entity_embedding,
        sequence_length=self.sequence_actual_lengths_ph,
        dtype=tf.float32, scope='bi-rnn')
    # outputs of the gan's generator
    self.g_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        g_cell_fw, g_cell_bw, inputs=self.feature_entity_embedding,
        sequence_length=self.sequence_actual_lengths_ph,
        dtype=tf.float32, scope='g_bi-rnn')
    self.bi_rnn_outputs_dropout = tf.nn.dropout(
        tf.concat(self.bi_rnn_outputs, axis=-1, name='bi_rnn_outputs'),
        keep_prob=1.0, name='bi_rnn_outputs_dropout')
    self.g_bi_rnn_outputs_dropout = tf.nn.dropout(
        tf.concat(self.g_outputs, axis=-1, name='g_bi_rnn_outputs'),
        keep_prob=1.0, name='g_bi_rnn_outputs_dropout')

    # outputs of sequences without paddings
    mask = tf.sequence_mask(self.sequence_actual_lengths_ph, self.sequence_length)
    self.outputs = tf.boolean_mask(self.bi_rnn_outputs_dropout, mask, name='outputs')
    g_outputs = tf.boolean_mask(self.g_bi_rnn_outputs_dropout, mask, name='g_outputs')

    self.softmax_W = tf.get_variable(
        'softmax_W', [self.hidden_size * 2, self.classes], dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer())
    self.g_softmax_W = tf.get_variable(
        'g_softmax_W', [self.hidden_size * 2, self.classes], dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer())
    self.softmax_b = tf.get_variable(
        'softmax_b', [self.classes], dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer())
    self.g_softmax_b = tf.get_variable(
        'g_softmax_b', [self.classes], dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer())
    self.softmax_W_binary = tf.get_variable(
        'softmax_W_binary', [self.hidden_size * 2, 2], dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer())
    self.softmax_b_binary = tf.get_variable(
        'softmax_b_binary', [2], dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer())

    self.logits = tf.nn.xw_plus_b(self.outputs, self.softmax_W, self.softmax_b,
                                  name='logits')
    self.g_logits = tf.nn.xw_plus_b(g_outputs, self.g_softmax_W, self.g_softmax_b,
                                    name='g_logits')
    labels = tf.contrib.layers.one_hot_encoding(
        tf.boolean_mask(self.label_ph, mask), num_classes=self.classes)
    self.d_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=self.g_logits))
    self.g_loss = tf.reduce_mean(
        tf.multiply(labels, tf.log(1 - tf.nn.softmax(self.g_logits))))
    # difference loss: penalize correlation between generator and tagger outputs
    self.diff_loss = tf.norm(
        tf.matmul(tf.transpose(g_outputs, [1, 0]), self.outputs))

    # all trainable variables
    all_trainable_vars = tf.trainable_variables()
    # variables related to the discriminator
    vars_d = [var for var in all_trainable_vars
              if var.op.name in ('g_softmax_W', 'g_softmax_b')]
    optimizer_d = tf.train.GradientDescentOptimizer(0.1)
    self.train_op_d = optimizer_d.minimize(self.d_loss, var_list=vars_d)
    # variables related to the generator
    vars_g = [var for var in all_trainable_vars if var.op.name in
              ['g_bi-rnn/fw/multi_rnn_cell/cell_0/basic_lstm_cell/weights',
               'g_bi-rnn/fw/multi_rnn_cell/cell_0/basic_lstm_cell/biases',
               'g_bi-rnn/bw/multi_rnn_cell/cell_0/basic_lstm_cell/weights',
               'g_bi-rnn/bw/multi_rnn_cell/cell_0/basic_lstm_cell/biases']]
    optimizer_g = tf.train.GradientDescentOptimizer(0.1)
    self.train_op_g = optimizer_g.minimize(
        self.g_loss + 0.1 * self.diff_loss, var_list=vars_g)

    self.logits_binary = tf.nn.xw_plus_b(
        self.outputs, self.softmax_W_binary, self.softmax_b_binary,
        name='logits_binary')

    self.loss = self.compute_loss()
    self.l2_loss = self.l2_rate * (tf.nn.l2_loss(self.softmax_W) +
                                   tf.nn.l2_loss(self.softmax_b))
    self.total_loss = self.loss + self.l2_loss + self.diff_loss * 0.00001
    optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    grads_and_vars = optimizer.compute_gradients(self.total_loss)
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    self.train_op = optimizer.apply_gradients(
        grads_and_vars=grads_and_vars, global_step=self.global_step)

    gpu_options = tf.GPUOptions(visible_device_list='4,5,6,7', allow_growth=True)
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False,
                                    gpu_options=gpu_options)
    self.sess = tf.Session(config=session_config)
    self.sess.run(tf.global_variables_initializer())
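# --- sketch: one alternating training step for the adversarial variant ---
# The three train ops above are meant to be run separately. A minimal sketch
# of how they might be alternated per batch; `model`, `next_batch`,
# `train_data` and the batch field names are hypothetical.
for batch in next_batch(train_data, batch_size=64):
    feed = {
        model.input_ph: batch['tokens'],                     # [batch, seq_len]
        model.label_ph: batch['labels'],                     # [batch, seq_len]
        model.sequence_actual_lengths_ph: batch['lengths'],  # [batch]
        model.dropout_rate_ph: 0.5,
        model.weight_dropout_ph: 0.5,
    }
    model.sess.run(model.train_op_d, feed_dict=feed)  # update g_softmax_W / g_softmax_b
    model.sess.run(model.train_op_g, feed_dict=feed)  # update the g_bi-rnn cells
    _, loss = model.sess.run([model.train_op, model.total_loss], feed_dict=feed)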
def build_model(self):
    for feature_name in self._feature_names:
        # input placeholder
        self.input_feature_ph_dict[feature_name] = tf.placeholder(
            dtype=tf.int32, shape=[None, self._sequence_length],
            name='input_feature_ph_%s' % feature_name)
        # dropout rate placeholder
        self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
            tf.float32, name='dropout_ph_%s' % feature_name)
        # init feature weights; randomly initialize those not specified
        if feature_name not in self._feature_init_weight_dict:
            feature_weight = uniform_tensor(
                shape=self._feature_weight_shape_dict[feature_name],
                name='f_w_%s' % feature_name)
            self.feature_weight_dict[feature_name] = tf.Variable(
                initial_value=feature_weight,
                name='feature_weight_%s' % feature_name)
        else:
            self.feature_weight_dict[feature_name] = tf.Variable(
                initial_value=self._feature_init_weight_dict[feature_name],
                name='feature_weight_%s' % feature_name)
            self.nil_vars.add(self.feature_weight_dict[feature_name].name)
        # init dropout rate; default to 0 when not specified
        if feature_name not in self._feature_weight_dropout_dict:
            self._feature_weight_dropout_dict[feature_name] = 0.

    # char feature
    if self._use_char_feature:
        # char feature weights
        feature_weight = uniform_tensor(
            shape=self._feature_weight_shape_dict['char'], name='f_w_%s' % 'char')
        self.feature_weight_dict['char'] = tf.Variable(
            initial_value=feature_weight, name='feature_weight_%s' % 'char')
        self.nil_vars.add(self.feature_weight_dict['char'].name)
        self.input_feature_ph_dict['char'] = tf.placeholder(
            dtype=tf.int32,
            shape=[None, self._sequence_length, self._word_length],
            name='input_feature_ph_%s' % 'char')

    # init embeddings
    self.embedding_features = []
    for feature_name in self._feature_names:
        print(self.input_feature_ph_dict[feature_name].shape)
        embedding_feature = tf.nn.dropout(
            tf.nn.embedding_lookup(
                self.feature_weight_dict[feature_name],
                ids=self.input_feature_ph_dict[feature_name],
                name='embedding_feature_%s' % feature_name),
            keep_prob=1. - self.weight_dropout_ph_dict[feature_name],
            name='embedding_feature_dropout_%s' % feature_name)
        self.embedding_features.append(embedding_feature)
        print(embedding_feature.shape)
    # char embedding
    if self._use_char_feature:
        char_embedding_feature = tf.nn.embedding_lookup(
            self.feature_weight_dict['char'],
            ids=self.input_feature_ph_dict['char'],
            name='embedding_feature_%s' % 'char')
        # conv over the char dimension
        conv_feature_char = MultiConvolutional3D(
            char_embedding_feature,
            filter_length_list=self._conv_filter_len_list,
            nb_filter_list=self._conv_filter_size_list).output
        conv_feature_char = tf.nn.dropout(
            conv_feature_char, keep_prob=1 - self.cnn_dropout_rate_ph)

    # concat all features along the embedding dimension
    input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
        else tf.concat(values=self.embedding_features, axis=2, name='input_features')
    print('input features shape', input_features.shape)
    if self._use_char_feature:
        input_features = tf.concat([input_features, conv_feature_char], axis=-1)

    # multi-layer bi-lstm
    _fw_cells = []
    _bw_cells = []
    for _ in range(self._num_layers):
        fw, bw = self._get_rnn_unit(self._rnn_unit)
        _fw_cells.append(tf.nn.rnn_cell.DropoutWrapper(
            fw, output_keep_prob=1 - self.rnn_dropout_rate_ph))
        _bw_cells.append(tf.nn.rnn_cell.DropoutWrapper(
            bw, output_keep_prob=1 - self.rnn_dropout_rate_ph))
    fw_cell = tf.nn.rnn_cell.MultiRNNCell(_fw_cells)
    bw_cell = tf.nn.rnn_cell.MultiRNNCell(_bw_cells)

    # actual length of each sequence (0 is the padding value)
    self.sequence_actual_length = get_sequence_actual_length(
        self.input_feature_ph_dict[self._feature_names[0]], dim=1)
    print(self.sequence_actual_length.shape)

    input_size = int(input_features.shape[-1])
    print('input_features shape ', input_features.shape)
    rnn_inputs = tf.reshape(input_features, [-1, self._sequence_length, input_size])
    print('rnn inputs shape ', rnn_inputs.shape)
    rnn_lengths = tf.reshape(self.sequence_actual_length, [-1])

    # todo: add encoder output
    rnn_outputs, rnn_state = tf.nn.bidirectional_dynamic_rnn(
        fw_cell, bw_cell, rnn_inputs, scope='bi-lstm',
        dtype=tf.float32, sequence_length=rnn_lengths)
    # shape = [batch_size, max_len, nb_hidden*2]
    rnn_outputs = tf.concat(rnn_outputs, axis=2, name='lstm_output')
    rnn_outputs = tf.nn.dropout(rnn_outputs, keep_prob=1. - self.dropout_rate_ph,
                                name='lstm_output_dropout')
    rnn_hidden = self.merge_bi_rnn_state(rnn_state).h
    batch_size = tf.shape(input_features)[0]  # needed below to reshape the slot logits
    print('rnn hidden shape', rnn_hidden.shape)

    # (dialogue-context rnn over turns, kept for reference)
    # rnn_outputs = tf.reshape(
    #     rnn_outputs, [batch_size, turn_size, self._sequence_length, self._nb_hidden * 2])
    # rnn_hidden = tf.reshape(rnn_hidden, [batch_size, turn_size, self._nb_hidden * 2])
    # rnn_hidden = tf.nn.dropout(rnn_hidden, keep_prob=1. - self.dropout_rate_ph)
    # print('rnn outputs shape', rnn_outputs.shape)
    # print('rnn hidden shape', rnn_hidden.shape)
    # ctx_cell = rnn.BasicLSTMCell(self._nb_hidden * 2, forget_bias=1., state_is_tuple=True)
    # ctx_lengths = get_sequence_actual_length(
    #     self.input_feature_ph_dict[self._feature_names[0]], dim=[1, 2])
    # print('ctx inputs shape', rnn_hidden.shape)
    # print('ctx lengths shape', ctx_lengths.shape)
    # ctx_outputs, _ = tf.nn.dynamic_rnn(cell=ctx_cell, inputs=rnn_hidden,
    #                                    sequence_length=ctx_lengths, dtype=tf.float32)

    # predict intents
    intent_logits = tf.layers.dense(rnn_hidden, 24, activation=None)  # !!! number of intent classes is hard-coded
    label_intents = tf.reshape(self.input_label_intent, [-1])
    print('intent_logits shape', intent_logits.shape)
    print('input_label_intent shape', self.input_label_intent.shape)
    intent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=label_intents, logits=intent_logits)
    # mask out empty (fully padded) sequences
    intent_mask = tf.reshape(tf.sign(self.sequence_actual_length), [-1])
    intent_mask = tf.cast(intent_mask, dtype=tf.float32)
    print('intent_mask shape', intent_mask.shape)
    self.intent_loss = tf.reduce_sum(
        intent_loss * intent_mask) / tf.reduce_sum(intent_mask)

    pred_intents = tf.argmax(intent_logits, axis=1)
    self.pred_intents = tf.reshape(pred_intents, [-1])
    print('pred_intents shape', self.pred_intents.shape)
    correct_preds = tf.equal(tf.cast(pred_intents, dtype=tf.int32),
                             tf.cast(label_intents, dtype=tf.int32))
    self.intent_accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32) * intent_mask) \
        / tf.reduce_sum(intent_mask)
    self.intent_count = tf.cast(tf.reduce_sum(intent_mask), tf.int32)
    self.intent_logits = intent_logits

    # predict slots
    # (intent-conditioned slot decoding with the context rnn, kept for reference)
    # batch * turn * hidden
    # print('ctx outputs shape', ctx_outputs.shape)
    # ctx_outputs = tf.reshape(ctx_outputs, [batch_size, turn_size, self._nb_hidden * 2])
    # rnn_intent_outputs = [ctx_outputs for _ in range(self._sequence_length)]
    # rnn_intent_outputs = tf.stack(rnn_intent_outputs, axis=2)
    # print('rnn intent outputs', rnn_intent_outputs.shape)
    # ctx_h = tf.reshape(ctx_outputs[:, :, :self._nb_hidden], [-1, self._nb_hidden])
    # ctx_c = tf.reshape(ctx_outputs[:, :, self._nb_hidden:], [-1, self._nb_hidden])
    # init_fw_hidden = []
    # init_bw_hidden = []
    # for _ in range(self._num_layers):
    #     lstm_hidden = rnn.LSTMStateTuple(h=ctx_h, c=ctx_c)
    #     init_fw_hidden += [lstm_hidden]
    #     init_bw_hidden += [lstm_hidden]
    # init_fw_hidden = tuple(init_fw_hidden)
    # init_bw_hidden = tuple(init_bw_hidden)
    # slot_outputs = tf.concat([rnn_outputs, rnn_intent_outputs], axis=3)
    # slot_outputs = tf.reshape(slot_outputs, [-1, self._nb_hidden * 4])
    # # re-run the rnn with the context state as the initial state
    # slot_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
    #     fw_cell, bw_cell, rnn_inputs,
    #     scope='bi-lstm',
    #     initial_state_fw=init_fw_hidden,
    #     initial_state_bw=init_bw_hidden,
    #     dtype=tf.float32,
    #     sequence_length=rnn_lengths)
    # slot_outputs = tf.concat(slot_outputs, axis=2, name='slot_output')

    slot_logits = tf.layers.dense(rnn_outputs, self._nb_classes, activation=None)
    self.slot_logits = tf.reshape(
        slot_logits, [batch_size, self._sequence_length, self._nb_classes])
    print('slot logits shape', self.slot_logits.shape)

    slot_labels = tf.reshape(self.input_label_ph, [-1, self._sequence_length])
    slot_logits = tf.reshape(self.slot_logits,
                             [-1, self._sequence_length, self._nb_classes])
    slot_lengths = tf.reshape(self.sequence_actual_length, [-1])
    log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
        slot_logits, slot_labels, slot_lengths)
    print('transition params shape', self.transition_params.shape)
    print('log likelihood loss', log_likelihood.shape)
    self.slot_loss = tf.reduce_sum(
        -log_likelihood * intent_mask) / tf.reduce_sum(intent_mask)

    self.total_loss = self.intent_loss + self.slot_loss
    self.train_loss = self.slot_loss + self.intent_loss * self.intent_weight_ph

    # train op (built below via apply_gradients, so no separate minimize() call)
    optimizer = tf.train.AdamOptimizer(learning_rate=self._learning_rate)
    grads_and_vars = optimizer.compute_gradients(self.train_loss)
    nil_grads_and_vars = []
    for g, v in grads_and_vars:
        if v.name in self.nil_vars:
            nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            nil_grads_and_vars.append((g, v))

    global_step = tf.Variable(0, name='global_step', trainable=False)
    if self._clip:
        # clip by global norm
        gradients, variables = zip(*nil_grads_and_vars)
        gradients, _ = tf.clip_by_global_norm(gradients, self._clip)
        self.train_op = optimizer.apply_gradients(
            zip(gradients, variables), name='train_op', global_step=global_step)
    else:
        self.train_op = optimizer.apply_gradients(
            nil_grads_and_vars, name='train_op', global_step=global_step)

    # TODO: sess / visible_device_list still need revisiting
    gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    # init all variables
    init = tf.global_variables_initializer()
    self.sess.run(init)
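# --- sketch: CRF decoding of the slot logits at inference time ---
# Since the slot layer is trained with crf_log_likelihood, test-time
# predictions need a Viterbi decode with the learned transition matrix.
# `model` and `feed` are hypothetical; `feed` would be built as for training,
# with the dropout placeholders set to 0.
logits, trans, lengths = model.sess.run(
    [model.slot_logits, model.transition_params, model.sequence_actual_length],
    feed_dict=feed)
pred_slots = []
for score, length in zip(logits, lengths):
    # decode only the non-padded prefix of each sequence
    viterbi_seq, _ = tf.contrib.crf.viterbi_decode(score[:length], trans)
    pred_slots.append(viterbi_seq)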
def build_model(self):
    # init placeholders, weights and dropout rates
    self.input_feature_ph_dict = dict()
    self.input_char_ph_dict = dict()
    self.char_weight_dict = dict()
    self.weight_dropout_ph_dict = dict()  # dict of feature dropout placeholders
    self.weight_dropout_ph_dict['char'] = tf.placeholder(tf.float32, name='dropout_char')
    self.feature_weight_dict = dict()
    self.nil_vars = set()
    self.dropout_rate_ph = tf.placeholder(tf.float32, name='dropout_rate_ph')
    # label placeholder
    self.input_label_ph = tf.placeholder(
        dtype=tf.int32, shape=[None, self.sequence_length], name='input_label_ph')

    # read in the features and build the per-feature graph
    for feature_name in self.feature_names:
        # input placeholder: one per feature
        self.input_feature_ph_dict[feature_name] = tf.placeholder(
            dtype=tf.int32, shape=[None, self.sequence_length],
            name='input_feature_ph_%s' % feature_name)
        # dropout rate placeholder
        self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
            tf.float32, name='dropout_ph_%s' % feature_name)
        # init feature weights; randomly initialize those not specified
        if feature_name not in self.feature_init_weight_dict:
            feature_weight = uniform_tensor(
                shape=self.feature_weight_shape_dict[feature_name],
                name='f_w_%s' % feature_name)
            self.feature_weight_dict[feature_name] = tf.Variable(
                initial_value=feature_weight,
                name='feature_weight_%s' % feature_name)
        else:
            self.feature_weight_dict[feature_name] = tf.Variable(
                initial_value=self.feature_init_weight_dict[feature_name],
                name='feature_weight_%s' % feature_name)
            self.nil_vars.add(self.feature_weight_dict[feature_name].name)
        # init dropout rate; default to 0 when not specified
        if feature_name not in self.feature_weight_dropout_dict:
            self.feature_weight_dropout_dict[feature_name] = 0.

    # init the char embedding space
    self.input_char_ph_dict['char'] = tf.placeholder(
        dtype=tf.int32, shape=[None, self.sequence_length, self.word_length],
        name='char_input')
    self.input_char_flat = tf.reshape(
        self.input_char_ph_dict['char'],
        [-1, self.word_length * self.sequence_length], name='input_x_char_flat')
    self.char_weight_dict['char'] = tf.Variable(
        initial_value=self.char_init_weight_dict['char'], name='char_init')
    self.char_embedding_init = tf.nn.dropout(
        tf.nn.embedding_lookup(self.char_weight_dict['char'],
                               ids=self.input_char_flat, name='char_embedding'),
        keep_prob=1. - self.weight_dropout_ph_dict['char'], name='char_dropout')

    with tf.name_scope('char_conv'):
        self.filter_shape = [self.filter_size, self.char_embedding_size, self.num_filter]
        self.W_conv = tf.Variable(
            tf.truncated_normal(self.filter_shape, stddev=0.1), name='W_conv')
        # note: b_conv is defined but not applied to the conv output
        self.b_conv = tf.Variable(
            tf.constant(0.1, shape=[self.num_filter]), name='b_conv')
        self.conv = tf.nn.conv1d(self.char_embedding_init, self.W_conv,
                                 stride=1, padding='SAME', name='conv')
        self.h_expand = tf.expand_dims(self.conv, -1)
        # max-pool over each word's chars: one num_filter-dim vector per word
        self.pooled = tf.nn.max_pool(
            self.h_expand,
            ksize=[1, self.sequence_length * self.word_length, 1, 1],
            strides=[1, self.word_length, 1, 1], padding='SAME', name='pooled')
        self.char_pool_flat = tf.reshape(
            self.pooled, [-1, self.sequence_length, self.num_filter],
            name='char_pool_flat')

    # init embeddings: embed each feature, then apply dropout
    self.embedding_features = []
    for feature_name in self.feature_names:
        embedding_feature = tf.nn.dropout(
            tf.nn.embedding_lookup(
                self.feature_weight_dict[feature_name],
                ids=self.input_feature_ph_dict[feature_name],
                name='embedding_feature_%s' % feature_name),
            keep_prob=1. - self.weight_dropout_ph_dict[feature_name],
            name='embedding_feature_dropout_%s' % feature_name)
        self.embedding_features.append(embedding_feature)

    # concat all features along the embedding dimension
    input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
        else tf.concat(values=self.embedding_features, axis=2, name='input_features')
    # append the char-level conv features
    input_features = tf.concat([input_features, self.char_pool_flat], axis=2)

    # bi-lstm
    if self.rnn_unit == 'lstm':
        fw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
        bw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
    elif self.rnn_unit == 'gru':
        fw_cell = rnn.GRUCell(self.nb_hidden)
        bw_cell = rnn.GRUCell(self.nb_hidden)
    else:
        raise ValueError('rnn_unit must be in (lstm, gru)!')

    # actual length of each sequence (0 is the padding value)
    self.sequence_actual_length = get_sequence_actual_length(
        self.input_feature_ph_dict[self.feature_names[0]])

    rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        fw_cell, bw_cell, input_features, scope='bi-lstm',
        dtype=tf.float32, sequence_length=self.sequence_actual_length)
    # shape = [batch_size, max_len, nb_hidden*2];
    # dropout keeps the input/output dimensions unchanged
    lstm_output = tf.nn.dropout(
        tf.concat(rnn_outputs, axis=2, name='lstm_output'),
        keep_prob=1. - self.dropout_rate_ph, name='lstm_output_dropout')

    # softmax: flatten the encoder output
    self.outputs = tf.reshape(lstm_output, [-1, self.nb_hidden * 2], name='outputs')
    self.softmax_w = tf.get_variable('softmax_w', [self.nb_hidden * 2, self.nb_classes])
    self.softmax_b = tf.get_variable('softmax_b', [self.nb_classes])
    self.logits = tf.reshape(
        tf.matmul(self.outputs, self.softmax_w) + self.softmax_b,
        shape=[-1, self.sequence_length, self.nb_classes], name='logits')

    # compute loss
    self.loss = self.compute_loss()
    self.l2_loss = self.l2_rate * (tf.nn.l2_loss(self.softmax_w) + tf.nn.l2_loss(self.softmax_b))
    self.total_loss = self.loss + self.l2_loss

    # train op
    optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    grads_and_vars = optimizer.compute_gradients(self.total_loss)
    nil_grads_and_vars = []
    for g, v in grads_and_vars:
        if v.name in self.nil_vars:
            nil_grads_and_vars.append((zero_nil_slot(g), v))
        else:
            nil_grads_and_vars.append((g, v))

    global_step = tf.Variable(0, name='global_step', trainable=False)
    if self.clip:
        # clip by global norm
        gradients, variables = zip(*nil_grads_and_vars)
        gradients, _ = tf.clip_by_global_norm(gradients, self.clip)
        self.train_op = optimizer.apply_gradients(
            zip(gradients, variables), name='train_op', global_step=global_step)
    else:
        self.train_op = optimizer.apply_gradients(
            nil_grads_and_vars, name='train_op', global_step=global_step)

    # TODO: sess / visible_device_list still need revisiting
    gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    # init all variables
    init = tf.global_variables_initializer()
    self.sess.run(init)
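# --- sketch: feeding one training step of the char-CNN + bi-LSTM variant ---
# `model`, `batch_chars`, `batch_labels` and `batch_features` are hypothetical;
# the dropout placeholders would be fed as 0 at evaluation time.
feed_dict = {
    model.dropout_rate_ph: 0.5,
    model.weight_dropout_ph_dict['char']: 0.5,
    model.input_char_ph_dict['char']: batch_chars,  # [batch, seq_len, word_len]
    model.input_label_ph: batch_labels,             # [batch, seq_len]
}
for feature_name in model.feature_names:
    feed_dict[model.input_feature_ph_dict[feature_name]] = batch_features[feature_name]
    feed_dict[model.weight_dropout_ph_dict[feature_name]] = \
        model.feature_weight_dropout_dict[feature_name]
_, loss = model.sess.run([model.train_op, model.total_loss], feed_dict=feed_dict)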