def __init__(self, source_vocab_size, tag_vocab_size, label_vocab_size, buckets,
             word_embedding_size, size, num_layers, max_gradient_norm, batch_size,
             dropout_keep_prob=1.0, use_lstm=False, bidirectional_rnn=True,
             num_samples=1024, use_attention=False, task=None, forward_only=False,
             pred_only=False):
    self.source_vocab_size = source_vocab_size
    self.tag_vocab_size = tag_vocab_size
    self.label_vocab_size = label_vocab_size
    self.word_embedding_size = word_embedding_size
    self.cell_size = size
    self.num_layers = num_layers
    self.buckets = buckets
    self.batch_size = batch_size
    self.bidirectional_rnn = bidirectional_rnn
    self.global_step = tf.Variable(0, trainable=False)
    self.pred_only = pred_only

    # If we use sampled softmax, we need an output projection.
    softmax_loss_function = None

    # Create the internal multi-layer cell for our RNN.
    def create_cell():
        if not forward_only and dropout_keep_prob < 1.0:
            single_cell = lambda: BasicLSTMCell(self.cell_size)
            cell = MultiRNNCell([single_cell() for _ in range(self.num_layers)])
            cell = DropoutWrapper(cell,
                                  input_keep_prob=dropout_keep_prob,
                                  output_keep_prob=dropout_keep_prob)
        else:
            single_cell = lambda: BasicLSTMCell(self.cell_size)
            cell = MultiRNNCell([single_cell() for _ in range(self.num_layers)])
        return cell

    self.cell_fw = create_cell()
    self.cell_bw = create_cell()

    # Feeds for inputs.
    self.encoder_inputs = []
    self.tags = []
    self.tag_weights = []
    self.labels = []
    self.sequence_length = tf.placeholder(tf.int32, [None], name="sequence_length")

    for i in xrange(buckets[-1][0]):
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    if not self.pred_only:
        for i in xrange(buckets[-1][1]):
            self.tags.append(
                tf.placeholder(tf.float32, shape=[None], name="tag{0}".format(i)))
            self.tag_weights.append(
                tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
        self.labels.append(tf.placeholder(tf.float32, shape=[None], name="label"))

    base_rnn_output = self.generate_rnn_output()
    encoder_outputs, encoder_state, attention_states = base_rnn_output

    if task['tagging'] == 1:
        seq_labeling_outputs = seq_labeling.generate_sequence_output(
            self.source_vocab_size, encoder_outputs, encoder_state,
            self.sequence_length, self.tag_vocab_size, self.tags,
            self.tag_weights, buckets,
            softmax_loss_function=softmax_loss_function,
            use_attention=use_attention)
        self.tagging_output, self.tagging_loss = seq_labeling_outputs
    if task['intent'] == 1:
        seq_intent_outputs = seq_classification.generate_single_output(
            encoder_state, attention_states, self.sequence_length,
            self.labels, self.label_vocab_size, buckets,
            softmax_loss_function=softmax_loss_function,
            use_attention=use_attention)
        self.classification_output, self.classification_loss = seq_intent_outputs

    if task['tagging'] == 1:
        self.loss = self.tagging_loss
    elif task['intent'] == 1:
        self.loss = self.classification_loss

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    if not forward_only:
        opt = tf.train.AdamOptimizer()
        if task['joint'] == 1:
            # Backpropagate the intent and tagging loss; one may further adjust
            # the weights for the two costs.
            gradients = tf.gradients(
                [self.tagging_loss, self.classification_loss], params)
        elif task['tagging'] == 1:
            gradients = tf.gradients(self.tagging_loss, params)
        elif task['intent'] == 1:
            gradients = tf.gradients(self.classification_loss, params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.gradient_norm = norm
        self.update = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables())
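# Note: the variant above calls self.generate_rnn_output(), which is not shown in
# this listing. The sketch below is an assumed implementation, not the original:
# it embeds the bucketed encoder inputs and runs a static bi-directional RNN with
# the cell_fw/cell_bw built in the constructor. The "embedding" variable name,
# the state combination, and the attention reshaping are all assumptions.
def generate_rnn_output(self):
    """Assumed sketch: bi-directional encoder over embedded encoder inputs."""
    with tf.variable_scope("generate_rnn_output"):
        # Hypothetical embedding table; the original may create or share it elsewhere.
        embedding = tf.get_variable(
            "embedding", [self.source_vocab_size, self.word_embedding_size])
        encoder_emb_inputs = [tf.nn.embedding_lookup(embedding, x)
                              for x in self.encoder_inputs]
        # Bi-directional encoder over the list of per-timestep inputs.
        encoder_outputs, state_fw, state_bw = tf.contrib.rnn.static_bidirectional_rnn(
            self.cell_fw, self.cell_bw, encoder_emb_inputs,
            sequence_length=self.sequence_length, dtype=tf.float32)
        # Combine the top-layer hidden states of the two directions.
        encoder_state = tf.concat([state_fw[-1].h, state_bw[-1].h], 1)
        # Attention memory: one (2 * cell_size)-dim vector per timestep.
        top_states = [tf.reshape(o, [-1, 1, 2 * self.cell_size])
                      for o in encoder_outputs]
        attention_states = tf.concat(top_states, 1)
        return encoder_outputs, encoder_state, attention_states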
def __init__(self, source_vocab_size, tag_vocab_size, label_vocab_size, buckets,
             word_embedding_size, size, num_layers, max_gradient_norm, batch_size,
             dropout_keep_prob=1.0, use_lstm=False, bidirectional_rnn=True,
             num_samples=1024, use_attention=False, task=None, forward_only=False):
    self.source_vocab_size = source_vocab_size
    self.tag_vocab_size = tag_vocab_size
    self.label_vocab_size = label_vocab_size
    self.buckets = buckets
    self.batch_size = batch_size
    self.global_step = tf.Variable(0, trainable=False)

    # If we use sampled softmax, we need an output projection.
    softmax_loss_function = None

    # Create the internal multi-layer cell for our RNN.
    single_cell = tf.contrib.rnn.GRUCell(size)
    if use_lstm:
        single_cell = tf.contrib.rnn.BasicLSTMCell(size)
    cell = single_cell
    if num_layers > 1:
        cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)
    if not forward_only and dropout_keep_prob < 1.0:
        cell = tf.contrib.rnn.DropoutWrapper(cell,
                                             input_keep_prob=dropout_keep_prob,
                                             output_keep_prob=dropout_keep_prob)

    # Feeds for inputs.
    self.encoder_inputs = []
    self.tags = []
    self.tag_weights = []
    self.labels = []
    self.sequence_length = tf.placeholder(tf.int32, [None], name="sequence_length")

    for i in xrange(buckets[-1][0]):
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in xrange(buckets[-1][1]):
        self.tags.append(
            tf.placeholder(tf.float32, shape=[None], name="tag{0}".format(i)))
        self.tag_weights.append(
            tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
    self.labels.append(tf.placeholder(tf.float32, shape=[None], name="label"))

    base_rnn_output = generate_encoder_output.generate_embedding_RNN_output(
        self.encoder_inputs, cell, self.source_vocab_size, word_embedding_size,
        dtype=dtypes.float32, scope=None, sequence_length=self.sequence_length,
        bidirectional_rnn=bidirectional_rnn)
    encoder_outputs, encoder_state, attention_states = base_rnn_output

    if task['tagging'] == 1:
        self.tagging_output, self.tagging_loss = seq_labeling.generate_sequence_output(
            self.source_vocab_size, encoder_outputs, encoder_state, self.tags,
            self.sequence_length, self.tag_vocab_size, self.tag_weights, buckets,
            softmax_loss_function=softmax_loss_function,
            use_attention=use_attention)
    if task['intent'] == 1:
        self.classification_output, self.classification_loss = \
            seq_classification.generate_single_output(
                encoder_state, attention_states, self.sequence_length,
                self.labels, self.label_vocab_size, buckets,
                softmax_loss_function=softmax_loss_function,
                use_attention=use_attention)

    if task['tagging'] == 1:
        self.loss = self.tagging_loss
    elif task['intent'] == 1:
        self.loss = self.classification_loss

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    if not forward_only:
        opt = tf.train.AdamOptimizer()
        if task['joint'] == 1:
            # Backpropagate the intent and tagging loss; one may further adjust
            # the weights for the two costs.
            gradients = tf.gradients(
                [self.tagging_loss, self.classification_loss], params)
        elif task['tagging'] == 1:
            gradients = tf.gradients(self.tagging_loss, params)
        elif task['intent'] == 1:
            gradients = tf.gradients(self.classification_loss, params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.gradient_norm = norm
        self.update = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables())
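# In every variant in this listing, softmax_loss_function stays None and the
# num_samples argument goes unused. If sampled softmax were actually wanted inside
# the constructor, the usual TF 1.x pattern (adapted from the seq2seq tutorial)
# would look roughly like the sketch below. The projection variable names
# "proj_w"/"proj_b" and the choice of tag_vocab_size as the projected vocabulary
# are assumptions, not part of the original code.
output_projection = None
softmax_loss_function = None
if num_samples > 0 and num_samples < self.tag_vocab_size:
    w = tf.get_variable("proj_w", [size, self.tag_vocab_size])
    w_t = tf.transpose(w)
    b = tf.get_variable("proj_b", [self.tag_vocab_size])
    output_projection = (w, b)

    def sampled_loss(labels, logits):
        # Here `logits` are the pre-projection RNN outputs of width `size`.
        labels = tf.reshape(labels, [-1, 1])
        return tf.nn.sampled_softmax_loss(
            weights=w_t, biases=b, labels=labels, inputs=logits,
            num_sampled=num_samples, num_classes=self.tag_vocab_size)

    softmax_loss_function = sampled_loss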
def __init__(self, source_vocab_size, tag_vocab_size, label_vocab_size, buckets,
             word_embedding_size, size, num_layers, max_gradient_norm, batch_size,
             dropout_keep_prob=1.0, use_lstm=False, bidirectional_rnn=True,
             num_samples=1, use_attention=False, task=None, forward_only=False):
    self.source_vocab_size = source_vocab_size
    self.tag_vocab_size = tag_vocab_size
    self.label_vocab_size = label_vocab_size
    self.word_embedding_size = word_embedding_size
    self.cell_size = size
    self.num_layers = num_layers
    self.buckets = buckets
    self.batch_size = batch_size
    self.bidirectional_rnn = bidirectional_rnn
    self.global_step = tf.Variable(0, trainable=False)

    # If we use sampled softmax, we need an output projection.
    softmax_loss_function = None

    # 2-1. Build the multi-layer cells.
    def create_cell():
        # With dropout.
        if not forward_only and dropout_keep_prob < 1.0:
            single_cell = lambda: BasicLSTMCell(self.cell_size)  # cell_size = size
            cell = MultiRNNCell(
                [single_cell() for _ in range(self.num_layers)])
            cell = DropoutWrapper(cell,
                                  input_keep_prob=dropout_keep_prob,
                                  output_keep_prob=dropout_keep_prob)
        # Without dropout.
        else:
            single_cell = lambda: BasicLSTMCell(self.cell_size)
            cell = MultiRNNCell(
                [single_cell() for _ in range(self.num_layers)])
        return cell

    # 2-1-1. Create the forward/backward cells of the encoder.
    self.cell_fw = create_cell()
    self.cell_bw = create_cell()

    # 2-2. Define the placeholders (model inputs).
    self.encoder_inputs = []
    self.tags = []
    self.tag_weights = []
    self.labels = []
    self.sequence_length = tf.placeholder(tf.int32, [None], name="sequence_length")

    # 2-2-1. Define the sentence placeholders (encoder_inputs).
    for i in range(buckets[-1][0]):  # buckets[-1][0] = encoder length; xrange -> range
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))

    # 2-2-2. Define the tag and tag-weight placeholders.
    for i in range(buckets[-1][1]):  # xrange -> range
        self.tags.append(
            tf.placeholder(tf.float32, shape=[None], name="tag{0}".format(i)))
        self.tag_weights.append(
            tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
    # self.labels: list containing a single label placeholder.
    self.labels.append(
        tf.placeholder(tf.float32, shape=[None], name="label"))

    # 2-3-5. Get the bi-directional encoder outputs.
    base_rnn_output = self.generate_rnn_output()
    encoder_outputs, encoder_state, attention_states = base_rnn_output

    # 2-4. Sequence labeling and/or sequence classification.
    # 2-4-1. Get the tagging (decoder) output.
    if task['tagging'] == 1:  # task: constructor argument selecting the objective(s)
        seq_labeling_outputs = seq_labeling.generate_sequence_output(
            self.source_vocab_size, encoder_outputs, encoder_state, self.tags,
            self.sequence_length,
            self.tag_vocab_size,  # num_decoder_symbols
            self.tag_weights, buckets,
            softmax_loss_function=softmax_loss_function,
            use_attention=use_attention)
        self.tagging_output, self.tagging_loss = seq_labeling_outputs

    # 2-4-2. Sequence classification.
    if task['intent'] == 1:
        seq_intent_outputs = seq_classification.generate_single_output(
            encoder_state, attention_states, self.sequence_length,
            self.labels, self.label_vocab_size, buckets,
            softmax_loss_function=softmax_loss_function,
            use_attention=use_attention)
        self.classification_output, self.classification_loss = seq_intent_outputs

    # 2-4-3. Define the loss.
    if task['tagging'] == 1:
        self.loss = self.tagging_loss
    elif task['intent'] == 1:
        self.loss = self.classification_loss

    # 2-5. Define the gradients and the update op, and train the model.
    params = tf.trainable_variables()
    if not forward_only:
        # 2-5-1. Define the optimizer.
        opt = tf.train.AdamOptimizer()
        # 2-5-2. Define the gradients.
        if task['joint'] == 1:
            gradients = tf.gradients(
                [self.tagging_loss, self.classification_loss], params)
        elif task['tagging'] == 1:
            gradients = tf.gradients(self.tagging_loss, params)
        elif task['intent'] == 1:
            gradients = tf.gradients(self.classification_loss, params)
        # Clip the gradients.
        clipped_gradients, norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.gradient_norm = norm
        # 2-5-3. Training update.
        self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                          global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables())
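# For reference, an illustrative way such a constructor might be called. The class
# name MultiTaskModel and every numeric value below are made-up examples, not taken
# from the original project.
import tensorflow as tf

task = {'tagging': 1, 'intent': 1, 'joint': 1}
with tf.variable_scope("model"):
    model = MultiTaskModel(
        source_vocab_size=10000, tag_vocab_size=120, label_vocab_size=20,
        buckets=[(50, 50)], word_embedding_size=128, size=128, num_layers=1,
        max_gradient_norm=5.0, batch_size=16, dropout_keep_prob=0.5,
        use_lstm=True, bidirectional_rnn=True, use_attention=True,
        task=task, forward_only=False)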
def __init__(self, source_vocab_size, tag_vocab_size, label_vocab_size, buckets,
             word_embedding_size, size, num_layers, max_gradient_norm, batch_size,
             dropout_keep_prob=1.0, use_lstm=False, bidirectional_rnn=True,
             num_samples=1024, use_attention=False, task=None, forward_only=False):
    self.source_vocab_size = source_vocab_size
    self.tag_vocab_size = tag_vocab_size
    self.label_vocab_size = label_vocab_size
    self.buckets = buckets
    self.batch_size = batch_size
    self.global_step = tf.Variable(0, trainable=False)

    # If we use sampled softmax, we need an output projection.
    softmax_loss_function = None

    # Create the internal multi-layer cell for our RNN.
    single_cell = tf.contrib.rnn.GRUCell(size)
    if use_lstm:
        single_cell = tf.contrib.rnn.BasicLSTMCell(size)
    cell = single_cell
    if num_layers > 1:
        cell = tf.contrib.rnn.MultiRNNCell([single_cell for _ in range(num_layers)])
        # cell = tf.contrib.rnn.MultiRNNCell([single_cell] * num_layers)
    if not forward_only and dropout_keep_prob < 1.0:
        cell = tf.contrib.rnn.DropoutWrapper(cell,
                                             input_keep_prob=dropout_keep_prob,
                                             output_keep_prob=dropout_keep_prob)

    # Feeds for inputs.
    self.encoder_inputs = []
    self.tags = []
    self.tag_weights = []
    self.labels = []
    self.sequence_length = tf.placeholder(tf.int32, [None], name="sequence_length")

    for i in xrange(buckets[-1][0]):
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in xrange(buckets[-1][1]):
        self.tags.append(
            tf.placeholder(tf.float32, shape=[None], name="tag{0}".format(i)))
        self.tag_weights.append(
            tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
    self.labels.append(tf.placeholder(tf.float32, shape=[None], name="label"))

    # Initialize the embedding.
    self.embedding = variable_scope.get_variable(
        "embedding", [self.source_vocab_size, word_embedding_size])
    # self.embedding = tf.Variable(
    #     tf.constant(0.0, shape=[self.source_vocab_size, word_embedding_size]),
    #     name="embedding")

    base_rnn_output = generate_encoder_output.generate_embedding_RNN_output(
        self.encoder_inputs, cell, self.source_vocab_size, word_embedding_size,
        embedding=self.embedding, dtype=dtypes.float32, scope=None,
        sequence_length=self.sequence_length, bidirectional_rnn=bidirectional_rnn)
    encoder_outputs, encoder_state, attention_states = base_rnn_output

    if task['tagging'] == 1:
        self.tagging_output, self.tagging_loss = seq_labeling.generate_sequence_output(
            self.source_vocab_size, encoder_outputs, encoder_state, self.tags,
            self.sequence_length, self.tag_vocab_size, self.tag_weights, buckets,
            softmax_loss_function=softmax_loss_function,
            use_attention=use_attention)
    if task['intent'] == 1:
        self.classification_output, self.classification_loss = \
            seq_classification.generate_single_output(
                encoder_state, attention_states, self.sequence_length,
                self.labels, self.label_vocab_size, buckets,
                softmax_loss_function=softmax_loss_function,
                use_attention=use_attention)

    if task['tagging'] == 1:
        self.loss = self.tagging_loss
    elif task['intent'] == 1:
        self.loss = self.classification_loss

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    if not forward_only:
        opt = tf.train.AdamOptimizer()
        if task['joint'] == 1:
            # Backpropagate the intent and tagging loss; one may further adjust
            # the weights for the two costs.
            gradients = tf.gradients(
                [self.tagging_loss, self.classification_loss], params)
        elif task['tagging'] == 1:
            gradients = tf.gradients(self.tagging_loss, params)
        elif task['intent'] == 1:
            gradients = tf.gradients(self.classification_loss, params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.gradient_norm = norm
        self.update = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)

    # tf.all_variables() is deprecated; use tf.global_variables() as in the other variants.
    self.saver = tf.train.Saver(tf.global_variables())
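# The commented-out tf.Variable(tf.constant(0.0, ...)) line in the variant above
# hints at loading pre-trained word vectors into self.embedding. A common TF 1.x
# way to do that inside the constructor is sketched below; the attribute names
# embedding_placeholder and embedding_init are assumptions, not original code.
self.embedding_placeholder = tf.placeholder(
    tf.float32, [self.source_vocab_size, word_embedding_size])
self.embedding_init = self.embedding.assign(self.embedding_placeholder)

# After the graph is built, feed the pre-trained matrix once, e.g.:
# sess.run(model.embedding_init,
#          feed_dict={model.embedding_placeholder: pretrained_matrix})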
def __init__(self, source_vocab_size, tag_vocab_size, label_vocab_size, buckets,
             word_embedding_size, size, num_layers, max_gradient_norm, batch_size,
             dropout_keep_prob=1.0, use_lstm=False, bidirectional_rnn=True,
             num_samples=1024, use_attention=False, task=None, forward_only=False):
    self.source_vocab_size = source_vocab_size
    self.tag_vocab_size = tag_vocab_size
    self.label_vocab_size = label_vocab_size
    self.word_embedding_size = word_embedding_size
    self.cell_size = size
    self.num_layers = num_layers
    self.buckets = buckets
    self.batch_size = batch_size
    self.bidirectional_rnn = bidirectional_rnn
    self.global_step = tf.Variable(0, trainable=False)

    # If we use sampled softmax, we need an output projection.
    softmax_loss_function = None

    # Create the internal multi-layer cell for our RNN.
    def create_cell():
        if not forward_only and dropout_keep_prob < 1.0:
            single_cell = lambda: BasicLSTMCell(self.cell_size)
            cell = MultiRNNCell([single_cell() for _ in range(self.num_layers)])
            cell = DropoutWrapper(cell,
                                  input_keep_prob=dropout_keep_prob,
                                  output_keep_prob=dropout_keep_prob)
        else:
            single_cell = lambda: BasicLSTMCell(self.cell_size)
            cell = MultiRNNCell([single_cell() for _ in range(self.num_layers)])
        return cell

    self.cell_fw = create_cell()
    self.cell_bw = create_cell()

    # Feeds for inputs.
    self.encoder_inputs = []
    self.tags = []
    self.tag_weights = []
    self.labels = []
    self.sequence_length = tf.placeholder(tf.int32, [None], name="sequence_length")

    for i in xrange(buckets[-1][0]):
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in xrange(buckets[-1][1]):
        self.tags.append(
            tf.placeholder(tf.float32, shape=[None], name="tag{0}".format(i)))
        self.tag_weights.append(
            tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
    self.labels.append(tf.placeholder(tf.float32, shape=[None], name="label"))

    base_rnn_output = self.generate_rnn_output()
    encoder_outputs, encoder_state, attention_states = base_rnn_output

    if task['tagging'] == 1:
        seq_labeling_outputs = seq_labeling.generate_sequence_output(
            self.source_vocab_size, encoder_outputs, encoder_state, self.tags,
            self.sequence_length, self.tag_vocab_size, self.tag_weights, buckets,
            softmax_loss_function=softmax_loss_function,
            use_attention=use_attention)
        self.tagging_output, self.tagging_loss = seq_labeling_outputs
    if task['intent'] == 1:
        seq_intent_outputs = seq_classification.generate_single_output(
            encoder_state, attention_states, self.sequence_length,
            self.labels, self.label_vocab_size, buckets,
            softmax_loss_function=softmax_loss_function,
            use_attention=use_attention)
        self.classification_output, self.classification_loss = seq_intent_outputs

    if task['tagging'] == 1:
        self.loss = self.tagging_loss
    elif task['intent'] == 1:
        self.loss = self.classification_loss

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    if not forward_only:
        opt = tf.train.AdamOptimizer()
        gradients = None
        if task['joint'] == 1:
            # Backpropagate both the intent and the tagging loss; the weights of
            # the two costs can be adjusted further.
            gradients = tf.gradients(
                [self.tagging_loss, self.classification_loss], params)
        elif task['tagging'] == 1:
            gradients = tf.gradients(self.tagging_loss, params)
        elif task['intent'] == 1:
            gradients = tf.gradients(self.classification_loss, params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.gradient_norm = norm
        self.update = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables())
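# The joint-training comments above note that the weights of the two costs can be
# adjusted further. A minimal way to do that inside the task['joint'] == 1 branch
# is sketched below; the tagging_weight coefficient is an assumed hyperparameter,
# not a constructor argument in any of the variants shown.
tagging_weight = 0.5  # assumed value in [0, 1]
joint_loss = (tagging_weight * self.tagging_loss
              + (1.0 - tagging_weight) * self.classification_loss)
gradients = tf.gradients(joint_loss, params)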