def gather_indexes(sequence_tensor, positions):
    """Select vectors at given sequence positions for every batch example.

    The batch is flattened to a 2-D matrix and ``positions`` are shifted by
    each example's starting row so that a single ``tf.gather`` can fetch all
    requested vectors at once.

    Args:
        sequence_tensor: Rank-3 tensor (enforced through ``expected_rank=3``);
            its three dimensions are read as batch size, sequence length
            and width.
        positions: Integer indices along the sequence axis. They are added to
            an int32 ``[batch_size, 1]`` offset column, so this is presumably
            an int32 ``[batch_size, num_positions]`` tensor -- TODO confirm
            against callers.

    Returns:
        A rank-2 tensor holding one ``width``-sized row per entry of the
        flattened ``positions``.
    """
    batch_size, seq_length, width = modeling.get_shape_list(
        sequence_tensor, expected_rank=3)

    # Row at which each example begins once the batch axis is folded into
    # the sequence axis; kept as a column so it broadcasts over positions.
    example_starts = tf.range(0, batch_size, dtype=tf.int32) * seq_length
    offset_column = tf.reshape(example_starts, [-1, 1])

    # Per-example positions -> absolute row numbers in the flattened matrix.
    absolute_rows = tf.reshape(positions + offset_column, [-1])

    total_rows = batch_size * seq_length
    flattened = tf.reshape(sequence_tensor, [total_rows, width])

    return tf.gather(flattened, absolute_rows)
def built_model(self):
    """Build the BERT span-prediction graph on this instance.

    Reads the BERT configuration from ``self.__bert_config_path``, runs
    ``modeling.BertModel`` on ``self.input_ids`` / ``self.input_masks`` /
    ``self.segment_ids`` and projects every token's hidden vector onto two
    scores (span start and span end).

    Attributes set:
        start_logits, end_logits: The two slices obtained by unstacking the
            ``[2, batch_size, seq_length]`` logits along axis 0.
        loss: Only when ``self.__is_training`` is truthy. Mean of the
            concatenated start and end sparse softmax cross-entropy losses
            computed against ``self.start_position`` / ``self.end_position``.
        train_op: Only when ``self.__is_training`` is truthy. Result of
            ``optimization.create_optimizer`` for ``self.loss``.

    Returns:
        None.
    """
    config = modeling.BertConfig.from_json_file(self.__bert_config_path)
    bert = modeling.BertModel(
        config=config,
        is_training=self.__is_training,
        input_ids=self.input_ids,
        input_mask=self.input_masks,
        token_type_ids=self.segment_ids,
        use_one_hot_embeddings=False)

    sequence_output = bert.get_sequence_output()
    _, seq_length, hidden_size = modeling.get_shape_list(
        sequence_output, expected_rank=3)

    # NOTE(review): tf.get_variable is documented to ignore tf.name_scope, so
    # these variables are presumably created as "output_weights" /
    # "output_bias" without an "output/" prefix -- confirm before renaming,
    # since checkpoints depend on the variable names.
    with tf.name_scope("output"):
        projection = tf.get_variable(
            "output_weights",
            [2, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        projection_bias = tf.get_variable(
            "output_bias",
            [2],
            initializer=tf.zeros_initializer())

        # Collapse batch and sequence axes so one matmul scores every token.
        token_vectors = tf.reshape(sequence_output, [-1, hidden_size])
        token_scores = tf.matmul(token_vectors, projection, transpose_b=True)
        token_scores = tf.nn.bias_add(token_scores, projection_bias)

        # [tokens, 2] -> [batch_size, seq_length, 2] -> [2, batch_size, seq_length]
        token_scores = tf.reshape(token_scores, [-1, seq_length, 2])
        token_scores = tf.transpose(token_scores, [2, 0, 1])

        # Each slice is [batch_size, seq_length].
        self.start_logits, self.end_logits = tf.unstack(token_scores, axis=0)

    if not self.__is_training:
        return

    with tf.name_scope("loss"):
        start_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.start_logits, labels=self.start_position)
        end_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.end_logits, labels=self.end_position)
        # Averaging the concatenation weights start and end losses equally.
        combined = tf.concat([start_xent, end_xent], axis=0)
        self.loss = tf.reduce_mean(combined, name="loss")

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss,
            self.__learning_rate,
            self.__num_train_step,
            self.__num_warmup_step,
            use_tpu=False)