def decode_train(self, init_state, dec_input_tokens, dec_input_lengths,
                 dec_output_lengths):
    """Build the teacher-forced decoding graph and return its logits.

    As a side effect this creates ``self.cell`` and ``self.projection``.

    Args:
        init_state: Initial decoder state. The size of its last dimension
            is used as the cell's state size.
        dec_input_tokens: Token ids fed to the decoder; they are looked up
            in ``self.embeddings``.
        dec_input_lengths: Per-example input lengths handed to the
            ``TrainingHelper``.
        dec_output_lengths: Per-example output lengths; their maximum
            bounds the number of decoding steps.

    Returns:
        The ``rnn_output`` of the decoder, i.e. the outputs after
        ``self.projection`` has been applied.
    """
    outer_scope_name = self.shared_scope or "RNNDecoder"
    with tf.variable_scope(outer_scope_name, reuse=tf.AUTO_REUSE):
        cell_size = shape(init_state, -1)
        self.cell = setup_cell(self.cell_type,
                               cell_size,
                               self.num_layers,
                               keep_prob=self.keep_prob)

        with tf.variable_scope('projection') as projection_scope:
            # One output unit per row of the embedding matrix.
            num_outputs = shape(self.embeddings, 0)
            self.projection = tf.layers.Dense(num_outputs,
                                              use_bias=True,
                                              trainable=True)

        with tf.name_scope('Train'):
            input_embeddings = tf.nn.embedding_lookup(self.embeddings,
                                                      dec_input_tokens)
            teacher_forcing = tf.contrib.seq2seq.TrainingHelper(
                input_embeddings,
                sequence_length=dec_input_lengths,
                time_major=False)
            decoder = tf.contrib.seq2seq.BasicDecoder(
                self.cell,
                teacher_forcing,
                init_state,
                output_layer=self.projection)
            # The scope handed to dynamic_decode is the 'projection' scope
            # (the most recently bound one), not the outer decoder scope.
            decoded, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder,
                impute_finished=True,
                maximum_iterations=tf.reduce_max(dec_output_lengths),
                scope=projection_scope)
    return decoded.rnn_output
def __init__(self, sess, config, encoder, activation=tf.nn.relu):
    """Build the category-classification graph on top of ``encoder``.

    Args:
        sess: Session object; forwarded to the base class and kept on
            ``self.sess``.
        config: Task configuration. ``config.dropout_rate`` is read here.
        encoder: Encoder object. This constructor reads ``is_training``,
            ``vocab``, ``wbase``, ``cbase`` and ``word_encoder`` from it
            and calls ``encode`` and ``get_batched_mention_emb``.
        activation: Activation function stored on ``self.activation``.
    """
    super(CategoryClassification, self).__init__(sess, config)
    self.sess = sess
    self.encoder = encoder
    self.activation = activation
    self.is_training = encoder.is_training
    # Dropout is only active while is_training evaluates to 1.
    self.keep_prob = 1.0 - tf.to_float(
        self.is_training) * config.dropout_rate
    self.vocab = encoder.vocab

    with tf.name_scope('Placeholder'):
        self.ph = recDotDefaultDict()
        # [batch_size, max_num_context, max_num_words]
        self.ph.text.word = tf.placeholder(
            tf.int32, name='contexts.word',
            shape=[None, None, None]) if self.encoder.wbase else None
        self.ph.text.char = tf.placeholder(
            tf.int32, name='contexts.char',
            shape=[None, None, None, None]) if self.encoder.cbase else None
        self.ph.link = tf.placeholder(tf.int32,
                                      name='link',
                                      shape=[None, None, 2])
        # Previously created with name='link' (copy-paste from the line
        # above), which made TensorFlow silently rename it to 'link_1'.
        self.ph.target = tf.placeholder(tf.int32,
                                        name='target',
                                        shape=[None])

    # Token id 0 is treated as padding when counting lengths.
    self.sentence_length = tf.count_nonzero(self.ph.text.word, axis=-1)
    self.num_contexts = tf.cast(
        tf.count_nonzero(self.sentence_length, axis=-1), tf.float32)

    with tf.name_scope('Encoder'):
        word_repls = encoder.word_encoder.word_encode(self.ph.text.word)
        char_repls = encoder.word_encoder.char_encode(self.ph.text.char)
        text_emb, text_outputs, state = encoder.encode(
            [word_repls, char_repls], self.sentence_length)
        mention_starts, mention_ends = tf.unstack(self.ph.link, axis=-1)
        mention_repls, head_scores = encoder.get_batched_mention_emb(
            text_emb, text_outputs, mention_starts,
            mention_ends)  # [batch_size, max_n_contexts, mention_size]
        # Merge the batch and context dimensions.
        self.adv_outputs = tf.reshape(text_outputs, [
            shape(text_outputs, 0) * shape(text_outputs, 1),
            shape(text_outputs, 2),
            shape(text_outputs, 3)
        ])  # [batch_size * max_n_contexts, max_sentence_length, output_size]

    with tf.variable_scope('Inference'):
        self.outputs = self.inference(mention_repls)
        self.predictions = tf.argmax(self.outputs, axis=-1)

    with tf.name_scope("Loss"):
        self.losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.outputs, labels=self.ph.target)
        self.loss = tf.reduce_mean(self.losses)
def predict_relation(self, query_emb, mention_emb, mention_scores,
                     is_query_subjective):
    '''Score every relation label for each (query, mention) pair.

    Args:
      - query_emb: Query representation. It is tiled with a two-element
        multiples list, so it must be rank 2 (presumably [1, emb]).
      - mention_emb: [n_mentions, emb]
      - mention_scores: Per-mention scores, expanded to a column and added
        to every relation column except the first ("no relation") one.
      - is_query_subjective: A boolean. If true, the pair is built as
        (query, mention), otherwise as (mention, query), so the same
        network scores both directions.

    Returns:
      Logits whose first column is the constant-zero "no relation" score,
      followed by self.vocab.rel.size - 1 relation scores.

    Note: the current variable scope is switched to reuse mode before
    returning, so later calls share the variables created here.
    '''
    with tf.variable_scope('pair_emb'):
        n_mentions = shape(mention_emb, -2)
        tiled_query = tf.tile(query_emb, [n_mentions, 1])  # [n_mentions, emb]
        if is_query_subjective:
            ordered_pair = [tiled_query, mention_emb]
        else:
            ordered_pair = [mention_emb, tiled_query]
        hidden = tf.concat(ordered_pair, -1)

    # NOTE(review): the 'Forward*' and 'Output' scopes are assumed to be
    # siblings of 'pair_emb' rather than nested in it - confirm against
    # existing checkpoints.
    for depth in range(self.ffnn_depth):
        with tf.variable_scope('Forward%d' % depth):
            hidden = linear(hidden,
                            output_size=self.ffnn_size,
                            activation=self.activation)
            hidden = tf.nn.dropout(hidden, keep_prob=self.keep_prob)

    num_real_relations = self.vocab.rel.size - 1
    with tf.variable_scope('Output'):
        biases = tf.get_variable('biases', [num_real_relations])
        relation_logits = tf.nn.xw_plus_b(hidden, self.rel_w, biases)

    # Column 0 ("no relation") is fixed at zero.
    zero_column = tf.zeros([shape(mention_scores, 0), 1], tf.float32)
    logits = tf.concat([zero_column, relation_logits], axis=-1)

    # Type A: add the mention score to every real relation column.
    # (The alternative considered, type B, instead subtracted the mention
    # score from the "no relation" column and left the others untouched.)
    score_columns = tf.tile(tf.expand_dims(mention_scores, 1),
                            [1, num_real_relations])
    mention_unconfidence_penalty = tf.concat([zero_column, score_columns],
                                             axis=-1)

    tf.get_variable_scope().reuse_variables()
    return logits + mention_unconfidence_penalty
def decode_test(self, init_state, start_token=PAD_ID, end_token=PAD_ID):
    """Build the beam-search decoding graph.

    ``self.cell`` and ``self.projection`` must already exist
    (``decode_train`` creates them).

    Args:
        init_state: Initial decoder state; it is tiled ``self.beam_width``
            times per example.
        start_token: Token id every beam starts from.
        end_token: Token id that terminates a hypothesis.

    Returns:
        Predicted ids, transposed to [batch_size, beam_width, T].
    """
    scope_name = self.shared_scope or "RNNDecoder"
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
        with tf.name_scope('Test'):
            beam_init_state = tf.contrib.seq2seq.tile_batch(
                init_state, multiplier=self.beam_width)
            batch_size = shape(init_state, 0)
            first_tokens = tf.tile(
                tf.constant([start_token], dtype=tf.int32), [batch_size])
            beam_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                self.cell,
                self.embeddings,
                first_tokens,
                end_token,
                beam_init_state,
                self.beam_width,
                output_layer=self.projection,
                length_penalty_weight=self.length_penalty_weight)
            decoded, _, _ = tf.contrib.seq2seq.dynamic_decode(
                beam_decoder,
                impute_finished=False,
                maximum_iterations=self.max_output_len,
                scope=scope)
            # predicted_ids is [batch_size, T, beam_width]; put the beam
            # axis before the time axis.
            beams_first = tf.transpose(decoded.predicted_ids,
                                       perm=[0, 2, 1])
    return beams_first
def batch_inference(self, text_emb, text_outputs, sentence_length):
    """Run ``self.inference`` on each example of a batch and stack results.

    Examples are processed one at a time inside a ``tf.while_loop``; each
    example's predictions are padded to a common ``max_num_mentions`` so
    they can be concatenated along a new leading batch axis.

    Args:
        text_emb: Batched text embeddings (indexed by example).
        text_outputs: Batched encoder outputs (indexed by example).
        sentence_length: Batched sentence lengths; summed over the last
            axis to obtain each document's length.

    Returns:
        A pair ``([relations, mentions], loss)`` where ``relations`` and
        ``mentions`` have shape [batch, max_num_mentions, 2] and ``loss``
        is the mean of the per-example losses.
    """
    document_length = tf.reduce_sum(sentence_length, axis=-1)
    batch_size = shape(text_emb, 0)
    # Padding target: a fixed ratio of the longest document's length.
    longest_document = tf.to_float(tf.reduce_max(document_length))
    max_num_mentions = tf.to_int32(
        tf.floor(longest_document * self.mention_ratio))

    def append_row(stacked, row):
        """Concatenate `row` to `stacked` along a new leading axis."""
        return tf.concat([stacked, tf.expand_dims(row, axis=0)], axis=0)

    def has_more(i, *unused):
        return i < batch_size

    def process_example(i, relations, mentions, losses):
        rel, men, example_loss = self.inference(
            text_emb[i],
            text_outputs[i],
            sentence_length[i],
            self.ph.query[i],
            self.ph.mentions[i],
            self.ph.num_mentions[i],
            self.ph.target.subjective[i],
            self.ph.target.objective[i],
            self.ph.loss_weights_by_label[i],
            max_num_mentions=max_num_mentions)
        next_i = i + 1
        relations = append_row(relations, rel)
        mentions = append_row(mentions, men)
        losses = append_row(losses, example_loss)
        return next_i, relations, mentions, losses

    counter = tf.zeros((), dtype=tf.int32)
    initial_values = [
        counter,
        tf.zeros((0, max_num_mentions, 2), dtype=tf.int32),
        tf.zeros((0, max_num_mentions, 2), dtype=tf.int32),
        tf.zeros((0), dtype=tf.float32),
    ]
    # The accumulators grow by one row per iteration, so their leading
    # dimension has to be declared unknown.
    invariants = [
        counter.get_shape(),
        tf.TensorShape([None, None, 2]),
        tf.TensorShape([None, None, 2]),
        tf.TensorShape([None]),
    ]
    _, relations, mentions, losses = tf.while_loop(
        has_more,
        process_example,
        initial_values,
        shape_invariants=invariants,
        parallel_iterations=self.max_batch_size,
    )
    predictions = [relations, mentions]
    loss = tf.reduce_mean(losses, axis=-1)
    return predictions, loss
def extract_span(encoder_outputs, spans):
    '''Average the encoder outputs inside one span per example.

    Args:
      - encoder_outputs: [batch_size, max_num_word, hidden_size]
      - spans: Unstacked along axis 1 into (begin, end) index vectors;
        the end index is inclusive.

    Returns:
      A [batch_size, hidden_size] tensor holding the mean vector of each
      example's span.
    '''
    with tf.name_scope('ExtractSpan'):
        span_begins, span_ends = tf.unstack(spans, axis=1)
        batch_size = shape(encoder_outputs, 0)
        hidden_size = shape(encoder_outputs, -1)

        def span_mean(i):
            """Mean over the tokens of the i-th span, as a [1, hidden] row."""
            tokens = encoder_outputs[i][span_begins[i]:span_ends[i] + 1]
            return tf.expand_dims(tf.reduce_mean(tokens, axis=0), axis=0)

        def has_more(i, collected):
            return i < batch_size

        def step(i, collected):
            return i + 1, tf.concat([collected, span_mean(i)], axis=0)

        counter = tf.zeros((), dtype=tf.int32)
        # Start from an empty matrix and append one row per example.
        collected = tf.zeros((0, hidden_size))
        _, collected = tf.while_loop(
            has_more,
            step,
            [counter, collected],
            shape_invariants=[
                counter.get_shape(),
                tf.TensorShape([None, hidden_size])
            ])
        return collected
def __init__(self, sess, config, encoder, tasks):
    """Build the adversarial task-discriminator graph.

    Each task's ``adv_outputs`` is labelled with the task's index, the
    gradient of the concatenated representations is flipped, and a linear
    classifier is trained to predict the task index. For tasks whose
    encoder is a ``MultiEncoderWrapper`` an additional difference loss
    between the shared and private halves is added.

    Args:
        sess: Session object; forwarded to the base class.
        config: Configuration; ``adv_weight`` and ``diff_weight`` are read.
        encoder: Encoder object, stored on ``self.encoder``.
        tasks: Iterable of task models exposing ``encoder`` and
            ``adv_outputs``.

    Sets:
        self.outputs: Softmax probabilities over tasks.
        self.loss: ``adv_weight * l_adv + diff_weight * l_diff``.
    """
    super().__init__(sess, config)
    self.sess = sess
    self.encoder = encoder

    adv_outputs = []
    task_ids = []
    # Accumulated over all tasks. The previous code reassigned this on
    # every iteration, so only the last task contributed.
    l_diff = 0.0
    for i, t in enumerate(tasks):
        print('adv_outputs', t, t.adv_outputs)
        if isinstance(t.encoder, MultiEncoderWrapper):
            # Split the encoders' representations into the task-shared and
            # the task-private halves.
            assert len(t.adv_outputs.get_shape()
                       ) == 3  # [*, max_sentence_length, hidden_size]
            shared_repls, private_repls = tf.split(t.adv_outputs, 2, axis=2)

            # Take the average of the representations across all time steps.
            shared_repls = tf.reduce_mean(shared_repls, axis=1)
            private_repls = tf.reduce_mean(private_repls, axis=1)

            # This follows the paper, but is it really necessary to also
            # push apart vectors that were read from different sentences?
            similarities = tf.matmul(tf.transpose(shared_repls),
                                     private_repls)
            l_diff += squared_frobenius_norm(similarities)
        else:
            shared_repls = t.adv_outputs

        # One task label per row of this task's representations.
        task_id = tf.tile([i], [shape(shared_repls, 0)])
        adv_outputs.append(shared_repls)
        task_ids.append(task_id)

    adv_outputs = flip_gradient(tf.concat(adv_outputs, axis=0))
    task_ids = tf.concat(task_ids, axis=0)
    task_ids = tf.one_hot(task_ids, len(tasks))

    # softmax_cross_entropy_with_logits applies softmax internally, so it
    # must receive the raw logits; feeding it the softmax output (as the
    # previous code did) normalises twice and distorts the loss.
    logits = linear(adv_outputs, len(tasks))
    self.outputs = tf.nn.softmax(logits)
    l_adv = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=task_ids)
    l_adv = tf.reduce_sum(l_adv)
    self.loss = config.adv_weight * l_adv + config.diff_weight * l_diff
def flatten_emb_by_sentence(self, emb, text_len_mask):
    """Merge the first two axes of ``emb`` and keep only unmasked rows.

    Args:
        emb: Rank-2 or rank-3 tensor whose first two axes are
            (sentence, position).
        text_len_mask: Boolean mask applied to the flattened tensor.

    Returns:
        The flattened tensor with the masked-out elements removed.

    Raises:
        ValueError: If ``emb`` is neither rank 2 nor rank 3.
    """
    dynamic_shape = tf.shape(emb)
    flat_length = dynamic_shape[0] * dynamic_shape[1]

    emb_rank = len(emb.get_shape())
    if emb_rank == 2:
        target_shape = [flat_length]
    elif emb_rank == 3:
        target_shape = [flat_length, tf_utils.shape(emb, 2)]
    else:
        raise ValueError("Unsupported rank: {}".format(emb_rank))

    flattened_emb = tf.reshape(emb, target_shape)
    # Drop the positions the mask marks as padding.
    return tf.boolean_mask(flattened_emb, text_len_mask)
def get_q_values(self, state, action, next_state, next_candidates):
    """Compute current Q-values and the expected best next Q-value.

    Args:
        state: Current state batch, fed to ``self.calc_q_values``.
        action: Indices of the chosen actions, gathered from the Q-values.
        next_state: Next state batch, fed to ``self.calc_q_values``.
        next_candidates: Action ids that are available in the next step,
            one-hot encoded over ``self.config.vocab_size.card`` classes.
            After summing out the candidate axis (axis 2) the mask is
            [batch_size, num_next_candidates_samples, vocab_size].

    Returns:
        A tuple ``(q_values, q_values_of_selected_action,
        expected_next_q_value)``: all Q-values of ``state``, the Q-value
        of the selected action per example, and the per-example mean over
        candidate samples of the best candidate Q-value in ``next_state``.
    """
    q_values = self.calc_q_values(state)  # [batch_size, vocab_size]

    with tf.name_scope('dynamic_batch_size'):
        batch_size = shape(state, 0)

    # The Q-values only of the action chosen in the current step.
    with tf.name_scope('q_values_of_selected_action'):
        q_values_of_selected_action = tf.reshape(
            batch_gather(q_values, action), [batch_size])

    with tf.name_scope('next_q_values'):
        next_q_values = self.calc_q_values(
            next_state)  # [batch_size, vocab_size]

    with tf.name_scope('mask_by_next_candidates'):
        next_candidates_mask = tf.one_hot(
            next_candidates, self.config.vocab_size.card
        )  # [batch_size, num_next_candidates_samples, NUM_CANDIDATES, vocab_size]
        next_candidates_mask = tf.reduce_sum(
            next_candidates_mask, axis=2
        )  # [batch_size, num_next_candidates_samples, vocab_size]
        # A candidate listed twice yields a count of 2; only membership
        # matters, so the counts are reduced to a boolean.
        is_candidate = next_candidates_mask > 0

        tiled_q_values = tf.tile(
            tf.expand_dims(next_q_values, 1),
            tf.constant([
                1,
                self.config.num_next_candidates_samples,
                1,
            ])
        )  # [batch_size, num_next_candidates_samples, vocab_size]

        # Non-candidates are pushed to the lowest representable value so
        # they can never win the max. Multiplying by a 0/1 mask (the old
        # behaviour) turned them into 0, which beat every candidate
        # whenever all candidate Q-values were negative.
        lowest = tf.fill(tf.shape(tiled_q_values),
                         tiled_q_values.dtype.min)
        masked_next_q_values = tf.where(is_candidate, tiled_q_values,
                                        lowest)

    # Take the maximum Q-value within each sampled candidate set, and
    # average over the sets.
    with tf.name_scope('expected_next_q_value'):
        best_next_q_value = tf.reduce_max(masked_next_q_values, axis=-1)
        # A sample without any candidate keeps the previous result of 0.
        has_candidate = tf.reduce_any(is_candidate, axis=-1)
        best_next_q_value = tf.where(has_candidate, best_next_q_value,
                                     tf.zeros_like(best_next_q_value))
        expected_next_q_value = tf.reduce_mean(best_next_q_value, axis=-1)

    return q_values, q_values_of_selected_action, expected_next_q_value
def __init__(self, sess, config, manager, encoder, activation=tf.nn.relu):
    """Build the description-generation (encoder -> RNN decoder) graph.

    Args:
        sess: Session object; forwarded to the base class.
        config: Task configuration. ``dataset``, ``train_shared``,
            ``dropout_rate`` and ``decoder`` are read here.
        manager: Object providing ``tasks`` (other task models) and
            ``vocab``.
        encoder: Encoder used to embed and encode the input text.
        activation: Activation function stored on ``self.activation``.
    """
    super(DescriptionGeneration, self).__init__(sess, config)
    self.config = config
    self.activation = activation
    self.encoder = encoder
    self.other_tasks = manager.tasks
    self.vocab = manager.vocab
    self.is_training = encoder.is_training
    self.dataset = config.dataset
    self.train_shared = config.train_shared
    # Dropout is only active while is_training evaluates to 1.
    self.keep_prob = 1.0 - tf.to_float(
        self.is_training) * config.dropout_rate
    self.ph = self.setup_placeholders()

    # Lengths are derived by counting non-zero ids.
    enc_sentence_length = tf.count_nonzero(self.ph.text.word,
                                           axis=-1,
                                           dtype=tf.int32)
    enc_context_length = tf.count_nonzero(enc_sentence_length,
                                          axis=-1,
                                          dtype=tf.float32)

    # Encode input text.
    word_repls = encoder.word_encoder.word_encode(self.ph.text.word)
    char_repls = encoder.word_encoder.char_encode(self.ph.text.char)
    enc_inputs, enc_outputs, enc_state = self.encoder.encode(
        [word_repls, char_repls],
        enc_sentence_length,
        prop_gradients=self.train_shared)
    self.adv_outputs = enc_outputs

    link_starts, link_ends = tf.unstack(self.ph.link, axis=-1)
    mention_repls, head_scores = encoder.get_batched_mention_emb(
        enc_inputs, enc_outputs, link_starts,
        link_ends)  # [batch_size, max_n_contexts, mention_size]
    if not self.train_shared:
        # Keep this task's loss from updating the shared encoder.
        mention_repls = tf.stop_gradient(mention_repls)
        head_scores = tf.stop_gradient(head_scores)

    # Aggregate context representations: mean over the contexts.
    context_sum = tf.reduce_sum(mention_repls, axis=1)
    init_state = context_sum / tf.expand_dims(enc_context_length, -1)

    # Add BOS and EOS to the decoder's inputs and outputs.
    # NOTE(review): the end token is appended after the last column of
    # the (padded) target, while the lengths below count only non-zero
    # ids plus one - confirm this is the intended position.
    batch_size = shape(self.ph.target, 0)
    with tf.name_scope('start_tokens'):
        start_tokens = tf.tile(tf.constant([START_TOKEN], dtype=tf.int32),
                               [batch_size])
    with tf.name_scope('end_tokens'):
        end_tokens = tf.tile(tf.constant([END_TOKEN], dtype=tf.int32),
                             [batch_size])
    start_column = tf.expand_dims(start_tokens, 1)
    dec_input_tokens = tf.concat([start_column, self.ph.target], axis=1)
    end_column = tf.expand_dims(end_tokens, 1)
    dec_output_tokens = tf.concat([self.ph.target, end_column], axis=1)

    # Length of description + end_token (or start_token).
    description_length = tf.count_nonzero(self.ph.target,
                                          axis=1,
                                          dtype=tf.int32)
    dec_input_lengths = dec_output_lengths = description_length + 1

    with tf.variable_scope('Decoder') as decoder_scope:
        self.decoder = RNNDecoder(config.decoder,
                                  self.is_training,
                                  self.vocab.decoder,
                                  shared_scope=decoder_scope)
        self.logits = self.decoder.decode_train(init_state,
                                                dec_input_tokens,
                                                dec_input_lengths,
                                                dec_output_lengths)
        self.predictions = self.decoder.decode_test(init_state)

    # Convert dec_output_lengths to binary masks.
    dec_output_weights = tf.sequence_mask(dec_output_lengths,
                                          dtype=tf.float32)

    # Compute loss.
    self.loss = tf.contrib.seq2seq.sequence_loss(
        self.logits,
        dec_output_tokens,
        dec_output_weights,
        average_across_timesteps=True,
        average_across_batch=True)

    # Combined tests with coref task.
    coref_tasks = [
        task for task in self.other_tasks.values()
        if isinstance(task, CorefModelBase)
    ]
    if coref_tasks:
        coref_tasks[0].generate_mention_desc(self.decoder)
def inference(self,
              text_emb,
              text_outputs,
              sentence_length,
              query,
              gold_mentions,
              num_gold_mentions,
              subj_targets,
              obj_targets,
              loss_weights_by_label,
              max_num_mentions=None):
    '''Predict mentions and their relation to the query for one document.

    Args:
      - text_emb, text_outputs, sentence_length: Encoder results for one
        document; flattened into sentence-independent form by
        self.flatten_doc_to_sent.
      - query: (start, end) position of the query mention.
      - gold_mentions: Gold mention spans; only the first
        num_gold_mentions rows are used.
      - num_gold_mentions: Number of valid rows in gold_mentions.
      - subj_targets, obj_targets: [max_sequence_len, max_mention_width],
        indexed by (mention start, mention width).
      - loss_weights_by_label: [num_relations]
      - max_num_mentions: None or an integer tensor. If not None, the
        first dimensions of predicted_relations and predicted_mentions
        are padded up to this value for batching.
    Return:
      - predicted_relations: [num_mentions, 2 (= subj/obj)]. The obj
        column is currently always zero.
      - predicted_mentions: [num_mentions, 2 (= start/end)]
      - loss: Mean over mentions of the weighted subjective loss.

    After the first call self.reuse is set, so later calls reuse the
    variables created here.
    '''
    if self.reuse:
        tf.get_variable_scope().reuse_variables()

    with tf.name_scope('flatten_text'):
        flattened_text_emb, flattened_text_outputs, flattened_sentence_indices = self.flatten_doc_to_sent(
            text_emb, text_outputs, sentence_length)

    with tf.name_scope('get_query_emb'):
        query_starts, query_ends = tf.unstack(tf.expand_dims(query, 0),
                                              axis=-1)
        query_emb = self.get_mention_emb(flattened_text_emb,
                                         flattened_text_outputs,
                                         query_starts, query_ends)

    with tf.name_scope('get_mentions'):
        _, _, _, pred_mention_starts, pred_mention_ends, pred_mention_scores, pred_mention_emb = self.get_mentions(
            flattened_text_emb, flattened_text_outputs,
            flattened_sentence_indices)

    # Concatenated [subjective, objective] relations with each mention.
    with tf.name_scope('calc_logits'):
        pred_subj_logits = self.predict_relation(query_emb,
                                                 pred_mention_emb,
                                                 pred_mention_scores, True)
        pred_obj_logits = self.predict_relation(query_emb,
                                                pred_mention_emb,
                                                pred_mention_scores, False)

    with tf.name_scope('predict_mention_and_relation'):
        predicted_relations = tf.concat(
            [
                tf.expand_dims(tf.argmax(pred_subj_logits, axis=-1), -1),
                tf.zeros(
                    [shape(pred_subj_logits, 0), 1], dtype=tf.int64
                )  # no prediction as for obj for now.
                #tf.expand_dims(tf.argmax(pred_obj_logits, axis=-1), -1)
            ],
            axis=-1)  # [num_mentions, 2]
        predicted_relations = tf.cast(predicted_relations, tf.int32)
        predicted_mentions = tf.concat([
            tf.expand_dims(pred_mention_starts, -1),
            tf.expand_dims(pred_mention_ends, -1)
        ], axis=-1)  # [num_mentions, 2]

        if max_num_mentions is not None:
            # Pad only the mention axis, at its end.
            num_pads = max_num_mentions - shape(predicted_relations, 0)
            pad_shape = [[0, num_pads], [0, 0]]
            predicted_relations = tf.pad(predicted_relations, pad_shape)
            predicted_mentions = tf.pad(predicted_mentions, pad_shape)

    with tf.name_scope('merge_logits'):
        mention_starts = []
        mention_ends = []
        subj_logits = []
        obj_logits = []
        if self.use_predicted_mentions:
            mention_starts.append(pred_mention_starts)
            mention_ends.append(pred_mention_ends)
            subj_logits.append(pred_subj_logits)
            obj_logits.append(pred_obj_logits)

        if self.use_gold_mentions:
            # Drop the padding rows of the gold mentions.
            gold_mentions = tf.slice(gold_mentions, [0, 0],
                                     [num_gold_mentions, 2])
            gold_mentions = tf.reshape(gold_mentions,
                                       [shape(gold_mentions, 0), 2])
            gold_mention_starts, gold_mention_ends = tf.unstack(
                gold_mentions, axis=-1)
            gold_mention_emb = self.get_mention_emb(
                flattened_text_emb, flattened_text_outputs,
                gold_mention_starts, gold_mention_ends)
            gold_mention_scores = self.get_mention_scores(gold_mention_emb)
            gold_subj_logits = self.predict_relation(
                query_emb, gold_mention_emb, gold_mention_scores, True)
            gold_obj_logits = self.predict_relation(
                query_emb, gold_mention_emb, gold_mention_scores, False)
            mention_starts.append(gold_mention_starts)
            mention_ends.append(gold_mention_ends)
            subj_logits.append(gold_subj_logits)
            obj_logits.append(gold_obj_logits)

        assert self.use_gold_mentions or self.use_predicted_mentions
        mention_starts = tf.concat(mention_starts, axis=0)
        mention_ends = tf.concat(mention_ends, axis=0)
        subj_logits = tf.concat(subj_logits, axis=0)
        obj_logits = tf.concat(obj_logits, axis=0)

    with tf.name_scope('loss'):
        # Gold mentions longer than self.max_mention_width should be cut.
        # Only the width is clipped: clipping the stacked (start, width)
        # pair with the width bound, as was done before, also clamped the
        # start positions to max_mention_width - 1 and so looked up the
        # targets of the wrong tokens.
        mention_widths = tf.clip_by_value(mention_ends - mention_starts, 0,
                                          shape(subj_targets, -1) - 1)
        mention_indices = tf.stack([mention_starts, mention_widths],
                                   axis=-1)  # [num_mentions, 2]

        subj_targets = tf.gather_nd(subj_targets,
                                    mention_indices)  # [num_mentions]
        obj_targets = tf.gather_nd(obj_targets,
                                   mention_indices)  # [num_mentions]
        subj_loss_weights = tf.gather(loss_weights_by_label, subj_targets)
        obj_loss_weights = tf.gather(loss_weights_by_label, obj_targets)

        subj_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=subj_logits, labels=subj_targets)
        obj_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=obj_logits, labels=obj_targets)
        # Only the subjective direction contributes to the loss for now.
        #losses = tf.concat([subj_losses, obj_losses], axis=-1) * tf.concat([subj_loss_weights, obj_loss_weights], axis=-1)
        losses = subj_losses * subj_loss_weights
        loss = tf.reduce_mean(losses, axis=-1)

    self.reuse = True
    return predicted_relations, predicted_mentions, loss
def __init__(self, sess, config, encoder, activation=tf.nn.relu):
    """Build the graph that scores (subject, relation, object) triples.

    Args:
        sess: Session object; forwarded to the base class.
        config: Task configuration. ``dropout_rate``, ``ffnn_size``,
            ``cnn.filter_widths`` and ``cnn.filter_size`` are read here.
        encoder: Encoder used for the text and for the relation words.
        activation: Activation function stored on ``self.activation``.
    """
    super(GraphLinkPrediction, self).__init__(sess, config)
    self.sess = sess
    self.encoder = encoder
    self.activation = activation
    self.is_training = encoder.is_training
    # Dropout is only active while is_training evaluates to 1.
    self.keep_prob = 1.0 - tf.to_float(
        self.is_training) * config.dropout_rate
    self.ffnn_size = config.ffnn_size
    self.cnn_filter_widths = config.cnn.filter_widths
    self.cnn_filter_size = config.cnn.filter_size

    def optional_placeholder(enabled, name, dims):
        """Create an int32 placeholder, or None when the input is unused."""
        if not enabled:
            return None
        return tf.placeholder(tf.int32, name=name, shape=dims)

    # Placeholders
    with tf.name_scope('Placeholder'):
        self.ph = recDotDefaultDict()
        self.ph.text.word = optional_placeholder(self.encoder.wbase,
                                                 'text.word', [None, None])
        self.ph.text.char = optional_placeholder(self.encoder.cbase,
                                                 'text.char',
                                                 [None, None, None])
        self.ph.subj = tf.placeholder(tf.int32,
                                      name='subj.position',
                                      shape=[None, 2])
        self.ph.obj = tf.placeholder(tf.int32,
                                     name='obj.position',
                                     shape=[None, 2])
        self.ph.rel = dotDict()
        self.ph.rel.word = optional_placeholder(self.encoder.wbase,
                                                'rel.word', [None, None])
        self.ph.rel.char = optional_placeholder(self.encoder.cbase,
                                                'rel.char',
                                                [None, None, None])
        self.ph.target = tf.placeholder(tf.int32,
                                        name='target',
                                        shape=[None])

    # Sentence lengths are derived by counting non-zero word ids.
    self.sentence_length = tf.count_nonzero(self.ph.text.word, axis=1)

    with tf.name_scope('Encoder'):
        text_emb, encoder_outputs, encoder_state = self.encoder.encode(
            [self.ph.text.word, self.ph.text.char], self.sentence_length)
        self.encoder_outputs = encoder_outputs

    with tf.variable_scope('Subject'):
        subj_starts, subj_ends = tf.unstack(self.ph.subj, axis=1)
        subj_outputs, _ = self.encoder.get_batched_mention_emb(
            text_emb, encoder_outputs, subj_starts, subj_ends)

    with tf.variable_scope('Object'):
        obj_starts, obj_ends = tf.unstack(self.ph.obj, axis=1)
        obj_outputs, _ = self.encoder.get_batched_mention_emb(
            text_emb, encoder_outputs, obj_starts, obj_ends)

    with tf.variable_scope('Relation'):
        # Stop gradient to prevent biased learning to the words used as
        # relation labels.
        rel_words_emb = tf.stop_gradient(
            self.encoder.word_encoder.encode(
                [self.ph.rel.word, self.ph.rel.char]))
        with tf.name_scope("compose_words"):
            rel_outputs = cnn(rel_words_emb, self.cnn_filter_widths,
                              self.cnn_filter_size)

    with tf.variable_scope('Inference'):
        score_outputs = self.inference(subj_outputs, rel_outputs,
                                       obj_outputs)  # [batch_size, 1]
        flat_scores = tf.reshape(score_outputs,
                                 [shape(score_outputs, 0)])
        # Rounded scores serve as the predictions.
        self.outputs = tf.round(flat_scores)  # [batch_size]

    with tf.name_scope("Loss"):
        self.losses = self.cross_entropy(score_outputs, self.ph.target)
        self.loss = tf.reduce_mean(self.losses)
def get_antecedent_scores(self, mention_emb, mention_scores, antecedents,
                          antecedents_len, mention_starts, mention_ends,
                          mention_speaker_ids, genre_emb):
    """Score every (mention, candidate antecedent) pair.

    Args:
        mention_emb: [num_mentions, emb] mention representations.
        mention_scores: [num_mentions] mention scores; added to each pair
            score for both the mention and its antecedent.
        antecedents: [num_mentions, max_ant] indices into the mentions.
        antecedents_len: Number of valid antecedents per mention; the
            remaining columns are masked out.
        mention_starts: Not used by this method.
        mention_ends: Not used by this method.
        mention_speaker_ids: Speaker id per mention (read only when
            ``self.use_metadata`` is set).
        genre_emb: Genre embedding vector (read only when
            ``self.use_metadata`` is set).

    Returns:
        [num_mentions, max_ant + 1] scores; column 0 is the constant-zero
        "no antecedent" score.
    """
    num_mentions = tf_utils.shape(mention_emb, 0)
    max_antecedents = tf_utils.shape(antecedents, 1)

    pair_features = []

    if self.use_metadata:
        antecedent_speaker_ids = tf.gather(
            mention_speaker_ids, antecedents)  # [num_mentions, max_ant]
        same_speaker = tf.equal(
            tf.expand_dims(mention_speaker_ids, 1),
            antecedent_speaker_ids)  # [num_mentions, max_ant]
        speaker_pair_emb = tf.gather(
            self.same_speaker_emb,
            tf.to_int32(same_speaker))  # [num_mentions, max_ant, emb]
        pair_features.append(speaker_pair_emb)

        # The same genre vector is repeated for every pair.
        genre_emb_3d = tf.expand_dims(tf.expand_dims(genre_emb, 0), 0)
        tiled_genre_emb = tf.tile(
            genre_emb_3d,
            [num_mentions, max_antecedents, 1
             ])  # [num_mentions, max_ant, emb]
        pair_features.append(tiled_genre_emb)

    if self.use_distance_feature:
        mention_positions = tf.range(num_mentions)  # [num_mentions]
        mention_distance = tf.expand_dims(
            mention_positions, 1) - antecedents  # [num_mentions, max_ant]
        mention_distance_bins = coref_ops.distance_bins(
            mention_distance)  # [num_mentions, max_ant]
        mention_distance_bins.set_shape([None, None])
        mention_distance_emb = tf.gather(
            self.mention_distance_emb,
            mention_distance_bins)  # [num_mentions, max_ant]
        pair_features.append(mention_distance_emb)

    feature_emb = tf.nn.dropout(
        tf.concat(pair_features, 2),
        self.keep_prob)  # [num_mentions, max_ant, emb]

    antecedent_emb = tf.gather(mention_emb,
                               antecedents)  # [num_mentions, max_ant, emb]
    target_emb_tiled = tf.tile(
        tf.expand_dims(mention_emb, 1),
        [1, max_antecedents, 1])  # [num_mentions, max_ant, emb]
    # Element-wise product of the two mention representations.
    similarity_emb = antecedent_emb * target_emb_tiled

    pair_emb = tf.concat(
        [target_emb_tiled, antecedent_emb, similarity_emb, feature_emb],
        2)  # [num_mentions, max_ant, emb]

    with tf.variable_scope("iteration"):
        with tf.variable_scope("antecedent_scoring"):
            pair_scores = tf_utils.ffnn(
                pair_emb, self.ffnn_depth, self.ffnn_size, 1,
                self.keep_prob)  # [num_mentions, max_ant, 1]
    pair_scores = tf.squeeze(pair_scores, 2)  # [num_mentions, max_ant]

    # log(1) = 0 keeps valid antecedents, log(0) = -inf removes padding.
    antecedent_mask = tf.log(
        tf.sequence_mask(antecedents_len,
                         max_antecedents,
                         dtype=tf.float32))  # [num_mentions, max_ant]
    pair_scores += antecedent_mask

    own_scores = tf.expand_dims(mention_scores, 1)
    antecedent_mention_scores = tf.gather(mention_scores, antecedents)
    pair_scores += own_scores + antecedent_mention_scores

    no_antecedent = tf.zeros([tf_utils.shape(mention_scores, 0),
                              1])  # [num_mentions, 1]
    antecedent_scores = tf.concat([no_antecedent, pair_scores],
                                  1)  # [num_mentions, max_ant + 1]
    return antecedent_scores
def get_mention_scores(self, mention_emb):
    """Score each mention with a feed-forward network.

    Args:
        mention_emb: Mention representations, one row per mention.

    Returns:
        A rank-1 tensor with one score per mention.
    """
    with tf.variable_scope("mention_scores"):
        column_scores = tf_utils.ffnn(mention_emb, self.ffnn_depth,
                                      self.ffnn_size, 1,
                                      self.keep_prob)  # [num_mentions, 1]
        # Drop the trailing singleton dimension.
        num_mentions = tf_utils.shape(column_scores, 0)
        return tf.reshape(column_scores, [num_mentions])