def metric_fn(label_ids, logits, num_labels, mask):
    """Build the evaluation metric dict holding a streaming confusion matrix.

    Args:
        label_ids: Ground-truth label ids.
        logits: Model scores; the predicted class is the argmax over the
            last axis.
        num_labels: Number of classes handed to the confusion matrix.
        mask: Weights applied to each entry while accumulating the matrix.

    Returns:
        A dict mapping "confusion_matrix" to the value returned by
        ``metrics.streaming_confusion_matrix``.
    """
    predicted_ids = tf.math.argmax(logits, axis=-1, output_type=tf.int32)
    return {
        "confusion_matrix": metrics.streaming_confusion_matrix(
            label_ids, predicted_ids, num_labels, weights=mask),
    }
def metric_fn(label_ids, logits, num_labels, mask):
    """Build the evaluation metrics: confusion matrix, accuracy, precision, recall.

    Args:
        label_ids: Ground-truth label ids.
        logits: Model scores; the predicted class is the argmax over the
            last axis.
        num_labels: Label count; the confusion matrix is built with
            ``num_labels - 1`` classes.
        mask: Weights applied to every metric.

    Returns:
        A dict with the keys "confusion_matrix", "eval_accuracy",
        "eval_precision" and "eval_recall".
    """
    predicted_ids = tf.math.argmax(logits, axis=-1, output_type=tf.int32)

    # NOTE(review): the matrix uses num_labels - 1 classes -- presumably one
    # label id is reserved and never appears under the mask; confirm.
    confusion = metrics.streaming_confusion_matrix(
        label_ids, predicted_ids, num_labels - 1, weights=mask)

    # All three tf.metrics share the same labels/predictions/weights.
    shared_kwargs = dict(labels=label_ids, predictions=predicted_ids, weights=mask)
    acc_metric = tf.metrics.accuracy(**shared_kwargs)
    # NOTE(review): tf.metrics.precision / tf.metrics.recall cast their inputs
    # to bool, so with more than two classes they score "non-zero vs. zero"
    # -- confirm that is what is wanted here.
    precision_metric = tf.metrics.precision(**shared_kwargs)
    recall_metric = tf.metrics.recall(**shared_kwargs)

    return {
        "confusion_matrix": confusion,
        "eval_accuracy": acc_metric,
        "eval_precision": precision_metric,
        "eval_recall": recall_metric,
    }
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Estimator model function: build the graph and return an EstimatorSpec.

    Reads "input_ids", "input_mask", "segment_ids" and "label_ids" from
    ``features``, builds the model through ``create_model``, optionally
    initializes variables from ``init_checkpoint``, and returns a spec that
    depends on ``mode``:

    * TRAIN: loss plus the train op from ``optimization.create_optimizer``.
    * EVAL: loss, accuracy / confusion-matrix metrics and a logging hook.
    * anything else: ``decode_tags`` as predictions.

    ``labels`` and ``params`` are accepted but not used.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (feature_name, features[feature_name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = mode == tf.estimator.ModeKeys.TRAIN
    total_loss, logits, decode_tags, _mask_length = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    if init_checkpoint:
        assignment_map, initialized_variable_names = (
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint))
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        # Tag variables that were restored from the checkpoint.
        init_string = ", *INIT_FROM_CKPT*" if var.name in initialized_variable_names else ""
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps)
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=total_loss, train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        # The confusion matrix is fed by the argmax of the logits, while the
        # accuracy metric is fed by decode_tags.
        predictions = tf.math.argmax(logits, axis=-1, output_type=tf.int32)
        cm = metrics.streaming_confusion_matrix(
            label_ids, predictions, num_labels, weights=input_mask)
        evl_metrics = {
            'accuracy': tf.metrics.accuracy(label_ids, decode_tags, input_mask),
            'cm': cm,
        }
        # NOTE(review): each metric's second element is passed to
        # tf.summary.scalar; for 'cm' that is presumably a matrix rather than
        # a scalar -- confirm this summary is ever evaluated.
        for metric_name, metric_ops in evl_metrics.items():
            tf.summary.scalar(metric_name, metric_ops[1])

        eval_hooks = tf.train.LoggingTensorHook(
            {"label_ids": label_ids, "decode_tags": decode_tags},
            every_n_iter=100)
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            evaluation_hooks=[eval_hooks],
            eval_metric_ops=evl_metrics)

    # Any remaining mode only exposes the decoded tags.
    return tf.estimator.EstimatorSpec(mode=mode, predictions=decode_tags)
def metric_fn(intent_per_example_loss, intent_label_ids, intent_logits,
              slot_label_ids, num_slot_labels, slot_predict,
              is_real_example, mask):
    """Build evaluation metrics for the intent and slot outputs.

    Args:
        intent_per_example_loss: Per-example intent loss values, averaged
            into "intent_eval_loss".
        intent_label_ids: Ground-truth intent ids.
        intent_logits: Intent scores; the prediction is the argmax over the
            last axis.
        slot_label_ids: Ground-truth slot ids.
        num_slot_labels: Number of classes for the slot confusion matrix.
        slot_predict: Predicted slot ids, used as given.
        is_real_example: Weights for the intent accuracy and loss metrics.
        mask: Weights for the slot confusion matrix.

    Returns:
        A dict with the keys "intent_eval_accuracy", "intent_eval_loss" and
        "slot_cm".
    """
    slot_confusion = metrics.streaming_confusion_matrix(
        slot_label_ids, slot_predict, num_slot_labels, weights=mask)

    predicted_intents = tf.argmax(intent_logits, axis=-1, output_type=tf.int32)
    accuracy_metric = tf.metrics.accuracy(
        labels=intent_label_ids,
        predictions=predicted_intents,
        weights=is_real_example)
    mean_loss_metric = tf.metrics.mean(
        values=intent_per_example_loss, weights=is_real_example)

    return {
        "intent_eval_accuracy": accuracy_metric,
        "intent_eval_loss": mean_loss_metric,
        "slot_cm": slot_confusion,
    }
def _build_graph(self, hparams, scope=None):
    """Construct the train, evaluation, and inference graphs.

    Pulls one batch from ``self.iterator``, runs it through a bidirectional
    RNN, two ReLU dense layers (each followed by dropout) and a bias-free
    output projection, then computes a masked softmax cross-entropy loss.
    In EVAL mode a streaming accuracy and confusion matrix are added.

    Args:
        hparams: The hyperparameters for configuration
        scope: The variable scope name for this subgraph; defaults to
            "dynamic_bdrnn"
    Returns:
        A tuple with (logits, loss, metrics, update_ops); the last two are
        empty lists outside EVAL mode.
    """
    inputs, tgt_outputs, seq_len = self.iterator.get_next()
    is_training = self.mode == tf.contrib.learn.ModeKeys.TRAIN

    with tf.variable_scope(scope or "dynamic_bdrnn", dtype=tf.float32):
        # TODO: hidden activations are passed thru FC net
        # TODO: hidden-to-hidden network has skip connections (residual)
        # TODO: initial hidden and cell states are learned

        # Forward and backward cells are built from identical settings.
        cell_kwargs = dict(
            unit_type=hparams.unit_type,
            num_units=hparams.num_units,
            num_layers=hparams.num_layers,
            depth=0,
            num_residual_layers=0,
            forget_bias=hparams.forget_bias,
            dropout=0.,
            mode=self.mode,
            num_gpus=1,
            base_gpu=0)
        fw_cells = mdl_help.create_rnn_cell(**cell_kwargs)
        bw_cells = mdl_help.create_rnn_cell(**cell_kwargs)

        # The RNN yields ((output_fw, output_bw), (state_fw, state_bw));
        # only the per-step outputs are used below.
        (output_fw, output_bw), _final_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=fw_cells,
            cell_bw=bw_cells,
            inputs=inputs,
            sequence_length=seq_len,
            initial_state_fw=None,
            initial_state_bw=None,
            dtype=tf.float32)

        # Join both directions along the feature axis.
        combined_outputs = tf.concat([output_fw, output_bw], axis=-1)

        # Dense head: (dense + ReLU -> dropout) twice, then the projection.
        hidden1 = tf.layers.dense(inputs=combined_outputs,
                                  units=hparams.num_dense_units,
                                  activation=tf.nn.relu,
                                  use_bias=True)
        hidden1 = tf.layers.dropout(inputs=hidden1,
                                    rate=hparams.dropout,
                                    training=is_training)
        hidden2 = tf.layers.dense(inputs=hidden1,
                                  units=hparams.num_dense_units,
                                  activation=tf.nn.relu,
                                  use_bias=True)
        hidden2 = tf.layers.dropout(inputs=hidden2,
                                    rate=hparams.dropout,
                                    training=is_training)
        logits = tf.layers.dense(inputs=hidden2,
                                 units=hparams.num_labels,
                                 use_bias=False)

        # Zero weight for steps beyond each sequence's length.
        mask = tf.sequence_mask(seq_len, dtype=tf.float32)

        # Labels are constants as far as the cross-entropy op is concerned.
        tgt_outputs = tf.stop_gradient(tgt_outputs)
        crossent = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=tgt_outputs, name="crossent")

        # Normalize by batch_size * mean(seq_len).
        # NOTE(review): uses the configured hparams.batch_size rather than the
        # runtime batch dimension -- confirm batches are never partial.
        masked_total = tf.reduce_sum(crossent * mask)
        mean_seq_len = tf.reduce_mean(tf.cast(seq_len, tf.float32))
        loss = masked_total / (hparams.batch_size * mean_seq_len)

        eval_metrics = []
        eval_update_ops = []
        if self.mode == tf.contrib.learn.ModeKeys.EVAL:
            predictions = tf.argmax(input=logits, axis=-1)
            tgt_labels = tf.argmax(input=tgt_outputs, axis=-1)
            acc, acc_update = tf.metrics.accuracy(predictions=predictions,
                                                  labels=tgt_labels,
                                                  weights=mask)

            # The confusion matrix is fed flattened labels/predictions/weights.
            cm, cm_update = streaming_confusion_matrix(
                labels=tf.reshape(tgt_labels, [-1]),
                predictions=tf.reshape(predictions, [-1]),
                num_classes=hparams.num_labels,
                weights=tf.reshape(mask, [-1]))
            tf.add_to_collection("eval", cm_summary(cm, hparams.num_labels))

            eval_metrics = [acc, cm]
            eval_update_ops = [acc_update, cm_update]

    return logits, loss, eval_metrics, eval_update_ops
def _build_graph(self, hparams, scope=None):
    """Construct the train, evaluation, and inference graphs.

    Pulls one batch from ``self.iterator``, encodes the inputs with an RNN,
    decodes with a second RNN initialised from the encoder state, and
    computes a length-normalized, masked softmax cross-entropy loss.
    In EVAL mode a streaming accuracy and confusion matrix are added.

    Args:
        hparams: The hyperparameters for configuration
        scope: The variable scope name for this subgraph, default "dynamic_seq2seq"
    Returns:
        A tuple with (logits, loss, metrics, update_ops); the last two are
        empty lists outside EVAL mode.
    Raises:
        ValueError: if no decoder helper exists for the current mode /
            ``hparams.train_helper`` combination (e.g. inference, which is
            not implemented yet).
    """
    enc_inputs, dec_inputs, dec_outputs, seq_len = self.iterator.get_next()

    # get the size of the batch
    batch_size = tf.shape(enc_inputs)[0]

    with tf.variable_scope(scope or "dynamic_seq2seq", dtype=tf.float32):
        # create encoder
        dense_input_layer = tf.layers.Dense(hparams.num_units, use_bias=False)

        if hparams.dense_input:
            enc_inputs = dense_input_layer(enc_inputs)

        enc_cells = mdl_help.create_rnn_cell(
            unit_type=hparams.unit_type,
            num_units=hparams.num_units,
            num_layers=hparams.num_layers,
            depth=hparams.depth,
            num_residual_layers=hparams.num_residual_layers,
            forget_bias=hparams.forget_bias,
            dropout=hparams.dropout,
            mode=self.mode,
            use_highway_as_residual=hparams.use_highway_as_residual)

        # run encoder
        enc_outputs, enc_state = tf.nn.dynamic_rnn(cell=enc_cells,
                                                   inputs=enc_inputs,
                                                   sequence_length=seq_len,
                                                   swap_memory=True,
                                                   dtype=tf.float32,
                                                   scope="encoder")

        # Target sequences are one step longer than the source sequences.
        tgt_seq_len = tf.add(seq_len, tf.constant(1, tf.int32))

        # TODO: Add Inference decoder
        # create decoder
        dec_cells = mdl_help.create_rnn_cell(
            unit_type=hparams.unit_type,
            num_units=hparams.num_units,
            num_layers=hparams.num_layers,
            depth=hparams.depth,
            num_residual_layers=hparams.num_residual_layers,
            forget_bias=hparams.forget_bias,
            dropout=hparams.dropout,
            mode=self.mode,
            use_highway_as_residual=hparams.use_highway_as_residual)

        # decoder embedding
        decoder_embedding = tf.get_variable(
            "decoder_embedding", [hparams.num_labels, hparams.num_units])
        if hparams.dense_input:
            # convert to int32 argmax values for embedding to work
            dec_inputs = tf.argmax(dec_inputs, axis=-1, output_type=tf.int32)
            dec_inputs = tf.nn.embedding_lookup(decoder_embedding, dec_inputs)

        # output project layer
        projection_layer = tf.layers.Dense(hparams.num_labels, use_bias=False)

        # Pick the decoder helper. It stays None when nothing matches so the
        # failure below is explicit instead of an UnboundLocalError.
        helper = None
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            if hparams.train_helper == "teacher":
                # teacher forcing
                helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=dec_inputs,
                    sequence_length=tgt_seq_len)
            elif hparams.train_helper == "sched":
                if hparams.dense_input:
                    embedding = decoder_embedding
                else:
                    # identity embedding: sampled ids become one-hot rows
                    embedding = tf.eye(hparams.num_labels)
                # scheduled sampling
                helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                    inputs=dec_inputs,
                    sequence_length=tgt_seq_len,
                    embedding=embedding,
                    sampling_probability=self.sample_probability,
                )
        elif self.mode == tf.contrib.learn.ModeKeys.EVAL:
            if hparams.dense_input:
                embedding = decoder_embedding
            else:
                embedding = tf.eye(hparams.num_labels)
            # sampling_probability of 1.0: always feed back the model's own
            # sampled output rather than the ground-truth input
            helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                inputs=dec_inputs,
                sequence_length=tgt_seq_len,
                embedding=embedding,
                sampling_probability=tf.constant(1.0))

        if helper is None:
            raise ValueError(
                "No decoder helper for mode %r with train_helper %r"
                % (self.mode, getattr(hparams, "train_helper", None)))

        decoder = tf.contrib.seq2seq.BasicDecoder(cell=dec_cells,
                                                  helper=helper,
                                                  initial_state=enc_state,
                                                  output_layer=projection_layer)

        # run decoder
        final_outputs, final_states, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            impute_finished=True,
            swap_memory=True,
            scope="decoder")

        logits = final_outputs.rnn_output

        # mask out entries longer than target sequence length
        mask = tf.sequence_mask(tgt_seq_len, dtype=tf.float32)

        # stop gradient thru labels by crossent op
        labels = tf.stop_gradient(dec_outputs)

        crossent = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                              labels=labels,
                                                              name="crossent")

        # Mean over the batch of each sequence's length-normalized loss.
        # The cross-entropy op reduces the class axis, so crossent and mask
        # are both [batch, time]; the lengths therefore need exactly ONE
        # trailing axis ([batch, 1]) to divide row-wise. A second expand_dims
        # would broadcast the quotient to [batch, batch, time] and divide
        # every sequence's loss by every sequence's length.
        # (assumes seq_len is a 1-D [batch] vector, as dynamic_rnn's
        # sequence_length expects -- TODO confirm)
        tgt_len_f = tf.cast(tgt_seq_len, tf.float32)
        normalized = (crossent * mask) / tf.expand_dims(tgt_len_f, -1)
        loss = tf.reduce_sum(normalized) / tf.cast(batch_size, tf.float32)

        metrics = []
        update_ops = []
        if self.mode == tf.contrib.learn.ModeKeys.EVAL:
            predictions = tf.argmax(input=logits, axis=-1)
            targets = tf.argmax(input=dec_outputs, axis=-1)
            acc, acc_update = tf.metrics.accuracy(predictions=predictions,
                                                  labels=targets,
                                                  weights=mask)
            # flatten for confusion matrix
            targets_flat = tf.reshape(targets, [-1])
            predictions_flat = tf.reshape(predictions, [-1])
            mask_flat = tf.reshape(mask, [-1])
            cm, cm_update = streaming_confusion_matrix(
                labels=targets_flat,
                predictions=predictions_flat,
                num_classes=hparams.num_labels,
                weights=mask_flat)
            tf.add_to_collection("eval", cm_summary(cm, hparams.num_labels))
            metrics = [acc, cm]
            update_ops = [acc_update, cm_update]

    return logits, loss, metrics, update_ops