def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) unique_ids = features["unique_ids"] input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) (start_logits, end_logits) = create_model( bert_config=bert_config, is_training=is_training, input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids, use_one_hot_embeddings=use_one_hot_embeddings) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: seq_length = modeling.get_shape_list(input_ids)[1] def compute_loss(logits, positions): one_hot_positions = tf.one_hot( positions, depth=seq_length, dtype=tf.float32) log_probs = tf.nn.log_softmax(logits, axis=-1) loss = -tf.reduce_mean( tf.reduce_sum(one_hot_positions * log_probs, axis=-1)) return loss start_positions = features["start_positions"] end_positions = features["end_positions"] start_loss = compute_loss(start_logits, start_positions) end_loss = compute_loss(end_logits, end_positions) total_loss = (start_loss + end_loss) / 2.0 train_op = optimization.create_optimizer( total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.PREDICT: predictions = { "unique_ids": unique_ids, "start_logits": start_logits, "end_logits": end_logits, } output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions=predictions, scaffold_fn=scaffold_fn) else: raise ValueError( "Only TRAIN and PREDICT modes are supported: %s" % (mode)) return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
  """The `model_fn` for TPUEstimator."""
  tf.logging.info("*** Features ***")
  for name in sorted(features.keys()):
    tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

  input_ids = features["input_ids"]
  input_mask = features["input_mask"]
  segment_ids = features["segment_ids"]
  label_ids = features["label_ids"]

  is_training = (mode == tf.estimator.ModeKeys.TRAIN)

  (total_loss, per_example_loss, logits, probabilities) = create_model(
      bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
      num_labels, use_one_hot_embeddings)

  tvars = tf.trainable_variables()
  initialized_variable_names = {}
  scaffold_fn = None
  if init_checkpoint:
    (assignment_map, initialized_variable_names
    ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
    if use_tpu:

      def tpu_scaffold():
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        return tf.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

  tf.logging.info("**** Trainable Variables ****")
  for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
      init_string = ", *INIT_FROM_CKPT*"
    tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                    init_string)

  output_spec = None
  # *** Fit the parameters of the model's final fully connected layer
  # (goes through AdamWeightDecayOptimizer()).
  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op = optimization.create_optimizer(total_loss, learning_rate,
                                             num_train_steps, num_warmup_steps,
                                             use_tpu)
    output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        scaffold_fn=scaffold_fn)
  elif mode == tf.estimator.ModeKeys.EVAL:
    # *** Evaluation metrics for the predictions.
    def metric_fn(per_example_loss, label_ids, logits):
      # Original (single-label) script:
      #   predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
      #   # *** index of the largest value in the vector
      #   accuracy = tf.metrics.accuracy(label_ids, predictions)
      #   loss = tf.metrics.mean(per_example_loss)
      #   return {"eval_accuracy": accuracy, "eval_loss": loss}
      # New script: metrics changed to a per-class AUC.
      logits_split = tf.split(probabilities, num_labels, axis=-1)
      label_ids_split = tf.split(label_ids, num_labels, axis=-1)
      eval_dict = {}
      for j, logits in enumerate(logits_split):
        label_id_ = tf.cast(label_ids_split[j], dtype=tf.int32)
        current_auc, update_op_auc = tf.metrics.auc(label_id_, logits)
        eval_dict[str(j)] = (current_auc, update_op_auc)
      eval_dict['eval_loss'] = tf.metrics.mean(values=per_example_loss)
      return eval_dict

    eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
    output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=eval_metrics,
        scaffold_fn=scaffold_fn)
  else:
    # *** Prediction. Used for SavedModel export ("v15 -- predict", known to
    # work); the plain predictions-only spec it replaces was:
    #   output_spec = tf.contrib.tpu.TPUEstimatorSpec(
    #       mode=mode, predictions=probabilities, scaffold_fn=scaffold_fn)
    predictions = tf.argmax(
        probabilities, axis=-1, output_type=tf.int32)  # logits --> probabilities
    export_outputs = {
        'classes':
            tf.estimator.export.PredictOutput({
                "probabilities": probabilities,
                "classid": predictions
            })
    }
    output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions=probabilities,
        scaffold_fn=scaffold_fn,
        export_outputs=export_outputs)
    # Alternative "v13 -- classify" export:
    #   export_outputs = {
    #       'classes': tf.estimator.export.ClassificationOutput(
    #           scores=probabilities, classes=tf.as_string(predictions))
    #   }
  return output_spec
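# A minimal sketch (not taken from the original script) of how the
# export_outputs defined above could be turned into a SavedModel for serving.
# `max_seq_length` and `export_dir` are assumed names; the label_ids
# placeholder is only included because create_model expects that feature when
# the serving graph is built.
def serving_input_receiver_fn():
  input_ids = tf.placeholder(tf.int32, [None, max_seq_length], name="input_ids")
  input_mask = tf.placeholder(tf.int32, [None, max_seq_length], name="input_mask")
  segment_ids = tf.placeholder(tf.int32, [None, max_seq_length], name="segment_ids")
  label_ids = tf.placeholder(tf.int32, [None, num_labels], name="label_ids")
  features = {
      "input_ids": input_ids,
      "input_mask": input_mask,
      "segment_ids": segment_ids,
      "label_ids": label_ids,
  }
  return tf.estimator.export.ServingInputReceiver(features, features)

# estimator.export_savedmodel(export_dir, serving_input_receiver_fn)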
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] masked_lm_positions = features["masked_lm_positions"] masked_lm_ids = features["masked_lm_ids"] masked_lm_weights = features["masked_lm_weights"] next_sentence_labels = features["next_sentence_labels"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) model = modeling.BertModel( config=bert_config, is_training=is_training, input_ids=input_ids, input_mask=input_mask, token_type_ids=segment_ids, use_one_hot_embeddings=use_one_hot_embeddings) (masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output( bert_config, model.get_sequence_output(), model.get_embedding_table(), model.get_embedding_table_2(), masked_lm_positions, masked_lm_ids, masked_lm_weights) (next_sentence_loss, next_sentence_example_loss, next_sentence_log_probs) = get_next_sentence_output( bert_config, model.get_pooled_output(), next_sentence_labels) total_loss = masked_lm_loss + next_sentence_loss tvars = tf.trainable_variables() initialized_variable_names = {} print("init_checkpoint:", init_checkpoint) scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, next_sentence_example_loss, next_sentence_log_probs, next_sentence_labels): """Computes the loss and accuracy of the model.""" masked_lm_log_probs = tf.reshape( masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]]) masked_lm_predictions = tf.argmax(masked_lm_log_probs, axis=-1, output_type=tf.int32) masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1]) masked_lm_ids = tf.reshape(masked_lm_ids, [-1]) masked_lm_weights = tf.reshape(masked_lm_weights, [-1]) masked_lm_accuracy = tf.metrics.accuracy( labels=masked_lm_ids, predictions=masked_lm_predictions, weights=masked_lm_weights) masked_lm_mean_loss = tf.metrics.mean( values=masked_lm_example_loss, weights=masked_lm_weights) next_sentence_log_probs = tf.reshape( next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]]) next_sentence_predictions = tf.argmax(next_sentence_log_probs, axis=-1, output_type=tf.int32) next_sentence_labels = tf.reshape(next_sentence_labels, [-1]) next_sentence_accuracy = tf.metrics.accuracy( labels=next_sentence_labels, predictions=next_sentence_predictions) next_sentence_mean_loss = tf.metrics.mean( 
values=next_sentence_example_loss) return { "masked_lm_accuracy": masked_lm_accuracy, "masked_lm_loss": masked_lm_mean_loss, "next_sentence_accuracy": next_sentence_accuracy, "next_sentence_loss": next_sentence_mean_loss, } # next_sentence_example_loss=0.0 TODO # next_sentence_log_probs=0.0 # TODO eval_metrics = (metric_fn, [ masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, next_sentence_example_loss, next_sentence_log_probs, next_sentence_labels ]) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode)) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] vals = features["vals"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, pred_vals) = create_model(bert_config, is_training, input_ids, input_mask, segment_ids, vals, use_one_hot_embeddings) tvars = tf.trainable_variables() scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(preds, vals): return { "eval_loss": tf.metrics.mean_squared_error(vals, preds), } eval_metrics = (metric_fn, [pred_vals, vals]) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions=pred_vals, scaffold_fn=scaffold_fn) return output_spec
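# create_model is not shown for the regression variant above; a minimal sketch
# of what it plausibly looks like (an assumption, not the original code) is a
# single-unit head on the pooled [CLS] output trained with mean squared error,
# which matches the mean_squared_error eval metric used in the model_fn:
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 vals, use_one_hot_embeddings):
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)
  pooled_output = model.get_pooled_output()  # [batch_size, hidden_size]
  if is_training:
    pooled_output = tf.nn.dropout(pooled_output, keep_prob=0.9)
  # Single regression target per example.
  pred_vals = tf.squeeze(
      tf.layers.dense(pooled_output, 1, name="regression_head"), axis=-1)
  total_loss = tf.reduce_mean(tf.square(pred_vals - vals))
  return (total_loss, pred_vals)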
def build(self, is_training=True): print("#" * 100) print(self.prembed) with self.graph.as_default(): self.x = tf.placeholder(tf.float32, [None, self.max_len, self.n_channel]) self.y = tf.placeholder(tf.int32, [ None, ]) x_shape = tf.shape(self.x) num_data = x_shape[0] masks = None if self.masking: print("*" * 100) print("Masking") pitch_size = int(np.sqrt(self.max_len)) single_mask = self.get_mask(pitch_size=pitch_size) single_mask = single_mask.flatten() single_mask = tf.convert_to_tensor(np.expand_dims(single_mask, axis=0), dtype=tf.bool) masks = tf.tile(single_mask, [num_data, 1]) masks = create_attention_mask_from_input_mask(self.x, masks) print("self.prembed", self.prembed) emb_in = self.x if self.prembed: #print("#"*100) print("#prembed with positional embedding no bias") emb_in = tf.layers.dense(emb_in, self.prembed_dim, activation=tf.tanh, use_bias=False) #emb_in = feedforward(emb_in, [self.num_hidden, self.prembed_dim], scope="prembed", reuse=False) #emb_in = tf.layers.dropout(emb_in, rate=self.drop_rate) #emb_in = layer_norm(emb_in, name="prembed") emb_in = embedding_postprocessor(emb_in, max_position_embeddings=200) print("my_emb_dim", emb_in.get_shape().as_list()[-1]) #emb_dim = tf.shape(emb_in)[-1] emb_dim = emb_in.get_shape().as_list()[-1] print("emb_dim", emb_dim) #emb_in = tf.reshape(emb_in, [num_classes*num_support, emb_dim]) attens = [] for i in range(self.max_depth): name_scope = "Transformer_Encoder_" + str(i) if i == 0: enc_embs, atten = transformer_encoder( emb_in, num_units=emb_dim, num_heads=self.num_head, num_hidden=self.num_hidden, dropout_rate=self.drop_rate, attention_dropout=self.attention_dropout, mask=masks, scope=name_scope) else: enc_embs, atten = transformer_encoder( enc_embs, num_units=emb_dim, num_heads=self.num_head, num_hidden=self.num_hidden, dropout_rate=self.drop_rate, attention_dropout=self.attention_dropout, mask=masks, scope=name_scope) attens.append(atten) #emb_x = tf.reshape(emb_in, [1, num_classes*num_support, self.im_height*self.im_width*self.channels]) if self.pooling: print("*" * 100) print("Pooling") pooled_data = self.simple_pooler( enc_embs) #self.rect_pooler(enc_embs) enc_embs = tf.reshape(pooled_data, [num_data, emb_dim]) else: print("8" * 100) print("No Pooling") enc_embs = tf.reshape(enc_embs, [num_data, self.max_len * emb_dim]) self.enc_embs = enc_embs self.istarget = tf.to_float(tf.not_equal(self.y, -99)) self.logits = tf.layers.dense(enc_embs, self.num_classes) self.preds = tf.to_int32(tf.argmax(self.logits, axis=-1)) self.acc = tf.reduce_sum(tf.to_float(tf.equal( self.preds, self.y))) / tf.reduce_sum(self.istarget) self.attens = attens tf.summary.scalar('acc', self.acc) if is_training: # Loss self.y_smoothed = label_smoothing( tf.one_hot(self.y, depth=self.num_classes)) self.loss = tf.nn.softmax_cross_entropy_with_logits_v2( logits=self.logits, labels=self.y_smoothed) print("self.loss") self.mean_loss = tf.reduce_sum(self.loss) / tf.reduce_sum( self.istarget) # Training Scheme #self.global_step = tf.Variable(0, name='global_step', trainable=False) #self.learning_rate = tf.train.exponential_decay( # learning_rate=starter_learning_rate, global_step=self.global_step, decay_steps=20, decay_rate=0.95, staircase=False) #self.learning_rate = tf.constant(self.start_learning_rate) #self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)#tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.9, beta2=0.98, epsilon=1e-8) #self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate) #self.train_op = 
self.optimizer.minimize(self.mean_loss, global_step=self.global_step) self.train_op, self.learning_rate = optimization.create_optimizer( self.mean_loss, self.start_learning_rate, num_train_steps=10000, num_warmup_steps=200, use_tpu=False) # Summary tf.summary.scalar('mean_loss', self.mean_loss) tf.summary.scalar('learning rate', self.learning_rate) self.merged = tf.summary.merge_all() self.saver = tf.train.Saver()
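# A minimal driver sketch for the graph assembled in build() above. Only the
# attributes set in build() (graph, x, y, train_op, mean_loss, merged, saver)
# come from the original class; `model` (an instance of that class), the batch
# generator `next_batch`, `log_dir`, and `num_steps` are assumptions.
model.build(is_training=True)
with model.graph.as_default():
  init_op = tf.global_variables_initializer()
with tf.Session(graph=model.graph) as sess:
  writer = tf.summary.FileWriter(log_dir, sess.graph)
  sess.run(init_op)
  for step in range(num_steps):
    batch_x, batch_y = next_batch()
    _, summary, loss_val = sess.run(
        [model.train_op, model.merged, model.mean_loss],
        feed_dict={model.x: batch_x, model.y: batch_y})
    writer.add_summary(summary, step)
    if step % 1000 == 0:
      model.saver.save(sess, log_dir + "/model.ckpt", global_step=step)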
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" # tf.logging.info("*** Features ***") # for name in sorted(features.keys()): # tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32) is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits, probabilities) = create_model( bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings) prediction = tf.cast(probabilities, tf.float32) threshold = float(0.5) prediction = tf.cast(tf.greater(prediction, threshold), tf.int64) acc, acc_op = tf.metrics.accuracy(label_ids, prediction) with tf.name_scope('summary'): tf.summary.scalar('total_loss', total_loss) tf.summary.scalar('accuracy', acc) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" # tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer( total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) prediction = tf.cast(probabilities, tf.float32) threshold = float(0.5) prediction = tf.cast(tf.greater(prediction, threshold), tf.int64) acc, acc_op = tf.metrics.accuracy(label_ids, prediction) logging_hook = tf.train.LoggingTensorHook({"loss": total_loss, "accuracy": acc_op}, every_n_iter=10) output_spec = tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold=scaffold_fn, training_hooks=[logging_hook], ) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, probabilities, is_real_example): logits_split = tf.split(probabilities, num_labels, axis=-1) label_ids_split = tf.split(label_ids, num_labels, axis=-1) # metrics change to auc of every class eval_dict = {} for j, logits in enumerate(logits_split): label_id_ = tf.cast(label_ids_split[j], dtype=tf.int32) current_auc, update_op_auc = tf.metrics.auc(label_id_, logits) eval_dict[str(j)] = (current_auc, update_op_auc) eval_dict['eval_loss'] = tf.metrics.mean(values=per_example_loss) return eval_dict ## original eval metrics # predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) # accuracy = tf.metrics.accuracy( # labels=label_ids, predictions=predictions, weights=is_real_example) # loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example) # return { # "eval_accuracy": accuracy, # "eval_loss": loss, # } eval_metrics = metric_fn(per_example_loss, label_ids, probabilities, is_real_example) prediction = tf.cast(probabilities, tf.float32) threshold = float(0.5) prediction = 
tf.cast(tf.greater(prediction, threshold), tf.int64) acc, acc_op = tf.metrics.accuracy(label_ids, prediction) logging_hook = tf.train.LoggingTensorHook({"loss": total_loss, "accuracy": acc_op}, every_n_iter=2) # accuracy = {"accuracy" : acc[1]} eval_metrics = metric_fn(per_example_loss, label_ids, probabilities, is_real_example) output_spec = tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, eval_metric_ops=eval_metrics, scaffold=scaffold_fn, evaluation_hooks=[logging_hook] ) # output_spec = tf.estimator.EstimatorSpec( # mode=mode, # loss=total_loss, # eval_metric_ops=eval_metrics, # scaffold=scaffold_fn) else: print("mode:", mode, "probabilities:", probabilities) output_spec = tf.estimator.EstimatorSpec( mode=mode, predictions={"probabilities": probabilities}, scaffold=scaffold_fn) return output_spec
def model_fn(features, labels, mode, params): """The `model_fn` for TPUEstimator.""" # reading features input input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32) # checking if training mode is_training = (mode == tf.estimator.ModeKeys.TRAIN) # create simple classification model (total_loss, per_example_loss, logits, probabilities) = create_model( bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings) # getting variables for intialization and using pretrained init checkpoint tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: # defining optimizar function train_op = optimization.create_optimizer( total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) # Training estimator spec output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: # accuracy, loss, auc, F1, precision and recall metrics for evaluation def metric_fn(per_example_loss, label_ids, logits, is_real_example): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example) accuracy = tf.metrics.accuracy( labels=label_ids, predictions=predictions, weights=is_real_example) f1_score = tf.contrib.metrics.f1_score( label_ids, predictions) auc = tf.metrics.auc( label_ids, predictions) recall = tf.metrics.recall( label_ids, predictions) precision = tf.metrics.precision( label_ids, predictions) return { "eval_accuracy": accuracy, "eval_loss": loss, "f1_score": f1_score, "auc": auc, "precision": precision, "recall": recall } eval_metrics = (metric_fn, [per_example_loss, label_ids, logits, is_real_example]) # estimator spec for evalaution output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: # estimator spec for predictions output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions={"probabilities": probabilities}, scaffold_fn=scaffold_fn) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] header_ids = features["header_ids"] extra_features = features["extra_features"] label_ids = features["label_ids"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits, probabilities) = create_model(bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings, header_ids, extra_features) tvars = tf.trainable_variables() initialized_variable_names = {} if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, False) output_spec = tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, train_op=train_op) elif mode == tf.estimator.ModeKeys.EVAL: metrics = { 'accuracy': tf.metrics.accuracy(labels=label_ids, predictions=tf.argmax(probabilities, 1), name='accuracy') } output_spec = tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, eval_metric_ops=metrics) else: output_spec = tf.estimator.EstimatorSpec(mode=mode, predictions=probabilities) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" # tf.logging.info("*** Features ***") # for name in sorted(features.keys()): # tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] word_ids = features["word_ids"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits, probabilities) = create_model( bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings) alpha = 0.1 print("!!!!!!!!!!!!!!!alpha") print(alpha) cnn_loss, cnn_prob = create_cnn_model(is_training, word_ids, label_ids, bert_config, [3,4,5], 128, 0.2, num_labels, logits, alpha) # total_score = logits + cnn_prob # total_loss = alpha*total_loss + (1-alpha) *cnn_loss total_loss = cnn_loss tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" # tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, # init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer( total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) logging_hook = tf.train.LoggingTensorHook({"loss": total_loss}, every_n_iter=10) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, training_hooks=[logging_hook], scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, logits): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) accuracy = tf.metrics.accuracy(label_ids, predictions) f1 = tf_metrics.f1(label_ids, predictions, average="macro",num_classes = 2) # = tf.contrib.metrics.f1_score(label_ids, predictions) loss = tf.metrics.mean(per_example_loss) return { "eval_accuracy": accuracy, "eval_loss": loss, "eval_f1" : f1, } eval_metrics = (metric_fn, [per_example_loss, label_ids, logits]) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions=probabilities, scaffold_fn=scaffold_fn) return output_spec
def model_fn(features, labels, mode, params): """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] label_mask = features["label_mask"] is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32) is_training = (mode == tf.estimator.ModeKeys.TRAIN) (d_loss, g_loss, per_example_loss, logits, probabilities) = create_model(bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings, label_mask) tvars = tf.trainable_variables() bert_vars = [v for v in tvars if 'bert' in v.name] d_vars = bert_vars + [v for v in tvars if 'Discriminator' in v.name] g_vars = [v for v in tvars if 'Generator' in v.name] initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: d_train_op = optimization.create_optimizer("d", d_vars, d_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) g_train_op = optimization.create_optimizer("g", g_vars, g_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) logging_hook = tf.train.LoggingTensorHook( { "d_loss": d_loss, "g_loss": g_loss, "per_example_loss": per_example_loss }, every_n_iter=1) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=d_loss + g_loss, train_op=tf.group(d_train_op, g_train_op), training_hooks=[logging_hook], scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, logits, is_real_example): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions, weights=is_real_example) precision = tf_metrics.precision(labels=label_ids, predictions=predictions, num_classes=num_labels, weights=is_real_example) recall = tf_metrics.recall(labels=label_ids, predictions=predictions, num_classes=num_labels, weights=is_real_example) f1_micro = tf_metrics.f1(labels=label_ids, predictions=predictions, num_classes=num_labels, weights=is_real_example, average='micro') f1_macro = tf_metrics.f1(labels=label_ids, predictions=predictions, num_classes=num_labels, weights=is_real_example, average='macro') loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example) return { "eval_accuracy": accuracy, "eval_precision": precision, "eval_recall": recall, "eval_f1_micro": f1_micro, "eval_f1_macro": f1_macro, "eval_loss": loss, } eval_metrics = (metric_fn, [ per_example_loss, label_ids, logits, is_real_example ]) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=d_loss, eval_metrics=eval_metrics, 
scaffold_fn=scaffold_fn) else: output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions={"probabilities": probabilities}, scaffold_fn=scaffold_fn) return output_spec
logits = tf.matmul(output_layer, output_weights, transpose_b=True)
logits = tf.nn.bias_add(logits, output_bias)
probabilities = tf.nn.softmax(logits, axis=-1)
predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)

log_probs = tf.nn.log_softmax(logits, axis=-1)
one_hot_labels = tf.one_hot(label_ids, depth=len(labels), dtype=tf.float32)
per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
loss = tf.reduce_mean(per_example_loss)
loss_pl = tf.placeholder(tf.float32)
loss_summary = tf.summary.scalar('Loss', loss_pl)

# Optimisation function
with tf.name_scope('optimizer'):
  optimizer = optimization.create_optimizer(loss, learning_rate,
                                            num_training_steps, 0, False)

# Calculate accuracy
with tf.name_scope('accuracy'):
  correct_prediction = tf.equal(predictions, tf.cast(label_ids, tf.int32))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  accuracy_pl = tf.placeholder(tf.float32)
  accuracy_summary = tf.summary.scalar('Accuracy', accuracy_pl)

# Restore model from checkpoint
trainable_vars = tf.trainable_variables()
if init_checkpoint:
  assignment_map, _ = bert.get_assignment_map_from_checkpoint(
      trainable_vars, init_checkpoint)
  tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
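# A minimal sketch of how this manually-built graph might be driven
# (assumptions: `train_batches` yields feed_dict values for the input
# placeholders defined elsewhere in the original script, and `summary_writer`
# is a tf.summary.FileWriter created for the log directory):
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for step in range(num_training_steps):
    feed = next(train_batches)
    _, loss_val, acc_val = sess.run([optimizer, loss, accuracy],
                                    feed_dict=feed)
    # The summary ops read plain scalars through the *_pl placeholders, so
    # step- or epoch-level averages can be logged independently of the
    # training feed.
    summary_writer.add_summary(
        sess.run(loss_summary, feed_dict={loss_pl: loss_val}), step)
    summary_writer.add_summary(
        sess.run(accuracy_summary, feed_dict={accuracy_pl: acc_val}), step)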
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
  """The `model_fn` for TPUEstimator."""
  tf.logging.info("*** Features ***")
  for name in sorted(features.keys()):
    tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

  input_ids = features["input_ids"]
  input_mask = features["input_mask"]
  segment_ids = features["segment_ids"]
  label_ids = features["label_ids"]
  is_real_example = None
  if "is_real_example" in features:
    is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
  else:
    is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

  is_training = (mode == tf.estimator.ModeKeys.TRAIN)

  # Build the model from the input features: input_ids is the token-id
  # representation of the example, label_ids the id representation of the tags.
  # Original classifier version:
  #   (total_loss, per_example_loss, logits) = create_model(
  #       bert_config, is_training, input_ids, input_mask, segment_ids,
  #       label_ids, num_labels, use_one_hot_embeddings)
  (total_loss, logits, trans, pred_ids) = create_model(
      bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
      num_labels, use_one_hot_embeddings)

  tvars = tf.trainable_variables()
  initialized_variable_names = {}
  scaffold_fn = None
  if init_checkpoint:
    (assignment_map, initialized_variable_names
    ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
    if use_tpu:

      def tpu_scaffold():
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        return tf.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

  tf.logging.info("**** Trainable Variables ****")
  for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
      init_string = ", *INIT_FROM_CKPT*"
    tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                    init_string)

  output_spec = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op = optimization.create_optimizer(total_loss, learning_rate,
                                             num_train_steps, num_warmup_steps,
                                             use_tpu)
    output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        scaffold_fn=scaffold_fn)
  elif mode == tf.estimator.ModeKeys.EVAL:
    # Modified for NER: pred_ids are already the Viterbi-decoded (CRF) tag ids,
    # so the metrics compare them with label_ids directly.
    def metric_fn(label_ids, pred_ids):
      # `indices` tells the metric which label ids to evaluate; it corresponds
      # to label_list.
      indices = [2, 3]
      weight = tf.sequence_mask(FLAGS.max_seq_length)
      precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                       indices, weight)
      recall = tf_metrics.recall(label_ids, pred_ids, num_labels, indices,
                                 weight)
      f = tf_metrics.f1(label_ids, pred_ids, num_labels, indices, weight)
      return {
          "eval_precision": precision,
          "eval_recall": recall,
          "eval_f": f,
          # "eval_loss": loss,
      }

    eval_metrics = (metric_fn, [label_ids, pred_ids])
    # eval_metrics = (metric_fn, [label_ids, logits])
    output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=eval_metrics,
        scaffold_fn=scaffold_fn)
  else:
    # raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))
    output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions=pred_ids,
        scaffold_fn=scaffold_fn)
  return output_spec
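# create_model for the NER variant above is not shown; the (total_loss, logits,
# trans, pred_ids) return signature suggests a CRF head on top of the sequence
# output. A sketch under that assumption (not the original implementation):
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 label_ids, num_labels, use_one_hot_embeddings):
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)
  sequence_output = model.get_sequence_output()  # [batch, seq_len, hidden]
  logits = tf.layers.dense(sequence_output, num_labels, name="ner_logits")
  seq_lengths = tf.reduce_sum(input_mask, axis=-1)
  log_likelihood, trans = tf.contrib.crf.crf_log_likelihood(
      inputs=logits, tag_indices=label_ids, sequence_lengths=seq_lengths)
  total_loss = tf.reduce_mean(-log_likelihood)
  pred_ids, _ = tf.contrib.crf.crf_decode(logits, trans, seq_lengths)
  return (total_loss, logits, trans, pred_ids)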
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] len_gt_titles = features["len_gt_titles"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits) = create_model(bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings) tvars = tf.trainable_variables() scaffold_fn = None initialized_variable_names = [] if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.PREDICT: output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions={ "logits": logits, "label_ids": label_ids, "len_gt_titles": len_gt_titles, }, scaffold_fn=scaffold_fn) else: raise ValueError("Only TRAIN and PREDICT modes are supported: %s" % (mode)) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids_x = features["input_ids_x"] input_mask_x = features["input_mask_x"] label_ids_x = features["label_ids_x"] info_x = features["info_x"] input_ids_y = features["input_ids_y"] input_mask_y = features["input_mask_y"] label_ids_y = features["label_ids_y"] info_y = features["info_y"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) print('Istrain:', is_training) model = modeling_coverage_learning_loss.BertModel( config=bert_config, gpu=FLAGS.gpu, is_training=is_training, input_ids_x=input_ids_x, input_ids_y=input_ids_y, label_ids_x=label_ids_x, label_ids_y=label_ids_y, input_mask_x=input_mask_x, input_mask_y=input_mask_y, ) total_loss = model.loss_ssl # pre-train # total_loss = model.loss_x + model.loss_y # finetune masked_lm_log_probs = model.log_probs_x tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: ( assignment_map, initialized_variable_names ) = modeling_coverage_learning_loss.get_assignment_map_from_checkpoint( tvars, init_checkpoint) tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps) output_spec = tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, train_op=train_op, scaffold=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: tf.add_to_collection('eval_sp', masked_lm_log_probs) tf.add_to_collection('eval_sp', input_ids_x) tf.add_to_collection('eval_sp', label_ids_x) tf.add_to_collection('eval_sp', info_x) output_spec = tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, scaffold=scaffold_fn) else: raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode)) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32) is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits, probabilities) = create_model( bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer( total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, logits, is_real_example): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) accuracy = tf.metrics.accuracy( labels=label_ids, predictions=predictions, weights=is_real_example) loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example) return { "eval_accuracy": accuracy, "eval_loss": loss, } eval_metrics = (metric_fn, [per_example_loss, label_ids, logits, is_real_example]) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions={"probabilities": probabilities}, scaffold_fn=scaffold_fn) return output_spec
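# For context, a closure like the model_fn above is normally produced by a
# model_fn_builder and handed to a TPUEstimator roughly as follows (a sketch
# in the style of the upstream run_classifier script; tpu_cluster_resolver,
# the FLAGS values and train_input_fn are assumed to be defined elsewhere):
run_config = tf.contrib.tpu.RunConfig(
    cluster=tpu_cluster_resolver,
    master=FLAGS.master,
    model_dir=FLAGS.output_dir,
    save_checkpoints_steps=FLAGS.save_checkpoints_steps,
    tpu_config=tf.contrib.tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        num_shards=FLAGS.num_tpu_cores,
        per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2))

estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=FLAGS.use_tpu,
    model_fn=model_fn,
    config=run_config,
    train_batch_size=FLAGS.train_batch_size,
    eval_batch_size=FLAGS.eval_batch_size,
    predict_batch_size=FLAGS.predict_batch_size)

estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)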
def model_fn(features, labels, mode, params):
  tf.compat.v1.logging.info("*** Features ***")
  for name in sorted(features.keys()):
    tf.compat.v1.logging.info("  name = %s, shape = %s" %
                              (name, features[name].shape))

  input_ids = features["input_ids"]
  input_mask = features["input_mask"]
  segment_ids = features["segment_ids"]
  label_ids = features["label_ids"]
  # label_mask = features["label_mask"]

  is_training = (mode == tf.estimator.ModeKeys.TRAIN)

  (total_loss, per_example_loss, logits, predicts) = create_model(
      bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
      num_labels, use_one_hot_embeddings)

  tvars = tf.trainable_variables()
  initialized_variable_names = {}
  scaffold_fn = None
  if init_checkpoint and (hvd is None or hvd.rank() == 0):
    (assignment_map, initialized_variable_names
    ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

  tf.compat.v1.logging.info("**** Trainable Variables ****")
  for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
      init_string = ", *INIT_FROM_CKPT*"
    tf.compat.v1.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                              init_string)

  output_spec = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op = optimization.create_optimizer(total_loss, learning_rate,
                                             num_train_steps, num_warmup_steps,
                                             hvd, False, amp)
    output_spec = tf.estimator.EstimatorSpec(
        mode=mode, loss=total_loss, train_op=train_op)
  elif mode == tf.estimator.ModeKeys.EVAL:
    dummy_op = tf.no_op()
    # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite
    if amp:
      dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite(
          optimization.LAMBOptimizer(learning_rate=0.0))

    def metric_fn(per_example_loss, label_ids, logits):
      # def metric_fn(label_ids, logits):
      predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
      precision = tf_metrics.precision(label_ids, predictions, num_labels,
                                       [1, 2], average="macro")
      recall = tf_metrics.recall(label_ids, predictions, num_labels, [1, 2],
                                 average="macro")
      f = tf_metrics.f1(label_ids, predictions, num_labels, [1, 2],
                        average="macro")
      return {
          "precision": precision,
          "recall": recall,
          "f1": f,
      }

    eval_metric_ops = metric_fn(per_example_loss, label_ids, logits)
    output_spec = tf.estimator.EstimatorSpec(
        mode=mode, loss=total_loss, eval_metric_ops=eval_metric_ops)
  else:
    dummy_op = tf.no_op()
    # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite
    if amp:
      dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite(
          optimization.LAMBOptimizer(learning_rate=0.0))
    output_spec = tf.estimator.EstimatorSpec(
        mode=mode, predictions=predicts)  # probabilities
  return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.compat.v1.logging.info("*** Features ***") for name in sorted(features.keys()): tf.compat.v1.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32) is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits, probabilities) = create_model(bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint and (hvd is None or hvd.rank() == 0): (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.compat.v1.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.compat.v1.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, hvd, False, amp) output_spec = tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, train_op=train_op) elif mode == tf.estimator.ModeKeys.EVAL: dummy_op = tf.no_op() # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite if amp: loss_scaler = tf.train.experimental.FixedLossScale(1) dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite( optimization.LAMBOptimizer(learning_rate=0.0), loss_scaler) def metric_fn(per_example_loss, label_ids, logits, is_real_example): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions, weights=is_real_example) loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example) return { "eval_accuracy": accuracy, "eval_loss": loss, } eval_metric_ops = metric_fn(per_example_loss, label_ids, logits, is_real_example) output_spec = tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, eval_metric_ops=eval_metric_ops) else: dummy_op = tf.no_op() # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite if amp: dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite( optimization.LAMBOptimizer(learning_rate=0.0)) output_spec = tf.estimator.EstimatorSpec( mode=mode, predictions={"probabilities": probabilities}) #predicts)#probabilities) return output_spec
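# The hvd checks in the two model_fn variants above assume the usual Horovod
# boilerplate elsewhere in the script. A sketch of that wiring (the session
# config, model_dir handling and step scaling are assumptions, not copied from
# the original):
import horovod.tensorflow as hvd

hvd.init()
session_config = tf.compat.v1.ConfigProto()
session_config.gpu_options.visible_device_list = str(hvd.local_rank())

# Only rank 0 writes checkpoints; every rank starts from rank 0's weights.
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir=FLAGS.output_dir if hvd.rank() == 0 else None,
    config=tf.estimator.RunConfig(session_config=session_config))
estimator.train(
    input_fn=train_input_fn,
    max_steps=num_train_steps // hvd.size(),
    hooks=[hvd.BroadcastGlobalVariablesHook(0)])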
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
  """The `model_fn` for TPUEstimator."""
  tf.logging.info("*** Features ***")
  for name in sorted(features.keys()):
    tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

  input_ids = features["input_ids"]
  input_mask = features["input_mask"]
  segment_ids = features["segment_ids"]
  label_ids = features["label_ids"]
  is_real_example = None
  if "is_real_example" in features:
    is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
  else:
    is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

  is_training = (mode == tf.estimator.ModeKeys.TRAIN)

  (total_loss, per_example_loss, logits, probabilities) = create_model(
      bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
      num_labels, use_one_hot_embeddings)
  print("total_loss::", total_loss)
  print("per_example_loss::", per_example_loss)
  print("logits::", logits)
  print("probabilities::", probabilities)

  tvars = tf.trainable_variables()
  initialized_variable_names = {}
  scaffold_fn = None
  if init_checkpoint:
    print("Restoring parameters from {}".format(init_checkpoint))
    (assignment_map, initialized_variable_names
    ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
    if use_tpu:

      def tpu_scaffold():
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        return tf.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

  tf.logging.info("**** Trainable Variables ****")
  for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
      init_string = ", *INIT_FROM_CKPT*"
    tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                    init_string)

  def get_embedding_vars(tvars, grads, layer_name='word_embeddings'):
    """Returns the embedding vars and their normalized gradients."""
    embed_vars, embed_norm_grads = [], []
    for var, grad in zip(tvars, grads):
      if layer_name in var.name:
        embed_vars.append(var)
        norm = tf.norm(grad) + 1e-10
        embed_norm_grads.append(tf.div(grad, norm))
    return embed_vars, embed_norm_grads

  output_spec = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    # Compute the ordinary (clean) gradients.
    grads = tf.gradients(total_loss, tvars)
    # Get the embedding gradients and normalize them; they also act as a
    # backup that lets us undo the perturbation later.
    embed_vars, embed_norm_grads = get_embedding_vars(tvars, grads)
    # Add the perturbation to the embeddings (the attack step).
    with tf.control_dependencies(embed_norm_grads):
      attack_op = []
      for param, norm_grad in zip(embed_vars, embed_norm_grads):
        attack_op.append(
            param.assign(param + tf.multiply(FLAGS.epsilon, norm_grad)))
      attack_op = tf.group(*attack_op, name='attack')
    # After the attack, recompute the loss and gradients.
    with tf.control_dependencies([attack_op]):
      (att_total_loss, att_per_example_loss, att_logits,
       att_probabilities) = create_model(
           bert_config, is_training, input_ids, input_mask, segment_ids,
           label_ids, num_labels, use_one_hot_embeddings, reuse=True)
      att_grads = tf.gradients(
          att_total_loss, tvars, name='att_gradients')  # entries may be None
    # Once the post-attack gradients exist, restore the parameters.
    with tf.control_dependencies(att_grads):
      restore_op = []
      for param, norm_grad in zip(embed_vars, embed_norm_grads):
        restore_op.append(
            param.assign(param - tf.multiply(FLAGS.epsilon, norm_grad)))
      restore_op = tf.group(*restore_op, name='restore')
    # Accumulate the gradients: treat the clean and adversarial passes as one
    # batch and average them.
    with tf.control_dependencies([restore_op]):
      acc_grads = [
          tf.div(tf.add(grad, att_grad), 2.0)
          for grad, att_grad in zip(grads, att_grads)
      ]
      acc_total_loss = tf.div(tf.add(total_loss, att_total_loss), 2.0)
    # Apply the parameter update; this must happen after the restore step.
    with tf.control_dependencies(acc_grads):
      train_op = optimization.create_optimizer(
          tvars, acc_grads, learning_rate, num_train_steps, num_warmup_steps,
          use_tpu)

    logging_hook1 = tf.train.LoggingTensorHook(
        {"total_loss": acc_total_loss}, every_n_iter=10)
    output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=acc_total_loss,
        train_op=train_op,
        training_hooks=[logging_hook1],
        scaffold_fn=scaffold_fn)
  elif mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(per_example_loss, label_ids, probabilities, is_real_example):
      probabilities = probabilities[:, 1]
      predictions = tf.cast(probabilities > 0.5, tf.int32)
      accuracy = tf.metrics.accuracy(label_ids, predictions)
      precision = tf.metrics.precision(label_ids, predictions)
      recall = tf.metrics.recall(label_ids, predictions)
      loss = tf.metrics.mean(per_example_loss)
      return {
          "eval_accuracy": accuracy,
          "eval_precision": precision,
          "eval_recall": recall,
          "eval_loss": loss,
      }

    eval_metrics = (metric_fn,
                    [per_example_loss, label_ids, probabilities,
                     is_real_example])
    output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=eval_metrics,
        scaffold_fn=scaffold_fn)
  else:
    output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions=probabilities,
        scaffold_fn=scaffold_fn)
  return output_spec
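# The train_op above calls optimization.create_optimizer(tvars, acc_grads, ...),
# i.e. a modified create_optimizer that applies pre-computed (averaged clean +
# adversarial) gradients instead of differentiating a loss itself. The modified
# file is not shown; a sketch of such a variant, modelled on the upstream BERT
# optimizer code (every detail below is an assumption):
def create_optimizer(tvars, grads, init_lr, num_train_steps, num_warmup_steps,
                     use_tpu):
  global_step = tf.train.get_or_create_global_step()
  learning_rate = tf.train.polynomial_decay(
      tf.constant(value=init_lr, shape=[], dtype=tf.float32),
      global_step, num_train_steps, end_learning_rate=0.0, power=1.0,
      cycle=False)
  if num_warmup_steps:
    # Linear warmup over the first num_warmup_steps steps.
    warmup_percent = tf.cast(global_step, tf.float32) / float(num_warmup_steps)
    is_warmup = tf.cast(global_step < num_warmup_steps, tf.float32)
    learning_rate = ((1.0 - is_warmup) * learning_rate +
                     is_warmup * init_lr * warmup_percent)
  optimizer = AdamWeightDecayOptimizer(
      learning_rate=learning_rate,
      weight_decay_rate=0.01,
      beta_1=0.9,
      beta_2=0.999,
      epsilon=1e-6,
      exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])
  if use_tpu:
    optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
  # The caller guarantees `grads` is aligned with `tvars`.
  (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
  train_op = optimizer.apply_gradients(
      zip(grads, tvars), global_step=global_step)
  # AdamWeightDecayOptimizer does not increment global_step itself.
  new_global_step = global_step + 1
  train_op = tf.group(train_op, [global_step.assign(new_global_step)])
  return train_op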
def model_fn(features, labels, mode, params): tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] #label_mask = features["label_mask"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits, predictsDict) = create_model(bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings) predictsDict["input_mask"] = input_mask tvars = tf.trainable_variables() scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) tf.train.init_from_checkpoint(init_checkpoint, assignment_map) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, logits, num_labels): # def metric_fn(label_ids, logits): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) precision = tf_metrics.precision(label_ids, predictions, num_labels, [1, 2], average="macro") recall = tf_metrics.recall(label_ids, predictions, num_labels, [1, 2], average="macro") f = tf_metrics.f1(label_ids, predictions, num_labels, [1, 2], average="macro") # return { "eval_precision": precision, "eval_recall": recall, "eval_f": f, #"eval_loss": loss, } eval_metrics = (metric_fn, [per_example_loss, label_ids, logits, num_labels]) # eval_metrics = (metric_fn, [label_ids, logits]) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.PREDICT: output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions=predictsDict, scaffold_fn=scaffold_fn) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for Estimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) if not is_training and FLAGS.use_trt: trt_graph = get_frozen_tftrt_model(bert_config, input_ids.shape, num_labels, use_one_hot_embeddings, init_checkpoint) (total_loss, per_example_loss, logits, probabilities) = tf.import_graph_def( trt_graph, input_map={ 'input_ids': input_ids, 'input_mask': input_mask, 'segment_ids': segment_ids, 'label_ids': label_ids }, return_elements=[ 'loss/cls_loss:0', 'loss/cls_per_example_loss:0', 'loss/cls_logits:0', 'loss/cls_probabilities:0' ], name='') if mode == tf.estimator.ModeKeys.PREDICT: predictions = {"probabilities": probabilities} output_spec = tf.estimator.EstimatorSpec( mode=mode, predictions=predictions) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, logits): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions) loss = tf.metrics.mean(values=per_example_loss) return { "eval_accuracy": accuracy, "eval_loss": loss, } eval_metric_ops = metric_fn(per_example_loss, label_ids, logits) output_spec = tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, eval_metric_ops=eval_metric_ops) return output_spec (total_loss, per_example_loss, logits, probabilities) = create_model(bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings) tvars = tf.trainable_variables() initialized_variable_names = {} if init_checkpoint and (hvd is None or hvd.rank() == 0): (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) tf.train.init_from_checkpoint(init_checkpoint, assignment_map) if FLAGS.verbose_logging: tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, hvd, FLAGS.use_fp16) output_spec = tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, train_op=train_op) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, logits): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) accuracy = tf.metrics.accuracy(label_ids, predictions) loss = tf.metrics.mean(per_example_loss) return { "eval_accuracy": accuracy, "eval_loss": loss, } eval_metric_ops = metric_fn(per_example_loss, label_ids, logits) output_spec = tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, eval_metric_ops=eval_metric_ops) else: output_spec = tf.estimator.EstimatorSpec(mode=mode, predictions=probabilities) return output_spec
def model_fn(features, labels, mode, params): tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] output = model.model(hparams=hparams, X=input_ids) loss = tf.reduce_mean( input_tensor=tf.nn.sparse_softmax_cross_entropy_with_logits( labels=input_ids[:, 1:], logits=output["logits"][:, :-1])) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: ( assignment_map, initialized_variable_names, ) = get_assignment_map_from_checkpoint(tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer( loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu, optimizer, poly_power, start_warmup_step, use_memory_saving_gradients=use_memory_saving_gradients) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn, ) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(loss): """Evaluation metric Fn which runs on CPU.""" perplexity = tf.exp(tf.reduce_mean(loss)) bpc = tf.reduce_mean(loss) / tf.constant(math.log(2)) return { "perplexity": tf.metrics.mean(perplexity), "bpc": tf.metrics.mean(bpc), } if FLAGS.use_tpu: with tf.colocate_with(loss): loss = tf.contrib.tpu.cross_replica_sum(loss) \ / FLAGS.num_tpu_cores metric_loss = tf.tile(tf.reshape(loss, [1, 1]), [FLAGS.eval_batch_size, 1]) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=loss, eval_metrics=(metric_fn, [metric_loss]), scaffold_fn=scaffold_fn) # eval_metrics = (metric_fn, {"loss":loss}) # output_spec = tf.contrib.tpu.TPUEstimatorSpec( # mode=mode, # loss=loss, # eval_metrics=eval_metrics, # scaffold_fn=scaffold_fn, # ) else: raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode)) return output_spec
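# The eval metrics above derive perplexity and bits-per-character (bpc) from the mean
# token-level cross-entropy: perplexity = exp(mean_loss) and bpc = mean_loss / ln(2),
# since the loss is measured in nats. A tiny numeric illustration with hypothetical values:
import math

mean_loss = 3.2                           # hypothetical mean cross-entropy in nats per token
perplexity = math.exp(mean_loss)          # ~24.5
bits_per_char = mean_loss / math.log(2)   # ~4.62 bits, if the targets are characters

print("perplexity=%.2f, bpc=%.2f" % (perplexity, bits_per_char))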
def model_fn(features, labels, mode, params, config=None):  # pylint: disable=unused-argument
  """The `model_fn` for TPUEstimator.

  The model has three phases -- training, evaluation and prediction -- and the data is
  handled differently in each mode. During training we feed the data to the model, the
  model produces predictions, we compute the loss from the predictions and the ground
  truth, and the loss is used to update the network parameters. During evaluation no
  back-propagation is performed. In other words, `model_fn` needs a separate code path
  for each of the three modes.

  Args:
    features: dict of `Tensor`; the batch of features returned by `input_fn`.
    labels: the batch of labels returned by `input_fn`.
    mode: an instance of `tf.estimator.ModeKeys`.
    params: additional configuration for hyper-parameters; a dict that can carry
      parameters used to build the network or control training.

  Returns:
    A `tf.estimator.EstimatorSpec` (or `TPUEstimatorSpec`).
  """
  print("features={}".format(features))
  tf.logging.info("*** Features ***")
  for name in sorted(features.keys()):
    tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))
  input_ids = features["input_ids"]
  label_ids = features["label_ids"]
  if "is_real_example" in features:
    # Cast to tf.float32.
    is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
  else:
    # Create a tensor with every element set to 1.
    is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

  # Determine from `mode` whether we are training.
  is_training = (mode == tf.estimator.ModeKeys.TRAIN)

  # Build the model from the features and compute the loss, logits, etc.
  print("create_model:\ninput_ids={}".format(input_ids.shape))
  print("label_ids={}".format(label_ids.shape))
  (total_loss, per_example_loss, logits, probabilities) = create_model(
      bert_config, is_training, input_ids, None, None, label_ids,
      num_labels, use_one_hot_embeddings, None)
  print("total_loss={}".format(total_loss))

  tvars = tf.trainable_variables()
  initialized_variable_names = {}
  scaffold_fn = None
  if init_checkpoint:
    (assignment_map, initialized_variable_names
    ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
    if use_tpu:
      def tpu_scaffold():
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        return tf.train.Scaffold()
      scaffold_fn = tpu_scaffold
    else:
      tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

  tf.logging.info("**** Trainable Variables ****")
  for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
      init_string = ", *INIT_FROM_CKPT*"
    tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape, init_string)

  # Training mode.
  if mode == tf.estimator.ModeKeys.TRAIN:
    if FLAGS.num_gpu_cores > 1:
      train_op = custom_optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, fp16=fp16)
      output_spec = tf.estimator.EstimatorSpec(
          mode=mode, loss=total_loss, train_op=train_op, scaffold=scaffold_fn)
    else:
      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn)
  # Evaluation mode.
  elif mode == tf.estimator.ModeKeys.EVAL:
    def metric_fn(per_example_loss, label_ids, logits, is_real_example):
      predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
      accuracy = tf.metrics.accuracy(
          labels=label_ids, predictions=predictions, weights=is_real_example)
      loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
      # Additional metrics.
      pr, pr_op = tf.metrics.precision(
          labels=label_ids, predictions=predictions, weights=is_real_example)
      re, re_op = tf.metrics.recall(
          labels=label_ids, predictions=predictions, weights=is_real_example)
      # if FLAGS.classifier_mode == "multi-class":
      #   # multi-class
      #   # pr, pr_op = tf_metrics.precision(label_ids, predictions, num_labels, average="macro")
      #   # re, re_op = tf_metrics.recall(label_ids, predictions, num_labels, average="macro")
      #   f1 = tf_metrics.f1(label_ids, predictions, num_labels, average="macro")
      # else:
      #   # binary classifier
      #   f1 = tf.contrib.metrics.f1_score(label_ids, predictions)
      #   # f1, f1_op = (2 * pr * re) / (pr + re)  # f1-score for binary classification
      # Return a dict of {key: (metric_tensor, update_op)}.
      return {
          "eval_accuracy": accuracy,
          "eval_loss": loss,
          "eval_precision": (pr, pr_op),
          "eval_recall": (re, re_op),
          # "eval_f1": f1,
      }

    eval_metrics = (metric_fn,
                    [per_example_loss, label_ids, logits, is_real_example])
    if FLAGS.num_gpu_cores > 1:
      output_spec = tf.estimator.EstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metric_ops=metric_fn(per_example_loss, label_ids, logits,
                                    is_real_example),
          scaffold=scaffold_fn)
    else:
      # eval on single-gpu only
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode, loss=total_loss, eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
  else:
    # tf.estimator.ModeKeys.PREDICT: prediction mode.
    # Take the label index with the highest probability from the logits.
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    if FLAGS.num_gpu_cores > 1:
      # Multiple GPUs.
      output_spec = tf.estimator.EstimatorSpec(
          mode=mode,
          predictions={"probabilities": probabilities,
                       "predictions": predictions})
    else:
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"probabilities": probabilities,
                       "predictions": predictions},
          scaffold_fn=scaffold_fn)
  return output_spec
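# The commented-out F1 above can be assembled from the streaming precision/recall metrics:
# the value tensor is 2*p*r/(p+p+r - wait, 2*p*r/(p+r)) computed from the precision and
# recall value tensors, and the update op groups their update ops. A minimal sketch under
# TensorFlow 1.x, assuming binary labels; the helper name is illustrative only.
import tensorflow as tf

def f1_metric(labels, predictions, weights=None):
  """Returns an (f1_tensor, update_op) pair built from tf.metrics.precision/recall."""
  pr, pr_op = tf.metrics.precision(labels=labels, predictions=predictions, weights=weights)
  re, re_op = tf.metrics.recall(labels=labels, predictions=predictions, weights=weights)
  f1 = tf.where(tf.greater(pr + re, 0.0),
                2.0 * pr * re / (pr + re),
                tf.zeros_like(pr))  # guard against 0/0 when both metrics are zero
  return f1, tf.group(pr_op, re_op)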
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) unique_ids = features["unique_ids"] input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] input_span_mask = features["input_span_mask"] output_span_mask = features["output_span_mask"] source_input_ids = features["source_input_ids"] source_input_mask = features["source_input_mask"] source_segment_ids = features["source_segment_ids"] source_input_span_mask = features["source_input_span_mask"] source_output_span_mask = features["source_output_span_mask"] start_positions = features["start_positions"] end_positions = features["end_positions"] source_start_positions = features["source_start_positions"] source_end_positions = features["source_end_positions"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) (start_logits, end_logits, source_raw_start_logits, source_raw_end_logits, target_span_gt_tensor, source_span_gt_tensor) = create_model( bert_config=bert_config, is_training=is_training, input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids, input_span_mask=input_span_mask, output_span_mask=output_span_mask, source_input_ids=source_input_ids, source_input_mask=source_input_mask, source_segment_ids=source_segment_ids, source_input_span_mask=source_input_span_mask, source_output_span_mask=source_output_span_mask, start_positions=start_positions, end_positions=end_positions, source_start_positions=source_start_positions, source_end_positions=source_end_positions, use_one_hot_embeddings=use_one_hot_embeddings) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) # print info tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: seq_length = modeling.get_shape_list(input_ids)[1] def compute_loss(logits, positions): on_hot_pos = tf.one_hot(positions, depth=seq_length, dtype=tf.float32) log_probs = tf.nn.log_softmax(logits, axis=-1) loss = -tf.reduce_mean( tf.reduce_sum(on_hot_pos * log_probs, axis=-1)) return loss def cosine_similarity(tensor1, tensor2): cosine_val = 1 - tf.losses.cosine_distance( tensor1, tensor2, axis=0) return cosine_val start_loss = compute_loss(start_logits, start_positions) end_loss = compute_loss(end_logits, end_positions) main_loss = (start_loss + end_loss) / 2.0 aux_lambda = cosine_similarity(target_span_gt_tensor, source_span_gt_tensor) source_start_loss = compute_loss(source_raw_start_logits, source_start_positions) source_end_loss = compute_loss(source_raw_end_logits, source_end_positions) aux_loss = tf.maximum( 0.0, aux_lambda) * (source_start_loss + source_end_loss) / 2.0 total_loss = main_loss + aux_loss train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) 
output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.PREDICT: start_logits = tf.nn.log_softmax(start_logits, axis=-1) end_logits = tf.nn.log_softmax(end_logits, axis=-1) predictions = { "unique_ids": unique_ids, "start_logits": start_logits, "end_logits": end_logits, } output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions=predictions, scaffold_fn=scaffold_fn) else: raise ValueError("Only TRAIN and PREDICT modes are supported: %s" % (mode)) return output_spec
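# The training branch above uses the standard span loss -- one-hot the gold position, take a
# log-softmax over the sequence, and average the negative log-likelihood -- and then adds an
# auxiliary (source-side) span loss scaled by a clipped cosine similarity between the two
# span-representation tensors. A compact sketch of that loss shape under TensorFlow 1.x;
# the function names are illustrative, not from the original code.
import tensorflow as tf

def span_loss(logits, positions, seq_length):
  """Mean negative log-likelihood of the gold start or end position."""
  one_hot_positions = tf.one_hot(positions, depth=seq_length, dtype=tf.float32)
  log_probs = tf.nn.log_softmax(logits, axis=-1)
  return -tf.reduce_mean(tf.reduce_sum(one_hot_positions * log_probs, axis=-1))

def weighted_total_loss(start_logits, end_logits, start_pos, end_pos,
                        src_start_logits, src_end_logits, src_start_pos, src_end_pos,
                        target_repr, source_repr, seq_length):
  """Main span loss plus an auxiliary span loss weighted by cosine similarity."""
  main_loss = (span_loss(start_logits, start_pos, seq_length) +
               span_loss(end_logits, end_pos, seq_length)) / 2.0
  aux_lambda = 1.0 - tf.losses.cosine_distance(target_repr, source_repr, axis=0)
  aux_loss = (span_loss(src_start_logits, src_start_pos, seq_length) +
              span_loss(src_end_logits, src_end_pos, seq_length)) / 2.0
  return main_loss + tf.maximum(0.0, aux_lambda) * aux_loss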
def main(): print("print start load the params...") tf.logging.set_verbosity(tf.logging.INFO) tf.gfile.MakeDirs(config["out"]) train_examples_len = config["train_examples_len"] dev_examples_len = config["dev_examples_len"] learning_rate = config["learning_rate"] eval_per_step = config["eval_per_step"] num_labels = config["num_labels"] print(num_labels) num_train_steps = int(train_examples_len / config["train_batch_size"] * config["num_train_epochs"]) print("num_train_steps:", num_train_steps) num_dev_steps = int(dev_examples_len / config["dev_batch_size"]) num_warmup_steps = int(num_train_steps * config["warmup_proportion"]) use_one_hot_embeddings = False is_training = True use_tpu = False seq_len = config["max_seq_len"] init_checkpoint = config["init_checkpoint"] print("print start compile the bert model...") # 定义输入输出 print("{:*^100s}".format("v2")) input_ids_1 = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_ids_1') input_mask_1 = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_mask_1') segment_ids_1 = tf.placeholder(tf.int64, shape=[None, seq_len], name='segment_ids_1') input_ids_2 = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_ids_2') input_mask_2 = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_mask_2') segment_ids_2 = tf.placeholder(tf.int64, shape=[None, seq_len], name='segment_ids_2') labels = tf.placeholder(tf.int64, shape=[ None, ], name='labels') keep_prob = tf.placeholder(tf.float32, name='keep_prob') # , name='is_training' bert_config_ = load_bert_config(config["bert_config"]) (total_loss, per_example_loss, logits, probabilities) = create_model( bert_config_, is_training, input_ids_1, input_mask_1, segment_ids_1, input_ids_2, input_mask_2, segment_ids_2, labels, keep_prob, num_labels, use_one_hot_embeddings) print("{:*^100s}".format("v2")) train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, False) print("print start train the bert model(multi class)...") batch_size = config["train_batch_size"] input_ids_1_train, input_mask_1_train, segment_ids_1_train, input_ids_2_train, input_mask_2_train, \ segment_ids_2_train, labels_train = get_input_data(config["in_1"], seq_len, batch_size) dev_batch_size = config["dev_batch_size"] init_global = tf.global_variables_initializer() saver = tf.train.Saver(tf.global_variables(), max_to_keep=2) # 保存最后top3模型 with tf.Session() as sess: sess.run(init_global) tvars = tf.trainable_variables() initialized_variable_names = {} print("start load the pretrain model") if init_checkpoint: tvars = tf.trainable_variables() print("trainable_variables", len(tvars)) (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) print("initialized_variable_names:", len(initialized_variable_names)) saver_ = tf.train.Saver( [v for v in tvars if v.name in initialized_variable_names]) saver_.restore(sess, init_checkpoint) tvars = tf.global_variables() not_initialized_vars = [ v for v in tvars if v.name not in initialized_variable_names ] tf.logging.info('--all size %s; not initialized size %s' % (len(tvars), len(not_initialized_vars))) if len(not_initialized_vars): sess.run(tf.variables_initializer(not_initialized_vars)) for v in not_initialized_vars: tf.logging.info('--not initialized: %s, shape = %s' % (v.name, v.shape)) else: sess.run(tf.global_variables_initializer()) print("********* bert_multi_class_train start *********") # tf.summary.FileWriter("output/",sess.graph) def train_step(ids_1, mask_1, 
segment_1, ids_2, mask_2, segment_2, y, step): feed = { input_ids_1: ids_1, input_mask_1: mask_1, segment_ids_1: segment_1, input_ids_2: ids_2, input_mask_2: mask_2, segment_ids_2: segment_2, labels: y, keep_prob: 0.9 } _, out_loss, out_logits, p_ = sess.run( [train_op, total_loss, logits, probabilities], feed_dict=feed) pre = np.argmax(p_, axis=-1) acc = np.sum(np.equal(pre, y)) / len(pre) print("step :{},loss :{}, acc :{}".format(step, out_loss, acc)) return out_loss, pre, y def dev_step(ids_1, mask_1, segment_1, ids_2, mask_2, segment_2, y): feed = { input_ids_1: ids_1, input_mask_1: mask_1, segment_ids_1: segment_1, input_ids_2: ids_2, input_mask_2: mask_2, segment_ids_2: segment_2, labels: y, keep_prob: 1.0 } out_loss, out_logits, p_ = sess.run( [total_loss, logits, probabilities], feed_dict=feed) pre = np.argmax(p_, axis=-1) acc = np.sum(np.equal(pre, y)) / len(pre) print("loss :{}, acc :{}".format(out_loss, acc)) return out_loss, pre, y min_total_loss_dev = 999999 for i in range(num_train_steps): # batch 数据 i += 1 ids_1_train, mask_1_train, segment_1_train, ids_2_train, mask_2_train, segment_2_train, y_train = sess.run( [ input_ids_1_train, input_mask_1_train, segment_ids_1_train, input_ids_2_train, input_mask_2_train, segment_ids_2_train, labels_train ]) train_step(ids_1_train, mask_1_train, segment_1_train, ids_2_train, mask_2_train, segment_2_train, y_train, i) if i % eval_per_step == 0 and i >= config["eval_start_step"]: total_loss_dev = 0 input_ids_1_dev, input_mask_1_dev, segment_ids_1_dev, \ input_ids_2_dev, input_mask_2_dev, segment_ids_2_dev, labels_dev = get_input_data(config["in_2"], seq_len, dev_batch_size) total_pre_dev = [] total_true_dev = [] for j in range(num_dev_steps): # 一个 epoch 的 轮数 ids_1_dev, mask_1_dev, segment_1_dev, ids_2_dev, mask_2_dev, segment_2_dev, y_dev = sess.run( [ input_ids_1_dev, input_mask_1_dev, segment_ids_1_dev, input_ids_2_dev, input_mask_2_dev, segment_ids_2_dev, labels_dev ]) out_loss, pre, y = dev_step(ids_1_dev, mask_1_dev, segment_1_dev, ids_2_dev, mask_2_dev, segment_2_dev, y_dev) total_loss_dev += out_loss total_pre_dev.extend(pre) total_true_dev.extend(y_dev) # print("dev result report:") print(classification_report(total_true_dev, total_pre_dev)) if total_loss_dev < min_total_loss_dev: print("save model:\t%f\t>%f" % (min_total_loss_dev, total_loss_dev)) min_total_loss_dev = total_loss_dev saver.save(sess, config["out"] + 'bert.ckpt', global_step=i) elif i < config["eval_start_step"] and i % 1000 == 0: print("auto saved model.") saver.save(sess, config["out"] + 'bert.ckpt', global_step=i) sess.close() print("remove dropout in predict") tf.reset_default_graph() is_training = False input_ids_1 = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_ids_1') input_mask_1 = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_mask_1') segment_ids_1 = tf.placeholder(tf.int64, shape=[None, seq_len], name='segment_ids_1') input_ids_2 = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_ids_2') input_mask_2 = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_mask_2') segment_ids_2 = tf.placeholder(tf.int64, shape=[None, seq_len], name='segment_ids_2') labels = tf.placeholder(tf.int64, shape=[ None, ], name='labels') keep_prob = tf.placeholder(tf.float32, name='keep_prob') # , name='is_training' bert_config_ = load_bert_config(config["bert_config"]) (total_loss, per_example_loss, logits, probabilities) = create_model( bert_config_, is_training, input_ids_1, input_mask_1, segment_ids_1, input_ids_2, input_mask_2, 
      segment_ids_2, labels, keep_prob, num_labels, use_one_hot_embeddings)

  init_global = tf.global_variables_initializer()
  # Keep only the most recent checkpoint (max_to_keep=1).
  saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
  try:
    checkpoint = tf.train.get_checkpoint_state(config["out"])
    input_checkpoint = checkpoint.model_checkpoint_path
    print("[INFO] input_checkpoint:", input_checkpoint)
  except Exception as e:
    input_checkpoint = config["out"]
    print("[INFO] Model folder", config["out"], repr(e))

  with tf.Session() as sess:
    sess.run(init_global)
    saver.restore(sess, input_checkpoint)
    saver.save(sess, config["out_1"] + 'bert.ckpt')
    sess.close()
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] masked_lm_positions = features["masked_lm_positions"] masked_lm_ids = features["masked_lm_ids"] masked_lm_weights = features["masked_lm_weights"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) model = modeling.BertModel( config=bert_config, is_training=is_training, input_ids=input_ids, input_mask=input_mask, token_type_ids=None, use_one_hot_embeddings=use_one_hot_embeddings) (masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output( bert_config, model.get_sequence_output(), model.get_embedding_table(), masked_lm_positions, masked_lm_ids, masked_lm_weights) total_loss = masked_lm_loss tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) tf.logging.info("**** {} parameters ****".format( np.sum([np.prod(v.shape) for v in tf.trainable_variables()]))) n_predictions = masked_lm_ids.get_shape().as_list()[-1] probs = tf.reshape(masked_lm_log_probs, [-1, n_predictions, bert_config.vocab_size]) masked_lm_predictions = tf.argmax(probs, axis=-1, output_type=tf.int32) correct_prediction = tf.equal(masked_lm_predictions, masked_lm_ids) masked_lm_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=1) # # with tf.control_dependencies([total_loss]): accuracy = tf.reduce_mean(masked_lm_accuracy) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn, # training_hooks=[LoggingTensorHook({'accuracy': accuracy}, # every_n_iter=FLAGS.iterations_per_loop)] ) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights): """Computes the loss and accuracy of the model.""" tf.summary.scalar("train_accuracy", accuracy) masked_lm_weights = tf.reshape(masked_lm_weights, [-1]) masked_lm_predictions = tf.argmax(masked_lm_log_probs, axis=-1, output_type=tf.int32) masked_lm_ids = tf.reshape(masked_lm_ids, [-1]) masked_lm_accuracy = tf.metrics.accuracy( labels=masked_lm_ids, predictions=masked_lm_predictions, weights=masked_lm_weights) masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1]) masked_lm_mean_loss = tf.metrics.mean( values=masked_lm_example_loss, weights=masked_lm_weights) return { "masked_lm_accuracy": masked_lm_accuracy, "masked_lm_loss": masked_lm_mean_loss } eval_metrics = (metric_fn, [ masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights ]) loss_per_seq = 
tf.reduce_mean(tf.reshape(masked_lm_example_loss, [-1, n_predictions]), axis=1) variables_to_export = [ input_ids, input_mask, masked_lm_positions, masked_lm_ids, masked_lm_weights, loss_per_seq, probs, masked_lm_accuracy, features["seq"] ] output_spec = TPUEstimatorSpec(mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn, evaluation_hooks=[ eval_hook( variables_to_export, FLAGS.output_dir) ]) else: raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode)) return output_spec
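# The masked-LM metrics above weight each prediction slot by masked_lm_weights so that
# padded slots do not count toward accuracy or loss. A small numpy illustration of that
# weighted accuracy (the values are hypothetical, not from any real run):
import numpy as np

masked_lm_ids = np.array([[5, 9, 0], [7, 2, 0]])            # gold token ids, last slot padded
masked_lm_predictions = np.array([[5, 3, 0], [7, 2, 1]])    # argmax of the log-probs
masked_lm_weights = np.array([[1.0, 1.0, 0.0], [1.0, 1.0, 0.0]])  # zero weight on padding

correct = (masked_lm_ids == masked_lm_predictions).astype(np.float32)
weighted_accuracy = (correct * masked_lm_weights).sum() / masked_lm_weights.sum()
print(weighted_accuracy)  # 0.75: 3 of the 4 real prediction slots are correct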
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] masked_lm_positions = features["masked_lm_positions"] masked_lm_ids = features["masked_lm_ids"] masked_lm_weights = features["masked_lm_weights"] next_sentence_labels = features["next_sentence_labels"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) model = modeling.BertModel( config=bert_config, is_training=is_training, input_ids=input_ids, input_mask=input_mask, token_type_ids=segment_ids, use_one_hot_embeddings=use_one_hot_embeddings, compute_type=tf.float16 if FLAGS.manual_fp16 else tf.float32) (masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output( bert_config, model.get_sequence_output(), model.get_embedding_table(), masked_lm_positions, masked_lm_ids, masked_lm_weights) (next_sentence_loss, next_sentence_example_loss, next_sentence_log_probs) = get_next_sentence_output( bert_config, model.get_pooled_output(), next_sentence_labels) masked_lm_loss = tf.identity(masked_lm_loss, name="mlm_loss") next_sentence_loss = tf.identity(next_sentence_loss, name="nsp_loss") total_loss = masked_lm_loss + next_sentence_loss total_loss = tf.identity(total_loss, name='total_loss') tvars = tf.trainable_variables() initialized_variable_names = {} if init_checkpoint and (hvd is None or hvd.rank() == 0): print("Loading checkpoint", init_checkpoint) (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) tf.train.init_from_checkpoint(init_checkpoint, assignment_map) if FLAGS.verbose_logging: tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" %d :: name = %s, shape = %s%s", 0 if hvd is None else hvd.rank(), var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer( total_loss, learning_rate, num_train_steps, num_warmup_steps, hvd, FLAGS.manual_fp16, FLAGS.use_fp16, FLAGS.num_accumulation_steps, FLAGS.optimizer_type, FLAGS.allreduce_post_accumulation) output_spec = tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, train_op=train_op) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, next_sentence_example_loss, next_sentence_log_probs, next_sentence_labels): """Computes the loss and accuracy of the model.""" masked_lm_log_probs = tf.reshape( masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]]) masked_lm_predictions = tf.argmax(masked_lm_log_probs, axis=-1, output_type=tf.int32) masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1]) masked_lm_ids = tf.reshape(masked_lm_ids, [-1]) masked_lm_weights = tf.reshape(masked_lm_weights, [-1]) masked_lm_accuracy = tf.metrics.accuracy( labels=masked_lm_ids, predictions=masked_lm_predictions, weights=masked_lm_weights) masked_lm_mean_loss = tf.metrics.mean( values=masked_lm_example_loss, weights=masked_lm_weights) next_sentence_log_probs = tf.reshape( next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]]) next_sentence_predictions = tf.argmax(next_sentence_log_probs, axis=-1, output_type=tf.int32) 
next_sentence_labels = tf.reshape(next_sentence_labels, [-1]) next_sentence_accuracy = tf.metrics.accuracy( labels=next_sentence_labels, predictions=next_sentence_predictions) next_sentence_mean_loss = tf.metrics.mean( values=next_sentence_example_loss) return { "masked_lm_accuracy": masked_lm_accuracy, "masked_lm_loss": masked_lm_mean_loss, "next_sentence_accuracy": next_sentence_accuracy, "next_sentence_loss": next_sentence_mean_loss, } eval_metric_ops = metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, next_sentence_example_loss, next_sentence_log_probs, next_sentence_labels) output_spec = tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, eval_metric_ops=eval_metric_ops) else: raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode)) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32) is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits, probabilities) = create_model(bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, logits, is_real_example): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions, weights=is_real_example) loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example) return { "eval_accuracy": accuracy, "eval_loss": loss, } eval_metrics = (metric_fn, [ per_example_loss, label_ids, logits, is_real_example ]) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions={"probabilities": probabilities}, scaffold_fn=scaffold_fn) return output_spec
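# `is_real_example` above exists because TPU batches must have a fixed size, so the last
# evaluation batch is padded with fake examples; passing it as `weights` removes those
# examples from the metrics. A minimal TensorFlow 1.x sketch of that weighting, with
# hypothetical values:
import tensorflow as tf

label_ids = tf.constant([1, 0, 1, 0])
predictions = tf.constant([1, 0, 0, 0])
is_real_example = tf.constant([1.0, 1.0, 1.0, 0.0])  # last example is padding

accuracy, accuracy_op = tf.metrics.accuracy(
    labels=label_ids, predictions=predictions, weights=is_real_example)

with tf.Session() as sess:
  sess.run(tf.local_variables_initializer())  # tf.metrics state lives in local variables
  sess.run(accuracy_op)
  print(sess.run(accuracy))  # 2/3: the padded fourth example is ignored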
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] masked_lm_positions = features["masked_lm_positions"] masked_lm_ids = features["masked_lm_ids"] masked_lm_weights = features["masked_lm_weights"] # Note: We keep this feature name `next_sentence_labels` to be compatible # with the original data created by lanzhzh@. However, in the ALBERT case # it does represent sentence_order_labels. sentence_order_labels = features["next_sentence_labels"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) model = modeling.AlbertModel( config=albert_config, is_training=is_training, input_ids=input_ids, input_mask=input_mask, token_type_ids=segment_ids, use_one_hot_embeddings=use_one_hot_embeddings) (masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output( albert_config, model.get_sequence_output(), model.get_embedding_table(), masked_lm_positions, masked_lm_ids, masked_lm_weights) (sentence_order_loss, sentence_order_example_loss, sentence_order_log_probs) = get_sentence_order_output( albert_config, model.get_pooled_output(), sentence_order_labels) total_loss = masked_lm_loss + sentence_order_loss tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: tf.logging.info("number of hidden group %d to initialize", albert_config.num_hidden_groups) num_of_initialize_group = 1 if FLAGS.init_from_group0: num_of_initialize_group = albert_config.num_hidden_groups if albert_config.net_structure_type > 0: num_of_initialize_group = albert_config.num_hidden_layers (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint, num_of_initialize_group) if use_tpu: def tpu_scaffold(): for gid in range(num_of_initialize_group): tf.logging.info("initialize the %dth layer", gid) tf.logging.info(assignment_map[gid]) tf.train.init_from_checkpoint(init_checkpoint, assignment_map[gid]) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: for gid in range(num_of_initialize_group): tf.logging.info("initialize the %dth layer", gid) tf.logging.info(assignment_map[gid]) tf.train.init_from_checkpoint(init_checkpoint, assignment_map[gid]) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu, optimizer, poly_power, start_warmup_step) output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(*args): """Computes the loss and accuracy of the model.""" (masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, sentence_order_example_loss, sentence_order_log_probs, sentence_order_labels) = args[:7] masked_lm_log_probs = tf.reshape( masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]]) masked_lm_predictions = tf.argmax(masked_lm_log_probs, axis=-1, output_type=tf.int32) masked_lm_example_loss = 
tf.reshape(masked_lm_example_loss, [-1]) masked_lm_ids = tf.reshape(masked_lm_ids, [-1]) masked_lm_weights = tf.reshape(masked_lm_weights, [-1]) masked_lm_accuracy = tf.metrics.accuracy( labels=masked_lm_ids, predictions=masked_lm_predictions, weights=masked_lm_weights) masked_lm_mean_loss = tf.metrics.mean( values=masked_lm_example_loss, weights=masked_lm_weights) metrics = { "masked_lm_accuracy": masked_lm_accuracy, "masked_lm_loss": masked_lm_mean_loss, } sentence_order_log_probs = tf.reshape( sentence_order_log_probs, [-1, sentence_order_log_probs.shape[-1]]) sentence_order_predictions = tf.argmax( sentence_order_log_probs, axis=-1, output_type=tf.int32) sentence_order_labels = tf.reshape(sentence_order_labels, [-1]) sentence_order_accuracy = tf.metrics.accuracy( labels=sentence_order_labels, predictions=sentence_order_predictions) sentence_order_mean_loss = tf.metrics.mean( values=sentence_order_example_loss) metrics.update({ "sentence_order_accuracy": sentence_order_accuracy, "sentence_order_loss": sentence_order_mean_loss }) return metrics metric_values = [ masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, sentence_order_example_loss, sentence_order_log_probs, sentence_order_labels ] eval_metrics = (metric_fn, metric_values) output_spec = contrib_tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode)) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_sequence = features["input_sequence"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] edit_sequence = features["edit_sequence"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits, probabilities) = gec_create_model( bert_config, is_training, input_sequence, input_mask, segment_ids, edit_sequence, use_one_hot_embeddings, mode, copy_weight, use_bert_more, insert_ids, multitoken_insert_ids, subtract_replaced_from_replacement) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) # tf.logging.info("**** Trainable Variables ****") # for var in tvars: # init_string = "" # if var.name in initialized_variable_names: # init_string = ", *INIT_FROM_CKPT*" # tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer( total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, edit_sequence, logits): predictions = tf.argmax(logits[:,:,3:], axis=-1, output_type=tf.int32) + 3 mask = tf.equal(edit_sequence,0) mask = tf.logical_or(mask, tf.equal(edit_sequence,1)) mask = tf.logical_or(mask, tf.equal(edit_sequence,2)) mask = tf.logical_or(mask, tf.equal(edit_sequence,3)) mask = tf.to_float(tf.logical_not(mask)) accuracy = tf.metrics.accuracy(edit_sequence, predictions, mask) loss = tf.metrics.mean(per_example_loss) result_dict = {} result_dict["eval_accuracy"] = accuracy result_dict["eval_loss"] = loss return { "eval_accuracy": accuracy, "eval_loss": loss, } eval_metrics = (metric_fn, [per_example_loss, edit_sequence, logits]) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: #first three edit ids unk, sos, eos are dummy. We do not consider them in predictions predictions = tf.argmax(logits[:,:,3:], axis=-1, output_type=tf.int32) + 3 output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions={"predictions": predictions, "logits":logits}, scaffold_fn=scaffold_fn) return output_spec
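# The GEC eval above masks out the special edit ids (0..3) before computing accuracy, and
# restricts the argmax to logits[:, :, 3:] so the dummy unk/sos/eos ids can never be
# predicted. A small sketch of that masking pattern under TensorFlow 1.x; the helper name
# is illustrative only.
import tensorflow as tf

def masked_edit_accuracy(edit_sequence, logits):
  """Accuracy over edit labels, ignoring positions whose gold label is a special id (0-3)."""
  # Shift the argmax by 3 because the first three ids are excluded from the prediction space.
  predictions = tf.argmax(logits[:, :, 3:], axis=-1, output_type=tf.int32) + 3
  is_special = tf.reduce_any(
      tf.equal(tf.expand_dims(edit_sequence, -1), tf.constant([0, 1, 2, 3])), axis=-1)
  weights = tf.to_float(tf.logical_not(is_special))
  return tf.metrics.accuracy(labels=edit_sequence, predictions=predictions, weights=weights)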
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32) is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits, probabilities) = create_model(bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, use_one_hot_embeddings, fp16) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) is_multi_gpu = use_gpu and int(num_gpu_cores) >= 2 if mode == tf.estimator.ModeKeys.TRAIN: if is_multi_gpu: train_op = custom_optimization.create_optimizer( total_loss, learning_rate, num_train_steps, num_warmup_steps, fp16=fp16) output_spec = tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, train_op=train_op, scaffold=scaffold_fn) else: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu, fp16=fp16) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, probabilities, is_real_example): probs_split = tf.split(probabilities, NUM_LABELS, axis=-1) preds_split = tf.cast( tf.greater(probs_split, tf.constant(0.5)), tf.int32) label_ids_split = tf.split(label_ids, NUM_LABELS, axis=-1) eval_dict = dict() for idx in range(NUM_LABELS): label_ids_spec = label_ids_split[idx] preds_spec = preds_split[idx] acc, acc_op = tf.metrics.accuracy(labels=label_ids_spec, predictions=preds_spec, weights=is_real_example) pr, pr_op = tf.metrics.precision(labels=label_ids_spec, predictions=preds_spec, weights=is_real_example) re, re_op = tf.metrics.recall(labels=label_ids_spec, predictions=preds_spec, weights=is_real_example) f1 = (2 * pr * re) / (pr + re) # f1-score eval_dict["eval_accuracy_" + str(idx)] = (acc, acc_op) eval_dict["eval_precision_" + str(idx)] = (pr, pr_op) eval_dict["eval_recall_" + str(idx)] = (re, re_op) eval_dict["eval_f1score_" + str(idx)] = (f1, tf.identity(f1)) eval_dict["eval_loss"] = tf.metrics.mean( values=per_example_loss, weights=is_real_example) # loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example) return eval_dict eval_metrics = (metric_fn, [ per_example_loss, label_ids, probabilities, is_real_example ]) # eval on single-gpu only output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: if 
is_multi_gpu: output_spec = tf.estimator.EstimatorSpec( mode=mode, predictions={"probabilities": probabilities}) else: output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions={"probabilities": probabilities}, scaffold_fn=scaffold_fn) return output_spec
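# The multi-label eval above treats each of the NUM_LABELS outputs as an independent binary
# classifier: split the sigmoid probabilities per label, threshold at 0.5, and compute
# per-label metrics. A small numpy sketch of that decision rule (values are hypothetical):
import numpy as np

probabilities = np.array([[0.91, 0.12, 0.60],
                          [0.05, 0.77, 0.40]])  # batch of 2, 3 labels, sigmoid outputs
label_ids = np.array([[1, 0, 1],
                      [0, 1, 1]])

predictions = (probabilities > 0.5).astype(np.int32)   # per-label threshold at 0.5
per_label_accuracy = (predictions == label_ids).mean(axis=0)
print(predictions)          # [[1 0 1], [0 1 0]]
print(per_label_accuracy)   # [1.  1.  0.5] -- the third label is missed in the second example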
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] masked_lm_positions = features["masked_lm_positions"] masked_lm_ids = features["masked_lm_ids"] masked_lm_weights = features["masked_lm_weights"] next_sentence_labels = features["next_sentence_labels"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) model = modeling.BertModel( config=bert_config, is_training=is_training, input_ids=input_ids, input_mask=input_mask, token_type_ids=segment_ids, use_one_hot_embeddings=use_one_hot_embeddings) (masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output( bert_config, model.get_sequence_output(), model.get_embedding_table(), masked_lm_positions, masked_lm_ids, masked_lm_weights) (next_sentence_loss, next_sentence_example_loss, next_sentence_log_probs) = get_next_sentence_output( bert_config, model.get_pooled_output(), next_sentence_labels) total_loss = masked_lm_loss + next_sentence_loss tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer( total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, next_sentence_example_loss, next_sentence_log_probs, next_sentence_labels): """Computes the loss and accuracy of the model.""" masked_lm_log_probs = tf.reshape(masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]]) masked_lm_predictions = tf.argmax( masked_lm_log_probs, axis=-1, output_type=tf.int32) masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1]) masked_lm_ids = tf.reshape(masked_lm_ids, [-1]) masked_lm_weights = tf.reshape(masked_lm_weights, [-1]) masked_lm_accuracy = tf.metrics.accuracy( labels=masked_lm_ids, predictions=masked_lm_predictions, weights=masked_lm_weights) masked_lm_mean_loss = tf.metrics.mean( values=masked_lm_example_loss, weights=masked_lm_weights) next_sentence_log_probs = tf.reshape( next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]]) next_sentence_predictions = tf.argmax( next_sentence_log_probs, axis=-1, output_type=tf.int32) next_sentence_labels = tf.reshape(next_sentence_labels, [-1]) next_sentence_accuracy = tf.metrics.accuracy( labels=next_sentence_labels, predictions=next_sentence_predictions) next_sentence_mean_loss = tf.metrics.mean( values=next_sentence_example_loss) return { "masked_lm_accuracy": masked_lm_accuracy, 
"masked_lm_loss": masked_lm_mean_loss, "next_sentence_accuracy": next_sentence_accuracy, "next_sentence_loss": next_sentence_mean_loss, } eval_metrics = (metric_fn, [ masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids, masked_lm_weights, next_sentence_example_loss, next_sentence_log_probs, next_sentence_labels ]) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode)) return output_spec
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits) = create_model( bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, use_one_hot_embeddings) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None if init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer( total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, logits): # Display labels and predictions concat1 = tf.contrib.metrics.streaming_concat(logits) concat2 = tf.contrib.metrics.streaming_concat(label_ids) # Compute Pearson correlation pearson = tf.contrib.metrics.streaming_pearson_correlation(logits, label_ids) # Compute MSE # mse = tf.metrics.mean(per_example_loss) mse = tf.metrics.mean_squared_error(label_ids, logits) # Compute Spearman correlation size = tf.size(logits) indice_of_ranks_pred = tf.nn.top_k(logits, k=size)[1] indice_of_ranks_label = tf.nn.top_k(label_ids, k=size)[1] rank_pred = tf.nn.top_k(-indice_of_ranks_pred, k=size)[1] rank_label = tf.nn.top_k(-indice_of_ranks_label, k=size)[1] rank_pred = tf.to_float(rank_pred) rank_label = tf.to_float(rank_label) spearman = tf.contrib.metrics.streaming_pearson_correlation(rank_pred, rank_label) return {'pred': concat1, 'label_ids': concat2, 'pearson': pearson, 'spearman': spearman, 'MSE': mse} eval_metrics = (metric_fn, [per_example_loss, label_ids, logits]) output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) else: output_spec = tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions=logits, scaffold_fn=scaffold_fn) return output_spec
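# The STS-B eval above obtains Spearman correlation by converting scores to ranks with two
# nested top_k calls (indices of indices) and then applying the streaming Pearson correlation
# to the ranks. A small numpy check of that rank trick, with hypothetical scores:
import numpy as np

def ranks_via_double_argsort(x):
  """Equivalent of the tf.nn.top_k(-tf.nn.top_k(x)[1])[1] trick: descending rank of each element."""
  order = np.argsort(-x)    # indices that would sort x in descending order
  ranks = np.argsort(order) # position of each element in that ordering
  return ranks

scores = np.array([0.2, 0.9, 0.5])
print(ranks_via_double_argsort(scores))  # [2 0 1]: 0.9 is ranked first, 0.2 last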
    return loss

  # get the max prob for the predicted start/end position
  start_probs = tf.nn.softmax(start_logits, axis=-1)
  start_prob = tf.reduce_max(start_probs, axis=-1)
  end_probs = tf.nn.softmax(end_logits, axis=-1)
  end_prob = tf.reduce_max(end_probs, axis=-1)

  start_loss = compute_loss(start_logits, start_positions)
  end_loss = compute_loss(end_logits, end_positions)
  total_loss = (start_loss + end_loss) / 2.0
  tf.summary.scalar('total_loss', total_loss)

  if FLAGS.do_train:
    train_op = optimization.create_optimizer(
        total_loss, FLAGS.learning_rate, num_train_steps, num_warmup_steps, False)

  print("***** Running training *****")
  print("  Num orig examples = %d" % len(train_examples))
  print("  Num train_features = %d" % len(train_features))
  print("  Batch size = %d" % FLAGS.train_batch_size)
  print("  Num steps = %d" % num_train_steps)

  merged_summary_op = tf.summary.merge_all()
  RawResult = collections.namedtuple(
      "RawResult", ["unique_id", "start_logits", "end_logits"])

  saver = tf.train.Saver()
  # Initializing the variables
  init = tf.global_variables_initializer()
  tf.get_default_graph().finalize()
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument from tensorflow.python.estimator.model_fn import EstimatorSpec tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids = features["input_ids"] input_mask = features["input_mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] is_training = (mode == tf.estimator.ModeKeys.TRAIN) (total_loss, per_example_loss, logits, probabilities) = BertSim.create_model(bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels, use_one_hot_embeddings) tvars = tf.trainable_variables() initialized_variable_names = {} if init_checkpoint: (assignment_map, initialized_variable_names) \ = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer( total_loss, learning_rate, num_train_steps, num_warmup_steps, False) output_spec = EstimatorSpec(mode=mode, loss=total_loss, train_op=train_op) elif mode == tf.estimator.ModeKeys.EVAL: def metric_fn(per_example_loss, label_ids, logits): predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) accuracy = tf.metrics.accuracy(label_ids, predictions) auc = tf.metrics.auc(label_ids, predictions) loss = tf.metrics.mean(per_example_loss) return { "eval_accuracy": accuracy, "eval_auc": auc, "eval_loss": loss, } eval_metrics = metric_fn(per_example_loss, label_ids, logits) output_spec = EstimatorSpec(mode=mode, loss=total_loss, eval_metric_ops=eval_metrics) else: output_spec = EstimatorSpec(mode=mode, predictions=probabilities) return output_spec