def model_fn(features, labels, mode, params):
    """Build a PREDICT-only spec exposing selected encoder layers.

    Args:
      features: dict of input tensors; the keys "unique_id", "input_ids",
        "input_mask", "input_type_ids" and "tokens" are read.
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value; only PREDICT is accepted.
      params: unused.

    Returns:
      A `TPUEstimatorSpec` whose predictions contain the unique id, the
      tokens, and one "layer_output_<i>" entry per entry of `layer_indexes`.

    Raises:
      ValueError: if `mode` is not PREDICT.
    """
    # Pull every required feature up front, in a fixed order.
    unique_id, input_ids, input_mask, input_type_ids, tokens = (
        features[key]
        for key in ("unique_id", "input_ids", "input_mask",
                    "input_type_ids", "tokens"))

    bert = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=input_type_ids,
        use_one_hot_embeddings=False)

    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError("Only PREDICT modes are supported: %s" % (mode))

    # Map graph variables onto checkpoint variables and restore them.
    assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
        tf.trainable_variables(), init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    encoder_layers = bert.get_all_encoder_layers()

    predictions = {"unique_id": unique_id, "tokens": tokens}
    predictions.update(
        ("layer_output_%d" % position, encoder_layers[layer_index])
        for position, layer_index in enumerate(layer_indexes))

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions, scaffold_fn=None)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build a PREDICT-only `TPUEstimatorSpec` exposing encoder layers.

    Args:
      features: dict of input tensors; the keys "unique_ids", "input_ids",
        "input_mask" and "input_type_ids" are read.
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value; only PREDICT is accepted.
      params: unused.

    Returns:
      A `TPUEstimatorSpec` whose predictions contain "unique_id" and one
      "layer_output_<i>" entry per entry of `layer_indexes`.

    Raises:
      ValueError: if `mode` is not PREDICT.
    """
    unique_ids, input_ids, input_mask, input_type_ids = (
        features[key]
        for key in ("unique_ids", "input_ids", "input_mask",
                    "input_type_ids"))

    bert = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=input_type_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError("Only PREDICT modes are supported: %s" % (mode))

    trainable = tf.trainable_variables()
    assignment_map, restored_names = (
        modeling.get_assignment_map_from_checkpoint(trainable,
                                                    init_checkpoint))

    scaffold_fn = None
    if not use_tpu:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    else:
        # On TPU the restore is deferred to the scaffold function.
        def _restore_in_scaffold():
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            return tf.train.Scaffold()

        scaffold_fn = _restore_in_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        marker = (", *INIT_FROM_CKPT*"
                  if variable.name in restored_names else "")
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, marker)

    encoder_layers = bert.get_all_encoder_layers()

    predictions = {"unique_id": unique_ids}
    predictions.update(
        ("layer_output_%d" % position, encoder_layers[layer_index])
        for position, layer_index in enumerate(layer_indexes))

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the classifier `TPUEstimatorSpec` for TRAIN, EVAL or PREDICT.

    Args:
      features: dict of input tensors; reads "input_ids", "input_mask",
        "segment_ids", "label_ids" and, when present, "is_real_example".
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `TPUEstimatorSpec`: loss + train op in TRAIN, loss + accuracy/loss
      metrics in EVAL, and {"probabilities": ...} predictions otherwise.
    """
    tf.logging.info("*** Features ***")
    for key in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (key, features[key].shape))

    input_ids, input_mask, segment_ids, label_ids = (
        features[key]
        for key in ("input_ids", "input_mask", "segment_ids", "label_ids"))

    # Per-example weight: the feature when provided, otherwise all ones.
    if "is_real_example" not in features:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    else:
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    total_loss, per_example_loss, logits, probabilities = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    trainable = tf.trainable_variables()
    restored_names = {}
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, restored_names = (
            modeling.get_assignment_map_from_checkpoint(trainable,
                                                        init_checkpoint))
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:
            # On TPU the restore is deferred to the scaffold function.
            def _restore_in_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint,
                                              assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = _restore_in_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        marker = (", *INIT_FROM_CKPT*"
                  if variable.name in restored_names else "")
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, marker)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits, is_real_example):
            """Weighted accuracy and mean loss over the eval examples."""
            predicted = tf.argmax(logits, axis=-1, output_type=tf.int32)
            return {
                "eval_accuracy": tf.metrics.accuracy(
                    labels=label_ids,
                    predictions=predicted,
                    weights=is_real_example),
                "eval_loss": tf.metrics.mean(
                    values=per_example_loss, weights=is_real_example),
            }

        metric_inputs = [per_example_loss, label_ids, logits, is_real_example]
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)

    # Any other mode is served as prediction.
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities},
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the pre-training `TPUEstimatorSpec` (masked LM + next sentence).

    Args:
      features: dict of input tensors; reads "input_ids", "input_mask",
        "segment_ids", "masked_lm_positions", "masked_lm_ids",
        "masked_lm_weights" and "next_sentence_labels".
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value; TRAIN or EVAL.
      params: unused.

    Returns:
      A `TPUEstimatorSpec` carrying the summed loss plus either the train op
      (TRAIN) or the accuracy/loss metrics of both tasks (EVAL).

    Raises:
      ValueError: if `mode` is neither TRAIN nor EVAL.
    """
    tf.logging.info("*** Features ***")
    for key in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (key, features[key].shape))

    (input_ids, input_mask, segment_ids, masked_lm_positions, masked_lm_ids,
     masked_lm_weights, next_sentence_labels) = (
         features[key]
         for key in ("input_ids", "input_mask", "segment_ids",
                     "masked_lm_positions", "masked_lm_ids",
                     "masked_lm_weights", "next_sentence_labels"))

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    bert = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs = (
        get_masked_lm_output(
            bert_config, bert.get_sequence_output(),
            bert.get_embedding_table(), masked_lm_positions, masked_lm_ids,
            masked_lm_weights))

    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         bert_config, bert.get_pooled_output(), next_sentence_labels)

    total_loss = masked_lm_loss + next_sentence_loss

    trainable = tf.trainable_variables()
    restored_names = {}
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, restored_names = (
            modeling.get_assignment_map_from_checkpoint(trainable,
                                                        init_checkpoint))
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:
            # On TPU the restore is deferred to the scaffold function.
            def _restore_in_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint,
                                              assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = _restore_in_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        marker = (", *INIT_FROM_CKPT*"
                  if variable.name in restored_names else "")
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, marker)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode != tf.estimator.ModeKeys.EVAL:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                         (mode))

    def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                  masked_lm_weights, next_sentence_example_loss,
                  next_sentence_log_probs, next_sentence_labels):
        """Computes the loss and accuracy of the model."""
        # Masked LM: flatten everything to one row per masked position.
        lm_log_probs = tf.reshape(masked_lm_log_probs,
                                  [-1, masked_lm_log_probs.shape[-1]])
        lm_predictions = tf.argmax(
            lm_log_probs, axis=-1, output_type=tf.int32)
        lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
        lm_ids = tf.reshape(masked_lm_ids, [-1])
        lm_weights = tf.reshape(masked_lm_weights, [-1])
        lm_accuracy = tf.metrics.accuracy(
            labels=lm_ids, predictions=lm_predictions, weights=lm_weights)
        lm_mean_loss = tf.metrics.mean(
            values=lm_example_loss, weights=lm_weights)

        # Next sentence: one row per example.
        ns_log_probs = tf.reshape(next_sentence_log_probs,
                                  [-1, next_sentence_log_probs.shape[-1]])
        ns_predictions = tf.argmax(
            ns_log_probs, axis=-1, output_type=tf.int32)
        ns_labels = tf.reshape(next_sentence_labels, [-1])
        ns_accuracy = tf.metrics.accuracy(
            labels=ns_labels, predictions=ns_predictions)
        ns_mean_loss = tf.metrics.mean(values=next_sentence_example_loss)

        return {
            "masked_lm_accuracy": lm_accuracy,
            "masked_lm_loss": lm_mean_loss,
            "next_sentence_accuracy": ns_accuracy,
            "next_sentence_loss": ns_mean_loss,
        }

    metric_inputs = [
        masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
        masked_lm_weights, next_sentence_example_loss,
        next_sentence_log_probs, next_sentence_labels
    ]
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=(metric_fn, metric_inputs),
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Assemble the masked-LM + next-sentence pre-training estimator spec.

    Args:
      features: dict of input tensors; reads "input_ids", "input_mask",
        "segment_ids", "masked_lm_positions", "masked_lm_ids",
        "masked_lm_weights" and "next_sentence_labels".
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value; TRAIN or EVAL.
      params: unused.

    Returns:
      A `TPUEstimatorSpec` with the combined loss and, depending on `mode`,
      the train op or the evaluation metrics.

    Raises:
      ValueError: if `mode` is neither TRAIN nor EVAL.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" %
                        (feature_name, features[feature_name].shape))

    required_keys = ("input_ids", "input_mask", "segment_ids",
                     "masked_lm_positions", "masked_lm_ids",
                     "masked_lm_weights", "next_sentence_labels")
    (input_ids, input_mask, segment_ids, masked_lm_positions, masked_lm_ids,
     masked_lm_weights, next_sentence_labels) = (
         features[key] for key in required_keys)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    encoder = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    mlm_outputs = get_masked_lm_output(
        bert_config, encoder.get_sequence_output(),
        encoder.get_embedding_table(), masked_lm_positions, masked_lm_ids,
        masked_lm_weights)
    masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs = mlm_outputs

    nsp_outputs = get_next_sentence_output(
        bert_config, encoder.get_pooled_output(), next_sentence_labels)
    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = nsp_outputs

    total_loss = masked_lm_loss + next_sentence_loss

    graph_variables = tf.trainable_variables()
    names_from_checkpoint = {}
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, names_from_checkpoint = (
            modeling.get_assignment_map_from_checkpoint(graph_variables,
                                                        init_checkpoint))
        if use_tpu:
            # TPU: perform the restore from inside the scaffold function.
            def _tpu_restore():
                tf.train.init_from_checkpoint(init_checkpoint,
                                              assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = _tpu_restore
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for graph_variable in graph_variables:
        if graph_variable.name in names_from_checkpoint:
            origin = ", *INIT_FROM_CKPT*"
        else:
            origin = ""
        tf.logging.info(" name = %s, shape = %s%s", graph_variable.name,
                        graph_variable.shape, origin)

    def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                  masked_lm_weights, next_sentence_example_loss,
                  next_sentence_log_probs, next_sentence_labels):
        """Computes the loss and accuracy of the model."""
        flat_mlm_log_probs = tf.reshape(
            masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
        mlm_predictions = tf.argmax(
            flat_mlm_log_probs, axis=-1, output_type=tf.int32)
        flat_mlm_loss = tf.reshape(masked_lm_example_loss, [-1])
        flat_mlm_ids = tf.reshape(masked_lm_ids, [-1])
        flat_mlm_weights = tf.reshape(masked_lm_weights, [-1])
        metrics = {}
        metrics["masked_lm_accuracy"] = tf.metrics.accuracy(
            labels=flat_mlm_ids,
            predictions=mlm_predictions,
            weights=flat_mlm_weights)
        metrics["masked_lm_loss"] = tf.metrics.mean(
            values=flat_mlm_loss, weights=flat_mlm_weights)

        flat_nsp_log_probs = tf.reshape(
            next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
        nsp_predictions = tf.argmax(
            flat_nsp_log_probs, axis=-1, output_type=tf.int32)
        flat_nsp_labels = tf.reshape(next_sentence_labels, [-1])
        metrics["next_sentence_accuracy"] = tf.metrics.accuracy(
            labels=flat_nsp_labels, predictions=nsp_predictions)
        metrics["next_sentence_loss"] = tf.metrics.mean(
            values=next_sentence_example_loss)
        return metrics

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, next_sentence_example_loss,
            next_sentence_log_probs, next_sentence_labels
        ])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                         (mode))

    return output_spec
def _build_classifier_tower(bert_config, num_labels, input_ids, input_mask,
                            segment_ids, label_ids, sample_weight,
                            hidden_dropout_prob, attention_probs_dropout_prob,
                            output_dropout_keep_prob, label_smoothing):
    """Build BERT plus a softmax classification head under the current device.

    Returns:
      A `(loss, logits, probabilities)` tuple of tensors.
    """
    model = modeling.BertModel(
        config=bert_config,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        scope="bert")

    # If you want to use the token-level output, use
    # model.get_sequence_output() instead.
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value

    # AUTO_REUSE lets every tower share one set of head weights.
    with tf.variable_scope("output", reuse=tf.AUTO_REUSE):
        output_weights = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable(
            "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        output_layer = tf.nn.dropout(
            output_layer, keep_prob=output_dropout_keep_prob)
        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(
            label_ids, depth=num_labels, dtype=tf.float32)
        loss = tf.losses.softmax_cross_entropy(
            one_hot_labels,
            logits,
            weights=sample_weight,
            label_smoothing=label_smoothing)

    return loss, logits, probabilities


def _clip_gradients(grads_and_vars):
    """Clip each gradient to [-1, 1], skipping variables with no gradient."""
    return [(tf.clip_by_value(grad, -1., 1.), var)
            for grad, var in grads_and_vars
            if grad is not None]


def create_model(bert_config,
                 num_labels,
                 max_seq_length,
                 sess,
                 init_checkpoint=None,
                 use_GPU=False,
                 label_smoothing=0.0,
                 cycle=1):
    """Creates a classification model.

    Builds placeholders, a BERT classifier (one tower per visible GPU when
    `use_GPU` is set and GPUs are available, a single tower otherwise), the
    training op with gradients clipped to [-1, 1], optionally restores
    weights from `init_checkpoint`, and runs the global initializer in `sess`.

    Args:
      bert_config: configuration object forwarded to `modeling.BertModel`.
      num_labels: number of output classes.
      max_seq_length: second dimension of the id/mask/segment placeholders.
      sess: session in which the variable initializer is run.
      init_checkpoint: optional checkpoint path to restore variables from.
      use_GPU: build one tower per available GPU. When no GPU is available
        the single-device graph is built instead.
      label_smoothing: forwarded to `tf.losses.softmax_cross_entropy`.
      cycle: divisor applied to the number of train steps handed to the
        optimizer.

    Returns:
      A tuple `(train_op, loss, logits, probabilities, feed_dict)` where
      `feed_dict` maps input names to the placeholders created here.
    """
    gpus = get_available_gpus()
    # Multi-tower mode needs at least one GPU; without one the tower loop
    # would build nothing and the batch split would divide by zero.
    multi_gpu = use_GPU and len(gpus) != 0

    default_device = '/gpu:{}'.format(gpus[0]) if multi_gpu else '/cpu:0'

    with tf.device(default_device):
        global_step = tf.train.get_or_create_global_step()

        # input placeholder
        _input_ids = tf.placeholder(tf.int64, shape=(None, max_seq_length))
        _input_mask = tf.placeholder(tf.int64, shape=(None, max_seq_length))
        _segment_ids = tf.placeholder(tf.int64, shape=(None, max_seq_length))
        _label_ids = tf.placeholder(tf.int64, shape=None)
        _sample_weight = tf.placeholder(tf.float32, shape=None)
        _output_dropout_keep_prob = tf.placeholder(tf.float32, shape=None)
        _hidden_dropout_prob = tf.placeholder(tf.float32, shape=None)
        _attention_probs_dropout_prob = tf.placeholder(tf.float32, shape=None)

        # optimizer placeholder
        _learning_rate = tf.placeholder(tf.float32, shape=None)
        _num_train_steps = tf.placeholder(tf.int32, shape=None)
        _num_warmup_steps = tf.placeholder(tf.int32, shape=None)
        _batch_size = tf.placeholder(tf.int32, shape=None)

        # feed dict
        feed_dict = {
            'input_ids': _input_ids,
            'input_mask': _input_mask,
            'segment_ids': _segment_ids,
            'label_ids': _label_ids,
            'sample_weight': _sample_weight,
            'output_dropout_keep_prob': _output_dropout_keep_prob,
            'hidden_dropout_prob': _hidden_dropout_prob,
            'attention_probs_dropout_prob': _attention_probs_dropout_prob,
            'learning_rate': _learning_rate,
            'num_train_steps': _num_train_steps,
            'num_warmup_steps': _num_warmup_steps,
            'batch_size': _batch_size
        }

        optimizer = optimization.create_optimizer(
            _learning_rate, tf.cast((_num_train_steps / cycle), tf.int32),
            _num_warmup_steps)

        if multi_gpu:
            tower_grads = []
            loss_list = []
            logits_list = []
            probabilities_list = []

            batch_size = tf.to_int32(_batch_size / len(gpus))
            for i, gpu in enumerate(gpus):
                with tf.device('/gpu:{}'.format(gpu)):
                    # split input data for every gpu device.
                    with tf.name_scope("input_slice"):
                        begin = i * batch_size
                        end = (i + 1) * batch_size
                        input_ids = _input_ids[begin:end]
                        input_mask = _input_mask[begin:end]
                        segment_ids = _segment_ids[begin:end]
                        label_ids = _label_ids[begin:end]
                        sample_weight = _sample_weight[begin:end]

                    loss_, logits_, probabilities_ = _build_classifier_tower(
                        bert_config, num_labels, input_ids, input_mask,
                        segment_ids, label_ids, sample_weight,
                        _hidden_dropout_prob, _attention_probs_dropout_prob,
                        _output_dropout_keep_prob, label_smoothing)

                    tower_grads.append(optimizer.compute_gradients(loss_))
                    loss_list.append(loss_)
                    logits_list.append(logits_)
                    probabilities_list.append(probabilities_)

            loss = tf.reduce_mean(loss_list)
            if len(gpus) == 1:
                logits = tf.squeeze(logits_list, [0])
                probabilities = tf.squeeze(probabilities_list, [0])
            else:
                logits = tf.keras.layers.concatenate(logits_list, axis=0)
                probabilities = tf.keras.layers.concatenate(
                    probabilities_list, axis=0)

            # Merge grads
            with tf.name_scope("merge_grads"):
                grads = average_gradients(tower_grads)
                train_op = optimizer.apply_gradients(
                    _clip_gradients(grads), global_step=global_step)
        else:
            loss, logits, probabilities = _build_classifier_tower(
                bert_config, num_labels, _input_ids, _input_mask,
                _segment_ids, _label_ids, _sample_weight,
                _hidden_dropout_prob, _attention_probs_dropout_prob,
                _output_dropout_keep_prob, label_smoothing)

            with tf.name_scope("merge_grads"):
                grads = optimizer.compute_gradients(loss)
                train_op = optimizer.apply_gradients(
                    _clip_gradients(grads), global_step=global_step)

    # initial model's variables.
    tf.logging.info("Load model checkpoint : %s" % init_checkpoint)
    if init_checkpoint:
        assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
            tf.trainable_variables(), init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    sess.run(tf.global_variables_initializer())

    return (train_op, loss, logits, probabilities, feed_dict)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the masked-LM-only `TPUEstimatorSpec`.

    Mask positions are sampled here rather than read from the features: for
    each example, `FLAGS.max_predictions_per_seq` positions are drawn, the
    corresponding input ids are replaced by `MASK_ID`, and the original ids
    at those positions become the prediction targets.

    Args:
      features: dict of input tensors; reads "input_ids", "input_mask" and
        "segment_ids".
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value; TRAIN or EVAL.
      params: unused.

    Returns:
      A `TPUEstimatorSpec` with the masked-LM loss and either the train op
      (TRAIN) or masked-LM accuracy/loss metrics (EVAL).

    Raises:
      ValueError: if `mode` is neither TRAIN nor EVAL.
      SystemExit: in TRAIN mode when `FLAGS.opt` is not 'lamb' or 'adam'.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Used with range() below, so the batch dimension has to be a Python int.
    batch_size = modeling.get_shape_list(input_ids)[0]

    # NOTE(review): the positions are sampled in Python and stored in a
    # tf.constant, so they are fixed for the graph built by this call rather
    # than re-drawn on every step -- confirm this is intended.
    masked_lm_positions = tf.constant([
        sorted(
            random.sample(
                range(1, FLAGS.max_seq_length - 2),
                FLAGS.max_predictions_per_seq)) for _ in range(batch_size)
    ])
    masks_list = tf.constant(
        [MASK_ID] * (FLAGS.max_predictions_per_seq * batch_size))

    # A sampled position only counts when input_mask is non-zero there.
    masked_lm_weights = tf.multiply(
        tf.ones(modeling.get_shape_list(masked_lm_positions)),
        tf.cast(
            gather_indexes_rank2(input_mask, masked_lm_positions),
            tf.float32))
    masked_input_ids = replace_elements_by_indices(input_ids, masks_list,
                                                   masked_lm_positions)
    masked_input_ids = tf.multiply(masked_input_ids, input_mask)
    masked_lm_ids = gather_indexes_rank2(input_ids, masked_lm_positions)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=masked_input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        train_pooler=False)

    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(),
         model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
         masked_lm_weights)

    model_summary()

    total_loss = masked_lm_loss

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
        ) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        if use_tpu:
            # On TPU the restore is deferred to the scaffold function.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint,
                                              assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        if FLAGS.opt == 'lamb':
            train_op = optimization.create_lamb_optimizer(
                total_loss,
                learning_rate,
                num_train_steps,
                num_warmup_steps,
                use_tpu,
                weight_decay=0.01)
        elif FLAGS.opt == 'adam':
            train_op = optimization.create_adam_optimizer(
                total_loss,
                learning_rate,
                num_train_steps,
                num_warmup_steps,
                use_tpu,
                weight_decay=0.01)
        else:
            # sys.exit accepts a single argument; format the message first.
            sys.exit("%s does not exist." % FLAGS.opt)

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
        # To count float operations, tf.profiler.profile(
        #     tf.get_default_graph(),
        #     options=tf.profiler.ProfileOptionBuilder.float_operation())
        # can be called here.
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                      masked_lm_ids, masked_lm_weights):
            """Computes the masked-LM loss and accuracy of the model."""
            masked_lm_log_probs = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            masked_lm_predictions = tf.argmax(
                masked_lm_log_probs, axis=-1, output_type=tf.int32)
            masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
            masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
            masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
            masked_lm_accuracy = tf.metrics.accuracy(
                labels=masked_lm_ids,
                predictions=masked_lm_predictions,
                weights=masked_lm_weights)
            masked_lm_mean_loss = tf.metrics.mean(
                values=masked_lm_example_loss, weights=masked_lm_weights)

            return {
                "masked_lm_accuracy": masked_lm_accuracy,
                "masked_lm_loss": masked_lm_mean_loss,
            }

        # The tensor list must match metric_fn's parameters one-to-one; this
        # model has no next-sentence head, so only masked-LM tensors exist.
        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights
        ])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                         (mode))

    return output_spec
def model_fn(features, labels, mode, params):
    """Build the sequence-labelling `TPUEstimatorSpec`.

    Args:
      features: dict of input tensors; reads "input_ids", "input_mask",
        "segment_ids", "label_ids" and "label_mask".
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `TPUEstimatorSpec`: loss + train op + a loss/step logging hook in
      TRAIN, loss + macro precision/recall/F1 metrics in EVAL, and the
      `predicts` tensor from `create_model` as predictions otherwise.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    label_mask = features["label_mask"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, predicts) = create_model(
        bert_config, is_training, input_ids, input_mask, label_mask,
        segment_ids, label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    # Stays empty when no checkpoint is given, so the logging loop below
    # still has a name set to consult.
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
        ) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        # Restore exactly once: inside the scaffold on TPU, directly otherwise.
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint,
                                              assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        # Log the loss and the global step every 200 iterations.
        hook_dict = {
            'loss': total_loss,
            'global_steps': tf.train.get_or_create_global_step(),
        }
        logging_hook = tf.train.LoggingTensorHook(hook_dict, every_n_iter=200)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn,
            training_hooks=[logging_hook])
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits):
            """Macro-averaged precision, recall and F1 over all labels."""
            # per_example_loss is accepted to match the eval_metrics tensor
            # list but is not used by any metric here.
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            precision = tf_metrics.precision(
                label_ids, predictions, num_labels, average="macro")
            recall = tf_metrics.recall(
                label_ids, predictions, num_labels, average="macro")
            f = tf_metrics.f1(
                label_ids, predictions, num_labels, average="macro")
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)

    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the fine-tuning classifier `TPUEstimatorSpec`.

    Args:
      features: dict of input tensors; reads "input_ids", "input_mask",
        "segment_ids", "label_ids" and, when present, "is_real_example".
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `TPUEstimatorSpec`: loss + train op in TRAIN, loss + classification
      metrics in EVAL, and {"probabilities": ...} predictions otherwise.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    # Per-example weight: the feature when provided, otherwise all ones.
    if "is_real_example" in features:
        is_real_example = tf.cast(
            features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # The second entry of ft_params is logged as the loss type.
    loss_type = ft_params[1]
    print("")
    tf.logging.info("Using loss type:%s" % (loss_type))

    (total_loss, per_example_loss, log_probs, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, ft_params)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        if use_tpu:
            # On TPU the restore is deferred to the scaffold function.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint,
                                              assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, log_probs,
                      is_real_example):
            """Classification metrics, all weighted by `is_real_example`.

            Every metric receives the same weights so that examples with a
            zero weight are excluded consistently, not only from accuracy
            and loss.
            """
            predictions = tf.argmax(log_probs, axis=-1, output_type=tf.int32)
            metrics_v1 = tf.compat.v1.metrics
            accuracy = metrics_v1.accuracy(
                labels=label_ids,
                predictions=predictions,
                weights=is_real_example)
            loss = metrics_v1.mean(
                values=per_example_loss, weights=is_real_example)
            f1_score = tf.contrib.metrics.f1_score(
                label_ids, predictions, weights=is_real_example)
            auc = metrics_v1.auc(
                label_ids, predictions, weights=is_real_example)
            recall = metrics_v1.recall(
                label_ids, predictions, weights=is_real_example)
            precision = metrics_v1.precision(
                label_ids, predictions, weights=is_real_example)
            true_pos = metrics_v1.true_positives(
                label_ids, predictions, weights=is_real_example)
            true_neg = metrics_v1.true_negatives(
                label_ids, predictions, weights=is_real_example)
            false_pos = metrics_v1.false_positives(
                label_ids, predictions, weights=is_real_example)
            false_neg = metrics_v1.false_negatives(
                label_ids, predictions, weights=is_real_example)
            return {
                "eval_accuracy": accuracy,
                "eval_loss": loss,
                "F1_Score": f1_score,
                "auc": auc,
                "precision": precision,
                "recall": recall,
                "true_positives": true_pos,
                "true_negatives": true_neg,
                "false_positives": false_pos,
                "false_negatives": false_neg
            }

        eval_metrics = (metric_fn, [
            per_example_loss, label_ids, log_probs, is_real_example
        ])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities},
            scaffold_fn=scaffold_fn)

    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Args:
      features: batch features from `input_fn`; must contain
        "input_sequence", "input_mask", "segment_ids" and "edit_sequence".
      labels: batch labels from `input_fn` (unused here).
      mode: an instance of `tf.estimator.ModeKeys`.
      params: additional configuration (unused here).

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec` when `FLAGS.use_tpu` and
      `FLAGS.tpu_name` are both set, and always in EVAL mode. Otherwise a
      plain `tf.estimator.EstimatorSpec` for TRAIN and PREDICT.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_sequence = features["input_sequence"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    edit_sequence = features["edit_sequence"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, probabilities) = gec_create_model(
        bert_config, is_training, input_sequence, input_mask, segment_ids,
        edit_sequence, use_one_hot_embeddings, mode, copy_weight,
        use_bert_more, insert_ids, multitoken_insert_ids,
        subtract_replaced_from_replacement)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        # Warm-start from the checkpoint when one is configured.
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        if use_tpu:

            # On TPU the checkpoint initialisation is deferred to the scaffold.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        if FLAGS.use_tpu and FLAGS.tpu_name:
            # TPU train
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps,
                use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        else:
            # GPUs or CPU train
            train_op = custom_optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, train_op=train_op)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, edit_sequence, logits):
            """Accuracy over positions whose gold edit id is not 0-3, plus mean loss."""
            # Predictions are restricted to edit ids >= 3.
            predictions = tf.argmax(
                logits[:, :, 3:], axis=-1, output_type=tf.int32) + 3
            # Positions whose gold edit id is 0, 1, 2 or 3 get weight 0.
            mask = tf.equal(edit_sequence, 0)
            mask = tf.logical_or(mask, tf.equal(edit_sequence, 1))
            mask = tf.logical_or(mask, tf.equal(edit_sequence, 2))
            mask = tf.logical_or(mask, tf.equal(edit_sequence, 3))
            mask = tf.to_float(tf.logical_not(mask))
            accuracy = tf.metrics.accuracy(edit_sequence, predictions, mask)
            loss = tf.metrics.mean(per_example_loss)
            return {
                "eval_accuracy": accuracy,
                "eval_loss": loss,
            }

        eval_metrics = (metric_fn, [per_example_loss, edit_sequence, logits])
        # NOTE(review): EVAL always returns a TPUEstimatorSpec, unlike TRAIN
        # and PREDICT which switch on FLAGS; confirm this matches the
        # estimator class used off-TPU.
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        # first three edit ids unk, sos, eos are dummy. We do not consider
        # them in predictions
        predictions = tf.argmax(
            logits[:, :, 3:], axis=-1, output_type=tf.int32) + 3
        if FLAGS.use_tpu and FLAGS.tpu_name:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={
                    "predictions": predictions,
                    "logits": logits
                },
                scaffold_fn=scaffold_fn)
        else:
            # multiple GPUs
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                predictions={
                    "predictions": predictions,
                    "logits": logits
                })
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Builds the model through `create_model`, warm-starts it from
    `init_checkpoint` when one is configured, and returns a
    `TPUEstimatorSpec` for the requested mode. The evaluation metrics and
    the prediction output depend on whether `FLAGS.task_name` is 'ner'.

    Args:
      features: dict of tensors holding "input_ids", "input_mask",
        "segment_ids" and "label_ids".
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec`.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_ner = FLAGS.task_name.lower() == 'ner'

    # Build the model from the inputs. The call is identical for both task
    # kinds; only the meaning of the fourth output differs (it is returned
    # as the predictions for 'ner' and wrapped as "probabilities" otherwise).
    (total_loss, per_example_loss, logits, model_output) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    # Load the pre-trained BERT weights.
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        # The checkpoint is initialised exactly once: inside the scaffold on
        # TPU, directly otherwise. (An extra unconditional call used to run
        # before this branch.)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    # Log which variables were initialised from the checkpoint.
    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        if is_ner:
            # Evaluation metrics for the sequence-labelling task.
            def metric_fn(per_example_loss, label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # NOTE(review): 11 and [2..7] are passed as the class count
                # and positive indices of tf_metrics; confirm they match
                # num_labels and the label map.
                precision = tf_metrics.precision(
                    label_ids, predictions, 11, [2, 3, 4, 5, 6, 7],
                    average="macro")
                recall = tf_metrics.recall(
                    label_ids, predictions, 11, [2, 3, 4, 5, 6, 7],
                    average="macro")
                f = tf_metrics.f1(
                    label_ids, predictions, 11, [2, 3, 4, 5, 6, 7],
                    average="macro")
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                }
        else:

            def metric_fn(per_example_loss, label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(
                    labels=label_ids, predictions=predictions)
                loss = tf.metrics.mean(values=per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

        eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        if is_ner:
            predictions = model_output
        else:
            predictions = {"probabilities": model_output}
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def optimize_graph(logger=None, verbose=False):
    """Build a BERT sentence-encoding graph, freeze it, and write it to a file.

    The graph takes three int32 placeholders (`input_ids`, `input_mask`,
    `input_type_ids`), runs `modeling.BertModel`, and mean-pools the selected
    encoder layer(s) over the sequence axis using the input mask. The pooled
    tensor is exposed through an identity op named 'final_encodes'. Variables
    are converted to constants and the graph is passed through
    `optimize_for_inference` before being serialised.

    Args:
      logger: logger to report progress on; when falsy, one is created with
        `set_logger`.
      verbose: forwarded to `set_logger` when a logger has to be created.

    Returns:
      The path of the file the serialised graph was written to. If any
      exception is raised, it is logged and the function falls through,
      implicitly returning None.

    NOTE(review): the extents of the `with` blocks were reconstructed from a
    whitespace-flattened source; confirm them against the original layout.
    """
    if not logger:
        logger = set_logger(colored('BERT_VEC', 'yellow'), verbose)
    try:
        # we don't need GPU for optimizing the graph
        from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference
        tf.gfile.MakeDirs(args.output_dir_sim)
        tf.gfile.MakeDirs(args.output_dir)

        config_fp = args.config_name
        logger.info('model config: %s' % config_fp)

        # Load the BERT config file.
        with tf.gfile.GFile(config_fp, 'r') as f:
            bert_config = modeling.BertConfig.from_dict(json.load(f))

        logger.info('build graph...')
        # input placeholders, not sure if they are friendly to XLA
        input_ids = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_ids')
        input_mask = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_mask')
        input_type_ids = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_type_ids')

        jit_scope = tf.contrib.compiler.jit.experimental_jit_scope

        with jit_scope():
            # Captured before `input_mask` is rebound to its float cast
            # below, so this list keeps the original int32 placeholders.
            input_tensors = [input_ids, input_mask, input_type_ids]

            model = modeling.BertModel(config=bert_config,
                                       is_training=False,
                                       input_ids=input_ids,
                                       input_mask=input_mask,
                                       token_type_ids=input_type_ids,
                                       use_one_hot_embeddings=False)

            # Collect all trainable variables.
            tvars = tf.trainable_variables()

            init_checkpoint = args.ckpt_name
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            # Select the encoder output(s) to pool.
            with tf.variable_scope("pooling"):
                # With a single layer index, take just that layer's output.
                if len(args.layer_indexes) == 1:
                    encoder_layer = model.all_encoder_layers[
                        args.layer_indexes[0]]
                else:
                    # Otherwise gather every requested layer and concatenate
                    # them along the last axis.
                    all_layers = [
                        model.all_encoder_layers[l] for l in args.layer_indexes
                    ]
                    encoder_layer = tf.concat(all_layers, -1)

            mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
            masked_reduce_mean = lambda x, m: tf.reduce_sum(
                mul_mask(x, m), axis=1) / (tf.reduce_sum(
                    m, axis=1, keepdims=True) + 1e-10)

            input_mask = tf.cast(input_mask, tf.float32)
            # Sentence-vector generation: a mask-weighted mean over axis 1,
            # so positions with mask 0 do not contribute; 1e-10 guards the
            # division against an all-zero mask.
            pooled = masked_reduce_mean(encoder_layer, input_mask)
            pooled = tf.identity(pooled, 'final_encodes')

            output_tensors = [pooled]
            tmp_g = tf.get_default_graph().as_graph_def()

        # allow_soft_placement picks the device automatically, but was noted
        # as error-prone here, so it is left disabled:
        #config = tf.ConfigProto(allow_soft_placement=True)
        # Let GPU memory grow on demand instead of fixing a fraction.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        #config.gpu_options.per_process_gpu_memory_fraction = args.gpu_memory_fraction
        #config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))
        with tf.Session(config=config) as sess:
            logger.info('load parameters from checkpoint...')
            sess.run(tf.global_variables_initializer())
            logger.info('freeze...')
            # `n.name[:-2]` drops the last two characters of each tensor
            # name (presumably the ":0" output suffix) to get the node name.
            tmp_g = tf.graph_util.convert_variables_to_constants(
                sess, tmp_g, [n.name[:-2] for n in output_tensors])
            dtypes = [n.dtype for n in input_tensors]
            logger.info('optimize...')
            tmp_g = optimize_for_inference(
                tmp_g, [n.name[:-2] for n in input_tensors],
                [n.name[:-2] for n in output_tensors],
                [dtype.as_datatype_enum for dtype in dtypes], False)
        # Only the generated name is kept; the NamedTemporaryFile object
        # itself is not retained, and the graph is written to that path
        # through tf.gfile below.
        tmp_file = tempfile.NamedTemporaryFile('w',
                                               delete=True,
                                               dir=args.output_dir_sim).name
        logger.info('write graph to a tmp file: %s' % tmp_file)
        with tf.gfile.GFile(tmp_file, 'wb') as f:
            f.write(tmp_g.SerializeToString())
        return tmp_file
    except Exception as e:
        # Best-effort: failures are logged, not re-raised.
        logger.error('fail to optimize the graph!')
        logger.error(e)
def __init__(self, is_training):
    """Build the ALBERT + TextCNN classification graph.

    Creates the input placeholders, runs `modeling.AlbertModel`, feeds its
    sequence output through `cell_textcnn`, and adds a dense layer that
    produces `self.logits`, `self.probabilities` and `self.preds`. When
    `is_training` is true it also builds the accuracy summary, the softmax
    cross-entropy loss, the optimizer op and the merged summary op.

    Args:
      is_training: whether to build the training-only parts of the graph;
        also forwarded to `AlbertModel` and `cell_textcnn`.

    NOTE(review): the extents of the `with`/`if` blocks were reconstructed
    from a whitespace-flattened source. Scope nesting affects variable
    names, so confirm against the original layout.
    """
    self.is_training = is_training
    # Input placeholders: token ids, attention mask and segment ids are
    # [batch, hp.sequence_length]; labels are one class id per example.
    self.input_ids = tf.compat.v1.placeholder(
        tf.int32, shape=[None, hp.sequence_length], name='input_ids')
    self.input_masks = tf.compat.v1.placeholder(
        tf.int32, shape=[None, hp.sequence_length], name='input_masks')
    self.segment_ids = tf.compat.v1.placeholder(
        tf.int32, shape=[None, hp.sequence_length], name='segment_ids')
    self.label_ids = tf.compat.v1.placeholder(tf.int32,
                                              shape=[None],
                                              name='label_ids')
    # Load BERT Pre-training LM
    self.model = modeling.AlbertModel(config=bert_config,
                                      is_training=self.is_training,
                                      input_ids=self.input_ids,
                                      input_mask=self.input_masks,
                                      token_type_ids=self.segment_ids,
                                      use_one_hot_embeddings=False)
    # Get the feature vector with size 3D:(batch_size,sequence_length,hidden_size)
    output_layer_init = self.model.get_sequence_output()
    # Cell textcnn
    output_layer = cell_textcnn(output_layer_init, self.is_training)
    # Hidden size
    #hidden_size = output_layer.shape[-1].value
    hidden_size = output_layer.shape[-1]
    # Dense
    with tf.name_scope("Full-connection"):
        output_weights = tf.compat.v1.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.compat.v1.truncated_normal_initializer(
                stddev=0.02))
        output_bias = tf.compat.v1.get_variable(
            "output_bias", [num_labels], initializer=tf.zeros_initializer())
        # Logit
        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        self.logits = tf.nn.bias_add(logits, output_bias)
        print('logits: ', self.logits)
        self.probabilities = tf.nn.softmax(self.logits, axis=-1)
    # Prediction
    with tf.compat.v1.variable_scope("Prediction"):
        self.preds = tf.argmax(self.logits, axis=-1, output_type=tf.int32)
        print('preds:', self.preds)
    # Summary for tensorboard
    with tf.compat.v1.variable_scope("Loss"):
        # `self.accuracy` only exists when the graph is built for training.
        if self.is_training:
            self.accuracy = tf.reduce_mean(
                tf.compat.v1.to_float(tf.equal(self.preds, self.label_ids)))
            tf.summary.scalar('Accuracy', self.accuracy)
    # Check whether has loaded model
    ckpt = tf.train.get_checkpoint_state(hp.saved_model_path)
    checkpoint_suffix = ".index"
    if ckpt and tf.compat.v1.gfile.Exists(ckpt.model_checkpoint_path +
                                          checkpoint_suffix):
        # A saved model exists: only report it here; this method does not
        # restore it.
        print('=' * 10, 'Restoring model from checkpoint!', '=' * 10)
        print("%s - Restoring model from checkpoint ~%s" %
              (time_now_string(), ckpt.model_checkpoint_path))
    else:
        # Load BERT Pre-training LM
        print('=' * 10, 'First time load BERT model!', '=' * 10)
        tvars = tf.compat.v1.trainable_variables()
        if hp.init_checkpoint:
            (assignment_map, initialized_variable_names) = \
                modeling.get_assignment_map_from_checkpoint(tvars, hp.init_checkpoint)
            tf.compat.v1.train.init_from_checkpoint(
                hp.init_checkpoint, assignment_map)
    # Optimization
    if self.is_training:
        # Global_step
        self.global_step = tf.Variable(0,
                                       name='global_step',
                                       trainable=False)
        # Loss
        log_probs = tf.nn.log_softmax(self.logits, axis=-1)  # predicted log-probabilities
        one_hot_labels = tf.one_hot(
            self.label_ids, depth=num_labels,
            dtype=tf.float32)  # one-hot labels, used for the loss below
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs,
                                          axis=-1)
        self.loss = tf.reduce_mean(per_example_loss)
        # Optimizer
        # The training examples are loaded only to count them for the
        # step schedule.
        train_examples = processor.get_train_examples(hp.data_dir)
        num_train_steps = int(
            len(train_examples) / hp.batch_size * hp.num_train_epochs)
        num_warmup_steps = int(num_train_steps * hp.warmup_proportion)
        print('num_train_steps', num_train_steps)
        self.optimizer = optimization.create_optimizer(
            self.loss,
            hp.learning_rate,
            num_train_steps,
            num_warmup_steps,
            hp.use_tpu,
        )
        # Summary for tensorboard
        tf.summary.scalar('loss', self.loss)
        # NOTE(review): `testvalue` is never read; it only adds a second
        # merge op to the graph.
        testvalue = tf.compat.v1.summary.merge_all()
        self.merged = tf.compat.v1.summary.merge_all()
    # Compute the parameters
    count_model_params()
    vs = tf.compat.v1.trainable_variables()
    for l in vs:
        print(l)
    print('=' * 40)
def predict_loop(opts, finetuned_checkpoint_path=None):
    """Run SQuAD inference and write the prediction files.

    Converts the examples in `opts["predict_file"]` to features (also
    written to `<tfrecord_dir>/eval.tf_record`), builds the inference graph,
    optionally restores weights from a checkpoint, runs `predict_step` for
    the computed number of iterations, logs throughput/latency, and writes
    `predictions.json`, `best_predictions.json` and `null_odds.json` into
    the output directory. When `opts['do_evaluation']` is set, the
    predictions are then evaluated with `evaluate_squad`.

    Args:
      opts: option dict; see the keys read below.
      finetuned_checkpoint_path: checkpoint to restore. When falsy,
        `opts["init_checkpoint"]` is used instead if it is set.

    Raises:
      ValueError: if no output directory can be determined, i.e.
        `opts['output_dir']` is None and the checkpoint path is missing or
        does not contain 'adamw'.
      tf.errors.ResourceExhaustedError: re-raised for any `tf.errors.OpError`
        coming out of `predict_step`.
    """
    i = 0
    eval_examples = squad_data.read_squad_examples(opts["predict_file"],
                                                   opts,
                                                   is_training=False)

    tfrecord_dir = opts['tfrecord_dir']
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(tfrecord_dir, exist_ok=True)

    eval_writer = squad_data.FeatureWriter(
        filename=os.path.join(tfrecord_dir, "eval.tf_record"),
        is_training=False)
    eval_features = []

    tokenizer = tokenization.FullTokenizer(
        vocab_file=opts['vocab_file'], do_lower_case=opts['do_lower_case'])

    def append_feature(feature):
        """Keep the feature in memory and also stream it to the tfrecord."""
        eval_features.append(feature)
        eval_writer.process_feature(feature)

    # Create eval.tfrecord
    num_features = squad_data.convert_examples_to_features(
        examples=eval_examples,
        tokenizer=tokenizer,
        max_seq_length=opts["seq_length"],
        doc_stride=opts["doc_stride"],
        max_query_length=opts["max_query_length"],
        is_training=False,
        output_fn=append_feature)
    eval_writer.close()

    iterations_per_step = 1
    predict = build_graph(opts, iterations_per_step, is_training=False)
    predict.session.run(predict.init)
    predict.session.run(predict.iterator.initializer)

    if opts["init_checkpoint"] and not finetuned_checkpoint_path:
        finetuned_checkpoint_path = opts['init_checkpoint']

    # Note that finetuned_checkpoint_path could be already set during "do_predict"
    if finetuned_checkpoint_path and not opts.get('generated_data', False):
        (assignment_map, _initialized_variable_names
         ) = bert_ipu.get_assignment_map_from_checkpoint(
             predict.tvars, finetuned_checkpoint_path)
        saver_restore = tf.train.Saver(assignment_map)
        saver_restore.restore(predict.session, finetuned_checkpoint_path)
        # NOTE(review): 127 is presumably the minimum number of variables a
        # valid checkpoint must map; confirm the origin of this constant.
        assert len(assignment_map) >= 127

    all_results = []
    samples_per_iteration = (opts['micro_batch_size'] *
                             opts['gradient_accumulation_count'])
    iterations = len(eval_features) // (samples_per_iteration *
                                        opts['replicas'])
    if samples_per_iteration != 1:
        # One extra iteration is run whenever more than one sample is
        # consumed per iteration.
        iterations += 1

    logger.info(f"Total iterations: {iterations}")
    all_time_consumption = []
    while i < iterations:
        try:
            unique_ids, start_logits, end_logits, batch_duration = predict_step(
                predict)
            all_time_consumption.append(batch_duration /
                                        opts["batches_per_step"])
        except tf.errors.OpError as e:
            raise tf.errors.ResourceExhaustedError(e.node_def, e.op,
                                                   e.message) from e

        i += iterations_per_step
        if len(all_results) % 1000 == 0:
            logger.info(f"Procesing example: {len(all_results)}")

        # The outfeed shape is [batches_per_step, num_replicas (if replication enabled), micro_batch_size, seq_len].
        # Flatten to keep only the last dimension
        num_samples = np.prod(unique_ids.shape)
        seq_len = opts['seq_length']
        unique_ids = unique_ids.reshape([num_samples])
        start_logits = start_logits.reshape([num_samples, seq_len])
        end_logits = end_logits.reshape([num_samples, seq_len])

        for j in range(num_samples):
            all_results.append(
                squad_results.RawResult(
                    unique_id=unique_ids[j],
                    start_logits=start_logits[j, :].tolist(),
                    end_logits=end_logits[j, :].tolist()))

    if len(all_time_consumption) >= 10 * 2:
        # The time consumption of First 10 steps is not stable for time measurement.
        all_time_consumption = np.array(all_time_consumption[10:])
    else:
        logger.warning(
            "if the first 10 steps is counted, the measurement of throughtput and latency is not accurate."
        )
        all_time_consumption = np.array(all_time_consumption)

    if should_be_pipeline_when_inference(opts):
        samples_per_batch = (opts['micro_batch_size'] *
                             opts['gradient_accumulation_count'])
    else:
        samples_per_batch = opts['micro_batch_size']
    logger.info((
        f"inference throughput: {samples_per_batch / all_time_consumption.mean()} "
        f"exmples/sec - Latency: {all_time_consumption.mean()} {all_time_consumption.min()} "
        f"{all_time_consumption.max()} (mean min max) sec "))

    # Done predictions
    output_dir = opts['output_dir']
    if output_dir is None:
        # Fall back to the checkpoint's parent directory, but only for paths
        # containing 'adamw'.
        if finetuned_checkpoint_path and 'adamw' in finetuned_checkpoint_path:
            output_dir = finetuned_checkpoint_path.split('/ckpt')[0]
        else:
            # Previously this fell through to os.path.join(None, ...) and
            # failed with an opaque TypeError.
            raise ValueError(
                "Cannot determine where to write predictions: "
                "opts['output_dir'] is None and the checkpoint path "
                f"{finetuned_checkpoint_path!r} does not contain 'adamw'.")
    else:
        os.makedirs(output_dir, exist_ok=True)

    output_prediction_file = os.path.join(output_dir, "predictions.json")
    output_nbest_file = os.path.join(output_dir, "best_predictions.json")
    output_null_log_odds_file = os.path.join(output_dir, "null_odds.json")
    eval_features = eval_features[:num_features]
    squad_results.write_predictions(
        eval_examples, eval_features, all_results, opts["n_best_size"],
        opts["max_answer_length"], opts["do_lower_case"],
        output_prediction_file, output_nbest_file, output_null_log_odds_file,
        opts["version_2_with_negative"], opts["null_score_diff_threshold"],
        opts["verbose_logging"])

    predict.session.close()

    if opts['do_evaluation']:
        evaluate_squad(output_prediction_file, opts)
def training_loop(opts):
    """Fine-tune on SQuAD and return the path of the final checkpoint.

    Determines the dataset size (from the tfrecord metadata file when it
    exists, otherwise from built-in defaults), derives the number of
    iterations from `opts['num_train_steps']` or `opts['epochs']`, builds the
    training graph, optionally restores an initial or previous checkpoint,
    and then runs `training_step` until the iteration budget is consumed.
    Statistics are logged and written to TensorBoard every `log_iterations`
    iterations; checkpoints are saved every `ckpt_iterations` iterations and
    once more at the end.

    Args:
      opts: option dict; see the keys read below.

    Returns:
      The path returned by the final `train.saver.save` call.

    Raises:
      SystemExit: with a non-zero status if neither `num_train_steps` nor
        `epochs` is set.
      tf.errors.ResourceExhaustedError: re-raised for any `tf.errors.OpError`
        coming out of `training_step`.
    """
    squad_version = "SQuAD20" if opts["version_2_with_negative"] else "SQuAD11"
    base_name_train = (f"{opts['seq_length']}_{opts['doc_stride']}_"
                       f"{opts['max_query_length']}_{squad_version}")

    train_metafile = os.path.join(opts["tfrecord_dir"],
                                  "train_" + base_name_train + ".metadata")
    if os.path.exists(train_metafile):
        with open(train_metafile) as f:
            total_samples = int(f.readline())
    else:
        if opts["version_2_with_negative"]:
            logger.info(
                "SQUAD 2.0 DATASET SIZE 131944 (based on no. of features).")
            total_samples = 131944
        else:
            logger.info(
                "SQUAD 1.1 DATASET SIZE 88641 (based on no. of features).")
            total_samples = 88641

    logger.info(f"Total samples {total_samples}")
    iterations_per_epoch = total_samples // opts["total_batch_size"]
    log_iterations = opts['batches_per_step'] * opts["steps_per_logs"]
    ckpt_iterations = opts['batches_per_step'] * opts["steps_per_ckpts"]

    if opts.get('num_train_steps'):
        # total iterations
        iterations = opts['num_train_steps'] * opts['batches_per_step']
    elif opts.get('epochs'):
        iterations = iterations_per_epoch * opts['epochs']
    else:
        logger.error("One between epochs and num_train_step must be set")
        # A configuration error must not report success to the caller.
        sys.exit(1)

    logger.info(
        f"Training will last {iterations} iterations and {iterations//opts['batches_per_step']} steps will be executed."
    )

    # So many iterations will be run for one step.
    iterations_per_step = opts['batches_per_step']
    # Avoid nan issue caused by queue length is zero.
    queue_len = iterations_per_epoch // iterations_per_step
    if queue_len == 0:
        queue_len = 1
    batch_times = deque(maxlen=queue_len)

    total_steps = (iterations //
                   opts['batches_per_step']) * opts['batches_per_step']

    # Learning rate schedule
    lr_schedule_name = opts['lr_schedule']
    logger.info(f"Using learning rate schedule {lr_schedule_name}")
    learning_rate_schedule = make_lr_schedule(lr_schedule_name, opts,
                                              total_steps)

    # -------------- BUILD TRAINING GRAPH ----------------
    train = build_graph(opts, iterations_per_step, is_training=True)
    train.session.run(train.init)
    train.session.run(train.iterator.initializer)

    # Checkpoints restore and save
    init_checkpoint_path = opts['init_checkpoint']
    if init_checkpoint_path and not opts.get('generated_data', False):
        if os.path.isfile(init_checkpoint_path):
            # A concrete checkpoint file was given: strip its extension to
            # get the checkpoint prefix.
            init_checkpoint_path = os.path.splitext(init_checkpoint_path)[0]

        (assignment_map, initialized_variable_names
         ) = bert_ipu.get_assignment_map_from_checkpoint(
             train.tvars, init_checkpoint_path)

        for var in train.tvars:
            # "*" marks variables that will be loaded from the checkpoint.
            mark = "*" if var.name in initialized_variable_names else " "
            logger.info("%-60s [%s]\t%s (%s)", var.name, mark, var.shape,
                        var.dtype.name)

        # NOTE(review): `reader` and `load_vars` are not used below; kept so
        # the checkpoint is still opened at this point.
        reader = tf.train.NewCheckpointReader(init_checkpoint_path)
        load_vars = reader.get_variable_to_shape_map()

        saver_restore = tf.train.Saver(assignment_map)
        saver_restore.restore(train.session, init_checkpoint_path)

    if opts['steps_per_ckpts']:
        filepath = train.saver.save(train.session,
                                    opts["checkpoint_path"],
                                    global_step=0)
        logger.info(f"Saved checkpoint to {filepath}")

    if opts.get('restore_dir'):
        restore_path = opts['restore_dir']
        if os.path.isfile(restore_path):
            latest_checkpoint = os.path.splitext(restore_path)[0]
        else:
            latest_checkpoint = tf.train.latest_checkpoint(restore_path)
        # Resume the iteration counter from the number in "...ckpt-<N>".
        ckpt_pattern = re.compile(".*ckpt-([0-9]+)$")
        i = int(ckpt_pattern.match(latest_checkpoint).groups()[0]) + 1
        train.saver.restore(train.session, latest_checkpoint)
    else:
        i = 0

    # Tensorboard logs path
    log_path = os.path.join(opts["logs_path"], 'event')
    logger.info("Tensorboard event file path {}".format(log_path))
    summary_writer = tf.summary.FileWriter(log_path,
                                           train.graph,
                                           session=train.session)

    start_time = datetime.datetime.now()

    # Training loop
    print_format = (
        "step: {step:6d}, iteration: {iteration:6d}, epoch: {epoch:6.2f}, lr: {lr:6.4g}, loss: {loss:6.3f}, "
        "throughput {throughput_samples_per_sec:6.2f} samples/sec, batch time: {avg_batch_time:8.6f} s, total_time: {total_time:8.1f} s"
    )
    step = 0
    start_all = time.time()

    while i < iterations:
        step += 1
        epoch = float(opts["total_batch_size"] * i) / total_samples

        learning_rate = learning_rate_schedule.get_at_step(step)

        try:
            loss, batch_time = training_step(train, learning_rate)
        except tf.errors.OpError as e:
            raise tf.errors.ResourceExhaustedError(e.node_def, e.op,
                                                   e.message) from e

        batch_time /= iterations_per_step

        if i != 0:
            batch_times.append([batch_time])
            avg_batch_time = np.mean(batch_times)
        else:
            # The very first iteration is reported alone and kept out of the
            # running window.
            avg_batch_time = batch_time

        if i % log_iterations == 0:
            throughput = opts['total_batch_size'] / avg_batch_time

            # flush times every time it is reported
            batch_times.clear()

            total_time = time.time() - start_all

            stats = OrderedDict([('step', step),
                                 ('iteration', i + iterations_per_step),
                                 ('epoch', epoch), ('lr', learning_rate),
                                 ('loss', loss),
                                 ('avg_batch_time', avg_batch_time),
                                 ('throughput_samples_per_sec', throughput),
                                 ('total_time', total_time),
                                 ('learning_rate', learning_rate)])
            logger.info(print_format.format(**stats))

            train_summary = tf.Summary()
            train_summary.value.add(tag='epoch', simple_value=epoch)
            train_summary.value.add(tag='loss', simple_value=loss)
            train_summary.value.add(tag='learning_rate',
                                    simple_value=learning_rate)
            train_summary.value.add(tag='througput', simple_value=throughput)

            if opts['wandb']:
                wandb.log(dict(stats))

            summary_writer.add_summary(train_summary, step)
            summary_writer.flush()

        # ckpt_iterations is 0 when steps_per_ckpts is 0 (periodic
        # checkpoints disabled); skip the modulo instead of dividing by zero.
        if ckpt_iterations and i % ckpt_iterations == 0:
            filepath = train.saver.save(train.session,
                                        opts["checkpoint_path"],
                                        global_step=i + iterations_per_step)
            logger.info(f"Saved checkpoint to {filepath}")

        i += iterations_per_step

    # We save the final checkpoint
    finetuned_checkpoint_path = train.saver.save(train.session,
                                                 opts["checkpoint_path"],
                                                 global_step=i +
                                                 iterations_per_step)
    logger.info(f"Saved checkpoint to {finetuned_checkpoint_path}")
    train.session.close()
    end_time = datetime.datetime.now()
    # total_seconds() includes whole days; `.seconds` alone wraps around
    # after 24 hours.
    consume_time = int((end_time - start_time).total_seconds())
    logger.info(f"training times: {consume_time} s")
    return finetuned_checkpoint_path
def main():
    """Train the model with a manual session loop, then re-export it.

    Phase 1 builds the training graph (`is_training=True`), optionally
    restores the variables found in `config["init_checkpoint"]`, and trains
    for `config["num_train_epochs"]` epochs. From `config["eval_start_step"]`
    on, every `config["eval_per_step"]` steps the dev set is evaluated and a
    checkpoint is saved when the summed dev loss improves; before that, a
    checkpoint is saved every `config["auto_save"]` steps.

    Phase 2 resets the graph, rebuilds the model with `is_training=False`,
    restores the latest checkpoint found in `config["out"]` and saves it
    again under `config["out_1"]`.
    """
    # Local import so the block does not depend on a file-level `import os`.
    import os

    print("print start load the params...")
    print(json.dumps(config, ensure_ascii=False, indent=2))
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.gfile.MakeDirs(config["out"])
    # `config["out"]` is used as a directory (MakeDirs above,
    # get_checkpoint_state below), so the checkpoint prefix is joined to it
    # instead of being string-concatenated. With a trailing separator the
    # result is unchanged; without one the files now land inside the
    # directory rather than beside it.
    out_ckpt_prefix = os.path.join(config["out"], 'bert.ckpt')

    train_examples_len = config["train_examples_len"]
    dev_examples_len = config["dev_examples_len"]
    learning_rate = config["learning_rate"]
    eval_per_step = config["eval_per_step"]
    num_labels = config["num_labels"]
    # Steps per epoch for each split.
    num_train_steps = math.ceil(train_examples_len / config["train_batch_size"])
    num_dev_steps = math.ceil(dev_examples_len / config["dev_batch_size"])
    num_warmup_steps = math.ceil(num_train_steps * config["num_train_epochs"] *
                                 config["warmup_proportion"])
    print("num_train_steps:{}, num_dev_steps:{}, num_warmup_steps:{}".format(
        num_train_steps, num_dev_steps, num_warmup_steps))
    use_one_hot_embeddings = False
    is_training = True
    use_tpu = False
    seq_len = config["max_seq_len"]
    init_checkpoint = config["init_checkpoint"]
    print("print start compile the bert model...")

    # Define the graph inputs.
    input_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_ids')
    input_mask = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_mask')
    segment_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='segment_ids')
    labels = tf.placeholder(tf.int64, shape=[None, seq_len], name='labels')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')  # , name='is_training'

    bert_config_ = load_bert_config(config["bert_config"])
    (total_loss, acc, logits, probabilities) = create_model(
        bert_config_, is_training, input_ids, input_mask, segment_ids, labels,
        keep_prob, num_labels, use_one_hot_embeddings)
    train_op = optimization.create_optimizer(
        total_loss, learning_rate,
        num_train_steps * config["num_train_epochs"], num_warmup_steps, False)

    print("print start train the bert model...")
    batch_size = config["train_batch_size"]
    dev_batch_size = config["dev_batch_size"]

    init_global = tf.global_variables_initializer()
    # Adam slot variables are left out of the saved checkpoints; only the
    # 2 most recent checkpoints are kept.
    saver = tf.train.Saver(
        [v for v in tf.global_variables()
         if 'adam_v' not in v.name and 'adam_m' not in v.name],
        max_to_keep=2)

    with tf.Session() as sess:
        sess.run(init_global)
        print("start load the pre train model")

        if init_checkpoint:
            tvars = tf.trainable_variables()
            print("global_variables", len(tvars))
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            print("initialized_variable_names:",
                  len(initialized_variable_names))
            # Restore only the variables that exist in the checkpoint.
            saver_ = tf.train.Saver(
                [v for v in tvars if v.name in initialized_variable_names])
            saver_.restore(sess, init_checkpoint)
            tvars = tf.global_variables()
            initialized_vars = [
                v for v in tvars if v.name in initialized_variable_names]
            not_initialized_vars = [
                v for v in tvars if v.name not in initialized_variable_names]
            tf.logging.info('--all size %s; not initialized size %s' %
                            (len(tvars), len(not_initialized_vars)))
            if len(not_initialized_vars):
                sess.run(tf.variables_initializer(not_initialized_vars))
            for v in initialized_vars:
                print('--initialized: %s, shape = %s' % (v.name, v.shape))
            for v in not_initialized_vars:
                print('--not initialized: %s, shape = %s' % (v.name, v.shape))
        else:
            sess.run(tf.global_variables_initializer())

        print("********* train start *********")

        def train_step(ids, mask, segment, y, step):
            """Run one optimisation step and return (loss, probabilities, y)."""
            feed = {input_ids: ids,
                    input_mask: mask,
                    segment_ids: segment,
                    labels: y,
                    keep_prob: 0.9}
            _, out_loss, acc_, p_ = sess.run(
                [train_op, total_loss, acc, probabilities], feed_dict=feed)
            # NOTE(review): `_[1]` assumes `create_optimizer` returns a
            # sequence whose second element is the learning rate; confirm.
            print("step :{}, lr:{}, loss :{}, acc :{}".format(
                step, _[1], out_loss, acc_))
            return out_loss, p_, y

        def dev_step(ids, mask, segment, y):
            """Evaluate one dev batch and return (loss, probabilities, y)."""
            feed = {input_ids: ids,
                    input_mask: mask,
                    segment_ids: segment,
                    labels: y,
                    keep_prob: 1.0}
            out_loss, acc_, p_ = sess.run(
                [total_loss, acc, probabilities], feed_dict=feed)
            print("loss :{}, acc :{}".format(out_loss, acc_))
            return out_loss, p_, y

        min_total_loss_dev = 999999
        step = 0
        for epoch in range(config["num_train_epochs"]):
            _ = "{:*^100s}".format(("epoch-" + str(epoch)).center(20))
            print(_)
            # Read the training data for this epoch.
            total_loss_train = 0

            input_ids2, input_mask2, segment_ids2, labels2 = get_input_data(
                config["in_1"], seq_len, batch_size)
            for i in range(num_train_steps):
                step += 1
                ids_train, mask_train, segment_train, y_train = sess.run(
                    [input_ids2, input_mask2, segment_ids2, labels2])
                out_loss, pre, y = train_step(ids_train, mask_train,
                                              segment_train, y_train, step)
                total_loss_train += out_loss

                if step % eval_per_step == 0 and step >= config["eval_start_step"]:
                    total_loss_dev = 0
                    (dev_input_ids2, dev_input_mask2, dev_segment_ids2,
                     dev_labels2) = get_input_data(
                         config["in_2"], seq_len, dev_batch_size, False)

                    # One full pass over the dev set.
                    for j in range(num_dev_steps):
                        ids_dev, mask_dev, segment_dev, y_dev = sess.run(
                            [dev_input_ids2, dev_input_mask2,
                             dev_segment_ids2, dev_labels2])
                        out_loss, pre, y = dev_step(ids_dev, mask_dev,
                                                    segment_dev, y_dev)
                        total_loss_dev += out_loss

                    print("total_loss_dev:{}".format(total_loss_dev))

                    # Keep a checkpoint only when the dev loss improves.
                    if total_loss_dev < min_total_loss_dev:
                        print("save model:\t%f\t>%f" %
                              (min_total_loss_dev, total_loss_dev))
                        min_total_loss_dev = total_loss_dev
                        saver.save(sess, out_ckpt_prefix, global_step=step)
                elif step < config["eval_start_step"] and step % config["auto_save"] == 0:
                    saver.save(sess, out_ckpt_prefix, global_step=step)
            # NOTE(review): this banner is built but never printed.
            _ = "{:*^100s}".format(
                ("epoch-" + str(epoch) + " report:").center(20))
            print("total_loss_train:{}".format(total_loss_train))
        # The `with` block closes the session; no explicit close is needed.

    # Rebuild the graph in inference mode (no dropout) and re-export.
    print("remove dropout in predict")
    tf.reset_default_graph()
    is_training = False
    input_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_ids')
    input_mask = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_mask')
    segment_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='segment_ids')
    labels = tf.placeholder(tf.int64, shape=[None, seq_len], name='labels')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')  # , name='is_training'

    bert_config_ = load_bert_config(config["bert_config"])
    (total_loss, _, logits, probabilities) = create_model(
        bert_config_, is_training, input_ids, input_mask, segment_ids, labels,
        keep_prob, num_labels, use_one_hot_embeddings)

    init_global = tf.global_variables_initializer()
    # Only the most recent exported checkpoint is kept.
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    try:
        checkpoint = tf.train.get_checkpoint_state(config["out"])
        input_checkpoint = checkpoint.model_checkpoint_path
        print("[INFO] input_checkpoint:", input_checkpoint)
    except Exception as e:
        # Best-effort fallback: use the configured path itself.
        input_checkpoint = config["out"]
        print("[INFO] Model folder", config["out"], repr(e))

    with tf.Session() as sess:
        sess.run(init_global)
        saver.restore(sess, input_checkpoint)
        # NOTE(review): `config["out_1"]` is still concatenated as-is because
        # nothing here shows whether it is a directory or a file prefix.
        saver.save(sess, config["out_1"] + 'bert.ckpt')
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for Estimator.

    Builds the classification graph for `mode` and returns the matching
    `tf.estimator.EstimatorSpec`. `labels` and `params` are never read; the
    label ids are taken from `features["label_ids"]`. `task_name`,
    `bert_config`, `num_labels`, `init_checkpoint`, `learning_rate`,
    `num_train_steps`, `num_warmup_steps`, `use_one_hot_embeddings`, `hvd`
    and `FLAGS` are not defined here and are read from the enclosing scope.

    Args:
        features: mapping holding "input_ids", "input_mask", "segment_ids"
            and "label_ids".
        labels: unused.
        mode: a `tf.estimator.ModeKeys` value.
        params: unused.

    Returns:
        The `tf.estimator.EstimatorSpec` for `mode`.
    """

    def metric_fn(per_example_loss, label_ids, logits):
        """Return the `eval_metric_ops` dict selected by `task_name`."""
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        if task_name == "cola":
            FN, FN_op = tf.metrics.false_negatives(labels=label_ids, predictions=predictions)
            FP, FP_op = tf.metrics.false_positives(labels=label_ids, predictions=predictions)
            TP, TP_op = tf.metrics.true_positives(labels=label_ids, predictions=predictions)
            TN, TN_op = tf.metrics.true_negatives(labels=label_ids, predictions=predictions)

            # Matthews correlation coefficient built from the confusion counts.
            # NOTE(review): the denominator is zero when any confusion-matrix
            # row or column is empty; nothing here guards against that.
            MCC = (TP * TN - FP * FN) / ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))**0.5
            MCC_op = tf.group(FN_op, TN_op, TP_op, FP_op, tf.identity(MCC, name="MCC"))
            return {"MCC": (MCC, MCC_op)}
        elif task_name == "mrpc":
            accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions)
            loss = tf.metrics.mean(values=per_example_loss)
            f1 = tf_metrics.f1(labels=label_ids, predictions=predictions, num_classes=2, pos_indices=[1])
            return {
                "eval_accuracy": accuracy,
                "eval_f1": f1,
                "eval_loss": loss,
            }
        else:
            accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions)
            loss = tf.metrics.mean(values=per_example_loss)
            return {
                "eval_accuracy": accuracy,
                "eval_loss": loss,
            }

    # The header used to be logged twice in a row; once is enough.
    tf.compat.v1.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.compat.v1.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    if not is_training and FLAGS.use_trt:
        # Inference through the frozen TF-TRT graph: its inputs are remapped
        # onto the feature tensors and its outputs are fetched by name.
        trt_graph = get_frozen_tftrt_model(bert_config, input_ids.shape, num_labels,
                                           use_one_hot_embeddings, init_checkpoint)
        (total_loss, per_example_loss, logits, probabilities) = tf.import_graph_def(
            trt_graph,
            input_map={
                'input_ids': input_ids,
                'input_mask': input_mask,
                'segment_ids': segment_ids,
                'label_ids': label_ids
            },
            return_elements=[
                'loss/cls_loss:0',
                'loss/cls_per_example_loss:0',
                'loss/cls_logits:0',
                'loss/cls_probabilities:0'
            ],
            name='')
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {"probabilities": probabilities}
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode, predictions=predictions)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metric_ops = metric_fn(per_example_loss, label_ids, logits)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metric_ops=eval_metric_ops)
        return output_spec

    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    # With Horovod only rank 0 binds the variables to the checkpoint.
    if init_checkpoint and (hvd is None or hvd.rank() == 0):
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    if FLAGS.verbose_logging:
        tf.compat.v1.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.compat.v1.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            hvd, False, FLAGS.amp, FLAGS.num_accumulation_steps, FLAGS.optimizer_type)
        output_spec = tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, train_op=train_op)
    elif mode == tf.estimator.ModeKeys.EVAL:
        dummy_op = tf.no_op()
        # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite
        if FLAGS.amp:
            loss_scaler = tf.train.experimental.FixedLossScale(1)
            dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite(
                optimization.LAMBOptimizer(learning_rate=0.0), loss_scaler)
        eval_metric_ops = metric_fn(per_example_loss, label_ids, logits)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops=eval_metric_ops)
    else:
        dummy_op = tf.no_op()
        # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite
        if FLAGS.amp:
            dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite(
                optimization.LAMBOptimizer(learning_rate=0.0))
        output_spec = tf.estimator.EstimatorSpec(mode=mode, predictions=probabilities)
    return output_spec
segment_ids = tf.placeholder(shape=[1, Config.max_seq_length], dtype=tf.int32, name="segment_ids") # 创建bert 模型 model = modeling.BertModel( config=Config.bert_config, is_training=False, input_ids=input_ids, input_mask=input_mask, # input_mask是样本中有效词句的标识 token_type_ids=segment_ids, # token_type是句子标记 ## use_one_hot_embeddings=False ) embedding = model.get_sequence_output() # 获取字向量 tvars = tf.trainable_variables() #加载bert 参数 # 加载bert 模型参数 (assignment_map, initialized_variable_names) = \ modeling.get_assignment_map_from_checkpoint(tvars, Config.init_checkpoint) tf.train.init_from_checkpoint(Config.init_checkpoint, assignment_map) session = tf.InteractiveSession() session.run(tf.global_variables_initializer()) texts = ['在 关 系 数 据 库 中 , 对 关 系 的 最 基 本 要 求 的 满 足 第 一 范 式'] tokenizer = tokenization.FullTokenizer(vocab_file='chinese_L-12_H-768_A-12/vocab.txt', do_lower_case=True) input_ids_list, input_mask_list, segment_ids_list = word_ids(texts, tokenizer, Config.max_seq_length) input_ids_list = np.reshape(input_ids_list, newshape=[-1, Config.batch_size, Config.max_seq_length]) input_mask_list = np.reshape(input_mask_list, newshape=[-1, Config.batch_size, Config.max_seq_length]) segment_ids_list = np.reshape(segment_ids_list, newshape=[-1, Config.batch_size, Config.max_seq_length]) embedding_r = session.run(embedding, feed_dict={input_ids: input_ids_list[0], input_mask: input_mask_list[0], segment_ids: segment_ids_list[0]}) print(type(embedding_r)) print(embedding_r.shape)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Builds the BERT + CRF tagging graph and returns a
    `tf.estimator.EstimatorSpec` for `mode`. `labels` is never read; the
    gold tags come from `features["tag_ids"]`. `bert_config`,
    `init_checkpoint`, `learning_rate`, `num_train_steps` and
    `num_warmup_steps` are read from the enclosing scope.

    Args:
        features: mapping holding "input_ids", "input_mask", "segment_ids",
            "tag_ids" and "sentence_len".
        labels: unused.
        mode: a `tf.estimator.ModeKeys` value.
        params: forwarded to `get_tag_map_tensors`.

    Returns:
        The `tf.estimator.EstimatorSpec` for `mode`.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    tag_to_id, id_to_tag, num_tags = get_tag_map_tensors(params)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    tag_ids = features["tag_ids"]
    osentences_len = features["sentence_len"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (logits, crf_params, pred_ids, sentence_len) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        num_tags, osentences_len)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Prediction needs neither the checkpoint binding nor the loss below.
        pred_tags = id_to_tag.lookup(tf.to_int64(pred_ids))
        predictions = {"pred_ids": pred_ids, "pred_string": pred_tags}
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
        )
        return output_spec

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    # Calculate the loss prediction
    log_likehood, _ = tf.contrib.crf.crf_log_likelihood(
        logits, tag_ids, osentences_len, crf_params)
    loss = tf.reduce_mean(-log_likehood)

    # metric
    weights = tf.sequence_mask(osentences_len, sentence_len - 1)
    accuracy = tf.metrics.accuracy(tag_ids, pred_ids, weights)

    # write summary: the accuracy summary tracks the metric's update op
    tf.summary.scalar('acc', accuracy[1])
    tf.summary.scalar('loss', loss)

    # Only (value, update_op) pairs are valid eval metrics, and the key
    # "loss" is reserved by Estimator for the loss passed via `loss=`.
    # The raw loss tensor therefore stays out of this dict; Estimator
    # reports the loss itself.
    eval_metric_ops = {'acc': accuracy}

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)
        output_spec = tf.estimator.EstimatorSpec(mode=mode, train_op=train_op, loss=loss)
    elif mode == tf.estimator.ModeKeys.EVAL:
        output_spec = tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                                 eval_metric_ops=eval_metric_ops)
    return output_spec
def get_frozen_tftrt_model(bert_config, shape, num_labels, use_one_hot_embeddings, init_checkpoint):
    """Freeze the classifier graph and convert it with TensorFlow-TensorRT.

    The model is built on placeholders in a fresh session, its variables are
    bound to `init_checkpoint` and folded into constants, and the frozen
    graph is passed through `TrtGraphConverter`. The converted graph is
    written to "frozen_modelTRT.pb" and also returned.

    Args:
        bert_config: forwarded to `create_model`.
        shape: shape used for the three input placeholders.
        num_labels: forwarded to `create_model`.
        use_one_hot_embeddings: forwarded to `create_model`.
        init_checkpoint: checkpoint the trainable variables are loaded from.

    Returns:
        The graph returned by `TrtGraphConverter.convert()`.
    """
    # Output nodes to keep when freezing; also excluded from TRT conversion.
    kept_outputs = [
        'loss/cls_loss',
        'loss/cls_per_example_loss',
        'loss/cls_logits',
        'loss/cls_probabilities',
    ]

    session_config = tf.compat.v1.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as session:
        input_ids = tf.placeholder(tf.int32, shape, 'input_ids')
        input_mask = tf.placeholder(tf.int32, shape, 'input_mask')
        segment_ids = tf.placeholder(tf.int32, shape, 'segment_ids')
        label_ids = tf.placeholder(tf.int32, None, 'label_ids')

        create_model(bert_config, False, input_ids, input_mask, segment_ids,
                     label_ids, num_labels, use_one_hot_embeddings)

        trainable = tf.trainable_variables()
        assignment_map, restored_names = modeling.get_assignment_map_from_checkpoint(
            trainable, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        session.run(tf.global_variables_initializer())
        print("LOADED!")

        tf.compat.v1.logging.info("**** Trainable Variables ****")
        for variable in trainable:
            if variable.name in restored_names:
                origin = ", *INIT_FROM_CKPT*"
            else:
                origin = ", *NOTTTTTTTTTTTTTTTTTTTTT"
            tf.compat.v1.logging.info(" name = %s, shape = %s%s", variable.name,
                                      variable.shape, origin)

        frozen_graph = tf.graph_util.convert_variables_to_constants(
            session, session.graph.as_graph_def(), kept_outputs)

    nodes_before = len(frozen_graph.node)
    print('Converting graph using TensorFlow-TensorRT...')
    # Imported here, as in the original, so TF-TRT is only needed when this runs.
    from tensorflow.python.compiler.tensorrt import trt_convert as trt
    precision = "FP16" if FLAGS.amp else "FP32"
    converter = trt.TrtGraphConverter(
        input_graph_def=frozen_graph,
        nodes_blacklist=kept_outputs,
        max_workspace_size_bytes=(4096 << 20) - 1000,
        precision_mode=precision,
        minimum_segment_size=4,
        is_dynamic_op=True,
        maximum_cached_engines=1000)
    frozen_graph = converter.convert()

    print('Total node count before and after TF-TRT conversion:',
          nodes_before, '->', len(frozen_graph.node))
    trt_engine_nodes = sum(1 for node in frozen_graph.node if str(node.op) == 'TRTEngineOp')
    print('TRT node count:', trt_engine_nodes)

    with tf.io.gfile.GFile("frozen_modelTRT.pb", "wb") as graph_file:
        graph_file.write(frozen_graph.SerializeToString())

    return frozen_graph
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Builds the classifier graph, adds an 'accuracy' scalar summary, and
    returns a `TPUEstimatorSpec` for TRAIN, EVAL or (any other mode)
    prediction; the prediction spec also carries `export_outputs`.
    `labels` and `params` are never read. `bert_config`, `num_labels`,
    `init_checkpoint`, `learning_rate`, `num_train_steps`,
    `num_warmup_steps`, `use_tpu` and `use_one_hot_embeddings` are read from
    the enclosing scope.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)

    ##******************************************************
    # NOTE(review): this `predictions` is only referenced by the disabled code
    # below; the EVAL and predict branches compute their own.
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    one_hot_labels = tf.one_hot(label_ids, depth=num_labels, dtype=tf.float32)
    #output_weights = tf.get_variable("output_weights", [num_labels, hidden_size],initializer=tf.truncated_normal_initializer(stddev=0.02))

    # The string block below is disabled code kept by the author; it is a
    # no-op expression statement.
    '''
    tf.logging.info("**** label_ids **** is: %s", label_ids) #shape=(32,)
    tf.logging.info("**** logits **** is: %s", logits) #shape=(32, 2)
    tf.logging.info("**** probabilities **** is: %s", probabilities) #shape=(32, 2)
    tf.logging.info("**** predictions **** is: %s", predictions) #shape=(32,)
    tf.logging.info("**** one_hot_labels **** is: %s", one_hot_labels) #shape=(32, 2)

    ## add loss to tensorboard (ok)
    tf.summary.scalar('total_loss', total_loss)

    # add cross_entropy to tensorboard (ok)
    with tf.variable_scope('cross_entropy'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits)
        cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)

    # add learning_rate to tensorboard (ok)
    #with tf.name_scope('learning_rate'):
    #    train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(cross_entropy)
    #tf.summary.scalar('learning_rate', learning_rate)
    '''

    # add accuracy to tensorboard (ok)
    # Batch accuracy: fraction of examples whose arg-max logit equals the label.
    with tf.name_scope('accuracy'):
        #accuracy = tf.metrics.accuracy(label_ids, predictions)
        correct_prediction = tf.equal(tf.argmax(one_hot_labels, axis=1), tf.argmax(logits, axis=1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # add weight to tensorboard
    #with tf.name_scope('weight'):
    #    tf.summary.histogram('weight', output_weights)

    #merged = tf.summary.merge_all()
    #train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
    ##******************************************************

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        if use_tpu:

            # On TPU the checkpoint binding is deferred into the scaffold function.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    # *** Find the best parameters for the model's final fully connected layer
    # (calls AdamWeightDecayOptimizer())
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # *** Evaluation metrics for the predictions

        def metric_fn(per_example_loss, label_ids, logits):
            """Return accuracy, precision, recall, confusion counts, F1 and mean loss."""
            predictions = tf.argmax(
                logits, axis=-1, output_type=tf.int32)  # *** index of the largest value in each vector
            #predictions = tf.cast(predictions,tf.float32)  # new add
            accuracy = tf.metrics.accuracy(label_ids, predictions)
            loss = tf.metrics.mean(per_example_loss)

            precision = tf.metrics.precision(label_ids, predictions)
            recall = tf.metrics.recall(label_ids, predictions)
            fn = tf.metrics.false_negatives(label_ids, predictions)
            fp = tf.metrics.false_positives(label_ids, predictions)
            tn = tf.metrics.true_negatives(label_ids, predictions)
            tp = tf.metrics.true_positives(label_ids, predictions)
            f1 = tf.contrib.metrics.f1_score(label_ids, predictions)

            return collections.OrderedDict({
                #'eval_accuracy': accuracy,
                'accuracy': accuracy,
                'eval_precision': precision,
                'eval_recall': recall,
                'eval_tp': tp,
                'eval_tn': tn,
                'eval_fp': fp,
                'eval_fn': fn,
                'eval_f1': f1,
                'eval_loss': loss,
            })
            #return {
            #    "eval_accuracy": accuracy,
            #    "eval_loss": loss,
            #}

        #tf.summary.scalar('accuracy', accuracy)
        #tf.summary.scalar('precision', precision)
        #tf.summary.scalar('recall', recall)
        #tf.summary.scalar('loss', loss)

        # TPUEstimatorSpec takes the metric function plus its argument tensors.
        eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        # *** Prediction
        '''
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=probabilities, scaffold_fn=scaffold_fn)
        '''
        # used for savedmodel
        # Generate Predictions

        # v15 -- predict (this is work!)
        predictions = tf.argmax(
            probabilities, axis=-1, output_type=tf.int32)  #logits-->probabilities
        export_outputs = {
            'classes':
                tf.estimator.export.PredictOutput({
                    "probabilities": probabilities,
                    "classid": predictions
                })
        }
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=probabilities,
            scaffold_fn=scaffold_fn,
            export_outputs=export_outputs)
        '''
        # v13 -- classify
        predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)
        export_outputs = {
            'classes': tf.estimator.export.ClassificationOutput(
                scores=probabilities, classes=tf.as_string(predictions))
        }
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=probabilities, scaffold_fn=scaffold_fn,
            export_outputs=export_outputs)
        '''
    return output_spec
def main():
    """Fine-tune the BERT multi-class classifier, then re-save it in inference mode.

    Phase 1 builds the training graph on placeholders, runs
    `num_train_steps` training batches (feeding `keep_prob` = 0.9),
    evaluates on the dev data every `eval_per_step` steps (feeding
    `keep_prob` = 1.0) and saves a checkpoint under `config["out"]` each
    time the summed dev loss reaches a new minimum.

    Phase 2 resets the default graph, rebuilds the model with
    `is_training=False`, restores the latest checkpoint found in
    `config["out"]` and saves it under `config["out_1"]`.

    All settings are read from the module-level `config` mapping.
    """
    print("print start load the params...")
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.gfile.MakeDirs(config["out"])
    train_examples_len = config["train_examples_len"]
    dev_examples_len = config["dev_examples_len"]
    learning_rate = config["learning_rate"]
    eval_per_step = config["eval_per_step"]
    num_labels = config["num_labels"]
    print(num_labels)
    num_train_steps = int(train_examples_len / config["train_batch_size"] *
                          config["num_train_epochs"])
    print("num_train_steps:", num_train_steps)
    num_dev_steps = int(dev_examples_len / config["dev_batch_size"])
    num_warmup_steps = int(num_train_steps * config["warmup_proportion"])
    use_one_hot_embeddings = False
    is_training = True
    seq_len = config["max_seq_len"]
    init_checkpoint = config["init_checkpoint"]
    print("print start compile the bert model...")

    # Define the model inputs and outputs.
    input_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_ids')
    input_mask = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_mask')
    segment_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='segment_ids')
    labels = tf.placeholder(tf.int64, shape=[None, ], name='labels')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')  # , name='is_training'

    bert_config_ = load_bert_config(config["bert_config"])
    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config_, is_training, input_ids, input_mask, segment_ids, labels,
        keep_prob, num_labels, use_one_hot_embeddings)
    train_op = optimization.create_optimizer(
        total_loss, learning_rate, num_train_steps, num_warmup_steps, False)

    print("print start train the bert model(multi class)...")
    batch_size = config["train_batch_size"]
    input_ids2, input_mask2, segment_ids2, labels2 = get_input_data(
        config["in_1"], seq_len, batch_size)
    dev_batch_size = config["dev_batch_size"]

    # Bind the trainable variables to the pre-trained checkpoint BEFORE the
    # global initializer op is created. `init_from_checkpoint` works by
    # replacing each variable's initializer, so calling it after the
    # initializer has been built and run (as this code used to) leaves the
    # model at its random initialization.
    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    print("start load the pretrain model")
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    init_global = tf.global_variables_initializer()
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)  # keep the last 3 checkpoints

    with tf.Session() as sess:
        sess.run(init_global)
        print("********* bert_multi_class_train start *********")

        def train_step(ids, mask, segment, y, step):
            """Run one optimizer step; return (loss, predicted classes, y)."""
            feed = {
                input_ids: ids,
                input_mask: mask,
                segment_ids: segment,
                labels: y,
                keep_prob: 0.9
            }
            _, out_loss, _, p_ = sess.run(
                [train_op, total_loss, logits, probabilities], feed_dict=feed)
            pre = np.argmax(p_, axis=-1)
            acc = np.sum(np.equal(pre, y)) / len(pre)
            print("step :{},loss :{}, acc :{}".format(step, out_loss, acc))
            return out_loss, pre, y

        def dev_step(ids, mask, segment, y):
            """Evaluate one dev batch; return (loss, predicted classes, y)."""
            feed = {
                input_ids: ids,
                input_mask: mask,
                segment_ids: segment,
                labels: y,
                keep_prob: 1.0
            }
            out_loss, _, p_ = sess.run(
                [total_loss, logits, probabilities], feed_dict=feed)
            pre = np.argmax(p_, axis=-1)
            acc = np.sum(np.equal(pre, y)) / len(pre)
            print("loss :{}, acc :{}".format(out_loss, acc))
            return out_loss, pre, y

        min_total_loss_dev = 999999
        for step in range(1, num_train_steps + 1):
            # Fetch one training batch.
            ids_train, mask_train, segment_train, y_train = sess.run(
                [input_ids2, input_mask2, segment_ids2, labels2])
            train_step(ids_train, mask_train, segment_train, y_train, step)

            if step % eval_per_step == 0:
                total_loss_dev = 0
                # NOTE(review): a fresh dev input pipeline is requested on
                # every evaluation; whether that grows the graph depends on
                # `get_input_data`, which is defined elsewhere.
                dev_input_ids2, dev_input_mask2, dev_segment_ids2, dev_labels2 = get_input_data(
                    config["in_2"], seq_len, dev_batch_size)
                total_pre_dev = []
                total_true_dev = []
                for _ in range(num_dev_steps):  # batches in one pass over the dev data
                    ids_dev, mask_dev, segment_dev, y_dev = sess.run([
                        dev_input_ids2, dev_input_mask2, dev_segment_ids2, dev_labels2
                    ])
                    out_loss, pre, _ = dev_step(ids_dev, mask_dev, segment_dev, y_dev)
                    total_loss_dev += out_loss
                    total_pre_dev.extend(pre)
                    total_true_dev.extend(y_dev)
                # print("dev result report:")
                print(classification_report(total_true_dev, total_pre_dev))
                if total_loss_dev < min_total_loss_dev:
                    print("save model:\t%f\t>%f" % (min_total_loss_dev, total_loss_dev))
                    min_total_loss_dev = total_loss_dev
                    saver.save(sess, config["out"] + 'bert.ckpt', global_step=step)
        # The `with` block closes the session; no explicit close() is needed.

    # remove dropout
    print("remove dropout in predict")
    tf.reset_default_graph()
    is_training = False
    input_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_ids')
    input_mask = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_mask')
    segment_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='segment_ids')
    labels = tf.placeholder(tf.int64, shape=[None, ], name='labels')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')  # , name='is_training'

    bert_config_ = load_bert_config(config["bert_config"])
    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config_, is_training, input_ids, input_mask, segment_ids, labels,
        keep_prob, num_labels, use_one_hot_embeddings)

    init_global = tf.global_variables_initializer()
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)  # keep only the exported model

    try:
        checkpoint = tf.train.get_checkpoint_state(config["out"])
        input_checkpoint = checkpoint.model_checkpoint_path
        print("[INFO] input_checkpoint:", input_checkpoint)
    except Exception as e:
        # Best effort: fall back to the configured output path when no
        # checkpoint state could be read from it.
        input_checkpoint = config["out"]
        print("[INFO] Model folder", config["out"], repr(e))

    with tf.Session() as sess:
        sess.run(init_global)
        saver.restore(sess, input_checkpoint)
        saver.save(sess, config["out_1"] + 'bert.ckpt')
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Builds the tagging model and returns the `TPUEstimatorSpec` for `mode`:
    a training spec, an evaluation spec reporting macro precision / recall /
    F1, or (for any other mode) a prediction spec carrying `predicts`.
    `labels` and `params` are never read. `bert_config`, `num_labels`,
    `init_checkpoint`, `learning_rate`, `num_train_steps`,
    `num_warmup_steps`, `use_tpu` and `use_one_hot_embeddings` are read from
    the enclosing scope.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (feature_name, features[feature_name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    # Built as in the stock BERT code, but not passed to the metrics below.
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    training = mode == tf.estimator.ModeKeys.TRAIN

    total_loss, per_example_loss, logits, predicts = create_model(
        bert_config, training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)

    trainable = tf.trainable_variables()
    restored_names = {}
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, restored_names = modeling.get_assignment_map_from_checkpoint(
            trainable, init_checkpoint)
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        origin = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name, variable.shape,
                        origin)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:
        # The eval metrics are task specific and must be adapted by hand.

        def metric_fn(per_example_loss, label_ids, logits):
            """Return macro precision, recall and F1 over the useful tag classes."""
            predicted = tf.argmax(logits, axis=-1, output_type=tf.int32)
            # Per the original author: 10 is the total number of classes and
            # 1-6 are the useful ones (B, I, E); both must be edited when the
            # label set changes. See the tf_metrics functions for details.
            class_count = 10
            scored_classes = [1, 2, 3, 4, 5, 6]
            precision = tf_metrics.precision(label_ids, predicted, class_count,
                                             scored_classes, average="macro")
            recall = tf_metrics.recall(label_ids, predicted, class_count,
                                       scored_classes, average="macro")
            f = tf_metrics.f1(label_ids, predicted, class_count,
                              scored_classes, average="macro")
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, [per_example_loss, label_ids, logits]),
            scaffold_fn=scaffold_fn)

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Builds the model and returns a `TPUEstimatorSpec` for TRAIN or PREDICT;
    any other mode raises `ValueError` after the graph has been built.
    `labels` and `params` are never read. `bert_config`, `num_labels`,
    `init_checkpoint`, `learning_rate`, `num_train_steps`,
    `num_warmup_steps`, `use_tpu` and `use_one_hot_embeddings` are read from
    the enclosing scope.

    Raises:
        ValueError: if `mode` is neither TRAIN nor PREDICT.
    """
    tf.logging.info("*** Features ***")
    for key in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (key, features[key].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    training = mode == tf.estimator.ModeKeys.TRAIN

    total_loss, per_example_loss, log_probs = create_model(
        bert_config, training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)

    trainable = tf.trainable_variables()
    scaffold_fn = None
    restored_names = []
    if init_checkpoint:
        assignment_map, restored_names = modeling.get_assignment_map_from_checkpoint(
            trainable, init_checkpoint)
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        origin = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name, variable.shape,
                        origin)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.PREDICT:
        outputs = {
            "log_probs": log_probs,
            "label_ids": label_ids,
        }
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=outputs, scaffold_fn=scaffold_fn)

    raise ValueError("Only TRAIN and PREDICT modes are supported: %s" % (mode))
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Builds the classifier graph and returns the `TPUEstimatorSpec` for
    `mode`: a training spec with a loss-logging hook, an evaluation spec
    reporting accuracy and recall@k, or (for any other mode) a prediction
    spec exposing the probabilities. `labels` and `params` are never read.
    `bert_config`, `num_labels`, `init_checkpoint`, `learning_rate`,
    `num_train_steps`, `num_warmup_steps`, `use_tpu` and
    `use_one_hot_embeddings` are read from the enclosing scope.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (feature_name, features[feature_name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    # Used as the `weights` argument of the tf.metrics functions.
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    training = mode == tf.estimator.ModeKeys.TRAIN

    # Author's note: this builds the model graph, downstream task included.
    # Nothing is computed here, which is why the helper is named "create"
    # rather than "calculate"; computation only starts once the estimator
    # runs the graph through a session.
    total_loss, per_example_loss, logits, probabilities = create_model(
        bert_config, training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)

    # The trainable variables exist once the graph has been built.
    trainable = tf.trainable_variables()
    restored_names = {}
    scaffold_fn = None  # author's note: "?? only needed for TPU"
    if init_checkpoint:
        # Author's note: init_checkpoint is the ckpt file given on the command
        # line (pre-trained BERT or an earlier run); only variables present
        # in both the checkpoint and this graph are mapped.
        assignment_map, restored_names = modeling.get_assignment_map_from_checkpoint(
            trainable, init_checkpoint)
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        origin = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name, variable.shape,
                        origin)

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Author's note: TPUEstimator does not show the loss when training
        # off-TPU, so a logging hook is attached.
        loss_logger = tf.train.LoggingTensorHook({"loss": total_loss}, every_n_iter=10)
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[loss_logger],
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits, probabilities, is_real_example):
            """Return accuracy plus recall@k computed over groups of `num_options` rows."""
            predicted = tf.argmax(logits, axis=-1, output_type=tf.int32)
            results = {}
            results["eval_accuracy"] = tf.metrics.accuracy(
                labels=label_ids, predictions=predicted, weights=is_real_example)

            num_options = 10  # 10 candidate options per group
            # Column 1 of the probabilities, regrouped to one row per group.
            positive_column = tf.slice(probabilities, [0, 1], [-1, 1])
            shaped_probabilities = tf.reshape(positive_column, [-1, num_options])
            # Author's note: the correct option is always at index 0; this is
            # a property of the data.
            group_count = tf.shape(shaped_probabilities)[0]
            shaped_label = tf.zeros([group_count, 1], dtype=tf.int64)

            # NOTE(review): the per-group weights are sliced out and then
            # immediately replaced by None, so recall@k runs unweighted.
            # Kept exactly as in the original; confirm whether intentional.
            shaped_is_real_example = tf.slice(
                tf.reshape(is_real_example, [-1, num_options]), [0, 0], [-1, 1])
            shaped_is_real_example = None

            for k in (1, 2, 5, 9, 10):
                results["recall@{}".format(k)] = tf.metrics.recall_at_k(
                    labels=shaped_label,
                    predictions=shaped_probabilities,
                    weights=shaped_is_real_example,
                    k=k)
            return results

        metric_inputs = [
            per_example_loss, label_ids, logits, probabilities, is_real_example
        ]
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities},
        scaffold_fn=scaffold_fn)
word_id_list.append(word_ids) return word_id_list # 初始化BERT model = modeling.BertModel(config=bert_config, is_training=False, input_ids=input_ids, input_mask=input_mask, token_type_ids=segment_ids, use_one_hot_embeddings=False) # 加载BERT模型 tvars = tf.trainable_variables() (assignment, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint( tvars, init_checkpoint) tf.train.init_from_checkpoint(init_checkpoint, assignment) # 获取最后一层和倒数第二层 encoder_last_layer = model.get_sequence_output() encoder_last2_layer = model.all_encoder_layers[-2] # 读取数据 token = tokenization.FullTokenizer(vocab_file=bert_vocab_file) # input_train_data = read_input(file_dir='../data/legal_domain/train_x_c.txt') input_train_data = read_input(file_dir='../data/cnews/train_x.txt') # input_val_data = read_input(file_dir='../data/legal_domain/val_x_c.txt') input_val_data = read_input(file_dir='../data/cnews/val_x.txt') # input_test_data = read_input(file_dir='../data/legal_domain/test_x_c.txt') input_test_data = read_input(file_dir='../data/cnews/test_x.txt')
def main(_):
    """Build the graph and run alternating train / validation epochs.

    Creates a timestamped run directory under ``FLAGS.output_dir``, wires a
    TFRecord input pipeline whose file list and shuffle buffer size are fed
    through placeholders, builds the model and optimizer, and, when
    ``FLAGS.do_train`` is set, runs one training pass followed by one
    validation pass for each of ``FLAGS.num_train_epochs`` epochs.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    print_configuration_op(FLAGS)
    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    # Each run gets its own <output_dir>/<unix timestamp> directory.
    output_root = FLAGS.output_dir
    if not os.path.exists(output_root):
        os.makedirs(output_root)
    run_dir = os.path.join(output_root, str(int(time())))
    tf.logging.info('root_path: {}'.format(run_dir))
    if not os.path.exists(run_dir):
        os.makedirs(run_dir)

    n_train = total_sample(FLAGS.train_dir)
    tf.logging.info('train data size: {}'.format(n_train))
    n_valid = total_sample(FLAGS.valid_dir)
    tf.logging.info('valid data size: {}'.format(n_valid))

    steps_per_epoch = n_train // FLAGS.train_batch_size
    num_train_steps = steps_per_epoch * FLAGS.num_train_epochs
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    # Input pipeline: the same iterator is re-initialised with different
    # files / shuffle buffer sizes for the training and validation passes.
    record_files = tf.placeholder(tf.string, shape=[None])
    shuffle_buffer = tf.placeholder(tf.int64)
    dataset = (
        tf.data.TFRecordDataset(record_files)
        .map(parse_exmp)  # Parse the record into tensors.
        .repeat(1)
        .shuffle(shuffle_buffer)
        .batch(FLAGS.train_batch_size)
    )
    iterator = dataset.make_initializable_iterator()
    ques_ids, ans_ids, sents, mask, segmentids, labels = iterator.get_next()
    pair_ids = [ques_ids, ans_ids, labels]

    is_training_ph = tf.placeholder(tf.bool)
    mean_loss, logits, probabilities, accuracy, model = create_model(
        bert_config,
        is_training=is_training_ph,
        input_ids=sents,
        input_mask=mask,
        segment_ids=segmentids,
        labels=labels,
        ques_ids=ques_ids,
        ans_ids=ans_ids,
        num_labels=1,
        use_one_hot_embeddings=False)

    # Initialise from the pre-trained checkpoint and report which variables
    # were covered by it.
    trainable_vars = tf.trainable_variables()
    if FLAGS.init_checkpoint:
        (assignment_map, restored_names) = \
            modeling.get_assignment_map_from_checkpoint(
                trainable_vars, FLAGS.init_checkpoint)
        tf.train.init_from_checkpoint(FLAGS.init_checkpoint, assignment_map)
        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable_vars:
            suffix = ""
            if variable.name in restored_names:
                suffix = ", *INIT_FROM_CKPT*"
            tf.logging.info(" name = %s, shape = %s%s",
                            variable.name, variable.shape, suffix)

    train_opt = optimization.create_optimizer(
        mean_loss, FLAGS.learning_rate, num_train_steps, num_warmup_steps,
        False)

    session_config = tf.ConfigProto(allow_soft_placement=True)
    session_config.gpu_options.allow_growth = True

    if not FLAGS.do_train:
        return

    with tf.Session(config=session_config) as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(FLAGS.num_train_epochs):
            tf.logging.info('Epoch {} training begin'.format(epoch))
            train_feed = {record_files: [FLAGS.train_dir],
                          shuffle_buffer: 1024}
            sess.run(iterator.initializer, feed_dict=train_feed)
            run_epoch(epoch, "train", sess, is_training_ph, logits, accuracy,
                      mean_loss, train_opt)

            tf.logging.info('Valid begin')
            # A shuffle buffer of 1 keeps validation records in file order.
            valid_feed = {record_files: [FLAGS.valid_dir], shuffle_buffer: 1}
            sess.run(iterator.initializer, feed_dict=valid_feed)
            run_test(epoch, run_dir, "valid", sess, is_training_ph, accuracy,
                     probabilities, pair_ids)
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for the token-tagging model.

    Reads `input_ids`, `mask`, `segment_ids` and `label_ids` from `features`,
    builds the model through `create_model`, optionally restores weights from
    `init_checkpoint`, and returns a spec for TRAIN, EVAL (streaming
    confusion matrix) or, for any other mode, prediction.

    Args:
      features: dict of input tensors keyed by feature name.
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec` for the requested mode.
    """
    logging.info("*** Features ***")
    for name in sorted(features.keys()):
        logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    mask = features["mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # The former `if FLAGS.crf: ... else: ...` called create_model with
    # identical arguments in both branches, so a single call is equivalent.
    (total_loss, logits, predicts) = create_model(
        bert_config, is_training, input_ids, mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, _) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        # Register the checkpoint initialisation exactly once: through the
        # scaffold when running on TPU, directly otherwise.
        if use_tpu:
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(label_ids, logits, num_labels, mask):
            predictions = tf.math.argmax(logits, axis=-1, output_type=tf.int32)
            # NOTE(review): the matrix is sized with num_labels - 1 classes;
            # confirm this matches the label id range produced by the data.
            cm = metrics.streaming_confusion_matrix(
                label_ids, predictions, num_labels - 1, weights=mask)
            return {"confusion_matrix": cm}

        eval_metrics = (metric_fn, [label_ids, logits, num_labels, mask])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for the token-tagging model.

    Reads `input_ids`, `input_mask`, `segment_ids` and `label_ids` from
    `features`, builds the model through `create_model`, optionally restores
    weights from `init_checkpoint`, and returns a spec for TRAIN, EVAL
    (macro precision / recall / F1 from `tf_metrics`) or PREDICT.

    Args:
      features: dict of input tensors keyed by feature name.
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec`, or None when `mode` is none of
      TRAIN, EVAL or PREDICT.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, predictsDict) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)
    if not FLAGS.create_SavedModel:
        # Gives problems with savedmodel!!
        predictsDict["input_mask"] = input_mask

    tvars = tf.trainable_variables()
    # Default to "nothing restored" so the logging loop below also works when
    # no init_checkpoint is configured.
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        # Register the checkpoint initialisation exactly once: through the
        # scaffold when running on TPU, directly otherwise.
        if use_tpu:
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits, num_labels):
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            # Metrics are macro-averaged over the class ids listed in [1, 2].
            precision = tf_metrics.precision(
                label_ids, predictions, num_labels, [1, 2], average="macro")
            recall = tf_metrics.recall(
                label_ids, predictions, num_labels, [1, 2], average="macro")
            f = tf_metrics.f1(
                label_ids, predictions, num_labels, [1, 2], average="macro")
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        eval_metrics = (metric_fn,
                        [per_example_loss, label_ids, logits, num_labels])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.PREDICT:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictsDict, scaffold_fn=scaffold_fn)
    return output_spec
def main(opts):
    """Run training, evaluation and/or prediction for a classification task.

    Depending on `opts["do_training"]`, `opts["do_eval"]` and
    `opts["do_predict"]`, this builds the corresponding graph with
    `build_graph`, optionally restores a checkpoint, runs the loop and writes
    checkpoints, TensorBoard summaries, `eval_results.txt` and
    `predict_results.txt`.

    Args:
      opts: dict of run options. It is mutated: `task_name`, `pass_in`,
        `current_mode`, `eval_batch_size` and `predict_batch_size` are
        written by this function.

    Raises:
      ValueError: if the task name is not one of the known processors.
      tf.errors.ResourceExhaustedError: re-raised from any `tf.errors.OpError`
        hit while running a training or inference step.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # Set up for synthetic data.
    if opts["synthetic_data"] or opts["generated_data"]:
        opts['task_name'] = 'synthetic'
        if opts['task_type'] == 'regression':
            opts['task_name'] = 'synthetic_regression'
    print(opts['task_name'])
    print(opts['task_type'])

    processors = {
        "cola": glue_data.ColaProcessor,
        "mnli": glue_data.MnliProcessor,
        "mrpc": glue_data.MrpcProcessor,
        "sst2": glue_data.Sst2Processor,
        "stsb": glue_data.StsbProcessor,
        "qqp": glue_data.QqpProcessor,
        "qnli": glue_data.QnliProcessor,
        "rte": glue_data.RteProcessor,
        "wnli": glue_data.WnliProcessor,
        "mnli-mm": glue_data.MnliMismatchProcessor,
        "ax": glue_data.AxProcessor,
        "synthetic": glue_data.SyntheticProcessor,
        "synthetic_regression": glue_data.SyntheticProcessorRegression,
    }

    tokenization.validate_case_matches_checkpoint(
        do_lower_case=opts["do_lower_case"],
        init_checkpoint=opts["init_checkpoint"])

    tf.gfile.MakeDirs(opts["output_dir"])

    task_name = opts["task_name"].lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()
    tokenizer = tokenization.FullTokenizer(
        vocab_file=opts["vocab_file"], do_lower_case=opts["do_lower_case"])
    opts["pass_in"] = (processor, label_list, tokenizer)

    train_examples = None
    num_warmup_steps = None
    # Bound up front so eval-only runs without an init checkpoint do not hit
    # an unbound name when deciding whether to restore.
    finetuned_checkpoint_path = None
    # So many iterations will be run for one step.
    iterations_per_step = opts['batches_per_step']

    if opts["do_training"]:
        train_examples = processor.get_train_examples(opts["data_dir"])
        num_train_steps = int(
            len(train_examples) / opts["total_batch_size"] * opts['epochs'])
        iterations_per_epoch = len(train_examples) // opts["total_batch_size"]
        if opts.get('num_train_steps'):
            # total iterations
            iterations = opts['num_train_steps'] * opts['batches_per_step']
        else:
            iterations = iterations_per_epoch * opts['epochs']
        num_warmup_steps = int(iterations * opts["warmup"])

        tf.logging.info("***** Running training *****")
        tf.logging.info(f" Num examples = {len(train_examples)}")
        tf.logging.info(f" Micro batch size = {opts['micro_batch_size']}")
        tf.logging.info(f" Num steps / epoch = {iterations_per_epoch}")
        tf.logging.info(f" Num iterations = {iterations}")
        tf.logging.info(f" Num steps = {num_train_steps}")
        tf.logging.info(f" Warm steps = {num_warmup_steps}")
        tf.logging.info(f" Warm frac = {opts['warmup']}")

        # Learning rate schedule. It depends on `iterations`, which only
        # exists when training, so it is built under the same guard.
        lr_schedule_name = opts['lr_schedule']
        logger.info(f"Using learning rate schedule {lr_schedule_name}")
        learning_rate_schedule = make_lr_schedule(lr_schedule_name, opts,
                                                  iterations)

    if opts["do_training"]:
        log_iterations = opts['batches_per_step'] * opts["steps_per_logs"]

        # -------------- BUILD TRAINING GRAPH ----------------
        opts['current_mode'] = 'train'
        train = build_graph(opts, iterations_per_step, is_training=True)
        train.session.run(train.init)
        train.session.run(train.iterator.initializer)

        # Checkpoints load and save
        init_checkpoint_path = opts['init_checkpoint']
        if init_checkpoint_path:
            if os.path.isfile(init_checkpoint_path):
                # A concrete file was given; strip its extension to obtain
                # the checkpoint prefix.
                init_checkpoint_path = os.path.splitext(
                    init_checkpoint_path)[0]

            (assignment_map, initialized_variable_names
             ) = bert_ipu.get_assignment_map_from_checkpoint(
                 train.tvars, init_checkpoint_path)

            for var in train.tvars:
                if var.name in initialized_variable_names:
                    mark = "*"
                else:
                    mark = " "
                logger.info("%-60s [%s]\t%s (%s)", var.name, mark, var.shape,
                            var.dtype.name)

            reader = tf.train.NewCheckpointReader(init_checkpoint_path)
            load_vars = reader.get_variable_to_shape_map()

            saver_restore = tf.train.Saver(assignment_map)
            saver_restore.restore(train.session, init_checkpoint_path)

        # None means periodic checkpointing is disabled; the training loop
        # checks for it instead of failing on an unbound name.
        ckpt_iterations = None
        if opts['steps_per_ckpts']:
            filepath = train.saver.save(train.session,
                                        opts["checkpoint_path"],
                                        global_step=0)
            logger.info(f"Saved checkpoint to {filepath}")
            ckpt_iterations = opts['batches_per_step'] * \
                opts["steps_per_ckpts"]

        # Tensorboard logs path
        log_path = os.path.join(opts["logs_path"], 'event')
        logger.info("Tensorboard event file path {}".format(log_path))
        summary_writer = tf.summary.FileWriter(log_path,
                                               train.graph,
                                               session=train.session)
        start_time = datetime.datetime.now()

        # Training loop
        is_regression = opts['task_type'] == 'regression'
        if is_regression:
            print_format = (
                "step: {step:6d}, iteration: {iteration:6d} ({percent_done:.3f}%), epoch: {epoch:6.2f}, lr: {lr:6.4g}, loss: {loss:6.3f}, pearson: {pearson:6.3f}, spearman: {spearman:6.3f}, "
                "throughput {throughput_samples_per_sec:6.2f} samples/sec, batch time: {avg_batch_time:8.6f} s, total_time: {total_time:8.1f} s"
            )
        else:
            print_format = (
                "step: {step:6d}, iteration: {iteration:6d} ({percent_done:.3f}%), epoch: {epoch:6.2f}, lr: {lr:6.4g}, loss: {loss:6.3f}, acc: {acc:6.3f}, "
                "throughput {throughput_samples_per_sec:6.2f} samples/sec, batch time: {avg_batch_time:8.6f} s, total_time: {total_time:8.1f} s"
            )

        step = 0
        start_all = time.time()
        i = 0
        total_samples = len(train_examples)
        while i < iterations:
            step += 1
            epoch = float(opts["total_batch_size"] * i) / total_samples
            learning_rate = learning_rate_schedule.get_at_step(step)

            try:
                if is_regression:
                    loss, pred, batch_time, pearson, spearman = training_step(
                        train, learning_rate, i, opts)
                else:
                    loss, batch_time, acc, mean_preds = training_step(
                        train, learning_rate, i, opts)
            except tf.errors.OpError as e:
                raise tf.errors.ResourceExhaustedError(e.node_def, e.op,
                                                       e.message)

            batch_time /= iterations_per_step
            avg_batch_time = batch_time

            if i % log_iterations == 0:
                throughput = opts['total_batch_size'] / avg_batch_time
                total_time = time.time() - start_all

                if is_regression:
                    task_metrics = [('pearson', pearson),
                                    ('spearman', spearman)]
                else:
                    task_metrics = [('acc', acc)]
                stats = OrderedDict(
                    [('step', step),
                     ('iteration', i + iterations_per_step),
                     ('percent_done', i / iterations * 100),
                     ('epoch', epoch),
                     ('lr', learning_rate),
                     ('loss', loss)]
                    + task_metrics
                    + [('avg_batch_time', avg_batch_time),
                       ('throughput_samples_per_sec', throughput),
                       ('total_time', total_time),
                       ('learning_rate', learning_rate)])
                logger.info(print_format.format(**stats))

                train_summary = tf.Summary()
                train_summary.value.add(tag='epoch', simple_value=epoch)
                train_summary.value.add(tag='loss', simple_value=loss)
                if is_regression:
                    train_summary.value.add(tag='pearson',
                                            simple_value=pearson)
                    train_summary.value.add(tag='spearman',
                                            simple_value=spearman)
                else:
                    train_summary.value.add(tag='acc', simple_value=acc)
                train_summary.value.add(tag='learning_rate',
                                        simple_value=learning_rate)
                # The tag spelling is kept as-is: it is the name already
                # recorded in existing event files.
                train_summary.value.add(tag='througput',
                                        simple_value=throughput)

                if opts['wandb']:
                    wandb.log(dict(stats))
                summary_writer.add_summary(train_summary, step)
                summary_writer.flush()

            if ckpt_iterations and i % ckpt_iterations == 0 and i > 1:
                filepath = train.saver.save(train.session,
                                            opts["checkpoint_path"],
                                            global_step=i +
                                            iterations_per_step)
                logger.info(f"Saved checkpoint to {filepath}")

            i += iterations_per_step

        # We save the final checkpoint
        finetuned_checkpoint_path = train.saver.save(
            train.session,
            opts["checkpoint_path"],
            global_step=i + iterations_per_step)
        logger.info(f"Saved checkpoint to {finetuned_checkpoint_path}")
        train.session.close()
        end_time = datetime.datetime.now()
        # total_seconds() covers the whole span; `.seconds` is only the
        # seconds component and wraps around after one day.
        consume_time = int((end_time - start_time).total_seconds())
        logger.info(f"training times: {consume_time} s")

    if opts["do_eval"]:
        eval_examples = processor.get_dev_examples(opts["data_dir"])
        num_actual_eval_examples = len(eval_examples)
        opts["eval_batch_size"] = opts['micro_batch_size'] * \
            opts['gradient_accumulation_count']

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info(" Evaluate batch size = %d", opts["eval_batch_size"])

        iterations_per_step = 1
        opts['current_mode'] = 'eval'
        predict = build_graph(opts, iterations_per_step, is_training=False)
        predict.session.run(predict.init)
        predict.session.run(predict.iterator.initializer)

        # Without a preceding training phase, evaluate the checkpoint given on
        # the command line; otherwise reuse the one training just saved.
        if opts["init_checkpoint"] and not opts['do_training'] and opts[
                'do_eval']:
            finetuned_checkpoint_path = opts['init_checkpoint']

        if finetuned_checkpoint_path:
            print("********** RESTORING FROM CHECKPOINT *************")
            (assignment_map, _initialized_variable_names
             ) = bert_ipu.get_assignment_map_from_checkpoint(
                 predict.tvars, finetuned_checkpoint_path)
            saver_restore = tf.train.Saver(assignment_map)
            saver_restore.restore(predict.session, finetuned_checkpoint_path)
            print("Done.")

        i = 0
        all_time_consumption = []
        # NOTE(review): the "+ 1" runs one extra step even when the example
        # count is an exact multiple of the batch size - confirm intended.
        iterations = int(
            len(eval_examples) //
            (opts['micro_batch_size'] * opts['gradient_accumulation_count']) +
            1)
        all_accs = []
        all_pearson = []
        all_spearman = []
        all_loss = []
        output_eval_file = os.path.join(opts['output_dir'],
                                        "eval_results.txt")
        while i < iterations:
            try:
                start = time.time()
                tmp_output = predict_step(predict)
                if opts['task_type'] == 'regression':
                    all_pearson.append(tmp_output['pearson'])
                    all_spearman.append(tmp_output['spearman'])
                else:
                    all_accs.append(tmp_output['acc'])
                all_loss.append(tmp_output['loss'])
                duration = time.time() - start
                all_time_consumption.append(duration /
                                            opts["batches_per_step"])
            except tf.errors.OpError as e:
                raise tf.errors.ResourceExhaustedError(e.node_def, e.op,
                                                       e.message)

            i += iterations_per_step
            if len(all_loss) % 1000 == 0:
                logger.info(f"Procesing example: {len(all_loss)}")

        # The averages are added to the last step's output dict, which is
        # then written out as the result file.
        if opts['task_type'] == 'regression':
            tmp_output['average_pearson'] = np.mean(all_pearson)
            tmp_output['average_spearman'] = np.mean(all_spearman)
        else:
            tmp_output['average_acc'] = np.mean(all_accs)
        tmp_output['average_loss'] = np.mean(all_loss)

        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(tmp_output.keys()):
                tf.logging.info(" %s = %s", key, str(tmp_output[key]))
                writer.write("%s = %s\n" % (key, str(tmp_output[key])))

        # The time consumption of First 10 steps is not stable for time
        # measurement.
        if len(all_time_consumption) >= 10 * 2:
            all_time_consumption = np.array(all_time_consumption[10:])
        else:
            logger.warning(
                "if the first 10 steps is counted, the measurement of throughtput and latency is not accurate."
            )
            all_time_consumption = np.array(all_time_consumption)

        eval_throughput = (
            opts['micro_batch_size'] * opts['gradient_accumulation_count']
        ) / all_time_consumption.mean()
        logger.info((
            f"inference throughput: {eval_throughput} "
            f"exmples/sec - Latency: {all_time_consumption.mean()} {all_time_consumption.min()} "
            f"{all_time_consumption.max()} (mean min max) sec "))
    # Done evaluations

    if opts["do_predict"]:
        predict_examples = processor.get_test_examples(opts["data_dir"])
        num_actual_predict_examples = len(predict_examples)
        opts["predict_batch_size"] = opts['micro_batch_size'] * \
            opts['gradient_accumulation_count']

        tf.logging.info("***** Running prediction *****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info(" Predict batch size = %d",
                        opts["predict_batch_size"])

        iterations_per_step = 1
        opts['current_mode'] = 'predict'
        prediction = build_graph(opts, iterations_per_step, is_training=False)
        prediction.session.run(prediction.init)
        prediction.session.run(prediction.iterator.initializer)

        # NOTE(review): unlike the eval phase, a checkpoint produced by a
        # preceding training phase is discarded here (path reset to False),
        # so prediction only restores weights in predict-only runs - confirm
        # this is intended.
        if opts["init_checkpoint"] and not opts['do_training'] and opts[
                'do_predict']:
            finetuned_checkpoint_path = opts['init_checkpoint']
        else:
            finetuned_checkpoint_path = False

        if finetuned_checkpoint_path:
            print("********** RESTORING FROM CHECKPOINT *************")
            (assignment_map, _initialized_variable_names
             ) = bert_ipu.get_assignment_map_from_checkpoint(
                 prediction.tvars, finetuned_checkpoint_path)
            saver_restore = tf.train.Saver(assignment_map)
            saver_restore.restore(prediction.session,
                                  finetuned_checkpoint_path)
            print("Done.")

        i = 0
        all_time_consumption = []
        iterations = int(
            len(predict_examples) //
            (opts['micro_batch_size'] * opts['gradient_accumulation_count']) +
            1)
        all_preds = []
        output_predict_file = os.path.join(opts['output_dir'],
                                           "predict_results.txt")
        while i < iterations:
            try:
                start = time.time()
                tmp_output = predict_step(prediction)
                all_preds.append(tmp_output['preds'])
                duration = time.time() - start
                all_time_consumption.append(duration /
                                            opts["batches_per_step"])
            except tf.errors.OpError as e:
                raise tf.errors.ResourceExhaustedError(e.node_def, e.op,
                                                       e.message)
            i += iterations_per_step

        all_preds = np.array(all_preds)
        all_preds = all_preds.flatten()

        headers = ["index", "prediction"]
        # For these tasks the raw predictions are passed through
        # glue_data.get_output_labels before being written.
        name_list = ["mnli", "mnli-mm", "ax", "qnli", "rte"]
        if task_name in name_list:
            all_preds = glue_data.get_output_labels(opts, all_preds)

        with tf.gfile.GFile(output_predict_file, "w") as writer:
            tf.logging.info("***** Predict results writing*****")
            # Only the first len(predict_examples) predictions are written.
            for i in range(len(predict_examples)):
                if i == 0:
                    writer.write("%s\t%s\n" %
                                 (str(headers[0]), str(headers[1])))
                output_line = "%s\t%s\n" % (i, all_preds[i])
                writer.write(output_line)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the `TPUEstimatorSpec` for span prediction.

    Supports TRAIN (mean of the start- and end-position cross-entropy
    losses) and PREDICT (unique ids plus start/end logits).

    Raises:
      ValueError: for any mode other than TRAIN or PREDICT.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" %
                        (feature_name, features[feature_name].shape))

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    training = mode == tf.estimator.ModeKeys.TRAIN
    (start_logits, end_logits) = create_model(
        bert_config=bert_config,
        is_training=training,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    trainable_vars = tf.trainable_variables()
    restored_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, restored_names
         ) = modeling.get_assignment_map_from_checkpoint(trainable_vars,
                                                         init_checkpoint)

        def _restore():
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        if not use_tpu:
            _restore()
        else:
            # On TPU the restore is deferred to the scaffold function.
            def tpu_scaffold():
                _restore()
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
        suffix = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, suffix)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={
                "unique_ids": unique_ids,
                "start_logits": start_logits,
                "end_logits": end_logits,
            },
            scaffold_fn=scaffold_fn)

    if mode != tf.estimator.ModeKeys.TRAIN:
        raise ValueError(
            "Only TRAIN and PREDICT modes are supported: %s" % (mode))

    seq_length = modeling.get_shape_list(input_ids)[1]

    def _position_loss(logits, positions):
        # Cross-entropy between the softmax over sequence positions and the
        # one-hot target position, averaged over the batch.
        targets = tf.one_hot(positions, depth=seq_length, dtype=tf.float32)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        per_example = tf.reduce_sum(targets * log_probs, axis=-1)
        return -tf.reduce_mean(per_example)

    start_positions = features["start_positions"]
    end_positions = features["end_positions"]
    start_loss = _position_loss(start_logits, start_positions)
    end_loss = _position_loss(end_logits, end_positions)
    total_loss = (start_loss + end_loss) / 2.0

    train_op = optimization.create_optimizer(
        total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):
    """Build the `EstimatorSpec` for the sequence classifier.

    Reads `input_ids`, `input_mask`, `segment_ids`, `label_ids` and the
    optional `is_real_example` from `features`, builds the model through
    `create_model`, and returns a spec for TRAIN (with a streaming accuracy
    metric), EVAL (accuracy, loss, precision, recall, F1 and confusion
    counts) or, for any other mode, prediction.

    All evaluation metrics are weighted by `is_real_example`, so examples
    whose weight is zero do not contribute to any of them.

    Args:
      features: dict of input tensors keyed by feature name.
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `tf.estimator.EstimatorSpec` for the requested mode.
    """
    tf.compat.v1.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.compat.v1.logging.info(" name = %s, shape = %s" %
                                  (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)
    else:
        # No marker supplied: weight every example equally.
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    # With Horovod only rank 0 loads the checkpoint.
    if init_checkpoint and (hvd is None or hvd.rank() == 0):
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.compat.v1.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.compat.v1.logging.info(" name = %s, shape = %s%s", var.name,
                                  var.shape, init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            hvd, False, use_fp16)

        def metric_fn(per_example_loss, label_ids, logits, is_real_example):
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int64)
            accuracy = tf.compat.v1.metrics.accuracy(
                labels=label_ids,
                predictions=predictions,
                weights=is_real_example)
            return {"eval_accuracy": accuracy}

        eval_metric_ops = metric_fn(per_example_loss, label_ids, logits,
                                    is_real_example)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits, is_real_example):
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int64)
            accuracy = tf.compat.v1.metrics.accuracy(
                labels=label_ids,
                predictions=predictions,
                weights=is_real_example)
            loss = tf.compat.v1.metrics.mean(values=per_example_loss,
                                             weights=is_real_example)
            recall, op_rec = tf.compat.v1.metrics.recall(
                labels=label_ids,
                predictions=predictions,
                weights=is_real_example)
            precision, op_prec = tf.compat.v1.metrics.precision(
                labels=label_ids,
                predictions=predictions,
                weights=is_real_example)

            # The confusion counts use the same example weights as the
            # metrics above so they all describe the same set of examples.
            false_neg = tf.compat.v1.metrics.false_negatives(
                labels=label_ids,
                predictions=predictions,
                weights=is_real_example)
            false_pos = tf.compat.v1.metrics.false_positives(
                labels=label_ids,
                predictions=predictions,
                weights=is_real_example)
            true_pos = tf.compat.v1.metrics.true_positives(
                labels=label_ids,
                predictions=predictions,
                weights=is_real_example)
            true_neg = tf.compat.v1.metrics.true_negatives(
                labels=label_ids,
                predictions=predictions,
                weights=is_real_example)

            # divide_no_nan yields 0 rather than NaN when precision and
            # recall are both 0 (no positives predicted or present).
            f1 = tf.math.divide_no_nan(2 * (precision * recall),
                                       precision + recall)
            f1_op = tf.group(op_rec, op_prec, tf.identity(f1, name="f1"))

            return {
                "eval_accuracy": accuracy,
                "eval_loss": loss,
                "recall": (recall, op_rec),
                "precision": (precision, op_prec),
                "f-score": (f1, f1_op),
                "tp": true_pos,
                "tn": true_neg,
                "fp": false_pos,
                "fn": false_neg,
            }

        eval_metric_ops = metric_fn(per_example_loss, label_ids, logits,
                                    is_real_example)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, loss=total_loss, eval_metric_ops=eval_metric_ops)
    else:
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                "probabilities": probabilities,
                "logits": logits,
            })
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the `TPUEstimatorSpec` for the sequence classifier.

    TRAIN returns the loss and train op, EVAL returns streaming accuracy and
    mean loss (both weighted by `is_real_example`), and any other mode
    returns the class probabilities as predictions.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" %
                        (feature_name, features[feature_name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    # Fall back to a weight of 1.0 per example when no marker is supplied.
    if "is_real_example" not in features:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    else:
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)

    training = mode == tf.estimator.ModeKeys.TRAIN
    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config, training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)

    trainable_vars = tf.trainable_variables()
    restored_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, restored_names
         ) = modeling.get_assignment_map_from_checkpoint(trainable_vars,
                                                         init_checkpoint)

        def _restore():
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        if not use_tpu:
            _restore()
        else:
            # On TPU the restore is deferred to the scaffold function.
            def tpu_scaffold():
                _restore()
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
        suffix = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, suffix)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits, is_real_example):
            predicted_ids = tf.argmax(logits, axis=-1, output_type=tf.int32)
            return {
                "eval_accuracy": tf.metrics.accuracy(
                    labels=label_ids,
                    predictions=predicted_ids,
                    weights=is_real_example),
                "eval_loss": tf.metrics.mean(
                    values=per_example_loss, weights=is_real_example),
            }

        metric_inputs = [per_example_loss, label_ids, logits, is_real_example]
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities},
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the `EstimatorSpec` for the requested estimator mode.

    Reads `input_ids`, `input_mask`, `segment_ids` and `label_ids` from
    `features`, builds the model through `BertSim.create_model`, optionally
    initializes trainable variables from `init_checkpoint`, and returns a
    spec matching `mode`.

    Args:
        features: Mapping from feature name to tensor. Must contain the four
            keys listed above.
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value.
        params: Unused.

    Returns:
        A `tf.estimator.EstimatorSpec` carrying the loss and train op
        (TRAIN), the loss and eval metric ops (EVAL), or the probabilities
        tensor as predictions (any other mode).
    """
    tf.compat.v1.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        # Lazy logger arguments, matching the variable-logging call below.
        tf.compat.v1.logging.info(
            " name = %s, shape = %s", name, features[name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits,
     probabilities) = BertSim.create_model(
         bert_config, is_training, input_ids, input_mask, segment_ids,
         label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}

    if init_checkpoint:
        (assignment_map, initialized_variable_names) = (
            modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint))
        tf.compat.v1.train.init_from_checkpoint(
            init_checkpoint, assignment_map)

    tf.compat.v1.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.compat.v1.logging.info(" name = %s, shape = %s%s", var.name,
                                  var.shape, init_string)

    # The public `tf.estimator.EstimatorSpec` alias is used instead of
    # importing from the private `tensorflow.python.estimator` package, whose
    # module path is not a stable API.
    if mode == tf.estimator.ModeKeys.TRAIN:
        # The trailing positional argument is passed as False; presumably it
        # is the `use_tpu` flag -- confirm against
        # `optimization.create_optimizer`.
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            False)
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=total_loss, train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits):
            """Return accuracy, AUC and mean-loss metric ops."""
            predictions = tf.argmax(
                input=logits, axis=-1, output_type=tf.int32)
            accuracy = tf.compat.v1.metrics.accuracy(label_ids, predictions)
            # NOTE(review): AUC is computed from the argmax class ids, not
            # from `probabilities`; kept as-is to preserve reported metrics.
            auc = tf.compat.v1.metrics.auc(label_ids, predictions)
            loss = tf.compat.v1.metrics.mean(per_example_loss)
            return {
                "eval_accuracy": accuracy,
                "eval_auc": auc,
                "eval_loss": loss,
            }

        # Unlike a TPU spec, `eval_metric_ops` takes the metric dict itself,
        # so the metric function is called right here.
        eval_metrics = metric_fn(per_example_loss, label_ids, logits)
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=total_loss, eval_metric_ops=eval_metrics)

    # Any remaining mode only exposes the probabilities tensor.
    return tf.estimator.EstimatorSpec(mode=mode, predictions=probabilities)