def add_init_checkpoint(init_checkpoint, scope=None):
    """Queues `init_checkpoint` for variable initialization, if one is given.

    Does nothing when `init_checkpoint` is falsy. Otherwise an assignment map
    for `tvars` is computed with `modeling.get_assignment_map_from_checkpoint`,
    the keys of the returned `initialized_variables` mapping are added to
    `initialized_variable_names`, and `(init_checkpoint, assignment_map)` is
    appended to `init_from_checkpoints`.

    NOTE(review): `tvars`, `initialized_variable_names` and
    `init_from_checkpoints` are not defined in this function; presumably they
    are captured from the enclosing scope -- confirm against the caller.

    Args:
      init_checkpoint: Checkpoint to initialize from; falsy values are ignored.
      scope: Forwarded as `scope` to
        `modeling.get_assignment_map_from_checkpoint`.
    """
    if init_checkpoint:
        mapping, restored = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint, scope=scope)
        initialized_variable_names.update(restored.keys())
        init_from_checkpoints.append((init_checkpoint, mapping))
def model_fn(features, labels, mode, params):
    """The `model_fn` for TPUEstimator.

    Builds the table BERT model, attaches the masked-LM and next-sentence
    heads, sums their losses and returns a `TPUEstimatorSpec` for `mode`.

    NOTE(review): configuration such as `bert_config`, `init_checkpoint`,
    `use_tpu`, `learning_rate`, `num_train_steps`, `num_warmup_steps` and the
    attention / position-embedding options is not defined in this function;
    presumably it is captured from an enclosing builder scope.

    Args:
      features: Mapping from feature name to tensor. Must provide
        "masked_lm_positions", "masked_lm_ids", "masked_lm_weights" and
        "next_sentence_labels"; the whole mapping is also passed to
        `table_bert.create_model`.
      labels: Unused.
      mode: One of the `tf_estimator.ModeKeys` values.
      params: Unused.

    Returns:
      A `tf_estimator.tpu.TPUEstimatorSpec` for the requested mode.

    Raises:
      ValueError: If `mode` is not TRAIN, EVAL or PREDICT.
    """
    del labels, params  # Unused.

    tf.logging.info("*** Features ***")
    for feature_name in sorted(features):
        tf.logging.info(" name = %s, shape = %s", feature_name,
                        features[feature_name].shape)

    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]
    next_sentence_labels = features["next_sentence_labels"]

    model = table_bert.create_model(
        features=features,
        mode=mode,
        bert_config=bert_config,
        restrict_attention_mode=restrict_attention_mode,
        restrict_attention_bucket_size=restrict_attention_bucket_size,
        restrict_attention_header_size=restrict_attention_header_size,
        restrict_attention_row_heads_ratio=(
            restrict_attention_row_heads_ratio),
        disabled_features=disabled_features,
        disable_position_embeddings=disable_position_embeddings,
        reset_position_index_per_cell=reset_position_index_per_cell,
        proj_value_length=proj_value_length,
        attention_bias_disabled=attention_bias_disabled,
        attention_bias_use_relative_scalar_only=(
            attention_bias_use_relative_scalar_only),
    )

    masked_lm_outputs = _get_masked_lm_output(
        bert_config, model.get_sequence_output(), model.get_embedding_table(),
        masked_lm_positions, masked_lm_ids, masked_lm_weights)
    (masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs,
     masked_lm_predictions) = masked_lm_outputs

    next_sentence_outputs = _get_next_sentence_output(
        bert_config, model.get_pooled_output(), next_sentence_labels)
    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = next_sentence_outputs

    total_loss = masked_lm_loss + next_sentence_loss

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        # Variables whose name contains "position_embeddings" are left out of
        # the assignment map, so they are not restored from the checkpoint.
        init_tvars = [
            variable for variable in tvars
            if "position_embeddings" not in variable.name
        ]
        assignment_map, initialized_variable_names = (
            modeling.get_assignment_map_from_checkpoint(
                init_tvars, init_checkpoint))
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = (", *INIT_FROM_CKPT*"
                       if var.name in initialized_variable_names else "")
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    if mode == tf_estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return tf_estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf_estimator.ModeKeys.EVAL:

        def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                      masked_lm_ids, masked_lm_weights,
                      next_sentence_example_loss, next_sentence_log_probs,
                      next_sentence_labels):
            """Computes the loss and accuracy of the model."""
            metrics = {}

            # Masked-LM metrics, weighted by `masked_lm_weights`.
            mlm_log_probs_2d = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            mlm_predicted_ids = tf.argmax(
                mlm_log_probs_2d, axis=-1, output_type=tf.int32)
            mlm_loss_flat = tf.reshape(masked_lm_example_loss, [-1])
            mlm_ids_flat = tf.reshape(masked_lm_ids, [-1])
            mlm_weights_flat = tf.reshape(masked_lm_weights, [-1])
            metrics["masked_lm_accuracy"] = tf.metrics.accuracy(
                labels=mlm_ids_flat,
                predictions=mlm_predicted_ids,
                weights=mlm_weights_flat)
            metrics["masked_lm_loss"] = tf.metrics.mean(
                values=mlm_loss_flat, weights=mlm_weights_flat)

            # Next-sentence metrics (unweighted).
            nsp_log_probs_2d = tf.reshape(
                next_sentence_log_probs,
                [-1, next_sentence_log_probs.shape[-1]])
            nsp_predicted_labels = tf.argmax(
                nsp_log_probs_2d, axis=-1, output_type=tf.int32)
            nsp_labels_flat = tf.reshape(next_sentence_labels, [-1])
            metrics["next_sentence_accuracy"] = tf.metrics.accuracy(
                labels=nsp_labels_flat, predictions=nsp_predicted_labels)
            metrics["next_sentence_loss"] = tf.metrics.mean(
                values=next_sentence_example_loss)

            return metrics

        metric_inputs = [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, next_sentence_example_loss,
            next_sentence_log_probs, next_sentence_labels
        ]
        return tf_estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)

    if mode == tf_estimator.ModeKeys.PREDICT:
        predictions = {"masked_lm_predictions": masked_lm_predictions}
        return tf_estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)

    raise ValueError("Unsupported mode: %s" % mode)
def model_fn(features, labels, mode, params):
    """The `model_fn` for TPUEstimator.

    Builds the table BERT model, computes the classification outputs and
    loss via `_get_classification_outputs`, and returns a `TPUEstimatorSpec`
    for `mode`. TRAIN adds an optimizer, EVAL attaches
    `_calculate_eval_metrics_fn`, and every other mode yields predictions.

    NOTE(review): `config` is not defined in this function; presumably it is
    captured from an enclosing builder scope.

    Args:
      features: Mapping from feature name to tensor. Always read: "label_ids",
        "input_mask", "row_ids", "column_ids". Read depending on `config`:
        "aggregation_function_id", "classification_class_index", "answer",
        "numeric_values", "numeric_values_scale". Read when predicting:
        "segment_ids", "question_id_ints" and, if present, "question_id".
      labels: Unused.
      mode: One of the `tf.estimator.ModeKeys` values.
      params: Mapping queried for "gradient_accumulation_steps" (default 1)
        when training.

    Returns:
      A `tf.estimator.tpu.TPUEstimatorSpec` for the requested mode.
    """
    del labels  # Unused.

    tf.logging.info("*** Features ***")
    for feature_name in sorted(features):
        tf.logging.info(" name = %s, shape = %s", feature_name,
                        features[feature_name].shape)

    label_ids = features["label_ids"]
    input_mask = features["input_mask"]
    row_ids = features["row_ids"]
    column_ids = features["column_ids"]

    # Table cells only, without question tokens and table headers.
    is_table_cell = row_ids > 0
    table_mask = tf.where(is_table_cell, tf.ones_like(row_ids),
                          tf.zeros_like(row_ids))

    do_model_aggregation = config.num_aggregation_labels > 0
    if do_model_aggregation:
        aggregation_function_id = tf.squeeze(
            features["aggregation_function_id"], axis=[1])
    else:
        aggregation_function_id = None

    do_model_classification = config.num_classification_labels > 0
    if do_model_classification:
        classification_class_index = tf.squeeze(
            features["classification_class_index"], axis=[1])
    else:
        classification_class_index = None

    is_training = mode == tf.estimator.ModeKeys.TRAIN

    model = table_bert.create_model(
        features=features,
        mode=mode,
        bert_config=config.bert_config,
        disabled_features=config.disabled_features,
        disable_position_embeddings=config.disable_position_embeddings)

    # The answer-related inputs are only read when the answer is used as
    # supervision; otherwise they are passed on as None.
    answer = None
    numeric_values = None
    numeric_values_scale = None
    if config.use_answer_as_supervision:
        answer = tf.squeeze(features["answer"], axis=[1])
        numeric_values = features["numeric_values"]
        numeric_values_scale = features["numeric_values_scale"]

    classification_outputs = _get_classification_outputs(
        config=config,
        output_layer=model.get_sequence_output(),
        output_layer_aggregation=model.get_pooled_output(),
        label_ids=label_ids,
        input_mask=input_mask,
        table_mask=table_mask,
        aggregation_function_id=aggregation_function_id,
        answer=answer,
        numeric_values=numeric_values,
        numeric_values_scale=numeric_values_scale,
        is_training=is_training,
        row_ids=row_ids,
        column_ids=column_ids,
        classification_class_index=classification_class_index)
    (total_loss, logits, logits_aggregation, probabilities,
     logits_cls) = classification_outputs

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    init_checkpoint = config.init_checkpoint
    if init_checkpoint:
        assignment_map, initialized_variable_names = (
            modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint))
        if not config.use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = (", *INIT_FROM_CKPT*"
                       if var.name in initialized_variable_names else "")
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss,
            config.learning_rate,
            config.num_train_steps,
            config.num_warmup_steps,
            config.use_tpu,
            gradient_accumulation_steps=params.get(
                "gradient_accumulation_steps", 1),
            grad_clipping=config.grad_clipping)
        return tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:
        metric_inputs = [
            total_loss, label_ids, logits, input_mask,
            aggregation_function_id, logits_aggregation,
            classification_class_index, logits_cls
        ]
        return tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(_calculate_eval_metrics_fn, metric_inputs),
            scaffold_fn=scaffold_fn)

    # Any mode other than TRAIN / EVAL produces predictions.
    predictions = {
        "embeddings": model.get_sequence_output(),
        "probabilities": probabilities,
        "column_ids": features["column_ids"],
        "row_ids": features["row_ids"],
        "segment_ids": features["segment_ids"],
        "question_id_ints": features["question_id_ints"],
    }
    # TODO Remove once the data has been updated.
    if "question_id" in features:
        # Only available when predicting on GPU.
        predictions["question_id"] = features["question_id"]

    if do_model_aggregation:
        predictions["gold_aggr"] = features["aggregation_function_id"]
        predictions["pred_aggr"] = tf.argmax(
            logits_aggregation, axis=-1, output_type=tf.int32)

    if do_model_classification:
        predictions["gold_cls"] = features["classification_class_index"]
        predictions["pred_cls"] = tf.argmax(
            logits_cls, axis=-1, output_type=tf.int32)
        if config.num_classification_labels == 2:
            # With two classes, expose a single score: the class-1 logit
            # minus the class-0 logit.
            predictions["logits_cls"] = logits_cls[:, 1] - logits_cls[:, 0]
        else:
            predictions["logits_cls"] = logits_cls

    return tf.estimator.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)