def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    # Pairwise-ranking `model_fn` for TPUEstimator: encodes paired inputs with
    # a shared BERT encoder and delegates loss/metrics to the ranking helpers.
    tf_logging.info("model_fn_ranking")
    # NOTE(review): this string sits after the first statement, so it is NOT a
    # docstring — it is a no-op expression statement. Left as-is.
    """The `model_fn` for TPUEstimator."""
    log_features(features)
    # Concatenates the two paired examples along the batch axis, so the
    # effective batch is 2x the real batch size.
    input_ids, input_mask, segment_ids = combine_paired_input_features(features)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    # Updated
    model = BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    pooled_output = model.get_pooled_output()
    if is_training:
        # Dropout on the pooled [CLS] vector only during training.
        pooled_output = dropout(pooled_output, 0.1)
    # `modeling_opt` (closure) selects the loss flavor; returns scalar loss,
    # per-example losses, and predictions.
    loss, losses, y_pred = apply_loss_modeling(modeling_opt, pooled_output, features)
    assignment_fn = assignment_map.get_bert_assignment_map
    scaffold_fn = checkpoint_init(assignment_fn, train_config)
    optimizer_factory = lambda x: create_optimizer_from_config(x, train_config)
    # tf.identity keeps the raw inputs reachable as prediction outputs.
    input_ids1 = tf.identity(features["input_ids1"])
    input_ids2 = tf.identity(features["input_ids2"])
    prediction = {
        "input_ids1": input_ids1,
        "input_ids2": input_ids2
    }
    return ranking_estimator_spec(mode, loss, losses, y_pred, scaffold_fn, optimizer_factory, prediction)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Minimal placeholder graph: a single 4x4 trainable weight matrix whose
    element sum is the loss. TRAIN returns an optimizing spec, EVAL is not
    implemented, and any other mode yields an empty-prediction spec.
    """
    weight_matrix = tf.compat.v1.get_variable(
        "output_weights",
        [4, 4],
        initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.02),
    )
    loss = tf.reduce_sum(weight_matrix)

    trainable = tf.compat.v1.trainable_variables()
    # Nothing is restored from a checkpoint here; log assignments as empty.
    log_var_assignments(trainable, {})
    scaffold_fn = None

    spec_cls = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(loss, train_config, trainable)
        return spec_cls(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
    if mode == tf.estimator.ModeKeys.EVAL:
        # Evaluation intentionally unsupported for this stub.
        return NotImplemented
    return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode, predictions={}, scaffold_fn=scaffold_fn)
def get_training_spec(loss, mode, train_config, scaffold_fn):
    """Return a TRAIN-mode TPUEstimatorSpec whose train_op optimizes `loss`.

    Args:
        loss: scalar loss tensor to minimize.
        mode: the estimator mode (expected to be TRAIN).
        train_config: optimizer configuration passed through to
            `optimization.create_optimizer_from_config`.
        scaffold_fn: checkpoint-initialization scaffold, or None.
    """
    return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=optimization.create_optimizer_from_config(loss, train_config),
        scaffold_fn=scaffold_fn,
    )
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    # Generic classification model_fn: the model class computes its own
    # logits/loss from the raw `features` dict.
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    label_ids = features["label_ids"]
    label_ids = tf.reshape(label_ids, [-1])  # flatten to [batch]
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        # Padding-free input: treat every example as real.
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=model_config,
        is_training=is_training,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        features=features,
    )
    logits = model.get_logits()
    loss = model.get_loss(label_ids)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # tvars=None lets the optimizer default to all trainable variables.
        tvars = None
        train_op = optimization.create_optimizer_from_config(loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn, [
            logits, label_ids, is_real_example
        ])
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
    else:
        # PREDICT: emit labels and logits; pass data_id through when present.
        predictions = {
            "label_ids": label_ids,
            "logits": logits,
        }
        if "data_id" in features:
            predictions['data_id'] = features['data_id']
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    # Ranking `model_fn` for the stacked-window ("sero") encoder: long paired
    # inputs are split into windows and encoded jointly.
    tf_logging.info("model_fn_ranking")
    log_features(features)
    input_ids, input_mask, segment_ids = combine_paired_input_features(
        features)
    batch_size, _ = get_shape_list(
        input_mask)  # This is not real batch_size, 2 * real_batch_size
    use_context = tf.ones([batch_size, 1], tf.int32)
    # Reshape each long sequence into fixed-size windows, appending CLS/EOW
    # markers per window.
    stacked_input_ids, stacked_input_mask, stacked_segment_ids, \
        = split_and_append_sep(input_ids, input_mask, segment_ids,
                               config.total_sequence_length, config.window_size, CLS_ID, EOW_ID)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    with tf.compat.v1.variable_scope("sero"):
        model = model_class(config, is_training, train_config.use_one_hot_embeddings)
        sequence_output_3d = model.network_stacked(
            stacked_input_ids, stacked_input_mask, stacked_segment_ids, use_context)
    pooled_output = model.get_pooled_output()
    if is_training:
        pooled_output = dropout(pooled_output, 0.1)
    loss, losses, y_pred = apply_loss_modeling(config.loss, pooled_output, features)
    # Checkpoint layout depends on the checkpoint type (e.g. BERT v1/v2) and
    # which lower layers are shared.
    assignment_fn = get_assignment_map_from_checkpoint_type(
        train_config.checkpoint_type, config.lower_layers)
    scaffold_fn = checkpoint_init(assignment_fn, train_config)
    prediction = {
        "stacked_input_ids": stacked_input_ids,
        "stacked_input_mask": stacked_input_mask,
        "stacked_segment_ids": stacked_segment_ids,
    }
    if train_config.gradient_accumulation != 1:
        # Accumulate gradients over several steps before applying.
        optimizer_factory = lambda x: grad_accumulation.get_accumulated_optimizer_from_config(
            x, train_config, tf.compat.v1.trainable_variables(), train_config.gradient_accumulation)
    else:
        optimizer_factory = lambda x: create_optimizer_from_config(
            x, train_config)
    return ranking_estimator_spec(mode, loss, losses, y_pred, scaffold_fn, optimizer_factory, prediction)
def define_graph(input_ids, input_mask, segment_ids):
    """Build the masked-LM training graph for HorizontalAlpha.

    Runs the encoder on the raw inputs, samples random mask positions, scores
    them with the ALBERT-style masked-LM head, and returns the train_op that
    minimizes the masked-LM loss.
    """
    run_config = TrainConfigEx.from_flags(FLAGS)
    model_conf = JsonConfig.from_json_file(FLAGS.model_config_file)

    encoder = HorizontalAlpha(model_conf, True, False)
    encoder.call(input_ids, input_mask, segment_ids)

    # Only the sampled positions/ids/weights are needed downstream; the masked
    # input ids themselves are unused here.
    _, lm_positions, lm_ids, lm_weights = random_masking(
        input_ids, input_mask, run_config.max_predictions_per_seq, MASK_ID, 0)

    lm_loss, _, _ = get_masked_lm_output_albert(
        model_conf,
        encoder.get_sequence_output(),
        encoder.get_embedding_table(),
        lm_positions,
        lm_ids,
        lm_weights)

    return optimization.create_optimizer_from_config(lm_loss, run_config)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Toy graph over a single 10x100 weight matrix: `loss` is its mean, `loss2`
    (mean square) is tracked as a streaming metric and a scalar summary, and
    column 1 serves as the logits.
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    weight_matrix = tf.compat.v1.get_variable(
        "output_weights",
        [10, 100],
        initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.02))
    logits = weight_matrix[:, 1]
    loss = tf.reduce_mean(weight_matrix)
    loss2 = tf.reduce_mean(tf.square(weight_matrix))
    # Streaming mean kept for its graph side effect; values unused here.
    loss2_metric_val, loss2_metric_op = tf.compat.v1.metrics.mean(loss2)
    tf.compat.v1.summary.scalar("loss2", loss2)

    scaffold_fn = None
    spec_cls = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            loss, train_config, None)
        return spec_cls(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:
        return spec_cls(mode=mode,
                        loss=loss,
                        eval_metrics=(metric_fn, [logits]),
                        scaffold_fn=scaffold_fn)

    predictions = {"logits": logits}
    if "data_id" in features:
        predictions['data_id'] = features['data_id']
    return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    # Two-tower (query/document) ranking model with a hinge loss; the two
    # paired examples are stacked along a new leading axis.
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    q_input_ids_1 = features["q_input_ids_1"]
    q_input_mask_1 = features["q_input_mask_1"]
    d_input_ids_1 = features["d_input_ids_1"]
    d_input_mask_1 = features["d_input_mask_1"]
    q_input_ids_2 = features["q_input_ids_2"]
    q_input_mask_2 = features["q_input_mask_2"]
    d_input_ids_2 = features["d_input_ids_2"]
    d_input_mask_2 = features["d_input_mask_2"]
    # Stack pair members along axis 0: shape becomes [2, batch, seq_len].
    q_input_ids = tf.stack([q_input_ids_1, q_input_ids_2], axis=0)
    q_input_mask = tf.stack([q_input_mask_1, q_input_mask_2], axis=0)
    q_segment_ids = tf.zeros_like(q_input_ids, tf.int32)
    d_input_ids = tf.stack([d_input_ids_1, d_input_ids_2], axis=0)
    d_input_mask = tf.stack([d_input_mask_1, d_input_mask_2], axis=0)
    d_segment_ids = tf.zeros_like(d_input_ids, tf.int32)
    label_ids = features["label_ids"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    # Separate variable scopes give the two towers independent weights.
    with tf.compat.v1.variable_scope("query"):
        model_q = model_class(
            config=model_config,
            is_training=is_training,
            input_ids=q_input_ids,
            input_mask=q_input_mask,
            token_type_ids=q_segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    with tf.compat.v1.variable_scope("document"):
        model_d = model_class(
            config=model_config,
            is_training=is_training,
            input_ids=d_input_ids,
            input_mask=d_input_mask,
            token_type_ids=d_segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    pooled_q = model_q.get_pooled_output()
    pooled_d = model_d.get_pooled_output()
    # Dot-product similarity between query and document representations.
    logits = tf.matmul(pooled_q, pooled_d, transpose_b=True)
    # Map {0,1} labels to {-1,+1} for the hinge loss.
    y = tf.cast(label_ids, tf.float32) * 2 - 1
    losses = tf.maximum(1.0 - logits * y, 0)
    loss = tf.reduce_mean(losses)
    pred = tf.cast(logits > 0, tf.int32)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(
            train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        if "simple_optimizer" in special_flags:
            tf_logging.info("using simple optimizer")
            train_op = create_simple_optimizer(loss, train_config.learning_rate, train_config.use_tpu)
        else:
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn, [pred, label_ids, is_real_example])
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "q_input_ids": q_input_ids,
            "d_input_ids": d_input_ids,
            "score": logits
        }
        # Pass through any auxiliary id features present in the input.
        useful_inputs = ["data_id", "input_ids2", "data_ids"]
        for input_name in useful_inputs:
            if input_name in features:
                predictions[input_name] = features[input_name]
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Dual-encoder ranking over one query and `num_docs` documents per example:
    documents are flattened into the batch axis for encoding, then the
    query/document dot products are scored by a configurable loss head.

    Fix vs. original: `q_model_config` / `d_model_config` were built (deepcopy
    with `max_seq_length` overridden) but never passed to `model_class`, which
    received `model_config` both times. They are now passed as intended.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    q_input_ids = features["q_input_ids"]
    q_input_mask = features["q_input_mask"]
    d_input_ids = features["d_input_ids"]
    d_input_mask = features["d_input_mask"]
    input_shape = get_shape_list(q_input_ids, expected_rank=2)
    batch_size = input_shape[0]
    doc_length = model_config.max_doc_length
    num_docs = model_config.num_docs
    # Unpack documents to [batch, num_docs, doc_length], then flatten to
    # [batch * num_docs, doc_length] so they encode as one large batch.
    d_input_ids_unpacked = tf.reshape(d_input_ids, [-1, num_docs, doc_length])
    d_input_mask_unpacked = tf.reshape(d_input_mask, [-1, num_docs, doc_length])
    d_input_ids_flat = tf.reshape(d_input_ids_unpacked, [-1, doc_length])
    d_input_mask_flat = tf.reshape(d_input_mask_unpacked, [-1, doc_length])
    q_segment_ids = tf.zeros_like(q_input_ids, tf.int32)
    d_segment_ids = tf.zeros_like(d_input_ids_flat, tf.int32)
    label_ids = features["label_ids"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    with tf.compat.v1.variable_scope(dual_model_prefix1):
        q_model_config = copy.deepcopy(model_config)
        q_model_config.max_seq_length = model_config.max_sent_length
        model_q = model_class(
            config=q_model_config,  # FIX: was model_config (q_model_config unused)
            is_training=is_training,
            input_ids=q_input_ids,
            input_mask=q_input_mask,
            token_type_ids=q_segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    with tf.compat.v1.variable_scope(dual_model_prefix2):
        d_model_config = copy.deepcopy(model_config)
        d_model_config.max_seq_length = model_config.max_doc_length
        model_d = model_class(
            config=d_model_config,  # FIX: was model_config (d_model_config unused)
            is_training=is_training,
            input_ids=d_input_ids_flat,
            input_mask=d_input_mask_flat,
            token_type_ids=d_segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    pooled_q = model_q.get_pooled_output()  # [batch, vector_size]
    pooled_d_flat = model_d.get_pooled_output()  # [batch * num_docs, vector_size]
    pooled_d = tf.reshape(pooled_d_flat, [batch_size, num_docs, -1])
    pooled_q_t = tf.expand_dims(pooled_q, 1)
    pooled_d_t = tf.transpose(pooled_d, [0, 2, 1])
    all_logits = tf.matmul(pooled_q_t, pooled_d_t)  # [batch, 1, num_docs]
    # Loss head is selected by flags: hinge over all docs, sigmoid over all,
    # or hinge over the max-scoring doc (default).
    if "hinge_all" in special_flags:
        apply_loss_modeing = hinge_all
    elif "sigmoid_all" in special_flags:
        apply_loss_modeing = sigmoid_all
    else:
        apply_loss_modeing = hinge_max
    logits, loss = apply_loss_modeing(all_logits, label_ids)
    pred = tf.cast(logits > 0, tf.int32)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        if "simple_optimizer" in special_flags:
            tf_logging.info("using simple optimizer")
            train_op = create_simple_optimizer(loss, train_config.learning_rate, train_config.use_tpu)
        else:
            train_op = optimization.create_optimizer_from_config(loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn, [pred, label_ids, is_real_example])
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "q_input_ids": q_input_ids,
            "d_input_ids": d_input_ids,
            "logits": logits
        }
        # Pass through any auxiliary id features present in the input.
        useful_inputs = ["data_id", "input_ids2", "data_ids"]
        for input_name in useful_inputs:
            if input_name in features:
                predictions[input_name] = features[input_name]
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    # Classifier + confidence model: tower 1 predicts the class, tower 2
    # predicts a confidence weight; the classification loss is scaled by the
    # confidence, with a penalty `k * (1 - confidence)` against abstaining.
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if mode == tf.estimator.ModeKeys.PREDICT:
        # No labels at prediction time; dummy ones keep the graph well-formed.
        label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
    else:
        label_ids = features["label_ids"]
        label_ids = tf.reshape(label_ids, [-1])
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    input_ids2 = features["input_ids2"]
    input_mask2 = features["input_mask2"]
    segment_ids2 = features["segment_ids2"]
    with tf.compat.v1.variable_scope(dual_model_prefix1):
        model_1 = BertModel(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled = model_1.get_pooled_output()
        if is_training:
            pooled = dropout(pooled, 0.1)
        logits = tf.keras.layers.Dense(train_config.num_classes, name="cls_dense")(pooled)
    with tf.compat.v1.variable_scope(dual_model_prefix2):
        model_2 = BertModel(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids2,
            input_mask=input_mask2,
            token_type_ids=segment_ids2,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled = model_2.get_pooled_output()
        if is_training:
            pooled = dropout(pooled, 0.1)
        # Same layer name is fine: it lives under a different variable scope.
        conf_probs = tf.keras.layers.Dense(
            train_config.num_classes, name="cls_dense",
            activation=tf.keras.activations.softmax)(pooled)
        confidence = conf_probs[:, 1]
    confidence_loss = 1 - confidence
    cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label_ids)
    k = model_config.k
    alpha = model_config.alpha
    loss_arr = cls_loss * confidence + confidence_loss * k
    loss_arr = apply_weighted_loss(loss_arr, label_ids, alpha)
    loss = tf.reduce_mean(input_tensor=loss_arr)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(
            train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

    def metric_fn(log_probs, label, is_real_example, confidence):
        # Standard classification metrics plus the mean confidence.
        r = classification_metric_fn(log_probs, label, is_real_example)
        r['confidence'] = tf.compat.v1.metrics.mean(confidence)
        return r

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # tvars=None lets the optimizer default to all trainable variables.
        tvars = None
        train_op = optimization.create_optimizer_from_config(
            loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [logits, label_ids, is_real_example, confidence])
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "logits": logits,
            "confidence": confidence,
        }
        if "data_id" in features:
            predictions['data_id'] = features['data_id']
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    # Binary-probability model: the model class emits probabilities directly,
    # trained with binary cross-entropy against 0/1 labels.
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    label_ids = tf.reshape(label_ids, [-1])
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=model_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        features=features,
    )
    probs = model.get_prob()
    # NOTE(review): "logits" here actually holds probabilities; downstream
    # consumers (metrics/predictions) receive probs under that key.
    logits = probs
    epsilon = 1e-12  # only used by the commented-out log-odds variant below
    # probs_ad = tf.clip_by_value(probs, epsilon, 1.0 - epsilon)
    # logits1 = tf.math.log(probs_ad)
    # logits0 = tf.zeros_like(logits1)
    # logits = tf.stack([logits0, logits1], axis=1)
    # prob2 = tf.nn.softmax(logits, axis=1)
    # prob_err = prob2[:, 1] - probs
    y_true = tf.cast(label_ids, tf.float32)
    loss_arr = tf.keras.losses.BinaryCrossentropy()(y_true, probs)
    loss = tf.reduce_mean(input_tensor=loss_arr)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # tvars=None lets the optimizer default to all trainable variables.
        tvars = None
        train_op = optimization.create_optimizer_from_config(loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn, [
            logits, label_ids, is_real_example
        ])
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "label_ids": label_ids,
            "logits": logits,
            "score1": model.score1,
            "score2": model.score2,
            # "prob_err": prob_err,
        }
        if "data_id" in features:
            predictions['data_id'] = features['data_id']
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    APR-encoder classification (3 classes). Builds the APR model over the raw
    inputs, attaches a `Classification` head, restores all variables as-is
    from the init checkpoint, and supports an optional two-learning-rate
    optimizer for TRAIN.

    Fix vs. original: the EVAL and PREDICT specs passed `mode=model` (the APR
    model object) instead of `mode=mode`, which breaks both modes.
    """
    log_features(features)
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = APR(input_ids, input_mask, segment_ids,
                is_training,
                train_config.use_one_hot_embeddings,
                bert_config,
                ssdr_config,
                dict_run_config.def_per_batch,
                dict_run_config.inner_batch_size,
                dict_run_config.max_def_length)
    # 3-way classification head over the pooled representation.
    task = Classification(3, features, model.get_pooled_output(), is_training)
    loss = task.loss
    tvars = tf.compat.v1.trainable_variables()
    # Restore every variable under its own name (no remapping).
    assignment_fn = tlm.training.assignment_map.get_assignment_map_as_is
    initialized_variable_names, init_fn = get_init_fn(
        tvars, train_config.init_checkpoint, assignment_fn)
    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    output_spec = None
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    if mode == tf.estimator.ModeKeys.TRAIN:
        if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
            # Separate learning rates for the encoder and the dictionary parts.
            tf_logging.info("Using two lr for each parts")
            train_op = create_optimizer_with_separate_lr(loss, train_config)
        else:
            tf_logging.info("Using single lr ")
            train_op = optimization.create_optimizer_from_config(loss, train_config)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        output_spec = TPUEstimatorSpec(mode=mode,  # FIX: was mode=model
                                       loss=loss,
                                       eval_metrics=task.eval_metrics(),
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.PREDICT:
        output_spec = TPUEstimatorSpec(mode=mode,  # FIX: was mode=model
                                       loss=loss,
                                       predictions={"loss": task.loss_arr},
                                       scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    # Scalar regression on BERT's pooled output, trained with MAE weighted by
    # the label magnitude (larger |label| contributes more).
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if mode == tf.estimator.ModeKeys.PREDICT:
        # No labels at prediction time; dummy ones keep the graph well-formed.
        label_ids = tf.ones([input_ids.shape[0]], dtype=tf.float32)
    else:
        label_ids = features["label_ids"]
        label_ids = tf.reshape(label_ids, [-1])
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = BertModel(
        config=model_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    pooled = model.get_pooled_output()
    if is_training:
        pooled = dropout(pooled, 0.1)
    logits = tf.keras.layers.Dense(train_config.num_classes, name="cls_dense")(pooled)
    # Rescale targets, then weight the per-example MAE by |label|.
    scale = model_config.scale
    label_ids = scale * label_ids
    weight = tf.abs(label_ids)
    loss_arr = tf.keras.losses.MAE(y_true=label_ids, y_pred=logits)
    loss_arr = loss_arr * weight
    loss = tf.reduce_mean(input_tensor=loss_arr)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

    def metric_fn(logits, label, is_real_example):
        # Mean absolute error, masked to real (non-padding) examples.
        mae = tf.compat.v1.metrics.mean_absolute_error(
            labels=label, predictions=logits, weights=is_real_example)
        return {
            "mae": mae
        }

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # tvars=None lets the optimizer default to all trainable variables.
        tvars = None
        train_op = optimization.create_optimizer_from_config(loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [
            logits, label_ids, is_real_example
        ])
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "logits": logits,
        }
        if "data_id" in features:
            predictions['data_id'] = features['data_id']
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Loss-prediction model: re-inserts the masked tokens, encodes the sequence,
    and predicts the base/target LM losses at the masked positions via a
    configurable loss model ("independent" or "diff_regression").

    Fix vs. original: the non-TPU checkpoint-init path called
    `tf.compat.v1.tain.init_from_checkpoint` (typo, AttributeError at graph
    build); corrected to `tf.compat.v1.train.init_from_checkpoint`. Also
    removed the unused local `pel` inside `metric_fn`.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info("name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    loss_base = features["loss_base"]
    loss_target = features["loss_target"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]
    # Undo the masking: restore the original token ids at masked positions.
    input_ids = recover_mask(input_ids, masked_lm_positions, masked_lm_ids)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    if model_config.loss_model == "independent":
        # Two independent heads predicting the base and target losses.
        loss_model = IndependentLossModel(bert_config)
        loss_model.train_modeling(model.get_sequence_output(),
                                  masked_lm_positions,
                                  masked_lm_weights,
                                  loss_base, loss_target)
        total_loss = loss_model.total_loss
        loss1 = loss_model.loss1
        loss2 = loss_model.loss2
        per_example_loss1 = loss_model.per_example_loss1
        per_example_loss2 = loss_model.per_example_loss2
        losses1 = tf.reduce_sum(per_example_loss1, axis=1)
        losses2 = tf.reduce_sum(per_example_loss2, axis=1)
        prob1 = loss_model.prob1
        prob2 = loss_model.prob2

        def host_call_fn(total_loss, loss1, loss2):
            # Host-side scalar summaries (values arrive as shape-[1] tensors).
            tf.summary.scalar("total_loss", total_loss[0])
            tf.summary.scalar("loss_base", loss1[0])
            tf.summary.scalar("loss_target", loss2[0])
            return tf.compat.v1.summary.all_v2_summary_ops()

        host_call = (host_call_fn, [
            tf.reshape(total_loss, [1]),
            tf.reshape(loss1, [1]),
            tf.reshape(loss2, [1])
        ])
    elif model_config.loss_model == "diff_regression":
        # Single regression head on the base-vs-target loss difference.
        total_loss, losses, logits = get_diff_loss(
            bert_config,
            model.get_sequence_output(),
            masked_lm_positions,
            masked_lm_weights,
            loss_base, loss_target)
        host_call = None
    # NOTE(review): `prob1`/`prob2` (and `losses1`/`losses2`,
    # `per_example_loss*` used below) are only defined by the "independent"
    # branch — "diff_regression" would hit a NameError here. Confirm whether
    # that mode is expected to reach EVAL/PREDICT.
    pred_diff = prob1 - prob2
    gold_diff = get_gold_diff(loss_base, loss_target)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        assignment_map, initialized_variable_names = get_bert_assignment_map(
            tvars, train_config.init_checkpoint)
        if train_config.use_tpu:
            def tpu_scaffold():
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)
                return tf.compat.v1.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            # FIX: was `tf.compat.v1.tain.init_from_checkpoint` (typo).
            tf.compat.v1.train.init_from_checkpoint(
                train_config.init_checkpoint, assignment_map)
    tf_logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf_logging.info("name = %s, shape = %s%s", var.name, var.shape, init_string)
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(total_loss, train_config)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            host_call=host_call,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        def metric_fn(per_example_loss1, per_example_loss2):
            # Streaming means of the two per-example losses.
            loss1 = tf.compat.v1.metrics.mean(values=per_example_loss1)
            loss2 = tf.compat.v1.metrics.mean(values=per_example_loss2)
            return {
                "loss1": loss1,
                "loss2": loss2,
            }

        eval_metrics = (metric_fn, [losses1, losses2])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "loss_base": loss_base,
            "loss_target": loss_target,
            "prob1": prob1,
            "prob2": prob2,
            "per_example_loss1": per_example_loss1,
            "per_example_loss2": per_example_loss2,
            "input_ids": input_ids,
            "masked_lm_positions": masked_lm_positions,
            "pred_diff": pred_diff,
            "gold_diff": gold_diff,
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """`model_fn` for TPUEstimator: masked-LM + NSP pretraining with biased masking.

    A frozen "priority model" (under the `target_task` variable scope) scores
    tokens; `biased_masking` uses those scores to choose mask positions instead
    of uniform random masking. The priority-model variables are excluded from
    the trainable set passed to the optimizer.

    Args:
        features: dict of input tensors; must contain "input_ids",
            "input_mask", "segment_ids"; "next_sentence_labels" is optional
            (a dummy is synthesized when absent). PREDICT mode additionally
            reads "loss1"/"loss2".
        labels: unused (TPUEstimator contract).
        mode: tf.estimator.ModeKeys value.
        params: unused (TPUEstimator contract).

    Returns:
        tf.compat.v1.estimator.tpu.TPUEstimatorSpec for the given mode.

    Note: closes over bert_config, train_config, target_model_config,
    model_class, priority_model, MASK_ID, etc. from the enclosing factory.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if "next_sentence_labels" in features:
        next_sentence_labels = features["next_sentence_labels"]
    else:
        next_sentence_labels = get_dummy_next_sentence_labels(input_ids)

    # Priority model lives in its own scope so its variables can be loaded
    # from a second checkpoint and excluded from training below.
    tlm_prefix = "target_task"
    with tf.compat.v1.variable_scope(tlm_prefix):
        # stop_gradient: the priority model is inference-only here.
        priority_score = tf.stop_gradient(priority_model(features))
        priority_score = priority_score * target_model_config.amp

    # Mask positions are sampled in proportion to the (amplified) priority
    # scores rather than uniformly.
    masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        = biased_masking(input_ids, input_mask, priority_score,
                         target_model_config.alpha,
                         train_config.max_predictions_per_seq, MASK_ID)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=bert_config,
        is_training=is_training,
        input_ids=masked_input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(),
         model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
         masked_lm_weights)
    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         bert_config, model.get_pooled_output(), next_sentence_labels)
    total_loss = masked_lm_loss + next_sentence_loss

    # Build checkpoint assignment maps: main BERT from init_checkpoint,
    # priority model (tlm_prefix) from second_init_checkpoint.
    all_vars = tf.compat.v1.all_variables()
    tf_logging.info("We assume priority model is from v2")
    if train_config.checkpoint_type == "v2":
        assignment_map, initialized_variable_names = assignment_map_v2_to_v2(
            all_vars, train_config.init_checkpoint)
        assignment_map2, initialized_variable_names2 = get_assignment_map_remap_from_v2(
            all_vars, tlm_prefix, train_config.second_init_checkpoint)
    else:
        assignment_map, assignment_map2, initialized_variable_names \
            = get_tlm_assignment_map_v2(all_vars, tlm_prefix,
                                        train_config.init_checkpoint,
                                        train_config.second_init_checkpoint)
        initialized_variable_names2 = None

    def init_fn():
        # Restores both checkpoints; each is optional.
        if train_config.init_checkpoint:
            tf.compat.v1.train.init_from_checkpoint(
                train_config.init_checkpoint, assignment_map)
        if train_config.second_init_checkpoint:
            tf.compat.v1.train.init_from_checkpoint(
                train_config.second_init_checkpoint, assignment_map2)

    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    # Only non-priority-model variables are trained.
    tvars = [v for v in all_vars if not v.name.startswith(tlm_prefix)]
    log_var_assignments(tvars, initialized_variable_names,
                        initialized_variable_names2)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            total_loss, train_config, tvars)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, next_sentence_example_loss,
            next_sentence_log_probs, next_sentence_labels
        ])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        # PREDICT: expose masking diagnostics; assumes the input pipeline
        # provides per-example "loss1"/"loss2" features — TODO confirm.
        predictions = {
            "input_ids": input_ids,
            "masked_input_ids": masked_input_ids,
            "priority_score": priority_score,
            "lm_loss1": features["loss1"],
            "lm_loss2": features["loss2"],
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Trains an "explain" BERT to predict which token spans, when deleted,
    change a frozen "classification" BERT's prediction:
      1. `candidate_gen` makes `n_trial` perturbed copies of each input.
      2. The classification tower scores originals + perturbations in one
         batch; its logits are wrapped in stop_gradient (not trained here).
      3. The perturbation whose probabilities shifted most informatively
         (`get_informative` / `select_best`) becomes a per-token mask label.
      4. The explain tower emits per-token logits trained against that mask
         via a correlation-coefficient loss, masked to informative classes.

    Args:
        features: dict with "input_ids", "input_mask", "segment_ids",
            "label_ids".
        labels, params: unused (TPUEstimator contract).
        mode: tf.estimator.ModeKeys value (TRAIN and PREDICT handled; note
            there is no EVAL branch, so EVAL returns None).

    Returns:
        TPUEstimatorSpec, or None for unhandled modes.
    """
    logging.info("*** Features ***")
    for name in sorted(features.keys()):
        logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    batch_size, seq_len = get_shape_list2(input_ids)
    n_trial = 5  # number of perturbed candidates generated per example

    logging.info("Doing All Masking")
    new_input_ids, new_segment_ids, new_input_mask, indice, length_arr = \
        candidate_gen(input_ids, input_mask, segment_ids, n_trial)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    prefix_cls = "classification"
    prefix_explain = "explain"
    # Score originals and perturbations together: first batch_size rows are
    # the originals, the remaining batch_size * n_trial rows the candidates.
    all_input_ids = tf.concat([input_ids, new_input_ids], axis=0)
    all_segment_ids = tf.concat([segment_ids, new_segment_ids], axis=0)
    all_input_mask = tf.concat([input_mask, new_input_mask], axis=0)
    with tf.compat.v1.variable_scope(prefix_cls):
        model = BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=all_input_ids,
            input_mask=all_input_mask,
            token_type_ids=all_segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        output_weights = tf.compat.v1.get_variable(
            "output_weights",
            [train_config.num_classes, bert_config.hidden_size],
            initializer=tf.compat.v1.truncated_normal_initializer(
                stddev=0.02))
        output_bias = tf.compat.v1.get_variable(
            "output_bias", [train_config.num_classes],
            initializer=tf.compat.v1.zeros_initializer())
        pooled = model.get_pooled_output()
        raw_logits = tf.matmul(pooled, output_weights, transpose_b=True)
        # stop_gradient: the classification tower is a fixed judge here.
        logits = tf.stop_gradient(raw_logits)
        cls_logits = tf.nn.bias_add(logits, output_bias)
        cls_probs = tf.nn.softmax(cls_logits)
    orig_probs = cls_probs[:batch_size]
    new_probs = tf.reshape(cls_probs[batch_size:], [batch_size, n_trial, -1])
    best_run, informative = get_informative(new_probs, orig_probs)
    # informative.shape = [batch_size, num_classes]
    best_del_idx, best_del_len = select_best(best_run, indice, length_arr)
    # Per-token 0/1 label marking the best deletion span.
    signal_label = get_mask(best_del_idx, best_del_len, seq_len)
    with tf.compat.v1.variable_scope(prefix_explain):
        model = BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        seq = model.get_sequence_output()
        output_weights = tf.compat.v1.get_variable(
            "output_weights",
            [train_config.num_classes, bert_config.hidden_size],
            initializer=tf.compat.v1.truncated_normal_initializer(
                stddev=0.02))
        output_bias = tf.compat.v1.get_variable(
            "output_bias", [train_config.num_classes],
            initializer=tf.compat.v1.zeros_initializer())
        logits = tf.matmul(seq, output_weights, transpose_b=True)
        ex_logits = tf.nn.bias_add(
            logits, output_bias)  # [batch, seq_len, num_class]
    # Flatten (batch, class) pairs so the loss runs per class over seq_len.
    ex_logits_flat = tf.reshape(tf.transpose(ex_logits, [0, 2, 1]),
                                [-1, seq_len])
    signal_label_flat = tf.cast(tf.reshape(signal_label, [-1, seq_len]),
                                tf.float32)
    losses_per_clas_flat = correlation_coefficient_loss(
        signal_label_flat, ex_logits_flat)  # [batch_size * num_class]
    losses_per_clas = tf.reshape(losses_per_clas_flat, [batch_size, -1])
    # Zero out classes where the perturbation was not informative.
    losses_per_clas = losses_per_clas * tf.cast(informative, tf.float32)
    losses = tf.reduce_mean(losses_per_clas, axis=1)
    loss = tf.reduce_mean(losses)

    tvars = tf.compat.v1.trainable_variables()
    scaffold_fn = None
    # Two checkpoints: one restored under the explain prefix, one under the
    # classification prefix.
    initialized_variable_names, init_fn = get_init_fn_for_two_checkpoints(
        train_config, tvars, train_config.init_checkpoint, prefix_explain,
        train_config.second_init_checkpoint, prefix_cls)
    if train_config.use_tpu:
        def tpu_scaffold():
            init_fn()
            return tf.compat.v1.train.Scaffold()
        scaffold_fn = tpu_scaffold
    else:
        init_fn()
    log_var_assignments(tvars, initialized_variable_names)

    output_spec = None
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            loss, train_config)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            "input_ids": input_ids,
            "ex_logits": ex_logits,
            "logits": logits,
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=None,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Pairwise ranking: the two paired inputs are combined into one batch,
    encoded by BERT, and `pairwise_model` produces per-pair losses/logits.
    EVAL reports pairwise accuracy plus "strict" accuracies — a strictly-good
    example must score above 1.0 and a strictly-bad one below -1.0.

    Args:
        features: dict with paired inputs (consumed by
            combine_paired_input_features) plus "strict_good"/"strict_bad"
            indicator tensors.
        labels, params: unused (TPUEstimator contract).
        mode: tf.estimator.ModeKeys value.

    Returns:
        TPUEstimatorSpec for the given mode.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info("name = %s, shape = %s" % (name, features[name].shape))
    input_ids, input_mask, segment_ids = combine_paired_input_features(
        features)
    strict_good = features["strict_good"]
    strict_bad = features["strict_bad"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    pooled = model.get_pooled_output()
    losses, logits, pair_logits = pairwise_model(pooled, strict_good,
                                                 strict_bad)
    total_loss = tf.reduce_mean(losses)

    # Standard BERT checkpoint restore; on TPU the restore must happen
    # inside the scaffold function.
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        assignment_map, initialized_variable_names = get_bert_assignment_map(
            tvars, train_config.init_checkpoint)
        if train_config.use_tpu:
            def tpu_scaffold():
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)
                return tf.compat.v1.train.Scaffold()
            scaffold_fn = tpu_scaffold
        else:
            tf.compat.v1.train.init_from_checkpoint(
                train_config.init_checkpoint, assignment_map)
    tf_logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf_logging.info("name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            total_loss, train_config)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        def metric_fn(pair_logits, strict_good, strict_bad):
            # Pairwise correct when the first member outscores the second.
            diff = pair_logits[:, 0] - pair_logits[:, 1]
            pairwise_correct = tf.less(0.0, diff)
            # Strict-good correct when its logit exceeds 1.0, weighted by
            # the strict_good indicator.
            strict_good_correct_raw = tf.reshape(
                tf.less(1.0, pair_logits[:, 0]), [-1, 1])
            strict_good_correct = cast_float_multiply(
                strict_good_correct_raw, strict_good)
            # Strict-bad correct when its logit is below -1.0.
            strict_bad_correct_raw = tf.reshape(
                tf.less(pair_logits[:, 1], -1.0), [-1, 1])
            strict_bad_correct = cast_float_multiply(
                strict_bad_correct_raw, strict_bad)
            pairwise_acc_raw = tf.cast(pairwise_correct, tf.float32)
            mean_acc = tf.compat.v1.metrics.mean(values=pairwise_acc_raw)

            def strict_accuracy(correctness, gold):
                # Accuracy over only the examples flagged by `gold`
                # (weights zero out the rest).
                return tf.compat.v1.metrics.accuracy(
                    labels=tf.ones_like(gold, tf.int32),
                    predictions=tf.cast(correctness, tf.int32),
                    weights=tf.cast(gold, tf.float32))

            return {
                'mean_acc': mean_acc,
                'strict_good_acc': strict_accuracy(strict_good_correct,
                                                   strict_good),
                'strict_bad_acc': strict_accuracy(strict_bad_correct,
                                                  strict_bad)
            }

        eval_metrics = (metric_fn, [pair_logits, strict_good, strict_bad])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "logits": logits,
            "input_ids": input_ids,
            "strict_good": strict_good,
            "strict_bad": strict_bad,
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """`model_fn` for TPUEstimator: joint masked-LM + NLI with gradient overlap.

    A sharing model encodes both the masked-LM inputs and the NLI inputs;
    the combined loss is `combine_loss_fn(masked_lm_loss, nli_loss)`. For
    PREDICT, `h_overlap` measures, per layer, how much the LM-loss gradient
    and the argument-likeness gradient overlap on the same activations.
    """
    tf_logging.info("model_fn_nli_lm")
    # NOTE: the string below is a no-op expression (it follows a statement,
    # so it is not the function's docstring); kept as-is.
    """The `model_fn` for TPUEstimator."""
    log_features(features)
    input_ids = features["input_ids"]  # [batch_size, seq_length]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    batch_size, seq_max = get_shape_list2(input_ids)
    if "nli_input_ids" in features:
        nli_input_ids = features[
            "nli_input_ids"]  # [batch_size, seq_length]
        nli_input_mask = features["nli_input_mask"]
        nli_segment_ids = features["nli_segment_ids"]
    else:
        # No separate NLI inputs: reuse the LM inputs and fabricate
        # dummy labels (class 1).
        nli_input_ids = input_ids
        nli_input_mask = input_mask
        nli_segment_ids = segment_ids
        features["label_ids"] = tf.ones([batch_size], tf.int32)
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Fixed seed so PREDICT masking is reproducible across runs.
        tf.random.set_seed(0)
        seed = 0
    else:
        seed = None
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    tf_logging.info("Doing dynamic masking (random)")
    masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        = random_masking(input_ids, input_mask,
                         train_config.max_predictions_per_seq, MASK_ID, seed)
    sharing_model = sharing_model_factory(
        config, train_config.use_one_hot_embeddings, is_training,
        masked_input_ids, input_mask, segment_ids, nli_input_ids,
        nli_input_mask, nli_segment_ids)
    sequence_output_lm = sharing_model.lm_sequence_output()
    nli_feature = sharing_model.get_tt_feature()
    masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
        = get_masked_lm_output(config, sequence_output_lm,
                               sharing_model.get_embedding_table(),
                               masked_lm_positions, masked_lm_ids,
                               masked_lm_weights)
    masked_lm_log_probs = tf.reshape(masked_lm_log_probs, [batch_size, -1])
    masked_lm_per_inst_loss = tf.reshape(masked_lm_example_loss,
                                         [batch_size, -1])
    task = Classification(3, features, nli_feature, is_training)
    nli_loss = task.loss
    task_prob = tf.nn.softmax(task.logits, axis=-1)
    # "Argument-likeness": total probability of classes 1 and 2.
    arg_like = task_prob[:, 1] + task_prob[:, 2]
    # NOTE(review): `vars` shadows the builtin; these are per-layer outputs
    # of the shared encoder.
    vars = sharing_model.model.all_layer_outputs
    grads_1 = tf.gradients(ys=masked_lm_loss, xs=vars)  # List[ batch_szie,
    grads_2 = tf.gradients(ys=arg_like, xs=vars)
    l = []
    for g1, g2 in zip(grads_1, grads_2):
        if g1 is not None and g2 is not None:
            # Assumes each layer gradient covers LM rows then NLI rows
            # stacked as [batch_size * 2, seq, hidden] — TODO confirm.
            a = tf.reshape(g1, [batch_size * 2, seq_max, -1])[:batch_size]
            # NOTE(review): dividing [batch, seq, hidden] by
            # masked_lm_per_inst_loss [batch, n_pred] relies on broadcast
            # compatibility — verify shapes against get_masked_lm_output.
            a = a / masked_lm_per_inst_loss
            b = tf.reshape(g2, [batch_size * 2, seq_max, -1])[batch_size:]
            l.append(tf.abs(a * b))
    # [batch, n_layers, seq, hidden] -> summed over axis 2.
    h_overlap = tf.stack(l, axis=1)
    h_overlap = tf.reduce_sum(h_overlap, axis=2)
    loss = combine_loss_fn(masked_lm_loss, nli_loss)

    tvars = tf.compat.v1.trainable_variables()
    assignment_fn = get_bert_assignment_map
    initialized_variable_names, init_fn = get_init_fn(
        tvars, train_config.init_checkpoint, assignment_fn)
    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)

    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            loss, train_config)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn_lm, [
            masked_lm_example_loss,
            masked_lm_log_probs,
            masked_lm_ids,
            masked_lm_weights,
        ])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "masked_input_ids": masked_input_ids,
            "masked_lm_ids": masked_lm_ids,
            "masked_lm_example_loss": masked_lm_example_loss,
            "masked_lm_positions": masked_lm_positions,
            "masked_lm_log_probs": masked_lm_log_probs,
            "h_overlap": h_overlap,
        }
        output_spec = TPUEstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """`model_fn` for TPUEstimator: joint masked-LM + NLI with overlap scores.

    Variant of the gradient-overlap model_fn above: instead of computing the
    per-layer overlap inline, it delegates to `shared_gradient_fine_grained`,
    and additionally exposes the reshaped LM log-probs as "top_guess".
    """
    tf_logging.info("model_fn_nli_lm")
    # NOTE: the string below is a no-op expression (not the docstring);
    # kept as-is.
    """The `model_fn` for TPUEstimator."""
    log_features(features)
    input_ids = features["input_ids"]  # [batch_size, seq_length]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    batch_size, _ = get_shape_list2(input_ids)
    if "nli_input_ids" in features:
        nli_input_ids = features[
            "nli_input_ids"]  # [batch_size, seq_length]
        nli_input_mask = features["nli_input_mask"]
        nli_segment_ids = features["nli_segment_ids"]
    else:
        # No separate NLI inputs: reuse LM inputs and fabricate dummy labels.
        nli_input_ids = input_ids
        nli_input_mask = input_mask
        nli_segment_ids = segment_ids
        features["label_ids"] = tf.ones([batch_size], tf.int32)
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Fixed seed so PREDICT masking is reproducible.
        tf.random.set_seed(0)
        seed = 0
    else:
        seed = None
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    tf_logging.info("Doing dynamic masking (random)")
    masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        = random_masking(input_ids, input_mask,
                         train_config.max_predictions_per_seq, MASK_ID, seed)
    sharing_model = sharing_model_factory(
        config, train_config.use_one_hot_embeddings, is_training,
        masked_input_ids, input_mask, segment_ids, nli_input_ids,
        nli_input_mask, nli_segment_ids)
    sequence_output_lm = sharing_model.lm_sequence_output()
    nli_feature = sharing_model.get_tt_feature()
    masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
        = get_masked_lm_output(config, sequence_output_lm,
                               sharing_model.get_embedding_table(),
                               masked_lm_positions, masked_lm_ids,
                               masked_lm_weights)
    masked_lm_log_probs = tf.reshape(masked_lm_log_probs, [batch_size, -1])
    top_guess = masked_lm_log_probs
    task = Classification(3, features, nli_feature, is_training)
    nli_loss = task.loss
    # Per-prediction overlap between LM loss and task logits gradients.
    overlap_score = shared_gradient_fine_grained(
        masked_lm_example_loss, task.logits,
        train_config.max_predictions_per_seq)
    loss = combine_loss_fn(masked_lm_loss, nli_loss)

    tvars = tf.compat.v1.trainable_variables()
    assignment_fn = get_bert_assignment_map
    initialized_variable_names, init_fn = get_init_fn(
        tvars, train_config.init_checkpoint, assignment_fn)
    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)

    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            loss, train_config)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn_lm, [
            masked_lm_example_loss,
            masked_lm_log_probs,
            masked_lm_ids,
            masked_lm_weights,
        ])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "masked_input_ids": masked_input_ids,
            "masked_lm_ids": masked_lm_ids,
            "masked_lm_example_loss": masked_lm_example_loss,
            "masked_lm_positions": masked_lm_positions,
            "masked_lm_log_probs": masked_lm_log_probs,
            "overlap_score": overlap_score,
            "top_guess": top_guess,
        }
        output_spec = TPUEstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Dictionary-augmented BERT masked-LM. The model additionally consumes
    dictionary-definition inputs (d_input_ids / d_input_mask /
    d_location_ids). Behavior branches on dict_run_config:
      - prediction_op: "gradient" (fetch input gradients), "loss" /
        "loss_fixed_mask" (fetch per-example LM loss, with seeded or
        pre-supplied masks).
      - train_op: "entry_prediction" (replace loss with a usefulness
        classifier over the dict pooled output) or "lookup" (add a
        sequence-index-prediction loss).
    """
    logging.info("*** Features ***")
    for name in sorted(features.keys()):
        logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    d_input_ids = features["d_input_ids"]
    d_input_mask = features["d_input_mask"]
    d_location_ids = features["d_location_ids"]
    next_sentence_labels = features["next_sentence_labels"]

    if dict_run_config.prediction_op == "loss":
        # Deterministic masking so loss predictions are comparable.
        seed = 0
    else:
        seed = None

    if dict_run_config.prediction_op == "loss_fixed_mask" or train_config.fixed_mask:
        # Masks come precomputed in the input; weights are all-ones.
        masked_input_ids = input_ids
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = tf.ones_like(masked_lm_positions,
                                         dtype=tf.float32)
    else:
        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = random_masking(input_ids, input_mask,
                             train_config.max_predictions_per_seq, MASK_ID, seed)

    if dict_run_config.use_d_segment_ids:
        d_segment_ids = features["d_segment_ids"]
    else:
        d_segment_ids = None

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=bert_config,
        d_config=dbert_config,
        is_training=is_training,
        input_ids=masked_input_ids,
        input_mask=input_mask,
        d_input_ids=d_input_ids,
        d_input_mask=d_input_mask,
        d_location_ids=d_location_ids,
        use_target_pos_emb=dict_run_config.use_target_pos_emb,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        d_segment_ids=d_segment_ids,
        pool_dict_output=dict_run_config.pool_dict_output,
    )
    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(),
         model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
         masked_lm_weights)
    # NSP output is computed (it feeds the EVAL metrics) but its loss is NOT
    # added to total_loss here.
    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         bert_config, model.get_pooled_output(), next_sentence_labels)
    total_loss = masked_lm_loss

    if dict_run_config.train_op == "entry_prediction":
        # Classify whether the dictionary entry was useful; this REPLACES
        # the masked-LM loss.
        score_label = features["useful_entry"]  # [batch, 1]
        score_label = tf.reshape(score_label, [-1])
        entry_logits = bert_common.dense(2, bert_common.create_initializer(bert_config.initializer_range))\
            (model.get_dict_pooled_output())
        print("entry_logits: ", entry_logits.shape)
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=entry_logits, labels=score_label)
        loss = tf.reduce_mean(losses)
        total_loss = loss

    if dict_run_config.train_op == "lookup":
        # Auxiliary lookup-index prediction loss, ADDED to the LM loss.
        lookup_idx = features["lookup_idx"]
        lookup_loss, lookup_example_loss, lookup_score = \
            sequence_index_prediction(bert_config, lookup_idx,
                                      model.get_sequence_output())
        total_loss += lookup_loss

    tvars = tf.compat.v1.trainable_variables()
    init_vars = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        if dict_run_config.is_bert_checkpoint:
            # BERT checkpoint: two maps (main + dict towers), both restored
            # from the same checkpoint.
            map1, map2, init_vars = get_bert_assignment_map_for_dict(
                tvars, train_config.init_checkpoint)

            def load_fn():
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, map1)
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, map2)
        else:
            map1, init_vars = get_assignment_map_as_is(
                tvars, train_config.init_checkpoint)

            def load_fn():
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, map1)

        if train_config.use_tpu:
            def tpu_scaffold():
                load_fn()
                return tf.compat.v1.train.Scaffold()
            scaffold_fn = tpu_scaffold
        else:
            load_fn()

    logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in init_vars:
            init_string = ", *INIT_FROM_CKPT*"
        logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                     init_string)
    logging.info("Total parameters : %d" % get_param_num())

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        if train_config.gradient_accumulation == 1:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
        else:
            logging.info("Using gradient accumulation : %d"
                         % train_config.gradient_accumulation)
            train_op = get_accumulated_optimizer_from_config(
                total_loss, train_config, tvars,
                train_config.gradient_accumulation)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, next_sentence_example_loss,
            next_sentence_log_probs, next_sentence_labels
        ])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        if dict_run_config.prediction_op == "gradient":
            logging.info("Fetching gradient")
            gradient = get_gradients(model, masked_lm_log_probs,
                                     train_config.max_predictions_per_seq,
                                     bert_config.vocab_size)
            predictions = {
                "masked_input_ids": masked_input_ids,
                #"input_ids": input_ids,
                "d_input_ids": d_input_ids,
                "masked_lm_positions": masked_lm_positions,
                "gradients": gradient,
            }
        elif dict_run_config.prediction_op == "loss" or dict_run_config.prediction_op == "loss_fixed_mask":
            logging.info("Fetching loss")
            predictions = {
                "masked_lm_example_loss": masked_lm_example_loss,
            }
        else:
            raise Exception("prediction target not specified")
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """`model_fn` for TPUEstimator: masked-LM training for the APR model.

    Applies random dynamic masking (seeded in PREDICT mode for
    reproducibility), encodes with the APR model, and trains on the
    masked-LM loss only. TRAIN supports an optional two-learning-rate
    optimizer (ssdr_config "use_two_lr") and attaches an OOM reporting hook.
    """
    tf_logging.info("model_fn_apr_lm")
    # NOTE: the string below is a no-op expression (not the docstring);
    # kept as-is.
    """The `model_fn` for TPUEstimator."""
    log_features(features)
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Fixed seed so PREDICT masking is reproducible across runs.
        tf.random.set_seed(0)
        seed = 0
    else:
        seed = None
    tf_logging.info("Doing dynamic masking (random)")
    masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        = random_masking(input_ids, input_mask,
                         train_config.max_predictions_per_seq, MASK_ID, seed)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    tf_logging.info("Using masked_input_ids")
    model = APR(
        masked_input_ids,
        input_mask,
        segment_ids,
        is_training,
        train_config.use_one_hot_embeddings,
        bert_config,
        ssdr_config,
        dict_run_config.def_per_batch,
        dict_run_config.inner_batch_size,
        dict_run_config.max_def_length,
        # MainTransformer,
        # SecondTransformerEmbeddingLess,
    )
    masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
        = get_masked_lm_output(bert_config, model.get_sequence_output(),
                               model.get_embedding_table(),
                               masked_lm_positions, masked_lm_ids,
                               masked_lm_weights)
    loss = masked_lm_loss

    # Checkpoint restore via the dict-model assignment map, applied twice
    # (see align_checkpoint_twice).
    tvars = tf.compat.v1.trainable_variables()
    assignment_fn = dict_model_fn.get_bert_assignment_map_for_dict
    initialized_variable_names, init_fn = align_checkpoint_twice(
        tvars, train_config.init_checkpoint, assignment_fn)
    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)

    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    if mode == tf.estimator.ModeKeys.TRAIN:
        if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
            # Separate learning rates for the two model parts.
            tf_logging.info("Using two lr for each parts")
            train_op = create_optimizer_with_separate_lr(loss, train_config)
        else:
            tf_logging.info("Using single lr ")
            train_op = optimization.create_optimizer_from_config(
                loss, train_config)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       training_hooks=[OomReportingHook()],
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn_lm, [
            masked_lm_example_loss,
            masked_lm_log_probs,
            masked_lm_ids,
            masked_lm_weights,
        ])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "masked_input_ids": masked_input_ids,
            "masked_lm_ids": masked_lm_ids,
            "masked_lm_example_loss": masked_lm_example_loss,
            "masked_lm_positions": masked_lm_positions,
        }
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       predictions=predictions,
                                       scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Segmented long-document classification: the document is split into
    overlapping windows (`iterate_over`), every window is encoded by the
    model, and a softmax classifier over the pooled output is trained with
    sparse cross-entropy. Several `special_flags` toggle alternative
    pooling ("new_pooling"), loss reweighting ("bias_loss"), optimizer
    choice ("simple_optimizer", "ask_tvar"), and feature feeding
    ("feed_features").

    Args:
        features: dict with "query", "doc", "doc_mask", "data_id";
            "label_ids" outside PREDICT; optional "is_real_example".
        labels: unused (TPUEstimator contract).
        mode: tf.estimator.ModeKeys value.
        params: unused (TPUEstimator contract).

    Returns:
        tf.compat.v1.estimator.tpu.TPUEstimatorSpec for the given mode.

    Note: closes over max_seq_length, query_len, total_doc_len,
    model_config, train_config, special_flags, model_class,
    override_prediction_fn, etc. from the enclosing factory.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    query = features["query"]
    doc = features["doc"]
    doc_mask = features["doc_mask"]
    data_ids = features["data_id"]
    # 3 accounts for the [CLS] and two [SEP] special tokens per window.
    segment_len = max_seq_length - query_len - 3
    step_size = model_config.step_size
    input_ids, input_mask, segment_ids, n_segments = \
        iterate_over(query, doc, doc_mask, total_doc_len, segment_len, step_size)
    if mode == tf.estimator.ModeKeys.PREDICT:
        # No gold labels at predict time; dummy ones keep the loss graph valid.
        label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
    else:
        label_ids = features["label_ids"]
        label_ids = tf.reshape(label_ids, [-1])
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    if "feed_features" in special_flags:
        model = model_class(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            features=features,
        )
    else:
        model = model_class(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    if "new_pooling" in special_flags:
        pooled = mimic_pooling(model.get_sequence_output(),
                               model_config.hidden_size,
                               model_config.initializer_range)
    else:
        pooled = model.get_pooled_output()

    if train_config.checkpoint_type != "bert_nli" and train_config.use_old_logits:
        # Legacy classifier head (Keras Dense layer), kept for checkpoint
        # compatibility.
        tf_logging.info("Use old version of logistic regression")
        if is_training:
            pooled = dropout(pooled, 0.1)
        logits = tf.keras.layers.Dense(train_config.num_classes,
                                       name="cls_dense")(pooled)
    else:
        tf_logging.info("Use fixed version of logistic regression")
        output_weights = tf.compat.v1.get_variable(
            "output_weights",
            [train_config.num_classes, model_config.hidden_size],
            initializer=tf.compat.v1.truncated_normal_initializer(
                stddev=0.02))
        output_bias = tf.compat.v1.get_variable(
            "output_bias", [train_config.num_classes],
            initializer=tf.compat.v1.zeros_initializer())
        if is_training:
            pooled = dropout(pooled, 0.1)
        logits = tf.matmul(pooled, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

    loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label_ids)
    if "bias_loss" in special_flags:
        tf_logging.info("Using special_flags : bias_loss")
        loss_arr = reweight_zero(label_ids, loss_arr)
    loss = tf.reduce_mean(input_tensor=loss_arr)

    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(
            train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                               train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)

    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        if "simple_optimizer" in special_flags:
            tf_logging.info("using simple optimizer")
            train_op = create_simple_optimizer(loss,
                                               train_config.learning_rate,
                                               train_config.use_tpu)
        else:
            if "ask_tvar" in special_flags:
                tvars = model.get_trainable_vars()
            else:
                tvars = None
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn,
                        [logits, label_ids, is_real_example])
        # BUG FIX: was `mode=model` — passing the model object instead of
        # the estimator mode, which is not a valid ModeKeys value and
        # breaks the EVAL spec. Every other branch correctly uses `mode`.
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "logits": logits,
            "doc": doc,
            "data_ids": data_ids,
        }
        # Pass through auxiliary identifier features when present.
        useful_inputs = ["data_id", "input_ids2", "data_ids"]
        for input_name in useful_inputs:
            if input_name in features:
                predictions[input_name] = features[input_name]
        if override_prediction_fn is not None:
            predictions = override_prediction_fn(predictions, model)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Builds a masked-LM model that also consumes dictionary-definition inputs
    (the d_* features), restores weights from a BERT or as-is checkpoint, and
    returns a TPUEstimatorSpec for TRAIN / EVAL / PREDICT.
    """
    logging.info("*** Features ***")
    for name in sorted(features.keys()):
        logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    # "a" inputs are the main sequences (inner_batch_size rows); "b" inputs
    # are the dictionary-definition sequences (def_per_batch rows).
    def reform_a_input(raw_input):
        return tf.reshape(raw_input, [dict_run_config.inner_batch_size, -1])

    def reform_b_input(raw_input):
        return tf.reshape(raw_input, [dict_run_config.def_per_batch, -1])

    input_ids = reform_a_input(features["input_ids"])  # [batch_size, def]
    input_mask = reform_a_input(features["input_mask"])
    segment_ids = reform_a_input(features["segment_ids"])
    d_input_ids = reform_b_input(features["d_input_ids"])
    d_input_mask = reform_b_input(features["d_input_mask"])
    d_location_ids = reform_a_input(features["d_location_ids"])
    ab_mapping = features["ab_mapping"]

    # Ablation switch: zero out the dictionary inputs so the model cannot
    # read definitions.
    if hasattr(ssdr_config, "blind_dictionary") and ssdr_config.blind_dictionary:
        logging.info("Hide dictionary")
        d_input_ids = tf.zeros_like(d_input_ids)
        d_input_mask = tf.zeros_like(d_input_mask)

    # Deterministic masking when predicting losses, random otherwise.
    if dict_run_config.prediction_op == "loss":
        seed = 0
    else:
        seed = None

    if dict_run_config.prediction_op == "loss_fixed_mask" or train_config.fixed_mask:
        # Masking was precomputed in the input pipeline; use it verbatim.
        masked_input_ids = input_ids
        masked_lm_positions = reform_a_input(features["masked_lm_positions"])
        masked_lm_ids = reform_a_input(features["masked_lm_ids"])
        masked_lm_weights = reform_a_input(features["masked_lm_weights"])
    else:
        # Dynamic masking at graph-build time.
        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = random_masking(input_ids, input_mask,
                             train_config.max_predictions_per_seq, MASK_ID, seed)

    if dict_run_config.use_d_segment_ids:
        d_segment_ids = reform_b_input(features["d_segment_ids"])
    else:
        d_segment_ids = None

    if dict_run_config.use_ab_mapping_mask:
        ab_mapping_mask = reform_a_input(features["ab_mapping_mask"])
    else:
        ab_mapping_mask = None

    if ssdr_config.compare_attrib_value_safe("consistency", True):
        # Consistency mode: duplicate the "a" side of the batch (second copy
        # gets zeroed d_location_ids / ab_mapping_mask) so the same masked
        # inputs are seen with and without dictionary grounding.
        # NOTE(review): this branch dereferences ab_mapping_mask.shape, so it
        # presumably requires use_ab_mapping_mask to be set — verify configs.
        print("masked_input_ids", masked_input_ids.shape)
        print('d_input_ids', d_input_ids.shape)
        print("ab_mapping_mask", ab_mapping_mask.shape)
        masked_input_ids = tf.tile(masked_input_ids, [2, 1])
        input_mask = tf.tile(input_mask, [2, 1])
        segment_ids = tf.tile(segment_ids, [2, 1])
        dummy = tf.zeros_like(d_input_ids, tf.int32)
        #d_input_ids = tf.concat([d_input_ids, dummy], axis=0)
        #d_input_mask = tf.concat([d_input_mask, dummy], axis=0)
        #if d_segment_ids is not None:
        #    d_segment_ids = tf.concat([d_segment_ids, dummy], axis=0)
        d_location_ids = tf.concat(
            [d_location_ids, tf.zeros_like(d_location_ids, tf.int32)], axis=0)
        #ab_mapping = tf.concat([ab_mapping, tf.zeros_like(ab_mapping, tf.int32)], axis=0)
        ab_mapping_mask = tf.concat(
            [ab_mapping_mask, tf.zeros_like(ab_mapping_mask, tf.int32)], axis=0)
        masked_lm_positions = tf.tile(masked_lm_positions, [2, 1])
        masked_lm_ids = tf.tile(masked_lm_ids, [2, 1])
        masked_lm_weights = tf.tile(masked_lm_weights, [2, 1])
        print("masked_input_ids", masked_input_ids.shape)
        print('d_input_ids', d_input_ids.shape)
        print("ab_mapping_mask", ab_mapping_mask.shape)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=bert_config,
        ssdr_config=ssdr_config,
        is_training=is_training,
        input_ids=masked_input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        d_input_ids=d_input_ids,
        d_input_mask=d_input_mask,
        d_segment_ids=d_segment_ids,
        d_location_ids=d_location_ids,
        ab_mapping=ab_mapping,
        ab_mapping_mask=ab_mapping_mask,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(), model.get_embedding_table(),
         masked_lm_positions, masked_lm_ids, masked_lm_weights)
    # Only the masked-LM loss is optimized.
    total_loss = masked_lm_loss

    tvars = tf.compat.v1.trainable_variables()
    init_vars = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        if dict_run_config.is_bert_checkpoint:
            # Plain BERT checkpoint: two assignment maps (main encoder and
            # dictionary-side variables) loaded from the same file.
            map1, map2, init_vars = dict_model_fn.get_bert_assignment_map_for_dict(
                tvars, train_config.init_checkpoint)

            def load_fn():
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, map1)
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, map2)
        else:
            # Checkpoint already has this model's variable layout.
            map1, init_vars = get_assignment_map_as_is(
                tvars, train_config.init_checkpoint)

            def load_fn():
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, map1)

        if train_config.use_tpu:
            # On TPU, restoration must happen inside the scaffold function.
            def tpu_scaffold():
                load_fn()
                return tf.compat.v1.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            load_fn()

    logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in init_vars:
            init_string = ", *INIT_FROM_CKPT*"
        logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                     init_string)
    logging.info("Total parameters : %d" % get_param_num())

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        if train_config.gradient_accumulation == 1:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
        else:
            logging.info("Using gradient accumulation : %d"
                         % train_config.gradient_accumulation)
            train_op = get_accumulated_optimizer_from_config(
                total_loss, train_config, tvars,
                train_config.gradient_accumulation)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn_lm, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights
        ])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        # PREDICT: the fetched tensors depend on dict_run_config.prediction_op.
        if dict_run_config.prediction_op == "gradient":
            logging.info("Fetching gradient")
            gradient = dict_model_fn.get_gradients(
                model, masked_lm_log_probs,
                train_config.max_predictions_per_seq, bert_config.vocab_size)
            predictions = {
                "masked_input_ids": masked_input_ids,
                "d_input_ids": d_input_ids,
                "masked_lm_positions": masked_lm_positions,
                "gradients": gradient,
            }
        elif dict_run_config.prediction_op == "scores":
            logging.info("Fetching input/d_input and scores")
            predictions = {
                "masked_input_ids": masked_input_ids,
                "d_input_ids": d_input_ids,
                "masked_lm_positions": masked_lm_positions,
                "masked_lm_ids": masked_lm_ids,
                "ab_mapping": ab_mapping,
                "d_location_ids": d_location_ids,
                "scores": model.scores,
            }
        elif dict_run_config.prediction_op == "loss" or dict_run_config.prediction_op == "loss_fixed_mask":
            logging.info("Fetching loss")
            predictions = {
                "masked_lm_example_loss": masked_lm_example_loss,
            }
        else:
            raise Exception("prediction target not specified")
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Classifies pre-computed evidence vectors with MultiEvidenceCombiner:
    reshapes the flat feature tensors into
    [batch, num_window, max_sequence(, hidden_size)], applies a dense
    classification head on the pooled output, and returns a TPUEstimatorSpec
    for TRAIN / EVAL / PREDICT.

    BUG FIX: the EVAL branch previously passed ``mode=model`` (the
    MultiEvidenceCombiner instance) to TPUEstimatorSpec instead of the
    estimator ``mode``, which cannot be a valid mode value; every sibling
    model_fn in this file uses ``mode=mode``.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    vectors = features["vectors"]  # [batch_size, max_unit, num_hidden]
    valid_mask = features["valid_mask"]
    label_ids = features["label_ids"]
    vectors = tf.reshape(vectors, [
        -1, model_config.num_window, model_config.max_sequence,
        model_config.hidden_size
    ])
    valid_mask = tf.reshape(
        valid_mask, [-1, model_config.num_window, model_config.max_sequence])
    label_ids = tf.reshape(label_ids, [-1])
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)
    else:
        # Padding-free input: treat every example as real.
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = MultiEvidenceCombiner(config=model_config,
                                  is_training=is_training,
                                  vectors=vectors,
                                  valid_mask=valid_mask,
                                  scope=None)
    pooled = model.pooled_output
    if is_training:
        pooled = dropout(pooled, 0.1)
    logits = tf.keras.layers.Dense(config.num_classes,
                                   name="cls_dense")(pooled)
    loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label_ids)
    if "bias_loss" in special_flags:
        # Re-weight the loss of label-0 examples.
        tf_logging.info("Using special_flags : bias_loss")
        loss_arr = reweight_zero(label_ids, loss_arr)
    loss = tf.reduce_mean(input_tensor=loss_arr)

    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)

    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        if "simple_optimizer" in special_flags:
            tf_logging.info("using simple optimizer")
            train_op = create_simple_optimizer(loss, config.learning_rate,
                                               config.use_tpu)
        else:
            if "ask_tvar" in special_flags:
                tvars = model.get_trainable_vars()
            else:
                # None lets the optimizer pick up all trainable variables.
                tvars = None
            train_op = optimization.create_optimizer_from_config(
                loss, config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn,
                        [logits, label_ids, is_real_example])
        # FIX: was mode=model (the model object), must be the estimator mode.
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {"logits": logits, "label_ids": label_ids}
        if override_prediction_fn is not None:
            predictions = override_prediction_fn(predictions, model)
        useful_inputs = ["data_id", "input_ids2"]
        for input_name in useful_inputs:
            if input_name in features:
                predictions[input_name] = features[input_name]
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Masked-LM over either an APR model or plain BERT (selected by the
    closed-over ``model_name``), with a fixed mask taken from the features,
    and checkpoint restoration via align_checkpoint_twice.
    """
    log_features(features)

    # "a" inputs are the main sequences; "b" inputs are dictionary
    # definitions (only the APR branch consumes those batch sizes).
    def reform_a_input(raw_input):
        return tf.reshape(raw_input, [dict_run_config.inner_batch_size, -1])

    def reform_b_input(raw_input):
        return tf.reshape(raw_input, [dict_run_config.def_per_batch, -1])

    input_ids = reform_a_input(features["input_ids"])
    input_mask = reform_a_input(features["input_mask"])
    segment_ids = reform_a_input(features["segment_ids"])
    tf_logging.info("input_ids, input_mask")
    # input_ids = features["input_ids"]
    # input_mask = features["input_mask"]
    # segment_ids = features["segment_ids"]
    # Fix randomness at predict time so outputs are reproducible.
    if mode == tf.estimator.ModeKeys.PREDICT:
        tf.random.set_seed(0)
        seed = 0
    else:
        seed = None
    # tf_logging.info("Doing dynamic masking (random)")
    # masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
    #     = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID, seed)
    # if dict_run_config.prediction_op == "loss_fixed_mask" or train_config.fixed_mask:
    # Dynamic masking is disabled (kept above for reference); the mask comes
    # precomputed from the input pipeline.
    masked_input_ids = input_ids
    masked_lm_positions = reform_a_input(features["masked_lm_positions"])
    masked_lm_ids = reform_a_input(features["masked_lm_ids"])
    masked_lm_weights = reform_a_input(features["masked_lm_weights"])

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    if model_name == "APR":
        model = APR(
            masked_input_ids,
            input_mask,
            segment_ids,
            is_training,
            train_config.use_one_hot_embeddings,
            bert_config,
            ssdr_config,
            dict_run_config.def_per_batch,
            dict_run_config.inner_batch_size,
            dict_run_config.max_def_length,
        )
    elif model_name == "BERT":
        model = BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=masked_input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    else:
        # Unknown model_name is a programming error.
        assert False
    masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
        = get_masked_lm_output(bert_config, model.get_sequence_output(),
                               model.get_embedding_table(),
                               masked_lm_positions, masked_lm_ids,
                               masked_lm_weights)
    loss = masked_lm_loss

    tvars = tf.compat.v1.trainable_variables()
    assignment_fn = dict_model_fn.get_bert_assignment_map_for_dict
    initialized_variable_names, init_fn = align_checkpoint_twice(
        tvars, train_config.init_checkpoint, assignment_fn)
    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)

    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    if mode == tf.estimator.ModeKeys.TRAIN:
        if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
            # Separate learning rates for the two parameter groups.
            tf_logging.info("Using two lr for each parts")
            train_op = create_optimizer_with_separate_lr(loss, train_config)
        else:
            tf_logging.info("Using single lr ")
            train_op = optimization.create_optimizer_from_config(
                loss, train_config)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       training_hooks=[OomReportingHook()],
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn_lm, [
            masked_lm_example_loss,
            masked_lm_log_probs,
            masked_lm_ids,
            masked_lm_weights,
        ])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "masked_input_ids": masked_input_ids,
            "masked_lm_ids": masked_lm_ids,
            "masked_lm_example_loss": masked_lm_example_loss,
            "masked_lm_positions": masked_lm_positions,
        }
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       predictions=predictions,
                                       scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Masked LM with an auxiliary "label token" LM head: label tokens are
    located in the masked input, predicted under a separate variable scope,
    and their loss is mixed into the total with weight model_config.ratio.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if "next_sentence_labels" in features:
        next_sentence_labels = features["next_sentence_labels"]
    else:
        next_sentence_labels = get_dummy_next_sentence_labels(input_ids)
    # Fix the seed at predict time so masking is reproducible.
    if mode == tf.estimator.ModeKeys.PREDICT:
        tf.random.set_seed(0)
        seed = 0
        print("Seed as zero")
    else:
        seed = None
    tf_logging.info("Doing dynamic masking (random)")
    # Label tokens are excluded from random masking.
    special_tokens = [LABEL_UNK, LABEL_0, LABEL_1, LABEL_2]
    masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        = random_masking(input_ids, input_mask,
                         train_config.max_predictions_per_seq, MASK_ID, seed,
                         special_tokens)
    # Locate label tokens; is_test_inst doubles as the weight vector for the
    # label-token LM head.
    masked_input_ids, masked_lm_positions_label, masked_label_ids_label, is_test_inst \
        = get_label_indices(masked_input_ids)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=model_config,
        is_training=is_training,
        input_ids=masked_input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output_fn(
         model_config, model.get_sequence_output(),
         model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
         masked_lm_weights)
    # Separate scope so the label-token head gets its own output weights.
    with tf.compat.v1.variable_scope("label_token"):
        (masked_lm_loss_label, masked_lm_example_loss_label,
         masked_lm_log_probs_label) = get_masked_lm_output_fn(
             model_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions_label,
             masked_label_ids_label, is_test_inst)
    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         model_config, model.get_pooled_output(), next_sentence_labels)
    # NOTE(review): next_sentence_loss is built but not added to total_loss;
    # presumably intentional (the NSP head still creates its variables) —
    # confirm with the training recipe.
    total_loss = masked_lm_loss + masked_lm_loss_label * model_config.ratio

    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names, initialized_variable_names2, init_fn\
        = align_checkpoint_for_lm(tvars,
                                  train_config.checkpoint_type,
                                  train_config.init_checkpoint,
                                  train_config.second_init_checkpoint,
                                  )
    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names,
                        initialized_variable_names2)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            total_loss, train_config)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[OomReportingHook()],
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, masked_lm_example_loss_label,
            masked_lm_log_probs_label, masked_label_ids_label, is_test_inst
        ])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "masked_input_ids": masked_input_ids,
            "masked_lm_ids": masked_lm_ids,
            "masked_lm_example_loss": masked_lm_example_loss,
            "masked_lm_positions": masked_lm_positions,
            "masked_lm_example_loss_label": masked_lm_example_loss_label,
            "masked_lm_log_probs_label": masked_lm_log_probs_label,
            "masked_label_ids_label": masked_label_ids_label,
            "is_test_inst": is_test_inst,
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Domain-adversarial classifier: a BERT encoder feeds (a) a class head
    trained on examples where is_valid_label is set and (b) a domain head
    behind a gradient-reversal layer; total loss is pred + alpha * domain.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if mode == tf.estimator.ModeKeys.PREDICT:
        # No labels at predict time; feed dummy ones so the graph builds.
        label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
    else:
        label_ids = features["label_ids"]
    label_ids = tf.reshape(label_ids, [-1])
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    domain_ids = features["domain_ids"]
    domain_ids = tf.reshape(domain_ids, [-1])
    # Masks out the prediction loss for unlabeled (domain-only) examples.
    is_valid_label = features["is_valid_label"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model_1 = BertModel(
        config=model_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    pooled = model_1.get_pooled_output()
    if is_training:
        pooled = dropout(pooled, 0.1)
    logits = tf.keras.layers.Dense(train_config.num_classes,
                                   name="cls_dense")(pooled)
    pred_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label_ids)
    num_domain = 2
    # Gradient reversal makes the encoder adversarial to the domain head.
    pooled_for_domain = grad_reverse(pooled)
    domain_logits = tf.keras.layers.Dense(
        num_domain, name="domain_dense")(pooled_for_domain)
    domain_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=domain_logits, labels=domain_ids)
    pred_loss = tf.reduce_mean(pred_losses *
                               tf.cast(is_valid_label, tf.float32))
    domain_loss = tf.reduce_mean(domain_losses)
    tf.compat.v1.summary.scalar('domain_loss', domain_loss)
    tf.compat.v1.summary.scalar('pred_loss', pred_loss)
    # alpha balances the adversarial domain term against the class loss.
    alpha = model_config.alpha
    loss = pred_loss + alpha * domain_loss

    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(
            train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)

    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # None lets the optimizer pick up all trainable variables.
        tvars = None
        train_op = optimization.create_optimizer_from_config(
            loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn,
                        [logits, label_ids, is_real_example])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "logits": logits,
        }
        if "data_id" in features:
            predictions['data_id'] = features['data_id']
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Loss-prediction setup: two frozen-gradient "teacher" LMs (scopes
    MaybeBERT / MaybeBFN) produce per-example masked-LM losses, and a third
    model (model_class + IndependentLossModel) is trained to predict those
    losses from the unmasked input.
    """
    log_features(features)
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    next_sentence_labels = features["next_sentence_labels"]

    masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        = random_masking(input_ids, input_mask,
                         train_config.max_predictions_per_seq, MASK_ID)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    prefix1 = "MaybeBERT"
    prefix2 = "MaybeBFN"
    with tf.compat.v1.variable_scope(prefix1):
        model1 = BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=masked_input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        # masked_lm_loss (scalar) is recomputed below for model2; only the
        # per-example losses from each teacher are consumed.
        (masked_lm_loss, masked_lm_example_loss1,
         masked_lm_log_probs1) = get_masked_lm_output(
             bert_config, model1.get_sequence_output(),
             model1.get_embedding_table(), masked_lm_positions,
             masked_lm_ids, masked_lm_weights)
        masked_lm_example_loss1 = tf.reshape(masked_lm_example_loss1,
                                             masked_lm_ids.shape)
    with tf.compat.v1.variable_scope(prefix2):
        model2 = BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=masked_input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        (masked_lm_loss, masked_lm_example_loss2,
         masked_lm_log_probs2) = get_masked_lm_output(
             bert_config, model2.get_sequence_output(),
             model2.get_embedding_table(), masked_lm_positions,
             masked_lm_ids, masked_lm_weights)
        print(model2.get_sequence_output().shape)
        masked_lm_example_loss2 = tf.reshape(masked_lm_example_loss2,
                                             masked_lm_ids.shape)

    # The trained model sees the ORIGINAL (unmasked) input_ids.
    model = model_class(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    loss_model = IndependentLossModel(bert_config)
    # stop_gradient: teacher losses are targets, not trainable signals.
    loss_model.train_modeling(model.get_sequence_output(),
                              masked_lm_positions,
                              masked_lm_weights,
                              tf.stop_gradient(masked_lm_example_loss1),
                              tf.stop_gradient(masked_lm_example_loss2))

    total_loss = loss_model.total_loss
    loss1 = loss_model.loss1
    loss2 = loss_model.loss2
    per_example_loss1 = loss_model.per_example_loss1
    per_example_loss2 = loss_model.per_example_loss2
    losses1 = tf.reduce_sum(per_example_loss1, axis=1)
    losses2 = tf.reduce_sum(per_example_loss2, axis=1)
    prob1 = loss_model.prob1
    prob2 = loss_model.prob2

    # second_init_checkpoint holds the two teacher checkpoints, comma-joined.
    checkpoint2_1, checkpoint2_2 = train_config.second_init_checkpoint.split(
        ",")
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names_1, init_fn_1 = get_init_fn_for_two_checkpoints(
        train_config, tvars, checkpoint2_1, prefix1, checkpoint2_2, prefix2)
    assignment_fn = get_bert_assignment_map
    assignment_map2, initialized_variable_names_2 = assignment_fn(
        tvars, train_config.init_checkpoint)
    initialized_variable_names = {}
    initialized_variable_names.update(initialized_variable_names_1)
    initialized_variable_names.update(initialized_variable_names_2)

    def init_fn():
        # Teachers from their two checkpoints, main model from init_checkpoint.
        init_fn_1()
        tf.compat.v1.train.init_from_checkpoint(
            train_config.init_checkpoint, assignment_map2)

    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            total_loss, train_config)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        def metric_fn(per_example_loss1, per_example_loss2):
            loss1 = tf.compat.v1.metrics.mean(values=per_example_loss1)
            loss2 = tf.compat.v1.metrics.mean(values=per_example_loss2)
            return {
                "loss1": loss1,
                "loss2": loss2,
            }

        eval_metrics = (metric_fn, [losses1, losses2])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "prob1": prob1,
            "prob2": prob2,
            "per_example_loss1": per_example_loss1,
            "per_example_loss2": per_example_loss2,
            "input_ids": input_ids,
            "masked_lm_positions": masked_lm_positions,
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Per-token regression: a dense head over the BERT sequence output is
    trained with masked mean-absolute-error against label_ids, averaging
    only over positions where label_masks is set.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info("name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    label_masks = features["label_masks"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    logits = tf.keras.layers.Dense(train_config.num_classes,
                                   name="token_regression")(
                                       model.get_sequence_output())
    per_ex_losses = tf.keras.losses.MAE(tf.expand_dims(label_ids, 2), logits)
    # Zero out loss at unlabeled positions, then average per example;
    # +1e-5 guards against an all-zero mask row.
    masked_losses = per_ex_losses * tf.cast(label_masks, tf.float32)
    losses_sum = tf.reduce_sum(masked_losses, axis=1)
    denom = tf.reduce_sum(tf.cast(label_masks, tf.float32), axis=1) + 1e-5
    losses = losses_sum / denom
    total_loss = tf.reduce_mean(losses)

    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        assignment_map, initialized_variable_names = get_bert_assignment_map(
            tvars, train_config.init_checkpoint)
        if train_config.use_tpu:
            # On TPU, restoration must happen inside the scaffold function.
            def tpu_scaffold():
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)
                return tf.compat.v1.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.compat.v1.train.init_from_checkpoint(
                train_config.init_checkpoint, assignment_map)
    tf_logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf_logging.info("name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            total_loss, train_config)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        def metric_fn(logits, label_ids, label_masks):
            # Sign-agreement accuracy over labeled positions only.
            logits_reduced = tf.squeeze(logits, 2)
            is_neg_correct = tf.logical_and(tf.less(label_ids, 0.),
                                            tf.less(logits_reduced, 0.))
            is_pos_correct = tf.logical_and(tf.less(0., label_ids),
                                            tf.less(0., logits_reduced))
            is_correct = tf.logical_or(is_neg_correct, is_pos_correct)
            float_masks = tf.cast(label_masks, tf.float32)
            num_correct = tf.reduce_sum(
                tf.cast(is_correct, tf.float32) * float_masks, axis=1)
            num_problems = tf.reduce_sum(float_masks, axis=1) + 1e-5
            acc_list = num_correct / num_problems
            mean_acc = tf.compat.v1.metrics.mean(values=acc_list)
            return {'mean_acc': mean_acc}

        eval_metrics = (metric_fn, [logits, label_ids, label_masks])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "logits": logits,
            "input_ids": input_ids,
            "labels": label_ids,
            "label_masks": label_masks,
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def _train_op_fn(train_config, loss):
    """Build a training op for `loss` via the v2 optimizer factory."""
    return optimization_v2.create_optimizer_from_config(loss, train_config)