Example #1
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("model_fn_ranking")
        log_features(features)

        input_ids, input_mask, segment_ids = combine_paired_input_features(features)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled_output = model.get_pooled_output()
        if is_training:
            pooled_output = dropout(pooled_output, 0.1)

        loss, losses, y_pred = apply_loss_modeling(modeling_opt, pooled_output, features)


        assignment_fn = assignment_map.get_bert_assignment_map
        scaffold_fn = checkpoint_init(assignment_fn, train_config)

        optimizer_factory = lambda x: create_optimizer_from_config(x, train_config)
        input_ids1 = tf.identity(features["input_ids1"])
        input_ids2 = tf.identity(features["input_ids2"])
        prediction = {
            "input_ids1": input_ids1,
            "input_ids2": input_ids2
        }
        return ranking_estimator_spec(mode, loss, losses, y_pred, scaffold_fn, optimizer_factory, prediction)
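
Note: `ranking_estimator_spec` is not shown in this listing. A minimal sketch of how such a helper might assemble the spec, assuming the TRAIN/EVAL/PREDICT conventions used throughout these examples (names and details are assumptions, not the actual implementation):

def ranking_estimator_spec(mode, loss, losses, y_pred,
                           scaffold_fn, optimizer_factory, prediction):
    # Hypothetical sketch; the real helper lives elsewhere in the codebase.
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimizer_factory(loss)  # the factory builds a train_op from the loss
        return TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op,
                                scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        def metric_fn(losses):
            return {"loss": tf.compat.v1.metrics.mean(losses)}
        return TPUEstimatorSpec(mode=mode, loss=loss,
                                eval_metrics=(metric_fn, [losses]),
                                scaffold_fn=scaffold_fn)
    else:
        predictions = dict(prediction, y_pred=y_pred)
        return TPUEstimatorSpec(mode=mode, predictions=predictions,
                                scaffold_fn=scaffold_fn)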
Example #2
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    output_weights = tf.compat.v1.get_variable(
        "output_weights", [4, 4],
        initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.02)
    )

    loss = tf.reduce_sum(output_weights)
    tvars = tf.compat.v1.trainable_variables()

    initialized_variable_names = {}

    scaffold_fn = None
    log_var_assignments(tvars, initialized_variable_names)

    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.EVAL:
        raise NotImplementedError("EVAL mode is not implemented in this example")
    else:
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={},
                scaffold_fn=scaffold_fn)

    return output_spec
Example #3
def get_training_spec(loss, mode, train_config, scaffold_fn):
    train_op = optimization.create_optimizer_from_config(loss, train_config)
    output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        scaffold_fn=scaffold_fn,
    )
    return output_spec
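
A hypothetical call site, showing how a `model_fn` would delegate to this helper in TRAIN mode (illustrative only):

    if mode == tf.estimator.ModeKeys.TRAIN:
        output_spec = get_training_spec(loss, mode, train_config, scaffold_fn)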
Example #4
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" % (name, features[name].shape))
        label_ids = features["label_ids"]
        label_ids = tf.reshape(label_ids, [-1])
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = model_class(
            config=model_config,
            is_training=is_training,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            features=features,
        )
        logits = model.get_logits()
        loss = model.get_loss(label_ids)
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            tvars = None  # presumably: let the optimizer fall back to all trainable variables
            train_op = optimization.create_optimizer_from_config(loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn, [
                logits, label_ids, is_real_example
            ])
            output_spec = TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
        else:
            predictions = {
                    "label_ids": label_ids,
                    "logits": logits,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)
        return output_spec
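
Note: `classification_metric_fn` is defined outside these snippets. A sketch under the assumption that it computes accuracy weighted by `is_real_example`, so padding examples are ignored (the actual metrics may be richer):

def classification_metric_fn(logits, label_ids, is_real_example):
    # Assumed form: weighted accuracy over real (non-padding) examples.
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    accuracy = tf.compat.v1.metrics.accuracy(
        labels=label_ids, predictions=predictions, weights=is_real_example)
    return {"accuracy": accuracy}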
Example #5
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        tf_logging.info("model_fn_ranking")
        log_features(features)

        input_ids, input_mask, segment_ids = combine_paired_input_features(
            features)
        batch_size, _ = get_shape_list(
            input_mask)  # not the real batch size: paired inputs make this 2 * real_batch_size
        use_context = tf.ones([batch_size, 1], tf.int32)


        stacked_input_ids, stacked_input_mask, stacked_segment_ids \
            = split_and_append_sep(input_ids, input_mask, segment_ids,
                                   config.total_sequence_length, config.window_size, CLS_ID, EOW_ID)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        with tf.compat.v1.variable_scope("sero"):
            model = model_class(config, is_training,
                                train_config.use_one_hot_embeddings)
            sequence_output_3d = model.network_stacked(stacked_input_ids,
                                                       stacked_input_mask,
                                                       stacked_segment_ids,
                                                       use_context)

        pooled_output = model.get_pooled_output()

        if is_training:
            pooled_output = dropout(pooled_output, 0.1)

        loss, losses, y_pred = apply_loss_modeling(config.loss, pooled_output,
                                                   features)

        assignment_fn = get_assignment_map_from_checkpoint_type(
            train_config.checkpoint_type, config.lower_layers)
        scaffold_fn = checkpoint_init(assignment_fn, train_config)
        prediction = {
            "stacked_input_ids": stacked_input_ids,
            "stacked_input_mask": stacked_input_mask,
            "stacked_segment_ids": stacked_segment_ids,
        }

        if train_config.gradient_accumulation != 1:
            optimizer_factory = lambda x: grad_accumulation.get_accumulated_optimizer_from_config(
                x, train_config, tf.compat.v1.trainable_variables(),
                train_config.gradient_accumulation)
        else:
            optimizer_factory = lambda x: create_optimizer_from_config(
                x, train_config)
        return ranking_estimator_spec(mode, loss, losses, y_pred, scaffold_fn,
                                      optimizer_factory, prediction)
Example #6
def define_graph(input_ids, input_mask, segment_ids):
    train_config = TrainConfigEx.from_flags(FLAGS)
    config = JsonConfig.from_json_file(FLAGS.model_config_file)
    model = HorizontalAlpha(config, True, False)
    model.call(input_ids, input_mask, segment_ids)
    masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID, 0)

    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output_albert(
         config, model.get_sequence_output(), model.get_embedding_table(),
         masked_lm_positions, masked_lm_ids, masked_lm_weights)
    train_op = optimization.create_optimizer_from_config(
        masked_lm_loss, train_config)

    return train_op
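
Since `define_graph` only returns a `train_op`, a caller has to run it inside a v1-style session. An illustrative driver, assuming the input tensors are already built (this driver is not part of the original code):

def run_training(input_ids, input_mask, segment_ids, num_steps):
    train_op = define_graph(input_ids, input_mask, segment_ids)
    with tf.compat.v1.train.MonitoredTrainingSession() as sess:
        for _ in range(num_steps):
            sess.run(train_op)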
Example #7
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        output_weights = tf.compat.v1.get_variable(
            "output_weights", [10, 100],
            initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.02))

        logits = output_weights[:, 1]
        loss = tf.reduce_mean(output_weights)
        loss2 = tf.reduce_mean(tf.square(output_weights))

        loss2_metric_val, loss2_metric_op = tf.compat.v1.metrics.mean(loss2)
        tf.compat.v1.summary.scalar("loss2", loss2)
        scaffold_fn = None
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            tvars = None
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn, [logits])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "logits": logits,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
Example #8
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        q_input_ids_1 = features["q_input_ids_1"]
        q_input_mask_1 = features["q_input_mask_1"]
        d_input_ids_1 = features["d_input_ids_1"]
        d_input_mask_1 = features["d_input_mask_1"]

        q_input_ids_2 = features["q_input_ids_2"]
        q_input_mask_2 = features["q_input_mask_2"]
        d_input_ids_2 = features["d_input_ids_2"]
        d_input_mask_2 = features["d_input_mask_2"]

        q_input_ids = tf.stack([q_input_ids_1, q_input_ids_2], axis=0)
        q_input_mask = tf.stack([q_input_mask_1, q_input_mask_2], axis=0)
        q_segment_ids = tf.zeros_like(q_input_ids, tf.int32)

        d_input_ids = tf.stack([d_input_ids_1, d_input_ids_2], axis=0)
        d_input_mask = tf.stack([d_input_mask_1, d_input_mask_2], axis=0)
        d_segment_ids = tf.zeros_like(d_input_ids, tf.int32)

        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        with tf.compat.v1.variable_scope("query"):
            model_q = model_class(
                config=model_config,
                is_training=is_training,
                input_ids=q_input_ids,
                input_mask=q_input_mask,
                token_type_ids=q_segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )

        with tf.compat.v1.variable_scope("document"):
            model_d = model_class(
                config=model_config,
                is_training=is_training,
                input_ids=d_input_ids,
                input_mask=d_input_mask,
                token_type_ids=d_segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
        pooled_q = model_q.get_pooled_output()
        pooled_d = model_d.get_pooled_output()

        logits = tf.matmul(pooled_q, pooled_d, transpose_b=True)
        y = tf.cast(label_ids, tf.float32) * 2 - 1
        losses = tf.maximum(1.0 - logits * y, 0)
        loss = tf.reduce_mean(losses)

        pred = tf.cast(logits > 0, tf.int32)

        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}

        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            if "simple_optimizer" in special_flags:
                tf_logging.info("using simple optimizer")
                train_op = create_simple_optimizer(loss,
                                                   train_config.learning_rate,
                                                   train_config.use_tpu)
            else:
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn,
                            [pred, label_ids, is_real_example])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "q_input_ids": q_input_ids,
                "d_input_ids": d_input_ids,
                "score": logits
            }

            useful_inputs = ["data_id", "input_ids2", "data_ids"]
            for input_name in useful_inputs:
                if input_name in features:
                    predictions[input_name] = features[input_name]
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)

        return output_spec
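
Note: `create_simple_optimizer` is referenced but not shown. A plausible sketch, assuming it is a plain Adam wrapper with TPU cross-shard gradient aggregation (the optimizer choice is an assumption):

def create_simple_optimizer(loss, learning_rate, use_tpu):
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)
    if use_tpu:
        # aggregate gradients across TPU shards
        optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer)
    global_step = tf.compat.v1.train.get_or_create_global_step()
    return optimizer.minimize(loss, global_step=global_step)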
Example #9
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        q_input_ids = features["q_input_ids"]
        q_input_mask = features["q_input_mask"]
        d_input_ids = features["d_input_ids"]
        d_input_mask = features["d_input_mask"]

        input_shape = get_shape_list(q_input_ids, expected_rank=2)
        batch_size = input_shape[0]

        doc_length = model_config.max_doc_length
        num_docs = model_config.num_docs

        d_input_ids_unpacked = tf.reshape(d_input_ids,
                                          [-1, num_docs, doc_length])
        d_input_mask_unpacked = tf.reshape(d_input_mask,
                                           [-1, num_docs, doc_length])

        d_input_ids_flat = tf.reshape(d_input_ids_unpacked, [-1, doc_length])
        d_input_mask_flat = tf.reshape(d_input_mask_unpacked, [-1, doc_length])

        q_segment_ids = tf.zeros_like(q_input_ids, tf.int32)
        d_segment_ids = tf.zeros_like(d_input_ids_flat, tf.int32)

        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        with tf.compat.v1.variable_scope(dual_model_prefix1):
            q_model_config = copy.deepcopy(model_config)
            q_model_config.max_seq_length = model_config.max_sent_length
            model_q = model_class(
                config=q_model_config,
                is_training=is_training,
                input_ids=q_input_ids,
                input_mask=q_input_mask,
                token_type_ids=q_segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )

        with tf.compat.v1.variable_scope(dual_model_prefix2):
            d_model_config = copy.deepcopy(model_config)
            d_model_config.max_seq_length = model_config.max_doc_length
            model_d = model_class(
                config=d_model_config,
                is_training=is_training,
                input_ids=d_input_ids_flat,
                input_mask=d_input_mask_flat,
                token_type_ids=d_segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
        pooled_q = model_q.get_pooled_output()  # [batch, vector_size]
        pooled_d_flat = model_d.get_pooled_output()  # [batch * num_docs, vector_size]

        pooled_d = tf.reshape(pooled_d_flat, [batch_size, num_docs, -1])
        pooled_q_t = tf.expand_dims(pooled_q, 1)
        pooled_d_t = tf.transpose(pooled_d, [0, 2, 1])
        all_logits = tf.matmul(pooled_q_t, pooled_d_t)  # [batch, 1, num_docs]
        if "hinge_all" in special_flags:
            apply_loss_modeing = hinge_all
        elif "sigmoid_all" in special_flags:
            apply_loss_modeing = sigmoid_all
        else:
            apply_loss_modeing = hinge_max
        logits, loss = apply_loss_modeing(all_logits, label_ids)
        pred = tf.cast(logits > 0, tf.int32)

        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}

        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            if "simple_optimizer" in special_flags:
                tf_logging.info("using simple optimizer")
                train_op = create_simple_optimizer(loss,
                                                   train_config.learning_rate,
                                                   train_config.use_tpu)
            else:
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn,
                            [pred, label_ids, is_real_example])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "q_input_ids": q_input_ids,
                "d_input_ids": d_input_ids,
                "logits": logits
            }

            useful_inputs = ["data_id", "input_ids2", "data_ids"]
            for input_name in useful_inputs:
                if input_name in features:
                    predictions[input_name] = features[input_name]
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)

        return output_spec
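
The three loss variants selected via `special_flags` are defined elsewhere. A sketch of how `hinge_max` and `hinge_all` could differ, assuming `all_logits` is `[batch, 1, num_docs]` and `label_ids` is in {0, 1} (assumed forms, not the project's code):

def hinge_max(all_logits, label_ids):
    # score each query by its best document window
    logits = tf.reduce_max(all_logits, axis=[1, 2])  # [batch]
    y = tf.cast(label_ids, tf.float32) * 2 - 1       # {0,1} -> {-1,+1}
    loss = tf.reduce_mean(tf.maximum(1.0 - logits * y, 0))
    return logits, loss

def hinge_all(all_logits, label_ids):
    # apply the hinge to every window, not just the maximum
    y = tf.reshape(tf.cast(label_ids, tf.float32) * 2 - 1, [-1, 1, 1])
    loss = tf.reduce_mean(tf.maximum(1.0 - all_logits * y, 0))
    return tf.reduce_max(all_logits, axis=[1, 2]), loss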
Example #10
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        if mode == tf.estimator.ModeKeys.PREDICT:
            label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
        else:
            label_ids = features["label_ids"]
            label_ids = tf.reshape(label_ids, [-1])
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        input_ids2 = features["input_ids2"]
        input_mask2 = features["input_mask2"]
        segment_ids2 = features["segment_ids2"]
        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model_1 = BertModel(
                config=model_config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            pooled = model_1.get_pooled_output()
            if is_training:
                pooled = dropout(pooled, 0.1)
            logits = tf.keras.layers.Dense(train_config.num_classes,
                                           name="cls_dense")(pooled)
        with tf.compat.v1.variable_scope(dual_model_prefix2):
            model_2 = BertModel(
                config=model_config,
                is_training=is_training,
                input_ids=input_ids2,
                input_mask=input_mask2,
                token_type_ids=segment_ids2,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            pooled = model_2.get_pooled_output()
            if is_training:
                pooled = dropout(pooled, 0.1)
            conf_probs = tf.keras.layers.Dense(
                train_config.num_classes,
                name="cls_dense",
                activation=tf.keras.activations.softmax)(pooled)

            confidence = conf_probs[:, 1]
        confidence_loss = 1 - confidence

        cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)

        k = model_config.k
        alpha = model_config.alpha
        loss_arr = cls_loss * confidence + confidence_loss * k

        loss_arr = apply_weighted_loss(loss_arr, label_ids, alpha)

        loss = tf.reduce_mean(input_tensor=loss_arr)
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        def metric_fn(log_probs, label, is_real_example, confidence):
            r = classification_metric_fn(log_probs, label, is_real_example)
            r['confidence'] = tf.compat.v1.metrics.mean(confidence)
            return r

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            tvars = None
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn,
                            [logits, label_ids, is_real_example, confidence])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "logits": logits,
                "confidence": confidence,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
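
Note: `apply_weighted_loss` is not shown. One plausible reading, given the `alpha` hyperparameter, is class re-weighting of the per-example loss (an assumption, not the actual implementation):

def apply_weighted_loss(loss_arr, label_ids, alpha):
    # up-weight positive examples by alpha, leave negatives at weight 1
    is_positive = tf.cast(tf.equal(label_ids, 1), tf.float32)
    weights = is_positive * alpha + (1.0 - is_positive)
    return loss_arr * weights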
Example #11
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" % (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        label_ids = tf.reshape(label_ids, [-1])
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = model_class(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            features=features,
        )
        probs = model.get_prob()
        logits = probs
        y_true = tf.cast(label_ids, tf.float32)
        loss_arr = tf.keras.losses.BinaryCrossentropy()(y_true, probs)
        loss = tf.reduce_mean(input_tensor=loss_arr)
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            tvars = None
            train_op = optimization.create_optimizer_from_config(loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn, [
                logits, label_ids, is_real_example
            ])
            output_spec = TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
        else:
            predictions = {
                    "input_ids": input_ids,
                    "label_ids": label_ids,
                    "logits": logits,
                    "score1": model.score1,
                    "score2": model.score2,
                    # "prob_err": prob_err,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)
        return output_spec
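
`get_tpu_scaffold_or_init` recurs throughout these examples. A sketch that matches the inline TPU-scaffold pattern written out in Example #16 (the exact definition lives elsewhere):

def get_tpu_scaffold_or_init(init_fn, use_tpu):
    if use_tpu:
        def tpu_scaffold():
            init_fn()
            return tf.compat.v1.train.Scaffold()
        return tpu_scaffold  # TPUEstimator calls this to build the Scaffold
    init_fn()  # on CPU/GPU, initialize from the checkpoint immediately
    return None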
Example #12
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        log_features(features)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = APR(input_ids, input_mask, segment_ids, is_training,
                    train_config.use_one_hot_embeddings, bert_config,
                    ssdr_config, dict_run_config.def_per_batch,
                    dict_run_config.inner_batch_size,
                    dict_run_config.max_def_length)

        task = Classification(3, features, model.get_pooled_output(),
                              is_training)
        loss = task.loss

        tvars = tf.compat.v1.trainable_variables()
        assignment_fn = tlm.training.assignment_map.get_assignment_map_as_is
        initialized_variable_names, init_fn = get_init_fn(
            tvars, train_config.init_checkpoint, assignment_fn)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        output_spec = None
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
                tf_logging.info("Using two lr for each parts")
                train_op = create_optimizer_with_separate_lr(
                    loss, train_config)
            else:
                tf_logging.info("Using single lr ")
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=task.eval_metrics(),
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           predictions={"loss": task.loss_arr},
                                           scaffold_fn=scaffold_fn)
        return output_spec
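
Note: the `Classification` task wrapper is defined elsewhere. A minimal sketch consistent with how it is used above (3-way softmax classification over the pooled output, exposing `loss`, `loss_arr`, and `eval_metrics()`); all details are assumptions:

class Classification:
    def __init__(self, num_classes, features, pooled_output, is_training):
        label_ids = tf.reshape(features["label_ids"], [-1])
        logits = tf.keras.layers.Dense(num_classes, name="cls_dense")(pooled_output)
        self.loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)
        self.loss = tf.reduce_mean(self.loss_arr)
        self._logits = logits
        self._label_ids = label_ids

    def eval_metrics(self):
        def metric_fn(logits, label_ids):
            pred = tf.argmax(logits, axis=-1, output_type=tf.int32)
            return {"accuracy": tf.compat.v1.metrics.accuracy(label_ids, pred)}
        return metric_fn, [self._logits, self._label_ids]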
Example #13
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" % (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        if mode == tf.estimator.ModeKeys.PREDICT:
            label_ids = tf.ones([input_ids.shape[0]], dtype=tf.float32)
        else:
            label_ids = features["label_ids"]
            label_ids = tf.reshape(label_ids, [-1])
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = BertModel(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled = model.get_pooled_output()
        if is_training:
            pooled = dropout(pooled, 0.1)
        logits = tf.keras.layers.Dense(train_config.num_classes, name="cls_dense")(pooled)
        scale = model_config.scale

        label_ids = scale * label_ids

        weight = tf.abs(label_ids)
        loss_arr = tf.keras.losses.MAE(y_true=label_ids, y_pred=logits)
        loss_arr = loss_arr * weight

        loss = tf.reduce_mean(input_tensor=loss_arr)
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None

        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        def metric_fn(logits, label, is_real_example):
            mae = tf.compat.v1.metrics.mean_absolute_error(
                labels=label, predictions=logits, weights=is_real_example)

            return {
                "mae": mae
            }

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            tvars = None
            train_op = optimization.create_optimizer_from_config(loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn, [
                logits, label_ids, is_real_example
            ])
            output_spec = TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
        else:
            predictions = {
                    "input_ids": input_ids,
                    "logits": logits,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)
        return output_spec
Example #14
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        loss_base = features["loss_base"]
        loss_target = features["loss_target"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]

        input_ids = recover_mask(input_ids, masked_lm_positions, masked_lm_ids)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = model_class(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )

        if model_config.loss_model == "independent":
            loss_model = IndependentLossModel(bert_config)
            loss_model.train_modeling(model.get_sequence_output(),
                                      masked_lm_positions, masked_lm_weights,
                                      loss_base, loss_target)

            total_loss = loss_model.total_loss
            loss1 = loss_model.loss1
            loss2 = loss_model.loss2
            per_example_loss1 = loss_model.per_example_loss1
            per_example_loss2 = loss_model.per_example_loss2
            losses1 = tf.reduce_sum(per_example_loss1, axis=1)
            losses2 = tf.reduce_sum(per_example_loss2, axis=1)
            prob1 = loss_model.prob1
            prob2 = loss_model.prob2

            def host_call_fn(total_loss, loss1, loss2):
                tf.summary.scalar("total_loss", total_loss[0])
                tf.summary.scalar("loss_base", loss1[0])
                tf.summary.scalar("loss_target", loss2[0])
                return tf.compat.v1.summary.all_v2_summary_ops()

            host_call = (host_call_fn, [
                tf.reshape(total_loss, [1]),
                tf.reshape(loss1, [1]),
                tf.reshape(loss2, [1])
            ])

        elif model_config.loss_model == "diff_regression":
            total_loss, losses, logits = get_diff_loss(
                bert_config, model.get_sequence_output(), masked_lm_positions,
                masked_lm_weights, loss_base, loss_target)
            host_call = None

        # NOTE: prob1/prob2 (and per_example_loss1/2 below) are only defined by
        # the "independent" loss model; the "diff_regression" branch does not set them.
        pred_diff = prob1 - prob2
        gold_diff = get_gold_diff(loss_base, loss_target)
        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            assignment_map, initialized_variable_names = get_bert_assignment_map(
                tvars, train_config.init_checkpoint)
            if train_config.use_tpu:

                def tpu_scaffold():
                    tf.compat.v1.train.init_from_checkpoint(
                        train_config.init_checkpoint, assignment_map)
                    return tf.compat.v1.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)

        tf_logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf_logging.info("name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=host_call,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss1, per_example_loss2):
                loss1 = tf.compat.v1.metrics.mean(values=per_example_loss1)
                loss2 = tf.compat.v1.metrics.mean(values=per_example_loss2)

                return {
                    "loss1": loss1,
                    "loss2": loss2,
                }

            eval_metrics = (metric_fn, [losses1, losses2])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "loss_base": loss_base,
                "loss_target": loss_target,
                "prob1": prob1,
                "prob2": prob2,
                "per_example_loss1": per_example_loss1,
                "per_example_loss2": per_example_loss2,
                "input_ids": input_ids,
                "masked_lm_positions": masked_lm_positions,
                "pred_diff": pred_diff,
                "gold_diff": gold_diff,
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
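
Note: `get_gold_diff` is not shown; given that `pred_diff` is `prob1 - prob2`, a minimal assumed counterpart is simply the difference of the two target losses:

def get_gold_diff(loss_base, loss_target):
    # assumed: the gold signal mirrors pred_diff = prob1 - prob2
    return loss_base - loss_target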
Example #15
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("    name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        if "next_sentence_labels" in features:
            next_sentence_labels = features["next_sentence_labels"]
        else:
            next_sentence_labels = get_dummy_next_sentence_labels(input_ids)
        tlm_prefix = "target_task"

        with tf.compat.v1.variable_scope(tlm_prefix):
            priority_score = tf.stop_gradient(priority_model(features))

        priority_score = priority_score * target_model_config.amp
        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights\
            = biased_masking(input_ids,
                             input_mask,
                             priority_score,
                             target_model_config.alpha,
                             train_config.max_predictions_per_seq,
                             MASK_ID)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = model_class(
            config=bert_config,
            is_training=is_training,
            input_ids=masked_input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
             bert_config, model.get_pooled_output(), next_sentence_labels)

        total_loss = masked_lm_loss + next_sentence_loss

        all_vars = tf.compat.v1.all_variables()

        tf_logging.info("We assume priority model is from v2")

        if train_config.checkpoint_type == "v2":
            assignment_map, initialized_variable_names = assignment_map_v2_to_v2(
                all_vars, train_config.init_checkpoint)
            assignment_map2, initialized_variable_names2 = get_assignment_map_remap_from_v2(
                all_vars, tlm_prefix, train_config.second_init_checkpoint)
        else:
            assignment_map, assignment_map2, initialized_variable_names \
                                            = get_tlm_assignment_map_v2(all_vars,
                                              tlm_prefix,
                                              train_config.init_checkpoint,
                                              train_config.second_init_checkpoint)
            initialized_variable_names2 = None

        def init_fn():
            if train_config.init_checkpoint:
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)
            if train_config.second_init_checkpoint:
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.second_init_checkpoint, assignment_map2)

        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)

        tvars = [v for v in all_vars if not v.name.startswith(tlm_prefix)]
        log_var_assignments(tvars, initialized_variable_names,
                            initialized_variable_names2)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config, tvars)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, next_sentence_example_loss,
                next_sentence_log_probs, next_sentence_labels
            ])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "masked_input_ids": masked_input_ids,
                "priority_score": priority_score,
                "lm_loss1": features["loss1"],
                "lm_loss2": features["loss2"],
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
Example #16
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("    name = %s, shape = %s" %
                         (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        batch_size, seq_len = get_shape_list2(input_ids)
        n_trial = 5

        logging.info("Doing All Masking")
        new_input_ids, new_segment_ids, new_input_mask, indice, length_arr = \
            candidate_gen(input_ids, input_mask, segment_ids, n_trial)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        prefix_cls = "classification"
        prefix_explain = "explain"
        all_input_ids = tf.concat([input_ids, new_input_ids], axis=0)
        all_segment_ids = tf.concat([segment_ids, new_segment_ids], axis=0)
        all_input_mask = tf.concat([input_mask, new_input_mask], axis=0)
        with tf.compat.v1.variable_scope(prefix_cls):
            model = BertModel(
                config=bert_config,
                is_training=is_training,
                input_ids=all_input_ids,
                input_mask=all_input_mask,
                token_type_ids=all_segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            output_weights = tf.compat.v1.get_variable(
                "output_weights",
                [train_config.num_classes, bert_config.hidden_size],
                initializer=tf.compat.v1.truncated_normal_initializer(
                    stddev=0.02))

            output_bias = tf.compat.v1.get_variable(
                "output_bias", [train_config.num_classes],
                initializer=tf.compat.v1.zeros_initializer())
            pooled = model.get_pooled_output()
            raw_logits = tf.matmul(pooled, output_weights, transpose_b=True)
            logits = tf.stop_gradient(raw_logits)
            cls_logits = tf.nn.bias_add(logits, output_bias)
            cls_probs = tf.nn.softmax(cls_logits)

            orig_probs = cls_probs[:batch_size]
            new_probs = tf.reshape(cls_probs[batch_size:],
                                   [batch_size, n_trial, -1])

            best_run, informative = get_informative(new_probs, orig_probs)
            # informative shape: [batch_size, num_classes]
            best_del_idx, best_del_len = select_best(best_run, indice,
                                                     length_arr)

            signal_label = get_mask(best_del_idx, best_del_len, seq_len)

        with tf.compat.v1.variable_scope(prefix_explain):
            model = BertModel(
                config=bert_config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            seq = model.get_sequence_output()

            output_weights = tf.compat.v1.get_variable(
                "output_weights",
                [train_config.num_classes, bert_config.hidden_size],
                initializer=tf.compat.v1.truncated_normal_initializer(
                    stddev=0.02))

            output_bias = tf.compat.v1.get_variable(
                "output_bias", [train_config.num_classes],
                initializer=tf.compat.v1.zeros_initializer())
            logits = tf.matmul(seq, output_weights, transpose_b=True)
            ex_logits = tf.nn.bias_add(
                logits, output_bias)  # [batch, seq_len, num_class]

        ex_logits_flat = tf.reshape(tf.transpose(ex_logits, [0, 2, 1]),
                                    [-1, seq_len])
        signal_label_flat = tf.cast(tf.reshape(signal_label, [-1, seq_len]),
                                    tf.float32)
        losses_per_clas_flat = correlation_coefficient_loss(
            signal_label_flat, ex_logits_flat)  # [batch_size, num_class]
        losses_per_clas = tf.reshape(losses_per_clas_flat, [batch_size, -1])
        losses_per_clas = losses_per_clas * tf.cast(informative, tf.float32)
        losses = tf.reduce_mean(losses_per_clas, axis=1)
        loss = tf.reduce_mean(losses)

        tvars = tf.compat.v1.trainable_variables()

        scaffold_fn = None
        initialized_variable_names, init_fn = get_init_fn_for_two_checkpoints(
            train_config, tvars, train_config.init_checkpoint, prefix_explain,
            train_config.second_init_checkpoint, prefix_cls)
        if train_config.use_tpu:

            def tpu_scaffold():
                init_fn()
                return tf.compat.v1.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            init_fn()

        log_var_assignments(tvars, initialized_variable_names)

        output_spec = None
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                loss, train_config)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                "input_ids": input_ids,
                "ex_logits": ex_logits,
                "logits": logits,
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=None,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
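
Note: `correlation_coefficient_loss` is defined elsewhere. A common formulation, and a plausible sketch here, is 1 minus the Pearson correlation between the deletion-mask signal and the explanation logits, computed row-wise (assumed, not the project's exact code):

def correlation_coefficient_loss(y_true, y_pred):
    # Pearson r per row; the loss is 1 - r, so perfectly correlated rows cost 0.
    mx = tf.reduce_mean(y_true, axis=-1, keepdims=True)
    my = tf.reduce_mean(y_pred, axis=-1, keepdims=True)
    xm, ym = y_true - mx, y_pred - my
    r_num = tf.reduce_sum(xm * ym, axis=-1)
    r_den = tf.sqrt(tf.reduce_sum(xm * xm, axis=-1) *
                    tf.reduce_sum(ym * ym, axis=-1)) + 1e-8
    return 1.0 - r_num / r_den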
Example #17
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids, input_mask, segment_ids = combine_paired_input_features(
            features)

        strict_good = features["strict_good"]
        strict_bad = features["strict_bad"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = model_class(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled = model.get_pooled_output()
        losses, logits, pair_logits = pairwise_model(pooled, strict_good,
                                                     strict_bad)
        total_loss = tf.reduce_mean(losses)
        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            assignment_map, initialized_variable_names = get_bert_assignment_map(
                tvars, train_config.init_checkpoint)
            if train_config.use_tpu:

                def tpu_scaffold():
                    tf.compat.v1.train.init_from_checkpoint(
                        train_config.init_checkpoint, assignment_map)
                    return tf.compat.v1.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)

        tf_logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf_logging.info("name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(pair_logits, strict_good, strict_bad):
                diff = pair_logits[:, 0] - pair_logits[:, 1]
                pairwise_correct = tf.less(0.0, diff)

                strict_good_correct_raw = tf.reshape(
                    tf.less(1.0, pair_logits[:, 0]), [-1, 1])
                strict_good_correct = cast_float_multiply(
                    strict_good_correct_raw, strict_good)
                strict_bad_correct_raw = tf.reshape(
                    tf.less(pair_logits[:, 1], -1.0), [-1, 1])
                strict_bad_correct = cast_float_multiply(
                    strict_bad_correct_raw, strict_bad)

                pairwise_acc_raw = tf.cast(pairwise_correct, tf.float32)
                mean_acc = tf.compat.v1.metrics.mean(values=pairwise_acc_raw)

                def strict_accuracy(correctness, gold):
                    return tf.compat.v1.metrics.accuracy(
                        labels=tf.ones_like(gold, tf.int32),
                        predictions=tf.cast(correctness, tf.int32),
                        weights=tf.cast(gold, tf.float32))

                return {
                    'mean_acc': mean_acc,
                    'strict_good_acc': strict_accuracy(strict_good_correct,
                                                       strict_good),
                    'strict_bad_acc': strict_accuracy(strict_bad_correct,
                                                      strict_bad),
                }

            eval_metrics = (metric_fn, [pair_logits, strict_good, strict_bad])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "logits": logits,
                "input_ids": input_ids,
                "strict_good": strict_good,
                "strict_bad": strict_bad,
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
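The metric_fn above treats column 0 of pair_logits as the score of the preferred item and column 1 as the score of the dispreferred one; a pair counts as correct when the preferred score is higher. A minimal eager-mode sketch of the same pairwise-accuracy idea, outside the TPU metrics machinery (the input here is synthetic):

    import tensorflow as tf

    def pairwise_accuracy(pair_logits):
        # pair_logits: [batch, 2]; column 0 should outscore column 1.
        diff = pair_logits[:, 0] - pair_logits[:, 1]
        return tf.reduce_mean(tf.cast(diff > 0.0, tf.float32))

    # Example: two of three pairs are ranked correctly -> ~0.667
    print(pairwise_accuracy(tf.constant([[2.0, 1.0], [0.5, 1.5], [3.0, -1.0]])))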
Exemplo n.º 18
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        tf_logging.info("model_fn_nli_lm")
        """The `model_fn` for TPUEstimator."""
        log_features(features)

        input_ids = features["input_ids"]  # [batch_size, seq_length]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        batch_size, seq_max = get_shape_list2(input_ids)
        if "nli_input_ids" in features:
            nli_input_ids = features[
                "nli_input_ids"]  # [batch_size, seq_length]
            nli_input_mask = features["nli_input_mask"]
            nli_segment_ids = features["nli_segment_ids"]
        else:
            nli_input_ids = input_ids
            nli_input_mask = input_mask
            nli_segment_ids = segment_ids
            features["label_ids"] = tf.ones([batch_size], tf.int32)

        if mode == tf.estimator.ModeKeys.PREDICT:
            tf.random.set_seed(0)
            seed = 0
        else:
            seed = None

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        tf_logging.info("Doing dynamic masking (random)")

        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = random_masking(input_ids, input_mask,
                             train_config.max_predictions_per_seq, MASK_ID, seed)

        sharing_model = sharing_model_factory(
            config, train_config.use_one_hot_embeddings, is_training,
            masked_input_ids, input_mask, segment_ids, nli_input_ids,
            nli_input_mask, nli_segment_ids)

        sequence_output_lm = sharing_model.lm_sequence_output()
        nli_feature = sharing_model.get_tt_feature()

        masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
            = get_masked_lm_output(config, sequence_output_lm, sharing_model.get_embedding_table(),
                                     masked_lm_positions, masked_lm_ids, masked_lm_weights)

        masked_lm_log_probs = tf.reshape(masked_lm_log_probs, [batch_size, -1])

        masked_lm_per_inst_loss = tf.reshape(masked_lm_example_loss,
                                             [batch_size, -1])

        task = Classification(3, features, nli_feature, is_training)
        nli_loss = task.loss

        task_prob = tf.nn.softmax(task.logits, axis=-1)
        arg_like = task_prob[:, 1] + task_prob[:, 2]

        layer_outputs = sharing_model.model.all_layer_outputs
        # Per-layer gradients of the two objectives w.r.t. the shared activations.
        grads_1 = tf.gradients(ys=masked_lm_loss, xs=layer_outputs)
        grads_2 = tf.gradients(ys=arg_like, xs=layer_outputs)
        overlap_per_layer = []
        for g1, g2 in zip(grads_1, grads_2):
            if g1 is not None and g2 is not None:
                a = tf.reshape(g1, [batch_size * 2, seq_max, -1])[:batch_size]
                a = a / masked_lm_per_inst_loss
                b = tf.reshape(g2, [batch_size * 2, seq_max, -1])[batch_size:]
                overlap_per_layer.append(tf.abs(a * b))
        h_overlap = tf.stack(overlap_per_layer, axis=1)
        h_overlap = tf.reduce_sum(h_overlap, axis=2)

        loss = combine_loss_fn(masked_lm_loss, nli_loss)
        tvars = tf.compat.v1.trainable_variables()
        assignment_fn = get_bert_assignment_map
        initialized_variable_names, init_fn = get_init_fn(
            tvars, train_config.init_checkpoint, assignment_fn)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                loss, train_config)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn_lm, [
                masked_lm_example_loss,
                masked_lm_log_probs,
                masked_lm_ids,
                masked_lm_weights,
            ])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "masked_input_ids": masked_input_ids,
                "masked_lm_ids": masked_lm_ids,
                "masked_lm_example_loss": masked_lm_example_loss,
                "masked_lm_positions": masked_lm_positions,
                "masked_lm_log_probs": masked_lm_log_probs,
                "h_overlap": h_overlap,
            }
            output_spec = TPUEstimatorSpec(mode=mode,
                                           predictions=predictions,
                                           scaffold_fn=scaffold_fn)

        return output_spec
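h_overlap above multiplies, layer by layer, the gradient of the masked-LM loss with the gradient of the NLI "argument-likeness" score, so large entries mark activations that matter to both objectives. A minimal TF2 sketch of the same element-wise gradient-overlap idea on a stand-in shared activation (both losses here are placeholders, not the repository's):

    import tensorflow as tf

    x = tf.Variable(tf.random.normal([4, 8]))  # stand-in for a shared layer output
    with tf.GradientTape(persistent=True) as tape:
        loss_a = tf.reduce_sum(tf.square(x))   # placeholder for masked_lm_loss
        loss_b = tf.reduce_sum(tf.abs(x))      # placeholder for arg_like
    g_a = tape.gradient(loss_a, x)
    g_b = tape.gradient(loss_b, x)
    overlap = tf.abs(g_a * g_b)                # element-wise, like h_overlap
    del tape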
Exemplo n.º 19
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        tf_logging.info("model_fn_nli_lm")
        """The `model_fn` for TPUEstimator."""
        log_features(features)

        input_ids = features["input_ids"]  # [batch_size, seq_length]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        batch_size, _ = get_shape_list2(input_ids)
        if "nli_input_ids" in features:
            nli_input_ids = features[
                "nli_input_ids"]  # [batch_size, seq_length]
            nli_input_mask = features["nli_input_mask"]
            nli_segment_ids = features["nli_segment_ids"]
        else:
            nli_input_ids = input_ids
            nli_input_mask = input_mask
            nli_segment_ids = segment_ids
            features["label_ids"] = tf.ones([batch_size], tf.int32)

        if mode == tf.estimator.ModeKeys.PREDICT:
            tf.random.set_seed(0)
            seed = 0
        else:
            seed = None

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        tf_logging.info("Doing dynamic masking (random)")

        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = random_masking(input_ids, input_mask,
                             train_config.max_predictions_per_seq, MASK_ID, seed)

        sharing_model = sharing_model_factory(
            config, train_config.use_one_hot_embeddings, is_training,
            masked_input_ids, input_mask, segment_ids, nli_input_ids,
            nli_input_mask, nli_segment_ids)

        sequence_output_lm = sharing_model.lm_sequence_output()
        nli_feature = sharing_model.get_tt_feature()

        masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
            = get_masked_lm_output(config, sequence_output_lm, sharing_model.get_embedding_table(),
                                     masked_lm_positions, masked_lm_ids, masked_lm_weights)

        masked_lm_log_probs = tf.reshape(masked_lm_log_probs, [batch_size, -1])

        top_guess = masked_lm_log_probs

        task = Classification(3, features, nli_feature, is_training)
        nli_loss = task.loss

        overlap_score = shared_gradient_fine_grained(
            masked_lm_example_loss, task.logits,
            train_config.max_predictions_per_seq)
        loss = combine_loss_fn(masked_lm_loss, nli_loss)
        tvars = tf.compat.v1.trainable_variables()
        assignment_fn = get_bert_assignment_map
        initialized_variable_names, init_fn = get_init_fn(
            tvars, train_config.init_checkpoint, assignment_fn)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                loss, train_config)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn_lm, [
                masked_lm_example_loss,
                masked_lm_log_probs,
                masked_lm_ids,
                masked_lm_weights,
            ])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "masked_input_ids": masked_input_ids,
                "masked_lm_ids": masked_lm_ids,
                "masked_lm_example_loss": masked_lm_example_loss,
                "masked_lm_positions": masked_lm_positions,
                "masked_lm_log_probs": masked_lm_log_probs,
                "overlap_score": overlap_score,
                "top_guess": top_guess,
            }
            output_spec = TPUEstimatorSpec(mode=mode,
                                           predictions=predictions,
                                           scaffold_fn=scaffold_fn)

        return output_spec
Exemplo n.º 20
    def model_fn(features, labels, mode, params):    # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("    name = %s, shape = %s" % (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        d_input_ids = features["d_input_ids"]
        d_input_mask = features["d_input_mask"]
        d_location_ids = features["d_location_ids"]
        next_sentence_labels = features["next_sentence_labels"]

        if dict_run_config.prediction_op == "loss":
            seed = 0
        else:
            seed = None

        if dict_run_config.prediction_op == "loss_fixed_mask" or train_config.fixed_mask:
            masked_input_ids = input_ids
            masked_lm_positions = features["masked_lm_positions"]
            masked_lm_ids = features["masked_lm_ids"]
            masked_lm_weights = tf.ones_like(masked_lm_positions, dtype=tf.float32)
        else:
            masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
                = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID, seed)

        if dict_run_config.use_d_segment_ids:
            d_segment_ids = features["d_segment_ids"]
        else:
            d_segment_ids = None

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = model_class(
                config=bert_config,
                d_config=dbert_config,
                is_training=is_training,
                input_ids=masked_input_ids,
                input_mask=input_mask,
                d_input_ids=d_input_ids,
                d_input_mask=d_input_mask,
                d_location_ids=d_location_ids,
                use_target_pos_emb=dict_run_config.use_target_pos_emb,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
                d_segment_ids=d_segment_ids,
                pool_dict_output=dict_run_config.pool_dict_output,
        )

        (masked_lm_loss,
         masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output(
                 bert_config, model.get_sequence_output(), model.get_embedding_table(),
                 masked_lm_positions, masked_lm_ids, masked_lm_weights)
        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
                 bert_config, model.get_pooled_output(), next_sentence_labels)

        total_loss = masked_lm_loss

        if dict_run_config.train_op == "entry_prediction":
            score_label = features["useful_entry"] # [batch, 1]
            score_label = tf.reshape(score_label, [-1])
            entry_logits = bert_common.dense(
                2, bert_common.create_initializer(
                    bert_config.initializer_range))(model.get_dict_pooled_output())
            logging.info("entry_logits shape: %s", entry_logits.shape)
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=entry_logits, labels=score_label)
            loss = tf.reduce_mean(losses)
            total_loss = loss

        if dict_run_config.train_op == "lookup":
            lookup_idx = features["lookup_idx"]
            lookup_loss, lookup_example_loss, lookup_score = \
                sequence_index_prediction(bert_config, lookup_idx, model.get_sequence_output())

            total_loss += lookup_loss

        tvars = tf.compat.v1.trainable_variables()

        init_vars = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            if dict_run_config.is_bert_checkpoint:
                map1, map2, init_vars = get_bert_assignment_map_for_dict(tvars, train_config.init_checkpoint)

                def load_fn():
                    tf.compat.v1.train.init_from_checkpoint(train_config.init_checkpoint, map1)
                    tf.compat.v1.train.init_from_checkpoint(train_config.init_checkpoint, map2)
            else:
                map1, init_vars = get_assignment_map_as_is(tvars, train_config.init_checkpoint)

                def load_fn():
                    tf.compat.v1.train.init_from_checkpoint(train_config.init_checkpoint, map1)

            if train_config.use_tpu:
                def tpu_scaffold():
                    load_fn()
                    return tf.compat.v1.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                load_fn()

        logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in init_vars:
                init_string = ", *INIT_FROM_CKPT*"
            logging.info("    name = %s, shape = %s%s", var.name, var.shape, init_string)
        logging.info("Total parameters : %d" % get_param_num())

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            if train_config.gradient_accumulation == 1:
                train_op = optimization.create_optimizer_from_config(total_loss, train_config)
            else:
                logging.info("Using gradient accumulation : %d" % train_config.gradient_accumulation)
                train_op = get_accumulated_optimizer_from_config(total_loss, train_config,
                                                                 tvars, train_config.gradient_accumulation)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn, [
                    masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                    masked_lm_weights, next_sentence_example_loss,
                    next_sentence_log_probs, next_sentence_labels
            ])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=eval_metrics,
                    scaffold_fn=scaffold_fn)
        else:
            if dict_run_config.prediction_op == "gradient":
                logging.info("Fetching gradient")
                gradient = get_gradients(model, masked_lm_log_probs,
                                         train_config.max_predictions_per_seq, bert_config.vocab_size)
                predictions = {
                        "masked_input_ids": masked_input_ids,
                        #"input_ids": input_ids,
                        "d_input_ids": d_input_ids,
                        "masked_lm_positions": masked_lm_positions,
                        "gradients": gradient,
                }
            elif dict_run_config.prediction_op == "loss" or dict_run_config.prediction_op == "loss_fixed_mask":
                logging.info("Fetching loss")
                predictions = {
                    "masked_lm_example_loss": masked_lm_example_loss,
                }
            else:
                raise Exception("prediction target not specified")

            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)

        return output_spec
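The TRAIN branch switches to get_accumulated_optimizer_from_config when train_config.gradient_accumulation > 1, i.e. gradients from several micro-batches are summed before a single weight update. A minimal eager-mode sketch of that idea, assuming a Keras model and optimizer rather than the repository's TF1 TPU plumbing:

    import tensorflow as tf

    def accumulated_train_step(model, optimizer, micro_batches, loss_fn):
        # Sum gradients over micro-batches, then apply one combined update.
        accum = [tf.zeros_like(v) for v in model.trainable_variables]
        for x, y in micro_batches:
            with tf.GradientTape() as tape:
                loss = loss_fn(y, model(x, training=True)) / len(micro_batches)
            grads = tape.gradient(loss, model.trainable_variables)
            accum = [a + g for a, g in zip(accum, grads)]
        optimizer.apply_gradients(zip(accum, model.trainable_variables))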
Exemplo n.º 21
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        tf_logging.info("model_fn_apr_lm")
        """The `model_fn` for TPUEstimator."""
        log_features(features)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]

        if mode == tf.estimator.ModeKeys.PREDICT:
            tf.random.set_seed(0)
            seed = 0
        else:
            seed = None

        tf_logging.info("Doing dynamic masking (random)")
        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID, seed)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        tf_logging.info("Using masked_input_ids")
        model = APR(
            masked_input_ids,
            input_mask,
            segment_ids,
            is_training,
            train_config.use_one_hot_embeddings,
            bert_config,
            ssdr_config,
            dict_run_config.def_per_batch,
            dict_run_config.inner_batch_size,
            dict_run_config.max_def_length,
            #  MainTransformer,
            #  SecondTransformerEmbeddingLess,
        )

        masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
            = get_masked_lm_output(bert_config, model.get_sequence_output(), model.get_embedding_table(),
                 masked_lm_positions, masked_lm_ids, masked_lm_weights)

        loss = masked_lm_loss

        tvars = tf.compat.v1.trainable_variables()
        assignment_fn = dict_model_fn.get_bert_assignment_map_for_dict
        initialized_variable_names, init_fn = align_checkpoint_twice(
            tvars, train_config.init_checkpoint, assignment_fn)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
                tf_logging.info("Using two lr for each parts")
                train_op = create_optimizer_with_separate_lr(
                    loss, train_config)
            else:
                tf_logging.info("Using single lr ")
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           training_hooks=[OomReportingHook()],
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn_lm, [
                masked_lm_example_loss,
                masked_lm_log_probs,
                masked_lm_ids,
                masked_lm_weights,
            ])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "masked_input_ids": masked_input_ids,
                "masked_lm_ids": masked_lm_ids,
                "masked_lm_example_loss": masked_lm_example_loss,
                "masked_lm_positions": masked_lm_positions,
            }
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           predictions=predictions,
                                           scaffold_fn=scaffold_fn)

        return output_spec
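create_optimizer_with_separate_lr is chosen when ssdr_config asks for use_two_lr, i.e. one part of the network trains with a different learning rate than the rest. A hypothetical graph-mode sketch of that split; the name prefix and the AdamOptimizer choice are assumptions for illustration, not the repository's actual optimizer:

    import tensorflow as tf

    def two_lr_train_op(loss, tvars, lr_main, lr_second, second_prefix="dict"):
        # Partition variables by name, then apply a separate optimizer to each group.
        main_vars = [v for v in tvars if second_prefix not in v.name]
        second_vars = [v for v in tvars if second_prefix in v.name]
        grads = tf.gradients(loss, main_vars + second_vars)
        g_main = grads[:len(main_vars)]
        g_second = grads[len(main_vars):]
        opt_main = tf.compat.v1.train.AdamOptimizer(lr_main)
        opt_second = tf.compat.v1.train.AdamOptimizer(lr_second)
        return tf.group(
            opt_main.apply_gradients(zip(g_main, main_vars)),
            opt_second.apply_gradients(zip(g_second, second_vars)))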
Exemplo n.º 22
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        query = features["query"]
        doc = features["doc"]
        doc_mask = features["doc_mask"]
        data_ids = features["data_id"]

        segment_len = max_seq_length - query_len - 3
        step_size = model_config.step_size
        input_ids, input_mask, segment_ids, n_segments = \
            iterate_over(query, doc, doc_mask, total_doc_len, segment_len, step_size)
        if mode == tf.estimator.ModeKeys.PREDICT:
            label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
        else:
            label_ids = features["label_ids"]
            label_ids = tf.reshape(label_ids, [-1])

        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        if "feed_features" in special_flags:
            model = model_class(
                config=model_config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
                features=features,
            )
        else:
            model = model_class(
                config=model_config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
        if "new_pooling" in special_flags:
            pooled = mimic_pooling(model.get_sequence_output(),
                                   model_config.hidden_size,
                                   model_config.initializer_range)
        else:
            pooled = model.get_pooled_output()

        if train_config.checkpoint_type != "bert_nli" and train_config.use_old_logits:
            tf_logging.info("Use old version of logistic regression")
            if is_training:
                pooled = dropout(pooled, 0.1)
            logits = tf.keras.layers.Dense(train_config.num_classes,
                                           name="cls_dense")(pooled)
        else:
            tf_logging.info("Use fixed version of logistic regression")
            output_weights = tf.compat.v1.get_variable(
                "output_weights",
                [train_config.num_classes, model_config.hidden_size],
                initializer=tf.compat.v1.truncated_normal_initializer(
                    stddev=0.02))

            output_bias = tf.compat.v1.get_variable(
                "output_bias", [train_config.num_classes],
                initializer=tf.compat.v1.zeros_initializer())

            if is_training:
                pooled = dropout(pooled, 0.1)

            logits = tf.matmul(pooled, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)

        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)

        if "bias_loss" in special_flags:
            tf_logging.info("Using special_flags : bias_loss")
            loss_arr = reweight_zero(label_ids, loss_arr)

        loss = tf.reduce_mean(input_tensor=loss_arr)
        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}

        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            if "simple_optimizer" in special_flags:
                tf_logging.info("using simple optimizer")
                train_op = create_simple_optimizer(loss,
                                                   train_config.learning_rate,
                                                   train_config.use_tpu)
            else:
                if "ask_tvar" in special_flags:
                    tvars = model.get_trainable_vars()
                else:
                    tvars = None
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn,
                            [logits, label_ids, is_real_example])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "logits": logits,
                "doc": doc,
                "data_ids": data_ids,
            }

            useful_inputs = ["data_id", "input_ids2", "data_ids"]
            for input_name in useful_inputs:
                if input_name in features:
                    predictions[input_name] = features[input_name]

            if override_prediction_fn is not None:
                predictions = override_prediction_fn(predictions, model)

            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)

        return output_spec
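iterate_over above splits a long document into fixed-length segments that are each paired with the query; segment_len leaves room for the query and three special tokens. A small sketch of the window arithmetic assumed here (start offsets advance by step_size, so windows overlap when step_size < segment_len):

    def segment_spans(doc_len, segment_len, step_size):
        # Returns (start, end) index pairs covering doc_len tokens.
        starts = range(0, max(doc_len - segment_len, 0) + 1, step_size)
        return [(s, min(s + segment_len, doc_len)) for s in starts]

    # Example: a 10-token doc, 4-token windows, stride 3 -> [(0, 4), (3, 7), (6, 10)]
    print(segment_spans(10, 4, 3))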
Exemplo n.º 23
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("    name = %s, shape = %s" %
                         (name, features[name].shape))

        def reform_a_input(raw_input):
            return tf.reshape(raw_input,
                              [dict_run_config.inner_batch_size, -1])

        def reform_b_input(raw_input):
            return tf.reshape(raw_input, [dict_run_config.def_per_batch, -1])

        input_ids = reform_a_input(features["input_ids"])  # [batch_size, def]
        input_mask = reform_a_input(features["input_mask"])
        segment_ids = reform_a_input(features["segment_ids"])
        d_input_ids = reform_b_input(features["d_input_ids"])
        d_input_mask = reform_b_input(features["d_input_mask"])
        d_location_ids = reform_a_input(features["d_location_ids"])
        ab_mapping = features["ab_mapping"]

        if hasattr(ssdr_config,
                   "blind_dictionary") and ssdr_config.blind_dictionary:
            logging.info("Hide dictionary")
            d_input_ids = tf.zeros_like(d_input_ids)
            d_input_mask = tf.zeros_like(d_input_mask)

        if dict_run_config.prediction_op == "loss":
            seed = 0
        else:
            seed = None

        if dict_run_config.prediction_op == "loss_fixed_mask" or train_config.fixed_mask:
            masked_input_ids = input_ids
            masked_lm_positions = reform_a_input(
                features["masked_lm_positions"])
            masked_lm_ids = reform_a_input(features["masked_lm_ids"])
            masked_lm_weights = reform_a_input(features["masked_lm_weights"])
        else:
            masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
                = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID, seed)

        if dict_run_config.use_d_segment_ids:
            d_segment_ids = reform_b_input(features["d_segment_ids"])
        else:
            d_segment_ids = None

        if dict_run_config.use_ab_mapping_mask:
            ab_mapping_mask = reform_a_input(features["ab_mapping_mask"])
        else:
            ab_mapping_mask = None

        if ssdr_config.compare_attrib_value_safe("consistency", True):

            print("masked_input_ids", masked_input_ids.shape)
            print('d_input_ids', d_input_ids.shape)
            print("ab_mapping_mask", ab_mapping_mask.shape)

            masked_input_ids = tf.tile(masked_input_ids, [2, 1])
            input_mask = tf.tile(input_mask, [2, 1])
            segment_ids = tf.tile(segment_ids, [2, 1])

            dummy = tf.zeros_like(d_input_ids, tf.int32)
            #d_input_ids = tf.concat([d_input_ids, dummy], axis=0)
            #d_input_mask = tf.concat([d_input_mask, dummy], axis=0)
            #if d_segment_ids is not None:
            #    d_segment_ids = tf.concat([d_segment_ids, dummy], axis=0)
            d_location_ids = tf.concat(
                [d_location_ids,
                 tf.zeros_like(d_location_ids, tf.int32)],
                axis=0)
            #ab_mapping = tf.concat([ab_mapping, tf.zeros_like(ab_mapping, tf.int32)], axis=0)
            ab_mapping_mask = tf.concat(
                [ab_mapping_mask,
                 tf.zeros_like(ab_mapping_mask, tf.int32)],
                axis=0)

            masked_lm_positions = tf.tile(masked_lm_positions, [2, 1])
            masked_lm_ids = tf.tile(masked_lm_ids, [2, 1])
            masked_lm_weights = tf.tile(masked_lm_weights, [2, 1])

            print("masked_input_ids", masked_input_ids.shape)
            print('d_input_ids', d_input_ids.shape)
            print("ab_mapping_mask", ab_mapping_mask.shape)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = model_class(
            config=bert_config,
            ssdr_config=ssdr_config,
            is_training=is_training,
            input_ids=masked_input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            d_input_ids=d_input_ids,
            d_input_mask=d_input_mask,
            d_segment_ids=d_segment_ids,
            d_location_ids=d_location_ids,
            ab_mapping=ab_mapping,
            ab_mapping_mask=ab_mapping_mask,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        total_loss = masked_lm_loss
        tvars = tf.compat.v1.trainable_variables()

        init_vars = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            if dict_run_config.is_bert_checkpoint:
                map1, map2, init_vars = dict_model_fn.get_bert_assignment_map_for_dict(
                    tvars, train_config.init_checkpoint)

                def load_fn():
                    tf.compat.v1.train.init_from_checkpoint(
                        train_config.init_checkpoint, map1)
                    tf.compat.v1.train.init_from_checkpoint(
                        train_config.init_checkpoint, map2)
            else:
                map1, init_vars = get_assignment_map_as_is(
                    tvars, train_config.init_checkpoint)

                def load_fn():
                    tf.compat.v1.train.init_from_checkpoint(
                        train_config.init_checkpoint, map1)

            if train_config.use_tpu:

                def tpu_scaffold():
                    load_fn()
                    return tf.compat.v1.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                load_fn()

        logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in init_vars:
                init_string = ", *INIT_FROM_CKPT*"
            logging.info("    name = %s, shape = %s%s", var.name, var.shape,
                         init_string)
        logging.info("Total parameters : %d" % get_param_num())

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            if train_config.gradient_accumulation == 1:
                train_op = optimization.create_optimizer_from_config(
                    total_loss, train_config)
            else:
                logging.info("Using gradient accumulation : %d" %
                             train_config.gradient_accumulation)
                train_op = get_accumulated_optimizer_from_config(
                    total_loss, train_config, tvars,
                    train_config.gradient_accumulation)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn_lm, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights
            ])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            if dict_run_config.prediction_op == "gradient":
                logging.info("Fetching gradient")
                gradient = dict_model_fn.get_gradients(
                    model, masked_lm_log_probs,
                    train_config.max_predictions_per_seq,
                    bert_config.vocab_size)
                predictions = {
                    "masked_input_ids": masked_input_ids,
                    "d_input_ids": d_input_ids,
                    "masked_lm_positions": masked_lm_positions,
                    "gradients": gradient,
                }
            elif dict_run_config.prediction_op == "scores":
                logging.info("Fetching input/d_input and scores")
                predictions = {
                    "masked_input_ids": masked_input_ids,
                    "d_input_ids": d_input_ids,
                    "masked_lm_positions": masked_lm_positions,
                    "masked_lm_ids": masked_lm_ids,
                    "ab_mapping": ab_mapping,
                    "d_location_ids": d_location_ids,
                    "scores": model.scores,
                }
            elif dict_run_config.prediction_op == "loss" or dict_run_config.prediction_op == "loss_fixed_mask":
                logging.info("Fetching loss")
                predictions = {
                    "masked_lm_example_loss": masked_lm_example_loss,
                }
            else:
                raise Exception("prediction target not specified")

            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
Exemplo n.º 24
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        vectors = features["vectors"]  # [batch_size, max_unit, num_hidden]
        valid_mask = features["valid_mask"]
        label_ids = features["label_ids"]
        vectors = tf.reshape(vectors, [
            -1, model_config.num_window, model_config.max_sequence,
            model_config.hidden_size
        ])
        valid_mask = tf.reshape(
            valid_mask,
            [-1, model_config.num_window, model_config.max_sequence])
        label_ids = tf.reshape(label_ids, [-1])

        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = MultiEvidenceCombiner(config=model_config,
                                      is_training=is_training,
                                      vectors=vectors,
                                      valid_mask=valid_mask,
                                      scope=None)
        pooled = model.pooled_output
        if is_training:
            pooled = dropout(pooled, 0.1)

        logits = tf.keras.layers.Dense(config.num_classes,
                                       name="cls_dense")(pooled)
        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)

        if "bias_loss" in special_flags:
            tf_logging.info("Using special_flags : bias_loss")
            loss_arr = reweight_zero(label_ids, loss_arr)

        loss = tf.reduce_mean(input_tensor=loss_arr)
        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}

        scaffold_fn = None
        if config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn, config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            if "simple_optimizer" in special_flags:
                tf_logging.info("using simple optimizer")
                train_op = create_simple_optimizer(loss, config.learning_rate,
                                                   config.use_tpu)
            else:
                if "ask_tvar" in special_flags:
                    tvars = model.get_trainable_vars()
                else:
                    tvars = None
                train_op = optimization.create_optimizer_from_config(
                    loss, config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn,
                            [logits, label_ids, is_real_example])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {"logits": logits, "label_ids": label_ids}
            if override_prediction_fn is not None:
                predictions = override_prediction_fn(predictions, model)

            useful_inputs = ["data_id", "input_ids2"]
            for input_name in useful_inputs:
                if input_name in features:
                    predictions[input_name] = features[input_name]
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)

        return output_spec
Exemplo n.º 25
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        log_features(features)

        def reform_a_input(raw_input):
            return tf.reshape(raw_input,
                              [dict_run_config.inner_batch_size, -1])

        def reform_b_input(raw_input):
            return tf.reshape(raw_input, [dict_run_config.def_per_batch, -1])

        input_ids = reform_a_input(features["input_ids"])
        input_mask = reform_a_input(features["input_mask"])
        segment_ids = reform_a_input(features["segment_ids"])
        tf_logging.info("input_ids, input_mask")

        # input_ids = features["input_ids"]
        # input_mask = features["input_mask"]
        # segment_ids = features["segment_ids"]

        if mode == tf.estimator.ModeKeys.PREDICT:
            tf.random.set_seed(0)
            seed = 0
        else:
            seed = None

        # tf_logging.info("Doing dynamic masking (random)")
        # masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        #     = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID, seed)
        # if dict_run_config.prediction_op == "loss_fixed_mask" or train_config.fixed_mask:
        masked_input_ids = input_ids
        masked_lm_positions = reform_a_input(features["masked_lm_positions"])
        masked_lm_ids = reform_a_input(features["masked_lm_ids"])
        masked_lm_weights = reform_a_input(features["masked_lm_weights"])

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        if model_name == "APR":
            model = APR(
                masked_input_ids,
                input_mask,
                segment_ids,
                is_training,
                train_config.use_one_hot_embeddings,
                bert_config,
                ssdr_config,
                dict_run_config.def_per_batch,
                dict_run_config.inner_batch_size,
                dict_run_config.max_def_length,
            )
        elif model_name == "BERT":
            model = BertModel(
                config=bert_config,
                is_training=is_training,
                input_ids=masked_input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
        else:
            raise ValueError("Unknown model_name: %s" % model_name)

        masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
            = get_masked_lm_output(bert_config, model.get_sequence_output(), model.get_embedding_table(),
                 masked_lm_positions, masked_lm_ids, masked_lm_weights)

        loss = masked_lm_loss

        tvars = tf.compat.v1.trainable_variables()
        assignment_fn = dict_model_fn.get_bert_assignment_map_for_dict
        initialized_variable_names, init_fn = align_checkpoint_twice(
            tvars, train_config.init_checkpoint, assignment_fn)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
                tf_logging.info("Using two lr for each parts")
                train_op = create_optimizer_with_separate_lr(
                    loss, train_config)
            else:
                tf_logging.info("Using single lr ")
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           training_hooks=[OomReportingHook()],
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn_lm, [
                masked_lm_example_loss,
                masked_lm_log_probs,
                masked_lm_ids,
                masked_lm_weights,
            ])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "masked_input_ids": masked_input_ids,
                "masked_lm_ids": masked_lm_ids,
                "masked_lm_example_loss": masked_lm_example_loss,
                "masked_lm_positions": masked_lm_positions,
            }
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           predictions=predictions,
                                           scaffold_fn=scaffold_fn)

        return output_spec
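get_masked_lm_output receives masked_lm_positions and must pull the hidden vector at each masked position out of the [batch, seq_len, hidden] sequence output. The standard helper for this in the public BERT reference code flattens the batch and gathers by offset positions; a sketch following that reference (the usual implementation, not necessarily this repository's exact one):

    import tensorflow as tf

    def gather_indexes(sequence_tensor, positions):
        # sequence_tensor: [batch, seq_len, width]; positions: [batch, n_pred]
        batch_size = tf.shape(sequence_tensor)[0]
        seq_length = tf.shape(sequence_tensor)[1]
        width = sequence_tensor.shape[-1]
        flat_offsets = tf.reshape(tf.range(batch_size) * seq_length, [-1, 1])
        flat_positions = tf.reshape(positions + flat_offsets, [-1])
        flat_sequence = tf.reshape(sequence_tensor, [-1, width])
        return tf.gather(flat_sequence, flat_positions)  # [batch * n_pred, width]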
Exemplo n.º 26
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("    name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        if "next_sentence_labels" in features:
            next_sentence_labels = features["next_sentence_labels"]
        else:
            next_sentence_labels = get_dummy_next_sentence_labels(input_ids)

        if mode == tf.estimator.ModeKeys.PREDICT:
            tf.random.set_seed(0)
            seed = 0
            print("Seed as zero")
        else:
            seed = None

        tf_logging.info("Doing dynamic masking (random)")
        special_tokens = [LABEL_UNK, LABEL_0, LABEL_1, LABEL_2]
        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = random_masking(input_ids,
                             input_mask,
                             train_config.max_predictions_per_seq,
                             MASK_ID,
                             seed,
                             special_tokens)

        masked_input_ids, masked_lm_positions_label, masked_label_ids_label, is_test_inst \
            = get_label_indices(masked_input_ids)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = model_class(
            config=model_config,
            is_training=is_training,
            input_ids=masked_input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output_fn(
             model_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        with tf.compat.v1.variable_scope("label_token"):
            (masked_lm_loss_label, masked_lm_example_loss_label,
             masked_lm_log_probs_label) = get_masked_lm_output_fn(
                 model_config, model.get_sequence_output(),
                 model.get_embedding_table(), masked_lm_positions_label,
                 masked_label_ids_label, is_test_inst)
        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
             model_config, model.get_pooled_output(), next_sentence_labels)

        total_loss = masked_lm_loss + masked_lm_loss_label * model_config.ratio

        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names, initialized_variable_names2, init_fn\
            = align_checkpoint_for_lm(tvars,
                                      train_config.checkpoint_type,
                                      train_config.init_checkpoint,
                                      train_config.second_init_checkpoint,
                                      )

        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)

        log_var_assignments(tvars, initialized_variable_names,
                            initialized_variable_names2)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[OomReportingHook()],
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, masked_lm_example_loss_label,
                masked_lm_log_probs_label, masked_label_ids_label, is_test_inst
            ])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "masked_input_ids": masked_input_ids,
                "masked_lm_ids": masked_lm_ids,
                "masked_lm_example_loss": masked_lm_example_loss,
                "masked_lm_positions": masked_lm_positions,
                "masked_lm_example_loss_label": masked_lm_example_loss_label,
                "masked_lm_log_probs_label": masked_lm_log_probs_label,
                "masked_label_ids_label": masked_label_ids_label,
                "is_test_inst": is_test_inst,
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
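random_masking above (with the label tokens passed as special_tokens) chooses masked positions dynamically at input time instead of baking them into the TFRecords. A NumPy sketch of that selection logic under the same contract of four parallel outputs; the helper name and its exclusion handling are illustrative, not the repository's implementation:

    import numpy as np

    def random_mask(input_ids, input_mask, n_pred, mask_id, rng, special_ids=()):
        # Pick up to n_pred non-pad, non-special positions and replace with [MASK].
        ids = np.array(input_ids)
        candidates = [i for i, (t, m) in enumerate(zip(ids, input_mask))
                      if m == 1 and t not in special_ids]
        positions = sorted(rng.choice(candidates,
                                      size=min(n_pred, len(candidates)),
                                      replace=False))
        masked = ids.copy()
        target_ids = masked[positions].copy()  # original tokens become the labels
        masked[positions] = mask_id
        weights = np.ones(len(positions), dtype=np.float32)
        return masked, positions, target_ids, weights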
Exemplo n.º 27
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        if mode == tf.estimator.ModeKeys.PREDICT:
            label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
        else:
            label_ids = features["label_ids"]
            label_ids = tf.reshape(label_ids, [-1])
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        domain_ids = features["domain_ids"]
        domain_ids = tf.reshape(domain_ids, [-1])

        is_valid_label = features["is_valid_label"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model_1 = BertModel(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled = model_1.get_pooled_output()
        if is_training:
            pooled = dropout(pooled, 0.1)

        logits = tf.keras.layers.Dense(train_config.num_classes,
                                       name="cls_dense")(pooled)
        pred_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)
        num_domain = 2
        pooled_for_domain = grad_reverse(pooled)
        domain_logits = tf.keras.layers.Dense(
            num_domain, name="domain_dense")(pooled_for_domain)
        domain_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=domain_logits, labels=domain_ids)

        pred_loss = tf.reduce_mean(pred_losses *
                                   tf.cast(is_valid_label, tf.float32))
        domain_loss = tf.reduce_mean(domain_losses)

        tf.compat.v1.summary.scalar('domain_loss', domain_loss)
        tf.compat.v1.summary.scalar('pred_loss', pred_loss)
        alpha = model_config.alpha
        loss = pred_loss + alpha * domain_loss
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            tvars = None  # None lets the optimizer update all trainable variables
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn,
                            [logits, label_ids, is_real_example])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "logits": logits,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
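
Note: `grad_reverse` used above is not defined in this snippet. Below is a
minimal sketch of a DANN-style gradient-reversal layer built on
`tf.custom_gradient` (an assumption about the helper's behavior, not the
repository's actual code):

import tensorflow as tf

@tf.custom_gradient
def grad_reverse(x):
    y = tf.identity(x)  # forward pass: identity

    def grad(dy):
        # Backward pass: flip the gradient sign so the encoder is trained
        # to *confuse* the domain classifier.
        return -dy

    return y, grad
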
Example no. 28
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        log_features(features)
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        next_sentence_labels = features["next_sentence_labels"]

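        # BERT-style random masking: replace a random subset of tokens with
        # MASK_ID and record their positions, ids, and weights for the MLM loss.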
        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        prefix1 = "MaybeBERT"
        prefix2 = "MaybeBFN"
        with tf.compat.v1.variable_scope(prefix1):
            model1 = BertModel(
                config=bert_config,
                is_training=is_training,
                input_ids=masked_input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            (masked_lm_loss1, masked_lm_example_loss1,
             masked_lm_log_probs1) = get_masked_lm_output(
                 bert_config, model1.get_sequence_output(),
                 model1.get_embedding_table(), masked_lm_positions,
                 masked_lm_ids, masked_lm_weights)

            masked_lm_example_loss1 = tf.reshape(masked_lm_example_loss1,
                                                 masked_lm_ids.shape)

        with tf.compat.v1.variable_scope(prefix2):
            model2 = BertModel(
                config=bert_config,
                is_training=is_training,
                input_ids=masked_input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )

            (masked_lm_loss2, masked_lm_example_loss2,
             masked_lm_log_probs2) = get_masked_lm_output(
                 bert_config, model2.get_sequence_output(),
                 model2.get_embedding_table(), masked_lm_positions,
                 masked_lm_ids, masked_lm_weights)

            tf_logging.info("model2 sequence output shape: %s",
                            model2.get_sequence_output().shape)
            masked_lm_example_loss2 = tf.reshape(masked_lm_example_loss2,
                                                 masked_lm_ids.shape)

        model = model_class(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )

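        # Train a loss-prediction model against the two towers: stop_gradient
        # turns their per-example MLM losses into fixed regression targets, so
        # no gradient flows back into MaybeBERT or MaybeBFN.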
        loss_model = IndependentLossModel(bert_config)
        loss_model.train_modeling(model.get_sequence_output(),
                                  masked_lm_positions, masked_lm_weights,
                                  tf.stop_gradient(masked_lm_example_loss1),
                                  tf.stop_gradient(masked_lm_example_loss2))

        total_loss = loss_model.total_loss
        loss1 = loss_model.loss1
        loss2 = loss_model.loss2
        per_example_loss1 = loss_model.per_example_loss1
        per_example_loss2 = loss_model.per_example_loss2
        losses1 = tf.reduce_sum(per_example_loss1, axis=1)
        losses2 = tf.reduce_sum(per_example_loss2, axis=1)
        prob1 = loss_model.prob1
        prob2 = loss_model.prob2

        checkpoint2_1, checkpoint2_2 = train_config.second_init_checkpoint.split(
            ",")
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names_1, init_fn_1 = get_init_fn_for_two_checkpoints(
            train_config, tvars, checkpoint2_1, prefix1, checkpoint2_2,
            prefix2)
        assignment_fn = get_bert_assignment_map
        assignment_map2, initialized_variable_names_2 = assignment_fn(
            tvars, train_config.init_checkpoint)

        initialized_variable_names = {}
        initialized_variable_names.update(initialized_variable_names_1)
        initialized_variable_names.update(initialized_variable_names_2)

        def init_fn():
            init_fn_1()
            tf.compat.v1.train.init_from_checkpoint(
                train_config.init_checkpoint, assignment_map2)

        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)

        log_var_assignments(tvars, initialized_variable_names)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss1, per_example_loss2):
                loss1 = tf.compat.v1.metrics.mean(values=per_example_loss1)
                loss2 = tf.compat.v1.metrics.mean(values=per_example_loss2)
                return {
                    "loss1": loss1,
                    "loss2": loss2,
                }

            eval_metrics = (metric_fn, [losses1, losses2])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "prob1": prob1,
                "prob2": prob2,
                "per_example_loss1": per_example_loss1,
                "per_example_loss2": per_example_loss2,
                "input_ids": input_ids,
                "masked_lm_positions": masked_lm_positions,
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
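
Note: the `tf.stop_gradient` calls above make the two towers pure teachers. A
minimal graph-mode sketch (variable names are illustrative only) showing that
no gradient flows through the stopped branch:

import tensorflow as tf
tf.compat.v1.disable_eager_execution()

x = tf.compat.v1.get_variable("x", initializer=2.0)
teacher = x * x                       # teacher signal
target = tf.stop_gradient(teacher)    # treated as a constant label
loss = tf.square(x - target)          # student regresses onto the target
(g,) = tf.compat.v1.gradients(loss, [x])  # 2*(x - target): the teacher term contributes nothing
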
Example no. 29
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        label_masks = features["label_masks"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = model_class(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        logits = tf.keras.layers.Dense(train_config.num_classes,
                                       name="token_regression")(
                                           model.get_sequence_output())

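        # Per-token MAE: labels are expanded to [batch, seq, 1] so Keras' MAE
        # reduces the last axis and yields a [batch, seq] loss tensor.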
        per_ex_losses = tf.keras.losses.MAE(tf.expand_dims(label_ids, 2),
                                            logits)
        masked_losses = per_ex_losses * tf.cast(label_masks, tf.float32)
        losses_sum = tf.reduce_sum(masked_losses, axis=1)
        denom = tf.reduce_sum(tf.cast(label_masks, tf.float32), axis=1) + 1e-5
        losses = losses_sum / denom
        total_loss = tf.reduce_mean(losses)
        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            assignment_map, initialized_variable_names = get_bert_assignment_map(
                tvars, train_config.init_checkpoint)
            if train_config.use_tpu:

                def tpu_scaffold():
                    tf.compat.v1.train.init_from_checkpoint(
                        train_config.init_checkpoint, assignment_map)
                    return tf.compat.v1.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)

        tf_logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf_logging.info("name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(logits, label_ids, label_masks):
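                # Sign-agreement accuracy: a token counts as correct when the
                # prediction and the label share the same sign; masked tokens
                # are excluded from both numerator and denominator.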
                logits_reduced = tf.squeeze(logits, 2)
                is_neg_correct = tf.logical_and(tf.less(label_ids, 0.),
                                                tf.less(logits_reduced, 0.))
                is_pos_correct = tf.logical_and(tf.less(0., label_ids),
                                                tf.less(0., logits_reduced))
                is_correct = tf.logical_or(is_neg_correct, is_pos_correct)

                float_masks = tf.cast(label_masks, tf.float32)
                num_correct = tf.reduce_sum(tf.cast(is_correct, tf.float32) *
                                            float_masks,
                                            axis=1)
                num_problems = tf.reduce_sum(float_masks, axis=1) + 1e-5
                acc_list = num_correct / num_problems

                mean_acc = tf.compat.v1.metrics.mean(values=acc_list)

                return {'mean_acc': mean_acc}

            eval_metrics = (metric_fn, [logits, label_ids, label_masks])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "logits": logits,
                "input_ids": input_ids,
                "labels": label_ids,
                "label_masks": label_masks,
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
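
Note: the masked-average pattern above (sum the per-token losses, divide by
the valid-token count plus a small epsilon) is reusable on its own; a minimal
sketch, assuming a [batch, seq] float loss tensor and an integer mask (the
helper name is hypothetical):

import tensorflow as tf

def masked_mean_loss(per_token_loss, mask, eps=1e-5):
    """Average a [batch, seq] loss over valid tokens only."""
    float_mask = tf.cast(mask, tf.float32)
    loss_sum = tf.reduce_sum(per_token_loss * float_mask, axis=1)
    # Epsilon guards against division by zero for all-masked rows.
    denom = tf.reduce_sum(float_mask, axis=1) + eps
    return tf.reduce_mean(loss_sum / denom)
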
Example no. 30
def _train_op_fn(train_config, loss):
    train_op = optimization_v2.create_optimizer_from_config(loss, train_config)
    return train_op
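
A hypothetical caller for the factory above, assuming `mode`, `loss`, and
`train_config` are already in scope:

output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
    mode=mode,
    loss=loss,
    train_op=_train_op_fn(train_config, loss),
)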