import tensorflow as tf

# `modeling` and `optimization` are the modules from the
# google-research/bert repository.
import modeling
import optimization


def create_model(is_training, input_ids, input_mask, segment_ids, labels,
                 num_labels, use_one_hot_embeddings, bert_config,
                 class_weights):
    """Creates a classification model."""
    tags = set()
    if is_training:
        tags.add("train")

    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # In the demo, we are doing a simple classification task on the entire
    # segment.
    #
    # If you want to use the token-level output, use model.get_sequence_output()
    # instead.

    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

        # Multi-label head: an independent sigmoid per class rather than a
        # softmax over classes.
        probabilities = tf.nn.sigmoid(logits)

        labels = tf.cast(labels, tf.float32)
        tf.logging.info("num_labels:{};logits:{};labels:{}".format(
            num_labels, logits, labels))
        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels, logits=logits)
        if class_weights:
            # Broadcast per-class weights across the [batch, num_labels]
            # per-example loss.
            weights = tf.constant(class_weights, dtype=tf.float32)
            weighted_loss = tf.multiply(per_example_loss, weights)
            loss = tf.reduce_mean(weighted_loss)
            tf.logging.info(
                "weights:{}\nper_example_loss: {}\nweighted_loss:{}".format(
                    weights, per_example_loss, weighted_loss))
        else:
            loss = tf.reduce_mean(per_example_loss)
            tf.logging.info("loss:{}".format(loss))

        return (loss, per_example_loss, logits, probabilities)
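
# Usage sketch for create_model above: minimal TF 1.x graph wiring with
# placeholder shapes. The sequence length, label count, and config path are
# illustrative assumptions, not values taken from this code.
def build_multilabel_classifier_example():
    max_seq_len, num_labels = 128, 6  # assumed sizes
    bert_config = modeling.BertConfig.from_json_file("bert_config.json")

    input_ids = tf.placeholder(tf.int32, [None, max_seq_len], name="input_ids")
    input_mask = tf.placeholder(tf.int32, [None, max_seq_len],
                                name="input_mask")
    segment_ids = tf.placeholder(tf.int32, [None, max_seq_len],
                                 name="segment_ids")
    # Multi-hot labels: one 0/1 column per class.
    labels = tf.placeholder(tf.float32, [None, num_labels], name="labels")

    return create_model(is_training=True,
                        input_ids=input_ids,
                        input_mask=input_mask,
                        segment_ids=segment_ids,
                        labels=labels,
                        num_labels=num_labels,
                        use_one_hot_embeddings=False,
                        bert_config=bert_config,
                        class_weights=None)
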
# Example #2

def model_fn_builder(bert_config, init_checkpoint, layer_indexes, use_tpu,
                     use_one_hot_embeddings):
    """Returns a `model_fn` closure for TPUEstimator."""

    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_type_ids = features["input_type_ids"]

        model = modeling.BertModel(
            config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=input_type_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only PREDICT mode is supported: %s" % mode)

        tvars = tf.trainable_variables()
        scaffold_fn = None
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        #tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            #tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
            #init_string)

        all_layers = model.get_all_encoder_layers()

        predictions = {
            "unique_id": unique_ids,
        }

        for (i, layer_index) in enumerate(layer_indexes):
            predictions["layer_output_%d" % i] = all_layers[layer_index]

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                      predictions=predictions,
                                                      scaffold_fn=scaffold_fn)
        return output_spec

    return model_fn
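
# Usage sketch for the feature-extraction builder above, mirroring the
# defaults of BERT's extract_features.py. The paths here are placeholder
# assumptions, not files shipped with this code.
def make_feature_extraction_model_fn():
    return model_fn_builder(
        bert_config=modeling.BertConfig.from_json_file("bert_config.json"),
        init_checkpoint="bert_model.ckpt",
        layer_indexes=[-1, -2, -3, -4],  # emit the last four encoder layers
        use_tpu=False,
        use_one_hot_embeddings=False)
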
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
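    """Creates a classification model with a softmax cross-entropy loss.

    Note: `labels` is expected to be a float [batch_size, num_labels] tensor
    of one-hot (or soft) label distributions, not integer class ids.
    """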

    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # In the demo, we are doing a simple classification task on the entire
    # segment.
    #
    # If you want to use the token-level output, use model.get_sequence_output()
    # instead.
    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        # `labels` already arrives as a float [batch_size, num_labels]
        # distribution (one-hot or soft), so the tf.one_hot conversion used
        # in the standard BERT classifier is not needed here.
        # one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        # per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        per_example_loss = -tf.reduce_sum(labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, logits, probabilities)
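
# Worked example of the soft-label loss above, with made-up numbers. Unlike
# the tf.one_hot variant left commented out, the labels here are a float
# distribution over classes rather than integer ids.
def soft_label_loss_example():
    logits = tf.constant([[2.0, 1.0, 0.0]])
    labels = tf.constant([[0.7, 0.3, 0.0]])  # soft labels; rows sum to 1
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    per_example_loss = -tf.reduce_sum(labels * log_probs, axis=-1)
    with tf.Session() as sess:
        # log_softmax([2, 1, 0]) ~ [-0.408, -1.408, -2.408], so the loss is
        # -(0.7 * -0.408 + 0.3 * -1.408) ~ 0.708.
        print(sess.run(per_example_loss))
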
def model_fn_builder(use_tpu):
    """Returns a `model_fn` closure for TPUEstimator or Estimator; the model
    configuration (bert_config, num_classes, etc.) is read from `params`."""

    def model_fn(features, labels, mode, params):
        """The `model_fn` for TPUEstimator."""

        predictions = {}
        # `scaffold_fn` is set below only when initializing BERT from a
        # checkpoint; pre-declare it so the EstimatorSpec construction at the
        # bottom of this function never hits a NameError.
        scaffold_fn = None

        tags = set()
        if mode == tf.estimator.ModeKeys.TRAIN:
            tags.add("train")

        input_mask = features["input_mask"]
        batch_size = input_mask.shape[0]
        if labels is not None:
            label_ids = tf.cast(labels["label_ids"], tf.float32)

        if "embeddings" not in features:
            input_ids = features["input_ids"]
            segment_ids = features["segment_ids"]

            model = modeling.BertModel(config=params['bert_config'],
                                       is_training=params['trainable_bert'],
                                       input_ids=input_ids,
                                       input_mask=input_mask,
                                       token_type_ids=segment_ids,
                                       use_one_hot_embeddings=True)

            # TODO: Check whether is_training should equal trainable_bert.
            # model = create_model(bert_config=params['bert_config'],
            #                     is_training=params['trainable_bert'],
            #                     num_labels=params['num_classes'],
            #                     labels=label_ids,
            #                     segment_ids=segment_ids,
            #                     input_ids=input_ids,
            #                     input_mask=input_mask,
            #                     use_one_hot_embeddings=True)

            # TODO: Find the correct place for this checkpoint-initialization
            # block.
            tvars = tf.trainable_variables()
            initialized_variable_names = {}

            scaffold_fn = None
            if params["init_checkpoint"]:
                (assignment_map, initialized_variable_names
                 ) = modeling.get_assignment_map_from_checkpoint(
                     tvars, params["init_checkpoint"])
                if use_tpu:
                    def tpu_scaffold():
                        tf.train.init_from_checkpoint(params["init_checkpoint"],
                                                      assignment_map)
                        return tf.train.Scaffold()
                    scaffold_fn = tpu_scaffold
                else:
                    tf.train.init_from_checkpoint(params["init_checkpoint"],
                                                  assignment_map)

            tf.logging.info("**** Variables - INIT FROM CKPT ****")
            for var in tvars:
                if var.name in initialized_variable_names:
                    tf.logging.info("name: {}, shape: {}".format(var.name, var.shape))

            sequence_output = model.get_sequence_output()
            predictions["sequence_output"] = sequence_output

        else:
            sequence_output = features["embeddings"]

        hidden_size = sequence_output.shape[-1].value
        if params["class_based_attention"]:
            shared_query_embedding = tf.get_variable(
                'shared_query', [1, 1, params["shared_size"]],
                initializer=tf.truncated_normal_initializer(stddev=0.02))
            shared_query_embedding = tf.broadcast_to(
                shared_query_embedding,
                [1, params["num_classes"], params["shared_size"]])
            class_query_embedding = tf.get_variable(
                'class_query',
                [1, params["num_classes"], hidden_size - params["shared_size"]],
                initializer=tf.truncated_normal_initializer(stddev=0.02))
            query_embedding = tf.concat(
                [shared_query_embedding, class_query_embedding], axis=2)
            # Reimplement Attention layer to peek into weights.
            scores = tf.matmul(query_embedding,
                               sequence_output,
                               transpose_b=True)
            input_bias = tf.abs(input_mask - 1)
            scores -= 1.e9 * tf.expand_dims(tf.cast(input_bias, tf.float32),
                                            axis=1)
            distribution = tf.nn.softmax(scores)
            pooled_output = tf.matmul(distribution, sequence_output)
        else:
            first_token_tensor = tf.squeeze(sequence_output[:, 0:1, :], axis=1)
            pooled_output = tf.layers.dense(first_token_tensor,
                                            hidden_size,
                                            activation=tf.tanh)
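        # Shape sketch: with class-based attention, pooled_output is
        # [batch, num_classes, hidden_size] (one pooled vector per class);
        # otherwise it is [batch, hidden_size], a tanh projection of the
        # [CLS] token as in BERT's standard pooler.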

        if mode == tf.estimator.ModeKeys.TRAIN:
            pooled_output = tf.nn.dropout(pooled_output, rate=params["dropout"])

        logits = tf.layers.dense(pooled_output, params["num_classes"])
        if params["class_based_attention"]:
            # The dense output is [batch, num_classes, num_classes]; the
            # diagonal keeps, for class i, the logit computed from class i's
            # own pooled vector, yielding [batch, num_classes].
            logits = tf.matrix_diag_part(logits)

        # probabilities = tf.nn.softmax(logits, axis=-1)  # single-label case
        probabilities = tf.nn.sigmoid(logits)  # multi-label case

        train_op, loss = None, None
        eval_metrics = None
        if mode != tf.estimator.ModeKeys.PREDICT:
            with tf.variable_scope("loss"):
                per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=label_ids, logits=logits)
                loss = tf.reduce_mean(per_example_loss)
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(loss,
                                                     params["learning_rate"],
                                                     params["num_train_steps"],
                                                     params["num_warmup_steps"],
                                                     use_tpu,
                                                     trainable_bert=params['trainable_bert'])
        elif mode == tf.estimator.ModeKeys.EVAL:

            def _f1_score(labels, pred):
                """Computes F1 score, i.e. the harmonic mean of precision and recall."""
                precision = tf.metrics.precision(labels, pred)
                recall = tf.metrics.recall(labels, pred)
                return (2 * precision[0] * recall[0] /
                        (precision[0] + recall[0] + 1e-5),
                        tf.group(precision[1], recall[1]))

            def metric_fn(per_example_loss, labels, probabilities):
                # Binarize each class probability at a fixed 0.4 decision
                # threshold.
                pred = tf.where(probabilities > 0.4,
                                tf.ones_like(probabilities),
                                tf.zeros_like(probabilities))
                return {
                    'absolute/false_positives':
                        tf.metrics.false_positives(labels, pred),
                    'absolute/false_negatives':
                        tf.metrics.false_negatives(labels, pred),
                    'absolute/true_positives':
                        tf.metrics.true_positives(labels, pred),
                    'absolute/true_negatives':
                        tf.metrics.true_negatives(labels, pred),
                    'absolute/total':
                        # Counts examples per batch by matching all-ones
                        # predictions against all-ones labels.
                        tf.metrics.true_positives(tf.ones([batch_size]),
                                                  tf.ones([batch_size])),
                    'metric/acc':
                        tf.metrics.accuracy(labels, pred),
                    'metric/prec':
                        tf.metrics.precision(labels, pred),
                    'metric/recall':
                        tf.metrics.recall(labels, pred),
                    'metric/f1':
                        _f1_score(labels, pred),
                }

            eval_metrics = (metric_fn,
                            [per_example_loss, label_ids, probabilities])

        predictions["probabilities"] = probabilities
        predictions["attention"] = distribution
        predictions["pooled_output"] = pooled_output

        if use_tpu:
            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   loss=loss,
                                                   train_op=train_op,
                                                   scaffold_fn=scaffold_fn,
                                                   eval_metrics=eval_metrics,
                                                   predictions=predictions)
        else:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op,
                                              predictions=predictions)
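
# Usage sketch for the builder above. Every key mirrors a `params` lookup in
# model_fn; the concrete values and paths are illustrative assumptions only.
def make_estimator_example():
    params = {
        "bert_config": modeling.BertConfig.from_json_file("bert_config.json"),
        "trainable_bert": True,
        "init_checkpoint": "bert_model.ckpt",
        "num_classes": 6,
        "class_based_attention": True,
        "shared_size": 128,
        "dropout": 0.1,
        "learning_rate": 2e-5,
        "num_train_steps": 10000,
        "num_warmup_steps": 1000,
    }
    return tf.estimator.Estimator(model_fn=model_fn_builder(use_tpu=False),
                                  params=params)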