Example #1
def create_model(albert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings,
                 max_seq_length, dropout_prob, hub_module):
    """Creates a classification model."""
    bsz_per_core = tf.shape(input_ids)[0]

    input_ids = tf.reshape(input_ids,
                           [bsz_per_core * num_labels, max_seq_length])
    input_mask = tf.reshape(input_mask,
                            [bsz_per_core * num_labels, max_seq_length])
    token_type_ids = tf.reshape(segment_ids,
                                [bsz_per_core * num_labels, max_seq_length])

    (output_layer, _) = fine_tuning_utils.create_albert(
        albert_config=albert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=token_type_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        use_einsum=True,
        hub_module=hub_module)

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [1, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [1],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # Apply dropout at the configured rate (keep_prob = 1 - dropout_prob).
            output_layer = tf.nn.dropout(output_layer,
                                         keep_prob=1 - dropout_prob)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [bsz_per_core, num_labels])
        probabilities = tf.nn.softmax(logits, axis=-1)
        predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(labels,
                                    depth=tf.cast(num_labels, dtype=tf.int32),
                                    dtype=tf.float32)

        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, probabilities, logits, predictions)
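
This first variant is the multiple-choice head (as used for tasks like RACE): the inputs carry num_labels candidate sequences per example, get flattened to [batch * num_labels, max_seq_length], are scored by a single output vector, and the scores are reshaped back to [batch, num_labels] for the softmax over choices. Below is a minimal, illustrative call sketch; the placeholder shapes and the albert_config object are assumptions, not part of the source.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Hypothetical task: 4 candidate answers per question, 128 tokens each.
batch_size, num_choices, max_seq_length = 8, 4, 128

input_ids = tf.placeholder(tf.int32, [batch_size, num_choices, max_seq_length])
input_mask = tf.placeholder(tf.int32, [batch_size, num_choices, max_seq_length])
segment_ids = tf.placeholder(tf.int32, [batch_size, num_choices, max_seq_length])
labels = tf.placeholder(tf.int32, [batch_size])  # index of the correct choice

(loss, per_example_loss, probabilities, logits, predictions) = create_model(
    albert_config=albert_config,  # assumed: an AlbertConfig loaded elsewhere
    is_training=True,
    input_ids=input_ids,
    input_mask=input_mask,
    segment_ids=segment_ids,
    labels=labels,
    num_labels=num_choices,
    use_one_hot_embeddings=False,
    max_seq_length=max_seq_length,
    dropout_prob=0.1,
    hub_module=None)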
Example #2
def create_model(albert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings,
                 task_name, hub_module):
    """Creates a classification model."""
    (output_layer, _) = fine_tuning_utils.create_albert(
        albert_config=albert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        use_einsum=True,
        hub_module=hub_module)

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # Apply 10% dropout during training (keep_prob=0.9).
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        if task_name != "sts-b":
            probabilities = tf.nn.softmax(logits, axis=-1)
            predictions = tf.argmax(probabilities,
                                    axis=-1,
                                    output_type=tf.int32)
            log_probs = tf.nn.log_softmax(logits, axis=-1)
            one_hot_labels = tf.one_hot(labels,
                                        depth=num_labels,
                                        dtype=tf.float32)

            per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs,
                                              axis=-1)
        else:
            probabilities = logits
            logits = tf.squeeze(logits, [-1])
            predictions = logits
            per_example_loss = tf.square(logits - labels)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, probabilities, logits, predictions)
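
The second variant is the standard single-sequence classifier: one logit per class and a softmax cross-entropy loss, with a special case for STS-B, where num_labels is 1, the labels are float similarity scores, and the squeezed logits are trained with a mean-squared error. A hedged usage sketch for a binary task follows; the shapes, task choice, and albert_config are illustrative assumptions.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

batch_size, max_seq_length = 32, 128

input_ids = tf.placeholder(tf.int32, [batch_size, max_seq_length])
input_mask = tf.placeholder(tf.int32, [batch_size, max_seq_length])
segment_ids = tf.placeholder(tf.int32, [batch_size, max_seq_length])
labels = tf.placeholder(tf.int32, [batch_size])  # class index per example

(loss, per_example_loss, probabilities, logits, predictions) = create_model(
    albert_config=albert_config,  # assumed: an AlbertConfig loaded elsewhere
    is_training=True,
    input_ids=input_ids,
    input_mask=input_mask,
    segment_ids=segment_ids,
    labels=labels,
    num_labels=2,                 # e.g. a binary task such as MRPC
    use_one_hot_embeddings=False,
    task_name="mrpc",
    hub_module=None)

# For STS-B one would instead pass task_name="sts-b", num_labels=1, and
# float-valued labels of shape [batch_size].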
Example #3
def create_model(albert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings,
                 task_name, hub_module):
    """Creates a classification model."""
    (output_layer, _) = fine_tuning_utils.create_albert(
        albert_config=albert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        use_einsum=True,
        hub_module=hub_module)

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # Apply 10% dropout during training (keep_prob=0.9).
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        # Multi-label classification: labels are multi-hot vectors, so each
        # logit is squashed with an independent sigmoid rather than a softmax.
        probabilities = tf.nn.sigmoid(logits)
        labels = tf.cast(labels, tf.float32)
        # Note: argmax reports only the single highest-scoring label; for true
        # multi-label predictions one would threshold probabilities instead.
        predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)

        # Element-wise sigmoid cross-entropy, summed over nothing: the loss
        # tensor has shape [batch, num_labels] and is averaged below.
        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels, logits=logits)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, probabilities, logits, predictions)
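
The third variant adapts the same head to multi-label classification: instead of a softmax over mutually exclusive classes, each logit gets an independent sigmoid, and the loss is element-wise sigmoid cross-entropy against multi-hot label vectors. An illustrative call sketch, with assumed placeholder shapes and a hypothetical task name:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

batch_size, max_seq_length, num_classes = 16, 128, 10

input_ids = tf.placeholder(tf.int32, [batch_size, max_seq_length])
input_mask = tf.placeholder(tf.int32, [batch_size, max_seq_length])
segment_ids = tf.placeholder(tf.int32, [batch_size, max_seq_length])
# Multi-hot targets: an example may carry several labels at once.
labels = tf.placeholder(tf.int32, [batch_size, num_classes])

(loss, per_example_loss, probabilities, logits, predictions) = create_model(
    albert_config=albert_config,  # assumed: an AlbertConfig loaded elsewhere
    is_training=True,
    input_ids=input_ids,
    input_mask=input_mask,
    segment_ids=segment_ids,
    labels=labels,
    num_labels=num_classes,
    use_one_hot_embeddings=False,
    task_name="multilabel",       # hypothetical name; this variant ignores it
    hub_module=None)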