Example #1
def maskedBert(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    masked_lm_positions_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    initializer_range=0.02,
):
    backbone = bert_util.BertBackbone(
        input_ids_blob=input_ids_blob,
        input_mask_blob=input_mask_blob,
        token_type_ids_blob=token_type_ids_blob,
        vocab_size=vocab_size,
        seq_length=seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )

    predictions = _AddMaskedLanguageModel(
        input_blob=backbone.sequence_output(),
        output_weights_blob=backbone.embedding_table(),
        positions_blob=masked_lm_positions_blob,
        seq_length=seq_length,
        hidden_size=hidden_size,
        vocab_size=vocab_size,
        hidden_act=bert_util.GetActivation(hidden_act),
        initializer_range=initializer_range,
    )
    # pooled_output is computed for parity with the pre-training graph but is
    # unused here; the masked-LM predictions are the only output.
    pooled_output = PooledOutput(backbone.sequence_output(), hidden_size,
                                 initializer_range)
    return predictions
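A minimal calling sketch (not part of the original example): it wires maskedBert into a legacy OneFlow global function. The oneflow.typing placeholder style shown here is the pre-0.9 single-client API; the batch size, vocabulary size, and number of masked positions are illustrative assumptions, and maskedBert plus its bert_util helpers are assumed importable from this module.

import oneflow as flow
import oneflow.typing as tp

BATCH, SEQ, MAX_PRED, VOCAB = 8, 512, 20, 30522

@flow.global_function(type="predict")
def masked_bert_job(
    input_ids: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    input_mask: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    token_type_ids: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    masked_lm_positions: tp.Numpy.Placeholder((BATCH, MAX_PRED), dtype=flow.int32),
) -> tp.Numpy:
    # Returns the masked-LM head's per-position vocabulary predictions.
    return maskedBert(input_ids, input_mask, token_type_ids,
                      masked_lm_positions, vocab_size=VOCAB, seq_length=SEQ)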
Example #2
def SQuAD(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    initializer_range=0.02,
):

    backbone = bert_util.BertBackbone(
        input_ids_blob=input_ids_blob,
        input_mask_blob=input_mask_blob,
        token_type_ids_blob=token_type_ids_blob,
        vocab_size=vocab_size,
        seq_length=seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )

    with flow.scope.namespace("cls-squad"):
        final_hidden = backbone.sequence_output()
        final_hidden_matrix = flow.reshape(final_hidden, [-1, hidden_size])
        logits = bert_util._FullyConnected(
            final_hidden_matrix,
            hidden_size,
            units=2,
            weight_initializer=bert_util.CreateInitializer(initializer_range),
            name='output')
        logits = flow.reshape(logits, [-1, seq_length, 2])

        start_logits = flow.slice(logits, [None, None, 0], [None, None, 1])
        end_logits = flow.slice(logits, [None, None, 1], [None, None, 1])

    return start_logits, end_logits
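A minimal calling sketch under the same legacy-API assumptions as above, with illustrative shapes: SQuAD returns a (start_logits, end_logits) pair, so the job is annotated with a typing.Tuple of two Numpy outputs.

from typing import Tuple

import oneflow as flow
import oneflow.typing as tp

BATCH, SEQ, VOCAB = 8, 512, 30522

@flow.global_function(type="predict")
def squad_job(
    input_ids: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    input_mask: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    token_type_ids: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
) -> Tuple[tp.Numpy, tp.Numpy]:
    # Each returned blob has shape (BATCH, SEQ, 1): answer-span start and end logits.
    return SQuAD(input_ids, input_mask, token_type_ids,
                 vocab_size=VOCAB, seq_length=SEQ)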
Example #3
def GlueBERT(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    label_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    initializer_range=0.02,
    label_num=2,
    replace_prob=None,
):
    backbone = bert_util.BertBackbone(
        input_ids_blob=input_ids_blob,
        input_mask_blob=input_mask_blob,
        token_type_ids_blob=token_type_ids_blob,
        vocab_size=vocab_size,
        seq_length=seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )
    pooled_output = PooledOutput(sequence_output=backbone.sequence_output(),
                                 hidden_size=hidden_size,
                                 initializer_range=initializer_range)
    loss, _, logit_blob = _AddClassficationLoss(
        input_blob=pooled_output,
        label_blob=label_blob,
        hidden_size=hidden_size,
        label_num=label_num,
        initializer_range=initializer_range,
        scope_name='classification')

    return loss, logit_blob
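Because GlueBERT returns a loss, it is typically wrapped in a train-type job with an optimizer attached. A hedged sketch assuming the legacy flow.optimizer API; the learning rate, shapes, and label count are illustrative.

import oneflow as flow
import oneflow.typing as tp

BATCH, SEQ, VOCAB, NUM_LABELS = 8, 512, 30522, 2

@flow.global_function(type="train")
def glue_train_job(
    input_ids: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    input_mask: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    token_type_ids: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    labels: tp.Numpy.Placeholder((BATCH,), dtype=flow.int32),
) -> tp.Numpy:
    loss, _ = GlueBERT(input_ids, input_mask, token_type_ids, labels,
                       vocab_size=VOCAB, seq_length=SEQ, label_num=NUM_LABELS)
    loss = flow.math.reduce_mean(loss)
    # Constant learning rate purely for illustration.
    flow.optimizer.Adam(
        flow.optimizer.PiecewiseConstantScheduler([], [2e-5])
    ).minimize(loss)
    return loss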
Example #4
def PreTrain(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    masked_lm_positions_blob,
    masked_lm_ids_blob,
    masked_lm_weights_blob,
    next_sentence_label_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    max_predictions_per_seq=20,
    initializer_range=0.02,
    use_fp16=False,
):
    backbone = bert_util.BertBackbone(
        input_ids_blob=input_ids_blob,
        input_mask_blob=input_mask_blob,
        token_type_ids_blob=token_type_ids_blob,
        vocab_size=vocab_size,
        seq_length=seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )

    (lm_loss, _, _) = _AddMaskedLanguageModelLoss(
        input_blob=backbone.sequence_output(),
        output_weights_blob=backbone.embedding_table(),
        positions_blob=masked_lm_positions_blob,
        label_id_blob=masked_lm_ids_blob,
        label_weight_blob=masked_lm_weights_blob,
        seq_length=seq_length,
        hidden_size=hidden_size,
        vocab_size=vocab_size,
        max_predictions_per_seq=max_predictions_per_seq,
        hidden_act=bert_util.GetActivation(hidden_act),
        initializer_range=initializer_range,
    )
    pooled_output = PooledOutput(backbone.sequence_output(), hidden_size,
                                 initializer_range)
    (ns_loss, _, _) = _AddNextSentenceOutput(
        input_blob=pooled_output,
        label_blob=next_sentence_label_blob,
        hidden_size=hidden_size,
        initializer_range=initializer_range,
    )
    with flow.scope.namespace("cls-loss"):
        lm_loss = flow.math.reduce_mean(lm_loss)
        ns_loss = flow.math.reduce_mean(ns_loss)
        total_loss = lm_loss + ns_loss
    return total_loss, lm_loss, ns_loss
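PreTrain already reduces and sums the masked-LM and next-sentence losses, so a training job only needs to feed the labels and minimize the total. A sketch under the same legacy-API assumptions as above; the (BATCH, 1) shape for the next-sentence label is an assumption about the data layout.

import oneflow as flow
import oneflow.typing as tp

BATCH, SEQ, MAX_PRED, VOCAB = 8, 512, 20, 30522

@flow.global_function(type="train")
def pretrain_job(
    input_ids: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    input_mask: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    token_type_ids: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    masked_lm_positions: tp.Numpy.Placeholder((BATCH, MAX_PRED), dtype=flow.int32),
    masked_lm_ids: tp.Numpy.Placeholder((BATCH, MAX_PRED), dtype=flow.int32),
    masked_lm_weights: tp.Numpy.Placeholder((BATCH, MAX_PRED), dtype=flow.float32),
    next_sentence_labels: tp.Numpy.Placeholder((BATCH, 1), dtype=flow.int32),
) -> tp.Numpy:
    total_loss, _, _ = PreTrain(
        input_ids, input_mask, token_type_ids,
        masked_lm_positions, masked_lm_ids, masked_lm_weights,
        next_sentence_labels, vocab_size=VOCAB, seq_length=SEQ,
        max_predictions_per_seq=MAX_PRED)
    flow.optimizer.Adam(
        flow.optimizer.PiecewiseConstantScheduler([], [1e-4])
    ).minimize(total_loss)
    return total_loss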
Example #5
def BertStudentForSequenceClassification(input_ids_blob,
                                         input_mask_blob,
                                         token_type_ids_blob,
                                         label_blob,
                                         vocab_size,
                                         seq_length=512,
                                         hidden_size=768,
                                         num_hidden_layers=12,
                                         num_attention_heads=12,
                                         intermediate_size=3072,
                                         hidden_act="gelu",
                                         hidden_dropout_prob=0.1,
                                         attention_probs_dropout_prob=0.1,
                                         max_position_embeddings=512,
                                         type_vocab_size=16,
                                         initializer_range=0.02,
                                         label_num=2,
                                         is_student=False,
                                         fit_size=768,
                                         is_train=True):
    with flow.scope.namespace('student'):
        backbone = bert_util.BertBackbone(
            input_ids_blob=input_ids_blob,
            input_mask_blob=input_mask_blob,
            token_type_ids_blob=token_type_ids_blob,
            vocab_size=vocab_size,
            seq_length=seq_length,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            hidden_dropout_prob=hidden_dropout_prob,
            attention_probs_dropout_prob=attention_probs_dropout_prob,
            max_position_embeddings=max_position_embeddings,
            type_vocab_size=type_vocab_size,
            initializer_range=initializer_range,
            is_train=is_train)
        pooled_output = PooledOutput(
            sequence_output=backbone.sequence_output(),
            hidden_size=hidden_size,
            initializer_range=initializer_range,
            is_train=is_train)
        logit_blob = _AddClassfication(input_blob=pooled_output,
                                       label_blob=label_blob,
                                       hidden_size=hidden_size,
                                       label_num=label_num,
                                       initializer_range=initializer_range,
                                       scope_name='classification',
                                       is_train=is_train)
        sequence_output = backbone.all_encoder_layers()
        att_output = backbone.all_attention_probs()
        embed_output = backbone.embedding_output()
        sequence_output.insert(0, embed_output)

        if is_student:
            # Project every student layer (embedding output plus each encoder
            # layer) to the teacher's hidden size so they can be matched
            # against the teacher during distillation.
            sequence_output = [
                fit_dense(input_blob=sequence_layer,
                          hidden_size=hidden_size,
                          label_num=fit_size,
                          initializer_range=initializer_range,
                          scope_name='fit_dense',
                          is_train=is_train)
                for sequence_layer in sequence_output
            ]

    return logit_blob, sequence_output, att_output
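A minimal inference sketch for the student model, under the same legacy-API assumptions. Only the logits are returned from the job; the per-layer hidden states and attention probabilities the function also exposes are Python lists of blobs, which a distillation loss would consume inside the job rather than return wholesale.

import oneflow as flow
import oneflow.typing as tp

BATCH, SEQ, VOCAB, NUM_LABELS = 8, 128, 30522, 2

@flow.global_function(type="predict")
def student_infer_job(
    input_ids: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    input_mask: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    token_type_ids: tp.Numpy.Placeholder((BATCH, SEQ), dtype=flow.int32),
    labels: tp.Numpy.Placeholder((BATCH,), dtype=flow.int32),
) -> tp.Numpy:
    logits, _, _ = BertStudentForSequenceClassification(
        input_ids, input_mask, token_type_ids, labels,
        vocab_size=VOCAB, seq_length=SEQ, label_num=NUM_LABELS,
        is_student=True, is_train=False)
    return logits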