Example #1
def make_entity_border_encoder(bert_path, ckpt_file, max_seq_len, bert_dim):
    model_ckpt = bert_path + ckpt_file
    bert_params = params_from_pretrained_ckpt(bert_path)
    bert_layer = BertModelLayer.from_params(bert_params,
                                            name="bert",
                                            trainable=False)
    gather_fn = make_gather_entity_border_fn(bert_dim)

    input_ids = Input(shape=(max_seq_len, ), dtype='int32')
    index_border_ent1 = Input(shape=(2, ), dtype='int32')
    index_border_ent2 = Input(shape=(2, ), dtype='int32')
    bert_emb = bert_layer(input_ids)
    ent1_avg_emb = Lambda(lambda x: gather_fn(x))(
        [bert_emb, index_border_ent1])
    ent2_avg_emb = Lambda(lambda x: gather_fn(x))(
        [bert_emb, index_border_ent2])
    ent1_flatten = Flatten()(ent1_avg_emb)
    ent2_flatten = Flatten()(ent2_avg_emb)
    output = concatenate([ent1_flatten, ent2_flatten])

    model = Model(inputs=[input_ids, index_border_ent1, index_border_ent2],
                  outputs=output)
    model.build(input_shape=(None, max_seq_len))

    load_bert_weights(bert_layer, model_ckpt)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model
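The gather helper is not included in this example. The sketch below is only a guess at what a function like make_gather_entity_border_fn could return, assuming it picks the BERT embeddings at the two entity-border positions and averages them (the bert_dim argument is unused in this guess):

import tensorflow as tf

def make_gather_entity_border_fn(bert_dim):
    # Hypothetical sketch, not the original implementation.
    def gather_fn(inputs):
        seq_emb, border_idx = inputs  # (batch, seq_len, bert_dim), (batch, 2)
        # Pick the embeddings at the two border positions of the entity ...
        picked = tf.gather(seq_emb, border_idx, batch_dims=1)  # (batch, 2, bert_dim)
        # ... and average them, matching the `ent1_avg_emb` naming above.
        return tf.reduce_mean(picked, axis=1)  # (batch, bert_dim)
    return gather_fn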
Example #2
def make_entity_start_model(bert_path, ckpt_file, max_seq_len, bert_dim):
    model_ckpt = bert_path + ckpt_file
    bert_params = params_from_pretrained_ckpt(bert_path)
    bert_layer = BertModelLayer.from_params(bert_params,
                                            name="bert",
                                            trainable=True)
    slice_fn = make_gather_entity_start_fn(bert_dim)

    input_ids = Input(shape=(max_seq_len, ), dtype='int32')
    index_ent1 = Input(shape=(2, ), dtype='int32')
    index_ent2 = Input(shape=(2, ), dtype='int32')
    bert_emb = bert_layer(input_ids)
    ent1_start = Lambda(lambda x: slice_fn(x))([bert_emb, index_ent1])
    ent2_start = Lambda(lambda x: slice_fn(x))([bert_emb, index_ent2])
    concat = concatenate([ent1_start, ent2_start])
    output = Dense(2, activation='softmax')(concat)
    model = Model(inputs=[input_ids, index_ent1, index_ent2], outputs=output)
    model.build(input_shape=(None, max_seq_len))

    load_bert_weights(bert_layer, model_ckpt)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model
Example #3
def load_pretrained_weights(bert_layer, model_name, fetch_dir=None):
    if getattr(bert_layer, "is_hf", False):
        # The RoBERTa layer will already have the pretrained weights loaded.
        return bert_layer
    ckpt = get_pretrained_checkpoint(model_name, fetch_dir=fetch_dir)
    bert.load_bert_weights(bert_layer, ckpt)
    return bert_layer
Example #4
def create_model(
    model_dir, model_type, max_seq_len, n_classes, load_pretrained_weights=True, summary=False,
):
    """Creates keras model with pretrained BERT/ALBERT layer.

    Args:
        model_dir: String. Path to model.
        model_type: String. Expects either "albert" or "bert".
        max_seq_len: Int. Maximum length of a classification example.
        n_classes: Int. Number of training classes.
        load_pretrained_weights: Boolean. Load pretrained model weights.
        summary: Boolean. Print model summary.

    Returns:
        Keras model
    """
    if model_type == "albert":
        model_ckpt = os.path.join(model_dir, "model.ckpt-best")
        model_params = bert.albert_params(model_dir)
    elif model_type == "bert":
        model_ckpt = os.path.join(model_dir, "bert_model.ckpt")
        model_params = bert.params_from_pretrained_ckpt(model_dir)

    layer_bert = bert.BertModelLayer.from_params(model_params, name=model_type)

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype="int32", name="input_ids")
    output = layer_bert(input_ids)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=model_params["hidden_size"], activation="relu")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=n_classes, activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    if load_pretrained_weights:
        if model_type == "albert":
            bert.load_albert_weights(layer_bert, model_ckpt)
        elif model_type == "bert":
            bert.load_bert_weights(layer_bert, model_ckpt)

    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),  # the final Dense already applies softmax
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")],
    )
    if summary:
        model.summary()
    return model
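A minimal usage sketch for create_model; the checkpoint directory below is a hypothetical path, not one from the original code:

# Hypothetical path and hyper-parameters, for illustration only.
model = create_model(
    model_dir="models/uncased_L-12_H-768_A-12",
    model_type="bert",
    max_seq_len=128,
    n_classes=3,
    summary=True,
)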
Example #5
def load_pretrained_weights(model, pretrained_model, fetch_dir=None):
    bert_ckpt = bert_common.get_pretrained_checkpoint(
        pretrained_model, fetch_dir=fetch_dir
    )

    # We have to do this ugly hack because load_bert_weights checks whether the
    # model is an instance of BertModelLayer.
    old_isinstance = builtins.isinstance
    builtins.isinstance = (
        lambda x, y: True if y == bert.BertModelLayer else old_isinstance(x, y)
    )
    try:
        bert.load_bert_weights(model, bert_ckpt, _our_map_to_stock_variable_name)
    finally:
        # Restore the real isinstance even if loading the weights fails.
        builtins.isinstance = old_isinstance

    return model
Example #6
def build_transformer(transformer,
                      max_seq_length,
                      num_labels,
                      tagging=True,
                      tokenizer_only=False):
    if transformer in albert_models_google:
        from bert.tokenization.albert_tokenization import FullTokenizer
        model_url = albert_models_google[transformer]
        albert = True
    elif transformer in bert_models_google:
        from bert.tokenization.bert_tokenization import FullTokenizer
        model_url = bert_models_google[transformer]
        albert = False
    else:
        raise ValueError(
            f'Unknown model {transformer}, available ones: {list(bert_models_google.keys()) + list(albert_models_google.keys())}'
        )
    bert_dir = get_resource(model_url)
    vocab = glob.glob(os.path.join(bert_dir, '*vocab*.txt'))
    assert len(vocab) == 1, 'No vocab file found, or multiple (ambiguous) vocab files found'
    vocab = vocab[0]
    # noinspection PyTypeChecker
    tokenizer = FullTokenizer(vocab_file=vocab)
    if tokenizer_only:
        return tokenizer
    bert_params = bert.params_from_pretrained_ckpt(bert_dir)
    l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")
    l_input_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                        dtype='int32',
                                        name="input_ids")
    l_mask_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                       dtype='int32',
                                       name="mask_ids")
    l_token_type_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                             dtype='int32',
                                             name="token_type_ids")
    output = l_bert([l_input_ids, l_token_type_ids], mask=l_mask_ids)
    if not tagging:
        output = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    if bert_params.hidden_dropout:
        output = tf.keras.layers.Dropout(bert_params.hidden_dropout,
                                         name='hidden_dropout')(output)
    logits = tf.keras.layers.Dense(
        num_labels,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            bert_params.initializer_range))(output)
    model = tf.keras.Model(inputs=[l_input_ids, l_mask_ids, l_token_type_ids],
                           outputs=logits)
    model.build(input_shape=(None, max_seq_length))
    ckpt = glob.glob(os.path.join(bert_dir, '*.index'))
    assert ckpt, f'No checkpoint found under {bert_dir}'
    ckpt, _ = os.path.splitext(ckpt[0])
    with stdout_redirected(to=os.devnull):
        if albert:
            skipped_weight_value_tuples = load_stock_weights(l_bert, ckpt)
        else:
            skipped_weight_value_tuples = bert.load_bert_weights(l_bert, ckpt)
    assert len(skipped_weight_value_tuples) == 0, f'failed to load pretrained {transformer}'
    return model, tokenizer
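A possible call, assuming 'uncased_L-12_H-768_A-12' is one of the keys in bert_models_google:

# Assumed model name; any key of bert_models_google / albert_models_google works.
model, tokenizer = build_transformer('uncased_L-12_H-768_A-12',
                                     max_seq_length=128,
                                     num_labels=10,
                                     tagging=True)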
Example #7
def __init__(self, model_dir, d_model, args):
    super(BertEncoder, self).__init__(trainable=False)
    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    self.bert_layer = bert.BertModelLayer.from_params(
        bert_params, name="bert_layer")
    self.model_dir = model_dir
    tf.compat.v1.logging.info(
        'bert model loaded from {}'.format(model_dir))
    tf.compat.v1.logging.info('bert model params: {}'.format(bert_params))
    # do dummy call to build the model indirectly
    self.bert_layer([
        tf.zeros([args.batch_size, args.seq_length], dtype=tf.dtypes.int64),
        tf.zeros([args.batch_size, args.seq_length], dtype=tf.dtypes.int64)
    ])
    bert.load_bert_weights(
        self.bert_layer, os.path.join(self.model_dir, "bert_model.ckpt"))
    tf.compat.v1.logging.info('bert weights loaded')
Example #8
    def test_bert_google_weights(self):
        bert_model_name = "uncased_L-12_H-768_A-12"
        bert_dir = bert.fetch_google_bert_model(bert_model_name, ".models")
        bert_ckpt = os.path.join(bert_dir, "bert_model.ckpt")

        bert_params = bert.params_from_pretrained_ckpt(bert_dir)
        model, l_bert = self.build_model(bert_params)

        skipped_weight_value_tuples = bert.load_bert_weights(l_bert, bert_ckpt)
        self.assertEqual(0, len(skipped_weight_value_tuples))
        model.summary()
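The build_model helper called above is not shown. A hypothetical version, guessed only from how it is used (it has to return both the Keras model and the BERT layer so the checkpoint can be loaded into the layer), might look like this:

    def build_model(self, bert_params, max_seq_len=128):
        # Hypothetical helper, assuming `import bert` and `from tensorflow import keras`.
        l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")
        l_input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32')
        output = l_bert(l_input_ids)
        model = keras.Model(inputs=l_input_ids, outputs=output)
        model.build(input_shape=(None, max_seq_len))
        return model, l_bert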
Example #9
def make_cls_encoder(bert_path, ckpt_file, max_seq_len, bert_dim):
    model_ckpt = bert_path + ckpt_file
    bert_params = params_from_pretrained_ckpt(bert_path)
    bert_layer = BertModelLayer.from_params(bert_params,
                                            name="bert",
                                            trainable=False)

    input_ids = Input(shape=(max_seq_len, ), dtype='int32')
    bert_emb = bert_layer(input_ids)
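    # tf.gather with indices=0 along axis=1 picks the first token ([CLS]) embedding.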
    output = Lambda(lambda x: tf.gather(x, indices=0, axis=1))(bert_emb)

    model = Model(inputs=input_ids, outputs=output)
    model.build(input_shape=(None, max_seq_len))

    load_bert_weights(bert_layer, model_ckpt)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model
Example #10
def get_bert_classifier(inputs,
                        bert_params,
                        model_ckpt,
                        classification_head,
                        logging_fn=print):
    if len(inputs) < 3:
        raise ValueError("BERT inputs must be of length 3")

    params_str = "Initializing BERT layer with params:"
    for (k, v) in bert_params.items():
        params_str += f"\n  {k}: {v}"
    logging_fn(params_str)

    # inputs[:3] are always [word_ids, token_mask, token_type_ids]
    bert_inputs = [inputs[0], inputs[2]]
    bert_layer = bert.BertModelLayer.from_params(bert_params, name="bert")

    seq_output = bert_layer(bert_inputs)

    # inputs[3:] are any arguments to pass to the prediction layer.
    #   E.g. entity masks. Can be empty.
    args = [seq_output] + inputs[3:]
    predictions = classification_head(*args)

    model = tf.keras.Model(inputs=inputs,
                           outputs=predictions,
                           name=f"bert_{classification_head.name}")
    input_shapes = [inp.shape for inp in inputs]
    model.build(input_shape=input_shapes)
    skipped = bert.load_bert_weights(bert_layer, model_ckpt)

    # Probably because we extended the vocabulary.
    if len(skipped) == 1:
        skipped_param, ckpt_value = skipped[0]
        emb_name = "bert/embeddings/word_embeddings/embeddings:0"
        if skipped_param.name == emb_name:
            old_vocab_size = ckpt_value.shape[0]
            new_vocab_size = bert_params["vocab_size"]
            logging_fn(
                f"Extending pretrained BERT embeddings: {old_vocab_size} -> {new_vocab_size}"
            )  # noqa
            extended_embeddings = extend_ckpt_embeddings(
                ckpt_value, new_vocab_size)
            tf.keras.backend.set_value(skipped_param, extended_embeddings)
        else:
            raise ValueError(f"Skipped loading params: {skipped}")
    elif len(skipped) > 1:
        raise ValueError(f"Skipped loading params: {skipped}")

    return model
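extend_ckpt_embeddings is referenced but not shown here. One plausible implementation, assuming the new vocabulary rows are simply appended with small randomly initialized values, could be:

import numpy as np

def extend_ckpt_embeddings(ckpt_value, new_vocab_size, init_std=0.02):
    # Hypothetical sketch, not the original helper.
    old_vocab_size, hidden_size = ckpt_value.shape
    extra_rows = np.random.normal(
        0.0, init_std, size=(new_vocab_size - old_vocab_size, hidden_size)
    ).astype(ckpt_value.dtype)
    # Keep the pretrained rows and append freshly initialized rows for new tokens.
    return np.concatenate([ckpt_value, extra_rows], axis=0)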
Example #11
def build_transformer(transformer, max_seq_length=None, num_labels=None, tagging=True, tokenizer_only=False):
    spm_model_file = None
    if transformer in zh_albert_models_google:
        from bert.tokenization.albert_tokenization import FullTokenizer
        model_url = zh_albert_models_google[transformer]
        albert = True
    elif transformer in albert_models_tfhub:
        from edparser.layers.transformers.albert_tokenization import FullTokenizer
        with stdout_redirected(to=os.devnull):
            model_url = fetch_tfhub_albert_model(transformer,
                                                 os.path.join(hanlp_home(), 'thirdparty', 'tfhub.dev', 'google',
                                                              transformer))
        albert = True
        spm_model_file = glob.glob(os.path.join(model_url, 'assets', '*.model'))
        assert len(spm_model_file) == 1, 'No sentencepiece model found, or multiple (ambiguous) model files found'
        spm_model_file = spm_model_file[0]
    elif transformer in bert_models_google:
        from bert.tokenization.bert_tokenization import FullTokenizer
        model_url = bert_models_google[transformer]
        albert = False
    else:
        raise ValueError(
            f'Unknown model {transformer}, available ones: {list(bert_models_google.keys()) + list(zh_albert_models_google.keys()) + list(albert_models_tfhub.keys())}')
    bert_dir = get_resource(model_url)
    if spm_model_file:
        vocab = glob.glob(os.path.join(bert_dir, 'assets', '*.vocab'))
    else:
        vocab = glob.glob(os.path.join(bert_dir, '*vocab*.txt'))
    assert len(vocab) == 1, 'No vocab file found, or multiple (ambiguous) vocab files found'
    vocab = vocab[0]
    lower_case = any(key in transformer for key in ['uncased', 'multilingual', 'chinese', 'albert'])
    if spm_model_file:
        # noinspection PyTypeChecker
        tokenizer = FullTokenizer(vocab_file=vocab, spm_model_file=spm_model_file, do_lower_case=lower_case)
    else:
        tokenizer = FullTokenizer(vocab_file=vocab, do_lower_case=lower_case)
    if tokenizer_only:
        return tokenizer
    if spm_model_file:
        bert_params = albert_params(bert_dir)
    else:
        bert_params = bert.params_from_pretrained_ckpt(bert_dir)
    l_bert = bert.BertModelLayer.from_params(bert_params, name='albert' if albert else "bert")
    if not max_seq_length:
        return l_bert, tokenizer, bert_dir
    l_input_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="input_ids")
    l_mask_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="mask_ids")
    l_token_type_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="token_type_ids")
    output = l_bert([l_input_ids, l_token_type_ids], mask=l_mask_ids)
    if not tagging:
        output = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    if bert_params.hidden_dropout:
        output = tf.keras.layers.Dropout(bert_params.hidden_dropout, name='hidden_dropout')(output)
    logits = tf.keras.layers.Dense(num_labels, kernel_initializer=tf.keras.initializers.TruncatedNormal(
        bert_params.initializer_range))(output)
    model = tf.keras.Model(inputs=[l_input_ids, l_mask_ids, l_token_type_ids], outputs=logits)
    model.build(input_shape=(None, max_seq_length))
    if not spm_model_file:
        ckpt = glob.glob(os.path.join(bert_dir, '*.index'))
        assert ckpt, f'No checkpoint found under {bert_dir}'
        ckpt, _ = os.path.splitext(ckpt[0])
    with stdout_redirected(to=os.devnull):
        if albert:
            if spm_model_file:
                skipped_weight_value_tuples = bert.load_albert_weights(l_bert, bert_dir)
            else:
                # noinspection PyUnboundLocalVariable
                skipped_weight_value_tuples = load_stock_weights(l_bert, ckpt)
        else:
            # noinspection PyUnboundLocalVariable
            skipped_weight_value_tuples = bert.load_bert_weights(l_bert, ckpt)
    assert 0 == len(skipped_weight_value_tuples), f'failed to load pretrained {transformer}'
    return model, tokenizer
Example #12
	def post_build_model(self):
		bert.load_bert_weights(self.l_bert, self.model_ckpt) 
Example #13
def tokenize(text, label):
    def _tokenize(text, label):
        tokens = tokenizer.tokenize(text.numpy())[:MAX_LEN - 2]
        tokens = ['[CLS]'] + tokens + ['[SEP]']
        token_ids = tokenizer.convert_tokens_to_ids(tokens)
        return token_ids, label

    return tf.py_function(_tokenize, [text, label], [tf.int32, tf.int64])


train = train.map(tokenize).padded_batch(128, padded_shapes=([MAX_LEN], []))
valid = valid.map(tokenize).padded_batch(128, padded_shapes=([MAX_LEN], []))

# Construct a classifier with BERT
bert_layer = bert.BertModelLayer.from_params(bert_params)
bert_layer.trainable = False

model = Sequential([
    Input(shape=(MAX_LEN, )),
    bert_layer,
    Lambda(lambda seq: seq[:, 0, :]),
    Dense(1),
])

bert.load_bert_weights(bert_layer, bert_model_ckpt)

# Train it!
model.compile(Adam(), BinaryCrossentropy(from_logits=True), ['accuracy'])
model.fit(train, validation_data=valid, epochs=5)

# Save the model
model.save('models/imdb_bert')
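A minimal inference sketch, assuming the tokenizer, MAX_LEN and the trained model above are still in scope:

import numpy as np

text = "a genuinely great film"
tokens = ['[CLS]'] + tokenizer.tokenize(text)[:MAX_LEN - 2] + ['[SEP]']
ids = tokenizer.convert_tokens_to_ids(tokens)
ids = ids + [0] * (MAX_LEN - len(ids))  # pad to MAX_LEN
logit = model.predict(np.array([ids], dtype=np.int32))[0, 0]  # Dense(1) returns a logit
print("positive" if logit > 0 else "negative")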
Example #14
X_test = pad_sequences(X_test, padding='post', maxlen=maxlen)

# Create the model: start with a simple baseline model to test results.
# TODO: include BERT embeddings (if available) to improve the models;
# so far only the BERT tokenizer is used.

l_input_ids = keras.layers.Input(shape=(maxlen, ), dtype='int32')
l_token_type_ids = keras.layers.Input(shape=(maxlen, ), dtype='int32')

output = l_bert(l_input_ids)

LSTM_Layer_1 = keras.layers.LSTM(128)(output)
logits = keras.layers.Dense(numclass, activation='softmax')(LSTM_Layer_1)
model = keras.Model(inputs=l_input_ids, outputs=logits)
model.build(input_shape=(None, maxlen))

bert.load_bert_weights(l_bert, model_ckpt)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc'])
model.summary()

history = model.fit(X_train,
                    y_train,
                    batch_size=128,
                    epochs=1,
                    verbose=1,
                    validation_split=0.2,
                    class_weight=class_weights)
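A short follow-up evaluation, assuming y_test was prepared (and one-hot encoded) alongside X_test above:

# Assumes y_test matches the padded X_test above.
loss, acc = model.evaluate(X_test, y_test, batch_size=128, verbose=1)
print('test accuracy: {:.4f}'.format(acc))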