Example #1
def build_model(checkpoint_file, config_file, sequence_len, learning_rate):
    biobert = load_trained_model_from_checkpoint(config_file,
                                                 checkpoint_file,
                                                 training=False,
                                                 seq_len=sequence_len)
    #biobert_train = load_trained_model_from_checkpoint(config_file, checkpoint_file, training=True, seq_len=sequence_len)

    # Unfreeze bert layers.
    # for layer in biobert.layers[:]:
    #     layer.trainable = True

    logger.info(biobert.input)
    logger.info(biobert.layers[-1].output)

    logger.info(tf.slice(biobert.layers[-1].output, [0, 0, 0], [-1, 1, -1]))

    slice_layer = Lambda(lambda x: tf.slice(x, [0, 0, 0], [-1, 1, -1]))(
        biobert.layers[-1].output)

    flatten_layer = Flatten()(slice_layer)

    hidden_layer = Dense(400, activation='relu',
                         name='hidden_layer')(flatten_layer)
    prediction_layer = Dense(1, activation='sigmoid',
                             name='prediction_layer')(hidden_layer)

    model = Model(inputs=biobert.input, outputs=prediction_layer)

    logger.info(model.summary(line_length=118))

    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(lr=learning_rate))  #SGD(lr=0.2, momentum=0.9))

    return model
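A minimal usage sketch for the helper above. The checkpoint/config paths, sequence length and learning rate are placeholders, and `x_train`/`y_train` are assumed to be keras-bert token/segment arrays and binary labels.

# Hypothetical paths; point these at the actual BioBERT release in use.
CKPT = 'biobert_v1.1_pubmed/model.ckpt-1000000'
CONFIG = 'biobert_v1.1_pubmed/bert_config.json'

model = build_model(CKPT, CONFIG, sequence_len=128, learning_rate=2e-5)
# x_train = [token_ids, segment_ids] as produced by the keras-bert Tokenizer;
# y_train is a vector of 0/1 labels.
model.fit(x_train, y_train, batch_size=32, epochs=3)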
Example #2
    def _get_model(base_dir, cfg_=None):
        if "albert" in cfg["verbose"].lower():
            from bert4keras.bert import build_bert_model
            config_file = os.path.join(base_dir, 'albert_config.json')
            checkpoint_file = os.path.join(base_dir, 'model.ckpt-best')
            model = build_bert_model(config_path=config_file,
                                     checkpoint_path=checkpoint_file,
                                     model='albert',
                                     return_keras_model=True)
            if cfg_["cls_num"] > 1:
                output = Concatenate(axis=-1)([
                    model.get_layer(
                        "Encoder-1-FeedForward-Norm").get_output_at(-i)
                    for i in range(1, cfg["cls_num"] + 1)
                ])
                model = Model(model.inputs[:2], outputs=output)
            model.trainable = cfg_["bert_trainable"]
        else:
            config_file = os.path.join(base_dir, 'bert_config.json')
            checkpoint_file = os.path.join(base_dir, 'bert_model.ckpt')
            if not os.path.exists(config_file):
                config_file = os.path.join(base_dir, 'bert_config_large.json')
                checkpoint_file = os.path.join(base_dir,
                                               'roberta_l24_large_model')
            model = load_trained_model_from_checkpoint(
                config_file,
                checkpoint_file,
                training=False,
                trainable=cfg_["bert_trainable"],
                output_layer_num=cfg_["cls_num"],
                seq_len=cfg_['maxlen'])

            # model = Model(inputs=model.inputs[: 2], outputs=model.layers[-7].output)

        return model
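For reference, a sketch of the configuration dict this helper reads. Note that the snippet looks up both a module-level `cfg` and the `cfg_` argument; the keys below come straight from the code, while the values and the model directory are illustrative only.

cfg = {
    'verbose': 'albert_base',   # any name containing 'albert' selects the bert4keras branch
    'cls_num': 2,               # how many encoder outputs to concatenate
    'bert_trainable': True,     # whether the loaded BERT weights are fine-tuned
    'maxlen': 128,              # sequence length for the keras-bert loader
}
model = _get_model('/path/to/albert_base', cfg_=cfg)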
Example #3
def get_pretrained_model(BERT_PRETRAINED_DIR, maxlen):

    config_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_config.json')
    checkpoint_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_model.ckpt')
    model = load_trained_model_from_checkpoint(config_file,
                                               checkpoint_file,
                                               training=True,
                                               seq_len=maxlen)

    return model
Example #4
def get_bert_base_model(bert_config: BertConfig):
    bert_model = load_trained_model_from_checkpoint(bert_config.config, bert_config.check_point, trainable=True,
                                                    seq_len=512)
    inputs = bert_model.inputs
    layer = bert_model.outputs[0]
    layer = Extract(index=0, name='Extract')(layer)
    predicate = keras.layers.Dense(1, activation='sigmoid', name='Predicate-Dense')(layer)

    model = keras.models.Model(inputs=inputs, outputs=[predicate])
    model.summary()
    return model
Example #5
def get_bert_multi_layers_model(bert_config: BertConfig):
    bert_model = load_trained_model_from_checkpoint(bert_config.config, bert_config.check_point, trainable=True,
                                                    seq_len=512, output_layer_num=4)
    inputs = bert_model.inputs
    layer = bert_model.outputs[0]
    layer = Extract(index=0, name='Extract')(layer)
    layer = keras.layers.Dense(512, activation='relu', name='Dense')(layer)
    predict = keras.layers.Dense(1, activation='sigmoid', name='Predict-Dense')(layer)
    aux = keras.layers.Dense(6, activation='sigmoid', name='Predict-Aux')(layer)

    model = keras.models.Model(inputs=inputs, outputs=[predict, aux])
    model.summary()
    return model
Example #6
def build_bert(model, poolings=None, output_layer_num=1):
    """Extract embeddings from texts.

    :param model: Path to the checkpoint or built model without MLM and NSP.
    :param texts: Iterable texts.
    :param poolings: Pooling methods. Word embeddings will be returned if it is None.
                     Otherwise concatenated pooled embeddings will be returned.
    :param vocabs: A dict should be provided if model is built.
    :param cased: Whether it is cased for tokenizer.
    :param batch_size: Batch size.
    :param cut_embed: The computed embeddings will be cut based on their input lengths.
    :param output_layer_num: The number of layers whose outputs will be concatenated as a single output.
                             Only available when `model` is a path to checkpoint.
    :return: A list of numpy arrays representing the embeddings.
    """
    # Note: the `model` argument is overridden here with the downloaded multi-cased base checkpoint.
    model = get_pretrained(PretrainedList.multi_cased_base)
    if isinstance(model, (str, type(u''))):
        paths = get_checkpoint_paths(model)
        model = load_trained_model_from_checkpoint(
            config_file=paths.config,
            checkpoint_file=paths.checkpoint,
            output_layer_num=output_layer_num,
        )

    outputs = []

    if poolings is not None:
        if isinstance(poolings, (str, type(u''))):
            poolings = [poolings]
        # outputs = []
        for pooling in poolings:
            if pooling == POOL_NSP:
                outputs.append(
                    Extract(index=0, name='Pool-NSP')(model.outputs[0]))
            elif pooling == POOL_MAX:
                outputs.append(
                    MaskedGlobalMaxPool1D(name='Pool-Max')(model.outputs[0]))
            elif pooling == POOL_AVE:
                outputs.append(
                    keras.layers.GlobalAvgPool1D(name='Pool-Ave')(
                        model.outputs[0]))
            else:
                raise ValueError('Unknown pooling method: {}'.format(pooling))
        # print(outputs)
        if len(outputs) == 1:
            outputs = outputs[0]
        else:
            outputs = keras.layers.Concatenate(name='Concatenate')(outputs)
        outputs = Lambda(bert_output_sum)(outputs)
        # model = keras.models.Model(inputs=model.inputs, outputs=outputs)
    return model.inputs, outputs
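A hedged usage sketch for the helper above, assuming POOL_NSP/POOL_MAX/POOL_AVE are keras_bert's pooling constants and that `bert_output_sum` is defined elsewhere in the original project.

# Concatenate NSP, max and average pooling into a single sentence vector.
inputs, pooled = build_bert(None, poolings=[POOL_NSP, POOL_MAX, POOL_AVE],
                            output_layer_num=4)
feature_model = keras.models.Model(inputs=inputs, outputs=pooled)
# feature_model.predict([token_ids, segment_ids]) then yields one pooled vector per text.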
Example #7
def _get_model(base_dir, cfg_=None):
    config_file = os.path.join(base_dir, 'bert_config.json')
    checkpoint_file = os.path.join(base_dir, 'bert_model.ckpt')
    if not os.path.exists(config_file):
        config_file = os.path.join(base_dir, 'bert_config_large.json')
        checkpoint_file = os.path.join(base_dir, 'roberta_l24_large_model')
    print(config_file, checkpoint_file)
    model = load_trained_model_from_checkpoint(
        config_file,
        checkpoint_file,
        training=False,
        trainable=cfg_["bert_trainable"],
        output_layer_num=cfg["cls_num"],
        seq_len=cfg_['maxlen'])
    return model
Example #8
import os, re, sys, codecs
from math import ceil
import tensorflow as tf
import keras
from keras import backend as K
from keras.optimizers import Adam
from keras.models import Model, load_model
from keras.layers import (Dense, Input, Flatten, concatenate, Dropout, Lambda, Concatenate,
                          Permute, GlobalAveragePooling1D, GlobalMaxPooling1D)
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.utils import multi_gpu_model
from keras_bert import extract_embeddings, load_trained_model_from_checkpoint, get_custom_objects, calc_train_steps

# Setting up logistics
adam = Adam(lr=2e-5, decay=0.01)
maxlen = 50
print('begin_build')
config_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_config.json')
checkpoint_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_model.ckpt')
model = load_trained_model_from_checkpoint(config_file,
                                           checkpoint_file,
                                           training=True,
                                           trainable=True,
                                           seq_len=maxlen)
model.summary()


#
# Custom tensorflow layers
#
def lambda1(x):
    # Position 50 (channel 0) of each example stores the index of the target token;
    # gather that token's vector for every example in the batch.
    indices = tf.dtypes.cast(x[:, 50, 0], tf.int32)
    row_indices = tf.range(tf.shape(indices)[0])
    full_indices = tf.stack([row_indices, indices], axis=1)
    return tf.gather_nd(x, full_indices)
def build_model(args):

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    if args.load_model:
        print("Loading previously saved model..")
        if args.bert_config:
            print("Warning: --bert_config ignored when loading previous Keras model.", file=sys.stderr)
        custom_objects = get_custom_objects()
        model = load_model(args.load_model, custom_objects=custom_objects)
    
    else:
        print("Building model..")
        bert = load_trained_model_from_checkpoint(args.bert_config, args.init_checkpoint,
                                                    training=False, trainable=True,
                                                    seq_len=args.seq_len)

        transformer_output = get_encoder_component(name="Encoder-13", input_layer=bert.layers[-1].output,
                                                head_num=12, hidden_dim=3072, feed_forward_activation=gelu)

        drop_mask = Lambda(lambda x: x, name="drop_mask")(bert.output)

        slice_CLS = Lambda(lambda x: K.slice(x, [0, 0, 0], [-1, 1, -1]), name="slice_CLS")(drop_mask)
        flatten_CLS = Flatten()(slice_CLS)

        # Needed to avoid a json serialization error when saving the model.
        last_position = args.seq_len-1
        slice_SEP = Lambda(lambda x: K.slice(x, [0, last_position, 0], [-1, 1, -1]), name="slice_SEP")(drop_mask)
        flatten_SEP = Flatten()(slice_SEP)

        permute_layer = Permute((2, 1))(drop_mask)
        permute_average = GlobalAveragePooling1D()(permute_layer)
        permute_maximum =  GlobalMaxPooling1D()(permute_layer)

        concat = Concatenate()([permute_average, permute_maximum, flatten_CLS, flatten_SEP])

        # Note: `transformer_output` and `concat` are built above but never wired into the
        # graph; the classifier below reads only the flattened [CLS] slice.
        output_layer = Dense(get_label_dim(args.train), activation='sigmoid', name="label_out")(flatten_CLS)

        model = Model(bert.input, output_layer)
        
        total_steps, warmup_steps =  calc_train_steps(num_example=get_example_count(args.train),
                                                    batch_size=args.batch_size, epochs=args.epochs,
                                                    warmup_proportion=0.01)

        # optimizer = AdamWarmup(total_steps, warmup_steps, lr=args.lr)
        optimizer = keras.optimizers.Adam(lr=args.lr)

        model.compile(loss=["binary_crossentropy"], optimizer=optimizer, metrics=[])

    if args.gpus > 1:
        template_model = model
        # Set cpu_merge=False for better performance on NVLink connected GPUs.
        model = multi_gpu_model(template_model, gpus=args.gpus, cpu_merge=False)
        # TODO: need to compile this model as well when doing multigpu!

    callbacks = [Metrics(model)]

    if args.patience > -1:
        callbacks.append(EarlyStopping(patience=args.patience, verbose=1))

    if args.checkpoint_interval > 0:
        callbacks.append(ModelCheckpoint(args.output_file + ".checkpoint-{epoch}",  period=args.checkpoint_interval))


    print(model.summary(line_length=118))
    print("Number of GPUs in use:", args.gpus)
    print("Batch size:", args.batch_size)
    print("Learning rate:", K.eval(model.optimizer.lr))
    # print("Dropout:", args.dropout)

    model.fit_generator(data_generator(args.train, args.batch_size, seq_len=args.seq_len),
                        steps_per_epoch=ceil( get_example_count(args.train) / args.batch_size ),
                        use_multiprocessing=True, epochs=args.epochs, callbacks=callbacks,
                        validation_data=data_generator(args.dev, args.eval_batch_size, seq_len=args.seq_len),
                        validation_steps=ceil( get_example_count(args.dev) / args.eval_batch_size ))

    print("Saving model:", args.output_file)
    if args.gpus > 1:
        template_model.save(args.output_file)
    else:
        model.save(args.output_file)
from sklearn.metrics import accuracy_score
from google_research_code_bert_base_uncased import tokenization

os.environ['CUDA_VISIBLE_DEVICES'] = '1'

# LOADS PRE-TRAINED BERT MODEL USING KERAS-BERT
BERT_PRETRAINED_DIR = '/home/rrevutch/bert/google_research_code_bert_base_uncased'
# Setting up logistics
print('Import Bert Model')
adam = Adam(lr=2e-5, decay=0.01)
MAXLEN_BERT_MODEL = 200
config_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_config.json')
checkpoint_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_model.ckpt')
model = load_trained_model_from_checkpoint(config_file,
                                           checkpoint_file,
                                           training=True,
                                           trainable=True,
                                           seq_len=MAXLEN_BERT_MODEL)

print('Adding Custom Layers for dynamic target embedding output')


def lambda1(x):
    indices = tf.dtypes.cast(x[:, MAXLEN_BERT_MODEL, 0], tf.int32)
    row_indices = tf.range(tf.shape(indices)[0])
    full_indices = tf.stack([row_indices, indices], axis=1)
    return tf.gather_nd(x, full_indices)


def tokenize_sents(sents, word_indices):
    # transformers library
Example #11
(train_input_ids, train_input_masks, train_segment_ids,
 train_labels) = convert_examples_to_features(tokenizer,
                                               train_examples,
                                               max_seq_length=sequence_length)
(dev_input_ids, dev_input_masks, dev_segment_ids,
 dev_labels) = convert_examples_to_features(tokenizer,
                                            dev_examples,
                                            max_seq_length=sequence_length)

(test_input_ids, test_input_masks, test_segment_ids,
 test_labels) = convert_examples_to_features(tokenizer,
                                             test_examples,
                                             max_seq_length=sequence_length)

config_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_config.json')
checkpoint_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_model.ckpt')
bert_model = load_trained_model_from_checkpoint(config_file,
                                                checkpoint_file,
                                                training=True,
                                                seq_len=sequence_length)

# bert_model.summary(line_length=120)

bert_output = bert_model.get_layer(name='Encoder-12-FeedForward-Norm').output

embedding_dim = 768
batch_size = 64
drop = 0.9

epochs = 100

############################
# use output of first token (CLS) for classification
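A minimal sketch of what that comment implies: take the [CLS] position of `bert_output` with keras_bert's Extract layer and attach a small sigmoid head. Layer imports are assumed to match the earlier examples; the third (masked-LM) input is dropped because the model was loaded with training=True.

from keras_bert.layers import Extract

cls_vector = Extract(index=0, name='CLS-Extract')(bert_output)      # (batch, 768)
cls_vector = Dropout(drop)(cls_vector)
probability = Dense(1, activation='sigmoid', name='output')(cls_vector)

classifier = Model(inputs=bert_model.inputs[:2], outputs=probability)
classifier.compile(loss='binary_crossentropy', optimizer=Adam(lr=2e-5))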
Example #12
def build_bert_model(X1, X2):
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=input_length)
    wordvec = bert_model.predict([X1, X2])
    return wordvec
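The helper above expects pre-encoded token and segment arrays; a short sketch of producing them with keras_bert's Tokenizer (the vocabulary path and the input sentence are placeholders).

import numpy as np
from keras_bert import Tokenizer, load_vocabulary

token_dict = load_vocabulary('/path/to/vocab.txt')      # placeholder path
tokenizer = Tokenizer(token_dict)

ids, segments = tokenizer.encode('some input text', max_len=input_length)
X1 = np.array([ids])
X2 = np.array([segments])
wordvec = build_bert_model(X1, X2)    # shape: (1, input_length, hidden_size)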