def build_model(checkpoint_file, config_file, sequence_len, learning_rate):
    biobert = load_trained_model_from_checkpoint(config_file, checkpoint_file,
                                                 training=False, seq_len=sequence_len)
    # biobert_train = load_trained_model_from_checkpoint(config_file, checkpoint_file, training=True, seq_len=sequence_len)
    # Unfreeze BERT layers.
    # for layer in biobert.layers[:]:
    #     layer.trainable = True
    logger.info(biobert.input)
    logger.info(biobert.layers[-1].output)
    logger.info(tf.slice(biobert.layers[-1].output, [0, 0, 0], [-1, 1, -1]))

    # Slice out the [CLS] position from the final encoder output and classify from it.
    slice_layer = Lambda(lambda x: tf.slice(x, [0, 0, 0], [-1, 1, -1]))(biobert.layers[-1].output)
    flatten_layer = Flatten()(slice_layer)
    hidden_layer = Dense(400, activation='relu', name='hidden_layer')(flatten_layer)
    prediction_layer = Dense(1, activation='sigmoid', name='prediction_layer')(hidden_layer)

    model = Model(inputs=biobert.input, outputs=prediction_layer)
    logger.info(model.summary(line_length=118))
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=learning_rate))  # or SGD(lr=0.2, momentum=0.9)
    return model
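# Usage sketch for build_model above (not part of the original snippet): the BioBERT
# paths are placeholders and the zero arrays only illustrate the input layout a
# keras-bert model built with training=False expects (token ids and segment ids).
import numpy as np

SEQ_LEN = 128
clf = build_model(checkpoint_file='biobert_v1.1_pubmed/model.ckpt-1000000',  # placeholder path
                  config_file='biobert_v1.1_pubmed/bert_config.json',        # placeholder path
                  sequence_len=SEQ_LEN,
                  learning_rate=2e-5)
token_ids = np.zeros((8, SEQ_LEN))    # token ids from a WordPiece tokenizer
segment_ids = np.zeros((8, SEQ_LEN))  # all zeros for single-sentence inputs
labels = np.zeros((8, 1))             # binary labels
clf.fit([token_ids, segment_ids], labels, batch_size=4, epochs=1)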
def _get_model(base_dir, cfg_=None):
    if "albert" in cfg["verbose"].lower():
        from bert4keras.bert import build_bert_model
        config_file = os.path.join(base_dir, 'albert_config.json')
        checkpoint_file = os.path.join(base_dir, 'model.ckpt-best')
        model = build_bert_model(config_path=config_file,
                                 checkpoint_path=checkpoint_file,
                                 model='albert',
                                 return_keras_model=True)
        if cfg_["cls_num"] > 1:
            # Concatenate the outputs of the last `cls_num` calls of the shared
            # ALBERT encoder block.
            output = Concatenate(axis=-1)([
                model.get_layer("Encoder-1-FeedForward-Norm").get_output_at(-i)
                for i in range(1, cfg["cls_num"] + 1)
            ])
            model = Model(model.inputs[:2], outputs=output)
        model.trainable = cfg_["bert_trainable"]
    else:
        config_file = os.path.join(base_dir, 'bert_config.json')
        checkpoint_file = os.path.join(base_dir, 'bert_model.ckpt')
        if not os.path.exists(config_file):
            config_file = os.path.join(base_dir, 'bert_config_large.json')
            checkpoint_file = os.path.join(base_dir, 'roberta_l24_large_model')
        model = load_trained_model_from_checkpoint(
            config_file, checkpoint_file,
            training=False,
            trainable=cfg_["bert_trainable"],
            output_layer_num=cfg_["cls_num"],
            seq_len=cfg_['maxlen'])
        # model = Model(inputs=model.inputs[:2], outputs=model.layers[-7].output)
    return model
def get_pretrained_model(BERT_PRETRAINED_DIR, maxlen):
    config_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_config.json')
    checkpoint_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_model.ckpt')
    model = load_trained_model_from_checkpoint(config_file, checkpoint_file,
                                               training=True, seq_len=maxlen)
    return model
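# Usage sketch for get_pretrained_model above (not from the original snippet): the
# pretrained directory and sentence are placeholders. Note that with training=True
# the returned keras-bert model has three inputs (token ids, segment ids and
# masked-LM positions) and the MLM/NSP heads as outputs.
import os
from keras_bert import Tokenizer, load_vocabulary

BERT_PRETRAINED_DIR = 'uncased_L-12_H-768_A-12'   # placeholder path
maxlen = 128
bert = get_pretrained_model(BERT_PRETRAINED_DIR, maxlen)

vocab = load_vocabulary(os.path.join(BERT_PRETRAINED_DIR, 'vocab.txt'))
tokenizer = Tokenizer(vocab)
token_ids, segment_ids = tokenizer.encode('an example sentence', max_len=maxlen)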
def get_bert_base_model(bert_config: BertConfig):
    bert_model = load_trained_model_from_checkpoint(bert_config.config,
                                                    bert_config.check_point,
                                                    trainable=True,
                                                    seq_len=512)
    inputs = bert_model.inputs
    layer = bert_model.outputs[0]
    # Take the [CLS] token representation for sentence-level prediction.
    layer = Extract(index=0, name='Extract')(layer)
    predicate = keras.layers.Dense(1, activation='sigmoid', name='Predicate-Dense')(layer)
    model = keras.models.Model(inputs=inputs, outputs=[predicate])
    model.summary()
    return model
def get_bert_multi_layers_model(bert_config: BertConfig):
    # output_layer_num=4 concatenates the outputs of the last four encoder layers.
    bert_model = load_trained_model_from_checkpoint(bert_config.config,
                                                    bert_config.check_point,
                                                    trainable=True,
                                                    seq_len=512,
                                                    output_layer_num=4)
    inputs = bert_model.inputs
    layer = bert_model.outputs[0]
    layer = Extract(index=0, name='Extract')(layer)
    layer = keras.layers.Dense(512, activation='relu', name='Dense')(layer)
    predict = keras.layers.Dense(1, activation='sigmoid', name='Predict-Dense')(layer)
    aux = keras.layers.Dense(6, activation='sigmoid', name='Predict-Aux')(layer)
    model = keras.models.Model(inputs=inputs, outputs=[predict, aux])
    model.summary()
    return model
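# Usage sketch for get_bert_multi_layers_model above (not from the original snippet):
# one way the two heads could be compiled with separate losses. A BertConfig instance
# named bert_config is assumed to exist; the loss weights and optimizer are assumptions.
model = get_bert_multi_layers_model(bert_config)
model.compile(optimizer=keras.optimizers.Adam(lr=2e-5),
              loss={'Predict-Dense': 'binary_crossentropy',
                    'Predict-Aux': 'binary_crossentropy'},
              loss_weights={'Predict-Dense': 1.0, 'Predict-Aux': 0.5})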
def build_bert(model, poolings=None, output_layer_num=1):
    """Build a BERT feature-extraction graph and return its inputs and output tensor.

    :param model: Path to the checkpoint or built model without MLM and NSP.
    :param poolings: Pooling methods. Per-token (word) embeddings are returned if it
        is None, otherwise the pooled embeddings are concatenated into a single output.
    :param output_layer_num: The number of layers whose outputs will be concatenated
        as a single output. Only available when `model` is a path to checkpoint.
    :return: The model inputs and the output tensor.
    """
    # NOTE: the `model` argument is immediately overridden by the downloaded
    # multi-cased base checkpoint.
    model = get_pretrained(PretrainedList.multi_cased_base)
    if isinstance(model, (str, type(u''))):
        paths = get_checkpoint_paths(model)
        model = load_trained_model_from_checkpoint(
            config_file=paths.config,
            checkpoint_file=paths.checkpoint,
            output_layer_num=output_layer_num,
        )
    outputs = []
    if poolings is not None:
        if isinstance(poolings, (str, type(u''))):
            poolings = [poolings]
        for pooling in poolings:
            if pooling == POOL_NSP:
                outputs.append(Extract(index=0, name='Pool-NSP')(model.outputs[0]))
            elif pooling == POOL_MAX:
                outputs.append(MaskedGlobalMaxPool1D(name='Pool-Max')(model.outputs[0]))
            elif pooling == POOL_AVE:
                outputs.append(keras.layers.GlobalAvgPool1D(name='Pool-Ave')(model.outputs[0]))
            else:
                raise ValueError('Unknown pooling method: {}'.format(pooling))
        if len(outputs) == 1:
            outputs = outputs[0]
        else:
            outputs = keras.layers.Concatenate(name='Concatenate')(outputs)
        outputs = Lambda(bert_output_sum)(outputs)
    else:
        # No pooling requested: return the per-token (word) embeddings.
        outputs = model.outputs[0]
    # model = keras.models.Model(inputs=model.inputs, outputs=outputs)
    return model.inputs, outputs
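# Usage sketch for build_bert above (not from the original snippet): it wires the
# returned inputs/outputs into a standalone embedding model. The no-pooling path is
# used so the external helper `bert_output_sum` referenced above is not required;
# the zero arrays are placeholders sized for the downloaded base checkpoint.
import numpy as np
import keras

inputs, outputs = build_bert(model=None)   # no pooling: per-token embeddings
embedder = keras.models.Model(inputs=inputs, outputs=outputs)
token_ids = np.zeros((2, 512))     # placeholder token ids
segment_ids = np.zeros((2, 512))   # placeholder segment ids
embeddings = embedder.predict([token_ids, segment_ids])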
def _get_model(base_dir, cfg_=None):
    config_file = os.path.join(base_dir, 'bert_config.json')
    checkpoint_file = os.path.join(base_dir, 'bert_model.ckpt')
    if not os.path.exists(config_file):
        config_file = os.path.join(base_dir, 'bert_config_large.json')
        checkpoint_file = os.path.join(base_dir, 'roberta_l24_large_model')
    print(config_file, checkpoint_file)
    model = load_trained_model_from_checkpoint(
        config_file, checkpoint_file,
        training=False,
        trainable=cfg_["bert_trainable"],
        output_layer_num=cfg_["cls_num"],
        seq_len=cfg_['maxlen'])
    return model
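# Usage sketch for _get_model above (not from the original snippet): the base
# directory is a placeholder and the cfg_ keys mirror the ones read inside the
# function; the values are assumptions.
cfg_ = {'bert_trainable': True, 'cls_num': 1, 'maxlen': 128}
bert = _get_model('chinese_L-12_H-768_A-12', cfg_=cfg_)
print(bert.output_shape)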
from keras.optimizers import Adam
from keras_bert import extract_embeddings
from keras.layers import Dense, Input, Flatten, concatenate, Dropout, Lambda, Concatenate
from keras.models import Model
import re
import codecs

# Setting up logistics
adam = Adam(lr=2e-5, decay=0.01)
maxlen = 50
print('begin_build')
config_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_config.json')
checkpoint_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_model.ckpt')
model = load_trained_model_from_checkpoint(config_file, checkpoint_file,
                                           training=True, trainable=True,
                                           seq_len=maxlen)
model.summary()


#
# Custom tensorflow layers
#
def lambda1(x):
    # Position 50 of each input row stores an index; gather the vector at that
    # index from each example in the batch.
    indices = tf.dtypes.cast(x[:, 50, 0], tf.int32)
    row_indices = tf.range(tf.shape(indices)[0])
    full_indices = tf.stack([row_indices, indices], axis=1)
    return tf.gather_nd(x, full_indices)
def build_model(args):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    if args.load_model:
        print("Loading previously saved model..")
        if args.bert_config:
            print("Warning: --bert_config ignored when loading previous Keras model.", file=sys.stderr)
        custom_objects = get_custom_objects()
        model = load_model(args.load_model, custom_objects=custom_objects)
    else:
        print("Building model..")
        bert = load_trained_model_from_checkpoint(args.bert_config, args.init_checkpoint,
                                                  training=False, trainable=True,
                                                  seq_len=args.seq_len)
        transformer_output = get_encoder_component(name="Encoder-13",
                                                   input_layer=bert.layers[-1].output,
                                                   head_num=12,
                                                   hidden_dim=3072,
                                                   feed_forward_activation=gelu)
        drop_mask = Lambda(lambda x: x, name="drop_mask")(bert.output)
        slice_CLS = Lambda(lambda x: K.slice(x, [0, 0, 0], [-1, 1, -1]), name="slice_CLS")(drop_mask)
        flatten_CLS = Flatten()(slice_CLS)
        # Needed to avoid a json serialization error when saving the model.
        last_position = args.seq_len - 1
        slice_SEP = Lambda(lambda x: K.slice(x, [0, last_position, 0], [-1, 1, -1]), name="slice_SEP")(drop_mask)
        flatten_SEP = Flatten()(slice_SEP)
        permute_layer = Permute((2, 1))(drop_mask)
        permute_average = GlobalAveragePooling1D()(permute_layer)
        permute_maximum = GlobalMaxPooling1D()(permute_layer)
        concat = Concatenate()([permute_average, permute_maximum, flatten_CLS, flatten_SEP])
        # NOTE: only the flattened [CLS] slice feeds the output layer; `transformer_output`
        # and `concat` above are built but currently unused.
        output_layer = Dense(get_label_dim(args.train), activation='sigmoid', name="label_out")(flatten_CLS)

        model = Model(bert.input, output_layer)

        total_steps, warmup_steps = calc_train_steps(num_example=get_example_count(args.train),
                                                     batch_size=args.batch_size,
                                                     epochs=args.epochs,
                                                     warmup_proportion=0.01)
        # optimizer = AdamWarmup(total_steps, warmup_steps, lr=args.lr)
        optimizer = keras.optimizers.Adam(lr=args.lr)
        model.compile(loss=["binary_crossentropy"], optimizer=optimizer, metrics=[])

    if args.gpus > 1:
        template_model = model
        # Set cpu_merge=False for better performance on NVLink connected GPUs.
        model = multi_gpu_model(template_model, gpus=args.gpus, cpu_merge=False)
        # TODO: need to compile this model as well when doing multigpu!

    callbacks = [Metrics(model)]
    if args.patience > -1:
        callbacks.append(EarlyStopping(patience=args.patience, verbose=1))
    if args.checkpoint_interval > 0:
        callbacks.append(ModelCheckpoint(args.output_file + ".checkpoint-{epoch}",
                                         period=args.checkpoint_interval))

    print(model.summary(line_length=118))
    print("Number of GPUs in use:", args.gpus)
    print("Batch size:", args.batch_size)
    print("Learning rate:", K.eval(model.optimizer.lr))
    # print("Dropout:", args.dropout)

    model.fit_generator(data_generator(args.train, args.batch_size, seq_len=args.seq_len),
                        steps_per_epoch=ceil(get_example_count(args.train) / args.batch_size),
                        use_multiprocessing=True,
                        epochs=args.epochs,
                        callbacks=callbacks,
                        validation_data=data_generator(args.dev, args.eval_batch_size, seq_len=args.seq_len),
                        validation_steps=ceil(get_example_count(args.dev) / args.eval_batch_size))

    print("Saving model:", args.output_file)
    if args.gpus > 1:
        template_model.save(args.output_file)
    else:
        model.save(args.output_file)
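# Sketch of the command-line arguments build_model(args) reads above (not from the
# original snippet): the argument names come from the attribute accesses in the
# function, but the defaults are assumptions.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--load_model', default=None)
parser.add_argument('--bert_config')
parser.add_argument('--init_checkpoint')
parser.add_argument('--seq_len', type=int, default=512)
parser.add_argument('--train')
parser.add_argument('--dev')
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--eval_batch_size', type=int, default=32)
parser.add_argument('--epochs', type=int, default=3)
parser.add_argument('--lr', type=float, default=2e-5)
parser.add_argument('--gpus', type=int, default=1)
parser.add_argument('--patience', type=int, default=-1)
parser.add_argument('--checkpoint_interval', type=int, default=0)
parser.add_argument('--output_file')
args = parser.parse_args()
build_model(args)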
from sklearn.metrics import accuracy_score
from google_research_code_bert_base_uncased import tokenization

os.environ['CUDA_VISIBLE_DEVICES'] = '1'

# LOADS PRE-TRAINED BERT MODEL USING KERAS-BERT
BERT_PRETRAINED_DIR = '/home/rrevutch/bert/google_research_code_bert_base_uncased'

# Setting up logistics
print('Import Bert Model')
adam = Adam(lr=2e-5, decay=0.01)
MAXLEN_BERT_MODEL = 200
config_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_config.json')
checkpoint_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_model.ckpt')
model = load_trained_model_from_checkpoint(config_file, checkpoint_file,
                                           training=True, trainable=True,
                                           seq_len=MAXLEN_BERT_MODEL)

print('Adding Custom Layers for dynamic target embedding output')


def lambda1(x):
    # Position MAXLEN_BERT_MODEL of each input row stores an index; gather the
    # vector at that index from each example in the batch.
    indices = tf.dtypes.cast(x[:, MAXLEN_BERT_MODEL, 0], tf.int32)
    row_indices = tf.range(tf.shape(indices)[0])
    full_indices = tf.stack([row_indices, indices], axis=1)
    return tf.gather_nd(x, full_indices)


def tokenize_sents(sents, word_indices):
    # transformers library
(train_input_ids, train_input_masks, train_segment_ids, train_labels) = convert_examples_to_features(
    tokenizer, train_examples, max_seq_length=sequence_length)
(dev_input_ids, dev_input_masks, dev_segment_ids, dev_labels) = convert_examples_to_features(
    tokenizer, dev_examples, max_seq_length=sequence_length)
(test_input_ids, test_input_masks, test_segment_ids, test_labels) = convert_examples_to_features(
    tokenizer, test_examples, max_seq_length=sequence_length)

config_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_config.json')
checkpoint_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_model.ckpt')
bert_model = load_trained_model_from_checkpoint(config_file, checkpoint_file,
                                                training=True, seq_len=sequence_length)
# bert_model.summary(line_length=120)

bert_output = bert_model.get_layer(name='Encoder-12-FeedForward-Norm').output

embedding_dim = 768
batch_size = 64
drop = 0.9
epochs = 100

############################
# use output of first token (CLS) for classification
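# Continuation sketch (not from the original snippet) following the "use output of
# first token (CLS) for classification" comment above: the dropout reuse of `drop`,
# the single sigmoid unit and the optimizer settings are assumptions. With
# training=True the model keeps three inputs, so zeros are fed for the masked-LM
# positions when fitting.
from keras.layers import Lambda, Dropout, Dense
from keras.models import Model
from keras.optimizers import Adam

cls_embedding = Lambda(lambda seq: seq[:, 0, :], name='cls_embedding')(bert_output)
cls_embedding = Dropout(drop)(cls_embedding)
predictions = Dense(1, activation='sigmoid', name='predictions')(cls_embedding)
model = Model(inputs=bert_model.inputs, outputs=predictions)
model.compile(loss='binary_crossentropy', optimizer=Adam(lr=2e-5), metrics=['accuracy'])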
def build_bert_model(X1, X2):
    # Use the pre-trained BERT purely as a feature extractor: run the token ids (X1)
    # and segment ids (X2) through the model and return the per-token vectors.
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path,
                                                    seq_len=input_length)
    wordvec = bert_model.predict([X1, X2])
    return wordvec
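# Usage sketch for build_bert_model above (not from the original snippet):
# config_path, checkpoint_path and input_length are assumed to be defined earlier in
# the original script; the zero arrays are placeholders for real token/segment ids.
import numpy as np

X1 = np.zeros((2, input_length))     # placeholder token ids
X2 = np.zeros((2, input_length))     # placeholder segment ids
wordvec = build_bert_model(X1, X2)   # per-token embeddings, shape (2, input_length, hidden_size)
print(wordvec.shape)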