def __init__(self, configs, vocab_size, num_classes):
    super(NerModel, self).__init__()
    self.use_bert = configs.use_bert
    self.finetune = configs.finetune
    if self.use_bert and self.finetune:
        self.bert_model = TFBertModel.from_pretrained('bert-base-chinese')
    self.use_bigru = configs.use_bigru
    self.embedding = tf.keras.layers.Embedding(
        vocab_size, configs.embedding_dim, mask_zero=True)
    self.hidden_dim = configs.hidden_dim
    self.dropout_rate = configs.dropout
    self.dropout = tf.keras.layers.Dropout(self.dropout_rate)
    self.bigru = tf.keras.layers.Bidirectional(
        tf.keras.layers.GRU(self.hidden_dim, return_sequences=True))
    self.dense = tf.keras.layers.Dense(num_classes)
    # CRF transition matrix, learned jointly with the network.
    self.transition_params = tf.Variable(
        tf.random.uniform(shape=(num_classes, num_classes)))

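# The transition matrix above only becomes meaningful with a CRF loss.
# A minimal training-step sketch, assuming tensorflow_addons is available
# and that NerModel.call returns per-token logits of shape
# (batch, seq_len, num_classes); the function name and shapes here are
# illustrative assumptions, not part of the original code.
import tensorflow_addons as tfa

def crf_train_step(model, optimizer, inputs, labels, seq_lens):
    with tf.GradientTape() as tape:
        logits = model(inputs)
        log_likelihood, _ = tfa.text.crf_log_likelihood(
            logits, labels, seq_lens,
            transition_params=model.transition_params)
        loss = -tf.reduce_mean(log_likelihood)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss
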
def __init__(self, configs, data_manager, logger):
    self.dataManager = data_manager
    vocab_size = data_manager.max_token_number
    num_classes = data_manager.max_label_number
    self.logger = logger
    self.configs = configs
    logger.info('loading model parameters')
    if self.configs.use_bert and not self.configs.finetune:
        self.bert_model = TFBertModel.from_pretrained('bert-base-chinese')
    self.ner_model = NerModel(configs, vocab_size, num_classes)
    # Instantiate a Checkpoint whose restore target is the newly built model.
    checkpoint = tf.train.Checkpoint(ner_model=self.ner_model)
    # Restore the model parameters from the latest checkpoint file.
    checkpoint.restore(tf.train.latest_checkpoint(configs.checkpoints_dir))
    logger.info('model loaded successfully')

def bert_large_uncased_for_squad2(self, max_seq_length):
    input_ids = Input((max_seq_length, ), dtype=tf.int32, name='input_ids')
    input_masks = Input((max_seq_length, ), dtype=tf.int32, name='input_masks')
    # Load the model from Hugging Face.
    config = BertConfig.from_pretrained(self.named_model, output_hidden_states=True)
    bert_layer = TFBertModel.from_pretrained(self.named_model, config=config)
    # Index 0 is the sequence output, 1 the pooled output, and 2 the
    # per-layer hidden states (enabled by output_hidden_states=True).
    outputs, _, embeddings = bert_layer([input_ids, input_masks])
    model = Model(inputs=[input_ids, input_masks], outputs=[embeddings, outputs])
    return model

def create_model1():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    a_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    a_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    a_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    config = BertConfig()
    config.output_hidden_states = False
    bert_model = TFBertModel.from_pretrained('bert-base-uncased', config=config)
    q_embedding = bert_model(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
    a_embedding = bert_model(a_id, attention_mask=a_mask, token_type_ids=a_atn)[0]
    q = tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    a = tf.keras.layers.GlobalAveragePooling1D()(a_embedding)
    # Concatenate the pooled question and answer representations.
    x = tf.keras.layers.Concatenate()([q, a])
    x = tf.keras.layers.Reshape((1, x.shape[-1]))(x)
    cnn = tf.keras.layers.Conv1D(64, 3, padding='same', activation='relu')(x)
    cnn = tf.keras.layers.MaxPooling1D(pool_size=1, strides=2)(cnn)
    cnn = tf.keras.layers.BatchNormalization()(cnn)
    lstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=64))(cnn)
    lstm = tf.keras.layers.Dropout(0.2)(lstm)
    dense = tf.keras.layers.Dense(64, activation='relu')(lstm)
    x = tf.keras.layers.Dense(TARGET_COUNT1, activation='softmax')(dense)
    x = tf.keras.layers.BatchNormalization()(x)
    model = tf.keras.models.Model(
        inputs=[q_id, q_mask, q_atn, a_id, a_mask, a_atn], outputs=x)
    return model

def get_model():
    K.clear_session()
    bert_model = TFBertModel.from_pretrained(bert_path, from_pt=True, trainable=True)
    for l in bert_model.layers:
        l.trainable = True
    input_ids_texta = Input(shape=(None,), dtype='int32', name='input_ids_texta')
    input_token_type_ids_texta = Input(shape=(None,), dtype='int32', name='input_token_type_ids_texta')
    input_attention_mask_texta = Input(shape=(None,), dtype='int32', name='input_attention_mask_texta')
    input_ids_textb = Input(shape=(None,), dtype='int32', name='input_ids_textb')
    input_token_type_ids_textb = Input(shape=(None,), dtype='int32', name='input_token_type_ids_textb')
    input_attention_mask_textb = Input(shape=(None,), dtype='int32', name='input_attention_mask_textb')
    input_cat_texta = Input(shape=(1,), dtype='float32', name='input_cat_texta')
    input_cat_textb = Input(shape=(1,), dtype='float32', name='input_cat_textb')
    bert_output_texta = bert_model({'input_ids': input_ids_texta,
                                    'token_type_ids': input_token_type_ids_texta,
                                    'attention_mask': input_attention_mask_texta},
                                   return_dict=False, training=True)
    projection_logits_texta = bert_output_texta[0]
    # Take the [CLS] vector for classification.
    bert_cls_texta = Lambda(lambda x: x[:, 0])(projection_logits_texta)
    bert_output_textb = bert_model({'input_ids': input_ids_textb,
                                    'token_type_ids': input_token_type_ids_textb,
                                    'attention_mask': input_attention_mask_textb},
                                   return_dict=False, training=True)
    projection_logits_textb = bert_output_textb[0]
    # Take the [CLS] vector for classification.
    bert_cls_textb = Lambda(lambda x: x[:, 0])(projection_logits_textb)
    bert_cls = concatenate([bert_cls_texta, bert_cls_textb,
                            input_cat_texta, input_cat_textb], axis=-1)
    dropout_A = Dropout(0.5)(bert_cls)
    output_A = Dense(1, activation='sigmoid', name='output_A')(dropout_A)
    dropout_B = Dropout(0.5)(bert_cls)
    output_B = Dense(1, activation='sigmoid', name='output_B')(dropout_B)
    model = Model([input_ids_texta, input_token_type_ids_texta, input_attention_mask_texta,
                   input_ids_textb, input_token_type_ids_textb, input_attention_mask_textb,
                   input_cat_texta, input_cat_textb],
                  [output_A, output_B])
    model.compile(
        loss=my_binary_crossentropy,
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=[my_binary_accuracy, my_f1_score]
    )
    print(model.summary())
    return model

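# my_binary_crossentropy, my_binary_accuracy and my_f1_score are referenced
# above but not defined in this snippet. Plausible stand-ins with standard
# binary-classification semantics (assumptions, not the original code):
import tensorflow.keras.backend as K

def my_binary_crossentropy(y_true, y_pred):
    return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1)

def my_binary_accuracy(y_true, y_pred):
    return K.mean(K.cast(K.equal(y_true, K.round(y_pred)), 'float32'), axis=-1)

def my_f1_score(y_true, y_pred):
    y_pred = K.round(y_pred)
    tp = K.sum(y_true * y_pred)
    precision = tp / (K.sum(y_pred) + K.epsilon())
    recall = tp / (K.sum(y_true) + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())
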
def get_bert():
    ids = keras.layers.Input(shape=(None,), dtype=tf.int32, name='ids')
    att = keras.layers.Input(shape=(None,), dtype=tf.int32, name='att')
    tok_type_ids = keras.layers.Input(shape=(None,), dtype=tf.int32, name='tti')
    config = BertConfig.from_pretrained(Config.Bert.config)
    bert_model = TFBertModel.from_pretrained(Config.Bert.model, config=config)
    x = bert_model(ids, attention_mask=att, token_type_ids=tok_type_ids)

    # Start-token head.
    x1 = keras.layers.Dropout(0.15)(x[0])
    x1 = keras.layers.Conv1D(768, 2, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.Conv1D(64, 2, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.Conv1D(32, 2, padding='same')(x1)
    x1 = keras.layers.Conv1D(1, 1)(x1)
    x1 = keras.layers.Flatten()(x1)
    x1 = keras.layers.Activation('softmax', dtype='float32', name='sts')(x1)

    # End-token head.
    x2 = keras.layers.Dropout(0.15)(x[0])
    x2 = keras.layers.Conv1D(768, 2, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.Conv1D(64, 2, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.Conv1D(32, 2, padding='same')(x2)
    x2 = keras.layers.Conv1D(1, 1)(x2)
    x2 = keras.layers.Flatten()(x2)
    x2 = keras.layers.Activation('softmax', dtype='float32', name='ets')(x2)

    model = keras.models.Model(inputs=[ids, att, tok_type_ids], outputs=[x1, x2])
    optimizer = keras.optimizers.Adam(learning_rate=6e-5)
    if Config.Train.use_amp:
        optimizer = keras.mixed_precision.experimental.LossScaleOptimizer(
            optimizer, 'dynamic')
    loss = keras.losses.CategoricalCrossentropy(
        label_smoothing=Config.Train.label_smoothing)
    model.compile(loss=loss, optimizer=optimizer)
    return model

def create_model():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    config = BertConfig()
    config.output_hidden_states = False
    bert_model = TFBertModel.from_pretrained('bert-base-uncased', config=config)
    q_embedding = bert_model(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
    q = tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    x = tf.keras.layers.Dropout(0.2)(q)
    x = tf.keras.layers.Dense(TARGET_COUNT, activation='sigmoid')(x)
    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn], outputs=x)
    return model

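# Minimal driving code for create_model (a sketch; assumes the tokenizer
# checkpoint matches the model and that MAX_SEQUENCE_LENGTH and TARGET_COUNT
# are defined as above):
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
enc = tokenizer(["an example question"], max_length=MAX_SEQUENCE_LENGTH,
                padding='max_length', truncation=True, return_tensors='tf')
model = create_model()
# Input order matches the Model definition: ids, attention mask, token types.
preds = model.predict([enc['input_ids'], enc['attention_mask'],
                       enc['token_type_ids']])
print(preds.shape)  # (1, TARGET_COUNT)
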
def check_compability(torch_model: BertModel, tf_model: TFBertModel):
    torch_weights = []
    for k, v in torch_model.state_dict().items():
        # position_ids is a buffer, not a learned weight; skip it.
        if k == "embeddings.position_ids":
            continue
        # PyTorch stores linear weights transposed relative to TF.
        if not k.startswith("embeddings.") and k.endswith(".weight"):
            torch_weights.append(v.t().numpy())
        else:
            torch_weights.append(v.numpy())
    # The position and token-type embeddings appear in a different order
    # in the two frameworks, so swap them before comparing.
    torch_weights[1], torch_weights[2] = torch_weights[2], torch_weights[1]
    tf_weights = tf_model.get_weights()
    check = [(torch_weight == tf_weight).all()
             for torch_weight, tf_weight in zip(torch_weights, tf_weights)]
    return all(check)

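# Example check (a sketch): load the same checkpoint in PyTorch and
# TensorFlow and verify the converted weights line up.
from transformers import BertModel, TFBertModel

torch_bert = BertModel.from_pretrained('bert-base-uncased')
tf_bert = TFBertModel.from_pretrained('bert-base-uncased')
print(check_compability(torch_bert, tf_bert))  # True when the weights match
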
def tokenize(text_list, tags_list, tag_dict, max_token_len):
    """Encode texts with BERT and align tag sequences.

    Args:
        text_list: list of raw text strings.
        tags_list: list of tag-id sequences, one per text.
        tag_dict: mapping {tag: tag id}.
        max_token_len: maximum token length (texts are padded/truncated).

    Returns:
        embeddings list - (data size, max token length, 768)
        token length list - (data size,)
        tags list - (data size, max token length) of tag ids
    """
    tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
    bert_model = TFBertModel.from_pretrained('bert-base-chinese')
    token_len_list = []
    embeddings_list = []
    padded_tags_list = []
    for text, tags in zip(text_list, tags_list):
        inputs = tokenizer(text, max_length=max_token_len,
                           padding="max_length", truncation=True,
                           return_tensors="np")
        token_len = np.sum(inputs.data['attention_mask'])
        # Skip examples whose tag sequence does not match the token count
        # (token_len includes the [CLS] and [SEP] special tokens).
        if token_len - 2 != len(tags):
            continue
        outputs = bert_model(inputs)
        embeddings = outputs.last_hidden_state.numpy()[0]
        embeddings_list.append(embeddings)
        token_len_list.append(token_len)
        # Prepend an 'O' tag for [CLS] and pad with 'O' up to max_token_len,
        # keeping only the tags of examples that were actually encoded.
        tags = [tag_dict['O']] + list(tags)
        tags += [tag_dict['O']] * (max_token_len - len(tags))
        padded_tags_list.append(tags)
    embeddings_list = np.array(embeddings_list)
    token_len_list = np.array(token_len_list)
    tags_list = np.array(padded_tags_list, dtype=np.int32)
    return embeddings_list, token_len_list, tags_list

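# Toy usage of tokenize (a sketch; the tag ids and text are made up).
# bert-base-chinese tokenizes per character, so one tag per character.
tag_dict = {'O': 0, 'B-LOC': 1, 'I-LOC': 2}
texts = ['北京欢迎你']
tags = [[1, 2, 0, 0, 0]]
embeddings, lengths, tag_ids = tokenize(texts, tags, tag_dict, max_token_len=16)
print(embeddings.shape, lengths.shape, tag_ids.shape)
# expected: (1, 16, 768) (1,) (1, 16)
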
def _embedding_from_bert():
    log.info("Extracting pretrained word embedding weights from BERT")
    vocab_of_BERT = TFBertModel.from_pretrained('bert-base-uncased', trainable=False)
    # The first weight tensor of the model is the word-embedding matrix.
    embedding_matrix = vocab_of_BERT.get_weights()[0]
    log.info(f"Embedding matrix shape '{embedding_matrix.shape}'")
    return (embedding_matrix, vocab_of_BERT)

def get_bert_imdb_model():
    from transformers import TFBertModel
    max_len = 512
    # Inputs: token ids, segment ids, and attention mask.
    input_ids = layers.Input(shape=(max_len, ), dtype=tf.int32)
    token_type_ids = layers.Input(shape=(max_len, ), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len, ), dtype=tf.int32)
    inputs = [input_ids, token_type_ids, attention_mask]
    name = "bert-base-cased"
    bert = TFBertModel.from_pretrained(name)
    bert.trainable = True
    bert_outputs = bert(input_ids=input_ids,
                        token_type_ids=token_type_ids,
                        attention_mask=attention_mask)
    last_hidden_states = bert_outputs.last_hidden_state
    avg = layers.GlobalAveragePooling1D()(last_hidden_states)
    avg = tf.keras.layers.Dense(128, activation='relu')(avg)
    output = layers.Dense(2, activation="softmax")(avg)
    model = tf.keras.Model(inputs=inputs, outputs=output)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return model

def bert(self, batch_size=256, max_seq_len=100):
    def preprocess(docs, max_size):
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        # Truncate each document to at most max_size whitespace tokens.
        docs = [" ".join(x.split()[:max_size]) for x in docs]
        max_size = max(max_size, max([len(x.split()) for x in docs]))
        return tokenizer, max_size

    texts = self.df["text"].values.tolist()
    tokenizer, max_len = preprocess(texts, max_seq_len)
    model = TFBertModel.from_pretrained('bert-base-uncased', output_hidden_states=True)
    X = prepareBertInput(tokenizer, texts, max_len)
    bert_embed = np.zeros((len(texts), 768))
    for start_idx in range(0, len(texts), batch_size):
        end_idx = min(start_idx + batch_size, len(texts))
        input_ids = [
            X[0][start_idx:end_idx], X[1][start_idx:end_idx],
            X[2][start_idx:end_idx]
        ]
        outputs = model(input_ids)
        # Average the last four hidden-state layers, excluding [CLS].
        layer4 = outputs[-1][-4][:, 1:, :]
        layer3 = outputs[-1][-3][:, 1:, :]
        layer2 = outputs[-1][-2][:, 1:, :]
        layer1 = outputs[-1][-1][:, 1:, :]
        avg = tf.keras.layers.Average()
        avg_embedding = avg([layer4, layer3, layer2, layer1])
        embedding = tf.math.reduce_mean(avg_embedding, axis=1)
        bert_embed[start_idx:end_idx, :] = embedding
    feature_length = bert_embed.shape[-1]
    bert_vecs = pd.DataFrame(
        bert_embed,
        index=self.df.index,
        columns=["BERT.{}".format(i) for i in range(feature_length)])
    self.df = pd.concat((self.df, bert_vecs), axis=1, sort=False)
    return

def build_features():
    # Tokenizer and encoder must come from the same checkpoint.
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert = TFBertModel.from_pretrained('bert-base-uncased')
    data = get_data()
    data_train, data_val_model, data_val_interpretation, data_test = \
        get_train_val_test_splits(data)
    original_train_dataset = glue_convert_examples_to_features(
        utils.data_to_tf_dataset(data_train), tokenizer,
        max_length=utils.MAX_SEQ_LENGTH, task='cola')
    original_valid_dataset = glue_convert_examples_to_features(
        utils.data_to_tf_dataset(data_val_model), tokenizer,
        max_length=utils.MAX_SEQ_LENGTH, task='cola')
    f_train = make_features(original_train_dataset, bert)
    f_val = make_features(original_valid_dataset, bert)
    np.save("train_features.npy", f_train)
    np.save("valid_features.npy", f_val)

def __init__(self, label_size):
    super(BilstmCRF, self).__init__()
    self.embedding = TFBertModel.from_pretrained(
        '../resource/ernie_torch/', from_pt=True, output_hidden_states=True)
    # Freeze the embedding layer.
    self.embedding.trainable = False
    # CNN branches with kernel sizes 1-4.
    self.conv1 = tf.keras.layers.Conv1D(filters=192, kernel_size=1,
                                        padding="same", activation='relu')
    self.conv2 = tf.keras.layers.Conv1D(filters=192, kernel_size=2,
                                        padding="same", activation='relu')
    self.conv3 = tf.keras.layers.Conv1D(filters=192, kernel_size=3,
                                        padding="same", activation='relu')
    self.conv4 = tf.keras.layers.Conv1D(filters=192, kernel_size=4,
                                        padding="same", activation='relu')
    self.dropout = tf.keras.layers.Dropout(0.4)
    # Bidirectional LSTM.
    self.bilstm = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(256, return_sequences=True))
    # CRF transition parameters.
    self.transition = tf.Variable(
        tf.initializers.GlorotNormal()(shape=(label_size, label_size)))
    self.dense = tf.keras.layers.Dense(label_size, name='dense_out')
    self.layer_norm1 = tf.keras.layers.LayerNormalization(epsilon=1e-5)
    self.layer_norm2 = tf.keras.layers.LayerNormalization(epsilon=1e-5)

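# A possible decode method for the CRF head above (a sketch to add to the
# BilstmCRF class, not from the original): tfa.text.crf_decode returns the
# best-scoring tag path given per-token potentials and the learned
# transition matrix.
import tensorflow_addons as tfa

def decode(self, logits, text_lens):
    # logits: (batch, seq_len, label_size); text_lens: (batch,)
    pred_ids, _ = tfa.text.crf_decode(logits, self.transition, text_lens)
    return pred_ids
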
def __init__(self, intent_size, slot_size, lr=1e-4, dropout_rate=0.2, units=300):
    super().__init__()
    self.bert = TFBertModel.from_pretrained('bert-base-uncased', trainable=True)
    self.inp_dropout = Dropout(dropout_rate)
    self.intent_dropout = Dropout(dropout_rate)
    self.fc_intent = Dense(units, activation='relu')
    self.trans_params = self.add_weight(shape=(slot_size, slot_size))
    self.out_linear_intent = Dense(intent_size)
    self.out_linear_slot = Dense(slot_size)
    self.optimizer = Adam(lr)
    self.slots_accuracy = tf.keras.metrics.Accuracy()
    self.intent_accuracy = tf.keras.metrics.Accuracy()
    self.decay_lr = tf.optimizers.schedules.ExponentialDecay(lr, 1000, 0.95)
    self.logger = logging.getLogger('tensorflow')
    self.logger.setLevel(logging.INFO)

def create_model():
    id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    attn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    config = BertConfig()
    config.output_hidden_states = True
    bert_model = TFBertModel.from_pretrained(
        'https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tf_model.h5',
        config=config)
    _, _, hidden_states = bert_model(id, attention_mask=mask, token_type_ids=attn)
    # [CLS] vectors from the last four hidden layers.
    h12 = tf.reshape(hidden_states[-1][:, 0], (-1, 1, 768))
    h11 = tf.reshape(hidden_states[-2][:, 0], (-1, 1, 768))
    h10 = tf.reshape(hidden_states[-3][:, 0], (-1, 1, 768))
    h09 = tf.reshape(hidden_states[-4][:, 0], (-1, 1, 768))
    concat_hidden = tf.keras.layers.Concatenate(axis=2)([h12, h11, h10, h09])
    x = tf.keras.layers.GlobalAveragePooling1D()(concat_hidden)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(MAX_TARGET_LEN, activation='sigmoid')(x)
    model = tf.keras.models.Model(inputs=[id, mask, attn], outputs=x)
    return model

def __init__(self, params, name="model", **kwargs):
    super(NERwithHFBERT, self).__init__(params, name=name, **kwargs)
    self._tag_string_mapper = get_sm(self._params.tags_fn_)
    self.tag_vocab_size = self._tag_string_mapper.size() + 2
    self._tracked_layers = dict()
    if self.pretrained_bert is None:
        if self._params.use_hf_electra_model_:
            self.pretrained_bert = TFElectraModel(ElectraConfig.from_pretrained(
                params.pretrained_hf_model_, cache_dir=params.hf_cache_dir_))
        else:
            self.pretrained_bert = TFBertModel(BertConfig.from_pretrained(
                params.pretrained_hf_model_, cache_dir=params.hf_cache_dir_))
    self._dropout = tf.keras.layers.Dropout(self._params.dropout_last)
    if self._params.bet_tagging_:
        # Half of the classes is used, plus the O class, sos, and eos.
        self._layer_cls = tf.keras.layers.Dense(
            int(self._tag_string_mapper.size() // 2 + 3),
            activation=tf.keras.activations.softmax,
            name="layer_cls"
        )
        self._layer_start = tf.keras.layers.Dense(
            1, activation=tf.keras.activations.sigmoid, name="layer_start")
        self._layer_end = tf.keras.layers.Dense(
            1, activation=tf.keras.activations.sigmoid, name="layer_end")
    elif self._params.use_crf:
        self._last_layer = tf.keras.layers.Dense(self.tag_vocab_size, name="last_layer")
        self._trans_params = tf.keras.layers.Embedding(
            self.tag_vocab_size, self.tag_vocab_size, name="trans_params")
        if self._params.crf_with_ner_rule:
            self._penalty_factor = tf.keras.layers.Embedding(1, 1, name="penalty_factor")
            self._penalty_absolute = tf.keras.layers.Embedding(1, 1, name="penalty_absolute")
        elif self._params.crf_with_ner_forb_trans:
            self._penalty_factor = tf.constant(0.0, name="penalty_factor", dtype=tf.float32)
            self._penalty_absolute = tf.constant(-100000.0, name="penalty_absolute", dtype=tf.float32)
        self.init_crf_with_ner_rule((self.tag_vocab_size - 3) // 2)
    else:
        self._last_layer = tf.keras.layers.Dense(
            self.tag_vocab_size,
            activation=tf.keras.activations.softmax,
            name="last_layer"
        )

def __init__(self,
             pretrained_model_name_or_path='bert-base-uncased',
             reduce_output='cls_pooled',
             trainable=True,
             num_tokens=None,
             **kwargs):
    super().__init__()
    try:
        from transformers import TFBertModel
    except ModuleNotFoundError:
        logger.error(
            'transformers is not installed. '
            'In order to install all text feature dependencies run '
            'pip install ludwig[text]')
        sys.exit(-1)
    self.transformer = TFBertModel.from_pretrained(
        pretrained_model_name_or_path)
    self.reduce_output = reduce_output
    if self.reduce_output != 'cls_pooled':
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
    self.transformer.trainable = trainable
    self.transformer.resize_token_embeddings(num_tokens)

def create_model():
    # BERT encoder
    encoder = TFBertModel.from_pretrained("bert-base-uncased")
    # QA model: binary output from the pooled [CLS] representation.
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    token_type_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)
    output = encoder(
        input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask
    )[1]
    output = layers.Dense(1, use_bias=True)(output)
    output = layers.Activation(keras.activations.sigmoid)(output)
    model = keras.Model(
        inputs=[input_ids, token_type_ids, attention_mask],
        outputs=output,
    )
    model.compile()
    return model

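# Note that compile() above is called without a loss, so this model can only
# be used for inference until it is re-compiled with one. A usage sketch
# (tokenizer choice and max_len are assumptions):
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
enc = tokenizer('an example sentence', max_length=max_len,
                padding='max_length', truncation=True, return_tensors='tf')
model = create_model()
score = model.predict([enc['input_ids'], enc['token_type_ids'],
                       enc['attention_mask']])
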
def bert_large_uncased_for_squad2(self, max_seq_length):
    input_ids = Input((max_seq_length, ), dtype=tf.int32, name='input_ids')
    input_masks = Input((max_seq_length, ), dtype=tf.int32, name='input_masks')
    input_tokens = Input((max_seq_length, ), dtype=tf.int32, name='input_tokens')
    # Load the model from Hugging Face.
    config = BertConfig.from_pretrained("bert-large-uncased", output_hidden_states=True)
    bert_layer = TFBertModel.from_pretrained(self.named_model, config=config)
    if self.weights_file is not None:
        bert_layer.load_weights(self.weights_file)
    # Index 0 is the sequence output, 1 the pooled output, and 2 the
    # per-layer hidden states (enabled by output_hidden_states=True).
    _, _, embeddings = bert_layer([input_ids, input_masks, input_tokens])
    model = Model(inputs=[input_ids, input_masks, input_tokens], outputs=embeddings)
    return model

def __build_model__(self, learningrate=None, keep_prob=0.5, verbose=1):
    inputs = keras.layers.Input(shape=(self.sentence_length, ), dtype=tf.int32)
    bert = TFBertModel.from_pretrained('bert-base-uncased', trainable=False)(inputs)[0]
    average_pooling = keras.layers.GlobalAveragePooling1D()(bert)
    if self.hidden_layer > 0:
        hidden = keras.layers.Dense(self.hidden_layer,
                                    activation=tf.nn.relu)(average_pooling)
    else:
        hidden = average_pooling
    dropout = keras.layers.Dropout(1 - keep_prob)(hidden)
    outputs = keras.layers.Dense(1, activation=tf.nn.sigmoid)(dropout)
    model = keras.models.Model(inputs, outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])
    if verbose:
        model.summary()
    return model

def __init__(self, config):
    super(MyModel, self).__init__()
    self.bert_model_config = BertConfig.from_pretrained(
        config.bert_model_config_path)
    self.bert_model = TFBertModel.from_pretrained(
        config.bert_model_weights_path, config=self.bert_model_config)
    # TextCNN-style convolution + pooling branches over the BERT output.
    self.convs = [
        tf.keras.layers.Conv2D(filters=config.num_filters,
                               kernel_size=(k, config.hidden_size),
                               padding='valid',
                               kernel_initializer='normal',
                               activation='relu')
        for k in config.filter_sizes
    ]
    self.pools = [
        tf.keras.layers.MaxPool2D(pool_size=(config.max_len - k + 1, 1),
                                  strides=(1, 1),
                                  padding='valid')
        for k in config.filter_sizes
    ]
    self.flatten = tf.keras.layers.Flatten()
    self.dropout = tf.keras.layers.Dropout(config.dropout)
    self.fc = Dense(config.num_classes, activation='softmax')

def create_model():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    config = BertConfig.from_pretrained(BERT_PATH)
    config.output_hidden_states = False
    bert_model = TFBertModel.from_pretrained(BERT_PATH, from_pt=True, config=config)
    q_embedding = bert_model(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
    q = tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    a = tf.keras.layers.GlobalMaxPooling1D()(q_embedding)
    t = q_embedding[:, -1]  # last-token state
    e = q_embedding[:, 0]   # [CLS] state
    x = tf.keras.layers.Concatenate()([q, a, t, e])
    x = tf.keras.layers.Dropout(0.5)(x)
    x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn], outputs=x)
    return model

def bert_large_uncased_for_squad2(self, max_seq_length):
    input_ids = Input((max_seq_length, ), dtype=tf.int32, name='input_ids')
    input_masks = Input((max_seq_length, ), dtype=tf.int32, name='input_masks')
    # Load the model from Hugging Face; index 0 of the outputs is the
    # sequence output, index 1 the pooled output.
    bert_layer = TFBertModel.from_pretrained(self.named_model)
    outputs = bert_layer([input_ids, input_masks])[0]
    # Dense layer with 2 nodes: one for the start span, one for the end span.
    logits = Dense(2)(outputs)
    # Split the outputs into start and end logits.
    start_logits, end_logits = tf.split(logits, 2, axis=-1)
    start_logits = K.squeeze(start_logits, axis=-1)
    end_logits = K.squeeze(end_logits, axis=-1)
    model = Model(inputs=[input_ids, input_masks],
                  outputs=[start_logits, end_logits])
    return model

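# One way to train the span model above (an assumption mirroring common
# SQuAD-style setups): treat start/end positions as sparse labels over the
# sequence axis, with from_logits=True since the model emits raw logits.
# The `builder` instance and the fit call are hypothetical.
span_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
qa_model = builder.bert_large_uncased_for_squad2(max_seq_length=384)
qa_model.compile(optimizer=tf.keras.optimizers.Adam(3e-5),
                 loss=[span_loss, span_loss])
# qa_model.fit([ids, masks], [start_positions, end_positions], ...)
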
def initialize_hugface_model(hugging_face_model):
    # Slow-tokenizer alternatives, kept for reference:
    # if hugging_face_model == "xlnet":
    #     tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
    #     model = TFXLNetModel.from_pretrained('xlnet-base-cased')
    # elif hugging_face_model == "roberta":
    #     tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    #     model = TFRobertaModel.from_pretrained('roberta-base')
    # elif hugging_face_model == "ernie":
    #     tokenizer = AutoTokenizer.from_pretrained("nghuyong/ernie-2.0-en")
    #     model = TFAutoModel.from_pretrained("nghuyong/ernie-2.0-en")

    # Fast tokenizers
    if hugging_face_model == "distilbert":
        tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
        model = TFDistilBertModel.from_pretrained("distilbert-base-uncased")
    elif hugging_face_model == "bert":
        tokenizer = BertTokenizerFast.from_pretrained('bert-base-cased')
        model = TFBertModel.from_pretrained('bert-base-cased')
    else:
        raise ValueError('Invalid embedding type')
    return tokenizer, model

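# Example: encode a sentence with whichever backbone was selected (assumes
# a transformers version where the model returns an output object).
tokenizer, model = initialize_hugface_model("bert")
enc = tokenizer("hello world", return_tensors="tf")
hidden = model(enc).last_hidden_state  # (1, seq_len, 768)
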
def create_model(num_tags):
    # BERT encoder
    encoder = TFBertModel.from_pretrained("bert-base-uncased")
    encoder.save_pretrained(save_path)
    # NER model
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    token_type_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)
    embedding = encoder(
        input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask
    )[0]
    embedding = layers.Dropout(0.3)(embedding)
    # TODO: drop the "+ 1"
    tag_logits = layers.Dense(num_tags, activation='softmax')(embedding)
    model = keras.Model(
        inputs=[input_ids, token_type_ids, attention_mask],
        outputs=[tag_logits],
    )
    optimizer = keras.optimizers.Adam(learning_rate=3e-5)
    model.compile(optimizer=optimizer, loss=masked_ce_loss, metrics=['accuracy'])
    return model

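# masked_ce_loss is referenced above but not defined in this snippet.
# A plausible definition (an assumption: tag id 0 is the padding label),
# which averages cross-entropy only over non-padded positions:
def masked_ce_loss(real, pred):
    loss_object = keras.losses.SparseCategoricalCrossentropy(
        from_logits=False, reduction=tf.keras.losses.Reduction.NONE)
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)
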
def create_classification_model(model_config):
    classes = model_config["classes"]
    max_sentence_len = model_config["max_sentence_len"]
    dropout = model_config["dropout"]
    model_pretrained_name = model_config["pretrained_name"]
    tokens_config = model_config["tokens"]
    pad_token_id = tokens_config["pad"]["id"]
    model: tf.keras.Model
    if model_config["model_config_name"] == "bert":
        model = TFBertModel.from_pretrained(model_pretrained_name)
    elif model_config["model_config_name"] == "xlm_roberta":
        model = TFXLMRobertaModel.from_pretrained(model_pretrained_name)
    else:
        raise ValueError()
    subword_ids = tf.keras.layers.Input(shape=(max_sentence_len, ),
                                        dtype=tf.int32, name="input_ids")
    # Derive the attention mask from the pad token id.
    attention_masks = tf.keras.layers.Lambda(
        lambda x: tf.cast(x != pad_token_id, tf.int32))(subword_ids)
    subword_embeddings = model([subword_ids, attention_masks])[0]
    layer = tf.keras.layers.Flatten()(subword_embeddings)
    dropout = tf.keras.layers.Dropout(rate=dropout)(layer)
    output = tf.keras.layers.Dense(units=classes, activation="softmax")(dropout)
    model = tf.keras.models.Model(inputs=subword_ids, outputs=output)
    if "weights_file" in model_config:
        model.load_weights(model_config["weights_file"])
    return model

def embedding(self, text):
    tokens, masks, seg = self.text_transform.text_to_int(text)
    input_ids = tf.keras.layers.Input(shape=(self.max_len, ),
                                      dtype=tf.int32, name="input_ids")
    attention_mask = tf.keras.layers.Input(shape=(self.max_len, ),
                                           dtype=tf.int32, name="attention_mask")
    token_type_ids = tf.keras.layers.Input(shape=(self.max_len, ),
                                           dtype=tf.int32, name="token_type_ids")
    bert_layer = TFBertModel.from_pretrained(self.pre_trained_model)
    outputs = bert_layer({
        'input_ids': input_ids,
        'token_type_ids': token_type_ids,
        'attention_mask': attention_mask
    })
    model = tf.keras.Model(inputs={
        'input_ids': input_ids,
        'token_type_ids': token_type_ids,
        'attention_mask': attention_mask
    }, outputs=outputs)
    input_token = tf.constant(tokens)[None, :]
    token_input = tf.constant(seg)[None, :]
    mask_input = tf.constant(masks)[None, :]
    inputs = {
        'input_ids': input_token,
        'token_type_ids': token_input,
        'attention_mask': mask_input
    }
    output = model(inputs)
    # Return the sequence output as a (max_len, 768) array.
    return np.asarray(tf.squeeze(output[0]))

def __post_init__(self):
    bert_model_name = [
        "hfl/chinese-bert-wwm",
        "hfl/chinese-bert-wwm-ext",
        "hfl/chinese-roberta-wwm-ext",
        "hfl/chinese-roberta-wwm-ext-large",
    ]
    self.tokenizer = BertTokenizer.from_pretrained(bert_model_name[0])
    self.bert_model = TFBertModel.from_pretrained(bert_model_name[0], from_pt=True)
    test_X_path = self.model_data_path + "test_X.pkl"
    test_mapping_path = self.model_data_path + "test_mapping.pkl"
    id2tag_path = self.model_data_path + "id2tag.pkl"
    test_X, self.test_mapping = GeneralDataPreprocessor.loadTestArrays(
        test_X_path, test_mapping_path)
    with open(id2tag_path, "rb") as f:
        self.id2tag = pickle.load(f)
    ckpt = tf.train.Checkpoint(optimizer=self.optimizer, model=self.model)
    ckpt.restore(tf.train.latest_checkpoint(self.checkpoint_path))

def get_model(output_model, dropout=0.2, output_layer_name="output"):
    input_word_ids = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ),
                                           dtype=tf.int32, name='input_word_ids')
    input_masks = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ),
                                        dtype=tf.int32, name='input_masks')
    input_segments = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ),
                                           dtype=tf.int32, name='input_segments')
    config = BertConfig()
    bert_layer = TFBertModel.from_pretrained(BERT_PATH, config=config)
    hidden_layer, _ = bert_layer([input_word_ids, input_masks, input_segments])
    # [CLS] vector of the sequence output.
    hidden_layer_cls = tf.reshape(hidden_layer[:, 0], (-1, 768))
    hidden_layer_dpout = tf.keras.layers.Dropout(dropout)(hidden_layer_cls)
    output_layer = output_model.get_layer(output_layer_name)(hidden_layer_dpout)
    model = tf.keras.models.Model(
        inputs=[input_word_ids, input_masks, input_segments],
        outputs=output_layer)
    return model