# Imports for the bert-for-tf2 helpers used below; `freeze_bert_layers` and the
# globals `bert_config_file` / `CLASSES` are assumed to be defined elsewhere.
import tensorflow as tf
from tensorflow import keras
from bert import BertModelLayer
from bert.loader import StockBertConfig, map_stock_config_to_params, load_stock_weights


def create_model(max_seq_len, bert_ckpt_file, adapter_size):
    # read the stock BERT config and enable adapters (arXiv:1902.00751)
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
        bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    bert_output = bert(input_ids)
    print("bert shape", bert_output.shape)

    # use the [CLS] token embedding as the sequence representation
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=len(CLASSES), activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    # load the pre-trained weights, then freeze everything except the
    # adapter (and LayerNorm) layers when adapters are in use
    load_stock_weights(bert, bert_ckpt_file)
    if adapter_size is not None:
        freeze_bert_layers(bert)

    return model
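# A minimal usage sketch (an assumption, not part of the original source): it
# presumes bert_config_file / bert_ckpt_file point at a downloaded BERT
# checkpoint and that train_x / train_y are hypothetical arrays of padded
# token ids and integer labels.
model = create_model(max_seq_len=128, bert_ckpt_file=bert_ckpt_file, adapter_size=64)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),
    # the model ends in softmax, so the loss consumes probabilities
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])
model.fit(train_x, train_y, batch_size=16, epochs=5)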
def create_model(max_seq_len, lr=1e-5):
    """
    Creates a BERT classification model. The model architecture is:
    raw input -> BERT -> dropout layer (to prevent overfitting)
    -> dense layer that outputs predicted probabilities.

    max_seq_len: the maximum sequence length
    lr: learning rate of the optimizer
    """
    # create the BERT layer
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    # use the [CLS] token embedding as the sequence representation
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    # dropout layer (rate=0.8 drops 80% of units during training)
    cls_out = keras.layers.Dropout(0.8)(cls_out)
    # dense layer with probability output
    logits = keras.layers.Dense(units=2, activation="softmax")(cls_out)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    # load the pre-trained model weights
    load_stock_weights(bert, bert_ckpt_file)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        # the final layer already applies softmax, so the loss must treat
        # its inputs as probabilities rather than raw logits
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])
    model.summary()
    return model
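# Hedged usage sketch: train_x / train_y / test_x are hypothetical arrays of
# padded token ids and 0/1 labels; with those in hand, training and inference
# reduce to the standard Keras calls.
model = create_model(max_seq_len=128, lr=1e-5)
history = model.fit(train_x, train_y, validation_split=0.1, batch_size=16, epochs=3)
probs = model.predict(test_x)    # shape (n_samples, 2); each row sums to 1
preds = probs.argmax(axis=-1)    # predicted class index per sample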
    else:
        templabel.append(0)
y = np.array(templabel)

####################################################
# BERT TOKENIZATION

from bert import bert_tokenization
from bert.loader import params_from_pretrained_ckpt
# this import path is necessary because of weird "ImportError: cannot import
# name 'BertModelLayer' from 'bert' (unknown location)" errors
from bert.model import BertModelLayer

BertTokenizer = bert_tokenization.FullTokenizer

bert_params = params_from_pretrained_ckpt('D:\\uncased_L-4_H-256_A-4')  # from Google, not TensorFlow Hub
bert_layer1 = BertModelLayer.from_params(bert_params, name="bert")  # hidden_dropout = 0.1

model_name = 'uncased_L-4_H-256_A-4'  # needed below to decide on lower-casing
vocabulary_file = os.path.join('D:\\uncased_L-4_H-256_A-4', 'vocab.txt')
# lower-case the input unless the model name starts with "cased"/"multi_cased"
to_lower_case = not (model_name.find("cased") == 0 or model_name.find("multi_cased") == 0)
tokenizer = BertTokenizer(vocabulary_file, to_lower_case)

max_seq_length = 256
# go all the way back to a list of raw strings, then tokenize
train_tokens = map(tokenizer.tokenize, list(subsetdf.loc[:, 'content']))
train_tokens = map(lambda tok: ["[CLS]"] + tok + ["[SEP]"], train_tokens)
train_token_ids = list(map(tokenizer.convert_tokens_to_ids, train_tokens))
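# The snippet stops at raw token-id lists; a sketch of the usual next step
# (assumed here, not part of the original) pads or truncates every sequence
# to max_seq_length with the [PAD] id 0 so the batch can be fed to the model.
train_token_ids = [ids[:max_seq_length] for ids in train_token_ids]  # truncate (note: may clip the trailing [SEP])
train_token_ids = [ids + [0] * (max_seq_length - len(ids)) for ids in train_token_ids]  # pad
train_x = np.array(train_token_ids)  # shape: (num_examples, max_seq_length)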