def model(self):
    word_input = Input(shape=(self.maxlen_sentence,))                    # [batch, sentence]
    char_input = Input(shape=(self.maxlen_sentence, self.maxlen_word,))  # [batch, sentence, word]
    ner_label = Input(shape=(self.maxlen_sentence,))
    # relation_label = Input(shape=(self.maxlen_sentence,))
    mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(word_input)
    word_embedding = Embedding(self.word_vocab_size, self.word_embed_size, mask_zero=True,
                               weights=[self.embedding_martrix], name='word_embedding',
                               trainable=True)(word_input)                # [batch, sentence, embed]
    char_embedding = Embedding(self.char_vocab_size, self.char_embed_size, mask_zero=True,
                               name='char_embedding', trainable=True)(char_input)  # [batch, sentence, word, embed]
    if self.embedding_dropout_prob:
        word_embedding = Dropout(self.embedding_dropout_prob)(word_embedding)
        char_embedding = Dropout(self.embedding_dropout_prob)(char_embedding)

    if self.is_use_char_embedding:
        # char embedding: BiLSTM over characters followed by attention pooling
        char_embedding_shape = K.int_shape(char_embedding)  # [batch, sentence, word, dim]
        # reshape to [batch * sentence, word, char_embed_size]
        char_embedding_reshaped = self.reshape_layer_1(char_embedding, char_embedding_shape)
        char_lstm = Bidirectional(MaskedLSTM(units=self.char_embed_size // 2, return_sequences=True,
                                             name='char_lstm_layer'))(char_embedding_reshaped)
        attention = TimeDistributed(Dense(1, activation='tanh'))(char_lstm)
        attention = MaskFlatten()(attention)
        attention = Activation('softmax')(attention)
        attention = MaskRepeatVector(self.char_embed_size)(attention)
        attention = MaskPermute([2, 1])(attention)
        sent_representation = multiply([char_lstm, attention])
        attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)  # [batch * sentence, char_embed_size]
        # alternative poolings (GlobalMaxPooling1D / a separate attention layer) were tried here;
        # reshape the pooled vectors back to [batch, sentence, char_embed_size]
        char_embedding = self.reshape_layer_2(attention, char_embedding_shape)
        if self.word_char_embed_mode == 'concate':
            embedding = Concatenate(axis=-1)([word_embedding, char_embedding])
        else:
            embedding = Gate_Add_Lyaer()([word_embedding, char_embedding])
    else:
        embedding = word_embedding

    # multi-layer self-attention for NER prediction
    if self.embedding_dropout_prob:
        embedding = Dropout(self.embedding_dropout_prob)(embedding)

    # part 1: self-attention block (CNN/LSTM/FNN + self-attention)
    lstm = Bidirectional(MaskedLSTM(units=self.hidden_size // 2, return_sequences=True))(embedding)
    attention = TimeDistributed(Dense(1, activation='tanh'))(lstm)
    attention = MaskFlatten()(attention)
    attention = Activation('softmax')(attention)
    attention = MaskRepeatVector(self.hidden_size)(attention)
    attention = MaskPermute([2, 1])(attention)
    sent_representation = multiply([lstm, attention])
    attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)
    # broadcast the sentence vector back over time and fuse it with the LSTM states
    # (plain addition and gated addition are the options considered here)
    attention = MaskRepeatVector(self.maxlen_sentence)(attention)  # [batch, sentence, hidden_size]
    lstm = Gate_Add_Lyaer()([lstm, attention])
    if self.nn_dropout_prob:
        lstm = Dropout(self.nn_dropout_prob)(lstm)
    lstm_attention = MaskedConv1D(filters=self.hidden_size, kernel_size=3,
                                  activation='relu', padding='same')(lstm)
    bio_pred = Dense(self.num_classes, activation='softmax')(lstm_attention)
    pred_model = Model([word_input, char_input], bio_pred)

    # part 2: multi-head selection for relation classification
    train_model = Model([word_input, char_input, ner_label], bio_pred)
    loss = K.sparse_categorical_crossentropy(ner_label, bio_pred)
    loss = K.sum(loss * mask[:, :, 0]) / K.sum(mask)
    train_model.summary()
    train_model.add_loss(loss)
    train_model.compile(keras.optimizers.Adam(lr=self.learning_rate))
    return train_model, pred_model
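# Usage sketch (an assumption, not part of the original code): `ner` is an instance of the
# class that defines model() above, and word_ids / char_ids / bio_labels are padded index
# arrays shaped [batch, maxlen_sentence], [batch, maxlen_sentence, maxlen_word] and
# [batch, maxlen_sentence]. Because the masked loss is attached via add_loss, fit()
# receives no separate target array.
def train_ner(ner, word_ids, char_ids, bio_labels, batch_size=32, epochs=10):
    train_model, pred_model = ner.model()
    train_model.fit([word_ids, char_ids, bio_labels], None,
                    batch_size=batch_size, epochs=epochs)
    # pred_model shares the trained weights and needs only the word/char inputs
    bio_probs = pred_model.predict([word_ids, char_ids])   # [batch, maxlen_sentence, num_classes]
    return pred_model, bio_probs.argmax(-1)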
def build_model_from_config(config_file,
                            checkpoint_file,
                            training=False,
                            trainable=False,
                            seq_len=None):
    """Build the model from a config file.

    :param config_file: The path to the JSON configuration file.
    :param checkpoint_file: The path to the pretrained checkpoint.
    :param training: If training, the whole model will be returned.
    :param trainable: Whether the model is trainable.
    :param seq_len: If it is not None and it is shorter than the value in the config file,
                    the weights in position embeddings will be sliced to fit the new length.
    :return: train model and entity model
    """
    with open(config_file, 'r') as reader:
        config = json.loads(reader.read())
    if seq_len is not None:
        config['max_position_embeddings'] = min(seq_len, config['max_position_embeddings'])
    if trainable is None:
        trainable = training
    # note: training/trainable are hard-coded below rather than taken from the arguments
    model = get_model(
        token_num=config['vocab_size'],
        pos_num=config['max_position_embeddings'],
        seq_len=config['max_position_embeddings'],
        embed_dim=config['hidden_size'],
        transformer_num=config['num_hidden_layers'],
        head_num=config['num_attention_heads'],
        feed_forward_dim=config['intermediate_size'],
        training=False,
        trainable=True,
    )
    inputs, outputs = model
    bio_label = Input(shape=(maxlen,))
    event = Input(shape=(1,))
    mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(inputs[0])
    event_embedding = Embedding(len(event2id), hidden_size, mask_zero=True)(event)

    outputs = Dropout(0.15)(outputs)
    attention = TimeDistributed(Dense(1, activation='tanh'))(outputs)
    attention = MaskFlatten()(attention)
    attention = Activation('softmax')(attention)
    attention = MaskRepeatVector(config['hidden_size'])(attention)
    attention = MaskPermute([2, 1])(attention)
    sent_representation = multiply([outputs, attention])
    attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)

    t_dim = K.int_shape(outputs)[-1]
    bert_attention = Lambda(seq_and_vec, output_shape=(None, t_dim * 2))([outputs, attention])
    cnn1 = MaskedConv1D(filters=hidden_size, kernel_size=3, activation='relu',
                        padding='same')(bert_attention)
    # broadcast the event embedding to every time step
    event_bc = Lambda(lambda x: x[0] * 0 + x[1])([cnn1, event_embedding])
    con_cnn_event = Concatenate(axis=-1)([cnn1, event_bc])
    dens1 = Dense(hidden_size, activation='relu', use_bias=True)(con_cnn_event)
    # BIOE tagging
    bio_pred = Dense(4, activation='softmax')(dens1)
    entity_model = keras.models.Model([inputs[0], inputs[1], event], [bio_pred])  # model used to predict the subject
    train_model = keras.models.Model([inputs[0], inputs[1], bio_label, event], [bio_pred])
    loss = K.sparse_categorical_crossentropy(bio_label, bio_pred)
    loss = K.sum(loss * mask[:, :, 0]) / K.sum(mask)
    train_model.add_loss(loss)
    train_model.summary()
    train_model.compile(optimizer=keras.optimizers.Adam(lr=3e-5))
    load_model_weights_from_checkpoint(train_model, config, checkpoint_file, training)
    return train_model, entity_model
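# Usage sketch (paths and array names are assumptions, not taken from the original repo):
# the JSON config and checkpoint follow the standard Google BERT release layout, and
# token_ids / segment_ids / bio_labels / event_ids are padded arrays prepared elsewhere.
def build_and_train(token_ids, segment_ids, bio_labels, event_ids,
                    config_path='bert/bert_config.json',
                    ckpt_path='bert/bert_model.ckpt'):
    train_model, entity_model = build_model_from_config(config_path, ckpt_path, training=False)
    # the masked loss is attached with add_loss, so fit() takes no separate target array
    train_model.fit([token_ids, segment_ids, bio_labels, event_ids], None,
                    batch_size=16, epochs=5)
    return entity_model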
def model(self):
    word_input = Input(shape=(self.maxlen_sentence,))                    # [batch, sentence]
    char_input = Input(shape=(self.maxlen_sentence, self.maxlen_word,))  # [batch, sentence, word]
    ner_label = Input(shape=(self.maxlen_sentence,))
    # relation_label = Input(shape=(self.maxlen_sentence,))              # [batch, sentence, n_classes]
    mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(word_input)
    word_embedding = Embedding(self.word_vocab_size, self.word_embed_size, mask_zero=True,
                               weights=[self.embedding_martrix], name='word_embedding',
                               trainable=True)(word_input)                # [batch, sentence, embed]
    char_embedding = Embedding(self.char_vocab_size, self.char_embed_size, mask_zero=True,
                               name='char_embedding', trainable=True)(char_input)  # [batch, sentence, word, embed]
    if self.embedding_dropout_prob:
        word_embedding = Dropout(self.embedding_dropout_prob)(word_embedding)
        char_embedding = Dropout(self.embedding_dropout_prob)(char_embedding)

    if self.is_use_char_embedding:
        # char embedding: BiLSTM over characters followed by attention pooling
        char_embedding_shape = K.int_shape(char_embedding)  # [batch, sentence, word, dim]
        # reshape to [batch * sentence, word, char_embed_size]
        char_embedding_reshaped = self.reshape_layer_1(char_embedding, char_embedding_shape)
        char_lstm = Bidirectional(MaskedLSTM(units=self.char_embed_size // 2, return_sequences=True,
                                             name='char_lstm_layer'))(char_embedding_reshaped)
        attention = TimeDistributed(Dense(1, activation='tanh'))(char_lstm)
        attention = MaskFlatten()(attention)
        attention = Activation('softmax')(attention)
        attention = MaskRepeatVector(self.char_embed_size)(attention)
        attention = MaskPermute([2, 1])(attention)
        sent_representation = multiply([char_lstm, attention])
        attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)
        # reshape the pooled vectors back to [batch, sentence, char_embed_size]
        char_embedding = self.reshape_layer_2(attention, char_embedding_shape)
        if self.word_char_embed_mode == 'concate':
            embedding = Concatenate(axis=-1)([word_embedding, char_embedding])
        else:
            embedding = Gate_Add_Lyaer()([word_embedding, char_embedding])
    else:
        embedding = word_embedding

    # multi-layer self-attention for NER prediction
    if self.embedding_dropout_prob:
        embedding = Dropout(self.embedding_dropout_prob)(embedding)

    # part 1: encoder (plain BiLSTM variant of the CNN/LSTM/FNN + self-attention block)
    lstm = Bidirectional(MaskedLSTM(units=self.hidden_size // 2, return_sequences=True),
                         name='lstm_layer0')(embedding)
    if self.nn_dropout_prob:
        lstm = Dropout(self.nn_dropout_prob)(lstm)
    # optional stacked LSTM layers:
    # if self.multi_layers >= 2:
    #     for i in range(1, self.multi_layers):
    #         lstm = Bidirectional(CuDNNLSTM(self.hidden_size // 2, return_sequences=True),
    #                              name='lstm_layer{}'.format(i))(lstm)
    #         if self.nn_dropout_prob:
    #             lstm = Dropout(self.nn_dropout_prob)(lstm)
    bio_pred = Dense(self.num_classes, activation='softmax')(lstm)
    pred_model = Model([word_input, char_input], bio_pred)

    train_model = Model([word_input, char_input, ner_label], bio_pred)
    loss = K.sparse_categorical_crossentropy(ner_label, bio_pred)
    loss = K.sum(loss * mask[:, :, 0]) / K.sum(mask)
    train_model.summary()
    train_model.add_loss(loss)
    train_model.compile(keras.optimizers.Adam(lr=self.learning_rate))
    return train_model, pred_model
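# The masked loss built inline in all three builders above can be read as the helper below;
# this is an illustrative sketch (the function name is hypothetical), mirroring the
# K.sparse_categorical_crossentropy / mask arithmetic used in each train_model.
def masked_sparse_ce(y_true, y_pred, mask):
    # y_true: [batch, seq_len] integer labels
    # y_pred: [batch, seq_len, n_classes] softmax outputs
    # mask:   [batch, seq_len, 1] float 0/1 padding mask, as produced by the Lambda above
    loss = K.sparse_categorical_crossentropy(y_true, y_pred)   # [batch, seq_len]
    return K.sum(loss * mask[:, :, 0]) / K.sum(mask)           # average over non-padded tokens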