def build_model(self, inputs, outputs):
    outputs_spatial = L.SpatialDropout1D(self.dropout_spatial)(outputs)
    conv_pools = []
    for filter_size in self.filters_size:
        x = L.Conv1D(filters=self.filters_num,
                     kernel_size=filter_size,
                     padding="valid",
                     kernel_initializer="normal",
                     activation="relu",
                     )(outputs_spatial)
        capsule = Capsule_bojone(num_capsule=self.num_capsule,
                                 dim_capsule=self.dim_capsule,
                                 routings=self.routings,
                                 kernel_size=(filter_size, 1),
                                 share_weights=True)(x)
        conv_pools.append(capsule)
    capsule = L.Concatenate(axis=-1)(conv_pools)
    x = L.Flatten()(capsule)
    x = L.Dropout(self.dropout)(x)
    # dense-mid
    x = L.Dense(units=min(max(self.label, 64), self.embed_size),
                activation=self.activate_mid)(x)
    x = L.Dropout(self.dropout)(x)
    # dense-end, the last layer, mapping to the label space
    self.outputs = L.Dense(units=self.label, activation=self.activate_end)(x)
    self.model = M.Model(inputs=inputs, outputs=self.outputs)
    self.model.summary(132)
def build_model(self, inputs, outputs):
    """
    build_model.
    Args:
        inputs: tensor, input of model
        outputs: tensor, output of model
    Returns:
        None
    """
    embed_char = outputs[0]
    embed_word = outputs[1]
    if self.wclstm_embed_type == "ATTNENTION":
        x_word = L.TimeDistributed(SelfAttention(K.int_shape(embed_word)[-1]))(embed_word)
        x_word_shape = K.int_shape(x_word)
        # flatten the per-word attention outputs; target_shape excludes the batch axis
        x_word = L.Reshape(target_shape=(x_word_shape[1],
                                         x_word_shape[2] * x_word_shape[3]))(x_word)
        x_word = L.Dense(self.embed_size, activation=self.activate_mid)(x_word)
    # elif self.wclstm_embed_type == "SHORT":
    else:
        x_word = L.Lambda(lambda x: x[:, :, 0, :])(embed_word)
    outputs_concat = L.Concatenate(axis=-1)([embed_char, x_word])
    # LSTM or GRU
    if self.rnn_type == "LSTM":
        rnn_cell = L.LSTM
    elif self.rnn_type == "CuDNNLSTM":
        rnn_cell = L.CuDNNLSTM
    elif self.rnn_type == "CuDNNGRU":
        rnn_cell = L.CuDNNGRU
    else:
        rnn_cell = L.GRU
    # Bi-LSTM-CRF; each stacked layer feeds the next
    x = outputs_concat
    for nrl in range(self.num_rnn_layers):
        x = L.Bidirectional(rnn_cell(units=self.rnn_unit * (nrl + 1),
                                     return_sequences=True,
                                     activation=self.activate_mid,
                                     ))(x)
        x = L.Dropout(self.dropout)(x)
    outputs = x
    if self.use_crf:
        x = L.Dense(units=self.label, activation=self.activate_end)(outputs)
        self.CRF = ConditionalRandomField(self.crf_lr_multiplier, name="crf_bert4keras")
        self.outputs = self.CRF(x)
        self.trans = K.eval(self.CRF.trans).tolist()
        self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
        self.metrics = [self.CRF.dense_accuracy if self.use_onehot else self.CRF.sparse_accuracy]
    else:
        self.outputs = L.TimeDistributed(L.Dense(units=self.label,
                                                 activation=self.activate_end))(outputs)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)
def build_model(self, inputs, outputs):
    embedding_reshape = L.Reshape((self.length_max, self.embed_size, 1))(outputs)
    # extract n-gram features with max pooling; average pooling is rarely used here
    conv_pools = []
    for filter_size in self.filters_size:
        conv = L.Conv2D(filters=self.filters_num,
                        kernel_size=(filter_size, self.embed_size),
                        padding='valid',
                        kernel_initializer='normal',
                        activation='tanh',
                        )(embedding_reshape)
        pooled = L.MaxPool2D(pool_size=(self.length_max - filter_size + 1, 1),
                             strides=(1, 1),
                             padding='valid',
                             )(conv)
        conv_pools.append(pooled)
    # concatenate
    x = L.Concatenate(axis=-1)(conv_pools)
    x = L.Dropout(self.dropout)(x)
    x = L.Flatten()(x)
    self.outputs = L.Dense(units=self.label, activation=self.activate_end)(x)
    self.model = M.Model(inputs=inputs, outputs=self.outputs)
    self.model.summary(132)
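The 'valid' convolution plus max-pool pair above collapses each n-gram branch to a single feature vector before concatenation. A minimal shape walk-through, with hypothetical hyper-parameter values (not taken from any config in this repo):

# Shape check for one TextCNN branch (hypothetical hyper-parameters).
length_max, embed_size, filters_num = 128, 300, 256
for filter_size in [3, 4, 5]:
    conv_rows = length_max - filter_size + 1   # Conv2D 'valid' over (filter_size, embed_size)
    pool_size = length_max - filter_size + 1   # pool window chosen in build_model
    pool_rows = (conv_rows - pool_size) + 1    # stride 1, 'valid' -> always 1
    print(filter_size, (conv_rows, 1, filters_num), (pool_rows, 1, filters_num))
# e.g. filter_size=3 -> conv (126, 1, 256), pooled (1, 1, 256): one vector per branch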
def build_model(self, inputs, outputs):
    if self.rnn_type == "LSTM":
        rnn_cell = L.LSTM
    elif self.rnn_type == "CuDNNLSTM":
        rnn_cell = L.CuDNNLSTM
    elif self.rnn_type == "CuDNNGRU":
        rnn_cell = L.CuDNNGRU
    else:
        rnn_cell = L.GRU
    # Bi-RNN (LSTM/GRU); stacked layers feed each other
    x = outputs
    for _ in range(self.rnn_layer_repeat):
        x = L.Bidirectional(rnn_cell(units=self.rnn_unit,
                                     return_sequences=True,
                                     activation=self.activate_mid))(x)
        x = L.Dropout(self.dropout)(x)
    # dense-mid
    x = L.Flatten()(x)
    x = L.Dense(units=min(max(self.label, 128), self.embed_size),
                activation=self.activate_mid)(x)
    x = L.Dropout(self.dropout)(x)
    # dense-end
    self.outputs = L.Dense(units=self.label, activation=self.activate_end)(x)
    self.model = M.Model(inputs=inputs, outputs=self.outputs)
    self.model.summary(132)
def build_model(self, inputs, outputs):
    # rnn type
    if self.rnn_type == "LSTM":
        layer_cell = L.LSTM
    else:
        layer_cell = L.GRU
    # backward pass
    x_backwards = layer_cell(units=self.rnn_unit,
                             return_sequences=True,
                             kernel_regularizer=keras.regularizers.l2(0.32 * 0.1),
                             recurrent_regularizer=keras.regularizers.l2(0.32),
                             go_backwards=True)(outputs)
    # re-align the backward states with the original time order
    x_backwards_reverse = L.Lambda(lambda x: K.reverse(x, axes=1))(x_backwards)
    # forward pass
    x_forwards = layer_cell(units=self.rnn_unit,
                            return_sequences=True,
                            kernel_regularizer=keras.regularizers.l2(0.32 * 0.1),
                            recurrent_regularizer=keras.regularizers.l2(0.32),
                            go_backwards=False)(outputs)
    # concatenate forward context, embedding, and backward context (RCNN)
    x_feb = L.Concatenate(axis=2)([x_forwards, outputs, x_backwards_reverse])
    # dropout
    x_feb = L.Dropout(self.dropout)(x_feb)
    # embedding size after concatenation
    dim_2 = K.int_shape(x_feb)[2]
    x_feb_reshape = L.Reshape((self.length_max, dim_2, 1))(x_feb)
    # n-gram convolution and max pooling
    conv_pools = []
    for filter_size in self.filters_size:
        conv = L.Conv2D(filters=self.filters_num,
                        kernel_size=(filter_size, dim_2),
                        padding='valid',
                        kernel_initializer='normal',
                        activation='relu',
                        )(x_feb_reshape)
        pooled = L.MaxPooling2D(pool_size=(self.length_max - filter_size + 1, 1),
                                strides=(1, 1),
                                padding='valid',
                                )(conv)
        conv_pools.append(pooled)
    # concatenate the TextCNN branches
    x = L.Concatenate()(conv_pools)
    x = L.Dropout(self.dropout)(x)
    # dense-mid, fully connected hidden layer
    x = L.Flatten()(x)
    x = L.Dense(units=min(max(self.label, 64), self.embed_size),
                activation=self.activate_mid)(x)
    x = L.Dropout(self.dropout)(x)
    # dense-end, the last layer, mapping to the label space
    self.outputs = L.Dense(units=self.label, activation=self.activate_end)(x)
    self.model = M.Model(inputs=inputs, outputs=self.outputs)
    self.model.summary(132)
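The backward RNN emits its states in reversed time order, which is why `K.reverse` is applied before concatenation. A small numpy sketch of the equivalent operation:

import numpy as np

# x simulates a (batch, seq_len, hidden) tensor of backward RNN states.
x = np.arange(2 * 3 * 1).reshape((2, 3, 1))
x_reversed = x[:, ::-1, :]  # same effect as K.reverse(x, axes=1)
# After reversing, step t of the backward states lines up with step t of the
# forward states and the embedding, so Concatenate(axis=2) is time-aligned.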
def build_model(self, inputs, outputs):
    x_m = L.GlobalMaxPooling1D()(outputs)
    x_g = L.GlobalAveragePooling1D()(outputs)
    x = L.Concatenate()([x_g, x_m])
    x = L.Dense(min(max(self.label, 128), self.embed_size),
                activation=self.activate_mid)(x)
    x = L.Dropout(self.dropout)(x)
    self.outputs = L.Dense(units=self.label, activation=self.activate_end)(x)
    self.model = M.Model(inputs=inputs, outputs=self.outputs)
    self.model.summary(132)
def viterbi_decode(self, nodes: np.ndarray, trans: np.ndarray) -> np.ndarray:
    """
    Viterbi decoding for a CRF: find the optimal label path.
    code from url: https://github.com/bojone/bert4keras
    author : bojone
    Args:
        nodes: np.ndarray, shape=[seq_len, num_labels], output of model predict
        trans: np.ndarray, shape=[num_labels, num_labels], state transition matrix
    Returns:
        res: np.ndarray, label of sequence
    """
    labels = np.arange(len(self.l2i)).reshape((1, -1))
    scores = nodes[0].reshape((-1, 1))
    scores[1:] -= np.inf  # the first label must be 0
    paths = labels
    for l in range(1, len(nodes)):
        M = scores + trans + nodes[l].reshape((1, -1))
        idxs = M.argmax(0)
        scores = M.max(0).reshape((-1, 1))
        path_idxs = paths[:, idxs]
        paths = np.concatenate([path_idxs, labels], 0)
    # pick the best-scoring final path (scores has shape [num_labels, 1])
    return paths[:, scores[:, 0].argmax()]
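A self-contained numpy example of the same decoding routine, with made-up scores for a 3-step, 3-label sequence (the values are illustrative only):

import numpy as np

def viterbi_decode(nodes, trans):
    """Standalone copy of the method above, without the class state."""
    num_labels = nodes.shape[1]
    labels = np.arange(num_labels).reshape((1, -1))
    scores = nodes[0].reshape((-1, 1))
    scores[1:] -= np.inf  # the first label must be 0
    paths = labels
    for l in range(1, len(nodes)):
        M = scores + trans + nodes[l].reshape((1, -1))
        idxs = M.argmax(0)
        scores = M.max(0).reshape((-1, 1))
        paths = np.concatenate([paths[:, idxs], labels], 0)
    return paths[:, scores[:, 0].argmax()]

nodes = np.log(np.array([[0.7, 0.2, 0.1],   # per-step label scores (made up)
                         [0.1, 0.8, 0.1],
                         [0.2, 0.1, 0.7]]))
trans = np.zeros((3, 3))                    # uniform transition matrix (made up)
print(viterbi_decode(nodes, trans))         # -> [0 1 2]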
def build_model(self, inputs, outputs):
    x = L.Dense(units=self.label, activation=self.activate_mid)(outputs)
    self.CRF = ConditionalRandomField(self.crf_lr_multiplier, name="crf_bert4keras")
    self.outputs = self.CRF(x)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)
    self.trans = K.eval(self.CRF.trans).tolist()
    self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
    self.metrics = [self.CRF.dense_accuracy if self.use_onehot else self.CRF.sparse_accuracy]
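For context, a minimal self-contained sketch of how this Dense + CRF head is typically compiled. The shapes, label count, and lr multiplier here are assumptions for illustration, not values from this repo:

import keras
import keras.layers as L
import keras.models as M
from bert4keras.layers import ConditionalRandomField

inputs = L.Input(shape=(None, 128))                     # (seq_len, feature_dim), assumed
x = L.Dense(units=7, activation="tanh")(inputs)         # 7 labels, assumed
CRF = ConditionalRandomField(1, name="crf_bert4keras")  # lr_multiplier=1, assumed
outputs = CRF(x)
model = M.Model(inputs, outputs)
# sparse_* for integer label ids, dense_* for one-hot labels, as in build_model above
model.compile(optimizer=keras.optimizers.Adam(1e-3),
              loss=CRF.sparse_loss,
              metrics=[CRF.sparse_accuracy])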
def build_model(self, inputs, outputs):
    if self.embed_type in ["xlnet"]:
        # x = L.Lambda(lambda x: x[:, -2:-1, :])(outputs)  # XLNet: take the CLS token
        x = L.Lambda(lambda x: x[:, -1], name="Token-CLS")(outputs)
    else:
        # x = L.Lambda(lambda x: x[:, 0:1, :])(outputs)  # BERT-like: take the CLS token
        x = L.Lambda(lambda x: x[:, 0], name="Token-CLS")(outputs)
    # x = L.Flatten()(x)
    # final softmax
    self.outputs = L.Dense(self.label,
                           activation=self.activate_end,
                           kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02))(x)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)
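A numpy sketch of the two pooling variants: BERT-like encoders put their [CLS] token first, while XLNet puts its summary token last (the array is illustrative only):

import numpy as np

hidden = np.arange(2 * 4 * 3).reshape((2, 4, 3))  # (batch, seq_len, hidden)
cls_bert = hidden[:, 0]    # BERT-like: first position, shape (2, 3)
cls_xlnet = hidden[:, -1]  # XLNet: last position, shape (2, 3)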
def build_model(self, inputs, outputs):
    x = L.SpatialDropout1D(self.dropout_spatial)(outputs)
    x = SelfAttention(K.int_shape(outputs)[-1])(x)
    x_max = L.GlobalMaxPooling1D()(x)
    x_avg = L.GlobalAveragePooling1D()(x)
    x = L.Concatenate()([x_max, x_avg])
    x = L.Dropout(self.dropout)(x)
    x = L.Flatten()(x)
    # dense-mid
    x = L.Dense(units=min(max(self.label, 64), self.embed_size),
                activation=self.activate_mid)(x)
    x = L.Dropout(self.dropout)(x)
    # dense-end, the last layer, mapping to the label space
    self.outputs = L.Dense(units=self.label, activation=self.activate_end)(x)
    self.model = M.Model(inputs=inputs, outputs=self.outputs)
    self.model.summary(132)
def build_model(self, inputs, outputs): # rnn type, RNN的类型 if self.rnn_unit == "LSTM": layer_cell = L.LSTM elif self.rnn_unit == "CuDNNLSTM": layer_cell = L.CuDNNLSTM elif self.rnn_unit == "CuDNNGRU": layer_cell = L.CuDNNGRU else: layer_cell = L.GRU # embedding遮挡 embedding_output_spatial = L.SpatialDropout1D( self.dropout_spatial)(outputs) # CNN convs = [] for kernel_size in self.filters_size: conv = L.Conv1D( self.filters_num, kernel_size=kernel_size, strides=1, padding='SAME', kernel_regularizer=keras.regularizers.l2(self.l2), bias_regularizer=keras.regularizers.l2(self.l2), )(embedding_output_spatial) convs.append(conv) x = L.Concatenate(axis=1)(convs) # Bi-LSTM, 论文中使用的是LSTM x = L.Bidirectional( layer_cell(units=self.rnn_unit, return_sequences=True, activation='relu', kernel_regularizer=keras.regularizers.l2(self.l2), recurrent_regularizer=keras.regularizers.l2( self.l2)))(x) x = L.Dropout(self.dropout)(x) x = L.Flatten()(x) # dense-mid x = L.Dense(units=min(max(self.label, 64), self.embed_size), activation=self.activate_mid)(x) x = L.Dropout(self.dropout)(x) # dense-end, 最后一层, dense到label self.outputs = L.Dense(units=self.label, activation=self.activate_end)(x) self.model = M.Model(inputs=inputs, outputs=self.outputs) self.model.summary(132)
def build_model(self, inputs, outputs): # rnn type, RNN的类型 if self.rnn_unit == "LSTM": layer_cell = L.LSTM elif self.rnn_unit == "CuDNNLSTM": layer_cell = L.CuDNNLSTM elif self.rnn_unit == "CuDNNGRU": layer_cell = L.CuDNNGRU else: layer_cell = L.GRU x = L.Activation(self.activate_mid)(outputs) # embedding遮挡 x = L.SpatialDropout1D(self.dropout_spatial)(x) lstm_0_output = L.Bidirectional(layer_cell( units=self.rnn_unit, return_sequences=True, activation='relu', kernel_regularizer=keras.regularizers.l2(self.l2), recurrent_regularizer=keras.regularizers.l2(self.l2)), name="bi_lstm_0")(x) lstm_1_output = L.Bidirectional(layer_cell( units=self.rnn_unit, return_sequences=True, activation='relu', kernel_regularizer=keras.regularizers.l2(self.l2), recurrent_regularizer=keras.regularizers.l2(self.l2)), name="bi_lstm_1")(lstm_0_output) x = L.Concatenate()([lstm_1_output, lstm_0_output, x]) x = AttentionWeightedAverage(name='attlayer', return_attention=False)(x) x = L.Dropout(self.dropout)(x) x = L.Flatten()(x) # dense-mid x = L.Dense(units=min(max(self.label, 64), self.embed_size), activation=self.activate_mid)(x) x = L.Dropout(self.dropout)(x) # dense-end, 最后一层, dense到label self.outputs = L.Dense(units=self.label, activation=self.activate_end)(x) self.model = M.Model(inputs=inputs, outputs=self.outputs) self.model.summary(132)
def build_model(self, inputs, outputs):
    pools = []
    for i in range(len(self.filters_size)):
        # first block: wide convolution + dynamic k-max pooling
        conv_1 = wide_convolution(name="wide_convolution_{}".format(i),
                                  filter_num=self.filters_num,
                                  filter_size=self.filters_size[i][0])(outputs)
        top_k_1 = select_k(self.length_max, len(self.filters_size[i]), 1)  # compute k
        dynamic_k_max_pooled_1 = dynamic_k_max_pooling(top_k=top_k_1)(conv_1)
        # second block: wide convolution + dynamic k-max pooling
        conv_2 = wide_convolution(name="wide_convolution_{}_{}".format(i, i),
                                  filter_num=self.filters_num,
                                  filter_size=self.filters_size[i][1])(dynamic_k_max_pooled_1)
        top_k_2 = select_k(self.length_max, len(self.filters_size[i]), 2)
        dynamic_k_max_pooled_2 = dynamic_k_max_pooling(top_k=top_k_2)(conv_2)
        # third block: wide convolution, folding, dynamic k-max pooling
        conv_3 = wide_convolution(name="wide_convolution_{}_{}_{}".format(i, i, i),
                                  filter_num=self.filters_num,
                                  filter_size=self.filters_size[i][2])(dynamic_k_max_pooled_2)
        fold_conv_3 = prem_fold()(conv_3)
        top_k_3 = select_k(self.length_max, len(self.filters_size[i]), 3)  # compute k
        dynamic_k_max_pooled_3 = dynamic_k_max_pooling(top_k=top_k_3)(fold_conv_3)
        pools.append(dynamic_k_max_pooled_3)
    pools_concat = L.Concatenate(axis=1)(pools)
    pools_concat_dropout = L.Dropout(self.dropout)(pools_concat)
    x = L.Flatten()(pools_concat_dropout)
    # dense-end, the last layer, mapping to the label space
    self.outputs = L.Dense(units=self.label, activation=self.activate_end)(x)
    self.model = M.Model(inputs=inputs, outputs=self.outputs)
    self.model.summary(132)
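Assuming `dynamic_k_max_pooling` follows the DCNN paper (keep the k largest activations per feature map while preserving their original order), the core operation looks like this in numpy:

import numpy as np

def k_max_pooling_1d(x, k):
    """Top-k values of a 1-D activation sequence, original order preserved."""
    idx = np.sort(np.argsort(x)[-k:])  # indices of the k largest, re-sorted by position
    return x[idx]

print(k_max_pooling_1d(np.array([1.0, 5.0, 2.0, 4.0, 3.0]), k=3))  # [5. 4. 3.]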
def build_model(self, inputs, outputs):
    """
    build_model.
    Args:
        inputs: tensor, input of model
        outputs: tensor, output of model
    Returns:
        None
    """
    # CuDNNGRU or GRU
    if self.rnn_type.upper() == "CUDNNGRU":
        rnn_cell = L.CuDNNGRU
    else:
        rnn_cell = L.GRU
    # Bi-GRU; stacked layers feed each other
    x = outputs
    for nrl in range(self.num_rnn_layers):
        x = L.Bidirectional(rnn_cell(units=self.rnn_unit,
                                     return_sequences=True,
                                     activation=self.activate_mid,
                                     ))(x)
        x = L.Dropout(self.dropout)(x)
    if self.use_crf:
        x = L.Dense(units=self.label, activation=self.activate_end)(x)
        self.CRF = ConditionalRandomField(self.crf_lr_multiplier, name="crf_bert4keras")
        self.outputs = self.CRF(x)
        self.trans = K.eval(self.CRF.trans).tolist()
        self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
        self.metrics = [self.CRF.dense_accuracy if self.use_onehot else self.CRF.sparse_accuracy]
    else:
        self.outputs = L.TimeDistributed(L.Dense(units=self.label,
                                                 activation=self.activate_end))(x)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)
def build_model(self, inputs, outputs):
    # stacked conv + threshold-relu (+ optional pooling) blocks, applied sequentially
    x = outputs
    for char_cnn_size in self.char_cnn_layers:
        x = L.Convolution1D(filters=char_cnn_size[0],
                            kernel_size=char_cnn_size[1])(x)
        x = L.ThresholdedReLU(self.threshold)(x)
        if char_cnn_size[2] != -1:
            x = L.MaxPooling1D(pool_size=char_cnn_size[2], strides=1)(x)
    x = L.Flatten()(x)
    # fully connected blocks
    for full in self.full_connect_layers:
        x = L.Dense(units=full)(x)
        x = L.ThresholdedReLU(self.threshold)(x)
        x = L.Dropout(self.dropout)(x)
    # dense label
    self.outputs = L.Dense(units=self.label, activation=self.activate_end)(x)
    self.model = M.Model(inputs=inputs, outputs=self.outputs)
    self.model.summary(132)
def build_model(self, inputs, outputs):
    """
    build_model.
    Args:
        inputs: tensor, input of model
        outputs: tensor, output of model
    Returns:
        None
    """
    # LSTM or GRU
    if self.rnn_type == "LSTM":
        rnn_cell = L.LSTM
    elif self.rnn_type == "CuDNNLSTM":
        rnn_cell = L.CuDNNLSTM
    elif self.rnn_type == "CuDNNGRU":
        rnn_cell = L.CuDNNGRU
    else:
        rnn_cell = L.GRU
    # Bi-LSTM-LAN
    for nrl in range(self.num_rnn_layers):
        x = L.Bidirectional(rnn_cell(units=self.rnn_unit * (nrl + 1),
                                     return_sequences=True,
                                     activation=self.activate_mid,
                                     ))(outputs)
        x_att = SelfAttention(K.int_shape(x)[-1])(x)
        outputs = L.Concatenate()([x, x_att])
        outputs = L.Dropout(self.dropout)(outputs)
    if self.use_crf:
        x = L.Dense(units=self.label, activation=self.activate_end)(outputs)
        self.CRF = ConditionalRandomField(self.crf_lr_multiplier, name="crf_bert4keras")
        self.outputs = self.CRF(x)
        self.trans = K.eval(self.CRF.trans).tolist()
        self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
        self.metrics = [self.CRF.dense_accuracy if self.use_onehot else self.CRF.sparse_accuracy]
    else:
        self.outputs = L.TimeDistributed(L.Dense(units=self.label,
                                                 activation=self.activate_end))(outputs)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)
def load_model(self):
    """
    Load a Keras model from h5, including the graph nodes and custom_objects.
    """
    self.model = M.load_model(self.path_model_h5, compile=False)
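When the saved graph contains custom layers (e.g. the CRF or attention layers used above), `keras.models.load_model` generally needs those classes passed explicitly even with `compile=False`. A hedged usage sketch; the path and the set of custom layers here are assumptions:

# Hypothetical usage; layer names assumed from the models above.
model = M.load_model("path/to/model.h5",
                     compile=False,
                     custom_objects={"ConditionalRandomField": ConditionalRandomField,
                                     "SelfAttention": SelfAttention})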
def build_model(self, inputs, outputs):
    """
    build_model.
    Args:
        inputs: tensor, input of model
        outputs: tensor, output of model
    Returns:
        None
    """
    # CNN: n-gram feature extraction with dilated convolutions (DGCNN/IDCNN)
    conv_pools = []
    for i in range(len(self.filters_size)):
        conv = L.Conv1D(name="conv-{0}-{1}".format(i, self.filters_size[i]),
                        dilation_rate=self.atrous_rates[0],
                        kernel_size=self.filters_size[i],
                        activation=self.activate_mid,
                        filters=self.filters_num,
                        padding="same",
                        )(outputs)
        for j in range(len(self.atrous_rates) - 1):
            conv = L.Conv1D(name="conv-{0}-{1}-{2}".format(i, self.filters_size[i], j),
                            dilation_rate=self.atrous_rates[j + 1],  # apply each remaining rate once
                            kernel_size=self.filters_size[i],
                            activation=self.activate_mid,
                            filters=self.filters_num,
                            padding="same",
                            )(conv)
            conv = L.Dropout(name="dropout-{0}-{1}-{2}".format(i, self.filters_size[i], j),
                             rate=self.dropout,
                             )(conv)
        conv_pools.append(conv)
    # concatenate
    x = L.Concatenate(axis=-1)(conv_pools)
    x = L.Dropout(self.dropout)(x)
    # CRF or Dense
    if self.use_crf:
        x = L.Dense(units=self.label, activation=self.activate_end)(x)
        self.CRF = ConditionalRandomField(self.crf_lr_multiplier, name="crf_bert4keras")
        self.outputs = self.CRF(x)
        self.trans = K.eval(self.CRF.trans).tolist()
        self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
        self.metrics = [self.CRF.dense_accuracy if self.use_onehot else self.CRF.sparse_accuracy]
    else:
        x = L.Bidirectional(L.GRU(activation=self.activate_mid,
                                  return_sequences=True,
                                  units=self.rnn_unit,
                                  name="bi-gru",
                                  ))(x)
        self.outputs = L.TimeDistributed(L.Dense(activation=self.activate_end,
                                                 name="dense-output",
                                                 units=self.label,
                                                 ))(x)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)
logger.info("train/dev corpus loaded")
# initialize the x/y preprocessor
preprocess_xy = ListPrerocessXY(embed, train_data, path_dir=path_model_dir,
                                length_max=length_max)
x = L.Lambda(lambda x: x[:, 0], name="Token-CLS")(embed.model.output)
# final softmax
outputs = L.Dense(len(preprocess_xy.l2i),
                  activation="softmax",
                  kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02))(x)
model = M.Model(embed.model.input, outputs)
model.summary(132)
model.compile(optimizer=O.Adam(lr=1e-5),
              loss="categorical_crossentropy",
              metrics=["accuracy"])
len_train_data = len(train_data)
lg_train = ListGenerator(train_data, preprocess_xy,
                         batch_size=batch_size, len_data=len_train_data)
lg_dev = None
# monitor drives early stopping and checkpointing: "loss", "acc", "val_loss", "val_acc", etc.
monitor = "val_loss"
if dev_data:
def build_model(self, inputs, outputs):
    """
    build_model.
    Args:
        inputs: tensor, input of model
        outputs: tensor, output of model
    Returns:
        None
    """
    # LSTM or GRU
    if self.rnn_type == "LSTM":
        rnn_cell = L.LSTM
    elif self.rnn_type == "CuDNNLSTM":
        rnn_cell = L.CuDNNLSTM
    elif self.rnn_type == "CuDNNGRU":
        rnn_cell = L.CuDNNGRU
    else:
        rnn_cell = L.GRU
    # CNN-LSTM: n-gram feature extraction; max pooling is preferred over average pooling
    conv_pools = []
    for i in range(len(self.filters_size)):
        conv = L.Conv1D(name="conv-{0}-{1}".format(i, self.filters_size[i]),
                        kernel_size=self.filters_size[i],
                        activation=self.activate_mid,
                        filters=self.filters_num,
                        padding='same',
                        )(outputs)
        conv_rnn = L.Bidirectional(rnn_cell(name="bi-lstm-{0}-{1}".format(i, self.filters_size[i]),
                                            activation=self.activate_mid,
                                            return_sequences=True,
                                            units=self.rnn_unit,
                                            ))(conv)
        x_dropout = L.Dropout(rate=self.dropout,
                              name="dropout-{0}-{1}".format(i, self.filters_size[i]))(conv_rnn)
        conv_pools.append(x_dropout)
    # concatenate
    x = L.Concatenate(axis=-1)(conv_pools)
    x = L.Dropout(self.dropout)(x)
    # CRF or Dense
    if self.use_crf:
        x = L.Dense(units=self.label, activation=self.activate_end)(x)
        self.CRF = ConditionalRandomField(self.crf_lr_multiplier, name="crf_bert4keras")
        self.outputs = self.CRF(x)
        self.trans = K.eval(self.CRF.trans).tolist()
        self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
        self.metrics = [self.CRF.dense_accuracy if self.use_onehot else self.CRF.sparse_accuracy]
    else:
        self.outputs = L.TimeDistributed(L.Dense(units=self.label,
                                                 activation=self.activate_end,
                                                 name="dense-output"))(x)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)