Exemplo n.º 1
0
def build_model(embeddings=100,
                vocab_size=vocab_size,
                rnn_units=100,
                window_size=1):
    """Build a joint segmentation/tagging BiLSTM-CRF model.

    Args:
        embeddings: int, dimension of the token embedding vectors.
        vocab_size: int, vocabulary size (default is bound to the
            module-level ``vocab_size`` at function-definition time).
        rnn_units: int, hidden units per LSTM direction.
        window_size: int, context window for ``WindowEmbedding``.

    Returns:
        (model, Seq_crf, Tag_crf): the compiled two-headed Keras model plus
        both CRF layers (their ``trans`` matrices are needed for decoding).
    """
    x_in = Input(shape=(None, ))

    # Window-based embedding; mask_zero propagates a padding mask downstream.
    output = WindowEmbedding(window_size,
                             input_dim=vocab_size,
                             output_dim=embeddings,
                             trainable=True,
                             mask_zero=True)(x_in)

    output = Bidirectional(LSTM(units=rnn_units, return_sequences=True),
                           name='bilstm_1')(output)

    # Segmentation head branches off the first BiLSTM ...
    seq_output = Dropout(0.5)(output)

    # ... while the tagging head goes through a second BiLSTM.
    output = Bidirectional(LSTM(units=rnn_units, return_sequences=True),
                           name='bilstm_2')(output)

    tag_output = Dropout(0.5)(output)

    seq_output = TimeDistributed(Dense(num_seglabels),
                                 name='dense_seq')(seq_output)

    tag_output = TimeDistributed(Dense(num_labels),
                                 name='dense_tag')(tag_output)

    # bert4keras CRFs: trainable loss layers with their own LR multipliers.
    Seq_crf = ConditionalRandomField(lr_multiplier=seq_crf_lr_multiplier,
                                     name='seq_crf')

    Tag_crf = ConditionalRandomField(lr_multiplier=tag_crf_lr_multiplier,
                                     name='tag_crf')

    seq_output = Seq_crf(seq_output)

    tag_output = Tag_crf(tag_output)

    model = Model(x_in, [seq_output, tag_output])
    model.summary()

    model.compile(
        loss=[Seq_crf.sparse_loss, Tag_crf.sparse_loss],
        optimizer=build_optimizer('Adam'),
        metrics=[SparseAccuracy()])
    return model, Seq_crf, Tag_crf
Exemplo n.º 2
0
 def define_model(self) -> tf.keras.Model:
     """Build a pre-trained transformer + Dense + CRF tagging model.

     Side effects: sets ``self.transformer``, ``self.CRF`` and ``self.NER``.
     Returns the assembled (uncompiled) Keras model.
     """
     # Map the configured model type to a bert4keras model name.
     model_name = self.bert4keras_model_name_dict[
         self.pre_trained_model_type]
     params = {
         "config_path": self.pre_trained_model_config_path,
         "checkpoint_path": self.pre_trained_model_checkpoint_path,
         "model": model_name,
     }
     if self.simplified_tokenizer:
         # Restrict the embedding table to the kept token ids.
         params["keep_tokens"] = self.keep_tokens
     self.transformer = build_transformer_model(**params)
     # Tap the output of transformer block (bert_layers - 1).
     output_layer = "Transformer-%s-FeedForward-Norm" % (self.bert_layers -
                                                         1)
     output = self.transformer.get_layer(output_layer).output
     # len(categories)*2 + 1 logits — presumably B/I per category plus "O";
     # confirm against the label scheme used by NamedEntityRecognizer.
     output = tf.keras.layers.Dense(len(self.categories) * 2 + 1)(output)
     self.CRF = ConditionalRandomField(lr_multiplier=self.crf_lr_multiplier,
                                       name="CRF")
     output = self.CRF(output)
     model = tf.keras.models.Model(self.transformer.input, output)
     # Decoder gets the CRF's learned transition matrix; starts/ends at tag 0.
     self.NER = NamedEntityRecognizer(
         self.tokenizer,
         model,
         self.categories,
         trans=K.eval(self.CRF.trans),
         starts=[0],
         ends=[0],
     )
     return model
Exemplo n.º 3
0
    def build_albert_model(self):
        """Load an ALBERT encoder, add Dense + CRF, and load fine-tuned weights.

        Side effects: sets ``self.__crf`` and ``self._model``; returns nothing.
        """
        del self.albert_model
        file_name = f'albert_{self.pretrain_name}_pretrain.h5'  ## For convenient loading, the pre-trained model was previously saved as a .h5 file
        if os.path.exists(file_name):
            # Fast path: reuse the previously saved pre-trained model.
            pretrain_model = load_model(file_name, compile=False)
        else:
            pretrain_model = build_transformer_model(
                config_path=self.config,
                checkpoint_path=self.checkpoint,
                model='albert_unshared' if self.unshared else 'albert',
                return_keras_model=True)

        if not self.unshared:
            # Shared-weights ALBERT reuses one layer; pick the right call output.
            output_layer = 'Transformer-FeedForward-Norm'
            output = pretrain_model.get_layer(output_layer).get_output_at(
                self.albert_layers - 1)
        else:
            # Unshared variant: each block is a distinct, indexed layer.
            output_layer = 'Transformer-%s-FeedForward-Norm' % (
                self.albert_layers - 1)
            output = pretrain_model.get_layer(output_layer).output
        output = Dense(self.__num_labels)(output)
        self.__crf = ConditionalRandomField(
            lr_multiplier=self.crf_lr_multiplier)
        output = self.__crf(output)
        model = Model(pretrain_model.input, output)
        # Load the fine-tuned task weights on top of the assembled graph.
        model.load_weights(self.weight_path)
        self._model = model
Exemplo n.º 4
0
 def __init__(self):
     """Create the CRF layer, build the model, and set up the NER decoder."""
     # CRF is created before get_model() — presumably get_model() wires it
     # into the network; confirm in that method.
     self.CRF = ConditionalRandomField(
         lr_multiplier=config.crf_lr_multiplier)
     self.model = self.get_model()
     # Decoder uses the CRF's learned transition matrix; starts/ends at tag 0.
     self.NER = NamedEntityRecognizer(trans=K.eval(self.CRF.trans),
                                      starts=[0],
                                      ends=[0])
Exemplo n.º 5
0
 def build_model(self, inputs, outputs):
     """
     build_model.
     Args:
         inputs: tensor, input of model
         outputs: tensor, output of model
     Returns:
         None
     """
     embed_char = outputs[0]
     embed_word = outputs[1]
     if self.wclstm_embed_type == "ATTNENTION":
         # Self-attention over each character's word-candidate vectors ...
         x_word = L.TimeDistributed(
             SelfAttention(K.int_shape(embed_word)[-1]))(embed_word)
         x_word_shape = K.int_shape(x_word)
         # ... then flatten (batch, seq, n, d) -> (batch, seq, n*d).
         # BUGFIX: the original built the Reshape layer without applying it
         # to x_word and passed a nested tuple (x_word_shape[:2]) as
         # target_shape; -1 preserves the possibly-unknown sequence length.
         x_word = L.Reshape(target_shape=(-1,
                                          x_word_shape[2] *
                                          x_word_shape[3]))(x_word)
         x_word = L.Dense(self.embed_size,
                          activation=self.activate_mid)(x_word)
     # elif self.wclstm_embed_type == "SHORT":
     else:
         # Default: keep only the first word candidate per character.
         x_word = L.Lambda(lambda x: x[:, :, 0, :])(embed_word)
     outputs_concat = L.Concatenate(axis=-1)([embed_char, x_word])
     # LSTM or GRU
     if self.rnn_type == "LSTM":
         rnn_cell = L.LSTM
     elif self.rnn_type == "CuDNNLSTM":
         rnn_cell = L.CuDNNLSTM
     elif self.rnn_type == "CuDNNGRU":
         rnn_cell = L.CuDNNGRU
     else:
         rnn_cell = L.GRU
     # Bi-LSTM-CRF
     # NOTE(review): each iteration reads outputs_concat, so the RNN layers
     # run in parallel rather than stacked and only the last one is used —
     # confirm whether stacking was intended (compare the Bi-LSTM-LAN variant).
     for nrl in range(self.num_rnn_layers):
         x = L.Bidirectional(
             rnn_cell(
                 units=self.rnn_unit * (nrl + 1),
                 return_sequences=True,
                 activation=self.activate_mid,
             ))(outputs_concat)
         outputs = L.Dropout(self.dropout)(x)
     if self.use_crf:
         x = L.Dense(units=self.label,
                     activation=self.activate_end)(outputs)
         self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                           name="crf_bert4keras")
         self.outputs = self.CRF(x)
         # Transition matrix, exported for external Viterbi decoding.
         self.trans = K.eval(self.CRF.trans).tolist()
         self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
         self.metrics = [
             self.CRF.dense_accuracy
             if self.use_onehot else self.CRF.sparse_accuracy
         ]
     else:
         self.outputs = L.TimeDistributed(
             L.Dense(units=self.label,
                     activation=self.activate_end))(outputs)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
Exemplo n.º 6
0
 def build_model(self):
     """Build a BiLSTM + LSTM + CRF tagger over fixed-length (140) inputs."""
     seq_in = Input(shape=(140, ), dtype='float64')
     # Frozen embedding lookup over the whole vocabulary.
     x = Embedding(len(self.token_dict),
                   300,
                   input_length=140,
                   trainable=False)(seq_in)
     x = Bidirectional(
         LSTM(units=300,
              return_sequences=True,
              dropout=0.5,
              recurrent_dropout=0.5))(x)
     x = LSTM(units=300 * 2,
              return_sequences=True,
              dropout=0.5,
              recurrent_dropout=0.5)(x)
     x = TimeDistributed(Dense(17, activation='relu'))(x)
     # NOTE(review): 17 is passed positionally, which is the bert4keras CRF's
     # lr_multiplier — not a tag count. Confirm this is intended.
     crf = ConditionalRandomField(17)
     x = crf(x)
     model = Model(seq_in, x)
     model.compile(optimizer='adam',
                   loss=crf.sparse_loss,
                   metrics=[crf.sparse_accuracy])
     model.summary()
     return model
def build_model(embeddings=100,vocab_size=vocab_size,rnn_units=100):
    """Joint segmentation/tagging model with manually wired forward/backward LSTMs.

    The segmentation head uses only the forward LSTM; the tagging head uses
    the concatenation of the forward LSTM and a backward LSTM built by
    reversing the time axis, running an LSTM, then reversing back.

    Args:
        embeddings: int, embedding dimension.
        vocab_size: int, vocabulary size (default is bound to the
            module-level ``vocab_size`` at function-definition time).
        rnn_units: int, hidden units per LSTM.

    Returns:
        (model, Seq_crf, Tag_crf): compiled two-headed model plus both CRFs.
    """
    x_in = Input(shape=(None,))
    output=Embedding(input_dim=vocab_size, output_dim=embeddings,
                                            trainable=True, mask_zero=True)(x_in)

    # Forward LSTM feeds the segmentation head.
    flstm_output=LSTM(units=rnn_units, return_sequences=True)(output)
    seq_output=Dropout(0.5)(flstm_output)
    seq_output=TimeDistributed(Dense(num_seglabels), name='dense_seq')(seq_output)

    Seq_crf = ConditionalRandomField(lr_multiplier=seq_crf_lr_multiplier,name='seq_crf')

    seq_output=Seq_crf(seq_output)

    # Backward pass: reverse time axis, run an LSTM, reverse back.
    # NOTE(review): with mask_zero=True, reversing also moves padding to the
    # front of the sequence — confirm masking still behaves as intended.
    reverse_output=Lambda(lambda x: K.reverse(x,axes=1))(output)

    reverse_output=LSTM(units=rnn_units, return_sequences=True)(reverse_output)

    blstm_output=Lambda(lambda x: K.reverse(x,axes=1))(reverse_output)

    # Tagging head sees both directions.
    lstm_out=Concatenate()([flstm_output,blstm_output])
    tag_output=Dropout(0.5)(lstm_out)
    tag_output=TimeDistributed(Dense(num_labels), name='dense_tag')(tag_output)

    Tag_crf = ConditionalRandomField(lr_multiplier=tag_crf_lr_multiplier,name='tag_crf')

    tag_output=Tag_crf(tag_output)

    model = Model(x_in, [seq_output,tag_output])
    model.summary()

    model.compile(
        loss=[Seq_crf.sparse_loss,Tag_crf.sparse_loss],
        optimizer=Adam(learing_rate),  # NOTE: 'learing_rate' [sic] is the module-level name
        metrics=[SparseAccuracy()]
    )
    return model,Seq_crf,Tag_crf
Exemplo n.º 8
0
 def build_model(self, inputs, outputs):
     """Dense projection + bert4keras CRF head; records model, trans, loss
     and metrics on self."""
     logits = L.Dense(units=self.label, activation=self.activate_mid)(outputs)
     self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                       name="crf_bert4keras")
     self.outputs = self.CRF(logits)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
     # Export the learned transition matrix for external decoding.
     self.trans = K.eval(self.CRF.trans).tolist()
     if self.use_onehot:
         self.loss = self.CRF.dense_loss
         self.metrics = [self.CRF.dense_accuracy]
     else:
         self.loss = self.CRF.sparse_loss
         self.metrics = [self.CRF.sparse_accuracy]
Exemplo n.º 9
0
 def build_model(self):
     """BERT encoder tapped at block (bert_layers-1) + Dense + CRF.

     Side effects: sets ``self.CRF``, ``self.model`` and a 2-GPU parallel
     copy ``self.model_``.
     """
     model = build_transformer_model(
         self.bert_config_path,
         self.bert_checkpoint_path,
     )
     # Output of the selected transformer block.
     output_layer = 'Transformer-%s-FeedForward-Norm' % (self.bert_layers -
                                                         1)
     output = model.get_layer(output_layer).output
     output = Dense(self.num_classes)(output)
     self.CRF = ConditionalRandomField(lr_multiplier=self.crf_lr_multiplier)
     output = self.CRF(output)
     self.model = Model(model.input, output)
     # Parallel copy for 2-GPU training; self.model keeps the shared weights.
     self.model_ = multi_gpu_model(self.model, gpus=2)
     self.model.summary(120)
     logger.info('build model done')
Exemplo n.º 10
0
def build_crf_adversarial_bert(num_labels, model_name='electra'):
    """Transformer encoder + Dense + CRF tagger.

    Args:
        num_labels: int, size of the tag set.
        model_name: str, bert4keras model variant to load.

    Returns:
        (model, CRF): the compiled Keras model and its CRF layer.
    """
    encoder = build_transformer_model(config_path,
                                      checkpoint_path,
                                      model=model_name)
    # Make every encoder layer trainable (full fine-tuning).
    for layer in encoder.layers:
        layer.trainable = True
    logits = Dense(num_labels)(encoder.output)
    crf = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
    tagged = crf(logits)

    tagger = Model(encoder.input, tagged)

    tagger.compile(loss=crf.sparse_loss,
                   optimizer=Adam(learning_rate),
                   metrics=[crf.sparse_accuracy])

    return tagger, crf
Exemplo n.º 11
0
def build_bert(num_labels):
    """BERT encoder + Dense + CRF token tagger.

    Args:
        num_labels: int, size of the tag set.

    Returns:
        (model, CRF): the compiled Keras model and its CRF layer.
    """
    model = build_transformer_model(config_path,
                                    checkpoint_path)
    # Fine-tune every encoder layer.
    for layer in model.layers:
        layer.trainable = True
    output = Dense(num_labels)(model.output)
    CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
    output = CRF(output)

    model = Model(model.input, output)

    model.compile(loss=CRF.sparse_loss,
                  optimizer=Adam(learning_rate),
                  metrics=[CRF.sparse_accuracy])
    return model, CRF
Exemplo n.º 12
0
    def build_model(self):
        """ELECTRA encoder (last of 12 blocks) + 11-way Dense + CRF tagger."""
        encoder = build_transformer_model(self.config_path,
                                          self.checkpoint_path,
                                          model='electra')
        # Tap the final transformer block's feed-forward norm output.
        feats = encoder.get_layer(
            'Transformer-%s-FeedForward-Norm' % (12 - 1)).output
        logits = Dense(11)(feats)
        self.CRF = ConditionalRandomField(lr_multiplier=100)
        tagged = self.CRF(logits)

        model = Model(encoder.input, tagged)
        model.summary()
        model.compile(loss=self.CRF.sparse_loss,
                      optimizer=Adam(1e-4),
                      metrics=[self.CRF.sparse_accuracy])
        return model
Exemplo n.º 13
0
 def build_model(self):
     """
     Build the model: load the pre-trained BERT model and fine-tune the
     last few layers through a Dropout + Dense + CRF head.
     :return: the compiled Keras model
     """
     bert_model = build_transformer_model(config_path=args.BERT_CONFIG,
                                          checkpoint_path=args.BERT_MODEL)
     # Tap a configured intermediate BERT layer rather than the final output.
     output = bert_model.get_layer(args.BERT_LAYER).output
     output = Dropout(rate=0.5)(output)
     output = Dense(_labels_num)(output)
     CRF = ConditionalRandomField(lr_multiplier=1)
     p = CRF(output)
     model = Model(bert_model.input,p)
     model.compile(
         loss=CRF.sparse_loss,
         optimizer=Adam(lr=1e-5),
         metrics=[CRF.sparse_accuracy]
     )
     model.summary()
     return model
Exemplo n.º 14
0
def bertmodel():
    """BERT encoder tapped at block (bert_layers-1) + Dense + CRF tagger.

    Returns:
        (model, CRF): the compiled Keras model and its CRF layer.
    """
    encoder = build_transformer_model(
        config_path,
        checkpoint_path,
    )
    layer_name = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
    feats = encoder.get_layer(layer_name).output
    logits = Dense(num_labels)(feats)  # per-token scores (num_labels classes)

    crf = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
    tagged = crf(logits)

    tagger = Model(encoder.input, tagged)

    tagger.compile(
        loss=crf.sparse_loss,
        optimizer=Adam(learing_rate),  # NOTE: 'learing_rate' [sic] is the module-level name
        metrics=[crf.sparse_accuracy]
    )
    return tagger, crf
Exemplo n.º 15
0
 def build_model(self, inputs, outputs):
     """
     build_model.
     Args:
         inputs: tensor, input of model
         outputs: tensor, output of model
     Returns:
         None
     """
     # CuDNNGRU or GRU
     # NOTE(review): if num_rnn_layers == 0, x stays None and the non-CRF
     # branch below would fail — confirm num_rnn_layers >= 1 upstream.
     x = None
     if self.rnn_type.upper() == "CUDNNGRU":
         rnn_cell = L.CuDNNGRU
     else:
         rnn_cell = L.GRU
     # Bi-GRU
     # NOTE(review): each iteration reads `outputs`, so the RNN layers run
     # in parallel rather than stacked; only the last survives — confirm.
     for nrl in range(self.num_rnn_layers):
         x = L.Bidirectional(
             rnn_cell(
                 units=self.rnn_unit,
                 return_sequences=True,
                 activation=self.activate_mid,
             ))(outputs)
         x = L.Dropout(self.dropout)(x)
     if self.use_crf:
         x = L.Dense(units=self.label, activation=self.activate_end)(x)
         self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                           name="crf_bert4keras")
         self.outputs = self.CRF(x)
         # Transition matrix, exported for external Viterbi decoding.
         self.trans = K.eval(self.CRF.trans).tolist()
         self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
         self.metrics = [
             self.CRF.dense_accuracy
             if self.use_onehot else self.CRF.sparse_accuracy
         ]
     else:
         self.outputs = L.TimeDistributed(
             L.Dense(units=self.label, activation=self.activate_end))(x)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
Exemplo n.º 16
0
 def build_model(self, inputs, outputs):
     """
     build_model.
     Args:
         inputs: tensor, input of model
         outputs: tensor, output of model
     Returns:
         None
     """
     # LSTM or GRU
     if self.rnn_type == "LSTM":
         rnn_cell = L.LSTM
     elif self.rnn_type == "CuDNNLSTM":
         rnn_cell = L.CuDNNLSTM
     elif self.rnn_type == "CuDNNGRU":
         rnn_cell = L.CuDNNGRU
     else:
         rnn_cell = L.GRU
     # Bi-LSTM-LAN: here `outputs` is reassigned each iteration, so the
     # RNN + self-attention layers are genuinely stacked.
     for nrl in range(self.num_rnn_layers):
         x = L.Bidirectional(rnn_cell(units=self.rnn_unit*(nrl+1),
                                      return_sequences=True,
                                      activation=self.activate_mid,
                                      ))(outputs)
         # Self-attention branch concatenated with the RNN features.
         x_att = SelfAttention(K.int_shape(x)[-1])(x)
         outputs = L.Concatenate()([x, x_att])
         outputs = L.Dropout(self.dropout)(outputs)
     if self.use_crf:
         x = L.Dense(units=self.label, activation=self.activate_end)(outputs)
         self.CRF = ConditionalRandomField(self.crf_lr_multiplier, name="crf_bert4keras")
         self.outputs = self.CRF(x)
         # Transition matrix, exported for external Viterbi decoding.
         self.trans = K.eval(self.CRF.trans).tolist()
         self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
         self.metrics = [self.CRF.dense_accuracy if self.use_onehot else self.CRF.sparse_accuracy]
     else:
         self.outputs = L.TimeDistributed(L.Dense(units=self.label, activation=self.activate_end))(outputs)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
                batch_labels.append(labels)
                if len(batch_token_ids) == self.batch_size or is_end:
                    batch_token_ids = sequence_padding(batch_token_ids)
                    batch_segment_ids = sequence_padding(batch_segment_ids)
                    batch_labels = sequence_padding(batch_labels)
                    yield [batch_token_ids, batch_segment_ids], batch_labels
                    batch_token_ids, batch_segment_ids, batch_labels = [], [], []


# Module-level build: transformer encoder + Dense + CRF token tagger.
model = build_transformer_model(
    config_path,
    checkpoint_path,
)

output = Dense(num_labels)(model.output)
CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
output = CRF(output)

model = Model(model.input, output)
model.summary()

model.compile(
    loss=CRF.sparse_loss,
    optimizer=Adam(learning_rate),
    metrics=[CRF.sparse_accuracy]
)

def viterbi_decode(nodes, trans):
    """Viterbi算法求最优路径
    其中nodes.shape=[seq_len, num_labels],
        trans.shape=[num_labels, num_labels].
Exemplo n.º 18
0
 def build_model(self, inputs, outputs):
     """
     build_model.
     Args:
         inputs: tensor, input of model
         outputs: tensor, output of model
     Returns:
         None
     """
     # CNN: n-gram feature extraction; DGCNN dilated convolution stack (IDCNN).
     conv_pools = []
     for i in range(len(self.filters_size)):
         # First conv of the stack uses the first dilation rate.
         conv = L.Conv1D(
             name="conv-{0}-{1}".format(i, self.filters_size[i]),
             dilation_rate=self.atrous_rates[0],
             kernel_size=self.filters_size[i],
             activation=self.activate_mid,
             filters=self.filters_num,
             padding="SAME",
         )(outputs)
         for j in range(len(self.atrous_rates) - 1):
             conv = L.Conv1D(
                 name="conv-{0}-{1}-{2}".format(i, self.filters_size[i], j),
                 # BUGFIX: use each remaining rate exactly once; the original
                 # indexed atrous_rates[j], re-applying rate 0 and never
                 # using the last rate.
                 dilation_rate=self.atrous_rates[j + 1],
                 kernel_size=self.filters_size[i],
                 activation=self.activate_mid,
                 filters=self.filters_num,
                 padding="SAME",
             )(conv)
             conv = L.Dropout(
                 name="dropout-{0}-{1}-{2}".format(i, self.filters_size[i],
                                                   j),
                 rate=self.dropout,
             )(conv)
         conv_pools.append(conv)
     # Concatenate all filter-size branches.
     x = L.Concatenate(axis=-1)(conv_pools)
     x = L.Dropout(self.dropout)(x)
     # CRF or Dense
     if self.use_crf:
         x = L.Dense(units=self.label, activation=self.activate_end)(x)
         self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                           name="crf_bert4keras")
         self.outputs = self.CRF(x)
         # Transition matrix, exported for external Viterbi decoding.
         self.trans = K.eval(self.CRF.trans).tolist()
         self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
         self.metrics = [
             self.CRF.dense_accuracy
             if self.use_onehot else self.CRF.sparse_accuracy
         ]
     else:
         x = L.Bidirectional(
             L.GRU(
                 activation=self.activate_mid,
                 return_sequences=True,
                 units=self.rnn_unit,
                 name="bi-gru",
             ))(x)
         self.outputs = L.TimeDistributed(
             L.Dense(
                 activation=self.activate_end,
                 name="dense-output",
                 units=self.label,
             ))(x)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
# Domain classification head (fragment: `classify_output`, `electra_model`
# and the *_labels tensors are defined earlier, outside this excerpt).
domain_output = Dense(domain_num,
                      activation='softmax',
                      kernel_initializer=electra_model.initializer,
                      name='domain_classifier')(classify_output)
domain_model = Model(electra_model.input, domain_output)

# Intent classification model
intent_output = Dense(intent_num,
                      activation='softmax',
                      kernel_initializer=electra_model.initializer,
                      name='intent_classifier')(classify_output)
intent_model = Model(electra_model.model.input, intent_output)

# Slot filling head
# NOTE(review): electra_model.input here vs electra_model.model.input above —
# confirm both refer to the same input tensors.
x = Dense(slot_num)(electra_model.model.output)
CRF = ConditionalRandomField(lr_multiplier=lr_multiplier, name='slots_tagger')
slot_output = CRF(x)
slot_model = Model(electra_model.input, slot_output)

# Joint training model: encoder inputs plus the three label tensors as inputs.
train_model = Model(
    electra_model.input + [domain_labels, intent_labels, slot_labels],
    [domain_output, intent_output, slot_output])

# Mask of non-padding positions (slot label id > 0).
mask = Lambda(lambda x: K.cast(K.greater(x, 0), 'float32'))(slot_labels)

# intent_loss
intent_loss = K.sparse_categorical_crossentropy(intent_labels, intent_output)

# domain_loss
domain_loss = K.sparse_categorical_crossentropy(domain_labels, domain_output)
Exemplo n.º 20
0
# CLS pooling: the first position's vector serves as the sequence representation.
classify_output = Lambda(lambda x: x[:, 0], name='CLS-token')(electra_model.model.output)

# Domain classification model
domain_output = Dense(domain_num, activation='softmax', kernel_initializer=electra_model.initializer,
                      name='domain_classifier')(classify_output)
domain_model = Model(electra_model.input, domain_output)

# Intent classification model
# NOTE(review): electra_model.model.input here vs electra_model.input above —
# confirm both refer to the same input tensors.
intent_output = Dense(intent_num, activation='softmax', kernel_initializer=electra_model.initializer,
                      name='intent_classifier')(classify_output)
intent_model = Model(electra_model.model.input, intent_output)

# Slot filling
x = Dense(slot_num)(electra_model.model.output)
CRF = ConditionalRandomField(lr_multiplier=lr_multiplier, name='slots_tagger')
slot_output = CRF(x)
slot_model = Model(electra_model.input, slot_output)

# Joint training model: one set of encoder inputs, three outputs; the losses
# below are keyed by the output layers' names.
train_model = Model(
    # electra_model.input + [intent_labels, domain_labels, slot_labels],
    electra_model.model.input,
    [domain_output, intent_output, slot_output]
)

loss = {
    "domain_classifier": 'sparse_categorical_crossentropy',
    "intent_classifier": 'sparse_categorical_crossentropy',
    'slots_tagger': CRF.sparse_loss
}
Exemplo n.º 21
0
 def build_model(self, inputs, outputs):
     """
     build_model.
     Args:
         inputs: tensor, input of model
         outputs: tensor, output of model
     Returns:
         None
     """
     # LSTM or GRU
     if self.rnn_type == "LSTM":
         rnn_cell = L.LSTM
     elif self.rnn_type == "CuDNNLSTM":
         rnn_cell = L.CuDNNLSTM
     elif self.rnn_type == "CuDNNGRU":
         rnn_cell = L.CuDNNGRU
     else:
         rnn_cell = L.GRU
     # CNN-LSTM: extract n-gram features per filter size (max pooling is the
     # usual choice; average pooling generally not used).
     conv_pools = []
     for i in range(len(self.filters_size)):
         conv = L.Conv1D(
             name="conv-{0}-{1}".format(i, self.filters_size[i]),
             kernel_size=self.filters_size[i],
             activation=self.activate_mid,
             filters=self.filters_num,
             padding='same',
         )(outputs)
         # Each n-gram branch gets its own bidirectional RNN.
         conv_rnn = L.Bidirectional(
             rnn_cell(
                 name="bi-lstm-{0}-{1}".format(i, self.filters_size[i]),
                 activation=self.activate_mid,
                 return_sequences=True,
                 units=self.rnn_unit,
             ))(conv)
         x_dropout = L.Dropout(rate=self.dropout,
                               name="dropout-{0}-{1}".format(
                                   i, self.filters_size[i]))(conv_rnn)
         conv_pools.append(x_dropout)
     # Concatenate all branches.
     x = L.Concatenate(axis=-1)(conv_pools)
     x = L.Dropout(self.dropout)(x)
     # CRF or Dense
     if self.use_crf:
         x = L.Dense(units=self.label, activation=self.activate_end)(x)
         self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                           name="crf_bert4keras")
         self.outputs = self.CRF(x)
         # Transition matrix, exported for external Viterbi decoding.
         self.trans = K.eval(self.CRF.trans).tolist()
         self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
         self.metrics = [
             self.CRF.dense_accuracy
             if self.use_onehot else self.CRF.sparse_accuracy
         ]
     else:
         self.outputs = L.TimeDistributed(
             L.Dense(units=self.label,
                     activation=self.activate_end,
                     name="dense-output"))(x)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
Exemplo n.º 22
0
# tf.disable_v2_behavior()  # disable TF 2.x behaviors

# Training hyperparameters.
maxlen = 250
epochs = 10
batch_size = 1
bert_layers = 24
learing_rate = 1e-5  # [sic] the fewer BERT layers used, the larger the learning rate should be
crf_lr_multiplier = 1000  # enlarge the CRF layer's learning rate when necessary

# Paths to the pre-trained RoBERTa-wwm-large checkpoint.
config_path = './chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/bert_config.json'
checkpoint_path = './chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/bert_model.ckpt'
dict_path = './chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/vocab.txt'

# print(tf.test.is_gpu_available())
# print(tf.config.list_physical_devices('GPU'))
CRF = ConditionalRandomField(
    lr_multiplier=crf_lr_multiplier)  # the CRF layer is essentially a loss layer with trainable parameters
# Build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)
# Labeled-data loader
loader = tcm.TCM()

def train():
    train_data = loader.load_data('./round1_train/data/train.txt'
                                  )  # 第一个维度为所有训练样本中句子个数,第二个维度是每个句子所包含的(实体,类别)数
    valid_data = loader.load_data('./round1_train/data/val.txt')

    global train_generator
    train_generator = generator.Generator(train_data=train_data,
                                          batch_size=batch_size,
                                          tokenizer=tokenizer,
Exemplo n.º 23
0
# Sanity-check the first encoded training example.
print(train_inputs[0][0])
print(train_inputs[1][0])

bert_path = '../tfhub/chinese_roberta_wwm_ext_L-12_H-768_A-12'

from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import Callback
from bert4keras.backend import keras, K
from bert4keras.layers import ConditionalRandomField
from bert4keras.models import build_transformer_model
from bert4keras.snippets import ViterbiDecoder, to_array

# return_keras_model=False keeps the bert4keras wrapper object, which
# bt.lock_transformer_layers below operates on.
bert = build_transformer_model(bert_path, return_keras_model=False)

# Per-token logits over the label set, then a CRF with a boosted LR.
output = Dense(len(y2id))(bert.model.output)
CRF = ConditionalRandomField(lr_multiplier=1000)
output = CRF(output)

model = tf.keras.models.Model(inputs=bert.model.input, outputs=output)

# Presumably freezes the lower 8 transformer layers — confirm in `bt`.
bt.lock_transformer_layers(bert, 8)

epochs = 2
batch_size = 32
# Total optimizer steps across all epochs, used to schedule the LR.
total_steps = epochs * train_inputs[0].shape[0] // batch_size
optimizer = bt.get_suggested_optimizer(1e-4, total_steps=total_steps)
model.compile(optimizer, loss=CRF.sparse_loss, metrics=[CRF.sparse_accuracy])

#model.summary()

from seqeval.metrics import f1_score, accuracy_score, classification_report