Example #1
def get_model():
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    for l in bert_model.layers:
        l.trainable = True

    T1 = Input(shape=(None,))
    T2 = Input(shape=(None,))

    T = bert_model([T1, T2])

    T = Lambda(lambda x: x[:, 0])(T)
    output = Dense(64, activation='relu')(T)
    output = Dense(4, activation='softmax')(output)
    model = Model([T1, T2], output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate),  # use a sufficiently small learning rate
        metrics=['accuracy']
    )
    model.summary()
    return model
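Most of the snippets on this page assume the same module-level setup: keras-bert's checkpoint loader plus the usual Keras imports, with config_path and checkpoint_path pointing at a downloaded pretrained BERT. A minimal sketch of that shared boilerplate (the checkpoint directory is a placeholder; adapt it to whichever model you downloaded):

import os
from keras_bert import load_trained_model_from_checkpoint
from keras.models import Model
from keras.layers import Input, Dense, Lambda
from keras.optimizers import Adam

pretrained_dir = 'uncased_L-12_H-768_A-12'  # placeholder checkpoint directory
config_path = os.path.join(pretrained_dir, 'bert_config.json')
checkpoint_path = os.path.join(pretrained_dir, 'bert_model.ckpt')
learning_rate = 1e-5  # a sufficiently small fine-tuning learning rate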
Example #2
 def create_model(self):
     model_path = "./{}/".format(BASE_MODEL_DIR)
     bert = load_trained_model_from_checkpoint(
         model_path + "bert_config.json",
         model_path + "bert_model.ckpt",
         seq_len=self.max_seq_length
     )
      # make the BERT layers trainable
     for layer in bert.layers:
         layer.trainable = True
     # x1 = Input(shape=(None,))
     # x2 = Input(shape=(None,))
     # bert_out = bert([x1, x2])
     lstm_out = Bidirectional(LSTM(self.lstmDim,
                                   return_sequences=True,
                                   dropout=0.2,
                                   recurrent_dropout=0.2))(bert.output)
     crf_out = CRF(len(self.label), sparse_target=True)(lstm_out)
     model = Model(bert.input, crf_out)
     model.summary()
     return model
Example #3
 def __init__(self, params):
     self.params = params
     self.input_size = 768
     self.tagger = None
     self.maxclauselen = None
     self.maxseqlen = None
     pretrained_path = self.params["repfile"]
     config_path = os.path.join(pretrained_path, 'bert_config.json')
     checkpoint_path = os.path.join(pretrained_path, 'bert_model.ckpt')
     vocab_path = os.path.join(pretrained_path, 'vocab.txt')
     
     self.bert = load_trained_model_from_checkpoint(config_path, checkpoint_path)
     #self.bert._make_predict_function() # Crucial step, otherwise TF will give error.
     #self.bert.Model.make_predict_function()
     
     token_dict = {}
     with codecs.open(vocab_path, 'r', 'utf8') as reader:
         for line in reader:
             token = line.strip()
             token_dict[token] = len(token_dict)
     self.tokenizer = Tokenizer(token_dict)    
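For reference, a Tokenizer built this way produces the two parallel arrays (token ids and segment ids) that every model on this page takes as input. A brief usage sketch (the texts and max_len are placeholders, and model stands for any of the compiled models on this page):

import numpy as np
from keras_bert import Tokenizer

tokenizer = Tokenizer(token_dict)
# encode() returns token ids and segment ids, padded/truncated to max_len
token_ids, segment_ids = tokenizer.encode('first text', 'second text', max_len=128)
preds = model.predict([np.array([token_ids]), np.array([segment_ids])])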
Example #4
def build_bert(nclass):
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=None)

    for l in bert_model.layers:
        l.trainable = True

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))

    x = bert_model([x1_in, x2_in])
    x = Lambda(lambda x: x[:, 0])(x)
    p = Dense(nclass, activation='softmax')(x)

    model = Model([x1_in, x2_in], p)
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(1e-5),
                  metrics=['accuracy', acc_top2])
    model.summary()
    return model
Example #5
def build_bert(nclass):
  global lr_rate
  bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)  # load the pretrained model

  for l in bert_model.layers:
    l.trainable = True

  x1_in = Input(shape=(None,))
  x2_in = Input(shape=(None,))

  x = bert_model([x1_in, x2_in])
  x = Lambda(lambda x: x[:, 0])(x)  # take the [CLS] vector for classification
  p = Dense(nclass, activation='softmax')(x)  # a single Dense softmax output layer

  model = Model([x1_in, x2_in], p)
  model.compile(loss='categorical_crossentropy',
                optimizer=Adam(lr_rate),  # use a sufficiently small learning rate
                metrics=['accuracy', f1])  # acc_top2

  model.summary()
  return model
Example #6
    def build_model(self):
        bert_model = load_trained_model_from_checkpoint(
            self.config_path, self.checkpoint_path)

        for l in bert_model.layers:
            l.trainable = True

        x1_in = Input(shape=(None, ))
        x2_in = Input(shape=(None, ))

        x = bert_model([x1_in, x2_in])
        x = Lambda(lambda x: x[:, 0])(x)  # take the [CLS] vector for classification
        p = Dense(1, activation='sigmoid')(x)

        model = Model([x1_in, x2_in], p)
        model.compile(
            loss='binary_crossentropy',
            optimizer=Adam(1e-5),  # use a sufficiently small learning rate
            metrics=['accuracy'])
        model.summary()
        return model
Example #7
    def creat_model(self):
        print('load bert Model start!')
        model = keras_bert.load_trained_model_from_checkpoint(
            self.config_path,
            checkpoint_file=self.check_point_path,
            seq_len=self.max_len,
            trainable=True)
        print('load bert Model end!')
        inputs = model.inputs
        embedding = model.output
        x = Bidirectional(LSTM(units=self.rnn_units,
                               return_sequences=True))(embedding)
        x = Dropout(self.drop_rate)(x)
        x = Dense(self.n_class)(x)
        self.crf = CRF(self.n_class, sparse_target=False)
        x = self.crf(x)
        self.model = Model(inputs=inputs, outputs=x)
        self.model.summary()
        self.compile()

        return self.model
Example #8
def bert_model():
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=None)

    for l in bert_model.layers:
        l.trainable = True

    x1_in = Input(shape=(None, ))  # sentence to recognize (token ids)
    x2_in = Input(shape=(None, ))  # sentence to recognize (segment ids)
    s1_in = Input(shape=(None, ))  # entity left boundary (label)
    s2_in = Input(shape=(None, ))  # entity right boundary (label)

    x1, x2, s1, s2 = x1_in, x2_in, s1_in, s2_in
    x_mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x1)

    x = bert_model([x1, x2])
    ps1 = Dense(1, use_bias=False)(x)
    ps1 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
        [ps1, x_mask])
    ps2 = Dense(1, use_bias=False)(x)
    ps2 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
        [ps2, x_mask])

    model = Model([x1_in, x2_in], [ps1, ps2])
    model = multi_gpu_model(model, gpus=2)

    train_model = Model([x1_in, x2_in, s1_in, s2_in], [ps1, ps2])

    loss1 = K.mean(K.categorical_crossentropy(s1_in, ps1, from_logits=True))
    ps2 -= (1 - K.cumsum(s1, 1)) * 1e10  # mask out positions before the start when predicting the end
    loss2 = K.mean(K.categorical_crossentropy(s2_in, ps2, from_logits=True))
    loss = loss1 + loss2

    train_model.add_loss(loss)
    train_model = multi_gpu_model(train_model, gpus=2)
    train_model.compile(optimizer=Adam(learning_rate))
    #     train_model.summary()
    return model, train_model
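At inference time the two logit maps from the returned model are decoded into an entity span. A minimal hedged sketch (token_ids and segment_ids are batched arrays as in the other examples):

ps1_logits, ps2_logits = model.predict([token_ids, segment_ids])
start = int(ps1_logits[0].argmax())
# mirror the cumsum mask used in the loss: the end may not precede the start
end = start + int(ps2_logits[0][start:].argmax())
entity_token_ids = token_ids[0][start:end + 1]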
Example #9
 def test_load_output_layer_num(self):
     current_path = os.path.dirname(os.path.abspath(__file__))
     config_path = os.path.join(current_path, 'test_checkpoint', 'bert_config.json')
     model_path = os.path.join(current_path, 'test_checkpoint', 'bert_model.ckpt')
     model = load_trained_model_from_checkpoint(config_path, model_path, training=False, output_layer_num=4)
     model.summary()
     model = load_trained_model_from_checkpoint(config_path, model_path, training=False, output_layer_num=[0])
     model.summary()
     model = load_trained_model_from_checkpoint(config_path, model_path, training=False, output_layer_num=[1])
     model.summary()
     model = load_trained_model_from_checkpoint(config_path, model_path, training=False, output_layer_num=[-1])
     model.summary()
     model = load_trained_model_from_checkpoint(config_path, model_path, training=False, output_layer_num=[-2])
     model.summary()
     model = load_trained_model_from_checkpoint(config_path, model_path, training=False, output_layer_num=[0, -1])
     model.summary()
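As this test illustrates, output_layer_num (only available with training=False) controls which transformer blocks feed the final output: an integer takes that many of the last blocks, a list selects blocks by index (negative indices count from the end), and the chosen outputs are concatenated along the feature axis. A sketch of loading a feature extractor this way:

# concatenate the outputs of the last 4 transformer blocks;
# output shape becomes (batch, seq_len, 4 * hidden_size)
feature_model = load_trained_model_from_checkpoint(
    config_path, model_path, training=False, output_layer_num=4)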
Example #10
    def get_multi_model(self, weight=None):
        bert_model = load_trained_model_from_checkpoint(self.config_path,
                                                        self.checkpoint_path,
                                                        seq_len=None)
        le = 1e-3
        if self.trainable:
            le = 1e-4
            for l in bert_model.layers:
                l.trainable = True
        x1_in = Input(shape=(None, ))
        x2_in = Input(shape=(None, ))

        x = bert_model([x1_in, x2_in])
        x = Lambda(lambda x: x[:, 0])(x)
        if not self.trainable:
            x = Dense(128,
                      activation='relu',
                      name="fc1",
                      kernel_regularizer=regularizers.l2(0.01))(x)
            x = Dropout(0.3)(x)
        predict1 = Dense(self.class_num[0],
                         activation='softmax',
                         name="label1")(x)
        predict2 = Dense(self.class_num[1],
                         activation='softmax',
                         name="label2")(x)

        model = Model([x1_in, x2_in], [predict1, predict2], name="multi_model")
        model.summary()
        if weight is not None:
            print('loading pre_train weight...')
            model.load_weights(weight, by_name=True)
            print('Done!')

        model.compile(
            loss=['categorical_crossentropy', 'categorical_crossentropy'],
            optimizer=Adam(le),  # use a sufficiently small learning rate
            metrics=['accuracy', 'accuracy'])

        return model
Example #11
def build_bert(nclass):
    """
    参考:https://kexue.fm/archives/6736
    :param nclass: 文本分类种类
    :return: 构建的bert模型
    """
    # Note: although seq_len=None is allowed, the sequence length must still not exceed 512.
    # The only line that actually invokes BERT is load_trained_model_from_checkpoint;
    # everything else is ordinary Keras.
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=None)  # load the pretrained model

    for l in bert_model.layers:
        l.trainable = True
    # build the model
    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))

    # "What principle decides which layers to add after BERT?" Answer: use as few
    # layers as possible to finish your task. For a binary sentiment task, take the
    # first vector and add a single Dense(1); don't stack extra Dense layers, and
    # certainly don't insert an LSTM before the Dense. For sequence labeling (e.g.
    # NER), Dense + CRF is enough. Keep the additions minimal: BERT itself is
    # complex enough to handle most tasks, and any layers you add are randomly
    # initialized, so too many of them perturb the pretrained weights, which can
    # hurt accuracy or even prevent convergence.

    # Feeding x1_in and x2_in into bert_model here uses BERT as the encoder.
    x = bert_model([x1_in, x2_in])
    # Lambda wraps an arbitrary expression as a Layer object.
    x = Lambda(lambda x: x[:, 0])(x)  # take the [CLS] vector for classification
    p = Dense(nclass, activation='softmax')(x)
    # Reference: https://keras.io/api/models/model/#model-class
    # Model groups layers into an object with training and inference features;
    # it only needs the input and output layers as arguments.
    model = Model([x1_in, x2_in], p)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=['accuracy', acc_top2])
    model.summary()
    return model
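With the head kept this small, fine-tuning is plain Keras as well. A hedged sketch of the training call (X1 and X2 are padded token- and segment-id arrays, y holds integer labels; all three names are hypothetical):

from keras.utils import to_categorical

model = build_bert(nclass=4)
model.fit([X1, X2], to_categorical(y, num_classes=4),
          batch_size=16, epochs=3, validation_split=0.1)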
Example #12
def create_text_match_model(num_labels):
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=maxlen)

    for layer in bert_model.layers:
        layer.trainable = True

    # Add Bi-LSTM layer
    bi_lstm = Bidirectional(LSTM(64, return_sequences=True))(bert_model.output)
    bi_lstm = Lambda(lambda x: x, output_shape=lambda s: s)(bi_lstm)  # identity Lambda drops BERT's mask so the pooling layers accept the tensor
    print(bi_lstm.shape)
    # Applying hybrid pooling approach to bi_lstm sequence output
    avg_pool = GlobalAveragePooling1D()(bi_lstm)
    max_pool = GlobalMaxPooling1D()(bi_lstm)
    concat = concatenate([avg_pool, max_pool])
    # dropout = Dropout(0.3)(concat)
    output = Dense(num_labels, activation='softmax')(concat)
    model = Model(bert_model.input, output)
    model.summary()

    return model
Example #13
    def build(config):
        bert_config_path = os.path.join(config.pretrained_path,
                                        'bert_config.json')
        bert_checkpoint_path = os.path.join(config.pretrained_path,
                                            'bert_model.ckpt')
        bert_model = load_trained_model_from_checkpoint(bert_config_path,
                                                        bert_checkpoint_path,
                                                        seq_len=None)

        for l in bert_model.layers:
            l.trainable = True
        x1_in = Input(shape=(None, ))  # token ids input
        x2_in = Input(shape=(None, ))  # segment ids input

        x = bert_model([x1_in, x2_in])
        x = Lambda(lambda x: x[:, 0])(x)  # get first token embedding
        x = Dropout(config.hidden_dropout_prob)(x)
        p = Dense(2, activation='softmax')(x)

        model = Model([x1_in, x2_in], p)

        return model
Example #14
	def _model_compile_(self):
		layerN = 12
		bert_model = load_trained_model_from_checkpoint(
			os.path.join(self.pretrain_model_dir, "bert_config.json"),
			os.path.join(self.pretrain_model_dir, "bert_model.ckpt"),
			seq_len=None
		)

		for l in bert_model.layers:
			l.trainable = True
		
		x = Lambda(lambda x: x[:, 0])(bert_model.output)
		prob = Dense(self.n_classes, activation='softmax')(x)
		model = Model(inputs=bert_model.inputs, outputs=prob)
		model.summary()
		model.compile(
					optimizer=Adam(lr=0.001),
					loss='categorical_crossentropy',
					metrics =['accuracy']
			)
		plot_model(model, to_file=os.path.join(self.saved_models_dir,'bert_bilstm_model.png'), show_shapes=True)
		return model
Example #15
def get_model():
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path)
    for l in bert_model.layers:
        l.trainable = True

    input1 = Input(shape=(None, ))
    input2 = Input(shape=(None, ))

    x = bert_model([input1, input2])

    x = Lambda(lambda x: x[:, 0])(x)

    output = Dense(2, activation='softmax')(x)

    model = Model([input1, input2], output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=['accuracy'])
    model.summary()
    return model
Example #16
def model_build(len_train):
    global NUM_CLASSES
    global BATCH_SIZE
    global NUM_EPOCHS
    global MIN_LR
    global LR

    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=MAXLEN,
                                                    trainable=True)

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    aux_in = Input(shape=(2, ))

    inputs = bert_model([x1_in, x2_in])
    bert = Lambda(lambda x: x[:, 0])(inputs)
    dense = concatenate([bert, aux_in])
    outputs = Dense(NUM_CLASSES, activation='softmax')(dense)
    model = Model([x1_in, x2_in, aux_in], outputs)

    decay_steps, warmup_steps = calc_train_steps(
        len_train,
        batch_size=BATCH_SIZE,
        epochs=NUM_EPOCHS,
    )

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=AdamWarmup(
                      decay_steps=decay_steps,
                      warmup_steps=warmup_steps,
                      lr=LR,
                      min_lr=MIN_LR,
                  ),
                  metrics=['sparse_categorical_accuracy'])
    del bert_model
    gc.collect()
    return model
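Both calc_train_steps and AdamWarmup come from keras-bert: the helper splits the total number of optimizer steps into a linear warmup ramp followed by decay. It also accepts a warmup_proportion argument (0.1 in the keras-bert README), so the call above could pin it explicitly:

decay_steps, warmup_steps = calc_train_steps(
    len_train,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    warmup_proportion=0.1,  # fraction of steps spent warming up
)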
Example #17
def get_model():
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    for l in bert_model.layers:
        l.trainable = True

    T1 = Input(shape=(None,))
    T2 = Input(shape=(None,))

    T = bert_model([T1, T2])

    T = Lambda(lambda x: x[:, 0])(T)

    output = Dense(1, activation='sigmoid')(T)

    model = Model([T1, T2], output)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=['accuracy']
    )
    model.summary()
    return model
Example #18
def get_model():
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path)
    for l in bert_model.layers:
        l.trainable = True

    T1 = Input(shape=(None, ))
    T2 = Input(shape=(None, ))

    T = bert_model([T1, T2])

    T = Lambda(lambda x: x[:, 0])(T)  # take the vector at position 0, i.e. [CLS]

    output = Dense(num_class, activation='softmax')(T)  # use softmax for the multi-class case

    model = Model([T1, T2], output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=['accuracy'])
    model.summary()  # prints the per-layer Param counts
    return model
Example #19
def load_model():
    """
    构建模型主体
    return 模型对象
    """
    with ss0.as_default():
        with ss0.graph.as_default():
            bert = load_trained_model_from_checkpoint(CONFIG.config_path,
                                                      CONFIG.checkpoint_path,
                                                      seq_len=CONFIG.maxlen)
            x1 = Input(shape=(None, ))
            x2 = Input(shape=(None, ))
            bert_out = bert([x1, x2])
            lstm_out = Bidirectional(
                LSTM(CONFIG.lstmDim,
                     return_sequences=True,
                     dropout=0.2,
                     recurrent_dropout=0.2))(bert_out)
            crf_out = CRF(len(label), sparse_target=True)(lstm_out)
            model = Model([x1, x2], crf_out)
            model.load_weights(CONFIG.relation_key_extract_model_path)
    return model
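The ss0 global here is a dedicated TensorFlow 1.x session, a common pattern when several Keras models are served from one process and each needs its own graph. A hedged sketch of how it would be created before load_model() runs:

import tensorflow as tf

# one private graph + session per model; load_model() then enters both contexts
ss0 = tf.Session(graph=tf.Graph())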
Example #20
def train_model_bert():
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)

    for l in bert_model.layers:
        l.trainable = True

    x1_in = Input(shape=(None,))
    x2_in = Input(shape=(None,))

    x = bert_model([x1_in, x2_in])
    # print(x.shape)
    x = Lambda(lambda x: x[:, 0])(x)  # take only [CLS] for classification
    p = Dense(1, activation='sigmoid')(x)

    model = Model([x1_in, x2_in], p)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=['accuracy']
    )
    model.summary()
    return model
Example #21
def build_bert(nclass):
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=None)
    # Note: although seq_len=None is allowed, the sequence length must still not exceed 512
    for l in bert_model.layers:
        l.trainable = True

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))

    x = bert_model([x1_in, x2_in])
    x = Lambda(lambda x: x[:, 0])(x)
    p = Dense(nclass, activation='sigmoid')(x)

    model = Model([x1_in, x2_in], p)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate),  # use a sufficiently small learning rate
        metrics=['accuracy'])
    model.summary()
    return model
Example #22
def get_model():
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)
    # set bert model fix or not
    for layer in bert_model.layers:
        layer.trainable = True

    x1_in = Input(shape=(None,))
    x2_in = Input(shape=(None,))

    x = bert_model([x1_in, x2_in])
    x = Lambda(lambda x: x[:, 0])(x)  # take the [CLS] vector for classification
    x = keras.layers.Dropout(rate=DROPOUT_RATE)(x)
    p = Dense(1, activation='sigmoid')(x)

    model = Model([x1_in, x2_in], p)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(INIT_LEARNING_RATE),  # use a sufficiently small learning rate
        metrics=['accuracy']
    )
    model.summary()
    return model
Example #23
def create_cls_model(num_labels):
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=None)

    for layer in bert_model.layers:
        layer.trainable = True

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))

    x = bert_model([x1_in, x2_in])
    cls_layer = Lambda(lambda x: x[:, 0])(x)  # take the [CLS] vector for classification
    p = Dense(num_labels, activation='softmax')(cls_layer)  # multi-class output

    model = Model([x1_in, x2_in], p)
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(1e-5),
                  metrics=['accuracy'])
    model.summary()

    return model
Example #24
def create_model(config_path, checkpoint_path):
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=None)

    for l in bert_model.layers:
        l.trainable = True

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))

    x = bert_model([x1_in, x2_in])
    x = Lambda(lambda x: x[:, 0])(x)
    p = Dense(13, activation='sigmoid')(x)

    model = Model([x1_in, x2_in], p)
    # val_metric = Metrics([val_x,val_y])
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=[micro_f1, macro_f1])
    model.summary()

    return model
Example #25
    def build_bert(self):
        bert_model = load_trained_model_from_checkpoint(self.config_path,
                                                        self.checkpoint_path,
                                                        seq_len=None)
        for l in bert_model.layers:
            l.trainable = True  # make BERT trainable
        x1_in = Input(shape=(None, ))
        x2_in = Input(shape=(None, ))

        x11_in = Input(shape=(None, ))
        x22_in = Input(shape=(None, ))

        x1 = bert_model([x1_in, x2_in])
        x2 = bert_model([x11_in, x22_in])

        # print((K.shape(x)))

        lamb = Lambda(lambda x: x[:, 0])

        x1 = lamb(x1)
        x2 = lamb(x2)

        x = Concatenate(axis=1)([x1, x2])

        # print((K.shape(x)))

        x = Dense(500, activation='tanh')(x)
        x = Dropout(0.5)(x)

        p = Dense(12, activation='softmax')(x)

        self.model = Model([x1_in, x2_in, x11_in, x22_in], p)

        self.model.compile(
            # loss = 'binary_crossentropy',
            loss='categorical_crossentropy',
            optimizer=Adam(1e-5),  # use a sufficiently small learning rate
            metrics=['accuracy'])
        self.model.summary()
Example #26
def train_model_bertlstmgru():
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=None)

    for l in bert_model.layers:
        l.trainable = True

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))

    x1, x2 = x1_in, x2_in
    mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x1)
    x = bert_model([x1, x2])
    t = Dropout(0.1)(x)
    t = Bidirectional(LSTM(80, recurrent_dropout=0.1,
                           return_sequences=True))(t)
    t = Bidirectional(GRU(80, recurrent_dropout=0.1, return_sequences=True))(t)
    t = Dropout(0.4)(t)
    t = Dense(160)(t)
    # t_maxpool = Lambda(seq_maxpool)([t, mask])
    # t_maxpool = MaxPool1D()(t)
    # t_avgpool = Lambda(seq_avgpool)([t, mask])
    # t_ = concatenate([t_maxpool, t_avgpool], axis=-1)
    print(x.shape, t.shape)
    # x = Lambda(lambda x: x[:, 0])(x)  # take only [CLS] for classification
    c = concatenate([x, t], axis=-1)
    c = Lambda(lambda c: c[:, 0])(c)
    p = Dense(1, activation='sigmoid')(c)

    model = Model([x1, x2], p)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(2e-5),  # use a sufficiently small learning rate
        metrics=['accuracy'])
    model.summary()
    return model
Example #27
def prepare_model():
  bert_model = load_trained_model_from_checkpoint(CONFIG_PATH, CHECKPOINT_PATH)
  bert_model.summary()
  bert_output_shape = bert_model.output.shape.as_list()
  num_bert_outputs = bert_output_shape[1] * bert_output_shape[2]

  top_model_flatten = keras.Sequential([
    # Need lambda because flatten does not support masking
    # https://github.com/keras-team/keras/issues/4978#issuecomment-303985365
    keras.layers.Lambda(lambda x: x, output_shape=lambda s:s, input_shape=bert_output_shape[1:]),
    keras.layers.Flatten(),
  ])
  top_model_flatten.output_shape
  top_model_flatten.summary()

  top_model_dense = keras.Sequential([
    keras.layers.Dense(1, activation='sigmoid', input_shape=(num_bert_outputs,))
  ])
  top_model_dense.output_shape
  top_model_dense.summary()

  top_model = keras.models.Model(inputs=top_model_flatten.input, outputs=top_model_dense(top_model_flatten.output))
  top_model.output_shape
  top_model.summary()

  # https://github.com/keras-team/keras/issues/3465#issuecomment-314633196
  model = keras.models.Model(inputs=bert_model.input, outputs=top_model(bert_model.output))
  # The default learning rate is 0.001. Decrease it to prevent vanishing gradients (predictions all going to 0) caused by the sigmoid loss.
  # https://ayearofai.com/rohan-4-the-vanishing-gradient-problem-ec68f76ffb9b
  # keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
  optimizer = keras.optimizers.Adam(lr=0.0003)

  model.compile(
      loss='binary_crossentropy',
      optimizer=optimizer,
      metrics=['accuracy'])
  model.summary()
  return model, top_model_dense
Example #28
def get_rcnn_model(config_path, checkpoint_path, train_flag=1):
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    for l in bert_model.layers:
        l.trainable = True

    T1 = Input(shape=(None,))
    T2 = Input(shape=(None,))

    T = bert_model([T1, T2])

    T_ = Bidirectional(LSTM(units=32, return_sequences=True))(T)
    T_ = Bidirectional(LSTM(units=32, return_sequences=True))(T_)
    t_embed_layer = MaskedConv1D(filters=64, kernel_size=3, padding='same', activation='relu')(T_)
    pool = MaskedGlobalMaxPool1D()(t_embed_layer)
    ave = MaskedGlobalAveragePooling1D()(t_embed_layer)
    T_2 = Add()([pool, ave])
    
    #T = Concatenate()([T, T3_])
#    T_2 = Dense(64, activation='relu')(T_2)

    output = Dense(3, activation='softmax')(T_2)

    model = Model([T1, T2], output)
    if train_flag == 1:
        model.compile(
            loss='categorical_crossentropy',
            optimizer=Adam(2e-5),  # use a sufficiently small learning rate
            metrics=['accuracy']
        )
    else:
        model = multi_gpu_model(model, gpus=2)  # set gpus to the number of GPUs to use
        model.compile(
            loss='categorical_crossentropy',
            optimizer=Adam(2e-5),  # use a sufficiently small learning rate
            metrics=['accuracy']
        )
    model.summary()
    return model
Example #29
def build_dis_att_with_bert_zhou():

    bert_token_input = Input(shape=(250,), name='bert_token')
    bert_segment_input = Input(shape=(250,), name='bert_segment')
    bert_m1 = Input(shape=[250], name='bert_m1')
    bert_m2 = Input(shape=[250], name='bert_m2')

    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=250)
    for l in bert_model.layers:
        l.trainable = False
    wordVector = bert_model([bert_token_input, bert_segment_input])

    e1_doc_vec, e2_doc_vec = None, None
    e1_doc_vec = Lambda(get_entity_vector_zhou, output_shape=get_entity_shape)([wordVector, bert_m1])
    e2_doc_vec = Lambda(get_entity_vector_zhou, output_shape=get_entity_shape)([wordVector, bert_m2])
    entity_dense = Dense(768*2, activation='relu')
    e1_doc_vec = entity_dense(e1_doc_vec)
    e2_doc_vec = entity_dense(e2_doc_vec)

    sub = Subtract()([e1_doc_vec, e2_doc_vec])
    # recurrent encoder (BiGRU) over the BERT sequence output
    encoded_seq = Bidirectional(GRU(768, dropout=0.5, recurrent_dropout=0.5, return_sequences=True))(wordVector)
    slice_1 = Lambda(slice, arguments={'h1': 249, 'h2': 250})(encoded_seq)
    slice_1 = Lambda(change_shape, output_shape=out_change_shape)(slice_1)
    att_sub = NormalAttention()([sub, encoded_seq])
    att_e1 = Lambda(my_entity_att, output_shape=out_entity_att)([e1_doc_vec, encoded_seq])
    att_e2 = Lambda(my_entity_att, output_shape=out_entity_att)([e2_doc_vec, encoded_seq])
    z = concatenate([slice_1, att_sub, att_e1, att_e2])
    z = Dropout(0.3)(z)
    z = Dense(256, activation='tanh')(z)
    main_output = Dense(5, activation='softmax', name='main_output')(z)  # (?,5)
    model = Model(inputs=[bert_token_input, bert_segment_input, bert_m1, bert_m2], outputs=main_output)

    model.compile(optimizer="Adam", loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model
Example #30
def model_bert_txt_lstm(config_path,
                        checkpoint_path,
                        metric=f1,
                        max_txt_len=100):
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=None)
    inp_txt_x1 = Input(shape=(max_txt_len, ))
    inp_txt_x2 = Input(shape=(max_txt_len, ))

    # unfreeze the last 20 BERT layers
    for i in range(1, 21):
        bert_model.layers[-i].trainable = True
    x1 = bert_model([inp_txt_x1, inp_txt_x2])
    x1 = Lambda(lambda x: x)(x1)  # identity Lambda drops BERT's mask for the pooling layers
    x1 = SpatialDropout1D(0.3)(x1)
    max_pool = GlobalMaxPooling1D()(x1)
    avg_pool = GlobalAveragePooling1D()(x1)
    pools = Concatenate()([max_pool, avg_pool])
    predictions = Dense(1, activation='sigmoid')(pools)
    model = Model(inputs=[inp_txt_x1, inp_txt_x2], outputs=predictions)
    adam = optimizers.Adam(lr=learning_rate)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=[metric])
    return model