Example #1
def TrainLstmCrf(data_name, model_name):
    n_classes = 4
    max_len = 75
    batch_size = 128
    epoch = 100
    tags = ['S', 'B', 'I', 'E']
    sentences, words = get_sents(datasets=data_name)
    print(len(sentences), len(words))
    word2idx = {w: i + 1 for i, w in enumerate(words)}
    tag2idx = {t: i for i, t in enumerate(tags)}
    vocab_size = len(words) + 1  # +1: word indices start at 1, 0 is kept for padding

    X = [[word2idx[w[0]] for w in s] for s in sentences]
    X = pad_sequences(maxlen=max_len,
                      sequences=X,
                      padding="post",
                      value=0)  # pad with 0, which never collides with a real word index

    y = [[tag2idx[w[1]] for w in s] for s in sentences]
    y = pad_sequences(maxlen=max_len,
                      sequences=y,
                      padding="post",
                      value=tag2idx["E"])
    y = [to_categorical(i, num_classes=n_classes) for i in y]
    # Split the data into train and test sets
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.1)
    print(len(X_tr), len(y_tr), len(X_te), len(y_te))
    s = np.asarray([max_len] * batch_size, dtype='int32')

    # Build the model
    word_ids = Input(batch_shape=(batch_size, max_len), dtype='int32')
    sequence_lengths = Input(batch_shape=[batch_size, 1], dtype='int32')
    print(sequence_lengths)
    word_embeddings = Embedding(vocab_size, n_classes)(word_ids)
    blstm = Bidirectional(LSTM(units=50,
                               return_sequences=True))(word_embeddings)
    model = TimeDistributed(Dense(n_classes, activation='tanh'))(blstm)
    crf = CrfModel()
    pred = crf(inputs=[model, sequence_lengths])
    model = Model(inputs=[word_ids, sequence_lengths], outputs=[pred])
    print("word_ids:{}".format(word_ids))
    print("sequence_lengths:{}".format(sequence_lengths))
    model.compile(optimizer="rmsprop", loss=crf.loss, metrics=['accuracy'])

    print(model.summary())

    k = 0
    for batch_x, batch_y in minibatches(X_tr, y_tr, batch_size=batch_size):
        model.fit([batch_x, s],
                  np.array(batch_y),
                  epochs=epoch,
                  batch_size=batch_size)
        k += 1
        if k % 50 == 0:
            model.save("./models/{}_{}".format(k, model_name))
            print("saved")

    # Save the final model
    model.save(model_name)
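The minibatches helper used in the training loop above is not shown; a minimal sketch of what it is assumed to do (yield consecutive full batches and skip a trailing partial one, since the model is built with a fixed batch_shape) could be:

def minibatches(X, y, batch_size):
    # Hypothetical helper: yields consecutive (x, y) slices of exactly batch_size
    # samples; a trailing partial batch is dropped because the inputs above use a
    # fixed batch_shape of (batch_size, max_len).
    for start in range(0, len(X) - batch_size + 1, batch_size):
        yield X[start:start + batch_size], y[start:start + batch_size]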
Example #2
    pos_emb = Embedding(input_dim=len(pos),
                        output_dim=10,
                        input_length=max_len)(pos_input)
    modified_input = keras.layers.concatenate([word_emb, pos_emb])
    model_1 = Bidirectional(
        LSTM(units=50, return_sequences=True,
             recurrent_dropout=0.1))(modified_input)
    model = TimeDistributed(Dense(50, activation="relu"))(
        model_1)  # a dense layer as suggested by neuralNer
    crf = CRF(n_tags)  # CRF layer
    out = crf(model)  # output
    model = Model([input, pos_input], out)
    model.compile(optimizer="rmsprop",
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    print(model.summary())
    history = model.fit([X_tr, X_pos_tr],
                        np.array(y_tr),
                        batch_size=32,
                        epochs=60,
                        validation_split=0.1,
                        verbose=1)
    #Testing
    test_pred = model.predict([X_te, X_pos_te], verbose=1)
    idx2tag = {i: w for w, i in tag2idx.items()}
    pred_labels = pred2label(test_pred)
    test_labels = pred2label(y_te)
    print("Recall, Precision and F-score are",
          get_recall_precision(test_labels, pred_labels, "Destination"))
    model.save("BILSTM+CRF_with_pos_without_embeddings.model")
Example #3
model.compile(optimizer=rmsprop,
              loss=crf.loss_function,
              metrics=[crf.accuracy])  #use crf
model.summary()

# #early stop
# filepath="ner_ch_em_v4.hdf5"
# checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=2, save_best_only=True, mode='min',save_weights_only=True) #val_acc
# early_stop = EarlyStopping(monitor='val_loss', patience=5, mode='min')
# callbacks_list = [checkpoint, early_stop]

history = model.fit([
    X_word_tr,
    np.array(X_char_tr).reshape((len(X_char_tr), MAX_LEN, max_len_char))
],
                    np.array(y_tr).reshape(len(y_tr), MAX_LEN, 5),
                    batch_size=BATCH_SIZE,
                    epochs=3,
                    validation_split=0.1,
                    verbose=2,
                    shuffle=True)  #,callbacks=callbacks_list

# #save json
# model_json = model.to_json()
# with open("ner_ch_em_v4_json.json", "w") as json_file:
#     json_file.write(model_json)

# # serialize weights to HDF5
# model.save('ner_ch_em_v4_json.h5')
# print("Saved model to disk")
#save_load_utils.save_all_weights(model,'ner_ch_em_v4.h5')
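A working version of the commented-out persistence idea above (a sketch; it assumes the same architecture is rebuilt and compiled before loading, which avoids passing CRF custom objects to a full-model loader):

# Save the architecture as JSON and the weights as HDF5.
model_json = model.to_json()
with open("ner_ch_em_v4_json.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("ner_ch_em_v4.h5")

# Later: rebuild and compile the identical model, then restore the weights.
model.load_weights("ner_ch_em_v4.h5")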
Example #4
def create_model(X_train, y_train, X_test, y_test, look_back, num_features):

    # CNN Model
    cnn = Sequential()

    ks1_first = 10
    ks1_second = 2

    ks2_first = 2
    ks2_second = 10

    cnn.add(
        Conv2D(filters=(2),
               kernel_size=(ks1_first, ks1_second),
               padding='same',
               kernel_initializer='TruncatedNormal'))
    cnn.add(BatchNormalization())
    cnn.add(LeakyReLU())
    cnn.add(Dropout(0.240))

    for _ in range(1):
        cnn.add(
            Conv2D(filters=(8),
                   kernel_size=(ks2_first, ks2_second),
                   padding='same',
                   kernel_initializer='TruncatedNormal'))
        cnn.add(BatchNormalization())
        cnn.add(LeakyReLU())
        cnn.add(Dropout(0.434))

    cnn.add(Flatten())

    # RNN Model
    rnn = Sequential()
    rnn.add(
        CuDNNLSTM(3,
                  return_sequences=True,
                  kernel_initializer='TruncatedNormal'))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout(0.622))

    for _ in range(0):  # range(0): this optional extra LSTM stack is disabled here
        rnn.add(
            CuDNNLSTM(32,
                      kernel_initializer='TruncatedNormal',
                      return_sequences=True))
        rnn.add(BatchNormalization())
        rnn.add(LeakyReLU())
        rnn.add(Dropout(0.612))

    rnn.add(
        CuDNNLSTM(4,
                  kernel_initializer='TruncatedNormal',
                  return_sequences=False))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout(0.281))

    # DNN Model

    dnn = Sequential()

    for _ in range(4):
        dnn.add(Dense(128, kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout(0.006))

    for _ in range(4):
        dnn.add(Dense(16, kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout(0.08))

    for _ in range(4):
        dnn.add(Dense(256, kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout(0.171))

    dnn.add(Dense(512, kernel_initializer='TruncatedNormal'))
    dnn.add(BatchNormalization())
    dnn.add(LeakyReLU())
    dnn.add(Dropout(0.257))

    dnn.add(Dense(1))

    # Putting it all together

    main_input = Input(shape=(
        X_train.shape[1],
        X_train.shape[2]))  # Data has been reshaped to (800, 5, 120, 60, 1)
    reshaped_to_smaller_images = Reshape(target_shape=(24, 5, X_train.shape[2],
                                                       1))(main_input)

    model = TimeDistributed(cnn)(
        reshaped_to_smaller_images)  # apply the CNN to each of the 24 sub-images
    model = rnn(model)  # combine timedistributed cnn with rnn
    model = dnn(model)  # add dense

    # create the model, specify in and output
    model = Model(inputs=main_input, outputs=model)

    model.compile(loss='mse', metrics=['mape'], optimizer='adam')

    early_stopping_monitor = EarlyStopping(
        patience=50000
    )  # Not using earlystopping monitor for now, that's why patience is high
    bs = 256
    epoch_size = 14
    schedule = SGDRScheduler(
        min_lr=4.6e-6,  #1e-5
        max_lr=4.8e-2,  # 1e-2
        steps_per_epoch=np.ceil(epoch_size / bs),
        lr_decay=0.9,
        cycle_length=5,  # 5
        mult_factor=1.5)

    checkpoint1 = ModelCheckpoint("models\\timedist.val_loss.hdf5",
                                  monitor='val_loss',
                                  verbose=1,
                                  save_best_only=True,
                                  mode='min')
    checkpoint2 = ModelCheckpoint("models\\timedist.val_mape.hdf5",
                                  monitor='val_mape',
                                  verbose=1,
                                  save_best_only=True,
                                  mode='min')

    checkpoint4 = ModelCheckpoint("models\\timedist.train_loss.hdf5",
                                  monitor='loss',
                                  verbose=1,
                                  save_best_only=True,
                                  mode='min')
    checkpoint5 = ModelCheckpoint("models\\timedist.train_mape.hdf5",
                                  monitor='mape',
                                  verbose=1,
                                  save_best_only=True,
                                  mode='min')

    result = model.fit(
        X_train,
        y_train,
        batch_size=bs,
        epochs=4000,  # should take 24h ish
        verbose=1,
        validation_split=0.2,
        callbacks=[schedule, checkpoint1, checkpoint2])

    pd.DataFrame(result.history).to_csv('models\\timedist_fit_history.csv')
    #get the highest validation accuracy of the training epochs
    validation_loss = np.amin(result.history['val_loss'])
    print('validation loss of epoch:', validation_loss)
    return model
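The Reshape step assumes the time axis splits evenly into 24 sub-images of 5 rows each; a quick sanity check of those shapes (the 120x60 frame size is an assumption derived from the Reshape target, not from real data):

import numpy as np

# 24 * 5 * 60 * 1 == 120 * 60, so the reshape is lossless for a (N, 120, 60) input.
x = np.zeros((800, 120, 60))
assert x.reshape((800, 24, 5, 60, 1)).shape == (800, 24, 5, 60, 1)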
Example #5
flag_use_iterator_or_function_to_generate_batches = 2
number_of_epochs = 10
if flag_method_of_fitting == 1:
    #(1). Fit using large data already in the form of numpy arrays

    #Generate batches:
    if flag_use_iterator_or_function_to_generate_batches == 1:
        [X, y] = next(training_generator)
    elif flag_use_iterator_or_function_to_generate_batches == 2:
        [X, y] = training_generator_object.generate_function(number_of_batches)

    #Fit:
    history = conv_model_time_distributed.fit(
        x=X,
        y=y,
        batch_size=batch_size,
        epochs=1,
        callbacks=callbacks_list,
        validation_data=[validation_X, validation_y])

    #Plot what's going on:
    figure(1)
    subplot(2, 1, 1)
    plot(history.history['loss'])
    ylabel('loss')
    xlabel('epochs')
    subplot(2, 1, 2)
    plot(sqrt(history.history['mse']))
    ylabel('std')
    xlabel('epochs')
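Neither batch source is defined in this excerpt; a rough sketch of the function-style variant (hypothetical names, assuming the full arrays X_all and y_all fit in memory) might look like:

import numpy as np

def generate_function(number_of_batches, X_all, y_all, batch_size):
    # Hypothetical stand-in for training_generator_object.generate_function:
    # draw number_of_batches * batch_size random samples and return them as [X, y].
    idx = np.random.choice(len(X_all), number_of_batches * batch_size, replace=False)
    return [X_all[idx], y_all[idx]]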
Example #6
def create_model(X_train, y_train, X_test, y_test):

    # CNN Model
    cnn = Sequential()

    ks1_first = {{choice([2, 3, 4, 5, 8, 10])}}
    ks1_second = {{choice([2, 3, 4, 5, 8, 10])}}

    ks2_first = {{choice([2, 3, 4, 5, 8, 10])}}
    ks2_second = {{choice([2, 3, 4, 5, 8, 10])}}

    cnn.add(
        Conv2D(filters=({{choice([1, 2, 3, 4, 5, 8])}}),
               kernel_size=(ks1_first, ks1_second),
               padding='same',
               kernel_initializer='TruncatedNormal'))
    cnn.add(BatchNormalization())
    cnn.add(LeakyReLU())
    cnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3])}}):
        cnn.add(
            Conv2D(filters=({{choice([4, 8])}}),
                   kernel_size=(ks2_first, ks2_second),
                   padding='same',
                   kernel_initializer='TruncatedNormal'))
        cnn.add(BatchNormalization())
        cnn.add(LeakyReLU())
        cnn.add(Dropout({{uniform(0, 1)}}))

    cnn.add(Flatten())

    # RNN Model
    rnn = Sequential()
    rnn.add(
        CuDNNLSTM({{choice([1, 2, 3, 4, 5, 6, 7, 8])}},
                  return_sequences=True,
                  kernel_initializer='TruncatedNormal'))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3, 4, 8])}}):
        rnn.add(
            CuDNNLSTM({{choice([4, 8])}},
                      kernel_initializer='TruncatedNormal',
                      return_sequences=True))
        rnn.add(BatchNormalization())
        rnn.add(LeakyReLU())
        rnn.add(Dropout({{uniform(0, 1)}}))

    rnn.add(
        CuDNNLSTM({{choice([1, 2, 3, 4, 5, 6, 7, 8])}},
                  kernel_initializer='TruncatedNormal',
                  return_sequences=False))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout({{uniform(0, 1)}}))

    # DNN Model

    dnn = Sequential()

    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(
            Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                  kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(
            Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                  kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(
            Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                  kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(
            Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                  kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))

    dnn.add(
        Dense({{choice([8, 16, 32, 64, 128, 256, 512, 1024])}},
              kernel_initializer='TruncatedNormal'))
    dnn.add(BatchNormalization())
    dnn.add(LeakyReLU())
    dnn.add(Dropout({{uniform(0, 1)}}))

    dnn.add(Dense(1))

    # Putting it all together

    main_input = Input(shape=(
        X_train.shape[1],
        X_train.shape[2]))  # Data has been reshaped to (800, 5, 120, 60, 1)
    reshaped_to_smaller_images = Reshape(target_shape=(24, 5, X_train.shape[2],
                                                       1))(main_input)

    model = TimeDistributed(cnn)(
        reshaped_to_smaller_images)  # apply the CNN to each of the 24 sub-images
    model = rnn(model)  # combine timedistributed cnn with rnn
    model = dnn(model)  # add dense

    # create the model, specify in and output
    model = Model(inputs=main_input, outputs=model)

    model.compile(loss='mse', metrics=['mape'], optimizer='nadam')

    early_stopping_monitor = EarlyStopping(
        patience=15)  # early stopping is active here (see the callbacks below)

    bs = {{choice([32, 64, 128, 256])}}

    epoch_size = 1

    if bs == 32:
        epoch_size = 109
    elif bs == 64:
        epoch_size = 56
    elif bs == 128:
        epoch_size = 28
    elif bs == 256:
        epoch_size = 14

    #bs = 256
    #epoch_size = 14

    schedule = SGDRScheduler(
        min_lr={{uniform(1e-8, 1e-5)}},  #1e-5
        max_lr={{uniform(1e-3, 1e-1)}},  # 1e-2
        steps_per_epoch=np.ceil(epoch_size / bs),
        lr_decay=0.9,
        cycle_length=5,  # 5
        mult_factor=1.5)

    result = model.fit(X_train,
                       y_train,
                       batch_size=bs,
                       epochs=100,
                       verbose=1,
                       validation_split=0.2,
                       callbacks=[early_stopping_monitor, schedule])

    #get the highest validation accuracy of the training epochs

    hist = pd.DataFrame(result.history['val_loss']).fillna(
        9999)  #replace nan with 9999
    val_loss = np.amin(hist.values)

    print('Best validation loss of epoch:', val_loss)

    K.clear_session()  # Clear the tensorflow session (Free up RAM)

    return {
        'loss': val_loss,
        'status': STATUS_OK
    }  # Not returning model to save RAM
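The double-brace expressions ({{choice(...)}}, {{uniform(...)}}) are hyperas template syntax, so create_model is not called directly; it is handed to hyperas, roughly like this (a sketch; it assumes a data() function returning X_train, y_train, X_test, y_test):

from hyperopt import Trials, tpe
from hyperas import optim

# hyperas rewrites create_model, sampling a concrete value for every {{...}} expression.
best_run, best_model = optim.minimize(model=create_model,
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=50,
                                      trials=Trials())
print("Best hyperparameter combination:", best_run)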
Example #7
    def model_with_padding(self, DICT, n_char):

        # get sequences and labels separated.
        # convert BIO tags to numbers
        sequences, labels = self.get_seq(DICT)

        # sequences = sequences[:100]
        # labels = labels[:100]

        # X = pad_sequences(sequences, maxlen=self.w_arit_mean, padding='post', truncating='post')
        # y_pad = pad_sequences(labels, maxlen=self.w_arit_mean, padding='post', truncating='post')

        X = pad_sequences(sequences, maxlen=self.maxSeqLength, padding='post')
        y_pad = pad_sequences(labels, maxlen=self.maxSeqLength, padding='post')

        y = [to_categorical(i, num_classes=self.lab_len) for i in y_pad]

        # early stopping and best epoch
        #early_stop = keras.callbacks.EarlyStopping(monitor='loss', patience=2, verbose=0, mode='auto')
        #filepath = "max-seq.h5"
        #checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='max')
        #callbacks_list = [checkpoint, early_stop]

        # Set up the keras model
        input = Input(shape=(self.maxSeqLength, ))
        el = Embedding(n_char + 1, 200, name="embed")(input)
        bl1 = Bidirectional(LSTM(128,
                                 return_sequences=True,
                                 recurrent_dropout=0.5,
                                 dropout=0.5),
                            merge_mode="concat",
                            name="lstm1")(el)
        bl2 = Bidirectional(LSTM(64,
                                 return_sequences=True,
                                 recurrent_dropout=0.5,
                                 dropout=0.5),
                            merge_mode="concat",
                            name="lstm2")(bl1)
        bl3 = Bidirectional(LSTM(64,
                                 return_sequences=True,
                                 recurrent_dropout=0.5,
                                 dropout=0.5),
                            merge_mode="concat",
                            name="lstm3")(bl2)
        model = TimeDistributed(Dense(self.lab_len, activation="relu"))(bl3)
        crf = CRF(self.lab_len)  # CRF layer
        out = crf(model)  # output

        model = Model(input, out)
        model.compile(optimizer="rmsprop",
                      loss=crf.loss_function,
                      metrics=[crf.accuracy])
        model.summary()

        # train with 32, 147, 245, 735
        history = model.fit(X,
                            np.array(y),
                            batch_size=32,
                            epochs=self.epochsN,
                            validation_split=0.0,
                            verbose=1)
        # save all epochs
        save_load_utils.save_all_weights(model,
                                         'max_seq_%s_32b.h5' % self.epochsN)
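The weights stored with save_load_utils can be restored later into an identically constructed and compiled model; a short sketch, assuming keras-contrib's matching loader:

from keras_contrib.utils import save_load_utils

# Rebuild and compile the exact same architecture first, then reload the weights;
# weights_path is the 'max_seq_..._32b.h5' file written above.
save_load_utils.load_all_weights(model, weights_path)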
Example #8
    def model_no_padding(self, DICT, n_char):

        # convert BIO tags to numbers
        self.convert_tags()
        '''
        check if bion contains 'B' and 'I'
        for i in self.train_data:
            print(i['bion'])
        '''

        for i in range(len(self.train_data)):
            corp = self.train_data[i]['corpus']

            corp_num = []
            for c in corp:
                corp_num.append(DICT.get(c))
            self.train_data[i]['corpus'] = corp_num

        # get all sizes from the sequences with training data
        train_l_d = {}
        train_l_labels = {}
        for seq in self.train_data:
            # corpus
            l = len(seq['corpus'])
            if l not in train_l_d: train_l_d[l] = []
            train_l_d[l].append(seq['corpus'])

            # labels
            l1 = len(seq['bion'])
            if l1 not in train_l_labels: train_l_labels[l1] = []
            train_l_labels[l1].append(seq['bion'])
        '''
        for i in range(len(train_l_d[110])):
            print(len(train_l_d[110][i]) == len(train_l_labels[110][i]))
            print()
        print("\n\n")

        for i in range(len(train_l_d[31])):
            print(len(train_l_d[31][i]) == len(train_l_labels[31][i]))
        print("\n\n")

        for i in range(len(train_l_d[103])):
            print(len(train_l_d[103][i]) == len(train_l_labels[103][i]))
        print("\n\n")
        exit()
        '''
        sizes = list(train_l_d.keys())

        # Set up the keras model
        il = Input(shape=(None, ), dtype='int32')
        el = Embedding(n_char + 1, 200, name="embed")(il)
        bl1 = Bidirectional(LSTM(128,
                                 return_sequences=True,
                                 recurrent_dropout=0.5,
                                 dropout=0.5),
                            merge_mode="concat",
                            name="lstm1")(el)
        bl2 = Bidirectional(LSTM(64,
                                 return_sequences=True,
                                 recurrent_dropout=0.5,
                                 dropout=0.5),
                            merge_mode="concat",
                            name="lstm2")(bl1)
        bl3 = Bidirectional(LSTM(64,
                                 return_sequences=True,
                                 recurrent_dropout=0.5,
                                 dropout=0.5),
                            merge_mode="concat",
                            name="lstm3")(bl2)
        model = TimeDistributed(Dense(self.num_labs, activation="relu"))(bl3)
        crf = CRF(self.num_labs)  # CRF layer
        out = crf(model)  # output

        model = Model(il, out)
        model.compile(optimizer="rmsprop",
                      loss=crf.loss_function,
                      metrics=[crf.accuracy])
        model.summary()

        f_best = -1
        f_index = -1
        # OK, start actually training
        for epoch in range(self.epochsN):
            print("Epoch", epoch, "start at", datetime.now())
            # Train in batches of different sizes - randomize the order of sizes
            # Except for the first few epochs
            if epoch > 2:
                random.shuffle(sizes)
            for size in sizes:
                batch = train_l_d[size]
                labs = train_l_labels[size]

                tx = np.array([seq for seq in batch])
                y = [seq for seq in labs]

                ty = [to_categorical(i, num_classes=self.num_labs) for i in y]

                # This trains in mini-batches
                model.fit(tx, np.array(ty), verbose=0, epochs=1)
            print("Trained at", datetime.now())

            # save all epochs
            save_load_utils.save_all_weights(
                model, 'mini-batch-results/epoch_%s.h5' % epoch)
            # test the results
            self.test_minibatch(DICT, model)
            f = self.eval()

            if f > f_best:
                f_best = f
                f_index = epoch

        # Pick the best model, and save it with a useful name
        print("Choosing the best epoch")
        shutil.copyfile("mini-batch-results/epoch_%s.h5" % f_index,
                        "minibatch_%s.h5" % f_index)