Example #1
def TrainLstmCrf(data_name, model_name):
    n_classes = 4
    max_len = 75
    batch_size = 128
    epoch = 100
    tags = ['S', 'B', 'I', 'E']
    sentences, words = get_sents(datasets=data_name)
    print(len(sentences), len(words))
    word2idx = {w: i + 1 for i, w in enumerate(words)}
    tag2idx = {t: i for i, t in enumerate(tags)}
    vocab_size = len(words)

    X = [[word2idx[w[0]] for w in s] for s in sentences]
    X = pad_sequences(maxlen=max_len,
                      sequences=X,
                      padding="post",
                      value=vocab_size - 1)

    y = [[tag2idx[w[1]] for w in s] for s in sentences]
    y = pad_sequences(maxlen=max_len,
                      sequences=y,
                      padding="post",
                      value=tag2idx["E"])
    y = [to_categorical(i, num_classes=n_classes) for i in y]
    # get the data
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.1)
    print(len(X_tr), len(y_tr), len(X_te), len(y_te))
    s = np.asarray([max_len] * batch_size, dtype='int32')

    # build the model
    word_ids = Input(batch_shape=(batch_size, max_len), dtype='int32')
    sequence_lengths = Input(batch_shape=[batch_size, 1], dtype='int32')
    print(sequence_lengths)
    word_embeddings = Embedding(vocab_size + 1, n_classes)(word_ids)  # +1: word indices start at 1
    blstm = Bidirectional(LSTM(units=50,
                               return_sequences=True))(word_embeddings)
    model = TimeDistributed(Dense(4, activation='tanh'))(blstm)
    crf = CrfModel()
    pred = crf(inputs=[model, sequence_lengths])
    model = Model(inputs=[word_ids, sequence_lengths], outputs=[pred])
    print("word_ids:{}".format(word_ids))
    print("sequence_lengths:{}".format(sequence_lengths))
    model.compile(optimizer="rmsprop", loss=crf.loss, metrics=['accuracy'])

    print(model.summary())

    k = 0
    for batch_x, batch_y in minibatches(X_tr, y_tr, batch_size=batch_size):
        model.fit([batch_x, s],
                  np.array(batch_y),
                  epochs=epoch,
                  batch_size=batch_size)
        k += 1
        if k % 50 == 0:
            model.save("./models/{}_{}".format(k, model_name))
            print("saved")

    # save the model
    model.save(model_name)
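A minimal invocation sketch for TrainLstmCrf; the dataset key and output filename below are placeholders, since get_sents(), CrfModel() and minibatches() are defined elsewhere in the repository.

# Placeholder arguments: whatever dataset name get_sents() understands,
# and the path the trained model should be written to.
TrainLstmCrf(data_name="train", model_name="lstm_crf.h5")
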
def new_model(image_size=299, video_length=40, cnn_trainable=False):
    inputs = Input(shape=(video_length, image_size, image_size, 3))
    cnn = inception_v3.InceptionV3(include_top=False, weights='imagenet',
                                   pooling='avg')  # pool to a per-frame vector so the LSTM receives 3D input
    cnn.trainable = cnn_trainable  # freeze/unfreeze the backbone itself, not the wrapped tensor
    model = TimeDistributed(cnn)(inputs)

    model = LSTM(512)(model)
    model = Dropout(0.5)(model)
    model = Dense(1, activation='sigmoid')(model)  # sigmoid: a single-unit softmax is always 1
    model = Model(inputs=inputs, outputs=model)

    adam = keras.optimizers.Adam(learning_rate=1e-5)
    model.compile(loss='binary_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'])

    model.summary()
    return model
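
A small smoke-test sketch for new_model, assuming Keras and the InceptionV3 ImageNet weights are available; the random frames and binary labels below are stand-ins for real video data.

import numpy as np

# Two fake "videos" of 40 frames at 299x299x3, with binary labels.
model = new_model(image_size=299, video_length=40, cnn_trainable=False)
X = np.random.rand(2, 40, 299, 299, 3).astype('float32')
y = np.array([0., 1.], dtype='float32')
model.fit(X, y, batch_size=2, epochs=1)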
def temporal_convs_linear(n_nodes,
                          conv_len,
                          n_classes,
                          n_feat,
                          max_len,
                          causal=False,
                          loss='categorical_crossentropy',
                          optimizer='adam',
                          return_param_str=False):
    """ Used in paper: 
    Segmental Spatiotemporal CNNs for Fine-grained Action Segmentation
    Lea et al. ECCV 2016

    Note: Spatial dropout was not used in the original paper. 
    It tends to improve performance a little.  
    """

    inputs = Input(shape=(max_len, n_feat))
    model = inputs
    if causal: model = ZeroPadding1D((conv_len // 2, 0))(model)
    model = Convolution1D(n_nodes,
                          conv_len,
                          input_dim=n_feat,
                          input_length=max_len,
                          border_mode='same',
                          activation='relu')(model)
    if causal: model = Cropping1D((0, conv_len // 2))(model)

    model = SpatialDropout1D(0.3)(model)

    model = TimeDistributed(Dense(n_classes, activation="softmax"))(model)

    model = Model(input=inputs, output=model)
    model.compile(loss=loss,
                  optimizer=optimizer,
                  sample_weight_mode="temporal")

    if return_param_str:
        param_str = "tConv_C{}".format(conv_len)
        if causal:
            param_str += "_causal"

        return model, param_str
    else:
        return model
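
An example call sketch for temporal_convs_linear, assuming the Keras 1.x API the snippet is written against (border_mode, Model(input=..., output=...)); the filter count, kernel length, class count, feature dimension, and sequence length are illustrative values, not ones from the paper.

# 64 filters of length 25 over 128-dim per-frame features,
# sequences padded to 200 steps, 10 action classes.
model, name = temporal_convs_linear(n_nodes=64, conv_len=25, n_classes=10,
                                    n_feat=128, max_len=200,
                                    causal=True, return_param_str=True)
print(name)  # "tConv_C25_causal"
model.summary()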
Example #4
def seq2seq():
    n_classes = len(LABELS)

    converse_input = Input(shape=(None, SENTENCE_ENCODING_DIM))
    # length_input = Input(shape=(None, 1))
    # word_input = Input(shape=(None, WORD_EMBEDDING_DIM))
    time_input = Input(shape=(None, 1))

    converse = Masking(mask_value=-1.)(converse_input)
    converse = Dropout(0.2)(converse)
    converse = Bidirectional(LSTM(1024, return_sequences=True))(converse)
    converse = Bidirectional(LSTM(1024, return_sequences=True))(converse)
    converse = Dropout(0.3)(converse)

    # lengths = Masking(mask_value=-1)(length_input)

    # words = Masking(mask_value=-1.)(word_input)
    # words = Dropout(0.2)(words)

    model = concatenate([converse, time_input], axis=-1)

    # print("merged output shape", model.output_shape)

    model = TimeDistributed(Dense(1024, activation='relu'))(model)
    model = Dropout(0.3)(model)
    model = TimeDistributed(Dense(512, activation='relu'))(model)
    model = Dropout(0.3)(model)
    # predictions = TimeDistributed(Dense(n_classes, activation='softmax'))(model)

    crf = CRF(n_classes, sparse_target=True)
    predictions = crf(model)

    model = Model(inputs=[converse_input, time_input], outputs=predictions)
    model.summary()
    # model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
    return model
Example #5
    def NN_model(self, word2idx, char2idx, max_len, max_len_char, n_tags):
        word_in = Input(shape=(max_len, ))
        n_words = len(word2idx.keys())
        n_chars = len(char2idx.keys())
        emb_word = Embedding(input_dim=n_words,
                             output_dim=30,
                             input_length=max_len,
                             mask_zero=True)(word_in)
        char_in = Input(shape=(
            max_len,
            max_len_char,
        ))
        emb_char = TimeDistributed(
            Embedding(input_dim=n_chars,
                      output_dim=10,
                      input_length=max_len_char,
                      mask_zero=True))(char_in)

        char_enc = TimeDistributed(
            LSTM(units=20, return_sequences=False,
                 recurrent_dropout=0.3))(emb_char)
        x = concatenate([emb_word, char_enc])
        x = SpatialDropout1D(0.3)(x)
        main_lstm = Bidirectional(
            LSTM(units=30, return_sequences=True, recurrent_dropout=0.4))(x)
        model = TimeDistributed(Dense(35, activation='relu'))(main_lstm)
        crf = CRF(n_tags, learn_mode='marginal')
        out = crf(model)  # prob
        model = Model([word_in, char_in], out)
        #         from keras.utils.vis_utils import plot_model
        #         plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
        model.compile(optimizer='rmsprop',
                      loss=crf_loss,
                      metrics=[crf.accuracy])
        # model.summary()
        return model
main_lstm = Bidirectional(LSTM(units=50, return_sequences=True,
                               recurrent_dropout=0.6))(x)  # try dropout 0.1?
model = TimeDistributed(Dense(50, activation="relu"))(main_lstm)

crf = CRF(n_tags+1)  # CRF layer, n_tags+1 (PAD)
out = crf(model)  # output

# out = Lambda(lambda x: K.reshape(x,(-1,5)))(out)
model = Model([word_in, char_in], out)

# set optimizer 
# rmsprop = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=1e-5)

adam = optimizers.Adam(lr=0.01, epsilon=None, decay=1e-1)
model.compile(optimizer=adam, loss=crf.loss_function, metrics=[crf.accuracy]) #use crf
model.summary()
#sample_weight_mode="temporal"


tr_pubs = pub_ids[:int(len(pub_ids) * 0.9)]
val_pubs = pub_ids[int(len(pub_ids) * 0.9):]

train = subdata_getter(tr_pubs, data)
validation = subdata_getter(val_pubs, data)


tr_generator = DataGenerator(tr_pubs, train)
val_generator = DataGenerator(val_pubs, validation)

history = NBatchLogger()
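
A hedged sketch of how the pieces above would typically be wired together for training; DataGenerator is assumed to behave like a keras Sequence, and the epoch count is a placeholder.

# Train from the generators, logging per-batch metrics via NBatchLogger.
model.fit_generator(tr_generator,
                    validation_data=val_generator,
                    epochs=20,
                    callbacks=[history])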
Example #7
main_lstm = Bidirectional(LSTM(units=50, return_sequences=True,
                               recurrent_dropout=0.4))(x)
# attention
attention_sum = keras.layers.multiply([main_lstm, alpha_in])
attention_conc = concatenate([attention_sum, main_lstm])

model = TimeDistributed(Dense(75, activation="tanh"))(attention_conc)
model = TimeDistributed(Dense(50, activation="tanh"))(model)

#CRF layers
crf = CRF(n_tags+1)  # CRF layer, n_tags+1(PAD)
out = crf(model)  # output
model = Model([word_in, char_in, alpha_in], out)

# set optimizer 
rmsprop = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=1e-3)
model.compile(optimizer=rmsprop, loss=crf.loss_function, metrics=[crf.accuracy]) 
model.summary()

# DataGenerator and fit into Model
####################################################################################################################################

tr_pubs = pub_ids[:int(len(pub_ids) * 0.9)]
val_pubs = pub_ids[int(len(pub_ids) * 0.9):]

train = subdata_getter(tr_pubs, data)
validation = subdata_getter(val_pubs, data)

tr_generator = DataGenerator(tr_pubs, train)
val_generator = DataGenerator(val_pubs, validation)

Example #8
    def test_exist(self, glove, test_data, test_labels):
        # get word embeddings
        utils = wordUtils.Utils()

        if glove:
            # use glove
            self.words_list, self.embedding_matrix = utils.load_glove()
            unword_n = len(self.words_list)

        else:
            self.words_list, self.embedding_matrix = utils.load_word2vec()
            unword_n = len(self.words_list)

        # get the training corpus
        cr = corpusreader.CorpusReader(test_data, test_labels)
        corpus = cr.trainseqs

        # get the number of the embedding
        for idx in range(len(corpus)):
            words = corpus[idx]['tokens']
            words_id = []
            for i in words:

                # get the number of the embedding
                try:
                    # the index of the word in the embedding matrix
                    index = self.words_list.index(i)
                except ValueError:
                    # use the embedding full of zeros to identify an unknown word
                    index = unword_n

                # the index of the word in the embedding matrix
                words_id.append(index)

            corpus[idx]['embs'] = words_id

        input = Input(shape=(None,))
        el = Embedding(len(self.words_list) + 1, 200, weights=[self.embedding_matrix], trainable=False)(input)
        bl1 = Bidirectional(LSTM(128, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                            merge_mode="concat",
                            name="lstm1")(el)
        bl2 = Bidirectional(LSTM(64, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                            merge_mode="concat",
                            name="lstm2")(bl1)
        bl3 = Bidirectional(LSTM(64, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                            merge_mode="concat",
                            name="lstm3")(bl2)
        model = TimeDistributed(Dense(50, activation="relu"))(bl3)  # a dense layer as suggested by neuralNer
        crf = CRF(self.lab_len)  # CRF layer
        out = crf(model)  # output

        model = Model(input, out)
        model.compile(optimizer="rmsprop", loss=crf.loss_function, metrics=[crf.accuracy])
        model.summary()
        save_load_utils.load_all_weights(model, 'word_models/words_glove_multiLSTM31.h5')

        for doc in corpus:
            doc_arr = doc['embs']
            p = model.predict(np.array([doc_arr]))
            p = np.argmax(p, axis=-1)

            position = 0
            offsets = defaultdict(list)
            counter = 0
            # check if there are any mutations identified
            # {'O': 0, 'B-E': 1, 'I-E': 2, 'E-E': 3, 'S-E': 4}
            B = False
            last = 0
            for idx in p[0]:
                if idx == 1 and last == 1:
                    counter = counter + 1
                    offsets[counter].append(position)
                    B = True
                elif idx == 1:
                    B = True
                    offsets[counter].append(position)
                    last = 1
                elif idx == 2 and B:
                    offsets[counter].append(position)
                    last = 2
                elif idx == 3 and B:
                    offsets[counter].append(position)
                    last = 3
                    B = False
                    counter = counter + 1
                elif idx == 4:
                    offsets[counter].append(position)
                    counter = counter + 1
                    last = 4
                else:
                    B = False

                position = position + 1

            # open file to write
            textid = str(doc['textid'])
            abstract = open("words-silver/" + textid + ".a1", 'w')
            for i in offsets:
                word = offsets.get(i)
                size = len(word)
                if size == 1:
                    s = word[0]  # just one; singleton

                    abstract.write(str(doc['tokstart'][s]) + "\t")
                    abstract.write(str(doc['tokend'][s]) + "\t")
                    abstract.write(str(doc['tokens'][s]) + "\n")


                elif size > 1:
                    s = word[0]  # start of token
                    e = word[-1]  # end of token

                    abstract.write(str(doc['tokstart'][s]) + "\t")
                    abstract.write(str(doc['tokend'][e]) + "\t")
                    token = ""
                    for c in word:
                        token = token + doc['tokens'][c]

                    abstract.write(str(token) + "\n")
def create_model(X_train, y_train, X_test, y_test, look_back, num_features):

    # CNN Model
    cnn = Sequential()

    ks1_first = 10
    ks1_second = 2

    ks2_first = 2
    ks2_second = 10

    cnn.add(
        Conv2D(filters=(2),
               kernel_size=(ks1_first, ks1_second),
               padding='same',
               kernel_initializer='TruncatedNormal'))
    cnn.add(BatchNormalization())
    cnn.add(LeakyReLU())
    cnn.add(Dropout(0.240))

    for _ in range(1):
        cnn.add(
            Conv2D(filters=(8),
                   kernel_size=(ks2_first, ks2_second),
                   padding='same',
                   kernel_initializer='TruncatedNormal'))
        cnn.add(BatchNormalization())
        cnn.add(LeakyReLU())
        cnn.add(Dropout(0.434))

    cnn.add(Flatten())

    # RNN Model
    rnn = Sequential()
    rnn.add(
        CuDNNLSTM(3,
                  return_sequences=True,
                  kernel_initializer='TruncatedNormal'))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout(0.622))

    for _ in range(0):
        rnn.add(
            CuDNNLSTM(32,
                      kernel_initializer='TruncatedNormal',
                      return_sequences=True))
        rnn.add(BatchNormalization())
        rnn.add(LeakyReLU())
        rnn.add(Dropout(0.612))

    rnn.add(
        CuDNNLSTM(4,
                  kernel_initializer='TruncatedNormal',
                  return_sequences=False))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout(0.281))

    # DNN Model

    dnn = Sequential()

    for _ in range(4):
        dnn.add(Dense(128, kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout(0.006))

    for _ in range(4):
        dnn.add(Dense(16, kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout(0.08))

    for _ in range(4):
        dnn.add(Dense(256, kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout(0.171))

    dnn.add(Dense(512, kernel_initializer='TruncatedNormal'))
    dnn.add(BatchNormalization())
    dnn.add(LeakyReLU())
    dnn.add(Dropout(0.257))

    dnn.add(Dense(1))

    # Putting it all together

    main_input = Input(shape=(
        X_train.shape[1],
        X_train.shape[2]))  # Data has been reshaped to (800, 5, 120, 60, 1)
    reshaped_to_smaller_images = Reshape(target_shape=(24, 5, X_train.shape[2],
                                                       1))(main_input)

    model = TimeDistributed(cnn)(
        reshaped_to_smaller_images)  # this should make the cnn 'run' 5 times?
    model = rnn(model)  # combine timedistributed cnn with rnn
    model = dnn(model)  # add dense

    # create the model, specify in and output
    model = Model(inputs=main_input, outputs=model)

    model.compile(loss='mse', metrics=['mape'], optimizer='adam')

    early_stopping_monitor = EarlyStopping(
        patience=50000
    )  # Not using earlystopping monitor for now, that's why patience is high
    bs = 256
    epoch_size = 14
    schedule = SGDRScheduler(
        min_lr=4.6e-6,  #1e-5
        max_lr=4.8e-2,  # 1e-2
        steps_per_epoch=np.ceil(epoch_size / bs),
        lr_decay=0.9,
        cycle_length=5,  # 5
        mult_factor=1.5)

    checkpoint1 = ModelCheckpoint("models\\timedist.val_loss.hdf5",
                                  monitor='val_loss',
                                  verbose=1,
                                  save_best_only=True,
                                  mode='min')
    checkpoint2 = ModelCheckpoint("models\\timedist.val_mape.hdf5",
                                  monitor='val_mape',
                                  verbose=1,
                                  save_best_only=True,
                                  mode='min')

    checkpoint4 = ModelCheckpoint("models\\timedist.train_loss.hdf5",
                                  monitor='loss',
                                  verbose=1,
                                  save_best_only=True,
                                  mode='min')
    checkpoint5 = ModelCheckpoint("models\\timedist.train_mape.hdf5",
                                  monitor='mape',
                                  verbose=1,
                                  save_best_only=True,
                                  mode='min')

    result = model.fit(
        X_train,
        y_train,
        batch_size=bs,
        epochs=4000,  # should take 24h ish
        verbose=1,
        validation_split=0.2,
        callbacks=[schedule, checkpoint1, checkpoint2])

    pd.DataFrame(result.history).to_csv('models\\timedist_fit_history.csv')
    #get the highest validation accuracy of the training epochs
    validation_loss = np.amin(result.history['val_loss'])
    print('validation loss of epoch:', validation_loss)
    return model
Example #10
###tensorflow session:
#sess = tf.Session()
#K.set_session(sess)
#init_op = tf.global_variables_initializer()
#sess.run(init_op)

###### COMPILE: ######

# Set GLOBAL VARIABLES which go into all sorts of possible functions:
max_shift_number_global = 3

#COMPILE MODEL:
#from keras.optimizers import Adam,SGD,RMSprop,Adagrad,Nadam,TFOptimizer
metrics_list = ['mse']
conv_model_time_distributed.compile(optimizer='adam',
                                    loss=custom_loss_function,
                                    metrics=metrics_list)

#Fit Model:
flag_method_of_fitting = 3

#Callbacks:
filepath_string = 'deep_speckles_weights.{epoch:02d}_{val_loss:.2f}.hdf5'
model_checkpoint_function = ModelCheckpoint(filepath_string,
                                            period=1,
                                            monitor='val_loss',
                                            verbose=0,
                                            save_best_only=False,
                                            save_weights_only=False,
                                            mode='auto')
custom_callback_function = custom_callback()
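
A sketch of the fit call that would follow the compile step above; X_train and y_train stand in for the speckle image sequences and their targets, which are prepared elsewhere, and the epoch count is a placeholder.

number_of_epochs = 10  # placeholder
conv_model_time_distributed.fit(X_train, y_train,
                                validation_split=0.1,
                                epochs=number_of_epochs,
                                callbacks=[model_checkpoint_function,
                                           custom_callback_function])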
Example #11
def create_model(X_train, y_train, X_test, y_test):

    # CNN Model
    cnn = Sequential()

    ks1_first = {{choice([2, 3, 4, 5, 8, 10])}}
    ks1_second = {{choice([2, 3, 4, 5, 8, 10])}}

    ks2_first = {{choice([2, 3, 4, 5, 8, 10])}}
    ks2_second = {{choice([2, 3, 4, 5, 8, 10])}}

    cnn.add(
        Conv2D(filters=({{choice([1, 2, 3, 4, 5, 8])}}),
               kernel_size=(ks1_first, ks1_second),
               padding='same',
               kernel_initializer='TruncatedNormal'))
    cnn.add(BatchNormalization())
    cnn.add(LeakyReLU())
    cnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3])}}):
        cnn.add(
            Conv2D(filters=({{choice([4, 8])}}),
                   kernel_size=(ks2_first, ks2_second),
                   padding='same',
                   kernel_initializer='TruncatedNormal'))
        cnn.add(BatchNormalization())
        cnn.add(LeakyReLU())
        cnn.add(Dropout({{uniform(0, 1)}}))

    cnn.add(Flatten())

    # RNN Model
    rnn = Sequential()
    rnn.add(
        CuDNNLSTM({{choice([1, 2, 3, 4, 5, 6, 7, 8])}},
                  return_sequences=True,
                  kernel_initializer='TruncatedNormal'))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3, 4, 8])}}):
        rnn.add(
            CuDNNLSTM({{choice([4, 8])}},
                      kernel_initializer='TruncatedNormal',
                      return_sequences=True))
        rnn.add(BatchNormalization())
        rnn.add(LeakyReLU())
        rnn.add(Dropout({{uniform(0, 1)}}))

    rnn.add(
        CuDNNLSTM({{choice([1, 2, 3, 4, 5, 6, 7, 8])}},
                  kernel_initializer='TruncatedNormal',
                  return_sequences=False))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout({{uniform(0, 1)}}))

    # DNN Model

    dnn = Sequential()

    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(
            Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                  kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(
            Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                  kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(
            Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                  kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(
            Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                  kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))

    dnn.add(
        Dense({{choice([8, 16, 32, 64, 128, 256, 512, 1024])}},
              kernel_initializer='TruncatedNormal'))
    dnn.add(BatchNormalization())
    dnn.add(LeakyReLU())
    dnn.add(Dropout({{uniform(0, 1)}}))

    dnn.add(Dense(1))

    # Putting it all together

    main_input = Input(shape=(
        X_train.shape[1],
        X_train.shape[2]))  # Data has been reshaped to (800, 5, 120, 60, 1)
    reshaped_to_smaller_images = Reshape(target_shape=(24, 5, X_train.shape[2],
                                                       1))(main_input)

    model = TimeDistributed(cnn)(
        reshaped_to_smaller_images)  # this should make the cnn 'run' 5 times?
    model = rnn(model)  # combine timedistributed cnn with rnn
    model = dnn(model)  # add dense

    # create the model, specify in and output
    model = Model(inputs=main_input, outputs=model)

    model.compile(loss='mse', metrics=['mape'], optimizer='nadam')

    early_stopping_monitor = EarlyStopping(
        patience=15
    )  # stops a trial after 15 epochs without val_loss improvement

    bs = {{choice([32, 64, 128, 256])}}

    epoch_size = 1

    if bs == 32:
        epoch_size = 109
    elif bs == 64:
        epoch_size = 56
    elif bs == 128:
        epoch_size = 28
    elif bs == 256:
        epoch_size = 14

    #bs = 256
    #epoch_size = 14

    schedule = SGDRScheduler(
        min_lr={{uniform(1e-8, 1e-5)}},  #1e-5
        max_lr={{uniform(1e-3, 1e-1)}},  # 1e-2
        steps_per_epoch=np.ceil(epoch_size / bs),
        lr_decay=0.9,
        cycle_length=5,  # 5
        mult_factor=1.5)

    result = model.fit(X_train,
                       y_train,
                       batch_size=bs,
                       epochs=100,
                       verbose=1,
                       validation_split=0.2,
                       callbacks=[early_stopping_monitor, schedule])

    #get the highest validation accuracy of the training epochs

    hist = pd.DataFrame(result.history['val_loss']).fillna(
        9999)  #replace nan with 9999
    val_loss = np.amin(hist.values)

    print('Best validation loss of epoch:', val_loss)

    K.clear_session()  # Clear the tensorflow session (Free up RAM)

    return {
        'loss': val_loss,
        'status': STATUS_OK
    }  # Not returning model to save RAM
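
Because this version of create_model uses hyperas' {{choice}}/{{uniform}} template markers, it is meant to be handed to optim.minimize rather than called directly. The sketch below assumes a data() function exists that returns X_train, y_train, X_test, y_test; since create_model returns only the loss, the best model itself is not kept.

from hyperopt import Trials, tpe
from hyperas import optim

# Run the hyperparameter search over the templated choices above.
best_run, _ = optim.minimize(model=create_model,
                             data=data,
                             algo=tpe.suggest,
                             max_evals=50,
                             trials=Trials())
print("Best hyperparameters:", best_run)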
Example #12
    def model_with_padding(self, DICT, n_char):

        # get sequences and labels separated.
        # convert BIO tags to numbers
        sequences, labels = self.get_seq(DICT)

        # sequences = sequences[:100]
        # labels = labels[:100]

        # X = pad_sequences(sequences, maxlen=self.w_arit_mean, padding='post', truncating='post')
        # y_pad = pad_sequences(labels, maxlen=self.w_arit_mean, padding='post', truncating='post')

        X = pad_sequences(sequences, maxlen=self.maxSeqLength, padding='post')
        y_pad = pad_sequences(labels, maxlen=self.maxSeqLength, padding='post')

        y = [to_categorical(i, num_classes=self.lab_len) for i in y_pad]

        # early stopping and best epoch
        #early_stop = keras.callbacks.EarlyStopping(monitor='loss', patience=2, verbose=0, mode='auto')
        #filepath = "max-seq.h5"
        #checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='max')
        #callbacks_list = [checkpoint, early_stop]

        # Set up the keras model
        input = Input(shape=(self.maxSeqLength, ))
        el = Embedding(n_char + 1, 200, name="embed")(input)
        bl1 = Bidirectional(LSTM(128,
                                 return_sequences=True,
                                 recurrent_dropout=0.5,
                                 dropout=0.5),
                            merge_mode="concat",
                            name="lstm1")(el)
        bl2 = Bidirectional(LSTM(64,
                                 return_sequences=True,
                                 recurrent_dropout=0.5,
                                 dropout=0.5),
                            merge_mode="concat",
                            name="lstm2")(bl1)
        bl3 = Bidirectional(LSTM(64,
                                 return_sequences=True,
                                 recurrent_dropout=0.5,
                                 dropout=0.5),
                            merge_mode="concat",
                            name="lstm3")(bl2)
        model = TimeDistributed(Dense(self.lab_len, activation="relu"))(bl3)
        crf = CRF(self.lab_len)  # CRF layer
        out = crf(model)  # output

        model = Model(input, out)
        model.compile(optimizer="rmsprop",
                      loss=crf.loss_function,
                      metrics=[crf.accuracy])
        model.summary()

        # train with 32, 147, 245, 735
        history = model.fit(X,
                            np.array(y),
                            batch_size=32,
                            epochs=self.epochsN,
                            validation_split=0.0,
                            verbose=1)
        # save all epochs
        save_load_utils.save_all_weights(model,
                                         'max_seq_%s_32b.h5' % self.epochsN)
Example #13
    def model_no_padding(self, DICT, n_char):

        # convert BIO tags to numbers
        self.convert_tags()
        '''
        check if bion contains 'B' and 'I'
        for i in self.train_data:
            print(i['bion'])
        '''

        for i in range(len(self.train_data)):
            corp = self.train_data[i]['corpus']

            corp_num = []
            for c in corp:
                corp_num.append(DICT.get(c))
            self.train_data[i]['corpus'] = corp_num

        # get all sizes from the sequences with training data
        train_l_d = {}
        train_l_labels = {}
        for seq in self.train_data:
            # corpus
            l = len(seq['corpus'])
            if l not in train_l_d: train_l_d[l] = []
            train_l_d[l].append(seq['corpus'])

            # labels
            l1 = len(seq['bion'])
            if l1 not in train_l_labels: train_l_labels[l1] = []
            train_l_labels[l1].append(seq['bion'])
        '''
        for i in range(len(train_l_d[110])):
            print(len(train_l_d[110][i]) == len(train_l_labels[110][i]))
            print()
        print("\n\n")

        for i in range(len(train_l_d[31])):
            print(len(train_l_d[31][i]) == len(train_l_labels[31][i]))
        print("\n\n")

        for i in range(len(train_l_d[103])):
            print(len(train_l_d[103][i]) == len(train_l_labels[103][i]))
        print("\n\n")
        exit()
        '''
        sizes = list(train_l_d.keys())

        # Set up the keras model
        il = Input(shape=(None, ), dtype='int32')
        el = Embedding(n_char + 1, 200, name="embed")(il)
        bl1 = Bidirectional(LSTM(128,
                                 return_sequences=True,
                                 recurrent_dropout=0.5,
                                 dropout=0.5),
                            merge_mode="concat",
                            name="lstm1")(el)
        bl2 = Bidirectional(LSTM(64,
                                 return_sequences=True,
                                 recurrent_dropout=0.5,
                                 dropout=0.5),
                            merge_mode="concat",
                            name="lstm2")(bl1)
        bl3 = Bidirectional(LSTM(64,
                                 return_sequences=True,
                                 recurrent_dropout=0.5,
                                 dropout=0.5),
                            merge_mode="concat",
                            name="lstm3")(bl2)
        model = TimeDistributed(Dense(self.num_labs, activation="relu"))(bl3)
        crf = CRF(self.num_labs)  # CRF layer
        out = crf(model)  # output

        model = Model(il, out)
        model.compile(optimizer="rmsprop",
                      loss=crf.loss_function,
                      metrics=[crf.accuracy])
        model.summary()

        f_best = -1
        f_index = -1
        # OK, start actually training
        for epoch in range(self.epochsN):
            print("Epoch", epoch, "start at", datetime.now())
            # Train in batches of different sizes - randomize the order of sizes
            # Except for the first few epochs
            if epoch > 2:
                random.shuffle(sizes)
            for size in sizes:
                batch = train_l_d[size]
                labs = train_l_labels[size]

                tx = np.array([seq for seq in batch])
                y = [seq for seq in labs]

                ty = [to_categorical(i, num_classes=self.num_labs) for i in y]

                # This trains in mini-batches
                model.fit(tx, np.array(ty), verbose=0, epochs=1)
            print("Trained at", datetime.now())

            # save all epochs
            save_load_utils.save_all_weights(
                model, 'mini-batch-results/epoch_%s.h5' % epoch)
            # test the results
            self.test_minibatch(DICT, model)
            f = self.eval()

            if f > f_best:
                f_best = f
                f_index = epoch

        # Pick the best model, and save it with a useful name
        print("Choosing the best epoch")
        shutil.copyfile("mini-batch-results/epoch_%s.h5" % f_index,
                        "minibatch_%s.h5" % f_index)