df = pd.read_csv('./data/raw/DJIA_table.csv')

scaler = StandardScaler()
data = scaler.fit_transform((df['Close'] - df['Open']).values.reshape(-1, 1))

X = data[:-1]
y = data[1:]

X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)

train_data_gen = TimeseriesGenerator(X_train, y_train, length=window_size, batch_size=batch_size, shuffle=False)
test_data_gen = TimeseriesGenerator(X_test, y_test, length=window_size, batch_size=batch_size, shuffle=False)

model = Sequential()
model.add(CuDNNGRU(4, input_shape=(window_size, 1,)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit_generator(train_data_gen, epochs=epochs).history
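# Rebuild absolute price levels by cumulatively summing the inverse-scaled daily
# (Close - Open) deltas, starting from the first Open price; the same is done
# below for the train and test predictions.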

index = [df['Open'][0]]
for i, d in enumerate(scaler.inverse_transform(data)):
    index.append(index[i] + d)

index_train = [df['Open'][0]]
for i, d in enumerate(scaler.inverse_transform(model.predict_generator(train_data_gen))):
    index_train.append(index_train[i] + d)

index_test = [index_train[-1]]
for i, d in enumerate(scaler.inverse_transform(model.predict_generator(test_data_gen))):
    index_test.append(index_test[i] + d)
Example #2
    def build(input_shape, n_classes, train=True):
        '''
        The input is whatever the generator yields each time:
        inputs = {'the_input': train_batch,  # batch of sample images
                  'the_labels': labels,  # batch of label sequences
                  'input_length': input_length,  # batch of RNN input lengths
                  'label_length': label_length}  # batch of label sequence lengths
        '''
        if K.image_data_format() == "channels_first":
            chanDim = 1
        else:
            chanDim = -1
        # input: (h, w, n_channels), kernel: (h, w)
        input_data = Input(name='the_input',
                           shape=input_shape,
                           dtype='float32')
        x = Conv2D(64, kernel_size=(3, 3), activation='relu',
                   padding='same')(input_data)
        x = BatchNormalization(axis=chanDim)(x)
        x = Conv2D(64, kernel_size=(3, 3), activation='relu',
                   padding='same')(x)
        x = BatchNormalization(axis=chanDim)(x)
        shortcut = Conv2D(64,
                          kernel_size=(1, 1),
                          activation='relu',
                          padding='same')(input_data)
        x1 = add([shortcut, x])
        x = Conv2D(64, kernel_size=(3, 3), activation='relu',
                   padding='same')(x1)
        x = BatchNormalization(axis=chanDim)(x)
        x = Conv2D(64, kernel_size=(3, 3), activation='relu',
                   padding='same')(x)
        x = BatchNormalization(axis=chanDim)(x)
        shortcut = Conv2D(64,
                          kernel_size=(1, 1),
                          activation='relu',
                          padding='same')(x1)
        x1 = add([x, shortcut])
        x = Conv2D(64, kernel_size=(3, 3), activation='relu',
                   padding='same')(x1)
        x = BatchNormalization(axis=chanDim)(x)
        x1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)

        x = Conv2D(128, kernel_size=(3, 3), activation='relu',
                   padding='same')(x1)
        x = BatchNormalization(axis=chanDim)(x)
        x = Conv2D(128, kernel_size=(3, 3), activation='relu',
                   padding='same')(x)
        x = BatchNormalization(axis=chanDim)(x)
        shortcut = Conv2D(128,
                          kernel_size=(1, 1),
                          activation='relu',
                          padding='same')(x1)
        x1 = add([x, shortcut])
        x = Conv2D(128, kernel_size=(3, 3), activation='relu',
                   padding='same')(x1)
        x = BatchNormalization(axis=chanDim)(x)
        x = Conv2D(128, kernel_size=(3, 3), activation='relu',
                   padding='same')(x)
        x = BatchNormalization(axis=chanDim)(x)
        shortcut = Conv2D(128,
                          kernel_size=(1, 1),
                          activation='relu',
                          padding='same')(x1)
        x1 = add([x, shortcut])
        x1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool2')(x1)

        x = Conv2D(256, kernel_size=(3, 3), activation='relu',
                   padding='same')(x1)
        x = BatchNormalization(axis=chanDim)(x)
        x = Conv2D(256, kernel_size=(3, 3), activation='relu',
                   padding='same')(x)
        x = BatchNormalization(axis=chanDim)(x)
        shortcut = Conv2D(256,
                          kernel_size=(1, 1),
                          activation='relu',
                          padding='same')(x1)
        x1 = add([x, shortcut])
        x = Conv2D(256, kernel_size=(3, 3), activation='relu',
                   padding='same')(x1)
        x = BatchNormalization(axis=chanDim)(x)
        x = Conv2D(256, kernel_size=(3, 3), activation='relu',
                   padding='same')(x)
        x = BatchNormalization(axis=chanDim)(x)
        shortcut = Conv2D(256,
                          kernel_size=(1, 1),
                          activation='relu',
                          padding='same')(x1)
        x = add([x, shortcut])
        x = ZeroPadding2D(padding=(0, 1), name='pad1')(x)  # pad width only, not height
        x1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 1), name='pool3')(x)

        x = Conv2D(512, kernel_size=(3, 3), activation='relu',
                   padding='same')(x1)
        x = BatchNormalization(axis=chanDim)(x)
        x = Conv2D(512, kernel_size=(3, 3), activation='relu',
                   padding='same')(x)
        x = BatchNormalization(axis=chanDim)(x)
        shortcut = Conv2D(512,
                          kernel_size=(1, 1),
                          activation='relu',
                          padding='same')(x1)
        x1 = add([x, shortcut])
        x = Conv2D(512, kernel_size=(3, 3), activation='relu',
                   padding='same')(x1)
        x = BatchNormalization(axis=chanDim)(x)
        x = Conv2D(512, kernel_size=(3, 3), activation='relu',
                   padding='same')(x)
        x = BatchNormalization(axis=chanDim)(x)
        shortcut = Conv2D(512,
                          kernel_size=(1, 1),
                          activation='relu',
                          padding='same')(x1)
        x = add([x, shortcut])
        x = ZeroPadding2D(padding=(0, 1), name='pad2')(x)  # pad width only, not height
        x = MaxPooling2D(pool_size=(2, 2), strides=(2, 1), name='pool4')(x)
        x = Conv2D(512,
                   kernel_size=(2, 2),
                   strides=(1, 1),
                   activation='relu',
                   padding='valid')(x)

        # Shape of the final feature map: (height, width, depth); the pooling/conv padding and strides set height and width, the number of kernels sets the depth
        shape = x.get_shape()
        # conv_to_rnn_dims = (int(shape[1]), int(shape[2]) * int(shape[3]))

        # each column of the final CONV feature map becomes one element of the RNN input sequence
        # cnn_feature = Reshape(target_shape=conv_to_rnn_dims, name='map2seq')(x)
        x = Permute((2, 1, 3))(x)
        x = TimeDistributed(Flatten(), name='timedistrib')(x)
        # two layers of bidirectional RNN
        # x = Bidirectional(GRU(256, return_sequences=True, implementation=2), name='bi-lstm1')(cnn_out)
        #x = Dense(int(shape[1]) * int(shape[3]), name='bi-lstm1_out')(x)
        # a second layer may cause overfitting
        #rnn_out = Bidirectional(GRU(256, return_sequences=True, implementation=2), name='bi-lstm2')(x)
        rnn_f = CuDNNGRU(256, return_sequences=True, name='rnn1_f')(x)
        rnn_b = CuDNNGRU(256,
                         return_sequences=True,
                         go_backwards=True,
                         name='rnn1_b')(x)
        x = concatenate([rnn_f, rnn_b])

        # number of dense units = number of character classes + 1 (+1 for the blank token)
        x = Dense(n_classes, name='dense')(x)
        # softmax layer
        y_pred = Activation('softmax', name='softmax')(x)
        # CTC input sequence lengths and the corresponding label sequence lengths
        # the label sequence length must be <= the CTC output sequence length, so that each input timestep maps to at most one class
        # input_length is the length of y_pred, i.e. the length fed into CTC, which is the width of the final conv feature map
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')

        labels = Input(name='the_labels',
                       shape=[cfg.max_label_len],
                       dtype='float32')

        # CTC layer
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer
        loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')(
            [y_pred, labels, input_length, label_length])

        if train:
            # training requires labels, input_length and label_length to compute the CTC loss
            model = Model(
                inputs=[input_data, labels, input_length, label_length],
                outputs=loss_out)
        else:
            # at test time only the input data and the predicted output are needed
            model = Model(inputs=input_data, outputs=y_pred)
        """
        # 获取softmax层的输出,在可视化过程中用于解码验证,代替model.predict()
        # inputs: List of placeholder tensors.
        # outputs: List of output tensors.
        """
        test_func = K.function([input_data],
                               [y_pred])  # [input_data]是tensor input_data的list
        return model, y_pred, test_func
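
# `ctc_lambda_func` is referenced above but not shown in this snippet. A minimal
# sketch (an assumption, not the original implementation) matching the
# [y_pred, labels, input_length, label_length] ordering used in the Lambda call:
from keras import backend as K  # backend alias, as used in the snippet above

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # K.ctc_batch_cost expects (y_true, y_pred, input_length, label_length)
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)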
Example #3
def rnn(embedding_matrix, config):
    if config['rnn'] == 'gru' and config['gpu']:
        encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True))

    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'], ), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # (bsz, max_length, emb_dims)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)
    q1_encoded = Dropout(0.2)(q1_encoded)
    q2_encoded = Dropout(0.2)(q2_encoded)
    # bidirectional
    #     q1_encoded = encode2(q1_encoded)
    #     q2_encoded = encode2(q2_encoded)
    # resnet
    rnn_layer2_input1 = concatenate([q1_embed, q1_encoded])
    rnn_layer2_input2 = concatenate([q2_embed, q2_encoded])
    q1_encoded2 = encode2(rnn_layer2_input1)
    q2_encoded2 = encode2(rnn_layer2_input2)

    # add res shortcut
    res_block1 = add([q1_encoded, q1_encoded2])
    res_block2 = add([q2_encoded, q2_encoded2])
    rnn_layer3_input1 = concatenate([q1_embed, res_block1])
    rnn_layer3_input2 = concatenate([q2_embed, res_block2])
    #     rnn_layer3_input1 = concatenate([q1_embed,q1_encoded,q1_encoded2])
    #     rnn_layer3_input2 = concatenate([q2_embed,q2_encoded,q2_encoded2])
    q1_encoded3 = encode3(rnn_layer3_input1)
    q2_encoded3 = encode3(rnn_layer3_input2)
    #     merged1 = GlobalMaxPool1D()(q1_encoded3)
    #     merged2 = GlobalMaxPool1D()(q2_encoded3)
    #     q1_encoded = concatenate([q1_encoded, q1_encoded2], axis=-1)
    #     q2_encoded = concatenate([q2_encoded, q2_encoded2], axis=-1)

    #     merged1 = concatenate([q1_encoded2, q1_embed], axis=-1)
    #     merged2 = concatenate([q2_encoded2, q2_embed], axis=-1)
    #     # TODO add attention rep , maxpooling rep
    q1_encoded3 = concatenate([q1_encoded, q1_encoded2, q1_encoded3])
    q2_encoded3 = concatenate([q2_encoded, q2_encoded2, q2_encoded3])
    merged1 = GlobalMaxPool1D()(q1_encoded3)
    merged2 = GlobalMaxPool1D()(q2_encoded3)
    # avg1 = GlobalAvgPool1D()(q1_encoded3)
    # avg2 = GlobalAvgPool1D()(q2_encoded3)
    # merged1 = concatenate([max1,avg1])
    # merged2 = concatenate([max2,avg2])
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2])
    mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2])
    #     jaccard_rep = Lambda(lambda x: x[0]*x[1]/(K.sum(x[0]**2,axis=1,keepdims=True)+K.sum(x[1]**2,axis=1,keepdims=True)-
    #                                               K.sum(K.abs(x[0]*x[1]),axis=1,keepdims=True)))([merged1,merged2])
    #     merged = Concatenate()([merged1, merged2, mul_rep, sub_rep,jaccard_rep])
    feature_input = Input(shape=(config['feature_length'], ))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'],
                          activation='relu')(feature_dense)

    merged = Concatenate()([merged1, merged2, mul_rep, sub_rep, feature_dense])
    # Classifier
    dense = Dropout(config['dense_dropout'])(merged)
    dense = BatchNormalization()(dense)
    dense = Dense(config['dense_dim'], activation='relu')(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = BatchNormalization()(dense)
    predictions = Dense(1, activation='sigmoid')(dense)
    model = Model(inputs=[q1, q2, feature_input], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
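
# The custom `f1` metric passed to model.compile above is not defined in this
# snippet; a minimal batch-wise sketch using the Keras backend (an assumption,
# not the original implementation, thresholding predictions at 0.5):
from keras import backend as K

def f1(y_true, y_pred):
    y_pred_bin = K.round(K.clip(y_pred, 0, 1))
    tp = K.sum(y_true * y_pred_bin)
    precision = tp / (K.sum(y_pred_bin) + K.epsilon())
    recall = tp / (K.sum(y_true) + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())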
Example #4
print(model.summary())

model.fit(x_train_pad, y_train, 
	epochs=n_epochs, batch_size=batch_size, 
	validation_split=0.05, 
	callbacks=[callback_early_stopping])

eval_ = model.evaluate(x_test_pad, y_test)
print(eval_)

model.save('sentiment_lstm')

model_GRU = Sequential()
model_GRU.add(Embedding(input_dim = num_words, output_dim=embedding_size, 
	input_length=max_len, name = 'layer_embedding'))
model_GRU.add(CuDNNGRU(units=16, return_sequences=True))
model_GRU.add(CuDNNGRU(units=8, return_sequences=True))
model_GRU.add(CuDNNGRU(units=4, return_sequences=False))
model_GRU.add(Dense(1, activation='sigmoid'))
print(model_GRU.summary())

model_GRU.compile(optimizer = 'rmsprop', loss = 'binary_crossentropy', metrics = ['accuracy'])
model_GRU.fit(x_train_pad, y_train, 
	epochs=n_epochs, batch_size=batch_size, 
	validation_split=0.05, 
	)

eval_GRU = model_GRU.evaluate(x_test_pad, y_test)
print(eval_GRU)

y_pred = model.predict(x_test_pad[:1000])
Example #5
X_test = X_test.todense()

train_generator = TimeseriesGenerator(X_train,
                                      y_train,
                                      length=window_size,
                                      batch_size=batch_size,
                                      shuffle=False)
test_generator = TimeseriesGenerator(X_test,
                                     y_test,
                                     length=window_size,
                                     batch_size=1,
                                     shuffle=False)

model = Sequential()
model.add(CuDNNGRU(128, input_shape=(
    window_size,
    X_train.shape[1],
)))
model.add(Dense(64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(y_train.shape[1], activation='softmax'))

# Run training
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit_generator(train_generator, epochs=epochs)
print(model.evaluate_generator(test_generator))

y_true = np.argmax(y_test[window_size:], axis=1)
y_pred = np.argmax(model.predict_generator(test_generator), axis=1)
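
# A hedged follow-up (not in the original snippet): summarize per-class results
# with scikit-learn once y_true and y_pred are available.
from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))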
Example #6
    def create_model(self):

        tdat_input = Input(shape=(self.tdatlen, ))
        pdat_input = Input(shape=(self.pdatlen, self.config['psdatlen'],
                                  self.config['pstdatlen']))
        sml_input = Input(shape=(self.smllen, ))
        com_input = Input(shape=(self.comlen, ))

        tdel = Embedding(output_dim=self.embdims,
                         input_dim=self.tdatvocabsize,
                         mask_zero=False)
        tde = tdel(tdat_input)

        tenc = CuDNNGRU(self.recdims, return_state=True, return_sequences=True)
        tencout, tstate_h = tenc(tde)

        de = Embedding(output_dim=self.embdims,
                       input_dim=self.comvocabsize,
                       mask_zero=False)(com_input)
        dec = CuDNNGRU(self.recdims, return_sequences=True)
        decout = dec(de, initial_state=tstate_h)

        se = Embedding(output_dim=self.smldims,
                       input_dim=self.smlvocabsize,
                       mask_zero=False)(sml_input)
        se_enc = CuDNNGRU(self.recdims,
                          return_state=True,
                          return_sequences=True)
        seout, state_sml = se_enc(se)
        ast_attn = dot([decout, seout], axes=[2, 2])
        ast_attn = Activation('softmax')(ast_attn)

        acontext = dot([ast_attn, seout], axes=[2, 1])

        tattn = dot([decout, tencout], axes=[2, 2])
        tattn = Activation('softmax')(tattn)

        tcontext = dot([tattn, tencout], axes=[2, 1])

        semb = TimeDistributed(tdel)
        #adding project context information as a time distributed sdat embedding
        pemb = TimeDistributed(semb)
        pde = pemb(pdat_input)
        senc = TimeDistributed(CuDNNGRU(int(self.recdims)))
        psenc = TimeDistributed(senc)
        psencout = psenc(pde)
        penc = TimeDistributed(CuDNNGRU(int(self.recdims)))
        pencout = penc(psencout)

        #pdats attention
        pattn = dot([decout, pencout], axes=[2, 2])
        pattn = Activation('softmax')(pattn)

        pcontext = dot([pattn, pencout], axes=[2, 1])
        # the context vector receives attention from the project and file context information along with the ast, tdats and decoder output
        context = concatenate([pcontext, tcontext, acontext, decout])

        out = TimeDistributed(Dense(self.tdddims, activation="relu"))(context)

        out = Flatten()(out)
        out1 = Dense(self.comvocabsize, activation="softmax")(out)

        model = Model(inputs=[tdat_input, pdat_input, sml_input, com_input],
                      outputs=out1)

        if self.config['multigpu']:
            model = keras.utils.multi_gpu_model(model, gpus=2)

        model.compile(loss='categorical_crossentropy',
                      optimizer='adamax',
                      metrics=['accuracy'])

        return self.config, model
Example #7
def __run_s2s(sessions_i,
              sessions_t,
              num_songs,
              song_ix,
              max_l,
              NUM_DIM=128,
              BATCH_SIZE=128,
              EPOCHS=50,
              MODEL='RNN',
              WINDOW_SIZE=5):
    X, y = sessions_i, sessions_t
    num_encoder_songs, num_decoder_songs = num_songs
    song_ix_i, song_ix_t = song_ix
    max_length_i, max_length_t = max_l

    def generate_batch(X, y, batch_size=128):
        while True:
            for j in range(0, len(X), batch_size):
                encoder_input_data = np.zeros((batch_size, max_length_i),
                                              dtype='float32')
                decoder_input_data = np.zeros((batch_size, max_length_t),
                                              dtype='float32')
                decoder_target_data = np.zeros(
                    (batch_size, max_length_t, num_decoder_songs),
                    dtype='float32')
                for i, (input_sequence, target_sequence) in enumerate(
                        zip(X[j:j + batch_size], y[j:j + batch_size])):
                    for t, word in enumerate(input_sequence.split()):
                        encoder_input_data[
                            i, t] = song_ix_i[word] if word != '-' else 0
                    for t, word in enumerate(target_sequence.split()):
                        if t < len(target_sequence.split()) - 1:
                            decoder_input_data[
                                i, t] = song_ix_t[word] if word != '-' else 0
                        if t > 0:
                            decoder_target_data[
                                i, t - 1,
                                song_ix_t[word] if word != '-' else 0] = 1
                yield ([encoder_input_data,
                        decoder_input_data], decoder_target_data)

    # shuffle inputs and targets with the same permutation so the pairs stay aligned
    perm = np.random.permutation(len(X))
    X = [X[i] for i in perm]
    y = [y[i] for i in perm]

    X_train, X_test = X[int(len(X) * .1):], X[:int(len(X) * .1)]
    y_train, y_test = y[int(len(y) * .1):], y[:int(len(y) * .1)]

    TRAIN_SAMPLES = len(X_train)
    VAL_SAMPLES = len(X_test)

    ENCODER_INPUT = Input(shape=(None, ))
    ENCODER_EMBEDDING = Embedding(num_encoder_songs, NUM_DIM)(ENCODER_INPUT)
    if MODEL == 'LSTM':
        ENCODER_NN = CuDNNLSTM(NUM_DIM, return_state=True)
        _, state_h, state_c = ENCODER_NN(ENCODER_EMBEDDING)
        ENCODER_STATE = [state_h, state_c]
    if MODEL == 'GRU':
        ENCODER_NN = CuDNNGRU(NUM_DIM, return_state=True)
        _, ENCODER_STATE = ENCODER_NN(ENCODER_EMBEDDING)
    if MODEL == 'RNN':
        ENCODER_NN = SimpleRNN(NUM_DIM, return_state=True)
        _, ENCODER_STATE = ENCODER_NN(ENCODER_EMBEDDING)

    DECODER_INPUT = Input(shape=(None, ))
    DECODER_EMBEDDING = Embedding(num_decoder_songs, NUM_DIM)(DECODER_INPUT)
    if MODEL == 'LSTM':
        DECODER_NN = CuDNNLSTM(NUM_DIM,
                               return_sequences=True,
                               return_state=True)
        DECODER_OUTPUT, _, _ = DECODER_NN(DECODER_EMBEDDING,
                                          initial_state=ENCODER_STATE)
    if MODEL == 'GRU':
        DECODER_NN = CuDNNGRU(NUM_DIM,
                              return_sequences=True,
                              return_state=True)
        DECODER_OUTPUT, _ = DECODER_NN(DECODER_EMBEDDING,
                                       initial_state=ENCODER_STATE)
    if MODEL == 'RNN':
        DECODER_NN = SimpleRNN(NUM_DIM,
                               return_sequences=True,
                               return_state=True)
        DECODER_OUTPUT, _ = DECODER_NN(DECODER_EMBEDDING,
                                       initial_state=ENCODER_STATE)
    DENSE_DECODER = Dense(num_decoder_songs, activation='softmax')
    DECODER_OUTPUT = DENSE_DECODER(DECODER_OUTPUT)

    es = EarlyStopping(monitor='val_acc', mode='max', verbose=1, patience=5)

    model = Model([ENCODER_INPUT, DECODER_INPUT], DECODER_OUTPUT)
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['acc'])
    model.summary()
    model.fit_generator(generator=generate_batch(X_train,
                                                 y_train,
                                                 batch_size=BATCH_SIZE),
                        steps_per_epoch=TRAIN_SAMPLES // BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_data=generate_batch(X_test,
                                                       y_test,
                                                       batch_size=BATCH_SIZE),
                        validation_steps=VAL_SAMPLES // BATCH_SIZE,
                        callbacks=[es])

    return Model(ENCODER_INPUT, ENCODER_STATE), generate_batch
Example #8
    def init_model(self, input_shape, num_classes, **kwargs):
        freq_axis = 2
        channel_axis = 3
        channel_size = 128
        min_size = min(input_shape[:2])
        melgram_input = Input(shape=input_shape)
        # x = ZeroPadding2D(padding=(0, 37))(melgram_input)
        # x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(x)

        # Conv block 1
        x = Convolution2D(filters=64,
                          kernel_size=3,
                          strides=1,
                          padding='same',
                          name='conv1',
                          trainable=True)(melgram_input)
        x = BatchNormalization(axis=channel_axis, name='bn1',
                               trainable=True)(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)
        x = Dropout(0.1, name='dropout1')(x)

        # Conv block 2
        x = Convolution2D(filters=channel_size,
                          kernel_size=3,
                          strides=1,
                          padding='same',
                          name='conv2')(x)
        x = BatchNormalization(axis=channel_axis, name='bn2')(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=(3, 3), strides=(3, 3), name='pool2')(x)
        x = Dropout(0.1, name='dropout2')(x)

        # Conv block 3
        x = Convolution2D(filters=channel_size,
                          kernel_size=3,
                          strides=1,
                          padding='same',
                          name='conv3')(x)
        x = BatchNormalization(axis=channel_axis, name='bn3')(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool3')(x)
        x = Dropout(0.1, name='dropout3')(x)

        if min_size // 24 >= 4:
            # Conv block 4
            x = Convolution2D(filters=channel_size,
                              kernel_size=3,
                              strides=1,
                              padding='same',
                              name='conv4')(x)
            x = BatchNormalization(axis=channel_axis, name='bn4')(x)
            x = ELU()(x)
            x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool4')(x)
            x = Dropout(0.1, name='dropout4')(x)

        x = Reshape((-1, channel_size))(x)

        gru_units = 32
        if num_classes > 32:
            gru_units = int(num_classes * 1.5)
        # GRU block 1, 2, output
        x = CuDNNGRU(gru_units, return_sequences=True, name='gru1')(x)
        x = CuDNNGRU(gru_units, return_sequences=False, name='gru2')(x)
        x = Dropout(0.3)(x)
        outputs = Dense(num_classes, activation='softmax', name='output')(x)

        model = TFModel(inputs=melgram_input, outputs=outputs)
        optimizer = optimizers.Adam(
            # learning_rate=1e-3,
            lr=1e-3,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08,
            decay=1e-4,
            amsgrad=True)
        model.compile(optimizer=optimizer,
                      loss="sparse_categorical_crossentropy",
                      metrics=['accuracy'])
        model.summary()
        self._model = model
        self.is_init = True
Example #9
def basic_crnn_2d(rows, cols, channels, num_classes):
    kernel_size_7 = (7, 7)
    kernel_size_5 = (5, 5)
    kernel_size_3 = (3, 3)

    pool_size = (3, 3)

    activ = 'relu'

    input_1 = Input(shape=[rows, cols, channels])

    input_2 = Input(shape=[rows, cols, channels])

    print(input_1.shape)

    print(input_2.shape)

    x = Conv2D(16, kernel_size=kernel_size_7, padding='same')(input_1)
    x = BatchNormalization()(x)
    x = Activation(activ)(x)
    x = MaxPooling2D(pool_size, strides=(2, 1), padding='same')(x)

    print(x.shape)

    x = Conv2D(32, kernel_size=kernel_size_5, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activ)(x)
    x = MaxPooling2D(pool_size, strides=(2, 1), padding='same')(x)

    print(x.shape)

    x = Conv2D(32, kernel_size=kernel_size_3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activ)(x)
    x = MaxPooling2D(pool_size, strides=(2, 1), padding='same')(x)

    print(x.shape)

    x = Conv2D(32, kernel_size=kernel_size_3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activ)(x)
    x = MaxPooling2D(pool_size, strides=(2, 1), padding='same')(x)

    print(x.shape)

    x = Conv2D(32, kernel_size=kernel_size_3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activ)(x)
    x = MaxPooling2D(pool_size, strides=(2, 1), padding='same')(x)

    print(x.shape)

    x = Permute((2, 1, 3))(x)
    x = Reshape((126, 5 * 32))(x)

    print(x.shape)

    x = Bidirectional(CuDNNGRU(126, return_sequences=True))(x)
    x = Bidirectional(CuDNNGRU(126, return_sequences=False))(x)

    print(x.shape)

    #x = Dropout(0.25) (x)

    final = Dense(num_classes)(x)

    outputs = Activation('sigmoid', name='target')(final)

    model = Model([input_1], [outputs])
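    # `opt` and `acc_dcf_metric_list` are assumed to be defined elsewhere in the
    # original module (an optimizer instance and a list of metric functions).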

    model.compile(optimizer=opt,
                  loss=['binary_crossentropy'],
                  metrics=acc_dcf_metric_list)

    return model
Example #10
def _add_GRU(model, layer_num=1, drop_out=0.2):
    for i in range(layer_num):
        # all but the last GRU must return sequences so stacked GRU layers receive 3D input
        model.add(CuDNNGRU(100, return_sequences=(i < layer_num - 1)))
        model.add(Dropout(drop_out))
    return model
Example #11
def capsulnet_model(batch_size, nb_epoch, hidden_dim, num):
    Routings = 15
    Num_capsule = 30
    Dim_capsule = 60

    sequence_input = Input(shape=(maxlen, ), dtype='int32')
    embedded_sequences = Embedding(input_dim=max_features,
                                   output_dim=num_features,
                                   input_length=maxlen,
                                   weights=[W],
                                   trainable=False)(sequence_input)
    embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(embedded_sequences)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x)
    capsule = Capsule(num_capsule=Num_capsule,
                      dim_capsule=Dim_capsule,
                      routings=Routings,
                      share_weights=True)(x)
    # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
    capsule = Flatten()(capsule)
    capsule = Dropout(0.4)(capsule)
    output = Dense(2, activation='softmax')(capsule)
    model = Model(inputs=[sequence_input], outputs=output)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', f1])
    # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True)
    # early_stopping = EarlyStopping(monitor='val_acc', patience = 5, verbose=1)
    class_weight = {0: 1, 1: 7}

    # train_num, test_num = X_train.shape[0], X_dev.shape[0]
    train_num, test_num = X_train.shape[0], X_test.shape[0]
    num1 = y_train.shape[1]

    second_level_train_set = np.zeros((train_num, num1))  # (10556,)

    second_level_test_set = np.zeros((test_num, num1))  # (2684,)

    test_nfolds_sets = []

    # kf = KFold(n_splits=5)
    kf = KFold(n_splits=5)
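    # Out-of-fold stacking: each fold's held-out predictions fill
    # second_level_train_set, while predictions on X_test from every fold are
    # collected in test_nfolds_sets and averaged into second_level_test_set.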

    for i, (train_index, test_index) in enumerate(kf.split(X_train)):
        x_tra, y_tra = X_train[train_index], y_train[train_index]

        x_tst, y_tst = X_train[test_index], y_train[test_index]

        # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True)
        early_stopping = EarlyStopping(monitor='val_acc',
                                       patience=8,
                                       verbose=1)

        model.fit(x_tra,
                  y_tra,
                  validation_data=[x_tst, y_tst],
                  batch_size=batch_size,
                  epochs=nb_epoch,
                  verbose=2,
                  class_weight=class_weight,
                  callbacks=[early_stopping])

        second_level_train_set[test_index] = model.predict(
            x_tst, batch_size=batch_size
        )  # (2112,2) could not be broadcast to indexing result of shape (2112,)

        test_nfolds_sets.append(model.predict(X_test))
    for item in test_nfolds_sets:
        second_level_test_set += item

    second_level_test_set = second_level_test_set / 5

    model.save("weights_BB_capsulnet_lstm" + num + ".hdf5")

    y_pred = second_level_test_set

    return y_pred
Example #12
    def create_model(self):

        tdat_input = Input(shape=(self.tdatlen, ))
        sdat_input = Input(shape=(self.sdatlen, self.config['stdatlen']))
        sml_input = Input(shape=(self.smllen, ))
        com_input = Input(shape=(self.comlen, ))

        tdel = Embedding(output_dim=self.embdims,
                         input_dim=self.tdatvocabsize,
                         mask_zero=False)
        tde = tdel(tdat_input)

        tenc = CuDNNGRU(self.recdims, return_state=True, return_sequences=True)
        tencout, tstate_h = tenc(tde)

        de = Embedding(output_dim=self.embdims,
                       input_dim=self.comvocabsize,
                       mask_zero=False)(com_input)
        dec = CuDNNGRU(self.recdims, return_sequences=True)
        decout = dec(de, initial_state=tstate_h)

        se = Embedding(output_dim=self.smldims,
                       input_dim=self.smlvocabsize,
                       mask_zero=False)(sml_input)
        se_enc = CuDNNGRU(self.recdims,
                          return_state=True,
                          return_sequences=True)
        seout, state_sml = se_enc(se)

        ast_attn = dot([decout, seout], axes=[2, 2])
        ast_attn = Activation('softmax')(ast_attn)

        acontext = dot([ast_attn, seout], axes=[2, 1])

        tattn = dot([decout, tencout], axes=[2, 2])
        tattn = Activation('softmax')(tattn)

        tcontext = dot([tattn, tencout], axes=[2, 1])

        # Adding file context information to ast-attendgru model
        # shared embedding between tdats and sdats
        semb = TimeDistributed(tdel)
        sde = semb(sdat_input)

        # sdats encoder
        senc = TimeDistributed(CuDNNGRU(int(self.recdims)))
        senc = senc(sde)

        # attention to sdats
        sattn = dot([decout, senc], axes=[2, 2])
        sattn = Activation('softmax')(sattn)

        scontext = dot([sattn, senc], axes=[2, 1])

        # the context vector has the result of attention to sdats along with the ast, tdats and decoder output vectors
        context = concatenate([scontext, tcontext, acontext, decout])

        out = TimeDistributed(Dense(self.tdddims, activation="relu"))(context)

        out = Flatten()(out)
        out1 = Dense(self.comvocabsize, activation="softmax")(out)

        model = Model(inputs=[tdat_input, sdat_input, com_input, sml_input],
                      outputs=out1)

        if self.config['multigpu']:
            model = keras.utils.multi_gpu_model(model, gpus=2)

        model.compile(loss='categorical_crossentropy',
                      optimizer='adamax',
                      metrics=['accuracy'])
        return self.config, model
Example #13

inputs = Input(shape=(None, train_x.shape[2]))  # 39-dim MFCC
train_labels = Input(shape=(None, ))
input_length = Input(shape=(1, ))
label_length = Input(shape=(1, ))
# x = Conv1D(16, 3, activation='relu')(inputs)
# x = Conv1D(16, 3, activation='relu')(x)
# x = Conv1D(16, 3, activation='relu')(x)
x = BatchNormalization()(inputs)
x = TimeDistributed(Dense(128, activation='relu'))(x)
x = TimeDistributed(Dense(128, activation='relu'))(x)
x = TimeDistributed(Dense(128, activation='relu'))(x)
x = BatchNormalization()(x)
# x = GRU(128, return_sequences=True, activation='relu')(x)
x = CuDNNGRU(128, return_sequences=True)(x)
x = TimeDistributed(Dense(64, activation='relu'))(x)
y = TimeDistributed(Dense(11, activation='softmax'))(x)
loss_output = Lambda(ctc_wrapper)(
    [train_labels, y, input_length, label_length])
model = Model(inputs=[inputs, train_labels, input_length, label_length],
              outputs=loss_output)
model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)
# model.summary()

# the model for testing which outputs the softmax result of each timestep
test_model = Model(inputs=inputs, outputs=y)
# def debug_pred(batch, logs):
#     pred = test_model.predict(train_x[0:1])
#     print(pred)
# debug_cb = LambdaCallback(on_batch_end=debug_pred)
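
# Hedged usage sketch (not part of the original snippet): greedy CTC decoding of
# the test model's per-timestep softmax output, demonstrated on the first two
# training utterances since no separate test set is shown here.
import numpy as np
from keras import backend as K

softmax_out = test_model.predict(train_x[:2])
frame_lengths = np.full((softmax_out.shape[0],), softmax_out.shape[1])
decoded, _ = K.ctc_decode(softmax_out, input_length=frame_lengths, greedy=True)
print(K.get_value(decoded[0]))  # decoded label indices, padded with -1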
Example #14
def build_model2(lr=0.0,
                 lr_d=0.0,
                 units=0,
                 spatial_dr=0.0,
                 kernel_size1=3,
                 kernel_size2=2,
                 dense_units=128,
                 dr=0.1,
                 conv_size=32):
    file_path = "best_model.hdf5"
    check_point = ModelCheckpoint(file_path,
                                  monitor="val_loss",
                                  verbose=1,
                                  save_best_only=True,
                                  mode="min")
    early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=3)

    inp = Input(shape=(max_len, ))
    x = Embedding(19479,
                  embed_size,
                  weights=[embedding_matrix],
                  trainable=False)(inp)
    x1 = SpatialDropout1D(spatial_dr)(x)

    x_gru = Bidirectional(CuDNNGRU(units, return_sequences=True))(x1)
    x_lstm = Bidirectional(CuDNNLSTM(units, return_sequences=True))(x1)

    x_conv1 = Conv1D(conv_size,
                     kernel_size=kernel_size1,
                     padding='valid',
                     kernel_initializer='he_uniform')(x_gru)
    avg_pool1_gru = GlobalAveragePooling1D()(x_conv1)
    max_pool1_gru = GlobalMaxPooling1D()(x_conv1)

    x_conv2 = Conv1D(conv_size,
                     kernel_size=kernel_size2,
                     padding='valid',
                     kernel_initializer='he_uniform')(x_gru)
    avg_pool2_gru = GlobalAveragePooling1D()(x_conv2)
    max_pool2_gru = GlobalMaxPooling1D()(x_conv2)

    x_conv3 = Conv1D(conv_size,
                     kernel_size=kernel_size1,
                     padding='valid',
                     kernel_initializer='he_uniform')(x_lstm)
    avg_pool1_lstm = GlobalAveragePooling1D()(x_conv3)
    max_pool1_lstm = GlobalMaxPooling1D()(x_conv3)

    x_conv4 = Conv1D(conv_size,
                     kernel_size=kernel_size2,
                     padding='valid',
                     kernel_initializer='he_uniform')(x_lstm)
    avg_pool2_lstm = GlobalAveragePooling1D()(x_conv4)
    max_pool2_lstm = GlobalMaxPooling1D()(x_conv4)

    x = concatenate([
        avg_pool1_gru, max_pool1_gru, avg_pool2_gru, max_pool2_gru,
        avg_pool1_lstm, max_pool1_lstm, avg_pool2_lstm, max_pool2_lstm
    ])
    x = BatchNormalization()(x)
    x = Dropout(dr)(Dense(dense_units, activation='relu')(x))
    x = BatchNormalization()(x)
    x = Dropout(dr)(Dense(int(dense_units / 2), activation='relu')(x))
    x = Dense(5, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(lr=lr, decay=lr_d),
                  metrics=["accuracy"])
    history = model.fit(X_train,
                        y_ohe,
                        batch_size=128,
                        epochs=20,
                        validation_split=0.1,
                        verbose=1,
                        callbacks=[check_point, early_stop])
    model = load_model(file_path)
    return model
Example #15
    def _compile_hans(self, shape, n_hidden_layers, hidden_units_size,
                      dropout_rate, word_dropout_rate, lr):
        """
        Compiles a Hierarchical Attention Network based on the given parameters
        :param shape: The shape of the sequence, i.e. (number of sections, number of tokens)
        :param hidden_units_size: size of hidden units, as a list
        :param dropout_rate: The percentage of inputs to dropout
        :param word_dropout_rate: The percentage of timesteps to dropout
        :param lr: learning rate
        :return: Nothing
        """

        # Sentence Feature Representation
        section_inputs = Input(shape=(None, ), name='document_inputs')
        self.pretrained_embeddings = self.PretrainedEmbedding()
        section_embs = self.pretrained_embeddings(section_inputs)

        # Apply variational dropout
        drop_section_embs = SpatialDropout1D(
            dropout_rate, name='feature_dropout')(section_embs)
        encodings = TimestepDropout(word_dropout_rate,
                                    name='word_dropout')(drop_section_embs)

        # Bi-GRUs over token embeddings
        for i in range(n_hidden_layers[0]):
            if self._cuDNN:
                grus = Bidirectional(
                    CuDNNGRU(hidden_units_size[0],
                             return_sequences=True,
                             kernel_constraint=MinMaxNorm(min_value=-2,
                                                          max_value=2)),
                    name='bidirectional_grus_{}'.format(i))(encodings)
            else:
                grus = Bidirectional(
                    GRU(hidden_units_size[0],
                        activation="tanh",
                        recurrent_activation='sigmoid',
                        return_sequences=True,
                        kernel_constraint=MinMaxNorm(min_value=-2,
                                                     max_value=2)),
                    name='bidirectional_grus_{}'.format(i))(encodings)
            grus = Camouflage(mask_value=0.0)([grus, encodings])
            if i == 0:
                encodings = SpatialDropout1D(dropout_rate)(grus)
            else:
                encodings = add([grus, encodings])
                encodings = SpatialDropout1D(dropout_rate)(encodings)

        # Attention over BI-GRU (context-aware) embeddings
        if self._attention_mechanism == 'maxpooling':
            section_encoder = GlobalMaxPooling1D()(encodings)
        elif self._attention_mechanism == 'attention':
            encodings = SymmetricMasking()([encodings, encodings])
            section_encoder = ContextualAttention(
                kernel_regularizer=l2(), bias_regularizer=l2())(encodings)

        # Wrap up section_encoder
        section_encoder = Model(inputs=section_inputs,
                                outputs=section_encoder,
                                name='sentence_encoder')

        # Document Input Layer
        document_inputs = Input(shape=(
            shape[0],
            shape[1],
        ),
                                name='document_inputs')

        # Distribute sentences
        section_encodings = TimeDistributed(
            section_encoder, name='sentence_encodings')(document_inputs)

        # BI-GRUs over section embeddings
        for i in range(n_hidden_layers[1]):
            if self._cuDNN:
                grus = Bidirectional(
                    CuDNNGRU(hidden_units_size[1],
                             return_sequences=True,
                             kernel_constraint=MinMaxNorm(min_value=-2,
                                                          max_value=2)),
                    name='bidirectional_grus_upper_{}'.format(i))(
                        section_encodings)
            else:
                grus = Bidirectional(GRU(hidden_units_size[1],
                                         activation="tanh",
                                         recurrent_activation='sigmoid',
                                         return_sequences=True,
                                         kernel_constraint=MinMaxNorm(
                                             min_value=-2, max_value=2)),
                                     name='bidirectional_grus_upper_{}'.format(
                                         i))(section_encodings)
            grus = Camouflage(mask_value=0.0)([grus, section_encodings])
            if i == 0:
                section_encodings = SpatialDropout1D(dropout_rate)(grus)
            else:
                section_encodings = add([grus, section_encodings])
                section_encodings = SpatialDropout1D(dropout_rate)(
                    section_encodings)

        # Attention over BI-LSTM (context-aware) sentence embeddings
        if self._attention_mechanism == 'maxpooling':
            doc_encoding = GlobalMaxPooling1D(
                name='max_pooling')(section_encodings)
        elif self._attention_mechanism == 'attention':
            section_encodings = SymmetricMasking()(
                [section_encodings, section_encodings])
            doc_encoding = ContextualAttention(
                kernel_regularizer=l2(),
                bias_regularizer=l2(),
                name='self_attention')(section_encodings)
        losses = 'binary_crossentropy' if self._decision_type == 'multi_label' else 'categorical_crossentropy'
        loss_weights = None

        # Final output (projection) layer
        outputs = Dense(self.n_classes,
                        activation='sigmoid'
                        if self._decision_type == 'multi_label' else 'softmax',
                        name='outputs')(doc_encoding)

        # Wrap up model + Compile with optimizer and loss function
        self.model = Model(inputs=document_inputs, outputs=[outputs])
        self.model.compile(optimizer=Adam(lr=lr, clipvalue=2.0),
                           loss=losses,
                           loss_weights=loss_weights)
Example #16
def cnn_rnn(embedding_matrix, config):
    if config['rnn'] == 'gru' and config['gpu']:
        encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True))
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'], ), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # (bsz, max_length, emb_dims)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)
    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)
    q1_encoded = Dropout(0.2)(q1_encoded)
    q2_encoded = Dropout(0.2)(q2_encoded)
    # bidirectional
    #     q1_encoded = encode2(q1_encoded)
    #     q2_encoded = encode2(q2_encoded)
    # resnet
    rnn_layer2_input1 = concatenate([q1_embed, q1_encoded])
    rnn_layer2_input2 = concatenate([q2_embed, q2_encoded])
    q1_encoded2 = encode2(rnn_layer2_input1)
    q2_encoded2 = encode2(rnn_layer2_input2)

    # add res shortcut
    res_block1 = add([q1_encoded, q1_encoded2])
    res_block2 = add([q2_encoded, q2_encoded2])
    rnn_layer3_input1 = concatenate([q1_embed, res_block1])
    rnn_layer3_input2 = concatenate([q2_embed, res_block2])
    #     rnn_layer3_input1 = concatenate([q1_embed,q1_encoded,q1_encoded2])
    #     rnn_layer3_input2 = concatenate([q2_embed,q2_encoded,q2_encoded2])
    q1_encoded3 = encode3(rnn_layer3_input1)
    q2_encoded3 = encode3(rnn_layer3_input2)
    convs1, convs2 = [], []
    for ksz in config['kernel_sizes']:
        pooling1, pooling2 = block(q1_embed, q2_embed, ksz, config['filters'])
        convs1.append(pooling1)
        convs2.append(pooling2)
    rnn_rep1 = GlobalMaxPooling1D()(q1_encoded3)
    rnn_rep2 = GlobalMaxPooling1D()(q2_encoded3)
    convs1.append(rnn_rep1)
    convs2.append(rnn_rep2)
    merged1 = concatenate(convs1, axis=-1)
    merged2 = concatenate(convs2, axis=-1)
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2])
    mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2])
    # merged = Concatenate()([mul_rep, sub_rep])
    feature_input = Input(shape=(config['feature_length'], ))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'],
                          activation='relu')(feature_dense)

    merged = Concatenate()([merged1, merged2, mul_rep, sub_rep, feature_dense])
    dense = Dropout(config['dense_dropout'])(merged)
    dense = BatchNormalization()(dense)
    dense = Dense(config['dense_dim'], activation='relu')(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = BatchNormalization()(dense)
    predictions = Dense(1, activation='sigmoid')(dense)
    model = Model(inputs=[q1, q2, feature_input], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
Example #17
    def _compile_bigrus(self, n_hidden_layers, hidden_units_size, dropout_rate,
                        word_dropout_rate, lr):
        """
        Compiles a Hierarchical RNN based on the given parameters
        :param hidden_units_size: size of hidden units, as a list
        :param dropout_rate: The percentage of inputs to dropout
        :param word_dropout_rate: The percentage of timesteps to dropout
        :param lr: learning rate
        :return: Nothing
        """

        # Document Feature Representation
        if self.elmo:
            document_inputs = Input(shape=(1, ),
                                    dtype='string',
                                    name='document_inputs')
            document_elmos = ElmoEmbeddingLayer()(document_inputs)
            document_inputs2 = Input(shape=(None, ), name='document_inputs2')
            self.pretrained_embeddings = self.PretrainedEmbedding()
            document_embs = self.pretrained_embeddings(document_inputs2)
            doc_embs = concatenate([document_embs, document_elmos])

        else:
            document_inputs = Input(shape=(None, ), name='document_inputs')
            self.pretrained_embeddings = self.PretrainedEmbedding()
            doc_embs = self.pretrained_embeddings(document_inputs)

        # Apply variational dropout
        drop_doc_embs = SpatialDropout1D(dropout_rate,
                                         name='feature_dropout')(doc_embs)
        encodings = TimestepDropout(word_dropout_rate,
                                    name='word_dropout')(drop_doc_embs)

        # Bi-GRUs over token embeddings
        return_sequences = True
        for i in range(n_hidden_layers):
            if i == n_hidden_layers - 1:
                return_sequences = False
            if self._cuDNN:
                grus = Bidirectional(
                    CuDNNGRU(hidden_units_size,
                             return_sequences=return_sequences),
                    name='bidirectional_grus_{}'.format(i))(encodings)
            else:
                grus = Bidirectional(
                    GRU(hidden_units_size,
                        activation="tanh",
                        recurrent_activation='sigmoid',
                        return_sequences=return_sequences),
                    name='bidirectional_grus_{}'.format(i))(encodings)
            if i != n_hidden_layers - 1:
                grus = Camouflage(mask_value=0.0)([grus, encodings])
                if i == 0:
                    encodings = SpatialDropout1D(dropout_rate)(grus)
                else:
                    encodings = add([grus, encodings])
                    encodings = SpatialDropout1D(dropout_rate)(encodings)
            else:
                encodings = grus

        # Final output (projection) layer
        outputs = Dense(self.n_classes,
                        activation='sigmoid'
                        if self._decision_type == 'multi_label' else 'softmax',
                        name='outputs')(encodings)

        # Wrap up model + Compile with optimizer and loss function
        self.model = Model(inputs=document_inputs if not self.elmo else
                           [document_inputs, document_inputs2],
                           outputs=[outputs])
        self.model.compile(optimizer=Adam(lr=lr, clipvalue=5.0),
                           loss='binary_crossentropy' if self._decision_type
                           == 'multi_label' else 'categorical_crossentropy')
Example #18
def training_net_kfolds():

    train_dataset_path = path + "/Train/"
    val_dataset_path = path + "/Val/"

    train_files = os.listdir(train_dataset_path)
    train_files.sort()
    val_files = os.listdir(val_dataset_path)
    val_files.sort()

    labels = pd.read_csv(path + "REFERENCE.csv")
    labels_en = pd.read_csv(path + "kfold_labels_en.csv")
    #data_info = pd.read_csv(path + "data_info.csv")

    batch_size = 64
    num_classes = 10
    len_seg = 23296  # 91s

    main_input = Input(shape=(len_seg, 12), dtype='float32', name='main_input')
    x = Convolution1D(12, 3, padding='same')(main_input)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 48, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    cnnout = Dropout(0.2)(x)
    x = Bidirectional(
        CuDNNGRU(12,
                 input_shape=(2250, 12),
                 return_sequences=True,
                 return_state=False))(cnnout)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    x = AttentionWithContext()(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    main_output = Dense(num_classes, activation='sigmoid')(x)
    model = Model(inputs=main_input, outputs=main_output)
    print(model.summary())

    raw_IDs = labels_en["File_name"].values.tolist()
    extend_db4_IDs = [i + "_db4" for i in raw_IDs]
    extend_db6_IDs = [i + "_db6" for i in raw_IDs]
    all_IDs = raw_IDs + extend_db4_IDs + extend_db6_IDs

    train_labels = labels_en["label1"].values
    all_train_labels = np.hstack((train_labels, train_labels, train_labels))

    # Parameters
    params = {
        'dim': 23296,
        'batch_size': 64,
        'n_classes': 10,
        'n_channels': 12,
        'shuffle': True
    }

    en_amount = 1
    model_path = './official_attention_onenet_model/'

    for seed in range(en_amount):
        print("************************")
        n_fold = 3
        n_classes = 10

        kfold = StratifiedKFold(n_splits=n_fold,
                                shuffle=True,
                                random_state=1234)
        #kf = kfold.split(all_IDs, all_train_labels)
        kf = kfold.split(labels["File_name"].values.tolist(),
                         labels["label1"].values)

        for i, (index_train, index_valid) in enumerate(kf):
            print('fold: ', i + 1, ' training')
            t = time.time()

            #tr_IDs = np.array(all_IDs)[index_train]
            #val_IDs = np.array(all_IDs)[index_valid]
            #print(tr_IDs.shape)
            tr_IDs = labels["File_name"].values[index_train].tolist()
            val_IDs = labels["File_name"].values[index_valid].tolist()
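            # Oversample rare classes in this training fold: append extra copies
            # of class-4 IDs (x4), class-7 IDs (x2) and class-9 IDs (x1) before
            # adding the _db4/_db6 augmented variants.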

            for j in range(4):
                for ids in labels[labels.label1 == 4]["File_name"]:
                    if ids in tr_IDs:
                        tr_IDs.append(ids)

            for j in range(2):
                for ids in labels[labels.label1 == 7]["File_name"]:
                    if ids in tr_IDs:
                        tr_IDs.append(ids)

            for j in range(1):
                for ids in labels[labels.label1 == 9]["File_name"]:
                    if ids in tr_IDs:
                        tr_IDs.append(ids)

            tr_IDs_db4 = [ids + "_db4" for ids in tr_IDs]
            tr_IDs_db6 = [ids + "_db6" for ids in tr_IDs]

            val_IDs_db4 = [ids + "_db4" for ids in val_IDs]
            val_IDs_db6 = [ids + "_db6" for ids in val_IDs]

            tr_IDs = tr_IDs + tr_IDs_db4 + tr_IDs_db6
            val_IDs = val_IDs + val_IDs_db4 + val_IDs_db6
            print("tr_IDs : ", len(tr_IDs))
            print("val_IDs : ", len(val_IDs))

            # Generators
            training_generator = DataGenerator(tr_IDs, labels, **params)
            validation_generator = DataGenerator(val_IDs, labels, **params)

            checkpointer = ModelCheckpoint(
                filepath=model_path +
                'attention_1net_extend_weights-best_k{}_r{}_0805.hdf5'.format(
                    seed, i),
                monitor='val_fmeasure',
                verbose=1,
                save_best_only=True,
                save_weights_only=True,
                mode='max')  # val_fmeasure
            reduce = ReduceLROnPlateau(monitor='val_fmeasure',
                                       factor=0.5,
                                       patience=2,
                                       verbose=1,
                                       min_delta=1e-4,
                                       mode='max')

            earlystop = EarlyStopping(monitor='val_fmeasure',
                                      mode="max",
                                      patience=6,
                                      restore_best_weights=True)

            tensorboard = TensorBoard(log_dir="./logs")

            config = Config()
            add_compile(model, config)

            callback_lists = [checkpointer, reduce, earlystop]

            history = model.fit_generator(generator=training_generator,
                                          validation_data=validation_generator,
                                          use_multiprocessing=False,
                                          epochs=30,
                                          verbose=1,
                                          callbacks=callback_lists)
Example #19
def GRU_block(x, p=0.5, n=64):
    x = CuDNNGRU(n)(x)
    x = Dropout(p)(x)
    return x
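
# Usage sketch (assumes `seq` is a 3-D tensor, e.g. the output of an Embedding or Conv1D layer):
# encoded = GRU_block(seq, p=0.3, n=128)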
Example #20
def predict_net_kfolds():

    pre_type = "sym"  # "sym"

    labels = pd.read_csv(path + "REFERENCE.csv")
    raw_IDs = labels["File_name"].values.tolist()

    IDs = {}
    IDs["sym"] = raw_IDs
    IDs["db4"] = [i + "_db4" for i in raw_IDs]
    IDs["db6"] = [i + "_db6" for i in raw_IDs]

    batch_size = 64
    num_classes = 10
    len_seg = 23296  # 91s

    main_input = Input(shape=(len_seg, 12), dtype='float32', name='main_input')
    x = Convolution1D(12, 3, padding='same')(main_input)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)

    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)

    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)

    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)

    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 48, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    cnnout = Dropout(0.2)(x)

    x = Bidirectional(
        CuDNNGRU(12,
                 input_shape=(2250, 12),
                 return_sequences=True,
                 return_state=False))(cnnout)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    x = AttentionWithContext()(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    main_output = Dense(num_classes, activation='sigmoid')(x)
    model = Model(inputs=main_input, outputs=main_output)

    test_x = read_data_seg(path,
                           split='Val',
                           preprocess=True,
                           n_index=0,
                           pre_type=pre_type)
    print("test_x shape: ", test_x.shape)
    model_path = './official_attention_onenet_model/'

    en_amount = 1
    for seed in range(en_amount):
        print("************************")
        n_fold = 3  # 3
        n_classes = 10

        kfold = StratifiedKFold(n_splits=n_fold,
                                shuffle=True,
                                random_state=seed)
        kf = kfold.split(IDs[pre_type], labels['label1'])

        blend_train = np.zeros(
            (6689, n_fold, n_classes)).astype('float32')  # len(train_x)
        blend_test = np.zeros(
            (558, n_fold, n_classes)).astype('float32')  # len(test_x)

        count = 0

        for i, (index_train, index_valid) in enumerate(kf):
            print('fold: ', i + 1, ' training')
            t = time.time()

            tr_IDs = np.array(IDs[pre_type])  # [index_train]
            # val_IDs = np.array(IDs[pre_type])[index_valid]
            print(tr_IDs.shape)

            X = np.empty((tr_IDs.shape[0], 23296, 12))
            for j, ID in enumerate(tr_IDs):
                X[j, ] = np.load("/media/jdcloud/training_data_pre/" + ID +
                                 ".npy")
            # X_tr = [(X[:, i] - np.mean(X[:, i])) / np.std(X[:, i]) for i in range(10)]
            X_tr = X
            # print(X.shape)
            del X

            # Evaluate best trained model
            model.load_weights(
                model_path +
                'attention_1net_extend_weights-best_k{}_r{}_0805.hdf5'.format(
                    seed, i))

            blend_train[:, i, :] = model.predict(X_tr)
            blend_test[:, i, :] = model.predict(test_x)

            del X_tr
            gc.collect()
            gc.collect()
            count += 1

    index = np.arange(6689)
    y_train = preprocess_y(labels, index)

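    # Blend the three folds' training-set probabilities with fixed weights (0.1 / 0.1 / 0.8).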
    train_y = (0.1 * blend_train[:, 0, :] + 0.1 * blend_train[:, 1, :] +
               0.8 * blend_train[:, 2, :])

    threshold = np.arange(0.1, 0.9, 0.1)
    acc = []
    accuracies = []
    best_threshold = np.zeros(train_y.shape[1])

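    # For each class, grid-search a decision threshold (0.1 to 0.8 in steps of 0.1)
    # that maximizes the F1 score on the training predictions.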
    for i in range(train_y.shape[1]):
        y_prob = np.array(train_y[:, i])
        for j in threshold:
            y_pred = [1 if prob >= j else 0 for prob in y_prob]
            acc.append(f1_score(y_train[:, i], y_pred, average='macro'))
        acc = np.array(acc)
        index = np.where(acc == acc.max())
        accuracies.append(acc.max())
        best_threshold[i] = threshold[index[0][0]]
        acc = []

    print("best_threshold :", best_threshold)

    y_pred = np.array([[
        1 if train_y[i, j] >= best_threshold[j] else 0
        for j in range(train_y.shape[1])
    ] for i in range(len(train_y))])
    print(" train data f1_score  :", f1_score(y_train, y_pred,
                                              average='macro'))

    for i in range(10):
        print("f1 score of ab {} is {}".format(
            i, f1_score(y_train[:, i], y_pred[:, i], average='macro')))

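    # Apply the same 0.1 / 0.1 / 0.8 fold blend to the test predictions.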
    out = (0.1 * blend_test[:, 0, :] + 0.1 * blend_test[:, 1, :] +
           0.8 * blend_test[:, 2, :])

    y_pred_test = np.array([[
        1 if out[i, j] >= best_threshold[j] else 0 for j in range(out.shape[1])
    ] for i in range(len(out))])

    classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    test_y = y_pred_test

    y_pred = [[
        1 if test_y[i, j] >= best_threshold[j] else 0
        for j in range(test_y.shape[1])
    ] for i in range(len(test_y))]
    pred = []
    for j in range(test_y.shape[0]):
        pred.append([classes[i] for i in range(10) if y_pred[j][i] == 1])

    val_dataset_path = path + "/Val/"
    val_files = os.listdir(val_dataset_path)
    val_files.sort()

    with open('answers_attention_1net_{}_0805.csv'.format(pre_type),
              'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([
            'File_name', 'label1', 'label2', 'label3', 'label4', 'label5',
            'label6', 'label7', 'label8', 'label9', 'label10'
        ])
        count = 0
        for file_name in val_files:
            if file_name.endswith('.mat'):

                record_name = file_name[:-len('.mat')]  # str.strip would drop matching edge characters, not the suffix
                answer = []
                answer.append(record_name)

                result = pred[count]

                answer.extend(result)
                for i in range(10 - len(result)):
                    answer.append('')
                count += 1
                writer.writerow(answer)
    train_pd0 = pd.DataFrame(blend_train[:, 0, :])
    train_pd1 = pd.DataFrame(blend_train[:, 1, :])
    train_pd2 = pd.DataFrame(blend_train[:, 2, :])
    csv_path = "/media/jdcloud/ensemble_csv/"
    train_pd0.to_csv(csv_path + "attention_1net_fold0.csv", index=None)
    train_pd1.to_csv(csv_path + "attention_1net_fold1.csv", index=None)
    train_pd2.to_csv(csv_path + "attention_1net_fold2.csv", index=None)

    test_pd0 = pd.DataFrame(blend_test[:, 0, :])
    test_pd1 = pd.DataFrame(blend_test[:, 1, :])
    test_pd2 = pd.DataFrame(blend_test[:, 2, :])
    csv_path = "/media/jdcloud/test_csv/"
    test_pd0.to_csv(csv_path + "attention_1net_fold0.csv", index=None)
    test_pd1.to_csv(csv_path + "attention_1net_fold1.csv", index=None)
    test_pd2.to_csv(csv_path + "attention_1net_fold2.csv", index=None)
Example #21
#%%
# model
ans_ques_input = Input(shape=(ans_seq, ques_dim), name='ans_question_input')
ans_feat_input = Input(shape=(ans_seq, ans_dim), name='ans_feature_input')
member_feat_input = Input(shape=(member_dim,), name='member_feature_input')
member_topic_input = Input(shape=(member_topic_dim,), name='member_topic_input')
time_input = Input(shape=(1,), name='time_input')
ques_input = Input(shape=(ques_dim,), name='ques_input')

member_feat_dense = PReLU(name='member_feature_dense_prelu')(
    Dense(units=40, name='member_feature_dense')(member_feat_input))

member_topic_dense = PReLU(name='member_topic_dense_prelu')(
    Dense(units=256, name='member_topic_dense')(member_topic_input))

ans_feat_gru = CuDNNGRU(units=40, return_sequences=True, name='ans_feat_gru')(ans_feat_input)
ans_ques_gru = CuDNNGRU(units=256, return_sequences=True, name='ans_ques_gru')(ans_ques_input)

ans_con = concatenate([ans_feat_gru, ans_ques_gru], name='answer_concatenate')
answer_gru = CuDNNGRU(units=128, return_sequences=False, name='answer_gru')(ans_con)

question_dense = PReLU(name='ques_dense_prelu')(
    Dense(units=128, name='ques_dense')(ques_input))

time_dense = PReLU(name='time_dense_prelu')(
    Dense(units=5, name='time_dense')(time_input))

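# Merge the recurrent answer representation with the static member, topic, question and time features.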
inv_con = concatenate([time_dense, member_feat_dense, member_topic_dense, answer_gru, question_dense], name='invite_concatenate')

inv_dense_1 = PReLU(name='inv_dense_1_prelu')(
    Dense(units=512, name='inv_dense_1')(inv_con))
Example #22
    def get_model(config):
        inp = Input(shape=(config.strmaxlen, ), name='input')

        emb = Embedding(config.max_features, config.embed_size,
                        trainable=True)(inp)
        emb1 = SpatialDropout1D(config.prob_dropout)(emb)
        ####
        l1_L = Bidirectional(
            CuDNNLSTM(config.cell_size_l1, return_sequences=True))(emb1)
        l2_LL = Bidirectional(
            CuDNNLSTM(config.cell_size_l2, return_sequences=True))(l1_L)
        l2_LG = Bidirectional(
            CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_L)
        l3_LLC = Conv1D(config.filter_size,
                        kernel_size=config.kernel_size,
                        strides=2,
                        padding="valid",
                        kernel_initializer="he_uniform")(l2_LL)
        l3_LGC = Conv1D(config.filter_size,
                        kernel_size=config.kernel_size,
                        strides=2,
                        padding="valid",
                        kernel_initializer="he_uniform")(l2_LG)

        avg_pool_L = GlobalAveragePooling1D()(l1_L)
        max_pool_L = GlobalMaxPooling1D()(l1_L)
        avg_pool_LL = GlobalAveragePooling1D()(l2_LL)
        max_pool_LL = GlobalMaxPooling1D()(l2_LL)
        avg_pool_LG = GlobalAveragePooling1D()(l2_LG)
        max_pool_LG = GlobalMaxPooling1D()(l2_LG)
        attention_LLA = Attention(config.strmaxlen)(l2_LL)
        attention_LGA = Attention(config.strmaxlen)(l2_LG)
        avg_pool_LLC = GlobalAveragePooling1D()(l3_LLC)
        max_pool_LLC = GlobalMaxPooling1D()(l3_LLC)
        avg_pool_LGC = GlobalAveragePooling1D()(l3_LGC)
        max_pool_LGC = GlobalMaxPooling1D()(l3_LGC)
        attention_LLCA = Attention(int(config.strmaxlen / 2 - 1))(l3_LLC)
        attention_LGCA = Attention(int(config.strmaxlen / 2 - 1))(l3_LGC)
        conc_LLC = concatenate([
            avg_pool_L, max_pool_L, avg_pool_LL, max_pool_LL, avg_pool_LLC,
            max_pool_LLC, attention_LLA, attention_LLCA
        ])
        conc_LGC = concatenate([
            avg_pool_L, max_pool_L, avg_pool_LG, max_pool_LG, avg_pool_LGC,
            max_pool_LGC, attention_LGA, attention_LGCA
        ])
        out_LL = Dropout(config.prob_dropout2)(conc_LLC)
        out_LG = Dropout(config.prob_dropout2)(conc_LGC)
        out_LL = Dense(2, activation='softmax')(out_LL)
        out_LG = Dense(2, activation='softmax')(out_LG)
        ####

        #         emb2 = Embedding(config.max_features, config.max_features,embeddings_initializer='identity', trainable = True)(inp)
        #         emb1 = Embedding(config.max_features, config.embed_size, trainable = True)(inp)
        emb2 = SpatialDropout1D(config.prob_dropout)(emb)

        ####
        l1_G = Bidirectional(
            CuDNNGRU(config.cell_size_l1, return_sequences=True))(emb2)

        l2_GL = Bidirectional(
            CuDNNLSTM(config.cell_size_l2, return_sequences=True))(l1_G)
        l2_GG = Bidirectional(
            CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_G)

        l3_GLC = Conv1D(config.filter_size,
                        kernel_size=config.kernel_size,
                        strides=2,
                        padding="valid",
                        kernel_initializer="he_uniform")(l2_GL)
        l3_GGC = Conv1D(config.filter_size,
                        kernel_size=config.kernel_size,
                        strides=2,
                        padding="valid",
                        kernel_initializer="he_uniform")(l2_GG)

        avg_pool_G = GlobalAveragePooling1D()(l1_G)
        max_pool_G = GlobalMaxPooling1D()(l1_G)

        avg_pool_GL = GlobalAveragePooling1D()(l2_GL)
        max_pool_GL = GlobalMaxPooling1D()(l2_GL)
        avg_pool_GG = GlobalAveragePooling1D()(l2_GG)
        max_pool_GG = GlobalMaxPooling1D()(l2_GG)

        attention_GLA = Attention(config.strmaxlen)(l2_GL)
        attention_GGA = Attention(config.strmaxlen)(l2_GG)

        avg_pool_GLC = GlobalAveragePooling1D()(l3_GLC)
        max_pool_GLC = GlobalMaxPooling1D()(l3_GLC)
        avg_pool_GGC = GlobalAveragePooling1D()(l3_GGC)
        max_pool_GGC = GlobalMaxPooling1D()(l3_GGC)

        attention_GLCA = Attention(int(config.strmaxlen / 2 - 1))(l3_GLC)
        attention_GGCA = Attention(int(config.strmaxlen / 2 - 1))(l3_GGC)

        conc_GLC = concatenate([
            avg_pool_G, max_pool_G, avg_pool_GL, max_pool_GL, avg_pool_GLC,
            max_pool_GLC, attention_GLA, attention_GLCA
        ])
        conc_GGC = concatenate([
            avg_pool_G, max_pool_G, avg_pool_GG, max_pool_GG, avg_pool_GGC,
            max_pool_GGC, attention_GGA, attention_GGCA
        ])

        out_GL = Dropout(config.prob_dropout2)(conc_GLC)
        out_GG = Dropout(config.prob_dropout2)(conc_GGC)
        out_GL = Dense(2, activation='softmax')(out_GL)
        out_GG = Dense(2, activation='softmax')(out_GG)

        out_avg = average([out_LL, out_LG, out_GL, out_GG])

        # #         ==================================================================================================
        model = Model(inputs=inp,
                      outputs=[out_LL, out_LG, out_GL, out_GG, out_avg])
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      loss_weights=[1., 1., 1., 1., 0.1],
                      metrics=['accuracy'])
        return model
Example #23
    num_features = W.shape[1]  # 400
    logging.info("dimension num of word vector [num_features]: %d" %
                 num_features)

    Routings = 20
    Num_capsule = 60
    Dim_capsule = 120

    sequence_input = Input(shape=(maxlen, ), dtype='int32')
    embedded_sequences = Embedding(input_dim=max_features,
                                   output_dim=num_features,
                                   input_length=maxlen,
                                   weights=[W],
                                   trainable=False)(sequence_input)
    embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(embedded_sequences)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x)
    capsule = Capsule(num_capsule=Num_capsule,
                      dim_capsule=Dim_capsule,
                      routings=Routings,
                      share_weights=True)(x)
    # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
    capsule = Flatten()(capsule)
    capsule = Dropout(0.1)(capsule)
    output = Dense(4, activation='softmax')(capsule)
    model = Model(inputs=[sequence_input], outputs=output)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', f1])
    checkpointer = ModelCheckpoint(filepath="weights.hdf5",
                                   monitor='val_acc')
Example #24
    def get_model(config):
        #model
        #wrote out all the blocks instead of looping for simplicity
        filter_nr = 64
        filter_size = 3
        max_pool_size = 3
        max_pool_strides = 2
        dense_nr = 64
        spatial_dropout = 0.3
        dense_dropout = 0.4
        conv_kern_reg = regularizers.l2(0.000005)
        conv_bias_reg = regularizers.l2(0.000005)

        inp = Input(shape=(config.strmaxlen, ), name='input')
        emb = Embedding(config.max_features, config.embed_size,
                        trainable=True)(inp)

        emb1 = SpatialDropout1D(config.prob_dropout)(emb)

        block1 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(emb1)
        block1 = BatchNormalization()(block1)
        block1 = PReLU()(block1)
        block1 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block1)
        block1 = BatchNormalization()(block1)
        block1 = PReLU()(block1)

        #we pass embedded comment through conv1d with filter size 1 because it needs to have the same shape as block output
        #if you choose filter_nr = embed_size (300 in this case) you don't have to do this part and can add emb_comment directly to block1_output
        resize_emb = Conv1D(filter_nr,
                            kernel_size=1,
                            padding='same',
                            activation='linear',
                            kernel_regularizer=conv_kern_reg,
                            bias_regularizer=conv_bias_reg)(emb1)
        resize_emb = PReLU()(resize_emb)

        block1_output = add([block1, resize_emb])
        block1_output = MaxPooling1D(pool_size=max_pool_size,
                                     strides=max_pool_strides)(block1_output)

        block2 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block1_output)
        block2 = BatchNormalization()(block2)
        block2 = PReLU()(block2)
        block2 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block2)
        block2 = BatchNormalization()(block2)
        block2 = PReLU()(block2)

        block2_output = add([block2, block1_output])
        block2_output = MaxPooling1D(pool_size=max_pool_size,
                                     strides=max_pool_strides)(block2_output)

        block3 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block2_output)
        block3 = BatchNormalization()(block3)
        block3 = PReLU()(block3)
        block3 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block3)
        block3 = BatchNormalization()(block3)
        block3 = PReLU()(block3)

        block3_output = add([block3, block2_output])
        block3_output = MaxPooling1D(pool_size=max_pool_size,
                                     strides=max_pool_strides)(block3_output)

        block4 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block3_output)
        block4 = BatchNormalization()(block4)
        block4 = PReLU()(block4)
        block4 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block4)
        block4 = BatchNormalization()(block4)
        block4 = PReLU()(block4)

        block4_output = add([block4, block3_output])
        block4_output = MaxPooling1D(pool_size=max_pool_size,
                                     strides=max_pool_strides)(block4_output)

        block5 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block4_output)
        block5 = BatchNormalization()(block5)
        block5 = PReLU()(block5)
        block5 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block5)
        block5 = BatchNormalization()(block5)
        block5 = PReLU()(block5)

        block5_output = add([block5, block4_output])
        block5_output = MaxPooling1D(pool_size=max_pool_size,
                                     strides=max_pool_strides)(block5_output)

        block6 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block5_output)
        block6 = BatchNormalization()(block6)
        block6 = PReLU()(block6)
        block6 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block6)
        block6 = BatchNormalization()(block6)
        block6 = PReLU()(block6)

        block6_output = add([block6, block5_output])
        block6_output = MaxPooling1D(pool_size=max_pool_size,
                                     strides=max_pool_strides)(block6_output)

        block7 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block6_output)
        block7 = BatchNormalization()(block7)
        block7 = PReLU()(block7)
        block7 = Conv1D(filter_nr,
                        kernel_size=filter_size,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(block7)
        block7 = BatchNormalization()(block7)
        block7 = PReLU()(block7)

        block7_output = add([block7, block6_output])
        output = GlobalMaxPooling1D()(block7_output)

        output = Dense(dense_nr, activation='linear')(output)
        output = BatchNormalization()(output)
        output = PReLU()(output)
        output = Dropout(dense_dropout)(output)
        dpcnn_out = Dense(1)(output)

        #         model = Model(inputs=inp, outputs=output)
        #         model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error', 'accuracy'])

        ###         ========================================================
        #         inp = Input(shape=(config.strmaxlen, ), name='input')
        #         emb = Embedding(config.max_features, config.embed_size, trainable = True)(inp)
        emb2 = Embedding(config.max_features,
                         config.embed_size,
                         trainable=True)(inp)
        x1 = SpatialDropout1D(config.prob_dropout)(emb2)
        x1 = Bidirectional(CuDNNLSTM(config.cell_size,
                                     return_sequences=True))(x1)
        x12 = Bidirectional(CuDNNGRU(config.cell_size,
                                     return_sequences=True))(x1)
        x12c = Conv1D(filter_nr,
                      kernel_size=filter_size,
                      strides=1,
                      padding="valid",
                      kernel_initializer="he_uniform")(x12)
        #         x2 = SpatialDropout1D(config.prob_dropout)(emb)
        #         x2 = Bidirectional(CuDNNGRU(config.cell_size2, return_sequences=True))(x2)
        #         x22 = Bidirectional(CuDNNLSTM(config.cell_size2, return_sequences=False))(x1)

        avg_pool1 = GlobalAveragePooling1D()(x1)
        max_pool1 = GlobalMaxPooling1D()(x1)

        avg_pool12 = GlobalAveragePooling1D()(x12)
        max_pool12 = GlobalMaxPooling1D()(x12)

        avg_pool12c = GlobalAveragePooling1D()(x12c)
        max_pool12c = GlobalMaxPooling1D()(x12c)

        #         avg_pool14 = GlobalAveragePooling1D()(x22)
        #         max_pool14 = GlobalMaxPooling1D()(x22)

        conc = concatenate([
            avg_pool1, max_pool1, avg_pool12, max_pool12, avg_pool12c,
            max_pool12c
        ])
        #         fc1 = Dense(50, activation='relu')(conc)
        fc1 = Dropout(config.prob_dropout)(conc)
        rnnc_out = Dense(1)(fc1)

        #         model = Model(inputs=inp, outputs=outp)
        #         model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error', 'accuracy'])

        # #         ==================================================================================================

        ###         ========================================================
        #         inp = Input(shape=(config.strmaxlen, ), name='input')
        emb3 = Embedding(config.max_features,
                         config.embed_size,
                         trainable=True)(inp)
        r1 = SpatialDropout1D(config.prob_dropout)(emb3)
        r1 = Bidirectional(CuDNNLSTM(config.cell_size2,
                                     return_sequences=True))(r1)
        r12 = Bidirectional(
            CuDNNLSTM(config.cell_size2, return_sequences=False))(r1)

        rfc1 = Dense(50, activation='relu')(r12)
        rfc1 = Dropout(config.prob_dropout)(rfc1)
        rnn_out = Dense(1)(rfc1)

        #         model = Model(inputs=inp, outputs=outp)
        #         model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error', 'accuracy'])

        # #         ==================================================================================================
        ens_out = average([rnn_out, rnnc_out, dpcnn_out])
        model = Model(inputs=inp,
                      outputs=[rnn_out, rnnc_out, dpcnn_out, ens_out])
        model.compile(loss='mean_squared_error',
                      optimizer='adam',
                      loss_weights=[1., 0.8, 1., 0.3],
                      metrics=['mean_squared_error', 'accuracy'])

        return model
Example #25
    def build_model(self):
        # Build the seq2seq model
        x_in = Input(shape=(None, ))
        y_in = Input(shape=(None, ))
        x = x_in
        y = y_in
        x_mask = Lambda(
            lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x)
        y_mask = Lambda(
            lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(y)

        x_one_hot = Lambda(self._one_hot)([x, x_mask])
        x_prior = ScaleShift()(x_one_hot)  # learn a prior over the output distribution (target tokens are likely to appear in the input)

        # embedding
        embedding = Embedding(len(self.chars), self.hidden_dim)
        x = embedding(x)
        y = embedding(y)

        # encoder: two-layer bidirectional GRU; decoder: two-layer unidirectional GRU
        if self.use_gpu:
            # encoder
            x = Bidirectional(
                CuDNNGRU(int(self.hidden_dim / 2), return_sequences=True))(x)
            x = Bidirectional(
                CuDNNGRU(int(self.hidden_dim / 2), return_sequences=True))(x)
            # decoder
            y = CuDNNGRU(self.hidden_dim, return_sequences=True)(y)
            y = CuDNNGRU(self.hidden_dim, return_sequences=True)(y)
        else:
            # encoder
            x = Bidirectional(
                GRU(int(self.hidden_dim / 2),
                    return_sequences=True,
                    dropout=self.dropout))(x)
            x = Bidirectional(
                GRU(int(self.hidden_dim / 2),
                    return_sequences=True,
                    dropout=self.dropout))(x)
            # decoder
            y = GRU(self.hidden_dim,
                    return_sequences=True,
                    dropout=self.dropout)(y)
            y = GRU(self.hidden_dim,
                    return_sequences=True,
                    dropout=self.dropout)(y)

        xy = Interact()([y, x, x_mask])
        xy = Dense(512, activation='relu')(xy)
        xy = Dense(len(self.chars))(xy)
        xy = Lambda(lambda x: (x[0] + x[1]) / 2)([xy, x_prior])  # average with the prior
        xy = Activation('softmax')(xy)

        # cross-entropy loss, with padding positions masked out
        cross_entropy = K.sparse_categorical_crossentropy(
            y_in[:, 1:], xy[:, :-1])
        loss = K.sum(cross_entropy * y_mask[:, 1:, 0]) / K.sum(y_mask[:, 1:,
                                                                      0])

        model = Model([x_in, y_in], xy)
        model.add_loss(loss)
        model.compile(optimizer=Adam(1e-3))
        if os.path.exists(self.model_path):
            model.load_weights(self.model_path)
        return model
Example #26
    def get_model(config):
        inp = Input(shape=(config.strmaxlen, ), name='input')
        #         inp = Input(shape=(config.max_features, ), name='input')

        emb = Embedding(config.max_features,
                        config.max_features,
                        embeddings_initializer='identity',
                        trainable=True)(inp)
        #         emb1 = Embedding(config.max_features, config.embed_size, trainable = True)(inp)
        emb1 = SpatialDropout1D(config.prob_dropout)(emb)

        ####
        l1_L = Bidirectional(
            CuDNNLSTM(config.cell_size_l1, return_sequences=True))(emb1)

        l2_LL = Bidirectional(
            CuDNNLSTM(config.cell_size_l2, return_sequences=True))(l1_L)
        l2_LG = Bidirectional(
            CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_L)

        l3_LLC = Conv1D(config.filter_size,
                        kernel_size=config.kernel_size,
                        strides=2,
                        padding="valid",
                        kernel_initializer="he_uniform")(l2_LL)
        l3_LGC = Conv1D(config.filter_size,
                        kernel_size=config.kernel_size,
                        strides=2,
                        padding="valid",
                        kernel_initializer="he_uniform")(l2_LG)

        avg_pool_L = GlobalAveragePooling1D()(l1_L)
        max_pool_L = GlobalMaxPooling1D()(l1_L)

        avg_pool_LL = GlobalAveragePooling1D()(l2_LL)
        max_pool_LL = GlobalMaxPooling1D()(l2_LL)
        avg_pool_LG = GlobalAveragePooling1D()(l2_LG)
        max_pool_LG = GlobalMaxPooling1D()(l2_LG)

        attention_LLA = Attention(config.strmaxlen)(l2_LL)
        attention_LGA = Attention(config.strmaxlen)(l2_LG)

        avg_pool_LLC = GlobalAveragePooling1D()(l3_LLC)
        max_pool_LLC = GlobalMaxPooling1D()(l3_LLC)
        avg_pool_LGC = GlobalAveragePooling1D()(l3_LGC)
        max_pool_LGC = GlobalMaxPooling1D()(l3_LGC)

        attention_LLCA = Attention(int(config.strmaxlen / 2 - 1))(l3_LLC)
        attention_LGCA = Attention(int(config.strmaxlen / 2 - 1))(l3_LGC)

        conc_LLC = concatenate([
            avg_pool_L, max_pool_L, avg_pool_LL, max_pool_LL, avg_pool_LLC,
            max_pool_LLC, attention_LLA, attention_LLCA
        ])
        conc_LGC = concatenate([
            avg_pool_L, max_pool_L, avg_pool_LG, max_pool_LG, avg_pool_LGC,
            max_pool_LGC, attention_LGA, attention_LGCA
        ])

        out_LL = Dropout(config.prob_dropout2)(conc_LLC)
        out_LG = Dropout(config.prob_dropout2)(conc_LGC)
        out_LL = Dense(1)(out_LL)
        out_LG = Dense(1)(out_LG)
        ####

        #         emb2 = Embedding(config.max_features, config.max_features,embeddings_initializer='identity', trainable = True)(inp)
        #         emb1 = Embedding(config.max_features, config.embed_size, trainable = True)(inp)
        emb2 = SpatialDropout1D(config.prob_dropout)(emb)

        ####
        l1_G = Bidirectional(
            CuDNNGRU(config.cell_size_l1, return_sequences=True))(emb2)

        l2_GL = Bidirectional(
            CuDNNLSTM(config.cell_size_l2, return_sequences=True))(l1_G)
        l2_GG = Bidirectional(
            CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_G)

        l3_GLC = Conv1D(config.filter_size,
                        kernel_size=config.kernel_size,
                        strides=2,
                        padding="valid",
                        kernel_initializer="he_uniform")(l2_GL)
        l3_GGC = Conv1D(config.filter_size,
                        kernel_size=config.kernel_size,
                        strides=2,
                        padding="valid",
                        kernel_initializer="he_uniform")(l2_GG)

        avg_pool_G = GlobalAveragePooling1D()(l1_G)
        max_pool_G = GlobalMaxPooling1D()(l1_G)

        avg_pool_GL = GlobalAveragePooling1D()(l2_GL)
        max_pool_GL = GlobalMaxPooling1D()(l2_GL)
        avg_pool_GG = GlobalAveragePooling1D()(l2_GG)
        max_pool_GG = GlobalMaxPooling1D()(l2_GG)

        attention_GLA = Attention(config.strmaxlen)(l2_GL)
        attention_GGA = Attention(config.strmaxlen)(l2_GG)

        avg_pool_GLC = GlobalAveragePooling1D()(l3_GLC)
        max_pool_GLC = GlobalMaxPooling1D()(l3_GLC)
        avg_pool_GGC = GlobalAveragePooling1D()(l3_GGC)
        max_pool_GGC = GlobalMaxPooling1D()(l3_GGC)

        attention_GLCA = Attention(int(config.strmaxlen / 2 - 1))(l3_GLC)
        attention_GGCA = Attention(int(config.strmaxlen / 2 - 1))(l3_GGC)

        conc_GLC = concatenate([
            avg_pool_G, max_pool_G, avg_pool_GL, max_pool_GL, avg_pool_GLC,
            max_pool_GLC, attention_GLA, attention_GLCA
        ])
        conc_GGC = concatenate([
            avg_pool_G, max_pool_G, avg_pool_GG, max_pool_GG, avg_pool_GGC,
            max_pool_GGC, attention_GGA, attention_GGCA
        ])

        out_GL = Dropout(config.prob_dropout2)(conc_GLC)
        out_GG = Dropout(config.prob_dropout2)(conc_GGC)
        out_GL = Dense(1)(out_GL)
        out_GG = Dense(1)(out_GG)

        out_avg = average([out_LL, out_LG, out_GL, out_GG])

        inp_post = Input(shape=(config.strmaxlen, ), name='input_post')
        #         inp = Input(shape=(config.max_features, ), name='input')

        embp = Embedding(config.max_features,
                         config.max_features,
                         embeddings_initializer='identity',
                         trainable=True)(inp_post)
        #         emb1 = Embedding(config.max_features, config.embed_size, trainable = True)(inp)
        embp1 = SpatialDropout1D(config.prob_dropout)(embp)

        ####
        l1_Lp = Bidirectional(
            CuDNNLSTM(config.cell_size_l1, return_sequences=True))(embp1)

        l2_LLp = Bidirectional(
            CuDNNLSTM(config.cell_size_l2, return_sequences=True))(l1_Lp)
        l2_LGp = Bidirectional(
            CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_Lp)

        l3_LLCp = Conv1D(config.filter_size,
                         kernel_size=config.kernel_size,
                         strides=2,
                         padding="valid",
                         kernel_initializer="he_uniform")(l2_LLp)
        l3_LGCp = Conv1D(config.filter_size,
                         kernel_size=config.kernel_size,
                         strides=2,
                         padding="valid",
                         kernel_initializer="he_uniform")(l2_LGp)

        avg_pool_Lp = GlobalAveragePooling1D()(l1_Lp)
        max_pool_Lp = GlobalMaxPooling1D()(l1_Lp)

        avg_pool_LLp = GlobalAveragePooling1D()(l2_LLp)
        max_pool_LLp = GlobalMaxPooling1D()(l2_LLp)
        avg_pool_LGp = GlobalAveragePooling1D()(l2_LGp)
        max_pool_LGp = GlobalMaxPooling1D()(l2_LGp)

        attention_LLAp = Attention(config.strmaxlen)(l2_LLp)
        attention_LGAp = Attention(config.strmaxlen)(l2_LGp)

        avg_pool_LLCp = GlobalAveragePooling1D()(l3_LLCp)
        max_pool_LLCp = GlobalMaxPooling1D()(l3_LLCp)
        avg_pool_LGCp = GlobalAveragePooling1D()(l3_LGCp)
        max_pool_LGCp = GlobalMaxPooling1D()(l3_LGCp)

        attention_LLCAp = Attention(int(config.strmaxlen / 2 - 1))(l3_LLCp)
        attention_LGCAp = Attention(int(config.strmaxlen / 2 - 1))(l3_LGCp)

        conc_LLCp = concatenate([
            avg_pool_Lp, max_pool_Lp, avg_pool_LLp, max_pool_LLp,
            avg_pool_LLCp, max_pool_LLCp, attention_LLAp, attention_LLCAp
        ])
        conc_LGCp = concatenate([
            avg_pool_Lp, max_pool_Lp, avg_pool_LGp, max_pool_LGp,
            avg_pool_LGCp, max_pool_LGCp, attention_LGAp, attention_LGCAp
        ])

        out_LLp = Dropout(config.prob_dropout2)(conc_LLCp)
        out_LGp = Dropout(config.prob_dropout2)(conc_LGCp)
        out_LLp = Dense(1)(out_LLp)
        out_LGp = Dense(1)(out_LGp)
        ####

        #         emb2 = Embedding(config.max_features, config.max_features,embeddings_initializer='identity', trainable = True)(inp)
        #         emb1 = Embedding(config.max_features, config.embed_size, trainable = True)(inp)
        embp2 = SpatialDropout1D(config.prob_dropout)(embp)

        ####
        l1_Gp = Bidirectional(
            CuDNNGRU(config.cell_size_l1, return_sequences=True))(embp2)

        l2_GLp = Bidirectional(
            CuDNNLSTM(config.cell_size_l2, return_sequences=True))(l1_Gp)
        l2_GGp = Bidirectional(
            CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_Gp)

        l3_GLCp = Conv1D(config.filter_size,
                         kernel_size=config.kernel_size,
                         strides=2,
                         padding="valid",
                         kernel_initializer="he_uniform")(l2_GLp)
        l3_GGCp = Conv1D(config.filter_size,
                         kernel_size=config.kernel_size,
                         strides=2,
                         padding="valid",
                         kernel_initializer="he_uniform")(l2_GGp)

        avg_pool_Gp = GlobalAveragePooling1D()(l1_Gp)
        max_pool_Gp = GlobalMaxPooling1D()(l1_Gp)

        avg_pool_GLp = GlobalAveragePooling1D()(l2_GLp)
        max_pool_GLp = GlobalMaxPooling1D()(l2_GLp)
        avg_pool_GGp = GlobalAveragePooling1D()(l2_GGp)
        max_pool_GGp = GlobalMaxPooling1D()(l2_GGp)

        attention_GLAp = Attention(config.strmaxlen)(l2_GLp)
        attention_GGAp = Attention(config.strmaxlen)(l2_GGp)

        avg_pool_GLCp = GlobalAveragePooling1D()(l3_GLCp)
        max_pool_GLCp = GlobalMaxPooling1D()(l3_GLCp)
        avg_pool_GGCp = GlobalAveragePooling1D()(l3_GGCp)
        max_pool_GGCp = GlobalMaxPooling1D()(l3_GGCp)

        attention_GLCAp = Attention(int(config.strmaxlen / 2 - 1))(l3_GLCp)
        attention_GGCAp = Attention(int(config.strmaxlen / 2 - 1))(l3_GGCp)

        conc_GLCp = concatenate([
            avg_pool_Gp, max_pool_Gp, avg_pool_GLp, max_pool_GLp,
            avg_pool_GLCp, max_pool_GLCp, attention_GLAp, attention_GLCAp
        ])
        conc_GGCp = concatenate([
            avg_pool_Gp, max_pool_Gp, avg_pool_GGp, max_pool_GGp,
            avg_pool_GGCp, max_pool_GGCp, attention_GGAp, attention_GGCAp
        ])

        out_GLp = Dropout(config.prob_dropout2)(conc_GLCp)
        out_GGp = Dropout(config.prob_dropout2)(conc_GGCp)
        out_GLp = Dense(1)(out_GLp)
        out_GGp = Dense(1)(out_GGp)

        out_avgp = average([out_LLp, out_LGp, out_GLp, out_GGp])

        out_last = average([out_avg, out_avgp])

        # #         ==================================================================================================
        model_avg = Model(inputs=[inp, inp_post],
                          outputs=[
                              out_LL, out_LG, out_GL, out_GG, out_LLp, out_LGp,
                              out_GLp, out_GGp, out_avg, out_avgp, out_last
                          ])

        #         inp_pre = Input(shape=(config.strmaxlen, ), name='input_pre')
        #         inp_post = Input(shape=(config.strmaxlen, ), name='input_post')

        #         model_pre = model_avg(inp_pre)
        #         model_post = model_avg(inp_post)

        #         stack_layer = concatenate([model_pre, model_post])
        #         ens_out = Dense(1, use_bias=False)(stack_layer)

        #         reg_model = Model(inputs=[inp_pre, inp_post], outputs=ens_out)

        model_avg.compile(
            loss='mean_squared_error',
            optimizer='adam',
            loss_weights=[1., 1., 1., 1., 1., 1., 1., 1., 0.1, 0.1, 0.01],
            metrics=['mean_squared_error', 'accuracy'])

        return model_avg
Example #27
def build_rnn(config={}):

	cfgdefaults = {
		'embed': ['glove', 'Embedding'],
		'glove_dim': [100, 'GLoVe dims'],
		'spatial_dropout': [0, 'Spatial dropout'],
		'use_gpu': [False, 'GPU support'],
		'rnn_type': ['gru', 'RNN type'],
		'rnn_units': [32, 'RNN units'],
		'bidirectional': [False, 'Bi-directional RNN'],
		'dense_units': [64, 'Dense units'],
		'dense_dropout': [0, 'Dense dropout'],
		'd2v_include': [False, 'Additional Doc2Vec representation'],
		'd2v_dim': [200, 'Doc2Vec dimensionality'],
		'd2v_dense_nodes': [64, 'Doc2Vec MLP nodes'],
		'adam_lr': [.0001, 'Adam optimizer learning rate'],
		'adam_lr_decay': [.000001, 'Adam optimizer LR decay'],
		'SEQ_LEN': [200, 'Sequence length'],
		'train_epochs': [1, 'Training epochs'],
		'train': [True, 'Model training'],
		'target_field_names': [
			target_flags,
			'Target flag field names']
	}

	if type(config) is dict:
		for cfgkey in cfgdefaults.keys():
			if cfgkey not in config.keys():
				var = cfgdefaults.get(cfgkey)
				config[cfgkey] = var[0]
				print("{} not specified in config, defaulting to {}".format(var[1], var[0]))
	else:
		print('Must pass config as dict or leave empty.')
		return

	if config['embed'] == 'word2vec':
		from gensim.models import Word2Vec
		if not isinstance(config['w2v_model'], Word2Vec):
			print('Word2Vec model must be passed to config key w2vmodel')
			return

	from embeddingtools import make_embedding_layer

	seq_input = Input(shape=(config['SEQ_LEN'],), dtype='int32', name='seq_input')
	if config['embed'] == 'word2vec':
		embedding = embedding_layer_w2v(config['w2v_model'], config['SEQ_LEN'])(seq_input)

	if config['embed'] == 'glove':
		embedding = embedding_layer_glove(config['SEQ_LEN'], glove_dim=config['glove_dim'])(seq_input)

	spatial_dropout = SpatialDropout1D(config['spatial_dropout'])(embedding)

	if config['bidirectional']:
		from keras.layers import Bidirectional 

		if config['rnn_type'] == 'lstm':
			if config['use_gpu']:

				rnn = Bidirectional(CuDNNLSTM(config['rnn_units']))(spatial_dropout)
			else:

				rnn = Bidirectional(LSTM(config['rnn_units']))(spatial_dropout)

		elif config['rnn_type'] == 'gru':
			if config['use_gpu']:

				rnn = Bidirectional(CuDNNGRU(config['rnn_units']))(spatial_dropout)
			else:

				rnn = Bidirectional(GRU(config['rnn_units']))(spatial_dropout)
	else:
		if config['rnn_type'] == 'lstm':
			if config['use_gpu']:
				rnn = CuDNNLSTM(config['rnn_units'])(spatial_dropout)
			else:
				rnn = LSTM(config['rnn_units'])(spatial_dropout)
		elif config['rnn_type'] == 'gru':
			if config['use_gpu']:
				rnn = CuDNNGRU(config['rnn_units'])(spatial_dropout)
			else:
				rnn = GRU(config['rnn_units'])(spatial_dropout)

	inputs = [seq_input]

	if config['d2v_include']:
		d2v_input = Input(shape=(config['d2v_dim'],), name='D2VInput')
		d2v_dense = Dense(config['d2v_dense_nodes'], activation='relu', name='D2VDense')(d2v_input)

		rnn = concatenate([rnn, d2v_dense])

		inputs.append(d2v_input)


	dense = Dense(config['dense_units'], activation='relu')(rnn)
	dense_dropout = Dropout(config['dense_dropout'])(dense)

	output_nodes = []
	for i in range(len(config['target_field_names'])):
		output_nodes.append(
			Dense(1, activation='sigmoid', name=config['target_field_names'][i])(dense_dropout)
			)

	adam = Adam(lr=config['adam_lr'], decay=config['adam_lr_decay'])
	model = Model(inputs=inputs, outputs=output_nodes)
	model.compile(loss='binary_crossentropy',
		optimizer=adam, 
		metrics=['accuracy'])
	return model, config
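
# Usage sketch (hypothetical config; assumes the GloVe embedding helper and data are available):
# model, cfg = build_rnn({'rnn_type': 'lstm', 'bidirectional': True, 'use_gpu': False})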
Example #28
def get_sentence_attention(word_model, word_length, sent_length, n_classes):
    #x = Permute((2,1))(si_vects)
    nclasses = 1
    input = Input(shape=(sent_length, word_length ), dtype='int32')
    print(' input to sentence attn network',word_model)
    preds = []
    attentions_pred = []
    #print(output.summary())
    si_vects = TimeDistributed(word_model)(input)
    print('Shape after si_vects', si_vects.shape)
    #u_it = TimeDistributed(TimeDistributed(BatchNormalization()))(si_vects)
    u_it = TimeDistributed(TimeDistributed(Dense(256, activation='tanh')))(si_vects)
    print('Shape after word vector',u_it.shape)
    #u_it = TimeDistributed(TimeDistributed(BatchNormalization()))(u_it)

    #h_it = TimeDistributed(Reshape((100,word_length)))(si_vects)
    #print('Shape after reshape word vector',h_it.shape)

    attn_final_word = [TimeDistributed(ATTNWORD(1))(u_it) for i in range(nclasses)]
    #a_it = Reshape(( word_length, 1))(a_it)
    #h_it = Reshape((word_length, 512))(h_it)
    print('ATTN Shape', attn_final_word[0].shape)
    attn_final_word = [Multiply()([si_vects, attn_final_word[i]]) for i in range(nclasses)]#Multiply()([h_it,a_it])
    print('Multi word Shape', attn_final_word[0].shape)
    attn_final_word = [Reshape((sent_length, 256,word_length))(attn_final_word[i]) for i in range(nclasses)]
    print ('Shape of the att1 is {}'.format(attn_final_word[0].shape))
    attn_final_word = [Lambda(lambda x: K.sum(x, axis=3))(attn_final_word[i]) for i in range(nclasses)]
    output_list = []
    for i in range(nclasses):
        print ('Shape of the lambda word is {}'.format(attn_final_word[i].shape))
        ind_t = 0
        attn_sents_for_all_classes = []
        #attn_final_word[i] = SpatialDropout1D(0.2)(attn_final_word[i])
        x = Bidirectional(CuDNNGRU(128,return_sequences=True))(attn_final_word[i])
        x = SpatialDropout1D(0.2)(x)
        x = BatchNormalization()(x)
        print ("Shape of X-X is {}".format(x.shape))
        u_it = TimeDistributed(Dense(256, activation='tanh'))(x)
        print('Shape after word vector',u_it.shape)
        #h_it = Reshape((100,sent_length))(x)
        attn_final_sent = ATTNWORD(1)(u_it)
        print ('Shape of the sent att is {}'.format(attn_final_sent.shape))
        #attentions_pred.append(attn_final)
        attn_final_sent = Multiply()([x, attn_final_sent])
        print ('Shape of the multi sent att is {}'.format(attn_final_sent.shape))
        attn_final_sent = Reshape((256,sent_length))(attn_final_sent)
        attn_final_sent = Lambda(lambda x: K.sum(x, axis=2))(attn_final_sent)
        output_list.append(attn_final_sent)

    word_attn = Reshape((sent_length*word_length, 256))(si_vects)
    x1 = Conv1D(256,2, activation='relu')(word_attn)
    x1_mp = GlobalMaxPooling1D()(x1)
    x1_av = GlobalAveragePooling1D()(x1)
    x2 = Conv1D(256,3, activation='relu')(word_attn)
    x2_mp = GlobalMaxPooling1D()(x2)
    x2_av = GlobalAveragePooling1D()(x2)
    x3 = Conv1D(256,4, activation='relu')(word_attn)
    x3_mp = GlobalMaxPooling1D()(x3)
    x3_av = GlobalAveragePooling1D()(x3)
    #x = Concatenate()([Flatten()(x1_mp), Flatten()(x2_mp),Flatten()(x3_mp)])
    #x = Concatenate()([x1_mp, x2_mp , x3_av])
    x = Maximum()([x1_mp, x1_av, x2_mp, x2_av , x3_mp, x3_av])
    x = BatchNormalization()(x)
    output_list.append(x)
    #x = Dense(256, activation='relu')(x)
    #x = Dropout(0.25)(x)
    #x = Dense(128, activation='relu')(x)
    #x = Dropout(0.25)(x)
    x = Multiply()(output_list)
    p = Dense(n_classes, activation='sigmoid')(x)

    model = Model(input, p)

    return model
Example #29
def cs_setup_rnn(params, inshape=None, classes=None, char=None):
    
    # Parse network hyperparameters
    em_dim = int(params['em_dim']*10)
    kernel_size = 3
    filters = int(params['conv_units']*6)    
    num_layer = int(params['num_layer'])
    units1 = int(params['layer1_units']*6)
    units2 = int(params['layer2_units']*6)
    units3 = int(params['layer3_units']*6)
    relu_flag = str(params['relu_type'])
    dropval = float(params['dropval'])
    reg_flag = str(params['reg_type'])
    reg_val = 10**(-float(params['reg_val']))

    # Setup regularizer
    if reg_flag == "l1":
        reg = l1(reg_val)
        print("Regularizer "+reg_flag+" set at "+str(reg_val))
    elif reg_flag == "l2":
        reg = l2(reg_val)
        print("Regularizer "+reg_flag+" set at "+str(reg_val))
    elif reg_flag == "l1_l2":
        reg = l1_l2(reg_val)
        print("Regularizer "+reg_flag+" set at "+str(reg_val))
    else:
        reg = None
        print("NOTE: No regularizers used")
    
    # Setup neural network
    inlayer = Input(shape=[inshape])
    x = Embedding(input_dim=len(char)+1,output_dim=em_dim)(inlayer)   
    x = Conv1D(filters, kernel_size, strides=1, padding="same", kernel_regularizer=reg)(x)
    if relu_flag == "relu":
        x = Activation("relu")(x)
    elif relu_flag == "elu":
        x = Activation("elu")(x)
    elif relu_flag == "prelu":
        x = PReLU()(x)
    elif relu_flag == "leakyrelu":
        x = LeakyReLU()(x)
    if params['celltype'] == "GRU":
        if num_layer == 1:
            x = Bidirectional(CuDNNGRU(units1, return_sequences=False))(x)
            x = Dropout(dropval)(x)
        elif num_layer == 2:
            x = Bidirectional(CuDNNGRU(units1, return_sequences=True))(x)
            x = Dropout(dropval)(x)
            x = Bidirectional(CuDNNGRU(units2, return_sequences=False))(x)
            x = Dropout(dropval)(x)
        elif num_layer == 3:
            x = Bidirectional(CuDNNGRU(units1, return_sequences=True))(x)
            x = Dropout(dropval)(x)
            x = Bidirectional(CuDNNGRU(units2, return_sequences=True))(x)
            x = Dropout(dropval)(x)
            x = Bidirectional(CuDNNGRU(units3, return_sequences=False))(x)
            x = Dropout(dropval)(x)
    if params['celltype'] == "LSTM":
        if num_layer == 1:
            x = Bidirectional(CuDNNLSTM(units1, return_sequences=False))(x)
            x = Dropout(dropval)(x)
        elif num_layer == 2:
            x = Bidirectional(CuDNNLSTM(units1, return_sequences=True))(x)
            x = Dropout(dropval)(x)
            x = Bidirectional(CuDNNLSTM(units2, return_sequences=False))(x)
            x = Dropout(dropval)(x)
        elif num_layer == 3:
            x = Bidirectional(CuDNNLSTM(units1, return_sequences=True))(x)
            x = Dropout(dropval)(x)
            x = Bidirectional(CuDNNLSTM(units2, return_sequences=True))(x)
            x = Dropout(dropval)(x)
            x = Bidirectional(CuDNNLSTM(units3, return_sequences=False))(x)
            x = Dropout(dropval)(x)
    
    # Specify output layer
    if classes == 1:
        label = Dense(classes, activation='linear', name='predictions')(x)
    elif classes >= 2:
        label = Dense(classes, activation='softmax', name='predictions')(x)
    else:
        raise("ERROR in specifying tasktype")
        
    # Create base model
    model = Model(inputs=inlayer,outputs=label, name='SMILES2vec')
    
    # Create intermediate model
    submodel = Model(inputs=inlayer,outputs=x, name='SMILES2vec_truncated')
    
    # Specify training method
    if classes == 1:
        model.compile(optimizer="RMSprop", loss="mean_squared_error")
        submodel.compile(optimizer="RMSprop", loss="mean_squared_error")
    elif classes >= 2:
        model.compile(optimizer="RMSprop", loss="categorical_crossentropy")
        submodel.compile(optimizer="RMSprop", loss="categorical_crossentropy")
    else:
        raise("ERROR in specifying tasktype")
    
    return(model, submodel)
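
# Usage sketch (hypothetical hyperparameters; `charset` stands for the SMILES character vocabulary):
# params = {'em_dim': 5, 'conv_units': 16, 'num_layer': 2, 'layer1_units': 32,
#           'layer2_units': 16, 'layer3_units': 8, 'relu_type': 'relu',
#           'dropval': 0.2, 'reg_type': 'l2', 'reg_val': 4, 'celltype': 'GRU'}
# model, submodel = cs_setup_rnn(params, inshape=100, classes=1, char=charset)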
Example #30
def new_lpcnet_model(frame_size=160,
                     rnn_units1=384,
                     rnn_units2=16,
                     nb_used_features=38,
                     training=False,
                     use_gpu=True):
    pcm = Input(shape=(None, 3))
    feat = Input(shape=(None, nb_used_features))
    pitch = Input(shape=(None, 1))
    dec_feat = Input(shape=(None, 128))
    dec_state1 = Input(shape=(rnn_units1, ))
    dec_state2 = Input(shape=(rnn_units2, ))

    padding = 'valid' if training else 'same'
    fconv1 = Conv1D(128,
                    3,
                    padding=padding,
                    activation='tanh',
                    name='feature_conv1')
    fconv2 = Conv1D(128,
                    3,
                    padding=padding,
                    activation='tanh',
                    name='feature_conv2')

    embed = Embedding(256,
                      embed_size,
                      embeddings_initializer=PCMInit(),
                      name='embed_sig')
    cpcm = Reshape((-1, embed_size * 3))(embed(pcm))

    pembed = Embedding(256, 64, name='embed_pitch')
    cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])

    cfeat = fconv2(fconv1(cat_feat))

    fdense1 = Dense(128, activation='tanh', name='feature_dense1')
    fdense2 = Dense(128, activation='tanh', name='feature_dense2')

    cfeat = fdense2(fdense1(cfeat))

    rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))

    if use_gpu:
        rnn = CuDNNGRU(rnn_units1,
                       return_sequences=True,
                       return_state=True,
                       name='gru_a')
        rnn2 = CuDNNGRU(rnn_units2,
                        return_sequences=True,
                        return_state=True,
                        name='gru_b')
    else:
        rnn = GRU(rnn_units1,
                  return_sequences=True,
                  return_state=True,
                  recurrent_activation="sigmoid",
                  reset_after='true',
                  name='gru_a')
        rnn2 = GRU(rnn_units2,
                   return_sequences=True,
                   return_state=True,
                   recurrent_activation="sigmoid",
                   reset_after='true',
                   name='gru_b')

    rnn_in = Concatenate()([cpcm, rep(cfeat)])
    md = MDense(pcm_levels, activation='softmax', name='dual_fc')
    gru_out1, _ = rnn(rnn_in)
    gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))
    ulaw_prob = md(gru_out2)

    model = Model([pcm, feat, pitch], ulaw_prob)
    model.rnn_units1 = rnn_units1
    model.rnn_units2 = rnn_units2
    model.nb_used_features = nb_used_features
    model.frame_size = frame_size

    encoder = Model([feat, pitch], cfeat)

    dec_rnn_in = Concatenate()([cpcm, dec_feat])
    dec_gru_out1, state1 = rnn(dec_rnn_in, initial_state=dec_state1)
    dec_gru_out2, state2 = rnn2(Concatenate()([dec_gru_out1, dec_feat]),
                                initial_state=dec_state2)
    dec_ulaw_prob = md(dec_gru_out2)

    decoder = Model([pcm, dec_feat, dec_state1, dec_state2],
                    [dec_ulaw_prob, state1, state2])
    return model, encoder, decoder
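
# Usage sketch (assumes the module-level `embed_size`, `pcm_levels`, `MDense` and `PCMInit` from LPCNet):
# model, encoder, decoder = new_lpcnet_model(use_gpu=False)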