# --- DJIA close-minus-open forecasting with a small GRU ----------------------
# Loads daily DJIA quotes, standardizes the per-day (Close - Open) move, and
# trains a one-step-ahead regressor on sliding windows of the scaled series.
# NOTE(review): relies on module-level `window_size`, `batch_size` and
# `epochs` being defined elsewhere in the file.
df = pd.read_csv('./data/raw/DJIA_table.csv')
scaler = StandardScaler()
# Single feature per day: the intraday move (Close - Open), z-scored.
data = scaler.fit_transform((df['Close'] - df['Open']).values.reshape(-1, 1))
# Predict the next day's scaled move from the current one: X[t] -> y[t] = data[t+1].
X = data[:-1]
y = data[1:]
# shuffle=False keeps the chronological train/test split (no look-ahead leak).
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)
train_data_gen = TimeseriesGenerator(X_train, y_train,
                                     length=window_size,
                                     batch_size=batch_size,
                                     shuffle=False)
test_data_gen = TimeseriesGenerator(X_test, y_test,
                                    length=window_size,
                                    batch_size=batch_size,
                                    shuffle=False)
# Minimal recurrent regressor: one 4-unit GRU into a single linear output.
model = Sequential()
model.add(CuDNNGRU(4, input_shape=(window_size, 1,)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit_generator(train_data_gen, epochs=epochs).history
# Rebuild price-like curves by cumulatively summing the un-scaled daily moves,
# seeded with the first day's Open.
index = [df['Open'][0]]
for i, d in enumerate(scaler.inverse_transform(data)):
    index.append(index[i] + d)
index_train = [df['Open'][0]]
for i, d in enumerate(scaler.inverse_transform(model.predict_generator(train_data_gen))):
    index_train.append(index_train[i] + d)
# The test curve continues from wherever the reconstructed train curve ended.
index_test = [index_train[-1]]
for i, d in enumerate(scaler.inverse_transform(model.predict_generator(test_data_gen))):
    index_test.append(index_test[i] + d)
def build(input_shape, n_classes, train=True):
    '''Build a residual-CNN + bidirectional-GRU + CTC text recognizer.

    The training generator yields, per batch:
        inputs = {'the_input': train_batch,      # batch of sample images
                  'the_labels': labels,          # batch of label sequences
                  'input_length': input_length,  # batch of RNN input lengths
                  'label_length': label_length}  # batch of label-sequence lengths

    :param input_shape: image input shape (h, w, n_channels)
    :param n_classes: dense output width; character classes + 1 (CTC blank)
    :param train: if True, build the graph with the CTC-loss output; otherwise
        build the inference graph mapping images to softmax sequences
    :return: (model, y_pred, test_func)
    '''
    # BatchNormalization axis depends on the backend's image layout.
    if K.image_data_format() == "channels_first":
        chanDim = 1
    else:
        chanDim = -1
    # input: (h, w, n_channels), kernel: (h, w)
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    # --- Stage 1: residual blocks at 64 filters -----------------------------
    x = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same')(input_data)
    x = BatchNormalization(axis=chanDim)(x)
    x = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization(axis=chanDim)(x)
    # 1x1 projection of the block input forms the shortcut branch.
    shortcut = Conv2D(64, kernel_size=(1, 1), activation='relu', padding='same')(input_data)
    x1 = add([shortcut, x])
    x = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same')(x1)
    x = BatchNormalization(axis=chanDim)(x)
    x = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization(axis=chanDim)(x)
    shortcut = Conv2D(64, kernel_size=(1, 1), activation='relu', padding='same')(x1)
    x1 = add([x, shortcut])
    x = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same')(x1)
    x = BatchNormalization(axis=chanDim)(x)
    x1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)
    # --- Stage 2: residual blocks at 128 filters ----------------------------
    x = Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same')(x1)
    x = BatchNormalization(axis=chanDim)(x)
    x = Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization(axis=chanDim)(x)
    shortcut = Conv2D(128, kernel_size=(1, 1), activation='relu', padding='same')(x1)
    x1 = add([x, shortcut])
    x = Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same')(x1)
    x = BatchNormalization(axis=chanDim)(x)
    x = Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization(axis=chanDim)(x)
    shortcut = Conv2D(128, kernel_size=(1, 1), activation='relu', padding='same')(x1)
    x1 = add([x, shortcut])
    x1 = MaxPooling2D(pool_size=(2, 2),
                      strides=(2, 2), name='pool2')(x1)
    # --- Stage 3: residual blocks at 256 filters ----------------------------
    x = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same')(x1)
    x = BatchNormalization(axis=chanDim)(x)
    x = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization(axis=chanDim)(x)
    shortcut = Conv2D(256, kernel_size=(1, 1), activation='relu', padding='same')(x1)
    x1 = add([x, shortcut])
    x = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same')(x1)
    x = BatchNormalization(axis=chanDim)(x)
    x = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization(axis=chanDim)(x)
    shortcut = Conv2D(256, kernel_size=(1, 1), activation='relu', padding='same')(x1)
    x = add([x, shortcut])
    x = ZeroPadding2D(padding=(0, 1), name='pad1')(x)  # pad width only, not height
    # stride (2, 1): halve the height but preserve the width (the time axis).
    x1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 1), name='pool3')(x)
    # --- Stage 4: residual blocks at 512 filters ----------------------------
    x = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same')(x1)
    x = BatchNormalization(axis=chanDim)(x)
    x = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization(axis=chanDim)(x)
    shortcut = Conv2D(512, kernel_size=(1, 1), activation='relu', padding='same')(x1)
    x1 = add([x, shortcut])
    x = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same')(x1)
    x = BatchNormalization(axis=chanDim)(x)
    x = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization(axis=chanDim)(x)
    shortcut = Conv2D(512, kernel_size=(1, 1), activation='relu', padding='same')(x1)
    x = add([x, shortcut])
    x = ZeroPadding2D(padding=(0, 1), name='pad2')(x)  # pad width only, not height
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 1), name='pool4')(x)
    x = Conv2D(512, kernel_size=(2, 2), strides=(1, 1), activation='relu', padding='valid')(x)
    # Final feature map is (height, width, depth): pooling/padding/strides set
    # width and height, the kernel count sets the depth.
    shape = x.get_shape()
    # conv_to_rnn_dims = (int(shape[1]), int(shape[2]) * int(shape[3]))
    # cnn_feature = Reshape(target_shape=conv_to_rnn_dims, name='map2seq')(x)
    # Move width to the front so each feature-map column becomes one RNN step.
    x = Permute((2, 1, 3))(x)
    x = TimeDistributed(Flatten(), name='timedistrib')(x)
    # Two-layer bidirectional alternatives, kept for reference:
    # x = Bidirectional(GRU(256, return_sequences=True, implementation=2), name='bi-lstm1')(cnn_out)
    #x = Dense(int(shape[1]) * int(shape[3]), name='bi-lstm1_out')(x)
    # (a second recurrent layer may overfit)
    #rnn_out = Bidirectional(GRU(256, return_sequences=True, implementation=2), name='bi-lstm2')(x)
    rnn_f = CuDNNGRU(256, return_sequences=True, name='rnn1_f')(x)
    rnn_b = CuDNNGRU(256, return_sequences=True, go_backwards=True, name='rnn1_b')(x)
    # NOTE(review): the go_backwards GRU emits its sequence time-reversed and
    # is concatenated without flipping it back — confirm this is intended.
    x = concatenate([rnn_f, rnn_b])
    # Dense width = number of character classes + 1 (+1 for the CTC blank).
    x = Dense(n_classes, name='dense')(x)
    # Per-timestep class posteriors.
    y_pred = Activation('softmax', name='softmax')(x)
    # CTC bookkeeping inputs. input_length is the length of y_pred fed to CTC,
    # i.e. the final conv width; each label sequence must be no longer than
    # its prediction sequence so every input step maps to at most one class.
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    labels = Input(name='the_labels', shape=[cfg.max_label_len], dtype='float32')
    # CTC layer:
    # Keras doesn't currently support loss funcs with extra parameters,
    # so CTC loss is implemented in a lambda layer.
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')(
        [y_pred, labels, input_length, label_length])
    if train == True:
        # Training needs labels, input_length and label_length for CTC loss.
        model = Model(
            inputs=[input_data, labels, input_length, label_length],
            outputs=loss_out)
    else:
        # Inference only maps input images to the softmax sequence.
        model = Model(inputs=input_data, outputs=y_pred)
    """
    # Grab the softmax layer's output; used during visualisation to decode and
    # verify results in place of model.predict().
    # inputs: List of placeholder tensors.
    # outputs: List of output tensors.
    """
    test_func = K.function([input_data], [y_pred])  # [input_data] is a list holding the tensor input_data
    return model, y_pred, test_func
def rnn(embedding_matrix, config):
    """Siamese residual-RNN matcher for question pairs.

    Two token-id sequences (q1, q2) share one embedding layer and three
    stacked bidirectional encoders; pooled representations are combined with
    |a-b| and a*b interaction features plus a dense hand-crafted feature
    vector, ending in a sigmoid match probability.

    :param embedding_matrix: pretrained embedding weights, shape (vocab, dim)
    :param config: dict of hyperparameters (rnn type, sizes, dropouts, ...)
    :return: compiled Keras Model taking [q1, q2, feature_input]
    """
    # Three encoders shared (siamese) between both questions.
    if config['rnn'] == 'gru' and config['gpu']:
        encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        # NOTE(review): this branch mixes LSTM/GRU and doubles/quadruples the
        # widths, so the add() of encode and encode2 outputs below would have
        # mismatched dimensions — confirm whether this branch is ever used.
        encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True))
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'], ), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )
    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)
    # bsz, 1, emb_dims
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)
    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)
    q1_encoded = Dropout(0.2)(q1_encoded)
    q2_encoded = Dropout(0.2)(q2_encoded)
    # plain stacking, kept for reference:
    # q1_encoded = encode2(q1_encoded)
    # q2_encoded = encode2(q2_encoded)
    # resnet-style: each layer re-reads the embeddings with the previous output.
    rnn_layer2_input1 = concatenate([q1_embed, q1_encoded])
    rnn_layer2_input2 = concatenate([q2_embed, q2_encoded])
    q1_encoded2 = encode2(rnn_layer2_input1)
    q2_encoded2 = encode2(rnn_layer2_input2)
    # add res shortcut
    res_block1 = add([q1_encoded, q1_encoded2])
    res_block2 = add([q2_encoded, q2_encoded2])
    rnn_layer3_input1 = concatenate([q1_embed, res_block1])
    rnn_layer3_input2 = concatenate([q2_embed, res_block2])
    # rnn_layer3_input1 = concatenate([q1_embed,q1_encoded,q1_encoded2])
    # rnn_layer3_input2 = concatenate([q2_embed,q2_encoded,q2_encoded2])
    q1_encoded3 = encode3(rnn_layer3_input1)
    q2_encoded3 = encode3(rnn_layer3_input2)
    # merged1 = GlobalMaxPool1D()(q1_encoded3)
    # merged2 = GlobalMaxPool1D()(q2_encoded3)
    # q1_encoded = concatenate([q1_encoded, q1_encoded2], axis=-1)
    # q2_encoded = concatenate([q2_encoded, q2_encoded2], axis=-1)
    # merged1 = concatenate([q1_encoded2, q1_embed], axis=-1)
    # merged2 = concatenate([q2_encoded2, q2_embed], axis=-1)
    # # TODO add attention rep , maxpooling rep
    # Pool over all three encoder depths, max over time.
    q1_encoded3 = concatenate([q1_encoded, q1_encoded2, q1_encoded3])
    q2_encoded3 = concatenate([q2_encoded, q2_encoded2, q2_encoded3])
    merged1 = GlobalMaxPool1D()(q1_encoded3)
    merged2 = GlobalMaxPool1D()(q2_encoded3)
    # avg1 = GlobalAvgPool1D()(q1_encoded3)
    # avg2 = GlobalAvgPool1D()(q2_encoded3)
    # merged1 = concatenate([max1,avg1])
    # merged2 = concatenate([max2,avg2])
    # Symmetric interaction features between the two pooled vectors.
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2])
    mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2])
    # jaccard_rep = Lambda(lambda x: x[0]*x[1]/(K.sum(x[0]**2,axis=1,keepdims=True)+K.sum(x[1]**2,axis=1,keepdims=True)-
    #                                           K.sum(K.abs(x[0]*x[1]),axis=1,keepdims=True)))([merged1,merged2])
    # merged = Concatenate()([merged1, merged2, mul_rep, sub_rep,jaccard_rep])
    # Side input of precomputed pairwise features.
    feature_input = Input(shape=(config['feature_length'], ))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'],
                          activation='relu')(feature_dense)
    merged = Concatenate()([merged1, merged2, mul_rep, sub_rep, feature_dense])
    # Classifier
    dense = Dropout(config['dense_dropout'])(merged)
    dense = BatchNormalization()(dense)
    dense = Dense(config['dense_dim'], activation='relu')(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = BatchNormalization()(dense)
    predictions = Dense(1, activation='sigmoid')(dense)
    model = Model(inputs=[q1, q2, feature_input], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt,
                  loss='binary_crossentropy', metrics=[f1])
    return model
# --- Sentiment models: train/evaluate the LSTM above, then a stacked GRU -----
print(model.summary())
model.fit(x_train_pad, y_train,
          epochs=n_epochs, batch_size=batch_size,
          validation_split=0.05, callbacks=[callback_early_stopping])
eval_ = model.evaluate(x_test_pad, y_test)
print(eval_)
model.save('sentiment_lstm')
# Stacked-GRU variant: embedding -> GRU 16 -> 8 -> 4 -> sigmoid.
model_GRU = Sequential()
model_GRU.add(Embedding(input_dim=num_words,
                        output_dim=embedding_size,
                        input_length=max_len,
                        name='layer_embedding'))
model_GRU.add(CuDNNGRU(units=16, return_sequences=True))
model_GRU.add(CuDNNGRU(units=8, return_sequences=True))
model_GRU.add(CuDNNGRU(units=4, return_sequences=False))
model_GRU.add(Dense(1, activation='sigmoid'))
print(model_GRU.summary())
model_GRU.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
model_GRU.fit(x_train_pad, y_train,
              epochs=n_epochs, batch_size=batch_size,
              validation_split=0.05, )
# BUG FIX: evaluation and prediction previously called `model` (the LSTM
# saved above) instead of the freshly trained `model_GRU`, so the GRU run
# was never actually measured.
eval_GRU = model_GRU.evaluate(x_test_pad, y_test)
print(eval_GRU)
y_pred = model_GRU.predict(x_test_pad[:1000])
# --- Windowed multi-class classifier over feature vectors --------------------
# NOTE(review): X_test appears to be a scipy sparse matrix densified here;
# presumably X_train is already dense — confirm against the preprocessing.
X_test = X_test.todense()
train_generator = TimeseriesGenerator(X_train, y_train,
                                      length=window_size,
                                      batch_size=batch_size,
                                      shuffle=False)
# batch_size=1 keeps predictions aligned one-to-one with test timesteps.
test_generator = TimeseriesGenerator(X_test, y_test,
                                     length=window_size,
                                     batch_size=1,
                                     shuffle=False)
# GRU encoder over the window, then a small dense head with softmax output.
model = Sequential()
model.add(CuDNNGRU(128, input_shape=(
    window_size,
    X_train.shape[1],
)))
model.add(Dense(64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(y_train.shape[1], activation='softmax'))
# Run training
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit_generator(train_generator, epochs=epochs)
print(model.evaluate_generator(test_generator))
# The generator produces no targets for the first `window_size` steps, so
# drop them from y_test to align y_true with the generator's predictions.
y_true = np.argmax(y_test[window_size:], axis=1)
y_pred = np.argmax(model.predict_generator(test_generator), axis=1)
def create_model(self):
    """Build the project-context attendgru summarization model.

    Encodes target-method tokens (tdats), an AST/SBT sequence (sml), and a
    3-D batch of project-file token sequences (pdats); the decoder over the
    partial comment attends to all three encodings.

    Returns:
        (self.config, model): the config dict and the compiled Keras model.
    """
    tdat_input = Input(shape=(self.tdatlen, ))
    # pdats: (files per project, sdats per file, tokens per sdat)
    pdat_input = Input(shape=(self.pdatlen, self.config['psdatlen'],
                              self.config['pstdatlen']))
    sml_input = Input(shape=(self.smllen, ))
    com_input = Input(shape=(self.comlen, ))
    tdel = Embedding(output_dim=self.embdims,
                     input_dim=self.tdatvocabsize,
                     mask_zero=False)
    tde = tdel(tdat_input)
    # tdats encoder; its final state seeds the decoder.
    tenc = CuDNNGRU(self.recdims, return_state=True, return_sequences=True)
    tencout, tstate_h = tenc(tde)
    de = Embedding(output_dim=self.embdims,
                   input_dim=self.comvocabsize,
                   mask_zero=False)(com_input)
    dec = CuDNNGRU(self.recdims, return_sequences=True)
    decout = dec(de, initial_state=tstate_h)
    se = Embedding(output_dim=self.smldims,
                   input_dim=self.smlvocabsize,
                   mask_zero=False)(sml_input)
    se_enc = CuDNNGRU(self.recdims, return_state=True, return_sequences=True)
    seout, state_sml = se_enc(se)
    # Decoder attention over the AST encoding.
    ast_attn = dot([decout, seout], axes=[2, 2])
    ast_attn = Activation('softmax')(ast_attn)
    acontext = dot([ast_attn, seout], axes=[2, 1])
    # Decoder attention over the tdats encoding.
    tattn = dot([decout, tencout], axes=[2, 2])
    tattn = Activation('softmax')(tattn)
    tcontext = dot([tattn, tencout], axes=[2, 1])
    semb = TimeDistributed(tdel)
    # adding project context information as a time distributed sdat embedding
    pemb = TimeDistributed(semb)
    pde = pemb(pdat_input)
    # Two nested TimeDistributed GRUs: encode each sdat, then each file.
    senc = TimeDistributed(CuDNNGRU(int(self.recdims)))
    psenc = TimeDistributed(senc)
    psencout = psenc(pde)
    penc = TimeDistributed(CuDNNGRU(int(self.recdims)))
    pencout = penc(psencout)
    # pdats attention
    pattn = dot([decout, pencout], axes=[2, 2])
    pattn = Activation('softmax')(pattn)
    pcontext = dot([pattn, pencout], axes=[2, 1])
    # the context vector receives attention from the project and file context
    # information along with the ast, tdats and decoder output
    context = concatenate([pcontext, tcontext, acontext, decout])
    out = TimeDistributed(Dense(self.tdddims, activation="relu"))(context)
    out = Flatten()(out)
    out1 = Dense(self.comvocabsize, activation="softmax")(out)
    model = Model(inputs=[tdat_input, pdat_input, sml_input, com_input],
                  outputs=out1)
    if self.config['multigpu']:
        model = keras.utils.multi_gpu_model(model, gpus=2)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adamax',
                  metrics=['accuracy'])
    return self.config, model
def __run_s2s(sessions_i, sessions_t, num_songs, song_ix, max_l, NUM_DIM=128,
              BATCH_SIZE=128, EPOCHS=50, MODEL='RNN', WINDOW_SIZE=5):
    """Train a seq2seq next-song model and return its encoder.

    :param sessions_i: source sessions, whitespace-separated song tokens
    :param sessions_t: target sessions, aligned 1:1 with sessions_i
    :param num_songs: (encoder vocab size, decoder vocab size)
    :param song_ix: (source token->index map, target token->index map)
    :param max_l: (max source length, max target length)
    :param MODEL: 'RNN', 'GRU' or 'LSTM' cell family
    :return: (encoder Model mapping input ids to encoder state, generate_batch)
    """
    X, y = sessions_i, sessions_t
    num_encoder_songs, num_decoder_songs = num_songs
    song_ix_i, song_ix_t = song_ix
    max_length_i, max_length_t = max_l

    def generate_batch(X, y, batch_size=128):
        # Endless generator: turns token strings into index matrices plus
        # one-hot decoder targets shifted one step left (teacher forcing).
        while True:
            for j in range(0, len(X), batch_size):
                encoder_input_data = np.zeros((batch_size, max_length_i),
                                              dtype='float32')
                decoder_input_data = np.zeros((batch_size, max_length_t),
                                              dtype='float32')
                decoder_target_data = np.zeros(
                    (batch_size, max_length_t, num_decoder_songs),
                    dtype='float32')
                for i, (input_sequence, target_sequence) in enumerate(
                        zip(X[j:j + batch_size], y[j:j + batch_size])):
                    for t, word in enumerate(input_sequence.split()):
                        encoder_input_data[
                            i, t] = song_ix_i[word] if word != '-' else 0
                    for t, word in enumerate(target_sequence.split()):
                        if t < len(target_sequence.split()) - 1:
                            decoder_input_data[
                                i, t] = song_ix_t[word] if word != '-' else 0
                        if t > 0:
                            # Target is the decoder input shifted by one.
                            decoder_target_data[
                                i, t - 1,
                                song_ix_t[word] if word != '-' else 0] = 1
                yield ([encoder_input_data, decoder_input_data],
                       decoder_target_data)

    # BUG FIX: the original called np.random.shuffle(X) and
    # np.random.shuffle(y) independently, destroying the source/target
    # pairing. Shuffle both through one shared permutation instead.
    perm = np.random.permutation(len(X))
    X = [X[i] for i in perm]
    y = [y[i] for i in perm]
    # First 10% held out for validation.
    X_train, X_test = X[int(len(X) * .1):], X[:int(len(X) * .1)]
    y_train, y_test = y[int(len(y) * .1):], y[:int(len(y) * .1)]
    TRAIN_SAMPLES = len(X_train)
    VAL_SAMPLES = len(X_test)
    ENCODER_INPUT = Input(shape=(None, ))
    ENCODER_EMBEDDING = Embedding(num_encoder_songs, NUM_DIM)(ENCODER_INPUT)
    if MODEL == 'LSTM':
        ENCODER_NN = CuDNNLSTM(NUM_DIM, return_state=True)
        _, state_h, state_c = ENCODER_NN(ENCODER_EMBEDDING)
        ENCODER_STATE = [state_h, state_c]
    if MODEL == 'GRU':
        ENCODER_NN = CuDNNGRU(NUM_DIM, return_state=True)
        _, ENCODER_STATE = ENCODER_NN(ENCODER_EMBEDDING)
    if MODEL == 'RNN':
        ENCODER_NN = SimpleRNN(NUM_DIM, return_state=True)
        _, ENCODER_STATE = ENCODER_NN(ENCODER_EMBEDDING)
    DECODER_INPUT = Input(shape=(None, ))
    DECODER_EMBEDDING = Embedding(num_decoder_songs, NUM_DIM)(DECODER_INPUT)
    # Decoder mirrors the encoder cell type and starts from its state.
    if MODEL == 'LSTM':
        DECODER_NN = CuDNNLSTM(NUM_DIM, return_sequences=True,
                               return_state=True)
        DECODER_OUTPUT, _, _ = DECODER_NN(DECODER_EMBEDDING,
                                          initial_state=ENCODER_STATE)
    if MODEL == 'GRU':
        DECODER_NN = CuDNNGRU(NUM_DIM, return_sequences=True,
                              return_state=True)
        DECODER_OUTPUT, _ = DECODER_NN(DECODER_EMBEDDING,
                                       initial_state=ENCODER_STATE)
    if MODEL == 'RNN':
        DECODER_NN = SimpleRNN(NUM_DIM, return_sequences=True,
                               return_state=True)
        DECODER_OUTPUT, _ = DECODER_NN(DECODER_EMBEDDING,
                                       initial_state=ENCODER_STATE)
    DENSE_DECODER = Dense(num_decoder_songs, activation='softmax')
    DECODER_OUTPUT = DENSE_DECODER(DECODER_OUTPUT)
    es = EarlyStopping(monitor='val_acc', mode='max', verbose=1, patience=5)
    model = Model([ENCODER_INPUT, DECODER_INPUT], DECODER_OUTPUT)
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['acc'])
    model.summary()
    model.fit_generator(generator=generate_batch(X_train, y_train,
                                                 batch_size=BATCH_SIZE),
                        steps_per_epoch=TRAIN_SAMPLES // BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_data=generate_batch(X_test, y_test,
                                                       batch_size=BATCH_SIZE),
                        validation_steps=VAL_SAMPLES // BATCH_SIZE,
                        callbacks=[es])
    return Model(ENCODER_INPUT, ENCODER_STATE), generate_batch
def init_model(self, input_shape, num_classes, **kwargs):
    """Build and compile a CRNN (conv blocks + 2 GRUs) spectrogram classifier.

    Stores the compiled model on self._model and sets self.is_init.

    :param input_shape: mel-spectrogram-like input shape (h, w, channels) —
        assumed; confirm against the caller
    :param num_classes: number of output classes (sparse labels)
    """
    freq_axis = 2  # only used by the commented-out input BatchNormalization
    channel_axis = 3
    channel_size = 128
    min_size = min(input_shape[:2])
    melgram_input = Input(shape=input_shape)
    # x = ZeroPadding2D(padding=(0, 37))(melgram_input)
    # x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(x)
    # Conv block 1
    x = Convolution2D(filters=64,
                      kernel_size=3,
                      strides=1,
                      padding='same',
                      name='conv1',
                      trainable=True)(melgram_input)
    x = BatchNormalization(axis=channel_axis, name='bn1', trainable=True)(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)
    x = Dropout(0.1, name='dropout1')(x)
    # Conv block 2
    x = Convolution2D(filters=channel_size,
                      kernel_size=3,
                      strides=1,
                      padding='same',
                      name='conv2')(x)
    x = BatchNormalization(axis=channel_axis, name='bn2')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(3, 3), name='pool2')(x)
    x = Dropout(0.1, name='dropout2')(x)
    # Conv block 3
    x = Convolution2D(filters=channel_size,
                      kernel_size=3,
                      strides=1,
                      padding='same',
                      name='conv3')(x)
    x = BatchNormalization(axis=channel_axis, name='bn3')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool3')(x)
    x = Dropout(0.1, name='dropout3')(x)
    # Block 4 only when the input is large enough to survive another 4x pool
    # (2 * 3 * 4 = 24-fold reduction so far).
    if min_size // 24 >= 4:
        # Conv block 4
        x = Convolution2D(filters=channel_size,
                          kernel_size=3,
                          strides=1,
                          padding='same',
                          name='conv4')(x)
        x = BatchNormalization(axis=channel_axis, name='bn4')(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool4')(x)
        x = Dropout(0.1, name='dropout4')(x)
    # Collapse the spatial grid to a sequence of channel_size-dim steps.
    x = Reshape((-1, channel_size))(x)
    # Scale GRU width with the label space (floor of 32 units).
    gru_units = 32
    if num_classes > 32:
        gru_units = int(num_classes * 1.5)
    # GRU block 1, 2, output
    x = CuDNNGRU(gru_units, return_sequences=True, name='gru1')(x)
    x = CuDNNGRU(gru_units, return_sequences=False, name='gru2')(x)
    x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation='softmax', name='output')(x)
    model = TFModel(inputs=melgram_input, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=1e-4,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def basic_crnn_2d(rows, cols, channels, num_classes):
    """Basic 2-D CRNN: five conv/pool stages, two bidirectional GRUs, sigmoid.

    :param rows: input height
    :param cols: input width
    :param channels: input channels
    :param num_classes: width of the sigmoid output layer
    :return: compiled Keras Model over input_1 only
    NOTE(review): `opt` and `acc_dcf_metric_list` come from module scope;
    the Reshape((126, 5 * 32)) hard-codes the post-pooling geometry.
    """
    kernel_size_7 = (7, 7)
    kernel_size_5 = (5, 5)
    kernel_size_3 = (3, 3)
    pool_size = (3, 3)
    activ = 'relu'
    input_1 = Input(shape=[rows, cols, channels])
    # BUG FIX: was Input(shape=[row, cols, channels]) — `row` is an undefined
    # name (NameError). input_2 is still not wired into the model (kept as in
    # the original).
    input_2 = Input(shape=[rows, cols, channels])
    # BUG FIX: Python-2 `print x` statements converted to print() calls so the
    # module parses under Python 3 like the rest of the file.
    print(input_1.shape)
    print(input_2.shape)
    # Stage 1: 16 filters, 7x7; pooling strides (2, 1) shrink height only.
    x = Conv2D(16, kernel_size=kernel_size_7, padding='same')(input_1)
    x = BatchNormalization()(x)
    x = Activation(activ)(x)
    x = MaxPooling2D(pool_size, strides=(2, 1), padding='same')(x)
    print(x.shape)
    # Stage 2: 32 filters, 5x5.
    x = Conv2D(32, kernel_size=kernel_size_5, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activ)(x)
    x = MaxPooling2D(pool_size, strides=(2, 1), padding='same')(x)
    print(x.shape)
    # Stages 3-5: 32 filters, 3x3.
    x = Conv2D(32, kernel_size=kernel_size_3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activ)(x)
    x = MaxPooling2D(pool_size, strides=(2, 1), padding='same')(x)
    print(x.shape)
    x = Conv2D(32, kernel_size=kernel_size_3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activ)(x)
    x = MaxPooling2D(pool_size, strides=(2, 1), padding='same')(x)
    print(x.shape)
    x = Conv2D(32, kernel_size=kernel_size_3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activ)(x)
    x = MaxPooling2D(pool_size, strides=(2, 1), padding='same')(x)
    print(x.shape)
    # Time axis to the front, then flatten (height * channels) per step.
    x = Permute((2, 1, 3))(x)
    x = Reshape((126, 5 * 32))(x)
    print(x.shape)
    x = Bidirectional(CuDNNGRU(126, return_sequences=True))(x)
    x = Bidirectional(CuDNNGRU(126, return_sequences=False))(x)
    print(x.shape)
    #x = Dropout(0.25) (x)
    final = Dense(num_classes)(x)
    outputs = Activation('sigmoid', name='target')(final)
    model = Model([input_1], [outputs])
    model.compile(optimizer=opt,
                  loss=['binary_crossentropy'],
                  metrics=acc_dcf_metric_list)
    return model
def _add_GRU(model, layer_num=1, drop_out=0.2):
    """Append `layer_num` CuDNNGRU(100) + Dropout pairs to a Sequential model.

    BUG FIX: the original added every GRU with the default
    return_sequences=False, so for layer_num > 1 the second GRU received a
    2-D tensor and the model could not be built. All but the last GRU now
    return their full sequence; behavior for layer_num=1 is unchanged.

    :param model: a Keras Sequential model whose current output is 3-D
    :param layer_num: number of GRU+Dropout pairs to append
    :param drop_out: dropout rate applied after each GRU
    :return: the same model, for chaining
    """
    for i in range(layer_num):
        # Only the last GRU collapses the time axis.
        model.add(CuDNNGRU(100, return_sequences=(i < layer_num - 1)))
        model.add(Dropout(drop_out))
    return model
def capsulnet_model(batch_size, nb_epoch, hidden_dim, num):
    """Train a BiGRU + Capsule classifier with 5-fold CV and return averaged
    test predictions.

    Uses module-level X_train/y_train/X_test, maxlen, max_features,
    num_features and embedding weights W.

    :param batch_size: fit/predict batch size
    :param nb_epoch: epochs per fold
    :param hidden_dim: unused here — kept for interface compatibility
    :param num: string suffix for the saved weights filename
    :return: test-set predictions averaged over the 5 folds
    """
    Routings = 15
    Num_capsule = 30
    Dim_capsule = 60
    sequence_input = Input(shape=(maxlen, ), dtype='int32')
    embedded_sequences = Embedding(input_dim=max_features,
                                   output_dim=num_features,
                                   input_length=maxlen,
                                   weights=[W],
                                   trainable=False)(sequence_input)
    embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(embedded_sequences)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x)
    capsule = Capsule(num_capsule=Num_capsule,
                      dim_capsule=Dim_capsule,
                      routings=Routings,
                      share_weights=True)(x)
    # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
    capsule = Flatten()(capsule)
    capsule = Dropout(0.4)(capsule)
    output = Dense(2, activation='softmax')(capsule)
    model = Model(inputs=[sequence_input], outputs=output)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', f1])
    # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True)
    # early_stopping = EarlyStopping(monitor='val_acc', patience = 5, verbose=1)
    # Up-weight the (rarer) positive class.
    class_weight = {0: 1, 1: 7}
    # train_num, test_num = X_train.shape[0], X_dev.shape[0]
    train_num, test_num = X_train.shape[0], X_test.shape[0]
    num1 = y_train.shape[1]
    # Out-of-fold train predictions and accumulated test predictions
    # (stacking-style).
    second_level_train_set = np.zeros((train_num, num1))  # (10556,)
    second_level_test_set = np.zeros((test_num, num1))  # (2684,)
    test_nfolds_sets = []
    # kf = KFold(n_splits=5)
    kf = KFold(n_splits=5)
    # NOTE(review): the same `model` instance is fitted across all 5 folds
    # without re-initializing its weights, so each fold continues training on
    # weights that have already seen that fold's "held-out" data — confirm
    # this is intended.
    for i, (train_index, test_index) in enumerate(kf.split(X_train)):
        x_tra, y_tra = X_train[train_index], y_train[train_index]
        x_tst, y_tst = X_train[test_index], y_train[test_index]
        # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True)
        early_stopping = EarlyStopping(monitor='val_acc',
                                       patience=8,
                                       verbose=1)
        model.fit(x_tra,
                  y_tra,
                  validation_data=[x_tst, y_tst],
                  batch_size=batch_size,
                  epochs=nb_epoch,
                  verbose=2,
                  class_weight=class_weight,
                  callbacks=[early_stopping])
        second_level_train_set[test_index] = model.predict(
            x_tst, batch_size=batch_size
        )  # (2112,2) could not be broadcast to indexing result of shape (2112,)
        test_nfolds_sets.append(model.predict(X_test))
    # Average the per-fold test predictions.
    for item in test_nfolds_sets:
        second_level_test_set += item
    second_level_test_set = second_level_test_set / 5
    model.save("weights_BB_capsulnet_lstm" + num + ".hdf5")
    y_pred = second_level_test_set
    return y_pred
def create_model(self):
    """Build the file-context attendgru summarization model.

    Encodes target-method tokens (tdats), an AST/SBT sequence (sml), and a
    batch of same-file method token sequences (sdats); the decoder over the
    partial comment attends to all three encodings.

    Returns:
        (self.config, model): the config dict and the compiled Keras model.
    """
    tdat_input = Input(shape=(self.tdatlen, ))
    # sdats: (methods per file, tokens per method)
    sdat_input = Input(shape=(self.sdatlen, self.config['stdatlen']))
    sml_input = Input(shape=(self.smllen, ))
    com_input = Input(shape=(self.comlen, ))
    tdel = Embedding(output_dim=self.embdims,
                     input_dim=self.tdatvocabsize,
                     mask_zero=False)
    tde = tdel(tdat_input)
    # tdats encoder; its final state seeds the decoder.
    tenc = CuDNNGRU(self.recdims, return_state=True, return_sequences=True)
    tencout, tstate_h = tenc(tde)
    de = Embedding(output_dim=self.embdims,
                   input_dim=self.comvocabsize,
                   mask_zero=False)(com_input)
    dec = CuDNNGRU(self.recdims, return_sequences=True)
    decout = dec(de, initial_state=tstate_h)
    se = Embedding(output_dim=self.smldims,
                   input_dim=self.smlvocabsize,
                   mask_zero=False)(sml_input)
    se_enc = CuDNNGRU(self.recdims, return_state=True, return_sequences=True)
    seout, state_sml = se_enc(se)
    # Decoder attention over the AST encoding.
    ast_attn = dot([decout, seout], axes=[2, 2])
    ast_attn = Activation('softmax')(ast_attn)
    acontext = dot([ast_attn, seout], axes=[2, 1])
    # Decoder attention over the tdats encoding.
    tattn = dot([decout, tencout], axes=[2, 2])
    tattn = Activation('softmax')(tattn)
    tcontext = dot([tattn, tencout], axes=[2, 1])
    # Adding file context information to ast-attendgru model
    # shared embedding between tdats and sdats
    semb = TimeDistributed(tdel)
    sde = semb(sdat_input)
    # sdats encoder
    senc = TimeDistributed(CuDNNGRU(int(self.recdims)))
    senc = senc(sde)
    # attention to sdats
    sattn = dot([decout, senc], axes=[2, 2])
    sattn = Activation('softmax')(sattn)
    scontext = dot([sattn, senc], axes=[2, 1])
    # context vector has teh result of attention to sdats along with ast,
    # tdats and decoder output vectors
    context = concatenate([scontext, tcontext, acontext, decout])
    out = TimeDistributed(Dense(self.tdddims, activation="relu"))(context)
    out = Flatten()(out)
    out1 = Dense(self.comvocabsize, activation="softmax")(out)
    model = Model(inputs=[tdat_input, sdat_input, com_input, sml_input],
                  outputs=out1)
    if self.config['multigpu']:
        model = keras.utils.multi_gpu_model(model, gpus=2)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adamax',
                  metrics=['accuracy'])
    return self.config, model
inputs = Input(shape=(None, train_x.shape[2])) # 39-dim MFCC train_labels = Input(shape=(None, )) input_length = Input(shape=(1, )) label_length = Input(shape=(1, )) # x = Conv1D(16, 3, activation='relu')(inputs) # x = Conv1D(16, 3, activation='relu')(x) # x = Conv1D(16, 3, activation='relu')(x) x = BatchNormalization()(inputs) x = TimeDistributed(Dense(128, activation='relu'))(x) x = TimeDistributed(Dense(128, activation='relu'))(x) x = TimeDistributed(Dense(128, activation='relu'))(x) x = BatchNormalization()(x) # x = GRU(128, return_sequences=True, activation='relu')(x) x = CuDNNGRU(128, return_sequences=True)(x) x = TimeDistributed(Dense(64, activation='relu'))(x) y = TimeDistributed(Dense(11, activation='softmax'))(x) loss_output = Lambda(ctc_wrapper)( [train_labels, y, input_length, label_length]) model = Model(inputs=[inputs, train_labels, input_length, label_length], outputs=loss_output) model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred) # model.summary() # the model for testing which outputs the softmax result of each timestep test_model = Model(inputs=inputs, outputs=y) # def debug_pred(batch, logs): # pred = test_model.predict(train_x[0:1]) # print(pred) # debug_cb = LambdaCallback(on_batch_end=debug_pred)
def build_model2(lr=0.0, lr_d=0.0, units=0, spatial_dr=0.0, kernel_size1=3,
                 kernel_size2=2, dense_units=128, dr=0.1, conv_size=32):
    """Train a dual-branch (BiGRU + BiLSTM) conv-pooling text classifier.

    Both recurrent branches feed two Conv1D widths each; average- and
    max-pooled conv outputs are concatenated into a dense head with a
    5-unit sigmoid output. Trains on module-level X_train/y_ohe, saving the
    best checkpoint by val_loss, and returns the reloaded best model.

    :param lr: Adam learning rate; :param lr_d: Adam decay
    :param units: recurrent units per direction
    :param spatial_dr: embedding SpatialDropout1D rate
    :param kernel_size1, kernel_size2: the two Conv1D kernel widths
    :param dense_units: width of the first dense layer
    :param dr: dropout after each dense layer
    :param conv_size: Conv1D filter count
    :return: the best checkpointed Keras model
    """
    file_path = "best_model.hdf5"
    check_point = ModelCheckpoint(file_path,
                                  monitor="val_loss",
                                  verbose=1,
                                  save_best_only=True,
                                  mode="min")
    early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=3)
    inp = Input(shape=(max_len, ))
    # NOTE(review): vocabulary size 19479 is hard-coded — must match
    # embedding_matrix.shape[0].
    x = Embedding(19479, embed_size, weights=[embedding_matrix],
                  trainable=False)(inp)
    x1 = SpatialDropout1D(spatial_dr)(x)
    x_gru = Bidirectional(CuDNNGRU(units, return_sequences=True))(x1)
    x_lstm = Bidirectional(CuDNNLSTM(units, return_sequences=True))(x1)
    # GRU branch, two kernel widths, each avg- and max-pooled over time.
    x_conv1 = Conv1D(conv_size,
                     kernel_size=kernel_size1,
                     padding='valid',
                     kernel_initializer='he_uniform')(x_gru)
    avg_pool1_gru = GlobalAveragePooling1D()(x_conv1)
    max_pool1_gru = GlobalMaxPooling1D()(x_conv1)
    x_conv2 = Conv1D(conv_size,
                     kernel_size=kernel_size2,
                     padding='valid',
                     kernel_initializer='he_uniform')(x_gru)
    avg_pool2_gru = GlobalAveragePooling1D()(x_conv2)
    max_pool2_gru = GlobalMaxPooling1D()(x_conv2)
    # LSTM branch, same structure.
    x_conv3 = Conv1D(conv_size,
                     kernel_size=kernel_size1,
                     padding='valid',
                     kernel_initializer='he_uniform')(x_lstm)
    avg_pool1_lstm = GlobalAveragePooling1D()(x_conv3)
    max_pool1_lstm = GlobalMaxPooling1D()(x_conv3)
    x_conv4 = Conv1D(conv_size,
                     kernel_size=kernel_size2,
                     padding='valid',
                     kernel_initializer='he_uniform')(x_lstm)
    avg_pool2_lstm = GlobalAveragePooling1D()(x_conv4)
    max_pool2_lstm = GlobalMaxPooling1D()(x_conv4)
    x = concatenate([
        avg_pool1_gru, max_pool1_gru, avg_pool2_gru, max_pool2_gru,
        avg_pool1_lstm, max_pool1_lstm, avg_pool2_lstm, max_pool2_lstm
    ])
    x = BatchNormalization()(x)
    x = Dropout(dr)(Dense(dense_units, activation='relu')(x))
    x = BatchNormalization()(x)
    x = Dropout(dr)(Dense(int(dense_units / 2), activation='relu')(x))
    x = Dense(5, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(lr=lr, decay=lr_d),
                  metrics=["accuracy"])
    history = model.fit(X_train,
                        y_ohe,
                        batch_size=128,
                        epochs=20,
                        validation_split=0.1,
                        verbose=1,
                        callbacks=[check_point, early_stop])
    # Reload the best (lowest val_loss) checkpoint before returning.
    model = load_model(file_path)
    return model
def _compile_hans(self, shape, n_hidden_layers, hidden_units_size,
                  dropout_rate, word_dropout_rate, lr):
    """
    Compiles a Hierarchical Attention Network based on the given parameters
    :param shape: The shape of the sequence, i.e. (number of sections, number of tokens)
    :param n_hidden_layers: number of stacked BI-GRU layers, as a (token-level, section-level) pair
    :param hidden_units_size: size of hidden units, as a list
    :param dropout_rate: The percentage of inputs to dropout
    :param word_dropout_rate: The percentage of timesteps to dropout
    :param lr: learning rate
    :return: Nothing

    BUG FIX: in the token-level loop's non-cuDNN branch, kernel_constraint
    was passed to Bidirectional (which has no such argument, raising a
    TypeError) instead of the wrapped GRU. It now sits inside the GRU,
    matching the cuDNN branch and the section-level loop below.
    """
    # Sentence Feature Representation
    section_inputs = Input(shape=(None, ), name='document_inputs')
    self.pretrained_embeddings = self.PretrainedEmbedding()
    section_embs = self.pretrained_embeddings(section_inputs)
    # Apply variational dropout
    drop_section_embs = SpatialDropout1D(
        dropout_rate, name='feature_dropout')(section_embs)
    encodings = TimestepDropout(word_dropout_rate,
                                name='word_dropout')(drop_section_embs)
    # Bi-GRUs over token embeddings, with residual adds after the first layer.
    for i in range(n_hidden_layers[0]):
        if self._cuDNN:
            grus = Bidirectional(
                CuDNNGRU(hidden_units_size[0],
                         return_sequences=True,
                         kernel_constraint=MinMaxNorm(min_value=-2,
                                                      max_value=2)),
                name='bidirectional_grus_{}'.format(i))(encodings)
        else:
            grus = Bidirectional(
                GRU(hidden_units_size[0],
                    activation="tanh",
                    recurrent_activation='sigmoid',
                    return_sequences=True,
                    kernel_constraint=MinMaxNorm(min_value=-2, max_value=2)),
                name='bidirectional_grus_{}'.format(i))(encodings)
        # Re-apply the zero mask the (non-masking) GRU output lost.
        grus = Camouflage(mask_value=0.0)([grus, encodings])
        if i == 0:
            encodings = SpatialDropout1D(dropout_rate)(grus)
        else:
            encodings = add([grus, encodings])
            encodings = SpatialDropout1D(dropout_rate)(encodings)
    # Attention over BI-GRU (context-aware) embeddings
    if self._attention_mechanism == 'maxpooling':
        section_encoder = GlobalMaxPooling1D()(encodings)
    elif self._attention_mechanism == 'attention':
        encodings = SymmetricMasking()([encodings, encodings])
        section_encoder = ContextualAttention(
            kernel_regularizer=l2(), bias_regularizer=l2())(encodings)
    # Wrap up section_encoder
    section_encoder = Model(inputs=section_inputs,
                            outputs=section_encoder,
                            name='sentence_encoder')
    # Document Input Layer
    # NOTE(review): this Input shares the name 'document_inputs' with the
    # token-level Input above — confirm Keras does not reject the duplicate.
    document_inputs = Input(shape=(
        shape[0],
        shape[1],
    ), name='document_inputs')
    # Distribute sentences: run the sentence encoder over every section.
    section_encodings = TimeDistributed(
        section_encoder, name='sentence_encodings')(document_inputs)
    # BI-GRUs over section embeddings, same residual scheme as above.
    for i in range(n_hidden_layers[1]):
        if self._cuDNN:
            grus = Bidirectional(
                CuDNNGRU(hidden_units_size[1],
                         return_sequences=True,
                         kernel_constraint=MinMaxNorm(min_value=-2,
                                                      max_value=2)),
                name='bidirectional_grus_upper_{}'.format(i))(
                    section_encodings)
        else:
            grus = Bidirectional(GRU(hidden_units_size[1],
                                     activation="tanh",
                                     recurrent_activation='sigmoid',
                                     return_sequences=True,
                                     kernel_constraint=MinMaxNorm(
                                         min_value=-2, max_value=2)),
                                 name='bidirectional_grus_upper_{}'.format(
                                     i))(section_encodings)
        grus = Camouflage(mask_value=0.0)([grus, section_encodings])
        if i == 0:
            section_encodings = SpatialDropout1D(dropout_rate)(grus)
        else:
            section_encodings = add([grus, section_encodings])
            section_encodings = SpatialDropout1D(dropout_rate)(
                section_encodings)
    # Attention over BI-LSTM (context-aware) sentence embeddings
    if self._attention_mechanism == 'maxpooling':
        doc_encoding = GlobalMaxPooling1D(
            name='max_pooling')(section_encodings)
    elif self._attention_mechanism == 'attention':
        section_encodings = SymmetricMasking()(
            [section_encodings, section_encodings])
        doc_encoding = ContextualAttention(
            kernel_regularizer=l2(),
            bias_regularizer=l2(),
            name='self_attention')(section_encodings)
    losses = 'binary_crossentropy' if self._decision_type == 'multi_label' else 'categorical_crossentropy'
    loss_weights = None
    # Final output (projection) layer
    outputs = Dense(self.n_classes,
                    activation='sigmoid'
                    if self._decision_type == 'multi_label' else 'softmax',
                    name='outputs')(doc_encoding)
    # Wrap up model + Compile with optimizer and loss function
    self.model = Model(inputs=document_inputs, outputs=[outputs])
    self.model.compile(optimizer=Adam(lr=lr, clipvalue=2.0),
                       loss=losses,
                       loss_weights=loss_weights)
def cnn_rnn(embedding_matrix, config):
    """Siamese question-pair matcher: a shared residual Bi-RNN stack plus
    per-kernel-size convolution blocks, merged with hand-crafted features.

    :param embedding_matrix: pretrained word vectors, shape (vocab, emb_dim)
    :param config: hyper-parameter dict; keys used: 'rnn', 'gpu',
        'rnn_output_size', 'max_length', 'embed_trainable',
        'spatial_dropout_rate', 'kernel_sizes', 'filters', 'feature_length',
        'dense_dim', 'dense_dropout', 'optimizer', 'learning_rate'
    :return: compiled Model with inputs [q1, q2, feature_input] and one
        sigmoid output (pair-match probability), metric = module-level f1
    """
    # The same layer objects encode both questions -> shared (siamese) weights.
    if config['rnn'] == 'gru' and config['gpu']:
        encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        # NOTE(review): this fallback branch still builds CuDNN layers, which
        # require a GPU, so config['gpu'] == False cannot actually run here.
        # NOTE(review): the *2 / *4 widths make the residual add() below see
        # tensors of different sizes — confirm this branch is ever used.
        encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True))
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'], ), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )
    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)
    # bsz, 1, emb_dims
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)
    # First recurrent pass over each question.
    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)
    q1_encoded = Dropout(0.2)(q1_encoded)
    q2_encoded = Dropout(0.2)(q2_encoded)
    # bidirectional
    # q1_encoded = encode2(q1_encoded)
    # q2_encoded = encode2(q2_encoded)
    # resnet: second pass sees the embeddings concatenated with the first
    # pass output, and its result is added back as a residual shortcut.
    rnn_layer2_input1 = concatenate([q1_embed, q1_encoded])
    rnn_layer2_input2 = concatenate([q2_embed, q2_encoded])
    q1_encoded2 = encode2(rnn_layer2_input1)
    q2_encoded2 = encode2(rnn_layer2_input2)
    # add res shortcut
    res_block1 = add([q1_encoded, q1_encoded2])
    res_block2 = add([q2_encoded, q2_encoded2])
    rnn_layer3_input1 = concatenate([q1_embed, res_block1])
    rnn_layer3_input2 = concatenate([q2_embed, res_block2])
    # rnn_layer3_input1 = concatenate([q1_embed,q1_encoded,q1_encoded2])
    # rnn_layer3_input2 = concatenate([q2_embed,q2_encoded,q2_encoded2])
    q1_encoded3 = encode3(rnn_layer3_input1)
    q2_encoded3 = encode3(rnn_layer3_input2)
    # Convolutional branch: one pooled block per kernel size (block() is a
    # module-level helper); the RNN summary vectors are appended afterwards.
    convs1, convs2 = [], []
    for ksz in config['kernel_sizes']:
        pooling1, pooling2 = block(q1_embed, q2_embed, ksz, config['filters'])
        convs1.append(pooling1)
        convs2.append(pooling2)
    rnn_rep1 = GlobalMaxPooling1D()(q1_encoded3)
    rnn_rep2 = GlobalMaxPooling1D()(q2_encoded3)
    convs1.append(rnn_rep1)
    convs2.append(rnn_rep2)
    merged1 = concatenate(convs1, axis=-1)
    merged2 = concatenate(convs2, axis=-1)
    # Symmetric interaction features: elementwise |a-b| and a*b.
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2])
    mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2])
    # merged = Concatenate()([mul_rep, sub_rep])
    feature_input = Input(shape=(config['feature_length'], ))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'], activation='relu')(feature_dense)
    merged = Concatenate()([merged1, merged2, mul_rep, sub_rep, feature_dense])
    dense = Dropout(config['dense_dropout'])(merged)
    dense = BatchNormalization()(dense)
    dense = Dense(config['dense_dim'], activation='relu')(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = BatchNormalization()(dense)
    predictions = Dense(1, activation='sigmoid')(dense)
    model = Model(inputs=[q1, q2, feature_input], outputs=predictions)
    # Instantiate the configured optimizer, then override its learning rate.
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
def _compile_bigrus(self, n_hidden_layers, hidden_units_size, dropout_rate,
                    word_dropout_rate, lr):
    """ Compiles a Hierarchical RNN based on the given parameters
    :param n_hidden_layers: number of stacked Bi-GRU layers
    :param hidden_units_size: size of hidden units, as a list
    :param dropout_rate: The percentage of inputs to dropout
    :param word_dropout_rate: The percentage of timesteps to dropout
    :param lr: learning rate
    :return: Nothing (sets self.model as a compiled Keras Model)
    """
    # Document Feature Representation: optionally concatenate ELMo
    # embeddings with pretrained word embeddings.
    if self.elmo:
        document_inputs = Input(shape=(1, ),
                                dtype='string',
                                name='document_inputs')
        document_elmos = ElmoEmbeddingLayer()(document_inputs)
        document_inputs2 = Input(shape=(None, ), name='document_inputs2')
        self.pretrained_embeddings = self.PretrainedEmbedding()
        document_embs = self.pretrained_embeddings(document_inputs2)
        doc_embs = concatenate([document_embs, document_elmos])
    else:
        document_inputs = Input(shape=(None, ), name='document_inputs')
        self.pretrained_embeddings = self.PretrainedEmbedding()
        doc_embs = self.pretrained_embeddings(document_inputs)
    # Apply variational dropout: feature-wise, then whole-timestep dropout.
    drop_doc_embs = SpatialDropout1D(dropout_rate,
                                     name='feature_dropout')(doc_embs)
    encodings = TimestepDropout(word_dropout_rate,
                                name='word_dropout')(drop_doc_embs)
    # Bi-GRUs over token embeddings; only the last layer collapses the
    # sequence (return_sequences=False) to a single document vector.
    return_sequences = True
    for i in range(n_hidden_layers):
        if i == n_hidden_layers - 1:
            return_sequences = False
        if self._cuDNN:
            grus = Bidirectional(
                CuDNNGRU(hidden_units_size,
                         return_sequences=return_sequences),
                name='bidirectional_grus_{}'.format(i))(encodings)
        else:
            # GRU configured to match CuDNNGRU activations so weights are
            # interchangeable between the two implementations.
            grus = Bidirectional(
                GRU(hidden_units_size,
                    activation="tanh",
                    recurrent_activation='sigmoid',
                    return_sequences=return_sequences),
                name='bidirectional_grus_{}'.format(i))(encodings)
        if i != n_hidden_layers - 1:
            # Re-mask padded timesteps zeroed before the RNN, then add a
            # residual connection from the second layer onwards.
            grus = Camouflage(mask_value=0.0)([grus, encodings])
            if i == 0:
                encodings = SpatialDropout1D(dropout_rate)(grus)
            else:
                encodings = add([grus, encodings])
                encodings = SpatialDropout1D(dropout_rate)(encodings)
        else:
            encodings = grus
    # Final output (projection) layer
    outputs = Dense(self.n_classes,
                    activation='sigmoid'
                    if self._decision_type == 'multi_label' else 'softmax',
                    name='outputs')(encodings)
    # Wrap up model + Compile with optimizer and loss function
    self.model = Model(inputs=document_inputs
                       if not self.elmo else [document_inputs, document_inputs2],
                       outputs=[outputs])
    self.model.compile(optimizer=Adam(lr=lr, clipvalue=5.0),
                       loss='binary_crossentropy' if self._decision_type ==
                       'multi_label' else 'categorical_crossentropy')
def training_net_kfolds():
    """Build the CNN + BiGRU + attention ECG classifier and train it with
    stratified 3-fold CV.

    Reads REFERENCE.csv / kfold_labels_en.csv under the module-level ``path``,
    oversamples rare classes (label1 in {4, 7, 9}), adds wavelet-denoised
    copies ("_db4"/"_db6") of every record ID, and fits one model per fold
    via ``DataGenerator``, checkpointing the best weights by val_fmeasure.
    Relies on module-level: path, DataGenerator, Config, add_compile,
    AttentionWithContext.
    """
    train_dataset_path = path + "/Train/"
    val_dataset_path = path + "/Val/"
    train_files = os.listdir(train_dataset_path)
    train_files.sort()
    val_files = os.listdir(val_dataset_path)
    val_files.sort()
    labels = pd.read_csv(path + "REFERENCE.csv")
    labels_en = pd.read_csv(path + "kfold_labels_en.csv")
    #data_info = pd.read_csv(path + "data_info.csv")
    batch_size = 64
    num_classes = 10
    len_seg = 23296  # 91s
    # CNN front-end: five groups of [conv3, conv3, strided conv]; each
    # strided conv halves the time axis before the recurrent layer.
    main_input = Input(shape=(len_seg, 12), dtype='float32', name='main_input')
    # group 1
    x = Convolution1D(12, 3, padding='same')(main_input)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    # group 2
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    # group 3
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    # group 4
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    # group 5 (wider strided kernel)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 48, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    cnnout = Dropout(0.2)(x)
    # Recurrent layer + attention pooling over the conv features.
    # NOTE(review): input_shape here is ignored because the layer is called
    # on an existing tensor — presumably leftover; confirm and drop.
    x = Bidirectional(
        CuDNNGRU(12,
                 input_shape=(2250, 12),
                 return_sequences=True,
                 return_state=False))(cnnout)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    x = AttentionWithContext()(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    # Multi-label head: one sigmoid per abnormality class.
    main_output = Dense(num_classes, activation='sigmoid')(x)
    model = Model(inputs=main_input, outputs=main_output)
    print(model.summary())
    raw_IDs = labels_en["File_name"].values.tolist()
    extend_db4_IDs = [i + "_db4" for i in raw_IDs]
    extend_db6_IDs = [i + "_db6" for i in raw_IDs]
    all_IDs = raw_IDs + extend_db4_IDs + extend_db6_IDs
    train_labels = labels_en["label1"].values
    all_train_labels = np.hstack((train_labels, train_labels, train_labels))
    # Parameters passed through to DataGenerator
    params = {
        'dim': 23296,
        'batch_size': 64,
        'n_classes': 10,
        'n_channels': 12,
        'shuffle': True
    }
    en_amount = 1
    model_path = './official_attention_onenet_model/'
    for seed in range(en_amount):
        print("************************")
        n_fold = 3
        n_classes = 10
        kfold = StratifiedKFold(n_splits=n_fold,
                                shuffle=True,
                                random_state=1234)
        #kf = kfold.split(all_IDs, all_train_labels)
        # Folds are stratified on the primary label (label1).
        kf = kfold.split(labels["File_name"].values.tolist(),
                         labels["label1"].values)
        for i, (index_train, index_valid) in enumerate(kf):
            print('fold: ', i + 1, ' training')
            t = time.time()
            #tr_IDs = np.array(all_IDs)[index_train]
            #val_IDs = np.array(all_IDs)[index_valid]
            #print(tr_IDs.shape)
            tr_IDs = labels["File_name"].values[index_train].tolist()
            val_IDs = labels["File_name"].values[index_valid].tolist()
            # Oversample rare classes by appending duplicate IDs to the
            # training fold: class 4 x4 extra, class 7 x2, class 9 x1.
            for j in range(4):
                for ids in labels[labels.label1 == 4]["File_name"]:
                    if ids in tr_IDs:
                        tr_IDs.append(ids)
            for j in range(2):
                for ids in labels[labels.label1 == 7]["File_name"]:
                    if ids in tr_IDs:
                        tr_IDs.append(ids)
            for j in range(1):
                for ids in labels[labels.label1 == 9]["File_name"]:
                    if ids in tr_IDs:
                        tr_IDs.append(ids)
            # Triple every fold with the wavelet-denoised variants.
            tr_IDs_db4 = [ids + "_db4" for ids in tr_IDs]
            tr_IDs_db6 = [ids + "_db6" for ids in tr_IDs]
            val_IDs_db4 = [ids + "_db4" for ids in val_IDs]
            val_IDs_db6 = [ids + "_db6" for ids in val_IDs]
            tr_IDs = tr_IDs + tr_IDs_db4 + tr_IDs_db6
            val_IDs = val_IDs + val_IDs_db4 + val_IDs_db6
            print("tr_IDs : ", len(tr_IDs))
            print("val_IDs : ", len(val_IDs))
            # Generators
            training_generator = DataGenerator(tr_IDs, labels, **params)
            validation_generator = DataGenerator(val_IDs, labels, **params)
            checkpointer = ModelCheckpoint(
                filepath=model_path +
                'attention_1net_extend_weights-best_k{}_r{}_0805.hdf5'.format(
                    seed, i),
                monitor='val_fmeasure',
                verbose=1,
                save_best_only=True,
                save_weights_only=True,
                mode='max')  # val_fmeasure
            reduce = ReduceLROnPlateau(monitor='val_fmeasure',
                                       factor=0.5,
                                       patience=2,
                                       verbose=1,
                                       min_delta=1e-4,
                                       mode='max')
            earlystop = EarlyStopping(monitor='val_fmeasure',
                                      mode="max",
                                      patience=6,
                                      restore_best_weights=True)
            tensorboard = TensorBoard(log_dir="./logs")
            config = Config()
            add_compile(model, config)
            # NOTE(review): tensorboard is created but never added to
            # callback_lists; also the same `model` is re-fit across folds
            # without re-initializing weights — confirm both are intended.
            callback_lists = [checkpointer, reduce, earlystop]
            history = model.fit_generator(generator=training_generator,
                                          validation_data=validation_generator,
                                          use_multiprocessing=False,
                                          epochs=30,
                                          verbose=1,
                                          callbacks=callback_lists)
def GRU_block(x, p=0.5, n=64):
    """Collapse a sequence tensor to a single vector with a CuDNNGRU,
    then apply dropout.

    :param x: input sequence tensor
    :param p: dropout rate applied after the GRU (default 0.5)
    :param n: number of GRU units (default 64)
    :return: output tensor of the dropout layer
    """
    recurrent = CuDNNGRU(n)
    regularized = Dropout(p)
    return regularized(recurrent(x))
def predcit_net_kfolds():
    """Blend the per-fold checkpoints of the CNN+BiGRU+attention model,
    tune per-class decision thresholds on training data, and write the
    validation answers plus fold-level probability CSVs.

    (Name keeps the original "predcit" typo because external callers may
    reference it.)  Relies on module-level: path, read_data_seg,
    preprocess_y, AttentionWithContext.
    """
    pre_type = "sym"  # "sym"
    labels = pd.read_csv(path + "REFERENCE.csv")
    raw_IDs = labels["File_name"].values.tolist()
    IDs = {}
    IDs["sym"] = raw_IDs
    IDs["db4"] = [i + "_db4" for i in raw_IDs]
    IDs["db6"] = [i + "_db6" for i in raw_IDs]
    batch_size = 64
    num_classes = 10
    len_seg = 23296  # 91s
    # Rebuild exactly the architecture used in training so the saved
    # fold weights can be loaded.  Five conv groups, each ending in a
    # stride-2 conv that halves the time axis.
    main_input = Input(shape=(len_seg, 12), dtype='float32', name='main_input')
    # group 1
    x = Convolution1D(12, 3, padding='same')(main_input)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    # group 2
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    # group 3
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    # group 4
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 24, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    # group 5
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 3, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Convolution1D(12, 48, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.3)(x)
    cnnout = Dropout(0.2)(x)
    x = Bidirectional(
        CuDNNGRU(12,
                 input_shape=(2250, 12),
                 return_sequences=True,
                 return_state=False))(cnnout)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    x = AttentionWithContext()(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.3)(x)
    x = Dropout(0.2)(x)
    main_output = Dense(num_classes, activation='sigmoid')(x)
    model = Model(inputs=main_input, outputs=main_output)
    test_x = read_data_seg(path,
                           split='Val',
                           preprocess=True,
                           n_index=0,
                           pre_type=pre_type)
    print("test_x shape: ", test_x.shape)
    model_path = './official_attention_onenet_model/'
    en_amount = 1
    for seed in range(en_amount):
        print("************************")
        n_fold = 3  # 3
        n_classes = 10
        kfold = StratifiedKFold(n_splits=n_fold,
                                shuffle=True,
                                random_state=seed)
        kf = kfold.split(IDs[pre_type], labels['label1'])
        # Out-of-fold probability buffers (hard-coded dataset sizes).
        blend_train = np.zeros(
            (6689, n_fold, n_classes)).astype('float32')  # len(train_x)
        blend_test = np.zeros(
            (558, n_fold, n_classes)).astype('float32')  # len(test_x)
        count = 0
        for i, (index_train, index_valid) in enumerate(kf):
            print('fold: ', i + 1, ' training')
            t = time.time()
            # NOTE(review): predicts on ALL records per fold (the fold
            # indices are commented out), so blend_train is not truly
            # out-of-fold — confirm this is intentional.
            tr_IDs = np.array(IDs[pre_type])  # [index_train]
            # val_IDs = np.array(IDs[pre_type])[index_valid]
            print(tr_IDs.shape)
            X = np.empty((tr_IDs.shape[0], 23296, 12))
            for j, ID in enumerate(tr_IDs):
                X[j, ] = np.load("/media/jdcloud/training_data_pre/" + ID +
                                 ".npy")
            # X_tr = [(X[:, i] - np.mean(X[:, i])) / np.std(X[:, i]) for i in range(10)]
            X_tr = X
            # print(X.shape)
            del X
            # Evaluate best trained model for this fold.
            model.load_weights(
                model_path +
                'attention_1net_extend_weights-best_k{}_r{}_0805.hdf5'.format(
                    seed, i))
            blend_train[:, i, :] = model.predict(X_tr)
            blend_test[:, i, :] = model.predict(test_x)
            del X_tr
            gc.collect()
            gc.collect()
            count += 1
        index = np.arange(6689)
        y_train = preprocess_y(labels, index)
        # Weighted blend of the three folds (weights 0.1 / 0.1 / 0.8).
        train_y = 0.1 * blend_train[:, 0, :] + 0.1 * blend_train[:, 1, :] \
            + 0.8 * blend_train[:, 2, :]
        # Per-class grid search over decision thresholds, maximizing
        # macro f1 on the training predictions.
        threshold = np.arange(0.1, 0.9, 0.1)
        acc = []
        accuracies = []
        best_threshold = np.zeros(train_y.shape[1])
        for i in range(train_y.shape[1]):
            y_prob = np.array(train_y[:, i])
            for j in threshold:
                y_pred = [1 if prob >= j else 0 for prob in y_prob]
                acc.append(f1_score(y_train[:, i], y_pred, average='macro'))
            acc = np.array(acc)
            index = np.where(acc == acc.max())
            accuracies.append(acc.max())
            best_threshold[i] = threshold[index[0][0]]
            acc = []
        print("best_threshold :", best_threshold)
        y_pred = np.array([[
            1 if train_y[i, j] >= best_threshold[j] else 0
            for j in range(train_y.shape[1])
        ] for i in range(len(train_y))])
        print(" train data f1_score :",
              f1_score(y_train, y_pred, average='macro'))
        for i in range(10):
            print("f1 score of ab {} is {}".format(
                i, f1_score(y_train[:, i], y_pred[:, i], average='macro')))
        # Apply the tuned thresholds to the blended test probabilities.
        out = 0.1 * blend_test[:, 0, :] + 0.1 * blend_test[:, 1, :] \
            + 0.8 * blend_test[:, 2, :]
        y_pred_test = np.array([[
            1 if out[i, j] >= best_threshold[j] else 0
            for j in range(out.shape[1])
        ] for i in range(len(out))])
        classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        test_y = y_pred_test
        y_pred = [[
            1 if test_y[i, j] >= best_threshold[j] else 0
            for j in range(test_y.shape[1])
        ] for i in range(len(test_y))]
        pred = []
        for j in range(test_y.shape[0]):
            pred.append([classes[i] for i in range(10) if y_pred[j][i] == 1])
        val_dataset_path = path + "/Val/"
        val_files = os.listdir(val_dataset_path)
        val_files.sort()
        # Write one row per .mat record: name + predicted labels, padded
        # with empty cells up to 10 label columns.
        with open('answers_attention_1net_{}_0805.csv'.format(pre_type),
                  'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                'File_name', 'label1', 'label2', 'label3', 'label4', 'label5',
                'label6', 'label7', 'label8', 'label9', 'label10'
            ])
            count = 0
            for file_name in val_files:
                if file_name.endswith('.mat'):
                    # NOTE(review): strip('.mat') removes a character SET,
                    # not a suffix — names ending in m/a/t/. would be
                    # over-stripped; verify record naming makes this safe.
                    record_name = file_name.strip('.mat')
                    answer = []
                    answer.append(record_name)
                    result = pred[count]
                    answer.extend(result)
                    for i in range(10 - len(result)):
                        answer.append('')
                    count += 1
                    writer.writerow(answer)
            csvfile.close()
        # Persist per-fold probabilities for later ensembling.
        train_pd0 = pd.DataFrame(blend_train[:, 0, :])
        train_pd1 = pd.DataFrame(blend_train[:, 1, :])
        train_pd2 = pd.DataFrame(blend_train[:, 2, :])
        csv_path = "/media/jdcloud/ensemble_csv/"
        train_pd0.to_csv(csv_path + "attention_1net_fold0.csv", index=None)
        train_pd1.to_csv(csv_path + "attention_1net_fold1.csv", index=None)
        train_pd2.to_csv(csv_path + "attention_1net_fold2.csv", index=None)
        test_pd0 = pd.DataFrame(blend_test[:, 0, :])
        test_pd1 = pd.DataFrame(blend_test[:, 1, :])
        test_pd2 = pd.DataFrame(blend_test[:, 2, :])
        csv_path = "/media/jdcloud/test_csv/"
        test_pd0.to_csv(csv_path + "attention_1net_fold0.csv", index=None)
        test_pd1.to_csv(csv_path + "attention_1net_fold1.csv", index=None)
        test_pd2.to_csv(csv_path + "attention_1net_fold2.csv", index=None)
#%% # model ans_ques_input = Input(shape=(ans_seq, ques_dim), name='ans_question_input') ans_feat_input = Input(shape=(ans_seq, ans_dim), name='ans_feature_input') member_feat_input = Input(shape = (member_dim,), name='member_feature_input') member_topic_input = Input(shape=(member_topic_dim,), name='member_topic_input') time_input = Input(shape = (1,), name = 'time_input') ques_input = Input(shape = (ques_dim,), name='ques_input') member_feat_dense = PReLU(name='member_feature_dense_prelu')( Dense(units=40, name='member_feature_dense')(member_feat_input)) member_topic_dense = PReLU(name='member_topic_dense_prelu')( Dense(units=256, name='member_topic_dense')(member_topic_input)) ans_feat_gru = CuDNNGRU(units=40, return_sequences=True, name='ans_feat_gru')(ans_feat_input) ans_ques_gru = CuDNNGRU(units=256, return_sequences=True, name='ans_ques_gru')(ans_ques_input) ans_con = concatenate([ans_feat_gru, ans_ques_gru], name='answer_concatenate') answer_gru = CuDNNGRU(units=128, return_sequences=False, name='answer_gru')(ans_con) question_dense = PReLU(name='ques_dense_prelu')( Dense(units=128, name='ques_dense')(ques_input)) time_dense = PReLU(name='time_dense_prelu')( Dense(units=5, name='time_dense')(time_input)) inv_con = concatenate([time_dense, member_feat_dense, member_topic_dense, answer_gru, question_dense], name='invite_concatenate') inv_dense_1 = PReLU(name='inv_dense_1_prelu')( Dense(units=512, name='inv_dense_1')(inv_con))
def get_model(config):
    """Four-headed text classifier: one shared embedding feeds an
    LSTM-rooted branch and a GRU-rooted branch; each branch forks into an
    LSTM and a GRU sub-branch, pooled + attended into four softmax heads,
    plus a fifth averaged head.

    NOTE(review): another ``get_model`` is defined later in this file —
    the later definition shadows this one at import time; confirm which is
    intended to win.

    :param config: object with strmaxlen, max_features, embed_size,
        prob_dropout, prob_dropout2, cell_size_l1, cell_size_l2,
        filter_size, kernel_size attributes
    :return: compiled Model with outputs [out_LL, out_LG, out_GL, out_GG,
        out_avg]; the averaged head gets loss weight 0.1
    """
    inp = Input(shape=(config.strmaxlen, ), name='input')
    emb = Embedding(config.max_features, config.embed_size,
                    trainable=True)(inp)
    emb1 = SpatialDropout1D(config.prob_dropout)(emb)
    #### LSTM-rooted branch
    l1_L = Bidirectional(
        CuDNNLSTM(config.cell_size_l1, return_sequences=True))(emb1)
    l2_LL = Bidirectional(
        CuDNNLSTM(config.cell_size_l2, return_sequences=True))(l1_L)
    l2_LG = Bidirectional(
        CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_L)
    l3_LLC = Conv1D(config.filter_size,
                    kernel_size=config.kernel_size,
                    strides=2,
                    padding="valid",
                    kernel_initializer="he_uniform")(l2_LL)
    l3_LGC = Conv1D(config.filter_size,
                    kernel_size=config.kernel_size,
                    strides=2,
                    padding="valid",
                    kernel_initializer="he_uniform")(l2_LG)
    # Pooled + attention summaries at every depth of the branch.
    avg_pool_L = GlobalAveragePooling1D()(l1_L)
    max_pool_L = GlobalMaxPooling1D()(l1_L)
    avg_pool_LL = GlobalAveragePooling1D()(l2_LL)
    max_pool_LL = GlobalMaxPooling1D()(l2_LL)
    avg_pool_LG = GlobalAveragePooling1D()(l2_LG)
    max_pool_LG = GlobalMaxPooling1D()(l2_LG)
    attention_LLA = Attention(config.strmaxlen)(l2_LL)
    attention_LGA = Attention(config.strmaxlen)(l2_LG)
    avg_pool_LLC = GlobalAveragePooling1D()(l3_LLC)
    max_pool_LLC = GlobalMaxPooling1D()(l3_LLC)
    avg_pool_LGC = GlobalAveragePooling1D()(l3_LGC)
    max_pool_LGC = GlobalMaxPooling1D()(l3_LGC)
    # Stride-2 valid conv shortens the sequence to strmaxlen/2 - 1.
    attention_LLCA = Attention(int(config.strmaxlen / 2 - 1))(l3_LLC)
    attention_LGCA = Attention(int(config.strmaxlen / 2 - 1))(l3_LGC)
    conc_LLC = concatenate([
        avg_pool_L, max_pool_L, avg_pool_LL, max_pool_LL, avg_pool_LLC,
        max_pool_LLC, attention_LLA, attention_LLCA
    ])
    conc_LGC = concatenate([
        avg_pool_L, max_pool_L, avg_pool_LG, max_pool_LG, avg_pool_LGC,
        max_pool_LGC, attention_LGA, attention_LGCA
    ])
    out_LL = Dropout(config.prob_dropout2)(conc_LLC)
    out_LG = Dropout(config.prob_dropout2)(conc_LGC)
    out_LL = Dense(2, activation='softmax')(out_LL)
    out_LG = Dense(2, activation='softmax')(out_LG)
    ####
    # emb2 = Embedding(config.max_features, config.max_features,embeddings_initializer='identity', trainable = True)(inp)
    # emb1 = Embedding(config.max_features, config.embed_size, trainable = True)(inp)
    emb2 = SpatialDropout1D(config.prob_dropout)(emb)
    #### GRU-rooted branch (mirror of the above)
    l1_G = Bidirectional(
        CuDNNGRU(config.cell_size_l1, return_sequences=True))(emb2)
    l2_GL = Bidirectional(
        CuDNNLSTM(config.cell_size_l2, return_sequences=True))(l1_G)
    l2_GG = Bidirectional(
        CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_G)
    l3_GLC = Conv1D(config.filter_size,
                    kernel_size=config.kernel_size,
                    strides=2,
                    padding="valid",
                    kernel_initializer="he_uniform")(l2_GL)
    l3_GGC = Conv1D(config.filter_size,
                    kernel_size=config.kernel_size,
                    strides=2,
                    padding="valid",
                    kernel_initializer="he_uniform")(l2_GG)
    avg_pool_G = GlobalAveragePooling1D()(l1_G)
    max_pool_G = GlobalMaxPooling1D()(l1_G)
    avg_pool_GL = GlobalAveragePooling1D()(l2_GL)
    max_pool_GL = GlobalMaxPooling1D()(l2_GL)
    avg_pool_GG = GlobalAveragePooling1D()(l2_GG)
    max_pool_GG = GlobalMaxPooling1D()(l2_GG)
    attention_GLA = Attention(config.strmaxlen)(l2_GL)
    attention_GGA = Attention(config.strmaxlen)(l2_GG)
    avg_pool_GLC = GlobalAveragePooling1D()(l3_GLC)
    max_pool_GLC = GlobalMaxPooling1D()(l3_GLC)
    avg_pool_GGC = GlobalAveragePooling1D()(l3_GGC)
    max_pool_GGC = GlobalMaxPooling1D()(l3_GGC)
    attention_GLCA = Attention(int(config.strmaxlen / 2 - 1))(l3_GLC)
    attention_GGCA = Attention(int(config.strmaxlen / 2 - 1))(l3_GGC)
    conc_GLC = concatenate([
        avg_pool_G, max_pool_G, avg_pool_GL, max_pool_GL, avg_pool_GLC,
        max_pool_GLC, attention_GLA, attention_GLCA
    ])
    conc_GGC = concatenate([
        avg_pool_G, max_pool_G, avg_pool_GG, max_pool_GG, avg_pool_GGC,
        max_pool_GGC, attention_GGA, attention_GGCA
    ])
    out_GL = Dropout(config.prob_dropout2)(conc_GLC)
    out_GG = Dropout(config.prob_dropout2)(conc_GGC)
    out_GL = Dense(2, activation='softmax')(out_GL)
    out_GG = Dense(2, activation='softmax')(out_GG)
    # Fifth head: plain average of the four softmax outputs.
    out_avg = average([out_LL, out_LG, out_GL, out_GG])
    # # ==================================================================================================
    model = Model(inputs=inp,
                  outputs=[out_LL, out_LG, out_GL, out_GG, out_avg])
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  loss_weights=[1., 1., 1., 1., 0.1],
                  metrics=['accuracy'])
    return model
num_features = W.shape[1] # 400 logging.info("dimension num of word vector [num_features]: %d" % num_features) Routings = 20 Num_capsule = 60 Dim_capsule = 120 sequence_input = Input(shape=(maxlen, ), dtype='int32') embedded_sequences = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, weights=[W], trainable=False)(sequence_input) embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences) x = Bidirectional(CuDNNGRU(64, return_sequences=True))(embedded_sequences) x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x) capsule = Capsule(num_capsule=Num_capsule, dim_capsule=Dim_capsule, routings=Routings, share_weights=True)(x) # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule) capsule = Flatten()(capsule) capsule = Dropout(0.1)(capsule) output = Dense(4, activation='softmax')(capsule) model = Model(inputs=[sequence_input], outputs=output) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', f1]) checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc',
def get_model(config):
    """Regression model averaging three branches: a 7-block DPCNN, an
    LSTM->GRU(+conv) pooled branch, and a plain stacked-LSTM branch.

    :param config: object with strmaxlen, max_features, embed_size,
        prob_dropout, cell_size, cell_size2 attributes
    :return: compiled Model with outputs [rnn_out, rnnc_out, dpcnn_out,
        ens_out] (MSE loss, loss weights [1., 0.8, 1., 0.3])
    """
    #model
    #wrote out all the blocks instead of looping for simplicity
    filter_nr = 64
    filter_size = 3
    max_pool_size = 3
    max_pool_strides = 2
    dense_nr = 64
    spatial_dropout = 0.3
    dense_dropout = 0.4
    # Tiny L2 on every conv kernel/bias in the DPCNN.
    conv_kern_reg = regularizers.l2(0.000005)
    conv_bias_reg = regularizers.l2(0.000005)
    inp = Input(shape=(config.strmaxlen, ), name='input')
    emb = Embedding(config.max_features, config.embed_size,
                    trainable=True)(inp)
    emb1 = SpatialDropout1D(config.prob_dropout)(emb)
    # --- DPCNN branch: [conv-bn-prelu x2] + residual add + pooled stride-2
    # downsampling, repeated 7 times.
    block1 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(emb1)
    block1 = BatchNormalization()(block1)
    block1 = PReLU()(block1)
    block1 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block1)
    block1 = BatchNormalization()(block1)
    block1 = PReLU()(block1)
    #we pass embedded comment through conv1d with filter size 1 because it needs to have the same shape as block output
    #if you choose filter_nr = embed_size (300 in this case) you don't have to do this part and can add emb_comment directly to block1_output
    resize_emb = Conv1D(filter_nr,
                        kernel_size=1,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(emb1)
    resize_emb = PReLU()(resize_emb)
    block1_output = add([block1, resize_emb])
    block1_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block1_output)
    block2 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block1_output)
    block2 = BatchNormalization()(block2)
    block2 = PReLU()(block2)
    block2 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block2)
    block2 = BatchNormalization()(block2)
    block2 = PReLU()(block2)
    block2_output = add([block2, block1_output])
    block2_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block2_output)
    block3 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block2_output)
    block3 = BatchNormalization()(block3)
    block3 = PReLU()(block3)
    block3 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block3)
    block3 = BatchNormalization()(block3)
    block3 = PReLU()(block3)
    block3_output = add([block3, block2_output])
    block3_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block3_output)
    block4 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block3_output)
    block4 = BatchNormalization()(block4)
    block4 = PReLU()(block4)
    block4 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block4)
    block4 = BatchNormalization()(block4)
    block4 = PReLU()(block4)
    block4_output = add([block4, block3_output])
    block4_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block4_output)
    block5 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block4_output)
    block5 = BatchNormalization()(block5)
    block5 = PReLU()(block5)
    block5 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block5)
    block5 = BatchNormalization()(block5)
    block5 = PReLU()(block5)
    block5_output = add([block5, block4_output])
    block5_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block5_output)
    block6 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block5_output)
    block6 = BatchNormalization()(block6)
    block6 = PReLU()(block6)
    block6 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block6)
    block6 = BatchNormalization()(block6)
    block6 = PReLU()(block6)
    block6_output = add([block6, block5_output])
    block6_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block6_output)
    # Final DPCNN block: residual add but no further pooling.
    block7 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block6_output)
    block7 = BatchNormalization()(block7)
    block7 = PReLU()(block7)
    block7 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block7)
    block7 = BatchNormalization()(block7)
    block7 = PReLU()(block7)
    block7_output = add([block7, block6_output])
    output = GlobalMaxPooling1D()(block7_output)
    output = Dense(dense_nr, activation='linear')(output)
    output = BatchNormalization()(output)
    output = PReLU()(output)
    output = Dropout(dense_dropout)(output)
    dpcnn_out = Dense(1)(output)
    # model = Model(inputs=inp, outputs=output)
    # model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error', 'accuracy'])
    ### ========================================================
    # --- RNN+conv branch: separate embedding, LSTM -> GRU -> conv, pooled.
    # inp = Input(shape=(config.strmaxlen, ), name='input')
    # emb = Embedding(config.max_features, config.embed_size, trainable = True)(inp)
    emb2 = Embedding(config.max_features, config.embed_size,
                     trainable=True)(inp)
    x1 = SpatialDropout1D(config.prob_dropout)(emb2)
    x1 = Bidirectional(CuDNNLSTM(config.cell_size,
                                 return_sequences=True))(x1)
    x12 = Bidirectional(CuDNNGRU(config.cell_size,
                                 return_sequences=True))(x1)
    x12c = Conv1D(filter_nr,
                  kernel_size=filter_size,
                  strides=1,
                  padding="valid",
                  kernel_initializer="he_uniform")(x12)
    # x2 = SpatialDropout1D(config.prob_dropout)(emb)
    # x2 = Bidirectional(CuDNNGRU(config.cell_size2, return_sequences=True))(x2)
    # x22 = Bidirectional(CuDNNLSTM(config.cell_size2, return_sequences=False))(x1)
    avg_pool1 = GlobalAveragePooling1D()(x1)
    max_pool1 = GlobalMaxPooling1D()(x1)
    avg_pool12 = GlobalAveragePooling1D()(x12)
    max_pool12 = GlobalMaxPooling1D()(x12)
    avg_pool12c = GlobalAveragePooling1D()(x12c)
    max_pool12c = GlobalMaxPooling1D()(x12c)
    # avg_pool14 = GlobalAveragePooling1D()(x22)
    # max_pool14 = GlobalMaxPooling1D()(x22)
    conc = concatenate([
        avg_pool1, max_pool1, avg_pool12, max_pool12, avg_pool12c,
        max_pool12c
    ])
    # fc1 = Dense(50, activation='relu')(conc)
    fc1 = Dropout(config.prob_dropout)(conc)
    rnnc_out = Dense(1)(fc1)
    # model = Model(inputs=inp, outputs=outp)
    # model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error', 'accuracy'])
    # # ==================================================================================================
    ### ========================================================
    # --- Plain stacked-LSTM branch: third embedding, final state -> dense.
    # inp = Input(shape=(config.strmaxlen, ), name='input')
    emb3 = Embedding(config.max_features, config.embed_size,
                     trainable=True)(inp)
    r1 = SpatialDropout1D(config.prob_dropout)(emb3)
    r1 = Bidirectional(CuDNNLSTM(config.cell_size2,
                                 return_sequences=True))(r1)
    r12 = Bidirectional(CuDNNLSTM(config.cell_size2,
                                  return_sequences=False))(r1)
    rfc1 = Dense(50, activation='relu')(r12)
    rfc1 = Dropout(config.prob_dropout)(rfc1)
    rnn_out = Dense(1)(rfc1)
    # model = Model(inputs=inp, outputs=outp)
    # model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error', 'accuracy'])
    # # ==================================================================================================
    # Fourth head: simple average of the three branch outputs.
    ens_out = average([rnn_out, rnnc_out, dpcnn_out])
    model = Model(inputs=inp, outputs=[rnn_out, rnnc_out, dpcnn_out, ens_out])
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  loss_weights=[1., 0.8, 1., 0.3],
                  metrics=['mean_squared_error', 'accuracy'])
    return model
def build_model(self):
    """Build the seq2seq model (encoder-decoder with a copy-style prior).

    Uses self.chars, self.hidden_dim, self.use_gpu, self.dropout and
    self.model_path; ScaleShift / Interact are project-defined layers.

    :return: Keras Model over [x_in, y_in]; the masked cross-entropy is
        attached via add_loss, so compile() takes only the optimizer.
    """
    x_in = Input(shape=(None, ))
    y_in = Input(shape=(None, ))
    x = x_in
    y = y_in
    # 0 is the padding id: build (batch, len, 1) float masks for both sides.
    x_mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x)
    y_mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(y)
    x_one_hot = Lambda(self._one_hot)([x, x_mask])
    # Learn a prior over the output distribution (target tokens are likely
    # to have appeared in the input).
    x_prior = ScaleShift()(x_one_hot)
    # Embedding shared between encoder and decoder sides.
    embedding = Embedding(len(self.chars), self.hidden_dim)
    x = embedding(x)
    y = embedding(y)
    # Encoder: 2-layer bidirectional GRU; decoder: 2-layer unidirectional GRU.
    if self.use_gpu:
        # encoder
        x = Bidirectional(
            CuDNNGRU(int(self.hidden_dim / 2), return_sequences=True))(x)
        x = Bidirectional(
            CuDNNGRU(int(self.hidden_dim / 2), return_sequences=True))(x)
        # decoder
        y = CuDNNGRU(self.hidden_dim, return_sequences=True)(y)
        y = CuDNNGRU(self.hidden_dim, return_sequences=True)(y)
    else:
        # encoder
        x = Bidirectional(
            GRU(int(self.hidden_dim / 2),
                return_sequences=True,
                dropout=self.dropout))(x)
        x = Bidirectional(
            GRU(int(self.hidden_dim / 2),
                return_sequences=True,
                dropout=self.dropout))(x)
        # decoder
        y = GRU(self.hidden_dim,
                return_sequences=True,
                dropout=self.dropout)(y)
        y = GRU(self.hidden_dim,
                return_sequences=True,
                dropout=self.dropout)(y)
    # Attend decoder states over masked encoder states, project to vocab.
    xy = Interact()([y, x, x_mask])
    xy = Dense(512, activation='relu')(xy)
    xy = Dense(len(self.chars))(xy)
    xy = Lambda(lambda x: (x[0] + x[1]) / 2)([xy, x_prior])  # average with the prior
    xy = Activation('softmax')(xy)
    # Cross-entropy loss, with the padding positions masked out; targets are
    # y shifted by one step (predict token t+1 from tokens <= t).
    cross_entropy = K.sparse_categorical_crossentropy(
        y_in[:, 1:], xy[:, :-1])
    loss = K.sum(cross_entropy * y_mask[:, 1:, 0]) / K.sum(y_mask[:, 1:, 0])
    model = Model([x_in, y_in], xy)
    model.add_loss(loss)
    model.compile(optimizer=Adam(1e-3))
    # Resume from a previous checkpoint when one exists.
    if os.path.exists(self.model_path):
        model.load_weights(self.model_path)
    return model
def get_model(config):
    """Build the two-input regression ensemble.

    Each of the two inputs ('input' and 'input_post') goes through its own
    identity-initialized embedding and an identical "tower" of four
    RNN+Conv+pooling branches (first-layer cell in {LSTM, GRU} crossed with
    second-layer cell in {LSTM, GRU}), each ending in a 1-unit head.  The
    model outputs all eight heads, the two per-tower averages and the overall
    average, trained with MSE losses weighted so the individual heads
    dominate.  The original four-times-duplicated branch code is factored
    into helpers; every layer configuration is unchanged.
    """

    def branch_head(base, seq):
        # One branch: Conv1D over the 2nd-layer RNN output, then pooled +
        # attention features from all three tensors -> dropout -> scalar head.
        conv = Conv1D(config.filter_size,
                      kernel_size=config.kernel_size,
                      strides=2,
                      padding="valid",
                      kernel_initializer="he_uniform")(seq)
        feats = concatenate([
            GlobalAveragePooling1D()(base),
            GlobalMaxPooling1D()(base),
            GlobalAveragePooling1D()(seq),
            GlobalMaxPooling1D()(seq),
            GlobalAveragePooling1D()(conv),
            GlobalMaxPooling1D()(conv),
            Attention(config.strmaxlen)(seq),
            # stride-2 valid conv halves the time axis (minus one step)
            Attention(int(config.strmaxlen / 2 - 1))(conv),
        ])
        return Dense(1)(Dropout(config.prob_dropout2)(feats))

    def tower(emb):
        # Four heads per tower; each first layer gets its own SpatialDropout1D
        # applied to the shared embedding, exactly as in the original code.
        heads = []
        for first_cell in (CuDNNLSTM, CuDNNGRU):
            base = Bidirectional(
                first_cell(config.cell_size_l1, return_sequences=True))(
                    SpatialDropout1D(config.prob_dropout)(emb))
            for second_cell in (CuDNNLSTM, CuDNNGRU):
                seq = Bidirectional(
                    second_cell(config.cell_size_l2,
                                return_sequences=True))(base)
                heads.append(branch_head(base, seq))
        return heads  # order: LL, LG, GL, GG

    def one_hot_embedding(x):
        # Identity-initialized (one-hot-like) trainable square embedding.
        return Embedding(config.max_features,
                         config.max_features,
                         embeddings_initializer='identity',
                         trainable=True)(x)

    inp = Input(shape=(config.strmaxlen, ), name='input')
    inp_post = Input(shape=(config.strmaxlen, ), name='input_post')

    heads = tower(one_hot_embedding(inp))
    out_avg = average(heads)
    heads_post = tower(one_hot_embedding(inp_post))
    out_avgp = average(heads_post)
    out_last = average([out_avg, out_avgp])

    model_avg = Model(inputs=[inp, inp_post],
                      outputs=heads + heads_post +
                      [out_avg, out_avgp, out_last])
    model_avg.compile(
        loss='mean_squared_error',
        optimizer='adam',
        # eight heads weighted 1.0; tower averages 0.1; overall average 0.01
        loss_weights=[1., 1., 1., 1., 1., 1., 1., 1., 0.1, 0.1, 0.01],
        metrics=['mean_squared_error', 'accuracy'])
    return model_avg
def build_rnn(config=None):
    """Build a GloVe/word2vec-embedded RNN multi-flag classifier.

    Parameters
    ----------
    config : dict or None
        Hyperparameter overrides.  Missing keys are filled with defaults
        (the passed-in dict is updated in place).  Pass nothing for all
        defaults.  Anything other than a dict aborts with a message.

    Returns
    -------
    (model, config)
        The compiled Keras model and the fully populated config, or ``None``
        when config is invalid or a word2vec model is missing.
    """
    cfgdefaults = {
        'embed': ['glove', 'Embedding'],
        'glove_dim': [100, 'GLoVe dims'],
        'spatial_dropout': [0, 'Spatial dropout'],
        'use_gpu': [False, 'GPU support'],
        'rnn_type': ['gru', 'RNN type'],
        'rnn_units': [32, 'RNN units'],
        'bidirectional': [False, 'Bi-directional RNN'],
        'dense_units': [64, 'Dense units'],
        'dense_dropout': [0, 'Dense dropout'],
        'd2v_include': [False, 'Additional Doc2Vec representation'],
        'd2v_dim': [200, 'Doc2Vec dimensionality'],
        'd2v_dense_nodes': [64, 'Doc2Vec MLP nodes'],
        'adam_lr': [.0001, 'Adam optimizer learning rate'],
        'adam_lr_decay': [.000001, 'Adam optimizer LR decay'],
        'SEQ_LEN': [200, 'Sequence length'],
        'train_epochs': [1, 'Training epochs'],
        'train': [True, 'Model training'],
        'target_field_names': [target_flags, 'Target flag field names'],
    }
    # BUG FIX: the original signature was `config={}` -- a mutable default
    # that this function then mutates, so filled-in defaults leaked across
    # calls.  Use None as the default and create a fresh dict per call.
    if config is None:
        config = {}
    if isinstance(config, dict):
        for cfgkey, (default, label) in cfgdefaults.items():
            if cfgkey not in config:
                config[cfgkey] = default
                print("{} not specified in config, defaulting to {}".format(label, default))
    else:
        print('Must pass config as dict or leave empty.')
        return

    if config['embed'] == 'word2vec':
        from gensim.models import Word2Vec
        if not isinstance(config['w2v_model'], Word2Vec):
            print('Word2Vec model must be passed to config key w2vmodel')
            return
    # (removed unused local import of embeddingtools.make_embedding_layer)

    seq_input = Input(shape=(config['SEQ_LEN'],), dtype='int32',
                      name='seq_input')
    if config['embed'] == 'word2vec':
        embedding = embedding_layer_w2v(config['w2v_model'],
                                        config['SEQ_LEN'])(seq_input)
    if config['embed'] == 'glove':
        embedding = embedding_layer_glove(
            config['SEQ_LEN'], glove_dim=config['glove_dim'])(seq_input)
    spatial_dropout = SpatialDropout1D(config['spatial_dropout'])(embedding)

    # Pick the recurrent layer: {lstm, gru} x {gpu, cpu}, optionally wrapped
    # in Bidirectional.  (Unknown rnn_type fails with a NameError, as before.)
    if config['rnn_type'] == 'lstm':
        rnn_layer = (CuDNNLSTM(config['rnn_units']) if config['use_gpu']
                     else LSTM(config['rnn_units']))
    elif config['rnn_type'] == 'gru':
        rnn_layer = (CuDNNGRU(config['rnn_units']) if config['use_gpu']
                     else GRU(config['rnn_units']))
    if config['bidirectional']:
        from keras.layers import Bidirectional
        rnn_layer = Bidirectional(rnn_layer)
    rnn = rnn_layer(spatial_dropout)

    inputs = [seq_input]
    if config['d2v_include']:
        # Optional Doc2Vec document vector merged in after the RNN.
        d2v_input = Input(shape=(config['d2v_dim'],), name='D2VInput')
        d2v_dense = Dense(config['d2v_dense_nodes'], activation='relu',
                          name='D2VDense')(d2v_input)
        rnn = concatenate([rnn, d2v_dense])
        inputs.append(d2v_input)

    dense = Dense(config['dense_units'], activation='relu')(rnn)
    dense_dropout = Dropout(config['dense_dropout'])(dense)
    # One independent sigmoid head per target flag.
    output_nodes = [
        Dense(1, activation='sigmoid', name=name)(dense_dropout)
        for name in config['target_field_names']
    ]

    adam = Adam(lr=config['adam_lr'], decay=config['adam_lr_decay'])
    model = Model(inputs=inputs, outputs=output_nodes)
    model.compile(loss='binary_crossentropy', optimizer=adam,
                  metrics=['accuracy'])
    return model, config
def get_sentence_attention(word_model, word_length, sent_length, n_classes):
    """Hierarchical attention model over a (sentence, word) id grid.

    ``word_model`` is applied per sentence (TimeDistributed) to get 256-d
    word vectors; word-level attention (ATTNWORD, a project layer) pools them
    into sentence vectors; a BiGRU plus sentence-level attention pools those
    into a document vector.  A parallel word-level CNN path (kernel sizes
    2/3/4, element-wise Maximum of global max/avg pools) is multiplied in
    before the final sigmoid layer with ``n_classes`` units.
    """
    # NOTE(review): the attention stack is built for a single head
    # (nclasses = 1) even though the output layer has n_classes units --
    # behavior kept as-is.
    nclasses = 1
    # Renamed from `input`, which shadowed the builtin.
    sent_input = Input(shape=(sent_length, word_length), dtype='int32')
    print(' input to sentence attn network', word_model)
    si_vects = TimeDistributed(word_model)(sent_input)
    print('Shape after si_vects', si_vects.shape)
    u_it = TimeDistributed(
        TimeDistributed(Dense(256, activation='tanh')))(si_vects)
    print('Shape after word vector', u_it.shape)
    attn_final_word = [TimeDistributed(ATTNWORD(1))(u_it)
                       for _ in range(nclasses)]
    print('ATTN Shape', attn_final_word[0].shape)
    # Weight word vectors by their attention scores, then sum over words.
    attn_final_word = [Multiply()([si_vects, attn_final_word[i]])
                       for i in range(nclasses)]
    print('Multi word Shape', attn_final_word[0].shape)
    attn_final_word = [Reshape(
        (sent_length, 256, word_length))(attn_final_word[i])
        for i in range(nclasses)]
    print('Shape of the att1 is {}'.format(attn_final_word[0].shape))
    attn_final_word = [Lambda(lambda x: K.sum(x, axis=3))(attn_final_word[i])
                       for i in range(nclasses)]
    output_list = []
    for i in range(nclasses):
        print('Shape of the lambda word is {}'.format(attn_final_word[i].shape))
        # Sentence-level BiGRU + attention, summed into a 256-d vector.
        x = Bidirectional(
            CuDNNGRU(128, return_sequences=True))(attn_final_word[i])
        x = SpatialDropout1D(0.2)(x)
        x = BatchNormalization()(x)
        print("Shape of X-X is {}".format(x.shape))
        u_it = TimeDistributed(Dense(256, activation='tanh'))(x)
        print('Shape after word vector', u_it.shape)
        attn_final_sent = ATTNWORD(1)(u_it)
        print('Shape of the sent att is {}'.format(attn_final_sent.shape))
        attn_final_sent = Multiply()([x, attn_final_sent])
        print('Shape of the multi sent att is {}'.format(attn_final_sent.shape))
        attn_final_sent = Reshape((256, sent_length))(attn_final_sent)
        attn_final_sent = Lambda(lambda x: K.sum(x, axis=2))(attn_final_sent)
        output_list.append(attn_final_sent)
    # Word-level CNN path over all sentence*word positions.
    word_attn = Reshape((sent_length * word_length, 256))(si_vects)
    x1 = Conv1D(256, 2, activation='relu')(word_attn)
    x1_mp = GlobalMaxPooling1D()(x1)
    x1_av = GlobalAveragePooling1D()(x1)
    x2 = Conv1D(256, 3, activation='relu')(word_attn)
    x2_mp = GlobalMaxPooling1D()(x2)
    x2_av = GlobalAveragePooling1D()(x2)
    x3 = Conv1D(256, 4, activation='relu')(word_attn)
    x3_mp = GlobalMaxPooling1D()(x3)
    x3_av = GlobalAveragePooling1D()(x3)
    x = Maximum()([x1_mp, x1_av, x2_mp, x2_av, x3_mp, x3_av])
    x = BatchNormalization()(x)
    output_list.append(x)
    # Fuse attention and CNN paths multiplicatively.
    x = Multiply()(output_list)
    p = Dense(n_classes, activation='sigmoid')(x)
    model = Model(sent_input, p)
    return model
def cs_setup_rnn(params, inshape=None, classes=None, char=None):
    """Build the SMILES2vec RNN model plus a truncated feature sub-model.

    Parameters
    ----------
    params : dict
        Hyperparameters: em_dim, conv_units, num_layer, layer{1,2,3}_units,
        relu_type, dropval, reg_type, reg_val, celltype ("GRU"/"LSTM").
    inshape : input sequence length.
    classes : 1 for regression (linear output, MSE), >= 2 for classification
        (softmax output, categorical cross-entropy).
    char : character vocabulary; embedding input dim is len(char) + 1.

    Returns
    -------
    (model, submodel)
        The full model and a model truncated before the output layer, both
        compiled with RMSprop.

    Raises
    ------
    ValueError
        When ``classes`` does not specify a valid task type.
    """
    # Parse network hyperparameters.
    em_dim = int(params['em_dim'] * 10)
    kernel_size = 3
    filters = int(params['conv_units'] * 6)
    num_layer = int(params['num_layer'])
    rnn_units = [int(params['layer1_units'] * 6),
                 int(params['layer2_units'] * 6),
                 int(params['layer3_units'] * 6)]
    relu_flag = str(params['relu_type'])
    dropval = float(params['dropval'])
    reg_flag = str(params['reg_type'])
    reg_val = 10 ** (-float(params['reg_val']))

    # Setup regularizer.
    if reg_flag == "l1":
        reg = l1(reg_val)
    elif reg_flag == "l2":
        reg = l2(reg_val)
    elif reg_flag == "l1_l2":
        # BUG FIX: the original called l1_l2(reg_val), which sets only the
        # l1 factor and leaves l2 at its default; apply reg_val to both.
        reg = l1_l2(l1=reg_val, l2=reg_val)
    else:
        reg = None
    if reg is None:
        print("NOTE: No regularizers used")
    else:
        print("Regularizer "+reg_flag+" set at "+str(reg_val))

    # Embedding + 1D convolution front end.
    inlayer = Input(shape=[inshape])
    x = Embedding(input_dim=len(char) + 1, output_dim=em_dim)(inlayer)
    x = Conv1D(filters, kernel_size, strides=1, padding="same",
               kernel_regularizer=reg)(x)
    if relu_flag == "relu":
        x = Activation("relu")(x)
    elif relu_flag == "elu":
        x = Activation("elu")(x)
    elif relu_flag == "prelu":
        x = PReLU()(x)
    elif relu_flag == "leakyrelu":
        x = LeakyReLU()(x)

    # Stacked bidirectional RNN (1-3 layers): every layer returns sequences
    # except the last, each followed by dropout.  Replaces the original
    # duplicated GRU/LSTM if-chains; unknown celltype or num_layer outside
    # 1..3 adds no RNN layers, as before.
    cell_cls = {"GRU": CuDNNGRU, "LSTM": CuDNNLSTM}.get(params['celltype'])
    if cell_cls is not None and num_layer in (1, 2, 3):
        for i in range(num_layer):
            last = i == num_layer - 1
            x = Bidirectional(
                cell_cls(rnn_units[i], return_sequences=not last))(x)
            x = Dropout(dropval)(x)

    # Output layer and matching loss.
    if classes == 1:
        label = Dense(classes, activation='linear', name='predictions')(x)
        loss = "mean_squared_error"
    elif classes >= 2:
        label = Dense(classes, activation='softmax', name='predictions')(x)
        loss = "categorical_crossentropy"
    else:
        # BUG FIX: the original `raise("ERROR ...")` raised a bare string,
        # which is itself a TypeError; raise a proper exception instead.
        raise ValueError("ERROR in specifying tasktype")

    # Create base model and the intermediate (truncated) model.
    model = Model(inputs=inlayer, outputs=label, name='SMILES2vec')
    submodel = Model(inputs=inlayer, outputs=x, name='SMILES2vec_truncated')
    model.compile(optimizer="RMSprop", loss=loss)
    submodel.compile(optimizer="RMSprop", loss=loss)
    return model, submodel
def new_lpcnet_model(frame_size=160, rnn_units1=384, rnn_units2=16,
                     nb_used_features=38, training=False, use_gpu=True):
    """Build the LPCNet sample-rate network plus encoder/decoder views.

    Returns ``(model, encoder, decoder)``.  All three models share layers,
    so training ``model`` also trains the weights the ``encoder``/``decoder``
    use.  ``training`` only switches the frame-conv padding ('valid' while
    training, 'same' for streaming inference).  ``embed_size`` and
    ``pcm_levels`` are module-level constants defined elsewhere in the file.
    """
    pcm = Input(shape=(None, 3))
    feat = Input(shape=(None, nb_used_features))
    pitch = Input(shape=(None, 1))
    # Decoder-only inputs: precomputed conditioning features and GRU states.
    dec_feat = Input(shape=(None, 128))
    dec_state1 = Input(shape=(rnn_units1, ))
    dec_state2 = Input(shape=(rnn_units2, ))

    padding = 'valid' if training else 'same'
    fconv1 = Conv1D(128, 3, padding=padding, activation='tanh',
                    name='feature_conv1')
    fconv2 = Conv1D(128, 3, padding=padding, activation='tanh',
                    name='feature_conv2')

    # Signal embedding for the 3 PCM channels, flattened per time step.
    embed = Embedding(256, embed_size, embeddings_initializer=PCMInit(),
                      name='embed_sig')
    cpcm = Reshape((-1, embed_size * 3))(embed(pcm))

    pembed = Embedding(256, 64, name='embed_pitch')
    cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])

    # Frame-rate conditioning network: two convs + two dense layers.
    cfeat = fconv2(fconv1(cat_feat))
    fdense1 = Dense(128, activation='tanh', name='feature_dense1')
    fdense2 = Dense(128, activation='tanh', name='feature_dense2')
    cfeat = fdense2(fdense1(cfeat))

    # Upsample frame-rate features to sample rate.
    rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))

    if use_gpu:
        rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True,
                       name='gru_a')
        rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True,
                        name='gru_b')
    else:
        # sigmoid recurrent activation + reset_after=True keep the CPU GRU
        # weight-compatible with CuDNNGRU.  (BUG FIX: the original passed the
        # string 'true', which only worked because non-empty strings are
        # truthy; pass the boolean.)
        rnn = GRU(rnn_units1, return_sequences=True, return_state=True,
                  recurrent_activation="sigmoid", reset_after=True,
                  name='gru_a')
        rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True,
                   recurrent_activation="sigmoid", reset_after=True,
                   name='gru_b')

    rnn_in = Concatenate()([cpcm, rep(cfeat)])
    md = MDense(pcm_levels, activation='softmax', name='dual_fc')
    gru_out1, _ = rnn(rnn_in)
    gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))
    ulaw_prob = md(gru_out2)

    model = Model([pcm, feat, pitch], ulaw_prob)
    # Stash hyperparameters on the model for downstream tooling.
    model.rnn_units1 = rnn_units1
    model.rnn_units2 = rnn_units2
    model.nb_used_features = nb_used_features
    model.frame_size = frame_size

    # Encoder view: features/pitch -> frame-rate conditioning vectors.
    encoder = Model([feat, pitch], cfeat)

    # Decoder view: runs the shared GRUs from explicit initial states over
    # precomputed conditioning features, returning the new states as well.
    dec_rnn_in = Concatenate()([cpcm, dec_feat])
    dec_gru_out1, state1 = rnn(dec_rnn_in, initial_state=dec_state1)
    dec_gru_out2, state2 = rnn2(Concatenate()([dec_gru_out1, dec_feat]),
                                initial_state=dec_state2)
    dec_ulaw_prob = md(dec_gru_out2)
    decoder = Model([pcm, dec_feat, dec_state1, dec_state2],
                    [dec_ulaw_prob, state1, state2])
    return model, encoder, decoder