def _model_tf_v1(n_input, n_output):
    """
    Initialize the TensorFlow 1.1X version of the model

    :param int n_input: number of input dimensions (number of ECG + aux channels)
    :param int n_output: number of outputs (number of EEG channels)
    :return: initialized model
    """
    from tensorflow.python.keras.layers import Input, Bidirectional, CuDNNGRU, Dense, Dropout

    # let GPU memory grow on demand instead of pre-allocating all of it
    session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    sess = tf.Session(config=session_config)
    K.set_floatx('float64')

    ecg_input = Input(shape=(None, n_input), dtype='float64', name='ecg_input')
    x = Bidirectional(
        CuDNNGRU(16, return_sequences=True,
                 recurrent_regularizer=l2(0.096),
                 activity_regularizer=l2(0.030)))(ecg_input)
    x = Bidirectional(
        CuDNNGRU(16, return_sequences=True,
                 recurrent_regularizer=l2(0.090),
                 activity_regularizer=l2(0.013)))(x)
    x = Dense(8, activation='relu')(x)
    x = Dropout(0.327)(x)
    x = Bidirectional(
        CuDNNGRU(16, return_sequences=True,
                 recurrent_regularizer=l2(0.024),
                 activity_regularizer=l2(0.067)))(x)
    x = Bidirectional(
        CuDNNGRU(64, return_sequences=True,
                 recurrent_regularizer=l2(2.48e-07),
                 activity_regularizer=l2(0.055)))(x)
    bcg_out = Dense(n_output, activation='linear')(x)
    model = Model(inputs=ecg_input, outputs=bcg_out)
    return model
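# Minimal usage sketch for the function above. The channel counts are hypothetical,
# and a CuDNN-capable GPU plus TF 1.x Keras (Model, K, l2 imported at module level)
# are assumed by the CuDNNGRU layers.
# model = _model_tf_v1(n_input=2, n_output=63)
# model.compile(optimizer='adam', loss='mean_absolute_error')
# model.summary()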
def get_bidirectional_cudnn_model(self, pre_embeddings, dp_rate=-1.0, use_lstm=False):
    """
    cuDNN-backed version, should be much faster
    :param pre_embeddings: pre-trained word embeddings
    :param dp_rate: dropout rate (disabled if <= 0)
    :param use_lstm: use an LSTM or a GRU unit
    :return: the model
    """
    # The embedding part could use multiple channels, as in the original paper
    embedding_layer = Embedding(self.max_features,         # vocabulary size
                                self.embedding_dims,       # word-vector dimension
                                weights=[pre_embeddings],  # pre-trained word vectors
                                input_length=self.maxlen,  # maximum sentence length
                                trainable=False)           # whether to update the embeddings during training

    input = Input((self.maxlen,))
    embedding = embedding_layer(input)
    if use_lstm:
        x = Bidirectional(CuDNNLSTM(RNN_DIM, return_sequences=True))(embedding)  # LSTM
    else:
        x = Bidirectional(CuDNNGRU(RNN_DIM, return_sequences=True))(embedding)   # GRU

    # add none or one of the available attention layers
    x, atten_layer = self.do_attention(x)
    fn = kb.function([input], [atten_layer.att_weights])

    if dp_rate > 0:
        # add a dropout layer
        x = Dropout(dp_rate)(x)
    output = Dense(self.class_num, activation=self.last_activation)(x)
    model = Model(inputs=input, outputs=output)
    return model, fn
def build_gru_model():
    model = Sequential()
    # CuDNNGRU instead of GRU for a large speed-up on GPU
    model.add(CuDNNGRU(32, input_shape=(None, float_data.shape[-1])))
    model.add(Dense(1))
    model.compile(optimizer=RMSprop(), loss='mae')
    return model
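# Usage sketch for build_gru_model(). It assumes `float_data` is a 2-D
# (timesteps, features) array as in the Keras weather-forecasting example, the Keras
# imports used above, and a CuDNN-capable GPU; all shapes below are made up.
import numpy as np

float_data = np.random.randn(1000, 14).astype('float32')   # hypothetical data
model = build_gru_model()
x_batch = float_data[:240].reshape((2, 120, 14))            # (batch, lookback, features)
y_batch = np.random.randn(2, 1).astype('float32')           # hypothetical targets
model.train_on_batch(x_batch, y_batch)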
def build_model_2(time_steps):
    # Putting an activation on every conv layer actually hurts performance here
    model = tf.keras.models.Sequential()
    # One month of data is fed in per sample.
    # The 5 input channels are the four prices plus the trading volume.
    # With so few time steps, start with a small kernel and then grow it; a kernel size
    # of 1 everywhere would keep the convolution from seeing neighbouring steps.
    # model.add(layers.Conv1D(16, 2, padding='same', activation='tanh', strides=1, input_shape=(time_steps, 5)))
    # model.add(layers.Conv1D(32, 2, padding='same', activation='tanh', strides=1))
    model.add(layers.Conv1D(64, 2, padding='same', activation='tanh', strides=1))
    model.add(layers.Conv1D(128, 2, padding='same', activation='tanh', strides=1))
    model.add(layers.AveragePooling1D(2))
    # Normalize over the filter axis, so each filter gets its own beta and gamma.
    # BatchNormalization does not help much here, probably because the mean and
    # variance of stock prices are unstable.
    # model.add(layers.BatchNormalization(axis=2))
    # activation='relu': the activations of CuDNNGRU/CuDNNLSTM appear to be fixed internally.
    # Several dates are fed in at once, so temporal memory may not be needed; dropped for
    # now in favour of a deeper network...
    # return_sequences decides whether only the last hidden state or the hidden state of
    # every time step is returned.
    # Without return_sequences on the first GRU, two GRUs cannot be stacked: the output
    # has no time-step axis, so the shapes do not match.
    model.add(CuDNNGRU(128, return_sequences=True))
    # model.add(CuDNNGRU(256, return_sequences=True))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.4))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(5, activation='tanh'))
    # tanh in the output layer converges and predicts far better than relu, perhaps
    # because its (-1, 1) output range matches the wide range of the inputs
    model.compile(optimizer='adam', loss='mse')
    return model
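# Hypothetical smoke test for build_model_2: one month of synthetic OHLCV data
# (5 channels = 4 prices + volume) scaled to roughly [-1, 1]. Assumes a CuDNN-capable
# GPU and a tf.keras version whose Sequential supports deferred input-shape inference.
import numpy as np

time_steps = 30                                            # made-up window length
model = build_model_2(time_steps)
x = np.random.uniform(-1, 1, size=(16, time_steps, 5))     # (batch, time, channels)
y = np.random.uniform(-1, 1, size=(16, 5))                 # next-step targets
model.fit(x, y, epochs=1, batch_size=8)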
def get_cnn_rnn_model(self, pre_embeddings, dp_rate=0.0, use_lstm=False, filter_sizes=[2, 3, 4]):
    """
    First a CNN to generate a vector, then apply an RNN on that vector
    :param pre_embeddings: pre-trained word embeddings
    :param dp_rate: dropout rate
    :param use_lstm: use an LSTM or a GRU unit
    :return: the model
    """
    # The embedding part could use multiple channels, as in the original paper
    embedding_layer = Embedding(self.max_features,         # vocabulary size
                                self.embedding_dims,       # word-vector dimension
                                weights=[pre_embeddings],  # pre-trained word vectors
                                input_length=self.maxlen,  # maximum sentence length
                                trainable=False)           # whether to update the embeddings during training

    input = Input((self.maxlen,))
    embedding = embedding_layer(input)

    # add a convolution layer
    c = Conv1D(NUM_FILTERS, 3, padding='valid', activation='relu')(embedding)
    cc = MaxPooling1D()(c)
    if dp_rate > 0:
        # add a dropout layer
        cc = Dropout(dp_rate)(cc)

    if use_lstm:
        x = CuDNNLSTM(RNN_DIM)(cc)
    else:
        x = CuDNNGRU(RNN_DIM)(cc)

    output = Dense(self.class_num, activation=self.last_activation)(x)
    model = Model(inputs=input, outputs=output)
    return model
def build_model_3(time_steps):
    model = tf.keras.models.Sequential()
    model.add(layers.Conv1D(64, 2, padding='same', strides=1, activation='relu',
                            kernel_initializer='uniform', input_shape=(time_steps, 5)))
    # model.add(layers.Conv1D(32, 2, padding='same', strides=1, activation='relu', kernel_initializer='uniform'))
    # model.add(layers.Conv1D(64, 2, padding='same', strides=1, activation='relu', kernel_initializer='uniform'))
    model.add(layers.Conv1D(128, 2, padding='same', strides=1, activation='relu',
                            kernel_initializer='uniform'))
    model.add(layers.AveragePooling1D(2))
    model.add(CuDNNGRU(128, return_sequences=True))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.4))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(5, activation='tanh'))
    # Adam optimizer; convergence is fairly slow over short horizons
    model.compile(optimizer='adam', loss='mse')
    return model
def get_cudnn_version_model(self, pre_embeddings, dp_rate=-1.0, use_lstm=False):
    """
    cuDNN-backed version, should be much faster
    :param pre_embeddings: pre-trained word embeddings
    :param dp_rate: dropout rate (disabled if <= 0)
    :param use_lstm: use an LSTM or a GRU unit
    :return: the model
    """
    # The embedding part could use multiple channels, as in the original paper
    embedding_layer = Embedding(self.max_features,         # vocabulary size
                                self.embedding_dims,       # word-vector dimension
                                weights=[pre_embeddings],  # pre-trained word vectors
                                input_length=self.maxlen,  # maximum sentence length
                                trainable=False)           # whether to update the embeddings during training

    input = Input((self.maxlen,))
    embedding = embedding_layer(input)
    if use_lstm:
        x = CuDNNLSTM(RNN_DIM)(embedding)  # LSTM
    else:
        x = CuDNNGRU(RNN_DIM)(embedding)   # GRU

    if dp_rate > 0:
        # add a dropout layer
        x = Dropout(dp_rate)(x)
    output = Dense(self.class_num, activation=self.last_activation)(x)
    model = Model(inputs=input, outputs=output)
    return model
def model(embedding_size, n_a):
    # word embedding matrix
    # word_vec = Input(shape=(embedding_size), name='Words')  # batch, 300
    word_vec = tf.constant(answer_emb, name='Words', dtype='float32')

    # preprocess sentences into sentence vectors
    sentence = Input(shape=(T, embedding_size), name='Sentences')  # batch, 50, 300
    sentence_vec = Bidirectional(CuDNNGRU(units=n_a, return_sequences=False),
                                 name='Sentence_Vectors')(sentence)  # batch, 300

    # dot product between word embeddings and sentence vectors
    # product = Dot(axes=-1, normalize=False, name='Matrix')([word_vec, sentence_vec])
    product = tf.matmul(word_vec, sentence_vec, transpose_b=True, name='Matrix')
    key_matrix = K.transpose(product)

    model = Model(inputs=sentence, outputs=key_matrix)
    return model
def gru_test():
    '''
    With return_sequences the GRU returns its output at every time step;
    without it, only the output of the last step is returned.
    '''
    model = Sequential()
    model.add(CuDNNGRU(128))
    # model.add(CuDNNGRU(128, return_sequences=True))
    model.compile('rmsprop', 'mse')
    input_array = np.random.normal(size=(32, 10, 1))
    output_array = model.predict(input_array)
    print(output_array.shape)
    return model
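# Shape check for the behaviour described in the docstring above (assumes numpy, the
# Keras imports used in gru_test, and a CuDNN-capable GPU): without return_sequences
# one 128-dim vector per sample comes back; with it, one vector per time step.
import numpy as np

m_last = Sequential([CuDNNGRU(128, input_shape=(10, 1))])
m_all = Sequential([CuDNNGRU(128, return_sequences=True, input_shape=(10, 1))])
x = np.random.normal(size=(32, 10, 1))
print(m_last.predict(x).shape)  # (32, 128)
print(m_all.predict(x).shape)   # (32, 10, 128)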
def get_decoder_outputs_gpu(target_length, encoder_states, decoder_inputs, latent_dim):
    # First GRU layer
    decoder_gru1_layer = CuDNNGRU(latent_dim,
                                  input_shape=(None, target_length),
                                  return_sequences=True,
                                  return_state=True,
                                  kernel_constraint=None,
                                  kernel_regularizer=None,
                                  name="decoder_gru1_layer")
    decoder_gru1, state_h = decoder_gru1_layer(decoder_inputs, initial_state=encoder_states)
    # Second GRU layer
    decoder_gru2_layer = CuDNNGRU(latent_dim,
                                  stateful=False,
                                  return_sequences=True,
                                  return_state=True,
                                  kernel_constraint=None,
                                  kernel_regularizer=None,
                                  name="decoder_gru2_layer")
    decoder_outputs, state_h = decoder_gru2_layer(decoder_gru1)
    return decoder_outputs
def get_encoder_states_gpu(input_shape, encoder_inputs, latent_dim):
    encoder = CuDNNGRU(latent_dim,
                       input_shape=(None, input_shape),
                       stateful=False,
                       return_sequences=False,
                       return_state=True,
                       kernel_constraint=None,
                       kernel_regularizer=None,
                       recurrent_initializer='glorot_uniform',
                       name="encoder_gru_layer")
    # 'encoder_outputs' are ignored and only states are kept.
    encoder_outputs, state_h = encoder(encoder_inputs)
    encoder_states = [state_h]
    return encoder_states
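# Hypothetical wiring of the two helpers above (get_encoder_states_gpu and
# get_decoder_outputs_gpu) into a single training model. The function name, feature
# sizes and latent_dim are made up; Input/Dense/Model are assumed to come from the
# same Keras imports as the helpers, and a CuDNN-capable GPU is required.
def build_seq2seq_gpu(n_features_in, n_features_out, latent_dim=64):
    encoder_inputs = Input(shape=(None, n_features_in))
    decoder_inputs = Input(shape=(None, n_features_out))
    encoder_states = get_encoder_states_gpu(n_features_in, encoder_inputs, latent_dim)
    decoder_outputs = get_decoder_outputs_gpu(n_features_out, encoder_states,
                                              decoder_inputs, latent_dim)
    outputs = Dense(n_features_out, activation='linear')(decoder_outputs)
    model = Model([encoder_inputs, decoder_inputs], outputs)
    model.compile(optimizer='adam', loss='mse')
    return model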
def encoder_bi_GRU_gpu(input_shape, encoder_inputs, latent_dim):
    encoder = Bidirectional(CuDNNGRU(latent_dim,
                                     input_shape=(None, input_shape),
                                     stateful=False,
                                     return_sequences=False,
                                     return_state=True,
                                     kernel_constraint=None,
                                     kernel_regularizer=None,
                                     recurrent_initializer='glorot_uniform'),
                            name="encoder_bi_gru_layer")
    # 'encoder_outputs' are ignored and only states are kept.
    encoder_outputs, forward_h, backward_h = encoder(encoder_inputs)
    state_h = Concatenate()([forward_h, backward_h])
    encoder_states = [state_h]
    return encoder_states
def make_discriminator(name, s, adj, node_f, use_gcn=True, use_gru=True):
    n = node_f.shape[0]  # number of nodes
    input_s = Input(shape=(s, n))
    input_f = Input(shape=(n, node_f.shape[1]))
    input_g = Input(shape=(n, n))
    if use_gcn:
        gcov1 = GraphConv(2 * base)([input_f, input_g])
        # gcov2 = GraphConv(base)([gcov1, input_g])
        # batched dot product over the node axis: (batch, s, n) x (batch, n, 2*base)
        input_s1 = Dot(axes=(2, 1))([input_s, gcov1])
    else:
        input_s1 = input_s
    fc1 = Dense(4 * base, activation='relu', input_shape=(n,))(input_s1)
    fc2 = Dense(8 * base, activation='relu', input_shape=(n,))(fc1)  # S*D2
    if use_gru:
        rnn1 = Dropout(dropout)(CuDNNGRU(2 * base, return_sequences=True)(fc2))
    else:
        rnn1 = fc2
    fc3 = Dense(16 * base, activation='relu', input_shape=(n,))(rnn1)
    out = Dense(1)(Flatten()(fc3))
    return Model(name=name, inputs=[input_s, input_f, input_g], outputs=out)
def build_model_1(time_steps):
    model = tf.keras.models.Sequential()
    # With so few time steps, start with a small kernel and then grow it; a kernel size
    # of 1 everywhere would keep the convolution from seeing neighbouring steps.
    model.add(layers.Conv1D(16, 2, padding='same', strides=1, input_shape=(time_steps, 5)))
    model.add(layers.Conv1D(32, 2, padding='same', strides=1))
    model.add(layers.Conv1D(64, 2, padding='same', strides=1))
    # Note: by the second size-2 convolution the receptive field already spans three days
    # of candlesticks, so there is no need for many more layers.
    model.add(layers.Conv1D(128, 2, padding='same', strides=1))
    model.add(layers.AveragePooling1D(2))
    model.add(CuDNNGRU(128, return_sequences=True))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.4))
    model.add(layers.Dense(128, activation='relu'))
    # model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(5, activation='tanh'))
    # Adam optimizer; convergence is fairly slow over short horizons
    model.compile(optimizer='adam', loss='mse')
    return model
def one_hot(x, num_classes):
    return K.one_hot(x, num_classes=num_classes)


enc_size = 256
dec_size = 256

# encoder part
enc_inp = Input(shape=(None,), dtype=tf.int32)
# our inputs are sparse but we need one-hot encoding
enc_one_hot = Lambda(function=one_hot,
                     arguments={'num_classes': eng_vocab_size},
                     output_shape=(max_inp_seq, eng_vocab_size))(enc_inp)

# use CuDNNGRU if available; it is 3x faster
if tf.test.is_gpu_available():
    enc_gru = CuDNNGRU(units=enc_size, return_state=True)
    enc_output, enc_state = enc_gru(enc_one_hot)
else:
    enc_gru = GRU(units=enc_size, return_state=True)
    enc_output, enc_state = enc_gru(enc_one_hot)

# decoder part
dec_inp = Input(shape=(None,), dtype=tf.int32)
# our outputs are sparse but we need one-hot encoding
dec_one_hot = Lambda(function=one_hot,
                     arguments={'num_classes': spa_vocab_size},
                     output_shape=(max_trg_seq, spa_vocab_size))(dec_inp)

# use CuDNNGRU if available; it is 3x faster
if tf.test.is_gpu_available():
    dec_gru = CuDNNGRU(units=dec_size, return_sequences=True,
def init_model(self, input_shape, num_classes, **kwargs):
    layers = 5
    filters_size = [64, 128, 256, 512, 512]
    kernel_size = (3, 3)
    pool_size = [(2, 2), (2, 2), (2, 2), (4, 1), (4, 1)]
    freq_axis = 2
    channel_axis = 3
    channel_size = 128
    min_size = min(input_shape[:2])

    melgram_input = Input(shape=input_shape)
    # x = ZeroPadding2D(padding=(0, 37))(melgram_input)
    x = Reshape((input_shape[0], input_shape[1], 1))(melgram_input)
    x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(x)

    # Conv block 1
    x = Convolution2D(filters=filters_size[0],
                      kernel_size=kernel_size,
                      padding='same',
                      name='conv1')(x)
    x = ELU()(x)
    x = BatchNormalization(axis=channel_axis, name='bn1')(x)
    x = MaxPooling2D(pool_size=pool_size[0], strides=pool_size[0], name='pool1')(x)
    x = Dropout(0.1, name='dropout1')(x)

    min_size = min_size // pool_size[0][0]
    for layer in range(1, layers):
        min_size = min_size // pool_size[layer][0]
        if min_size < 1:
            break
        x = Convolution2D(filters=filters_size[layer],
                          kernel_size=kernel_size,
                          padding='same',
                          name='conv' + str(layer + 1))(x)
        x = ELU()(x)
        x = BatchNormalization(axis=channel_axis, name='bn' + str(layer + 1))(x)
        x = MaxPooling2D(pool_size=pool_size[layer],
                         strides=pool_size[layer],
                         name='pool' + str(layer + 1))(x)
        x = Dropout(0.1, name='dropout' + str(layer + 1))(x)

    x = Reshape((-1, channel_size))(x)

    gru_units = 32
    if num_classes > 32:
        gru_units = int(num_classes * 1.5)
    # GRU block 1, 2, output
    x = CuDNNGRU(gru_units, return_sequences=True, name='gru1')(x)
    x = CuDNNGRU(gru_units, return_sequences=False, name='gru2')(x)
    x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation='softmax', name='output')(x)

    model = TFModel(inputs=melgram_input, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=1e-4,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])
    model.summary()

    self._model = model
    self.is_init = True
def gru_model():
    emb_n = 64
    category_num = {
        'adidmd5': (780369, emb_n),
        'idfamd5': (360, emb_n),
        'imeimd5': (1021836, emb_n),
        'macmd5': (329184, emb_n),
        'openudidmd5': (85051, emb_n),
        'ip': (813719, emb_n),
        'reqrealip': (9748, emb_n),
        'adunitshowid': (800, emb_n),
        'apptype': (91, emb_n),
        'carrier': (4, emb_n),
        'city': (331, emb_n),
        'dvctype': (3, emb_n),
        'model': (5923, emb_n),  # 7957 7958 5922
        'make': (1704, emb_n),
        'mediashowid': (313, emb_n),
        'ntt': (7, emb_n),
        'orientation': (2, emb_n),
        'osv': (185, emb_n),
        'pkgname': (2368, emb_n),
        'ppi': (119, emb_n),
        'ver': (3268, emb_n),
        'screen_area': (1396, emb_n),
        'creative_dpi': (1763, emb_n),
        'hour': (24, emb_n),
        'lan': (33, emb_n),
        'h': (985, emb_n),
        'w': (449, emb_n),
    }

    # categorical feature input
    category_inp = Input(shape=(len(category),), name='category_inp')
    cat_embeds = []
    for idx, col in enumerate(category):
        x = Lambda(lambda x: x[:, idx, None])(category_inp)
        x = Embedding(category_num[col][0], category_num[col][1], input_length=1)(x)
        cat_embeds.append(x)
    embeds = concatenate(cat_embeds, axis=2)
    embeds = GaussianDropout(0.5)(embeds)

    # numerical feature input
    numerical_inp = Input(shape=(len(numerical),), name='continous_inp')
    print('numerical', len(numerical) // 8 * 8 + 8)
    x2 = Dense(len(numerical) // 8 + 8, activation='relu',
               kernel_initializer='random_uniform',
               bias_initializer='zeros')(numerical_inp)
    x2 = Dropout(0.5)(x2)
    x2 = BatchNormalization()(x2)
    x2 = Reshape([1, int(x2.shape[1])])(x2)

    x = concatenate([embeds, x2], axis=2)

    # backbone network
    x = CuDNNGRU(128)(x)
    x = BatchNormalization()(x)
    x = Dropout(0.50)(x)
    x = Dense(64, activation='relu', kernel_initializer='random_uniform')(x)
    x = PReLU()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.50)(x)
    x = Dense(32, activation='relu', kernel_initializer='random_uniform')(x)
    x = PReLU()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.50)(x)
    out_p = Dense(1, activation='sigmoid')(x)
    return Model(inputs=[category_inp, numerical_inp], outputs=out_p)
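# Hypothetical smoke test for gru_model() with random data. The global `category`
# and `numerical` column lists (and the Keras imports) come from the surrounding
# script, which is not part of this excerpt, and a CuDNN-capable GPU is required.
import numpy as np

model = gru_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
X_cat = np.random.randint(0, 2, size=(256, len(category)))  # valid ids for every vocab
X_num = np.random.rand(256, len(numerical))
y = np.random.randint(0, 2, size=(256, 1))
model.fit([X_cat, X_num], y, batch_size=64, epochs=1)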
# In[43]:

embedding_boyut = 50  # embedding dimension

# In[44]:

model.add(Embedding(input_dim=max_kelime,        # vocabulary size
                    output_dim=embedding_boyut,
                    input_length=max_token,      # maximum tokens per sample
                    name='embedding_katman'))

# In[45]:

model.add(CuDNNGRU(units=16, return_sequences=True))
model.add(CuDNNGRU(units=8, return_sequences=True))
model.add(CuDNNGRU(units=4, return_sequences=False))
model.add(Dense(1, activation='sigmoid'))

# In[46]:

optimizer = Adam(lr=1e-3)

# In[47]:

model.compile(loss='binary_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# In[48]:
def init_model(self, input_shape, num_classes, **kwargs):
    freq_axis = 2
    channel_axis = 3
    channel_size = 128
    min_size = min(input_shape[:2])

    melgram_input = Input(shape=input_shape)
    # x = ZeroPadding2D(padding=(0, 37))(melgram_input)
    # x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(x)
    x = Reshape((input_shape[0], input_shape[1], 1))(melgram_input)

    # Conv block 1
    x = Convolution2D(64, 3, 1, padding='same', name='conv1')(x)
    x = BatchNormalization(axis=channel_axis, name='bn1')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)
    x = Dropout(0.1, name='dropout1')(x)

    # Conv block 2
    x = Convolution2D(channel_size, 3, 1, padding='same', name='conv2')(x)
    x = BatchNormalization(axis=channel_axis, name='bn2')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(3, 3), name='pool2')(x)
    x = Dropout(0.1, name='dropout2')(x)

    # Conv block 3
    x = Convolution2D(channel_size, 3, 1, padding='same', name='conv3')(x)
    x = BatchNormalization(axis=channel_axis, name='bn3')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool3')(x)
    x = Dropout(0.1, name='dropout3')(x)

    if min_size // 24 >= 4:
        # Conv block 4
        x = Convolution2D(channel_size, 3, 1, padding='same', name='conv4')(x)
        x = BatchNormalization(axis=channel_axis, name='bn4')(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool4')(x)
        x = Dropout(0.1, name='dropout4')(x)

    x = Reshape((-1, channel_size))(x)

    gru_units = 128
    if num_classes > gru_units:
        gru_units = int(num_classes * 1.5)
    # GRU block 1, 2, output
    x = CuDNNGRU(gru_units, return_sequences=True, name='gru1')(x)
    x = CuDNNGRU(gru_units, return_sequences=False, name='gru2')(x)
    # x = Dense(max(int(num_classes*1.5), 128), activation='relu', name='dense1')(x)
    x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation='softmax', name='output')(x)

    model = TFModel(inputs=melgram_input, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=1e-4,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])
    model.summary()

    self._model = model
    self.is_init = True