def build_model(embedding_matrix, word_index, max_len, lstm_units,
                verbose=False, compile=True, multi=True, gpu_num=4):
    """Assemble a two-layer bidirectional CuDNNLSTM binary classifier.

    Token ids are embedded with a frozen copy of ``embedding_matrix``, passed
    through spatial dropout and two bidirectional ``CuDNNLSTM`` layers, and the
    concatenation of an ``Attention`` summary, a global average pool and a
    global max pool is fed to a single sigmoid unit.

    Args:
        embedding_matrix: Array whose ``shape`` is unpacked into the leading
            positional arguments of ``L.Embedding`` and whose values are used
            as the (non-trainable) embedding weights.
        word_index: Not used by this function.
        max_len: Length of each input sequence.
        lstm_units: Units per direction in each recurrent layer.
        verbose: When true, print the model summary.
        compile: When true, compile with binary cross-entropy, ``Adam(0.005)``
            and the ``'acc'`` metric.
        multi: When true, wrap the model in ``ModelMGPU``.
        gpu_num: Forwarded to ``ModelMGPU`` as ``gpus``.

    Returns:
        The Keras model (wrapped and/or compiled as requested).
    """
    # logger.info('Build model')
    token_ids = L.Input(shape=(max_len,), dtype='int32')
    frozen_embedding = L.Embedding(*embedding_matrix.shape,
                                   weights=[embedding_matrix],
                                   trainable=False)
    hidden = frozen_embedding(token_ids)
    hidden = L.SpatialDropout1D(0.3)(hidden)

    # Two stacked bidirectional recurrent layers, both emitting full sequences.
    for _ in range(2):
        hidden = L.Bidirectional(
            L.CuDNNLSTM(lstm_units, return_sequences=True))(hidden)

    summaries = [
        Attention(max_len)(hidden),
        L.GlobalAveragePooling1D()(hidden),
        L.GlobalMaxPooling1D()(hidden),
    ]
    features = L.concatenate(summaries)
    probability = L.Dense(1, activation='sigmoid')(features)

    model = Model(token_ids, probability)
    if multi:
        print('use multi gpus')
        model = ModelMGPU(model, gpus=gpu_num)
    if verbose:
        model.summary()
    if compile:
        model.compile(loss='binary_crossentropy',
                      optimizer=Adam(0.005),
                      metrics=['acc'])
    return model
def HorisontalySweepLayer(input, filter):
    """Sweep a 4-D feature map with a forward and a backward CuDNNLSTM.

    The input is passed through ``Model.rotateMatrix``, flattened to a
    ``(height * width, channels)`` sequence, processed by two ``CuDNNLSTM``
    layers of ``int(filter / 2)`` units each (one with ``go_backwards=True``),
    reshaped back to ``(height, width, int(filter / 2))``, concatenated on the
    last axis and passed through ``Model.rotateMatrix`` again.

    Args:
        input: Keras tensor; ``_keras_shape[1:4]`` is read as
            (height, width, channels).
        filter: Total number of output channels; each direction gets half.

    Returns:
        The Keras tensor produced by the final ``Lambda`` layer.
    """
    shape = input._keras_shape
    height, width, channels = shape[1], shape[2], shape[3]
    steps = int(width * height)

    rotated = layers.Lambda(Model.rotateMatrix)(input)
    sequence = layers.Reshape((int(steps), int(channels)), name='')(rotated)

    half = int(filter / 2)
    forward = layers.CuDNNLSTM(half, unit_forget_bias=True,
                               return_sequences=True)(sequence)
    backward = layers.CuDNNLSTM(half, unit_forget_bias=True,
                                go_backwards=True,
                                return_sequences=True)(sequence)

    grid_shape = (int(height), int(width), half)
    forward = layers.Reshape(grid_shape, name='')(forward)
    backward = layers.Reshape(grid_shape, name='')(backward)

    merged = layers.concatenate(inputs=[forward, backward], axis=-1)
    return layers.Lambda(Model.rotateMatrix)(merged)
def LYRICS_RNN(num_classes, emb_size):
    """Build a three-layer CuDNNLSTM network over embedded token ids.

    Args:
        num_classes: Width of the final ``Dense`` layer (named ``'logits'``;
            no activation is applied).
        emb_size: Passed as the first positional argument of
            ``layers.Embedding`` (its ``input_dim``); the embedding output
            dimension is fixed at 200.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # (layer name, units, return_sequences) for each recurrent layer, in order.
    recurrent_specs = (
        ('rnn_layer_1', 256, True),
        ('rnn_layer_2', 512, True),
        ('rnn_out', 1024, False),
    )

    model = Sequential()
    model.add(layers.Embedding(emb_size,
                               output_dim=200,
                               input_length=None,
                               name='embedding'))
    for layer_name, units, keep_sequence in recurrent_specs:
        model.add(layers.CuDNNLSTM(units=units,
                                   return_sequences=keep_sequence,
                                   name=layer_name))
    model.add(layers.Dense(num_classes, name='logits'))
    return model
def trainmodel(self, X=None, y=None, fit_args=None, use_generator=False, generator=None):
    """Build, compile and fit the token model, then store it on ``self.model``.

    The network is TimeDistributed(Dense(28)) -> CuDNNLSTM(64) -> Dropout(0.2)
    -> Dense(softmax over ``len(self.tokens_unique)``), compiled with
    categorical cross-entropy and Adam(lr=0.01).

    Args:
        X: Training inputs, used when ``use_generator`` is false.
        y: Training targets, used when ``use_generator`` is false.
        fit_args: Optional mapping of extra keyword arguments forwarded to
            ``fit`` / ``fit_generator``. ``None`` means "no extra arguments".
        use_generator: When true, train with ``fit_generator``.
        generator: Generator to train from; falls back to ``self.generator``
            when falsy.

    Returns:
        None. The fitted model is assigned to ``self.model``.
    """
    # The default is None, and ``**None`` raises TypeError, so normalise to an
    # empty mapping before unpacking.
    fit_kwargs = {} if fit_args is None else fit_args

    window_shape = (self.lookback, len(self.tokens_unique))

    # Copy paste this from the diag above.
    model = keras.models.Sequential()
    model.add(layers.TimeDistributed(layers.Dense(28), input_shape=window_shape))
    # model.add(layers.LeakyReLU(alpha=.001))
    model.add(layers.CuDNNLSTM(64, input_shape=window_shape))
    model.add(layers.Dropout(0.2, noise_shape=None, seed=None))
    model.add(layers.Dense(len(self.tokens_unique), activation='softmax'))

    optimizer = keras.optimizers.Adam(lr=0.01)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    if use_generator:
        if not generator:
            # NOTE(review): this passes the attribute itself, not the result of
            # calling it -- confirm ``self.generator`` is already an iterable
            # that ``fit_generator`` can consume.
            generator = self.generator
        model.fit_generator(generator, **fit_kwargs)
    else:
        model.fit(x=X, y=y, **fit_kwargs)

    self.model = model
def FirstVerticalysweepLayer(*args):
    """First vertical sweep: halve the spatial size and run two CuDNNLSTMs.

    Positional arguments:
        args[0]: Image height; multiplied by 0.5 before use.
        args[1]: Image width; multiplied by 0.5 before use.
        args[2]: Channel count; the input is reshaped to ``channels * 4``
            features per step.
        args[3]: Input Keras tensor.

    Returns:
        ``[tensor, half_height, half_width, steps]`` where ``steps`` is
        ``int(half_height * half_width)`` and is also the number of units of
        each LSTM. The two halved sizes are returned as computed (i.e. the
        result of multiplying by 0.5), not converted to int.
    """
    half_height = args[0] * 0.5
    half_width = args[1] * 0.5
    in_channels = args[2]
    tensor = args[3]

    steps = int(half_height * half_width)
    # 256,12
    sequence = layers.Reshape((steps, in_channels * 4), name='')(tensor)
    # reshapedinput = keras.layers.transpose_shape(reshapedinput,'channels_first',spatial_axes=(0,3))

    # One pass in reading order, one with go_backwards=True.
    direction_options = ({}, {'go_backwards': True})
    swept = [
        layers.CuDNNLSTM(steps, unit_forget_bias=True,
                         return_sequences=True, **options)(sequence)
        for options in direction_options
    ]

    grid_shape = (int(half_height), int(half_width), steps)
    swept = [layers.Reshape(grid_shape, name='')(branch) for branch in swept]

    merged = layers.concatenate(inputs=swept, axis=-1)
    return [merged, half_height, half_width, steps]
def HorisontalySweepLayer(*args):
    """Horizontal sweep over a feature map using two CuDNNLSTM layers.

    Positional arguments:
        args[0]: Image height.
        args[1]: Image width.
        args[2]: Channel count used for the sequence feature size and for the
            reshaped outputs; each LSTM has ``int(channels / 2)`` units.
        args[3]: Input Keras tensor.

    The tensor is passed through ``Model.rotateMatrix``, reshaped to
    ``(int(height * width * 2), channels)``, swept forwards and backwards,
    reshaped to ``(height, width, channels)``, concatenated on the last axis
    and passed through ``Model.rotateMatrix`` again.

    Returns:
        ``[tensor, height, width, channels]`` with the three scalars unchanged.
    """
    height, width, channels, tensor = args[0], args[1], args[2], args[3]
    steps = int(height * width * 2)

    rotated = layers.Lambda(Model.rotateMatrix)(tensor)
    sequence = layers.Reshape((steps, channels), name='')(rotated)

    forward = layers.CuDNNLSTM(int(channels / 2), unit_forget_bias=True,
                               return_sequences=True)(sequence)
    backward = layers.CuDNNLSTM(int(channels / 2), unit_forget_bias=True,
                                go_backwards=True,
                                return_sequences=True)(sequence)

    grid_shape = (int(height), int(width), channels)
    forward = layers.Reshape(grid_shape, name='')(forward)
    backward = layers.Reshape(grid_shape, name='')(backward)

    merged = layers.concatenate(inputs=[forward, backward], axis=-1)
    restored = layers.Lambda(Model.rotateMatrix)(merged)
    return [restored, height, width, channels]
def build_controller(self):
    """Build the controller graph and save its JSON description once.

    For each of ``N_SUBPOL`` sub-policies a ``CuDNNLSTM`` is applied to the
    current input, and for each of ``N_OPS`` operations three softmax heads
    (type, probability, magnitude) are attached to that LSTM's output. The
    concatenated heads, passed through ``expand_dims``, become the input of
    the next sub-policy's LSTM.

    If ``LOG_DIR/controller_model/model.json`` is not an existing file, the
    model's JSON is written there.

    Returns:
        A Keras model mapping the single input to the flat list of all heads.
    """
    # (output width, name suffix) of the three heads attached per operation.
    head_specs = ((N_TYPES, '_type'), (N_PROBS, '_prob'), (N_MAG, '_magn'))

    with tf.variable_scope("Controller"):
        controller_input = layers.Input(shape=INPUT_SHAPE)
        initializer = initializers.RandomUniform(minval=-0.1, maxval=0.1, seed=None)

        step_input = controller_input
        all_heads = []
        for i in range(N_SUBPOL):
            hidden = layers.CuDNNLSTM(units=N_UNITS,
                                      kernel_initializer=initializer)(step_input)
            heads = []
            for j in range(N_OPS):
                name = "subpol_{}_operation_{}".format(i + 1, j + 1)
                for width, suffix in head_specs:
                    heads.append(
                        layers.Dense(width,
                                     activation='softmax',
                                     name=name + suffix,
                                     kernel_initializer=initializer)(hidden))
            all_heads.extend(heads)
            # Feed this sub-policy's decisions into the next LSTM step.
            step_input = layers.Lambda(expand_dims)(layers.Concatenate()(heads))

        model = models.Model(controller_input, all_heads)

    json_path = os.path.join(LOG_DIR, "controller_model", "model.json")
    if not os.path.isfile(json_path):
        model_json = model.to_json()  # Converts model to JSON
        with open(json_path, "w") as json_file:
            json_file.write(model_json)  # Write to file
    return model
def build_model(verbose=False, compile=True):
    """Build a single-layer bidirectional CuDNNLSTM binary classifier.

    Reads the module-level names ``maxlen``, ``word_index`` and
    ``embedding_matrix``. The embedding is frozen, has ``len(word_index) + 1``
    rows and 300 columns, and is followed by spatial dropout (0.2), one
    bidirectional ``CuDNNLSTM(64)``, and a sigmoid unit over the concatenated
    attention / average-pool / max-pool summaries.

    Args:
        verbose: When true, print the model summary.
        compile: When true, compile with binary cross-entropy, ``Adam(0.005)``
            and the ``'acc'`` metric.

    Returns:
        The Keras model.
    """
    token_ids = L.Input(shape=(maxlen,), dtype='int32')
    frozen_embedding = L.Embedding(len(word_index) + 1,
                                   300,
                                   weights=[embedding_matrix],
                                   input_length=maxlen,
                                   trainable=False)
    hidden = frozen_embedding(token_ids)
    hidden = L.SpatialDropout1D(0.2)(hidden)
    hidden = L.Bidirectional(L.CuDNNLSTM(64, return_sequences=True))(hidden)

    summaries = [
        Attention(maxlen)(hidden),
        L.GlobalAveragePooling1D()(hidden),
        L.GlobalMaxPooling1D()(hidden),
    ]
    probability = L.Dense(1, activation='sigmoid')(L.concatenate(summaries))

    model = Model(token_ids, probability)
    if verbose:
        model.summary()
    if compile:
        model.compile(loss='binary_crossentropy',
                      optimizer=Adam(0.005),
                      metrics=['acc'])
    return model
def VerticalysweepUpscaleLayer(*args):
    """Vertical sweep that doubles the spatial size of the feature map.

    Positional arguments:
        args[0]: Image height.
        args[1]: Image width.
        args[2]: Channel count; the input is reshaped to ``channels * 2``
            features per step and each LSTM has ``channels`` units.
        args[3]: Input Keras tensor.

    Each LSTM output is reshaped to
    ``(height * 2, width * 2, int(channels / 4))`` and the two directions are
    concatenated on the last axis.

    Returns:
        ``[tensor, height * 2, width * 2, int(channels / 2)]``.
    """
    height, width, channels, tensor = args[0], args[1], args[2], args[3]
    steps = int(height * width)

    sequence = layers.Reshape((steps, int(channels * 2)), name='')(tensor)

    forward = layers.CuDNNLSTM(int(channels), unit_forget_bias=True,
                               return_sequences=True)(sequence)
    backward = layers.CuDNNLSTM(int(channels), unit_forget_bias=True,
                                go_backwards=True,
                                return_sequences=True)(sequence)

    out_height = height * 2
    out_width = width * 2
    grid_shape = (int(out_height), int(out_width), int(channels / 4))
    forward = layers.Reshape(grid_shape, name='')(forward)
    backward = layers.Reshape(grid_shape, name='')(backward)

    merged = layers.concatenate(inputs=[forward, backward], axis=-1)
    return [merged, out_height, out_width, int(channels / 2)]
def get_lstm(self, size, return_sequences=True, name='monkeys'):
    """Return an LSTM layer, choosing the cuDNN variant when a GPU is reported.

    Args:
        size: Number of units.
        return_sequences: Forwarded to the layer constructor.
        name: Layer name.

    Returns:
        ``layers.CuDNNLSTM`` if ``tf.test.is_gpu_available()`` is true,
        otherwise ``layers.LSTM``, constructed with the same arguments.
    """
    lstm_class = layers.CuDNNLSTM if tf.test.is_gpu_available() else layers.LSTM
    return lstm_class(size, return_sequences=return_sequences, name=name)
def verticalSweepLayer(input, filter, Scale, Down):
    """Vertical sweep with optional down- or up-scaling of the feature map.

    Args:
        input: Keras tensor; ``_keras_shape[1:4]`` is read as
            (height, width, channels).
        filter: Channel budget; each reshaped direction has
            ``int(filter / 2)`` channels.
        Scale: Scaling factor; it is doubled before use.
        Down: Truthy selects the down-scaling branch (LSTMs with
            ``int(filter / 2)`` units, spatial factor ``2 / Scale``); falsy
            selects the other branch (LSTMs with ``int(filter * Scale * 2)``
            units, spatial factor ``Scale / 2``), with ``Scale`` already
            doubled in both cases.

    Returns:
        Keras tensor: both sweep directions concatenated on the last axis.
    """
    shape = input._keras_shape
    height, width, channels = shape[1], shape[2], shape[3]

    Scale = Scale * 2
    steps = int(width * height * (1 / Scale))
    sequence = layers.Reshape((int(steps), int(channels * Scale)), name='')(input)

    # Only the LSTM width and the spatial factor differ between the branches.
    if Down:
        units = int(filter / 2)
        spatial_factor = 2 / Scale
    else:
        units = int(filter * Scale * 2)
        spatial_factor = Scale / 2

    forward = layers.CuDNNLSTM(units, unit_forget_bias=True,
                               return_sequences=True)(sequence)
    backward = layers.CuDNNLSTM(units, unit_forget_bias=True,
                                go_backwards=True,
                                return_sequences=True)(sequence)

    grid_shape = (int(height * spatial_factor),
                  int(width * spatial_factor),
                  int(filter / 2))
    forward = layers.Reshape(grid_shape, name='')(forward)
    backward = layers.Reshape(grid_shape, name='')(backward)

    return layers.concatenate(inputs=[forward, backward], axis=-1)
def build_model(self):
    """Build an actor (policy) model that maps states -> actions.

    Sets ``self.model`` (states -> scaled actions) and ``self.train_fn``, a
    backend function taking ``[states, action_gradients, learning_phase]`` that
    applies one Adam update minimising ``mean(-action_gradients * actions)``.
    """
    # Input layer (state vector).
    state_input = layers.Input(shape=(self.state_size,), name='states')

    # Reshape action repeats into timesteps for the recurrent layers; the
    # fixed (9, 3) target means the state vector must hold 27 values.
    sequence = layers.Reshape((9, 3))(state_input)

    # Recurrent layers.
    hidden = layers.CuDNNLSTM(units=16, return_sequences=True)(sequence)
    hidden = layers.CuDNNLSTM(units=32)(hidden)

    # Two Dense -> BatchNorm -> LeakyReLU blocks with L2-regularised kernels.
    for width in (32, 64):
        hidden = layers.Dense(units=width,
                              kernel_regularizer=regularizers.l2(0.01))(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.LeakyReLU(alpha=0.1)(hidden)

    # Sigmoid output in [0, 1], then scaled into the action range.
    raw_actions = layers.Dense(units=self.action_size,
                               activation='sigmoid',
                               name='raw_actions')(hidden)
    actions = layers.Lambda(
        lambda x: (x * self.action_range) + self.action_low,
        name='actions')(raw_actions)

    self.model = models.Model(inputs=state_input, outputs=actions)

    # Loss built from the action-value (Q value) gradients.
    action_gradients = layers.Input(shape=(self.action_size,))
    loss = K.mean(-action_gradients * actions)

    # Optimizer and training function.
    optimizer = optimizers.Adam(lr=self.lr)
    updates_op = optimizer.get_updates(
        params=self.model.trainable_weights, loss=loss)
    self.train_fn = K.function(
        inputs=[self.model.input, action_gradients, K.learning_phase()],
        outputs=[],
        updates=updates_op)
def RNN_LARGE(input_shape, test_type, num_classes):
    """Build a three-layer CuDNNLSTM model with a task-dependent output.

    Args:
        input_shape: Shape of one input sample (without the batch axis).
        test_type: Selects the output activation: ``'sgc'`` -> softmax,
            ``'mgc'`` -> sigmoid, ``'cos'`` or ``'mse'`` -> linear.
        num_classes: Width of the ``'logits'`` Dense layer.

    Returns:
        An uncompiled Keras ``Model``. The final ``Activation`` layer is named
        after the activation it applies.

    Raises:
        ValueError: If ``test_type`` is not one of the supported values.
    """
    activation_by_type = {
        'sgc': 'softmax',
        'mgc': 'sigmoid',
        'cos': 'linear',
        'mse': 'linear',
    }
    # Validate before building any layers: previously an unknown test_type
    # surfaced only at the end, as an unbound-local error.
    if test_type not in activation_by_type:
        raise ValueError(
            'Unknown test_type {!r}; expected one of {}'.format(
                test_type, sorted(activation_by_type)))
    output_activation = activation_by_type[test_type]

    inputs = layers.Input(shape=input_shape)
    x = layers.CuDNNLSTM(units=256, return_sequences=True, name='rnn_layer_1')(inputs)
    x = layers.CuDNNLSTM(units=512, return_sequences=True, name='rnn_layer_2')(x)
    x = layers.CuDNNLSTM(units=1024, return_sequences=False, name='rnn_out')(x)
    x = layers.Dense(num_classes, name='logits')(x)
    pred = layers.Activation(output_activation, name=output_activation)(x)
    return Model(inputs=inputs, outputs=pred)
def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values.

    Sets ``self.model`` (compiled with Adam and MSE loss) and
    ``self.get_action_gradients``, a backend function taking
    ``[states, actions, learning_phase]`` and returning the gradient of the
    Q-values with respect to the actions.
    """

    def dense_block(tensor, width):
        # Dense -> BatchNorm -> LeakyReLU(0.3), used by both pathways.
        tensor = layers.Dense(units=width)(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.LeakyReLU(alpha=0.3)(tensor)

    # Input layers.
    states = layers.Input(shape=(self.state_size,), name='states')
    actions = layers.Input(shape=(self.action_size,), name='actions')

    # State pathway. The fixed (9, 3) reshape means the state vector must hold
    # 27 values.
    reshape = layers.Reshape((9, 3))(states)
    net_states = layers.CuDNNLSTM(units=16)(reshape)
    # Feed the LSTM output forward. The first Dense layer used to be applied
    # to the raw ``states`` tensor, which silently discarded the LSTM.
    net_states = dense_block(net_states, 32)
    net_states = dense_block(net_states, 64)

    # Action pathway.
    net_actions = dense_block(actions, 32)
    net_actions = dense_block(net_actions, 64)

    # Combine state and action pathways.
    net = layers.Add()([net_states, net_actions])
    net = layers.Activation('relu')(net)

    # Extra layer on the combined network.
    net = layers.Dense(units=32)(net)
    net = layers.BatchNormalization()(net)
    net = layers.Activation('relu')(net)

    # Final output layer producing action values (Q values).
    Q_values = layers.Dense(units=1, name='q_values')(net)

    self.model = models.Model(inputs=[states, actions], outputs=Q_values)

    # Optimizer and built-in loss for training.
    optimizer = optimizers.Adam(lr=self.lr)
    self.model.compile(optimizer=optimizer, loss='mse')

    # Action gradients (derivative of Q values w.r.t. actions), fetched by the
    # actor model through this function.
    action_gradients = K.gradients(Q_values, actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def create_model(self):
    """Build the controller network: 5 chained LSTM steps with 6 heads each.

    Each of the 5 steps applies a ``CuDNNLSTM(100)`` to the current input and
    attaches, for each of 2 operations, three softmax heads (type,
    probability, magnitude). The concatenated heads, passed through
    ``expand_dims``, form the input of the next step.

    Returns:
        A Keras model mapping the single input to the flat list of heads.
    """
    # Implementation note: Keras requires an input. I create an input and then feed
    # zeros to the network. Ugly, but it's the same as disabling those weights.
    # Furthermore, Keras LSTM input=output, so we cannot produce more than SUBPOLICIES
    # outputs. This is not desirable, since the paper produces 25 subpolicies in the
    # end.
    with tf.variable_scope("Controller"):
        input_layer = layers.Input(shape=INPUT_SHAPE)
        initializer = initializers.RandomUniform(minval=-0.1, maxval=0.1, seed=None)
        # Only the last element of each of these lists is ever read back.
        input_layers = [input_layer]
        hidden_layers = []
        output_softmaxes = []
        for i in range(5):
            hidden_layers.append(
                layers.CuDNNLSTM(units=100, kernel_initializer=initializer)(
                    input_layers[-1]))
            output_layer = []
            for j in range(2):
                name = "subpol_{}_operation_{}".format(i + 1, j + 1)
                output_layer.extend([
                    layers.Dense(OP_TYPES, activation='softmax', name=name + '_type', kernel_initializer=initializer)(
                        hidden_layers[-1]),
                    layers.Dense(OP_PROBS, activation='softmax', name=name + '_prob', kernel_initializer=initializer)(
                        hidden_layers[-1]),
                    layers.Dense(OP_MAGNITUDES, activation='softmax', name=name + '_magn', kernel_initializer=initializer)(
                        hidden_layers[-1])
                ])
            output_softmaxes.append(output_layer)
            # This step's heads become the next step's LSTM input.
            input_layers.append(
                layers.Lambda(expand_dims)(
                    layers.Concatenate()(output_layer)))
        # Flatten the per-step head lists into one list of model outputs.
        output_list = [
            item for sublist in output_softmaxes for item in sublist
        ]
        model = models.Model(input_layer, output_list)
    return model
'''
def __init__(self, use_cudnn_lstm=True, plot_model_architecture=False):
    """Build and compile a siamese LSTM similarity model.

    Two variable-length inputs of 300-dimensional vectors share one LSTM
    (50 units). Their final states are combined element-wise as
    ``1 - |left - right|`` and fed to a single sigmoid unit. The model is
    stored on ``self.model``, compiled via ``self.__compile()`` and its
    summary is printed.

    Args:
        use_cudnn_lstm: Use ``CuDNNLSTM`` (faster) instead of ``LSTM``.
        plot_model_architecture: When true, write the architecture diagram to
            ``siamese_architecture.png``.
    """
    n_hidden = 50
    input_dim = 300

    # unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate
    # at initialization. Setting it to true will also force
    # bias_initializer="zeros". This is recommended in Jozefowicz et al.
    # he_normal: Gaussian initialization scaled by fan_in (He et al., 2014)
    # CuDNNLSTM is preferred over LSTM because it is faster.
    lstm_class = layers.CuDNNLSTM if use_cudnn_lstm else layers.LSTM
    lstm = lstm_class(n_hidden,
                      unit_forget_bias=True,
                      kernel_initializer='he_normal',
                      kernel_regularizer='l2',
                      name='lstm_layer')

    # Left branch: variable-length sequences of vectors of size ``input_dim``.
    left_input = Input(shape=(None, input_dim), name='input_1')
    # left_masked_input = layers.Masking(mask_value=0)(left_input)
    left_output = lstm(left_input)

    # Right branch: calling the same layer instance again reuses its weights.
    right_input = Input(shape=(None, input_dim), name='input_2')
    # right_masked_input = layers.Masking(mask_value=0)(right_input)
    right_output = lstm(right_input)

    # Classifier on top. The functional ``merge(..., mode=callable)`` API is
    # deprecated and absent from later Keras 2 releases; a Lambda layer over
    # the pair of tensors computes the same element-wise 1 - |a - b|.
    merged = layers.Lambda(
        lambda pair: 1 - K.abs(pair[0] - pair[1]),
        output_shape=lambda shapes: shapes[0],
        name='L1_distance')([left_output, right_output])
    predictions = layers.Dense(1, activation='sigmoid', name='Similarity_layer')(merged)

    # When this model is trained, the shared LSTM weights are updated based on
    # both inputs.
    self.model = Model([left_input, right_input], predictions)
    self.__compile()
    # summary() prints by itself and returns None, so it is not wrapped in
    # print().
    self.model.summary()

    if plot_model_architecture:
        from keras.utils import plot_model
        plot_model(self.model, to_file='siamese_architecture.png')
def build_model(sentenceLength, word_index, verbose=False, compile=True):
    """Build a two-input (sentence, topic) model with a 3-unit sigmoid output.

    Both inputs are embedded with separate frozen embeddings initialised from
    the module-level ``embedding_matrix``. ``Attention`` is applied to the
    pair of embedded sequences, the result is concatenated with the output of
    ``cosine_distance`` computed against ``topic_mean`` of the topic
    embedding, and the concatenation is passed through a bidirectional
    ``CuDNNLSTM(128)`` followed by average and max pooling.

    Several intermediate objects are printed while the graph is built.

    Args:
        sentenceLength: Length of both input sequences.
        word_index: Mapping whose size (+1) gives the embedding row count.
        verbose: When true, print the model summary.
        compile: When true, compile with binary cross-entropy, ``Adam(0.005)``
            and the ``'accuracy'`` metric.

    Returns:
        The Keras model taking ``[sequence_input, topic_sequence_input]``.
    """

    def frozen_embedding():
        # Each call builds an independent, non-trainable embedding layer.
        return L.Embedding(len(word_index) + 1,
                           300,
                           weights=[embedding_matrix],
                           input_length=sentenceLength,
                           trainable=False)

    sentence_ids = L.Input(shape=(sentenceLength, ), dtype='int32')
    print(sentence_ids[0])
    topic_ids = L.Input(shape=(sentenceLength, ), dtype='int32')
    print(topic_ids.shape)

    sentence_embedding = frozen_embedding()
    print(sentence_embedding)
    topic_embedding = frozen_embedding()

    sentence_vectors = sentence_embedding(sentence_ids)
    topic_vectors = topic_embedding(topic_ids)

    attended = Attention(sentenceLength)([sentence_vectors, topic_vectors])
    topic_centroid = Lambda(topic_mean,
                            output_shape=topic_mean_output_shape)(topic_vectors)
    distance = Lambda(cosine_distance,
                      output_shape=cos_dist_output_shape)(
                          [attended, topic_centroid])

    hidden = concatenate([attended, distance])
    # att=K.Dropout(0.15)(att)
    hidden = L.Bidirectional(L.CuDNNLSTM(128, return_sequences=True))(hidden)

    pooled = [L.GlobalAveragePooling1D()(hidden),
              L.GlobalMaxPooling1D()(hidden)]
    scores = L.Dense(3, activation='sigmoid')(L.concatenate(pooled))

    model = Model(inputs=[sentence_ids, topic_ids], outputs=scores)
    if verbose:
        model.summary()
    if compile:
        model.compile(loss='binary_crossentropy',
                      optimizer=Adam(0.005),
                      metrics=['accuracy'])
    return model
def build_model(sentenceLength, word_index,):
    """Build and compile a two-input (sentence, topic) BiLSTM + Conv1D model.

    Both inputs are embedded with separate frozen embeddings initialised from
    the module-level ``embedding_matrix``. The sentence embedding is
    concatenated with the output of ``cosine_distance`` computed against
    ``topic_mean`` of the topic embedding, then passed through a bidirectional
    ``CuDNNLSTM(96)``, a ``Conv1D(64, kernel_size=3)`` and average / max
    pooling into a 3-unit sigmoid layer.

    Several intermediate objects are printed while the graph is built.

    Args:
        sentenceLength: Length of both input sequences.
        word_index: Mapping whose size (+1) gives the embedding row count.

    Returns:
        The model, compiled with binary cross-entropy, ``Adam(lr=1e-3)`` and
        the ``'accuracy'`` metric.
    """

    def frozen_embedding():
        # Each call builds an independent, non-trainable embedding layer.
        return L.Embedding(len(word_index) + 1,
                           300,
                           weights=[embedding_matrix],
                           input_length=sentenceLength,
                           trainable=False)

    sentence_ids = L.Input(shape=(sentenceLength,), dtype='int32')
    print(sentence_ids[0])
    topic_ids = L.Input(shape=(sentenceLength,), dtype='int32')
    print(topic_ids.shape)

    sentence_embedding = frozen_embedding()
    print(sentence_embedding)
    topic_embedding = frozen_embedding()

    sentence_vectors = sentence_embedding(sentence_ids)
    topic_vectors = topic_embedding(topic_ids)

    topic_centroid = Lambda(topic_mean,
                            output_shape=topic_mean_output_shape)(topic_vectors)
    distance = Lambda(cosine_distance,
                      output_shape=cos_dist_output_shape)(
                          [sentence_vectors, topic_centroid])

    hidden = concatenate([sentence_vectors, distance])
    # dropout = 0.15 , recurrent_dropout = 0.15 )
    hidden = Bidirectional(L.CuDNNLSTM(96, return_sequences=True))(hidden)
    hidden = Conv1D(64,
                    kernel_size=3,
                    padding="valid",
                    kernel_initializer="glorot_uniform")(hidden)

    pooled = [GlobalAveragePooling1D()(hidden), GlobalMaxPooling1D()(hidden)]
    scores = Dense(3, activation="sigmoid")(concatenate(pooled))
    print(scores.shape)

    model = Model(inputs=[sentence_ids, topic_ids], outputs=scores)
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=1e-3),
                  metrics=['accuracy'])
    return model
def build_model():
    """Build and compile a BiLSTM + Conv1D classifier with 3 sigmoid outputs.

    Inputs are sequences of 150 token ids drawn from a 100000-word vocabulary,
    embedded into 300 dimensions with a trainable, randomly initialised
    embedding.

    Returns:
        The model, compiled with binary cross-entropy, ``Adam(lr=1e-3)`` and
        the ``'accuracy'`` metric.
    """
    maxlen = 150
    embed_size = 300
    max_features = 100000

    token_ids = Input(shape=(maxlen, ))
    hidden = Embedding(max_features, embed_size)(token_ids)
    # dropout = 0.15 , recurrent_dropout = 0.15 )
    hidden = Bidirectional(L.CuDNNLSTM(96, return_sequences=True))(hidden)
    hidden = Conv1D(64,
                    kernel_size=3,
                    padding="valid",
                    kernel_initializer="glorot_uniform")(hidden)

    pooled = [GlobalAveragePooling1D()(hidden), GlobalMaxPooling1D()(hidden)]
    scores = Dense(3, activation="sigmoid")(concatenate(pooled))

    model = Model(token_ids, scores)
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=1e-3),
                  metrics=['accuracy'])
    return model
def build_model(embedding_matrix, word_index, verbose=False, compile=True):
    """Build a single-layer bidirectional CuDNNLSTM binary classifier.

    Sequence length and LSTM width come from the module-level constants
    ``MAX_LEN`` and ``LSTM_UNITS``. A 'Build model' message is logged first.

    Args:
        embedding_matrix: Array whose ``shape`` is unpacked into the leading
            positional arguments of ``L.Embedding`` and whose values are used
            as the (non-trainable) embedding weights.
        word_index: Not used by this function.
        verbose: When true, print the model summary.
        compile: When true, compile with binary cross-entropy, ``Adam(0.005)``
            and the ``'acc'`` metric.

    Returns:
        The Keras model.
    """
    logger.info('Build model')

    token_ids = L.Input(shape=(MAX_LEN, ), dtype='int32')
    frozen_embedding = L.Embedding(*embedding_matrix.shape,
                                   weights=[embedding_matrix],
                                   trainable=False)
    hidden = frozen_embedding(token_ids)
    hidden = L.SpatialDropout1D(0.2)(hidden)
    hidden = L.Bidirectional(
        L.CuDNNLSTM(LSTM_UNITS, return_sequences=True))(hidden)

    summaries = [
        Attention(MAX_LEN)(hidden),
        L.GlobalAveragePooling1D()(hidden),
        L.GlobalMaxPooling1D()(hidden),
    ]
    probability = L.Dense(1, activation='sigmoid')(L.concatenate(summaries))

    model = Model(token_ids, probability)
    if verbose:
        model.summary()
    if compile:
        model.compile(loss='binary_crossentropy',
                      optimizer=Adam(0.005),
                      metrics=['acc'])
    return model
dtype='int64', name='prehist_tracks_input') x2 = track_embed(prehist_tracks_input) x2 = track_bn(x2) x2 = track_transformer(x2) topred_tracks_input = kl.Input(shape=(None, ), dtype='int64', name='topred_tracks_input') x3 = track_embed(topred_tracks_input) x3 = track_bn(x3) x3 = track_transformer(x3) x = kl.concatenate([x1, x2], axis=-1) lstm1 = kl.Bidirectional( kl.CuDNNLSTM(64, return_sequences=False, return_state=False, name='lstm1')) prehist_sc_1 = lstm1(x) x = kl.concatenate([x2, x3], axis=1) lstm2 = kl.Bidirectional( kl.CuDNNLSTM(64, return_sequences=False, return_state=False, name='lstm2')) prehist_sc_2 = lstm2(x) prehist_sc = kl.concatenate([prehist_sc_1, prehist_sc_2]) def repeat_vector(args): layer_to_repeat = args[0] sequence_layer = args[1] return kl.RepeatVector(K.shape(sequence_layer)[1])(layer_to_repeat)
# Add a trailing axis of size 1 so each image array has shape
# (n_samples, img_height, img_width, 1), matching ``input2`` below.
written_train0 = written_train.reshape(written_train.shape[0], img_height, img_width, 1)
written_test0 = written_test.reshape(written_test.shape[0], img_height, img_width, 1)

# ### Model Building:
# We use a multi-model approach: an LSTM-based branch for the spoken input and
# a CNN-based branch for the image input. The outputs of the two branches are
# concatenated and a binary cross-entropy loss is applied.

# In[5]:

# Spoken-input branch: one sequence of shape
# (max_len_speak_frames, speak_frame_feature) per sample.
input1 = Input(shape=(max_len_speak_frames, speak_frame_feature))
# x1 =layers.LSTM(40, activation="relu", dropout=0.25, recurrent_dropout=0.25)(input1)
x1 = layers.CuDNNLSTM(50)(input1)
# BatchNorm -> ReLU -> Dropout after the recurrent layer.
x1 = layers.BatchNormalization()(x1)
x1 = layers.Activation('relu')(x1)
x1 = layers.Dropout(0.2)(x1)
# Dense(256) with the same BatchNorm -> ReLU -> Dropout pattern.
x1 = layers.Dense(256)(x1)
x1 = layers.BatchNormalization()(x1)
x1 = layers.Activation('relu')(x1)
x1 = layers.Dropout(0.2)(x1)
x1 = layers.Dense(128, activation="relu")(x1)

# Image branch: single-channel images of size (img_height, img_width).
input2 = Input(shape=(img_height, img_width, 1))
x2 = layers.Conv2D(32, kernel_size=(3, 3))(input2)
x2 = layers.BatchNormalization()(x2)
x2 = layers.Activation('relu')(x2)
x2 = layers.Dropout(0.1)(x2)
# In[49]:

plot_train_validation_loss(history, 'Dense')

# ## CuDNNLSTM
# ### Training (CuDNNLSTM)

# In[51]:

start_time = time.time()

# The recurrent layer consumes the (timesteps, features) batches directly.
# A leading Flatten layer (left over from the Dense baseline) would collapse
# them to 2-D, and the LSTM then rejects the input because it requires 3-D
# tensors of shape (batch, timesteps, features).
model = Sequential()
model.add(layers.CuDNNLSTM(32, input_shape=(None, weather_df_value.shape[-1])))
model.add(layers.Dense(1))

model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen,
                              steps_per_epoch=400,
                              epochs=20,
                              validation_data=val_gen,
                              validation_steps=val_steps)

print("--- %s seconds ---" % (time.time() - start_time))

# ### Train Loss, Validation Loss graph

# In[ ]:
def build_keras_model(word_embedding_dims, num_words_name, emb_matrix_name,
                      max_seq_len_name, num_words_item_desc,
                      emb_matrix_item_desc, max_seq_len_item_desc,
                      cat_embedding_dims, num_categories, num_brands):
    """Build a six-input fusion model with a single ReLU output unit.

    Branches:
        * item description and name: trainable embedding (initialised from the
          given matrix) -> spatial dropout -> two ``CuDNNLSTM`` layers
          (200 units for the description, 100 for the name) -> dropout.
        * category and brand: embedding -> dropout -> reshape to a flat vector
          of ``cat_embedding_dims`` values.
        * condition and shipping: used as-is (one value each).

    All branches are concatenated along axis 1 and passed through
    Dense(400) -> Dense(200) -> Dense(1), all with ReLU activation.

    Returns:
        An uncompiled Keras model whose inputs are ordered
        ``[cond, ship, category, brand, name, item_desc]``.
    """

    def chain(tensor, *stages):
        # Apply the given layers to ``tensor`` one after another.
        for stage in stages:
            tensor = stage(tensor)
        return tensor

    cond_input = kl.Input(shape=(1, ), name='cond_input')
    ship_input = kl.Input(shape=(1, ), name='ship_input')
    category_input = kl.Input(shape=(1, ), name='category_input')
    brand_input = kl.Input(shape=(1, ), name='brand_input')
    item_desc_input = kl.Input(shape=(max_seq_len_item_desc, ),
                               name='item_desc_input')
    name_input = kl.Input(shape=(max_seq_len_name, ), name='name_input')

    item_desc_output = chain(
        item_desc_input,
        kl.Embedding(num_words_item_desc,
                     word_embedding_dims,
                     weights=[emb_matrix_item_desc],
                     trainable=True,
                     name='item_desc_embedding'),
        kl.SpatialDropout1D(0.5, name='item_desc_embedding_dropout'),
        kl.CuDNNLSTM(units=200, name='item_desc_lstm_1',
                     return_sequences=True),
        kl.CuDNNLSTM(units=200, name='item_desc_lstm_2'),
        kl.Dropout(0.5, name='item_desc_lstm_dropout'),
    )

    name_output = chain(
        name_input,
        kl.Embedding(num_words_name,
                     word_embedding_dims,
                     weights=[emb_matrix_name],
                     trainable=True,
                     name='name_embedding'),
        kl.SpatialDropout1D(0.5, name='name_embedding_dropout'),
        kl.CuDNNLSTM(units=100, name='name_lstm_1', return_sequences=True),
        kl.CuDNNLSTM(units=100, name='name_lstm_2'),
        kl.Dropout(0.5, name='name_lstm_dropout'),
    )

    category_output = chain(
        category_input,
        kl.Embedding(num_categories, cat_embedding_dims,
                     name='category_embedding'),
        kl.Dropout(0.5, name='category_embedding_dropout'),
        kl.Reshape(target_shape=(cat_embedding_dims, ),
                   name='category_reshape'),
    )

    brand_output = chain(
        brand_input,
        kl.Embedding(num_brands, cat_embedding_dims, name='brand_embedding'),
        kl.Dropout(0.5, name='brand_embedding_dropout'),
        kl.Reshape(target_shape=(cat_embedding_dims, ), name='brand_reshape'),
    )

    fused = kl.Concatenate(axis=1, name='input_fusion')([
        cond_input, ship_input, name_output, item_desc_output,
        category_output, brand_output
    ])
    prediction = chain(
        fused,
        kl.Dense(400, activation='relu', name='fusion_dense_1'),
        # kl.Dropout(0.1, name='fusion_dropout_1'),
        kl.Dense(200, activation='relu', name='fusion_dense_2'),
        kl.Dense(1, activation='relu', name='fusion_dense_3'),
    )

    return km.Model(inputs=[
        cond_input, ship_input, category_input, brand_input, name_input,
        item_desc_input
    ], outputs=prediction)
# Rows 467..583 form the test split. Along the second axis the first 396900
# positions are the signal and the next 4 are the label slots.
test_data = all_data[467:584, 0:396900]
test_labels = all_data[467:584, 396900:396904, 0]

# Conv1D / MaxPooling1D front end followed by a recurrent layer and a 4-way
# softmax classifier.
cnn = keras.models.Sequential()
cnn.add(
    layers.Conv1D(2,
                  kernel_size=(1),
                  strides=(1),
                  activation='relu',
                  input_shape=(396900, 1)))
cnn.add(layers.MaxPooling1D(pool_size=(2), strides=(2)))
cnn.add(layers.Conv1D(8, (1), activation='relu'))
cnn.add(layers.MaxPooling1D(pool_size=(2)))
cnn.add(layers.CuDNNLSTM(12, input_shape=(396900, 1)))
#cnn.add(layers.CuDNNGRU(12))
cnn.add(layers.Dense(4, activation='softmax'))

cnn.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['acc'])
history = cnn.fit(train_data,
                  train_labels,
                  epochs=5,
                  batch_size=4,
                  validation_data=(test_data, test_labels))
#results = models.evaluate(test_data, test_targets)

history_dict = history.history
train_loss = history_dict['loss']
# Derive the epoch axis from the recorded history. The previous hard-coded
# range(1, 3) did not match the 5 epochs trained above, so it could not be
# paired with the per-epoch metrics.
epochs = range(1, len(train_loss) + 1)
def compile_elmo(self):
    """Compile a bidirectional language-model RNN from ``self.parameters``.

    Builds a forward and a backward stack of ``n_lstm_layers`` residual LSTM
    blocks over the token representation (word embeddings or the output of
    ``self.char_level_token_encoder()``), projects both directions to the
    vocabulary with a shared ``SampleSoftmax`` layer, and stores the compiled
    model on ``self._model``. The model summary is printed.

    Raises:
        ValueError: If ``parameters['token_encoding']`` is neither ``'word'``
            nor ``'char'``.
    """
    params = self.parameters
    token_encoding = params.get('token_encoding')

    if token_encoding == 'word':
        # Train word embeddings from scratch
        word_inputs = layers.Input(shape=(None, ),
                                   name='word_indices',
                                   dtype='int32')
        embedding = layers.Embedding(
            input_dim=params.get('vocab_size'),
            output_dim=params.get('hidden_units_size'),
            trainable=True,
            name='token_encoding')
        inputs = embedding(word_inputs)
    elif token_encoding == 'char':
        # Train character-level representation
        word_inputs = layers.Input(shape=(None, params.get('token_maxlen')),
                                   dtype='int32',
                                   name='char_indices')
        inputs = self.char_level_token_encoder()(word_inputs)
    else:
        # This case used to fall through to a NameError further down.
        raise ValueError(
            "Unsupported token_encoding {!r}; expected 'word' or 'char'"
            .format(token_encoding))

    # Token embeddings for Input. Both encodings share this path. The 'char'
    # branch used to feed the un-dropped ``inputs`` to TimeStepDropout, unlike
    # the 'word' branch.
    drop_inputs = layers.SpatialDropout1D(params.get('dropout_rate'))(inputs)
    lstm_inputs = TimeStepDropout(
        params.get('word_dropout_rate'))(drop_inputs)

    # Pass outputs as inputs to apply sampled softmax
    next_ids = layers.Input(shape=(None, 1), name='next_ids', dtype='float32')
    previous_ids = layers.Input(shape=(None, 1),
                                name='previous_ids',
                                dtype='float32')

    # Reversed input for backward LSTMs
    re_lstm_inputs = layers.Lambda(function=ELMo.reverse)(lstm_inputs)
    mask = layers.Lambda(function=ELMo.reverse)(drop_inputs)

    def clip_constraint(key):
        # MinMaxNorm bounded by +/- the configured clip value. The value is
        # read with .get(); the mapping itself is not callable.
        return constraints.MinMaxNorm(-1 * params.get(key), params.get(key))

    def residual_lstm_block(sequence, mask_source, block_name):
        # One LSTM -> Camouflage -> projection -> residual add -> dropout.
        if params['cuDNN']:
            recurrent = layers.CuDNNLSTM(
                units=params.get('lstm_units_size'),
                return_sequences=True,
                kernel_constraint=clip_constraint('cell_clip'),
                recurrent_constraint=clip_constraint('cell_clip'))(sequence)
        else:
            recurrent = layers.LSTM(
                units=params.get('lstm_units_size'),
                return_sequences=True,
                activation='tanh',
                recurrent_activation='sigmoid',
                kernel_constraint=clip_constraint('cell_clip'),
                recurrent_constraint=clip_constraint('cell_clip'))(sequence)
        recurrent = Camouflage(mask_value=0)(inputs=[recurrent, mask_source])
        # Projection to hidden_units_size
        projected = layers.TimeDistributed(
            layers.Dense(params.get('hidden_units_size'),
                         activation='linear',
                         kernel_constraint=clip_constraint('proj_clip')))(
                             recurrent)
        # Merge Bi-LSTMs feature vectors with the previous ones
        merged = layers.add([projected, sequence], name=block_name)
        # Apply variational drop-out between BI-LSTM layers
        return layers.SpatialDropout1D(params.get('dropout_rate'))(merged)

    # Forward LSTMs
    for i in range(params.get('n_lstm_layers')):
        lstm_inputs = residual_lstm_block(lstm_inputs, drop_inputs,
                                          'f_block_{}'.format(i + 1))

    # Backward LSTMs
    for i in range(params.get('n_lstm_layers')):
        re_lstm_inputs = residual_lstm_block(re_lstm_inputs, mask,
                                             'b_block_{}'.format(i + 1))

    # Reverse backward LSTMs' outputs = Make it forward again
    re_lstm_inputs = layers.Lambda(function=ELMo.reverse,
                                   name='reverse')(re_lstm_inputs)

    # Project to Vocabulary with Sampled Softmax. ``embedding`` exists only
    # for word encoding; the condition short-circuits before touching it
    # otherwise.
    sampled_softmax = SampleSoftmax(
        num_classes=params.get('vocab_size'),
        num_sampled=int(params.get('num_sampled')),
        tied_to=embedding if params.get('weight_tying')
        and token_encoding == 'word' else None)
    outputs = sampled_softmax([lstm_inputs, next_ids])
    re_outputs = sampled_softmax([re_lstm_inputs, previous_ids])

    self._model = models.Model(inputs=[word_inputs, next_ids, previous_ids],
                               outputs=[outputs, re_outputs])
    self._model.compile(optimizer=optimizers.Adagrad(
        lr=params.get('lr'), clipvalue=params.get('clip_value')),
                        loss=None)
    # summary() prints by itself and returns None, so it is not wrapped in
    # print().
    self._model.summary()
def gentext_diag(self, dirname='./sacredtexts', epochs=30, steps_per_epoch=200, genlength=30, maxbatch=1000, epoch_keep=1, temp_keep=1):
    """Train a next-token LSTM one epoch at a time, printing sample text after each.

    After every training round a random window of ``self.lookback`` tokens
    is taken from ``self.tokens`` and extended by ``genlength`` sampled
    tokens at each of the temperatures 0.5 and 1.0. Everything is written
    to stdout.

    Args:
        dirname: Forwarded to ``self.generator`` as ``dirname``.
        epochs: Number of single-epoch training rounds to run.
        steps_per_epoch: Generator batches consumed per training round.
        genlength: Tokens sampled per temperature after each round.
        maxbatch: Forwarded to ``self.generator`` as ``batch_size``.
        epoch_keep: 1-based round in which the model is stored on
            ``self.model``.
        temp_keep: Temperature that must be reached in round ``epoch_keep``
            for the model to be stored; only 0.5 and 1.0 are ever sampled.
    """
    vocab_size = len(self.tokens_unique)
    input_shape = (self.lookback, vocab_size)

    model = keras.models.Sequential()
    model.add(layers.TimeDistributed(layers.Dense(28), input_shape=input_shape))
    # model.add(layers.LeakyReLU(alpha=.001))
    model.add(layers.CuDNNLSTM(64, input_shape=input_shape))
    model.add(layers.Dropout(0.2, noise_shape=None, seed=None))
    model.add(layers.Dense(vocab_size, activation='softmax'))
    optimizer = keras.optimizers.Adam(lr=0.01)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    # When generating, temperature = 0.5 seems to work best.
    def sample(preds, temperature):
        """Draw one index from ``preds`` after rescaling it by ``temperature``."""
        # float64 keeps the renormalised probabilities within the tolerance
        # np.random.multinomial requires.
        preds = np.asarray(preds).astype('float64')
        preds = np.log(preds) / temperature
        exp_preds = np.exp(preds)
        preds = exp_preds / np.sum(exp_preds)
        probas = np.random.multinomial(1, preds, 1)
        return np.argmax(probas)

    # Character models print tokens back to back; any other kind is
    # space-separated.
    separator = '' if self.kind == 'char' else ' '

    # Epochs are numbered from 1 so that they line up with ``epoch_keep``;
    # the upper bound is inclusive so exactly ``epochs`` rounds are trained.
    for epoch in range(1, epochs + 1):
        print('epoch', epoch)
        # Fit the model for 1 epoch on the available data
        model.fit_generator(self.generator(dirname=dirname, batch_size=maxbatch),
                            steps_per_epoch=steps_per_epoch,
                            epochs=1)

        # Select a text seed at random
        start_index = random.randint(0, len(self.tokens) - self.lookback - 1)
        generated_text = self.tokens[start_index: start_index + self.lookback]
        print('--- Generating with seed: "' + separator.join(generated_text) + '"')

        # The window is not reset between temperatures: the second
        # temperature continues from the text produced by the first.
        for temperature in [0.5, 1.0]:
            print('------ temperature:', temperature)
            sys.stdout.write(separator.join(generated_text))

            for _ in range(genlength):
                # One-hot encode the current window as a single-sample batch.
                sampled = np.zeros((1, self.lookback, vocab_size))
                for t, token in enumerate(generated_text):
                    sampled[0, t, self.token_indices[token]] = 1.

                preds = model.predict(sampled, verbose=0)[0]
                next_index = sample(preds, temperature)
                next_token = self.index_tokens[next_index]

                # Slide the window forward by one token.
                generated_text.append(next_token)
                generated_text = generated_text[1:]

                sys.stdout.write(separator + next_token)
                sys.stdout.flush()

            if epoch == epoch_keep and temperature == temp_keep:
                # Stores a reference to the live model, not a snapshot:
                # later rounds keep training the same object.
                self.model = model
            print()
def retain(ARGS):
    '''Assemble the attention model configured by ARGS and return it uncompiled'''
    # Width of one visit vector: code embedding plus optional numeric features
    reshape_size = ARGS.emb_size + ARGS.numeric_size

    # Pick constraints/activation depending on whether negative weights are allowed
    if not ARGS.allow_negative:
        embeddings_constraint = FreezePadding_Non_Negative()
        beta_activation = 'sigmoid'
        output_constraint = non_neg()
    else:
        embeddings_constraint = FreezePadding()
        beta_activation = 'tanh'
        output_constraint = None

    # Empty list when no GPU is available
    glist = get_available_gpus()

    def reshape(data):
        '''Turn the 2D context vectors into (batch, 1, reshape_size)'''
        batch = K.shape(data)[0]
        return K.reshape(x=data, shape=(batch, 1, reshape_size))

    def bidirectional_lstm(layer_name):
        '''Bidirectional recurrent layer; CuDNN-backed whenever a GPU was found'''
        if glist:
            inner = L.CuDNNLSTM(ARGS.recurrent_size, return_sequences=True)
        else:
            inner = L.LSTM(ARGS.recurrent_size,
                           return_sequences=True,
                           implementation=2)
        return L.Bidirectional(inner, name=layer_name)

    # --- Inputs -----------------------------------------------------------
    codes = L.Input((None, None), name='codes_input')
    inputs_list = [codes]

    # Embed every code, then sum over the codes axis to get one vector per visit
    embedding_layer = L.Embedding(ARGS.num_codes + 1,
                                  ARGS.emb_size,
                                  name='embedding',
                                  embeddings_constraint=embeddings_constraint)
    codes_embs_total = embedding_layer(codes)
    codes_embs = L.Lambda(lambda x: K.sum(x, axis=2))(codes_embs_total)

    # Optional numeric features are appended to the visit embedding
    full_embs = codes_embs
    if ARGS.numeric_size:
        numerics = L.Input((None, ARGS.numeric_size), name='numeric_input')
        inputs_list.append(numerics)
        full_embs = L.concatenate([codes_embs, numerics], name='catInp')

    # Dropout on the inputs
    full_embs = L.Dropout(ARGS.dropout_input)(full_embs)

    # Optional time feature is only fed to the recurrent layers
    time_embs = full_embs
    if ARGS.use_time:
        time_input = L.Input((None, 1), name='time_input')
        inputs_list.append(time_input)
        time_embs = L.concatenate([full_embs, time_input], name='catInp2')

    # --- Layers -----------------------------------------------------------
    # Bidirectional LSTMs are used instead of feeding the sequence in reverse
    # order (see https://github.com/mp2893/retain/issues/3 for more details)
    alpha = bidirectional_lstm('alpha')
    beta = bidirectional_lstm('beta')

    alpha_dense = L.Dense(1, kernel_regularizer=l2(ARGS.l2))
    beta_dense = L.Dense(reshape_size,
                         activation=beta_activation,
                         kernel_regularizer=l2(ARGS.l2))

    # Alpha: attention over visits (softmax along axis 1)
    alpha_out = L.TimeDistributed(alpha_dense, name='alpha_dense_0')(alpha(time_embs))
    alpha_out = L.Softmax(axis=1)(alpha_out)

    # Beta: attention over the embedding dimensions
    beta_out = L.TimeDistributed(beta_dense, name='beta_dense_0')(beta(time_embs))

    # Context vector: attention-weighted embeddings summed over axis 1
    c_t = L.Multiply()([alpha_out, beta_out, full_embs])
    c_t = L.Lambda(lambda x: K.sum(x, axis=1))(c_t)

    # Back to 3D so Many to Many and Many to One implementations look alike
    contexts = L.Lambda(reshape)(c_t)

    # --- Prediction -------------------------------------------------------
    contexts = L.Dropout(ARGS.dropout_context)(contexts)
    output_layer = L.Dense(1,
                           activation='sigmoid',
                           name='dOut',
                           kernel_regularizer=l2(ARGS.l2),
                           kernel_constraint=output_constraint)

    # TimeDistributed keeps Many to Many and Many to One implementations consistent
    output = L.TimeDistributed(output_layer, name='time_distributed_out')(contexts)

    # Only the inputs that were actually created are wired into the model
    return Model(inputs=inputs_list, outputs=[output])
def create_sample_rnn(input_shape: Tuple[int], num_classes):
    """Return an uncompiled two-layer Sequential model.

    The model is a 32-unit CuDNNLSTM reading ``input_shape`` followed by a
    softmax Dense layer with ``num_classes`` outputs.
    """
    recurrent = layers.CuDNNLSTM(32, input_shape=input_shape)
    classifier = layers.Dense(num_classes, activation="softmax")
    return models.Sequential([recurrent, classifier])
def build(self, mode, config):
    """Build the 'RRCNN' model that refines bounding boxes with LSTMs.

    Two inputs are consumed: a box-coordinate sequence and a feature
    sequence, each with a fixed batch size and ``config.TIME_STEPS`` steps.
    Each goes through its own CuDNNLSTM and dense stack; the branches are
    concatenated and a tanh layer predicts a delta that is added to the
    input coordinates.

    Args:
        mode: ``'training'`` predicts a delta for every time step and
            compiles the model; ``'inference'`` predicts one delta that is
            added to the last time step's coordinates.
        config: Object providing BATCH_SIZE, TIME_STEPS, MRCNNBBOX_SIZE,
            FEATURE_SIZE, L1_CELL_SIZE, OUTPUT_SIZE and (for training)
            LEARNING_RATE.

    Returns:
        The Keras model (compiled only in training mode).

    Raises:
        ValueError: If ``mode`` is neither ``'training'`` nor ``'inference'``.
    """
    # Fail fast: an unknown mode would otherwise leave the output tensor
    # undefined and surface later as a NameError.
    if mode not in ('training', 'inference'):
        raise ValueError(
            "Unsupported mode %r; expected 'training' or 'inference'" % (mode,))

    print("# Building LSTM %s model --------------- #\n" % (mode))

    # Model graph
    # Bbox coord: [Batch, TS, 4]
    input_coord = KL.Input(batch_shape=(config.BATCH_SIZE,
                                        config.TIME_STEPS,
                                        config.MRCNNBBOX_SIZE))
    # Feature map: [Batch, TS, 1024]
    input_feat = KL.Input(batch_shape=(config.BATCH_SIZE,
                                       config.TIME_STEPS,
                                       config.FEATURE_SIZE))

    # LSTM
    if mode == 'training':
        # Separate recurrent branches, one output per time step
        x_coord = KL.CuDNNLSTM(
            units=config.L1_CELL_SIZE,
            return_sequences=True,
            stateful=False
        )(input_coord)
        x_feat = KL.CuDNNLSTM(
            units=config.L1_CELL_SIZE,
            return_sequences=True,
            stateful=False
        )(input_feat)

        x_coord = KL.TimeDistributed(KL.Dense(2048, activation='relu'))(x_coord)
        x_coord = KL.TimeDistributed(KL.Dense(1024, activation='relu'))(x_coord)

        x_feat = KL.TimeDistributed(KL.Dense(2048, activation='relu'))(x_feat)
        x_feat = KL.TimeDistributed(KL.Dense(1024, activation='relu'))(x_feat)

        x = KL.Concatenate()([x_coord, x_feat])
        delta = KL.TimeDistributed(
            KL.Dense(config.OUTPUT_SIZE, activation='tanh'))(x)
        # Residual update applied to every time step
        out_bbox = KL.Add()([input_coord, delta])
    else:
        # Inference: same layer sequence without the TimeDistributed
        # wrappers; the LSTMs emit only their final output
        x_coord = KL.CuDNNLSTM(
            units=config.L1_CELL_SIZE,
            return_sequences=False,
            stateful=False
        )(input_coord)
        x_feat = KL.CuDNNLSTM(
            units=config.L1_CELL_SIZE,
            return_sequences=False,
            stateful=False
        )(input_feat)

        x_coord = KL.Dense(2048, activation='relu')(x_coord)
        x_coord = KL.Dense(1024, activation='relu')(x_coord)

        x_feat = KL.Dense(2048, activation='relu')(x_feat)
        x_feat = KL.Dense(1024, activation='relu')(x_feat)

        x = KL.Concatenate()([x_coord, x_feat])
        delta = KL.Dense(config.OUTPUT_SIZE, activation='tanh')(x)
        # Slice out the last time step; its width is the coordinate size
        # declared on the input, not a fixed 4.
        last_coord = KL.Lambda(lambda x: x[:, config.TIME_STEPS-1, :],
                               output_shape=(config.MRCNNBBOX_SIZE,))(input_coord)
        out_bbox = KL.Add()([last_coord, delta])

    # Create model
    model = KM.Model(inputs=[input_coord, input_feat],
                     outputs=out_bbox,
                     name='RRCNN')
    model.summary()

    if mode == 'training':
        # Initial loss function and optimizer
        adam = KO.Adam(config.LEARNING_RATE)
        model.compile(optimizer=adam, loss=smooth_l1_loss)

    plot_model(model, to_file='RRCNN.png', show_shapes=True, show_layer_names=False)
    return model