# Common imports assumed by the snippets below (Keras 2.x API; the exact
# import style in the original source files may differ). Project-local modules
# (dataset, utils, plots, conf, mio, loader, FLAGS, ...) are referenced as-is.
import numpy as np
import tensorflow as tf
import keras
from keras import (activations, applications, backend as K, callbacks,
                   initializers, layers, losses, metrics, models, optimizers,
                   regularizers)
from os.path import join


def train(neurons, hidden, act, epochs=10, repetition=0, summary=False):
    samples = int(1e6)
    h = 1
    norms = np.random.uniform(0, 3, (samples, 1))
    kn = gaussian(norms, h)
    X = norms
    y = kn

    inputs = layers.Input(shape=(1, ))
    x = layers.Dense(neurons, activation=act)(inputs)
    for i in range(hidden - 1):
        x = layers.Dense(neurons, activation=act)(x)
    outputs = layers.Dense(1, activation='linear')(x)

    save_path = "models/kernel/h{}/nn_{}_{}.h5".format(hidden, neurons,
                                                       repetition)
    model = models.Model(inputs=inputs, outputs=outputs)
    early_stop = callbacks.EarlyStopping(
        monitor='val_mean_absolute_percentage_error', patience=10)
    check_point = callbacks.ModelCheckpoint(
        save_path,
        monitor='val_mean_absolute_percentage_error',
        save_best_only=True,
        mode='min')
    opt = optimizers.Adam(lr=1e-3, decay=1e-5)
    model.compile(optimizer=opt,
                  loss='mean_squared_error',
                  metrics=['mean_absolute_percentage_error'])
    if summary:
        model.summary()
    history = model.fit(X, y,
                        epochs=epochs,
                        batch_size=50,
                        callbacks=[check_point, early_stop],
                        validation_split=0.01)
    return models.load_model(save_path)

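# `gaussian` is used above but not defined in this excerpt. A minimal sketch,
# assuming a normalized 1D Gaussian smoothing kernel with smoothing length h;
# the project's actual normalization may differ.
def gaussian(r, h):
    # W(r, h) = exp(-(r / h)^2) / (h * sqrt(pi)); assumed form, not confirmed.
    return np.exp(-(r / h) ** 2) / (h * np.sqrt(np.pi))

# Hypothetical call: a 2-hidden-layer, 64-neuron network trained for 10 epochs.
# model = train(neurons=64, hidden=2, act='relu', epochs=10)
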
def _gru_ctc_init(self):
    self.input_data = layers.Input(name='the_input',
                                   shape=(self.AUDIO_LENGTH,
                                          self.AUDIO_FEATURE_LENGTH, 1))
    layers_h1 = layers.Reshape((-1, 200))(self.input_data)
    layers_h2 = GRUCTCAM._dense(128, layers_h1)
    layers_h3 = GRUCTCAM._bi_gru(64, layers_h2)
    y_pred = GRUCTCAM._dense(self.OUTPUT_SIZE, layers_h3, activation='softmax')
    self.gru_model = models.Model(inputs=self.input_data, outputs=y_pred)

    self.labels = layers.Input(name='the_label',
                               shape=[self.LABEL_SEQUENCE_LENGTH],
                               dtype='float32')
    self.input_length = layers.Input(name='input_length', shape=[1],
                                     dtype='int64')
    self.label_length = layers.Input(name='label_length', shape=[1],
                                     dtype='int64')
    self.loss = layers.Lambda(function=self._ctc_lambda_func,
                              output_shape=(1, ),
                              name='ctc')([y_pred, self.labels,
                                           self.input_length,
                                           self.label_length])
    self.ctc_model = models.Model(inputs=[self.input_data, self.labels,
                                          self.input_length,
                                          self.label_length],
                                  outputs=self.loss)
    optimizer = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999,
                                decay=0.0, epsilon=10e-8)
    self.ctc_model.compile(optimizer=optimizer,
                           loss={'ctc': lambda y_true, y_pred: y_pred})
    print('[*Info] Model created and compiled successfully.')
    return self.gru_model, self.ctc_model

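# The Lambda layer above calls self._ctc_lambda_func, which is not shown in
# this excerpt. A minimal sketch, assuming the standard Keras CTC pattern:
# the unpacking order matches the list passed to the Lambda layer, and
# K.ctc_batch_cost returns a (batch, 1) tensor of per-sample CTC losses.
def _ctc_lambda_func(self, args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
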
def build_model(self):
    l2_regularization_kernel = 1e-5

    # Input layer (renamed from `input` to avoid shadowing the builtin)
    input_states = layers.Input(shape=(self.state_size, ), name='input_states')

    # Hidden layers
    model = layers.Dense(
        units=300,
        kernel_regularizer=regularizers.l2(l2_regularization_kernel))(input_states)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(1e-2)(model)

    model = layers.Dense(
        units=400,
        kernel_regularizer=regularizers.l2(l2_regularization_kernel))(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(1e-2)(model)

    model = layers.Dense(
        units=200,
        kernel_regularizer=regularizers.l2(l2_regularization_kernel))(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(1e-2)(model)

    # Our output layer - a fully connected layer
    output = layers.Dense(
        units=self.action_size,
        activation='tanh',
        kernel_regularizer=regularizers.l2(l2_regularization_kernel),
        kernel_initializer=initializers.RandomUniform(minval=-3e-3,
                                                      maxval=3e-3),
        name='output_actions')(model)

    # Keras model
    self.model = models.Model(inputs=input_states, outputs=output)

    # Define loss and optimizer
    action_gradients = layers.Input(shape=(self.action_size, ))
    loss = K.mean(-action_gradients * output)
    optimizer = optimizers.Adam(lr=1e-4)
    update_operation = optimizer.get_updates(
        params=self.model.trainable_weights, loss=loss)
    self.train_fn = K.function(
        inputs=[self.model.input, action_gradients, K.learning_phase()],
        outputs=[],
        updates=update_operation)

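# Hypothetical usage of the custom training function above: `states` is a
# (batch, state_size) array, `action_gradients` comes from the critic, and the
# trailing 1 selects the Keras training phase. A sketch of the usual DDPG
# actor update, not code from this project:
# self.train_fn([states, action_gradients, 1])
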
def build_model(self):
    l2_kernel_regularization = 1e-5

    # Define input layers
    input_states = layers.Input(shape=(self.state_size, ), name='input_states')
    input_actions = layers.Input(shape=(self.action_size, ),
                                 name='input_actions')

    # Hidden layers for states
    model_states = layers.Dense(
        units=32,
        kernel_regularizer=regularizers.l2(l2_kernel_regularization))(input_states)
    model_states = layers.BatchNormalization()(model_states)
    model_states = layers.LeakyReLU(1e-2)(model_states)
    model_states = layers.Dense(
        units=64,
        kernel_regularizer=regularizers.l2(l2_kernel_regularization))(model_states)
    model_states = layers.BatchNormalization()(model_states)
    model_states = layers.LeakyReLU(1e-2)(model_states)

    # Hidden layers for actions
    model_actions = layers.Dense(
        units=64,
        kernel_regularizer=regularizers.l2(l2_kernel_regularization))(input_actions)
    model_actions = layers.BatchNormalization()(model_actions)
    model_actions = layers.LeakyReLU(1e-2)(model_actions)

    # Both pathways merge here
    model = layers.add([model_states, model_actions])

    # Fully connected and batch normalization
    model = layers.Dense(
        units=32,
        kernel_regularizer=regularizers.l2(l2_kernel_regularization))(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(1e-2)(model)

    # Q values / output layer
    Q_values = layers.Dense(
        units=1,
        activation=None,
        kernel_regularizer=regularizers.l2(l2_kernel_regularization),
        kernel_initializer=initializers.RandomUniform(minval=-5e-3,
                                                      maxval=5e-3),
        name='output_Q_values')(model)

    # Wrap everything in a Keras model
    self.model = models.Model(inputs=[input_states, input_actions],
                              outputs=Q_values)
    optimizer = optimizers.Adam(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss='mse')

    action_gradients = K.gradients(Q_values, input_actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)

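# Hypothetical usage: fetch dQ/da for the actor update. K.gradients returns a
# list, so the result is unwrapped with [0]; the trailing 0 selects the
# inference phase. Array names and shapes here are assumptions.
# dq_da = self.get_action_gradients([states, actions, 0])[0]
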
def build_model(self):
    """Build an actor (policy) network that maps states -> actions."""
    # Define input layer (states)
    states = layers.Input(shape=(self.state_size, ), name='states')

    '''# Add hidden layers
    net = layers.Dense(units=32, activation='relu')(states)
    net = layers.Dense(units=64, activation='relu')(net)
    net = layers.Dense(units=32, activation='relu')(net)

    # Try different layer sizes, activations, add batch normalization, regularizers, etc.

    # Add final output layer with sigmoid activation
    raw_actions = layers.Dense(units=self.action_size, activation='sigmoid',
                               name='raw_actions')(net)
    '''
    ###################################
    # Add hidden layers
    net = layers.Dense(units=400,
                       kernel_regularizer=regularizers.l2(1e-6))(states)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)
    net = layers.Dense(units=300,
                       kernel_regularizer=regularizers.l2(1e-6))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    # Add final output layer with sigmoid activation
    raw_actions = layers.Dense(
        units=self.action_size,
        activation='sigmoid',
        name='raw_actions',
        kernel_initializer=initializers.RandomUniform(minval=-0.003,
                                                      maxval=0.003))(net)
    #######################################

    # Scale [0, 1] output for each action dimension to proper range
    actions = layers.Lambda(lambda x: (x * self.action_range) + self.action_low,
                            name='actions')(raw_actions)

    # Create Keras model
    self.model = models.Model(inputs=states, outputs=actions)

    # Define loss function using action value (Q value) gradients
    action_gradients = layers.Input(shape=(self.action_size, ))
    loss = K.mean(-action_gradients * actions)
    # Incorporate any additional losses here (e.g. from regularizers)

    # Define optimizer and training function
    optimizer = optimizers.Adam(lr=1e-6)
    updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                       loss=loss)
    self.train_fn = K.function(
        inputs=[self.model.input, action_gradients, K.learning_phase()],
        outputs=[],
        updates=updates_op)

def build_model(self):
    """Build an actor (policy) network that maps states -> actions."""
    # Define input layer (states)
    states = layers.Input(shape=(self.state_size, ), name='states')

    # --------- copied from the DDPG quadcopter project -----------
    net = layers.Dense(units=400)(states)
    # net = layers.BatchNormalization()(net)
    net = layers.Activation("relu")(net)
    net = layers.Dense(units=200)(net)
    # net = layers.BatchNormalization()(net)
    net = layers.Activation("relu")(net)
    actions = layers.Dense(units=self.action_size,
                           activation='softmax',
                           name='actions',
                           kernel_initializer=initializers.RandomUniform(
                               minval=-1, maxval=1))(net)
    # actions = layers.Dense(units=self.action_size, activation='sigmoid', name='actions',
    #                        kernel_initializer=initializers.RandomUniform(minval=-0.001, maxval=0.001))(net)

    # Add hidden layers
    # net = layers.Dense(units=16, activation=activations.sigmoid)(states)
    # net = layers.BatchNormalization()(net)
    # net = layers.Dense(units=16, activation=activations.sigmoid)(net)
    # net = layers.BatchNormalization()(net)
    # net = layers.Dense(units=128, activation=activations.relu)(net)
    # net = layers.BatchNormalization()(net)

    # Add final output layer with sigmoid activation
    # actions = layers.Dense(units=self.action_size, activation='linear',  # sigmoid
    #                        name='raw_actions')(net)

    # Scale [0, 1] output for each action dimension to proper range
    # actions = layers.Lambda(lambda x: (x * self.action_range) + self.action_low,
    #                         name='actions')(raw_actions)

    # Create Keras model
    self.model = models.Model(inputs=states, outputs=actions)

    action_gradients = layers.Input(shape=(self.action_size, ))
    loss = K.mean(-action_gradients * actions)

    # Define optimizer and training function
    optimizer = optimizers.Adam(lr=0.0001)
    updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                       loss=loss)
    self.train_fn = K.function(
        inputs=[self.model.input, action_gradients, K.learning_phase()],
        outputs=[],
        updates=updates_op)

def main(_):
    train_dir, valid_dir, test_dir = dataset.prepare()
    num_train = len(utils.listdir(train_dir, recursive=True))
    num_valid = len(utils.listdir(valid_dir, recursive=True))
    num_test = len(utils.listdir(test_dir, recursive=True))
    print('Training images: {:5}'.format(num_train))
    print('Validation images: {:5}'.format(num_valid))
    print('Test images: {:5}'.format(num_test))

    model = create_model(FLAGS.dropout)
    model.summary()
    model.compile(optimizer=optimizers.Adam(FLAGS.learning_rate),
                  loss='binary_crossentropy',
                  metrics=['acc'])

    train_datagen = dataset.get_generator(train_dir, FLAGS.batch_size,
                                          augmentation=FLAGS.augmentation)
    valid_datagen = dataset.get_generator(valid_dir, FLAGS.batch_size,
                                          augmentation=False)
    test_datagen = dataset.get_generator(test_dir, FLAGS.batch_size,
                                         augmentation=False)

    res = model.fit_generator(train_datagen,
                              steps_per_epoch=num_train // FLAGS.batch_size,
                              epochs=FLAGS.epochs,
                              validation_data=valid_datagen,
                              validation_steps=num_valid // FLAGS.batch_size)
    model.save('cats_and_dogs_{}.h5'.format(num_train))

    plots.show_accuracy(res.history['acc'], res.history['val_acc'])
    plots.show_loss(res.history['loss'], res.history['val_loss'])

    scores = model.evaluate_generator(test_datagen,
                                      steps=num_test // FLAGS.batch_size)
    print('Test result:')
    print('Loss: {} Accuracy: {}'.format(scores[0], scores[1]))

def build_model(self):
    states = layers.Input(shape=(self.state_size, ), name='inputStates')

    # Hidden layers
    model = layers.Dense(units=128, activation='linear')(states)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(0.01)(model)
    model = layers.Dropout(0.3)(model)

    model = layers.Dense(units=256, activation='linear')(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(0.01)(model)
    model = layers.Dropout(0.3)(model)

    model = layers.Dense(units=512, activation='linear')(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(0.01)(model)
    model = layers.Dropout(0.3)(model)

    model = layers.Dense(units=128, activation='linear')(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(0.01)(model)
    model = layers.Dropout(0.3)(model)

    output = layers.Dense(units=self.action_size,
                          activation='tanh',
                          kernel_regularizer=regularizers.l2(0.01),
                          name='outputActions')(model)

    # Keras model
    self.model = models.Model(inputs=states, outputs=output)

    # Define optimizer and training function
    actionGradients = layers.Input(shape=(self.action_size, ))
    loss = K.mean(-actionGradients * output)
    optimizer = optimizers.Adam()
    update_operation = optimizer.get_updates(
        params=self.model.trainable_weights, loss=loss)
    self.train_fn = K.function(
        inputs=[self.model.input, actionGradients, K.learning_phase()],
        outputs=[],
        updates=update_operation)

def build_model(self):
    # Define input layers
    inputStates = layers.Input(shape=(self.state_size, ), name='inputStates')
    inputActions = layers.Input(shape=(self.action_size, ),
                                name='inputActions')

    # Hidden layers for states
    modelS = layers.Dense(units=128, activation='linear')(inputStates)
    modelS = layers.BatchNormalization()(modelS)
    modelS = layers.LeakyReLU(0.01)(modelS)
    modelS = layers.Dropout(0.3)(modelS)
    modelS = layers.Dense(units=256, activation='linear')(modelS)
    modelS = layers.BatchNormalization()(modelS)
    modelS = layers.LeakyReLU(0.01)(modelS)
    modelS = layers.Dropout(0.3)(modelS)

    # Hidden layers for actions
    modelA = layers.Dense(units=256, activation='linear')(inputActions)
    modelA = layers.LeakyReLU(0.01)(modelA)
    modelA = layers.BatchNormalization()(modelA)
    modelA = layers.Dropout(0.5)(modelA)

    # Merging the models
    model = layers.add([modelS, modelA])
    model = layers.Dense(units=256, activation='linear')(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(0.01)(model)

    # Q layer
    Qvalues = layers.Dense(units=1, activation=None,
                           name='outputQvalues')(model)

    # Keras model
    self.model = models.Model(inputs=[inputStates, inputActions],
                              outputs=Qvalues)
    optimizer = optimizers.Adam()
    self.model.compile(optimizer=optimizer, loss='mse')

    actionGradients = K.gradients(Qvalues, inputActions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=actionGradients)

def build_model(input_seq_len, output_seq_len, num_samples, multi_gpus=False):
    RNN = layers.LSTM
    encoder_layers = 1
    decoder_layers = 2
    hidden_dim = 200

    model = models.Sequential()
    model.add(layers.TimeDistributed(layers.Dense(100, activation='relu'),
                                     input_shape=(input_seq_len, 1)))
    for _ in range(encoder_layers):
        model.add(RNN(hidden_dim, return_sequences=True))
    model.add(RNN(hidden_dim, return_sequences=False))
    model.add(layers.RepeatVector(output_seq_len))
    for _ in range(decoder_layers):
        model.add(RNN(hidden_dim, return_sequences=True))
    model.add(layers.TimeDistributed(layers.Dense(1)))

    decay = 1. / num_samples
    optimizer = optimizers.Adam(lr=0.1, decay=decay)

    def score_func(y_true, y_pred):
        y_true = tf.reduce_sum(y_true, axis=1)
        y_pred = tf.reduce_sum(y_pred, axis=1)
        mae = tf.reduce_sum(tf.abs(y_true - y_pred))
        score = mae / tf.reduce_sum(y_true)
        return score

    if multi_gpus:
        model = keras.utils.multi_gpu_model(model, gpus=2)
    model.compile(loss='mean_squared_error',
                  optimizer=optimizer,
                  metrics=['mae'])
    print('model input shape: {0}'.format(model.input_shape))
    print('model output shape: {0}'.format(model.output_shape))
    return model

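# Hypothetical usage; the sequence lengths, sample count, and training arrays
# below are placeholders, not values from this project.
# model = build_model(input_seq_len=48, output_seq_len=12, num_samples=10000)
# model.fit(x_train, y_train, epochs=10, batch_size=64)
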
def train_and_validate(model, train_dataset, val_dataset):
    optimizer = optimizers.Adam(lr=LEARNING_RATE)
    loss = 'categorical_crossentropy'
    metrics = ['categorical_accuracy']

    filepath = "saved_models/transfer_learning_epoch_{epoch:02d}_{val_categorical_accuracy:.4f}.h5"
    checkpoint = callbacks.ModelCheckpoint(filepath,
                                           monitor='val_categorical_accuracy',
                                           verbose=0,
                                           save_best_only=False)
    callbacks_list = [checkpoint]

    TRAIN_STEPS = 19302 // BATCH_SIZE  # TODO: change the numbers
    VAL_STEPS = 1927 // BATCH_SIZE

    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    model.fit(train_dataset,
              epochs=NUM_EPOCHS,
              steps_per_epoch=TRAIN_STEPS,
              validation_data=val_dataset,
              validation_steps=VAL_STEPS,
              callbacks=callbacks_list)

def train(neurons, hidden=1, act='relu', epochs=10, repetition=0):
    samples = int(1e6)
    norms = np.random.uniform(0, 3, samples)
    veldiffs = np.random.uniform(0, 1, samples)
    dkn = dgaussian(norms, 1)
    cont = continuity(veldiffs, dkn)
    X = np.zeros((samples, 2))
    X[:, 0] = norms / 3
    X[:, 1] = veldiffs
    y = cont

    inputs = layers.Input(shape=(2, ))
    x = layers.Dense(neurons, activation=act)(inputs)
    for i in range(hidden - 1):
        x = layers.Dense(neurons, activation=act)(x)
    outputs = layers.Dense(1, activation='linear')(x)

    save_path = "models/continuity/h{}/nn_{}_{}.h5".format(hidden, neurons,
                                                           repetition)
    model = models.Model(inputs=inputs, outputs=outputs)
    early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=10)
    check_point = callbacks.ModelCheckpoint(save_path,
                                            monitor='val_loss',
                                            save_best_only=True,
                                            mode='min')
    opt = optimizers.Adam(lr=1e-3, decay=1e-5)
    model.compile(optimizer=opt,
                  loss='mean_squared_error',
                  metrics=['mean_absolute_percentage_error'])
    history = model.fit(X, y,
                        epochs=epochs,
                        batch_size=100,
                        callbacks=[early_stop, check_point],
                        validation_split=0.01)
    return models.load_model(save_path)

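# `dgaussian` and `continuity` are used above but not defined in this excerpt.
# A minimal sketch, under the assumption that dgaussian is the radial
# derivative of the Gaussian kernel from train() above and that continuity is
# the product form of an SPH continuity-equation term; both definitions are
# assumptions, not the project's.
def dgaussian(r, h):
    # d/dr of exp(-(r / h)^2) / (h * sqrt(pi))
    return -2.0 * r / h ** 2 * np.exp(-(r / h) ** 2) / (h * np.sqrt(np.pi))

def continuity(veldiff, dkn):
    # Velocity difference times kernel gradient; assumed form.
    return veldiff * dkn
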
def main(_):
    train_dir, valid_dir, test_dir = dataset.prepare()
    num_train = len(utils.listdir(train_dir, recursive=True))
    num_valid = len(utils.listdir(valid_dir, recursive=True))
    num_test = len(utils.listdir(test_dir, recursive=True))
    print('Training images: {:5}'.format(num_train))
    print('Validation images: {:5}'.format(num_valid))
    print('Test images: {:5}'.format(num_test))

    model = create_model(FLAGS.dropout)
    model.summary()
    model.compile(optimizer=optimizers.Adam(FLAGS.learning_rate),
                  loss='binary_crossentropy',
                  metrics=['acc'])

    train_datagen = dataset.get_generator(train_dir, FLAGS.batch_size,
                                          augmentation=FLAGS.augmentation)
    valid_datagen = dataset.get_generator(valid_dir, FLAGS.batch_size,
                                          augmentation=False)
    test_datagen = dataset.get_generator(test_dir, FLAGS.batch_size,
                                         augmentation=False)

    res = model.fit_generator(train_datagen,
                              steps_per_epoch=num_train // FLAGS.batch_size,
                              epochs=FLAGS.epochs,
                              validation_data=valid_datagen,
                              validation_steps=num_valid // FLAGS.batch_size)
    model.save('cats_and_dogs_vgg16_{}.h5'.format(num_train))

    utils.show_accuracy(res.history['acc'], res.history['val_acc'])
    utils.show_loss(res.history['loss'], res.history['val_loss'])

    scores = model.evaluate_generator(test_datagen,
                                      steps=num_test // FLAGS.batch_size)
    print('Test result:')
    print('Loss: {} Accuracy: {}'.format(scores[0], scores[1]))

    if FLAGS.fine_tuning_epochs > 0:
        # Make block5 trainable for fine-tuning
        for layer in model.layers[0].layers:
            if layer.name.startswith('block5_conv'):
                layer.trainable = True
        model.summary()
        model.compile(optimizer=optimizers.Adam(FLAGS.learning_rate / 2),
                      loss='binary_crossentropy',
                      metrics=['acc'])
        res = model.fit_generator(
            train_datagen,
            steps_per_epoch=num_train // FLAGS.batch_size,
            epochs=FLAGS.fine_tuning_epochs,
            validation_data=valid_datagen,
            validation_steps=num_valid // FLAGS.batch_size)
        model.save('cats_and_dogs_vgg16_finetuned_{}.h5'.format(num_train))

        utils.show_accuracy(res.history['acc'], res.history['val_acc'])
        utils.show_loss(res.history['loss'], res.history['val_loss'])

        scores = model.evaluate_generator(test_datagen,
                                          steps=num_test // FLAGS.batch_size)
        print('Test result after fine-tuning:')
        print('Loss: {} Accuracy: {}'.format(scores[0], scores[1]))

if conf.TRAIN_FLAG:
    # Data import
    x_train, y_train = mio.loadBatchData(conf.TRAIN_DATA_PATH,
                                         conf.TRAINING_SIZE,
                                         start_num=conf.TRAINING_START)

    # Model construction
    if conf.MODEL_NAME == 'AttentionSEResUNet':
        # adding channel-level attention: SENet
        model = AttentionSEResUNet(dropout_rate=conf.DROPOUT_RATE,
                                   batch_norm=conf.BATCH_NORM_FLAG)
    else:
        # using spatial-level attention
        model = Attention_ResUNet(dropout_rate=conf.DROPOUT_RATE,
                                  batch_norm=conf.BATCH_NORM_FLAG)
    if conf.MODEL_LOAD_FLAG:
        model.load_weights(conf.MODEL_LOAD_PATH)

    # Training setup
    optimizer = optimizers.Adam()   # training optimizer
    loss = ['mean_squared_error']   # training loss function
    metrics = ['mae']               # training evaluation metrics

    # Model configuration
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    # TensorBoard visualization
    if conf.TENSORBOARD_FLAG:
        tb = callbacks.TensorBoard(log_dir=conf.LOG_PATH,
                                   histogram_freq=0,
                                   batch_size=conf.BATCH_SIZE,
                                   write_graph=False,
                                   write_images=True)
        # embeddings_freq=0,
        # embeddings_layer_names=None,

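    # A sketch of the training call this setup leads into; the fit arguments
    # are assumptions (conf.EPOCHS in particular is not defined in this excerpt).
    # model.fit(x_train, y_train,
    #           batch_size=conf.BATCH_SIZE,
    #           epochs=conf.EPOCHS,
    #           callbacks=[tb] if conf.TENSORBOARD_FLAG else [])
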
'''
# preprocessing_function
def preproces(x):
    return x / 255.0 - 0.5
'''

# Create the base pre-trained model
base_model = applications.inception_v3.InceptionV3(weights='imagenet',
                                                   include_top=False,
                                                   pooling='avg',
                                                   input_shape=(192, 192, 3))
# print(base_model.load_weights("my_model_final.h5", by_name=True))
predictions = layers.Dense(n_classes, activation='softmax')(base_model.output)

# This is the model we will train
model = models.Model(inputs=base_model.input, outputs=predictions)
model.summary()

# decay=0.0001? decay applies 1 / (1 + decay * epochs * batches_per_epoch) * lr
adam = optimizers.Adam(lr=learning_rate)
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=[metrics.categorical_accuracy])

reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss',
                                        verbose=1,
                                        factor=0.9,
                                        patience=2,
                                        min_lr=0.00001)

model.fit(loader.X_train, loader.Y_train,
          batch_size=n_batches,
          epochs=epochs,
          validation_data=(loader.X_test, loader.Y_test),
          callbacks=[reduce_lr])

score = model.evaluate(loader.X_test, loader.Y_test, batch_size=n_batches)
# model.save('my_model_final.h5')
print('Test loss:', score[0])
print('Test accuracy:', score[1])

def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
    # Define input layers
    states = layers.Input(shape=(self.state_size, ), name='states')
    actions = layers.Input(shape=(self.action_size, ), name='actions')

    # Add hidden layer(s) for state pathway
    net_states = layers.Dense(
        units=32, activation='relu', use_bias=False,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01))(states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)
    net_states = layers.Dense(
        units=64, activation='relu', use_bias=False,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01))(net_states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)
    net_states = layers.Dense(
        units=128, activation='relu', use_bias=False,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01))(net_states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)

    # Add hidden layer(s) for action pathway
    net_actions = layers.Dense(
        units=32, activation='relu', use_bias=False,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01))(actions)
    net_actions = layers.BatchNormalization()(net_actions)
    net_actions = layers.LeakyReLU(1e-2)(net_actions)
    net_actions = layers.Dense(
        units=64, activation='relu', use_bias=False,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01))(net_actions)
    net_actions = layers.BatchNormalization()(net_actions)
    net_actions = layers.LeakyReLU(1e-2)(net_actions)
    net_actions = layers.Dense(
        units=128, activation='relu', use_bias=False,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01))(net_actions)
    net_actions = layers.BatchNormalization()(net_actions)
    net_actions = layers.LeakyReLU(1e-2)(net_actions)

    # Try different layer sizes, activations, add batch normalization, regularizers, etc.

    # Combine state and action pathways
    net = layers.Add()([net_states, net_actions])
    net = layers.Activation('relu')(net)

    # Add more layers to the combined network if needed

    # Add final output layer to produce action values (Q values)
    Q_values = layers.Dense(units=1, name='q_values')(net)

    # Create Keras model
    self.model = models.Model(inputs=[states, actions], outputs=Q_values)

    # Define optimizer and compile model for training with built-in loss function
    optimizer = optimizers.Adam()
    self.model.compile(optimizer=optimizer, loss='mse')

    # Compute action gradients (derivative of Q values w.r.t. actions)
    action_gradients = K.gradients(Q_values, actions)

    # Define an additional function to fetch action gradients (to be used by actor model)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)

# (Fragment) batch generator over a list of files; the enclosing function
# definition and the initialization of batch_start and L are reconstructed
# here and are assumptions.
def batch_generator(files):
    L = len(files)
    batch_start = 0
    batch_end = BATCH_SIZE
    while batch_start < L:
        limit = min(batch_end, L)
        file_list = files[batch_start:limit]
        batch_img_array, batch_label_array, batch_idx_array = load_batch(
            file_list)
        yield (batch_img_array, batch_label_array)
        batch_start += BATCH_SIZE
        batch_end += BATCH_SIZE

# Optimizer for the training phase
optimizer = optimizers.Adam(lr=1e-5)
loss = 'categorical_crossentropy'
metrics = ['categorical_accuracy']

# Save a model after every epoch; depending on the network, the files can be large
filepath = "saved_models/transfer_learning_epoch_{epoch:02d}_{val_categorical_accuracy:.4f}.h5"
checkpoint = callbacks.ModelCheckpoint(filepath,
                                       monitor='val_categorical_accuracy',
                                       verbose=0,
                                       save_best_only=False)
callbacks_list = [checkpoint]

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

STEPS_PER_EPOCH = len(train_files) // BATCH_SIZE

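# A sketch of the training call this setup leads into; NUM_EPOCHS and
# val_files are assumptions, mirroring the checkpoint's validation monitor.
# model.fit_generator(batch_generator(train_files),
#                     steps_per_epoch=STEPS_PER_EPOCH,
#                     epochs=NUM_EPOCHS,
#                     validation_data=batch_generator(val_files),
#                     validation_steps=len(val_files) // BATCH_SIZE,
#                     callbacks=callbacks_list)
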
# Define model
model = models.Sequential()
model.add(layers.LSTM(512,
                      input_shape=(X.shape[1], X.shape[2]),
                      activation='tanh',
                      return_sequences=True))
model.add(layers.Dropout(0.2))
model.add(layers.LSTM(512, activation='tanh'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(Y.shape[1], activation='softmax'))
model.summary()

# Define optimizer
adam = optimizers.Adam(learning_rate)
model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Set checkpoint
filepath = '../data/output/practice/weights/weights-improvement={epoch:02d}-{loss:4f}.hdf5'
checkpoint = callbacks.ModelCheckpoint(filepath,
                                       monitor='loss',
                                       verbose=1,
                                       save_best_only=True,
                                       mode='min')
callbacks_list = [checkpoint]

##############
## Training ##

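# A sketch of the training call the header above introduces; the epoch count
# and batch size are placeholders, not the project's values.
# model.fit(X, Y, epochs=50, batch_size=64, callbacks=callbacks_list)
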
def build_model(self):
    # Define input layers
    input_states = layers.Input(shape=(self.state_size, ), name='input_states')
    input_actions = layers.Input(shape=(self.action_size, ),
                                 name='input_actions')

    # ---------- copied from the DDPG quadcopter project ---------
    # Add hidden layer(s) for state pathway
    net_states = layers.Dense(units=400)(input_states)
    # net_states = layers.BatchNormalization()(net_states)
    net_states = layers.Activation("relu")(net_states)
    net_states = layers.Dense(units=300)(net_states)
    net_states = layers.Activation("relu")(net_states)

    # Add hidden layer(s) for action pathway
    net_actions = layers.Dense(units=300)(input_actions)
    net_actions = layers.Activation("relu")(net_actions)
    # net_actions = layers.Dense(units=250, kernel_regularizer=regularizers.l2(1e-7))(net_actions)
    # net_actions = layers.BatchNormalization()(net_actions)
    # net_actions = layers.Activation("relu")(net_actions)

    # Combine state and action pathways
    net = layers.Add()([net_states, net_actions])
    net = layers.Activation('relu')(net)
    net = layers.Dense(units=200,
                       kernel_initializer=initializers.RandomUniform(
                           minval=-0.5, maxval=0.5))(net)
    net = layers.Activation('relu')(net)

    # Add final output layer to produce action values (Q values)
    Q_values = layers.Dense(units=1, name='q_values')(net)

    # ---------------- Hidden layers for states ----------------
    # model_states = layers.Dense(units=32, activation=activations.sigmoid)(input_states)
    # # model_states = layers.BatchNormalization()(model_states)
    # model_states = layers.Dense(units=16, activation=activations.sigmoid)(model_states)
    # # model_states = layers.BatchNormalization()(model_states)
    # # model_states = layers.Dense(units=64)(model_states)
    # # model_states = layers.BatchNormalization()(model_states)
    #
    # # ---------------- Hidden layers for actions ----------------
    # model_actions = layers.Dense(units=16, activation=activations.sigmoid)(input_actions)
    # # model_actions = layers.BatchNormalization()(model_actions)
    # model_actions = layers.Dense(units=16, activation=activations.sigmoid)(model_actions)
    # # model_actions = layers.BatchNormalization()(model_actions)
    #
    # # Both models merge here
    # model = layers.add([model_states, model_actions])
    #
    # # Fully connected and batch normalization
    # model = layers.Dense(units=8, activation=activations.sigmoid)(model)
    # # model = layers.BatchNormalization()(model)
    # # model = layers.Dense(units=64, activation=activations.relu)(model)
    # # model = layers.BatchNormalization()(model)
    #
    # # Q values / output layer
    # Q_values = layers.Dense(units=1, name='Q_s_a')(model)
    # # model = layers.BatchNormalization()(model)

    # Wrap everything in a Keras model
    self.model = models.Model(inputs=[input_states, input_actions],
                              outputs=Q_values)
    optimizer = optimizers.Adam(lr=0.0001)
    self.model.compile(optimizer=optimizer, loss='mse')

    action_gradients = K.gradients(Q_values, input_actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)

def _cnn_ctc_init(self):
    self.input_data = layers.Input(name='the_input',
                                   shape=(self.AUDIO_LENGTH,
                                          self.AUDIO_FEATURE_LENGTH, 1))
    layers_h1 = layers.Conv2D(filters=32, kernel_size=(3, 3), use_bias=False,
                              activation='relu', padding='same',
                              kernel_initializer='he_normal')(self.input_data)
    layers_h1 = layers.Dropout(rate=0.05)(layers_h1)
    layers_h2 = layers.Conv2D(filters=32, kernel_size=(3, 3), use_bias=True,
                              activation='relu', padding='same',
                              kernel_initializer='he_normal')(layers_h1)
    layers_h3 = layers.MaxPooling2D(pool_size=2, strides=None,
                                    padding='valid')(layers_h2)
    layers_h3 = layers.Dropout(rate=0.05)(layers_h3)
    layers_h4 = layers.Conv2D(filters=64, kernel_size=(3, 3), use_bias=True,
                              activation='relu', padding='same',
                              kernel_initializer='he_normal')(layers_h3)
    layers_h4 = layers.Dropout(rate=0.1)(layers_h4)
    layers_h5 = layers.Conv2D(filters=64, kernel_size=(3, 3), use_bias=True,
                              activation='relu', padding='same',
                              kernel_initializer='he_normal')(layers_h4)
    layers_h6 = layers.MaxPooling2D(pool_size=2, strides=None,
                                    padding='valid')(layers_h5)
    layers_h6 = layers.Dropout(rate=0.1)(layers_h6)
    layers_h7 = layers.Conv2D(filters=128, kernel_size=(3, 3), use_bias=True,
                              activation='relu', padding='same',
                              kernel_initializer='he_normal')(layers_h6)
    layers_h7 = layers.Dropout(rate=0.15)(layers_h7)
    layers_h8 = layers.Conv2D(filters=128, kernel_size=(3, 3), use_bias=True,
                              activation='relu', padding='same',
                              kernel_initializer='he_normal')(layers_h7)
    layers_h9 = layers.MaxPooling2D(pool_size=2, strides=None,
                                    padding='valid')(layers_h8)
    layers_h9 = layers.Dropout(rate=0.15)(layers_h9)
    layers_h10 = layers.Conv2D(filters=128, kernel_size=(3, 3), use_bias=True,
                               activation='relu', padding='same',
                               kernel_initializer='he_normal')(layers_h9)
    layers_h10 = layers.Dropout(rate=0.2)(layers_h10)
    layers_h11 = layers.Conv2D(filters=128, kernel_size=(3, 3), use_bias=True,
                               activation='relu', padding='same',
                               kernel_initializer='he_normal')(layers_h10)
    layers_h12 = layers.MaxPooling2D(pool_size=1, strides=None,
                                     padding='valid')(layers_h11)
    layers_h12 = layers.Dropout(rate=0.2)(layers_h12)
    layers_h13 = layers.Conv2D(filters=128, kernel_size=(3, 3), use_bias=True,
                               activation='relu', padding='same',
                               kernel_initializer='he_normal')(layers_h12)
    layers_h13 = layers.Dropout(rate=0.2)(layers_h13)
    layers_h14 = layers.Conv2D(filters=128, kernel_size=(3, 3), use_bias=True,
                               activation='relu', padding='same',
                               kernel_initializer='he_normal')(layers_h13)
    layers_h15 = layers.MaxPooling2D(pool_size=1, strides=None,
                                     padding='valid')(layers_h14)
    layers_h16 = layers.Reshape((self.AUDIO_FEATURE_LENGTH,
                                 self.AUDIO_LENGTH * 2))(layers_h15)
    layers_h16 = layers.Dropout(rate=0.3)(layers_h16)
    layers_h17 = layers.Dense(units=128, use_bias=True, activation='relu',
                              kernel_initializer='he_normal')(layers_h16)
    layers_h17 = layers.Dropout(rate=0.3)(layers_h17)
    layers_h18 = layers.Dense(units=self.OUTPUT_SIZE, use_bias=True,
                              kernel_initializer='he_normal')(layers_h17)
    y_pred = layers.Activation('softmax', name='activation_0')(layers_h18)
    self.cnn_model = models.Model(inputs=self.input_data, outputs=y_pred)

    self.labels = layers.Input(name='the_label',
                               shape=[self.LABEL_SEQUENCE_LENGTH],
                               dtype='float32')
    self.input_length = layers.Input(name='input_length', shape=[1],
                                     dtype='int64')
    self.label_length = layers.Input(name='label_length', shape=[1],
                                     dtype='int64')
    self.loss = layers.Lambda(function=self._ctc_lambda_func,
                              output_shape=(1, ),
                              name='ctc')([y_pred, self.labels,
                                           self.input_length,
                                           self.label_length])
    self.ctc_model = models.Model(inputs=[self.input_data, self.labels,
                                          self.input_length,
                                          self.label_length],
                                  outputs=self.loss)
    optimizer = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999,
                                decay=0.0, epsilon=10e-8)
    self.ctc_model.compile(optimizer=optimizer,
                           loss={'ctc': lambda y_true, y_pred: y_pred})
    print('[*Info] Model created and compiled successfully.')
    return self.cnn_model, self.ctc_model

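# Hypothetical usage: greedy CTC decoding of the acoustic model's predictions.
# `batch_x` and `seq_lengths` (the post-pooling time steps per sample) are
# assumptions, not names from this project.
# y_pred = self.cnn_model.predict(batch_x)
# decoded, _ = K.ctc_decode(y_pred, input_length=seq_lengths, greedy=True)
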
def build_model(self):
    """Build an actor (policy) network that maps states -> actions."""
    kernel_l2_reg = 1e-5

    # Define input layer (states)
    states = layers.Input(shape=(self.state_size, ), name='states')

    # size_repeat = 30
    # block_size = size_repeat * self.state_size
    # print("Actor block size = {}".format(block_size))
    #
    # net = layers.concatenate([states] * size_repeat)
    # # net = layers.Dense(block_size,
    # #                    # kernel_initializer=initializers.RandomNormal(mean=1.0, stddev=0.1),
    # #                    # bias_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01),
    # #                    activation=None,
    # #                    use_bias=False)(states)
    # net = layers.BatchNormalization()(net)
    # net = layers.Dropout(0.2)(net)
    # # net = layers.LeakyReLU(1e-2)(net)
    #
    # for _ in range(5):
    #     net = res_block(net, block_size)

    # Add hidden layers
    net = layers.Dense(units=300,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg))(states)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    net = layers.Dense(units=400,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    net = layers.Dense(units=200,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    # Try different layer sizes, activations, add batch normalization, regularizers, etc.

    # # Add final output layer with sigmoid activation
    # raw_actions = layers.Dense(units=self.action_size,
    #                            activation='sigmoid',
    #                            # kernel_regularizer=regularizers.l2(kernel_l2_reg),
    #                            kernel_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
    #                            # bias_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
    #                            name='raw_actions')(net)
    #
    # # Scale [0, 1] output for each action dimension to proper range
    # actions = layers.Lambda(lambda x: (x * self.action_range) + self.action_low,
    #                         name='actions')(raw_actions)

    actions = layers.Dense(units=self.action_size,
                           activation='tanh',
                           kernel_regularizer=regularizers.l2(kernel_l2_reg),
                           kernel_initializer=initializers.RandomUniform(
                               minval=-3e-3, maxval=3e-3),
                           name='actions')(net)

    # Create Keras model
    self.model = models.Model(inputs=states, outputs=actions)

    # Define loss function using action value (Q value) gradients
    action_gradients = layers.Input(shape=(self.action_size, ))
    loss = K.mean(-action_gradients * actions)
    # Incorporate any additional losses here (e.g. from regularizers)

    # Define optimizer and training function
    optimizer = optimizers.Adam(lr=1e-4)
    updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                       loss=loss)
    self.train_fn = K.function(
        inputs=[self.model.input, action_gradients, K.learning_phase()],
        outputs=[],
        updates=updates_op)

def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
    kernel_l2_reg = 1e-5

    # Dense options:
    # units=200,
    # activation='relu',
    # activation=None,
    # activity_regularizer=regularizers.l2(0.01),
    # kernel_regularizer=regularizers.l2(kernel_l2_reg),
    # bias_initializer=initializers.Constant(1e-2),
    # use_bias=True
    # use_bias=False

    # Define input layers
    states = layers.Input(shape=(self.state_size, ), name='states')
    actions = layers.Input(shape=(self.action_size, ), name='actions')

    # size_repeat = 30
    # state_size = size_repeat * self.state_size
    # action_size = size_repeat * self.action_size
    # block_size = size_repeat * self.state_size + size_repeat * self.action_size
    # print("Critic block size = {}".format(block_size))
    #
    # net_states = layers.concatenate(size_repeat * [states])
    # net_states = layers.BatchNormalization()(net_states)
    # net_states = layers.Dropout(0.2)(net_states)
    #
    # net_actions = layers.concatenate(size_repeat * [actions])
    # net_actions = layers.BatchNormalization()(net_actions)
    # net_actions = layers.Dropout(0.2)(net_actions)
    #
    # # State pathway
    # for _ in range(3):
    #     net_states = res_block(net_states, state_size)
    #
    # # Action pathway
    # for _ in range(2):
    #     net_actions = res_block(net_actions, action_size)
    #
    # # Merge state and action pathways
    # net = layers.concatenate([net_states, net_actions])
    #
    # # Final blocks
    # for _ in range(3):
    #     net = res_block(net, block_size)

    # Add hidden layer(s) for state pathway
    net_states = layers.Dense(units=300,
                              kernel_regularizer=regularizers.l2(kernel_l2_reg))(states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)

    net_states = layers.Dense(units=400,
                              kernel_regularizer=regularizers.l2(kernel_l2_reg))(net_states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)

    # Add hidden layer(s) for action pathway
    net_actions = layers.Dense(units=400,
                               kernel_regularizer=regularizers.l2(kernel_l2_reg))(actions)
    net_actions = layers.BatchNormalization()(net_actions)
    net_actions = layers.LeakyReLU(1e-2)(net_actions)

    # Merge state and action pathways
    net = layers.add([net_states, net_actions])
    net = layers.Dense(units=200,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    # Add final output layer to produce action values (Q values)
    Q_values = layers.Dense(
        units=1,
        activation=None,
        kernel_regularizer=regularizers.l2(kernel_l2_reg),
        kernel_initializer=initializers.RandomUniform(minval=-5e-3,
                                                      maxval=5e-3),
        # bias_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
        name='q_values')(net)

    # Create Keras model
    self.model = models.Model(inputs=[states, actions], outputs=Q_values)

    # Define optimizer and compile model for training with built-in loss function
    optimizer = optimizers.Adam(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss='mse')

    # Compute action gradients (derivative of Q values w.r.t. actions)
    action_gradients = K.gradients(Q_values, actions)

    # Define an additional function to fetch action gradients (to be used by actor model)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)

test_generator = test_datagen.flow_from_directory(join(data_path, 'test'),
                                                  target_size=(32, 32),
                                                  batch_size=32)

tb_callback_ln = callbacks.TensorBoard(
    log_dir='/tmp/tensorboard/cifar10/resnet2')

batch_size = 32
nb_train_samples = 50000
nb_test_samples = 10000

opt = optimizers.Adam(lr=1e-3)
model1.compile(loss='categorical_crossentropy',
               optimizer=opt,
               metrics=['accuracy'])
history1 = model1.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=50,
    validation_data=test_generator,
    validation_steps=nb_test_samples // batch_size,
    callbacks=[tb_callback_ln])

# Second run with a lower learning rate. The original snippet is truncated
# after steps_per_epoch; the remaining arguments are assumed to mirror the
# first run.
opt = optimizers.Adam(lr=1e-4)
model1.compile(loss='categorical_crossentropy',
               optimizer=opt,
               metrics=['accuracy'])
history2 = model1.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=50,
    validation_data=test_generator,
    validation_steps=nb_test_samples // batch_size,
    callbacks=[tb_callback_ln])

# (Fragment) continuation of a Sequential classifier; the opening of the
# first layers.Dense(...) call falls outside this excerpt.
))
model.add(layers.Dropout(rate=0.2))
model.add(layers.BatchNormalization())
model.add(layers.Dense(
    units=160,
    activation=activations.relu,
))
model.add(layers.Dropout(rate=0.2))
model.add(layers.BatchNormalization())
model.add(layers.Dense(
    units=80,
    activation=activations.sigmoid,
))
model.add(layers.Dropout(rate=0.2))
model.add(layers.BatchNormalization())
model.add(layers.Dense(
    units=2,
    activation=activations.softmax,
))

model.compile(optimizer=optimizers.Adam(lr=0.001),
              loss=losses.categorical_crossentropy,
              metrics=['accuracy'])
model.fit(x=data.train, y=data.train_labels,
          batch_size=32,
          epochs=epochs,
          verbose=2)

print("Test")
_, acc = model.evaluate(data.test, data.test_labels)
print(acc)