def build_model(self): l2_regularization_kernel = 1e-5 # Input Layer input = layers.Input(shape=(self.state_size,), name='input_states') # Hidden Layers model = layers.Dense(units=300, kernel_regularizer=regularizers.l2(l2_regularization_kernel))(input) model = layers.BatchNormalization()(model) model = layers.LeakyReLU(1e-2)(model) model = layers.Dense(units=400, kernel_regularizer=regularizers.l2(l2_regularization_kernel))(model) model = layers.BatchNormalization()(model) model = layers.LeakyReLU(1e-2)(model) model = layers.Dense(units=200, kernel_regularizer=regularizers.l2(l2_regularization_kernel))(model) model = layers.BatchNormalization()(model) model = layers.LeakyReLU(1e-2)(model) # Our output layer - a fully connected layer output = layers.Dense(units=self.action_size, activation='tanh', kernel_regularizer=regularizers.l2(l2_regularization_kernel), kernel_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3), name='output_actions')(model) # Keras model self.model = models.Model(inputs=input, outputs=output) # Define loss and optimizer action_gradients = layers.Input(shape=(self.action_size,)) loss = K.mean(-action_gradients * output) optimizer = optimizers.Adam(lr=1e-4) update_operation = optimizer.get_updates(params=self.model.trainable_weights, loss=loss) self.train_fn = K.function(inputs=[self.model.input, action_gradients, K.learning_phase()], outputs=[], updates=update_operation)
def _gru_ctc_init(self):
    """Build the GRU acoustic model and its CTC training wrapper.

    Returns:
        (gru_model, ctc_model): the inference model (softmax over output
        symbols per timestep) and the training model whose single output
        is the CTC loss value.
    """
    # Spectrogram-like input: (time, features, channel)
    self.input_data = layers.Input(name='the_input', shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1))
    # Collapse feature/channel dims to 200 per timestep
    # (assumes AUDIO_FEATURE_LENGTH * 1 == 200 -- TODO confirm)
    layers_h1 = layers.Reshape((-1, 200))(self.input_data)
    layers_h2 = GRUCTCAM._dense(128, layers_h1)
    layers_h3 = GRUCTCAM._bi_gru(64, layers_h2)
    y_pred = GRUCTCAM._dense(self.OUTPUT_SIZE, layers_h3, activation='softmax')
    self.gru_model = models.Model(inputs=self.input_data, outputs=y_pred)

    # Extra inputs required by the CTC loss computation
    self.labels = layers.Input(name='the_label', shape=[self.LABEL_SEQUENCE_LENGTH], dtype='float32')
    self.input_length = layers.Input(name='input_length', shape=[1], dtype='int64')
    self.label_length = layers.Input(name='label_length', shape=[1], dtype='int64')
    # CTC loss is computed inside a Lambda layer so it can be a model output
    self.loss = layers.Lambda(function=self._ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, self.labels, self.input_length, self.label_length])
    self.ctc_model = models.Model(
        inputs=[self.input_data, self.labels, self.input_length, self.label_length],
        outputs=self.loss)

    # NOTE(review): epsilon=10e-8 equals 1e-7; 1e-8 may have been intended -- confirm
    optimizer = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, decay=0.0, epsilon=10e-8)
    # The model's output already *is* the loss, so the loss fn passes it through
    self.ctc_model.compile(optimizer=optimizer, loss={'ctc': lambda y_true, y_pred: y_pred})
    print('[*Info] Create Model Successful, Compiles Model Successful. ')
    return self.gru_model, self.ctc_model
def build_model(self): l2_kernel_regularization = 1e-5 # Define input layers input_states = layers.Input(shape=(self.state_size, ), name='input_states') input_actions = layers.Input(shape=(self.action_size, ), name='input_actions') # Hidden layers for states model_states = layers.Dense( units=32, kernel_regularizer=regularizers.l2(l2_kernel_regularization))( input_states) model_states = layers.BatchNormalization()(model_states) model_states = layers.LeakyReLU(1e-2)(model_states) model_states = layers.Dense( units=64, kernel_regularizer=regularizers.l2(l2_kernel_regularization))( model_states) model_states = layers.BatchNormalization()(model_states) model_states = layers.LeakyReLU(1e-2)(model_states) # Hidden layers for actions model_actions = layers.Dense( units=64, kernel_regularizer=regularizers.l2(l2_kernel_regularization))( input_actions) model_actions = layers.BatchNormalization()(model_actions) model_actions = layers.LeakyReLU(1e-2)(model_actions) # Both models merge here model = layers.add([model_states, model_actions]) # Fully connected and batch normalization model = layers.Dense(units=32, kernel_regularizer=regularizers.l2( l2_kernel_regularization))(model) model = layers.BatchNormalization()(model) model = layers.LeakyReLU(1e-2)(model) # Q values / output layer Q_values = layers.Dense( units=1, activation=None, kernel_regularizer=regularizers.l2(l2_kernel_regularization), kernel_initializer=initializers.RandomUniform(minval=-5e-3, maxval=5e-3), name='output_Q_values')(model) # Keras wrap the model self.model = models.Model(inputs=[input_states, input_actions], outputs=Q_values) optimizer = optimizers.Adam(lr=1e-2) self.model.compile(optimizer=optimizer, loss='mse') action_gradients = K.gradients(Q_values, input_actions) self.get_action_gradients = K.function( inputs=[*self.model.input, K.learning_phase()], outputs=action_gradients)
def build_model(self): """Build an actor (policy) network that maps states -> actions.""" # Define input layer (states) states = layers.Input(shape=(self.state_size, ), name='states') '''# Add hidden layers net = layers.Dense(units=32, activation='relu')(states) net = layers.Dense(units=64, activation='relu')(net) net = layers.Dense(units=32, activation='relu')(net) # Try different layer sizes, activations, add batch normalization, regularizers, etc. # Add final output layer with sigmoid activation raw_actions = layers.Dense(units=self.action_size, activation='sigmoid', name='raw_actions')(net) ''' ################################### # Add hidden layers net = layers.Dense(units=400, kernel_regularizer=regularizers.l2(1e-6))(states) net = layers.BatchNormalization()(net) net = layers.LeakyReLU(1e-2)(net) net = layers.Dense(units=300, kernel_regularizer=regularizers.l2(1e-6))(net) net = layers.BatchNormalization()(net) net = layers.LeakyReLU(1e-2)(net) # Add final output layer with sigmoid activation raw_actions = layers.Dense( units=self.action_size, activation='sigmoid', name='raw_actions', kernel_initializer=initializers.RandomUniform(minval=-0.003, maxval=0.003))(net) ####################################### # Scale [0, 1] output for each action dimension to proper range actions = layers.Lambda(lambda x: (x * self.action_range) + self.action_low, name='actions')(raw_actions) # Create Keras model self.model = models.Model(inputs=states, outputs=actions) # Define loss function using action value (Q value) gradients action_gradients = layers.Input(shape=(self.action_size, )) loss = K.mean(-action_gradients * actions) # Incorporate any additional losses here (e.g. from regularizers) # Define optimizer and training function optimizer = optimizers.Adam(lr=1e-6) updates_op = optimizer.get_updates(params=self.model.trainable_weights, loss=loss) self.train_fn = K.function( inputs=[self.model.input, action_gradients, K.learning_phase()], outputs=[], updates=updates_op)
def build_model(self): """Build an actor (policy) network that maps states -> actions.""" # Define input layer (states) states = layers.Input(shape=(self.state_size, ), name='states') #--------- copy from DDPG quadcopter ----------- net = layers.Dense(units=400)(states) # net = layers.BatchNormalization()(net) net = layers.Activation("relu")(net) net = layers.Dense(units=200)(net) # net = layers.BatchNormalization()(net) net = layers.Activation("relu")(net) actions = layers.Dense(units=self.action_size, activation='softmax', name='actions', kernel_initializer=initializers.RandomUniform( minval=-1, maxval=1))(net) # actions = layers.Dense(units=self.action_size, activation='sigmoid', name='actions', # kernel_initializer=initializers.RandomUniform(minval=-0.001, maxval=0.001))(net) # Add hidden layers # net = layers.Dense(units=16,activation=activations.sigmoid)(states) # net = layers.BatchNormalization()(net) # net = layers.Dense(units=16,activation=activations.sigmoid)(net) # net = layers.BatchNormalization()(net) # net = layers.Dense(units=128,activation=activations.relu)(net) # net = layers.BatchNormalization()(net) # Add final output layer with sigmoid activation # actions = layers.Dense(units=self.action_size, activation='linear', # sigmoid # name='raw_actions' )(net) # Scale [0, 1] output for each action dimension to proper range # actions = layers.Lambda(lambda x: (x * self.action_range) + self.action_low, # name='actions')(raw_actions) # Create Keras model self.model = models.Model(inputs=states, outputs=actions) action_gradients = layers.Input(shape=(self.action_size, )) loss = K.mean(-action_gradients * actions) # Define optimizer and training function optimizer = optimizers.Adam(lr=.0001) updates_op = optimizer.get_updates(params=self.model.trainable_weights, loss=loss) self.train_fn = K.function( inputs=[self.model.input, action_gradients, K.learning_phase()], outputs=[], updates=updates_op)
def _create_generator(self):
    """Map a latent vector to an image through a dense projection and a conv stack."""
    latent = layers.Input(shape=(self.args.latent_dims, ))
    net = layers.Dense(128 * 16 * 16)(latent)
    net = layers.LeakyReLU()(net)
    # Project the flat activation into a 16x16 feature map
    net = layers.Reshape((16, 16, 128))(net)
    net = layers.Conv2D(256, kernel_size=5, strides=1, padding='same')(net)
    net = layers.LeakyReLU()(net)
    # Kernel size is a multiple of the stride to avoid up-sampling artifacts
    net = layers.Conv2DTranspose(256, kernel_size=4, strides=2, padding='same')(net)
    net = layers.LeakyReLU()(net)
    # Two refinement convolutions at full resolution
    for _ in range(2):
        net = layers.Conv2D(256, kernel_size=5, padding='same')(net)
        net = layers.LeakyReLU()(net)
    image = layers.Conv2D(CHANNELS, kernel_size=7, activation='tanh', padding='same')(net)
    return models.Model(latent, image)
def train(neurons, hidden, act, epochs=10, repetition=0, summary=False):
    """Fit an MLP to the Gaussian kernel over distances in [0, 3].

    Trains on one million uniformly sampled distances, checkpoints the model
    with the lowest validation MAPE, and returns that checkpoint reloaded
    from disk.
    """
    sample_count = int(1e6)
    bandwidth = 1
    # Training pairs: kernel value at uniformly sampled distances
    distances = np.random.uniform(0, 3, (sample_count, 1))
    targets = gaussian(distances, bandwidth)
    X, y = distances, targets

    # Fully connected net: `hidden` layers of `neurons` units, linear head
    net_input = layers.Input(shape=(1, ))
    net = layers.Dense(neurons, activation=act)(net_input)
    for _ in range(hidden - 1):
        net = layers.Dense(neurons, activation=act)(net)
    net_output = layers.Dense(1, activation='linear')(net)

    save_path = "models/kernel/h{}/nn_{}_{}.h5".format(hidden, neurons, repetition)
    model = models.Model(inputs=net_input, outputs=net_output)
    early_stop = callbacks.EarlyStopping(
        monitor='val_mean_absolute_percentage_error', patience=10)
    check_point = callbacks.ModelCheckpoint(
        save_path, monitor='val_mean_absolute_percentage_error',
        save_best_only=True, mode='min')
    model.compile(optimizer=optimizers.Adam(lr=1e-3, decay=1e-5),
                  loss='mean_squared_error',
                  metrics=['mean_absolute_percentage_error'])
    if summary:
        model.summary()
    model.fit(X, y, epochs=epochs, batch_size=50,
              callbacks=[check_point, early_stop], validation_split=0.01)
    # Return the best (not the last) weights
    return models.load_model(save_path)
def ShatheNet_v2(n_classes=256, weights=None):
    """DenseNet-flavoured classifier over 192x192 RGB images.

    Stem convolutions feed alternating dense and transition blocks; a global
    average pool and softmax head produce class probabilities. Optionally
    loads pretrained weights from the given path.
    """
    stem_input = layers.Input(shape=(192, 192, 3))
    # Stem: strided conv, pointwise conv, 3x3 conv, then max-pooling
    net = conv2d_bn(stem_input, 32, 3, 3, padding='valid', strides=(2, 2))
    net = conv2d_bn(net, 64, 1, 1, padding='valid', strides=(1, 1))
    net = conv2d_bn(net, 64, 3, 3, padding='valid', strides=(1, 1))
    net = layers.MaxPooling2D((2, 2))(net)
    # Alternating dense blocks (growth 32) and transition blocks
    for block_layers, transition_filters in ((8, 96), (12, 128), (20, 196)):
        net = dense_block(net, block_layers, 32)
        net = transition_block(net, transition_filters)
    net = dense_block(net, 16, 32)
    net = layers.GlobalAveragePooling2D()(net)
    predictions = layers.Dense(n_classes, activation='softmax')(net)
    model = models.Model(inputs=stem_input, outputs=predictions)
    if weights:
        model.load_weights(weights)
    return model
def create_model(self, img_shape, num_class):
    """Build an encoder/decoder ("down/up") segmentation network.

    Six down-sampling blocks keep their pre-activation BatchNorm outputs as
    skip connections for six matching up-sampling blocks; a final 1x1 conv
    maps to ``num_class`` channels.
    """
    self.handle_dim_ordering()
    # Force training-phase behaviour globally (affects BN/dropout everywhere)
    K.set_learning_phase(True)
    #model = models.Sequential()
    inputs = layers.Input(shape = img_shape)
    # Stem convolutions
    x = self.conv_bn_relu(inputs, (3, 3),(1, 1), 32, 'conv0_1')
    net = self.conv_bn_relu(x, (3, 3), (1, 1), 64, 'conv0_2')
    bn1 = layers.BatchNormalization(momentum=0.99, name='conv0_3_bn')(self.conv(
        net, (3, 3), (1, 1), 32, 'conv0_3'))
    act1 = layers.Activation('relu')(bn1)
    # Encoder: each down_block returns (bn_output, activated_output)
    bn2, act2 = self.down_block(act1, 32, 'down1')
    bn3, act3 = self.down_block(act2, 32, 'down2')
    bn4, act4 = self.down_block(act3, 32, 'down3')
    bn5, act5 = self.down_block(act4, 32, 'down4')
    bn6, act6 = self.down_block(act5, 32, 'down5')
    bn7, act7 = self.down_block(act6, 32, 'down6')
    # Decoder: each up_block fuses the running tensor with a skip BN tensor
    temp = self.up_block(act7, bn6, 32, 'up6')
    temp = self.up_block(temp, bn5, 32, 'up5')
    temp = self.up_block(temp, bn4, 32, 'up4')
    temp = self.up_block(temp, bn3, 32, 'up3')
    temp = self.up_block(temp, bn2, 32, 'up2')
    temp = self.up_block(temp, bn1, 32, 'up1')
    # Per-pixel class scores
    output = self.conv(temp, (1, 1), (1, 1), num_class, 'output')
    model = models.Model(outputs=output, inputs=inputs)
    print(model.summary())
    return model
def create_model(self, img_shape, num_class):
    """Build a U-Net with crop-and-concat skip connections.

    Decoder feature maps are upsampled and the corresponding encoder maps
    cropped to matching size before concatenation; the final tensor is
    zero-padded back to the input spatial size and mapped to ``num_class``
    channels by a 1x1 conv (logits, no activation).
    """
    concat_axis = 3  # channels-last
    inputs = layers.Input(shape = img_shape)

    # Encoder: two 3x3 relu convs per stage, 2x2 max-pool between stages
    conv1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='conv1_1')(inputs)
    conv1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = layers.MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = layers.MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
    pool4 = layers.MaxPooling2D(pool_size=(2, 2))(conv4)
    conv5 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
    conv5 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)

    # Decoder: upsample, crop the skip tensor to match, concat, two convs
    up_conv5 = layers.UpSampling2D(size=(2, 2))(conv5)
    ch, cw = self.get_crop_shape(conv4, up_conv5)
    crop_conv4 = layers.Cropping2D(cropping=(ch,cw))(conv4)
    up6 = layers.concatenate([up_conv5, crop_conv4], axis=concat_axis)
    conv6 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
    conv6 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)
    up_conv6 = layers.UpSampling2D(size=(2, 2))(conv6)
    ch, cw = self.get_crop_shape(conv3, up_conv6)
    crop_conv3 = layers.Cropping2D(cropping=(ch,cw))(conv3)
    up7 = layers.concatenate([up_conv6, crop_conv3], axis=concat_axis)
    conv7 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
    conv7 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)
    up_conv7 = layers.UpSampling2D(size=(2, 2))(conv7)
    ch, cw = self.get_crop_shape(conv2, up_conv7)
    crop_conv2 = layers.Cropping2D(cropping=(ch,cw))(conv2)
    up8 = layers.concatenate([up_conv7, crop_conv2], axis=concat_axis)
    conv8 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
    conv8 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)
    up_conv8 = layers.UpSampling2D(size=(2, 2))(conv8)
    ch, cw = self.get_crop_shape(conv1, up_conv8)
    crop_conv1 = layers.Cropping2D(cropping=(ch,cw))(conv1)
    up9 = layers.concatenate([up_conv8, crop_conv1], axis=concat_axis)
    conv9 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
    conv9 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)

    # Pad back to the exact input spatial size, then per-pixel class logits
    ch, cw = self.get_crop_shape(inputs, conv9)
    conv9 = layers.ZeroPadding2D(padding=((ch[0], ch[1]), (cw[0], cw[1])))(conv9)
    conv10 = layers.Conv2D(num_class, (1, 1))(conv9)
    model = models.Model(inputs=inputs, outputs=conv10)
    return model
def _create_gan(self):
    """Stack the generator and the frozen discriminator into one adversarial model."""
    # Freezing here may trigger a spurious Keras WARNING
    # (https://github.com/keras-team/keras/issues/8585)
    self._discriminator.trainable = False
    latent = layers.Input(shape=(self.args.latent_dims, ))
    judgement = self._discriminator(self._generator(latent))
    return models.Model(latent, judgement)
def rnn_generator(static_dim, sequence_dim, length, code_dim, kind='LSTM'):
    """Build a recurrent sequence generator.

    Repeats the static vector along time, concatenates it with a pointwise
    Conv1D embedding of the input sequence, runs the chosen RNN over the
    result, and projects back to ``sequence_dim`` with a pointwise Conv1D.

    :param kind: 'LSTM' or 'GRU'; any other value raises ValueError.
    :return: Keras Model mapping [static, sequence] -> output sequence.
    """
    # getattr is safer and clearer than eval() for looking up the layer class
    if kind in ('LSTM', 'GRU'):
        rnn = getattr(layers, kind)
    else:
        # `raise ValueError, msg` is Python-2-only syntax; the call form
        # below works on both Python 2 and 3
        raise ValueError('no such RNN method "{}"'.format(kind))
    static = layers.Input((static_dim, ))
    repeat = layers.RepeatVector(length)(static)
    sequence = layers.Input((length, sequence_dim))
    # Pointwise (kernel size 1) embedding so channels match `repeat`
    sequence_emb = layers.Conv1D(static_dim, 1)(sequence)
    code = layers.concatenate([repeat, sequence_emb])
    emb = rnn(code_dim, recurrent_dropout=0.5, unroll=True,
              return_sequences=True, activation='linear')(code)
    out = layers.Conv1D(sequence_dim, 1)(emb)
    return models.Model([static, sequence], out)
def build_model(self):
    """Build the critic: (states, actions) -> Q-value, plus a dQ/da fetcher."""
    #Define input layers
    inputStates = layers.Input(shape=(self.state_size, ), name='inputStates')
    inputActions = layers.Input(shape=(self.action_size, ), name='inputActions')
    # Hidden layers for states: Dense -> BN -> LeakyReLU -> Dropout, twice
    modelS = layers.Dense(units=128, activation='linear')(inputStates)
    modelS = layers.BatchNormalization()(modelS)
    modelS = layers.LeakyReLU(0.01)(modelS)
    modelS = layers.Dropout(0.3)(modelS)
    modelS = layers.Dense(units=256, activation='linear')(modelS)
    modelS = layers.BatchNormalization()(modelS)
    modelS = layers.LeakyReLU(0.01)(modelS)
    modelS = layers.Dropout(0.3)(modelS)
    # Action pathway. NOTE(review): LeakyReLU precedes BatchNorm here,
    # unlike the state pathway -- possibly unintentional; confirm.
    modelA = layers.Dense(units=256, activation='linear')(inputActions)
    modelA = layers.LeakyReLU(0.01)(modelA)
    modelA = layers.BatchNormalization()(modelA)
    modelA = layers.Dropout(0.5)(modelA)
    #Merging the models (element-wise sum; both pathways are 256-wide)
    model = layers.add([modelS, modelA])
    model = layers.Dense(units=256, activation='linear')(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(0.01)(model)
    #Q Layer: single linear output unit
    Qvalues = layers.Dense(units=1, activation=None, name='outputQvalues')(model)
    #Keras model
    self.model = models.Model(inputs=[inputStates, inputActions], outputs=Qvalues)
    optimizer = optimizers.Adam()
    self.model.compile(optimizer=optimizer, loss='mse')
    # Backend function returning dQ/dactions for the actor's update
    actionGradients = K.gradients(Qvalues, inputActions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=actionGradients)
def build_model(self): states = layers.Input(shape=(self.state_size,), name='inputStates') # Hidden Layers model = layers.Dense(units=128, activation='linear')(states) model = layers.BatchNormalization()(model) model = layers.LeakyReLU(0.01)(model) model = layers.Dropout(0.3)(model) model = layers.Dense(units=256, activation='linear')(model) model = layers.BatchNormalization()(model) model = layers.LeakyReLU(0.01)(model) model = layers.Dropout(0.3)(model) model = layers.Dense(units=512, activation='linear')(model) model = layers.BatchNormalization()(model) model = layers.LeakyReLU(0.01)(model) model = layers.Dropout(0.3)(model) model = layers.Dense(units=128, activation='linear')(model) model = layers.BatchNormalization()(model) model = layers.LeakyReLU(0.01)(model) model = layers.Dropout(0.3)(model) output = layers.Dense( units=self.action_size, activation='tanh', kernel_regularizer=regularizers.l2(0.01), name='outputActions')(model) #Keras self.model = models.Model(inputs=states, outputs=output) #Definint Optimizer actionGradients = layers.Input(shape=(self.action_size,)) loss = K.mean(-actionGradients * output) optimizer = optimizers.Adam() update_operation = optimizer.get_updates(params=self.model.trainable_weights, loss=loss) self.train_fn = K.function( inputs=[self.model.input, actionGradients, K.learning_phase()], outputs=[], updates=update_operation)
def rnn_discriminator(sequence_dim, length, code_dim, kind='LSTM'):
    """Build a recurrent discriminator: sequence -> single score.

    :param kind: 'LSTM' or 'GRU'; any other value raises ValueError.
    :return: Keras Model mapping a (length, sequence_dim) sequence to one value.
    """
    # getattr is safer and clearer than eval() for looking up the layer class
    if kind in ('LSTM', 'GRU'):
        rnn = getattr(layers, kind)
    else:
        # `raise ValueError, msg` is Python-2-only syntax; the call form
        # below works on both Python 2 and 3
        raise ValueError('no such RNN method "{}"'.format(kind))
    sequence = layers.Input((length, sequence_dim))
    # NOTE(review): sequence_emb is computed but the RNN below consumes the
    # raw `sequence` (behavior preserved from the original). If the Conv1D
    # embedding was meant to feed the RNN -- as in rnn_generator -- change
    # `(sequence)` to `(sequence_emb)` below.
    sequence_emb = layers.Conv1D(code_dim, 1)(sequence)
    out = rnn(1, recurrent_dropout=0.5, unroll=True,
              return_sequences=False, activation='linear')(sequence)
    return models.Model(sequence, out)
def create_model(img_shape):
    """U-Net: five encoder stages, a 1024-filter bottleneck, five decoder
    stages with skip connections, and a sigmoid mask head."""
    image = layers.Input(shape=img_shape)
    # Encoder: each block returns (pooled, pre-pool skip tensor)
    skips = []
    down = image
    for depth in (32, 64, 128, 256, 512):
        down, skip = encoder_block(down, depth)
        skips.append(skip)
    bottleneck = conv_block(down, 1024)
    # Decoder: consume the skips in reverse order
    up = bottleneck
    for skip, depth in zip(reversed(skips), (512, 256, 128, 64, 32)):
        up = decoder_block(up, skip, depth)
    mask = layers.Conv2D(1, (1, 1), activation='sigmoid')(up)  # change to perceptron?
    return models.Model(inputs=[image], outputs=[mask])
def _create_discriminator(self):
    """Conv stack -> flatten -> dropout -> single sigmoid real/fake score."""
    image = layers.Input(shape=(HEIGHT, WIDTH, CHANNELS))
    net = layers.Conv2D(128, kernel_size=3)(image)
    net = layers.LeakyReLU()(net)
    # Three strided conv blocks, each halving the spatial resolution
    for _ in range(3):
        net = layers.Conv2D(128, kernel_size=4, strides=2)(net)
        net = layers.LeakyReLU()(net)
    net = layers.Flatten()(net)
    net = layers.Dropout(self.args.dropout)(net)
    score = layers.Dense(1, activation='sigmoid')(net)
    return models.Model(image, score)
def train(neurons, hidden=1, act='relu', epochs=10, repetition=0):
    """Fit an MLP to the continuity value over (distance, velocity-difference).

    Trains on one million uniformly sampled input pairs, checkpoints the model
    with the lowest validation loss, and returns that checkpoint reloaded
    from disk.
    """
    sample_count = int(1e6)
    distances = np.random.uniform(0, 3, sample_count)
    veldiffs = np.random.uniform(0, 1, sample_count)
    kernel_grad = dgaussian(distances, 1)
    targets = continuity(veldiffs, kernel_grad)
    # Feature matrix: normalized distance and raw velocity difference
    X = np.zeros((sample_count, 2))
    X[:, 0] = distances / 3
    X[:, 1] = veldiffs
    y = targets

    # Fully connected net: `hidden` layers of `neurons` units, linear head
    net_input = layers.Input(shape=(2, ))
    net = net_input
    for _ in range(hidden):
        net = layers.Dense(neurons, activation=act)(net)
    net_output = layers.Dense(1, activation='linear')(net)

    save_path = "models/continuity/h{}/nn_{}_{}.h5".format(
        hidden, neurons, repetition)
    model = models.Model(inputs=net_input, outputs=net_output)
    early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=10)
    check_point = callbacks.ModelCheckpoint(save_path, monitor='val_loss',
                                            save_best_only=True, mode='min')
    model.compile(optimizer=optimizers.Adam(lr=1e-3, decay=1e-5),
                  loss='mean_squared_error',
                  metrics=['mean_absolute_percentage_error'])
    model.fit(X, y, epochs=epochs, batch_size=100,
              callbacks=[early_stop, check_point], validation_split=0.01)
    # Return the best (not the last) weights
    return models.load_model(save_path)
def keras_efficientnet(blocks_args, global_params, training=False):
    """Assemble an EfficientNet-B0 classifier from block specifications.

    :param blocks_args: iterable of block specs (each with num_repeat,
        output_filters, strides, ... -- fields read via ._replace below)
    :param global_params: shared hyper-parameters passed to every MBConv block
    :param training: forwarded to the MBConv blocks
    :return: Keras Model named 'efficientnet-b0' over 224x224x3 inputs
    """
    inp = layers.Input((224, 224, 3))
    # Stem: strided 3x3 conv -> BN -> activation (via em.relu_fn)
    x = layers.Conv2D(32, 3, padding='same', strides=2, name='stem_conv2d', use_bias=False)(inp)
    x = em.batchnorm(name='stem_tpu_batch_normalization')(x)
    x = layers.Lambda(lambda x: em.relu_fn(x))(x)
    idx = 0
    for block in blocks_args:
        x = el.mbConvBlock(x, block, global_params, idx, training=training)
        # x = MBConvBlock(block, global_params, idx)(x, training=training)
        idx += 1
        # Repeats after the first keep the output width and use stride 1
        if block.num_repeat > 1:
            block = block._replace(
                input_filters=block.output_filters, strides=[1, 1])
        for _ in range(block.num_repeat - 1):
            x = el.mbConvBlock(x, block, global_params, idx, training=training)
            idx += 1
    # Head: 1x1 conv -> BN -> activation -> global pool -> 1000-way softmax
    x = layers.Conv2D(1280, 1, name='head_conv2d', use_bias=False)(x)
    x = em.batchnorm(name='head_tpu_batch_normalization')(x)
    x = layers.Lambda(lambda x: em.relu_fn(x))(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(1000, activation='softmax', name='head_dense', )(x)
    model = models.Model(inp, x, name='efficientnet-b0')
    return model
def test_DLGMLayer():
    """Numerically check DLGMLayer against a NumPy re-implementation.

    Verifies Tr(C), log|C|, the factor R, the ELBO and the layer activation
    produced by the TF layer against hand-computed values (eq. 20 of the
    DLGM paper), then checks the reparameterized sample path.
    """
    xDim = 2
    yDim = 5
    # Three small recognition networks (mu, u, unc_d), each y -> xDim*xDim
    mu_nn = layers.Input((None, yDim))
    mu_nn_d = (layers.Dense(
        xDim * xDim,
        activation="linear",
        kernel_initializer=tf.orthogonal_initializer())(mu_nn))
    mu_net = models.Model(inputs=mu_nn, outputs=mu_nn_d)
    u_nn = layers.Input((None, yDim))
    u_nn_d = (layers.Dense(
        xDim * xDim,
        activation="linear",
        kernel_initializer=tf.orthogonal_initializer())(u_nn))
    u_net = models.Model(inputs=u_nn, outputs=u_nn_d)
    unc_d_nn = layers.Input((None, yDim))
    unc_d_nn_d = (layers.Dense(
        xDim * xDim,
        activation="linear",
        kernel_initializer=tf.orthogonal_initializer())(unc_d_nn))
    unc_d_net = models.Model(inputs=unc_d_nn, outputs=unc_d_nn_d)
    Data = np.random.randn(10, 5).astype(np.float32)
    rec_nets = ({'mu_net': mu_net, 'u_net': u_net, 'unc_d_net': unc_d_net})
    NN = models.Sequential()
    inputlayer = layers.InputLayer(batch_input_shape=(10, 5))
    NN.add(inputlayer)
    lm = DLGMLayer(NN, 4, rec_nets=rec_nets, k=-1)
    lm.calculate_xi(tf.constant(Data.astype(np.float32)))
    lm.get_ELBO(tf.constant(10.0))
    num_units = 4
    # Pull all layer tensors out as NumPy arrays
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        W = lm.W.eval()
        b = lm.b.eval()
        G = lm.G.eval()
        batch_u = lm.batch_u.eval()
        batch_unc_d = lm.batch_unc_d.eval()
        batch_mu = lm.batch_mu.eval()
        batch_Tr_C_lm = lm.batch_Tr_C.eval()
        batch_ld_C_lm = lm.batch_ld_C.eval()
        batch_R_lm = lm.batch_R.eval()
        get_ELBO_lm = lm.get_ELBO(tf.constant(10.0)).eval()
        activation_lm = lm.call(tf.constant(Data, dtype=tf.float32),
                                use_rec_model=True).eval()
    # Reference computation in NumPy, one sample at a time
    batch_Tr_C = []
    batch_ld_C = []
    batch_R = []
    batch_u = batch_u.astype(np.float32)
    batch_unc_d = batch_unc_d.astype(np.float32)
    for i in range(batch_u.shape[0]):
        u = batch_u[i]
        unc_d = batch_unc_d[i]
        # Softplus with clipping for numerical stability
        d = np.log1p(np.exp(np.maximum(unc_d, -15.0)), dtype=np.float32)
        D_inv = np.diag(1.0 / d)
        eta = 1.0 / (u.T.dot(D_inv).dot(u) + 1.0)
        C = D_inv - eta * D_inv.dot(u).dot(u.T).dot(D_inv)
        Tr_C = np.trace(C)
        ld_C = np.log(eta) - np.log(d).sum()  # eq 20 in DLGM
        # coeff = ((1 - T.sqrt(eta)) / (u.T.dot(D_inv).dot(u)))
        # simplified coefficient below is more stable as u -> 0
        # original coefficient from paper is above
        coeff = eta / (1.0 + np.sqrt(eta))
        R = np.sqrt(D_inv) - coeff * D_inv.dot(u).dot(u.T).dot(np.sqrt(D_inv))
        batch_Tr_C.append(Tr_C)
        batch_ld_C.append(ld_C)
        batch_R.append(R)
    batch_Tr_C = np.array(batch_Tr_C)
    batch_ld_C = np.array(batch_ld_C)
    batch_R = np.array(batch_R)
    npt.assert_allclose(batch_Tr_C_lm, batch_Tr_C, atol=1e-3, rtol=1e-4)
    npt.assert_allclose(batch_ld_C_lm, batch_ld_C, atol=1e-3, rtol=1e-4)
    npt.assert_allclose(batch_R_lm, batch_R, atol=1e-3, rtol=1e-4)
    # ELBO reference: KL divergence plus weight regularization
    KL_div = (0.5 *
              (np.sqrt((batch_mu**2).sum(axis=1)).sum() + batch_Tr_C.sum() -
               batch_ld_C.sum() - 10.0))
    weight_reg = ((0.5 / -1) * np.sqrt((W**2).sum()) * np.sqrt((G**2).sum()))
    get_ELBO_np = -(weight_reg + KL_div)
    npt.assert_allclose(get_ELBO_np, get_ELBO_lm, atol=1e-5, rtol=1e-4)
    # Check the reparameterized sample xi and the resulting activation
    test_rand = np.random.normal(size=(batch_R.shape[0], num_units))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        batch_mu = lm.batch_mu.eval()
        batch_xi = (batch_mu + np.squeeze(
            np.matmul(lm.batch_R.eval(), np.expand_dims(test_rand, axis=2))))
        test_batch_xi = (lm.batch_mu + tf.squeeze(
            tf.matmul(lm.batch_R,
                      tf.expand_dims(tf.constant(test_rand, tf.float32), -1))))
        activation = np.matmul(np.maximum(Data, 0), W) + b
        xi = batch_xi
        activation += np.matmul(xi, G)
        inputs = tf.constant(Data, dtype=tf.float32)
        activation_lm = tf.matmul(lm.nonlinearity(inputs), lm.W) + lm.b
        activation_lm += tf.matmul(tf.constant(xi, tf.float32), lm.G)
        activation_lm = activation_lm.eval()
        npt.assert_allclose(batch_xi, test_batch_xi.eval(), atol=1e-5, rtol=1e-4)
        npt.assert_allclose(activation_lm, activation, atol=1e-3, rtol=1e-4)
def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
    kernel_l2_reg = 1e-5  # L2 penalty on every Dense kernel

    # Dense Options
    # units = 200,
    # activation='relu',
    # activation = None,
    # activity_regularizer=regularizers.l2(0.01),
    # kernel_regularizer=regularizers.l2(kernel_l2_reg),
    # bias_initializer=initializers.Constant(1e-2),
    # use_bias = True
    # use_bias=False

    # Define input layers
    states = layers.Input(shape=(self.state_size, ), name='states')
    actions = layers.Input(shape=(self.action_size, ), name='actions')

    # size_repeat = 30
    # state_size = size_repeat*self.state_size
    # action_size = size_repeat*self.action_size
    # block_size = size_repeat*self.state_size + size_repeat*self.action_size
    # print("Critic block size = {}".format(block_size))
    #
    # net_states = layers.concatenate(size_repeat * [states])
    # net_states = layers.BatchNormalization()(net_states)
    # net_states = layers.Dropout(0.2)(net_states)
    #
    # net_actions = layers.concatenate(size_repeat * [actions])
    # net_actions = layers.BatchNormalization()(net_actions)
    # net_actions = layers.Dropout(0.2)(net_actions)
    #
    # # State pathway
    # for _ in range(3):
    #     net_states = res_block(net_states, state_size)
    #
    # # Action pathway
    # for _ in range(2):
    #     net_actions = res_block(net_actions, action_size)
    #
    # # Merge state and action pathways
    # net = layers.concatenate([net_states, net_actions])
    #
    # # Final blocks
    # for _ in range(3):
    #     net = res_block(net, block_size)

    # Add hidden layer(s) for state pathway
    net_states = layers.Dense(
        units=300,
        kernel_regularizer=regularizers.l2(kernel_l2_reg))(states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)
    net_states = layers.Dense(
        units=400,
        kernel_regularizer=regularizers.l2(kernel_l2_reg))(net_states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)

    # Add hidden layer(s) for action pathway
    net_actions = layers.Dense(
        units=400,
        kernel_regularizer=regularizers.l2(kernel_l2_reg))(actions)
    net_actions = layers.BatchNormalization()(net_actions)
    net_actions = layers.LeakyReLU(1e-2)(net_actions)

    # Merge state and action pathways (element-wise sum; both are 400-wide)
    net = layers.add([net_states, net_actions])
    net = layers.Dense(
        units=200,
        kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    # Add final output layer to produce action values (Q values)
    Q_values = layers.Dense(
        units=1,
        activation=None,
        kernel_regularizer=regularizers.l2(kernel_l2_reg),
        kernel_initializer=initializers.RandomUniform(minval=-5e-3, maxval=5e-3),
        # bias_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
        name='q_values')(net)

    # Create Keras model
    self.model = models.Model(inputs=[states, actions], outputs=Q_values)

    # Define optimizer and compile model for training with built-in loss function
    optimizer = optimizers.Adam(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss='mse')

    # Compute action gradients (derivative of Q values w.r.t. to actions)
    action_gradients = K.gradients(Q_values, actions)

    # Define an additional function to fetch action gradients (to be used by actor model)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def Attention_ResUNet_PA(dropout_rate=0.0, batch_norm=True):
    '''
    Residual UNet construction, with attention gate
    convolution: 3*3 SAME padding
    pooling: 2*2 VALID padding
    upsampling: 3*3 VALID padding
    final convolution: 1*1
    :param dropout_rate: FLAG & RATE of dropout.
            if < 0 dropout cancelled, if > 0 set as the rate
    :param batch_norm: flag of if batch_norm used,
            if True batch normalization
    :return: model
    '''
    # input data
    # dimension of the image depth
    inputs = layers.Input((INPUT_SIZE, INPUT_SIZE, INPUT_CHANNEL), dtype=tf.float32)
    axis = 3  # channels-last concatenation axis

    # Downsampling layers
    # DownRes 1, double residual convolution + pooling
    conv_128 = double_conv_layer(inputs, FILTER_SIZE, FILTER_NUM, dropout_rate, batch_norm)
    pool_64 = layers.MaxPooling2D(pool_size=(2, 2))(conv_128)
    # DownRes 2
    conv_64 = double_conv_layer(pool_64, FILTER_SIZE, 2 * FILTER_NUM, dropout_rate, batch_norm)
    pool_32 = layers.MaxPooling2D(pool_size=(2, 2))(conv_64)
    # DownRes 3
    conv_32 = double_conv_layer(pool_32, FILTER_SIZE, 4 * FILTER_NUM, dropout_rate, batch_norm)
    pool_16 = layers.MaxPooling2D(pool_size=(2, 2))(conv_32)
    # DownRes 4
    conv_16 = double_conv_layer(pool_16, FILTER_SIZE, 8 * FILTER_NUM, dropout_rate, batch_norm)
    pool_8 = layers.MaxPooling2D(pool_size=(2, 2))(conv_16)
    # DownRes 5, convolution only
    conv_8 = double_conv_layer(pool_8, FILTER_SIZE, 16 * FILTER_NUM, dropout_rate, batch_norm)

    # Upsampling layers
    # NOTE(review): each SE_block and its paired attention_block share the same
    # `name` argument (e.g. 'att_16'); if the helpers use it for Keras layer
    # names this would collide -- confirm against the helper implementations.
    # UpRes 6, attention gated concatenation + upsampling + double residual convolution
    # channel attention block
    se_conv_16 = SE_block(conv_16, out_dim=8 * FILTER_NUM, ratio=SE_RATIO, name='att_16')
    # spatial attention block
    gating_16 = gating_signal(conv_8, 8 * FILTER_NUM, batch_norm)
    att_16 = attention_block(se_conv_16, gating_16, 8 * FILTER_NUM, name='att_16')
    # attention re-weight & concatenate
    up_16 = layers.UpSampling2D(size=(UP_SAMP_SIZE, UP_SAMP_SIZE), data_format="channels_last")(conv_8)
    up_16 = layers.concatenate([up_16, att_16], axis=axis)
    up_conv_16 = double_conv_layer(up_16, FILTER_SIZE, 8 * FILTER_NUM, dropout_rate, batch_norm)
    # UpRes 7
    # channel attention block
    se_conv_32 = SE_block(conv_32, out_dim=4 * FILTER_NUM, ratio=SE_RATIO, name='att_32')
    # spatial attention block
    gating_32 = gating_signal(up_conv_16, 4 * FILTER_NUM, batch_norm)
    att_32 = attention_block(se_conv_32, gating_32, 4 * FILTER_NUM, name='att_32')
    # attention re-weight & concatenate
    up_32 = layers.UpSampling2D(size=(UP_SAMP_SIZE, UP_SAMP_SIZE), data_format="channels_last")(up_conv_16)
    up_32 = layers.concatenate([up_32, att_32], axis=axis)
    up_conv_32 = double_conv_layer(up_32, FILTER_SIZE, 4 * FILTER_NUM, dropout_rate, batch_norm)
    # UpRes 8
    # channel attention block
    se_conv_64 = SE_block(conv_64, out_dim=2 * FILTER_NUM, ratio=SE_RATIO, name='att_64')
    # spatial attention block
    gating_64 = gating_signal(up_conv_32, 2 * FILTER_NUM, batch_norm)
    att_64 = attention_block(se_conv_64, gating_64, 2 * FILTER_NUM, name='att_64')
    # attention re-weight & concatenate
    up_64 = layers.UpSampling2D(size=(UP_SAMP_SIZE, UP_SAMP_SIZE), data_format="channels_last")(up_conv_32)
    up_64 = layers.concatenate([up_64, att_64], axis=axis)
    up_conv_64 = double_conv_layer(up_64, FILTER_SIZE, 2 * FILTER_NUM, dropout_rate, batch_norm)
    # UpRes 9
    # channel attention block
    se_conv_128 = SE_block(conv_128, out_dim=FILTER_NUM, ratio=SE_RATIO, name='att_128')
    # spatial attention block
    gating_128 = gating_signal(up_conv_64, FILTER_NUM, batch_norm)
    # attention re-weight & concatenate
    att_128 = attention_block(se_conv_128, gating_128, FILTER_NUM, name='att_128')
    up_128 = layers.UpSampling2D(size=(UP_SAMP_SIZE, UP_SAMP_SIZE), data_format="channels_last")(up_conv_64)
    up_128 = layers.concatenate([up_128, att_128], axis=axis)
    up_conv_128 = double_conv_layer(up_128, FILTER_SIZE, FILTER_NUM, dropout_rate, batch_norm)

    # 1*1 convolutional layers
    # valid padding
    # batch normalization
    conv_final = layers.Conv2D(OUTPUT_MASK_CHANNEL, kernel_size=(1, 1))(up_conv_128)
    conv_final = layers.BatchNormalization(axis=axis)(conv_final)
    # NOTE(review): the comment block above originally said "sigmoid nonlinear
    # activation" but 'relu' is used here -- confirm which is intended.
    conv_final = layers.Activation('relu')(conv_final)

    # Model integration
    model = models.Model(inputs, conv_final, name="AttentionSEResUNet")
    return model
def UNet_PA(dropout_rate=0.0, batch_norm=True):
    """UNet construction for PACT reconstruction.

    convolution: 3*3 SAME padding
    pooling: 2*2 VALID padding
    upsampling: 3*3 VALID padding
    final convolution: 1*1

    :param dropout_rate: FLAG & RATE of dropout.
        if < 0 dropout cancelled, if > 0 set as the rate
    :param batch_norm: flag of if batch_norm used, if True batch normalization
    :return: UNet model for PACT recons
    """
    # input data; dimension of the image depth
    inputs = layers.Input((INPUT_SIZE, INPUT_SIZE, INPUT_CHANNEL))
    axis = 3  # channels_last

    # FIX: double_conv_layer takes (input, kernel_size, n_filters, ...) --
    # see the attention U-Net builder earlier in this file.  The per-stage
    # multipliers belong on the filter COUNT (FILTER_NUM), not on the
    # kernel size, and INPUT_SIZE is an image dimension, not a filter count.

    # Subsampling (encoder) path: double conv + 2x2 max pooling
    conv_128 = double_conv_layer(inputs, FILTER_SIZE, FILTER_NUM, dropout_rate, batch_norm)
    pool_64 = layers.MaxPooling2D(pool_size=(2, 2))(conv_128)
    conv_64 = double_conv_layer(pool_64, FILTER_SIZE, 2 * FILTER_NUM, dropout_rate, batch_norm)
    pool_32 = layers.MaxPooling2D(pool_size=(2, 2))(conv_64)
    conv_32 = double_conv_layer(pool_32, FILTER_SIZE, 4 * FILTER_NUM, dropout_rate, batch_norm)
    pool_16 = layers.MaxPooling2D(pool_size=(2, 2))(conv_32)
    conv_16 = double_conv_layer(pool_16, FILTER_SIZE, 8 * FILTER_NUM, dropout_rate, batch_norm)
    pool_8 = layers.MaxPooling2D(pool_size=(2, 2))(conv_16)
    # bottleneck: convolution only
    conv_8 = double_conv_layer(pool_8, FILTER_SIZE, 16 * FILTER_NUM, dropout_rate, batch_norm)

    # Upsampling (decoder) path: upsample + skip concatenation + double conv
    up_16 = layers.UpSampling2D(size=(UP_SAMP_SIZE, UP_SAMP_SIZE), data_format="channels_last")(conv_8)
    up_16 = layers.concatenate([up_16, conv_16], axis=axis)
    up_conv_16 = double_conv_layer(up_16, FILTER_SIZE, 8 * FILTER_NUM, dropout_rate, batch_norm)

    up_32 = layers.concatenate([
        layers.UpSampling2D(size=(UP_SAMP_SIZE, UP_SAMP_SIZE), data_format="channels_last")(up_conv_16),
        conv_32
    ], axis=axis)
    up_conv_32 = double_conv_layer(up_32, FILTER_SIZE, 4 * FILTER_NUM, dropout_rate, batch_norm)

    up_64 = layers.concatenate([
        layers.UpSampling2D(size=(UP_SAMP_SIZE, UP_SAMP_SIZE), data_format="channels_last")(up_conv_32),
        conv_64
    ], axis=axis)
    up_conv_64 = double_conv_layer(up_64, FILTER_SIZE, 2 * FILTER_NUM, dropout_rate, batch_norm)

    up_128 = layers.concatenate([
        layers.UpSampling2D(size=(UP_SAMP_SIZE, UP_SAMP_SIZE), data_format="channels_last")(up_conv_64),
        conv_128
    ], axis=axis)
    up_conv_128 = double_conv_layer(up_128, FILTER_SIZE, FILTER_NUM, dropout_rate, batch_norm)

    # 1*1 convolution, valid padding, batch normalization, sigmoid activation
    conv_final = layers.Conv2D(OUTPUT_MASK_CHANNEL, kernel_size=(1, 1))(up_conv_128)
    conv_final = layers.BatchNormalization(axis=axis)(conv_final)
    conv_final = layers.Activation('sigmoid')(conv_final)

    # Model integration
    model = models.Model(inputs, conv_final, name="UNet")
    return model
import numpy as np
import tensorflow as tf
# FIX: tensorflow.contrib was removed in TensorFlow 2.x; use the supported
# tf.keras API instead (this also avoids mixing contrib layers with
# tf.keras.Model, which builds one consistent graph).
from tensorflow.keras import layers

if __name__ == '__main__':
    # Two (2, 2, 3) inputs concatenated along the last (channel) axis.
    input1_ = layers.Input(shape=(2, 2, 3), name='input1')
    input2_ = layers.Input(shape=(2, 2, 3), name='input2')
    y = layers.Concatenate()([input1_, input2_])
    model = tf.keras.Model(inputs=[input1_, input2_], outputs=y)
    model.summary()

    # Generate training data
    x1 = np.random.rand(1, 2, 2, 3)
    print(x1)
    print('\n')
    x2 = np.random.rand(1, 2, 2, 3)
    print(x2)
    print('\n')

    result = model.predict([x1, x2], batch_size=1)
    print(result)
    # print(result.reshape((6, 6)))
def create_network(**kwargs): model_input = L.Input(shape=(17, 9, 9)) print model_input convolution_path = L.Convolution2D( input_shape=(), filters=64, kernel_size=3, activation='linear', padding='same', kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(model_input) print convolution_path convolution_path = L.BatchNormalization( beta_regularizer=R.l2(.0001), gamma_regularizer=R.l2(.0001))(convolution_path) print convolution_path convolution_path = L.Activation('relu')(convolution_path) convolution_path = L.Convolution2D( input_shape=(), filters=128, kernel_size=3, activation='linear', padding='same', kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(convolution_path) print convolution_path convolution_path = L.BatchNormalization( beta_regularizer=R.l2(.0001), gamma_regularizer=R.l2(.0001))(convolution_path) print convolution_path convolution_path = L.Activation('relu')(convolution_path) print '------------- value -------------------' # policy head policy_path = L.Convolution2D( input_shape=(), filters=2, kernel_size=1, activation='linear', padding='same', kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(convolution_path) print policy_path policy_path = L.BatchNormalization( beta_regularizer=R.l2(.0001), gamma_regularizer=R.l2(.0001))(policy_path) policy_path = L.Activation('relu')(policy_path) print policy_path policy_path = L.Flatten()(policy_path) print policy_path policy_path = L.Dense((9 * 9) + 1, kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(policy_path) policy_output = L.Activation('softmax')(policy_path) print 'policy_output', policy_output print '------------- policy -------------------' # value head value_path = L.Convolution2D( input_shape=(), filters=1, kernel_size=1, activation='linear', padding='same', kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(convolution_path) print value_path value_path = L.BatchNormalization( beta_regularizer=R.l2(.0001), 
gamma_regularizer=R.l2(.0001))(value_path) value_path = L.Activation('relu')(value_path) print value_path value_path = L.Flatten()(value_path) print value_path value_path = L.Dense(256, kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(value_path) print value_path value_path = L.Activation('relu')(value_path) print value_path value_path = L.Dense(1, kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(value_path) print value_path value_output = L.Activation('tanh')(value_path) print value_path return M.Model(inputs=[model_input], outputs=[policy_output, value_output])
return x # Basic #sz_ly0_filters, nb_ly0_filters, nb_ly0_stride = (64,5,2) #sz_res_filters, nb_res_filters, nb_res_stages = (3,16,3) # 92% of accuracy sz_ly0_filters, nb_ly0_filters, nb_ly0_stride = (128,3,2) sz_res_filters, nb_res_filters, nb_res_stages = (3,32,25) img_input = layers.Input(shape=(32,32,3), name='cifar') # Initial layers x = layers.Conv2D(sz_ly0_filters, (nb_ly0_filters,nb_ly0_filters), strides=(nb_ly0_stride, nb_ly0_stride), padding='same', kernel_initializer='glorot_normal', kernel_regularizer=regularizers.l2(1.e-4), use_bias=False, name='conv0')(img_input) x = layers.BatchNormalization(axis=-1, name='bn0')(x) x = layers.Activation('relu', name='relu0')(x) # Resnet layers for stage in range(1, nb_res_stages+1): x = residual_layer(x, nb_in_filters=sz_ly0_filters,
def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
    # Define input layers
    states = layers.Input(shape=(self.state_size, ), name='states')
    actions = layers.Input(shape=(self.action_size, ), name='actions')

    # Hidden layer(s) for the state pathway.
    # FIX: the Dense layers previously used activation='relu' even though
    # each one is immediately followed by BatchNormalization + LeakyReLU,
    # stacking two activations back to back.  The Dense layers are now
    # linear so BN + LeakyReLU is the single activation, matching the
    # actor network in this file.
    net_states = layers.Dense(
        units=32, use_bias=False,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01))(states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)
    net_states = layers.Dense(
        units=64, use_bias=False,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01))(net_states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)
    net_states = layers.Dense(
        units=128, use_bias=False,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01))(net_states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)

    # Hidden layer(s) for the action pathway (mirrors the state pathway)
    net_actions = layers.Dense(
        units=32, use_bias=False,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01))(actions)
    net_actions = layers.BatchNormalization()(net_actions)
    net_actions = layers.LeakyReLU(1e-2)(net_actions)
    net_actions = layers.Dense(
        units=64, use_bias=False,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01))(net_actions)
    net_actions = layers.BatchNormalization()(net_actions)
    net_actions = layers.LeakyReLU(1e-2)(net_actions)
    net_actions = layers.Dense(
        units=128, use_bias=False,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01))(net_actions)
    net_actions = layers.BatchNormalization()(net_actions)
    net_actions = layers.LeakyReLU(1e-2)(net_actions)

    # Combine state and action pathways
    net = layers.Add()([net_states, net_actions])
    net = layers.Activation('relu')(net)

    # Final output layer to produce action values (Q values)
    Q_values = layers.Dense(units=1, name='q_values')(net)

    # Create Keras model
    self.model = models.Model(inputs=[states, actions], outputs=Q_values)

    # Define optimizer and compile model for training with built-in loss function
    optimizer = optimizers.Adam()
    self.model.compile(optimizer=optimizer, loss='mse')

    # Compute action gradients (derivative of Q values w.r.t. actions)
    action_gradients = K.gradients(Q_values, actions)

    # Additional function to fetch action gradients (used by the actor model)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def build_model(self):
    """Build a critic (Q-value) network mapping (state, action) pairs -> Q(s, a).

    Also defines `self.get_action_gradients`, a backend function returning
    dQ/da for use by the actor's training step.
    """
    # Define input layers
    input_states = layers.Input(shape=(self.state_size, ), name='input_states')
    input_actions = layers.Input(shape=(self.action_size, ), name='input_actions')

    # NOTE: the previous version carried a large block of commented-out
    # alternative architectures; that dead code has been removed.

    # Hidden layer(s) for the state pathway
    net_states = layers.Dense(units=400)(input_states)
    net_states = layers.Activation("relu")(net_states)
    net_states = layers.Dense(units=300)(net_states)
    net_states = layers.Activation("relu")(net_states)

    # Hidden layer(s) for the action pathway
    net_actions = layers.Dense(units=300)(input_actions)
    net_actions = layers.Activation("relu")(net_actions)

    # Combine state and action pathways
    net = layers.Add()([net_states, net_actions])
    net = layers.Activation('relu')(net)
    net = layers.Dense(units=200,
                       kernel_initializer=initializers.RandomUniform(
                           minval=-0.5, maxval=0.5))(net)
    net = layers.Activation('relu')(net)

    # Final output layer to produce action values (Q values)
    Q_values = layers.Dense(units=1, name='q_values')(net)

    # Keras wrap the model
    self.model = models.Model(inputs=[input_states, input_actions], outputs=Q_values)
    optimizer = optimizers.Adam(lr=0.0001)
    self.model.compile(optimizer=optimizer, loss='mse')

    # Gradients of Q w.r.t. the actions, fetched by the actor model
    action_gradients = K.gradients(Q_values, input_actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def create_network(**kwargs): """construct a convolutional neural network with Residual blocks. Arguments are the same as with the default CNNPolicy network, except the default number of layers is 20 plus a new n_skip parameter Keword Arguments: - input_dim: depth of features to be processed by first layer (default 17) - board: width of the go board to be processed (default 19) - filters_per_layer: number of filters used on every layer (default 256) - layers: number of residual blocks (default 19) - filter_width: width of filter Must be odd. """ defaults = { "input_dim": 17, "board": 9, "filters_per_layer": 64, "layers": 9, "filter_width": 3 } # copy defaults, but override with anything in kwargs params = defaults params.update(kwargs) # create the network using Keras' functional API, model_input = L.Input(shape=(params["input_dim"], params["board"], params["board"])) print model_input # create first layer convolution_path = L.Convolution2D( input_shape=(), filters=params["filters_per_layer"], kernel_size=params["filter_width"], activation='linear', padding='same', kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(model_input) print convolution_path convolution_path = L.BatchNormalization( beta_regularizer=R.l2(.0001), gamma_regularizer=R.l2(.0001))(convolution_path) print convolution_path convolution_path = L.Activation('relu')(convolution_path) def add_resnet_unit(path, **params): block_input = path # add Conv2D path = L.Convolution2D( filters=params["filters_per_layer"], kernel_size=params["filter_width"], activation='linear', padding='same', kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(path) print path path = L.BatchNormalization( beta_regularizer=R.l2(.0001), gamma_regularizer=R.l2(.0001))(path) print path path = L.Activation('relu')(path) print path path = L.Convolution2D( filters=params["filters_per_layer"], kernel_size=params["filter_width"], activation='linear', padding='same', kernel_regularizer=R.l2(.0001), 
bias_regularizer=R.l2(.0001))(path) print path path = L.BatchNormalization( beta_regularizer=R.l2(.0001), gamma_regularizer=R.l2(.0001))(path) print path path = L.Add()([block_input, path]) print path path = L.Activation('relu')(path) print path return path # create all other layers for _ in range(params['layers']): convolution_path = add_resnet_unit(convolution_path, **params) print '------------- policy -------------------' # policy head policy_path = L.Convolution2D( input_shape=(), filters=2, kernel_size=1, activation='linear', padding='same', kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(convolution_path) print policy_path policy_path = L.BatchNormalization( beta_regularizer=R.l2(.0001), gamma_regularizer=R.l2(.0001))(policy_path) policy_path = L.Activation('relu')(policy_path) print policy_path policy_path = L.Flatten()(policy_path) print policy_path policy_path = L.Dense( params["board"]*params["board"]+1, kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(policy_path) policy_output = L.Activation('softmax')(policy_path) print 'policy_output', policy_output print '-------------value -------------------' # value head value_path = L.Convolution2D( input_shape=(), filters=1, kernel_size=1, activation='linear', padding='same', kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(convolution_path) print value_path value_path = L.BatchNormalization( beta_regularizer=R.l2(.0001), gamma_regularizer=R.l2(.0001))(value_path) value_path = L.Activation('relu')(value_path) print value_path value_path = L.Flatten()(value_path) print value_path value_path = L.Dense( 256, kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(value_path) print value_path value_path = L.Activation('relu')(value_path) print value_path value_path = L.Dense( 1, kernel_regularizer=R.l2(.0001), bias_regularizer=R.l2(.0001))(value_path) print value_path value_output = L.Activation('tanh')(value_path) print value_path return M.Model(inputs=[model_input], 
outputs=[policy_output, value_output])
def VanillaUnet(num_class, img_shape):
    """Plain U-Net: four-level encoder/decoder with crop-and-concat skips.

    Feature widths run 32 -> 512 down the encoder and back up the decoder;
    the output is a 1x1 convolution with `num_class` channels (raw logits,
    no final activation).
    """
    concat_axis = 3  # channels_last

    inputs = layers.Input(shape=img_shape)

    # ---- encoder: two 3x3 relu convs per level, then 2x2 max pooling ----
    enc1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='conv1_1')(inputs)
    enc1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(enc1)
    down1 = layers.MaxPooling2D(pool_size=(2, 2))(enc1)

    enc2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(down1)
    enc2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(enc2)
    down2 = layers.MaxPooling2D(pool_size=(2, 2))(enc2)

    enc3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(down2)
    enc3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(enc3)
    down3 = layers.MaxPooling2D(pool_size=(2, 2))(enc3)

    enc4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(down3)
    enc4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(enc4)
    down4 = layers.MaxPooling2D(pool_size=(2, 2))(enc4)

    # ---- bottleneck ----
    bottom = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(down4)
    bottom = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(bottom)

    # ---- decoder: upsample, crop the skip tensor to match, concatenate ----
    up1 = layers.UpSampling2D(size=(2, 2))(bottom)
    ch, cw = get_crop_shape(enc4, up1)
    skip4 = layers.Cropping2D(cropping=(ch, cw))(enc4)
    dec1 = layers.concatenate([up1, skip4], axis=concat_axis)
    dec1 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(dec1)
    dec1 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(dec1)

    up2 = layers.UpSampling2D(size=(2, 2))(dec1)
    ch, cw = get_crop_shape(enc3, up2)
    skip3 = layers.Cropping2D(cropping=(ch, cw))(enc3)
    dec2 = layers.concatenate([up2, skip3], axis=concat_axis)
    dec2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(dec2)
    dec2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(dec2)

    up3 = layers.UpSampling2D(size=(2, 2))(dec2)
    ch, cw = get_crop_shape(enc2, up3)
    skip2 = layers.Cropping2D(cropping=(ch, cw))(enc2)
    dec3 = layers.concatenate([up3, skip2], axis=concat_axis)
    dec3 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(dec3)
    dec3 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(dec3)

    up4 = layers.UpSampling2D(size=(2, 2))(dec3)
    ch, cw = get_crop_shape(enc1, up4)
    skip1 = layers.Cropping2D(cropping=(ch, cw))(enc1)
    dec4 = layers.concatenate([up4, skip1], axis=concat_axis)
    dec4 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(dec4)
    dec4 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(dec4)

    # pad back to the input spatial size, then project to class logits
    ch, cw = get_crop_shape(inputs, dec4)
    dec4 = layers.ZeroPadding2D(padding=((ch[0], ch[1]), (cw[0], cw[1])))(dec4)
    logits = layers.Conv2D(num_class, (1, 1))(dec4)

    model = models.Model(inputs=inputs, outputs=logits)
    return model