def rnn_conv_block(self,
                   n_filter,
                   kernel_size,
                   strides,
                   x,
                   conv_type=Conv2D,
                   drop=True,
                   flatten=False,
                   batchnorm=True,
                   **kwargs):
    """Time-distributed conv block: conv -> (BatchNorm) -> activation -> (Dropout) -> (Flatten)."""
    x = TD(
        conv_type(
            n_filter,
            kernel_size=kernel_size,
            strides=strides,
            use_bias=self.use_bias,
            padding=self.padding,
            kernel_regularizer=l1_l2(self.regularizer[0], self.regularizer[1]),
            bias_regularizer=l1_l2(self.regularizer[0], self.regularizer[1]),
        ), **kwargs)(x)
    if batchnorm:
        x = TD(BatchNormalization())(x)
    x = TD(Activation(self.prev_act))(x)
    if drop:
        x = TD(Dropout(self.drop_rate))(x)
    if flatten:
        x = TD(Flatten())(x)
    return x
def rnn_dense_block(self, n_neurones, x, drop=True, batchnorm=True):
    """Time-distributed dense block: Dense -> (BatchNorm) -> activation -> (Dropout)."""
    x = TD(Dense(n_neurones, use_bias=self.use_bias))(x)
    if batchnorm:
        x = TD(BatchNormalization())(x)
    x = TD(Activation(self.prev_act))(x)
    if drop:
        x = TD(Dropout(self.drop_rate))(x)
    return x
def build_rnn_distinct_models(env: gym.Env, bs: int = 1):
    """Build simple policy and value models having a recurrent layer before their heads."""
    continuous_control = isinstance(env.action_space, Box)
    state_dimensionality, n_actions = env_extract_dims(env)

    inputs = tf.keras.Input(batch_shape=(bs, None, state_dimensionality))
    masked = tf.keras.layers.Masking()(inputs)

    # policy network
    x = TD(_build_encoding_sub_model((state_dimensionality,), bs,
                                     layer_sizes=(64,), name="policy_encoder"),
           name="TD_policy")(masked)
    x.set_shape([bs] + x.shape[1:])
    x = tf.keras.layers.SimpleRNN(64, stateful=True, return_sequences=True,
                                  batch_size=bs, name="policy_recurrent_layer")(x)

    if continuous_control:
        out_policy = _build_continuous_head(n_actions, x.shape[1:], bs)(x)
    else:
        out_policy = _build_discrete_head(n_actions, x.shape[1:], bs)(x)

    # value network
    x = TD(_build_encoding_sub_model((state_dimensionality,), bs,
                                     layer_sizes=(64,), name="value_encoder"),
           name="TD_value")(masked)
    x.set_shape([bs] + x.shape[1:])
    x = tf.keras.layers.SimpleRNN(64, stateful=True, return_sequences=True,
                                  batch_size=bs, name="value_recurrent_layer")(x)
    out_value = tf.keras.layers.Dense(1)(x)

    policy = tf.keras.Model(inputs=inputs, outputs=out_policy, name="simple_rnn_policy")
    value = tf.keras.Model(inputs=inputs, outputs=out_value, name="simple_rnn_value")

    return policy, value, tf.keras.Model(inputs=inputs,
                                         outputs=[out_policy, out_value],
                                         name="simple_rnn")
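# Hypothetical usage sketch, not part of the original source: build the
# stateful models for a CartPole-style environment and push one timestep
# through the joint network. Because the recurrent layers are stateful with
# a fixed batch size, inputs must match (bs, timesteps, state_dim) exactly,
# and recurrent state must be reset explicitly between episodes.
def _demo_rnn_distinct_models():
    env = gym.make("CartPole-v1")  # discrete actions -> discrete head
    policy, value, joint = build_rnn_distinct_models(env, bs=1)
    obs = tf.random.normal((1, 1, env.observation_space.shape[0]))
    action_probs, state_value = joint(obs)
    joint.reset_states()  # clear the SimpleRNN state for the next episode
    return action_probs, state_value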
def build(self):
    inputs = []
    outputs = []
    input_components_names = self.dataset.indexes2components_names(self.input_components)
    output_components_names = self.dataset.indexes2components_names(self.output_components)

    inp = Input(shape=self.img_shape)
    inputs.append(inp)

    x = self.rnn_conv_block(12, 5, 2, inp, drop=True, name="start_fe")
    x = self.rnn_conv_block(16, 5, 2, x, drop=True)
    x = self.rnn_conv_block(32, 3, 2, x, drop=True)
    x = self.rnn_conv_block(48, 3, 2, x, drop=True, name="end_fe")

    y1 = self.rnn_conv_block(64, (8, 10), (8, 10), x, flatten=True, drop=False)
    y2 = self.rnn_conv_block(24, (8, 1), (8, 1), x, flatten=True, drop=False)
    y3 = self.rnn_conv_block(24, (1, 10), (1, 10), x, flatten=True, drop=False)
    y = Concatenate()([y1, y2, y3])
    y = TD(Dropout(self.drop_rate))(y)

    y = self.rnn_dense_block(150, y, batchnorm=False)
    y = self.rnn_dense_block(75, y, batchnorm=False)

    if "speed" in input_components_names:
        inp = Input((self.img_shape[0], 1))
        inputs.append(inp)
        y = Concatenate()([y, inp])
        y = self.rnn_dense_block(50, y, batchnorm=False, drop=False)

    if "direction" in input_components_names:
        z = Dense(1,
                  use_bias=self.use_bias,
                  activation=self.last_act,
                  name="direction")(y)  # kernel_regularizer=l2(0.0005)
        outputs.append(z)
        y = Concatenate()([y, z])

    if "throttle" in output_components_names:
        th = Dense(1, use_bias=self.use_bias, activation="sigmoid", name="throttle")(y)
        outputs.append(th)
        y = Concatenate()([y, th])

    return Model(inputs, outputs)
def build_model_cnnedit3(self):
    img_channels = self.img_channels
    img_rows = self.img_rows
    img_cols = self.img_cols
    color_channels = self.color_channels

    model = Sequential()
    model.add(Input(shape=(img_channels, img_rows, img_cols, color_channels),
                    name="img_in"))  # (batchsize, 4, 80, 80, 1)
    model.add(TD(Conv2D(16, (5, 5), strides=(2, 2), padding="same", activation='relu')))
    model.add(TD(Conv2D(32, (5, 5), strides=(2, 2), padding="same", activation='relu')))
    model.add(TD(Conv2D(64, (5, 5), strides=(2, 2), padding="same", activation='relu')))
    model.add(TD(Conv2D(128, (3, 3), strides=(2, 2), padding="same", activation='relu')))
    model.add(TD(Conv2D(256, (3, 3), strides=(1, 1), padding="same", activation='relu')))
    model.add(TD(GlobalAveragePooling2D()))  # replace the CNN model with a custom one
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))

    # 15 categorical bins for steering angles
    model.add(Dense(self.action_size, activation="linear"))

    adam = Adam(learning_rate=self.learning_rate)
    model.compile(loss="mse", optimizer=adam)
    return model
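import numpy as np

# Hypothetical usage sketch, not from the original source: given an agent
# instance that owns build_model_cnnedit3 (the img_channels / img_rows /
# img_cols / color_channels / action_size / learning_rate attributes are
# assumed from the method body), run a greedy forward pass over a stack of
# frames and pick the highest-valued steering bin.
def _demo_cnnedit3(agent):
    model = agent.build_model_cnnedit3()
    frames = np.zeros((1, agent.img_channels, agent.img_rows,
                       agent.img_cols, agent.color_channels), dtype=np.float32)
    q_values = model.predict(frames, verbose=0)  # shape (1, action_size)
    return int(np.argmax(q_values[0]))           # greedy action index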
def build_model_cnnedit4_lstm(self):
    img_channels = self.img_channels
    img_rows = self.img_rows
    img_cols = self.img_cols
    color_channels = self.color_channels
    img_seq_shape = (img_channels, img_rows, img_cols, color_channels)

    model = Sequential()
    model.add(Input(shape=img_seq_shape, name='img_in'))  # 4*80*80*1
    model.add(Conv3D(16, (3, 5, 5), strides=(1, 2, 2), padding="same", activation='relu'))
    model.add(Conv3D(32, (3, 5, 5), strides=(1, 2, 2), padding="same", activation='relu'))
    model.add(Conv3D(64, (1, 5, 5), strides=(1, 2, 2), padding="same", activation='relu'))
    model.add(Conv3D(128, (1, 3, 3), strides=(1, 2, 2), padding="same", activation='relu'))
    model.add(Conv3D(256, (1, 3, 3), strides=(1, 1, 1), padding="same", activation='relu'))
    model.add(TD(GlobalAveragePooling2D()))
    model.add(LSTM(128, return_sequences=False, name="LSTM_fin"))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(self.action_size, activation='linear', name='model_outputs'))

    adam = Adam(learning_rate=self.learning_rate)
    model.compile(loss="mse", optimizer=adam)
    return model
def build_model_transfer_mobilenetv2_lstm(self):
    if self.img_cols != 96:
        self.img_cols = 96
    if self.img_rows != 96:
        self.img_rows = 96
    img_channels = self.img_channels

    model = Sequential()
    model.add(Input(shape=(img_channels, 3, 3, 1280),
                    name='features_in'))  # sequence of 3x3x1280 MobileNetV2 feature maps
    model.add(TD(GlobalAveragePooling2D()))
    model.add(LSTM(128, return_sequences=False, name="LSTM_fin"))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(self.action_size, activation='linear', name='model_outputs'))

    adam = Adam(learning_rate=self.learning_rate)
    model.compile(loss="mse", optimizer=adam)
    return model
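# Hypothetical companion sketch, not from the original source: the
# (3, 3, 1280) feature shape matches tf.keras.applications.MobileNetV2 with
# include_top=False applied to 96x96 RGB frames (96 / 32 = 3), so a
# plausible pipeline precomputes per-frame features and feeds the sequence
# to the model above.
def _demo_mobilenetv2_features(frames):  # frames: (seq_len, 96, 96, 3) uint8
    backbone = tf.keras.applications.MobileNetV2(
        input_shape=(96, 96, 3), include_top=False, weights="imagenet")
    x = tf.keras.applications.mobilenet_v2.preprocess_input(
        tf.cast(frames, tf.float32))
    feats = backbone(x)                    # (seq_len, 3, 3, 1280)
    return tf.expand_dims(feats, axis=0)   # (1, seq_len, 3, 3, 1280) for features_in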
def build_shadow_brain_v1(env: gym.Env, distribution: BasePolicyDistribution,
                          bs: int, model_type: str = "rnn", **kwargs):
    """Build network for the shadow hand task."""
    state_dimensionality, n_actions = env_extract_dims(env)
    hidden_dimensions = 32

    rnn_choice = {
        "rnn": tf.keras.layers.SimpleRNN,
        "lstm": tf.keras.layers.LSTM,
        "gru": tf.keras.layers.GRU
    }[model_type]

    # inputs
    visual_in = tf.keras.Input(batch_shape=(bs, None, VISION_WH, VISION_WH, 3),
                               name="visual_input")
    proprio_in = tf.keras.Input(batch_shape=(bs, None, 48), name="proprioceptive_input")
    touch_in = tf.keras.Input(batch_shape=(bs, None, 92), name="somatosensory_input")
    goal_in = tf.keras.Input(batch_shape=(bs, None, 7), name="goal_input")

    # abstractions of perceptive inputs
    visual_latent = TD(_build_visual_encoder(shape=(VISION_WH, VISION_WH, 3),
                                             batch_size=bs))(visual_in)
    proprio_latent = TD(_build_fcn_component(48, 12, 8, batch_size=bs,
                                             name="latent_proprio"))(proprio_in)
    touch_latent = TD(_build_fcn_component(92, 24, 8, batch_size=bs,
                                           name="latent_touch"))(touch_in)

    # concatenation of perceptive abstractions
    concatenation = tf.keras.layers.Concatenate()(
        [visual_latent, proprio_latent, touch_latent])

    # fully connected layer integrating perceptive representations
    x = TD(tf.keras.layers.Dense(48))(concatenation)
    x = TD(tf.keras.layers.ReLU())(x)

    # concatenation of goal and perception
    x.set_shape([bs] + x.shape[1:])
    x = tf.keras.layers.Concatenate()([x, goal_in])

    # recurrent layer
    o = rnn_choice(hidden_dimensions, stateful=True, return_sequences=True,
                   batch_size=bs)(x)

    # output heads
    policy_out = distribution.build_action_head(n_actions, o.shape[1:], bs)(o)
    value_out = tf.keras.layers.Dense(1, name="value")(o)

    # define models
    policy = tf.keras.Model(inputs=[visual_in, proprio_in, touch_in, goal_in],
                            outputs=[policy_out], name="shadow_brain_v1_policy")
    value = tf.keras.Model(inputs=[visual_in, proprio_in, touch_in, goal_in],
                           outputs=[value_out], name="shadow_brain_v1_value")
    joint = tf.keras.Model(inputs=[visual_in, proprio_in, touch_in, goal_in],
                           outputs=[policy_out, value_out], name="shadow_brain_v1")

    return policy, value, joint
def build_shadow_brain_v2(env: gym.Env, distribution: BasePolicyDistribution,
                          bs: int, **kwargs):
    """Build network for the shadow hand task, version 2."""
    state_dimensionality, n_actions = env_extract_dims(env)
    hidden_dimensions = 32

    # inputs
    visual_in = tf.keras.Input(batch_shape=(bs, None, VISION_WH, VISION_WH, 3),
                               name="visual_input")
    proprio_in = tf.keras.Input(batch_shape=(bs, None, 48), name="proprioceptive_input")
    touch_in = tf.keras.Input(batch_shape=(bs, None, 92), name="somatosensory_input")
    goal_in = tf.keras.Input(batch_shape=(bs, None, 7), name="goal_input")

    # abstractions of perceptive inputs
    visual_latent = TD(_build_visual_encoder(shape=(VISION_WH, VISION_WH, 3),
                                             batch_size=bs))(visual_in)
    visual_latent = TD(tf.keras.layers.Dense(128))(visual_latent)
    visual_latent = TD(tf.keras.layers.ReLU())(visual_latent)
    visual_latent.set_shape([bs] + visual_latent.shape[1:])
    visual_plus_goal = tf.keras.layers.Concatenate()([visual_latent, goal_in])
    eigengrasps = TD(tf.keras.layers.Dense(20))(visual_plus_goal)
    eigengrasps = TD(tf.keras.layers.ReLU())(eigengrasps)

    # concatenation of touch and proprioception
    proprio_touch = tf.keras.layers.Concatenate()([proprio_in, touch_in])
    proprio_touch_latent = TD(tf.keras.layers.Dense(20))(proprio_touch)
    proprio_touch_latent = TD(tf.keras.layers.ReLU())(proprio_touch_latent)

    # concatenation of goal and perception
    proprio_touch_latent.set_shape([bs] + proprio_touch_latent.shape[1:])
    eigengrasps.set_shape([bs] + eigengrasps.shape[1:])
    x = tf.keras.layers.Concatenate()([goal_in, eigengrasps, proprio_touch_latent])

    # recurrent layer
    rnn_out = tf.keras.layers.GRU(hidden_dimensions, stateful=True,
                                  return_sequences=True, batch_size=bs)(x)

    # output heads
    policy_out = distribution.build_action_head(n_actions, rnn_out.shape[1:], bs)(rnn_out)
    value_out = tf.keras.layers.Dense(1, name="value")(rnn_out)

    # define models
    policy = tf.keras.Model(inputs=[visual_in, proprio_in, touch_in, goal_in],
                            outputs=[policy_out], name="shadow_brain_v2_policy")
    value = tf.keras.Model(inputs=[visual_in, proprio_in, touch_in, goal_in],
                           outputs=[value_out], name="shadow_brain_v2_value")
    joint = tf.keras.Model(inputs=[visual_in, proprio_in, touch_in, goal_in],
                           outputs=[policy_out, value_out], name="shadow_brain_v2")

    return policy, value, joint
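# Hypothetical smoke-test sketch, not from the original source: feed zero
# tensors of the declared modality shapes through the joint v2 network for a
# single timestep. VISION_WH, env and distribution are assumed to come from
# the surrounding module, as in the builders above.
def _demo_shadow_brain_v2(env, distribution, bs=1):
    policy, value, joint = build_shadow_brain_v2(env, distribution, bs)
    inputs = [
        tf.zeros((bs, 1, VISION_WH, VISION_WH, 3)),  # visual_input
        tf.zeros((bs, 1, 48)),                       # proprioceptive_input
        tf.zeros((bs, 1, 92)),                       # somatosensory_input
        tf.zeros((bs, 1, 7)),                        # goal_input
    ]
    action_params, state_value = joint(inputs)
    joint.reset_states()  # stateful GRU: reset between rollouts
    return action_params, state_value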
def rnn_lstm(seq_length=3, num_outputs=2, input_shape=(120, 160, 3)):
    # add a sequence length dimension, as keras' TimeDistributed expects
    # inputs of shape (num_samples, seq_length) + input_shape
    img_seq_shape = (seq_length,) + input_shape
    drop_out = 0.3

    x = Sequential()
    x.add(TD(Convolution2D(24, (5, 5), strides=(2, 2), activation='relu'),
             input_shape=img_seq_shape))
    x.add(TD(Dropout(drop_out)))
    x.add(TD(Convolution2D(32, (5, 5), strides=(2, 2), activation='relu')))
    x.add(TD(Dropout(drop_out)))
    x.add(TD(Convolution2D(32, (3, 3), strides=(2, 2), activation='relu')))
    x.add(TD(Dropout(drop_out)))
    x.add(TD(Convolution2D(32, (3, 3), strides=(1, 1), activation='relu')))
    x.add(TD(Dropout(drop_out)))
    x.add(TD(MaxPooling2D(pool_size=(2, 2))))
    x.add(TD(Flatten(name='flattened')))
    x.add(TD(Dense(100, activation='relu')))
    x.add(TD(Dropout(drop_out)))

    x.add(LSTM(128, return_sequences=True, name="LSTM_seq"))
    x.add(Dropout(.1))
    x.add(LSTM(128, return_sequences=False, name="LSTM_fin"))
    x.add(Dropout(.1))

    x.add(Dense(128, activation='relu'))
    x.add(Dropout(.1))
    x.add(Dense(64, activation='relu'))
    x.add(Dense(10, activation='relu'))
    x.add(Dense(num_outputs, activation='linear', name='model_outputs'))

    return x
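# Hypothetical usage sketch, not from the original source: compile the
# sequence model and run it on a random batch of 3-frame clips; the model
# expects inputs of shape (batch, seq_length) + input_shape.
def _demo_rnn_lstm():
    model = rnn_lstm(seq_length=3, num_outputs=2)
    model.compile(optimizer="adam", loss="mse")
    clips = tf.random.uniform((8, 3, 120, 160, 3))  # 8 clips of 3 RGB frames
    outputs = model(clips)                          # shape (8, 2)
    return outputs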
def build_rnn_models(env: gym.Env, distribution: BasePolicyDistribution,
                     shared: bool = False, bs: int = 1, model_type: str = "rnn",
                     layer_sizes: Tuple = (64,)):
    """Build simple policy and value models having a recurrent layer before their heads."""
    state_dimensionality, n_actions = env_extract_dims(env)

    rnn_choice = {
        "rnn": tf.keras.layers.SimpleRNN,
        "lstm": tf.keras.layers.LSTM,
        "gru": tf.keras.layers.GRU
    }[model_type]

    inputs = tf.keras.Input(batch_shape=(bs, None, state_dimensionality))
    masked = tf.keras.layers.Masking()(inputs)

    # policy network
    x = TD(_build_encoding_sub_model((state_dimensionality,), bs,
                                     layer_sizes=layer_sizes, name="policy_encoder"),
           name="TD_policy")(masked)
    x.set_shape([bs] + x.shape[1:])
    x, *_ = rnn_choice(layer_sizes[-1], stateful=True, return_sequences=True,
                       return_state=True, batch_size=bs,
                       name="policy_recurrent_layer")(x)
    out_policy = distribution.build_action_head(n_actions, x.shape[1:], bs)(x)

    # value network
    if not shared:
        x = TD(_build_encoding_sub_model((state_dimensionality,), bs,
                                         layer_sizes=layer_sizes, name="value_encoder"),
               name="TD_value")(masked)
        x.set_shape([bs] + x.shape[1:])
        x, *_ = rnn_choice(layer_sizes[-1], stateful=True, return_sequences=True,
                           return_state=True, batch_size=bs,
                           name="value_recurrent_layer")(x)
        out_value = tf.keras.layers.Dense(
            1,
            kernel_initializer=tf.keras.initializers.Orthogonal(1.0),
            bias_initializer=tf.keras.initializers.Constant(0.0))(x)
    else:
        out_value = tf.keras.layers.Dense(
            1,
            input_dim=x.shape[1:],
            kernel_initializer=tf.keras.initializers.Orthogonal(1.0),
            bias_initializer=tf.keras.initializers.Constant(0.0))(x)

    policy = tf.keras.Model(inputs=inputs, outputs=out_policy, name="simple_rnn_policy")
    value = tf.keras.Model(inputs=inputs, outputs=out_value, name="simple_rnn_value")

    return policy, value, tf.keras.Model(inputs=inputs,
                                         outputs=[out_policy, out_value],
                                         name="simple_rnn")
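# Hypothetical rollout sketch, not from the original source: the stateful
# recurrence carries hidden state across successive single-step calls, so
# state must be reset explicitly at episode boundaries. Assumes a
# Box-observation env and a distribution object compatible with
# build_action_head, as used throughout this module.
def _demo_rnn_rollout(env, distribution, steps=5):
    policy, value, joint = build_rnn_models(env, distribution, bs=1)
    obs_dim = env.observation_space.shape[0]
    joint.reset_states()
    for _ in range(steps):
        obs = tf.random.normal((1, 1, obs_dim))   # one timestep per call
        action_params, state_value = joint(obs)   # hidden state persists
    joint.reset_states()  # new episode -> fresh recurrent state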