Example #1
0
    def rnn_conv_block(self,
                       n_filter,
                       kernel_size,
                       strides,
                       x,
                       conv_type=Conv2D,
                       drop=True,
                       flatten=False,
                       batchnorm=True,
                       **kwargs):
        """Apply a time-distributed convolution block to a sequence tensor.

        The block is: conv -> [BatchNormalization] -> Activation ->
        [Dropout] -> [Flatten], every layer wrapped in TimeDistributed so
        it is applied independently at each timestep.

        Args:
            n_filter: number of convolution filters.
            kernel_size: kernel size forwarded to the conv layer.
            strides: strides forwarded to the conv layer.
            x: input sequence tensor.
            conv_type: convolution layer class to instantiate.
            drop: whether to append a Dropout layer (rate self.drop_rate).
            flatten: whether to flatten the per-timestep output.
            batchnorm: whether to insert BatchNormalization before the
                activation.
            **kwargs: forwarded to the TimeDistributed wrapper of the conv
                layer (e.g. `name`).

        Returns:
            The transformed sequence tensor.
        """
        # Separate L1/L2 regularizers for kernel and bias, both built from
        # the same (l1, l2) coefficients stored on the model.
        kernel_reg = l1_l2(self.regularizer[0], self.regularizer[1])
        bias_reg = l1_l2(self.regularizer[0], self.regularizer[1])

        conv = conv_type(n_filter,
                         kernel_size=kernel_size,
                         strides=strides,
                         use_bias=self.use_bias,
                         padding=self.padding,
                         kernel_regularizer=kernel_reg,
                         bias_regularizer=bias_reg)
        out = TD(conv, **kwargs)(x)

        if batchnorm:
            out = TD(BatchNormalization())(out)
        out = TD(Activation(self.prev_act))(out)
        if drop:
            out = TD(Dropout(self.drop_rate))(out)
        if flatten:
            out = TD(Flatten())(out)
        return out
Example #2
0
 def rnn_dense_block(self, n_neurones, x, drop=True, batchnorm=True):
     """Apply a time-distributed dense block to a sequence tensor.

     The block is: Dense -> [BatchNormalization] -> Activation ->
     [Dropout], each wrapped in TimeDistributed.

     Args:
         n_neurones: number of units in the dense layer.
         x: input sequence tensor.
         drop: whether to append Dropout (rate self.drop_rate).
         batchnorm: whether to insert BatchNormalization before the
             activation.

     Returns:
         The transformed sequence tensor.
     """
     out = TD(Dense(n_neurones, use_bias=self.use_bias))(x)
     if batchnorm:
         out = TD(BatchNormalization())(out)
     out = TD(Activation(self.prev_act))(out)
     if drop:
         out = TD(Dropout(self.drop_rate))(out)
     return out
Example #3
0
def build_rnn_distinct_models(env: gym.Env, bs: int = 1):
    """Build simple policy and value models having a recurrent layer before their heads.

    Policy and value use distinct (non-shared) encoder + SimpleRNN torsos
    on top of one shared, masked sequence input.

    Args:
        env: gym environment, used only to extract state/action dims and
            to decide between continuous and discrete action heads.
        bs: fixed batch size; required because the RNNs are stateful.

    Returns:
        Tuple (policy, value, joint) of tf.keras.Model instances sharing
        the same input tensor.
    """
    continuous_control = isinstance(env.action_space, Box)
    state_dimensionality, n_actions = env_extract_dims(env)

    # sequence input (batch, time, features); Masking skips padded steps
    inputs = tf.keras.Input(batch_shape=(
        bs,
        None,
        state_dimensionality,
    ))
    masked = tf.keras.layers.Masking()(inputs)

    def _recurrent_torso(branch):
        """Per-timestep dense encoder followed by a stateful SimpleRNN."""
        h = TD(_build_encoding_sub_model((state_dimensionality, ),
                                         bs,
                                         layer_sizes=(64, ),
                                         name=branch + "_encoder"),
               name="TD_" + branch)(masked)
        # restore the static batch dimension dropped by TimeDistributed;
        # stateful RNNs need it to allocate their state tensors
        h.set_shape([bs] + h.shape[1:])
        return tf.keras.layers.SimpleRNN(64,
                                         stateful=True,
                                         return_sequences=True,
                                         batch_size=bs,
                                         name=branch + "_recurrent_layer")(h)

    # policy network: head type depends on the action space
    x = _recurrent_torso("policy")
    if continuous_control:
        out_policy = _build_continuous_head(n_actions, x.shape[1:], bs)(x)
    else:
        out_policy = _build_discrete_head(n_actions, x.shape[1:], bs)(x)

    # value network: independent torso with a scalar state-value head
    x = _recurrent_torso("value")
    out_value = tf.keras.layers.Dense(1)(x)

    policy = tf.keras.Model(inputs=inputs,
                            outputs=out_policy,
                            name="simple_rnn_policy")
    value = tf.keras.Model(inputs=inputs,
                           outputs=out_value,
                           name="simple_rnn_value")

    return policy, value, tf.keras.Model(inputs=inputs,
                                         outputs=[out_policy, out_value],
                                         name="simple_rnn")
Example #4
0
    def build(self):
        """Assemble the driving model from conv/dense RNN blocks.

        Image sequences go through a shared time-distributed conv stack,
        then three parallel conv branches whose flattened outputs are
        concatenated and passed through dense blocks.  Auxiliary inputs
        and output heads are wired in depending on the dataset's
        component names.

        Returns:
            A keras Model mapping `inputs` to `outputs`.
        """
        inputs = []
        outputs = []

        # resolve component index lists to their human-readable names
        input_components_names = self.dataset.indexes2components_names(
            self.input_components)
        output_components_names = self.dataset.indexes2components_names(
            self.output_components)

        # primary input: image sequence of shape self.img_shape
        inp = Input(shape=self.img_shape)
        inputs.append(inp)

        # shared feature extractor (4 strided conv blocks)
        x = self.rnn_conv_block(12, 5, 2, inp, drop=True, name="start_fe")
        x = self.rnn_conv_block(16, 5, 2, x, drop=True)
        x = self.rnn_conv_block(32, 3, 2, x, drop=True)
        x = self.rnn_conv_block(48, 3, 2, x, drop=True, name="end_fe")

        # three parallel conv branches with different kernel shapes —
        # presumably whole-map (8x10), column-wise (8x1) and row-wise
        # (1x10) pooling of the feature map; verify against the actual
        # feature-map size after the extractor
        y1 = self.rnn_conv_block(64, (8, 10), (8, 10),
                                 x,
                                 flatten=True,
                                 drop=False)
        y2 = self.rnn_conv_block(24, (8, 1), (8, 1),
                                 x,
                                 flatten=True,
                                 drop=False)
        y3 = self.rnn_conv_block(24, (1, 10), (1, 10),
                                 x,
                                 flatten=True,
                                 drop=False)
        y = Concatenate()([y1, y2, y3])
        y = TD(Dropout(self.drop_rate))(y)

        y = self.rnn_dense_block(150, y, batchnorm=False)
        y = self.rnn_dense_block(75, y, batchnorm=False)

        # optional speed input: one scalar per timestep
        if "speed" in input_components_names:
            # rebinds `inp`; the image input is already stored in `inputs`
            inp = Input((self.img_shape[0], 1))
            inputs.append(inp)
            y = Concatenate()([y, inp])

        y = self.rnn_dense_block(50, y, batchnorm=False, drop=False)

        # NOTE(review): this gate reads input_components_names although the
        # "direction" head is appended to `outputs` — the sibling "throttle"
        # head below checks output_components_names; confirm the asymmetry
        # is intended.
        if "direction" in input_components_names:
            z = Dense(1,
                      use_bias=self.use_bias,
                      activation=self.last_act,
                      name="direction")(y)  # kernel_regularizer=l2(0.0005)
            outputs.append(z)
            # feed the predicted direction back into the feature stream
            y = Concatenate()([y, z])

        if "throttle" in output_components_names:
            th = Dense(1,
                       use_bias=self.use_bias,
                       activation="sigmoid",
                       name="throttle")(y)
            outputs.append(th)
            y = Concatenate()([y, th])

        return Model(inputs, outputs)
def build_model_cnnedit3(self):
    """Build a time-distributed CNN over stacked frames, compiled with MSE.

    Input is a frame sequence (batch, img_channels, rows, cols, colors);
    each frame is encoded by the same conv stack, globally pooled,
    flattened across time and mapped to `self.action_size` linear outputs.

    Returns:
        A compiled keras Sequential model (Adam optimizer, MSE loss).
    """
    img_channels = self.img_channels
    img_rows = self.img_rows
    img_cols = self.img_cols
    color_channels = self.color_channels

    model = Sequential()
    model.add(
        Input(shape=(img_channels, img_rows, img_cols, color_channels),
              name="img_in"))  # (batchsize, 4, 80, 80, 1)

    # per-frame conv stack: (filters, kernel, strides) per layer
    conv_stack = (
        (16, (5, 5), (2, 2)),
        (32, (5, 5), (2, 2)),
        (64, (5, 5), (2, 2)),
        (128, (3, 3), (2, 2)),
        (256, (3, 3), (1, 1)),
    )
    for n_filters, kernel, strides in conv_stack:
        model.add(
            TD(
                Conv2D(n_filters,
                       kernel,
                       strides=strides,
                       padding="same",
                       activation='relu')))

    model.add(TD(
        GlobalAveragePooling2D()))  # replace the CNN model with a custom one
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))

    # 15 categorical bins for Steering angles
    model.add(Dense(self.action_size, activation="linear"))

    # `lr` is a deprecated alias (removed in Keras 3); use `learning_rate`
    adam = Adam(learning_rate=self.learning_rate)
    model.compile(loss="mse", optimizer=adam)

    return model
def build_model_cnnedit4_lstm(self):
    """Build a 3D-conv + LSTM model over frame sequences, compiled with MSE.

    Conv3D layers mix information across the time axis; the result is
    globally pooled per timestep, summarized by an LSTM and mapped to
    `self.action_size` linear outputs.

    Returns:
        A compiled keras Sequential model (Adam optimizer, MSE loss).
    """
    img_channels = self.img_channels
    img_rows = self.img_rows
    img_cols = self.img_cols
    color_channels = self.color_channels

    img_seq_shape = (img_channels, img_rows, img_cols, color_channels)

    model = Sequential()
    model.add(Input(shape=img_seq_shape, name='img_in'))  # 4*80*80*1

    # 3D conv stack: (filters, kernel, strides) per layer; early layers
    # span 3 timesteps, later layers are purely spatial (time kernel 1)
    conv_stack = (
        (16, (3, 5, 5), (1, 2, 2)),
        (32, (3, 5, 5), (1, 2, 2)),
        (64, (1, 5, 5), (1, 2, 2)),
        (128, (1, 3, 3), (1, 2, 2)),
        (256, (1, 3, 3), (1, 1, 1)),
    )
    for n_filters, kernel, strides in conv_stack:
        model.add(
            Conv3D(n_filters,
                   kernel,
                   strides=strides,
                   padding="same",
                   activation='relu'))

    # pool each timestep's feature map to a vector, then summarize the
    # sequence with a final-state LSTM
    model.add(TD(GlobalAveragePooling2D()))
    model.add(LSTM(128, return_sequences=False, name="LSTM_fin"))
    model.add(Dense(128, activation='relu'))
    model.add(
        Dense(self.action_size, activation='linear', name='model_outputs'))

    # `lr` is a deprecated alias (removed in Keras 3); use `learning_rate`
    adam = Adam(learning_rate=self.learning_rate)
    model.compile(loss="mse", optimizer=adam)

    return model
def build_model_transfer_mobilenetv2_lstm(self):
    """Build an LSTM head over precomputed MobileNetV2 feature maps.

    Expects the caller to feed per-frame MobileNetV2 feature maps of
    shape (img_channels, 3, 3, 1280); forces the configured image size
    to 96x96 to match the feature extractor.

    Returns:
        A compiled keras Sequential model (Adam optimizer, MSE loss).
    """
    # MobileNetV2 features used here assume 96x96 inputs
    if self.img_cols != 96:
        self.img_cols = 96
    if self.img_rows != 96:
        self.img_rows = 96

    img_channels = self.img_channels

    model = Sequential()
    # (img_channels, 3, 3, 1280): a 3x3x1280 feature map per frame
    model.add(Input(shape=(img_channels, 3, 3, 1280),
                    name='features_in'))

    # pool each frame's feature map to a 1280-vector
    model.add(TD(GlobalAveragePooling2D()))

    # summarize the frame sequence with a final-state LSTM
    model.add(LSTM(128, return_sequences=False, name="LSTM_fin"))

    model.add(Dense(128, activation='relu'))
    model.add(
        Dense(self.action_size, activation='linear', name='model_outputs'))

    # `lr` is a deprecated alias (removed in Keras 3); use `learning_rate`
    adam = Adam(learning_rate=self.learning_rate)
    model.compile(loss="mse", optimizer=adam)

    return model
def build_shadow_brain_v1(env: gym.Env,
                          distribution: BasePolicyDistribution,
                          bs: int,
                          model_type: str = "rnn",
                          **kwargs):
    """Build network for the shadow hand task.

    Fuses visual, proprioceptive and somatosensory inputs with a goal
    vector, integrates them through a recurrent layer and returns three
    keras Models over the same graph: policy, value and joint.

    Args:
        env: gym environment, used only to extract state/action dims.
        distribution: policy distribution providing the action head.
        bs: fixed batch size (stateful RNNs require a static batch).
        model_type: "rnn", "lstm" or "gru"; selects the recurrent cell.
        **kwargs: accepted for interface compatibility; unused here.

    Returns:
        Tuple (policy, value, joint) of tf.keras.Model instances.
    """
    state_dimensionality, n_actions = env_extract_dims(env)
    hidden_dimensions = 32  # width of the recurrent layer

    # select the recurrent layer class; raises KeyError on unknown type
    rnn_choice = {
        "rnn": tf.keras.layers.SimpleRNN,
        "lstm": tf.keras.layers.LSTM,
        "gru": tf.keras.layers.GRU
    }[model_type]

    # inputs: sequences with static batch size and unknown length;
    # feature widths (48 proprio, 92 touch, 7 goal) are hard-coded here
    # and presumably match the environment's observation layout — verify
    visual_in = tf.keras.Input(batch_shape=(bs, None, VISION_WH, VISION_WH, 3),
                               name="visual_input")
    proprio_in = tf.keras.Input(batch_shape=(
        bs,
        None,
        48,
    ),
                                name="proprioceptive_input")
    touch_in = tf.keras.Input(batch_shape=(
        bs,
        None,
        92,
    ),
                              name="somatosensory_input")
    goal_in = tf.keras.Input(batch_shape=(
        bs,
        None,
        7,
    ), name="goal_input")

    # abstractions of perceptive inputs, each applied per timestep
    visual_latent = TD(
        _build_visual_encoder(shape=(VISION_WH, VISION_WH, 3),
                              batch_size=bs))(visual_in)
    proprio_latent = TD(
        _build_fcn_component(48, 12, 8, batch_size=bs,
                             name="latent_proprio"))(proprio_in)
    touch_latent = TD(
        _build_fcn_component(92, 24, 8, batch_size=bs,
                             name="latent_touch"))(touch_in)

    # concatenation of perceptive abstractions
    concatenation = tf.keras.layers.Concatenate()(
        [visual_latent, proprio_latent, touch_latent])

    # fully connected layer integrating perceptive representations
    x = TD(tf.keras.layers.Dense(48))(concatenation)
    x = TD(tf.keras.layers.ReLU())(x)

    # concatenation of goal and perception; set_shape restores the static
    # batch dimension dropped by TimeDistributed (stateful RNNs need it)
    x.set_shape([bs] + x.shape[1:])
    x = tf.keras.layers.Concatenate()([x, goal_in])

    # recurrent layer
    o = rnn_choice(hidden_dimensions,
                   stateful=True,
                   return_sequences=True,
                   batch_size=bs)(x)

    # output heads: distribution-specific action head, scalar value head
    policy_out = distribution.build_action_head(n_actions, o.shape[1:], bs)(o)
    value_out = tf.keras.layers.Dense(1, name="value")(o)

    # define models
    policy = tf.keras.Model(inputs=[visual_in, proprio_in, touch_in, goal_in],
                            outputs=[policy_out],
                            name="shadow_brain_v1_policy")
    value = tf.keras.Model(inputs=[visual_in, proprio_in, touch_in, goal_in],
                           outputs=[value_out],
                           name="shadow_brain_v1_value")
    joint = tf.keras.Model(inputs=[visual_in, proprio_in, touch_in, goal_in],
                           outputs=[policy_out, value_out],
                           name="shadow_brain_v1")

    return policy, value, joint
def build_shadow_brain_v2(env: gym.Env, distribution: BasePolicyDistribution,
                          bs: int, **kwargs):
    """Build network for the shadow hand task, version 2.

    Differences from v1: the visual latent is mixed with the goal into an
    "eigengrasp" representation, proprioception and touch are fused into
    one latent, and the recurrent layer is fixed to GRU.

    Args:
        env: gym environment, used only to extract state/action dims.
        distribution: policy distribution providing the action head.
        bs: fixed batch size (the stateful GRU requires a static batch).
        **kwargs: accepted for interface compatibility; unused here.

    Returns:
        Tuple (policy, value, joint) of tf.keras.Model instances.
    """
    state_dimensionality, n_actions = env_extract_dims(env)
    hidden_dimensions = 32  # width of the recurrent layer

    # inputs: sequences with static batch size and unknown length;
    # feature widths (48 proprio, 92 touch, 7 goal) are hard-coded here
    visual_in = tf.keras.Input(batch_shape=(bs, None, VISION_WH, VISION_WH, 3),
                               name="visual_input")
    proprio_in = tf.keras.Input(batch_shape=(
        bs,
        None,
        48,
    ),
                                name="proprioceptive_input")
    touch_in = tf.keras.Input(batch_shape=(
        bs,
        None,
        92,
    ),
                              name="somatosensory_input")
    goal_in = tf.keras.Input(batch_shape=(
        bs,
        None,
        7,
    ), name="goal_input")

    # abstractions of perceptive inputs: visual encoder, then a dense
    # projection, then mixing with the goal to form "eigengrasps"
    visual_latent = TD(
        _build_visual_encoder(shape=(VISION_WH, VISION_WH, 3),
                              batch_size=bs))(visual_in)
    visual_latent = TD(tf.keras.layers.Dense(128))(visual_latent)
    visual_latent = TD(tf.keras.layers.ReLU())(visual_latent)
    # restore the static batch dim dropped by TimeDistributed before concat
    visual_latent.set_shape([bs] + visual_latent.shape[1:])
    visual_plus_goal = tf.keras.layers.Concatenate()([visual_latent, goal_in])
    eigengrasps = TD(tf.keras.layers.Dense(20))(visual_plus_goal)
    eigengrasps = TD(tf.keras.layers.ReLU())(eigengrasps)

    # concatenation of touch and proprioception, jointly embedded
    proprio_touch = tf.keras.layers.Concatenate()([proprio_in, touch_in])
    proprio_touch_latent = TD(tf.keras.layers.Dense(20))(proprio_touch)
    proprio_touch_latent = TD(tf.keras.layers.ReLU())(proprio_touch_latent)

    # concatenation of goal and perception (the goal enters the graph a
    # second time here, alongside the goal-conditioned eigengrasps)
    proprio_touch_latent.set_shape([bs] + proprio_touch_latent.shape[1:])
    eigengrasps.set_shape([bs] + eigengrasps.shape[1:])
    x = tf.keras.layers.Concatenate()(
        [goal_in, eigengrasps, proprio_touch_latent])

    # recurrent layer
    rnn_out = tf.keras.layers.GRU(hidden_dimensions,
                                  stateful=True,
                                  return_sequences=True,
                                  batch_size=bs)(x)

    # output heads: distribution-specific action head, scalar value head
    policy_out = distribution.build_action_head(n_actions, rnn_out.shape[1:],
                                                bs)(rnn_out)
    value_out = tf.keras.layers.Dense(1, name="value")(rnn_out)

    # define models
    policy = tf.keras.Model(inputs=[visual_in, proprio_in, touch_in, goal_in],
                            outputs=[policy_out],
                            name="shadow_brain_v2_policy")
    value = tf.keras.Model(inputs=[visual_in, proprio_in, touch_in, goal_in],
                           outputs=[value_out],
                           name="shadow_brain_v2_value")
    joint = tf.keras.Model(inputs=[visual_in, proprio_in, touch_in, goal_in],
                           outputs=[policy_out, value_out],
                           name="shadow_brain_v2")

    return policy, value, joint
Example #10
0
def rnn_lstm(seq_length=3, num_outputs=2, input_shape=(120, 160, 3)):
    """Build a CNN + LSTM Sequential model over sequences of images.

    A time-distributed conv stack encodes each frame, two LSTMs collapse
    the sequence to a single feature vector, and dense layers map it to
    `num_outputs` linear outputs.

    Args:
        seq_length: number of frames per input sequence.
        num_outputs: size of the final linear output layer.
        input_shape: (height, width, channels) of one frame.

    Returns:
        An uncompiled keras Sequential model.
    """
    # add sequence length dimension as keras time-distributed expects
    # shape (num_samples, seq_length, *input_shape)
    img_seq_shape = (seq_length,) + input_shape
    drop_out = 0.3

    # NOTE: the original code also created an `Input(batch_shape=...)`
    # tensor here that was never attached to the Sequential model (and
    # misused `batch_shape` for a per-sample shape); it has been removed.
    model = Sequential()
    model.add(TD(Convolution2D(24, (5, 5), strides=(2, 2), activation='relu'),
                 input_shape=img_seq_shape))
    model.add(TD(Dropout(drop_out)))
    model.add(TD(Convolution2D(32, (5, 5), strides=(2, 2), activation='relu')))
    model.add(TD(Dropout(drop_out)))
    model.add(TD(Convolution2D(32, (3, 3), strides=(2, 2), activation='relu')))
    model.add(TD(Dropout(drop_out)))
    model.add(TD(Convolution2D(32, (3, 3), strides=(1, 1), activation='relu')))
    model.add(TD(Dropout(drop_out)))
    model.add(TD(MaxPooling2D(pool_size=(2, 2))))
    model.add(TD(Flatten(name='flattened')))
    model.add(TD(Dense(100, activation='relu')))
    model.add(TD(Dropout(drop_out)))

    # recurrent head: first LSTM keeps the sequence, second collapses it
    model.add(LSTM(128, return_sequences=True, name="LSTM_seq"))
    model.add(Dropout(.1))
    model.add(LSTM(128, return_sequences=False, name="LSTM_fin"))
    model.add(Dropout(.1))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(.1))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(num_outputs, activation='linear', name='model_outputs'))
    return model
def build_rnn_models(env: gym.Env,
                     distribution: BasePolicyDistribution,
                     shared: bool = False,
                     bs: int = 1,
                     model_type: str = "rnn",
                     layer_sizes: Tuple = (64, )):
    """Build simple policy and value models having a recurrent layer before their heads.

    Args:
        env: gym environment, used only to extract state/action dims.
        distribution: policy distribution providing the action head.
        shared: if True, the value head reuses the policy torso instead
            of building its own encoder + recurrent layer.
        bs: fixed batch size (stateful RNNs require a static batch).
        model_type: "rnn", "lstm" or "gru"; selects the recurrent cell.
        layer_sizes: widths of the encoder layers; the last entry also
            sets the recurrent layer width.

    Returns:
        Tuple (policy, value, joint) of tf.keras.Model instances sharing
        the same input tensor.
    """
    state_dimensionality, n_actions = env_extract_dims(env)
    # select the recurrent layer class; raises KeyError on unknown type
    rnn_choice = {
        "rnn": tf.keras.layers.SimpleRNN,
        "lstm": tf.keras.layers.LSTM,
        "gru": tf.keras.layers.GRU
    }[model_type]

    # sequence input (batch, time, features); Masking skips padded steps
    inputs = tf.keras.Input(batch_shape=(
        bs,
        None,
        state_dimensionality,
    ))
    masked = tf.keras.layers.Masking()(inputs)

    # policy network
    x = TD(_build_encoding_sub_model((state_dimensionality, ),
                                     bs,
                                     layer_sizes=layer_sizes,
                                     name="policy_encoder"),
           name="TD_policy")(masked)
    # restore the static batch dimension dropped by TimeDistributed
    x.set_shape([bs] + x.shape[1:])
    # return_state=True makes the layer return (outputs, *states);
    # keep only the output sequence
    x, *_ = rnn_choice(layer_sizes[-1],
                       stateful=True,
                       return_sequences=True,
                       return_state=True,
                       batch_size=bs,
                       name="policy_recurrent_layer")(x)

    out_policy = distribution.build_action_head(n_actions, x.shape[1:], bs)(x)

    # value network: own torso unless shared, then reuse the policy's `x`
    if not shared:
        x = TD(_build_encoding_sub_model((state_dimensionality, ),
                                         bs,
                                         layer_sizes=layer_sizes,
                                         name="value_encoder"),
               name="TD_value")(masked)
        x.set_shape([bs] + x.shape[1:])
        x, *_ = rnn_choice(layer_sizes[-1],
                           stateful=True,
                           return_sequences=True,
                           return_state=True,
                           batch_size=bs,
                           name="value_recurrent_layer")(x)
        out_value = tf.keras.layers.Dense(
            1,
            kernel_initializer=tf.keras.initializers.Orthogonal(1.0),
            bias_initializer=tf.keras.initializers.Constant(0.0))(x)
    else:
        # NOTE(review): Dense's `input_dim` expects an int, but x.shape[1:]
        # is a shape — confirm this argument has any effect here.
        out_value = tf.keras.layers.Dense(
            1,
            input_dim=x.shape[1:],
            kernel_initializer=tf.keras.initializers.Orthogonal(1.0),
            bias_initializer=tf.keras.initializers.Constant(0.0))(x)

    policy = tf.keras.Model(inputs=inputs,
                            outputs=out_policy,
                            name="simple_rnn_policy")
    value = tf.keras.Model(inputs=inputs,
                           outputs=out_value,
                           name="simple_rnn_value")

    return policy, value, tf.keras.Model(inputs=inputs,
                                         outputs=[out_policy, out_value],
                                         name="simple_rnn")