Пример #1
0
def create_feedforward_Q_function(input_shapes,
                                  *args,
                                  preprocessors=None,
                                  observation_keys=None,
                                  name='feedforward_Q',
                                  **kwargs):
    """Build a feedforward Q-function over the given input structure.

    Args:
        input_shapes: Structure of input shapes forwarded to `create_inputs`.
        preprocessors: Optional structure of per-input preprocessors; when
            `None`, every input is used as-is (after a float32 cast).
        observation_keys: Stored on the returned model for downstream lookup.
        name: Name for the underlying feedforward model.
        *args, **kwargs: Forwarded to `feedforward_model`.

    Returns:
        A `PicklableModel` mapping the flat inputs to a scalar Q-value.
    """
    # Fix: removed stray debug `print(input_shapes)` left in the original.
    inputs_flat = create_inputs(input_shapes)
    preprocessors_flat = (
        flatten_input_structure(preprocessors)
        if preprocessors is not None
        else tuple(None for _ in inputs_flat))

    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    # Apply each preprocessor when given, then cast everything to float32.
    preprocessed_inputs = [
        tf.cast(preprocessor(input_), dtype=tf.float32)
        if preprocessor is not None else tf.cast(input_, dtype=tf.float32)
        for preprocessor, input_ in zip(preprocessors_flat, inputs_flat)
    ]

    Q_function = feedforward_model(*args, output_size=1, name=name, **kwargs)

    Q_function = PicklableModel(inputs_flat, Q_function(preprocessed_inputs))
    Q_function.observation_keys = observation_keys

    return Q_function
Пример #2
0
def create_embedding_fn(input_shapes,
                        embedding_dim,
                        *args,
                        preprocessors=None,
                        observation_keys=None,
                        goal_keys=None,
                        name='embedding_fn',
                        **kwargs):
    """Create a feedforward embedding model wrapped as a `PicklableModel`.

    Inputs are optionally preprocessed element-wise, then fed through a
    feedforward network producing an `embedding_dim`-sized output. The
    returned model carries `observation_keys`, `goal_keys`, and their
    concatenation `all_keys` as attributes.
    """
    inputs_flat = create_inputs(input_shapes)
    if preprocessors is None:
        preprocessors_flat = tuple(None for _ in inputs_flat)
    else:
        preprocessors_flat = flatten_input_structure(preprocessors)

    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = []
    for preprocessor, input_ in zip(preprocessors_flat, inputs_flat):
        preprocessed_inputs.append(
            input_ if preprocessor is None else preprocessor(input_))

    feedforward = feedforward_model(
        *args,
        output_size=embedding_dim,
        name=f'feedforward_{name}',
        **kwargs)

    embedding_fn = PicklableModel(
        inputs_flat, feedforward(preprocessed_inputs), name=name)

    embedding_fn.observation_keys = observation_keys or tuple()
    embedding_fn.goal_keys = goal_keys or tuple()
    embedding_fn.all_keys = (
        embedding_fn.observation_keys + embedding_fn.goal_keys)

    return embedding_fn
Пример #3
0
 def __init__(self,
              image_shape,
              latent_dim=32,
              beta=1.0,
              encoder_config=None,
              decoder_config=None,
              kernel_regularizer=regularizers.l2(5e-4),
              name='online_vae_preprocessor',
              *args,
              **kwargs):
     """Set up the VAE preprocessor.

     Restores the encoder/decoder from serialized configs when given;
     otherwise builds fresh models via the `create_vae_*_model` factories.

     NOTE(review): `kernel_regularizer`, `name`, and `*args` are accepted
     but never used in this body — confirm whether they should be
     forwarded to the model factories or to `super().__init__`.
     """
     super(OnlineVAEPreprocessor, self).__init__()

     self.image_shape = image_shape
     self.latent_dim = latent_dim
     self.beta = beta

     # Prefer a serialized config when available; otherwise build anew.
     self.encoder = (
         PicklableModel.from_config(encoder_config)
         if encoder_config
         else create_vae_encoder_model(
             image_shape=image_shape,
             latent_dim=latent_dim,
             **kwargs))
     self.decoder = (
         PicklableModel.from_config(decoder_config)
         if decoder_config
         else create_vae_decoder_model(latent_dim, **kwargs))
Пример #4
0
def create_feedforward_Q_function(input_shapes,
                                  *args,
                                  preprocessors=None,
                                  observation_keys=None,
                                  goal_keys=None,
                                  name='feedforward_Q',
                                  **kwargs):
    """Build a feedforward Q-function with goal-key support.

    Args:
        input_shapes: Structure of input shapes forwarded to `create_inputs`.
        preprocessors: Optional structure of per-input preprocessors;
            presumably a dict with 'actions'/'observations' entries — the
            original indexed those keys directly. TODO confirm with callers.
        observation_keys, goal_keys: Stored on the returned model.
        name: Name for the underlying feedforward model.
        *args, **kwargs: Forwarded to `feedforward_model`.

    Returns:
        A `PicklableModel` producing a scalar Q-value, with key and
        preprocessor metadata attached as attributes.
    """
    inputs_flat = create_inputs(input_shapes)
    preprocessors_flat = (
        flatten_input_structure(preprocessors)
        if preprocessors is not None
        else tuple(None for _ in inputs_flat))

    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = [
        preprocessor(input_) if preprocessor is not None else input_
        for preprocessor, input_ in zip(preprocessors_flat, inputs_flat)
    ]

    Q_function = feedforward_model(*args, output_size=1, name=name, **kwargs)

    Q_function = PicklableModel(inputs_flat, Q_function(preprocessed_inputs))
    preprocessed_inputs_fn = PicklableModel(inputs_flat, preprocessed_inputs)

    Q_function.observation_keys = observation_keys or ()
    Q_function.goal_keys = goal_keys or ()
    # Fix: compose all_keys from the already-defaulted tuples; the original
    # used the raw arguments and raised TypeError when either was None.
    Q_function.all_keys = Q_function.observation_keys + Q_function.goal_keys

    # Fix: the original indexed `preprocessors['actions']` unconditionally,
    # crashing when `preprocessors` is None (which the code above allows).
    Q_function.actions_preprocessors = (
        preprocessors.get('actions') if preprocessors is not None else None)
    Q_function.observations_preprocessors = (
        preprocessors.get('observations')
        if preprocessors is not None else None)

    Q_function.preprocessed_inputs_fn = preprocessed_inputs_fn
    return Q_function
Пример #5
0
 def from_config(self, config):
     """Rebuild a RAE preprocessor from a serialized config dict.

     Expects `config` to contain 'cls', 'image_shape', 'latent_dim',
     'encoder', 'decoder', and 'weights' entries. The encoder/decoder are
     restored via `PicklableModel.from_config` and the full weight set is
     applied last.

     NOTE(review): `self` is unused — the instance is constructed from
     `config['cls']`; consider whether this should be a classmethod.
     """
     # Fix: corrected typo "speciy" -> "specify" in the assertion message.
     assert 'encoder' in config and 'decoder' in config, (
         f'Need to specify encoder and decoder configs, {config}')
     rae_preprocessor = config['cls'](image_shape=config['image_shape'],
                                      latent_dim=config['latent_dim'])
     rae_preprocessor.encoder = PicklableModel.from_config(
         config['encoder'])
     rae_preprocessor.decoder = PicklableModel.from_config(
         config['decoder'])
     rae_preprocessor.set_weights(config['weights'])
     return rae_preprocessor
Пример #6
0
def create_feedforward_Q_function(input_shapes,
                                  *args,
                                  preprocessors=None,
                                  observation_keys=None,
                                  name='feedforward_Q',
                                  **kwargs):
    """Build a feedforward Q-function using tree-structured preprocessors.

    When `preprocessors` is None, a None-filled structure matching the
    inputs is substituted so `apply_preprocessors` passes inputs through.
    """
    inputs = create_inputs(input_shapes)
    preprocessors = (
        tree.map_structure(lambda _: None, inputs)
        if preprocessors is None
        else preprocessors)

    preprocessed_inputs = apply_preprocessors(preprocessors, inputs)

    feedforward = feedforward_model(*args, output_size=1, name=name, **kwargs)

    Q_model = PicklableModel(inputs, feedforward(preprocessed_inputs))
    Q_model.observation_keys = observation_keys

    return Q_model
Пример #7
0
def create_feedforward_reward_classifier_function(
        input_shapes,
        *args,
        preprocessors=None,
        observation_keys=None,
        name='feedforward_reward_classifier',
        kernel_regularizer_lambda=1e-3,
        **kwargs):
    """Build a feedforward reward classifier with optional L2 regularization.

    Args:
        input_shapes: Structure of input shapes forwarded to `create_inputs`.
        preprocessors: Optional structure of per-input preprocessors; `None`
            entries pass inputs through unchanged.
        observation_keys: Stored on the returned model for downstream lookup.
        name: Name for the underlying feedforward model.
        kernel_regularizer_lambda: L2 coefficient; falsy values (0/None)
            disable regularization entirely.
        *args, **kwargs: Forwarded to `feedforward_model`.

    Returns:
        A `PicklableModel` producing a single classifier logit.
    """
    # Fix: removed leftover debug cruft (`from IPython import embed` and
    # commented-out output_activation lines) from the original.
    inputs_flat = create_inputs(input_shapes)
    preprocessors_flat = (
        flatten_input_structure(preprocessors)
        if preprocessors is not None
        else tuple(None for _ in inputs_flat))

    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = [
        preprocessor(input_) if preprocessor is not None else input_
        for preprocessor, input_ in zip(preprocessors_flat, inputs_flat)
    ]

    reward_classifier_function = feedforward_model(
        *args,
        output_size=1,
        kernel_regularizer=tf.keras.regularizers.l2(kernel_regularizer_lambda)
        if kernel_regularizer_lambda else None,
        name=name,
        **kwargs)

    reward_classifier_function = PicklableModel(
        inputs_flat, reward_classifier_function(preprocessed_inputs))
    reward_classifier_function.observation_keys = observation_keys
    reward_classifier_function.observations_preprocessors = preprocessors

    return reward_classifier_function
Пример #8
0
def create_dynamics_model(input_shapes,
                          dynamics_latent_dim,
                          *args,
                          preprocessors=None,
                          observation_keys=None,
                          goal_keys=None,
                          name='dynamics_model',
                          encoder_kwargs=None,
                          decoder_kwargs=None,
                          **kwargs):
    """Build an encoder/decoder dynamics model over the given inputs.

    The encoder maps preprocessed inputs to a `dynamics_latent_dim` latent;
    the decoder maps that latent back to a flat observation-sized prediction
    (the sum of the first dimension of each entry in
    `input_shapes['observations']`).

    Args:
        input_shapes: Structure with an 'observations' dict of shapes.
        dynamics_latent_dim: Size of the latent produced by the encoder.
        preprocessors: Optional structure of per-input preprocessors.
        observation_keys, goal_keys: Stored on the returned model.
        name: Base name; encoder/decoder get `_encoder`/`_decoder` suffixes.
        encoder_kwargs, decoder_kwargs: Extra kwargs for the two
            `feedforward_model` calls; default to empty dicts.
        *args: Forwarded to both `feedforward_model` calls.
        **kwargs: Unused here beyond the signature; kept for compatibility.

    Returns:
        A `PicklableModel` producing the dynamics prediction, with an
        `encoder` sub-model attached as an attribute.
    """
    # Fix: the original splatted `**encoder_kwargs`/`**decoder_kwargs`
    # directly, raising TypeError for the default `None` values.
    encoder_kwargs = encoder_kwargs if encoder_kwargs is not None else {}
    decoder_kwargs = decoder_kwargs if decoder_kwargs is not None else {}

    inputs_flat = create_inputs(input_shapes)
    preprocessors_flat = (
        flatten_input_structure(preprocessors)
        if preprocessors is not None
        else tuple(None for _ in inputs_flat))

    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = [
        preprocessor(input_) if preprocessor is not None else input_
        for preprocessor, input_
        in zip(preprocessors_flat, inputs_flat)
    ]
    encoder = feedforward_model(
        *args,
        output_size=dynamics_latent_dim,
        name=f'{name}_encoder',
        **encoder_kwargs)

    # Decoder reconstructs the concatenated (flat) observation vector.
    output_size = sum(
        shape.as_list()[0]
        for shape in input_shapes['observations'].values())
    decoder = feedforward_model(
        *args,
        output_size=output_size,
        name=f'{name}_decoder',
        **decoder_kwargs)

    latent = encoder(preprocessed_inputs)
    dynamics_pred = decoder(latent)

    dynamics_model = PicklableModel(inputs_flat, dynamics_pred, name=name)

    dynamics_model.observation_keys = observation_keys or tuple()
    dynamics_model.goal_keys = goal_keys or tuple()
    dynamics_model.all_keys = (
        dynamics_model.observation_keys + dynamics_model.goal_keys)

    dynamics_model.encoder = PicklableModel(
        inputs_flat, latent, name=f'{name}_encoder_model')

    return dynamics_model
Пример #9
0
def create_encoder_model(input_shape,
                         latent_dim,
                         trainable=True,
                         kernel_regularizer_type='l2',
                         kernel_regularizer_lambda=5e-4,
                         name='encoder',
                         **kwargs):
    """Build a convnet encoder mapping pixels to a `latent_dim` vector.

    NOTE(review): `trainable` is accepted but unused in this body —
    confirm whether it should be forwarded to the underlying models.
    """
    # Consistency fix: validate the regularizer type up front, matching
    # the assert performed by the sibling `create_decoder_model`.
    assert kernel_regularizer_type in REGULARIZERS, (
        f'Regularizer type must be one of {str(REGULARIZERS.keys())}')
    x = tfkl.Input(shape=input_shape, name='pixel_input')
    kernel_regularizer = REGULARIZERS[kernel_regularizer_type](
        kernel_regularizer_lambda)
    encoder = convnet_model(
        kernel_regularizer=kernel_regularizer,
        **kwargs)
    # Linear projection of convnet features down to the latent size.
    fc = tfkl.Dense(latent_dim, activation='linear')
    z = fc(encoder(x))
    return PicklableModel(x, z, name=name)
Пример #10
0
def create_rae(image_shape, latent_dim, name='regularized_ae', **kwargs):
    """Build a regularized autoencoder.

    Returns a `PicklableModel` whose outputs are the pair
    `[latent, reconstruction]` for the encoder's own inputs.
    """
    encoder = create_encoder_model(image_shape, latent_dim, **kwargs)
    decoder = create_decoder_model(latent_dim, image_shape, **kwargs)

    latent = encoder(encoder.inputs)
    reconstruction = decoder(latent)

    return PicklableModel(
        encoder.inputs, [latent, reconstruction], name=name)
Пример #11
0
def create_decoder_model(latent_dim,
                         output_shape,
                         trainable=True,
                         kernel_regularizer_type='l2',
                         kernel_regularizer_lambda=5e-4,
                         name='decoder',
                         **kwargs):
    """Build a transposed-convnet decoder from a latent vector to an image.

    NOTE(review): `trainable` is accepted but unused in this body —
    confirm whether it should be forwarded to the underlying model.
    """
    assert kernel_regularizer_type in REGULARIZERS, (
        f'Regularizer type must be one of {str(REGULARIZERS.keys())}')
    z = tfkl.Input(shape=(latent_dim, ), name='latent_input')
    kernel_regularizer = REGULARIZERS[kernel_regularizer_type](
        kernel_regularizer_lambda)
    # Fix: the original passed the constructed regularizer object as
    # `kernel_regularizer_type=`; the sibling `create_encoder_model`
    # passes it as `kernel_regularizer=`, which is the intended kwarg.
    # TODO confirm against `convnet_transpose_model`'s signature.
    decoder = convnet_transpose_model(
        output_shape=output_shape,
        kernel_regularizer=kernel_regularizer,
        output_activation='sigmoid',  # Want [0, 1] outputs
        **kwargs)
    return PicklableModel(z, decoder(z), name=name)
Пример #12
0
def get_rnd_networks_from_variant(variant, env):
    """Build the (target, predictor) network pair for RND exploration.

    Pixel inputs (identified by 'pixels' in the tensor name) get separate
    convnet preprocessors per network; all inputs are then concatenated and
    passed through per-network feedforward heads. The target network's
    weights are randomized and it is meant to stay fixed while the
    predictor is trained to match it.
    """
    # Fix: hoisted out of the loop body, where the original re-imported it
    # on every pixel input.
    from softlearning.preprocessors.utils import get_convnet_preprocessor

    rnd_params = variant['algorithm_params']['rnd_params']

    observation_keys = variant['policy_params']['kwargs']['observation_keys']
    if not observation_keys:
        observation_keys = env.observation_keys
    observation_shapes = OrderedDict(
        (key, value) for key, value in env.observation_shape.items()
        if key in observation_keys)

    inputs_flat = create_inputs(observation_shapes)

    # Fix: dropped the dead `target_network = None` / `predictor_network =
    # None` assignments that were immediately overwritten.
    target_network, predictor_network = [], []
    for input_tensor in inputs_flat:
        if 'pixels' in input_tensor.name:  # check logic
            target_network.append(
                get_convnet_preprocessor(
                    'rnd_target_conv',
                    **rnd_params['convnet_params'])(input_tensor))
            predictor_network.append(
                get_convnet_preprocessor(
                    'rnd_predictor_conv',
                    **rnd_params['convnet_params'])(input_tensor))
        else:
            target_network.append(input_tensor)
            predictor_network.append(input_tensor)

    # Concatenate all (float-cast) inputs along the last axis.
    def concat_floats(inputs):
        return tf.concat(
            training_utils.cast_if_floating_dtype(inputs), axis=-1)

    target_network = tf.keras.layers.Lambda(concat_floats)(target_network)
    predictor_network = tf.keras.layers.Lambda(concat_floats)(
        predictor_network)

    target_network = get_feedforward_preprocessor(
        'rnd_target_fc', **rnd_params['fc_params'])(target_network)

    predictor_network = get_feedforward_preprocessor(
        'rnd_predictor_fc', **rnd_params['fc_params'])(predictor_network)

    # Initialize the fixed target with random normal weights.
    target_network = PicklableModel(inputs_flat, target_network)
    target_network.set_weights([
        np.random.normal(0, 0.1, size=weight.shape)
        for weight in target_network.get_weights()
    ])
    predictor_network = PicklableModel(inputs_flat, predictor_network)
    return target_network, predictor_network
Пример #13
0
def create_vae(image_shape,
               latent_dim,
               *args,
               beta=1.0,
               name='beta_vae',
               **kwargs):
    """Assemble a beta-VAE from separate encoder and decoder models.

    The decoder is fed the encoder's third output; presumably
    (mean, logvar, z) with index 2 the sampled latent — TODO confirm
    against `create_vae_encoder_model`.
    """
    encoder = create_vae_encoder_model(
        image_shape=image_shape,
        latent_dim=latent_dim,
        **kwargs)
    decoder = create_vae_decoder_model(latent_dim, **kwargs)

    sampled_latent = encoder(encoder.inputs)[2]
    vae = PicklableModel(encoder.inputs, decoder(sampled_latent), name=name)

    vae.beta = beta
    vae.encoder = encoder
    vae.decoder = decoder
    vae.latent_dim = latent_dim

    return vae
Пример #14
0
def create_distance_estimator(input_shapes,
                              *args,
                              preprocessors=None,
                              observation_keys=None,
                              goal_keys=None,
                              name='distance_estimator',
                              classifier_params=None,
                              **kwargs):
    """Create a feedforward distance estimator.

    With `classifier_params`, the output is `bins + 1` logits (distance
    classification); otherwise a single regression output.
    """
    inputs_flat = create_inputs(input_shapes)
    if preprocessors is None:
        preprocessors_flat = tuple(None for _ in inputs_flat)
    else:
        preprocessors_flat = flatten_input_structure(preprocessors)

    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = [
        input_ if preprocessor is None else preprocessor(input_)
        for preprocessor, input_ in zip(preprocessors_flat, inputs_flat)
    ]

    # One logit per bin plus one, or a single regression output.
    if classifier_params:
        output_size = int(classifier_params.get('bins', 1) + 1)
    else:
        output_size = 1

    feedforward = feedforward_model(
        *args,
        output_size=output_size,
        name=name,
        **kwargs)

    distance_fn = PicklableModel(
        inputs_flat, feedforward(preprocessed_inputs))

    distance_fn.observation_keys = observation_keys or tuple()
    distance_fn.goal_keys = goal_keys or tuple()
    distance_fn.all_keys = (
        distance_fn.observation_keys + distance_fn.goal_keys)
    distance_fn.classifier_params = classifier_params

    return distance_fn