def create_feedforward_Q_function(input_shapes,
                                  *args,
                                  preprocessors=None,
                                  observation_keys=None,
                                  name='feedforward_Q',
                                  **kwargs):
    """Build a feedforward Q-function over the given (nested) input shapes.

    Args:
        input_shapes: Nested structure of input shapes, forwarded to
            `create_inputs`.
        preprocessors: Optional nested structure of per-input preprocessors;
            a `None` entry leaves the corresponding input untouched.
        observation_keys: Stored on the returned model for downstream use.
        name: Name of the underlying feedforward model.
        *args, **kwargs: Forwarded to `feedforward_model`.

    Returns:
        A `PicklableModel` mapping the flat inputs to a scalar Q-value.
    """
    # BUG FIX: removed leftover debug `print(input_shapes)`.
    inputs_flat = create_inputs(input_shapes)
    preprocessors_flat = (
        flatten_input_structure(preprocessors)
        if preprocessors is not None
        else tuple(None for _ in inputs_flat))

    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    # Cast every branch to float32 so heterogeneous input dtypes can be
    # combined downstream.
    preprocessed_inputs = [
        tf.cast(preprocessor(input_), dtype=tf.float32)
        if preprocessor is not None
        else tf.cast(input_, dtype=tf.float32)
        for preprocessor, input_ in zip(preprocessors_flat, inputs_flat)
    ]

    Q_function = feedforward_model(*args, output_size=1, name=name, **kwargs)
    Q_function = PicklableModel(inputs_flat, Q_function(preprocessed_inputs))
    Q_function.observation_keys = observation_keys

    return Q_function
def create_embedding_fn(input_shapes,
                        embedding_dim,
                        *args,
                        preprocessors=None,
                        observation_keys=None,
                        goal_keys=None,
                        name='embedding_fn',
                        **kwargs):
    """Create a feedforward embedding model producing `embedding_dim` outputs.

    Optional per-input preprocessors are applied before the feedforward body;
    observation/goal key metadata is attached to the returned model.
    """
    flat_inputs = create_inputs(input_shapes)
    if preprocessors is None:
        flat_preprocessors = tuple(None for _ in flat_inputs)
    else:
        flat_preprocessors = flatten_input_structure(preprocessors)

    assert len(flat_inputs) == len(flat_preprocessors), (
        flat_inputs, flat_preprocessors)

    processed = []
    for fn, tensor in zip(flat_preprocessors, flat_inputs):
        processed.append(tensor if fn is None else fn(tensor))

    body = feedforward_model(
        *args,
        output_size=embedding_dim,
        name=f'feedforward_{name}',
        **kwargs)

    embedding_fn = PicklableModel(flat_inputs, body(processed), name=name)
    embedding_fn.observation_keys = observation_keys or tuple()
    embedding_fn.goal_keys = goal_keys or tuple()
    embedding_fn.all_keys = (
        embedding_fn.observation_keys + embedding_fn.goal_keys)

    return embedding_fn
def __init__(self,
             image_shape,
             latent_dim=32,
             beta=1.0,
             encoder_config=None,
             decoder_config=None,
             kernel_regularizer=regularizers.l2(5e-4),
             name='online_vae_preprocessor',
             *args,
             **kwargs):
    """Build (or restore) the encoder/decoder pair of an online VAE preprocessor.

    Args:
        image_shape: Shape of the images fed to the encoder.
        latent_dim: Dimensionality of the VAE latent space.
        beta: Weight coefficient stored on the instance (used elsewhere in
            the class; not referenced again in this constructor).
        encoder_config: If truthy, the encoder is restored via
            `PicklableModel.from_config` instead of being built fresh.
        decoder_config: Same as `encoder_config`, for the decoder.
        kernel_regularizer: NOTE(review): accepted but never used in this
            constructor — presumably meant to be forwarded to the model
            builders; confirm. Also, the default `regularizers.l2(5e-4)` is
            evaluated once at definition time, so all instances share the
            same regularizer object.
        name: NOTE(review): accepted but not forwarded to
            `super().__init__()`; confirm intent.
        *args, **kwargs: `kwargs` are forwarded to the model builders when
            no config is given; `args` are unused here.
    """
    super(OnlineVAEPreprocessor, self).__init__()
    self.image_shape = image_shape
    self.latent_dim = latent_dim
    self.beta = beta
    # Restore a previously serialized encoder when a config is provided,
    # otherwise build a fresh one; same pattern for the decoder below.
    if encoder_config:
        self.encoder = PicklableModel.from_config(encoder_config)
    else:
        self.encoder = create_vae_encoder_model(
            image_shape=image_shape,
            latent_dim=latent_dim,
            **kwargs)
    if decoder_config:
        self.decoder = PicklableModel.from_config(decoder_config)
    else:
        self.decoder = create_vae_decoder_model(latent_dim, **kwargs)
def create_feedforward_Q_function(input_shapes,
                                  *args,
                                  preprocessors=None,
                                  observation_keys=None,
                                  goal_keys=None,
                                  name='feedforward_Q',
                                  **kwargs):
    """Build a goal-conditioned feedforward Q-function.

    Args:
        input_shapes: Nested structure of input shapes, forwarded to
            `create_inputs`.
        preprocessors: Optional structure of per-input preprocessors; when
            given, it is expected to contain 'actions' and 'observations'
            entries (stored on the returned model).
        observation_keys: Observation key metadata; `None` becomes `()`.
        goal_keys: Goal key metadata; `None` becomes `()`.
        name: Name of the underlying feedforward model.
        *args, **kwargs: Forwarded to `feedforward_model`.

    Returns:
        A `PicklableModel` mapping the flat inputs to a scalar Q-value, with
        key/preprocessor metadata and a `preprocessed_inputs_fn` attached.
    """
    inputs_flat = create_inputs(input_shapes)
    preprocessors_flat = (
        flatten_input_structure(preprocessors)
        if preprocessors is not None
        else tuple(None for _ in inputs_flat))

    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = [
        preprocessor(input_) if preprocessor is not None else input_
        for preprocessor, input_ in zip(preprocessors_flat, inputs_flat)
    ]

    Q_function = feedforward_model(*args, output_size=1, name=name, **kwargs)
    Q_function = PicklableModel(inputs_flat, Q_function(preprocessed_inputs))
    # Auxiliary model exposing the preprocessed inputs themselves.
    preprocessed_inputs_fn = PicklableModel(inputs_flat, preprocessed_inputs)

    Q_function.observation_keys = observation_keys or ()
    Q_function.goal_keys = goal_keys or ()
    # BUG FIX: previously `observation_keys + goal_keys`, which raised
    # TypeError whenever either argument was left at its `None` default.
    Q_function.all_keys = Q_function.observation_keys + Q_function.goal_keys
    # BUG FIX: previously indexed `preprocessors['actions']` unconditionally,
    # crashing when `preprocessors` was left at its `None` default.
    if preprocessors is not None:
        Q_function.actions_preprocessors = preprocessors['actions']
        Q_function.observations_preprocessors = preprocessors['observations']
    else:
        Q_function.actions_preprocessors = None
        Q_function.observations_preprocessors = None
    Q_function.preprocessed_inputs_fn = preprocessed_inputs_fn

    return Q_function
def from_config(self, config):
    """Reconstruct a RAE preprocessor from a serialized config dict.

    Args:
        config: Dict containing 'cls' (the preprocessor class), 'image_shape',
            'latent_dim', serialized 'encoder' and 'decoder' configs, and the
            full model 'weights'.

    Returns:
        A rebuilt preprocessor instance with weights restored.
    """
    # BUG FIX: corrected typo in the assertion message ('speciy' → 'specify').
    assert 'encoder' in config and 'decoder' in config, (
        f'Need to specify encoder and decoder configs, {config}')
    rae_preprocessor = config['cls'](
        image_shape=config['image_shape'],
        latent_dim=config['latent_dim'])
    rae_preprocessor.encoder = PicklableModel.from_config(config['encoder'])
    rae_preprocessor.decoder = PicklableModel.from_config(config['decoder'])
    # Restore weights only after both sub-models exist.
    rae_preprocessor.set_weights(config['weights'])
    return rae_preprocessor
def create_feedforward_Q_function(input_shapes,
                                  *args,
                                  preprocessors=None,
                                  observation_keys=None,
                                  name='feedforward_Q',
                                  **kwargs):
    """Build a scalar-output feedforward Q-function.

    When no preprocessors are given, an all-`None` structure matching the
    inputs is substituted so `apply_preprocessors` becomes a no-op per input.
    """
    inputs = create_inputs(input_shapes)

    if preprocessors is None:
        preprocessors = tree.map_structure(lambda _: None, inputs)
    preprocessed_inputs = apply_preprocessors(preprocessors, inputs)

    ff_body = feedforward_model(*args, output_size=1, name=name, **kwargs)
    model = PicklableModel(inputs, ff_body(preprocessed_inputs))
    model.observation_keys = observation_keys
    return model
def create_feedforward_reward_classifier_function(
        input_shapes,
        *args,
        preprocessors=None,
        observation_keys=None,
        name='feedforward_reward_classifier',
        kernel_regularizer_lambda=1e-3,
        **kwargs):
    """Build a feedforward binary reward classifier (single logit output).

    Args:
        input_shapes: Nested structure of input shapes, forwarded to
            `create_inputs`.
        preprocessors: Optional nested structure of per-input preprocessors;
            `None` entries leave the corresponding input untouched.
        observation_keys: Stored on the returned model for downstream use.
        name: Name of the underlying feedforward model.
        kernel_regularizer_lambda: L2 coefficient for the classifier kernels;
            falsy values (0 / None) disable regularization entirely.
        *args, **kwargs: Forwarded to `feedforward_model`.

    Returns:
        A `PicklableModel` mapping the flat inputs to a single logit.
    """
    # CLEANUP: removed commented-out debug code (IPython embed, disabled
    # `output_activation` parameter) with no behavior change.
    inputs_flat = create_inputs(input_shapes)
    preprocessors_flat = (
        flatten_input_structure(preprocessors)
        if preprocessors is not None
        else tuple(None for _ in inputs_flat))

    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = [
        preprocessor(input_) if preprocessor is not None else input_
        for preprocessor, input_ in zip(preprocessors_flat, inputs_flat)
    ]

    reward_classifier_function = feedforward_model(
        *args,
        output_size=1,
        kernel_regularizer=(
            tf.keras.regularizers.l2(kernel_regularizer_lambda)
            if kernel_regularizer_lambda else None),
        name=name,
        **kwargs)

    reward_classifier_function = PicklableModel(
        inputs_flat, reward_classifier_function(preprocessed_inputs))
    reward_classifier_function.observation_keys = observation_keys
    reward_classifier_function.observations_preprocessors = preprocessors
    return reward_classifier_function
def create_dynamics_model(input_shapes,
                          dynamics_latent_dim,
                          *args,
                          preprocessors=None,
                          observation_keys=None,
                          goal_keys=None,
                          name='dynamics_model',
                          encoder_kwargs=None,
                          decoder_kwargs=None,
                          **kwargs):
    """Build an encoder/decoder dynamics model predicting flat observations.

    Args:
        input_shapes: Nested structure of input shapes; must contain an
            'observations' mapping whose shapes determine the decoder's
            output size.
        dynamics_latent_dim: Size of the latent bottleneck between the
            encoder and decoder.
        preprocessors: Optional per-input preprocessors; `None` entries are
            identity.
        observation_keys / goal_keys: Metadata attached to the model.
        encoder_kwargs / decoder_kwargs: Extra kwargs for the two
            feedforward sub-models.

    Returns:
        A `PicklableModel` producing the decoded dynamics prediction, with
        an `encoder` sub-model attached.
    """
    # BUG FIX: `**encoder_kwargs` / `**decoder_kwargs` raised TypeError when
    # left at their `None` defaults; coerce to empty dicts first.
    encoder_kwargs = encoder_kwargs or {}
    decoder_kwargs = decoder_kwargs or {}

    inputs_flat = create_inputs(input_shapes)
    preprocessors_flat = (
        flatten_input_structure(preprocessors)
        if preprocessors is not None
        else tuple(None for _ in inputs_flat))

    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = [
        preprocessor(input_) if preprocessor is not None else input_
        for preprocessor, input_ in zip(preprocessors_flat, inputs_flat)
    ]

    encoder = feedforward_model(
        *args,
        output_size=dynamics_latent_dim,
        name=f'{name}_encoder',
        **encoder_kwargs)

    # Decoder reconstructs the concatenation of all (flat) observation dims.
    output_size = sum(
        shape.as_list()[0]
        for shape in input_shapes['observations'].values())
    decoder = feedforward_model(
        *args,
        output_size=output_size,
        name=f'{name}_decoder',
        **decoder_kwargs)

    latent = encoder(preprocessed_inputs)
    dynamics_pred = decoder(latent)

    dynamics_model = PicklableModel(inputs_flat, dynamics_pred, name=name)
    dynamics_model.observation_keys = observation_keys or tuple()
    dynamics_model.goal_keys = goal_keys or tuple()
    dynamics_model.all_keys = (
        dynamics_model.observation_keys + dynamics_model.goal_keys)
    dynamics_model.encoder = PicklableModel(
        inputs_flat, latent, name=f'{name}_encoder_model')
    return dynamics_model
def create_encoder_model(input_shape,
                         latent_dim,
                         trainable=True,
                         kernel_regularizer_type='l2',
                         kernel_regularizer_lambda=5e-4,
                         name='encoder',
                         **kwargs):
    """Build a convolutional encoder mapping pixels to a latent vector.

    Args:
        input_shape: Shape of the pixel input.
        latent_dim: Size of the latent output (linear projection).
        trainable: NOTE(review): accepted but never used in this function —
            confirm whether it should be applied to the returned model.
        kernel_regularizer_type: Key into the module-level `REGULARIZERS`
            registry.
        kernel_regularizer_lambda: Coefficient passed to the regularizer
            factory.
        name: Name of the returned model.
        **kwargs: Forwarded to `convnet_model`.

    Returns:
        A `PicklableModel` from pixel input to the latent vector.
    """
    # CLEANUP: removed commented-out `preprocessed_image_range` line.
    x = tfkl.Input(shape=input_shape, name='pixel_input')
    kernel_regularizer = REGULARIZERS[kernel_regularizer_type](
        kernel_regularizer_lambda)
    encoder = convnet_model(
        kernel_regularizer=kernel_regularizer,
        **kwargs)
    # Linear head projecting conv features down to `latent_dim`.
    fc = tfkl.Dense(latent_dim, activation='linear')
    z = fc(encoder(x))
    return PicklableModel(x, z, name=name)
def create_rae(image_shape, latent_dim, name='regularized_ae', **kwargs):
    """Build a regularized autoencoder returning latent and reconstruction.

    Args:
        image_shape: Shape of the encoder's pixel input.
        latent_dim: Size of the latent code.
        name: Name of the returned model.
        **kwargs: Forwarded to both `create_encoder_model` and
            `create_decoder_model`.

    Returns:
        A `PicklableModel` with two outputs: `[z, reconstruction]`.
    """
    # CLEANUP: removed the large block of commented-out alternative
    # construction / attribute assignments; behavior is unchanged.
    encoder = create_encoder_model(image_shape, latent_dim, **kwargs)
    decoder = create_decoder_model(latent_dim, image_shape, **kwargs)

    z = encoder(encoder.inputs)
    reconstruction = decoder(z)

    return PicklableModel(encoder.inputs, [z, reconstruction], name=name)
def create_decoder_model(latent_dim,
                         output_shape,
                         trainable=True,
                         kernel_regularizer_type='l2',
                         kernel_regularizer_lambda=5e-4,
                         name='decoder',
                         **kwargs):
    """Build a transpose-convolutional decoder from a latent vector to pixels.

    Args:
        latent_dim: Size of the latent input vector.
        output_shape: Target image shape produced by the decoder.
        trainable: NOTE(review): accepted but never used in this function —
            confirm whether it should be applied to the returned model.
        kernel_regularizer_type: Key into the module-level `REGULARIZERS`
            registry.
        kernel_regularizer_lambda: Coefficient passed to the regularizer
            factory.
        name: Name of the returned model.
        **kwargs: Forwarded to `convnet_transpose_model`.

    Returns:
        A `PicklableModel` from latent input to sigmoid-activated pixels.
    """
    assert kernel_regularizer_type in REGULARIZERS, (
        f'Regularizer type must be one of {str(REGULARIZERS.keys())}')

    z = tfkl.Input(shape=(latent_dim, ), name='latent_input')
    kernel_regularizer = REGULARIZERS[kernel_regularizer_type](
        kernel_regularizer_lambda)
    # BUG FIX: the regularizer *object* was previously passed as
    # `kernel_regularizer_type=`; the matching encoder builder passes it as
    # `kernel_regularizer=`, which is what the convnet factory expects.
    decoder = convnet_transpose_model(
        output_shape=output_shape,
        kernel_regularizer=kernel_regularizer,
        output_activation='sigmoid',  # Want [0, 1] outputs
        **kwargs)
    return PicklableModel(z, decoder(z), name=name)
def get_rnd_networks_from_variant(variant, env):
    """Build the RND (random network distillation) target/predictor pair.

    Pixel inputs are passed through per-network convnet preprocessors; all
    other inputs pass through unchanged. Branches are cast to a floating
    dtype, concatenated, and fed to per-network feedforward heads. The target
    network's weights are then randomized (and by RND convention stay fixed).

    Args:
        variant: Experiment config providing `rnd_params` (convnet/fc params)
            and the policy's `observation_keys`.
        env: Environment providing `observation_keys` / `observation_shape`
            fallbacks.

    Returns:
        Tuple `(target_network, predictor_network)` of `PicklableModel`s.
    """
    # CLEANUP: removed dead `target_network = None` / `predictor_network =
    # None` assignments that were immediately overwritten.
    rnd_params = variant['algorithm_params']['rnd_params']

    observation_keys = variant['policy_params']['kwargs']['observation_keys']
    if not observation_keys:
        observation_keys = env.observation_keys
    observation_shapes = OrderedDict(
        (key, value) for key, value in env.observation_shape.items()
        if key in observation_keys)

    inputs_flat = create_inputs(observation_shapes)

    # Hoisted out of the loop (was re-imported for every pixel input).
    from softlearning.preprocessors.utils import get_convnet_preprocessor

    target_network, predictor_network = [], []
    for input_tensor in inputs_flat:
        if 'pixels' in input_tensor.name:  # check logic
            target_network.append(
                get_convnet_preprocessor(
                    'rnd_target_conv',
                    **rnd_params['convnet_params'])(input_tensor))
            predictor_network.append(
                get_convnet_preprocessor(
                    'rnd_predictor_conv',
                    **rnd_params['convnet_params'])(input_tensor))
        else:
            target_network.append(input_tensor)
            predictor_network.append(input_tensor)

    def _concat_cast(branches):
        # Cast branches to a common floating dtype, then concatenate on the
        # feature axis. (Factored out of two identical inline Lambdas.)
        return tf.keras.layers.Lambda(
            lambda inputs: tf.concat(
                training_utils.cast_if_floating_dtype(inputs), axis=-1)
        )(branches)

    target_network = _concat_cast(target_network)
    predictor_network = _concat_cast(predictor_network)

    target_network = get_feedforward_preprocessor(
        'rnd_target_fc', **rnd_params['fc_params'])(target_network)
    predictor_network = get_feedforward_preprocessor(
        'rnd_predictor_fc', **rnd_params['fc_params'])(predictor_network)

    # Initialize the target network with random normal weights.
    target_network = PicklableModel(inputs_flat, target_network)
    target_network.set_weights([
        np.random.normal(0, 0.1, size=weight.shape)
        for weight in target_network.get_weights()
    ])
    predictor_network = PicklableModel(inputs_flat, predictor_network)

    return target_network, predictor_network
def create_vae(image_shape,
               latent_dim,
               *args,
               beta=1.0,
               name='beta_vae',
               **kwargs):
    """Assemble a beta-VAE model from a fresh encoder/decoder pair.

    The encoder and decoder are also attached as attributes of the returned
    model, together with `beta` and `latent_dim`.
    """
    encoder = create_vae_encoder_model(
        image_shape=image_shape, latent_dim=latent_dim, **kwargs)
    decoder = create_vae_decoder_model(latent_dim, **kwargs)

    # Output index 2 of the encoder feeds the decoder (presumably the
    # sampled latent; the other outputs are unused here).
    sampled_latent = encoder(encoder.inputs)[2]
    reconstruction = decoder(sampled_latent)

    vae = PicklableModel(encoder.inputs, reconstruction, name=name)
    vae.beta = beta
    vae.encoder = encoder
    vae.decoder = decoder
    vae.latent_dim = latent_dim
    return vae
def create_distance_estimator(input_shapes,
                              *args,
                              preprocessors=None,
                              observation_keys=None,
                              goal_keys=None,
                              name='distance_estimator',
                              classifier_params=None,
                              **kwargs):
    """Build a feedforward distance estimator (regressor or bin classifier).

    Args:
        input_shapes: Nested structure of input shapes, forwarded to
            `create_inputs`.
        preprocessors: Optional per-input preprocessors; `None` entries are
            identity.
        observation_keys / goal_keys: Metadata attached to the model;
            `None` becomes `()`.
        name: Name of the underlying feedforward model.
        classifier_params: When given, the output size becomes
            `bins + 1` (defaulting `bins` to 1) instead of a single scalar.
        *args, **kwargs: Forwarded to `feedforward_model`.

    Returns:
        A `PicklableModel` with key and classifier metadata attached.
    """
    # CLEANUP: removed commented-out `preprocessed_inputs_fn` /
    # `observations_preprocessors` code; behavior is unchanged.
    inputs_flat = create_inputs(input_shapes)
    preprocessors_flat = (
        flatten_input_structure(preprocessors)
        if preprocessors is not None
        else tuple(None for _ in inputs_flat))

    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = [
        preprocessor(input_) if preprocessor is not None else input_
        for preprocessor, input_ in zip(preprocessors_flat, inputs_flat)
    ]

    output_size = 1 if not classifier_params else int(
        classifier_params.get('bins', 1) + 1)
    distance_fn = feedforward_model(
        *args,
        output_size=output_size,
        name=name,
        **kwargs)

    distance_fn = PicklableModel(inputs_flat, distance_fn(preprocessed_inputs))
    distance_fn.observation_keys = observation_keys or tuple()
    distance_fn.goal_keys = goal_keys or tuple()
    distance_fn.all_keys = distance_fn.observation_keys + distance_fn.goal_keys
    distance_fn.classifier_params = classifier_params

    return distance_fn