def test_scoping_creates_new_variables_across_instances(self):
    """Two separately named model instances must not share variables."""
    output_size = 5
    x = tf.random.uniform((1, 13))
    self.assertFalse(tf.compat.v1.trainable_variables())

    fn1 = feedforward_model(
        output_size=output_size,
        hidden_layer_sizes=(6, 4, 2),
        name='feedforward_function_1')
    _ = fn1([x])
    num_vars_after_fn1 = len(tf.compat.v1.trainable_variables())

    fn2 = feedforward_model(
        output_size=output_size,
        hidden_layer_sizes=(6, 4, 2),
        name='feedforward_function_2')
    _ = fn2([x])
    num_vars_after_fn2 = len(tf.compat.v1.trainable_variables())

    self.assertGreater(num_vars_after_fn1, 0)
    # The second instance should have created its own, equally sized set.
    self.assertEqual(num_vars_after_fn1 * 2, num_vars_after_fn2)

    num_vars_final = len(tf.compat.v1.trainable_variables())
    self.assertEqual(num_vars_after_fn2, num_vars_final)
def test_scoping_creates_new_variables_across_instances(self):
    """Variables are created at construction time, not at first call."""
    output_size = 5
    x = tf.random_uniform((1, 13))
    self.assertFalse(tf.trainable_variables())

    fn1 = feedforward_model(
        input_shapes=(x.shape[1:], ),
        output_size=output_size,
        hidden_layer_sizes=(6, 4, 2),
        name='feedforward_function_1')
    num_vars_after_first = len(tf.trainable_variables())

    fn2 = feedforward_model(
        input_shapes=(x.shape[1:], ),
        output_size=output_size,
        hidden_layer_sizes=(6, 4, 2),
        name='feedforward_function_2')
    num_vars_after_second = len(tf.trainable_variables())

    self.assertGreater(num_vars_after_first, 0)
    self.assertEqual(num_vars_after_first * 2, num_vars_after_second)

    # Make sure that all variables were created before calling the fn:
    # calling should not add any new trainable variables.
    _ = fn1([x])
    _ = fn2([x])
    num_vars_after_calls = len(tf.trainable_variables())
    self.assertEqual(num_vars_after_second, num_vars_after_calls)
def create_dynamics_model(input_shapes,
                          dynamics_latent_dim,
                          *args,
                          preprocessors=None,
                          observation_keys=None,
                          goal_keys=None,
                          name='dynamics_model',
                          encoder_kwargs=None,
                          decoder_kwargs=None,
                          **kwargs):
    """Build an encoder/decoder dynamics model over preprocessed inputs.

    The encoder maps the (optionally preprocessed) flat inputs into a
    `dynamics_latent_dim`-dimensional latent; the decoder maps that latent
    back to the flattened observation vector.
    """
    inputs_flat = create_inputs(input_shapes)
    if preprocessors is None:
        preprocessors_flat = tuple(None for _ in inputs_flat)
    else:
        preprocessors_flat = flatten_input_structure(preprocessors)
    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = []
    for preprocessor, input_ in zip(preprocessors_flat, inputs_flat):
        preprocessed_inputs.append(
            input_ if preprocessor is None else preprocessor(input_))

    encoder = feedforward_model(
        *args,
        output_size=dynamics_latent_dim,
        name=f'{name}_encoder',
        **encoder_kwargs)

    # The decoder reconstructs the concatenated observation vector, so its
    # output size is the sum of the (flat) observation dimensions.
    decoder_output_size = sum(
        shape.as_list()[0]
        for shape in input_shapes['observations'].values())
    decoder = feedforward_model(
        *args,
        output_size=decoder_output_size,
        name=f'{name}_decoder',
        **decoder_kwargs)

    latent = encoder(preprocessed_inputs)
    dynamics_pred = decoder(latent)

    dynamics_model = PicklableModel(inputs_flat, dynamics_pred, name=name)
    dynamics_model.observation_keys = observation_keys or tuple()
    dynamics_model.goal_keys = goal_keys or tuple()
    dynamics_model.all_keys = (
        dynamics_model.observation_keys + dynamics_model.goal_keys)
    # Expose the encoder as its own model so callers can embed inputs
    # without running the decoder.
    dynamics_model.encoder = PicklableModel(
        inputs_flat, latent, name=f'{name}_encoder_model')
    return dynamics_model
def state_estimator_model(input_shape,
                          num_hidden_units=256,
                          num_hidden_layers=2,
                          output_size=4,  # (x, y, z_cos, z_sin)
                          kernel_regularizer=None,
                          preprocessor_params=None,
                          preprocessor=None,
                          name='state_estimator_preprocessor'):
    """Sequential pixels -> state model: image preprocessor + MLP head."""
    # TODO: Make this take in observation keys instead of this hardcoded
    # output size.
    obs_preprocessor_params = (
        preprocessor_params or DEFAULT_STATE_ESTIMATOR_PREPROCESSOR_PARAMS)
    if preprocessor is None:
        preprocessor = get_preprocessor_from_params(
            None, obs_preprocessor_params)

    state_estimator = feedforward_model(
        hidden_layer_sizes=(num_hidden_units, ) * num_hidden_layers,
        output_size=output_size,
        # tanh keeps the estimated state components in (-1, 1).
        output_activation=tf.keras.activations.tanh,
        kernel_regularizer=kernel_regularizer,
        name='feedforward_state_est')

    return tfk.Sequential(
        [
            tfk.Input(shape=input_shape, name='pixels', dtype=tf.uint8),
            preprocessor,
            state_estimator,
        ],
        name=name)
def create_feedforward_Q_function(input_shapes,
                                  *args,
                                  preprocessors=None,
                                  observation_keys=None,
                                  name='feedforward_Q',
                                  **kwargs):
    """Create a feedforward Q-function over preprocessed, float32 inputs.

    Fix: removed a leftover debug `print(input_shapes)` statement.

    Args:
        input_shapes: Nested structure of input shapes passed to
            `create_inputs`.
        preprocessors: Optional nested structure of per-input preprocessors;
            `None` entries leave the corresponding input untouched.
        observation_keys: Stored on the returned model for downstream use.
        name: Name of the underlying feedforward model.

    Returns:
        A `PicklableModel` mapping the flat inputs to a scalar Q-value.
    """
    inputs_flat = create_inputs(input_shapes)
    preprocessors_flat = (
        flatten_input_structure(preprocessors)
        if preprocessors is not None
        else tuple(None for _ in inputs_flat))
    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)
    # Cast everything to float32 so heterogeneous inputs (e.g. uint8
    # pixels) can be concatenated by the downstream feedforward model.
    preprocessed_inputs = [
        tf.cast(preprocessor(input_), dtype=tf.float32)
        if preprocessor is not None
        else tf.cast(input_, dtype=tf.float32)
        for preprocessor, input_ in zip(preprocessors_flat, inputs_flat)
    ]

    Q_function = feedforward_model(*args, output_size=1, name=name, **kwargs)
    Q_function = PicklableModel(inputs_flat, Q_function(preprocessed_inputs))
    Q_function.observation_keys = observation_keys
    return Q_function
def create_feedforward_Q_function(input_shapes,
                                  *args,
                                  preprocessors=None,
                                  observation_keys=None,
                                  goal_keys=None,
                                  name='feedforward_Q',
                                  **kwargs):
    """Create a goal-conditioned feedforward Q-function.

    Fixes:
      * `all_keys` previously computed `observation_keys + goal_keys` on the
        raw arguments and raised `TypeError` whenever either was left at its
        default of `None`; it now uses the normalized tuples.
      * `preprocessors['actions']` / `preprocessors['observations']` raised
        `TypeError` when `preprocessors` was `None` (the documented default);
        those attributes are now `None` in that case.

    Returns:
        A `PicklableModel` with `observation_keys`, `goal_keys`, `all_keys`,
        per-branch preprocessor attributes, and a `preprocessed_inputs_fn`
        model exposing the preprocessed inputs.
    """
    inputs_flat = create_inputs(input_shapes)
    preprocessors_flat = (
        flatten_input_structure(preprocessors)
        if preprocessors is not None
        else tuple(None for _ in inputs_flat))
    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)
    preprocessed_inputs = [
        preprocessor(input_) if preprocessor is not None else input_
        for preprocessor, input_ in zip(preprocessors_flat, inputs_flat)
    ]

    Q_function = feedforward_model(*args, output_size=1, name=name, **kwargs)
    Q_function = PicklableModel(inputs_flat, Q_function(preprocessed_inputs))
    # Expose the preprocessed inputs as a standalone model for debugging
    # and downstream feature reuse.
    preprocessed_inputs_fn = PicklableModel(inputs_flat, preprocessed_inputs)

    Q_function.observation_keys = observation_keys or ()
    Q_function.goal_keys = goal_keys or ()
    Q_function.all_keys = Q_function.observation_keys + Q_function.goal_keys
    Q_function.actions_preprocessors = (
        preprocessors['actions'] if preprocessors is not None else None)
    Q_function.observations_preprocessors = (
        preprocessors['observations'] if preprocessors is not None else None)
    Q_function.preprocessed_inputs_fn = preprocessed_inputs_fn
    return Q_function
def __call__(self, x, output_units, **condition_kwargs):
    """Build (once) and apply the conditioned shift/log-scale network.

    Returns an `AffineScalar` bijector parameterized by the network's
    output, split into shift and log-scale halves.
    """
    if not self._built:
        # With `shift_only` the network outputs just the shifts; otherwise
        # it outputs shift and log-scale concatenated.
        num_outputs = (1 if self.shift_only else 2) * output_units
        self._shift_and_log_scale_model = feedforward_model(
            hidden_layer_sizes=self.hidden_layer_sizes,
            output_shape=[num_outputs],
            activation=self.activation,
            output_activation=self.output_activation)
        self._built = True

    # condition_kwargs is a dict, but feedforward_model implicitly flattens
    # these values. Effectively the same as
    # self._shift_and_log_scale_model(tree.flatten((x, condition_kwargs)))
    shift_and_log_scale = self._shift_and_log_scale_model(
        (x, condition_kwargs))

    # It would be nice to have these be encapsulated in the
    # `self._shift_and_log_scale_model`, but the issue is that
    # `tf.keras.Sequential` can't return tuples/lists, and functional
    # model type would have to know the input shape in advance.
    # The correct way here would be to create a subclassed model and
    # instantiate the model in the `build` method.
    split_layer = tf.keras.layers.Lambda(
        lambda out: tf.split(out, 2, axis=-1))
    shift, log_scale = split_layer(shift_and_log_scale)

    return bijectors.affine_scalar.AffineScalar(
        shift=shift, log_scale=log_scale)
def create_embedding_fn(input_shapes,
                        embedding_dim,
                        *args,
                        preprocessors=None,
                        observation_keys=None,
                        goal_keys=None,
                        name='embedding_fn',
                        **kwargs):
    """Create a feedforward embedding model over preprocessed inputs."""
    inputs_flat = create_inputs(input_shapes)
    if preprocessors is None:
        preprocessors_flat = tuple(None for _ in inputs_flat)
    else:
        preprocessors_flat = flatten_input_structure(preprocessors)
    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = []
    for preprocessor, input_ in zip(preprocessors_flat, inputs_flat):
        preprocessed_inputs.append(
            input_ if preprocessor is None else preprocessor(input_))

    embedding_body = feedforward_model(
        *args,
        output_size=embedding_dim,
        name=f'feedforward_{name}',
        **kwargs)
    embedding_fn = PicklableModel(
        inputs_flat, embedding_body(preprocessed_inputs), name=name)

    embedding_fn.observation_keys = observation_keys or tuple()
    embedding_fn.goal_keys = goal_keys or tuple()
    embedding_fn.all_keys = (
        embedding_fn.observation_keys + embedding_fn.goal_keys)
    return embedding_fn
def feedforward_Q_function(input_shapes,
                           *args,
                           preprocessors=None,
                           observation_keys=None,
                           name='feedforward_Q',
                           **kwargs):
    """Create a `StateActionValueFunction` backed by a feedforward model."""
    inputs = create_inputs(input_shapes)

    if preprocessors is None:
        preprocessors = tree.map_structure(lambda _: None, inputs)
    preprocessors = tree.map_structure_up_to(
        inputs, preprocessors_lib.deserialize, preprocessors)
    preprocessed_inputs = apply_preprocessors(preprocessors, inputs)

    # NOTE(hartikainen): `feedforward_model` would do the `cast_and_concat`
    # step for us, but tf2.2 broke the sequential multi-input handling: See:
    # https://github.com/tensorflow/tensorflow/issues/37061.
    concatenated = tf.keras.layers.Lambda(
        cast_and_concat)(preprocessed_inputs)

    Q_model_body = feedforward_model(
        *args, output_shape=[1], name=name, **kwargs)
    Q_model = tf.keras.Model(inputs, Q_model_body(concatenated), name=name)

    return StateActionValueFunction(
        model=Q_model, observation_keys=observation_keys, name=name)
def _shift_and_log_scale_diag_net(self, output_size):
    """Build the MLP that outputs concatenated shift and log-scale diag."""
    return feedforward_model(
        hidden_layer_sizes=self._hidden_layer_sizes,
        output_size=output_size,
        activation=self._activation,
        output_activation=self._output_activation)
def get_feedforward_preprocessor(observation_shape,
                                 name='feedforward_preprocessor',
                                 **kwargs):
    """Build a feedforward preprocessor for a single observation input."""
    # Local import to avoid a circular dependency at module load time.
    from softlearning.models.feedforward import feedforward_model
    return feedforward_model(
        input_shapes=(observation_shape, ), name=name, **kwargs)
def get_random_nn_preprocessor(name='random_nn_preprocessor', **kwargs):
    """Build a randomly initialized feedforward preprocessor whose weights
    should not be updated by training.

    Fix: removed a leftover `import ipdb; ipdb.set_trace()` debugger
    breakpoint that would halt any run reaching this function.
    """
    # Local import to avoid a circular dependency at module load time.
    from softlearning.models.feedforward import feedforward_model
    preprocessor = feedforward_model(name=name, **kwargs)
    # Don't update weights in this random NN.
    # NOTE(review): applying `tf.stop_gradient` to a Keras *model* object
    # looks suspect — it likely needs to wrap the model's *output* tensor
    # (or the model should be marked non-trainable) instead; confirm intent.
    preprocessor = tf.stop_gradient(preprocessor)
    return preprocessor
def __init__(self, observation_space, output_size, *args, **kwargs):
    """Initialize the preprocessor with a feedforward body.

    Only `spaces.Box` observation spaces are supported.
    """
    super(FeedforwardPreprocessor, self).__init__(
        observation_space, output_size)
    assert isinstance(observation_space, spaces.Box)
    self._feedforward = feedforward_model(
        *args,
        input_shapes=(observation_space.shape, ),
        output_size=output_size,
        **kwargs)
def create_feedforward_V_function(observation_shape,
                                  *args,
                                  observation_preprocessor=None,
                                  name='feedforward_V',
                                  **kwargs):
    """Create a feedforward state-value (V) function.

    Fix: the `name` argument was accepted but never forwarded to
    `feedforward_model`, so every V-function got the default model name;
    it is now passed through, consistent with `create_feedforward_Q_function`.

    Returns:
        The feedforward model with a scalar output.
    """
    input_shapes = (observation_shape, )
    preprocessors = (observation_preprocessor, None)
    return feedforward_model(
        input_shapes,
        *args,
        output_size=1,
        preprocessors=preprocessors,
        name=name,
        **kwargs)
def create_feedforward_Q_function(observation_shape,
                                  action_shape,
                                  *args,
                                  observation_preprocessor=None,
                                  name='feedforward_Q',
                                  **kwargs):
    """Create a feedforward Q-function over (observation, action) inputs.

    Only the observation input is preprocessed; actions pass through as-is.
    """
    return feedforward_model(
        (observation_shape, action_shape),
        *args,
        preprocessors=(observation_preprocessor, None),
        name=name,
        **kwargs)
def _shift_and_scale_diag_net(self, inputs, output_size):
    """Build a model mapping `inputs` to (shift, scale) diagonals.

    The MLP output is split in two halves; the scale half is passed
    through softplus to keep it positive.
    """
    preprocessed_inputs = self._preprocess_inputs(inputs)
    raw_params = feedforward_model(
        hidden_layer_sizes=self._hidden_layer_sizes,
        output_shape=(output_size, ),
        activation=self._activation,
        output_activation=self._output_activation)(preprocessed_inputs)

    split_layer = tf.keras.layers.Lambda(
        lambda params: tf.split(params, num_or_size_splits=2, axis=-1))
    shift, scale = split_layer(raw_params)
    scale = tf.keras.layers.Lambda(
        lambda raw_scale: tf.math.softplus(raw_scale))(scale)

    return tf.keras.Model(inputs, (shift, scale))
def create_feedforward_reward_classifier(observation_shape,
                                         *args,
                                         observation_preprocessor=None,
                                         name='feedforward_classifier',
                                         **kwargs):
    """Create a feedforward reward classifier with L2 kernel regularization.

    Fix: the `name` argument was accepted but never forwarded to
    `feedforward_model`, so every classifier got the default model name;
    it is now passed through, consistent with the sibling factory functions.

    Returns:
        The feedforward model with a scalar (logit) output.
    """
    input_shapes = (observation_shape, )
    preprocessors = (observation_preprocessor, None)
    return feedforward_model(
        input_shapes,
        *args,
        output_size=1,
        preprocessors=preprocessors,
        kernel_regularizer=tf.keras.regularizers.l2(0.001),
        name=name,
        **kwargs)
def test_clone_model(self):
    """Make sure that cloning works and clones can predict.

    TODO(hartikainen): This test weirdly mixed `tf.keras.backend.eval`
    with `self.evaluate`. Should figure out the best way to handle keras
    and test sessions.
    """
    output_size = 5
    x_np = np.random.uniform(0, 1, (1, 13)).astype(np.float32)
    x = tf.constant(x_np)

    fn1 = feedforward_model(
        input_shapes=(x.shape[1:], x.shape[1:]),
        output_size=output_size,
        hidden_layer_sizes=(6, 4, 2),
        name='feedforward_function')
    tf.keras.backend.get_session().run(tf.global_variables_initializer())

    fn2 = tf.keras.models.clone_model(fn1)

    # Clone must mirror shapes but reinitialize kernels independently.
    for var_1, var_2 in zip(fn1.trainable_variables,
                            fn2.trainable_variables):
        self.assertEqual(var_1.shape, var_2.shape)
        if 'kernel' in var_1.name:
            self.assertNotAllClose(
                tf.keras.backend.eval(var_1),
                tf.keras.backend.eval(var_2))

    # No variable objects may be shared between original and clone.
    shared = set(fn1.trainable_variables) & set(fn2.trainable_variables)
    self.assertEqual(len(shared), 0)

    result_1 = fn1([x, x])
    result_1_predict = fn1.predict([x_np, x_np])
    result_1_eval = tf.keras.backend.eval(result_1)

    result_2 = fn2([x, x])
    result_2_predict = fn2.predict([x_np, x_np])
    result_2_eval = tf.keras.backend.eval(result_2)

    self.assertEqual(fn1.name, fn2.name)
    self.assertEqual(result_1_predict.shape, result_2_predict.shape)
    self.assertAllEqual(result_1_predict, result_1_eval)
    self.assertAllEqual(result_2_predict, result_2_eval)
def create_feedforward_Q_function(input_shapes,
                                  *args,
                                  preprocessors=None,
                                  observation_keys=None,
                                  name='feedforward_Q',
                                  **kwargs):
    """Create a feedforward Q-function as a `PicklableModel`."""
    inputs = create_inputs(input_shapes)
    if preprocessors is None:
        # Identity preprocessing for every input.
        preprocessors = tree.map_structure(lambda _: None, inputs)
    preprocessed_inputs = apply_preprocessors(preprocessors, inputs)

    Q_body = feedforward_model(*args, output_size=1, name=name, **kwargs)
    Q_function = PicklableModel(inputs, Q_body(preprocessed_inputs))
    Q_function.observation_keys = observation_keys
    return Q_function
def test_clone_model(self):
    """Make sure that cloning works and clones can predict."""
    output_size = 5
    x_np = np.random.uniform(0, 1, (1, 13)).astype(np.float32)
    x = tf.constant(x_np)

    fn1 = feedforward_model(
        output_size=output_size,
        hidden_layer_sizes=(6, 4, 2),
        name='feedforward_function')
    result_1 = fn1([x, x])
    tf.compat.v1.keras.backend.get_session().run(
        tf.compat.v1.global_variables_initializer())

    fn2 = tf.keras.models.clone_model(fn1)
    result_2 = fn2([x, x])

    # Clone must mirror shapes but reinitialize kernels independently.
    variable_names = [v.name for v in fn1.variables]
    for var_name, weights_1, weights_2 in zip(
            variable_names, fn1.get_weights(), fn2.get_weights()):
        self.assertEqual(weights_1.shape, weights_2.shape)
        if 'kernel' in var_name:
            self.assertNotAllClose(weights_1, weights_2)

    shared = set(fn1.trainable_variables) & set(fn2.trainable_variables)
    self.assertEqual(len(shared), 0)

    with self.assertRaises(ValueError):
        # TODO(hartikainen): investigate why this fails
        result_1_predict = fn1.predict([x_np, x_np])

    result_1_eval = tf.compat.v1.keras.backend.eval(result_1)
    result_2_predict = fn2.predict([x_np, x_np])
    result_2_eval = tf.compat.v1.keras.backend.eval(result_2)

    self.assertEqual(fn1.name, fn2.name)
    self.assertEqual(result_1_predict.shape, result_2_predict.shape)
    self.assertAllEqual(result_1_predict, result_1_eval)
    self.assertAllEqual(result_2_predict, result_2_eval)
def _fn(x, output_units, **condition_kwargs):
    """MLP which concatenates the condition kwargs to input."""
    # With `shift_only` the network outputs just shifts; otherwise it
    # outputs shift and log-scale concatenated along the last axis.
    num_outputs = (1 if shift_only else 2) * output_units
    shift_and_log_scale = feedforward_model(
        hidden_layer_sizes=hidden_layer_sizes,
        output_size=num_outputs,
        activation=activation,
        output_activation=output_activation,
        name=name,
    )([x, condition_kwargs])

    if shift_only:
        return shift_and_log_scale, None

    split_layer = tf.keras.layers.Lambda(
        lambda params: tf.split(params, 2, axis=-1))
    shift, log_scale = split_layer(shift_and_log_scale)
    return shift, log_scale
def create_distance_estimator(input_shapes,
                              *args,
                              preprocessors=None,
                              observation_keys=None,
                              goal_keys=None,
                              name='distance_estimator',
                              classifier_params=None,
                              **kwargs):
    """Create a feedforward distance estimator.

    When `classifier_params` is provided, the output is a distribution over
    `bins + 1` distance bins; otherwise a single scalar distance.
    """
    inputs_flat = create_inputs(input_shapes)
    if preprocessors is None:
        preprocessors_flat = tuple(None for _ in inputs_flat)
    else:
        preprocessors_flat = flatten_input_structure(preprocessors)
    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = []
    for preprocessor, input_ in zip(preprocessors_flat, inputs_flat):
        preprocessed_inputs.append(
            input_ if preprocessor is None else preprocessor(input_))

    if classifier_params:
        output_size = int(classifier_params.get('bins', 1) + 1)
    else:
        output_size = 1

    distance_body = feedforward_model(
        *args, output_size=output_size, name=name, **kwargs)
    distance_fn = PicklableModel(
        inputs_flat, distance_body(preprocessed_inputs))

    distance_fn.observation_keys = observation_keys or tuple()
    distance_fn.goal_keys = goal_keys or tuple()
    distance_fn.all_keys = distance_fn.observation_keys + distance_fn.goal_keys
    distance_fn.classifier_params = classifier_params
    return distance_fn
def create_feedforward_reward_classifier_function(
        input_shapes,
        *args,
        preprocessors=None,
        observation_keys=None,
        name='feedforward_reward_classifier',
        kernel_regularizer_lambda=1e-3,
        **kwargs):
    """Create a feedforward reward classifier with optional L2 weight decay.

    A `kernel_regularizer_lambda` of 0 (or None) disables regularization.
    """
    inputs_flat = create_inputs(input_shapes)
    if preprocessors is None:
        preprocessors_flat = tuple(None for _ in inputs_flat)
    else:
        preprocessors_flat = flatten_input_structure(preprocessors)
    assert len(inputs_flat) == len(preprocessors_flat), (
        inputs_flat, preprocessors_flat)

    preprocessed_inputs = []
    for preprocessor, input_ in zip(preprocessors_flat, inputs_flat):
        preprocessed_inputs.append(
            input_ if preprocessor is None else preprocessor(input_))

    if kernel_regularizer_lambda:
        kernel_regularizer = tf.keras.regularizers.l2(
            kernel_regularizer_lambda)
    else:
        kernel_regularizer = None

    classifier_body = feedforward_model(
        *args,
        output_size=1,
        kernel_regularizer=kernel_regularizer,
        name=name,
        **kwargs)

    reward_classifier_function = PicklableModel(
        inputs_flat, classifier_body(preprocessed_inputs))
    reward_classifier_function.observation_keys = observation_keys
    reward_classifier_function.observations_preprocessors = preprocessors
    return reward_classifier_function
def test_scoping_reuses_variables_on_single_instance(self):
    """Repeated calls on one instance must not create new variables."""
    output_size = 5
    x1 = tf.random.uniform((3, 2))
    x2 = tf.random.uniform((3, 13))
    self.assertFalse(tf.compat.v1.trainable_variables())

    fn = feedforward_model(
        output_size=output_size,
        hidden_layer_sizes=(6, 4, 2),
        name='feedforward_function')
    # No variables exist until the model is first called.
    self.assertEqual(len(tf.compat.v1.trainable_variables()), 0)

    _ = fn([x1, x2])
    num_vars_after_first_call = len(tf.compat.v1.trainable_variables())
    self.assertGreater(num_vars_after_first_call, 0)

    _ = fn([x2, x1])
    num_vars_after_second_call = len(tf.compat.v1.trainable_variables())
    self.assertEqual(num_vars_after_first_call, num_vars_after_second_call)
def test_clone_model(self):
    """Make sure that cloning works and clones can predict."""
    output_shape = (5, )
    x_np = np.random.uniform(0, 1, (1, 13)).astype(np.float32)
    x = tf.constant(x_np)

    fn1 = feedforward_model(
        output_shape=output_shape,
        hidden_layer_sizes=(6, 4, 2),
        name='feedforward_function')
    result_1 = fn1([x, x]).numpy()

    fn2 = tf.keras.models.clone_model(fn1)
    result_2 = fn2([x, x]).numpy()

    # Clone must mirror shapes but reinitialize kernels independently.
    variable_names = [v.name for v in fn1.variables]
    for var_name, weights_1, weights_2 in zip(
            variable_names, fn1.get_weights(), fn2.get_weights()):
        self.assertEqual(weights_1.shape, weights_2.shape)
        if 'kernel' in var_name:
            self.assertNotAllClose(weights_1, weights_2)

    # Compare by object identity (variables are unhashable in tf2; use refs).
    refs_1 = set(v.experimental_ref() for v in fn1.trainable_variables)
    refs_2 = set(v.experimental_ref() for v in fn2.trainable_variables)
    self.assertEqual(len(refs_1 & refs_2), 0)

    result_1_predict = fn1.predict((x_np, x_np))
    result_2_predict = fn2.predict((x_np, x_np))

    self.assertEqual(fn1.name, fn2.name)
    self.assertEqual(result_1_predict.shape, result_2_predict.shape)
    self.assertAllEqual(result_1_predict, result_1)
    self.assertAllEqual(result_2_predict, result_2)
def convnet_preprocessor(input_shapes,
                         image_shape,
                         output_size,
                         conv_filters=(32, 32),
                         conv_kernel_sizes=((5, 5), (5, 5)),
                         pool_type='MaxPool2D',
                         pool_sizes=((2, 2), (2, 2)),
                         pool_strides=(2, 2),
                         dense_hidden_layer_sizes=(64, 64),
                         data_format='channels_last',
                         name="convnet_preprocessor",
                         make_picklable=True,
                         *args,
                         **kwargs):
    """Build a convnet preprocessor over flattened image + raw inputs.

    The flat inputs are concatenated, the first `H * W * C` entries are
    reshaped into an image and run through conv/pool stacks; the remainder
    bypasses the convnet and is concatenated back before the dense head.

    Fix: an unrecognized `data_format` previously left `H`, `W`, `C`
    unbound and surfaced later as a confusing `NameError`; it now raises
    `ValueError` immediately.
    """
    if data_format == 'channels_last':
        H, W, C = image_shape
    elif data_format == 'channels_first':
        C, H, W = image_shape
    else:
        raise ValueError(f"Unknown data_format: {data_format!r}")

    inputs = [
        tf.keras.layers.Input(shape=input_shape)
        for input_shape in input_shapes
    ]
    concatenated_input = tf.keras.layers.Lambda(
        lambda x: tf.concat(x, axis=-1))(inputs)
    # Split the flat image pixels from the remaining (raw) features.
    images_flat, input_raw = tf.keras.layers.Lambda(
        lambda x: [x[..., :H * W * C], x[..., H * W * C:]])(
            concatenated_input)
    images = tf.keras.layers.Reshape(image_shape)(images_flat)

    conv_out = images
    for filters, kernel_size, pool_size, strides in zip(
            conv_filters, conv_kernel_sizes, pool_sizes, pool_strides):
        conv_out = tf.keras.layers.Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            padding="SAME",
            activation=tf.nn.relu,
            *args,
            **kwargs)(conv_out)
        conv_out = getattr(tf.keras.layers, pool_type)(
            pool_size=pool_size, strides=strides)(conv_out)

    flattened = tf.keras.layers.Flatten()(conv_out)
    concatenated_output = tf.keras.layers.Lambda(
        lambda x: tf.concat(x, axis=-1))([flattened, input_raw])

    # Optional dense head; with no hidden layers the conv features (plus
    # raw inputs) are returned directly.
    output = (
        feedforward_model(
            input_shapes=(concatenated_output.shape[1:].as_list(), ),
            output_size=output_size,
            hidden_layer_sizes=dense_hidden_layer_sizes,
            activation='relu',
            output_activation='linear',
            *args,
            **kwargs)([concatenated_output])
        if dense_hidden_layer_sizes
        else concatenated_output)

    model = PicklableKerasModel(inputs, output, name=name)
    return model
def get_feedforward_preprocessor(name='feedforward_preprocessor', **kwargs):
    """Build a feedforward preprocessor model."""
    # Local import to avoid a circular dependency at module load time.
    from softlearning.models.feedforward import feedforward_model
    return feedforward_model(name=name, **kwargs)
def test_without_name(self):
    """The model falls back to its default name when none is given."""
    model = feedforward_model(output_size=1, hidden_layer_sizes=(6, 4, 2))
    self.assertEqual(model.name, 'feedforward_model')