Exemplo n.º 1
0
    def test_is_pickleable(self):
        """Check that a pickle round trip preserves the regressor's prediction.

        A hidden-layer bias of the mean network is loaded with ones before the
        regressor is pickled; the unpickled copy, restored in a fresh graph and
        session, must predict exactly the same output for an all-ones input.
        """
        input_shape = (28, 28, 3)
        regressor = GaussianCNNRegressor(input_shape=input_shape,
                                         num_filters=(3, 6),
                                         filter_dims=(3, 3),
                                         strides=(1, 1),
                                         padding='SAME',
                                         hidden_sizes=(32, ),
                                         output_dim=1,
                                         adaptive_std=False,
                                         use_trust_region=False)

        # Re-enter the model's variable scope to grab the existing bias.
        scope_name = 'GaussianCNNRegressor/GaussianCNNRegressorModel'
        with tf.compat.v1.variable_scope(scope_name, reuse=True):
            bias = tf.compat.v1.get_variable(
                'dist_params/mean_network/hidden_0/bias')
        ones_value = tf.ones_like(bias).eval()
        bias.load(ones_value)

        expected = regressor.predict([np.ones(input_shape)])
        serialized = pickle.dumps(regressor)

        # Restore inside a brand-new graph so nothing is shared with the
        # original regressor's graph.
        with tf.compat.v1.Session(graph=tf.Graph()):
            restored = pickle.loads(serialized)
            actual = restored.predict([np.ones(input_shape)])
            assert np.array_equal(expected, actual)
Exemplo n.º 2
0
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        regressor_args=None,
        name='GaussianCNNBaseline',
    ):
        """Validate the observation space and build the underlying regressor.

        Args:
            env_spec: Environment specification; its observation_space must be
                an akro.Box whose shape has 2 or 3 dimensions.
            subsample_factor (float): Forwarded to GaussianCNNRegressor.
            regressor_args (dict): Extra keyword arguments forwarded to
                GaussianCNNRegressor. None is treated as no extra arguments.
            name (str): Name of the baseline, also used as the regressor name.

        Raises:
            ValueError: If the observation space is not an akro.Box or its
                shape does not have 2 or 3 dimensions.

        """
        obs_space = env_spec.observation_space
        # `and` short-circuits, so the shape is only inspected for Box spaces.
        supported = (isinstance(obs_space, akro.Box)
                     and len(obs_space.shape) in (2, 3))
        if not supported:
            message = ('{} can only process 2D, 3D akro.Image or'
                       ' akro.Box observations, but received an env_spec with '
                       'observation_space of type {} and shape {}')
            raise ValueError(
                message.format(type(self).__name__,
                               type(obs_space).__name__,
                               obs_space.shape))

        super().__init__(env_spec)

        extra_kwargs = {} if regressor_args is None else regressor_args
        self._regressor = GaussianCNNRegressor(
            input_shape=obs_space.shape,
            output_dim=1,
            subsample_factor=subsample_factor,
            name=name,
            **extra_kwargs)
        self.name = name
        self.env_spec = env_spec
Exemplo n.º 3
0
    def test_log_likelihood_sym(self, output_dim):
        """Compare the symbolic log-likelihood with the distribution's value.

        The tensor built by log_likelihood_sym is evaluated on one sample and
        must match, within an absolute tolerance of 1e-5, the log-likelihood
        computed by the default network's distribution from the regressor's
        predicted mean and log_std.

        Args:
            output_dim: Width of the label placeholder and of the label
                vector (presumably an int supplied by test parametrization).
                NOTE(review): the regressor itself is always built with
                output_dim=1, independent of this argument -- confirm that
                this is intended.

        """
        input_shape = (28, 28, 3)
        gcr = GaussianCNNRegressor(input_shape=input_shape,
                                   filters=((3, (3, 3)), (6, (3, 3))),
                                   strides=(1, 1),
                                   padding='SAME',
                                   hidden_sizes=(32, ),
                                   output_dim=1,
                                   adaptive_std=False,
                                   use_trust_region=False)

        sample = np.full(input_shape, 0.5)
        target = np.ones(output_dim)

        input_ph = tf.compat.v1.placeholder(tf.float32,
                                            shape=(None, ) + input_shape)
        ys_ph = tf.compat.v1.placeholder(dtype=tf.float32,
                                         name='ys',
                                         shape=(None, output_dim))

        # Symbolic path: build the tensor, then evaluate it on a batch of one.
        ll_tensor = gcr.log_likelihood_sym(input_ph, ys_ph, name='ll_sym')
        feed = {input_ph: [sample], ys_ph: [target]}
        ll_from_sym = self.sess.run(ll_tensor, feed_dict=feed)

        # Reference path: distribution parameters from the regressor itself.
        mean, log_std = gcr._f_pdists([sample])
        dist = gcr.model.networks['default'].dist
        ll = dist.log_likelihood([target], {'mean': mean, 'log_std': log_std})
        assert np.allclose(ll, ll_from_sym, rtol=0, atol=1e-5)
Exemplo n.º 4
0
    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            regressor_args=None,
            name='GaussianCNNBaseline',
    ):
        """Build the underlying GaussianCNNRegressor for this baseline.

        Args:
            env_spec: Environment specification; the shape of its
                observation_space becomes the regressor's input shape.
            subsample_factor (float): Forwarded to GaussianCNNRegressor.
            regressor_args (dict): Extra keyword arguments forwarded to
                GaussianCNNRegressor. None is treated as no extra arguments.
            name (str): Name of the baseline, also used as the regressor name.

        """
        super().__init__(env_spec)

        extra_kwargs = {} if regressor_args is None else regressor_args
        obs_shape = env_spec.observation_space.shape
        self._regressor = GaussianCNNRegressor(
            input_shape=obs_shape,
            output_dim=1,
            subsample_factor=subsample_factor,
            name=name,
            **extra_kwargs)
        self.name = name
Exemplo n.º 5
0
    def test_fit_unnormalized(self):
        """Fit with input/output normalization disabled and check the result.

        After 20 fitting passes the mean absolute prediction error on the test
        data must not exceed 0.1. Because normalize_inputs and
        normalize_outputs are both False, the network's x/y mean variables are
        expected to be all zeros and its x/y std variables all ones.
        """
        gcr = GaussianCNNRegressor(input_shape=(10, 10, 3),
                                   num_filters=(3, 6),
                                   filter_dims=(3, 3),
                                   strides=(1, 1),
                                   padding='SAME',
                                   hidden_sizes=(32, ),
                                   output_dim=1,
                                   adaptive_std=True,
                                   normalize_inputs=False,
                                   normalize_outputs=False)

        train_data, test_data = get_train_test_data()
        observations, returns = train_data

        for _ in range(20):
            gcr.fit(observations, returns)

        paths, expected = test_data

        prediction = gcr.predict(paths['observations'])
        average_error = 0.0
        # enumerate keeps indexing into `prediction`, so a prediction shorter
        # than `expected` still fails loudly instead of being truncated.
        for i, exp in enumerate(expected):
            average_error += np.abs(exp - prediction[i])
        average_error /= len(expected)
        assert average_error <= 0.1

        network = gcr.model.networks['default']

        # Input statistics are compared exactly.
        x_mean = self.sess.run(network.x_mean)
        x_mean_expected = np.zeros_like(x_mean)
        x_std = self.sess.run(network.x_std)
        x_std_expected = np.ones_like(x_std)
        assert np.array_equal(x_mean, x_mean_expected)
        assert np.array_equal(x_std, x_std_expected)

        # Output statistics are compared with a floating-point tolerance.
        y_mean = self.sess.run(network.y_mean)
        y_mean_expected = np.zeros_like(y_mean)
        y_std = self.sess.run(network.y_std)
        y_std_expected = np.ones_like(y_std)

        assert np.allclose(y_mean, y_mean_expected)
        assert np.allclose(y_std, y_std_expected)
Exemplo n.º 6
0
    def test_fit_normalized(self):
        """Fit with the trust region enabled and check normalization variables.

        After 20 fitting passes the mean absolute prediction error on the test
        data must not exceed 0.1, and the network's x/y mean and std variables
        must be close to the mean and standard deviation of the training
        observations and returns.
        """
        regressor = GaussianCNNRegressor(input_shape=(10, 10, 3),
                                         filters=((3, (3, 3)), (6, (3, 3))),
                                         strides=(1, 1),
                                         padding='SAME',
                                         hidden_sizes=(32, ),
                                         output_dim=1,
                                         adaptive_std=False,
                                         use_trust_region=True)

        (observations, returns), test_data = get_train_test_data()

        for _ in range(20):
            regressor.fit(observations, returns)

        paths, expected = test_data
        predicted = regressor.predict(paths['observations'])

        abs_error = 0.0
        for idx, target in enumerate(expected):
            abs_error += np.abs(target - predicted[idx])
        abs_error /= len(expected)
        assert abs_error <= 0.1

        network = regressor.model.networks['default']

        # Input statistics stored in the network vs. the training inputs.
        learned_x_mean = self.sess.run(network.x_mean)
        target_x_mean = np.mean(observations, axis=0, keepdims=True)
        learned_x_std = self.sess.run(network.x_std)
        target_x_std = np.std(observations, axis=0, keepdims=True)

        assert np.allclose(learned_x_mean, target_x_mean)
        assert np.allclose(learned_x_std, target_x_std)

        # Output statistics stored in the network vs. the training returns.
        learned_y_mean = self.sess.run(network.y_mean)
        target_y_mean = np.mean(returns, axis=0, keepdims=True)
        learned_y_std = self.sess.run(network.y_std)
        target_y_std = np.std(returns, axis=0, keepdims=True)

        assert np.allclose(learned_y_mean, target_y_mean)
        assert np.allclose(learned_y_std, target_y_std)
Exemplo n.º 7
0
    def test_fit_without_trusted_region(self):
        """Fit with use_trust_region=False and check the prediction error.

        After 20 fitting passes the mean absolute prediction error on the test
        data must not exceed 0.1.
        """
        regressor = GaussianCNNRegressor(input_shape=(10, 10, 3),
                                         filters=((3, (3, 3)), (6, (3, 3))),
                                         strides=(1, 1),
                                         padding='SAME',
                                         hidden_sizes=(32, ),
                                         output_dim=1,
                                         adaptive_std=False,
                                         use_trust_region=False)

        (observations, returns), test_data = get_train_test_data()

        for _ in range(20):
            regressor.fit(observations, returns)

        paths, expected = test_data
        predicted = regressor.predict(paths['observations'])

        abs_error = 0.0
        for idx, target in enumerate(expected):
            abs_error += np.abs(target - predicted[idx])
        abs_error /= len(expected)
        assert abs_error <= 0.1
Exemplo n.º 8
0
    def test_optimizer_args(self, mock_lbfgs):
        """Check that optimizer_args reach the optimizer constructor.

        Args:
            mock_lbfgs: Mock whose return_value is expected to become the
                regressor's optimizer (presumably a patch of LbfgsOptimizer;
                the patch decorator is not visible here -- TODO confirm).

        """
        optimizer_kwargs = {'max_opt_itr': 25}
        regressor = GaussianCNNRegressor(input_shape=(10, 10, 3),
                                         filters=((3, (3, 3)), (6, (3, 3))),
                                         strides=(1, 1),
                                         padding='SAME',
                                         hidden_sizes=(32, ),
                                         output_dim=1,
                                         optimizer=LbfgsOptimizer,
                                         optimizer_args=optimizer_kwargs,
                                         use_trust_region=True)

        # The regressor must hold exactly the object the mock produced ...
        built_optimizer = regressor._optimizer
        assert mock_lbfgs.return_value is built_optimizer

        # ... and the mock must have been called with the forwarded kwargs.
        mock_lbfgs.assert_called_with(max_opt_itr=25)
Exemplo n.º 9
0
    def test_fit_smaller_subsample_factor(self):
        """Fit with subsample_factor=0.9 and check the prediction error.

        After 20 fitting passes the mean absolute prediction error on the test
        data must not exceed 0.05.
        """
        gcr = GaussianCNNRegressor(input_shape=(10, 10, 3),
                                   num_filters=(3, 6),
                                   filter_dims=(3, 3),
                                   strides=(1, 1),
                                   padding='SAME',
                                   hidden_sizes=(32, ),
                                   output_dim=1,
                                   subsample_factor=0.9,
                                   adaptive_std=False)
        train_data, test_data = get_train_test_data()
        observations, returns = train_data

        for _ in range(20):
            gcr.fit(observations, returns)

        paths, expected = test_data

        prediction = gcr.predict(paths['observations'])
        average_error = 0.0
        # enumerate keeps indexing into `prediction`, so a prediction shorter
        # than `expected` still fails loudly instead of being truncated.
        for i, exp in enumerate(expected):
            average_error += np.abs(exp - prediction[i])
        average_error /= len(expected)
        assert average_error <= 0.05
Exemplo n.º 10
0
    def test_is_pickleable2(self):
        """Check that a pickle round trip preserves the x_mean variable.

        The x_mean normalization variable is loaded with ones before the
        regressor is pickled; the unpickled copy, restored in a fresh graph
        and session, must expose exactly the same x_mean value.
        """
        input_shape = (28, 28, 3)
        regressor = GaussianCNNRegressor(input_shape=input_shape,
                                         filters=((3, (3, 3)), (6, (3, 3))),
                                         strides=(1, 1),
                                         padding='SAME',
                                         hidden_sizes=(32, ),
                                         output_dim=1,
                                         adaptive_std=False,
                                         use_trust_region=False)

        # Re-enter the model's variable scope to grab the existing variable.
        scope_name = 'GaussianCNNRegressor/GaussianCNNRegressorModel'
        with tf.compat.v1.variable_scope(scope_name, reuse=True):
            x_mean_var = tf.compat.v1.get_variable('normalized_vars/x_mean')
        ones_value = tf.ones_like(x_mean_var).eval()
        x_mean_var.load(ones_value)

        before = regressor.model.networks['default'].x_mean.eval()
        serialized = pickle.dumps(regressor)

        # Restore inside a brand-new graph so nothing is shared with the
        # original regressor's graph.
        with tf.compat.v1.Session(graph=tf.Graph()):
            restored = pickle.loads(serialized)
            after = restored.model.networks['default'].x_mean.eval()
            assert np.array_equal(before, after)
Exemplo n.º 11
0
class GaussianCNNBaseline(Baseline):
    """GaussianCNNBaseline With Model.

    It fits the input data to a gaussian distribution estimated by a CNN.
    All fitting and prediction is delegated to a GaussianCNNRegressor built
    with output_dim=1. When the observation space is an akro.Image,
    observation batches are passed through normalize_pixel_batch before they
    reach the regressor.

    Args:
        env_spec (metarl.envs.env_spec.EnvSpec): Environment specification.
        subsample_factor (float): The factor to subsample the data. By
            default it is 1.0, which means using all the data.
        regressor_args (dict): Arguments for regressor.
        name (str): Name of baseline.

    Raises:
        ValueError: If the observation space is not an akro.Box or its shape
            does not have 2 or 3 dimensions.

    """
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        regressor_args=None,
        name='GaussianCNNBaseline',
    ):
        # `or` short-circuits, so the shape is only inspected for Box spaces.
        if not isinstance(env_spec.observation_space, akro.Box) or \
                len(env_spec.observation_space.shape) not in (2, 3):
            raise ValueError(
                '{} can only process 2D, 3D akro.Image or'
                ' akro.Box observations, but received an env_spec with '
                'observation_space of type {} and shape {}'.format(
                    type(self).__name__,
                    type(env_spec.observation_space).__name__,
                    env_spec.observation_space.shape))

        super().__init__(env_spec)
        if regressor_args is None:
            regressor_args = {}

        self._regressor = GaussianCNNRegressor(
            input_shape=(env_spec.observation_space.shape),
            output_dim=1,
            subsample_factor=subsample_factor,
            name=name,
            **regressor_args)
        self.name = name
        self.env_spec = env_spec

    def _preprocess_observations(self, observations):
        """Apply pixel normalization when the observation space is an image.

        Args:
            observations (numpy.ndarray): Batch of observations.

        Returns:
            numpy.ndarray: The result of normalize_pixel_batch for akro.Image
            observation spaces, otherwise the input unchanged.

        """
        if isinstance(self.env_spec.observation_space, akro.Image):
            return normalize_pixel_batch(observations)
        return observations

    def fit(self, paths):
        """Fit regressor based on paths.

        Args:
            paths (list[dict[str, numpy.ndarray]]): Sample paths. Each path
                must provide 'observations' and 'returns' entries.

        """
        observations = self._preprocess_observations(
            np.concatenate([p['observations'] for p in paths]))

        returns = np.concatenate([p['returns'] for p in paths])
        # The regressor is fitted on returns reshaped to a single column.
        self._regressor.fit(observations, returns.reshape((-1, 1)))

    def predict(self, path):
        """Predict value based on paths.

        Args:
            path (dict[numpy.ndarray]): Sample paths. Only the 'observations'
                entry is read.

        Returns:
            numpy.ndarray: Predicted value, flattened to one dimension.

        """
        observations = self._preprocess_observations(path['observations'])
        return self._regressor.predict(observations).flatten()

    def get_param_values(self):
        """Get parameter values.

        Returns:
            List[np.ndarray]: A list of values of each parameter, as returned
            by the underlying regressor.

        """
        return self._regressor.get_param_values()

    def set_param_values(self, flattened_params):
        """Set param values.

        Args:
            flattened_params (np.ndarray): A numpy array of parameter values,
                forwarded to the underlying regressor.

        """
        self._regressor.set_param_values(flattened_params)

    def get_params_internal(self):
        """Get the params, which are the trainable variables.

        Returns:
            List[tf.Variable]: A list of trainable variables in the current
            variable scope.

        """
        return self._regressor.get_params_internal()
Exemplo n.º 12
0
class GaussianCNNBaseline(Baseline):
    """Baseline backed by a CNN-parameterized Gaussian regressor.

    Fitting, prediction and parameter access are all delegated to a
    GaussianCNNRegressor built with output_dim=1 and the observation space's
    shape as its input shape.

    Args:
        env_spec (metarl.envs.env_spec.EnvSpec): Environment specification.
        subsample_factor (float): The factor to subsample the data. By
            default it is 1.0, which means using all the data.
        regressor_args (dict): Extra keyword arguments for the regressor.
        name (str): Name of baseline, also used as the regressor name.

    """
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        regressor_args=None,
        name='GaussianCNNBaseline',
    ):
        super().__init__(env_spec)

        extra_kwargs = {} if regressor_args is None else regressor_args
        obs_shape = env_spec.observation_space.shape
        self._regressor = GaussianCNNRegressor(
            input_shape=obs_shape,
            output_dim=1,
            subsample_factor=subsample_factor,
            name=name,
            **extra_kwargs)
        self.name = name

    def fit(self, paths):
        """Fit the regressor on the concatenated data of all paths.

        Args:
            paths (list[dict[str, numpy.ndarray]]): Sample paths. Each path
                must provide 'observations' and 'returns' entries.

        """
        obs_chunks = [traj['observations'] for traj in paths]
        stacked_obs = np.concatenate(obs_chunks)
        return_chunks = [traj['returns'] for traj in paths]
        # The regressor is fitted on returns reshaped to a single column.
        targets = np.concatenate(return_chunks).reshape((-1, 1))
        self._regressor.fit(stacked_obs, targets)

    def predict(self, path):
        """Predict values for the observations of one path.

        Args:
            path (dict[numpy.ndarray]): Sample path. Only the 'observations'
                entry is read.

        Returns:
            numpy.ndarray: Predicted value, flattened to one dimension.

        """
        predicted = self._regressor.predict(path['observations'])
        return predicted.flatten()

    def get_param_values(self):
        """Return the parameter values of the underlying regressor.

        Returns:
            List[np.ndarray]: A list of values of each parameter.

        """
        values = self._regressor.get_param_values()
        return values

    def set_param_values(self, flattened_params):
        """Forward new parameter values to the underlying regressor.

        Args:
            flattened_params (np.ndarray): A numpy array of parameter values.

        """
        self._regressor.set_param_values(flattened_params)

    def get_params_internal(self):
        """Return the regressor's params, which are the trainable variables.

        Returns:
            List[tf.Variable]: A list of trainable variables in the current
            variable scope.

        """
        trainable = self._regressor.get_params_internal()
        return trainable