def test_apply_gradients(self):
        component = DummyWithOptimizer(variable_value=2.0)

        test = ComponentTest(
            component=component,
            input_spaces=dict(input_=FloatBox(add_batch_rank=True)))

        expected_grad = 0.69314718
        expected_outputs = [expected_grad, 2.0]
        test.test(("calc_grads"), expected_outputs=expected_outputs)

        # Now apply the grad and check the variable value.
        expected_loss = np.square(np.log(2.0))
        expected_outputs = [None, expected_loss, expected_loss]
        var_values_before = test.read_variable_values(
            component.variable_registry)
        test.test(("step"), expected_outputs=expected_outputs)

        # Check against variable now. Should change by -learning_rate*grad.
        var_values_after = test.read_variable_values(
            component.variable_registry)
        expected_new_value = var_values_before["dummy-with-optimizer/variable"] - \
            component.learning_rate * expected_grad
        recursive_assert_almost_equal(
            var_values_after["dummy-with-optimizer/variable"],
            expected_new_value,
            decimals=5)
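
# A minimal numpy sketch (hypothetical, not part of the test suite) of the
# arithmetic above, assuming the dummy component's loss is L(x) = log(x)^2:
# dL/dx = 2*log(x)/x, which at x=2.0 is ln(2) ~= 0.69314718 (expected_grad),
# and one plain SGD step moves the variable by -learning_rate * grad.
import numpy as np

x, learning_rate = 2.0, 0.1            # learning_rate: assumed example value
loss = np.log(x) ** 2                  # == expected_loss above
grad = 2.0 * np.log(x) / x             # == expected_grad ~= 0.69314718
x_after = x - learning_rate * grad     # compared against the variable registry
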
    def test_multiple_sequences(self):
        gae = GeneralizedAdvantageEstimation(gae_lambda=self.gae_lambda, discount=self.gamma)

        test = ComponentTest(component=gae, input_spaces=self.input_spaces)

        rewards_ = self.rewards.sample(10, fill_value=0.5)
        baseline_values_ = self.baseline_values.sample(10, fill_value=1.0)
        terminals_ = [False] * 10
        terminals_[5] = True
        sequence_indices = [False] * 10
        sequence_indices[5] = True
        terminals_ = np.asarray(terminals_)

        input_ = [baseline_values_, rewards_, terminals_, sequence_indices]
        advantage_expected = self.gae_helper(
            baseline=baseline_values_,
            reward=rewards_,
            gamma=self.gamma,
            gae_lambda=self.gae_lambda,
            terminals=terminals_,
            sequence_indices=sequence_indices
        )

        print("Advantage expected:", advantage_expected)
        advantage = test.test(("calc_gae_values", input_))
        print("Got advantage = ", advantage)
        recursive_assert_almost_equal(advantage_expected, advantage, decimals=5)

        test.terminate()
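
# A hedged numpy reference for the GAE recurrence that gae_helper above is
# assumed to implement (Schulman et al., 2015): delta_t = r_t + gamma*V_{t+1} - V_t
# and A_t = delta_t + gamma*lambda*A_{t+1}, with the accumulator reset at
# terminals and, at non-terminal sequence cuts, V_{t+1} bootstrapped from V_t.
import numpy as np

def gae_reference(baseline, reward, gamma, gae_lambda, terminals, sequence_indices):
    advantages = np.zeros_like(reward)
    gae = 0.0
    for t in reversed(range(len(reward))):
        if terminals[t]:
            next_value, gae = 0.0, 0.0          # episode over: no bootstrap
        elif sequence_indices[t]:
            next_value, gae = baseline[t], 0.0  # sequence cut: bootstrap own value
        else:
            next_value = baseline[t + 1]
        delta = reward[t] + gamma * next_value - baseline[t]
        gae = delta + gamma * gae_lambda * gae
        advantages[t] = gae
    return advantages
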
    def test_normal(self):
        # Create 5 normal distributions (2 parameters (mean and stddev) each).
        param_space = FloatBox(shape=(10,), add_batch_rank=True)
        input_spaces = dict(
            parameters=param_space,
            deterministic=bool,
        )

        # The Component to test.
        normal = Normal(switched_off_apis={"log_prob", "kl_divergence"})
        test = ComponentTest(component=normal, input_spaces=input_spaces)

        # Batch of size=1 and deterministic (True).
        input_ = [input_spaces["parameters"].sample(1), True]
        expected = input_[0][:, :5]
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(50):
            test.test(("draw", input_), expected_outputs=expected)
            test.test(("sample_deterministic", input_[0]), expected_outputs=expected)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = [input_spaces["parameters"].sample(1), False]
        expected = input_[0][:, :5]
        outs = []
        for _ in range(50):
            out = test.test(("draw", input_))
            outs.append(out)
            out = test.test(("sample_stochastic", input_[0]))
            outs.append(out)

        recursive_assert_almost_equal(np.mean(outs), expected.mean(), decimals=1)
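
# For reference: the (10,)-parameter space above is assumed to pack 5 means
# followed by 5 stddevs, which is why the deterministic draw equals the first
# half of the parameter vector.
import numpy as np

params = np.random.rand(1, 10).astype(np.float32)
means, stds = params[:, :5], params[:, 5:]  # assumed layout: [means | stddevs]
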
    def test_simple_python_preprocessor_stack(self):
        """
        Tests a pure python preprocessor stack.
        """
        space = FloatBox(shape=(2, ), add_batch_rank=True)
        # python PreprocessorStack
        multiply = dict(type="multiply", factor=0.5, scope="m")
        divide = dict(type="divide", divisor=0.5, scope="d")
        stack = PreprocessorStack(multiply, divide, backend="python")
        for sub_comp_scope in ["m", "d"]:
            stack.sub_components[sub_comp_scope].create_variables(
                input_spaces=dict(inputs=space))

        #test = ComponentTest(component=stack, input_spaces=dict(inputs=float))

        for _ in range_(3):
            # Call fake API-method directly (ok for PreprocessorStack).
            stack.reset()
            input_ = np.asarray([[1.0], [2.0], [3.0], [4.0]])
            expected = input_
            #test.test(("preprocess", input_), expected_outputs=expected)
            out = stack.preprocess(input_)
            recursive_assert_almost_equal(out, input_)

            input_ = space.sample()
            #test.test(("preprocess", input_), expected_outputs=expected)
            out = stack.preprocess(input_)
            recursive_assert_almost_equal(out, input_)
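
# Sanity check for the stack above: multiplying by 0.5 and then dividing by 0.5
# composes to the identity, so preprocess() must echo its inputs.
import numpy as np

x = np.asarray([[1.0], [2.0], [3.0], [4.0]])
assert np.allclose((x * 0.5) / 0.5, x)
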
    def test_bernoulli(self):
        # Create 5 bernoulli distributions (or a multiple thereof if we use batch-size > 1).
        param_space = FloatBox(shape=(5,), add_batch_rank=True)

        # The Component to test.
        bernoulli = Bernoulli(switched_off_apis={"log_prob", "kl_divergence"})
        input_spaces = dict(
            parameters=param_space,
            deterministic=bool,
        )
        test = ComponentTest(component=bernoulli, input_spaces=input_spaces)

        # Batch of size=6 and deterministic (True).
        input_ = [input_spaces["parameters"].sample(6), True]
        expected = input_[0] > 0.5
        # Sample n times, expect always max value (max likelihood for deterministic draw).
        for _ in range(10):
            test.test(("draw", input_), expected_outputs=expected)
            test.test(("sample_deterministic", input_[0]), expected_outputs=expected)

        # Batch of size=6 and non-deterministic -> expect roughly the mean (p ~ uniform => ~0.5).
        input_ = [input_spaces["parameters"].sample(6), False]
        outs = []
        for _ in range(20):
            out = test.test(("draw", input_))
            outs.append(out)
            out = test.test(("sample_stochastic", input_[0]))
            outs.append(out)

        recursive_assert_almost_equal(np.mean(outs), 0.5, decimals=1)
    def test_categorical(self):
        # Create 5 categorical distributions of 3 categories each.
        param_space = FloatBox(shape=(5, 3), add_batch_rank=True)

        # The Component to test.
        categorical = Categorical(switched_off_apis={"log_prob", "kl_divergence"})
        input_spaces = dict(
            parameters=param_space,
            deterministic=bool,
        )
        test = ComponentTest(component=categorical, input_spaces=input_spaces)

        # Batch of size=3 and deterministic (True).
        input_ = [input_spaces["parameters"].sample(3), True]
        expected = np.argmax(input_[0], axis=-1)
        # Sample n times, expect always max value (max likelihood for deterministic draw).
        for _ in range(10):
            test.test(("draw", input_), expected_outputs=expected)
            test.test(("sample_deterministic", input_[0]), expected_outputs=expected)

        # Batch of size=3 and non-deterministic -> expect roughly the mean class index (~1.0 over {0, 1, 2}).
        input_ = [input_spaces["parameters"].sample(3), False]
        outs = []
        for _ in range(20):
            out = test.test(("draw", input_))
            outs.append(out)
            out = test.test(("sample_stochastic", input_[0]))
            outs.append(out)

        recursive_assert_almost_equal(np.mean(outs), 1.0, decimals=1)
    def test_single_non_terminal_sequence(self):
        gae = GeneralizedAdvantageEstimation(gae_lambda=self.gae_lambda, discount=self.gamma)

        test = ComponentTest(component=gae, input_spaces=self.input_spaces)

        rewards_ = self.rewards.sample(10, fill_value=0.5)
        baseline_values_ = self.baseline_values.sample(10, fill_value=1.0)
        terminals_ = self.terminals.sample(size=10, fill_value=False)

        # Single, non-terminal sequence: all sequence indices are False here
        # (assume sequence indices = terminals).
        sequence_indices = [False] * 10
        input_ = [baseline_values_, rewards_, terminals_, sequence_indices]

        advantage_expected = self.gae_helper(
            baseline=baseline_values_,
            reward=rewards_,
            gamma=self.gamma,
            gae_lambda=self.gae_lambda,
            terminals=terminals_,
            sequence_indices=sequence_indices
        )

        advantage = test.test(("calc_gae_values", input_))
        recursive_assert_almost_equal(advantage_expected, advantage, decimals=5)
        print("Expected advantage:", advantage_expected)
        print("Got advantage:", advantage)

        test.terminate()
    def test_one_hot(self):
        """
        Tests a torch one hot function.
        """
        if get_backend() == "pytorch":
            # Flat action array.
            inputs = torch.tensor([0, 1], dtype=torch.int32)
            one_hot = pytorch_one_hot(inputs, depth=2)

            expected = torch.tensor([[1., 0.], [0., 1.]])
            recursive_assert_almost_equal(one_hot, expected)

            # Container space.
            inputs = torch.tensor([[0, 3, 2], [1, 2, 0]], dtype=torch.int32)
            one_hot = pytorch_one_hot(inputs, depth=4)

            expected = torch.tensor(
                [[[1, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0]],
                 [[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]],
                dtype=torch.int32)
            recursive_assert_almost_equal(one_hot, expected)
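
# For reference, a numpy one-liner equivalent to the one-hot op tested above:
# indexing an identity matrix with the integer inputs yields the same tensors.
import numpy as np

def np_one_hot(indices, depth):
    return np.eye(depth, dtype=np.int32)[indices]

# np_one_hot(np.array([[0, 3, 2], [1, 2, 0]]), depth=4) has shape (2, 3, 4).
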
    def test_gumbel_softmax_distribution(self):
        # 5-categorical Gumbel-Softmax.
        param_space = Tuple(FloatBox(shape=(5, )), add_batch_rank=True)
        values_space = FloatBox(shape=(5, ), add_batch_rank=True)
        input_spaces = dict(parameters=param_space,
                            deterministic=bool,
                            values=values_space)

        gumbel_softmax_distribution = GumbelSoftmax(
            switched_off_apis={"kl_divergence", "entropy"}, temperature=1.0)
        test = ComponentTest(component=gumbel_softmax_distribution,
                             input_spaces=input_spaces)

        # Batch of size=2 and deterministic (True).
        input_ = [param_space.sample(2), True]
        expected = np.argmax(input_[0], axis=-1)
        # Sample n times, expect always argmax value (deterministic draw).
        for _ in range(50):
            test.test(("draw", input_), expected_outputs=expected, decimals=5)
            test.test(("sample_deterministic", tuple([input_[0]])),
                      expected_outputs=expected,
                      decimals=5)

        # TODO: finish this test case, using an actual Gumbel-Softmax distribution from the
        # paper: https://arxiv.org/pdf/1611.01144.pdf.
        return

        # NOTE: Everything below is unreachable scaffolding copied from the
        # squashed-normal test (`low`/`high` are not defined in this scope).
        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = [param_space.sample(1), False]
        expected = "???"
        outs = []
        for _ in range(100):
            out = test.test(("draw", input_))
            outs.append(np.argmax(out, axis=-1))
            out = test.test(("sample_stochastic", tuple([input_[0]])))
            outs.append(np.argmax(out, axis=-1))

        recursive_assert_almost_equal(np.mean(outs),
                                      expected.mean(),
                                      decimals=1)

        # Test log-likelihood outputs.
        means = np.array([[0.1, 0.2, 0.3, 0.4, 5.0]])
        stds = np.array([[0.8, 0.2, 0.3, 2.0, 4.0]])
        # Make sure values are within low and high.
        values = np.array([[0.9, 0.2, 0.4, -0.1, -1.05]])

        # Change of variables: unsquash y -> x = arctanh(2*(y-low)/(high-low) - 1),
        # then take the Gaussian log-likelihood of x minus the log-det of the
        # tanh Jacobian, log(1 - tanh(x)^2), summed over the event dimension.
        unsquashed_values = np.arctanh((values - low) / (high - low) * 2.0 -
                                       1.0)
        log_prob_unsquashed = np.log(norm.pdf(unsquashed_values, means, stds))
        log_prob = log_prob_unsquashed - np.sum(
            np.log(1 - np.tanh(unsquashed_values)**2), axis=-1, keepdims=True)

        test.test(("log_prob", [tuple([means, stds]), values]),
                  expected_outputs=log_prob,
                  decimals=4)
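
# A hedged numpy sketch of the Gumbel-Softmax draw from the paper referenced in
# the TODO above (https://arxiv.org/pdf/1611.01144.pdf): sample
# y = softmax((log(pi) + g) / temperature) with g ~ Gumbel(0, 1) = -log(-log(U)).
import numpy as np

def gumbel_softmax_sample(logits, temperature=1.0, rng=np.random):
    u = rng.uniform(low=1e-10, high=1.0, size=logits.shape)
    g = -np.log(-np.log(u))                   # standard Gumbel noise
    z = (logits + g) / temperature
    z = z - z.max(axis=-1, keepdims=True)     # stabilize the softmax
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)  # relaxed (differentiable) one-hot
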
    def test_beta(self):
        # Create 5 beta distributions (2 parameters (alpha and beta) each).
        param_space = Tuple(
            FloatBox(shape=(5, )),  # alpha
            FloatBox(shape=(5, )),  # beta
            add_batch_rank=True)
        values_space = FloatBox(shape=(5, ), add_batch_rank=True)
        input_spaces = dict(
            parameters=param_space,
            values=values_space,
            deterministic=bool,
        )

        # The Component to test.
        low, high = -1.0, 2.0
        beta_distribution = Beta(low=low,
                                 high=high,
                                 switched_off_apis={"kl_divergence"})
        test = ComponentTest(component=beta_distribution,
                             input_spaces=input_spaces)

        # Batch of size=2 and deterministic (True).
        input_ = [input_spaces["parameters"].sample(2), True]
        # Mean for a Beta distribution: 1 / [1 + (beta/alpha)]
        expected = (1.0 /
                    (1.0 + input_[0][1] / input_[0][0])) * (high - low) + low
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(50):
            test.test(("draw", input_), expected_outputs=expected, decimals=5)
            test.test(("sample_deterministic", tuple([input_[0]])),
                      expected_outputs=expected,
                      decimals=5)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = [input_spaces["parameters"].sample(1), False]
        expected = (1.0 /
                    (1.0 + input_[0][1] / input_[0][0])) * (high - low) + low
        outs = []
        for _ in range(50):
            out = test.test(("draw", input_))
            outs.append(out)
            out = test.test(("sample_stochastic", tuple([input_[0]])))
            outs.append(out)

        recursive_assert_almost_equal(np.mean(outs),
                                      expected.mean(),
                                      decimals=1)

        # Test log-likelihood outputs (against scipy).
        alpha_ = values_space.sample(1)
        beta_ = values_space.sample(1)
        values = values_space.sample(1)
        values_scaled = values * (high - low) + low
        test.test(("log_prob", [tuple([alpha_, beta_]), values_scaled]),
                  expected_outputs=np.log(beta.pdf(values, alpha_, beta_)),
                  decimals=4)
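
# The Beta mean used above, spelled out: E[X] = alpha / (alpha + beta)
# = 1 / (1 + beta/alpha) on [0, 1], then affinely scaled to [low, high].
import numpy as np

def scaled_beta_mean(alpha, beta_param, low, high):
    mean01 = 1.0 / (1.0 + beta_param / alpha)  # == alpha / (alpha + beta)
    return mean01 * (high - low) + low
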
    def test_multivariate_normal(self):
        # Create batch0=n (batch-rank), batch1=2 (can be used for m mixed Gaussians), num-events=3 (trivariate)
        # distributions (2 parameters (mean and stddev) each).
        num_events = 3  # 3=trivariate Gaussian
        num_mixed_gaussians = 2  # 2x trivariate Gaussians (mixed)
        param_space = Tuple(
            FloatBox(shape=(num_mixed_gaussians, num_events)),  # mean
            FloatBox(shape=(num_mixed_gaussians,
                            num_events)),  # diag (variance)
            add_batch_rank=True)
        values_space = FloatBox(shape=(num_mixed_gaussians, num_events),
                                add_batch_rank=True)
        input_spaces = dict(
            parameters=param_space,
            values=values_space,
            deterministic=bool,
        )

        # The Component to test.
        multivariate_normal = MultivariateNormal(
            switched_off_apis={"kl_divergence"})
        test = ComponentTest(component=multivariate_normal,
                             input_spaces=input_spaces)

        input_ = [input_spaces["parameters"].sample(4), True]
        expected = input_[0][0]  # 0=mean
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(50):
            test.test(("draw", input_), expected_outputs=expected)
            test.test(("sample_deterministic", tuple([input_[0]])),
                      expected_outputs=expected)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = [input_spaces["parameters"].sample(1), False]
        expected = input_[0][0]  # 0=mean
        outs = []
        for _ in range(50):
            out = test.test(("draw", input_))
            outs.append(out)
            out = test.test(("sample_stochastic", tuple([input_[0]])))
            outs.append(out)

        recursive_assert_almost_equal(np.mean(outs),
                                      expected.mean(),
                                      decimals=1)

        # Test log-likelihood outputs (against scipy).
        means = values_space.sample(2)
        stds = values_space.sample(2)
        values = values_space.sample(2)
        test.test(
            ("log_prob", [tuple([means, stds]), values]),
            # Sum up the individual log-probs as we have a diag (independent) covariance matrix.
            expected_outputs=np.sum(np.log(norm.pdf(values, means, stds)),
                                    axis=-1),
            decimals=4)
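
# A scipy cross-check of the expectation above: with a diagonal covariance, the
# multivariate normal log-density factorizes into a sum of univariate normal
# log-densities along the event axis.
import numpy as np
from scipy.stats import multivariate_normal, norm

means, stds = np.zeros(3), np.ones(3) * 0.5
values = np.array([0.5, -0.2, 1.0])
lp_sum = np.sum(np.log(norm.pdf(values, means, stds)))
lp_mvn = multivariate_normal.logpdf(values, mean=means, cov=np.diag(stds ** 2))
assert np.allclose(lp_sum, lp_mvn)
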
    def test_batched_backend_equivalence(self):
        """
        Tests if Python and TensorFlow backend return the same output
        for a standard DQN-style preprocessing stack.
        """
        return  # Test deactivated.
        env_spec = dict(
            type="openai",
            gym_env="Pong-v0",
            frameskip=4,
            max_num_noops=30,
            episodic_life=True
        )
        # Test with batching because we assume vector environments to be the normal case going forward.
        env = SequentialVectorEnv(num_envs=4, env_spec=env_spec, num_background_envs=2)
        in_space = env.state_space

        agent_config = config_from_path("configs/ray_apex_for_pong.json")
        preprocessing_spec = deepcopy(agent_config["preprocessing_spec"])

        # Set up python preprocessor.
        scopes = [preprocessor["scope"] for preprocessor in preprocessing_spec]
        # Set backend to python.
        for spec in preprocessing_spec:
            spec["backend"] = "python"
        python_processor = PreprocessorStack(*preprocessing_spec, backend="python")
        for sub_comp_scope in scopes:
            python_processor.sub_components[sub_comp_scope].create_variables(dict(preprocessing_inputs=in_space))
        python_processor.reset()

        # To have the use case we considered so far, use agent interface for TF backend.
        agent_config.pop("type")
        agent = ApexAgent(state_space=env.state_space, action_space=env.action_space, **agent_config)

        # Generate a few states from random set points. Test if preprocessed states are almost equal
        states = np.asarray(env.reset_all())
        actions, agent_preprocessed_states = agent.get_action(
            states=states, use_exploration=False, extra_returns="preprocessed_states")
        print("TensorFlow preprocessed shape: {}".format(np.asarray(agent_preprocessed_states).shape))
        python_preprocessed_states = python_processor.preprocess(states)
        print("Python preprocessed shape: {}".format(np.asarray(python_preprocessed_states).shape))
        print("Asserting (almost) equal values:")
        for tf_state, python_state in zip(agent_preprocessed_states, python_preprocessed_states):
            flat_tf = np.ndarray.flatten(tf_state)
            flat_python = np.ndarray.flatten(python_state)
            for x, y in zip(flat_tf, flat_python):
                recursive_assert_almost_equal(x, y, decimals=3)

        states, _, _, _ = env.step(actions)
        actions, agent_preprocessed_states = agent.get_action(
            states=states, use_exploration=False, extra_returns="preprocessed_states")
        print("TensorFlow preprocessed shape: {}".format(np.asarray(agent_preprocessed_states).shape))
        python_preprocessed_states = python_processor.preprocess(states)
        print("Python preprocessed shape: {}".format(np.asarray(python_preprocessed_states).shape))
        print("Asserting (almost) equal values:")
        recursive_assert_almost_equal(agent_preprocessed_states, python_preprocessed_states, decimals=3)
    def test_backend_equivalence(self):
        """
        Tests if Python and TensorFlow backend return the same output
        for a standard DQN-style preprocessing stack.
        """
        in_space = IntBox(256, shape=(210, 160, 3), dtype="uint8", add_batch_rank=True)

        # Regression test: Incrementally add preprocessors.
        to_use = []
        for i, decimals in zip(range_(len(self.preprocessing_spec)), [0, 0, 2, 2]):
            to_use.append(i)
            incremental_spec = []
            incremental_scopes = []
            for index in to_use:
                incremental_spec.append(deepcopy(self.preprocessing_spec[index]))
                incremental_scopes.append(self.preprocessing_spec[index]["scope"])

            print("Comparing incremental spec: {}".format(incremental_scopes))

            # Set up python preprocessor.
            # Set backend to python.
            for spec in incremental_spec:
                spec["backend"] = "python"
            python_preprocessor = PreprocessorStack(*incremental_spec, backend="python")
            for sub_comp_scope in incremental_scopes:
                python_preprocessor.sub_components[sub_comp_scope].create_variables(
                    input_spaces=dict(preprocessing_inputs=in_space), action_space=None
                )
                python_preprocessor.sub_components[sub_comp_scope].check_input_spaces(
                    input_spaces=dict(preprocessing_inputs=in_space), action_space=None
                )
                #build_space = python_processor.sub_components[sub_comp_scope].get_preprocessed_space(build_space)
                python_preprocessor.reset()

            # To compare to tf, use an equivalent tf PreprocessorStack.
            # Switch back to tf.
            for spec in incremental_spec:
                spec["backend"] = "tf"
            tf_preprocessor = PreprocessorStack(*incremental_spec, backend="tf")

            test = ComponentTest(component=tf_preprocessor, input_spaces=dict(
                inputs=in_space
            ))

            # Generate a few states from random set points. Test if preprocessed states are almost equal
            states = in_space.sample(size=self.batch_size)
            python_preprocessed_states = python_preprocessor.preprocess(states)
            tf_preprocessed_states = test.test(("preprocess", states), expected_outputs=None)

            print("Asserting (almost) equal values:")
            for tf_state, python_state in zip(tf_preprocessed_states, python_preprocessed_states):
                recursive_assert_almost_equal(tf_state, python_state, decimals=decimals)
            print("Success comparing: {}".format(incremental_scopes))
    def test_bernoulli(self):
        # Create 5 bernoulli distributions (or a multiple thereof if we use batch-size > 1).
        param_space = FloatBox(shape=(5, ), add_batch_rank=True)
        values_space = BoolBox(shape=(5, ), add_batch_rank=True)

        # The Component to test.
        bernoulli = Bernoulli(switched_off_apis={"kl_divergence"})
        input_spaces = dict(
            parameters=param_space,
            values=values_space,
            deterministic=bool,
        )
        test = ComponentTest(component=bernoulli, input_spaces=input_spaces)

        # Batch of size=6 and deterministic (True).
        input_ = [input_spaces["parameters"].sample(6), True]
        expected = input_[0] > 0.5
        # Sample n times, expect always max value (max likelihood for deterministic draw).
        for _ in range(10):
            test.test(("draw", input_), expected_outputs=expected)
            test.test(("sample_deterministic", input_[0]),
                      expected_outputs=expected)

        # Batch of size=6 and non-deterministic -> expect roughly the mean (p ~ uniform => ~0.5).
        input_ = [input_spaces["parameters"].sample(6), False]
        outs = []
        for _ in range(20):
            out = test.test(("draw", input_))
            outs.append(out)
            out = test.test(("sample_stochastic", input_[0]))
            outs.append(out)

        recursive_assert_almost_equal(np.mean(outs), 0.5, decimals=1)

        # Test log-likelihood outputs.
        test.test(
            (
                "log_prob",
                [
                    np.array([[0.1, 0.2, 0.3, 0.4, 0.5]]),
                    np.array([[True, False, False, True, True]])
                    # probability that result is the given value
                ]),
            expected_outputs=np.log(np.array([[0.1, 0.8, 0.7, 0.4, 0.5]])))

        # Test entropy outputs.
        input_ = np.array([[0.1, 0.2, 0.3, 0.4, 0.5]])
        # Binary Entropy with natural log.
        expected_entropy = -(input_ * np.log(input_)) - (
            (1.0 - input_) * np.log(1.0 - input_))
        test.test(("entropy", input_), expected_outputs=expected_entropy)
    def test_python_sequence_preprocessor(self):
        seq_len = 3
        space = FloatBox(shape=(1,), add_batch_rank=True)
        sequencer = Sequence(sequence_length=seq_len, batch_size=4, add_rank=True, backend="python")
        sequencer.create_variables(input_spaces=dict(preprocessing_inputs=space))

        #test = ComponentTest(component=sequencer, input_spaces=dict(apply=space))

        for _ in range_(3):
            sequencer._graph_fn_reset()
            self.assertEqual(sequencer.index, -1)
            input_ = np.asarray([[1.0], [2.0], [3.0], [4.0]])
            out = sequencer._graph_fn_apply(input_)
            self.assertEqual(sequencer.index, 0)
            recursive_assert_almost_equal(
                out, np.asarray([[[1.0, 1.0, 1.0]], [[2.0, 2.0, 2.0]], [[3.0, 3.0, 3.0]], [[4.0, 4.0, 4.0]]])
            )
            input_ = np.asarray([[1.1], [2.2], [3.3], [4.4]])
            out = sequencer._graph_fn_apply(input_)
            self.assertEqual(sequencer.index, 1)
            recursive_assert_almost_equal(
                out, np.asarray([[[1.0, 1.0, 1.1]], [[2.0, 2.0, 2.2]], [[3.0, 3.0, 3.3]], [[4.0, 4.0, 4.4]]])
            )
            input_ = np.asarray([[1.11], [2.22], [3.33], [4.44]])
            out = sequencer._graph_fn_apply(input_)
            self.assertEqual(sequencer.index, 2)
            recursive_assert_almost_equal(
                out, np.asarray([[[1.0, 1.1, 1.11]], [[2.0, 2.2, 2.22]], [[3.0, 3.3, 3.33]], [[4.0, 4.4, 4.44]]])
            )
            input_ = np.asarray([[10], [20], [30], [40]])
            out = sequencer._graph_fn_apply(input_)
            self.assertEqual(sequencer.index, 0)
            recursive_assert_almost_equal(
                out, np.asarray([[[1.1, 1.11, 10]], [[2.2, 2.22, 20]], [[3.3, 3.33, 30]], [[4.4, 4.44, 40]]])
            )
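
# A hedged pure-python sketch of the sequencer behavior exercised above: a
# fixed-length ring buffer, pre-filled with the first input after a reset, whose
# contents are stacked along a new trailing rank on every call.
import numpy as np
from collections import deque

class SequenceSketch:
    def __init__(self, sequence_length):
        self.sequence_length = sequence_length
        self.buffer = None

    def reset(self):
        self.buffer = None

    def apply(self, inputs):
        if self.buffer is None:  # first call after reset: repeat the input
            self.buffer = deque([inputs] * self.sequence_length,
                                maxlen=self.sequence_length)
        else:
            self.buffer.append(inputs)
        return np.stack(self.buffer, axis=-1)  # adds the sequence rank
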
    def test_time_rank_folding_for_large_dense_nn(self):
        vector_dim = 256
        input_space = FloatBox(shape=(vector_dim, ),
                               add_batch_rank=True,
                               add_time_rank=True)
        base_config = config_from_path("configs/test_large_dense_nn.json")
        neural_net_wo_folding = NeuralNetwork.from_spec(base_config)

        test = ComponentTest(component=neural_net_wo_folding,
                             input_spaces=dict(nn_input=input_space))

        # Pull a large batch+time ranked sample.
        sample_shape = (256, 200)
        inputs = input_space.sample(sample_shape)

        start = time.monotonic()
        runs = 10
        for _ in range(runs):
            print(".", flush=True, end="")
            test.test(("call", inputs), expected_outputs=None)
        runtime_wo_folding = time.monotonic() - start

        print(
            "\nTesting large dense NN w/o time-rank folding: {}x pass through with {}-data took "
            "{}s".format(runs, sample_shape, runtime_wo_folding))

        neural_net_w_folding = NeuralNetwork.from_spec(base_config)

        # Folded space.
        input_space_folded = FloatBox(shape=(vector_dim, ),
                                      add_batch_rank=True)
        inputs = input_space.sample(sample_shape[0] * sample_shape[1])

        test = ComponentTest(component=neural_net_w_folding,
                             input_spaces=dict(nn_input=input_space_folded))

        start = time.monotonic()
        for _ in range(runs):
            print(".", flush=True, end="")
            test.test(("call", inputs), expected_outputs=None)
        runtime_w_folding = time.monotonic() - start

        print(
            "\nTesting large dense NN w/ time-rank folding: {}x pass through with {}-data took "
            "{}s".format(runs, sample_shape, runtime_w_folding))

        recursive_assert_almost_equal(runtime_w_folding,
                                      runtime_wo_folding,
                                      decimals=0)
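
# Time-rank folding, as timed above, is just a reshape: merge (batch, time) into
# one rank before the dense layers and split it out again afterwards.
import numpy as np

batch, time_steps, dim = 256, 200, 256
x = np.random.randn(batch, time_steps, dim).astype(np.float32)
folded = x.reshape(batch * time_steps, dim)        # (B, T, D) -> (B*T, D)
unfolded = folded.reshape(batch, time_steps, dim)  # ... and back
assert np.array_equal(x, unfolded)
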
    def test_python_image_crop(self):
        image_crop = ImageCrop(x=7, y=1, width=8, height=12, backend="python")
        image_crop.create_variables(input_spaces=dict(
            inputs=FloatBox(shape=(16, 16, 3), add_batch_rank=False)))

        input_image = cv2.imread(
            os.path.join(os.path.dirname(__file__),
                         "images/16x16x3_image.bmp"))
        expected = cv2.imread(
            os.path.join(os.path.dirname(__file__),
                         "images/8x12x3_image_cropped.bmp"))
        assert expected is not None

        out = image_crop._graph_fn_call(input_image)
        recursive_assert_almost_equal(out, expected)
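
# The crop tested above corresponds to a plain numpy slice: rows y..y+height,
# columns x..x+width (note the (height, width, channels) output order).
def np_crop(image, x, y, width, height):
    return image[y:y + height, x:x + width]

# np_crop(input_image, x=7, y=1, width=8, height=12).shape == (12, 8, 3)
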
    def test_grayscale_python_with_uint8_image(self):
        # last rank is always the color rank (its dim must match len(grayscale-weights))
        space = IntBox(256,
                       shape=(1, 1, 3),
                       dtype="uint8",
                       add_batch_rank=True)
        grayscale = GrayScale(keep_rank=False, backend="python")

        # Run the test (batch of 2 images).
        input_ = space.sample(size=2)
        expected = np.round(np.dot(input_[:, :, :, :3], [0.299, 0.587, 0.114]),
                            0).astype(dtype=input_.dtype)

        out = grayscale._graph_fn_apply(input_)
        recursive_assert_almost_equal(out, expected)
    def test_normal(self):
        # Create 5 normal distributions (2 parameters (mean and stddev) each).
        param_space = Tuple(
            FloatBox(shape=(5, )),  # mean
            FloatBox(shape=(5, )),  # stddev
            add_batch_rank=True)
        values_space = FloatBox(shape=(5, ), add_batch_rank=True)
        input_spaces = dict(
            parameters=param_space,
            values=values_space,
            deterministic=bool,
        )

        # The Component to test.
        normal = Normal(switched_off_apis={"kl_divergence"})
        test = ComponentTest(component=normal, input_spaces=input_spaces)

        # Batch of size=2 and deterministic (True).
        input_ = [param_space.sample(2), True]
        expected = input_[0][0]  # 0 = mean
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(50):
            test.test(("draw", input_), expected_outputs=expected)
            test.test(("sample_deterministic", tuple([input_[0]])),
                      expected_outputs=expected)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = [param_space.sample(1), False]
        expected = input_[0][0]  # 0 = mean
        outs = []
        for _ in range(50):
            out = test.test(("draw", input_))
            outs.append(out)
            out = test.test(("sample_stochastic", tuple([input_[0]])))
            outs.append(out)

        recursive_assert_almost_equal(np.mean(outs),
                                      expected.mean(),
                                      decimals=1)

        # Test log-likelihood outputs.
        means = np.array([[0.1, 0.2, 0.3, 0.4, 100.0]])
        stds = np.array([[0.8, 0.2, 0.3, 2.0, 50.0]])
        values = np.array([[1.0, 2.0, 0.4, 10.0, 5.4]])
        test.test(("log_prob", [tuple([means, stds]), values]),
                  expected_outputs=np.log(norm.pdf(values, means, stds)),
                  decimals=4)
    def test_apex_weight_syncing(self):
        env = RandomEnv(state_space=spaces.IntBox(2),
                        action_space=spaces.IntBox(2),
                        deterministic=True)

        agent = Agent.from_spec(
            config_from_path("configs/apex_agent_for_random_env.json"),
            state_space=env.state_space,
            action_space=env.action_space)

        policy_weights = agent.get_policy_weights()
        print('policy weights: {}'.format(policy_weights))

        for variable, weights in policy_weights.items():
            weights += 0.01
        agent.set_policy_weights(policy_weights)

        new_weights = agent.get_policy_weights()
        recursive_assert_almost_equal(policy_weights, new_weights)
    def test_apex_weight_syncing(self):
        agent_config = config_from_path("configs/ray_apex_for_pong.json")
        agent_config["execution_spec"].pop("ray_spec")
        environment = OpenAIGymEnv("Pong-v0", frameskip=4)

        agent = Agent.from_spec(
            agent_config,
            state_space=environment.state_space,
            action_space=environment.action_space
        )

        weights = agent.get_weights()["policy_weights"]
        print("type weights = ", type(weights))
        for variable, value in weights.items():
            print("Type value = ", type(value))
            value += 0.01
        agent.set_weights(weights)

        new_weights = agent.get_weights()["policy_weights"]
        recursive_assert_almost_equal(weights, new_weights)
    def test_reshape_python_with_time_rank_unfolding(self):
        # Unfold time rank from batch rank with given time-dimension (2 out of 8 -> batch will be 4 after unfolding).
        in_space = FloatBox(shape=(4, 4),
                            add_batch_rank=True,
                            add_time_rank=False)
        in_space_before_folding = FloatBox(shape=(4, 4),
                                           add_batch_rank=True,
                                           add_time_rank=True)
        reshape = ReShape(unfold_time_rank=True, backend="python")
        reshape.create_variables(
            dict(preprocessing_inputs=in_space,
                 input_before_time_rank_folding=in_space_before_folding))

        # seq-len=2, batch-size=4 -> unfold from 8.
        inputs = in_space.sample(size=8)
        inputs_before_folding = in_space_before_folding.sample(size=(4, 2))
        expected = np.reshape(inputs, newshape=(4, 2, 4, 4))
        out = reshape._graph_fn_apply(inputs, inputs_before_folding)

        recursive_assert_almost_equal(out, expected)
    def test_beta(self):
        # Create 5 beta distributions (2 parameters (alpha and beta) each).
        param_space = Tuple(
            FloatBox(shape=(5, )),  # alpha
            FloatBox(shape=(5, )),  # beta
            add_batch_rank=True)
        input_spaces = dict(
            parameters=param_space,
            deterministic=bool,
        )

        # The Component to test.
        beta_distribution = Beta(
            switched_off_apis={"log_prob", "kl_divergence"})
        test = ComponentTest(component=beta_distribution,
                             input_spaces=input_spaces)

        # Batch of size=2 and deterministic (True).
        input_ = [input_spaces["parameters"].sample(2), True]
        # Mean for a Beta distribution: 1 / [1 + (beta/alpha)]
        expected = 1.0 / (1.0 + input_[0][1] / input_[0][0])
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(50):
            test.test(("draw", input_), expected_outputs=expected)
            test.test(("sample_deterministic", tuple([input_[0]])),
                      expected_outputs=expected)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = [input_spaces["parameters"].sample(1), False]
        expected = 1.0 / (1.0 + input_[0][1] / input_[0][0])
        outs = []
        for _ in range(50):
            out = test.test(("draw", input_))
            outs.append(out)
            out = test.test(("sample_stochastic", tuple([input_[0]])))
            outs.append(out)

        recursive_assert_almost_equal(np.mean(outs),
                                      expected.mean(),
                                      decimals=1)
    def test_reverse_apply_decays_to_sequence(self):
        """
        Tests reverse decaying a sequence of 1-step TD errors for GAE.
        """
        sequence_helper = SequenceHelper()
        decay_value = 0.5

        test = ComponentTest(component=sequence_helper,
                             input_spaces=self.input_spaces)
        td_errors = np.asarray([0.1, 0.2, 0.3, 0.4])
        indices = np.array([0, 0, 0, 1])
        expected_output_sequence_manual = np.asarray([
            0.1 + 0.5 * 0.2 + 0.25 * 0.3 + 0.125 * 0.4,
            0.2 + 0.5 * 0.3 + 0.25 * 0.4, 0.3 + 0.5 * 0.4, 0.4
        ])
        expected_output_sequence_numpy = self.decay_td_sequence(
            td_errors, decay=decay_value)
        recursive_assert_almost_equal(expected_output_sequence_manual,
                                      expected_output_sequence_numpy)
        test.test(("reverse_apply_decays_to_sequence",
                   [td_errors, indices, decay_value]),
                  expected_outputs=expected_output_sequence_manual)
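
# A hedged numpy reference for the reverse decay checked above (single sequence,
# so the indices array plays no role here): each entry is the tail sum
# sum_k decay^k * td[t+k], computed right-to-left in one pass.
import numpy as np

def decay_td_sequence_reference(td_errors, decay):
    out = np.zeros_like(td_errors)
    running = 0.0
    for t in reversed(range(len(td_errors))):
        running = td_errors[t] + decay * running
        out[t] = running
    return out

# decay_td_sequence_reference(np.asarray([0.1, 0.2, 0.3, 0.4]), 0.5)
# -> [0.325, 0.45, 0.5, 0.4], matching the manual expectation above.
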
    def test_with_manual_numbers_and_lambda_0_5(self):
        lambda_ = 0.5
        lg = lambda_ * self.gamma
        gae = GeneralizedAdvantageEstimation(gae_lambda=lambda_, discount=self.gamma)

        test = ComponentTest(component=gae, input_spaces=self.input_spaces)

        # Single sequence of length 3.
        rewards_ = np.array([0.1, 0.2, 0.3])
        baseline_values_ = np.array([1.0, 2.0, 3.0])
        terminals_ = np.array([False, False, False])

        # Final sequence index must always be true.
        sequence_indices = np.array([False, False, True])
        input_ = [baseline_values_, rewards_, terminals_, sequence_indices]

        # Test TD-error outputs.
        td = np.array([1.08, 1.17, 0.27])
        test.test(("calc_td_errors", input_), expected_outputs=td, decimals=5)

        expected_gaes_manual = np.array([
            td[0] + lg * td[1] + lg * lg * td[2],
            td[1] + lg * td[2],
            td[2]
        ])
        expected_gaes_helper = self.gae_helper(
            baseline_values_, rewards_, self.gamma, lambda_, terminals_, sequence_indices
        )
        recursive_assert_almost_equal(expected_gaes_manual, expected_gaes_helper, decimals=5)
        advantages = test.test(("calc_gae_values", input_), expected_outputs=expected_gaes_manual)

        print("Rewards:", rewards_)
        print("Baseline-values:", baseline_values_)
        print("Terminals:", terminals_)
        print("Expected advantage:", expected_gaes_manual)
        print("Got advantage:", advantages)

        test.terminate()
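
# Spelling out the TD targets above (assuming self.gamma == 0.99): non-terminal
# steps bootstrap the next baseline value, and the final, non-terminal sequence
# cut bootstraps its own value.
import numpy as np

gamma = 0.99  # assumed value of self.gamma
r = np.array([0.1, 0.2, 0.3])
v = np.array([1.0, 2.0, 3.0])
v_next = np.array([v[1], v[2], v[2]])  # final cut bootstraps its own value
td_check = r + gamma * v_next - v      # -> [1.08, 1.17, 0.27]
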
    def test_bootstrapping(self):
        """
        Tests boot-strapping for GAE purposes.
        """
        sequence_helper = SequenceHelper()
        discount = 0.99

        test = ComponentTest(component=sequence_helper,
                             input_spaces=self.input_spaces)

        # No terminals - just boot-strap with final sequence index.
        values = np.asarray([1.0, 2.0, 3.0, 4.0])
        rewards = np.asarray([0, 0, 0, 0])
        sequence_indices = np.asarray([0, 0, 0, 1])
        terminals = np.asarray([0, 0, 0, 0])

        expected_deltas = self.deltas(values, rewards, discount, terminals,
                                      sequence_indices)
        deltas = test.test(("bootstrap_values",
                            [rewards, values, terminals, sequence_indices]))
        recursive_assert_almost_equal(expected_deltas, deltas, decimals=5)

        # Final index is also terminal.
        values = np.asarray([1.0, 2.0, 3.0, 4.0])
        rewards = np.asarray([0, 0, 0, 0])
        sequence_indices = np.asarray([0, 0, 0, 1])
        terminals = np.asarray([0, 0, 0, 1])

        expected_deltas = self.deltas(values, rewards, discount, terminals,
                                      sequence_indices)
        deltas = test.test(("bootstrap_values",
                            [rewards, values, terminals, sequence_indices]))
        recursive_assert_almost_equal(expected_deltas, deltas, decimals=5)

        # Mixed: i = 1 is also terminal, i = 3 is only sequence.
        values = np.asarray([1.0, 2.0, 3.0, 4.0])
        rewards = np.asarray([0, 0, 0, 0])
        sequence_indices = np.asarray([0, 1, 0, 1])
        terminals = np.asarray([0, 1, 0, 0])

        expected_deltas = self.deltas(values, rewards, discount, terminals,
                                      sequence_indices)
        deltas = test.test(("bootstrap_values",
                            [rewards, values, terminals, sequence_indices]))
        recursive_assert_almost_equal(expected_deltas, deltas, decimals=5)
    def test_calc_decays(self):
        """
        Tests counting sequence lengths based on terminal configurations.
        """
        sequence_helper = SequenceHelper()
        decay_value = 0.5

        test = ComponentTest(component=sequence_helper,
                             input_spaces=self.input_spaces)
        input_ = np.asarray([0, 0, 0, 0])
        expected_decays = [1.0, 0.5, 0.25, 0.125]
        lengths, decays = test.test(
            ("calc_sequence_decays", [input_, decay_value]))

        # Check lengths and decays.
        recursive_assert_almost_equal(x=lengths, y=[4])
        recursive_assert_almost_equal(x=decays, y=expected_decays)

        input_ = np.asarray([0, 0, 1, 0])
        expected_decays = [1.0, 0.5, 0.25, 1.0]
        lengths, decays = test.test(
            ("calc_sequence_decays", [input_, decay_value]))

        recursive_assert_almost_equal(x=lengths, y=[3, 1])
        recursive_assert_almost_equal(x=decays, y=expected_decays)

        input_ = np.asarray([1, 1, 1, 1])
        expected_decays = [1.0, 1.0, 1.0, 1.0]
        lengths, decays = test.test(
            ("calc_sequence_decays", [input_, decay_value]))

        recursive_assert_almost_equal(x=lengths, y=[1, 1, 1, 1])
        recursive_assert_almost_equal(x=decays, y=expected_decays)
    def test_joint_cumulative_distribution(self):
        param_space = Dict(
            {
                "a": FloatBox(shape=(4,)),  # 4-discrete
                "b": Dict({
                    "ba": Tuple([FloatBox(shape=(3,)),
                                 FloatBox(0.1, 1.0, shape=(3,))]),  # 3-variate normal
                    "bb": Tuple([FloatBox(shape=(2,)),
                                 FloatBox(shape=(2,))]),  # beta -1 to 1
                    "bc": Tuple([FloatBox(shape=(4,)),
                                 FloatBox(0.1, 1.0, shape=(4,))]),  # normal (dim=4)
                })
            },
            add_batch_rank=True)

        values_space = Dict(
            {
                "a": IntBox(4),
                "b": Dict({
                    "ba": FloatBox(shape=(3,)),
                    "bb": FloatBox(shape=(2,)),
                    "bc": FloatBox(shape=(4,))
                })
            },
            add_batch_rank=True)

        input_spaces = dict(parameters=param_space,
                            values=values_space,
                            deterministic=bool)

        low, high = -1.0, 1.0
        joint_cumulative_distribution = JointCumulativeDistribution(
            distribution_specs={
                "/a": Categorical(),
                "/b/ba": MultivariateNormal(),
                "/b/bb": Beta(low=low, high=high),
                "/b/bc": Normal()
            },
            switched_off_apis={"kl_divergence"})
        test = ComponentTest(component=joint_cumulative_distribution,
                             input_spaces=input_spaces)

        # Batch of size=2 and deterministic (True).
        input_ = [param_space.sample(2), True]
        input_[0]["a"] = softmax(input_[0]["a"])
        expected_mean = {
            "a": np.argmax(input_[0]["a"], axis=-1),
            "b": {
                "ba": input_[0]["b"]["ba"][0],  # [0]=mean
                # Mean of a Beta distribution: 1 / [1 + (beta/alpha)], scaled to [low, high].
                "bb": (1.0 / (1.0 + input_[0]["b"]["bb"][1] /
                              input_[0]["b"]["bb"][0])) * (high - low) + low,
                "bc": input_[0]["b"]["bc"][0],
            }
        }
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(50):
            test.test(("draw", input_), expected_outputs=expected_mean)
            test.test(("sample_deterministic", tuple([input_[0]])),
                      expected_outputs=expected_mean)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = [param_space.sample(1), False]
        input_[0]["a"] = softmax(input_[0]["a"])
        expected_mean = {
            "a": np.sum(input_[0]["a"] * np.array([0, 1, 2, 3])),
            "b": {
                "ba": input_[0]["b"]["ba"][0],  # [0]=mean
                # Mean of a Beta distribution: 1 / [1 + (beta/alpha)], scaled to [low, high].
                "bb": (1.0 / (1.0 + input_[0]["b"]["bb"][1] /
                              input_[0]["b"]["bb"][0])) * (high - low) + low,
                "bc": input_[0]["b"]["bc"][0],
            }
        }

        outs = []
        for _ in range(100):
            out = test.test(("draw", input_))
            outs.append(out)
            out = test.test(("sample_stochastic", tuple([input_[0]])))
            outs.append(out)

        recursive_assert_almost_equal(
            np.mean(np.stack([o["a"][0] for o in outs], axis=0), axis=0),
            expected_mean["a"], atol=0.2)
        recursive_assert_almost_equal(
            np.mean(np.stack([o["b"]["ba"][0] for o in outs], axis=0), axis=0),
            expected_mean["b"]["ba"][0], decimals=1)
        recursive_assert_almost_equal(
            np.mean(np.stack([o["b"]["bb"][0] for o in outs], axis=0), axis=0),
            expected_mean["b"]["bb"][0], decimals=1)
        recursive_assert_almost_equal(
            np.mean(np.stack([o["b"]["bc"][0] for o in outs], axis=0), axis=0),
            expected_mean["b"]["bc"][0], decimals=1)

        # Test log-likelihood outputs.
        params = param_space.sample(1)
        params["a"] = softmax(params["a"])
        # Make sure beta-values are within 0.0 and 1.0 for the numpy calculation (which doesn't have scaling).
        values = values_space.sample(1)
        log_prob_beta = np.log(
            beta.pdf(values["b"]["bb"], params["b"]["bb"][0],
                     params["b"]["bb"][1]))
        # Now do the scaling for b/bb (beta values).
        values["b"]["bb"] = values["b"]["bb"] * (high - low) + low
        expected_log_llh = np.log(params["a"][0][values["a"][0]]) + \
            np.sum(np.log(norm.pdf(values["b"]["ba"][0], params["b"]["ba"][0], params["b"]["ba"][1]))) + \
            np.sum(log_prob_beta) + \
            np.sum(np.log(norm.pdf(values["b"]["bc"][0], params["b"]["bc"][0], params["b"]["bc"][1])))

        test.test(("log_prob", [params, values]),
                  expected_outputs=expected_log_llh,
                  decimals=1)
    def test_squashed_normal(self):
        param_space = Tuple(FloatBox(shape=(5, )),
                            FloatBox(shape=(5, )),
                            add_batch_rank=True)
        values_space = FloatBox(shape=(5, ), add_batch_rank=True)
        input_spaces = dict(parameters=param_space,
                            deterministic=bool,
                            values=values_space)

        low, high = -2.0, 1.0
        squashed_distribution = SquashedNormal(
            switched_off_apis={"kl_divergence"}, low=low, high=high)
        test = ComponentTest(component=squashed_distribution,
                             input_spaces=input_spaces)

        # Batch of size=2 and deterministic (True).
        input_ = [param_space.sample(2), True]
        expected = ((np.tanh(input_[0][0]) + 1.0) /
                    2.0) * (high - low) + low  # [0] = mean
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(50):
            test.test(("draw", input_), expected_outputs=expected, decimals=5)
            test.test(("sample_deterministic", tuple([input_[0]])),
                      expected_outputs=expected,
                      decimals=5)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = [param_space.sample(1), False]
        expected = ((np.tanh(input_[0][0]) + 1.0) /
                    2.0) * (high - low) + low  # [0] = mean
        outs = []
        for _ in range(500):
            out = test.test(("draw", input_))
            outs.append(out)
            self.assertTrue(out.max() <= high)
            self.assertTrue(out.min() >= low)
            out = test.test(("sample_stochastic", tuple([input_[0]])))
            outs.append(out)
            self.assertTrue(out.max() <= high)
            self.assertTrue(out.min() >= low)

        recursive_assert_almost_equal(np.mean(outs),
                                      expected.mean(),
                                      decimals=1)

        # Test log-likelihood outputs.
        means = np.array([[0.1, 0.2, 0.3, 0.4, 5.0]])
        stds = np.array([[0.8, 0.2, 0.3, 2.0, 4.0]])
        # Make sure values are within low and high.
        values = np.array([[0.9, 0.2, 0.4, -0.1, -1.05]])

        # Change of variables: unsquash y -> x = arctanh(2*(y-low)/(high-low) - 1),
        # then take the Gaussian log-likelihood of x minus the log-det of the
        # tanh Jacobian, log(1 - tanh(x)^2), summed over the event dimension.
        unsquashed_values = np.arctanh((values - low) / (high - low) * 2.0 -
                                       1.0)
        log_prob_unsquashed = np.log(norm.pdf(unsquashed_values, means, stds))
        log_prob = log_prob_unsquashed - np.sum(
            np.log(1 - np.tanh(unsquashed_values)**2), axis=-1, keepdims=True)

        test.test(("log_prob", [tuple([means, stds]), values]),
                  expected_outputs=log_prob,
                  decimals=4)
    def test_mixture(self):
        # Create a mixture distribution consisting of 3 bivariate normals.
        num_distributions = 3
        num_events_per_multivariate = 2  # 2=bivariate
        param_space = Dict(
            {
                "categorical":
                FloatBox(shape=(num_distributions, ), low=-1.5, high=2.3),
                "parameters0":
                Tuple(
                    FloatBox(shape=(num_events_per_multivariate, )),  # mean
                    FloatBox(shape=(num_events_per_multivariate, )),  # diag
                ),
                "parameters1":
                Tuple(
                    FloatBox(shape=(num_events_per_multivariate, )),  # mean
                    FloatBox(shape=(num_events_per_multivariate, )),  # diag
                ),
                "parameters2":
                Tuple(
                    FloatBox(shape=(num_events_per_multivariate, )),  # mean
                    FloatBox(shape=(num_events_per_multivariate, )),  # diag
                ),
            },
            add_batch_rank=True)
        values_space = FloatBox(shape=(num_events_per_multivariate, ),
                                add_batch_rank=True)
        input_spaces = dict(
            parameters=param_space,
            values=values_space,
            deterministic=bool,
        )

        # The Component to test.
        mixture = MixtureDistribution(
            # Try different spec types.
            MultivariateNormal(),
            "multi-variate-normal",
            "multivariate_normal",
            switched_off_apis={"entropy", "kl_divergence"})
        test = ComponentTest(component=mixture, input_spaces=input_spaces)

        # Batch of size=1 and deterministic (True).
        input_ = [input_spaces["parameters"].sample(1), True]
        # Make probs for categorical.
        categorical_probs = softmax(input_[0]["categorical"])

        # Note: Usually, the deterministic draw should return the max-likelihood value
        # Max-likelihood for a 3-Mixed Bivariate: mean-of-argmax(categorical)()
        # argmax = np.argmax(input_[0]["categorical"], axis=-1)
        #expected = np.array([input_[0]["parameters{}".format(idx)][0][i] for i, idx in enumerate(argmax)])
        #    input_[0]["categorical"][:, 1:2] * input_[0]["parameters1"][0] + \
        #    input_[0]["categorical"][:, 2:3] * input_[0]["parameters2"][0]

        # The mean value is a 2D vector (bivariate distribution).
        expected = categorical_probs[:, 0:1] * input_[0]["parameters0"][0] + \
            categorical_probs[:, 1:2] * input_[0]["parameters1"][0] + \
            categorical_probs[:, 2:3] * input_[0]["parameters2"][0]

        for _ in range(50):
            test.test(("draw", input_), expected_outputs=expected)
            test.test(("sample_deterministic", tuple([input_[0]])),
                      expected_outputs=expected)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = [input_spaces["parameters"].sample(1), False]
        # Make probs for categorical.
        categorical_probs = softmax(input_[0]["categorical"])

        expected = categorical_probs[:, 0:1] * input_[0]["parameters0"][0] + \
            categorical_probs[:, 1:2] * input_[0]["parameters1"][0] + \
            categorical_probs[:, 2:3] * input_[0]["parameters2"][0]
        outs = []
        for _ in range(50):
            out = test.test(("draw", input_))
            outs.append(out)
            out = test.test(("sample_stochastic", tuple([input_[0]])))
            outs.append(out)

        recursive_assert_almost_equal(np.mean(np.array(outs), axis=0),
                                      expected,
                                      decimals=1)

        # Test log-likelihood outputs (against scipy).
        params = param_space.sample(1)
        # Make sure categorical params are softmaxed.
        category_probs = softmax(params["categorical"][0])
        values = values_space.sample(1)
        expected = \
            category_probs[0] * \
            np.sum(np.log(norm.pdf(values[0], params["parameters0"][0][0], params["parameters0"][1][0])), axis=-1) + \
            category_probs[1] * \
            np.sum(np.log(norm.pdf(values[0], params["parameters1"][0][0], params["parameters1"][1][0])), axis=-1) + \
            category_probs[2] * \
            np.sum(np.log(norm.pdf(values[0], params["parameters2"][0][0], params["parameters2"][1][0])), axis=-1)
        test.test(("log_prob", [params, values]),
                  expected_outputs=np.array([expected]),
                  decimals=1)