Example No. 1
    def test_mixture_adapter(self):
        input_space = Float(shape=(16,), main_axes="B")
        output_space = Float(shape=(3,), main_axes="B")

        adapter = MixtureDistributionAdapter(
            "normal-distribution-adapter", "beta-distribution-adapter",
            output_space=output_space,
            activation="relu"  # Don't do this in real life! This is just to test.
        )
        batch_size = 2
        inputs = input_space.sample(batch_size)
        out = adapter(inputs)
        weights = adapter.get_weights()
        params0 = np.split(dense(inputs, weights[2], weights[3]), 2, axis=-1)
        params0[1] = np.exp(np.clip(params0[1], MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT))

        params1 = dense(inputs, weights[4], weights[5])
        params1 = np.clip(params1, np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER))
        params1 = np.log(np.exp(params1) + 1.0) + 1.0
        params1 = np.split(params1, 2, axis=-1)

        expected = {
            "categorical": relu(dense(inputs, weights[0], weights[1])), "parameters0": params0, "parameters1": params1
        }
        check(out, expected, decimals=5)
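
Throughout these examples the expected values are computed with small NumPy reference helpers (dense, relu, softmax, sigmoid, one_hot) rather than TensorFlow ops. Their implementations are not shown on this page; the following is a minimal sketch of what they are assumed to compute, based only on how they are called here (an assumption, not the library's actual code):

    import numpy as np

    def dense(x, weights, biases):
        # Plain affine layer: x @ W + b (matches tf.keras.layers.Dense without activation).
        return np.matmul(x, weights) + biases

    def relu(x):
        return np.maximum(x, 0.0)

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def softmax(x, axis=-1):
        # Subtract the max for numerical stability.
        e = np.exp(x - np.max(x, axis=axis, keepdims=True))
        return e / np.sum(e, axis=axis, keepdims=True)

    def one_hot(indices, depth):
        return np.eye(depth)[indices]
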
Example No. 2
    def test_subclassing_network_with_primitive_int_output_space(self):
        input_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")
        output_space = Int(3, main_axes="B")

        # Using keras subclassing.
        network = self.MyModel()
        nn = Network(network=network, output_space=output_space)

        # Simple function call -> Expect one output value per int-category.
        input_ = input_space.sample(6)
        result = nn(input_)
        weights = nn.get_weights()
        expected = dense(np.concatenate(
            [dense(input_, weights[0], weights[1]), dense(input_, weights[2], weights[3])],
            axis=-1
        ), weights[4], weights[5])

        check(result, expected)

        # Function call with value -> Expect output for only that int-value
        input_ = input_space.sample(6)
        values = output_space.sample(6)
        result = nn(input_, values)
        weights = nn.get_weights()
        expected = dense(np.concatenate(
            [dense(input_, weights[0], weights[1]), dense(input_, weights[2], weights[3])],
            axis=-1
        ), weights[4], weights[5])
        expected = np.sum(expected * one_hot(values, depth=output_space.num_categories), axis=-1)

        check(result, expected)
Example No. 3
    def test_func_api_network_with_automatically_handling_container_input_space(self):
        # Simple vectors plus image as inputs (see e.g. SAC).
        input_space = Dict(A=Float(-1.0, 1.0, shape=(2,)), B=Int(5), C=Float(-1.0, 1.0, shape=(2, 2, 3)), main_axes="B")
        output_space = Float(shape=(3,), main_axes="B")  # simple output

        # Only define a base-core network and let the automation handle the complex input structure via
        # `pre-concat` nets.
        core_nn = tf.keras.models.Sequential()
        core_nn.add(tf.keras.layers.Dense(3, activation="relu"))
        core_nn.add(tf.keras.layers.Dense(3))

        # Use no distributions.
        nn = Network(
            network=core_nn,
            input_space=input_space,
            pre_concat_networks=dict(
                # leave "A" out -> "A" input will go unaltered into concat step.
                B=lambda i: tf.one_hot(i, depth=input_space["B"].num_categories, axis=-1),
                C=tf.keras.layers.Flatten()
            ),
            output_space=output_space,
            distributions=False
        )

        # Simple function call.
        input_ = input_space.sample(6)
        result = nn(input_)
        weights = nn.get_weights()
        expected = dense(dense(relu(dense(np.concatenate([
            input_["A"],
            one_hot(input_["B"], depth=input_space["B"].num_categories),
            np.reshape(input_["C"], newshape=(6, -1))
        ], axis=-1), weights[0], weights[1])), weights[2], weights[3]), weights[4], weights[5])

        check(result, expected)
Example No. 4
    def test_normal(self):
        # Create 5 normal distributions (2 parameters (mean and stddev) each).
        param_space = Tuple(
            Float(shape=(5, )),  # mean
            Float(0.5, 1.0, shape=(5, )),  # stddev
            main_axes="B")
        values_space = Float(shape=(5, ), main_axes="B")

        # The Component to test.
        normal = Normal()

        # Batch of size=2 and deterministic (True).
        input_ = param_space.sample(2)
        expected = input_[0]  # 0 = mean
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(50):
            out = normal.sample(input_, deterministic=True)
            check(out, expected)
            out = normal.sample_deterministic(input_)
            check(out, expected)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = param_space.sample(1)
        expected = input_[0][0]  # 0 = mean
        outs = []
        for _ in range(100):
            out = normal.sample(input_, deterministic=False)
            outs.append(out)
            out = normal.sample_stochastic(input_)
            outs.append(out)

        check(np.mean(outs), expected.mean(), decimals=1)

        means = np.array([[0.1, 0.2, 0.3, 0.4, 50.0]])
        log_stds = np.array([[0.8, -0.2, 0.3, -1.0, 10.0]])
        # The normal-adapter applies the following transform to the NN output (interpreted as log(stddev)).
        # It doesn't really matter for this test case, though.
        stds = np.exp(
            np.clip(log_stds, a_min=MIN_LOG_NN_OUTPUT,
                    a_max=MAX_LOG_NN_OUTPUT))
        values = np.array([[1.0, 2.0, 0.4, 10.0, 5.4]])

        # Test log-likelihood outputs.
        out = normal.log_prob((means, stds), values)
        expected_outputs = np.log(norm.pdf(values, means, stds))
        check(out, expected_outputs)

        # Test entropy outputs.
        out = normal.entropy((means, stds))
        # See: https://en.wikipedia.org/wiki/Normal_distribution#Maximum_entropy
        expected_entropy = 0.5 * (1 + np.log(2 * np.square(stds) * np.pi))
        check(out, expected_entropy)
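
The closed-form entropy used above, 0.5 * (1 + log(2 * pi * sigma^2)), is the differential entropy of a univariate Gaussian. It can be cross-checked against scipy.stats.norm, which the log-prob check already uses; a quick sketch of that cross-check (not part of the original test):

    import numpy as np
    from scipy.stats import norm

    stds = np.array([[0.8, 0.9, 1.0, 0.5, 2.0]])
    closed_form = 0.5 * (1 + np.log(2 * np.pi * np.square(stds)))
    # scipy returns 0.5 * log(2 * pi * e * sigma^2), which is the same quantity.
    assert np.allclose(closed_form, norm(loc=0.0, scale=stds).entropy())
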
Example No. 5
    def test_plain_output_adapter(self):
        input_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")
        output_space = Float(shape=(3,), main_axes="B")

        adapter = PlainOutputAdapter(output_space)

        # Simple function call -> Expect the plain dense-layer output.
        input_ = input_space.sample(6)
        result = adapter(input_)
        weights = adapter.get_weights()
        expected = dense(input_, weights[0], weights[1])

        check(result, expected)
Example No. 6
    def test_bernoulli_adapter(self):
        input_space = Float(shape=(16,), main_axes="B")
        output_space = Bool(shape=(2,), main_axes="B")

        adapter = BernoulliDistributionAdapter(output_space=output_space, activation="relu")
        batch_size = 32
        inputs = input_space.sample(batch_size)
        out = adapter(inputs)
        weights = adapter.get_weights()

        # Parameters are the plain logits (no sigmoid).
        expected = relu(dense(inputs, weights[0], weights[1]))
        check(out, expected, decimals=5)
Example No. 7
    def test_categorical_adapter(self):
        input_space = Float(shape=(16,), main_axes="B")
        output_space = Int(2, shape=(3, 2), main_axes="B")

        adapter = CategoricalDistributionAdapter(
            output_space=output_space, kernel_initializer="ones", activation="relu"
        )
        batch_size = 2
        inputs = input_space.sample(batch_size)
        out = adapter(inputs)
        weights = adapter.get_weights()
        expected = np.reshape(relu(dense(inputs, weights[0], weights[1])), newshape=(batch_size, 3, 2, 2))
        check(out, expected, decimals=5)
Example No. 8
    def test_normal_adapter(self):
        input_space = Float(shape=(8,), main_axes="B")
        output_space = Float(shape=(3, 2), main_axes="B")

        adapter = NormalDistributionAdapter(output_space=output_space, activation="linear")
        batch_size = 3
        inputs = input_space.sample(batch_size)
        out = adapter(inputs)
        weights = adapter.get_weights()
        expected = np.split(np.reshape(dense(inputs, weights[0], weights[1]), newshape=(batch_size, 3, 4)), 2, axis=-1)
        expected[1] = np.clip(expected[1], MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT)
        expected[1] = np.exp(expected[1])
        check(out, expected, decimals=5)
Example No. 9
    def test_beta_adapter(self):
        input_space = Float(shape=(8,), main_axes="B")
        output_space = Float(shape=(3, 2), main_axes="B")

        adapter = BetaDistributionAdapter(output_space=output_space)
        batch_size = 5
        inputs = input_space.sample(batch_size)
        out = adapter(inputs)
        weights = adapter.get_weights()
        expected = np.reshape(dense(inputs, weights[0], weights[1]), newshape=(batch_size, 3, 4))
        expected = np.clip(expected, np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER))
        expected = np.log(np.exp(expected) + 1.0) + 1.0
        expected = np.split(expected, 2, axis=-1)
        check(out, expected, decimals=5)
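
The transform above, log(exp(x) + 1.0) + 1.0, is softplus(x) + 1: after clipping, it maps the raw NN outputs to alpha/beta values strictly greater than 1, which keeps the resulting Beta distribution unimodal. A standalone sketch of the same mapping (the function name and the small-number constant are illustrative, not taken from the library):

    import numpy as np

    def beta_params_from_nn_output(raw, small_number=1e-6):
        # Clip to avoid overflow in exp, then shift softplus by +1 so alpha, beta > 1.
        clipped = np.clip(raw, np.log(small_number), -np.log(small_number))
        return np.log(np.exp(clipped) + 1.0) + 1.0
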
Example No. 10
    def test_plain_output_adapter_with_pre_network(self):
        input_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")
        output_space = Float(shape=(3,), main_axes="B")

        adapter = PlainOutputAdapter(output_space, pre_network=tf.keras.models.Sequential(
            [tf.keras.layers.Dense(units=10, activation="relu")]
        ))

        # Simple function call -> Expect the pre-network (relu) output passed through the adapter's dense layer.
        input_ = input_space.sample(6)
        result = adapter(input_)
        weights = adapter.get_weights()
        expected = dense(relu(dense(input_, weights[0], weights[1])), weights[2], weights[3])

        check(result, expected)
Example No. 11
    def test_categorical(self):
        # Create 5 categorical distributions of 3 categories each.
        param_space = Float(shape=(5, 3), low=-1.0, high=2.0, main_axes="B")
        values_space = Int(3, shape=(5, ), main_axes="B")

        # The Component to test.
        categorical = Categorical()

        # Batch of size=3 and deterministic (True).
        input_ = param_space.sample(3)
        expected = np.argmax(input_, axis=-1)
        # Sample n times, expect always max value (max likelihood for deterministic draw).
        for _ in range(10):
            out = categorical.sample(input_, deterministic=True)
            check(out, expected)
            out = categorical.sample_deterministic(input_)
            check(out, expected)

        # Batch of size=3 and non-deterministic -> expect roughly the mean.
        input_ = param_space.sample(3)
        outs = []
        for _ in range(100):
            out = categorical.sample(input_, deterministic=False)
            outs.append(out)
            out = categorical.sample_stochastic(input_)
            outs.append(out)

        check(np.mean(outs), 1.0, decimals=0)

        input_ = param_space.sample(1)
        probs = softmax(input_)
        values = values_space.sample(1)

        # Test log-likelihood outputs.
        out = categorical.log_prob(input_, values)
        check(out,
              np.log(
                  np.array([[
                      probs[0][0][values[0][0]], probs[0][1][values[0][1]],
                      probs[0][2][values[0][2]], probs[0][3][values[0][3]],
                      probs[0][4][values[0][4]]
                  ]])),
              decimals=4)

        # Test entropy outputs.
        out = categorical.entropy(input_)
        expected_entropy = -np.sum(probs * np.log(probs), axis=-1)
        check(out, expected_entropy)
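
The manual gathering of per-distribution probabilities above (probs[0][i][values[0][i]]) can be written in one vectorized step. A self-contained sketch using np.take_along_axis, shown only as an alternative formulation of the same expected value:

    import numpy as np

    rng = np.random.default_rng(0)
    probs = rng.random((2, 5, 3))
    probs /= probs.sum(axis=-1, keepdims=True)     # (batch, 5, 3) softmaxed parameters
    values = rng.integers(0, 3, size=(2, 5))       # (batch, 5) int category picks

    picked = np.take_along_axis(probs, values[..., None], axis=-1)[..., 0]
    log_probs = np.log(picked)                     # same result as the manual indexing, vectorized
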
Example No. 12
    def test_copying_a_network(self):
        # Using keras layer as network spec.
        layer = tf.keras.layers.Dense(4)

        nn = Network(network=layer, output_space=Dict({"a": Float(shape=(2,)), "b": Int(2)}))
        # Simple call -> Should return dict with "a"->float(2,) and "b"->float(2,)
        input_ = Float(-1.0, 1.0, shape=(5,), main_axes="B").sample(5)
        _ = nn(input_)
        weights = nn.get_weights()
        expected_a = dense(dense(input_, weights[0], weights[1]), weights[2], weights[3])
        expected_b = dense(dense(input_, weights[0], weights[1]), weights[4], weights[5])

        # Do the copy.
        nn_copy = nn.copy()
        result = nn_copy(input_)
        check(result, dict(a=expected_a, b=expected_b))
Example No. 13
    def test_layer_network_with_container_output_space(self):
        # Using keras layer as network spec.
        layer = tf.keras.layers.Dense(10)

        nn = Network(
            network=layer,
            output_space=Dict({"a": Float(shape=(2, 3)), "b": Int(3)})
        )
        # Simple call -> Should return dict with "a"->float(2,3) and "b"->float(3,)
        input_ = Float(-1.0, 1.0, shape=(5,), main_axes="B").sample(5)
        result = nn(input_)
        weights = nn.get_weights()
        expected_a = np.reshape(dense(dense(input_, weights[0], weights[1]), weights[2], weights[3]), newshape=(-1, 2, 3))
        expected_b = dense(dense(input_, weights[0], weights[1]), weights[4], weights[5])

        check(result, dict(a=expected_a, b=expected_b))
Example No. 14
    def test_bernoulli(self):
        # Create 5 bernoulli distributions (or a multiple thereof if we use batch-size > 1).
        param_space = Float(-1.0, 1.0, shape=(5, ), main_axes="B")

        # The Component to test.
        bernoulli = Bernoulli()
        # Batch of size=6 and deterministic (True).
        input_ = param_space.sample(6)
        expected = sigmoid(input_) > 0.5
        # Sample n times, expect always max value (max likelihood for deterministic draw).
        for _ in range(10):
            out = bernoulli.sample(input_, deterministic=True)
            check(out, expected)
            out = bernoulli.sample_deterministic(input_)
            check(out, expected)

        # Batch of size=6 and non-deterministic -> expect roughly the mean.
        input_ = param_space.sample(6)
        outs = []
        for _ in range(100):
            out = bernoulli.sample(input_, deterministic=False)
            outs.append(out)
            out = bernoulli.sample_stochastic(input_)
            outs.append(out)

        check(np.mean(outs), 0.5, decimals=1)

        logits = np.array([[0.1, -0.2, 0.3, -4.4, 2.0]])
        probs = sigmoid(logits)

        # Test log-likelihood outputs.
        values = np.array([[True, False, False, True, True]])
        out = bernoulli.log_prob(logits, values=values)
        expected_log_probs = np.log(np.where(values, probs, 1.0 - probs))
        check(out, expected_log_probs)

        # Test entropy outputs.
        # Binary Entropy with natural log.
        expected_entropy = -(probs * np.log(probs)) - (
            (1.0 - probs) * np.log(1.0 - probs))
        out = bernoulli.entropy(logits)
        check(out, expected_entropy)
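
Both the log-prob and the binary entropy computed above can be cross-checked against scipy.stats.bernoulli with probs = sigmoid(logits); a sketch of that cross-check (not part of the original test):

    import numpy as np
    from scipy.stats import bernoulli

    logits = np.array([[0.1, -0.2, 0.3, -4.4, 2.0]])
    probs = 1.0 / (1.0 + np.exp(-logits))
    values = np.array([[True, False, False, True, True]])

    # log P(X = v) for Bernoulli(p): log(p) if v else log(1 - p).
    assert np.allclose(bernoulli.logpmf(values.astype(int), probs),
                       np.log(np.where(values, probs, 1.0 - probs)))
    # Entropy in nats: -(p * log(p) + (1 - p) * log(1 - p)).
    assert np.allclose(bernoulli.entropy(probs),
                       -(probs * np.log(probs)) - (1.0 - probs) * np.log(1.0 - probs))
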
Example No. 15
    def test_layer_network_with_container_output_space_and_one_distribution(self):
        input_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")
        output_space = Dict({"a": Float(shape=(2, 3)), "b": Int(3)}, main_axes="B")
        # Using keras layer as network spec.
        layer = tf.keras.layers.Dense(10)

        nn = Network(
            network=layer,
            output_space=output_space,
            # Only one output component is a distribution, the other not (Int).
            distributions=dict(a=True)
        )
        # Simple call -> Should return sample dict with "a"->float(2,3) and "b"->int(3,).
        input_ = input_space.sample(1000)
        result = nn(input_)
        check(np.mean(result["a"]), 0.0, decimals=0)
        check(np.mean(np.sum(softmax(result["b"]), axis=-1)), 1.0, decimals=5)

        # Call with value -> Should return likelihood of "a"-value and output for "b"-value.
        input_ = input_space.sample(3)
        value = output_space.sample(3)
        result, likelihood = nn(input_, value)
        self.assertTrue(result["a"] is None)  # a is None b/c value was already given for likelihood calculation
        self.assertTrue(result["b"].shape == (3,))  # b is the (batched) output values for the given int-numbers
        self.assertTrue(result["b"].dtype == np.float32)
        self.assertTrue(likelihood.shape == (3,))  # (total) likelihood is some float
        self.assertTrue(likelihood.dtype == np.float32)

        # Extract only the "b" value-output (one output for each int category).
        # Also: No likelihood output b/c "a" was invalidated.
        del value["a"]
        value["b"] = None
        result = nn(input_, value)
        self.assertTrue(result["a"] is None)
        self.assertTrue(result["b"].shape == (3, 3))
        self.assertTrue(result["b"].dtype == np.float32)

        value = output_space.sample(3)
        value["a"] = None
        del value["b"]
        result = nn(input_, value)
        self.assertTrue(result is None)
Example No. 16
    def test_gumbel_softmax_distribution(self):
        # 5-categorical Gumbel-Softmax.
        param_space = Float(shape=(5, ), main_axes="B")
        values_space = Float(shape=(5, ), main_axes="B")

        gumble_softmax_distribution = GumbelSoftmax(temperature=1.0)

        # Batch of size=2 and deterministic (True).
        input_ = param_space.sample(2)
        expected = softmax(input_)
        # Sample n times, expect always the softmax of the logits (deterministic draw).
        for _ in range(50):
            out = gumble_softmax_distribution.sample(input_,
                                                     deterministic=True)
            check(out, expected)
            out = gumble_softmax_distribution.sample_deterministic(input_)
            check(out, expected)

        # Batch of size=1 and non-deterministic -> expect roughly the vector of probs.
        input_ = param_space.sample(1)
        expected = softmax(input_)
        outs = []
        for _ in range(100):
            out = gumble_softmax_distribution.sample(input_)
            outs.append(out)
            out = gumble_softmax_distribution.sample_stochastic(input_)
            outs.append(out)

        check(np.mean(outs, axis=0), expected, decimals=1)

        return  # TODO: Figure out Gumbel Softmax log-prob calculation (our current implementation does not correspond with paper's formula).

        def gumbel_log_density(y, probs, num_categories, temperature=1.0):
            # Density formula from https://arxiv.org/pdf/1611.01144.pdf.
            # Use the stdlib math module (the np.math alias has been removed from newer NumPy versions).
            import math
            density = math.factorial(num_categories - 1) * temperature ** (num_categories - 1) * \
                (np.sum(probs / np.power(y, temperature), axis=-1) ** -num_categories) * \
                np.prod(probs / np.power(y, temperature + 1.0), axis=-1)
            return np.log(density)

        # Test log-likelihood outputs.
        input_ = param_space.sample(3)
        values = values_space.sample(3)
        expected = gumbel_log_density(values,
                                      softmax(input_),
                                      num_categories=param_space.shape[0])

        out = gumble_softmax_distribution.log_prob(input_, values)
        check(out, expected)
Example No. 17
    def test_func_api_network_with_manually_handling_container_input_space(self):
        # Simple vector plus image as inputs (see e.g. SAC).
        input_space = Dict(A=Float(-1.0, 1.0, shape=(2,)), B=Float(-1.0, 1.0, shape=(2, 2, 3)), main_axes="B")
        output_space = Float(shape=(3,), main_axes="B")  # simple output

        # Using keras functional API to create network.
        keras_input = input_space.create_keras_input()
        # Simply flatten and concat everything, then output.
        o = tf.keras.layers.Flatten()(keras_input["B"])
        o = tf.concat([keras_input["A"], o], axis=-1)
        network = tf.keras.Model(inputs=keras_input, outputs=o)

        # Use no distributions.
        nn = Network(
            network=network,
            output_space=output_space,
            distributions=False
        )

        # Simple function call.
        input_ = input_space.sample(6)
        result = nn(input_)
        weights = nn.get_weights()
        expected = dense(np.concatenate([input_["A"], np.reshape(input_["B"], newshape=(6, -1))], axis=-1), weights[0], weights[1])

        check(result, expected)

        # Function call with value -> Expect error as we only have float outputs (w/o distributions).
        input_ = input_space.sample(6)
        values = output_space.sample(6)
        with self.assertRaises(SurrealError):
            nn(input_, values)
Example No. 18
    def test_multivariate_normal(self):
        # Create distributions with a batch rank, an extra rank of size 2 (usable e.g. for 2 mixed Gaussians)
        # and 3 events each (trivariate), each parameterized by mean and stddev.
        num_events = 3  # 3=trivariate Gaussian
        num_mixed_gaussians = 2  # 2x trivariate Gaussians (mixed)
        param_space = Tuple(
            Float(shape=(num_mixed_gaussians, num_events)),  # mean
            Float(0.5, 1.0,
                  shape=(num_mixed_gaussians, num_events)),  # diag (variance)
            main_axes="B")
        values_space = Float(shape=(num_mixed_gaussians, num_events),
                             main_axes="B")

        # The Component to test.
        distribution = MultivariateNormal()

        input_ = param_space.sample(4)
        expected = input_[0]  # 0=mean
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(50):
            out = distribution.sample(input_, deterministic=True)
            check(out, expected)
            out = distribution.sample_deterministic(input_)
            check(out, expected)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = param_space.sample(1)
        expected = input_[0]  # 0=mean
        outs = []
        for _ in range(100):
            out = distribution.sample(input_, deterministic=False)
            outs.append(out)
            out = distribution.sample_stochastic(input_)
            outs.append(out)

        check(np.mean(outs), expected.mean(), decimals=1)

        means = values_space.sample(2)
        stds = values_space.sample(2)
        values = values_space.sample(2)

        # Test log-likelihood outputs (against scipy).
        out = distribution.log_prob((means, stds), values)
        # Sum up the individual log-probs as we have a diag (independent) covariance matrix.
        check(out,
              np.sum(np.log(norm.pdf(values, means, stds)), axis=-1),
              decimals=4)
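
Because the covariance is diagonal, the multivariate log-density factorizes into a sum of univariate log-densities, which is exactly what the check above computes. For a single event vector this can also be verified against scipy.stats.multivariate_normal, treating the diag parameters as stddevs just like the norm.pdf check does; a small sketch:

    import numpy as np
    from scipy.stats import multivariate_normal, norm

    mean = np.array([0.1, -0.3, 0.7])
    std = np.array([0.6, 0.9, 0.5])
    value = np.array([0.0, 0.2, 1.0])

    full = multivariate_normal.logpdf(value, mean=mean, cov=np.diag(std ** 2))
    factorized = np.sum(np.log(norm.pdf(value, mean, std)))
    assert np.allclose(full, factorized)
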
Example No. 19
    def test_dueling_network(self):
        input_space = Float(-1.0, 1.0, shape=(2,), main_axes="B")
        output_space = Dict({"A": Float(shape=(4,)), "V": Float()}, main_axes="B")  # V=single node
        # Using keras layer as main network spec.
        layer = tf.keras.layers.Dense(5)

        nn = Network(
            network=layer,
            output_space=output_space,
            # Give the "A" (advantage) and "V" (value) adapters their own pre-networks (dueling-style heads).
            adapters=dict(A=dict(pre_network=tf.keras.layers.Dense(2)), V=dict(pre_network=tf.keras.layers.Dense(3)))
        )
        # Simple call -> Should return sample dict.
        input_ = input_space.sample(10)
        result = nn(input_)

        weights = nn.get_weights()
        expected_a = dense(dense(dense(input_, weights[0], weights[1]), weights[2], weights[3]), weights[4], weights[5])
        expected_v = np.reshape(
            dense(dense(dense(input_, weights[0], weights[1]), weights[6], weights[7]), weights[8], weights[9]),
            newshape=(10,)
        )
        check(result["A"], expected_a, decimals=5)
        check(result["V"], expected_v, decimals=5)
Example No. 20
    def test_beta(self):
        # Create 5 beta distributions (2 parameters (alpha and beta) each).
        param_space = Tuple(
            Float(shape=(5, )),  # alpha
            Float(shape=(5, )),  # beta
            main_axes="B")
        values_space = Float(shape=(5, ), main_axes="B")

        # The Component to test.
        low, high = -1.0, 2.0
        beta_distribution = Beta(low=low, high=high)

        # Batch of size=2 and deterministic (True).
        input_ = param_space.sample(2)
        # Mean for a Beta distribution: 1 / [1 + (beta/alpha)]
        expected = (1.0 / (1.0 + input_[1] / input_[0])) * (high - low) + low
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(100):
            out = beta_distribution.sample(input_, deterministic=True)
            check(out, expected)
            out = beta_distribution.sample_deterministic(input_)
            check(out, expected)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = param_space.sample(1)
        expected = (1.0 / (1.0 + input_[1] / input_[0])) * (high - low) + low
        outs = []
        for _ in range(100):
            out = beta_distribution.sample(input_, deterministic=False)
            outs.append(out)
            out = beta_distribution.sample_stochastic(input_)
            outs.append(out)

        check(np.mean(outs), expected.mean(), decimals=1)

        alpha_ = values_space.sample(1)
        beta_ = values_space.sample(1)
        values = values_space.sample(1)
        values_scaled = values * (high - low) + low

        # Test log-likelihood outputs (against scipy).
        out = beta_distribution.log_prob((alpha_, beta_), values_scaled)
        check(out, np.log(beta.pdf(values, alpha_, beta_)), decimals=4)

        # TODO: Test entropy outputs (against scipy).
        out = beta_distribution.entropy((alpha_, beta_))
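
For the entropy TODO above: scipy.stats.beta can supply the reference entropy, but note that scaling the sample from [0, 1] to [low, high] adds log(high - low) to the differential entropy. A sketch of what such a check might look like (this assumes the component accounts for the scaling, which is not verified here):

    import numpy as np
    from scipy.stats import beta as scipy_beta

    low, high = -1.0, 2.0
    alpha_, beta_ = np.array([[1.5, 2.0]]), np.array([[2.5, 1.2]])
    expected_entropy = scipy_beta.entropy(alpha_, beta_) + np.log(high - low)
    # check(out, expected_entropy)  # only valid if entropy() includes the scaling term
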
Example No. 21
    def test_layer_network_with_container_output_space_and_distributions(self):
        input_space = Float(-1.0, 1.0, shape=(10,), main_axes="B")
        output_space = Dict({"a": Float(shape=(2, 3)), "b": Int(3)}, main_axes="B")

        # Using keras layer as network spec.
        layer = tf.keras.layers.Dense(10)

        nn = Network(
            network=layer,
            output_space=output_space,
            distributions=True
        )
        # Simple call -> Should return sample dict with "a"->float(2,3) and "b"->int(3).
        input_ = input_space.sample(1000)
        result = nn(input_)
        check(np.mean(result["a"]), 0.0, decimals=0)
        check(np.mean(result["b"]), 1, decimals=0)

        # Call with value -> Should return likelihood of value.
        input_ = input_space.sample(3)
        value = output_space.sample(3)
        likelihood = nn(input_, value)
        self.assertTrue(likelihood.shape == (3,))
        self.assertTrue(likelihood.dtype == np.float32)
Example No. 22
    def test_func_api_network_with_primitive_int_output_space_and_distribution(self):
        input_space = Float(-1.0, 1.0, shape=(3,), main_axes="B")
        output_space = Int(5, main_axes="B")

        # Using keras functional API to create network.
        i = tf.keras.layers.Input(shape=(3,))
        d = tf.keras.layers.Dense(10)(i)
        e = tf.keras.layers.Dense(5)(i)
        o = tf.concat([d, e], axis=-1)
        network = tf.keras.Model(inputs=i, outputs=o)

        # Use default distributions (i.e. categorical for Int).
        nn = Network(
            network=network,
            output_space=output_space,
            distributions="default"
        )
        input_ = input_space.sample(1000)
        result = nn(input_)
        # Check the sample for a proper mean value.
        check(np.mean(result), 2, decimals=0)

        # Function call with value -> Expect probabilities for given int-values.
        input_ = input_space.sample(6)
        values = output_space.sample(6)
        result = nn(input_, values)
        weights = nn.get_weights()
        expected = dense(np.concatenate(
            [dense(input_, weights[0], weights[1]), dense(input_, weights[2], weights[3])],
            axis=-1
        ), weights[4], weights[5])
        expected = softmax(expected)
        expected = np.sum(expected * one_hot(values, depth=output_space.num_categories), axis=-1)

        check(result, expected)

        # Function call with "likelihood" option set -> Expect sample plus probabilities for sampled int-values.
        input_ = input_space.sample(1000)
        sample, probs = nn(input_, likelihood=True)
        check(np.mean(sample), 2, decimals=0)
        check(np.mean(probs), 1.0 / output_space.num_categories, decimals=1)
Example No. 23
    def test_copying_an_adapter(self):
        input_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")
        output_space = Float(shape=(3,), main_axes="B")

        adapter = PlainOutputAdapter(output_space, pre_network=None)

        # Simple function call -> Expect the plain dense-layer output.
        input_ = input_space.sample(3)
        result = adapter(input_)
        weights = adapter.get_weights()
        expected = dense(input_, weights[0], weights[1])

        check(result, expected)

        new_adapter = adapter.copy()
        new_weights = new_adapter.get_weights()
        # Check all weights.
        check(weights, new_weights)
        # Do a pass and double-check.
        result = new_adapter(input_)
        check(result, expected)
Example No. 24
    def test_mixture(self):
        # Create a mixture distribution consisting of 3 bivariate normals weighted by an internal
        # categorical distribution.
        num_distributions = 3
        num_events_per_multivariate = 2  # 2=bivariate
        param_space = Dict(
            {
                "categorical":
                Float(shape=(num_distributions, ), low=-1.5, high=2.3),
                "parameters0":
                Tuple(
                    Float(shape=(num_events_per_multivariate, )),  # mean
                    Float(shape=(num_events_per_multivariate, ),
                          low=0.5,
                          high=1.0),  # diag
                ),
                "parameters1":
                Tuple(
                    Float(shape=(num_events_per_multivariate, )),  # mean
                    Float(shape=(num_events_per_multivariate, ),
                          low=0.5,
                          high=1.0),  # diag
                ),
                "parameters2":
                Tuple(
                    Float(shape=(num_events_per_multivariate, )),  # mean
                    Float(shape=(num_events_per_multivariate, ),
                          low=0.5,
                          high=1.0),  # diag
                ),
            },
            main_axes="B")
        values_space = Float(shape=(num_events_per_multivariate, ),
                             main_axes="B")
        # The Component to test.
        mixture = MixtureDistribution(
            # Try different spec types.
            MultivariateNormal(),
            "multi-variate-normal",
            "multivariate_normal")

        # Batch of size=n and deterministic (True).
        input_ = param_space.sample(1)
        # Make probs for categorical.
        categorical_probs = softmax(input_["categorical"])

        # Note: Usually, a deterministic draw would return the max-likelihood value, i.e. the mean of the
        # component picked by argmax(categorical):
        # argmax = np.argmax(input_["categorical"], axis=-1)
        # expected = np.array([input_["parameters{}".format(idx)][0][i] for i, idx in enumerate(argmax)])
        # Here, however, the deterministic sample is checked against the mixture mean (the weighted sum of
        # the component means computed below).

        # The mean value is a 2D vector (bivariate distribution).
        expected = categorical_probs[:, 0:1] * input_["parameters0"][0] + \
            categorical_probs[:, 1:2] * input_["parameters1"][0] + \
            categorical_probs[:, 2:3] * input_["parameters2"][0]

        for _ in range(20):
            out = mixture.sample(input_, deterministic=True)
            check(out, expected)
            out = mixture.sample_deterministic(input_)
            check(out, expected)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = param_space.sample(1)
        # Make probs for categorical.
        categorical_probs = softmax(input_["categorical"])
        expected = categorical_probs[:, 0:1] * input_["parameters0"][0] + \
            categorical_probs[:, 1:2] * input_["parameters1"][0] + \
            categorical_probs[:, 2:3] * input_["parameters2"][0]
        outs = []
        for _ in range(500):
            out = mixture.sample(input_, deterministic=False)
            outs.append(out)
            out = mixture.sample_stochastic(input_)
            outs.append(out)
        check(np.mean(np.array(outs), axis=0), expected, decimals=1)

        return
        # TODO: prob/log-prob tests for Mixture.

        # Test log-likelihood outputs (against scipy).
        for i in range(20):
            params = param_space.sample(1)
            # Make sure categorical params are softmaxed.
            category_probs = softmax(params["categorical"][0])
            values = values_space.sample(1)
            expected = 0.0
            v = []
            for j in range(3):
                v.append(
                    multivariate_normal.pdf(
                        values[0],
                        mean=params["parameters{}".format(j)][0][0],
                        cov=params["parameters{}".format(j)][1][0]))
                expected += category_probs[j] * v[-1]
            out = mixture.prob(params, values)
            check(out[0], expected, atol=0.1)

            expected = np.zeros(shape=(3, ))
            for j in range(3):
                expected[j] = np.log(category_probs[j]) + np.log(
                    multivariate_normal.pdf(
                        values[0],
                        mean=params["parameters{}".format(j)][0][0],
                        cov=params["parameters{}".format(j)][1][0]))
            expected = np.log(np.sum(np.exp(expected)))
            out = mixture.log_prob(params, values)
            print("{}: out={} expected={}".format(i, out, expected))
            check(out, np.array([expected]), atol=0.25)
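
The final np.log(np.sum(np.exp(...))) in the (currently skipped) log-prob check above is the classic log-sum-exp pattern and can overflow or underflow for extreme log-probabilities; scipy offers a numerically stable version. A sketch of the equivalent stable computation (an alternative formulation, not the test's own code):

    import numpy as np
    from scipy.special import logsumexp

    # log(categorical_prob_k) + log(component_pdf_k) for each of the 3 mixture components.
    log_weighted = np.array([-3.2, -1.1, -7.5])
    naive = np.log(np.sum(np.exp(log_weighted)))
    stable = logsumexp(log_weighted)
    assert np.allclose(naive, stable)
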
Example No. 25
    def test_layer_network_with_container_output_space_and_mix_of_distributions_and_no_distributions(self):
        input_space = Float(-1.0, 1.0, shape=(5,), main_axes="B")
        output_space = Dict({
            "a": Float(shape=(2, 3)), "b": Int(3), "c": Float(-0.1, 1.0, shape=(2,)), "d": Int(3, shape=(2,))
        }, main_axes="B")
        # Using keras layer as network spec.
        layer = tf.keras.layers.Dense(10)

        nn = Network(
            network=layer,
            output_space=output_space,
            # Only two output components are distributions (a and b), the others not (c=Float, d=Int).
            distributions=dict(a="default", b=True)
        )
        # Simple call -> Should return sample dict.
        input_ = input_space.sample(10000)
        result = nn(input_)
        check(np.mean(result["a"]), 0.0, decimals=0)
        check(np.mean(result["b"]), 1.0, decimals=0)
        check(np.mean(result["d"]), 0.0, decimals=0)
        self.assertTrue(result["d"].shape == (10000, 2, 3))
        self.assertTrue(result["d"].dtype == np.float32)

        # Change limits of input a little to get more chances of reaching extreme float outputs (for "c").
        input_space = Float(-10.0, 10.0, shape=(5,), main_axes="B")
        input_ = input_space.sample(10000)
        result = nn(input_)
        self.assertFalse(np.any(result["c"].numpy() > 1.0))
        self.assertTrue(np.any(result["c"].numpy() > 0.9))
        self.assertFalse(np.any(result["c"].numpy() < -0.1))
        self.assertTrue(np.any(result["c"].numpy() < 0.0))

        # Call with (complete) value -> Should return likelihood of "a"+"b"-values and outputs for "c"/"d"-values.
        input_ = input_space.sample(100)
        value = output_space.sample(100)
        # Delete the float value ("c"); otherwise this would raise an error, as we can't compute a likelihood
        # for a non-distribution float output.
        del value["c"]
        result, likelihood = nn(input_, value)
        # a is None b/c value was already given for likelihood calculation
        self.assertTrue(result["a"] is None)
        # b is None b/c value was already given for likelihood calculation
        self.assertTrue(result["b"] is None)
        # c is None b/c no value was given for "c" (passing one would raise an error: float component w/o distribution).
        self.assertTrue(result["c"] is None)
        # d are the (batched) output values for the given int-numbers
        self.assertTrue(result["d"].shape == (100, 2))
        self.assertTrue(result["d"].dtype == np.float32)

        self.assertTrue(likelihood.shape == (100,))  # (total) likelihood is some float
        self.assertTrue(likelihood.dtype == np.float32)

        # Calculate likelihood only for "a" component.
        del value["b"]
        # Extract only the "c" sample-output.
        value["c"] = None
        # We don't want outputs for "d".
        del value["d"]
        result, likelihood = nn(input_, value)
        # a and b are None as we are using these for likelihood calculations only.
        self.assertTrue(result["a"] is None)
        self.assertTrue(result["b"] is None)
        # c is a float sample.
        self.assertTrue(result["c"].shape == (100, 2))
        self.assertTrue(result["c"].dtype == np.float32)
        self.assertFalse(np.any(result["c"].numpy() > 1.0))
        self.assertFalse(np.any(result["c"].numpy() < -0.1))
        # d is None (no output desired).
        self.assertTrue(result["d"] is None)

        value = output_space.sample(100)
        # Calculate likelihood only for "b" component.
        value["a"] = None
        # Do nothing for "c".
        # Leave "d" and expect output-values for each given int.
        del value["c"]

        result, likelihood = nn(input_, value)

        self.assertTrue(result["a"] is None)
        self.assertTrue(result["b"] is None)
        self.assertTrue(result["c"] is None)

        # d are the (batched) output values for the given int-numbers
        self.assertTrue(result["d"].shape == (100, 2))
        self.assertTrue(result["d"].dtype == np.float32)

        self.assertTrue(likelihood.shape == (100,))  # (total) likelihood is some float
        self.assertTrue(likelihood.dtype == np.float32)

        # Add "c" again, but as None (will not cause ERROR then and return the output).
        value["c"] = None
        result, likelihood = nn(input_, value)
        self.assertTrue(result["c"].shape == (100, 2))
        self.assertTrue(result["c"].dtype == np.float32)
        self.assertFalse(np.any(result["c"].numpy() > 1.0))
        self.assertFalse(np.any(result["c"].numpy() < -0.1))
Example No. 26
    def test_joint_cumulative_distribution(self):
        param_space = Dict(
            {
                "a":
                Float(shape=(4, )),  # 4-discrete
                "b":
                Dict({
                    "ba":
                    Tuple([Float(shape=(3, )),
                           Float(0.1, 1.0, shape=(3, ))]),  # 3-variate normal
                    "bb":
                    Tuple([Float(shape=(2, )),
                           Float(shape=(2, ))]),  # beta -1 to 1
                    "bc":
                    Tuple([Float(shape=(4, )),
                           Float(0.1, 1.0, shape=(4, ))]),  # normal (dim=4)
                })
            },
            main_axes="B")

        values_space = Dict(
            {
                "a":
                Int(4),
                "b":
                Dict({
                    "ba": Float(shape=(3, )),
                    "bb": Float(shape=(2, )),
                    "bc": Float(shape=(4, ))
                })
            },
            main_axes="B")

        low, high = -1.0, 1.0
        cumulative_distribution = JointCumulativeDistribution(
            distributions={
                "a": Categorical(),
                "b": {
                    "ba": MultivariateNormal(),
                    "bb": Beta(low=low, high=high),
                    "bc": Normal()
                }
            })

        # Batch of size=2 and deterministic (True).
        input_ = param_space.sample(2)
        input_["a"] = softmax(input_["a"])
        expected_mean = {
            "a": np.argmax(input_["a"], axis=-1),
            "b": {
                "ba":
                input_["b"]["ba"][0],  # [0]=Mean
                # Mean for a Beta distribution: 1 / [1 + (beta/alpha)] * range + low
                "bb":
                (1.0 / (1.0 + input_["b"]["bb"][1] / input_["b"]["bb"][0])) *
                (high - low) + low,
                "bc":
                input_["b"]["bc"][0],
            }
        }
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(20):
            out = cumulative_distribution.sample(input_, deterministic=True)
            check(out, expected_mean)
            out = cumulative_distribution.sample_deterministic(input_)
            check(out, expected_mean)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = param_space.sample(1)
        input_["a"] = softmax(input_["a"])
        expected_mean = {
            "a": np.sum(input_["a"] * np.array([0, 1, 2, 3])),
            "b": {
                "ba":
                input_["b"]["ba"][0],  # [0]=Mean
                # Mean for a Beta distribution: 1 / [1 + (beta/alpha)] * range + low
                "bb":
                (1.0 / (1.0 + input_["b"]["bb"][1] / input_["b"]["bb"][0])) *
                (high - low) + low,
                "bc":
                input_["b"]["bc"][0],
            }
        }

        outs = []
        for _ in range(500):
            out = cumulative_distribution.sample(input_)
            outs.append(out)
            out = cumulative_distribution.sample_stochastic(input_)
            outs.append(out)

        check(np.mean(np.stack([o["a"][0] for o in outs], axis=0), axis=0),
              expected_mean["a"],
              atol=0.3)
        check(np.mean(np.stack([o["b"]["ba"][0] for o in outs], axis=0),
                      axis=0),
              expected_mean["b"]["ba"][0],
              decimals=1)
        check(np.mean(np.stack([o["b"]["bb"][0] for o in outs], axis=0),
                      axis=0),
              expected_mean["b"]["bb"][0],
              decimals=1)
        check(np.mean(np.stack([o["b"]["bc"][0] for o in outs], axis=0),
                      axis=0),
              expected_mean["b"]["bc"][0],
              decimals=1)

        # Test log-likelihood outputs.
        params = param_space.sample(1)
        params["a"] = softmax(params["a"])
        # Make sure beta-values are within 0.0 and 1.0 for the numpy calculation (which doesn't have scaling).
        values = values_space.sample(1)
        log_prob_beta = np.log(
            beta.pdf(values["b"]["bb"], params["b"]["bb"][0],
                     params["b"]["bb"][1]))
        # Now do the scaling for b/bb (beta values).
        values["b"]["bb"] = values["b"]["bb"] * (high - low) + low
        expected_log_llh = np.log(params["a"][0][values["a"][0]]) + \
            np.sum(np.log(norm.pdf(values["b"]["ba"][0], params["b"]["ba"][0], params["b"]["ba"][1]))) + \
            np.sum(log_prob_beta) + \
            np.sum(np.log(norm.pdf(values["b"]["bc"][0], params["b"]["bc"][0], params["b"]["bc"][1])))

        out = cumulative_distribution.log_prob(params, values)
        check(out, expected_log_llh, decimals=0)
Example No. 27
    def test_squashed_normal(self):
        param_space = Tuple(Float(-1.0, 1.0, shape=(5, )),
                            Float(0.5, 1.0, shape=(5, )),
                            main_axes="B")

        low, high = -2.0, 1.0
        squashed_distribution = SquashedNormal(low=low, high=high)

        # Batch of size=2 and deterministic (True).
        input_ = param_space.sample(2)
        expected = ((np.tanh(input_[0]) + 1.0) /
                    2.0) * (high - low) + low  # [0] = mean
        # Sample n times, expect always mean value (deterministic draw).
        for _ in range(50):
            out = squashed_distribution.sample(input_, deterministic=True)
            check(out, expected)
            out = squashed_distribution.sample_deterministic(input_)
            check(out, expected)

        # Batch of size=1 and non-deterministic -> expect roughly the mean.
        input_ = param_space.sample(1)
        expected = ((np.tanh(input_[0]) + 1.0) /
                    2.0) * (high - low) + low  # [0] = mean
        outs = []
        for _ in range(500):
            out = squashed_distribution.sample(input_, deterministic=False)
            outs.append(out)
            self.assertTrue(np.max(out) <= high)
            self.assertTrue(np.min(out) >= low)
            out = squashed_distribution.sample_stochastic(input_)
            outs.append(out)
            self.assertTrue(np.max(out) <= high)
            self.assertTrue(np.min(out) >= low)

        check(np.mean(outs), expected.mean(), decimals=1)

        means = np.array([[0.1, 0.2, 0.3, 0.4, 50.0],
                          [-0.1, -0.2, -0.3, -0.4, -1.0]])
        log_stds = np.array([[0.8, -0.2, 0.3, -1.0, 10.0],
                             [0.7, -0.3, 0.4, -0.9, 8.0]])
        # The normal-adapter applies the following transform to the NN output (interpreted as log(stddev)).
        # It doesn't really matter for this test case, though.
        stds = np.exp(
            np.clip(log_stds, a_min=MIN_LOG_NN_OUTPUT,
                    a_max=MAX_LOG_NN_OUTPUT))
        # Make sure values are within low and high.
        values = np.array([[0.9, 0.2, 0.4, -0.1, -1.05],
                           [-0.9, -0.2, 0.4, -0.1, -1.05]])

        # Test log-likelihood outputs.
        # TODO: understand and comment the following formula to get the log-prob.
        # Unsquash values, then get log-llh from regular gaussian.
        unsquashed_values = np.arctanh((values - low) / (high - low) * 2.0 -
                                       1.0)
        log_prob_unsquashed = np.log(norm.pdf(unsquashed_values, means, stds))
        log_prob = log_prob_unsquashed - np.sum(
            np.log(1 - np.tanh(unsquashed_values)**2), axis=-1, keepdims=True)

        out = squashed_distribution.log_prob((means, stds), values)
        check(out, log_prob)

        # Test entropy outputs.
        # TODO
        return
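
Regarding the log-prob TODO above: the formula follows from the change of variables y = ((tanh(x) + 1) / 2) * (high - low) + low. Then log p(y) = log p(x) - log|dy/dx| with dy/dx = (1 - tanh(x)^2) * (high - low) / 2, which is where the log(1 - tanh^2) correction comes from; the test sums that correction over the event axis and leaves out the constant log((high - low) / 2) per dimension. A standalone sketch of the per-element correction (a reference formulation, not the library's implementation):

    import numpy as np
    from scipy.stats import norm

    def squashed_log_prob(values, means, stds, low, high, eps=1e-6):
        # Invert the squash: x = arctanh(2 * (y - low) / (high - low) - 1), clipped for stability.
        unsquashed = np.arctanh(
            np.clip((values - low) / (high - low) * 2.0 - 1.0, -1.0 + eps, 1.0 - eps))
        log_prob_gaussian = np.log(norm.pdf(unsquashed, means, stds))
        # Change of variables: subtract log|dy/dx| element-wise.
        jacobian = np.log(1.0 - np.tanh(unsquashed) ** 2) + np.log((high - low) / 2.0)
        return log_prob_gaussian - jacobian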