    def test_gradient_on_dense_spn(self, num_decomps, num_subsets,
                                   num_mixtures, input_dist, num_vars,
                                   num_components, softplus):
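        # The gradients from spn.Gradient should match TensorFlow's autodiff
        # gradients w.r.t. the leaf's loc and scale variables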
        batch_size = 9

        mean_init = np.arange(num_vars * num_components).reshape(
            num_vars, num_components)
        gl = spn.GaussianLeaf(num_vars=num_vars,
                              num_components=num_components,
                              loc_init=mean_init,
                              softplus_scale=softplus)

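        # Build a dense SPN structure on top of the Gaussian leaf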
        gen = spn.DenseSPNGenerator(
            num_decomps=num_decomps,
            num_subsets=num_subsets,
            num_mixtures=num_mixtures,
            node_type=spn.DenseSPNGenerator.NodeType.LAYER,
            input_dist=input_dist)

        root = gen.generate(gl, root_name="root")

        with tf.name_scope("Weights"):
            spn.generate_weights(root,
                                 tf.initializers.random_uniform(0.0, 1.0),
                                 log=True)

        init = spn.initialize_weights(root)

        self.assertTrue(root.is_valid())

        log_val = root.get_log_value()

        spn_grad = spn.Gradient(log=True)

        spn_grad.get_gradients(root)

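        # Gradients w.r.t. loc and scale, computed once via the leaf's custom
        # backprop op and once via TensorFlow autodiff; they should match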
        mean_grad_custom, var_grad_custom = gl._compute_gradient(
            spn_grad.gradients[gl])

        mean_grad_tf, var_grad_tf = tf.gradients(
            log_val, [gl.loc_variable, gl.scale_variable])

        fd = {gl: np.random.rand(batch_size, num_vars)}

        with self.test_session() as sess:
            sess.run(init)
            mu_grad_custom_val, var_grad_custom_val = sess.run(
                [mean_grad_custom, var_grad_custom], fd)

            mu_grad_tf_val, var_grad_tf_val = sess.run(
                [mean_grad_tf, var_grad_tf], fd)

        self.assertAllClose(mu_grad_custom_val,
                            mu_grad_tf_val,
                            atol=1e-4,
                            rtol=1e-4)
        self.assertAllClose(var_grad_custom_val,
                            var_grad_tf_val,
                            atol=1e-4,
                            rtol=1e-4)

    def test_compute_gradient(self):
        batch_size = 2
        num_vars = 2
        num_components = 2
        gl = spn.GaussianLeaf(num_vars=num_vars,
                              num_components=num_components,
                              loc_init=np.arange(
                                  num_vars * num_components).reshape(
                                      (num_vars, num_components)))
        init = gl.initialize()

        gl_out = gl._compute_log_value()

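        # Reference gradients from TensorFlow autodiff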
        mu_grad_tf, var_grad_tf = tf.gradients(
            gl_out, [gl.loc_variable, gl.scale_variable])

        # Gradient w.r.t. the output itself, so the incoming gradient is all ones
        incoming_grad = tf.ones((batch_size, num_vars * num_components))

        mu_grad_spn, var_grad_spn = gl._compute_gradient(incoming_grad)

        x = np.random.rand(batch_size, num_vars)

        with self.test_session() as sess:
            sess.run(init)
            fd = {gl: x}
            mu_grad_tf_out, var_grad_tf_out = sess.run(
                [mu_grad_tf, var_grad_tf], feed_dict=fd)
            mu_grad_spn_out, var_grad_spn_out = sess.run(
                [mu_grad_spn, var_grad_spn], feed_dict=fd)

        self.assertAllClose(mu_grad_tf_out, mu_grad_spn_out)
        self.assertAllClose(var_grad_tf_out, var_grad_spn_out)

    def test_value(self):
        num_vars = 8
        data = np.stack(
            [np.random.normal(a, size=BATCH_SIZE) for a in range(num_vars)],
            axis=1)

        data = np.concatenate([
            data,
            np.stack([
                np.random.normal(a, size=BATCH_SIZE) + num_vars
                for a in range(num_vars)
            ],
                     axis=1)
        ],
                              axis=0).astype(np.float32)

        gq = spn.GaussianLeaf(num_vars=num_vars,
                              num_components=2,
                              learn_dist_params=False,
                              initialization_data=data)

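        # loc/scale were estimated from the initialization data, so component
        # means should be near 0..num_vars-1 and num_vars..2*num_vars-1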
        value_op = gq._compute_value()
        log_value_op = gq._compute_log_value()

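        # Evaluate the densities at the component means: with (approximately)
        # unit-variance data, the density at the mode is norm.pdf(0) ~= 0.3989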
        modes = np.stack(
            [np.arange(num_vars) for _ in range(BATCH_SIZE)] +
            [np.arange(num_vars) + num_vars for _ in range(BATCH_SIZE)],
            axis=0)
        val_at_mode = stats.norm.pdf(0)

        with self.test_session() as sess:
            sess.run(
                [gq.loc_variable.initializer, gq.scale_variable.initializer])
            value_out, log_value_out = sess.run([value_op, log_value_op],
                                                feed_dict={gq.feed: modes})

        value_out = value_out.reshape((BATCH_SIZE * 2, num_vars, 2))
        log_value_out = log_value_out.reshape((BATCH_SIZE * 2, num_vars, 2))

        # We'll be quite tolerant here, since the learned parameters are just
        # empirical estimates from the data
        self.assertAllClose(value_out[:BATCH_SIZE, :, 0],
                            np.ones([BATCH_SIZE, num_vars]) * val_at_mode,
                            rtol=1e-2,
                            atol=1e-2)

        self.assertAllClose(value_out[BATCH_SIZE:, :, 1],
                            np.ones([BATCH_SIZE, num_vars]) * val_at_mode,
                            rtol=1e-2,
                            atol=1e-2)

        self.assertAllClose(np.exp(log_value_out[:BATCH_SIZE, :, 0]),
                            np.ones([BATCH_SIZE, num_vars]) * val_at_mode,
                            rtol=1e-2,
                            atol=1e-2)

        self.assertAllClose(np.exp(log_value_out[BATCH_SIZE:, :, 1]),
                            np.ones([BATCH_SIZE, num_vars]) * val_at_mode,
                            rtol=1e-2,
                            atol=1e-2)

    def test_sum_update_1(self):
        child1 = spn.GaussianLeaf(num_vars=1,
                                  num_components=1,
                                  total_counts_init=3,
                                  loc_init=0.0,
                                  scale_init=1.0,
                                  learn_dist_params=True)
        child2 = spn.GaussianLeaf(num_vars=1,
                                  num_components=1,
                                  total_counts_init=7,
                                  loc_init=1.0,
                                  scale_init=4.0,
                                  learn_dist_params=True)
        root = spn.Sum(child1, child2)
        root.generate_weights()

        value_inference_type = spn.InferenceType.MARGINAL
        init_weights = spn.initialize_weights(root)
        learning = spn.EMLearning(root,
                                  log=True,
                                  value_inference_type=value_inference_type,
                                  use_unweighted=True)
        reset_accumulators = learning.reset_accumulators()
        accumulate_updates = learning.accumulate_updates()
        update_spn = learning.update_spn()
        train_likelihood = learning.value.values[root]

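        # Accumulate and apply a single hard-EM update for one data point (0.0)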
        with self.test_session() as sess:
            sess.run(init_weights)
            sess.run(reset_accumulators)
            sess.run(accumulate_updates, {child1: [[0.0]], child2: [[0.0]]})
            sess.run(update_spn)

            child1_n = sess.run(child1._total_count_variable)
            child2_n = sess.run(child2._total_count_variable)

        # With use_unweighted=True, the update assigns the data point to the
        # component with the highest likelihood, ignoring each component's
        # mixture weight. Here N(0|0,1) > N(0|1,4), so child1 is picked and
        # its count goes from 3 to 4, while child2's count stays at 7.
        # If the component weights were taken into account, child2 would be
        # picked instead, since 0.3 * N(0|0,1) < 0.7 * N(0|1,4).
        # self.assertEqual(root.n, 11)
        self.assertEqual(child1_n, 4)
        self.assertEqual(child2_n, 7)

    def test_compute_scope(self):
        gl = spn.GaussianLeaf(num_vars=32, num_components=4)
        scope = gl._compute_scope()
        for b in range(0, len(scope), 4):
            for i in range(1, 4):
                self.assertEqual(scope[b], scope[b + i])
            for i in range(4, len(scope) - b, 4):
                self.assertNotEqual(scope[b], scope[b + i])

    def test_split_in_quantiles(self):
        quantiles = [np.random.rand(32, 32) + i * 2 for i in range(4)]
        data = np.concatenate(quantiles, axis=0)
        np.random.shuffle(data)
        gq = spn.GaussianLeaf(num_vars=32,
                              num_components=4,
                              learn_dist_params=False)

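        # Each returned split should contain exactly the values of one
        # original quantile block, in sorted order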
        values_per_quantile = gq._split_in_quantiles(data)

        for val, q in zip(values_per_quantile, quantiles):
            self.assertAllClose(np.sort(q, axis=0), val)

    def test_learn_from_data(self, softplus):
        quantiles = [np.random.rand(32, 32) + i * 2 for i in range(4)]
        data = np.concatenate(quantiles, axis=0)
        np.random.shuffle(data)
        gq = spn.GaussianLeaf(num_vars=32,
                              num_components=4,
                              learn_dist_params=False,
                              initialization_data=data,
                              softplus_scale=softplus)
        true_vars = np.stack([np.var(q, axis=0) for q in quantiles], axis=-1)
        true_means = np.stack([np.mean(q, axis=0) for q in quantiles], axis=-1)

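        # With softplus_scale, the stored scale is a pre-activation; the
        # effective standard deviation is softplus(scale) = log(1 + exp(scale))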
        if softplus:
            self.assertAllClose(np.log(1 + np.exp(gq._scale_init)),
                                np.sqrt(true_vars))
        else:
            self.assertAllClose(gq._scale_init, np.sqrt(true_vars))
        self.assertAllClose(gq._loc_init, true_means)

    def test_mpe_state(self):
        num_vars = 4
        data = np.stack(
            [np.random.normal(a, size=BATCH_SIZE) for a in range(num_vars)],
            axis=1)

        data = np.concatenate([
            data,
            np.stack([
                np.random.normal(a, size=BATCH_SIZE) + num_vars
                for a in range(num_vars)
            ],
                     axis=1)
        ],
                              axis=0).astype(np.float32)

        gq = spn.GaussianLeaf(num_vars=num_vars,
                              num_components=2,
                              initialization_data=data,
                              learn_dist_params=False)

        batch_size = 3
        left = np.random.randint(2, size=batch_size * num_vars).reshape(
            (-1, num_vars))
        counts = np.stack((left, 1 - left), axis=-1)

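        # Component 0 of variable i has mean ~i, component 1 has mean
        # ~i + num_vars; MPE state should return the selected component's mean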
        mpe_truth = []
        for row in left:
            for i, val in enumerate(row):
                mpe_truth.append(i if val == 1 else i + num_vars)

        mpe_truth = np.reshape(mpe_truth, (-1, num_vars))

        mpe_state = gq._compute_mpe_state(
            tf.convert_to_tensor(counts, dtype=tf.float32))

        with self.test_session() as sess:
            sess.run([gq.initialize()])
            mpe_state_out = sess.run(mpe_state)
        # Again we must be quite tolerant, but that's fine: the target means
        # are 1.0 apart
        self.assertAllClose(mpe_truth, mpe_state_out, atol=1e-1, rtol=1e-1)

    def test_learn_from_data_prior(self):
        prior_beta = 3.0
        prior_alpha = 2.0
        N = 32
        quantiles = [np.random.rand(N, 32) + i * 2 for i in range(4)]
        data = np.concatenate(quantiles, axis=0)
        np.random.shuffle(data)
        gq = spn.GaussianLeaf(num_vars=32,
                              num_components=4,
                              learn_dist_params=False,
                              initialization_data=data,
                              prior_alpha=prior_alpha,
                              prior_beta=prior_beta,
                              use_prior=True)

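        # With use_prior=True, the initial variance is the mode of the
        # posterior under an inverse-Gamma(prior_alpha, prior_beta) prior:
        # (2 * beta + ssq) / (2 * alpha + 2 + N)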
        mus = [np.mean(q, axis=0, keepdims=True) for q in quantiles]
        ssq = np.stack(
            [np.sum((x - mu)**2, axis=0) for x, mu in zip(quantiles, mus)],
            axis=-1)
        true_vars = (2 * prior_beta + ssq) / (2 * prior_alpha + 2 + N)

        self.assertAllClose(gq._scale_init, np.sqrt(true_vars))

    def test_param_learning(self, softplus_scale):
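        # End-to-end check of the hard-EM updates for the Gaussian leaf inside
        # a small SPN, verified against a NumPy re-implementation below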
        spn.conf.argmax_zero = True
        num_vars = 2
        num_components = 2
        batch_size = 32
        count_init = 100

        # Create means and variances
        means = np.array([[0, 1], [10, 15]])
        variances = np.array([[0.25, 0.5], [0.33, 0.67]])

        # Sample some data
        data0 = [
            stats.norm(loc=m, scale=np.sqrt(v)).rvs(batch_size // 2).astype(
                np.float32) for m, v in zip(means[0], variances[0])
        ]
        data1 = [
            stats.norm(loc=m, scale=np.sqrt(v)).rvs(batch_size // 2).astype(
                np.float32) for m, v in zip(means[1], variances[1])
        ]
        data = np.stack([np.concatenate(data0),
                         np.concatenate(data1)],
                        axis=-1)

        with tf.Graph().as_default() as graph:
            # Set up SPN
            gq = spn.GaussianLeaf(num_vars=num_vars,
                                  num_components=num_components,
                                  initialization_data=data,
                                  total_counts_init=count_init,
                                  learn_dist_params=True,
                                  softplus_scale=softplus_scale)

            mixture00 = spn.Sum((gq, [0, 1]), name="Mixture00")
            weights00 = spn.Weights(initializer=tf.initializers.constant(
                [0.25, 0.75]),
                                    num_weights=2)
            mixture00.set_weights(weights00)
            mixture01 = spn.Sum((gq, [0, 1]), name="Mixture01")
            weights01 = spn.Weights(initializer=tf.initializers.constant(
                [0.75, 0.25]),
                                    num_weights=2)
            mixture01.set_weights(weights01)

            mixture10 = spn.Sum((gq, [2, 3]), name="Mixture10")
            weights10 = spn.Weights(initializer=tf.initializers.constant(
                [2 / 3, 1 / 3]),
                                    num_weights=2)
            mixture10.set_weights(weights10)
            mixture11 = spn.Sum((gq, [2, 3]), name="Mixture11")
            weights11 = spn.Weights(initializer=tf.initializers.constant(
                [1 / 3, 2 / 3]),
                                    num_weights=2)
            mixture11.set_weights(weights11)

            prod0 = spn.Product(mixture00, mixture10, name="Prod0")
            prod1 = spn.Product(mixture01, mixture11, name="Prod1")

            root = spn.Sum(prod0, prod1, name="Root")
            root_weights = spn.Weights(initializer=tf.initializers.constant(
                [1 / 2, 1 / 2]),
                                       num_weights=2)
            root.set_weights(root_weights)

            # Generate new data from slightly shifted Gaussians
            data0 = np.concatenate([
                stats.norm(loc=m, scale=np.sqrt(v)).rvs(
                    batch_size // 2).astype(np.float32)
                for m, v in zip(means[0] + 0.2, variances[0])
            ])
            data1 = np.concatenate([
                stats.norm(loc=m, scale=np.sqrt(v)).rvs(
                    batch_size // 2).astype(np.float32)
                for m, v in zip(means[1] + 1.0, variances[1])
            ])

            # Compute reference log probabilities of the Gaussian components
            empirical_means = gq._loc_init
            if softplus_scale:
                empirical_vars = np.square(np.log(np.exp(gq._scale_init) + 1))
            else:
                empirical_vars = np.square(gq._scale_init)
            log_probs0 = [
                stats.norm(loc=m, scale=np.sqrt(v)).logpdf(data0)
                for m, v in zip(empirical_means[0], empirical_vars[0])
            ]
            log_probs1 = [
                stats.norm(loc=m, scale=np.sqrt(v)).logpdf(data1)
                for m, v in zip(empirical_means[1], empirical_vars[1])
            ]

            # Compute actual log probabilities of mixtures
            mixture00_val = np.logaddexp(log_probs0[0] + np.log(1 / 4),
                                         log_probs0[1] + np.log(3 / 4))
            mixture01_val = np.logaddexp(log_probs0[0] + np.log(3 / 4),
                                         log_probs0[1] + np.log(1 / 4))

            mixture10_val = np.logaddexp(log_probs1[0] + np.log(2 / 3),
                                         log_probs1[1] + np.log(1 / 3))
            mixture11_val = np.logaddexp(log_probs1[0] + np.log(1 / 3),
                                         log_probs1[1] + np.log(2 / 3))

            # Compute actual log probabilities of products
            prod0_val = mixture00_val + mixture10_val
            prod1_val = mixture01_val + mixture11_val

            # Compute the index of the max probability at the products layer
            prod_winner = np.argmax(np.stack([prod0_val, prod1_val], axis=-1),
                                    axis=-1)

            # Compute the indices of the max component per mixture
            component_winner00 = np.argmax(np.stack(
                [log_probs0[0] + np.log(1 / 4), log_probs0[1] + np.log(3 / 4)],
                axis=-1),
                                           axis=-1)
            component_winner01 = np.argmax(np.stack(
                [log_probs0[0] + np.log(3 / 4), log_probs0[1] + np.log(1 / 4)],
                axis=-1),
                                           axis=-1)
            component_winner10 = np.argmax(np.stack(
                [log_probs1[0] + np.log(2 / 3), log_probs1[1] + np.log(1 / 3)],
                axis=-1),
                                           axis=-1)
            component_winner11 = np.argmax(np.stack(
                [log_probs1[0] + np.log(1 / 3), log_probs1[1] + np.log(2 / 3)],
                axis=-1),
                                           axis=-1)

            # Initialize true counts
            counts_per_component = np.zeros((2, 2))
            sum_data_val = np.zeros((2, 2))
            sum_data_squared_val = np.zeros((2, 2))

            data00 = []
            data01 = []
            data10 = []
            data11 = []

            # Compute true counts
            counts_per_step = np.zeros((batch_size, num_vars, num_components))
            for i, prod_ind in enumerate(prod_winner):
                if prod_ind == 0:
                    # mixture 00 and mixture 10
                    counts_per_step[i, 0, component_winner00[i]] = 1
                    counts_per_component[0, component_winner00[i]] += 1
                    sum_data_val[0, component_winner00[i]] += data0[i]
                    sum_data_squared_val[
                        0, component_winner00[i]] += data0[i] * data0[i]
                    (data00 if component_winner00[i] == 0 else data01).append(
                        data0[i])

                    counts_per_step[i, 1, component_winner10[i]] = 1
                    counts_per_component[1, component_winner10[i]] += 1
                    sum_data_val[1, component_winner10[i]] += data1[i]
                    sum_data_squared_val[
                        1, component_winner10[i]] += data1[i] * data1[i]
                    (data10 if component_winner10[i] == 0 else data11).append(
                        data1[i])
                else:
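                    # mixture 01 and mixture 11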
                    counts_per_step[i, 0, component_winner01[i]] = 1
                    counts_per_component[0, component_winner01[i]] += 1
                    sum_data_val[0, component_winner01[i]] += data0[i]
                    sum_data_squared_val[
                        0, component_winner01[i]] += data0[i] * data0[i]
                    (data00 if component_winner01[i] == 0 else data01).append(
                        data0[i])

                    counts_per_step[i, 1, component_winner11[i]] = 1
                    counts_per_component[1, component_winner11[i]] += 1
                    sum_data_val[1, component_winner11[i]] += data1[i]
                    sum_data_squared_val[
                        1, component_winner11[i]] += data1[i] * data1[i]
                    (data10 if component_winner11[i] == 0 else data11).append(
                        data1[i])

            # Setup learning Ops
            value_inference_type = spn.InferenceType.MARGINAL
            init_weights = spn.initialize_weights(root)
            learning = spn.EMLearning(
                root, log=True, value_inference_type=value_inference_type)
            reset_accumulators = learning.reset_accumulators()
            accumulate_updates = learning.accumulate_updates()
            update_spn = learning.update_spn()
            train_likelihood = learning.value.values[root]
            avg_train_likelihood = tf.reduce_mean(train_likelihood)

            # Setup feed dict and update ops
            fd = {gq: np.stack([data0, data1], axis=-1)}
            update_ops = gq._compute_hard_em_update(
                learning._mpe_path.counts[gq])

            with self.test_session(graph=graph) as sess:
                sess.run(init_weights)

                # Get log probabilities of Gaussian leaf
                log_probs = sess.run(learning.value.values[gq], fd)

                # Get log probabilities of mixtures
                mixture00_graph, mixture01_graph, mixture10_graph, mixture11_graph = sess.run(
                    [
                        learning.value.values[mixture00],
                        learning.value.values[mixture01],
                        learning.value.values[mixture10],
                        learning.value.values[mixture11]
                    ], fd)

                # Get log probabilities of products
                prod0_graph, prod1_graph = sess.run([
                    learning.value.values[prod0], learning.value.values[prod1]
                ], fd)

                # Get counts for graph
                counts = sess.run(
                    tf.reduce_sum(learning._mpe_path.counts[gq], axis=0), fd)
                counts_per_sample = sess.run(learning._mpe_path.counts[gq], fd)

                accum, sum_data_graph, sum_data_squared_graph = sess.run([
                    update_ops['accum'], update_ops['sum_data'],
                    update_ops['sum_data_squared']
                ], fd)

            with self.test_session(graph=graph) as sess:
                sess.run(init_weights)
                sess.run(reset_accumulators)

                data_per_component_op = graph.get_tensor_by_name(
                    "EMLearning/GaussianLeaf/DataPerComponent:0")
                squared_data_per_component_op = graph.get_tensor_by_name(
                    "EMLearning/GaussianLeaf/SquaredDataPerComponent:0")

                update_vals, data_per_component_out, squared_data_per_component_out = sess.run(
                    [
                        accumulate_updates, data_per_component_op,
                        squared_data_per_component_op
                    ], fd)

                # Get likelihood before update
                lh_before = sess.run(avg_train_likelihood, fd)
                sess.run(update_spn)

                # Get likelihood after update
                lh_after = sess.run(avg_train_likelihood, fd)

                # Get variables after update
                total_counts_graph, scale_graph, mean_graph = sess.run([
                    gq._total_count_variable, gq.scale_variable,
                    gq.loc_variable
                ])

        self.assertAllClose(prod0_val, prod0_graph.ravel())
        self.assertAllClose(prod1_val, prod1_graph.ravel())

        self.assertAllClose(log_probs[:, 0], log_probs0[0])
        self.assertAllClose(log_probs[:, 1], log_probs0[1])
        self.assertAllClose(log_probs[:, 2], log_probs1[0])
        self.assertAllClose(log_probs[:, 3], log_probs1[1])

        self.assertAllClose(mixture00_val, mixture00_graph.ravel())
        self.assertAllClose(mixture01_val, mixture01_graph.ravel())
        self.assertAllClose(mixture10_val, mixture10_graph.ravel())
        self.assertAllClose(mixture11_val, mixture11_graph.ravel())

        self.assertAllEqual(counts, counts_per_component.ravel())
        self.assertAllEqual(accum, counts_per_component)

        self.assertAllClose(
            counts_per_step,
            counts_per_sample.reshape((batch_size, num_vars, num_components)))

        self.assertAllClose(sum_data_val, sum_data_graph)
        self.assertAllClose(sum_data_squared_val, sum_data_squared_graph)

        self.assertAllClose(total_counts_graph,
                            count_init + counts_per_component)
        self.assertTrue(np.all(np.not_equal(mean_graph, gq._loc_init)))
        self.assertTrue(np.all(np.not_equal(scale_graph, gq._scale_init)))

        mean_new_vals = []
        variance_new_vals = []
        variance_left, variance_right = [], []
        for i, obs in enumerate([data00, data01, data10, data11]):
            # Note that this does not depend on accumulating anything!
            # It actually is copied (more-or-less) from
            # https://github.com/whsu/spn/blob/master/spn/normal_leaf_node.py
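            # Count-weighted average of the old statistics (weight n = prior
            # counts) and the new observations (weight k):
            #   mean = (n * mean_old + sum(x)) / (n + k)
            #   var = (n * var_old + dx.dot(dx)) / (n + k) - dm * dm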
            x = np.asarray(obs).astype(np.float32)
            n = count_init
            k = len(obs)
            if softplus_scale:
                var_old = np.square(
                    np.log(
                        np.exp(gq._scale_init.astype(np.float32)).ravel()[i] +
                        1))
            else:
                var_old = np.square(gq._scale_init.astype(
                    np.float32)).ravel()[i]
            mean = (n * gq._loc_init.astype(np.float32).ravel()[i] +
                    np.sum(obs)) / (n + k)
            dx = x - gq._loc_init.astype(np.float32).ravel()[i]
            dm = mean - gq._loc_init.astype(np.float32).ravel()[i]
            var = (n * var_old + dx.dot(dx)) / (n + k) - dm * dm

            mean_new_vals.append(mean)
            variance_new_vals.append(var)
            variance_left.append((n * var_old + dx.dot(dx)) / (n + k))
            variance_right.append(dm * dm)

        mean_new_vals = np.asarray(mean_new_vals).reshape((2, 2))
        variance_new_vals = np.asarray(variance_new_vals).reshape((2, 2))

        def assert_non_zero_at_ij_equal(arr, i, j, truth):
            # Select i-th variable and j-th component
            arr = arr[:, i, j]
            self.assertAllClose(arr[arr != 0.0], truth)

        assert_non_zero_at_ij_equal(data_per_component_out, 0, 0, data00)
        assert_non_zero_at_ij_equal(data_per_component_out, 0, 1, data01)
        assert_non_zero_at_ij_equal(data_per_component_out, 1, 0, data10)
        assert_non_zero_at_ij_equal(data_per_component_out, 1, 1, data11)

        assert_non_zero_at_ij_equal(squared_data_per_component_out, 0, 0,
                                    np.square(data00))
        assert_non_zero_at_ij_equal(squared_data_per_component_out, 0, 1,
                                    np.square(data01))
        assert_non_zero_at_ij_equal(squared_data_per_component_out, 1, 0,
                                    np.square(data10))
        assert_non_zero_at_ij_equal(squared_data_per_component_out, 1, 1,
                                    np.square(data11))

        self.assertAllClose(mean_new_vals, mean_graph)
        # self.assertAllClose(np.asarray(variance_left).reshape((2, 2)), var_graph_left)
        self.assertAllClose(
            variance_new_vals,
            np.square(scale_graph if not softplus_scale
                      else np.log(np.exp(scale_graph) + 1)))

        self.assertGreater(lh_after, lh_before)