Пример #1
0
    def test_leaf_no_variance_gaussian(self):
        """Check a Gaussian conditional leaf trained on a constant target.

        Every training set used here has a single constant value in the
        target column, so the likelihood of the first column is expected to
        be identical for all rows, whatever the features are.
        """
        np.random.seed(17)
        # The seeded generator is consumed sequentially: keep this draw order.
        cluster_far = np.random.multivariate_normal([10, 10], np.eye(2), 500)
        cluster_near = np.random.multivariate_normal([1, 1], np.eye(2), 500)
        features = np.vstack([cluster_far, cluster_near])
        targets = np.ones((1000, 1), dtype=int)

        data = concatenate_yx(targets, features)

        ds_context = Context(parametric_types=[Gaussian])
        ds_context.feature_size = 2

        def first_column_likelihood(train_rows, eval_rows):
            # Fit a leaf over scope [0] on train_rows and score eval_rows.
            fitted = create_conditional_leaf(train_rows, ds_context, [0])
            return likelihood(fitted, eval_rows)[:, 0]

        # Target constantly 1, scored on the training rows themselves.
        # 0.3989... is numerically 1 / sqrt(2 * pi).
        scores = first_column_likelihood(data, data)
        self.assertEqual(np.var(scores), 0)
        self.assertAlmostEqual(scores[0], 0.398942280401432)

        # Same expectation after overwriting the target column in place.
        data[:, 0] = 2
        scores = first_column_likelihood(data, data)
        self.assertEqual(np.var(scores), 0)
        self.assertAlmostEqual(scores[0], 0.398942280401432)

        # Train on a copy whose target is 3, but score the rows whose target
        # is still 2: the likelihood stays constant at a lower value
        # (numerically exp(-0.5) / sqrt(2 * pi)).
        shifted = np.array(data)
        shifted[:, 0] = 3
        scores = first_column_likelihood(shifted, data)
        self.assertAlmostEqual(np.var(scores), 0)
        self.assertAlmostEqual(scores[0], 0.241970724519143)
Пример #2
0
    def fit(self, X, y=None, *, cspn_type=1):
        """Fit a conditional SPN that models the labels ``y`` given ``X``.

        Each label column is declared as a Bernoulli variable in the context,
        and ``X.shape[1]`` is stored as the context's feature size.

        Args:
            X: Feature matrix; only ``X.shape[1]`` is read here before the
                data is handed to ``concatenate_yx``.
            y: Label array. It is reshaped to ``(y.shape[0], -1)`` so a 1-D
                vector becomes a single label column. Required, even though
                the signature keeps the ``None`` default for compatibility.
            cspn_type: Structure to build. ``0`` fits one conditional leaf
                over all labels. ``1`` (the default, and the value that used
                to be hard-coded) creates a sum node using the row splitter
                returned by ``get_split_conditional_rows_KMeans()`` and fits
                one conditional leaf per resulting subtask.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``y`` is ``None`` or ``cspn_type`` is not 0 or 1.
        """
        # Validate before touching any state so a bad call leaves the
        # estimator unchanged and fails with a clear message.
        if y is None:
            raise ValueError("fit() requires the label array y")
        if cspn_type not in (0, 1):
            raise ValueError(
                "cspn_type must be 0 or 1, got {!r}".format(cspn_type))

        y = y.reshape(y.shape[0], -1)
        self.num_labels = y.shape[1]
        self.context = Context(parametric_types=[Bernoulli] *
                               self.num_labels).add_domains(y)
        self.context.feature_size = X.shape[1]
        self.scope = list(range(self.num_labels))
        data = concatenate_yx(y, X)

        if cspn_type == 0:
            self.cspn = create_conditional_leaf(data, self.context, self.scope)
        else:
            split_rows = get_split_conditional_rows_KMeans()
            self.cspn, subtasks = create_sum(data=data,
                                             node_id=0,
                                             parent_id=0,
                                             pos=0,
                                             context=self.context,
                                             scope=self.scope,
                                             split_rows=split_rows)
            # Fill each child slot of the sum node with a leaf fitted on the
            # data and scope carried by the corresponding subtask.
            for i, subtask in enumerate(subtasks):
                self.cspn.children[i] = create_conditional_leaf(
                    subtask[1]['data'], self.context, subtask[1]['scope'])
            # Kept from the original: dumps the learned structure to stdout.
            print(self.cspn)

        return self
Пример #3
0
    def test_leaf_bernoulli_bootstrap(self):
        """Check a Bernoulli conditional leaf on two separated clusters.

        The first 100 rows carry label 1 and the last 100 carry label 0. The
        likelihood of the observed label and of the flipped label must sum to
        one, with the observed label never being the less likely of the two.
        """
        np.random.seed(17)
        # Keep the draw order: the seeded generator is consumed sequentially.
        x_label_one = np.random.multivariate_normal([10, 10], np.eye(2), 100)
        x_label_zero = np.random.multivariate_normal([1, 1], np.eye(2), 100)
        x = np.vstack([x_label_one, x_label_zero])
        y = np.repeat([1, 0], 100).reshape(-1, 1)

        observed = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Bernoulli])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(observed, ds_context, [0])

        p_observed = likelihood(leaf, observed)
        # Same features, but with every label flipped (0 <-> 1).
        flipped = np.hstack([1 - y, x])
        p_flipped = likelihood(leaf, flipped)

        np.testing.assert_array_almost_equal(p_observed + p_flipped, 1.0)

        self.assertTrue(np.all(p_observed >= 0.5))
        self.assertTrue(np.all(p_flipped < 0.5))
Пример #4
0
    def test_leaf_categorical(self):
        """Check a Categorical conditional leaf on three separated clusters.

        Row layout of the 1500-row data set:
            rows    0..499   -> label 2, features drawn around [20, 20]
            rows  500..999   -> label 1, features drawn around [10, 10]
            rows 1000..1499  -> label 0, features drawn around [1, 1]

        For every row the likelihoods of the three labels must sum to one,
        and the label that generated the row must clearly dominate.
        """
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([20, 20], np.eye(2), 500),
                np.random.multivariate_normal([10, 10], np.eye(2), 500),
                np.random.multivariate_normal([1, 1], np.eye(2), 500),
            ),
            axis=0,
        )
        y = np.array([2] * 500 + [1] * 500 + [0] * 500).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Categorical])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        # Score every row three times, once per hypothesised label.
        l0 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 0, x))
        l1 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 1, x))
        l2 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 2, x))

        np.testing.assert_array_almost_equal(l0 + l1 + l2, 1.0)

        # Label 0 dominates only on the last cluster.
        self.assertTrue(np.all(l0[1000:1500] > 0.85))
        self.assertTrue(np.all(l0[0:1000] < 0.15))

        # Label 1 dominates only on the middle cluster.
        self.assertTrue(np.all(l1[500:1000] > 0.85))
        self.assertTrue(np.all(l1[0:500] < 0.15))
        self.assertTrue(np.all(l1[1000:1500] < 0.15))

        # Label 2 dominates only on the first cluster. The upper bound is
        # the real row count (1500); the former 15000 was a typo that only
        # worked because NumPy clamps out-of-range slice bounds.
        self.assertTrue(np.all(l2[0:500] > 0.85))
        self.assertTrue(np.all(l2[500:1500] < 0.15))
Пример #5
0
    def test_leaf_gaussian(self):
        """Check a Gaussian conditional leaf on a feature-dependent target.

        Targets drawn around 20 are paired with features drawn around
        [10, 10]; targets drawn around 60 are paired with features drawn
        around [1, 1]. The leaf must prefer each target value under its
        matching features, and two scores are pinned to reference values.
        """
        np.random.seed(17)
        # Draw order (features first, then targets) fixes the seeded values.
        x_for_20 = np.random.multivariate_normal([10, 10], np.eye(2), 5000)
        x_for_60 = np.random.multivariate_normal([1, 1], np.eye(2), 5000)
        x = np.vstack([x_for_20, x_for_60])

        y_around_20 = np.random.normal(20, 2, 5000)
        y_around_60 = np.random.normal(60, 2, 5000)
        y = np.concatenate([y_around_20, y_around_60]).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Gaussian])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        # No row of the training data may produce a NaN likelihood.
        train_likelihood = likelihood(leaf, data)
        self.assertFalse(np.any(np.isnan(train_likelihood)))

        # Target 20 fits the [10, 10] features better than the [1, 1] ones.
        matched = get_ll(leaf, [20, 10, 10])
        mismatched = get_ll(leaf, [20, 1, 1])
        self.assertGreater(matched, mismatched)

        # Target 60 fits the [1, 1] features better than the [10, 10] ones.
        matched = get_ll(leaf, [60, 1, 1])
        mismatched = get_ll(leaf, [60, 10, 10])
        self.assertGreater(matched, mismatched)

        # Reference values for the two matching combinations.
        self.assertAlmostEqual(get_ll(leaf, [60, 1, 1]), 0.3476232862652)
        self.assertAlmostEqual(get_ll(leaf, [20, 10, 10]), 0.3628922322773634)
Пример #6
0
    def test_leaf_mpe_bernoulli(self):
        """Check MPE completion of a NaN target on a Bernoulli leaf.

        Label 0 is paired with features drawn around [10, 10] and label 1
        with features drawn around [1, 1]. Queries leave the target as NaN
        and expect ``mpe`` to return the label matching the features. A batch
        that also contains a row with a given target must raise
        ``AssertionError``.
        """
        np.random.seed(17)
        # Keep the draw order: the seeded generator is consumed sequentially.
        x_label_zero = np.random.multivariate_normal([10, 10], np.eye(2), 5000)
        x_label_one = np.random.multivariate_normal([1, 1], np.eye(2), 5000)
        x = np.vstack([x_label_zero, x_label_one])
        y = np.repeat([0, 1], 5000).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Bernoulli])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        # Single query near the label-0 cluster.
        completed = mpe(leaf, np.array([[np.nan, 10, 10]]))
        self.assertAlmostEqual(completed[0, 0], 0)

        # Single query near the label-1 cluster.
        completed = mpe(leaf, np.array([[np.nan, 1, 1]]))
        self.assertAlmostEqual(completed[0, 0], 1)

        # Two queries in one batch, one per cluster.
        both = np.array([[np.nan, 1, 1],
                         [np.nan, 10, 10]])
        completed = mpe(leaf, both)
        self.assertAlmostEqual(completed[0, 0], 1)
        self.assertAlmostEqual(completed[1, 0], 0)

        # The last row already has a target value (5): mpe must refuse it.
        with_given_target = np.array([[np.nan, 1, 1],
                                      [np.nan, 10, 10],
                                      [5, 10, 10]])
        self.assertRaises(AssertionError, mpe, leaf, with_given_target)
Пример #7
0
    def test_leaf_sampling(self):
        """Check sampling of a NaN target from a Gaussian conditional leaf.

        Targets drawn around 20 are paired with features drawn around
        [10, 10]; targets drawn around 60 with features drawn around [1, 1].
        Sample means are pinned to reference values obtained with the integer
        17 passed as the third argument of ``sample_instances``. A batch that
        contains a row with a given target must raise ``AssertionError``.
        """
        np.random.seed(17)
        # Draw order (features first, then targets) fixes the seeded values.
        x_for_20 = np.random.multivariate_normal([10, 10], np.eye(2), 5000)
        x_for_60 = np.random.multivariate_normal([1, 1], np.eye(2), 5000)
        x = np.vstack([x_for_20, x_for_60])

        y_around_20 = np.random.normal(20, 2, 5000)
        y_around_60 = np.random.normal(60, 2, 5000)
        y = np.concatenate([y_around_20, y_around_60]).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Gaussian])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        # 1000 identical queries near the target-20 cluster.
        queries = np.tile([np.nan, 10, 10], (1000, 1))
        drawn = sample_instances(leaf, queries, 17)
        self.assertAlmostEqual(np.mean(drawn[:, 0]), 20.456669723751173)

        # 1000 identical queries near the target-60 cluster.
        queries = np.tile([np.nan, 1, 1], (1000, 1))
        drawn = sample_instances(leaf, queries, 17)
        self.assertAlmostEqual(np.mean(drawn[:, 0]), 59.496663076099196)

        # Alternating queries: even rows near [1, 1], odd rows near [10, 10].
        queries = np.tile([[np.nan, 1, 1], [np.nan, 10, 10]], (1000, 1))
        drawn = sample_instances(leaf, queries, 17)
        self.assertAlmostEqual(np.mean(drawn[::2, 0]), 59.546359637084564)
        self.assertAlmostEqual(np.mean(drawn[1::2, 0]), 20.452118792501008)

        # The last row already has a target value (5): sampling must refuse.
        with_given_target = np.array([[np.nan, 1, 1],
                                      [np.nan, 10, 10],
                                      [5, 10, 10]])
        self.assertRaises(AssertionError, sample_instances, leaf,
                          with_given_target, 17)
Пример #8
0
    def test_leaf_no_variance_bernoulli(self):
        """Check a Bernoulli conditional leaf trained on a constant label.

        Every row carries label 1, so the likelihood of the training data
        must be at least 0.5 everywhere.
        """
        np.random.seed(17)
        # Keep the draw order: the seeded generator is consumed sequentially.
        cluster_far = np.random.multivariate_normal([10, 10], np.eye(2), 500)
        cluster_near = np.random.multivariate_normal([1, 1], np.eye(2), 500)
        features = np.vstack([cluster_far, cluster_near])
        labels = np.ones((1000, 1), dtype=int)

        data = concatenate_yx(labels, features)

        ds_context = Context(parametric_types=[Bernoulli])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])
        scores = likelihood(leaf, data)
        self.assertTrue(np.all(scores >= 0.5))
Пример #9
0
    def test_leaf_sampling_categorical(self):
        """Check sampling of a NaN target from a Categorical conditional leaf.

        Labels 2, 1 and 0 are paired with features drawn around [20, 20],
        [10, 10] and [1, 1] respectively. The mean of the sampled labels must
        match the label of the queried cluster to one decimal place. A batch
        that contains a row with a given target must raise
        ``AssertionError``.
        """
        np.random.seed(17)
        # Keep the draw order: the seeded generator is consumed sequentially.
        x_label_two = np.random.multivariate_normal([20, 20], np.eye(2), 500)
        x_label_one = np.random.multivariate_normal([10, 10], np.eye(2), 500)
        x_label_zero = np.random.multivariate_normal([1, 1], np.eye(2), 500)
        x = np.vstack([x_label_two, x_label_one, x_label_zero])
        y = np.repeat([2, 1, 0], 500).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Categorical])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        # 1000 identical queries near the label-1 cluster.
        queries = np.tile([np.nan, 10, 10], (1000, 1))
        drawn = sample_instances(leaf, queries, RandomState(17))
        self.assertAlmostEqual(np.mean(drawn[:, 0]), 1, places=1)

        # 1000 identical queries near the label-0 cluster.
        queries = np.tile([np.nan, 1, 1], (1000, 1))
        drawn = sample_instances(leaf, queries, RandomState(17))
        self.assertAlmostEqual(np.mean(drawn[:, 0]), 0, places=1)

        # Alternating queries: even rows near [1, 1], odd rows near [10, 10].
        queries = np.tile([[np.nan, 1, 1], [np.nan, 10, 10]], (1000, 1))
        drawn = sample_instances(leaf, queries, RandomState(17))
        self.assertAlmostEqual(np.mean(drawn[::2, 0]), 0, places=1)
        self.assertAlmostEqual(np.mean(drawn[1::2, 0]), 1, places=1)

        # The last row already has a target value (5): sampling must refuse.
        with_given_target = np.array([[np.nan, 1, 1],
                                      [np.nan, 10, 10],
                                      [5, 10, 10]])
        self.assertRaises(AssertionError, sample_instances, leaf,
                          with_given_target, RandomState(17))