def test_leaf_no_variance_gaussian(self):
    """Check a Gaussian conditional leaf fitted on a constant target column.

    The target column is held constant (1, then 2, then 3) while the two
    feature columns come from two seeded Gaussian clusters.  In every phase
    the first likelihood column must be constant across rows.
    """
    np.random.seed(17)
    far_cluster = np.random.multivariate_normal([10, 10], np.eye(2), 500)
    near_cluster = np.random.multivariate_normal([1, 1], np.eye(2), 500)
    features = np.concatenate((far_cluster, near_cluster), axis=0)
    targets = np.array([1] * 1000).reshape(-1, 1)
    data = concatenate_yx(targets, features)

    ds_context = Context(parametric_types=[Gaussian])
    ds_context.feature_size = 2

    def first_column_likelihood(train_rows, eval_rows):
        # Fit a fresh leaf on train_rows and score eval_rows with it.
        fitted = create_conditional_leaf(train_rows, ds_context, [0])
        return likelihood(fitted, eval_rows)[:, 0]

    # Phase 1: target is 1 everywhere.  The expected value equals
    # 1/sqrt(2*pi), i.e. the standard normal density at its mean.
    scores = first_column_likelihood(data, data)
    self.assertEqual(np.var(scores), 0)
    self.assertAlmostEqual(scores[0], 0.398942280401432)

    # Phase 2: overwrite the target column in place with 2; same expectation.
    data[:, 0] = 2
    scores = first_column_likelihood(data, data)
    self.assertEqual(np.var(scores), 0)
    self.assertAlmostEqual(scores[0], 0.398942280401432)

    # Phase 3: train on a copy whose target is 3 but score the rows whose
    # target is still 2.  The expected value equals the standard normal
    # density one unit away from its mean.
    shifted = np.array(data)
    shifted[:, 0] = 3
    scores = first_column_likelihood(shifted, data)
    self.assertAlmostEqual(np.var(scores), 0)
    self.assertAlmostEqual(scores[0], 0.241970724519143)
def fit(self, X, y=None, cspn_type=1):
    """Fit a conditional SPN over the label columns ``y`` given features ``X``.

    Args:
        X: Feature array; ``X.shape[1]`` is stored as the context's
            ``feature_size``.
        y: Label array.  Required despite the ``None`` default; it is
            reshaped to ``(y.shape[0], -1)`` so a 1-D vector becomes a
            single label column.
        cspn_type: Structure to build.  ``0`` fits one conditional leaf over
            all labels; ``1`` (default, the previously hard-coded behaviour)
            builds a sum node via ``create_sum`` with a KMeans row split and
            fits one conditional leaf per resulting subtask.

    Raises:
        ValueError: If ``y`` is ``None`` or ``cspn_type`` is not 0 or 1.

    Side effects:
        Sets ``self.num_labels``, ``self.context``, ``self.scope`` and
        ``self.cspn``, and prints the fitted structure.
    """
    # Fail early with a clear message instead of an AttributeError on
    # ``None.reshape`` further down.
    if y is None:
        raise ValueError("fit() requires the label array y, but got None")
    if cspn_type not in (0, 1):
        raise ValueError("cspn_type must be 0 or 1, got %r" % (cspn_type,))

    y = y.reshape(y.shape[0], -1)
    self.num_labels = y.shape[1]

    # One Bernoulli parametric type per label column.
    self.context = Context(
        parametric_types=[Bernoulli] * self.num_labels).add_domains(y)
    self.context.feature_size = X.shape[1]
    self.scope = list(range(y.shape[1]))

    data = concatenate_yx(y, X)

    if cspn_type == 0:
        self.cspn = create_conditional_leaf(data, self.context, self.scope)
    else:
        split_rows = get_split_conditional_rows_KMeans()
        self.cspn, subtasks = create_sum(
            data=data,
            node_id=0,
            parent_id=0,
            pos=0,
            context=self.context,
            scope=self.scope,
            split_rows=split_rows,
        )
        # Each subtask's second element carries the 'data' and 'scope' for
        # the child at the same position; replace that child with a leaf.
        for i, subtask in enumerate(subtasks):
            self.cspn.children[i] = create_conditional_leaf(
                subtask[1]['data'], self.context, subtask[1]['scope'])

    print(self.cspn)
def test_leaf_bernoulli_bootstrap(self):
    """Check a Bernoulli conditional leaf on two separated feature clusters.

    Rows from the cluster around [10, 10] are labelled 1 and rows from the
    cluster around [1, 1] are labelled 0.  The likelihood of the observed
    label and of the flipped label must sum to one, with the observed label
    at least as likely as the flipped one.
    """
    np.random.seed(17)
    positives = np.random.multivariate_normal([10, 10], np.eye(2), 100)
    negatives = np.random.multivariate_normal([1, 1], np.eye(2), 100)
    features = np.concatenate((positives, negatives), axis=0)
    labels = np.array([1] * 100 + [0] * 100).reshape(-1, 1)
    train = concatenate_yx(labels, features)

    context = Context(parametric_types=[Bernoulli])
    context.feature_size = 2
    leaf = create_conditional_leaf(train, context, [0])

    observed = likelihood(leaf, train)

    # Same features, every label flipped (1 - y).
    flipped_rows = np.concatenate([1 - labels, features], axis=1)
    flipped = likelihood(leaf, flipped_rows)

    np.testing.assert_array_almost_equal(observed + flipped, 1.0)
    self.assertTrue(np.all(observed >= 0.5))
    self.assertTrue(np.all(flipped < 0.5))
def test_leaf_categorical(self):
    """Check a Categorical conditional leaf on three separated clusters.

    Row layout of the 1500-row data set:
      * rows    0-499:  features around [20, 20], label 2
      * rows  500-999:  features around [10, 10], label 1
      * rows 1000-1499: features around [1, 1],   label 0

    For every row the likelihoods of labels 0, 1 and 2 must sum to one, and
    each label must be likely (> 0.85) on its own cluster and unlikely
    (< 0.15) on the other two.
    """
    np.random.seed(17)
    x = np.concatenate(
        (
            np.random.multivariate_normal([20, 20], np.eye(2), 500),
            np.random.multivariate_normal([10, 10], np.eye(2), 500),
            np.random.multivariate_normal([1, 1], np.eye(2), 500),
        ),
        axis=0,
    )
    y = np.array([2] * 500 + [1] * 500 + [0] * 500).reshape(-1, 1)
    data = concatenate_yx(y, x)

    ds_context = Context(parametric_types=[Categorical])
    ds_context.feature_size = 2
    leaf = create_conditional_leaf(data, ds_context, [0])

    # Score every row three times, once per candidate label.
    l0 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 0, x))
    l1 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 1, x))
    l2 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 2, x))

    np.testing.assert_array_almost_equal(l0 + l1 + l2, 1.0)

    # Label 0 belongs to the last cluster only.
    self.assertTrue(np.all(l0[1000:1500] > 0.85))
    self.assertTrue(np.all(l0[0:1000] < 0.15))

    # Label 1 belongs to the middle cluster only.
    self.assertTrue(np.all(l1[500:1000] > 0.85))
    self.assertTrue(np.all(l1[0:500] < 0.15))
    self.assertTrue(np.all(l1[1000:1500] < 0.15))

    # Label 2 belongs to the first cluster only.  The upper bound is the
    # row count (1500); the previous 15000 was a typo that only worked
    # because slicing clamps out-of-range bounds.
    self.assertTrue(np.all(l2[0:500] > 0.85))
    self.assertTrue(np.all(l2[500:1500] < 0.15))
def test_leaf_gaussian(self):
    """Check a Gaussian conditional leaf on two feature/target regimes.

    Targets drawn around 20 are paired with features around [10, 10] and
    targets drawn around 60 with features around [1, 1].  The leaf must
    produce no NaN likelihoods, prefer each target under its own features,
    and reproduce two pinned values.
    """
    np.random.seed(17)
    # Draw order matters for the pinned values: both feature clusters first,
    # then both target samples.
    features_hi = np.random.multivariate_normal([10, 10], np.eye(2), 5000)
    features_lo = np.random.multivariate_normal([1, 1], np.eye(2), 5000)
    features = np.concatenate((features_hi, features_lo), axis=0)

    targets_20 = np.random.normal(20, 2, 5000)
    targets_60 = np.random.normal(60, 2, 5000)
    targets = np.concatenate((targets_20, targets_60)).reshape(-1, 1)

    # associates y=20 with X=[10,10]
    # associates y=60 with X=[1,1]
    train = concatenate_yx(targets, features)

    context = Context(parametric_types=[Gaussian])
    context.feature_size = 2
    leaf = create_conditional_leaf(train, context, [0])

    has_nan = np.any(np.isnan(likelihood(leaf, train)))
    self.assertFalse(has_nan)

    # Each target value must score higher with its matching features.
    self.assertGreater(get_ll(leaf, [20, 10, 10]), get_ll(leaf, [20, 1, 1]))
    self.assertGreater(get_ll(leaf, [60, 1, 1]), get_ll(leaf, [60, 10, 10]))

    # Pinned regression values for the seeded data.
    self.assertAlmostEqual(get_ll(leaf, [60, 1, 1]), 0.3476232862652)
    self.assertAlmostEqual(get_ll(leaf, [20, 10, 10]), 0.3628922322773634)
def test_leaf_mpe_bernoulli(self):
    """Check MPE completion of a missing Bernoulli label.

    The label is NaN in each query row and the features select the regime:
    features [10, 10] must complete to 0 and features [1, 1] to 1.  A query
    that contains a row without any NaN must raise ``AssertionError``.
    """
    np.random.seed(17)
    zeros_cluster = np.random.multivariate_normal([10, 10], np.eye(2), 5000)
    ones_cluster = np.random.multivariate_normal([1, 1], np.eye(2), 5000)
    features = np.concatenate((zeros_cluster, ones_cluster), axis=0)
    labels = np.array([0] * 5000 + [1] * 5000).reshape(-1, 1)

    # associates y=0 with X=[10,10]
    # associates y=1 with X=[1,1]
    train = concatenate_yx(labels, features)

    context = Context(parametric_types=[Bernoulli])
    context.feature_size = 2
    leaf = create_conditional_leaf(train, context, [0])

    near_ten = [np.nan, 10, 10]
    near_one = [np.nan, 1, 1]

    completed = mpe(leaf, np.array([near_ten]))
    self.assertAlmostEqual(completed[0, 0], 0)

    completed = mpe(leaf, np.array([near_one]))
    self.assertAlmostEqual(completed[0, 0], 1)

    # Two rows at once: each is completed according to its own features.
    completed = mpe(leaf, np.array([near_one, near_ten]))
    self.assertAlmostEqual(completed[0, 0], 1)
    self.assertAlmostEqual(completed[1, 0], 0)

    # The third row already has a label (5), so nothing is missing there.
    with self.assertRaises(AssertionError):
        mpe(leaf, np.array([near_one, near_ten, [5, 10, 10]]))
def test_leaf_sampling(self):
    """Check sampling of a missing Gaussian target from a conditional leaf.

    Targets around 20 are paired with features around [10, 10] and targets
    around 60 with features around [1, 1].  The mean of the sampled targets
    is compared against pinned values for the seeded data, and a query
    containing a row without any NaN must raise ``AssertionError``.
    """
    np.random.seed(17)
    # Draw order matters for the pinned values: features first, then targets.
    features_hi = np.random.multivariate_normal([10, 10], np.eye(2), 5000)
    features_lo = np.random.multivariate_normal([1, 1], np.eye(2), 5000)
    features = np.concatenate((features_hi, features_lo), axis=0)

    targets_20 = np.random.normal(20, 2, 5000)
    targets_60 = np.random.normal(60, 2, 5000)
    targets = np.concatenate((targets_20, targets_60)).reshape(-1, 1)

    # associates y=20 with X=[10,10]
    # associates y=60 with X=[1,1]
    train = concatenate_yx(targets, features)

    context = Context(parametric_types=[Gaussian])
    context.feature_size = 2
    leaf = create_conditional_leaf(train, context, [0])

    def query(flat_values, repeats=1):
        # Repeat the flat value list and fold it into rows of three columns.
        return np.array(flat_values * repeats).reshape(-1, 3)

    near_ten = [np.nan, 10, 10]
    near_one = [np.nan, 1, 1]

    sampled = sample_instances(leaf, query(near_ten, 1000), 17)
    self.assertAlmostEqual(np.mean(sampled[:, 0]), 20.456669723751173)

    sampled = sample_instances(leaf, query(near_one, 1000), 17)
    self.assertAlmostEqual(np.mean(sampled[:, 0]), 59.496663076099196)

    # Alternating rows: even rows use features [1, 1], odd rows [10, 10].
    sampled = sample_instances(leaf, query(near_one + near_ten, 1000), 17)
    self.assertAlmostEqual(np.mean(sampled[::2, 0]), 59.546359637084564)
    self.assertAlmostEqual(np.mean(sampled[1::2, 0]), 20.452118792501008)

    # The third row already has a target (5), so nothing is missing there.
    with self.assertRaises(AssertionError):
        sample_instances(leaf, query(near_one + near_ten + [5, 10, 10]), 17)
def test_leaf_no_variance_bernoulli(self):
    """Check a Bernoulli conditional leaf fitted when every label is 1.

    With a constant label column, the likelihood of the training rows must
    be at least 0.5 everywhere.
    """
    np.random.seed(17)
    far_cluster = np.random.multivariate_normal([10, 10], np.eye(2), 500)
    near_cluster = np.random.multivariate_normal([1, 1], np.eye(2), 500)
    features = np.concatenate((far_cluster, near_cluster), axis=0)
    labels = np.array([1] * 1000).reshape(-1, 1)
    train = concatenate_yx(labels, features)

    context = Context(parametric_types=[Bernoulli])
    context.feature_size = 2
    leaf = create_conditional_leaf(train, context, [0])

    scores = likelihood(leaf, train)
    self.assertTrue(np.all(scores >= 0.5))
def test_leaf_sampling_categorical(self):
    """Check sampling of a missing Categorical label from a conditional leaf.

    Labels 2, 1 and 0 are paired with features around [20, 20], [10, 10]
    and [1, 1] respectively.  The mean sampled label must match the label
    of the queried feature regime to one decimal place, and a query
    containing a row without any NaN must raise ``AssertionError``.
    """
    np.random.seed(17)
    cluster_two = np.random.multivariate_normal([20, 20], np.eye(2), 500)
    cluster_one = np.random.multivariate_normal([10, 10], np.eye(2), 500)
    cluster_zero = np.random.multivariate_normal([1, 1], np.eye(2), 500)
    features = np.concatenate((cluster_two, cluster_one, cluster_zero), axis=0)
    labels = np.array([2] * 500 + [1] * 500 + [0] * 500).reshape(-1, 1)
    train = concatenate_yx(labels, features)

    context = Context(parametric_types=[Categorical])
    context.feature_size = 2
    leaf = create_conditional_leaf(train, context, [0])

    def query(flat_values, repeats=1):
        # Repeat the flat value list and fold it into rows of three columns.
        return np.array(flat_values * repeats).reshape(-1, 3)

    near_ten = [np.nan, 10, 10]
    near_one = [np.nan, 1, 1]

    # A fresh RandomState(17) is passed to every call.
    sampled = sample_instances(leaf, query(near_ten, 1000), RandomState(17))
    self.assertAlmostEqual(np.mean(sampled[:, 0]), 1, places=1)

    sampled = sample_instances(leaf, query(near_one, 1000), RandomState(17))
    self.assertAlmostEqual(np.mean(sampled[:, 0]), 0, places=1)

    # Alternating rows: even rows use features [1, 1], odd rows [10, 10].
    sampled = sample_instances(
        leaf, query(near_one + near_ten, 1000), RandomState(17))
    self.assertAlmostEqual(np.mean(sampled[::2, 0]), 0, places=1)
    self.assertAlmostEqual(np.mean(sampled[1::2, 0]), 1, places=1)

    # The third row already has a label (5), so nothing is missing there.
    with self.assertRaises(AssertionError):
        sample_instances(
            leaf, query(near_one + near_ten + [5, 10, 10]), RandomState(17))