def create_leaf(data, ds_context, scope):
    """Create a leaf for the variable ``scope[0]`` according to its meta type.

    * ``MetaType.REAL``: an identity leaf when ``identity_numeric`` is truthy,
      otherwise a piecewise leaf.
    * ``MetaType.DISCRETE``: a ``Categorical`` leaf whose probabilities are the
      relative frequencies of the values found in ``data[:, 0]``.

    ``identity_numeric`` and ``prior_weight`` are not parameters of this
    function; they are read from the surrounding scope.

    :param data: 2-D array; column 0 is the one read for DISCRETE variables
    :param ds_context: context object providing ``meta_types`` and ``domains``
    :param scope: sequence whose first entry is the index of the variable
    :return: the created leaf
    :raises Exception: if the meta type is neither REAL nor DISCRETE
    """
    idx = scope[0]
    meta_type = ds_context.meta_types[idx]

    if meta_type == MetaType.REAL:
        if identity_numeric:
            return create_identity_leaf(data, scope)
        # A prior weight of exactly zero is handed to the piecewise leaf as None.
        effective_prior = None if prior_weight == 0. else prior_weight
        return create_piecewise_leaf(data, ds_context, scope, prior_weight=effective_prior)

    if meta_type == MetaType.DISCRETE:
        unique, counts = np.unique(data[:, 0], return_counts=True)
        # One slot per domain entry; each observed value is used as its own index.
        sorted_counts = np.zeros(len(ds_context.domains[idx]), dtype=np.float64)
        for value, count in zip(unique, counts):
            sorted_counts[int(value)] = count
        p = sorted_counts / data.shape[0]

        # Regularization: add the prior weight to every entry. None is treated
        # like "no prior" (as in the REAL branch) instead of failing on the
        # comparison.
        if prior_weight is not None and prior_weight > 0.:
            p += prior_weight
        # NOTE(review): normalization is applied unconditionally here; confirm
        # this matches the indentation of the original source.
        p = p / np.sum(p)
        return Categorical(p, scope)

    raise Exception("Method learn_mspn_for_aqp(...) cannot create leaf for " + str(meta_type))
def test_PWL_no_variance(self):
    """Expect an AssertionError when a KDE piecewise leaf is built from two identical values."""
    constant_column = np.array([[1.0], [1.0]])
    context = Context([MetaType.REAL])
    context.add_domains(constant_column)

    # Callable form of assertRaises: the leaf construction itself must raise.
    self.assertRaises(
        AssertionError,
        create_piecewise_leaf,
        constant_column,
        context,
        scope=[0],
        hist_source="kde",
    )
def test_sample_range(self):
    """Sample a piecewise leaf under a point range, an interval and a union of intervals.

    The leaf is fitted on 1000 seeded draws from N(20, 5); every sampling call
    draws 10 samples with a dedicated ``RandomState(100)``.
    """
    np.random.seed(10)
    data = np.random.normal(20, scale=5, size=1000).reshape((1000, 1))
    numpy_data = np.array(data, np.float64)
    meta_types = [MetaType.REAL]
    domains = [[np.min(numpy_data[:, 0]), np.max(numpy_data[:, 0])]]
    ds_context = Context(meta_types=meta_types, domains=domains)
    rand_gen = np.random.RandomState(100)
    pwl = create_piecewise_leaf(data, ds_context, scope=[0], prior_weight=None)

    # Point range: all samples are expected to average to the point itself.
    ranges = np.array([NumericRange([[20]])])
    samples = SamplingRange.sample_piecewise_node(pwl, 10, rand_gen, ranges)
    self.assertEqual(len(samples), 10)
    self.assertAlmostEqual(np.average(samples), 20)

    # Single interval: check the bounds on every sample. The previous
    # `all(samples[samples > 20])` only tested the truthiness of values that
    # had already been filtered, so it could never fail.
    ranges = np.array([NumericRange([[20, 100]])])
    samples = SamplingRange.sample_piecewise_node(pwl, 10, rand_gen, ranges)
    self.assertTrue(np.all(samples >= 20))
    self.assertTrue(np.all(samples <= 100))

    # Union of intervals: nothing may fall into the gap or below the lower bound.
    # Testing the masks directly avoids depending on the sampled values being non-zero.
    ranges = np.array([NumericRange([[10, 13], [20, 100]])])
    samples = SamplingRange.sample_piecewise_node(pwl, 10, rand_gen, ranges)
    self.assertFalse(np.any((samples > 13) & (samples < 20)))
    self.assertFalse(np.any(samples < 10))
def test_PWL_no_variance(self):
    """Check the likelihood a KDE piecewise leaf assigns to two identical values."""
    samples = np.array([[1.0], [1.0]])
    context = Context([MetaType.REAL])
    context.add_domains(samples)
    pwl_leaf = create_piecewise_leaf(samples, context, scope=[0], hist_source="kde")

    likelihoods = np.exp(log_likelihood(pwl_leaf, samples))

    # Both rows hold the same value, so both are compared to the same reference.
    expected = 2 / 6
    for row in (0, 1):
        self.assertAlmostEqual(float(likelihoods[row]), expected)
def test_Piecewise_expectations_with_evidence(self):
    """Expectation of column 0 given evidence on column 1 in a two-component mixture.

    Component A has column 1 == 0 and column 0 drawn around 100; component B
    has column 1 == 1 and column 0 drawn around 50. With column 1 fixed by the
    evidence, the expectation of column 0 is compared to the empirical mean of
    the matching component (2 decimal places).
    """
    group_a = np.zeros((20000, 2))
    group_a[:, 1] = 0
    group_a[:, 0] = np.random.normal(loc=100.0, scale=5.00, size=group_a.shape[0])

    group_b = np.zeros_like(group_a)
    group_b[:, 1] = 1
    group_b[:, 0] = np.random.normal(loc=50.0, scale=5.00, size=group_b.shape[0])

    stacked = np.vstack((group_a, group_b))
    context = Context(meta_types=[MetaType.REAL, MetaType.DISCRETE])
    context.parametric_types = [None, Categorical]
    context.add_domains(stacked)

    def product_of_leaves(rows):
        # Piecewise leaf over column 0 times parametric leaf over column 1,
        # created in that order.
        value_leaf = create_piecewise_leaf(
            rows[:, 0].reshape(-1, 1),
            context,
            scope=[0],
            prior_weight=None,
            hist_source="numpy",
        )
        label_leaf = create_parametric_leaf(rows[:, 1].reshape(-1, 1), context, scope=[1])
        return value_leaf * label_leaf

    left = product_of_leaves(group_a)
    right = product_of_leaves(group_b)
    spn = 0.5 * left + 0.5 * right

    # Row 0: column 1 == 0, row 1: column 1 == 1; column 0 is left as NaN.
    evidence = np.array([[np.nan, 0.0], [np.nan, 1.0]])

    expectation = Expectation(spn, {0}, evidence)

    self.assertAlmostEqual(np.mean(group_a[:, 0]), expectation[0, 0], 2)
    self.assertAlmostEqual(np.mean(group_b[:, 0]), expectation[1, 0], 2)
def test_inference_range(self):
    """Compare range likelihoods of a piecewise leaf with fixed reference values.

    The leaf is fitted on 1000 seeded draws from N(20, 5); each case queries a
    single NumericRange and checks the returned value with assertAlmostEqual.
    """
    np.random.seed(10)
    data = np.random.normal(20, scale=5, size=1000).reshape((1000, 1))
    numpy_data = np.array(data, np.float64)
    column = numpy_data[:, 0]
    ds_context = Context(
        meta_types=[MetaType.REAL],
        domains=[[np.min(column), np.max(column)]],
    )
    pwl = create_piecewise_leaf(data, ds_context, scope=[0], prior_weight=None)

    # (interval specification, expected likelihood), evaluated in this order.
    cases = (
        ([[20]], 0.086475210674),
        ([[21]], 0.0855907611968),
        ([[19]], 0.0833451329643),
        ([[-20]], 0),
        ([[20, 100]], 0.493416517396),
        ([[-20, 20]], 0.506583482604),
        ([[-20, 100]], 1),
        ([[-20, -10]], 0),
    )
    for interval, expected in cases:
        ranges = np.array([[NumericRange(interval)]])
        prob = InferenceRange.piecewise_likelihood_range(pwl, ranges)[0][0]
        self.assertAlmostEqual(prob, expected)
def test_PWL(self):
    """Fit a KDE piecewise leaf on a two-mode sample and evaluate its likelihood.

    There is no assertion; the test only passes if leaf creation and
    likelihood evaluation run without raising.
    """
    # Alternative fixed input: np.array([1.0, 1.0, 2.0, 3.0] * 100).reshape(-1, 1)
    first_mode = np.random.normal(10, 5, (300, 1))
    second_mode = np.random.normal(20, 10, (700, 1))
    samples = np.concatenate((first_mode, second_mode), axis=0)

    context = Context([MetaType.REAL])
    context.add_domains(samples)
    pwl_leaf = create_piecewise_leaf(
        samples,
        context,
        scope=[0],
        prior_weight=None,
        hist_source="kde",
    )

    likelihoods = np.exp(log_likelihood(pwl_leaf, samples))
def test_Piecewise_expectations(self):
    """Expectation of a single piecewise leaf should match the empirical mean.

    Checked once for a REAL variable (2 decimal places) and once for a
    DISCRETE variable (3 decimal places).
    """
    # REAL variable: normal sample around 100.
    real_sample = np.random.normal(loc=100.0, scale=5.00, size=20000).reshape(-1, 1)
    real_context = Context(meta_types=[MetaType.REAL])
    real_context.add_domains(real_sample)
    real_leaf = create_piecewise_leaf(real_sample, real_context, scope=[0], prior_weight=None)
    real_expectation = Expectation(real_leaf, {0})
    self.assertAlmostEqual(np.mean(real_sample[:, 0]), real_expectation[0, 0], 2)

    # DISCRETE variable: random integers in [0, 100).
    int_sample = np.random.randint(0, high=100, size=2000).reshape(-1, 1)
    int_context = Context(meta_types=[MetaType.DISCRETE])
    int_context.add_domains(int_sample)
    int_leaf = create_piecewise_leaf(int_sample, int_context, scope=[0], prior_weight=None)
    int_expectation = Expectation(int_leaf, {0})
    self.assertAlmostEqual(np.mean(int_sample[:, 0]), int_expectation[0, 0], 3)
def learn_leaf_from_context(data, ds_context, scope):
    """
    Create the leaf whose type the context registers for the scoped variable.

    The type is looked up in ``ds_context.parametric_types`` and matched, in
    this order, against Parametric, Conditional, Histogram and PiecewiseLinear;
    the first match decides which ``create_*_leaf`` function is called.

    :param data: np.array: the data slice
    :param ds_context: Context: the context object for the data/spn
    :param scope: List: the scope of the variables (exactly one entry)
    :return: the leaf built by the matching creation function
    :raises Exception: if the registered type matches none of the four families
    """
    assert len(scope) == 1, "scope for more than one variable?"

    leaf_cls = ds_context.parametric_types[scope[0]]
    assert issubclass(leaf_cls, Leaf), 'no instance of leaf '

    # Pick the builder first, then call it once with the shared arguments.
    if issubclass(leaf_cls, Parametric):
        builder = create_parametric_leaf
    elif issubclass(leaf_cls, Conditional):
        builder = create_conditional_leaf
    elif issubclass(leaf_cls, Histogram):
        builder = create_histogram_leaf
    elif issubclass(leaf_cls, PiecewiseLinear):
        builder = create_piecewise_leaf
    else:
        raise Exception('No fitting leaf type found')

    return builder(data, ds_context, scope)
def create_leaf(data, ds_context, scope):
    """Create a piecewise leaf, always passing ``isotonic=False`` and ``prior_weight=None``.

    :param data: forwarded unchanged to ``create_piecewise_leaf``
    :param ds_context: forwarded unchanged to ``create_piecewise_leaf``
    :param scope: forwarded unchanged to ``create_piecewise_leaf``
    :return: whatever ``create_piecewise_leaf`` returns
    """
    fixed_options = {"isotonic": False, "prior_weight": None}
    return create_piecewise_leaf(data, ds_context, scope, **fixed_options)