Exemplo n.º 1
0
    def create_leaf(data, ds_context, scope):
        """
        Create the leaf node for the variable referenced by ``scope[0]``.

        REAL variables become an identity leaf when ``identity_numeric`` is
        truthy, otherwise a piecewise leaf. DISCRETE variables become a
        ``Categorical`` whose probabilities are the relative frequencies of
        the values in the first data column, optionally smoothed.

        ``identity_numeric`` and ``prior_weight`` are read from an enclosing
        scope that is not part of this block.

        :param data: np.array: the data slice; column 0 is read for DISCRETE variables
        :param ds_context: Context: supplies ``meta_types`` and ``domains``, indexed by ``scope[0]``
        :param scope: List: the scope; only its first entry is used for lookups
        :return: the created leaf
        :raises Exception: if the meta type is neither REAL nor DISCRETE
        """
        idx = scope[0]
        meta_type = ds_context.meta_types[idx]

        if meta_type == MetaType.REAL:
            if identity_numeric:
                return create_identity_leaf(data, scope)

            # A prior weight of exactly 0 is forwarded as None; any other
            # value (including None) is passed through unchanged.
            effective_prior = None if prior_weight == 0. else prior_weight
            return create_piecewise_leaf(data, ds_context, scope, prior_weight=effective_prior)

        if meta_type == MetaType.DISCRETE:
            values, counts = np.unique(data[:, 0], return_counts=True)

            # One slot per domain entry; each observed value is used directly
            # as its slot index.
            # NOTE(review): assumes values are encoded as 0..len(domain)-1 -- confirm.
            sorted_counts = np.zeros(len(ds_context.domains[idx]), dtype=np.float64)
            for value, count in zip(values, counts):
                sorted_counts[int(value)] = count

            p = sorted_counts / data.shape[0]

            # Regularization: the prior weight is added to every relative
            # frequency before renormalizing. The None check keeps a missing
            # prior weight (accepted by the REAL branch) from raising TypeError.
            if prior_weight is not None and prior_weight > 0.:
                p += prior_weight
            p = p / np.sum(p)

            return Categorical(p, scope)

        raise Exception("Method learn_mspn_for_aqp(...) cannot create leaf for " + str(meta_type))
Exemplo n.º 2
0
 def test_PWL_no_variance(self):
     """Creating a piecewise leaf with hist_source="kde" from two identical values must raise AssertionError."""
     constant_data = np.array([1.0, 1.0]).reshape(-1, 1)
     context = Context([MetaType.REAL])
     context.add_domains(constant_data)

     # Callable form of assertRaises: the leaf construction itself is the
     # call that is expected to fail.
     self.assertRaises(
         AssertionError,
         create_piecewise_leaf,
         constant_data,
         context,
         scope=[0],
         hist_source="kde",
     )
    def test_sample_range(self):
        """
        Sample from a piecewise leaf restricted to numeric ranges and check
        that the samples respect those ranges.

        Three cases are covered: a single point (20), one interval
        ([20, 100]) and a union of two intervals ([10, 13] and [20, 100]).
        """
        np.random.seed(10)
        data = np.random.normal(20, scale=5, size=1000).reshape((1000, 1))
        meta_types = [MetaType.REAL]
        domains = [[np.min(data[:, 0]), np.max(data[:, 0])]]
        ds_context = Context(meta_types=meta_types, domains=domains)
        rand_gen = np.random.RandomState(100)
        pwl = create_piecewise_leaf(data,
                                    ds_context,
                                    scope=[0],
                                    prior_weight=None)

        # Point range: every sample has to be the point itself.
        ranges = np.array([NumericRange([[20]])])
        samples = SamplingRange.sample_piecewise_node(pwl, 10, rand_gen,
                                                      ranges)
        self.assertEqual(len(samples), 10)
        self.assertAlmostEqual(np.average(samples), 20)

        # Single interval: compare the samples against the bounds directly.
        # Testing the truthiness of an already-filtered array (as in
        # all(samples[samples > 20])) would pass for any non-zero samples.
        ranges = np.array([NumericRange([[20, 100]])])
        samples = SamplingRange.sample_piecewise_node(pwl, 10, rand_gen,
                                                      ranges)
        self.assertTrue(np.all(samples >= 20))
        self.assertTrue(np.all(samples <= 100))

        # Two intervals: nothing may fall into the gap (13, 20) or below 10.
        ranges = np.array([NumericRange([[10, 13], [20, 100]])])
        samples = SamplingRange.sample_piecewise_node(pwl, 10, rand_gen,
                                                      ranges)
        self.assertFalse(np.any((samples > 13) & (samples < 20)))
        self.assertFalse(np.any(samples < 10))
Exemplo n.º 4
0
    def test_PWL_no_variance(self):
        """A piecewise leaf learned with hist_source="kde" from two identical values gives each row likelihood 2/6."""
        constant_data = np.array([1.0, 1.0]).reshape(-1, 1)

        context = Context([MetaType.REAL])
        context.add_domains(constant_data)

        pwl_leaf = create_piecewise_leaf(
            constant_data,
            context,
            scope=[0],
            hist_source="kde",
        )
        log_probs = log_likelihood(pwl_leaf, constant_data)
        prob = np.exp(log_probs)

        # Both rows hold the same value, so both share the expected likelihood.
        for row in (0, 1):
            self.assertAlmostEqual(float(prob[row]), 2 / 6)
Exemplo n.º 5
0
    def test_Piecewise_expectations_with_evidence(self):
        """
        Expectation of feature 0 in a two-component mixture, conditioned on
        the discrete feature 1.

        Component A (feature 1 == 0) draws feature 0 from N(100, 5) and
        component B (feature 1 == 1) from N(50, 5). With feature 1 given as
        evidence, the expectation of feature 0 is compared against the
        empirical mean of the matching component, to 2 decimal places.
        """
        n_rows = 20000

        adata = np.zeros((n_rows, 2))
        adata[:, 1] = 0
        adata[:, 0] = np.random.normal(loc=100.0, scale=5.00, size=adata.shape[0])

        bdata = np.zeros_like(adata)
        bdata[:, 1] = 1
        bdata[:, 0] = np.random.normal(loc=50.0, scale=5.00, size=bdata.shape[0])

        data = np.vstack((adata, bdata))

        ds_context = Context(meta_types=[MetaType.REAL, MetaType.DISCRETE])
        ds_context.parametric_types = [None, Categorical]
        ds_context.add_domains(data)

        def build_branch(rows):
            # Product of a piecewise leaf over feature 0 and a parametric
            # leaf over feature 1, both learned from the same component rows.
            numeric_leaf = create_piecewise_leaf(
                rows[:, 0].reshape(-1, 1),
                ds_context,
                scope=[0],
                prior_weight=None,
                hist_source="numpy")
            label_leaf = create_parametric_leaf(
                rows[:, 1].reshape(-1, 1), ds_context, scope=[1])
            return numeric_leaf * label_leaf

        left = build_branch(adata)
        right = build_branch(bdata)

        spn = 0.5 * left + 0.5 * right

        # Row 0 conditions on feature 1 == 0, row 1 on feature 1 == 1;
        # feature 0 is left as NaN in both rows.
        evidence = np.array([[np.nan, 0.0],
                             [np.nan, 1.0]])
        expectation = Expectation(spn, {0}, evidence)

        for row, component in enumerate((adata, bdata)):
            self.assertAlmostEqual(np.mean(component[:, 0]), expectation[row, 0], 2)
    def test_inference_range(self):
        """
        Check range likelihoods of a piecewise leaf learned from seeded
        N(20, 5) samples against fixed reference values.

        Each case pairs the interval list handed to ``NumericRange`` with
        the expected result of ``piecewise_likelihood_range``.
        """
        np.random.seed(10)
        data = np.random.normal(20, scale=5, size=1000).reshape((1000, 1))
        numpy_data = np.array(data, np.float64)
        column = numpy_data[:, 0]
        ds_context = Context(meta_types=[MetaType.REAL],
                             domains=[[np.min(column), np.max(column)]])
        pwl = create_piecewise_leaf(data,
                                    ds_context,
                                    scope=[0],
                                    prior_weight=None)

        # (intervals, expected probability), evaluated in this order.
        cases = (
            ([[20]], 0.086475210674),
            ([[21]], 0.0855907611968),
            ([[19]], 0.0833451329643),
            ([[-20]], 0),
            ([[20, 100]], 0.493416517396),
            ([[-20, 20]], 0.506583482604),
            ([[-20, 100]], 1),
            ([[-20, -10]], 0),
        )

        for intervals, expected in cases:
            ranges = np.array([[NumericRange(intervals)]])
            likelihoods = InferenceRange.piecewise_likelihood_range(pwl, ranges)
            self.assertAlmostEqual(likelihoods[0][0], expected)
Exemplo n.º 7
0
    def test_PWL(self):
        """
        Smoke test: learn a piecewise leaf with hist_source="kde" from a
        two-component Gaussian mixture and evaluate its likelihood on the
        training data. Nothing is asserted on the resulting values.
        """
        first_component = np.random.normal(10, 5, (300, 1))
        second_component = np.random.normal(20, 10, (700, 1))
        data = np.vstack((first_component, second_component))

        ds_context = Context([MetaType.REAL])
        ds_context.add_domains(data)

        leaf = create_piecewise_leaf(
            data,
            ds_context,
            scope=[0],
            prior_weight=None,
            hist_source="kde",
        )
        log_probs = log_likelihood(leaf, data)
        prob = np.exp(log_probs)
Exemplo n.º 8
0
    def test_Piecewise_expectations(self):
        """
        The expectation of a single piecewise leaf should match the
        empirical mean of its training data, for a REAL column (to 2
        decimal places) and for a DISCRETE column (to 3 decimal places).
        """

        def leaf_expectation(samples, meta_type):
            # Learn a piecewise leaf over the single column and return the
            # expectation computed for feature 0.
            context = Context(meta_types=[meta_type])
            context.add_domains(samples)
            leaf = create_piecewise_leaf(samples,
                                         context,
                                         scope=[0],
                                         prior_weight=None)
            return Expectation(leaf, {0})

        real_data = np.random.normal(loc=100.0, scale=5.00,
                                     size=20000).reshape(-1, 1)
        real_expectation = leaf_expectation(real_data, MetaType.REAL)
        self.assertAlmostEqual(np.mean(real_data[:, 0]), real_expectation[0, 0], 2)

        discrete_data = np.random.randint(0, high=100, size=2000).reshape(-1, 1)
        discrete_expectation = leaf_expectation(discrete_data, MetaType.DISCRETE)
        self.assertAlmostEqual(np.mean(discrete_data[:, 0]), discrete_expectation[0, 0], 3)
Exemplo n.º 9
0
def learn_leaf_from_context(data, ds_context, scope):
    """
    Pick the leaf constructor that matches the type registered in the context
    for the scoped variable, and build the leaf with it.

    The registered type is checked against Parametric, Conditional, Histogram
    and PiecewiseLinear, in that order; the first match decides the constructor.

    :param data: np.array: the data slice
    :param ds_context: Context: the context object for the data/spn
    :param scope: List: the scope of the variables (exactly one entry)
    :return: the leaf built by the matching constructor
    :raises Exception: if the registered type matches none of the known leaf families
    """
    assert len(scope) == 1, "scope for more than one variable?"

    leaf_type = ds_context.parametric_types[scope[0]]
    assert issubclass(leaf_type, Leaf), 'no instance of leaf '

    # Select the builder first, then call it once with the shared arguments.
    if issubclass(leaf_type, Parametric):
        builder = create_parametric_leaf
    elif issubclass(leaf_type, Conditional):
        builder = create_conditional_leaf
    elif issubclass(leaf_type, Histogram):
        builder = create_histogram_leaf
    elif issubclass(leaf_type, PiecewiseLinear):
        builder = create_piecewise_leaf
    else:
        raise Exception('No fitting leaf type found')

    return builder(data, ds_context, scope)
Exemplo n.º 10
0
def create_leaf(data, ds_context, scope):
    """
    Build a piecewise leaf for ``scope`` with ``isotonic=False`` and
    ``prior_weight=None``.

    :param data: the data slice forwarded to ``create_piecewise_leaf``
    :param ds_context: the context forwarded to ``create_piecewise_leaf``
    :param scope: the scope forwarded to ``create_piecewise_leaf``
    :return: whatever ``create_piecewise_leaf`` returns
    """
    fixed_options = {"isotonic": False, "prior_weight": None}
    return create_piecewise_leaf(data, ds_context, scope, **fixed_options)