Exemplo n.º 1
0
    def test_Histogram_discrete_inference(self):
        """Check discrete histogram likelihoods for alpha=False and alpha=True.

        The sample holds the value 1 twice, 2 once and 3 three times. With
        ``alpha=False`` the expected likelihoods are the relative frequencies
        (n / 6); with ``alpha=True`` they are (n + 1) / 9.
        """
        samples = [1, 1, 2, 3, 3, 3]
        scenarios = (
            (False, (2 / 6, 2 / 6, 1 / 6, 3 / 6, 3 / 6, 3 / 6)),
            (True, (3 / 9, 3 / 9, 2 / 9, 4 / 9, 4 / 9, 4 / 9)),
        )

        for alpha, expected in scenarios:
            # Data and context are rebuilt for every scenario so the two
            # histograms are created independently of each other.
            data = np.array(samples).reshape(-1, 1)
            ds_context = Context([MetaType.DISCRETE])
            ds_context.add_domains(data)
            hist = create_histogram_leaf(data, ds_context, [0], alpha=alpha)
            prob = np.exp(log_likelihood(hist, data))

            for row, wanted in enumerate(expected):
                self.assertAlmostEqual(float(prob[row]), wanted)
Exemplo n.º 2
0
def create_piecewise_leaf(data, ds_context, scope, isotonic=False, prior_weight=0.1, hist_source="numpy"):
    """Build a univariate piecewise-linear leaf from a histogram of ``data``.

    A histogram is fitted first (``alpha=False``); its densities become the
    heights of the piecewise-linear function, padded with a zero on each side,
    and the curve is rescaled so that its trapezoidal area equals one.

    :param data: array with exactly one column (``data.shape[1] == 1``)
    :param ds_context: context object; ``ds_context.meta_types[scope[0]]``
        selects how the x positions are derived from the histogram breaks
    :param scope: list holding exactly one variable index
    :param isotonic: when true, the knots are passed through
        ``isotonic_unimodal_regression_R`` before normalisation
    :param prior_weight: weight given to a histogram built from all-NaN data
        (presumably acting as a uniform prior -- confirm against
        ``create_histogram_leaf``); ``None`` disables the mixing
    :param hist_source: forwarded to ``create_histogram_leaf`` for the data
        histogram
    :return: the ``PiecewiseLinear`` node when ``prior_weight`` is ``None``,
        otherwise ``prior_weight * uniform_hist + (1 - prior_weight) * node``
    :raises Exception: if the meta type is neither REAL nor DISCRETE
    """
    assert len(scope) == 1, "scope of univariate Piecewise for more than one variable?"
    assert data.shape[1] == 1, "data has more than one feature?"

    idx = scope[0]
    meta_type = ds_context.meta_types[idx]

    hist = create_histogram_leaf(data, ds_context, scope, alpha=False, hist_source=hist_source)
    densities = hist.densities
    bins = hist.breaks
    repr_points = hist.bin_repr_points

    if meta_type == MetaType.REAL:
        eps = 1e-8
        if len(densities) <= 1:
            # A single bin has to be delimited by exactly two breaks.
            assert len(bins) == 2
        # One knot at the centre of every bin, plus a knot just outside each
        # end of the support where the density drops to zero.
        midpoints = [lo + (hi - lo) / 2 for lo, hi in zip(bins[:-1], bins[1:])]
        x = [bins[0] - eps] + midpoints + [bins[-1] + eps]

    elif meta_type == MetaType.DISCRETE:
        tail_width = 1
        # Knots sit on the left edge of every bin; the zero-density tails are
        # placed one unit beyond the first and last knot.
        x = list(bins[:-1])
        x = [x[0] - tail_width] + x + [x[-1] + tail_width]

    else:
        # str() is required: concatenating a non-string meta type (e.g. an
        # enum member) to the message would raise TypeError instead.
        raise Exception("Invalid statistical type: " + str(meta_type))

    y = [0.0] + list(densities) + [0.0]

    assert len(densities) == len(bins) - 1
    assert len(x) == len(y), (len(x), len(y))
    x, y = np.array(x), np.array(y)

    if isotonic:
        x, y = isotonic_unimodal_regression_R(x, y)

    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; use
    # whichever this NumPy version provides.
    integrate = np.trapezoid if hasattr(np, "trapezoid") else np.trapz
    auc = integrate(y, x)
    y = y / auc

    node = PiecewiseLinear(x.tolist(), y.tolist(), repr_points, scope=scope)

    if prior_weight is None:
        return node

    # Build the all-NaN array as floating point: an integer-typed ``data``
    # cannot hold NaN, so zeros_like(data) followed by a NaN assignment would
    # raise ValueError. Floating input keeps its own dtype.
    nan_dtype = data.dtype if np.issubdtype(data.dtype, np.floating) else float
    uniform_data = np.full(data.shape, np.nan, dtype=nan_dtype)
    uniform_hist = create_histogram_leaf(uniform_data, ds_context, scope, alpha=False)
    return prior_weight * uniform_hist + (1 - prior_weight) * node
Exemplo n.º 3
0
    def test_Histogram_expectations(self):
        """Check that a histogram leaf's expectation matches the sample mean.

        Runs once on 20000 standard-normal draws (REAL) and once on 20000
        integers drawn from [0, 100) (DISCRETE); both comparisons use three
        decimal places.
        """

        def assert_expectation_matches_mean(samples, meta_type):
            column = samples.reshape(-1, 1)
            context = Context(meta_types=[meta_type])
            context.add_domains(column)
            leaf = create_histogram_leaf(column, context, scope=[0])
            expected_value = Expectation(leaf, {0})
            self.assertAlmostEqual(np.mean(column[:, 0]), expected_value[0, 0], 3)

        assert_expectation_matches_mean(np.random.randn(20000), MetaType.REAL)
        assert_expectation_matches_mean(
            np.random.randint(0, high=100, size=20000), MetaType.DISCRETE
        )
Exemplo n.º 4
0
    def test_histogram_to_str_and_back(self):
        """Build a discrete histogram leaf and hand it to the round-trip check."""
        samples = np.array([1, 1, 2, 3, 3, 3]).reshape(-1, 1)
        context = Context([MetaType.DISCRETE])
        context.add_domains(samples)

        leaf = create_histogram_leaf(samples, context, [0], alpha=False)
        self.check_obj_and_reconstruction(leaf)
Exemplo n.º 5
0
 def test_histogram_leaf(self):
     """Check that the MPE of a discrete histogram leaf is its most frequent value.

     The value 3 appears in three of the six samples, so querying with a
     single NaN entry is expected to yield [[3]].
     """
     samples = np.array([1, 1, 2, 3, 3, 3]).reshape(-1, 1)
     context = Context([MetaType.DISCRETE])
     context.add_domains(samples)
     leaf = create_histogram_leaf(samples, context, [0], alpha=False)

     query = np.array([[np.nan]])
     expected = np.array([[3]])
     most_probable = mpe(leaf, query)
     self.assertTrue(np.array_equal(most_probable, expected), "mpe should be 3")
Exemplo n.º 6
0
 def test_valid_histogram(self):
     """Check that a KDE-sourced histogram of a skewed sample has several bins.

     The sample consists of the values 1, 5, 7, 10, 20 and 30 repeated
     1, 20, 1, 50, 1 and 10 times respectively.
     """
     np.random.seed(17)
     values = [1, 5, 7, 10, 20, 30]
     counts = [1, 20, 1, 50, 1, 10]
     data = np.repeat(values, counts).reshape((-1, 1))

     ds_context = Context([MetaType.REAL])
     ds_context.add_domains(data)

     leaf = create_histogram_leaf(data, ds_context, [0], alpha=False, hist_source="kde")
     self.assertGreater(len(leaf.bin_repr_points), 1)
Exemplo n.º 7
0
def create_spmn_leaf(data, ds_context, scope):
    """Create the leaf for a single variable of an SPMN.

    A histogram leaf is always fitted to ``data``. When the variable's meta
    type is ``MetaType.UTILITY`` the histogram's breaks, densities and
    representative points are wrapped in a ``Utility`` node; otherwise the
    histogram leaf itself is returned.

    :param data: array with exactly one column
    :param ds_context: context object providing ``meta_types``
    :param scope: list holding exactly one variable index
    :return: a ``Utility`` node or the histogram leaf
    """
    assert len(
        scope
    ) == 1, "scope of univariate histogram for more than one variable?"
    assert data.shape[1] == 1, "data has more than one feature?"

    variable = scope[0]
    is_utility = ds_context.meta_types[variable] == MetaType.UTILITY

    leaf = create_histogram_leaf(data, ds_context, scope)
    if not is_utility:
        return leaf

    return Utility(leaf.breaks,
                   leaf.densities,
                   leaf.bin_repr_points,
                   scope=variable)
Exemplo n.º 8
0
 def test_mixture_gaussians(self):
     """Compare a KDE-sourced histogram with the true two-Gaussian mixture.

     400 points are drawn, half from N(10, 1) and half from N(30, 1). The
     leaf's likelihood is evaluated on 1000 grid points over [0, 60] plus the
     training points, and the summed absolute deviation from the true mixture
     density must not exceed 7.
     """
     from scipy.stats import norm

     np.random.seed(17)
     left_mode = np.random.normal(10, 1, size=200)
     right_mode = np.random.normal(30, 1, size=200)
     data = np.concatenate([left_mode, right_mode]).reshape((-1, 1))

     ds_context = Context([MetaType.REAL])
     ds_context.add_domains(data)
     hist = create_histogram_leaf(data, ds_context, [0], alpha=False, hist_source="kde")

     grid = np.sort(np.concatenate([np.linspace(0, 60, 1000), data[:, 0]]))
     true_density = 0.5 * norm.pdf(grid, 10, 1) + 0.5 * norm.pdf(grid, 30, 1)
     estimated = likelihood(hist, grid.reshape((-1, 1)))

     error = np.sum(np.abs(estimated[:, 0] - true_density))
     self.assertLessEqual(error, 7)
Exemplo n.º 9
0
def learn_leaf_from_context(data, ds_context, scope):
    """
    Pick the leaf constructor that matches the type stored in the context
    :param data: np.array: the data slice
    :param ds_context: Context: the context object for the data/spn
    :param scope: List: the scope of the variables (exactly one entry)
    :return: the leaf built by the matching constructor
    """
    assert len(scope) == 1, "scope for more than one variable?"

    leaf_type = ds_context.parametric_types[scope[0]]
    assert issubclass(leaf_type, Leaf), 'no instance of leaf '

    # The checks run in a fixed order; the first matching base class wins.
    if issubclass(leaf_type, Parametric):
        builder = create_parametric_leaf
    elif issubclass(leaf_type, Conditional):
        builder = create_conditional_leaf
    elif issubclass(leaf_type, Histogram):
        builder = create_histogram_leaf
    elif issubclass(leaf_type, PiecewiseLinear):
        builder = create_piecewise_leaf
    else:
        raise Exception('No fitting leaf type found')

    return builder(data, ds_context, scope)
Exemplo n.º 10
0
def create_leaf(data, ds_context, scope):
    """Create the leaf for ``scope``: a histogram leaf with ``alpha=0.005``."""
    # NOTE: a piecewise-linear leaf (create_piecewise_leaf with
    # isotonic=False, prior_weight=0.01) was used here previously.
    histogram_options = {"alpha": 0.005}
    return create_histogram_leaf(data, ds_context, scope, **histogram_options)
Exemplo n.º 11
0
# Demo script: builds a small histogram-based SPN and compares the Python
# log-likelihood with the one computed through the TensorFlow graph.
if __name__ == '__main__':
    # Defined elsewhere; presumably registers histogram inference -- confirm.
    add_histogram_inference_support()
    np.random.seed(17)
    # 2000 draws from N(10, 0.01) followed by 2000 draws from N(30, 10),
    # arranged as 400 rows of 10 columns.
    data = np.random.normal(10, 0.01, size=2000).tolist() + np.random.normal(
        30, 10, size=2000).tolist()
    data = np.array(data).reshape((-1, 10))
    # Clamp negative values to zero, then convert everything to integers.
    data[data < 0] = 0
    data = (data * 1).astype(int)

    # Every column is declared DISCRETE; the domains are computed here,
    # before columns 0 and 1 are overwritten with constants below.
    ds_context = Context(meta_types=[MetaType.DISCRETE] * data.shape[1])
    ds_context.add_domains(data)

    data[:, 0] = 0
    data[:, 1] = 1

    # NOTE(review): the learned SPN is overwritten by the next assignment and
    # never evaluated -- confirm whether this call is only a smoke test.
    spn = learn(data, ds_context)
    # Product of two KDE-sourced histogram leaves over columns 0 and 1
    # (also overwritten by the mixture assigned right after).
    spn = create_histogram_leaf(data[:, 0].reshape((-1, 1)), ds_context, [0], alpha=False, hist_source="kde") * \
          create_histogram_leaf(data[:, 1].reshape((-1, 1)), ds_context, [1], alpha=False, hist_source="kde")

    # Weighted sum (0.3 / 0.7) of two histogram leaves that are both built
    # from column 0 with scope [0]; this is the SPN that is evaluated.
    spn = 0.3 * create_histogram_leaf(data[:, 0].reshape((-1, 1)), ds_context, [0], alpha=False, hist_source="kde") + \
          0.7 * create_histogram_leaf(data[:, 0].reshape((-1, 1)), ds_context, [0], alpha=False, hist_source="kde")

    # Reference log-likelihood from the Python implementation.
    py_ll = log_likelihood(spn, data)

    # Same SPN translated to a TensorFlow graph and evaluated on the same data.
    tf_graph, placeholder = spn_to_tf_graph(spn, data)

    log_tf_out = eval_tf(tf_graph, placeholder, data)

    # Prints True only if every entry of both results is close.
    print("results are similar for Log TF and Python?",
          np.all(np.isclose(py_ll, log_tf_out)))