Example #1
    def check_obj_and_reconstruction(self, obj):

        str_val = spn_to_str_equation(obj)

        obj_val = str_to_spn(str_val)

        self.assertEqual(str_val, spn_to_str_equation(obj_val))
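
A minimal self-contained sketch of the same round-trip (an illustration, assuming SPFlow's parametric leaves with text support registered; not part of the test above):

    from spn.io.Text import spn_to_str_equation, str_to_spn
    from spn.structure.leaves.parametric.Parametric import Gaussian
    from spn.structure.leaves.parametric.Text import add_parametric_text_support

    # Register the string handlers for the parametric leaves.
    add_parametric_text_support()

    leaf = Gaussian(mean=0.0, stdev=1.0, scope=[0])
    s = spn_to_str_equation(leaf)  # e.g. "Gaussian(V0|mean=0.0;stdev=1.0)"
    # Parsing the string back and re-serializing must reproduce it.
    assert s == spn_to_str_equation(str_to_spn(s))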
Example #2
    def test_eval_parametric(self):
        data = np.array([1, 1, 1, 1, 1, 1, 1], dtype=np.float32).reshape(
            (1, 7))

        spn = (Gaussian(mean=1.0, stdev=1.0, scope=[0]) *
               Exponential(l=1.0, scope=[1]) *
               Gamma(alpha=1.0, beta=1.0, scope=[2]) *
               LogNormal(mean=1.0, stdev=1.0, scope=[3]) *
               Poisson(mean=1.0, scope=[4]) * Bernoulli(p=0.6, scope=[5]) *
               Categorical(p=[0.1, 0.2, 0.7], scope=[6]))

        ll = log_likelihood(spn, data)

        tf_ll = eval_tf(spn, data)

        self.assertTrue(np.all(np.isclose(ll, tf_ll)))

        spn_copy = Copy(spn)

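        # Convert the SPN copy to a TF graph, then write the (initialized)
        # TF variables back into the copy's parameters.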
        tf_graph, data_placeholder, variable_dict = spn_to_tf_graph(
            spn_copy, data, 1)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            tf_graph_to_spn(variable_dict)

        str_val = spn_to_str_equation(spn)
        str_val2 = spn_to_str_equation(spn_copy)

        self.assertEqual(str_val, str_val2)
Example #3
def to_str():
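    # create_SPN() and marginalize() are assumed to be defined earlier in the
    # same script (as in the SPFlow documentation examples); they are not shown here.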
    spn = create_SPN()
    spn_marg = marginalize()

    from spn.io.Text import spn_to_str_equation

    print(spn_to_str_equation(spn))
    print(spn_to_str_equation(spn_marg))
Example #4
    def test_multiple_sum(self):
        spn = 0.6 * (0.4 * Gaussian(0.0, 1.0, scope=0) + 0.6 * Gaussian(
            2.0, 1.0, scope=0)) + 0.4 * Gaussian(2.0, 1.0, scope=0)

        spn_text = "(0.6*((0.4*(Gaussian(V0|mean=0.0;stdev=1.0)) + 0.6*(Gaussian(V0|mean=2.0;stdev=1.0)))) + 0.4*(Gaussian(V0|mean=2.0;stdev=1.0)))"

        print(spn_to_str_equation(spn))

        self.assertEqual(spn_to_str_equation(spn), spn_text)
Example #5
def learn_CNET():
    import numpy as np

    np.random.seed(123)

    train_data = np.random.binomial(1, [0.1, 0.2, 0.3, 0.4], size=(1000, 4))
    print(np.mean(train_data, axis=0))

    from spn.structure.leaves.cltree.CLTree import create_cltree_leaf
    from spn.structure.Base import Context
    from spn.structure.leaves.parametric.Parametric import Bernoulli

    ds_context = Context(
        parametric_types=[Bernoulli, Bernoulli, Bernoulli, Bernoulli]
    ).add_domains(train_data)

    from spn.algorithms.LearningWrappers import learn_parametric, learn_cnet

    cnet_naive_mle = learn_cnet(train_data,
                                ds_context,
                                cond="naive_mle",
                                min_instances_slice=20,
                                min_features_slice=1)
    cnet_random = learn_cnet(train_data,
                             ds_context,
                             cond="random",
                             min_instances_slice=20,
                             min_features_slice=1)

    from spn.algorithms.Statistics import get_structure_stats
    from spn.io.Text import spn_to_str_equation
    from spn.algorithms.Inference import log_likelihood

    print(get_structure_stats(cnet_naive_mle))
    print(spn_to_str_equation(cnet_naive_mle))
    ll = log_likelihood(cnet_naive_mle, train_data)
    print(np.mean(ll))
    print(get_structure_stats(cnet_random))
    print(spn_to_str_equation(cnet_random))
    ll = log_likelihood(cnet_random, train_data)
    print(np.mean(ll))

    from spn.algorithms.MPE import mpe

    train_data_mpe = train_data.astype(float)
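    # Mark the first feature as missing (NaN); mpe() imputes its most probable value.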
    train_data_mpe[:, 0] = np.nan
    print(mpe(cnet_random, train_data_mpe)[:30])

    ll = log_likelihood(cnet_random, train_data_mpe)
    print(np.mean(ll))
Example #6
    def test_sum(self):
        spn = 0.5 * Gaussian(0.0, 1.0, scope=0) + 0.5 * Gaussian(
            2.0, 1.0, scope=0)

        spn_text = "(0.5*(Gaussian(V0|mean=0.0;stdev=1.0)) + 0.5*(Gaussian(V0|mean=2.0;stdev=1.0)))"

        self.assertEqual(spn_to_str_equation(spn), spn_text)
Example #7
def save_exp(spn, ds_name, size, words, data):
    print(get_structure_stats(spn))

    path = os.path.dirname(__file__)
    outprefix = path + "/spns/%s_%s/" % (ds_name, size)

    if not os.path.exists(outprefix):
        os.makedirs(outprefix)

    with open(outprefix + "eqq.txt", "w") as text_file:
        print(spn_to_str_equation(spn, words), file=text_file)

    with open(outprefix + "spn.txt", "w") as text_file:
        print(spn_to_str_ref_graph(spn, words), file=text_file)

    with codecs.open(outprefix + "spn.json", "w", "utf-8-sig") as text_file:
        text_file.write(to_JSON(spn))

    with codecs.open(outprefix + "stats.txt", "w", "utf-8-sig") as text_file:
        text_file.write(get_structure_stats(spn))
        text_file.write("\n")
        text_file.write("ads=%s \t muls=%s\n" % fpga_count_ops(spn))

    np.savetxt(outprefix + "all_data.txt",
               data,
               delimiter=";",
               header=";".join(words))
Example #8
def spn_to_ete(spn, context=None, unroll=False, symbols=_symbols):
    assert spn is not None

    tree = Tree()
    tree.id = spn.id
    tree.node_type = type(spn)
    tree.name = symbols.get(tree.node_type, spn.name)

    queue = []

    if not isinstance(spn, Leaf):
        for i, child in enumerate(spn.children):
            if unroll:
                if child in queue:
                    return "-> " + spn.id
                else:
                    queue.append(child)
            c = spn_to_ete(child, context=context, unroll=unroll)
            if isinstance(spn, Sum):
                c.support = spn.weights[i]
            tree.add_child(c)
    else:
        feature_names = None
        if context is not None:
            feature_names = context.feature_names

        try:
            tree.name = spn_to_str_equation(spn, feature_names=feature_names)
        except Exception:
            if feature_names is None:
                feature_names = []
            tree.name += "(%s)" % ",".join(feature_names)

    return tree
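
A hedged usage sketch (assuming spn and ds_context already exist, and that the returned object is an ete3 Tree, whose get_ascii renders the tree as text):

    tree = spn_to_ete(spn, context=ds_context)
    print(tree.get_ascii(show_internal=True))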
Example #9
    def test_spn(self):
        spn = 0.4 * (Gaussian(0.0, 1.0, scope=0) * Gaussian(2.0, 3.0, scope=1)) + \
              0.6 * (Gaussian(4.0, 5.0, scope=0) * Gaussian(6.0, 7.0, scope=1))

        spn_text = "(0.4*((Gaussian(V0|mean=0.0;stdev=1.0) * Gaussian(V1|mean=2.0;stdev=3.0))) + " + \
                   "0.6*((Gaussian(V0|mean=4.0;stdev=5.0) * Gaussian(V1|mean=6.0;stdev=7.0))))"

        self.assertEqual(spn_to_str_equation(spn), spn_text)
Example #10
def learn_CLTSPN():
    import numpy as np

    np.random.seed(123)

    train_data = np.random.binomial(
        1, [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.1], size=(100, 10))
    print(np.mean(train_data, axis=0))

    from spn.structure.leaves.cltree.CLTree import create_cltree_leaf
    from spn.structure.Base import Context
    from spn.structure.leaves.parametric.Parametric import Bernoulli

    ds_context = Context(parametric_types=[
        Bernoulli,
        Bernoulli,
        Bernoulli,
        Bernoulli,
        Bernoulli,
        Bernoulli,
        Bernoulli,
        Bernoulli,
        Bernoulli,
        Bernoulli,
    ]).add_domains(train_data)

    from spn.algorithms.LearningWrappers import learn_parametric

    spn = learn_parametric(
        train_data,
        ds_context,
        min_instances_slice=20,
        min_features_slice=1,
        multivariate_leaf=True,
        leaves=create_cltree_leaf,
    )

    from spn.algorithms.Statistics import get_structure_stats

    print(get_structure_stats(spn))

    from spn.io.Text import spn_to_str_equation

    print(spn_to_str_equation(spn))

    from spn.algorithms.Inference import log_likelihood

    ll = log_likelihood(spn, train_data)
    print(np.mean(ll))
Example #11
def to_cpp2(node):
    vartype = "double"

    spn_eqq = spn_to_str_equation(
        node,
        node_to_str={
            Histogram: lambda node, x, y: "leaf_node_%s(data[i][%s])" % (node.name, node.scope[0])
        })

    spn_function = """
    {vartype} likelihood(int i, {vartype} data[][{scope_size}]){{
        return {spn_eqq};
    }}
    """.format(vartype=vartype, scope_size=len(node.scope), spn_eqq=spn_eqq)

    init_code = ""
    leaves_functions = ""
    for l in get_nodes_by_type(node, Leaf):
        leaf_name = "leaf_node_%s" % (l.name)
        leave_function, leave_init = _leaf_to_cpp[type(l)](l, leaf_name,
                                                           vartype)

        leaves_functions += leave_function
        init_code += leave_init

    return """
#include <iostream>
#include <string>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
#include <iomanip>
#include <chrono>


using namespace std;

{leaves_functions}

{spn_function}

int main() 
{{

    {init_code}
 
    vector<string> lines;
    for (string line; getline(std::cin, line);) {{
        lines.push_back( line );
    }}
    
    int n = lines.size()-1;
    int f = {scope_size};
    auto data = new {vartype}[n][{scope_size}]();
    
    for(int i=0; i < n; i++){{
        std::vector<std::string> strs;
        boost::split(strs, lines[i+1], boost::is_any_of(";"));
        
        for(int j=0; j < f; j++){{
            data[i][j] = boost::lexical_cast<{vartype}>(strs[j]);
        }}
    }}
    
    auto result = new {vartype}[n];
    
    chrono::high_resolution_clock::time_point begin = chrono::high_resolution_clock::now();
    for(int j=0; j < 1000; j++){{
        for(int i=0; i < n; i++){{
            result[i] = likelihood(i, data);
        }}
    }}
    chrono::high_resolution_clock::time_point end = chrono::high_resolution_clock::now();

    delete[] data;
    
    long double avglikelihood = 0;
    for(int i=0; i < n; i++){{
        avglikelihood += log(result[i]);
        cout << setprecision(60) << log(result[i]) << endl;
    }}
    
    delete[] result;

    cout << setprecision(15) << "avg ll " << avglikelihood/n << endl;
    
    cout << "size of variables " << sizeof({vartype}) * 8 << endl;

    cout << setprecision(15)<< "time per instance " << (chrono::duration_cast<chrono::nanoseconds>(end-begin).count()  / 1000.0) /n << " ns" << endl;
    cout << setprecision(15) << "time per task " << (chrono::duration_cast<chrono::nanoseconds>(end-begin).count()  / 1000.0)  << " ns" << endl;


    return 0;
}}
    """.format(spn_function=spn_function,
               vartype=vartype,
               leaves_functions=leaves_functions,
               scope_size=len(node.scope),
               init_code=init_code)
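
A hedged usage sketch for to_cpp2 (the file name and compiler flags are assumptions, not part of the library; spn is assumed to be an SPN with Histogram leaves):

    cpp_code = to_cpp2(spn)
    with open("spn.cpp", "w") as f:
        f.write(cpp_code)
    # Assuming the Boost headers are installed:
    #   g++ -O3 -std=c++11 spn.cpp -o spn_ll
    #   ./spn_ll < all_data.txt   # semicolon-separated values with a header row, as written by save_exp above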
Example #12
    scope=[0],
    init_weights=b_lf_1_init_weights)
b_lf_2_init_weights = {Gaussian: 0.3, Gamma: 0.7}
# b_lf_2_init_weights = np.array([.3, .7])
b_fat_right_leaf_2, _priors = type_mixture_leaf_factory(
    leaf_type='pm',
    leaf_meta_type=MetaType.REAL,
    type_to_param_map=pm_continuous_param_map,
    scope=[1],
    init_weights=b_lf_2_init_weights)
l_r_prod.children = [b_fat_right_leaf_1, b_fat_right_leaf_2]

#
# composing
rebuild_scopes_bottom_up(root)
assign_ids(root)
print(root)
print(spn_to_str_equation(root))

global_W = compute_global_type_weights(root)
print('GLOBAL_W', global_W)

global_W = compute_global_type_weights(root, aggr_type=True)
print('GLOBAL_W', global_W)

gw_map = compute_leaf_global_mix_weights(root)
print('G MIX W', gw_map)

part_map = compute_partition_id_map(root)
print('PARTITION MAP', part_map)
Example #13
    def test_prod(self):
        spn = Gaussian(0.0, 1.0, scope=0) * Gaussian(2.0, 1.0, scope=1)

        spn_text = "(Gaussian(V0|mean=0.0;stdev=1.0) * Gaussian(V1|mean=2.0;stdev=1.0))"

        self.assertEqual(spn_to_str_equation(spn), spn_text)
Example #14
print(np.amax(testdata))
#testdata = whiten(testdata)
print(np.amax(testdata))

testdata = testdata.astype(np.float32)

print(testdata.dtype)

ll = log_likelihood(thespn, testdata)
print(ll, np.exp(ll))

optimized_spn = optimize_tf(thespn,
                            testdata,
                            epochs=100,
                            optimizer=tf.train.RMSPropOptimizer(1e-4))
lloptimized = log_likelihood(optimized_spn, testdata)
print(lloptimized, np.exp(lloptimized))
print(np.mean(lloptimized))
print(np.mean(ll))

# If the optimization worked, the first printed mean (the optimized log-likelihood) should be higher than the second.

txt = spn_to_str_equation(optimized_spn)

# Uncomment if you wish to save the optimized structure:
'''
text_file = open("./optca.txt", "w")
text_file.write(txt)
text_file.close()
'''
Example #15
    def check_obj_and_reconstruction(self, obj):
        self.assertEqual(
            spn_to_str_equation(obj),
            spn_to_str_equation(str_to_spn(spn_to_str_equation(obj))))
Example #16
                                      (Gamma, {'alpha': 20, 'beta': 2}),
                                      (Exponential, {'l': 5})],
                      }

    ds_context.param_form_map = type_param_map

    spn = learn_rand_spn(data,
                         ds_context,
                         min_instances_slice=500,
                         row_a=2, row_b=5,
                         col_a=2, col_b=5,
                         col_threshold=0.3,
                         memory=None, rand_gen=rand_gen)

    add_parametric_text_support()
    print(spn_to_str_equation(spn))
    print(spn.scope)

    #
    # sampling again
    X, _Z, P = sample_instances(spn, D, N, rand_gen, return_Zs=True,
                                return_partition=True, dtype=np.float64)

    #
    # visualizing
    stats = get_structure_stats_dict(spn)
    inv_leaf_map = {l.id: spn_to_str_equation(l)  # l.__class__.__name__
                    for l in get_nodes_by_type(spn, Leaf)}
    title_str = "{} samples from spn with {} sums {} prods {} leaves".format(N,
                                                                             stats['sum'],
                                                                             stats['prod'],
Example #17
def create_random_unconstrained_type_mixture_leaf(
        data,
        ds_context,
        scope,
        min_k=MIN_K_CAT,
        max_k=MAX_K_CAT,
        max_hyper_p_cat=MAX_HYPER_P_CAT,
        min_alpha=MIN_ALPHA_GAMMA,
        max_alpha=MAX_ALPHA_GAMMA):
    """
    Method to be employed by LearnSPN-like pipeline to create a type leaf, based on convext parameters
    """
    assert len(
        scope
    ) == 1, "scope of univariate histogram for more than one variable?"
    assert data.shape[1] == 1, "data has more than one feature?"

    idx = scope[0]
    rand_gen = ds_context.rand_gen
    meta_type = ds_context.meta_types[idx]
    true_type = ds_context.types[idx]
    param_map = ds_context.param_form_map[meta_type]
    priors = ds_context.priors

    allowed_param_forms = []
    for tm, t_map in param_map.items():
        for p_class, p_map in t_map.items():
            allowed_param_forms.append((p_class, p_map))
    # n_param_forms = int(np.sum([len(t_map) for tm, t_map in param_map.items()]))
    n_param_forms = len(allowed_param_forms)
    print(n_param_forms, 'meta type', meta_type, 'true type', true_type,
          'allowed forms', allowed_param_forms)

    #
    # random init weights: only 1.0 over the true type
    # rand_init_weights = np.zeros(n_param_forms)
    rand_init_weights = {}
    allowed_types = np.array(
        [PARAM_FORM_TYPE_MAP[p_c] == true_type for p_c, p_map in allowed_param_forms],
        dtype=bool)
    n_types = int(allowed_types.sum())
    print('Allowed types', allowed_types, n_types)
    inv_type_map = {}
    j = 0
    for i, t in enumerate(allowed_types):
        if t:
            inv_type_map[j] = i
            j += 1
    nonzero_weight_id = rand_gen.choice(n_types)
    nonzero_weight_id = inv_type_map[nonzero_weight_id]
    for j, (p_c, _p_map) in enumerate(allowed_param_forms):
        if j == nonzero_weight_id:
            rand_init_weights[p_c] = 1.0
        else:
            rand_init_weights[p_c] = 0.0
    print('Selected weights', rand_init_weights)
    assert np.array([v for v in rand_init_weights.values()]).sum() == 1.0

    #
    # random defaults
    defaults = {
        Categorical: {
            'k': rand_gen.choice(range(min_k, max_k)),
            'hyper-p': rand_gen.choice(max_hyper_p_cat) + 1
        },
        Gamma: {
            'alpha': rand_gen.choice(range(min_alpha, max_alpha))
        }
    }
    print(
        '\n\trandom default params for gamma and categorical:\n\t\t{}'.format(
            defaults))

    #
    # random parameters
    param_map = random_params_from_priors(param_map, defaults, priors,
                                          rand_gen)
    print('\n\trandom parameter map sampled from priors:\n\t\t{}'.format(
        param_map))

    leaf, _leaf_prior = type_mixture_leaf_factory(
        leaf_type='pm',
        leaf_meta_type=meta_type,
        type_to_param_map=param_map,
        scope=scope,
        init_weights=rand_init_weights)
    print('\nCreated random type leaf: {}'.format(spn_to_str_equation(leaf)))

    return leaf
Example #18
    def fmt_chld(w, c):
        return str(w) + "*(" + spn_to_str_equation(c, feature_names, node_to_str) + ")"

    children_strs = map(lambda i: fmt_chld(
Example #19
from spn.algorithms.Inference import likelihood
from spn.io.Text import str_to_spn, to_JSON, spn_to_str_equation
from spn.structure.StatisticalTypes import MetaType
from spn.structure.leaves.parametric.Inference import add_parametric_inference_support

from spn.structure.leaves.parametric.Parametric import *
from spn.structure.leaves.parametric.Text import add_parametric_text_support

if __name__ == '__main__':
    add_parametric_text_support()
    add_parametric_inference_support()


    cat = Categorical(p=[0.1, 0.2, 0.7])
    cat.scope.append(0)
    print(spn_to_str_equation(cat))
    catspn = str_to_spn(spn_to_str_equation(cat))
    print(spn_to_str_equation(catspn))

    original = Gaussian(mean=0, stdev=10)
    original.scope.append(0)
    s = spn_to_str_equation(original)
    print(s)
    recovered = str_to_spn(s)

    print(str_to_spn("Gaussian(V0|mean=1;stdev=10)"))

    gamma = Gamma(alpha=1, beta=2)
    gamma.scope.append(0)
    print(spn_to_str_equation(gamma))
Example #20
    def to_text(self):
        return spn_to_str_equation(self.spn)
Example #21
                         row_b=args.beta_rows[1],
                         col_a=args.beta_cols[0],
                         col_b=args.beta_cols[1],
                         col_threshold=args.col_split_threshold,
                         memory=None,
                         rand_gen=rand_gen)

    rebuild_scopes_bottom_up(spn)
    assign_ids(spn)
    learn_end_t = perf_counter()

    stats = get_structure_stats_dict(spn)
    logging.info('\n\nLearned spn in {} with stats:\n\t{}'.format(
        learn_end_t - learn_start_t, stats))

    print(spn_to_str_equation(spn))
    print(spn.scope)

    #
    # storing the spn on file
    spn_output_path = os.path.join(out_path, 'spn.model.pkl')
    store_start_t = perf_counter()
    with open(spn_output_path, 'wb') as f:
        pickle.dump(spn, f)
    store_end_t = perf_counter()
    logging.info('Stored spn to {} (in {} secs)'.format(
        spn_output_path, store_end_t - store_start_t))

    #
    # actual sampling, generating the data
    # returning a partition matrix (discarding the Zs?)
Example #22
                                  )

    learn_end_t = perf_counter()
    learning_time = learn_end_t - learn_start_t
    logging.info(
        '\n\nLearned spn in {} secs\n\t with stats:\n\t{}'.format(learning_time, get_structure_stats_dict2(spn)))

    dump_obj(out_path, 'spn.model.pkl', spn)

    add_typed_leaves_text_support()
    add_parametric_inference_support()
    add_histogram_inference_support()
    add_histogram_text_support()
    add_piecewise_text_support()
    add_piecewise_inference_support()
    logging.info(spn_to_str_equation(spn))
    logging.info(spn.scope)

    infer_start_t = perf_counter()

    infer_end_t = perf_counter()
    #print('Done in {}'.format(infer_end_t - infer_start_t))

    samples = []
    sample = {}

    if X_miss is None:
        X = X_train
        logging.info('Training on original train')
    else:
        logging.info('Training on train split with MISSING VALUES')
Example #23
if __name__ == '__main__':
    add_parametric_inference_support()
    add_parametric_text_support()

    np.random.seed(42)
    data = np.random.randint(low=0, high=3, size=600).reshape(-1, 3)

    #print(data)

    ds_context = Context(
        meta_types=[MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE])
    ds_context.add_domains(data)
    ds_context.parametric_types = [Poisson, Poisson, Categorical]

    spn = Sum()

    for label, count in zip(*np.unique(data[:, 2], return_counts=True)):
        branch = learn_parametric(data[data[:, 2] == label, :],
                                  ds_context,
                                  min_instances_slice=10000)
        spn.children.append(branch)
        spn.weights.append(count / data.shape[0])

    spn.scope.extend(branch.scope)

    print(spn)

    print(spn_to_str_equation(spn))

    print(log_likelihood(spn, data))