Exemplo n.º 1
0
 def __init__(self, vtree_filename, sdd_filename):
     """Build a PSDD from a vtree/SDD file pair and keep it on ``self.beta``.

     Args:
         vtree_filename: path handed to ``Vtree.read``.
         sdd_filename: path handed to ``io.sdd_read``.
     """
     # The vtree is parsed first; both managers below are constructed from it.
     tree = Vtree.read(vtree_filename)
     sdd_manager = SddManager(tree)
     sdd_root = io.sdd_read(sdd_filename, sdd_manager)
     psdd_manager = PSddManager(tree)
     # Only the normalized copy is stored; the managers remain local names.
     self.beta = psdd_manager.copy_and_normalize_sdd(sdd_root, tree)
Exemplo n.º 2
0
def run_test(vtree_filename,sdd_filename,seed=0,enum_models=0):
    """Load an SDD, turn it into a randomly parameterised PSDD and report on it.

    Prints size/count statistics for the SDD and the PSDD, the result of
    ``beta.mpe()`` and, when ``enum_models`` is non-zero, models in the order
    produced by ``beta.enumerate_mpe``.

    Args:
        vtree_filename: path passed to ``Vtree.read``.
        sdd_filename: path passed to ``io.sdd_read``.
        seed: seed forwarded to ``Prior.random_parameters``.
        enum_models: maximum number of models to enumerate; a falsy value
            skips the enumeration step.

    Returns:
        The tuple ``(beta, pmanager)``: the PSDD and the manager that built it.
    """
    max_printed = 10  # cap on how many enumerated models are echoed

    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha, vtree)
        # Parameters are drawn at random (seeded), not set from a smoothing prior.
        Prior.random_parameters(beta, seed=seed)

    # PRINT SOME STATS
    # print(...) with one pre-formatted argument behaves the same on
    # Python 2 and Python 3.
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))

    # The full table is only printed for small variable counts.
    if beta.vtree.var_count <= 10:
        print(beta.as_table())

    mpe_val, mpe_inst = beta.mpe()
    # The MPE value is divided by theta_sum unless is_false_sdd is set.
    if not beta.is_false_sdd:
        mpe_val = mpe_val/beta.theta_sum
    print("mpe: %s %.8f" % (mpe_inst, mpe_val))

    if enum_models:
        models = []
        with Timer("enumerating %d models" % enum_models):
            for model in beta.enumerate_mpe(pmanager):
                models.append(model)
                if len(models) >= enum_models:
                    break

        for model in models[:max_printed]:
            print(model)
        print("%d models (%d max)" % (len(models), max_printed))
        # (A disabled sanity pass used to re-check each model with
        # alpha.is_model at this point.)

    return beta, pmanager
Exemplo n.º 3
0
def run_test(vtree_filename, sdd_filename,
             print_models=10, count_models=100, enum_models=0):
    """Read an SDD, time its model counting, then print and re-check models.

    Args:
        vtree_filename: path passed to ``Vtree.read``.
        sdd_filename: path passed to ``io.sdd_read``.
        print_models: upper bound on the models printed (requested with
            ``lexical=True``); a falsy value skips printing.
        count_models: how many times ``model_count`` is called inside the
            timed loop.
        enum_models: upper bound on the models enumerated and then re-checked
            with ``is_model``; a falsy value skips this phase.

    Returns:
        ``(alpha, manager)`` -- the SDD that was read and its ``SddManager``.
    """
    # --- load ---
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # --- repeated counting, all inside one timer ---
    with Timer("counting %d models" % count_models):
        for _ in range(count_models):
            alpha.model_count(vtree)

    # --- statistics ---
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))

    if print_models:
        rendered = []
        with Timer("enumerating models"):
            for model in alpha.models(vtree, lexical=True):
                # A model iterates as (var, val) pairs; only the values are shown.
                rendered.append("".join(str(val) for _var, val in model))
                if len(rendered) >= print_models:
                    break
        for text in rendered:
            print(text)
        print("%d models (%d max)" % (len(rendered), print_models))

    if enum_models:
        collected = []

        with Timer("enumerating %d models" % enum_models):
            for model in alpha.models(vtree):
                collected.append(model)
                if len(collected) >= enum_models:
                    break

        with Timer("evaluating %d models" % enum_models):
            for model in collected:
                if not alpha.is_model(model):
                    print("error: non-model", model)
                if not alpha._is_bits_and_data_clear():  # random check
                    print("error: bits or data not clear")

    return alpha, manager
Exemplo n.º 4
0
def run_test(vtree_filename,sdd_filename,
             print_models=10,count_models=100,enum_models=0):
    """Read an SDD, time repeated model counting, and list/validate models.

    The body avoids Python 2-only syntax: every ``print`` call receives one
    pre-formatted string, which produces the same output on Python 2 and 3.

    Args:
        vtree_filename: path passed to ``Vtree.read``.
        sdd_filename: path passed to ``io.sdd_read``.
        print_models: maximum number of models printed (requested with
            ``lexical=True``); falsy disables printing.
        count_models: number of ``model_count`` calls made in the timed loop.
        enum_models: maximum number of models enumerated and re-checked with
            ``is_model``; falsy disables the phase.

    Returns:
        The tuple ``(alpha, manager)``.
    """
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename,manager)

    with Timer("counting %d models" % count_models):
        for _ in range(count_models):
            alpha.model_count(vtree)

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))

    if print_models:
        models = []
        with Timer("enumerating models"):
            for model in alpha.models(vtree,lexical=True):
                # keep only the values of the (var, val) pairs, concatenated
                st = "".join( str(val) for var,val in model )
                models.append(st)
                if len(models) >= print_models: break
        for model in models:
            print(model)
        print("%d models (%d max)" % (len(models),print_models))

    if enum_models:
        models = []

        with Timer("enumerating %d models" % enum_models):
            for model in alpha.models(vtree):
                models.append(model)
                if len(models) >= enum_models: break

        with Timer("evaluating %d models" % enum_models):
            for model in models:
                if not alpha.is_model(model):
                    # single formatted string: same text as "print a, b"
                    print("error: non-model %s" % (model,))
                if not alpha._is_bits_and_data_clear(): # random check
                    print("error: bits or data not clear")

    return alpha,manager
Exemplo n.º 5
0
def run_test(vtree_filename, sdd_filename, seed=0, enum_models=0):
    """Build two PSDDs from one SDD and print KL divergences between them.

    ``beta`` is initialised through ``UniformSmoothing(1.0)`` and ``gamma``
    through ``Prior.random_parameters`` seeded with ``seed + 1``.  The KL
    divergence is printed via ``kl_psdd`` and ``kl_psdd_alt`` and, when the
    variable count does not exceed ``PSddNode._brute_force_limit``, also via
    ``kl_psdd_brute_force``.  Finally the log prior of ``beta`` under
    ``UniformSmoothing(2.0)`` is printed.

    Args:
        vtree_filename: path passed to ``Vtree.read``.
        sdd_filename: path passed to ``io.sdd_read``.
        seed: base seed; ``gamma`` is randomised with ``seed + 1``.
        enum_models: accepted but not used by this function.

    Returns:
        The tuple ``(beta, pmanager1)``.
    """
    # --- load the SDD ---
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # --- one PSDD per manager, parameterised differently ---
    with Timer("converting to two psdds"):
        pmanager1 = PSddManager(vtree)
        pmanager2 = PSddManager(vtree)
        beta = pmanager1.copy_and_normalize_sdd(alpha, vtree)
        gamma = pmanager2.copy_and_normalize_sdd(alpha, vtree)
        smoothing = UniformSmoothing(1.0)
        smoothing.initialize_psdd(beta)
        Prior.random_parameters(gamma, seed=(seed + 1))

    # --- statistics ---
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))

    # (label template, left operand, right operand) for each KL line, in
    # the order the lines are printed.
    kl_rows = (
        ("kl(beta,gamma)  = %.8g", beta, gamma),
        ("kl(gamma,beta)  = %.8g", gamma, beta),
        ("kl(beta,beta)   = %.8g", beta, beta),
        ("kl(gamma,gamma) = %.8g", gamma, gamma),
    )

    if beta.vtree.var_count <= PSddNode._brute_force_limit:
        print("=== beta ===")
        print(beta.as_table())
        print("=== gamma ===")
        print(gamma.as_table())
        print("=== end ===")

        print("brute force:")
        for template, left, right in kl_rows:
            print(template % left.kl_psdd_brute_force(right))

    print("compute:")
    for template, left, right in kl_rows:
        print(template % left.kl_psdd(right))

    print("compute:")
    for template, left, right in kl_rows:
        print(template % left.kl_psdd_alt(right))

    # --- log prior of beta under a fresh smoothing prior ---
    ess = 2.0
    log_prior_source = UniformSmoothing(ess)
    print("log prior (ess=%.8f,mc=%d):" % (ess, beta.model_count()))
    if beta.vtree.var_count <= PSddNode._brute_force_limit:
        print("method 1 = %.8g" % log_prior_source.log_prior_brute_force(beta))
    print("method 2 = %.8g" % log_prior_source.log_prior(beta))

    return beta, pmanager1
Exemplo n.º 6
0
import glob
from pylab import *

from pypsdd import Vtree,SddManager,PSddManager,io
from pypsdd import DataSet,Prior,DirichletPrior,UniformSmoothing

# --- experiment configuration ---
k = 50 # number of training sets simulated for each dataset size
Ns = range(8,13) # size exponents: a dataset for N holds 2**N samples (2**8 .. 2**12)
vtree_filename = "pypsdd/tests/examples/example.vtree"
sdd_filename = "pypsdd/tests/examples/example.sdd"

# Read the vtree, then read the SDD into a manager built on that vtree.
print "reading vtree and sdd ..."
vtree = Vtree.read(vtree_filename)
manager = SddManager(vtree)
alpha = io.sdd_read(sdd_filename,manager)

# Two PSDD copies of the same SDD, each created by its own PSddManager.
# Only beta receives parameters in this part of the script.
# NOTE(review): gamma is presumably the model trained further down -- confirm
# against the remainder of the script.
print "converting to two psdds ..."
pmanager1 = PSddManager(vtree)
pmanager2 = PSddManager(vtree)
beta  = pmanager1.copy_and_normalize_sdd(alpha,vtree)
gamma = pmanager2.copy_and_normalize_sdd(alpha,vtree)
Prior.random_parameters(beta) # randomly parameterize beta (no seed is passed)

print "simulating datasets from beta ..."
# for each N, simulate a set of k datasets
# train_sets[j] is the list of k datasets for the j-th entry of Ns, each
# created by DataSet.simulate(beta, 2**N).
train_sets = [ [DataSet.simulate(beta,2**N) for i in xrange(k)] for N in Ns ]

print "running learning experiments ..."
# Accumulator and prior for the experiments; presumably consumed by the
# learning code that follows this point -- confirm.
results = []
prior = DirichletPrior(2.0)
Exemplo n.º 7
0
def run_test(vtree_filename,
             sdd_filename,
             N=1024,
             seed=0,
             print_models=True,
             test_learning=True):
    """Build a PSDD, optionally learn it from simulated data, and check values.

    After printing circuit statistics the function verifies, for every
    variable ``x``, that ``value(x) + value(~x)`` matches entry ``0`` of
    ``beta.marginals()`` -- first without evidence, then under the evidence
    ``{1: 1, var_count: 0}`` for the variables strictly in between.

    Args:
        vtree_filename: path passed to ``Vtree.read``.
        sdd_filename: path passed to ``io.sdd_read``.
        N: number of samples requested from ``DataSet.simulate`` for each of
            the training and testing sets.
        seed: seed for the training set; the testing set uses ``seed + 1``.
        print_models: accepted but not used by this function.
        test_learning: when true, simulate data, learn ``beta`` from the
            training set and print per-sample log likelihoods.

    Returns:
        The tuple ``(beta, pmanager)``.

    Raises:
        AssertionError: if any per-variable sum differs from the reference
            value by more than ``1e-8``.
    """
    tolerance = 1e-8

    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha, vtree)

    if test_learning:
        # SIMULATE DATASETS
        with Timer("simulating datasets"):
            prior = UniformSmoothing(1024.0)
            prior.initialize_psdd(beta)
            training = DataSet.simulate(beta, N, seed=seed)
            testing = DataSet.simulate(beta, N, seed=(seed + 1))

        # LEARN A PSDD
        with Timer("learning complete data"):
            beta.learn(training, prior)

        with Timer("evaluate log likelihood"):
            # log likelihood divided by the dataset's N attribute
            train_ll = beta.log_likelihood(training) / training.N
            test_ll = beta.log_likelihood(testing) / testing.N

    # PRINT SOME STATS
    # print(...) with one pre-formatted argument behaves the same on
    # Python 2 and Python 3.
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))

    if test_learning:
        print("================================")
        print("   training size: %d" % training.N)
        print("    testing size: %d" % testing.N)
        print(" unique training: %d" % len(training))
        print("  unique testing: %d" % len(testing))
        print("================================")
        print("     training ll: %.8f" % train_ll)
        print("      testing ll: %.8f" % test_ll)

        print("================================")
        print(training)

    var_count = pmanager.var_count

    value = beta.value()
    print("================================")
    print("      p(T) value: %.8f" % value)

    # Value of the circuit under the positive / negative literal of variable 1.
    e_inst = Inst.from_literal(1, var_count)
    pval = beta.value(evidence=e_inst)
    e_inst = Inst.from_literal(-1, var_count)
    nval = beta.value(evidence=e_inst)
    print("p(a)+p(~a) value: %.8f" % (pval + nval))
    print("      p(a) value: %.8f" % pval)
    print("     p(~a) value: %.8f" % nval)
    if value:
        # e_inst still holds the negative literal of variable 1 here
        print("     probability: %.8f" % beta.probability(evidence=e_inst))

    # Check 1: no evidence.  Every variable is conditioned on in turn, using
    # the literal of the loop variable itself.
    var_marginals = beta.marginals()
    value = var_marginals[0]
    check = True
    for var in range(1, var_count + 1):
        pval = beta.value(evidence=Inst.from_literal(var, var_count))
        nval = beta.value(evidence=Inst.from_literal(-var, var_count))
        if abs(pval + nval - value) > tolerance:
            check = False
    # Report first so a failing run shows "NOT OK" before the assertion fires.
    print(" marginals check: %s" % ("ok" if check else "NOT OK"))
    assert check

    # Check 2: fixed evidence on the first and last variable; each variable
    # in between is temporarily set to 1, then 0, then removed again.
    inst = InstMap()
    inst[1] = 1
    inst[var_count] = 0
    var_marginals = beta.marginals(evidence=inst)
    value = var_marginals[0]
    check = True
    for var in range(2, var_count):
        inst[var] = 1
        pval = beta.value(evidence=inst)
        inst[var] = 0
        nval = beta.value(evidence=inst)
        del inst[var]
        if abs(pval + nval - value) > tolerance:
            check = False
    print(" marginals check: %s" % ("ok" if check else "NOT OK"))
    assert check

    return beta, pmanager
Exemplo n.º 8
0
import glob
from pylab import *

from pypsdd import Vtree, SddManager, PSddManager, io
from pypsdd import DataSet, Prior, DirichletPrior, UniformSmoothing

# --- experiment configuration ---
k = 50  # number of training sets simulated for each dataset size
Ns = range(8, 13)  # size exponents: a dataset for N holds 2**N samples (2**8 .. 2**12)
vtree_filename = "pypsdd/tests/examples/example.vtree"
sdd_filename = "pypsdd/tests/examples/example.sdd"

# Read the vtree, then read the SDD into a manager built on that vtree.
print "reading vtree and sdd ..."
vtree = Vtree.read(vtree_filename)
manager = SddManager(vtree)
alpha = io.sdd_read(sdd_filename, manager)

# Two PSDD copies of the same SDD, each created by its own PSddManager.
# Only beta receives parameters in this part of the script.
# NOTE(review): gamma is presumably the model trained further down -- confirm
# against the remainder of the script.
print "converting to two psdds ..."
pmanager1 = PSddManager(vtree)
pmanager2 = PSddManager(vtree)
beta = pmanager1.copy_and_normalize_sdd(alpha, vtree)
gamma = pmanager2.copy_and_normalize_sdd(alpha, vtree)
Prior.random_parameters(beta)  # randomly parameterize beta (no seed is passed)

print "simulating datasets from beta ..."
# for each N, simulate a set of k datasets
# train_sets[j] is the list of k datasets for the j-th entry of Ns, each
# created by DataSet.simulate(beta, 2**N).
train_sets = [[DataSet.simulate(beta, 2**N) for i in xrange(k)] for N in Ns]

print "running learning experiments ..."
# Accumulator and prior for the experiments; presumably consumed by the
# learning code that follows this point -- confirm.
results = []
prior = DirichletPrior(2.0)
Exemplo n.º 9
0
def run_test(vtree_filename,sdd_filename,seed=0,enum_models=0):
    """Build two PSDDs from one SDD and print their KL divergences.

    ``beta`` is initialised with ``UniformSmoothing(1.0)``; ``gamma`` gets
    random parameters seeded with ``seed + 1``.  KL divergences are printed
    using ``kl_psdd`` and ``kl_psdd_alt``; the brute-force variants and the
    full tables are only produced when the variable count is at most
    ``PSddNode._brute_force_limit``.  The log prior of ``beta`` under
    ``UniformSmoothing(2.0)`` is printed last.

    The body avoids Python 2-only syntax: every ``print`` call receives one
    pre-formatted string, which produces the same output on Python 2 and 3.

    Args:
        vtree_filename: path passed to ``Vtree.read``.
        sdd_filename: path passed to ``io.sdd_read``.
        seed: base seed; ``gamma`` is randomised with ``seed + 1``.
        enum_models: accepted but not used by this function.

    Returns:
        The tuple ``(beta, pmanager1)``.
    """
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename,manager)

    # CONVERT TO PSDD
    with Timer("converting to two psdds"):
        pmanager1 = PSddManager(vtree)
        pmanager2 = PSddManager(vtree)
        beta  = pmanager1.copy_and_normalize_sdd(alpha,vtree)
        gamma = pmanager2.copy_and_normalize_sdd(alpha,vtree)
        prior = UniformSmoothing(1.0)
        prior.initialize_psdd(beta)
        Prior.random_parameters(gamma,seed=(seed+1))

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))

    # Tables and brute-force KL are gated on the variable count.
    if beta.vtree.var_count <= PSddNode._brute_force_limit:
        print("=== beta ===")
        print(beta.as_table())
        print("=== gamma ===")
        print(gamma.as_table())
        print("=== end ===")

        print("brute force:")
        print("kl(beta,gamma)  = %.8g" % beta.kl_psdd_brute_force(gamma))
        print("kl(gamma,beta)  = %.8g" % gamma.kl_psdd_brute_force(beta))
        print("kl(beta,beta)   = %.8g" % beta.kl_psdd_brute_force(beta))
        print("kl(gamma,gamma) = %.8g" % gamma.kl_psdd_brute_force(gamma))
    print("compute:")
    print("kl(beta,gamma)  = %.8g" % beta.kl_psdd(gamma))
    print("kl(gamma,beta)  = %.8g" % gamma.kl_psdd(beta))
    print("kl(beta,beta)   = %.8g" % beta.kl_psdd(beta))
    print("kl(gamma,gamma) = %.8g" % gamma.kl_psdd(gamma))

    # Same four pairs again, this time through kl_psdd_alt.
    print("compute:")
    print("kl(beta,gamma)  = %.8g" % beta.kl_psdd_alt(gamma))
    print("kl(gamma,beta)  = %.8g" % gamma.kl_psdd_alt(beta))
    print("kl(beta,beta)   = %.8g" % beta.kl_psdd_alt(beta))
    print("kl(gamma,gamma) = %.8g" % gamma.kl_psdd_alt(gamma))

    # Log prior of beta under a fresh smoothing prior.
    ess = 2.0
    prior = UniformSmoothing(ess)
    print("log prior (ess=%.8f,mc=%d):" % (ess,beta.model_count()))
    if beta.vtree.var_count <= PSddNode._brute_force_limit:
        print("method 1 = %.8g" % prior.log_prior_brute_force(beta))
    print("method 2 = %.8g" % prior.log_prior(beta))

    return beta,pmanager1
Exemplo n.º 10
0
def run_test(vtree_filename, sdd_filename, N=1024, seed=0):
    """Time several ways of drawing samples from a PSDD and print statistics.

    The PSDD is initialised with ``UniformSmoothing(1024.0)``.  Each timed
    sampling section rebinds ``training`` and ``testing``; only the
    ``DataSet`` objects created in the last section are reported.

    Args:
        vtree_filename: path passed to ``Vtree.read``.
        sdd_filename: path passed to ``io.sdd_read``.
        N: number of samples drawn for each of the training and testing sets.
        seed: seed applied via ``random.seed`` before each hand-rolled
            sampling section and passed to ``DataSet.simulate`` (the testing
            set uses ``seed + 1``).

    Returns:
        The tuple ``(beta, manager)`` -- note that this is the ``SddManager``,
        not the ``PSddManager``.
    """
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha, vtree)
        prior = UniformSmoothing(1024.0)
        prior.initialize_psdd(beta)

    # SAMPLING, variant 1: plain lists of whatever beta.simulate() returns
    with Timer("drawing samples"):
        random.seed(seed)
        training = [beta.simulate() for _ in range(N)]
        testing = [beta.simulate() for _ in range(N)]

    # SAMPLING, variant 2: tally identical samples in a dict
    with Timer("drawing samples (into dict)"):
        random.seed(seed)
        # int() starts each tally at 0, so a count equals the number of draws
        training, testing = defaultdict(int), defaultdict(int)
        for _ in range(N):
            training[tuple(beta.simulate())] += 1
        for _ in range(N):
            testing[tuple(beta.simulate())] += 1

    # SAMPLING, variant 3: hand simulate() a fresh pre-sized list each time
    with Timer("drawing samples new (list)"):
        random.seed(seed)
        width = manager.var_count + 1
        training = [beta.simulate(inst=[None] * width) for _ in range(N)]
        testing = [beta.simulate(inst=[None] * width) for _ in range(N)]

    # SAMPLING, variant 4: the library's own dataset simulation
    with Timer("simulating datasets"):
        training = DataSet.simulate(beta, N, seed=seed)
        testing = DataSet.simulate(beta, N, seed=(seed + 1))

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))
    print("================================")
    print("   training size: %d" % training.N)
    print("    testing size: %d" % testing.N)
    print(" unique training: %d" % len(training))
    print("  unique testing: %d" % len(testing))

    return beta, manager
Exemplo n.º 11
0
def run_test(vtree_filename,sdd_filename,N=1024,seed=0,
             print_models=True,test_learning=True):
    """Build a PSDD, optionally learn it from simulated data, and check values.

    For every variable ``x`` the function checks that
    ``value(x) + value(~x)`` equals entry ``0`` of ``beta.marginals()``,
    first with no evidence and then under the evidence
    ``{1: 1, var_count: 0}`` (for the variables strictly in between).

    Args:
        vtree_filename: path passed to ``Vtree.read``.
        sdd_filename: path passed to ``io.sdd_read``.
        N: number of samples requested from ``DataSet.simulate`` per dataset.
        seed: seed for the training set; the testing set uses ``seed + 1``.
        print_models: accepted but not used by this function.
        test_learning: when true, simulate data, learn ``beta`` from the
            training set and print per-sample log likelihoods.

    Returns:
        The tuple ``(beta, pmanager)``.

    Raises:
        AssertionError: if any per-variable sum is off by more than ``1e-8``.
    """
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename,manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha,vtree)

    if test_learning:
        # SIMULATE DATASETS
        with Timer("simulating datasets"):
            prior = UniformSmoothing(1024.0)
            prior.initialize_psdd(beta)
            training = DataSet.simulate(beta,N,seed=seed)
            testing  = DataSet.simulate(beta,N,seed=(seed+1))

        # LEARN A PSDD
        with Timer("learning complete data"):
            beta.learn(training,prior)

        with Timer("evaluate log likelihood"):
            train_ll = beta.log_likelihood(training)/training.N
            test_ll = beta.log_likelihood(testing)/testing.N

    # PRINT SOME STATS
    # Single-argument print(...) gives identical output on Python 2 and 3.
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))

    if test_learning:
        print("================================")
        print("   training size: %d" % training.N)
        print("    testing size: %d" % testing.N)
        print(" unique training: %d" % len(training))
        print("  unique testing: %d" % len(testing))
        print("================================")
        print("     training ll: %.8f" % train_ll)
        print("      testing ll: %.8f" % test_ll)

        print("================================")
        print(training)

    value = beta.value()
    print("================================")
    print("      p(T) value: %.8f" % value)

    # Positive and negative literal of variable 1.
    e_inst = Inst.from_literal(1,pmanager.var_count)
    pval = beta.value(evidence=e_inst)
    e_inst = Inst.from_literal(-1,pmanager.var_count)
    nval = beta.value(evidence=e_inst)
    print("p(a)+p(~a) value: %.8f" % (pval+nval))
    print("      p(a) value: %.8f" % pval)
    print("     p(~a) value: %.8f" % nval)
    if value:
        # e_inst is the negative literal of variable 1 at this point
        print("     probability: %.8f" % beta.probability(evidence=e_inst))

    # Check without evidence: the literal is built from the loop variable so
    # that each variable is actually examined.
    var_marginals = beta.marginals()
    value = var_marginals[0]
    check = True
    for var in range(1,pmanager.var_count+1):
        e_inst = Inst.from_literal(var,pmanager.var_count)
        pval = beta.value(evidence=e_inst)
        e_inst = Inst.from_literal(-var,pmanager.var_count)
        nval = beta.value(evidence=e_inst)
        if abs(pval+nval - value) > 1e-8: check = False
    # Print the verdict before asserting so "NOT OK" can actually be seen.
    print(" marginals check: %s" % ("ok" if check else "NOT OK"))
    assert check

    # Check with evidence on the first and last variable; every variable in
    # between is set to 1, then to 0, then removed from the evidence again.
    inst = InstMap()
    inst[1] = 1
    inst[pmanager.var_count] = 0
    var_marginals = beta.marginals(evidence=inst)
    value = var_marginals[0]
    check = True
    for var in range(2,pmanager.var_count):
        inst[var] = 1
        pval = beta.value(evidence=inst)
        inst[var] = 0
        nval = beta.value(evidence=inst)
        del inst[var]
        if abs(pval+nval - value) > 1e-8: check = False
    print(" marginals check: %s" % ("ok" if check else "NOT OK"))
    assert check

    return beta,pmanager