def __init__(self, vtree_filename, sdd_filename):
    """Load an SDD from disk and store its PSDD normalization.

    Reads the vtree and SDD files, copies/normalizes the SDD into a
    PSDD over the same vtree, and stores the PSDD root on ``self.beta``.
    """
    # Load the SDD for the given vtree.
    sdd_vtree = Vtree.read(vtree_filename)
    sdd_manager = SddManager(sdd_vtree)
    sdd_root = io.sdd_read(sdd_filename, sdd_manager)
    # Convert to a PSDD (the only artifact kept on the instance).
    psdd_manager = PSddManager(sdd_vtree)
    self.beta = psdd_manager.copy_and_normalize_sdd(sdd_root, sdd_vtree)
def run_test(vtree_filename, sdd_filename, seed=0, enum_models=0):
    """Read an SDD, convert it to a randomly parameterized PSDD, and
    print statistics, the MPE, and (optionally) enumerated MPE models.

    Fix: replaced Python-2-only ``print`` statements with ``print()``
    calls (valid on both Python 2 and 3), matching the Python-3 style
    used elsewhere in this project.

    Args:
        vtree_filename: path to the vtree file.
        sdd_filename: path to the sdd file.
        seed: seed for the random PSDD parameters.
        enum_models: if non-zero, enumerate up to this many MPE models.

    Returns:
        (beta, pmanager): the PSDD root node and its manager.
    """
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha, vtree)
        prior = UniformSmoothing(2.0)
        #prior.initialize_psdd(beta)
        Prior.random_parameters(beta, seed=seed)

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print(" sdd count: %s" % fmt(alpha.count()))
    print(" sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print(" psdd count: %s" % fmt(beta.count()))
    print(" psdd size: %s" % fmt(beta.size()))
    print("================================")
    print(" theta count: %s" % fmt(beta.theta_count()))
    print(" zero count: %s" % fmt(beta.zero_count()))
    print(" true count: %s" % fmt(beta.true_count()))

    # Full truth table only for small variable counts.
    if beta.vtree.var_count <= 10:
        print(beta.as_table())

    # MPE value is normalized by theta_sum unless the SDD is unsatisfiable.
    mpe_val, mpe_inst = beta.mpe()
    mpe_val = mpe_val if beta.is_false_sdd else mpe_val / beta.theta_sum
    print("mpe: %s %.8f" % (mpe_inst, mpe_val))

    if enum_models:
        models = []
        with Timer("enumerating %d models" % enum_models):
            for model in beta.enumerate_mpe(pmanager):
                models.append(model)
                if len(models) >= enum_models:
                    break
        for model in models[:10]:
            print(model)
        print("%d models (%d max)" % (len(models), 10))
        # Disabled sanity check retained from the original:
        # with Timer("evaluating %d models" % enum_models):
        #     for model in models:
        #         if not alpha.is_model(model):
        #             print("error: non-model", model)
        #         if not alpha._is_bits_and_data_clear():  # random check
        #             print("error: bits or data not clear")

    return beta, pmanager
def run_test(vtree_filename, sdd_filename, seed=0, enum_models=0):
    """Build two PSDDs from one SDD — one with uniform parameters
    (beta), one with random parameters (gamma) — then print statistics
    and cross-check KL-divergence implementations against brute force.

    Returns:
        (beta, pmanager1): the uniformly smoothed PSDD and its manager.
    """
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD: two independent copies of the same SDD.
    with Timer("converting to two psdds"):
        pmanager1 = PSddManager(vtree)
        pmanager2 = PSddManager(vtree)
        beta = pmanager1.copy_and_normalize_sdd(alpha, vtree)
        gamma = pmanager2.copy_and_normalize_sdd(alpha, vtree)
        # beta gets uniform parameters, gamma gets random ones.
        #prior = DirichletPrior(2.0)
        UniformSmoothing(1.0).initialize_psdd(beta)
        Prior.random_parameters(gamma, seed=seed + 1)

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print(" sdd count: %s" % fmt(alpha.count()))
    print(" sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print(" psdd count: %s" % fmt(beta.count()))
    print(" psdd size: %s" % fmt(beta.size()))
    print("================================")
    print(" theta count: %s" % fmt(beta.theta_count()))
    print(" zero count: %s" % fmt(beta.zero_count()))
    print(" true count: %s" % fmt(beta.true_count()))

    # NOTE(review): the original indentation was collapsed; here the
    # brute-force (model-enumerating) checks are guarded by the
    # var-count limit while the polynomial-time "compute" checks are
    # not -- confirm against the upstream source.
    if beta.vtree.var_count <= PSddNode._brute_force_limit:
        print("=== beta ===")
        print(beta.as_table())
        print("=== gamma ===")
        print(gamma.as_table())
        print("=== end ===")
        print("brute force:")
        print("kl(beta,gamma) = %.8g" % beta.kl_psdd_brute_force(gamma))
        print("kl(gamma,beta) = %.8g" % gamma.kl_psdd_brute_force(beta))
        print("kl(beta,beta) = %.8g" % beta.kl_psdd_brute_force(beta))
        print("kl(gamma,gamma) = %.8g" % gamma.kl_psdd_brute_force(gamma))
    print("compute:")
    print("kl(beta,gamma) = %.8g" % beta.kl_psdd(gamma))
    print("kl(gamma,beta) = %.8g" % gamma.kl_psdd(beta))
    print("kl(beta,beta) = %.8g" % beta.kl_psdd(beta))
    print("kl(gamma,gamma) = %.8g" % gamma.kl_psdd(gamma))
    print("compute:")
    print("kl(beta,gamma) = %.8g" % beta.kl_psdd_alt(gamma))
    print("kl(gamma,beta) = %.8g" % gamma.kl_psdd_alt(beta))
    print("kl(beta,beta) = %.8g" % beta.kl_psdd_alt(beta))
    print("kl(gamma,gamma) = %.8g" % gamma.kl_psdd_alt(gamma))

    # Log prior of beta under uniform smoothing, two methods.
    ess = 2.0
    prior = UniformSmoothing(ess)
    print("log prior (ess=%.8f,mc=%d):" % (ess, beta.model_count()))
    if beta.vtree.var_count <= PSddNode._brute_force_limit:
        print("method 1 = %.8g" % prior.log_prior_brute_force(beta))
    print("method 2 = %.8g" % prior.log_prior(beta))
    return beta, pmanager1
from pypsdd import Vtree,SddManager,PSddManager,io from pypsdd import DataSet,Prior,DirichletPrior,UniformSmoothing k = 50 # number of training sets Ns = range(8,13) # dataset sizes vtree_filename = "pypsdd/tests/examples/example.vtree" sdd_filename = "pypsdd/tests/examples/example.sdd" print "reading vtree and sdd ..." vtree = Vtree.read(vtree_filename) manager = SddManager(vtree) alpha = io.sdd_read(sdd_filename,manager) print "converting to two psdds ..." pmanager1 = PSddManager(vtree) pmanager2 = PSddManager(vtree) beta = pmanager1.copy_and_normalize_sdd(alpha,vtree) gamma = pmanager2.copy_and_normalize_sdd(alpha,vtree) Prior.random_parameters(beta) # randomly parameterize beta print "simulating datasets from beta ..." # for each N, simulate a set of k datasets train_sets = [ [DataSet.simulate(beta,2**N) for i in xrange(k)] for N in Ns ] print "running learning experiments ..." results = [] prior = DirichletPrior(2.0) for batch in train_sets: # for each N in Ns batch_results = [] for train in batch: # for each training set in batch
def run_test(vtree_filename, sdd_filename, N=1024, seed=0,
             print_models=True, test_learning=True):
    """Read an SDD, convert it to a PSDD, optionally learn parameters
    from simulated data, print statistics, and sanity-check marginals.

    Fixes:
      * The first marginals-check loop built evidence from the literals
        ``1``/``-1`` on every iteration (loop variable ``var`` unused),
        so only variable 1 was ever checked; it now uses ``var``/``-var``
        like the second loop below.
      * Python-2-only ``print`` statements replaced with ``print()``
        calls; ``xrange`` replaced with ``range``.

    Args:
        vtree_filename: path to the vtree file.
        sdd_filename: path to the sdd file.
        N: number of instances to simulate for training/testing.
        seed: RNG seed for dataset simulation.
        print_models: unused here; kept for interface compatibility.
        test_learning: when True, simulate data and learn parameters.

    Returns:
        (beta, pmanager): the PSDD root node and its manager.
    """
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha, vtree)

    if test_learning:
        # SIMULATE DATASETS
        with Timer("simulating datasets"):
            #prior = DirichletPrior(2.0)
            prior = UniformSmoothing(1024.0)
            prior.initialize_psdd(beta)
            training = DataSet.simulate(beta, N, seed=seed)
            testing = DataSet.simulate(beta, N, seed=(seed + 1))

        # LEARN A PSDD from complete data
        with Timer("learning complete data"):
            beta.learn(training, prior)
        with Timer("evaluate log likelihood"):
            train_ll = beta.log_likelihood(training) / training.N
            test_ll = beta.log_likelihood(testing) / testing.N

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print(" sdd count: %s" % fmt(alpha.count()))
    print(" sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print(" psdd count: %s" % fmt(beta.count()))
    print(" psdd size: %s" % fmt(beta.size()))
    print("================================")
    print(" theta count: %s" % fmt(beta.theta_count()))
    print(" zero count: %s" % fmt(beta.zero_count()))
    print(" true count: %s" % fmt(beta.true_count()))
    if test_learning:
        print("================================")
        print(" training size: %d" % training.N)
        print(" testing size: %d" % testing.N)
        print(" unique training: %d" % len(training))
        print(" unique testing: %d" % len(testing))
        print("================================")
        print(" training ll: %.8f" % train_ll)
        print(" testing ll: %.8f" % test_ll)
        print("================================")
        print(training)

    value = beta.value()
    print("================================")
    print(" p(T) value: %.8f" % value)

    # p(a) + p(~a) should recover the unconditioned value.
    e_inst = Inst.from_literal(1, pmanager.var_count)
    pval = beta.value(evidence=e_inst)
    e_inst = Inst.from_literal(-1, pmanager.var_count)
    nval = beta.value(evidence=e_inst)
    print("p(a)+p(~a) value: %.8f" % (pval + nval))
    print(" p(a) value: %.8f" % pval)
    print(" p(~a) value: %.8f" % nval)
    if value:
        print(" probability: %.8f" % beta.probability(evidence=e_inst))

    # Check p(var) + p(~var) == value for EVERY variable.
    # BUG FIX: was Inst.from_literal(1, ...) / Inst.from_literal(-1, ...),
    # leaving the loop variable unused.
    var_marginals = beta.marginals()
    value = var_marginals[0]
    check = True
    for var in range(1, pmanager.var_count + 1):
        e_inst = Inst.from_literal(var, pmanager.var_count)
        pval = beta.value(evidence=e_inst)
        e_inst = Inst.from_literal(-var, pmanager.var_count)
        nval = beta.value(evidence=e_inst)
        if abs(pval + nval - value) > 1e-8:
            check = False
    assert check
    print(" marginals check: %s" % ("ok" if check else "NOT OK"))

    # Same check under partial evidence (var 1 true, last var false).
    inst = InstMap()
    inst[1] = 1
    inst[pmanager.var_count] = 0
    var_marginals = beta.marginals(evidence=inst)
    value = var_marginals[0]
    check = True
    for var in range(2, pmanager.var_count):
        inst[var] = 1
        pval = beta.value(evidence=inst)
        inst[var] = 0
        nval = beta.value(evidence=inst)
        del inst[var]
        if abs(pval + nval - value) > 1e-8:
            check = False
    assert check
    print(" marginals check: %s" % ("ok" if check else "NOT OK"))
    return beta, pmanager
from pypsdd import Vtree, SddManager, PSddManager, io from pypsdd import DataSet, Prior, DirichletPrior, UniformSmoothing k = 50 # number of training sets Ns = range(8, 13) # dataset sizes vtree_filename = "pypsdd/tests/examples/example.vtree" sdd_filename = "pypsdd/tests/examples/example.sdd" print "reading vtree and sdd ..." vtree = Vtree.read(vtree_filename) manager = SddManager(vtree) alpha = io.sdd_read(sdd_filename, manager) print "converting to two psdds ..." pmanager1 = PSddManager(vtree) pmanager2 = PSddManager(vtree) beta = pmanager1.copy_and_normalize_sdd(alpha, vtree) gamma = pmanager2.copy_and_normalize_sdd(alpha, vtree) Prior.random_parameters(beta) # randomly parameterize beta print "simulating datasets from beta ..." # for each N, simulate a set of k datasets train_sets = [[DataSet.simulate(beta, 2**N) for i in xrange(k)] for N in Ns] print "running learning experiments ..." results = [] prior = DirichletPrior(2.0) for batch in train_sets: # for each N in Ns batch_results = [] for train in batch: # for each training set in batch
def run_test(vtree_filename, sdd_filename, seed=0, enum_models=0):
    """Build two PSDDs from one SDD — one with uniform parameters
    (beta), one with random parameters (gamma) — then print statistics
    and cross-check KL-divergence implementations against brute force.

    Fix: replaced Python-2-only ``print`` statements with ``print()``
    calls, matching the Python-3 variant of this test elsewhere in the
    project.

    Returns:
        (beta, pmanager1): the uniformly smoothed PSDD and its manager.
    """
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD: two independent copies of the same SDD.
    with Timer("converting to two psdds"):
        pmanager1 = PSddManager(vtree)
        pmanager2 = PSddManager(vtree)
        beta = pmanager1.copy_and_normalize_sdd(alpha, vtree)
        gamma = pmanager2.copy_and_normalize_sdd(alpha, vtree)
        # beta gets uniform parameters, gamma gets random ones.
        #prior = DirichletPrior(2.0)
        prior = UniformSmoothing(1.0)
        prior.initialize_psdd(beta)
        Prior.random_parameters(gamma, seed=(seed + 1))

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print(" sdd count: %s" % fmt(alpha.count()))
    print(" sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print(" psdd count: %s" % fmt(beta.count()))
    print(" psdd size: %s" % fmt(beta.size()))
    print("================================")
    print(" theta count: %s" % fmt(beta.theta_count()))
    print(" zero count: %s" % fmt(beta.zero_count()))
    print(" true count: %s" % fmt(beta.true_count()))

    # NOTE(review): original indentation was collapsed; brute-force
    # (model-enumerating) checks are guarded by the var-count limit,
    # the polynomial-time "compute" checks are not -- confirm upstream.
    if beta.vtree.var_count <= PSddNode._brute_force_limit:
        print("=== beta ===")
        print(beta.as_table())
        print("=== gamma ===")
        print(gamma.as_table())
        print("=== end ===")
        print("brute force:")
        print("kl(beta,gamma) = %.8g" % beta.kl_psdd_brute_force(gamma))
        print("kl(gamma,beta) = %.8g" % gamma.kl_psdd_brute_force(beta))
        print("kl(beta,beta) = %.8g" % beta.kl_psdd_brute_force(beta))
        print("kl(gamma,gamma) = %.8g" % gamma.kl_psdd_brute_force(gamma))
    print("compute:")
    print("kl(beta,gamma) = %.8g" % beta.kl_psdd(gamma))
    print("kl(gamma,beta) = %.8g" % gamma.kl_psdd(beta))
    print("kl(beta,beta) = %.8g" % beta.kl_psdd(beta))
    print("kl(gamma,gamma) = %.8g" % gamma.kl_psdd(gamma))
    print("compute:")
    print("kl(beta,gamma) = %.8g" % beta.kl_psdd_alt(gamma))
    print("kl(gamma,beta) = %.8g" % gamma.kl_psdd_alt(beta))
    print("kl(beta,beta) = %.8g" % beta.kl_psdd_alt(beta))
    print("kl(gamma,gamma) = %.8g" % gamma.kl_psdd_alt(gamma))

    # Log prior of beta under uniform smoothing, two methods.
    ess = 2.0
    prior = UniformSmoothing(ess)
    print("log prior (ess=%.8f,mc=%d):" % (ess, beta.model_count()))
    if beta.vtree.var_count <= PSddNode._brute_force_limit:
        print("method 1 = %.8g" % prior.log_prior_brute_force(beta))
    print("method 2 = %.8g" % prior.log_prior(beta))
    return beta, pmanager1
def run_test(vtree_filename, sdd_filename, N=1024, seed=0):
    """Benchmark several ways of drawing samples from a PSDD, then
    simulate train/test datasets via the DataSet API and print stats.

    Returns:
        (beta, manager): the PSDD root node and the *SDD* manager.
    """
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD, uniformly smoothed
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha, vtree)
        #prior = DirichletPrior(2.0)
        prior = UniformSmoothing(1024.0)
        prior.initialize_psdd(beta)

    # SIMULATE DATASETS -- variant 1: plain lists of samples
    with Timer("drawing samples"):
        random.seed(seed)
        training = [beta.simulate() for _ in range(N)]
        testing = [beta.simulate() for _ in range(N)]

    # SIMULATE DATASETS -- variant 2: counting dicts keyed by tuple
    with Timer("drawing samples (into dict)"):
        random.seed(seed)
        training = defaultdict(lambda: 1)
        testing = defaultdict(lambda: 1)
        for _ in range(N):
            training[tuple(beta.simulate())] += 1
        for _ in range(N):
            testing[tuple(beta.simulate())] += 1

    # SIMULATE DATASETS -- variant 3: caller-provided instance buffers
    with Timer("drawing samples new (list)"):
        random.seed(seed)
        training, testing = [], []
        for _ in range(N):
            scratch = [None] * (manager.var_count + 1)
            training.append(beta.simulate(inst=scratch))
        for _ in range(N):
            scratch = [None] * (manager.var_count + 1)
            testing.append(beta.simulate(inst=scratch))

    # SIMULATE DATASETS -- disabled variant kept from the original:
    # with Timer("drawing samples new (map)"):
    #     random.seed(seed)
    #     training, testing = [], []
    #     for i in xrange(N):
    #         training.append(beta.simulate())
    #     for i in xrange(N):
    #         testing.append(beta.simulate())

    # SIMULATE DATASETS -- the DataSet API result is what gets reported
    with Timer("simulating datasets"):
        training = DataSet.simulate(beta, N, seed=seed)
        testing = DataSet.simulate(beta, N, seed=(seed + 1))

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print(" sdd count: %s" % fmt(alpha.count()))
    print(" sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print(" psdd count: %s" % fmt(beta.count()))
    print(" psdd size: %s" % fmt(beta.size()))
    print("================================")
    print(" theta count: %s" % fmt(beta.theta_count()))
    print(" zero count: %s" % fmt(beta.zero_count()))
    print(" true count: %s" % fmt(beta.true_count()))
    print("================================")
    print(" training size: %d" % training.N)
    print(" testing size: %d" % testing.N)
    print(" unique training: %d" % len(training))
    print(" unique testing: %d" % len(testing))

    if manager.var_count <= PSddNode._brute_force_limit:
        pass  # placeholder in the original; no brute-force checks here

    return beta, manager
def run_test(vtree_filename, sdd_filename, N=1024, seed=0,
             print_models=True, test_learning=True):
    """Read an SDD, convert it to a PSDD, optionally learn parameters
    from simulated data, print statistics, and sanity-check marginals.

    Fixes:
      * The first marginals-check loop built evidence from the literals
        ``1``/``-1`` on every iteration (loop variable ``var`` unused),
        so only variable 1 was ever checked; it now uses ``var``/``-var``
        like the second loop below.
      * Python-2-only ``print`` statements replaced with ``print()``
        calls; ``xrange`` replaced with ``range``.

    Args:
        vtree_filename: path to the vtree file.
        sdd_filename: path to the sdd file.
        N: number of instances to simulate for training/testing.
        seed: RNG seed for dataset simulation.
        print_models: unused here; kept for interface compatibility.
        test_learning: when True, simulate data and learn parameters.

    Returns:
        (beta, pmanager): the PSDD root node and its manager.
    """
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha, vtree)

    if test_learning:
        # SIMULATE DATASETS
        with Timer("simulating datasets"):
            #prior = DirichletPrior(2.0)
            prior = UniformSmoothing(1024.0)
            prior.initialize_psdd(beta)
            training = DataSet.simulate(beta, N, seed=seed)
            testing = DataSet.simulate(beta, N, seed=(seed + 1))

        # LEARN A PSDD from complete data
        with Timer("learning complete data"):
            beta.learn(training, prior)
        with Timer("evaluate log likelihood"):
            train_ll = beta.log_likelihood(training) / training.N
            test_ll = beta.log_likelihood(testing) / testing.N

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print(" sdd count: %s" % fmt(alpha.count()))
    print(" sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print(" psdd count: %s" % fmt(beta.count()))
    print(" psdd size: %s" % fmt(beta.size()))
    print("================================")
    print(" theta count: %s" % fmt(beta.theta_count()))
    print(" zero count: %s" % fmt(beta.zero_count()))
    print(" true count: %s" % fmt(beta.true_count()))
    if test_learning:
        print("================================")
        print(" training size: %d" % training.N)
        print(" testing size: %d" % testing.N)
        print(" unique training: %d" % len(training))
        print(" unique testing: %d" % len(testing))
        print("================================")
        print(" training ll: %.8f" % train_ll)
        print(" testing ll: %.8f" % test_ll)
        print("================================")
        print(training)

    value = beta.value()
    print("================================")
    print(" p(T) value: %.8f" % value)

    # p(a) + p(~a) should recover the unconditioned value.
    e_inst = Inst.from_literal(1, pmanager.var_count)
    pval = beta.value(evidence=e_inst)
    e_inst = Inst.from_literal(-1, pmanager.var_count)
    nval = beta.value(evidence=e_inst)
    print("p(a)+p(~a) value: %.8f" % (pval + nval))
    print(" p(a) value: %.8f" % pval)
    print(" p(~a) value: %.8f" % nval)
    if value:
        print(" probability: %.8f" % beta.probability(evidence=e_inst))

    # Check p(var) + p(~var) == value for EVERY variable.
    # BUG FIX: was Inst.from_literal(1, ...) / Inst.from_literal(-1, ...),
    # leaving the loop variable unused.
    var_marginals = beta.marginals()
    value = var_marginals[0]
    check = True
    for var in range(1, pmanager.var_count + 1):
        e_inst = Inst.from_literal(var, pmanager.var_count)
        pval = beta.value(evidence=e_inst)
        e_inst = Inst.from_literal(-var, pmanager.var_count)
        nval = beta.value(evidence=e_inst)
        if abs(pval + nval - value) > 1e-8:
            check = False
    assert check
    print(" marginals check: %s" % ("ok" if check else "NOT OK"))

    # Same check under partial evidence (var 1 true, last var false).
    inst = InstMap()
    inst[1] = 1
    inst[pmanager.var_count] = 0
    var_marginals = beta.marginals(evidence=inst)
    value = var_marginals[0]
    check = True
    for var in range(2, pmanager.var_count):
        inst[var] = 1
        pval = beta.value(evidence=inst)
        inst[var] = 0
        nval = beta.value(evidence=inst)
        del inst[var]
        if abs(pval + nval - value) > 1e-8:
            check = False
    assert check
    print(" marginals check: %s" % ("ok" if check else "NOT OK"))
    return beta, pmanager