def setUp(self):
    """Build two small Bayesian networks and the CBNs derived from them.

    Sets ``self.arr`` (one list of variable pairs per network, matching
    the arrows named in the model comments below) and ``self.cbn`` (one
    ``CBN.from_bn`` result per network).  Both lists are built in the same
    order, so ``self.arr[k]`` belongs to ``self.cbn[k]``.
    """
    # A list (rather than a dict keyed by 0, 1) keeps the networks in
    # construction order without relying on dict iteration order.
    models = []
    self.arr = []

    # model a -> b
    two_node = BN(domain=Domain(),
                  new_domain_variables={
                      'a': [0, 1],
                      'b': [0, 1]
                  })
    two_node.add_cpts([
        CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
        CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]),
            child='b')
    ])
    models.append(two_node)
    self.arr.append([('a', 'b')])

    # model a -> b -> c
    three_node = BN(domain=Domain(),
                    new_domain_variables={
                        'a': [0, 1],
                        'b': [0, 1],
                        'c': [0, 1]
                    })
    three_node.add_cpts([
        CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
        CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]),
            child='b'),
        CPT(Factor(variables=['c', 'b'], data=[0.1, 0.9, 0.2, 0.8]),
            child='c')
    ])
    models.append(three_node)
    self.arr.append([('a', 'b'), ('b', 'c')])

    self.cbn = [CBN.from_bn(model) for model in models]
def runTest(self):
    """Check IncrementalCompactFactor.update against a dense Factor.

    For each of ``num_runs`` rounds: build a random dense factor, feed its
    instantiations (with their values) to an ``IncrementalCompactFactor``,
    add a randomly perturbed copy both to the dense factor and, via
    ``update``, to the compact one, then require that every marginal
    produced by ``makeFactor`` equals the corresponding marginal of the
    dense factor.
    """
    num_vars = 5
    num_vals = 10

    def swap_some(x):
        """Randomly zero a non-zero entry or make a zero entry non-zero."""
        r = random()
        if x == 0:
            if r <= 0.5:
                return randint(1, 5)
            return 0
        elif r <= 0.5:
            return 0
        return x

    for _run in xrange(num_runs):
        vs = dict(('V' + str(k), range(num_vals))
                  for k in xrange(num_vars))

        # Random data that should contain some zeros.
        f = Factor(variables=vs.keys(),
                   data=[randint(0, 20)
                         for _k in xrange(num_vals ** num_vars)],
                   domain=Domain(),
                   new_domain_variables=vs,
                   check=True)

        # One record per instantiation: the instantiation plus its value.
        records = [inst + (f[inst],) for inst in f.insts()]
        rawdata = (vs.keys(), vs, vs.keys(), records)
        cf = IncrementalCompactFactor(rawdata, rmin=0)

        # Perturbed copy: added to the dense factor directly and to the
        # compact factor through update().
        g = f.copy(copy_domain=True)
        g.map(swap_some)
        f += g

        records = [inst + (g[inst],) for inst in g.insts()]
        rawdata = (vs.keys(), vs, vs.keys(), records)
        cf.update(rawdata)

        for variables in powerset(vs.keys()):
            g = f.copy(copy_domain=True)
            g.marginalise_away(g.variables() - frozenset(variables))
            self.assertTrue(
                same_factor(g, cf.makeFactor(variables), verbose=True))
def distribution_of(model):
    """Return one normalised factor for the joint instantiation of `model`.

    The factors returned by ``model.factors()`` are multiplied together,
    the product is broadcast over all of the model's variables, and the
    result is divided by its own ``z()`` value.
    """
    joint = Factor(data=[1], domain=model)
    for factor in model.factors():
        joint *= factor
    every_variable = frozenset(model.variables())
    joint.broadcast(every_variable)
    normaliser = joint.z()
    joint /= normaliser
    return joint
def distribution_of(model):
    """Return the model's joint instantiation as a normalised factor.

    Starts from a constant factor holding 1, multiplies in every factor
    of the model, broadcasts the product to the full variable set and
    finally divides it by ``z()``.
    """
    product = Factor(data=[1], domain=model)
    for model_factor in model.factors():
        product *= model_factor
    scope = frozenset(model.variables())
    product.broadcast(scope)
    z_value = product.z()
    product /= z_value
    return product
def _main():
    """Build and return ``BNM(cpts)`` for eight chained stages.

    Stage i (1..8) contributes, in this order: a uniform CPT for ``Ui``,
    deterministic CPTs for ``Ri`` and the two X variables ``X(2i-1)`` and
    ``X(2i)``, and one CPT for each of ``Y(2i-1)`` and ``Y(2i)`` given the
    matching X variable.
    """
    # values for all variables
    binary = 0, 1
    all_cpts = []
    for stage in range(1, 9):
        # construct variable names
        u_name = 'U' + str(stage)
        r_name = 'R' + str(stage)
        x_names = 'X' + str(2 * stage - 1), 'X' + str(2 * stage)
        y_names = 'Y' + str(2 * stage - 1), 'Y' + str(2 * stage)

        # input Ui is uniform
        u_factor = Factor([u_name], [0.5, 0.5],
                          new_domain_variables={u_name: binary})
        all_cpts.append(CPT(u_factor, u_name))

        # R depends on U and, from the second stage on, on the previous R
        r_family = [r_name, u_name]
        if stage != 1:
            r_family.append('R' + str(stage - 1))

        # CPTs for R and the two X variables, created without data first
        deterministic = [
            CPT(Factor(r_family, new_domain_variables={r_name: binary}),
                r_name),
            CPT(Factor([u_name, x_names[0]],
                       new_domain_variables={x_names[0]: binary}),
                x_names[0]),
            CPT(Factor([u_name, r_name, x_names[1]],
                       new_domain_variables={x_names[1]: binary}),
                x_names[1]),
        ]

        # Fill in the data: for every parent instantiation the entry
        # selected by the parity of the parent values gets 1.0 and the
        # other entry gets 0.0.
        # NOTE(review): presumably each index pair addresses the child's
        # two values for that parent instantiation -- confirm against CPT.
        for cpt in deterministic:
            index_pairs = cpt.parent_insts_indices()
            for parent_inst in cpt.parent_insts():
                parity = sum(parent_inst) % 2
                pair = index_pairs.next()
                cpt._data[pair[parity]] = 1.0
                cpt._data[pair[1 - parity]] = 0.0
        all_cpts.extend(deterministic)

        # each Y depends on the X with the same index
        for x_name, y_name in zip(x_names, y_names):
            y_factor = Factor([x_name, y_name], [0.6, 0.4, 0.4, 0.6],
                              new_domain_variables={y_name: binary})
            all_cpts.append(CPT(y_factor, y_name))
    return BNM(all_cpts)
def rand_factor(vs):
    """Return a Factor over the variables of `vs` with random data.

    `vs` maps each variable to its collection of values; the factor gets
    one data entry per joint instantiation, produced by
    ``rand_factor_data``, and lives in a fresh ``Domain``.
    """
    # The initial value 1 makes the product well defined for an empty
    # `vs` (a factor over no variables has exactly one entry) instead of
    # letting reduce() raise TypeError on an empty sequence.
    n = reduce(operator.mul, [len(vs[v]) for v in vs], 1)
    return Factor(variables=vs.keys(),
                  data=rand_factor_data(n),
                  domain=Domain(),
                  check=True,
                  new_domain_variables=vs)
def log_model(p):
    """Find log P(I) for a factored distribution.

    Each factor obtained by iterating over `p` is copied, the copy is
    mapped through ``rlog``, and the copies are summed into a factor that
    starts out as the constant 0.  The factors of `p` are left untouched.
    """
    total = Factor(data=[0], domain=p)
    for factor in p:
        logged = factor.copy()
        logged.map(rlog)
        total += logged
    return total
def runTest(self):
    """Check IncrementalCompactFactor marginals against a dense Factor.

    For each of ``num_runs`` rounds: build a random dense factor, feed its
    instantiations (with their values) to an ``IncrementalCompactFactor``
    and require that, for every subset of the variables, ``makeFactor``
    returns the same factor as marginalising the dense one.
    """
    num_vars = 5
    num_vals = 10
    # The run counter is deliberately unused; distinct names keep the
    # comprehension variables from clobbering it (list-comprehension
    # variables leak into the enclosing scope in Python 2).
    for _run in xrange(num_runs):
        vs = dict(('V' + str(k), range(num_vals))
                  for k in xrange(num_vars))

        # Random data that should contain some zeros.
        f = Factor(variables=vs.keys(),
                   data=[randint(0, 5)
                         for _k in xrange(num_vals ** num_vars)],
                   domain=Domain(),
                   new_domain_variables=vs,
                   check=True)

        # One record per instantiation: the instantiation plus its value.
        records = [inst + (f[inst],) for inst in f.insts()]
        rawdata = (vs.keys(), vs, vs.keys(), records)
        cf = IncrementalCompactFactor(rawdata)

        for variables in powerset(vs.keys()):
            g = f.copy(copy_domain=True)
            g.marginalise_away(g.variables() - frozenset(variables))
            self.assertTrue(
                same_factor(g, cf.makeFactor(variables), verbose=True))
def rand_bn(vs, max_potential_parents=15):
    """Build a BN over `vs` with randomly chosen parents and random CPTs.

    `vs` maps each variable to its collection of values.  For every
    variable, the candidate parents are the model's current variables,
    randomly thinned to at most `max_potential_parents`; a random subset
    of the candidates plus the child itself forms the CPT's variables.
    """
    network = BN(domain=Domain(), new_domain_variables=vs)
    for child in vs.keys():
        candidates = list(network.variables())
        # Drop random candidates until no more than the allowed number
        # of potential parents is left.
        while len(candidates) > max_potential_parents:
            candidates.remove(choice(candidates))

        family = rand_subset(candidates) | set([child])
        sizes = [len(vs[v]) for v in family]
        n_entries = reduce(operator.mul, sizes)
        factor = Factor(variables=family,
                        data=rand_factor_data(n_entries),
                        domain=network,
                        check=True)
        network *= CPT(factor, child, True, True)
    return network
def tryModel(self, model):
    """Check that dkl is 0 for identical models and positive otherwise.

    A CBN copy of `model` gets the factor of one randomly chosen variable
    replaced by a CPT built from random data; ``dkl`` between the
    original and the altered model must then be positive in both
    directions.
    """
    self.assertAlmostEqual(dkl(model, model), 0)

    cbn = CBN.from_bn(model.copy(copy_domain=True))
    v = choice(tuple(cbn.variables()))
    f = cbn[v]
    dat = rand_factor_data(len(f.data()))

    # If a random entry coincides (to 4 decimals) with the original one,
    # shift the first such entry so the new data cannot match there.
    for i, (a, b) in enumerate(zip(f.data(), dat)):
        if round(a - b, 4) == 0:
            dat[i] += 10.0
            break

    replacement = CPT(Factor(variables=f.variables(),
                             data=dat,
                             domain=cbn),
                      v,
                      cpt_force=True)
    cbn._replace_factor(v, replacement)

    kl = dkl(model, cbn)
    self.assertTrue(kl > 0)
    kl_ = dkl(cbn, model)
    self.assertTrue(kl_ > 0)
def generate_dense_bn(density, num_vars=8, num_vals=3):
    """Build a BN with random CPTs over a generated parent structure.

    The variables and their parent sets come from
    ``generate_dense_parents(density, num_vars)``; every variable takes
    the values ``0 .. num_vals - 1`` and gets a CPT filled by
    ``rand_factor_data``.  Variables missing from the parent mapping are
    treated as having no parents.

    Raises RuntimeError if `density` exceeds `num_vars`.
    """
    if density > num_vars:
        # NOTE(review): the message says "less than" although equality
        # passes the check above -- confirm which one is intended.
        raise RuntimeError('density must be less than number of variables')

    variables, parents = generate_dense_parents(density, num_vars)
    vals = dict((var, frozenset(xrange(num_vals))) for var in variables)
    bn = BN(domain=Domain(), new_domain_variables=vals)

    for child in variables:
        child_parents = parents.get(child, frozenset())
        # One entry per joint instantiation of the child and its parents.
        n = num_vals ** (len(child_parents) + 1)
        f = Factor(variables=frozenset([child]) | child_parents,
                   data=rand_factor_data(n),
                   domain=bn,
                   check=True)
        bn *= CPT(f, child, True, True)
    return bn
def rand_fr(vs, min_fact=1, max_fact=10, min_fact_vars=1, max_fact_vars=10):
    """Build an FR model over `vs` from randomly generated factors.

    `vs` maps each variable to its collection of values.  The number of
    factors is ``randrange(min_fact, max_fact)``.  Each factor is defined
    over a non-empty random selection of distinct variables whose size is
    ``randrange(min_fact_vars, limit)`` with
    ``limit = min(max_fact_vars, len(vs))``.  When that range is empty
    (for example when `vs` has a single variable) the size falls back to
    ``limit`` instead of failing inside ``randrange``.

    Raises ValueError if a factor is needed but no variable may be used.
    """
    model = FR(domain=Domain(), new_domain_variables=vs)
    names = vs.keys()
    limit = min(max_fact_vars, len(names))

    for i in xrange(randrange(min_fact, max_fact)):
        fv = []
        while len(fv) == 0:
            if min_fact_vars < limit:
                size = randrange(min_fact_vars, limit)
            elif limit >= 1:
                # randrange would raise on the empty range; use as many
                # variables as are allowed and available.
                size = limit
            else:
                raise ValueError(
                    'cannot build a factor: no variables available')
            for j in xrange(size):
                # Rejection sampling keeps the chosen variables distinct.
                v = choice(names)
                while v in fv:
                    v = choice(names)
                fv.append(v)
        fv = tuple(fv)
        n = reduce(operator.mul, [len(vs[v]) for v in fv])
        f = Factor(variables=fv,
                   data=rand_factor_data(n),
                   domain=model,
                   check=True)
        model *= f
    return model
def tryModel(self, model):
    """Check that dkl stays finite and does not shrink after a change.

    A CBN copy of `model` gets the factor of one randomly chosen variable
    replaced by a CPT built from random data.  ``dkl`` from the original
    to the altered model must be finite and at least ``dkl(model, model)``;
    ``dkl`` from the altered model to the original must be at least
    ``dkl(cbn, cbn)``.
    """
    kl = dkl(model, model)
    self.assertTrue(is_finite(kl))

    cbn = CBN.from_bn(model.copy(copy_domain=True))
    v = choice(tuple(cbn.variables()))
    f = cbn[v]
    dat = rand_factor_data(len(f.data()))

    # Shift every random entry that coincides (to 4 decimals) with the
    # original one, so the new data differ from the old in those places.
    for i, (a, b) in enumerate(zip(f.data(), dat)):
        if round(a - b, 4) == 0:
            dat[i] += uniform(1.0, 100.0)

    replacement = CPT(Factor(variables=f.variables(),
                             data=dat,
                             domain=cbn),
                      v,
                      cpt_force=True)
    cbn._replace_factor(v, replacement)

    ikl = dkl(model, cbn)
    self.assertTrue(is_finite(ikl))
    self.assertTrue(ikl >= kl)

    kl = dkl(cbn, cbn)
    self.assertTrue(is_finite(kl))
    ikl_ = dkl(cbn, model)
    self.assertTrue(ikl_ >= kl)
import operator
import pickle
import unittest
from random import choice, randrange, uniform, shuffle

from gPy.Examples import minibn, asia
from gPy.Models import FR, BN
from gPy.Parameters import Factor, CPT
from gPy.Variables import Domain

# Three binary variables: a and b have uniform CPTs, c has a deterministic
# (0/1) CPT given a and b.
# NOTE(review): going by the name, c is presumably a XOR b -- the exact
# reading of the data depends on Factor's variable ordering; confirm.
xor = BN(domain=Domain(),
         new_domain_variables={
             'a': [0, 1],
             'b': [0, 1],
             'c': [0, 1]
         })
xor.add_cpts([
    CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
    CPT(Factor(variables=['b'], data=[0.5, 0.5]), child='b'),
    CPT(Factor(variables=['c', 'a', 'b'],
               data=[1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]),
        child='c')
])

cbn_small_names = ['xor', 'minibn', 'asia']
cbn_small_test_cases = [xor, minibn, asia]


def _load_large_bn(name):
    """Unpickle ``networks/<name>_bn.pck``, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; these files must be
    # trusted fixtures.
    fobj = open('networks/' + name + '_bn.pck', 'r')
    try:
        return pickle.load(fobj)
    finally:
        fobj.close()


cbn_large_names = ['alarm', 'insurance', 'carpo']
try:
    # load the pickled large Bayes nets.
    cbn_large_test_cases = list(map(_load_large_bn, cbn_large_names))
except Exception:
    # Best effort: if any network cannot be loaded, run without the
    # large test cases.  Unlike a bare ``except:`` this does not swallow
    # KeyboardInterrupt or SystemExit.
    cbn_large_names = []
    cbn_large_test_cases = []
def disp(fn, samples):
    """Write the factor over all variables of `samples` to text file `fn`.

    The first line lists the factor's variables followed by ``count``;
    every further line holds one instantiation followed by the factor's
    value for it.  Fields are separated by single spaces.
    """
    # Build the factor before touching the file, so a failure here does
    # not leave a truncated output file behind.
    fact = samples.makeFactor(samples.variables())
    f = open(fn, 'w')
    try:
        header = [str(var) for var in fact.variables()] + ['count']
        f.write(' '.join(header) + '\n')
        for inst in fact.insts():
            row = [str(value) for value in inst] + [str(fact[inst])]
            f.write(' '.join(row) + '\n')
    finally:
        # Close the file even if writing fails part-way.
        f.close()


# Two binary variables where b's CPT is conditioned on a.
bn0 = BN(domain=Domain(), new_domain_variables={'a': [0, 1], 'b': [0, 1]})
bn0.add_cpts([
    CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
    CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]), child='b')
])
w = CausalWorld(bn0)
samples = w.observe(10000)
disp('two_depend', samples)

# Two binary variables whose CPTs each mention only the variable itself.
bn1 = BN(domain=Domain(), new_domain_variables={'a': [0, 1], 'b': [0, 1]})
bn1.add_cpts([
    CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
    CPT(Factor(variables=['b'], data=[0.3, 0.7]), child='b')
])
w = CausalWorld(bn1)
samples = w.observe(10000)
disp('two_independ', samples)
# Sample from a 10 x 10 wrap-around grid of binary variables with Gibbs
# sampling and pickle the sample to the file named by the first
# command-line argument.
import cPickle
import sys

from gPy.Variables import declare_variable
from gPy.Parameters import Factor
from gibbs import gibbs_sample
from gPy.Models import FR

# Read the output path first so a missing argument fails before the
# sampling is run.
out_path = sys.argv[1]

binvals = (0, 1)

# One binary variable per grid cell, named by its (row, column) pair.
for i in range(10):
    for j in range(10):
        declare_variable((i, j), binvals)

x = 1
y = 5
# Pairwise data: x where the two variables agree, y where they differ.
data = [
    x,  # 0,0
    y,  # 0,1
    y,  # 1,0
    x   # 1,1
]

# Each cell shares a factor with its neighbour in each of the two grid
# directions; the modulo wraps the last row/column round to the first.
factors = []
for i in range(10):
    for j in range(10):
        factors.append(Factor(((i, j), ((i + 1) % 10, j)), data))
        factors.append(Factor(((i, j), (i, (j + 1) % 10)), data))

fr = FR(factors)
# NOTE(review): the meaning of the arguments 100 and 0 is defined by
# gibbs_sample, which is not visible here -- confirm.
sample = gibbs_sample(fr, 100, 0)

out = open(out_path, 'w')
try:
    cPickle.dump(sample, out)
finally:
    # Close (and thereby flush) the output file explicitly.
    out.close()