Пример #1
0
 def setUp(self):
     """Build two small networks, their CBN wrappers and their arrow lists.

     Afterwards C{self.cbn[i]} is the result of C{CBN.from_bn} on the i-th
     network and C{self.arr[i]} is the list of arrows recorded for it.
     """
     # first network: a -> b
     chain2 = BN(domain=Domain(),
                 new_domain_variables={
                     'a': [0, 1],
                     'b': [0, 1]
                 })
     chain2.add_cpts([
         CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
         CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]),
             child='b')
     ])

     # second network: a -> b -> c
     chain3 = BN(domain=Domain(),
                 new_domain_variables={
                     'a': [0, 1],
                     'b': [0, 1],
                     'c': [0, 1]
                 })
     chain3.add_cpts([
         CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
         CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]),
             child='b'),
         CPT(Factor(variables=['c', 'b'], data=[0.1, 0.9, 0.2, 0.8]),
             child='c')
     ])

     # arrow lists, in the same order as the networks
     self.arr = [
         [('a', 'b')],
         [('a', 'b'), ('b', 'c')],
     ]
     self.cbn = [CBN.from_bn(network) for network in (chain2, chain3)]
Пример #2
0
    def runTest(self):
        # results from R version 2.6.2 (2008-02-08) ISBN 3-900051-07-0
        r_dx2 = 3453.429
        r_dp = 2.2e-16
        r_ddf = 4
        # regression (NOT known correct! NOT from R)
        r_ix2 = 1.08094376504
        r_ip = 0.58247332853
        r_idf = 2

        vars = ['a','b']
        vals = {'a': [0,1,2], 'b':[0,1,2]}
        ddata  = (vars,vals,vars,[(0,0,640),(0,1,1947),(0,2,648),(1,0,1709),(1,1,1364),(1,2,335),(2,0,0),(2,1,3357),(2,2,0)])
        iddata = (vars,vals,vars,[(0,0,994),(0,1,2359),(0,2,0),(1,0,1027),(1,1,2312),(1,2,0),(2,0,989),(2,1,2319),(2,2,0)])

        x2d = X2Separator(CompactFactor(ddata, domain=Domain()))
        x2i = X2Separator(CompactFactor(iddata, domain=Domain()))

        dp, dx2, dd = x2d.test_independ('a','b',set())
        self.assertEquals(dd,r_ddf)
        self.assertAlmostEquals(dx2,r_dx2,3)
        self.assertAlmostEquals(dp,r_dp,4)

        ip, ix2, id = x2i.test_independ('a','b',set())
        self.assertEquals(id,r_idf)
        self.assertAlmostEquals(ix2,r_ix2,4)
        self.assertAlmostEquals(ip,r_ip,4)
        # since there are more zero counts there should be fewer dof.
        self.assert_(id < dd)

        print '     x2 depend: p =',dp,'x2 =',dx2,'d =',dd
        print '!! x2 independ: p =',ip,'x2 =',ix2,'d =',id


        # regression (NOT known correct! NOT from R)
        r_dg2 = 4224.3017675
        r_dp = 0.0
        r_ddf = 4
        r_ig2 = 1.07917600592
        r_ip = 0.582988392395
        r_idf = 2

        g2d = G2Separator(CompactFactor(ddata, domain=Domain()))
        g2i = G2Separator(CompactFactor(iddata, domain=Domain()))
        dp, dg2, dd = g2d.test_independ('a','b',set())
        self.assertEquals(dd,r_ddf)
        self.assertAlmostEquals(dg2,r_dg2,4)
        self.assertAlmostEquals(dp,r_dp,4)

        ip, ig2, id = g2i.test_independ('a','b',set())
        self.assertEquals(id,r_idf)
        self.assertAlmostEquals(ig2,r_ig2,4)
        self.assertAlmostEquals(ip,r_ip,4)
        print '!!   g2 depend: p =',dp,'g2 =',dg2,'d =',dd
        print '!! g2 independ: p =',ip,'g2 =',ig2,'d =',id
Пример #3
0
    def runTest(self):
        # results from R version 2.6.2 (2008-02-08) ISBN 3-900051-07-0
        r_dx2 = 109.1547
        r_dp  = 0
        r_ddf = 1
        r_ix2 = 0.1236
        r_ip  = 0.7252
        r_idf = 1

        vars = ['a','b']
        vals = {'a': [0,1], 'b':[0,1]}
        ddata = (vars,vals,vars,[(0,0,1509), (0,1,3538), (1,0,1974), (1,1,2979)])
        iddata = (vars,vals,vars,[(0,0,1474), (0,1,3610), (1,0,1441), (1,1,3475)])

        x2d = X2Separator(CompactFactor(ddata, domain=Domain()))
        x2i = X2Separator(CompactFactor(iddata, domain=Domain()))

        dp, dx2, dd = x2d.test_independ('a','b',set())

        self.assertEquals(dd,r_ddf)
        self.assertAlmostEquals(dx2,r_dx2,4)
        self.assertAlmostEquals(dp,r_dp,4)

        ip, ix2, id = x2i.test_independ('a','b',set())

        self.assertEquals(id,r_idf)
        self.assertAlmostEquals(ix2,r_ix2,4)
        self.assertAlmostEquals(ip,r_ip,4)
        print '     x2 depend: p =',dp,'x2 =',dx2,'d =',dd
        print '   x2 independ: p =',ip,'x2 =',ix2,'d =',id

        # regression (NOT known correct! NOT from R)
        r_dg2 = 109.389800878 
        r_dp  = 0
        r_ddf = 1
        r_ig2 = 0.123551704118
        r_ip  = 0.725213854926 
        r_idf = 1

        g2d = G2Separator(CompactFactor(ddata, domain=Domain()))
        g2i = G2Separator(CompactFactor(iddata, domain=Domain()))
        dp, dg2, dd = g2d.test_independ('a','b',set())
        self.assertEquals(dd,r_ddf)
        self.assertAlmostEquals(dg2,r_dg2,4)
        self.assertAlmostEquals(dp,r_dp,4)

        ip, ig2, id = g2i.test_independ('a','b',set())
        self.assertEquals(id,r_idf)
        self.assertAlmostEquals(ig2,r_ig2,4)
        self.assertAlmostEquals(ip,r_ip,4)
        print '!!   g2 depend: p =',dp,'g2 =',dg2,'d =',dd
        print '!! g2 independ: p =',ip,'g2 =',ig2,'d =',id
Пример #4
0
 def setUp(self):
     """Load the Asia network and the reference marginals to compare against.

     Sets C{self.bnm} (the network read from 'Asia.dnet'), C{self.cptdict}
     (its C{factors} attribute), C{self.marginals} and
     C{self.cond_marginals} (single-variable L{Factor} objects).
     """
     from gPy.Variables import Domain
     # a private Domain: don't use the default domain
     network = BN(domain=Domain())
     network.from_dnet(read_dnet('Asia.dnet'))
     self.bnm = network
     self.cptdict = network.factors

     # taken directly from Netica output
     netica_marginals = [
         ('VisitAsia', [0.99, 0.01]),
         ('Tuberculosis', [0.9896, 0.0104]),
         ('Smoking', [0.5, 0.5]),
         ('Cancer', [0.945, 0.055]),
         ('TbOrCa', [0.93517, 0.064828]),
         ('XRay', [0.11029, 0.88971]),
         ('Bronchitis', [0.55, 0.45]),
         ('Dyspnea', [0.56403, 0.43597]),
     ]
     self.marginals = [Factor((variable, ), dist)
                       for variable, dist in netica_marginals]

     # taken directly from Netica output; 'Cancer' ([1, 0]) and 'TbOrCa'
     # ([0, 1]) are deliberately left out because, per the original note,
     # the other marginals are conditional on these values
     netica_cond_marginals = [
         ('VisitAsia', [0.95192, 0.048077]),
         ('Tuberculosis', [0, 1]),
         ('Smoking', [0.52381, 0.47619]),
         ('XRay', [0.98, 0.02]),
         ('Bronchitis', [0.55714, 0.44286]),
         ('Dyspnea', [0.21143, 0.78857]),
     ]
     self.cond_marginals = [Factor((variable, ), dist)
                            for variable, dist in netica_cond_marginals]
Пример #5
0
    def runTest(self):
        # Draw Gibbs samples from self._minibn_do, re-estimate the parameters
        # of a copy from those samples, and require the two distributions to
        # agree (same_factor is called with dp=2).
        drawn = GibbsSampler(self._minibn_do).samples(100000)
        counts = CompactFactor(drawn, domain=Domain())

        refit = self._minibn_do.copy(copy_domain=True)
        refit.estimate_parameters(counts)

        learned = distribution_of(refit)
        reference = distribution_of(self._minibn_do)
        agree = same_factor(learned, reference, dp=2, verbose=True)
        self.failUnless(agree)
Пример #6
0
 def runTest(self):
     # Both separators must report "independent" when every record in the
     # table carries a count of zero.  Tables over 0..5 binary variables are
     # tried, and every pair is tested given every subset of the remaining
     # variables.
     for size in xrange(6):
         names = ['V' + str(k) for k in xrange(size)]
         levels = dict([(name, [0, 1]) for name in names])
         records = []
         for chosen in subsetn(names, size):
             # 1 for variables in the chosen subset, 0 otherwise ...
             flags = [int(name in chosen) for name in names]
             # ... followed by the (zero) count
             records.append(tuple(flags) + (0,))
         table = CompactFactor((names, levels, names, records),
                               domain=Domain())
         chi2 = X2Separator(table)
         gtest = G2Separator(table)
         for a, b in pairs(names):
             for cond in powerset(set(names) - set([a, b])):
                 x2_p, x2_stat, x2_df = chi2.test_independ(a, b, set(cond))
                 g2_p, g2_stat, g2_df = gtest.test_independ(a, b, set(cond))
                 # zero degrees of freedom
                 self.assertEquals(x2_df, 0)
                 self.assertEquals(g2_df, 0)
                 # default to independent
                 self.assertEquals(x2_p, 1)
                 self.assertEquals(g2_p, 1)
                 # zero statistics (no data)
                 self.assertEquals(x2_stat, 0)
                 self.assertEquals(g2_stat, 0)
Пример #7
0
def rand_factor(vs):
    """Build a L{Factor} over the variables in C{vs} with generated data.

    @param vs: maps each variable name to the collection of its values;
    it is also passed to L{Factor} as C{new_domain_variables}.
    @return: a new L{Factor} living in a fresh L{Domain}, whose data comes
    from C{rand_factor_data(n)}, n being the product of the value counts.
    """
    # The explicit initial value 1 keeps the product defined when vs is
    # empty (reduce() raises TypeError on an empty sequence otherwise); for
    # non-empty vs the result is unchanged.
    n = reduce(operator.mul, [len(vs[v]) for v in vs], 1)
    return Factor(variables=vs.keys(),
                  data=rand_factor_data(n),
                  domain=Domain(),
                  check=True,
                  new_domain_variables=vs)
Пример #8
0
 def runTest(self):
     # Learn a pattern from 'tetrad_asia.csv' with the G2 separator and
     # compare it (via shd) with the reference pdags held on the test case.
     samples = read_csv(open('tetrad_asia.csv'))
     data = CompactFactor(samples, domain=Domain())
     learned = ICPattern(PCCI(G2Separator(data)))

     self.assertEquals(learned.shd(self._asia_pdag), 5)
     self.assertEquals(self._tetrad_pdag.shd(self._asia_pdag), 4)
     # I think tetrad is wrong (in terms of implementation)
     self.assertEquals(learned.shd(self._tetrad_pdag), 1)
Пример #9
0
    def runTest(self):
        # An IncrementalCompactFactor built from one batch of counts and then
        # updated with a second batch must marginalise to the same factors
        # as the plain sum of the two batches.
        num_vars = 5
        num_vals = 10

        def swap_some(count):
            # With probability one half a zero becomes a value in 1..5 and a
            # non-zero becomes 0; otherwise the count is left as it was.
            coin = random()
            if coin <= 0.5:
                if count == 0:
                    return randint(1, 5)
                return 0
            if count == 0:
                return 0
            return count

        for _run in xrange(num_runs):
            vs = dict([('V' + str(k), range(num_vals))
                       for k in xrange(num_vars)])
            # should get some random data with 0.
            cells = [abs(randint(0, 20))
                     for _cell in xrange(num_vals ** num_vars)]
            f = Factor(variables=vs.keys(),
                       data=cells,
                       domain=Domain(),
                       new_domain_variables=vs,
                       check=True)

            # first batch: one record per instantiation of f
            first_batch = [inst + (f[inst],) for inst in f.insts()]
            cf = IncrementalCompactFactor(
                (vs.keys(), vs, vs.keys(), first_batch), rmin=0)

            # second batch: a perturbed copy of the first, also added to f
            delta = f.copy(copy_domain=True)
            delta.map(swap_some)
            f += delta
            second_batch = [inst + (delta[inst],) for inst in delta.insts()]
            cf.update((vs.keys(), vs, vs.keys(), second_batch))

            for variables in powerset(vs.keys()):
                expected = f.copy(copy_domain=True)
                expected.marginalise_away(
                    expected.variables() - frozenset(variables))
                self.assert_(same_factor(expected,
                                         cf.makeFactor(variables),
                                         verbose=True))
Пример #10
0
 def testdnet(self):
     # Reading 'Asia.dnet' must reproduce the reference graph
     # (self.asia_adg) and every reference CPT (self.asia_cpts).
     from gPy.IO import read_dnet
     from gPy.Models import BN
     from gPy.Variables import Domain

     loaded = BN(domain=Domain())
     loaded.from_dnet(read_dnet('Asia.dnet'))

     self.samegraph(loaded.adg(), self.asia_adg)

     expected_cpts = self.asia_cpts.items()
     for name, expected in expected_cpts:
         actual = loaded[name]
         self.samecpt(actual, expected, actual.child())
Пример #11
0
 def runTest(self):
     data = CompactFactor(read_csv(open('tetrad_xor.csv')),domain=Domain())
     ci = PCCI(G2Separator(data))
     print ci._ind
     for a,b in pairs(data.variables()):
         if a == 'X1' and b == 'X2' or a == 'X2' and b == 'X1':
             self.assert_(ci.has_independence(a, b))
             self.assert_(not ci.has_independence_involving(a,b,'X3'))
         else:
             print a,b
             self.assert_(not ci.has_independence(a,b))
     data = CompactFactor(read_csv(open('tetrad_xor.csv')),domain=Domain())
     ci = PCCI(G2Separator(data))
     for a,b in pairs(data.variables()):
         if a == 'X1' and b == 'X2' or a == 'X2' and b == 'X1':
             self.assert_(ci.has_independence(a, b))
             self.assert_(not ci.has_independence_involving(a,b,'X3'))
         else:
             print a,b
             self.assert_(not ci.has_independence(a,b))
Пример #12
0
    def tryModel(self, model):
        """Check incremental counting against summed per-batch counting.

        Eleven batches of 100 forward samples are drawn from C{model}.  Each
        batch gets its own L{CompactFactor}, while one
        L{IncrementalCompactFactor} absorbs them all.  After every update the
        family factor of each variable must be the same both ways.
        """
        sampler = ForwardSampler(model)

        first = sampler.samples(100)
        batches = [CompactFactor(first, domain=Domain())]
        incremental = IncrementalCompactFactor(first, domain=Domain())

        for _round in xrange(10):
            fresh = sampler.samples(100)
            batches.append(CompactFactor(fresh, domain=Domain()))
            incremental.update(fresh)

            # the per-batch factors, summed, must match the incremental one
            for child in model.variables():
                family = model.adg().parents(child) | set([child])
                from_incremental = incremental.makeFactor(family)
                summed = batches[0].makeFactor(family)
                for batch in batches[1:]:
                    summed += batch.makeFactor(family)
                self.assert_(same_factor(from_incremental, summed))
Пример #13
0
 def observe(self, num_samples, skip=0):
     """Draw C{num_samples} observational samples, separated by C{skip}
     steps of the sampler, and fold them into the stored observational
     data.  The first call creates the store; later calls update it.
     @param num_samples: how many samples to draw.
     @param skip: passed to the sampler together with C{num_samples}.
     @return: L{IncrementalCompactFactor} holding every observational
     sample drawn so far.
     """
     drawn = self._pure_sampler.samples(num_samples, skip)
     if self._data is not None:
         # data already exists: append to it
         self._data.update(drawn)
         return self._data
     # first observation: start the store in a fresh Domain
     self._data = IncrementalCompactFactor(drawn, domain=Domain())
     return self._data
Пример #14
0
 def __init__(self, bn, burnin=1000):
     """Set up a world that samples from a private copy of C{bn}.

     @param bn: A causal Bayesian network from which samples are drawn.
     @type bn: L{CBN}
     @param burnin: The burn in for the L{GibbsSampler} used for generating
     interventional data.
     @type burnin: int
     """
     super(CausalWorld, self).__init__(domain=Domain.copy(bn),
                                       variables=bn.variables())

     # observational side: a private copy of the model, its forward
     # sampler, and the (not yet created) store of observational samples
     private_model = bn.copy(copy_domain=True)
     self._pure_model = private_model
     self._pure_sampler = ForwardSampler(private_model)
     self._data = None

     # interventional side: both caches start out empty
     self._burnin = burnin
     self._inter_sampler = {}
     self._inter_data = {}
Пример #15
0
def rand_bn(vs, max_potential_parents=15):
    """Build a L{BN} over C{vs}, multiplying in one generated CPT per variable.

    @param vs: maps each variable name to the collection of its values.
    @param max_potential_parents: cap on the size of the pool from which
    each child's parents are drawn.
    @return: the populated L{BN}.
    """
    model = BN(domain=Domain(), new_domain_variables=vs)

    for child in vs.keys():
        # start from every model variable, then randomly thin the pool
        # until it is no larger than the cap
        candidates = list(model.variables())
        while len(candidates) > max_potential_parents:
            candidates.remove(choice(candidates))

        family = rand_subset(candidates) | set([child])
        size = reduce(operator.mul, [len(vs[v]) for v in family])
        table = Factor(variables=family,
                       data=rand_factor_data(size),
                       domain=model,
                       check=True)
        model *= CPT(table, child, True, True)
    return model
Пример #16
0
    def runTest(self):
        # An IncrementalCompactFactor built from the cell counts of a random
        # factor must marginalise to the same factor as the original, for
        # every subset of the variables.
        num_vars = 5
        num_vals = 10
        for _run in xrange(num_runs):
            vs = dict([('V' + str(k), range(num_vals))
                       for k in xrange(num_vars)])
            # should get some random data with 0.
            cells = [abs(randint(0, 5))
                     for _cell in xrange(num_vals ** num_vars)]
            f = Factor(variables=vs.keys(),
                       data=cells,
                       domain=Domain(),
                       new_domain_variables=vs,
                       check=True)

            # one record per instantiation: the values followed by the count
            records = [inst + (f[inst],) for inst in f.insts()]
            cf = IncrementalCompactFactor(
                (vs.keys(), vs, vs.keys(), records))

            for variables in powerset(vs.keys()):
                expected = f.copy(copy_domain=True)
                expected.marginalise_away(
                    expected.variables() - frozenset(variables))
                self.assert_(same_factor(expected,
                                         cf.makeFactor(variables),
                                         verbose=True))
Пример #17
0
def generate_dense_bn(density, num_vars=8, num_vals=3):
    """Build a L{BN} whose parent sets come from C{generate_dense_parents}.

    @param density: forwarded to C{generate_dense_parents}; must not be
    larger than C{num_vars}.
    @param num_vars: forwarded to C{generate_dense_parents}.
    @param num_vals: number of values (0 .. num_vals-1) of every variable.
    @return: the populated L{BN}.
    @raise RuntimeError: if C{density} is greater than C{num_vars}.
    """
    if density > num_vars:
        # NOTE(review): the message says "less than" although
        # density == num_vars is accepted -- confirm which one is intended.
        raise RuntimeError('density must be less than number of variables')

    names, parents = generate_dense_parents(density, num_vars)
    vals = dict([(name, frozenset(xrange(num_vals))) for name in names])
    bn = BN(domain=Domain(), new_domain_variables=vals)
    for child in names:
        # a child without an entry has no parents; the returned mapping
        # itself is left untouched
        if child in parents:
            child_parents = parents[child]
        else:
            child_parents = frozenset()
        # one data value per joint instantiation of the child and its parents
        n = num_vals ** (len(child_parents) + 1)

        f = Factor(variables=frozenset([child]) | child_parents,
                   data=rand_factor_data(n),
                   domain=bn,
                   check=True)
        bn *= CPT(f, child, True, True)
    return bn
Пример #18
0
 def query(self, intervention, num_samples, skip=0):
     """Draw C{num_samples} interventional samples, separated by C{skip}
     steps of the sampler.  These samples are appended to any existing
     interventional samples and then returned.  The intervention made is
     that of C{intervention}.
     @param intervention: A dictionary mapping variables in the L{CBN} to
     a single value in the domain.
     @param num_samples: how many samples to draw.
     @param skip: passed to the sampler together with C{num_samples}.
     @return: L{IncrementalCompactFactor}
     """
     # NOTE(review): the cache key holds only the intervened variables, not
     # their values, so a second query on the same variables with different
     # values reuses the first sampler and data -- confirm this is intended.
     k = frozenset(intervention.keys())
     if k not in self._inter_sampler:
         # first query for these variables: intervene on a private copy of
         # the model and keep a Gibbs sampler for it
         do_model = CBN.from_bn(self._pure_model.copy(copy_domain=True))
         do_model.intervene(intervention)
         self._inter_sampler[k] = GibbsSampler(do_model, self._burnin)
     do_sampler = self._inter_sampler[k]
     samples = do_sampler.samples(num_samples, skip)
     if k not in self._inter_data:
         self._inter_data[k] = IncrementalCompactFactor(samples,
                                                        domain=Domain())
     else:
         self._inter_data[k].update(samples)
     return self._inter_data[k]
Пример #19
0
def rand_fr(vs, min_fact=1, max_fact=10, min_fact_vars=1, max_fact_vars=10):
    """Build an L{FR} over C{vs} by multiplying in generated factors.

    @param vs: maps each variable name to the collection of its values.
    @param min_fact: lower bound (inclusive) on the number of factors.
    @param max_fact: upper bound (exclusive) on the number of factors.
    @param min_fact_vars: lower bound (inclusive) on variables per factor.
    @param max_fact_vars: upper bound (exclusive) on variables per factor;
    the number of variables in C{vs} is used instead when that is smaller.
    @return: the populated L{FR}.
    @raise ValueError: from C{randrange} when one of the ranges is empty,
    e.g. with the default bounds and a single variable in C{vs}.
    """
    model = FR(domain=Domain(), new_domain_variables=vs)
    names = vs.keys()

    for i in xrange(randrange(min_fact, max_fact)):
        # Keep drawing until at least one variable has been picked.  fv must
        # remain a list for as long as the loop can repeat: converting it to
        # a tuple inside the loop made a retry (possible when
        # min_fact_vars is 0) fail on tuple.append.
        fv = []
        while len(fv) == 0:
            for j in xrange(
                    randrange(min_fact_vars, min(max_fact_vars,
                                                 len(names)))):
                v = choice(names)
                # re-draw until the variable is not already in the factor
                while v in fv:
                    v = choice(names)
                fv.append(v)
        fv = tuple(fv)

        n = reduce(operator.mul, [len(vs[v]) for v in fv])
        f = Factor(variables=fv,
                   data=rand_factor_data(n),
                   domain=model,
                   check=True)
        model *= f
    return model
Пример #20
0
 def setUp(self):
     """Create the graphs and hypergraphs used by the tests.

     The Asia network is read from 'Asia.dnet' and its C{_hypergraph} and
     C{_adg} attributes are kept; the remaining fixtures are literal
     L{UGraph} and L{Hypergraph} objects.
     """
     from gPy.Variables import Domain
     net = BN(domain=Domain())
     net.from_dnet(read_dnet('Asia.dnet'))
     self.hypergraph = net._hypergraph
     self.adg = net._adg

     tarjan_edges = ((1,2),(1,3),(2,3),(2,10),(3,10),(4,5),
                     (4,7),(5,6),(5,9),(5,7),(6,7),(6,9),
                     (7,8),(7,9),(8,9),(8,10),(9,10))
     self.tarjan = UGraph(range(1,11), tarjan_edges)

     tarjan2_edges = ((1,4),(1,3),(2,3),(2,7),(3,5),(3,6),
                      (4,5),(4,8),(5,6),(5,8),(6,7),(6,9),
                      (7,9),(8,9))
     self.tarjan2 = UGraph(range(1,10), tarjan2_edges)

     # tarjan3 has the edges of tarjan2 plus seven more
     tarjan3_extra = ((3,4),(3,7),(4,6),(4,7),(5,7),(6,8),(7,8))
     self.tarjan3 = UGraph(range(1,10), tarjan2_edges + tarjan3_extra)

     self.tarjanh1 = Hypergraph([[3,4],[2,4],[1,2,3]])
     self.tarjanh2 = Hypergraph([[3,4],[2,4],[1,2,3],[2,3,4]])

     # graph2 has the edges of graph1 plus three more
     graph1_edges = ('AB','AC','BD','CE','EF')
     self.graph1 = UGraph('ABCDEF', graph1_edges)
     self.graph2 = UGraph('ABCDEF', graph1_edges + ('BC','CD','DE'))
Пример #21
0
from gPy.Examples import minibn, asia
from gPy.Models import FR, BN
from gPy.Parameters import Factor, CPT
from gPy.Variables import Domain
from random import choice, randrange, uniform, shuffle
import operator, unittest, pickle

# Three binary variables: 'a' and 'b' each get a uniform CPT and 'c' gets a
# 0/1-valued CPT over ['c', 'a', 'b'].
xor = BN(domain=Domain(),
         new_domain_variables={
             'a': [0, 1],
             'b': [0, 1],
             'c': [0, 1]
         })
xor.add_cpts([
    CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
    CPT(Factor(variables=['b'], data=[0.5, 0.5]), child='b'),
    CPT(Factor(variables=['c', 'a', 'b'],
               data=[1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]),
        child='c')
])
cbn_small_names = ['xor', 'minibn', 'asia']
cbn_small_test_cases = [xor, minibn, asia]
cbn_large_names = ['alarm', 'insurance', 'carpo']


def _load_pickled_bn(name):
    """Unpickle 'networks/<name>_bn.pck' and close the file again."""
    # NOTE: pickle.load can run arbitrary code; only trusted files belong in
    # networks/.
    # NOTE(review): the file is opened in text mode ('r') as before; binary
    # pickles would need 'rb' -- confirm how the files were written.
    handle = open('networks/' + name + '_bn.pck', 'r')
    try:
        return pickle.load(handle)
    finally:
        handle.close()


try:
    # load the pickled large Bayes nets.
    cbn_large_test_cases = list(map(_load_pickled_bn, cbn_large_names))
except Exception:
    # Best effort: if any network cannot be loaded, run without the large
    # test cases.  Exception (rather than a bare except) lets
    # KeyboardInterrupt and SystemExit propagate.
    cbn_large_names = []
    cbn_large_test_cases = []
Пример #22
0

def disp(fn, samples):
    """Write the counts held in C{samples} to the text file C{fn}.

    The first line lists the variables of the factor followed by the word
    'count'; every following line holds one instantiation followed by its
    value in the factor.  Fields are separated by single spaces.

    @param fn: path of the file to (over)write.
    @param samples: object offering C{variables()} and C{makeFactor()}.
    """
    # Build the factor before touching the file, so that a failure here
    # does not leave a truncated file behind.
    fact = samples.makeFactor(samples.variables())
    f = open(fn, 'w')
    try:
        header = [str(var) for var in fact.variables()] + ['count']
        f.write(' '.join(header) + '\n')
        for inst in fact.insts():
            row = [str(i) for i in inst] + [str(fact[inst])]
            f.write(' '.join(row) + '\n')
    finally:
        # always release the handle, even if writing fails part-way
        f.close()


# First network: two binary variables, where the CPT of 'b' is defined over
# both 'a' and 'b'.
bn0 = BN(domain=Domain(), new_domain_variables={'a': [0, 1], 'b': [0, 1]})
bn0.add_cpts([
    CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
    CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]), child='b')
])
# Draw 10000 observational samples and dump their counts to the file
# 'two_depend'.
w = CausalWorld(bn0)
samples = w.observe(10000)
disp('two_depend', samples)

# Second network: same variables, but the CPT of 'b' is defined over 'b'
# alone.
bn1 = BN(domain=Domain(), new_domain_variables={'a': [0, 1], 'b': [0, 1]})
bn1.add_cpts([
    CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
    CPT(Factor(variables=['b'], data=[0.3, 0.7]), child='b')
])
# w and samples are rebound here: from now on they refer to the second
# network's world and its 10000 observational samples.
w = CausalWorld(bn1)
samples = w.observe(10000)