Exemplo n.º 1
0
    def test_bde2(self):
        """Compare BDeu scores with reference values from R's ``deal`` package.

        Every CPT in ``self.cpts`` is passed through ``get_counts_sql`` with
        the data read from ``myksl.dat`` and scored with ``bdeu_score(64)``.
        Each node score, and their sum, must agree with the figures recorded
        in the R transcript below to ``places`` decimal places (``places`` is
        defined outside this method).
        """
        # Build the data object while the file is open so the handle is
        # closed deterministically instead of being left to the interpreter.
        with open('myksl.dat') as datafile:
            myksl = Data(read_csv(datafile))

        # > ksbln.prior <- jointprior(kslbn)
        # Imaginary sample size: 64
        # > kslbn.fit <- getnetwork(learn(kslbn,myksl,ksbln.prior))

        # deal scores
        #         > score(nodes(kslbn.fit)$Smok)
        #         [1] -637.9544
        #         > score(nodes(kslbn.fit)$Alc)
        #         [1] -752.027
        #         > score(nodes(kslbn.fit)$Work)
        #         [1] -463.1716
        #         > score(nodes(kslbn.fit)$Sex)
        #         [1] -751.0772
        #         > score(nodes(kslbn.fit)$Year)
        #         [1] -666.6585
        #         > score(kslbn.fit)
        #         [1] -3270.889
        # NOTE(review): assumes self.cpts is ordered Smok, Alc, Work, Sex,
        # Year, matching the transcript above -- confirm against the fixture.
        node_scores = (-637.9544, -752.027, -463.1716, -751.0772, -666.6585)
        net_score = -3270.889
        total = 0
        for i, cpt in enumerate(self.cpts):
            this_score = cpt.get_counts_sql(myksl).bdeu_score(64)
            self.assertAlmostEqual(this_score, node_scores[i], places)
            total += this_score
        self.assertAlmostEqual(total, net_score, places)
Exemplo n.º 2
0
    def test_bde3(self):
        """Check ``family_score`` on the ``myksl.dat`` data against ``deal``.

        For every CPT in ``self.cpts`` the child and its remaining variables
        (used as parents) are scored directly from the data with
        ``family_score(child, parents, 64.0)``.  This just tests that the
        data object has the right counts: each score, and their sum, must
        match the reference values from the R transcript below to ``places``
        decimal places (``places`` is defined outside this method).
        """
        # Keep the file open only for as long as it takes to build the data
        # object, so the handle is always closed.
        with open('myksl.dat') as datafile:
            myksl = Data(read_csv(datafile))

        # > ksbln.prior <- jointprior(kslbn)
        # Imaginary sample size: 64
        # > kslbn.fit <- getnetwork(learn(kslbn,myksl,ksbln.prior))

        # deal scores
        #         > score(nodes(kslbn.fit)$Smok)
        #         [1] -637.9544
        #         > score(nodes(kslbn.fit)$Alc)
        #         [1] -752.027
        #         > score(nodes(kslbn.fit)$Work)
        #         [1] -463.1716
        #         > score(nodes(kslbn.fit)$Sex)
        #         [1] -751.0772
        #         > score(nodes(kslbn.fit)$Year)
        #         [1] -666.6585
        #         > score(kslbn.fit)
        #         [1] -3270.889
        # NOTE(review): assumes self.cpts follows the node order of the
        # transcript above -- confirm against the fixture.
        node_scores = (-637.9544, -752.027, -463.1716, -751.0772, -666.6585)
        net_score = -3270.889
        total = 0
        for i, cpt in enumerate(self.cpts):
            child = cpt.child()
            # Every variable of the CPT other than the child is a parent.
            parents = list(cpt.variables() - set([child]))
            this_score = myksl.family_score(child, parents, 64.0)
            self.assertAlmostEqual(this_score, node_scores[i], places)
            total += this_score
        self.assertAlmostEqual(total, net_score, places)
Exemplo n.º 3
0
 def runTest(self):
     """Compare the pattern learnt from ``tetrad_asia.csv`` with two references.

     An ``ICPattern`` is built from a ``PCCI`` over a ``G2Separator`` on the
     data, and its ``shd`` (presumably structural Hamming distance) to
     ``self._asia_pdag`` and ``self._tetrad_pdag`` is checked against fixed
     expected values.
     """
     # Construct the factor inside the ``with`` block so the CSV file is
     # closed once the data has been loaded.
     with open('tetrad_asia.csv') as csvfile:
         data = CompactFactor(read_csv(csvfile), domain=Domain())
     g = ICPattern(PCCI(G2Separator(data)))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(g.shd(self._asia_pdag), 5)
     self.assertEqual(self._tetrad_pdag.shd(self._asia_pdag), 4)
     # I think tetrad is wrong (in terms of implementation)
     self.assertEqual(g.shd(self._tetrad_pdag), 1)
Exemplo n.º 4
0
 def test_smalldata_sql(self):
     """Check the factors ``Data.makeFactor`` builds from the ``fake_data`` file.

     The factor over no variables must have ``z()`` equal to 5, and each
     listed variable combination must produce a factor that
     ``self.samefactor`` accepts as matching the expected counts.
     """
     # Load the data inside the ``with`` block so the file is always closed.
     with open('fake_data') as datafile:
         dat = Data(read_csv(datafile))
     self.assertEqual(dat.makeFactor([]).z(), 5)
     expected_counts = (
         (('bar',), [2, 2, 1]),
         (('blah',), [3, 1, 1]),
         (('foo',), [3, 1, 1, 0]),
         (('bar', 'foo'), [2, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]),
     )
     for names, counts in expected_counts:
         # Fresh lists for each call, exactly as separate literals would give.
         self.samefactor(dat.makeFactor(list(names)),
                         Factor(list(names), counts))
Exemplo n.º 5
0
 def test_smalldata_sql(self):
     """Verify factors built by ``Data.makeFactor`` from the ``fake_data`` file.

     Checks that the empty-variable factor has ``z()`` equal to 5 and that
     the factors over ``bar``, ``blah``, ``foo`` and ``bar, foo`` match the
     expected counts according to ``self.samefactor``.
     """
     # The handle is only needed while the Data object is being built; the
     # ``with`` block guarantees it is closed afterwards.
     with open('fake_data') as datafile:
         rawdata = read_csv(datafile)
         dat = Data(rawdata)
     self.assertEqual(dat.makeFactor([]).z(), 5)
     self.samefactor(dat.makeFactor(['bar']), Factor(['bar'], [2, 2, 1]))
     self.samefactor(dat.makeFactor(['blah']), Factor(['blah'], [3, 1, 1]))
     self.samefactor(dat.makeFactor(['foo']), Factor(['foo'], [3, 1, 1, 0]))
     self.samefactor(
         dat.makeFactor(['bar', 'foo']),
         Factor(['bar', 'foo'], [2, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]))
Exemplo n.º 6
0
 def runTest(self):
     """Check the independences ``PCCI`` reports for ``tetrad_xor.csv``.

     ``X1`` and ``X2`` must be reported independent, with no independence
     involving them together with ``X3``; every other pair must not be
     reported independent.  The whole check runs twice on freshly loaded
     data; only the first run prints ``ci._ind``.
     """
     def check_independences(show_ind):
         # Load the data inside ``with`` so the CSV handle is closed.
         with open('tetrad_xor.csv') as csvfile:
             data = CompactFactor(read_csv(csvfile), domain=Domain())
         ci = PCCI(G2Separator(data))
         if show_ind:
             print(ci._ind)
         for a, b in pairs(data.variables()):
             if (a, b) in (('X1', 'X2'), ('X2', 'X1')):
                 # assertTrue/assertFalse replace the deprecated assert_.
                 self.assertTrue(ci.has_independence(a, b))
                 self.assertFalse(ci.has_independence_involving(a, b, 'X3'))
             else:
                 print('%s %s' % (a, b))
                 self.assertFalse(ci.has_independence(a, b))

     # NOTE(review): the original repeated the check verbatim; the reason
     # for the second pass is not visible here (possibly repeatability).
     check_independences(True)
     check_independences(False)
Exemplo n.º 7
0
 def test_ipf2(self):
     """Run ``ipf`` on pairwise marginals of the alarm data.

     Just tests termination: nothing is asserted about the fitted model.
     """
     # Build the factor inside ``with`` so the data file is closed.
     with open('alarm_1K.dat') as datafile:
         alarm = CompactFactor(read_csv(datafile))
     vs = list(alarm.variables())
     # Repeat the first variable at the end so consecutive pairs wrap round.
     vs.append(vs[0])
     marginals = {}
     model = RFR()
     for neighbours in zip(vs, vs[1:]):
         hyperedge = frozenset(neighbours)
         model *= Factor(hyperedge)
         marginals[hyperedge] = alarm[hyperedge].normalised()
     model.ipf(marginals, 0.001)
Exemplo n.º 8
0
 def test_ipf2(self):
     """Check that ``ipf`` terminates on a ring of pairwise alarm marginals.

     No assertion is made on the result; the test passes if the call
     returns without raising.
     """
     # The file is only needed while the factor is built, so close it
     # straight afterwards.
     with open('alarm_1K.dat') as datafile:
         alarm = CompactFactor(read_csv(datafile))
     vs = list(alarm.variables())
     # Appending the first variable makes the last pair link back to the start.
     vs.append(vs[0])
     marginals = {}
     model = RFR()
     for i in range(len(vs) - 1):
         hyperedge = frozenset(vs[i:i + 2])
         model *= Factor(hyperedge)
         marginals[hyperedge] = alarm[hyperedge].normalised()
     model.ipf(marginals, 0.001)
Exemplo n.º 9
0
    def setUp(self):
        """Prepare the Asia network, reference marginals and raw alarm data.

        Sets ``self.domain``, ``self.bnm`` (loaded from ``Asia.dnet``),
        ``self.marginals`` and ``self.cond_marginals`` (reference values
        copied from Netica output), ``self.cptdict`` (child variable -> CPT)
        and ``self.rawdata`` (the result of ``read_csv`` on ``alarm_1K.dat``).
        """
        from gPy.Variables import Domain

        def single_variable_factors(table):
            # One Factor per (variable name, values) row, in table order.
            return [Factor((name,), values) for name, values in table]

        self.domain = Domain()
        self.bnm = BN(domain=self.domain)
        self.bnm.from_dnet(read_dnet('Asia.dnet'))
        self.cptdict = {}

        # taken directly from Netica output
        self.marginals = single_variable_factors([
            ('VisitAsia', [0.99, 0.01]),
            ('Tuberculosis', [0.9896, 0.0104]),
            ('Smoking', [0.5, 0.5]),
            ('Cancer', [0.945, 0.055]),
            ('TbOrCa', [0.93517, 0.064828]),
            ('XRay', [0.11029, 0.88971]),
            ('Bronchitis', [0.55, 0.45]),
            ('Dyspnea', [0.56403, 0.43597]),
        ])
        # taken directly from Netica output
        self.cond_marginals = single_variable_factors([
            ('VisitAsia', [0.95192, 0.048077]),
            ('Tuberculosis', [0, 1]),
            ('Smoking', [0.52381, 0.47619]),
            #other marginals are conditional on these values
            #('Cancer', [1,0]),
            #('TbOrCa', [0,1]),
            ('XRay', [0.98, 0.02]),
            ('Bronchitis', [0.55714, 0.44286]),
            ('Dyspnea', [0.21143, 0.78857]),
        ])
        for family_cpt in self.bnm:
            self.cptdict[family_cpt.child()] = family_cpt

        # NOTE(review): the handle is left open on purpose -- read_csv may
        # hand back rows lazily and self.rawdata is used by later tests.
        alarm_file = open('alarm_1K.dat')
        self.rawdata = read_csv(alarm_file)
Exemplo n.º 10
0
    def setUp(self):
        """Build the fixtures shared by the tests.

        Loads the Asia network from ``Asia.dnet`` into ``self.bnm``, records
        Netica's reference marginals in ``self.marginals`` and
        ``self.cond_marginals``, indexes the network's CPTs by child variable
        in ``self.cptdict`` and stores the ``read_csv`` result for
        ``alarm_1K.dat`` in ``self.rawdata``.
        """
        from gPy.Variables import Domain

        def marginal(name, *values):
            # Factor over the single variable ``name`` with the given values.
            return Factor((name,), list(values))

        self.domain = Domain()
        self.bnm = BN(domain=self.domain)
        self.bnm.from_dnet(read_dnet('Asia.dnet'))
        self.cptdict = {}

        # taken directly from Netica output
        self.marginals = [
            marginal('VisitAsia', 0.99, 0.01),
            marginal('Tuberculosis', 0.9896, 0.0104),
            marginal('Smoking', 0.5, 0.5),
            marginal('Cancer', 0.945, 0.055),
            marginal('TbOrCa', 0.93517, 0.064828),
            marginal('XRay', 0.11029, 0.88971),
            marginal('Bronchitis', 0.55, 0.45),
            marginal('Dyspnea', 0.56403, 0.43597),
        ]
        # taken directly from Netica output
        self.cond_marginals = [
            marginal('VisitAsia', 0.95192, 0.048077),
            marginal('Tuberculosis', 0, 1),
            marginal('Smoking', 0.52381, 0.47619),
            #other marginals are conditional on these values
            #marginal('Cancer', 1, 0),
            #marginal('TbOrCa', 0, 1),
            marginal('XRay', 0.98, 0.02),
            marginal('Bronchitis', 0.55714, 0.44286),
            marginal('Dyspnea', 0.21143, 0.78857),
        ]
        for node_cpt in self.bnm:
            self.cptdict[node_cpt.child()] = node_cpt

        # NOTE(review): not closed here because read_csv's result is kept on
        # self and may still read from the file later -- confirm.
        alarm_file = open('alarm_1K.dat')
        self.rawdata = read_csv(alarm_file)
Exemplo n.º 11
0
 def setUp(self):
     """Read ``alarm_1K.dat`` with ``read_csv`` and keep it as ``self.rawdata``."""
     # NOTE(review): the handle is left open; whether read_csv has finished
     # with it by the time it returns is not visible here.
     alarm_file = open('alarm_1K.dat')
     self.rawdata = read_csv(alarm_file)
Exemplo n.º 12
0
from gPy.IO import read_csv
from gPy.Parameters import CompactFactor
import gPy.Parameters

# Build the compact factor while the file is open so the handle is closed
# as soon as the data has been loaded.
with open('florida.dat') as datafile:
    florida = CompactFactor(read_csv(datafile))
#create a normal factor
table = florida['Murderer', 'Sentence', 'Victim']
print(table)
print('Number of observations is %d' % table.z())
# Module-level setting of gPy.Parameters, changed before the final print.
gPy.Parameters.precision = 6
print(table.normalised())
Exemplo n.º 13
0
from gPy.IO import read_csv
from gPy.Parameters import CompactFactor
import gPy.Parameters


# Load the data inside a ``with`` block so the file does not stay open for
# the rest of the script.
with open('florida.dat') as datafile:
    florida = CompactFactor(read_csv(datafile))
#create a normal factor
table = florida['Murderer', 'Sentence', 'Victim']
print(table)
print('Number of observations is %d' % table.z())
# Adjust gPy's module-level precision before printing the normalised table.
gPy.Parameters.precision = 6
print(table.normalised())
Exemplo n.º 14
0
 def setUp(self):
     """Store the ``read_csv`` result for ``alarm_1K.dat`` on ``self.rawdata``."""
     # NOTE(review): the file object is not closed explicitly; read_csv may
     # still need it after returning -- confirm before changing.
     source = open('alarm_1K.dat')
     self.rawdata = read_csv(source)
Exemplo n.º 15
0
from gPy.Data import Data2
from gPy.IO import read_csv
import sys, gzip

# Build the data object while the file is open so the handle is closed
# before the rest of the script runs.
with open('/home/jc/godot/research/icml08/data/asia_100.data') as datafile:
    data = Data2(read_csv(datafile), rmin=3)
#for k, v in data._data.items():
#    print k, v
#print data.marginal(['VisitAsia'])
#print data.marginal(['VisitAsia','TbOrCa','XRay','Dyspnea'])

#data = Data2(read_csv(gzip.open('/home/jc/godot/research/icml08/data/insurance_100.data.gz')),rmin=5)
#for k, v in data._data.items():
#    print k, v
#sys.exit()
data._test(['VisitAsia', 'TbOrCa', 'XRay', 'Dyspnea'])
#data._test(['Accident','ILiCost'])
sys.exit()
# NOTE(review): everything below is unreachable because of the sys.exit()
# above; it is kept as the author left it.
print(data.marginal(['Accident', 'ILiCost']))
vs = sorted(data._variables)
for v in vs:
    for w in vs:
        for z in vs:
            print('%s %s %s %s' % (v, w, z, data.marginal(frozenset([v, w, z]))))
Exemplo n.º 16
0
from gPy.IO import read_csv
from gPy.Parameters import CompactFactor
from gPy.Demos import marginalise_gui

# Load the data inside a ``with`` block so the file is closed before the
# GUI is started.
with open('cancer.dat') as datafile:
    cancer = CompactFactor(read_csv(datafile))
#create a normal factor
data = cancer['Smoker', 'Cancer', 'Bronchitis']
marginalise_gui(data.normalised())





Exemplo n.º 17
0
from gPy.Examples import asia
from gPy.Parameters import CompactFactor
from gPy.IO import read_csv
import sys

# The data file is named by the first command-line argument; build the
# factor inside ``with`` so the handle is closed once it has been read.
with open(sys.argv[1]) as datafile:
    data = CompactFactor(read_csv(datafile))
print(asia.bdeu_score(data))


def score_adg(adg, data):
    """Print the BDeu score of each vertex's family in ``adg``.

    For every vertex, a factor over the vertex and its parents is made from
    ``data``, turned into a CPT for the vertex with ``makeCPT(child, False)``
    and its ``bdeu_score()`` is printed next to the vertex.  The listing is
    framed by a ``^^^^`` line and a ``vvvvv`` line followed by a blank line.
    """
    print('^^^^')
    for child in adg.vertices():
        family = adg.parents(child) | set([child])
        family_cpt = data.makeFactor(family).makeCPT(child, False)
        print('%s %s' % (child, family_cpt.bdeu_score()))
    print('vvvvv')
    print('')
    
# Print the family scores for the graph returned by asia.adg(), then again
# after remove_arrow('Smoking','Cancer') has been applied to the same graph.
adg = asia.adg()
score_adg(adg,data)
adg.remove_arrow('Smoking','Cancer')
score_adg(adg,data)
Exemplo n.º 18
0
from gPy.Examples import asia
from gPy.Data import CompactFactor
from gPy.IO import read_csv
import sys

# Read the file named on the command line; the ``with`` block closes it as
# soon as the compact factor has been built.
with open(sys.argv[1]) as datafile:
    data = CompactFactor(read_csv(datafile))
print(asia.bdeu_score(data))


def score_adg(adg, data):
    """Print one ``child score`` line per vertex of ``adg``.

    The score is ``bdeu_score()`` of the CPT obtained from ``data`` for the
    vertex together with its parents (``makeFactor`` followed by
    ``makeCPT(child, False)``).  Output starts with ``^^^^`` and ends with
    ``vvvvv`` and an empty line.
    """
    print('^^^^')
    for child in adg.vertices():
        parents = adg.parents(child)
        family_factor = data.makeFactor(parents | set([child]))
        score = family_factor.makeCPT(child, False).bdeu_score()
        print('%s %s' % (child, score))
    print('vvvvv')
    print('')
    
# Print the family scores for the graph returned by asia.adg().
adg = asia.adg()
score_adg(adg,data)
Exemplo n.º 19
0
"""Throwaway script to test BIC score search
"""

#from gPy.Data import Data
from gPy.Data import CompactFactor
import sys, gzip
from gPy.IO import read_csv

# gzip.GzipFile only supports the ``with`` statement from Python 2.7 on, so
# the handle is closed with try/finally to stay compatible with older
# interpreters.
datafile = gzip.open('/home/jc/godot/research/icml08/data/insurance_100.data.gz')
try:
    data = CompactFactor(read_csv(datafile))
finally:
    datafile.close()
# For each variable: its name, a blank line, then the bic_search result.
for v in data.variables():
    print(v)
    print('')
    print(data.bic_search(v))
Exemplo n.º 20
0
from gPy.Data import Data2
from gPy.IO import read_csv
import sys, gzip

# Load the data inside a ``with`` block so the file handle is closed once
# the Data2 object has been built.
with open("/home/jc/godot/research/icml08/data/asia_100.data") as datafile:
    data = Data2(read_csv(datafile), rmin=3)
# for k, v in data._data.items():
#    print k, v
# print data.marginal(['VisitAsia'])
# print data.marginal(['VisitAsia','TbOrCa','XRay','Dyspnea'])

# data = Data2(read_csv(gzip.open('/home/jc/godot/research/icml08/data/insurance_100.data.gz')),rmin=5)
# for k, v in data._data.items():
#    print k, v
# sys.exit()
data._test(["VisitAsia", "TbOrCa", "XRay", "Dyspnea"])
# data._test(['Accident','ILiCost'])
sys.exit()
# NOTE(review): the statements below never run because of the sys.exit()
# above; they are retained unchanged in behaviour.
print(data.marginal(["Accident", "ILiCost"]))
vs = sorted(data._variables)
for v in vs:
    for w in vs:
        for z in vs:
            print("%s %s %s %s" % (v, w, z, data.marginal(frozenset([v, w, z]))))