Exemplo n.º 1
0
 def setUp(self):
     """Build the DiscreteBayesianNetwork fixture used by the tests."""
     # Graph structure: load the skeleton and topologically sort it.
     graph = GraphSkeleton()
     graph.load("unittestdict.txt")
     graph.toporder()
     # CPD parameters come from the same dict file.
     params = NodeData()
     params.load("unittestdict.txt")
     self.instance = DiscreteBayesianNetwork(graph, params)
Exemplo n.º 2
0
    def setUp(self):
        """Build the linear-Gaussian network fixture for the tests."""
        # Parameters for the LG network (load() used as a constructor).
        params = NodeData.load("unittestlgdict.txt")
        # The skeleton is loaded and ordered, though only the node data
        # is passed to the network constructor below.
        graph = GraphSkeleton()
        graph.load("unittestdict.txt")
        graph.toporder()

        self.lgb = LGBayesianNetwork(params)
Exemplo n.º 3
0
 def setUp(self):
     """Wire up a discrete network and its CPD factorization."""
     graph = GraphSkeleton()
     graph.load("unittestdict.txt")
     graph.toporder()
     # NodeData.load() acts as an alternate constructor here.
     params = NodeData.load("unittestdict.txt")
     self.bn = DiscreteBayesianNetwork(params)
     self.fn = TableCPDFactorization(self.bn)
Exemplo n.º 4
0
 def setUp(self):
     """Build the network plus the two CPD factors under test."""
     graph = GraphSkeleton()
     graph.load("unittestdict.txt")
     graph.toporder()
     params = NodeData.load("unittestdict.txt")
     self.instance = DiscreteBayesianNetwork(params)
     # One factor per queried node.
     self.factor = TableCPDFactor("Grade", self.instance)
     self.factor2 = TableCPDFactor("Letter", self.instance)
Exemplo n.º 5
0
    def test_hybn_mte_estimateparams(self):
        """Exercise hybrid MTE parameter estimation on the fixture data."""
        skel = GraphSkeleton()
        skel.load("../tests/bn_skeleton.txt")
        skel.toporder()

        # Parse the sample file as JSON instead of eval(): eval() executes
        # arbitrary code from the data file, and json.load is the correct
        # parser for a .json input.
        import json
        with open('../tests/dataBR2.json', 'r') as f:
            samples = json.load(f)

        # NOTE(review): `skel` and `samples` loaded above are unused -- the
        # call below uses the fixture's self.skel / self.samplelgseq.
        # Confirm whether the local data was meant to be passed instead.
        result = self.l.hybn_mte_estimateparams(self.skel, self.samplelgseq)
def q_without_ros():
    """Monty Hall example: build the three-door network by hand and
    print the posterior over the prize door, given the guest's pick
    and the door Monty opened.
    """
    # Graph structure: both the prize location and the guest's choice
    # influence which door Monty opens.
    skel = GraphSkeleton()
    skel.V = ["prize_door", "guest_door", "monty_door"]
    skel.E = [["prize_door", "monty_door"],
              ["guest_door", "monty_door"]]
    skel.toporder()
    nd = NodeData()
    nd.Vdata = {
        "prize_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0/3, 1.0/3, 1.0/3],
        },
        "guest_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0/3, 1.0/3, 1.0/3],
        },
        "monty_door": {
            "numoutcomes": 3,
            "parents": ["prize_door", "guest_door"],
            "children": None,
            "vals": ["A", "B", "C"],
            # Conditional table keyed by "[prize, guest]": Monty never
            # opens the prize door or the guest's door.
            "cprob": {
                "['A', 'A']": [0., 0.5, 0.5],
                "['B', 'B']": [0.5, 0., 0.5],
                "['C', 'C']": [0.5, 0.5, 0.],
                "['A', 'B']": [0., 0., 1.],
                "['A', 'C']": [0., 1., 0.],
                "['B', 'A']": [0., 0., 1.],
                "['B', 'C']": [1., 0., 0.],
                "['C', 'A']": [0., 1., 0.],
                "['C', 'B']": [1., 0., 0.],
            },
        },
    }
    bn = DiscreteBayesianNetwork(skel, nd)
    fn = TableCPDFactorization(bn)

    # Posterior over the prize door given the evidence.
    query = {
        "prize_door": ["A","B","C"],
    }
    evidence = {
        "guest_door": "A",
        "monty_door": "B",
    }

    res = fn.condprobve(query, evidence)
    # Python 2 print statements (this listing is Python 2 throughout).
    print res.vals
    print res.scope
    print res.card
    print res.stride
Exemplo n.º 7
0
class TestOrderedSkeleton(unittest.TestCase):
    """OrderedSkeleton should come out of load() already topologically
    ordered, unlike a plain GraphSkeleton."""

    def setUp(self):
        self.os = OrderedSkeleton()
        self.os.load("unittestdict.txt")
        self.gs = GraphSkeleton()
        self.gs.load("unittestdict.txt")

    def test_constructor(self):
        # Before toporder() the plain skeleton's vertex order differs;
        # afterwards the two orders must agree.
        self.assertNotEqual(self.os.V, self.gs.V)
        self.gs.toporder()
        self.assertEqual(self.os.V, self.gs.V)
    def load(self, file_name):
        """Load node data and graph skeleton from *file_name* and
        re-initialise this network in place via the parent constructor.
        """
        #### Load BN
        nd = NodeData()
        skel = GraphSkeleton()
        nd.load(file_name)  # any input file
        skel.load(file_name)

        # topologically order graphskeleton
        skel.toporder()

        # Re-run the parent-class constructor with the freshly loaded
        # skeleton and node data.
        super(DiscreteBayesianNetworkExt, self).__init__(skel, nd)
        ##TODO load evidence
Exemplo n.º 9
0
class TestDynDiscBayesianNetwork(unittest.TestCase):
    """Tests for the dynamic discrete bayesian network sampler."""

    def setUp(self):
        # NOTE(review): NodeData.load is used here as an alternate
        # constructor (returns the instance); elsewhere in this file the
        # mutating nd.load(...) style is used -- confirm against the
        # libpgm version in use.
        self.nd = NodeData.load("unittestdyndict.txt")
        self.skel = GraphSkeleton()
        self.skel.load("unittestdyndict.txt")
        self.skel.toporder()
        self.d = DynDiscBayesianNetwork(self.skel, self.nd)

    def test_randomsample(self):
        sample = self.d.randomsample(10)
        for i in range(1, 10):
            # Every time step of one sample must share the same
            # 'Difficulty' value.
            self.assertEqual(sample[0]['Difficulty'], sample[i]['Difficulty'])
Exemplo n.º 10
0
 def setUp(self):
     """Fixture: aggregates of forward samples and Gibbs samples."""
     skel = GraphSkeleton()
     skel.load("unittestdict.txt")
     skel.toporder()
     nodedata = NodeData.load("unittestdict.txt")
     self.bn = DiscreteBayesianNetwork(nodedata)
     agg = SampleAggregator()
     # Aggregate 50 forward samples from the network.
     agg.aggregate(self.bn.randomsample(50))
     self.rseq = agg.seq
     self.ravg = agg.avg
     self.fn = TableCPDFactorization(self.bn)
     evidence = dict(Letter='weak')
     # Aggregate 51 Gibbs samples conditioned on the evidence; the same
     # aggregator instance is reused, so seq/avg are captured after each run.
     agg.aggregate(self.fn.gibbssample(evidence, 51))
     self.gseq = agg.seq
     self.gavg = agg.avg
Exemplo n.º 11
0
class TestHyBayesianNetwork(unittest.TestCase):
    """Tests for the hybrid (mixed node type) bayesian network."""

    def setUp(self):
        self.nd = NodeData()
        self.nd.load("unittesthdict.txt")
        # Convert the raw node entries into node-type instances.
        self.nd.entriestoinstances()
        self.skel = GraphSkeleton()
        self.skel.load("unittestdict.txt")
        self.skel.toporder()
        self.hybn = HyBayesianNetwork(self.skel, self.nd)

    def test_randomsample(self):
        sample = self.hybn.randomsample(1)[0]
        self.assertTrue(isinstance(sample['Grade'], float))
        self.assertTrue(isinstance(sample['Intelligence'], str))
        # The custom 'SAT' node type ends its output with this marker.
        self.assertEqual(sample["SAT"][-12:], 'blueberries!')
Exemplo n.º 12
0
 def setUp(self):
     """Fixture: aggregates of forward samples and Gibbs samples."""
     skel = GraphSkeleton()
     skel.load("unittestdict.txt")
     skel.toporder()
     nodedata = NodeData.load("unittestdict.txt")
     self.bn = DiscreteBayesianNetwork(nodedata)
     agg = SampleAggregator()
     # Aggregate 50 forward samples from the network.
     agg.aggregate(self.bn.randomsample(50))
     self.rseq = agg.seq
     self.ravg = agg.avg
     self.fn = TableCPDFactorization(self.bn)
     evidence = dict(Letter='weak')
     # Aggregate 51 Gibbs samples conditioned on the evidence; the same
     # aggregator instance is reused, so seq/avg are captured after each run.
     agg.aggregate(self.fn.gibbssample(evidence, 51))
     self.gseq = agg.seq
     self.gavg = agg.avg
Exemplo n.º 13
0
class TestHyBayesianNetwork(unittest.TestCase):
    """Hybrid-network tests using HybridNodeData.load as constructor."""

    def setUp(self):
        self.nd = HybridNodeData.load("unittesthdict.txt")
        # Convert the raw node entries into node-type instances.
        self.nd.entriestoinstances()
        self.skel = GraphSkeleton()
        self.skel.load("unittestdict.txt")
        self.skel.toporder()
        self.hybn = HyBayesianNetwork(self.skel, self.nd)

    def test_randomsample(self):
        sample = self.hybn.randomsample(1)[0]
        self.assertTrue(isinstance(sample['Grade'], float))
        self.assertTrue(isinstance(sample['Intelligence'], str))
        # The custom 'SAT' node type ends its output with this marker.
        self.assertEqual(sample["SAT"][-12:], 'blueberries!')
Exemplo n.º 14
0
def createData():
   """Sample 1000 cases from the job-interview network, run one
   conditional-independence test, then learn a structure from the data.
   """
   nd = NodeData()
   skel = GraphSkeleton()
   fpath = "job_interview.txt"
   nd.load(fpath)
   skel.load(fpath)
   skel.toporder()
   bn = DiscreteBayesianNetwork(skel, nd)

   learner = PGMLearner()
   data = bn.randomsample(1000)
   X, Y = 'Grades', 'Offer'
   # Conditional-independence test of X vs Y given witness 'Interview'.
   c,p,w=learner.discrete_condind(data, X, Y, ['Interview'])
   print "independence between X and Y: ", c, " p-value ", p, " witness node: ", w
   result = learner.discrete_constraint_estimatestruct(data)
   print result.E
Exemplo n.º 15
0
 def construct(self):
     """Assemble and return a DiscreteBayesianNetwork from self.nodes.

     self.nodes maps node name -> dict in libpgm's Vdata format
     (with 'parents', 'children' and CPD entries).
     """
     skel = GraphSkeleton()
     skel.V = self.nodes.keys()
     skel.E = []
     # Derive edges from each node's parent list and mirror them into
     # the parents' 'children' lists.
     for node, ndata in self.nodes.iteritems():
         if ndata['parents']:
             for p in ndata['parents']:
                 skel.E.append([p, node])
                 self.nodes[p]['children'].append(node)
     # libpgm expects leaf nodes to carry children = None, not [].
     for node, ndata in self.nodes.iteritems():
         if len(ndata['children']) == 0:
             ndata['children'] = None
     data = NodeData()
     data.Vdata = self.nodes
     skel.toporder()
     # NOTE(review): this method mutates self.nodes in place, so calling
     # construct() twice would duplicate children entries -- confirm
     # callers invoke it only once.
     bn = DiscreteBayesianNetwork(skel, data)
     return bn
Exemplo n.º 16
0
class TestGraphSkeleton(unittest.TestCase):
    """Parent/child lookup and topological ordering on a hand-built
    5-vertex skeleton with edges 5->1 and 1->2."""

    def setUp(self):
        self.instance = GraphSkeleton()
        self.instance.V = [1, 2, 3, 4, 5]
        self.instance.E = [[5, 1], [1, 2]]

    def test_getparents(self):
        self.assertEqual(self.instance.getparents(1), [5])
        self.assertEqual(self.instance.getparents(4), [])

    def test_getchildren(self):
        self.assertEqual(self.instance.getchildren(5), [1])
        self.assertEqual(self.instance.getchildren(4), [])

    def test_toporder(self):
        self.instance.toporder()
        # Ancestors must precede descendants after ordering.
        self.assertTrue(self.instance.V.index(5) < self.instance.V.index(1))
        self.assertTrue(self.instance.V.index(5) < self.instance.V.index(2))
Exemplo n.º 17
0
def net2():
    """Learn linear-Gaussian parameters for the net.txt skeleton from
    read_data.getdata2() and print the probability of a held-out slice.
    """
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load("net.txt")  # an input file
    skel.load("net.txt")

    # topologically order graphskeleton
    skel.toporder()

    # load bayesian network
    # NOTE(review): lgbn is never used below -- parameters are
    # re-estimated from data instead; confirm it can be dropped.
    lgbn = LGBayesianNetwork(skel, nd)

    in_data = read_data.getdata2()
    learner = PGMLearner()
    bn = learner.lg_mle_estimateparams(skel, in_data)

    # Score records 300-499 against the learned network.
    p = cal_prob(in_data[300:500], bn)
    print p
    return 0
Exemplo n.º 18
0
def net2():
    """Learn linear-Gaussian parameters for the net.txt skeleton from
    read_data.getdata2() and print the probability of a held-out slice
    (denser-formatted duplicate).
    """
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load("net.txt")  # an input file
    skel.load("net.txt")

    # topologically order graphskeleton
    skel.toporder()

    # load bayesian network
    # NOTE(review): lgbn is never used below -- parameters are
    # re-estimated from data instead; confirm it can be dropped.
    lgbn = LGBayesianNetwork(skel, nd)

    in_data=read_data.getdata2()
    learner = PGMLearner()
    bn=learner.lg_mle_estimateparams(skel,in_data)

    # Score records 300-499 against the learned network.
    p=cal_prob(in_data[300:500],bn)
    print p
    return 0
Exemplo n.º 19
0
class TestGraphSkeleton(unittest.TestCase):
    """Parent/child lookup and topological ordering on a hand-built
    5-vertex skeleton with edges 5->1 and 1->2 (denser-spacing
    duplicate)."""

    def setUp(self):
        self.instance = GraphSkeleton()
        self.instance.V = [1,2,3,4,5]
        self.instance.E = [[5,1],[1,2]]

    def test_getparents(self):
        self.assertEqual(self.instance.getparents(1), [5])
        self.assertEqual(self.instance.getparents(4), [])

    def test_getchildren(self):
        self.assertEqual(self.instance.getchildren(5), [1])
        self.assertEqual(self.instance.getchildren(4), [])

    def test_toporder(self):
        self.instance.toporder()
        # Ancestors must precede descendants after ordering.
        self.assertTrue(self.instance.V.index(5)<self.instance.V.index(1))
        self.assertTrue(self.instance.V.index(5)<self.instance.V.index(2))
Exemplo n.º 20
0
    def setUp(self):
        """Build a learner plus discrete and linear-Gaussian sample sets."""
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - linear-Gaussian
        nda = NodeData.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel
Exemplo n.º 21
0
    def setUp(self):
        """Build a learner plus discrete and linear-Gaussian sample sets."""
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - linear-Gaussian
        nda = NodeData.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel
Exemplo n.º 22
0
    def test_structure_estimation(self):
        """Sample 8000 cases from the teacher network and check the
        structure-estimation service returns a 5-node graph with at
        least one edge."""
        req = DiscreteStructureEstimationRequest()

        skel = GraphSkeleton()
        skel.load(self.data_path)
        skel.toporder()
        teacher_nd = NodeData()
        teacher_nd.load(self.teacher_data_path)
        bn = DiscreteBayesianNetwork(skel, teacher_nd)
        data = bn.randomsample(8000)
        # Pack each sampled assignment into a DiscreteGraphState message.
        for v in data:
            gs = DiscreteGraphState()
            for k_s, v_s in v.items():
                gs.node_states.append(DiscreteNodeState(node=k_s, state=v_s))
            req.states.append(gs)

        res = self.struct_estimate(req)
        self.assertIsNotNone(res.graph)
        self.assertEqual(len(res.graph.nodes), 5)
        self.assertGreater(len(res.graph.edges), 0)
Exemplo n.º 23
0
    def test_structure_estimation(self):
        """Sample 8000 cases from the teacher network and check the
        structure-estimation service returns a 5-node graph with at
        least one edge."""
        req = DiscreteStructureEstimationRequest()

        skel = GraphSkeleton()
        skel.load(self.data_path)
        skel.toporder()
        teacher_nd = NodeData()
        teacher_nd.load(self.teacher_data_path)
        bn = DiscreteBayesianNetwork(skel, teacher_nd)
        data = bn.randomsample(8000)
        # Pack each sampled assignment into a DiscreteGraphState message.
        for v in data:
            gs = DiscreteGraphState()
            for k_s, v_s in v.items():
                gs.node_states.append(DiscreteNodeState(node=k_s, state=v_s))
            req.states.append(gs)

        res = self.struct_estimate(req)
        self.assertIsNotNone(res.graph)
        self.assertEqual(len(res.graph.nodes), 5)
        self.assertGreater(len(res.graph.edges), 0)
Exemplo n.º 24
0
def main():
    """EM-train the net4 network on the first data slice and report
    prediction accuracy on training and held-out slices."""
    in_data = read_data.getdata()
    f_data = format_data(in_data)
    nd = NodeData()
    nd.load("net4.txt")  # an input file
    skel = GraphSkeleton()
    skel.load("net4.txt")
    skel.toporder()
    bn = DiscreteBayesianNetwork(skel, nd)

    # training dataset: 70%
    bn2 = em(f_data[1:6000], bn, skel)

    pr_training = precision(f_data[1:6000], bn2)

    print "Prediction accuracy for training data:", pr_training[1]

    # testing dataset: 30%
    pr = precision(f_data[6700:6800], bn2)
    print "Prediction accuracy for test data:", pr[1]
Exemplo n.º 25
0
    def test_param_estimation(self):
        """Sample 200 cases from the teacher network and check the
        parameter-estimation service returns all 5 nodes."""
        req = DiscreteParameterEstimationRequest()

        # load graph structure
        skel = GraphSkeleton()
        skel.load(self.data_path)
        req.graph.nodes = skel.V
        req.graph.edges = [GraphEdge(k, v) for k,v in skel.E]
        skel.toporder()

        # generate trial data
        teacher_nd = NodeData()
        teacher_nd.load(self.teacher_data_path)
        bn = DiscreteBayesianNetwork(skel, teacher_nd)
        data = bn.randomsample(200)
        # Pack each sampled assignment into a DiscreteGraphState message.
        for v in data:
            gs = DiscreteGraphState()
            for k_s, v_s in v.items():
                gs.node_states.append(DiscreteNodeState(node=k_s, state=v_s))
            req.states.append(gs)

        self.assertEqual(len(self.param_estimate(req).nodes), 5)
Exemplo n.º 26
0
    def test_param_estimation(self):
        """Sample 200 cases from the teacher network and check the
        parameter-estimation service returns all 5 nodes."""
        req = DiscreteParameterEstimationRequest()

        # load graph structure
        skel = GraphSkeleton()
        skel.load(self.data_path)
        req.graph.nodes = skel.V
        req.graph.edges = [GraphEdge(k, v) for k, v in skel.E]
        skel.toporder()

        # generate trial data
        teacher_nd = NodeData()
        teacher_nd.load(self.teacher_data_path)
        bn = DiscreteBayesianNetwork(skel, teacher_nd)
        data = bn.randomsample(200)
        # Pack each sampled assignment into a DiscreteGraphState message.
        for v in data:
            gs = DiscreteGraphState()
            for k_s, v_s in v.items():
                gs.node_states.append(DiscreteNodeState(node=k_s, state=v_s))
            req.states.append(gs)

        self.assertEqual(len(self.param_estimate(req).nodes), 5)
Exemplo n.º 27
0
def main():
    """EM-train the net4 network on the first data slice and report
    prediction accuracy on training and held-out slices
    (looser-formatting duplicate)."""
    in_data=read_data.getdata()
    f_data=format_data(in_data)
    nd = NodeData()
    nd.load("net4.txt")    # an input file
    skel = GraphSkeleton()
    skel.load("net4.txt")
    skel.toporder()
    bn=DiscreteBayesianNetwork(skel,nd)


    # training dataset: 70%
    bn2=em(f_data[1:6000],bn,skel)

    pr_training = precision(f_data[1:6000],bn2)

    print "Prediction accuracy for training data:" , pr_training[1]

    # testing dataset: 30%
    pr=precision(f_data[6700:6800],bn2)
    print "Prediction accuracy for test data:", pr[1]
truth_l = []
for row in truth_r:
	truth_l.append(row[0])

w = csv.writer(open("bayesian_outcome.txt", "wb"))

count = 0

for  i in range(104):
	nd = NodeData()
	skel = GraphSkeleton()
	nd.load('bayes_net/'+str(i)+".txt")    # any input file
	skel.load('bayes_net/'+str(i)+".txt")

	# topologically order graphskeleton
	skel.toporder()

	# load bayesian network
	# load bayesian network
	bn = DiscreteBayesianNetwork(skel, nd)
	dic1 = {}
	k = 1
	for c in data_l[i]:
		dic1[str(k)] = str(c)
		k += 2
	
	print dic1
	k = 2 * len(data_l[i]) - 2
	dic2 = {}
	word = ''
	while k >= 0:
Exemplo n.º 29
0
    def train(self):
        """Train the associated module for one epoch.

        Builds a training set from the stored (state, action, reward)
        sequences, tops it up with negative-reward samples, and fits a
        linear-Gaussian bayesian network over the result.
        """
        assert len(self.ds) > 0, "Dataset cannot be empty."


            
        gbds = []  # samples with non-negative reward ("good" set)
        
        ds2 = []  # samples with reward == -1 (filler/negative set)
        for seq in self.ds:
            for state_, action_, reward_ in seq:

                #sample = dict(theta=state_[0],thetaV=state_[1],s=state_[2],sV=state_[3],Action=action_[0],Reward=reward_[0])
                sample = dict(StateA=state_[0],StateB=state_[2],StateC=state_[1],StateD=state_[3],Action=action_[0],Reward=reward_[0])

                 #print state_, action_, reward_
#                sample = dict(StateA=state_[0],StateB=state_[2],StateC=state_[1],StateD=state_[3],Action=action_[0],Reward=reward_[0])
                #sample = dict(theta=state_[0],thetaPrime=state_[2],s=state_[1],sPrime=state_[3],Action=action_[0],Reward=reward_[0])

                if sample["Reward"] >= 0:
                    gbds.append(sample)
                    
                if sample["Reward"] == -1:
                    ds2.append(sample)
                #print sample["Reward"]

        # sort samples for highest reward
#        bdss = sorted(gbds, key=lambda tup: tup["Reward"],reverse=True)
#        
        #print "BDS: "
        #print json.dumps(gbds, indent=2)
#        print "BDSS: "
#        print json.dumps(bdss, indent=2)
        
        #tokeep = bdss[:max(2,len(bdss)/2)]
        
        #print bds
        # Pad the good set up to N samples with the newest negatives.
#        print "data size: ", len(bds),  len(gbds)
        N = 200
        if len(gbds) < N:
            l = N - len(gbds)
            n = len(ds2)
            
            t = len(ds2[n-l:])  # NOTE(review): t is unused
            gbds.extend(ds2[n-l:])
            
        print "ds:, ", len(gbds), len(ds2)
        
        
        # Not enough data yet: flag the module to keep exploring.
        if len(gbds) < 100:
#            print "burn"
            self.module.burn = True
            return
        else:
            self.module.burn = False
            
        
        # NOTE(review): unreachable -- len(gbds) < 5 implies < 100,
        # which already returned above.
        if len(gbds) < 5: #there was no rewarding action, so nothing to learn
          self.module.burn = True
          return
          
        N = 200
        if len(gbds) > N:
            #only take the newest N samples

            l = len(gbds)
            gbds = gbds[l-N:]
#            print "new effective set", len(gbds)
        
        skel = GraphSkeleton()
        #load network topology
        skel.load("net2.txt")
#        skel.load("workfile")
        skel.toporder()


        # estimate parameters
        self.module.net = self.learner.lg_mle_estimateparams(skel, gbds)
Exemplo n.º 30
0
import json

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.pgmlearner import PGMLearner

# Generate training data by sampling a known linear-Gaussian network,
# then recover its structure with constraint-based learning.
nd = NodeData()
nd.load("gaussGrades.txt")  # an input file
skel = GraphSkeleton()
skel.load("gaussGrades.txt")
skel.toporder()
lgbn = LGBayesianNetwork(skel, nd)
data = lgbn.randomsample(8000)

print data

# instantiate my learner
learner = PGMLearner()

# estimate structure from the sampled data
result = learner.lg_constraint_estimatestruct(data)

# output the recovered edge list
print json.dumps(result.E, indent=2)
Exemplo n.º 31
0
def fun(inputData):
    """Given the 11 evidence values in *inputData*, learn CPDs for the
    hotel-review network and return a dict mapping each 'Overall'
    rating value to its posterior probability."""

    # Convert a pandas frame into the list-of-dicts sample format
    # expected by libpgm's learners.
    def format_data(df):
        result = []
        for row in df.itertuples():
            #print(row.Pclass)
            result.append(
                dict(great=row.great,
                     good=row.good,
                     clean=row.clean,
                     comfortable=row.comfortable,
                     bad=row.bad,
                     old=row.old,
                     Cleanliness=row.Cleanliness,
                     Location=row.Location,
                     Service=row.Service,
                     Rooms=row.Rooms,
                     Value=row.Value,
                     Overall=row.Overall))
        return result

    # load all preprocessed training data
    df = pd.read_csv('features.csv', sep=',')

    # format data so it can be processed by libpgm functions
    node_data = format_data(df)

    skel = GraphSkeleton()
    # load structure of our net
    skel.load("./our-skel.txt")
    # setting the topologic order
    skel.toporder()
    # learner which will estimate parameters and, if needed, net structure
    learner = PGMLearner()

    # estimating parameters for our own model
    res = learner.discrete_mle_estimateparams(skel, node_data)

    # get CPT
    a = TableCPDFactorization(res)
    # compute the query and evidences as dicts
    query = dict(Overall=1)
    # prepare dictionary of values (after each '=' goes the value read
    # from the GUI)

    evidence = dict(Value=inputData[0],
                    Location=inputData[1],
                    Cleanliness=inputData[2],
                    Service=inputData[3],
                    Rooms=inputData[4],
                    bad=inputData[5],
                    old=inputData[6],
                    good=inputData[7],
                    great=inputData[8],
                    comfortable=inputData[9],
                    clean=inputData[10])

    print(query)
    print(evidence)

    # run the query given evidence
    result = a.condprobve(query, evidence)

    print json.dumps(result.vals, indent=2)
    #res.Vdata["Overall"]["vals"][pos]
    #arr=[]
    # Map each 'Overall' outcome label to its probability.
    dizionario = {}
    for i in range(1, 6):
        dizionario[res.Vdata["Overall"]["vals"][i - 1]] = result.vals[i - 1]
    #    arr.append(dizionario)
    #print(str(arr))
    return dizionario
Exemplo n.º 32
0
 def setUp(self):
     """Instantiate the DiscreteBayesianNetwork under test."""
     graph = GraphSkeleton()
     graph.load("unittestdict.txt")
     graph.toporder()
     # NodeData.load() acts as an alternate constructor here.
     params = NodeData.load("unittestdict.txt")
     self.instance = DiscreteBayesianNetwork(params)
Exemplo n.º 33
0
# result = learner.discrete_mle_estimateparams(skel, data)
#
# # output - toggle comment to see
# print json.dumps(result.Vdata, indent=2)

# (5) --------------------------------------------------------------------------
# Compute the probability distribution over a specific node or nodes

# load nodedata and graphskeleton
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")

# toporder graph skeleton
# NOTE(review): this prints toporder()'s return value -- elsewhere in
# this file the call is used purely for its in-place effect; confirm
# the print is intentional.
print skel.toporder()

# evidence and query for the conditional-probability computation
evidence = {"Intelligence": "high"}
query = {"Grade": "A"}

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

# load factorization
fn = TableCPDFactorization(bn)

# # calculate probability distribution
# result = fn.condprobve(query, evidence)
# print json.dumps(result.vals, indent=2)
# print json.dumps(result.scope, indent=2)
Exemplo n.º 34
0
def bn_learn(attr, cicli, passed_file):
    """Learn a market-sentiment bayesian network from AFINN sentiment
    files and evaluate it with MCMC.

    attr selects which sentiment features become network variables
    (modes 0-5 below); cicli is forwarded to mcmc_json as the iteration
    count; passed_file is the evaluation input.
    Returns (estimated, real) from mcmc_json.
    """
    path_to_sentiments = 'sentiment_AFINN'

    print "Using AFINN sentiment dictionary"

    if attr == 0:
        print "Considering tweets' number"
    elif attr == 1:
        print "Considering averaged number of positive, negative and neutral tweets"
    elif attr == 2:
        print "Considering averaged value of positive and negative tweets"
    elif attr == 3:
        print "Considering positive and negative tweets\' increment"
    elif attr == 4:
        print "Considering bullisment index obtained by number of tweets sentiment"
    elif attr == 5:
        print "Considering bullisment index obtained by tweets value of sentiment"

    print "And considering market trend"

    # Build one training sample per sentiment JSON file.
    all_data = []
    files = [
        path_to_sentiments + "/" + file
        for file in os.listdir(path_to_sentiments) if file.endswith('.json')
    ]
    for file in files:
        with open(file) as sentiment_file:
            data = json.load(sentiment_file)

            # Select the feature set for the chosen attr mode.
            vdata = {}
            if attr == 0:
                vdata["com"] = data["n_tweets"]
            elif attr == 1:
                vdata["pos"] = data["n_pos_ave"]
                vdata["neg"] = data["n_neg_ave"]
                vdata["neu"] = data["n_neu_ave"]
            elif attr == 2:
                vdata["pos"] = data["pos_val_ave"]
                vdata["neg"] = data["neg_val_ave"]
            elif attr == 3:
                vdata["pos"] = data["pos_inc"]
                vdata["neg"] = data["neg_inc"]
            elif attr == 4:
                vdata["com"] = data["bull_ind"]
            elif attr == 5:
                vdata["com"] = data["bull_ind_val"]

            vdata["market"] = data["market_inc"]

            all_data.append(vdata)

    # Pick the skeleton matching the number of variables per sample.
    skel = GraphSkeleton()
    if len(all_data[0]) == 2:
        skel.load("network_struct_1_vertex.json")
        print "Loading structure with 2 node"
    elif len(all_data[0]) == 3:
        skel.load("network_struct_2_vertex.json")
        print "Loading structure with 3 node"
    elif len(all_data[0]) == 4:
        skel.load("network_struct_3_vertex.json")
        print "Loading structure with 4 node"
    skel.toporder()

    learner = PGMLearner()
    result = learner.lg_mle_estimateparams(skel, all_data)
    for key in result.Vdata.keys():
        result.Vdata[key]['type'] = 'lg'

    # Empirical class frequencies of the market trend (1 / 0 / other).
    prob_pos = prob_neg = prob_neu = 0
    for data in all_data:
        if data['market'] == 1:
            prob_pos += 1
        elif data['market'] == 0:
            prob_neu += 1
        else:
            prob_neg += 1
    prob_pos = float(prob_pos) / float(len(all_data))
    prob_neg = float(prob_neg) / float(len(all_data))
    prob_neu = float(prob_neu) / float(len(all_data))

    # Replace the learned linear-Gaussian 'market' node with a discrete
    # node whose prior is the empirical trend distribution.
    tmp = {}
    tmp['numoutcomes'] = len(all_data)
    tmp['cprob'] = [prob_pos, prob_neg, prob_neu]
    tmp['parents'] = result.Vdata['market']['parents']
    tmp['vals'] = ['positive', 'negative', 'neutral']
    tmp['type'] = 'discrete'
    tmp['children'] = result.Vdata['market']['children']
    result.Vdata['market'] = tmp

    node = Discrete(result.Vdata["market"])
    print "Loading node as Discrete"

    estimated, real = mcmc_json(passed_file, attr, cicli, node)

    return estimated, real
Exemplo n.º 35
0
    def learn(self):
        """Estimate a linear-Gaussian network structure from the stored
        (state, action, reward) dataset, render it to eg.png, and dump
        the skeleton as JSON to 'workfile'.
        """
        print "ds: ", len(self.dataset)
        #print self.dataset
        
        data = []
        
        rw = []
        
        
        # First pass: find the best reward seen anywhere in the dataset.
        bestreward = -100
        for seq in self.dataset:
            for state_, action_, reward_ in seq:
                if reward_[0] > bestreward:
                    bestreward = reward_[0]
                
                # find limit for theta
                 
        print "bestrw", bestreward
        # Second pass: collect the state components of best-reward
        # samples to derive per-variable value limits.
        nds = []
        lt=[]
        ls = []
        ltv =[]
        lsv=[]
        
        i = 0
        for seq in self.dataset:
            for state_, action_, reward_ in seq:
#                if reward_[0] == 0:
#                    print state_, action_, reward_
                #print state_, reward_
                if reward_[0] == bestreward:
                    ns = (state_, action_[0], reward_[0])
                    nds.append(ns)
#                    print state_[0], state_[2], reward_[0], bestreward
                    
                    t = state_[0]
                    tv= state_[1]
                    
                    s = state_[2] 
                    sv = state_[3]    
                    if t > 0.05:
                        print "hmmm,", i, t
                        #raise Exception(i)
                        
                    i += 1
                    lt.append(t)
                    ls.append(s)
                    ltv.append(tv)
                    lsv.append(sv)
        
        
        limits = dict(theta=[min(lt),max(lt)],s=[min(ls),max(ls)],thetaV=[min(ltv),max(ltv)],sV=[min(lsv),max(lsv)])
        
        print "limits: ", limits
                    
#        print "all good things:", nds
                    
                
                
        # Third pass: convert the dataset into flat training samples.
        for seq in self.dataset:
            for state_, action_, reward_ in seq:
                
#                sample = dict(theta=state_[0],thetaPrime=state_[1],s=state_[2],sPrime=state_[3],Action=action_[0],Reward=reward_[0])
#
#                
#                dtpo = min( abs(sample["thetaPrime"] - limits["theta"][0]), abs(sample["thetaPrime"] - limits["theta"][1]))
#                dto = min( abs(sample["theta"] - limits["theta"][0]), abs(sample["theta"] - limits["theta"][1]))
#                dspo = min( abs(sample["sPrime"] - limits["s"][0]), abs(sample["sPrime"] - limits["s"][1]))
#                dso = min( abs(sample["s"] - limits["s"][0]), abs(sample["s"] - limits["s"][1]))
#                             
#               #print dspo, dso
#                
#                netsample = dict(theta=sample["theta"],s=sample["s"],Action=sample["Action"],Reward=sample["Reward"])
#                # did this action improve theta or s??
#                if dtpo <= dto or dspo <= dso: #yes it did            
##                    data.append(netsample)
#                    rw.append(sample["Reward"])
                sample = dict(theta=state_[0],thetaV=state_[1],s=state_[2],sV=state_[3],Action=action_[0],Reward=reward_[0])

                #print state_, action_, reward_
                #print sample
                if sample["Reward"] != 990:
                    data.append(sample)
                    # NOTE(review): random() < 1 always, so this guard
                    # never fires; and `continue` at the end of the loop
                    # body is a no-op anyway -- dead code.
                    if numpy.random.random() >= 9.1:
                        continue
                
                
          

        import matplotlib.pyplot as plt
        import pandas as pd
        # NOTE(review): rw is only appended in the commented-out code
        # above, so this frame is always empty.
        df = pd.DataFrame(rw)
#        print df        
        
#        plt.figure()
#        df[0].diff().hist()
        
        # instantiate my learner 
        learner = PGMLearner()
        
        # estimate structure (loop runs exactly once)
        rbn = []
        for i in range(0,1):
            result = learner.lg_constraint_estimatestruct(data,bins=10, pvalparam=0.05)
            rbn.append(result)
            print len(result.E), result.E
            
        result = rbn[0]
        
        # output - toggle comment to see
       

        print json.dumps(result.V, indent=2)
        print len(result.E), "Edges", result.E
        
        import pydot

        # this time, in graph_type we specify we want a DIrected GRAPH
        graph = pydot.Dot(graph_type='digraph')
        nd = {}
        for n in result.V:
            nd[n] = pydot.Node(n)
            graph.add_node(nd[n])
            
        for e in result.E:
            
            graph.add_edge(pydot.Edge(nd[e[0]], nd[e[1]]))
            
        graph.write_png('eg.png')
        from IPython.display import Image
        Image('eg.png')
        
        
        # Persist the learned structure as a libpgm-style JSON skeleton.
        f = open('workfile', 'w')
        f.write("{\n \"V\":")
        f.write(json.dumps(result.V))
        f.write(",\n \"E\":")
        f.write(json.dumps(result.E))
        f.write("}")
        f.close()
        
        # Sanity check: the dumped file must load and topologically sort.
        skel = GraphSkeleton()
        skel.load("workfile")
        
        # topologically order graphskeleton
        skel.toporder()
        

        return
Exemplo n.º 36
0
# Checking if input from user was appropriate.
if set(userinput).issubset(dictionary):
    # initializing probabilities lists
    wkdayProbList = []
    hourProbList = []
    locatProbList = []
    activProbList = []

    # INITIALIZING BN 1
    # load nodedata and graphskeleton
    nd1 = NodeData()
    skel1 = GraphSkeleton()
    nd1.load(path_bn1)
    skel1.load(path_bn1)
    skel1.toporder()  # toporder graph skeleton

    # INITIALIZING BN 2
    # load nodedata and graphskeleton
    nd2 = NodeData()
    skel2 = GraphSkeleton()
    nd2.load(path_bn2)
    skel2.load(path_bn2)
    skel2.toporder()  # toporder graph skeleton

    # FINDING NEXT ACTIVITY ATTRIBUTES THROUGH INFERENCE ON BN 1
    # wkday variable query
    evidence1 = dict(wkdayT0=userinput[0])
    for i, item in enumerate(wkdayValsList):
        # loading bayesian network and factorization - needs to be done at every iteration
        bn1 = DiscreteBayesianNetwork(skel1, nd1)
Exemplo n.º 37
0
File: pgm.py Project: ml4ai/b3
class Graph:
    """Assemble a libpgm DiscreteBayesianNetwork from locally registered
    nodes and answer posterior queries.

    Node objects are expected to expose .name, .values, .cpt and
    .parents; they are registered with addnode() and observations are
    recorded with addobs().  Call setup() before getPost().
    """

    def __init__(self):
        self.node = dict()  # node name -> node object
        self.obs = dict()   # node name -> [node, observed value]

    def addnode(self, node):
        """Register *node* under its name."""
        self.node[node.name] = node

    def removeNode(self, name):
        """Remove the node called *name*; unknown names are ignored."""
        # dict.has_key() was removed in Python 3; `in` is the portable
        # spelling and behaves identically.
        if name in self.node:
            del self.node[name]

    def addobs(self, node, value):
        """Record that *node* was observed with *value*."""
        self.obs[node.name] = [node, value]

    def removeObs(self, name):
        """Drop the observation for *name*; unknown names are ignored."""
        if name in self.obs:
            del self.obs[name]

    def setup(self):
        """Build the DiscreteBayesianNetwork and its CPD factorization
        from the registered nodes."""
        self.nd = NodeData()
        self.skel = GraphSkeleton()
        self.skel.V, self.skel.E = [], []
        self.nd.Vdata = {}
        for i, node in enumerate(self.node.values()):
            dNode = {}
            node.sId = str(i)
            dNode["numoutcomes"] = len(node.values)
            dNode["vals"] = node.values
            dNode["cprob"] = node.cpt
            self.skel.V.append(node.name)
            # Translate parent links into skeleton edges.
            aParents = []
            for parent in node.parents:
                if parent == None: continue
                aParents.append(parent.name)
                self.skel.E.append([parent.name, node.name])
            # libpgm expects None (not []) for root nodes.
            dNode["parents"] = aParents if len(aParents) > 0 else None
            self.nd.Vdata[node.name] = dNode
        self.skel.toporder()
        self.bn = DiscreteBayesianNetwork(self.skel, self.nd)
        self.fn = TableCPDFactorization(self.bn)

    def getPost(self, query, evidence):
        """Return the posterior for *query* given *evidence*."""
        result = self.fn.specificquery(query, evidence)
        return result

    def write2dot(self, fname="graph.dot"):
        """Write the graph in Graphviz dot format; observed nodes are
        drawn filled in blue."""
        f = open(fname, "w")
        f.write("digraph G {\n")
        f.write("node[shape=circle, width=0.4];\n")
        for node in self.node.values():
            l = "\"" + node.name + "\""
            f.write(node.sId)
            # FIX: the original tested `node in map(lambda x: x[0],
            # self.obs)`, which iterates the dict KEYS (strings) and
            # compares the node object against each name's first
            # character -- always False.  Observations are keyed by
            # node name, so test that directly.
            if node.name in self.obs:
                f.write("[label=" + l + ",style=filled,color=blue]")
            else:
                f.write("[label=" + l + "]")
            f.write(";\n")
            for parent in node.parents:
                if parent == None: continue
                f.write(parent.sId + " -> " + node.sId + ";\n")
        f.write("}")
        f.close()

    def write2pdf(self, fname="graph.pdf"):
        """Render the graph to PDF via the external `dot` tool."""
        if ".pdf" in fname:
            fname = fname[:-4]
        pdfFile = fname + ".pdf"
        dotFile = fname + ".dot"
        self.write2dot(dotFile)
        call(['dot', '-Tpdf', dotFile, '-o', pdfFile])