Example No. 1
from libpgm.pgmlearner import PGMLearner
from libpgm.tablecpdfactorization import TableCPDFactorization

def estimate_distrib(skel, samples, query, evidence):
    # printdist is a user-defined helper (not part of libpgm) that turns
    # the resulting factor into a distribution table
    learner = PGMLearner()
    bayesnet = learner.discrete_mle_estimateparams(skel, samples)
    tablecpd = TableCPDFactorization(bayesnet)
    fac = tablecpd.condprobve(query, evidence)
    df2 = printdist(fac, bayesnet)
    return df2
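
A minimal usage sketch for the function above. The file name "net.json", the node names, and the sample count are assumptions for illustration; printdist remains the user-supplied helper:

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork

nd = NodeData()
nd.load("net.json")      # hypothetical node-data file
skel = GraphSkeleton()
skel.load("net.json")    # hypothetical skeleton file
skel.toporder()

# sample from a known network, then recover a conditional distribution
samples = DiscreteBayesianNetwork(skel, nd).randomsample(1000)
df = estimate_distrib(skel, samples, query=dict(Grade='A'), evidence=dict(Difficulty='easy'))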
Example No. 2
from libpgm.pgmlearner import PGMLearner

def learn_net(data):
    '''Learn a Bayes net (structure and parameters) from raw data.'''
    data_dict = data.to_dict('records')
    learner = PGMLearner()
    skel = learner.discrete_constraint_estimatestruct(data=data_dict, indegree=1)
    skel.toporder()
    disc_bayes_net = learner.discrete_mle_estimateparams(graphskeleton=skel, data=data_dict)
    return disc_bayes_net
Example No. 3
def learn_net_discretize(data, vars_to_discretize, n_bins):
    '''Learn a Bayes net after the selected variables have been discretized.'''
    # discretize is a user-defined helper returning the binned data and bin edges
    data_subset, bins = discretize(data, vars_to_discretize, n_bins=n_bins)
    data_dict = data_subset.to_dict('records')
    learner = PGMLearner()
    skel = learner.discrete_constraint_estimatestruct(data=data_dict, indegree=1)
    skel.toporder()
    disc_bayes_net = learner.discrete_mle_estimateparams(graphskeleton=skel, data=data_dict)
    return disc_bayes_net, bins
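
A hedged usage sketch for the two helpers above on a toy DataFrame; the column names, values, and bin count are made up, and discretize is the user helper referenced above:

import pandas as pd

df = pd.DataFrame({
    'A': [0, 1, 0, 1, 1, 0, 1, 0],
    'B': [1, 1, 0, 0, 1, 0, 0, 1],
    'C': [0.1, 2.3, 1.7, 0.4, 2.9, 1.1, 2.2, 0.8],  # continuous column
})

bn = learn_net(df[['A', 'B']])                        # discrete columns only
bn2, bins = learn_net_discretize(df, ['C'], n_bins=3) # discretize C first
print(bn.V, bn.E)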
Example No. 4
import json

from libpgm.graphskeleton import GraphSkeleton
from libpgm.pgmlearner import PGMLearner

def test_libpgm(df1):
    # list() is needed on Python 3, where .values() returns a view
    data = list(df1.T.to_dict().values())

    skel = GraphSkeleton()
    skel.load("bn_struct.txt")

    learner = PGMLearner()
    result = learner.discrete_mle_estimateparams(skel, data)

    print(json.dumps(result.Vdata, indent=2))
Example No. 5
import os

from libpgm.graphskeleton import GraphSkeleton
from libpgm.pgmlearner import PGMLearner

def learnBN(fdata_array, bn_file):
    # experiment_dir is assumed to be a module-level path
    bn_path = os.path.join(experiment_dir, 'parameters', bn_file + '.txt')

    skel = GraphSkeleton()
    skel.load(bn_path)
    skel.toporder()

    learner = PGMLearner()
    bn = learner.discrete_mle_estimateparams(skel, fdata_array)

    return bn
Example No. 6
def getBNparams(graph, ddata, n):
    # Gets discrete BN parameters given a graph skeleton.
    # The skeleton includes a t-1 node and a t node for each variable;
    # gk is assumed to be a graph utility module exposing edgelist().
    nodes = [str(i) for i in range(1, (n * 2) + 1)]
    edges = gk.edgelist(graph)
    # shift each edge's child into the t slice: [u, v] -> [u, str(n + v)]
    for i in range(len(edges)):
        edges[i] = [edges[i][0], str(n + int(edges[i][1]))]
    skel = GraphSkeleton()
    skel.V = nodes
    skel.E = edges
    learner = PGMLearner()
    result = learner.discrete_mle_estimateparams(skel, ddata)
    return result
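
A worked sketch of the edge relabeling performed above: with n variables, nodes "1".."n" form the t-1 slice and "n+1".."2n" the t slice, so every edge's child index is shifted by n. The toy edge list here is an assumption:

n = 2
edges = [['1', '2'], ['2', '1']]                  # intra-slice edges at t-1
relabelled = [[u, str(n + int(v))] for u, v in edges]
print(relabelled)                                 # [['1', '4'], ['2', '3']]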
Example No. 7
from libpgm.pgmlearner import PGMLearner

def em(data, bn, skel):
    # EM loop: data_with_hidden and likelihood are user-defined helpers
    # (the E-step fills in the hidden variables, the M-step refits the CPDs)
    lk_last = 100
    times = 0
    while True:
        d2 = data_with_hidden(data, bn)                     # E-step
        learner = PGMLearner()
        bn = learner.discrete_mle_estimateparams(skel, d2)  # M-step
        lk = likelihood(d2, bn)
        print("LogLikelihood:", lk)
        times += 1

        # stop once the relative change in log-likelihood drops below 0.1%
        if abs((lk - lk_last) / lk_last) < 0.001:
            break
        lk_last = lk
    print(times)
    return bn
Example No. 9
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from libpgm.pgmlearner import PGMLearner

def calc_accuracy(dff_train, dff_train_target, nb_iterations):
    # skel, format_data and calc_BNprob are assumed to be defined in the
    # surrounding module (graph skeleton, data formatter, BN classifier)
    result = np.zeros(nb_iterations)

    for itera in range(nb_iterations):
        XX_train, XX_test, yy_train, yy_test = train_test_split(dff_train, dff_train_target, test_size=0.33)
        data4bn = format_data(XX_train)
        learner = PGMLearner()
        # estimate parameters
        result_bn = learner.discrete_mle_estimateparams(skel, data4bn)
        result_predict = calc_BNprob(XX_test)
        BN_test_probs = pd.DataFrame()
        BN_test_probs['ground_truth'] = yy_test
        Test_prob = pd.concat([yy_test.reset_index().Surv, result_predict], axis=1, ignore_index=True) \
                      .rename(columns={0: 'ground_truth', 1: 'class_resu'})
        accuracy = Test_prob[Test_prob.ground_truth == Test_prob.class_resu].shape[0] / (1.0 * Test_prob.shape[0])
        result[itera] = accuracy

    return result
Example No. 10
# (8) --------------------------------------------------------------------------
# Learn the CPDs of a discrete Bayesian network, given data and a structure:

# say I have some data
data = bn.randomsample(200)

# and a graphskeleton
skel = GraphSkeleton()
skel.load("../tests/unittestdict.txt")

# instantiate my learner 
learner = PGMLearner()

# estimate parameters
result = learner.discrete_mle_estimateparams(skel, data)

# output - toggle comment to see
# print(json.dumps(result.Vdata, indent=2))

# (9) -------------------------------------------------------------------------
# Learn the structure of a discrete Bayesian network, given only data:

# say I have some data
data = bn.randomsample(2000)

# instantiate my learner 
learner = PGMLearner()

# estimate structure
result = learner.discrete_constraint_estimatestruct(data)
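
Both steps above sample from a pre-existing network bn; a minimal sketch of how it could be built from the same test file (this construction is an assumption, not part of the snippet):

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork

nd = NodeData()
nd.load("../tests/unittestdict.txt")
skel = GraphSkeleton()
skel.load("../tests/unittestdict.txt")
skel.toporder()
bn = DiscreteBayesianNetwork(skel, nd)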
Example No. 11
import json

import pandas as pd
from libpgm.graphskeleton import GraphSkeleton
from libpgm.pgmlearner import PGMLearner
from libpgm.tablecpdfactorization import TableCPDFactorization

def fun(inputData):

    # helper that formats the DataFrame rows as dicts
    def format_data(df):
        result = []
        for row in df.itertuples():
            result.append(
                dict(great=row.great,
                     good=row.good,
                     clean=row.clean,
                     comfortable=row.comfortable,
                     bad=row.bad,
                     old=row.old,
                     Cleanliness=row.Cleanliness,
                     Location=row.Location,
                     Service=row.Service,
                     Rooms=row.Rooms,
                     Value=row.Value,
                     Overall=row.Overall))
        return result

    #load all preprocessed training data
    df = pd.read_csv('features.csv', sep=',')

    # format the data so it can be processed correctly by the libpgm functions
    node_data = format_data(df)

    skel = GraphSkeleton()
    #load structure of our net
    skel.load("./our-skel.txt")
    # set the topological order
    skel.toporder()
    # learner that will estimate the parameters and, if needed, the net structure
    learner = PGMLearner()

    # estimating parameters for our own model
    res = learner.discrete_mle_estimateparams(skel, node_data)

    # get CPT
    a = TableCPDFactorization(res)
    # define the query and the evidence as dicts
    query = dict(Overall=1)
    # prepare the dictionary of evidence values (after each equals sign goes the value read from the GUI)

    evidence = dict(Value=inputData[0],
                    Location=inputData[1],
                    Cleanliness=inputData[2],
                    Service=inputData[3],
                    Rooms=inputData[4],
                    bad=inputData[5],
                    old=inputData[6],
                    good=inputData[7],
                    great=inputData[8],
                    comfortable=inputData[9],
                    clean=inputData[10])

    print(query)
    print(evidence)

    #run the query given evidence
    result = a.condprobve(query, evidence)

    print(json.dumps(result.vals, indent=2))

    # map each possible Overall score to its posterior probability
    dizionario = {}
    for i in range(1, 6):
        dizionario[res.Vdata["Overall"]["vals"][i - 1]] = result.vals[i - 1]
    return dizionario
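
A hypothetical call to fun; the eleven evidence values below are made up and must match the states actually used in features.csv:

posterior = fun([2, 3, 4, 3, 4, 0, 0, 1, 1, 1, 1])
print(posterior)    # {overall_score: probability, ...} for scores 1..5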
Example No. 12

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.pgmlearner import PGMLearner

nd = NodeData()        # instantiated but not used below
skel = GraphSkeleton()

# The structure is defined in the file titanic_skel.json
jsonpath = "titanic_skel.json"
skel.load(jsonpath)

# instantiate the learner
learner = PGMLearner()

# The method estimates the parameters of a discrete Bayesian network with
# the structure given by the graph skeleton, maximizing the likelihood of
# the training data (training_data is assumed to be defined above)
result_params = learner.discrete_mle_estimateparams(skel, training_data)

result_params.Vdata['Class']  # inspect the learned parameters for one node


# Check the prediction accuracy



#results = calc_accuracy(dff_train, dff_train_target, 100)
#plt.hist(results, bins='auto')
calc_accuracy(df_train, df_train_target, 1)


# ## Learning the structure
Example No. 13
import unittest

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.pgmlearner import PGMLearner

class TestPGMLearner(unittest.TestCase):

    def setUp(self):
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData()
        nd.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(skel, nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - linear Gaussian
        nda = NodeData()
        nda.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(skel, nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel

    def test_discrete_mle_estimateparams(self):
        result = self.l.discrete_mle_estimateparams(self.skel, self.samplediscseq)
        indexa = result.Vdata['SAT']['vals'].index('lowscore')
        self.assertTrue(result.Vdata['SAT']['cprob']["['low']"][indexa] < 1 and result.Vdata['SAT']['cprob']["['low']"][indexa] > .9)
        indexb = result.Vdata['Letter']['vals'].index('weak')
        self.assertTrue(result.Vdata['Letter']['cprob']["['A']"][indexb] < .15 and result.Vdata['Letter']['cprob']["['A']"][indexb] > .05)

    def test_lg_mle_estimateparams(self):
        result = self.l.lg_mle_estimateparams(self.skel, self.samplelgseq)
        self.assertTrue(result.Vdata['SAT']['mean_base'] < 15 and result.Vdata['SAT']['mean_base'] > 5)
        self.assertTrue(result.Vdata['Letter']['variance'] < 15 and result.Vdata['Letter']['variance'] > 5)

    def test_discrete_constraint_estimatestruct(self):
        result = self.l.discrete_constraint_estimatestruct(self.samplediscseq)
        self.assertTrue(["Difficulty", "Grade"] in result.E)

    def test_lg_constraint_estimatestruct(self):
        result = self.l.lg_constraint_estimatestruct(self.samplelgseq)
        self.assertTrue(["Intelligence", "Grade"] in result.E)

    def test_discrete_condind(self):
        chi, pv, witness = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Letter", ["Grade"])
        self.assertTrue(pv > .05)
        self.assertTrue(witness, ["Grade"])
        chia, pva, witnessa = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Intelligence", [])  
        self.assertTrue(pva < .05)

    def test_discrete_estimatebn(self):
        result = self.l.discrete_estimatebn(self.samplediscseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Difficulty"]["cprob"][0])

    def test_lg_estimatebn(self):
        result = self.l.lg_estimatebn(self.samplelgseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Intelligence"]["mean_base"])
Example No. 14
import ast
import collections
import pprint

import numpy as np
from pandas import DataFrame

from libpgm.graphskeleton import GraphSkeleton
from libpgm.pgmlearner import PGMLearner


def learnDiscreteBN_with_structure(df, continous_columns, features_column_names, label_column='cat',
                                   draw_network=False):
    features_df = df.copy()
    features_df = features_df.drop(label_column, axis=1)

    labels_df = DataFrame()
    labels_df[label_column] = df[label_column].copy()

    # bin each continuous column into 5 equal-width intervals
    for i in continous_columns:
        bins = np.arange(min(features_df[i]), max(features_df[i]),
                         (max(features_df[i]) - min(features_df[i])) / 5.0)
        features_df[i] = np.digitize(features_df[i], bins=bins)

    data = []
    for index, row in features_df.iterrows():
        record = {}  # renamed from `dict` to avoid shadowing the builtin
        for i in features_column_names:
            record[i] = row[i]
        record[label_column] = labels_df[label_column][index]
        data.append(record)

    print "Init done"
    learner = PGMLearner()

    graph = GraphSkeleton()

    graph.V = []
    graph.E = []

    graph.V.append(label_column)

    for vertice in features_column_names:
        graph.V.append(vertice)
        graph.E.append([vertice, label_column])

    test = learner.discrete_mle_estimateparams(graphskeleton=graph, data=data)

    print "done learning"

    edges = test.E
    vertices = test.V
    probas = test.Vdata

    # print probas

    dot_string = 'digraph BN{\n'
    dot_string += 'node[fontname="Arial"];\n'

    dataframes = {}

    print "save data"
    for vertice in vertices:
        print "New vertice: " + str(vertice)
        dataframe = DataFrame()

        pp = pprint.PrettyPrinter(indent=4)
        # pp.pprint(probas[vertice])
        dot_string += vertice.replace(" ", "_") + ' [label="' + vertice + '\n' + '" ]; \n'

        if len(probas[vertice]['parents']) == 0:
            dataframe['Outcome'] = None
            dataframe['Probability'] = None
            vertex_dict = {}
            for index_outcome, outcome in enumerate(probas[vertice]['vals']):
                vertex_dict[str(outcome)] = probas[vertice]["cprob"][index_outcome]

            od = collections.OrderedDict(sorted(vertex_dict.items()))
            # print "Vertice: " + str(vertice)
            # print "%-7s|%-11s" % ("Outcome", "Probability")
            # print "-------------------"
            for k, v in od.items():
                # print "%-7s|%-11s" % (str(k), str(round(v, 3)))
                dataframe.loc[len(dataframe)] = [k, v]
            dataframes[vertice] = dataframe
        else:
            # pp.pprint(probas[vertice])
            dataframe['Outcome'] = None

            vertexen = {}
            for index_outcome, outcome in enumerate(probas[vertice]['vals']):
                temp = []
                for parent_index, parent in enumerate(probas[vertice]["parents"]):
                    # print str([str(float(index_outcome))])
                    temp = probas[vertice]["cprob"]
                    dataframe[parent] = None
                vertexen[str(outcome)] = temp

            dataframe['Probability'] = None
            od = collections.OrderedDict(sorted(vertexen.items()))

            # [str(float(i)) for i in ast.literal_eval(key)]


            # str(v[key][int(float(k))-1])

            # print "Vertice: " + str(vertice) + " with parents: " + str(probas[vertice]['parents'])
            # print "Outcome" + "\t\t" + '\t\t'.join(probas[vertice]['parents']) + "\t\tProbability"
            # print "------------" * len(probas[vertice]['parents']) *3
            # pp.pprint(od.values())

            counter = 0
            # print number_of_cols
            for outcome, cprobs in od.items():
                for key in cprobs.keys():
                    array_frame = []
                    array_frame.append((outcome))
                    print_string = str(outcome) + "\t\t"
                    for parent_value, parent in enumerate([i for i in ast.literal_eval(key)]):
                        # print "parent-value:"+str(parent_value)
                        # print "parten:"+str(parent)
                        array_frame.append(int(float(parent)))
                        # print "lengte array_frame: "+str(len(array_frame))
                        print_string += parent + "\t\t"
                    array_frame.append(cprobs[key][counter])
                    # print "lengte array_frame (2): "+str(len(array_frame))
                    # print  cprobs[key][counter]
                    print_string += str(cprobs[key][counter]) + "\t"
                    # for stront in [str(round(float(i), 3)) for i in ast.literal_eval(key)]:
                    #     print_string += stront + "\t\t"
                    # print "print string: " + print_string
                    # print "array_frame:" + str(array_frame)
                    dataframe.loc[len(dataframe)] = array_frame
                counter += 1
        print "Vertice " + str(vertice) + " done"
        dataframes[vertice] = dataframe

    for edge in edges:
        dot_string += edge[0].replace(" ", "_") + ' -> ' + edge[1].replace(" ", "_") + ';\n'

    dot_string += '}'
    # src = Source(dot_string)
    # src.render('../data/BN', view=draw_network)
    # src.render('../data/BN', view=False)
    print "vizualisation done"
    return dataframes
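
A hedged usage sketch for learnDiscreteBN_with_structure on a toy DataFrame (column names and values are made up). The function wires every feature as a parent of the label column and returns one CPT DataFrame per vertex:

import pandas as pd

df = pd.DataFrame({
    'f1': [1.0, 2.5, 0.3, 4.2, 3.3, 1.8],
    'f2': [0, 1, 1, 0, 1, 0],
    'cat': ['a', 'b', 'a', 'b', 'b', 'a'],
})
tables = learnDiscreteBN_with_structure(df, continous_columns=['f1'],
                                        features_column_names=['f1', 'f2'],
                                        label_column='cat')
for vertex, table in tables.items():
    print(vertex)
    print(table)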
Example No. 15
import json
import string

from libpgm.graphskeleton import GraphSkeleton
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.pgmlearner import PGMLearner

text = open("../unifiedMLData2.json")
data = text.read()
printable = set(string.printable)
# keep only printable ASCII characters; ''.join is required on Python 3
asciiData = ''.join(filter(lambda x: x in printable, data))

listofDicts = json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

result = learner.discrete_mle_estimateparams(skel, listofDicts)

tcf=TableCPDFactorization(result)

#Rating 1 Given Genre  is Drama
myquery = dict(rating=[1])
myevidence = dict(genre='Drama')
result=tcf.specificquery(query=myquery,evidence=myevidence)
print(result)


tcf.refresh()

#Rating 2 Given Genre  is Drama
myquery = dict(rating=[2])
myevidence = dict(genre='Drama')
Example No. 17
class PGMLearnerServer(object):
    def __init__(self):
        self.learner = PGMLearner()
        rospy.Service("~discrete/parameter_estimation",
                      DiscreteParameterEstimation,
                      self.discrete_parameter_estimation_cb)
        rospy.Service("~discrete/query", DiscreteQuery, self.discrete_query_cb)
        rospy.Service("~discrete/structure_estimation",
                      DiscreteStructureEstimation,
                      self.discrete_structure_estimation_cb)
        rospy.Service("~linear_gaussian/parameter_estimation",
                      LinearGaussianParameterEstimation,
                      self.lg_parameter_estimation_cb)
        rospy.Service("~linear_gaussian/structure_estimation",
                      LinearGaussianStructureEstimation,
                      self.lg_structure_estimation_cb)

    def discrete_parameter_estimation_cb(self, req):
        skel = U.graph_skeleton_from_ros(req.graph)
        skel.toporder()
        data = U.graph_states_dict_from_ros(req.states)
        res = self.learner.discrete_mle_estimateparams(skel, data)
        return DiscreteParameterEstimationResponse(
            U.discrete_nodes_to_ros(res.Vdata))

    def discrete_query_cb(self, req):
        nd = U.discrete_nodedata_from_ros(req.nodes)
        skel = U.graph_skeleton_from_node_data(nd)
        skel.toporder()
        bn = DiscreteBayesianNetwork(skel, nd)
        fn = TableCPDFactorization(bn)
        q = {n: nd.Vdata[n]["vals"] for n in req.query}
        ev = {ns.node: ns.state for ns in req.evidence}

        rospy.loginfo("resolving query %s with evidence %s" % (q, ev))
        ans = fn.condprobve(query=q, evidence=ev)
        rospy.loginfo("%s -> %s" % (ans.scope, ans.vals))
        res = DiscreteQueryResponse()
        node = DiscreteNode()
        node.name = ans.scope[0]
        node.outcomes = q[node.name]
        node.CPT.append(ConditionalProbability(node.outcomes, ans.vals))
        res.nodes.append(node)
        return res

    def discrete_structure_estimation_cb(self, req):
        states = [{ns.node: ns.state
                   for ns in s.node_states} for s in req.states]
        pvalparam = 0.05  # default value
        indegree = 1  # default value
        if req.pvalparam != 0.0:
            pvalparam = req.pvalparam
        if req.indegree != 0:
            indegree = req.indegree
        res = self.learner.discrete_constraint_estimatestruct(
            states, pvalparam=pvalparam, indegree=indegree)
        return DiscreteStructureEstimationResponse(
            U.graph_skeleton_to_ros(res))

    def lg_parameter_estimation_cb(self, req):
        skel = U.graph_skeleton_from_ros(req.graph)
        skel.toporder()
        data = U.graph_states_dict_from_ros(req.states)
        res = self.learner.lg_mle_estimateparams(skel, data)
        rospy.logdebug("parameter estimation: %s" % res.Vdata)
        return LinearGaussianParameterEstimationResponse(
            U.linear_gaussian_nodes_to_ros(res.Vdata))

    def lg_structure_estimation_cb(self, req):
        states = [{ns.node: ns.state
                   for ns in s.node_states} for s in req.states]
        rospy.logdebug(states)
        pvalparam = 0.05  # default value
        bins = 10  # default value
        indegree = 1  # default value
        if req.pvalparam != 0.0:
            pvalparam = req.pvalparam
        if req.bins != 0:
            bins = req.bins
        if req.indegree != 0:
            indegree = req.indegree
        rospy.logdebug("bins: %d, pvalparam: %f, indegree: %d" %
                       (bins, pvalparam, indegree))
        res = self.learner.lg_constraint_estimatestruct(states,
                                                        pvalparam=pvalparam,
                                                        bins=bins,
                                                        indegree=indegree)
        rospy.logdebug("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        rospy.logdebug(res.V)
        rospy.logdebug(res.E)
        return LinearGaussianStructureEstimationResponse(
            U.graph_skeleton_to_ros(res))
Example No. 18
            di['HandStrength'] = 'Medium'
        elif (di['HandStrength'] == 'TwoPairs'
              or (di['HandStrength'] == 'OnePair' and
                  (di['Rank'] == 'A' or di['Rank'] == 'K' or di['Rank'] == 'Q'
                   or di['Rank'] == 'J'))):
            di['HandStrength'] = 'Weak'
        else:
            di['HandStrength'] = 'VeryWeak'
print('###################    PART A    ################################')
print('*************** Network Parameters for BN agent1*****************')
for skeleton in ["Poker_Network.txt"
                 ]:  # loading skeleton of Network from given-file
    skel = GraphSkeleton()
    skel.load(skeleton)
    learner = PGMLearner()
    result = learner.discrete_mle_estimateparams(skel, mdata)
    print(json.dumps(result.Vdata, indent=2))

print('*************** Network Parameters for BN agent2*****************')
for skeleton in ["Poker_Network.txt"]:
    skel = GraphSkeleton()
    skel.load(skeleton)
    learner = PGMLearner()
    result = learner.discrete_mle_estimateparams(skel, mdata2)
    print(json.dumps(result.Vdata, indent=2))

print('########################## PART B    ################################')
print('**************  NB1 parameters for agent 1  *************************')
for skeleton in ["Poker_Network1.txt"]:
    skel = GraphSkeleton()
    skel.load(skeleton)
Example No. 19
import pandas as pd
from libpgm.graphskeleton import GraphSkeleton
from libpgm.pgmlearner import PGMLearner

# load all preprocessed training data
df = pd.read_csv('features.csv', sep=',')

# format the data so it can be processed correctly by libpgm (format_data is a user helper)
node_data = format_data(df)

skel = GraphSkeleton()
#load structure of our net
skel.load("./skel-learned2.txt")
# set the topological order
skel.toporder()
# learner that will estimate the parameters and, if needed, the net structure
learner = PGMLearner()

# estimating parameters for our own model
res = learner.discrete_mle_estimateparams(skel, node_data)
"""
#estimating the net structure from training data and parameters: an alternative way to build a new model from our data
net = learner.discrete_estimatebn(node_data)

with open("reteTestMeta.csv", "a") as gv:
  gv.write(json.dumps(net.V, indent=2))
  gv.write(json.dumps(net.E, indent=2))  
res = learner.discrete_mle_estimateparams(net, node_data)
with open("modelloMeta.csv", "a") as gv:
  gv.write(json.dumps(res.E, indent=2))
  gv.write(json.dumps(res.Vdata, indent=2))  
"""
# compute performance for each overall score
for score in range(1, 6):
    target = []
Example No. 21
import json
import string

from libpgm.graphskeleton import GraphSkeleton
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.pgmlearner import PGMLearner

text = open("../unifiedMLData2.json")
data = text.read()
printable = set(string.printable)
# keep only printable ASCII characters; ''.join is required on Python 3
asciiData = ''.join(filter(lambda x: x in printable, data))

listofDicts = json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

result = learner.discrete_mle_estimateparams(skel, listofDicts)

tcf = TableCPDFactorization(result)

#Rating 1 Given Occupation is student
myquery = dict(rating=[1])
myevidence = dict(occupation='student')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print(result)

tcf.refresh()

#Rating 2 Given Occupation is student
myquery = dict(rating=[2])
myevidence = dict(occupation='student')
result = tcf.specificquery(query=myquery, evidence=myevidence)