Example #1
import json
import time

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner

def timer(inputfile, trials, datalength):

    # load nodedata and graphskeleton
    nd = NodeData()
    skel = GraphSkeleton()
    #print "bp1"
    nd.load(inputfile)
    #print "bp2"
    skel.load(inputfile)
    #print "bp3"

#    msg = "%d, %d" % (asizeof(nd), asizeof(skel))
 #   print >>op, msg

    # topologically order graphskeleton
    skel.toporder()

    # load bayesian network
    bn = DiscreteBayesianNetwork(skel, nd)

    # instantiate pgm learner
    l = PGMLearner()

    # free unused memory
    del nd
    
    # TIME
    totaltime = 0
    for _ in range(trials): 
        data = bn.randomsample(datalength)
        start = time.perf_counter()
        ret = l.discrete_mle_estimateparams(skel, data)
        elapsed = time.perf_counter() - start
        totaltime += elapsed
    totaltime /= trials


    print(json.dumps(ret.Vdata, indent=1))
    return totaltime
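A minimal invocation sketch for the benchmark above. The file path and parameter values are placeholders, assuming a libpgm-format JSON input file (node data plus graph structure) such as the unittestdict.txt used in the library's own documentation:

# hypothetical invocation; the path stands in for any libpgm-format input file
avg = timer("../tests/unittestdict.txt", trials=10, datalength=200)
print("average estimation time: %.4f s" % avg)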
Example #2
import time

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork

def timer(inputfile, trials):

    # load nodedata and graphskeleton
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load(inputfile)
    skel.load(inputfile)

    # topologically order graphskeleton
    skel.toporder()

    # load bayesian network
    bn = DiscreteBayesianNetwork(skel, nd)
    
    # TIME
    totaltime = 0
    for _ in range(trials): 
        start = time.perf_counter()
        ret = bn.randomsample(100)
        elapsed = time.perf_counter() - start
        totaltime += elapsed
    totaltime /= trials

    return totaltime
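For comparison, a usage sketch for this variant, which times drawing 100 random samples per trial rather than parameter estimation; the file path is again a placeholder:

# hypothetical invocation, assuming the same libpgm-format input file as above
avg = timer("../tests/unittestdict.txt", trials=10)
print("average sampling time: %.4f s" % avg)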
Example #3
    def discrete_mle_estimateparams(self, graphskeleton, data):
        '''
        Estimate parameters for a discrete Bayesian network with a structure given by *graphskeleton* in order to maximize the probability of data given by *data*. This function takes the following arguments:

            1. *graphskeleton* -- An instance of the :doc:`GraphSkeleton <graphskeleton>` class containing vertex and edge data.
            2. *data* -- A list of dicts containing samples from the network in {vertex: value} format. Example::

                    [
                        {
                            'Grade': 'B',
                            'SAT': 'lowscore',
                            ...
                        },
                        ...
                    ]

        This function normalizes the distribution of a node's outcomes for each combination of its parents' outcomes, creating an estimated tabular conditional probability distribution for each node. It then instantiates a :doc:`DiscreteBayesianNetwork <discretebayesiannetwork>` instance based on the *graphskeleton*, modifies that instance's *Vdata* attribute to reflect the estimated CPDs, and returns the instance.

        The instantiated Vdata attribute follows the format seen in :doc:`unittestdict`, as described in :doc:`discretebayesiannetwork`.

        Usage example: this would learn parameters from a set of 200 discrete samples::

            import json

            from libpgm.nodedata import NodeData
            from libpgm.graphskeleton import GraphSkeleton
            from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
            from libpgm.pgmlearner import PGMLearner
            
            # generate some data to use
            nd = NodeData()
            nd.load("../tests/unittestdict.txt")    # an input file
            skel = GraphSkeleton()
            skel.load("../tests/unittestdict.txt")
            skel.toporder()
            bn = DiscreteBayesianNetwork(skel, nd)
            data = bn.randomsample(200)

            # instantiate my learner 
            learner = PGMLearner()

            # estimate parameters from data and skeleton
            result = learner.discrete_mle_estimateparams(skel, data)

            # output
            print(json.dumps(result.Vdata, indent=2))

        '''
        assert (isinstance(graphskeleton, GraphSkeleton)), "First arg must be a loaded GraphSkeleton instance."
        assert (isinstance(data, list) and data and isinstance(data[0], dict)), "Second arg must be a list of dicts."

        # instantiate Bayesian network, and add parent and children data
        bn = DiscreteBayesianNetwork()
        graphskeleton.toporder()
        bn.V = graphskeleton.V
        bn.E = graphskeleton.E
        bn.Vdata = dict()
        for vertex in bn.V: 
            bn.Vdata[vertex] = dict()
            bn.Vdata[vertex]["children"] = graphskeleton.getchildren(vertex)
            bn.Vdata[vertex]["parents"] = graphskeleton.getparents(vertex)
            
            # make placeholders for vals, cprob, and numoutcomes
            bn.Vdata[vertex]["vals"] = []
            if (bn.Vdata[vertex]["parents"] == []):
                bn.Vdata[vertex]["cprob"] = []
            else:
                bn.Vdata[vertex]["cprob"] = dict()

            bn.Vdata[vertex]["numoutcomes"] = 0

        # determine which outcomes are possible for each node
        for sample in data:
            for vertex in bn.V:
                if (sample[vertex] not in bn.Vdata[vertex]["vals"]):
                    bn.Vdata[vertex]["vals"].append(sample[vertex])
                    bn.Vdata[vertex]["numoutcomes"] += 1

        # lay out probability tables, and put a [num, denom] entry in all spots:

        # define helper function to recursively set up cprob table
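        # (cprob keys are string representations of parent-value lists,
        # e.g. "['True', 'low']", matching the str(pvals) lookup further below)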
        def addlevel(vertex, _dict, key, depth, totaldepth):
            if depth == totaldepth:
                _dict[str(key)] = []
                for _ in range(bn.Vdata[vertex]["numoutcomes"]):
                    _dict[str(key)].append([0, 0])
                return
            else:
                for val in bn.Vdata[bn.Vdata[vertex]["parents"][depth]]["vals"]:
                    ckey = key[:]
                    ckey.append(str(val))
                    addlevel(vertex, _dict, ckey, depth+1, totaldepth)

        # put [0, 0] at each entry of cprob table
        for vertex in bn.V:
            if (bn.Vdata[vertex]["parents"]):
                root = bn.Vdata[vertex]["cprob"]
                numparents = len(bn.Vdata[vertex]["parents"])
                addlevel(vertex, root, [], 0, numparents)
            else:
                for _ in range(bn.Vdata[vertex]["numoutcomes"]):
                    bn.Vdata[vertex]["cprob"].append([0, 0])

        # fill out entries with samples:
        for sample in data:
            for vertex in bn.V:
                    
                # compute index of result
                rindex = bn.Vdata[vertex]["vals"].index(sample[vertex])

                # go to correct place in Vdata
                if bn.Vdata[vertex]["parents"]:
                    pvals = [str(sample[t]) for t in bn.Vdata[vertex]["parents"]]
                    lev = bn.Vdata[vertex]["cprob"][str(pvals)]
                else:
                    lev = bn.Vdata[vertex]["cprob"]

                # increase all denominators for the current condition
                for entry in lev:
                    entry[1] += 1

                # increase numerator for current outcome
                lev[rindex][0] += 1

        # convert arrays to floats
        for vertex in bn.V:
            if not bn.Vdata[vertex]["parents"]:
                bn.Vdata[vertex]["cprob"] = [x[0]/float(x[1]) for x in bn.Vdata[vertex]["cprob"]]
            else:
                for key in bn.Vdata[vertex]["cprob"].keys():
                    try: 
                        bn.Vdata[vertex]["cprob"][key] = [x[0]/float(x[1]) for x in bn.Vdata[vertex]["cprob"][key]]
                        
                    # default to even distribution if no data points
                    except ZeroDivisionError:
                        bn.Vdata[vertex]["cprob"][key] = [1/float(bn.Vdata[vertex]["numoutcomes"]) for x in bn.Vdata[vertex]["cprob"][key]]

        # return cprob table with estimated probability distributions
        return bn
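To make the counting scheme concrete, here is a small hand-worked sketch (not part of libpgm) of the same [num, denom] bookkeeping for a single conditional distribution; the outcome values and samples are illustrative:

# toy illustration of the counting used above, assuming a node with
# vals ["weak", "strong"] and a fixed combination of parent outcomes
samples = ["weak", "weak", "strong", "weak"]  # outcomes observed under that combination
vals = ["weak", "strong"]

counts = [[0, 0] for _ in vals]       # one [num, denom] pair per outcome
for outcome in samples:
    rindex = vals.index(outcome)
    for entry in counts:              # every sample raises all denominators
        entry[1] += 1
    counts[rindex][0] += 1            # ...and exactly one numerator

cprob = [num / float(denom) for num, denom in counts]
print(cprob)  # [0.75, 0.25] -- the maximum-likelihood estimate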