示例#1
0
    def simple_search(self, max_edges=None, tol=10**-6):
        """Greedy, add-only structure learning search.

        Every round picks the largest entry of ``self.delta_cache`` and
        passes its row (child node) and column (cache slot) to
        ``add_edge_and_sync``. The search ends when no cached delta exceeds
        ``tol`` or after ``max_edges`` additions.

        Args:
            max_edges: upper bound on the number of edges added to the BN;
                defaults to twice the number of nodes.
            tol: a cached delta must be strictly greater than this value for
                the search to continue.

        Returns:
            Tuple ``(self.net_score, np.sum(np.diag(self.scores)))``.
        """
        if max_edges is None:
            edge_budget = 2 * self.node_index.size
        else:
            edge_budget = max_edges

        # presumably p_score (re)fills the delta cache row of a node -
        # confirm against its definition
        for node in self.node_index:
            self.p_score(node)

        added = 0
        while True:
            row_best = np.amax(self.delta_cache, axis=1)
            if not (np.max(row_best) > tol and added < edge_budget):
                break
            child = np.argmax(row_best)
            slot = np.argmax(self.delta_cache[child, :])

            print('iteration %s' % added)
            print(row_best[child], child, self.delta_index[child, slot])
            self.add_edge_and_sync(child, slot)
            added += 1

        return self.net_score, np.sum(np.diag(self.scores))
示例#2
0
    def score_net(self):
        """Re-score the constructed BN and do nothing else.

        For every node the objective function is evaluated on the data
        columns of the node followed by its current parents
        (``self.BN.pnodes[child]``), together with the matching arities.
        The per-node results overwrite the entries of ``self.scores`` and
        their sum is stored in ``self.net_score``.

        Returns:
            The updated ``self.net_score``.
        """
        for child in self.node_index:
            # Column order: the child itself first, then its parents.
            family = [child] + self.BN.pnodes[child]
            self.scores[child] = self.objfunc(self.data[:, family],
                                              self.arity[family])
        self.net_score = np.sum(self.scores)
        return self.net_score
示例#3
0
 def gsrestarts(self, nrestarts=10, tol=10**-6):
     """
     Stochastically perturbed greedy search - the primary general searching
     method. Attempts to improve optimality of high order relations and
     to avoid potentially present local optima.

     After an initial ``grad_search`` the best network seen so far is
     remembered. Each restart removes a random number of edges, re-scores
     the network, runs ``grad_search`` again and keeps the result only if
     the summed node scores improved. The best network is put back into
     ``self.BN`` and re-scored at the end.

     Args:
         nrestarts: number of perturb-and-search rounds.
         tol: improvement threshold forwarded to ``grad_search``.
     """
     self.grad_search(tol=tol)
     best_bn = deepcopy(self.BN)
     best_score = np.sum(self.scores)

     # randint's upper bound is exclusive and must exceed the lower bound
     # of 1. Integer division avoids a float bound, and the clamp keeps
     # networks with three or fewer variables from raising ValueError
     # (they get exactly one removal per restart).
     removal_bound = max(2, self.arity.size // 2)

     for _restart in range(nrestarts):
         for _removal in range(np.random.randint(1, removal_bound)):
             self.BN.remove_random_edge(self.remove_edge_and_sync)
         self.score_net()
         self.grad_search(tol=tol)
         current_score = np.sum(self.scores)
         if current_score > best_score:
             print('found')
             best_bn = deepcopy(self.BN)
             best_score = current_score

     # NOTE(review): only the scores are recomputed for the restored
     # network; whether the delta caches also need a resync is not
     # visible from here - confirm.
     self.BN = deepcopy(best_bn)
     self.score_net()
示例#4
0
    def __init__(self, dt, objfunc='bdm', cache_size=1):
        """
        Build a network over the dataset's variables and the search caches.

        dt: dataset instance of dutils.dataset; its ``data``, ``arity`` and
            ``variables`` attributes are used.
        objfunc: 'bdm' or 'mdl', default is 'bdm'.
            If the C extension is properly compiled, 'cmdla' (AIC) and
            'cmdlb' (BIC) options may also be available.
            A callable is accepted as well and is used as is.
        cache_size: number of cached candidate deltas kept per node; a
            falsy value (0 or None) selects one slot per variable
            (``arity.size``).
        """

        # A string is resolved to the scoring function by evaluating it in
        # this module's namespace.
        # NOTE(review): eval executes arbitrary expressions - objfunc must
        # never come from untrusted input.
        self.objfunc = objfunc if callable(objfunc) else eval(objfunc)

        self.data = dt.data
        self.arity = dt.arity
        self.variables = dt.variables

        self.BN = bnet(self.variables)

        node_index = np.arange(len(self.variables))
        self.node_index = node_index

        # Initial per-node scores: each node evaluated on its own column.
        self.scores = [
            self.objfunc(self.data[:, [i]], self.arity[[i]])
            for i in node_index
        ]
        self.net_score = np.sum(self.scores)

        self.cache_size = cache_size if cache_size else self.arity.size

        n_nodes = node_index.size
        self.delta_cache = np.zeros((n_nodes, self.cache_size))
        # The builtin int replaces the np.int alias, which was removed in
        # NumPy 1.24; both denote the same dtype.
        self.delta_index = np.zeros((n_nodes, self.cache_size), dtype=int)
        self.delta_tmp = np.zeros(n_nodes)

        # One removal delta and one list of removal candidates per node.
        self.remove_deltas = np.zeros(n_nodes)
        self.remove_candidates = [[] for _ in node_index]

        for i in self.node_index:
            self.p_score(i)
            self.reverse_p_score(i)
示例#5
0
    def grad_search(self, max_edges=None, tol=10**-6):
        """
        Simple data driven structure learning search that adds or removes
        one edge per step, whichever cached delta is larger.

        Should NOT be used as a general search (without understanding of
        the limitations of max descent searching techniques); designed to
        be a subroutine for a more sophisticated method.

        Args:
            max_edges: upper bound on the number of search steps (edge
                additions and removals both count); defaults to three
                times the number of nodes.
            tol: the best addition or removal delta must be strictly
                greater than this value for the search to continue.

        Returns:
            Tuple ``(self.net_score, np.sum(self.scores))``.
        """
        if max_edges is None:
            max_edges = 3 * self.node_index.size

        steps = 0
        while steps < max_edges:
            best_deltas = np.amax(self.delta_cache, axis=1)
            best_add = np.max(best_deltas)
            best_remove = np.max(self.remove_deltas)
            if not max(best_add, best_remove) > tol:
                break

            if best_add > best_remove:
                cnode = np.argmax(best_deltas)
                # pnode is a column position inside the cache row, not a
                # node id (presumably delta_index maps it to the parent -
                # confirm against add_edge_and_sync).
                pnode = np.argmax(self.delta_cache[cnode, :])
                self.add_edge_and_sync(cnode, pnode)
                self.reverse_p_score(cnode)
            else:
                cnode = np.argmax(self.remove_deltas)
                pnode = self.remove_candidates[cnode][0]
                self.remove_edge_and_sync(cnode, pnode)
                # Reset to an empty list (not 0) so the slot keeps the
                # list type it is created with and subscripted as above.
                self.remove_candidates[cnode] = []
                self.remove_deltas[cnode] = 0
                self.reverse_p_score(cnode)

            steps += 1

        return self.net_score, np.sum(self.scores)