Example #1
def test_find():
    """
    FUNCTION: find, in general.py.
    """
    assert_array_equal(general.find(np.array([[True, False]])),
                       np.array([[0]]))
    assert_array_equal(general.find(np.array([[False, True, True, False]])),
                       np.array([[1, 2]]))
    assert_array_equal(general.find(np.array([[False, False]])),
                       np.array([[]]))
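The test only exercises general.find through its return values; a minimal NumPy-only stand-in that satisfies the same assertions (an assumption for illustration, not the library's actual implementation) could look like this:

import numpy as np
from numpy.testing import assert_array_equal

def find(mask):
    # Return the flat indices of the True entries as a (1, k) row array,
    # which is the shape the assertions above expect.
    return np.flatnonzero(mask).reshape(1, -1)

assert_array_equal(find(np.array([[False, True, True, False]])),
                   np.array([[1, 2]]))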
Example #3
def minimum_spanning_tree(C1, C2):
    """
    This function finds the minimum spanning tree using Prim's algorithm.
    We assume that absent edges have 0 cost. To find the maximum spanning
    tree, negate the costs (e.g. use -1*C1).
    We partition the nodes into those in U and those not in U.
    closest[i] is the vertex in U that is closest to i in V-U.
    lowcost[i] is the cost of the edge [i, closest[i]], or infinity if i has
    been used.
    For details see
        - Aho, Hopcroft & Ullman 1983, "Data structures and algorithms",
        p 237.
    Parameters
    ----------
    C1: Numpy matrix
        C1[i,j] is the primary cost of connecting i to j.
    C2: Numpy matrix
        C2[i,j] is the (optional) secondary cost of connecting i to j, used
        to break ties.
    """
    n = C1.shape[0]
    A = np.zeros((n,n))

    closest = np.zeros((1,n))
    used = np.zeros((1,n))
    used[0,0] = 1
    C1 = C1 + np.nan_to_num((C1 == 0) * np.Inf )
    C2 = C2 + np.nan_to_num((C2 == 0) * np.Inf )
    lowcost1 = C1[0,:]
    lowcost2 = C2[0,:]

    for i in range(1,n):
        ks = find(np.array(lowcost1) == np.min(lowcost1))
        k = ks[0, np.argmin(lowcost2[0, ks])]
        k = int(k)
        cll = int(closest[0, k])
        A[k, cll] = 1
        A[cll, k] = 1
        lowcost1[0,k] = np.nan_to_num(np.Inf)
        lowcost2[0,k] = np.nan_to_num(np.Inf)
        used[0,k] = 1
        NU = find(used == 0)

        for ji in range(0, NU.shape[1]):
            j = NU[0, ji]
            if C1[k, j] < lowcost1[0, j]:
                lowcost1[0, j] = float(C1[k, j])
                lowcost2[0, j] =  float(C2[k, j])
                closest[0, j] =  float(k)

    return A
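A usage sketch for the function above (not from the library's own tests): it assumes a find helper like the one sketched under Example #1 is in scope, and that the costs are np.matrix instances, since the code indexes them as C1[0, :].

import numpy as np

C1 = np.matrix([[0, 1, 1],
                [1, 0, 2],
                [1, 2, 0]])   # primary costs; 0 = no edge / diagonal
C2 = np.matrix([[0, 2, 1],
                [2, 0, 1],
                [1, 1, 0]])   # secondary costs used to break ties
A = minimum_spanning_tree(C1, C2)
print(A)   # expected tree edges: (0, 1) and (0, 2); the costlier (1, 2) is left out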
Example #4
def minimum_spanning_tree(C1, C2):
    """
    This function finds the minimum spanning tree using Prim's algorithm.
    We assume that absent edges have 0 cost. To find the maximum spanning
    tree, negate the costs (e.g. use -1*C1).

    We partition the nodes into those in U and those not in U.
    closest[i] is the vertex in U that is closest to i in V-U.
    lowcost[i] is the cost of the edge [i, closest[i]], or infinity if i has
    been used.

    For details see
        - Aho, Hopcroft & Ullman 1983, "Data structures and algorithms",
        p 237.

    Parameters
    ----------
    C1: Numpy matrix
        C1[i,j] is the primary cost of connecting i to j.

    C2: Numpy matrix
        C2[i,j] is the (optional) secondary cost of connecting i to j, used
        to break ties.
    """
    n = C1.shape[0]
    A = np.zeros((n,n))

    closest = np.zeros((1,n))
    used = np.zeros((1,n))
    used[0,0] = 1
    C1 = C1 + np.nan_to_num((C1 == 0) * np.Inf )
    C2 = C2 + np.nan_to_num((C2 == 0) * np.Inf )
    lowcost1 = C1[0,:]
    lowcost2 = C2[0,:]

    for i in range(1,n):
        ks = find(np.array(lowcost1) == np.min(lowcost1))
        k = ks[0, np.argmin(lowcost2[0, ks])]
        A[k, closest[0,k]] = 1
        A[closest[0,k], k] = 1
        lowcost1[0,k] = np.nan_to_num(np.Inf)
        lowcost2[0,k] = np.nan_to_num(np.Inf)
        used[0,k] = 1
        NU = find(used == 0)

        for ji in range(0, NU.shape[1]):
            j = NU[0, ji]
            if C1[k, j] < lowcost1[0, j]:
                lowcost1[0, j] = float(C1[k, j])
                lowcost2[0, j] =  float(C2[k, j])
                closest[0, j] =  float(k)

    return A
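This variant indexes A with closest[0, k], a float, which current NumPy releases reject (Example #3 adds the int() casts). For comparison, here is a self-contained sketch of the same idea in plain NumPy, without the library helpers; all names below are illustrative, not from the library.

import numpy as np

def prim_mst(C):
    # Prim's algorithm on a dense cost matrix; zero entries mean "no edge".
    n = C.shape[0]
    C = np.where(C == 0, np.inf, C.astype(float))
    A = np.zeros((n, n), dtype=int)
    in_tree = np.zeros(n, dtype=bool)
    in_tree[0] = True
    closest = np.zeros(n, dtype=int)   # nearest tree vertex for each node
    lowcost = C[0].copy()              # cheapest edge from the tree to each node
    for _ in range(n - 1):
        lowcost[in_tree] = np.inf
        k = int(np.argmin(lowcost))
        A[k, closest[k]] = A[closest[k], k] = 1
        in_tree[k] = True
        better = C[k] < lowcost
        lowcost = np.where(better, C[k], lowcost)
        closest = np.where(better, k, closest)
    return A

print(prim_mst(np.array([[0, 1, 1],
                         [1, 0, 2],
                         [1, 2, 0]])))   # picks edges (0, 1) and (0, 2)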
Example #5
def triangulate(G, order):
    """
    This function ensures that the input graph is triangulated (chordal),
    i.e., every cycle of length > 3 has a chord. To find the maximal
    cliques, we save each induced cluster (created by adding connecting
    neighbors) that is not a subset of any previously saved cluster. (A
    cluster is a complete, but not necessarily maximal, set of nodes.)
    Parameters
    ----------
    G: Numpy ndarray
        G[i,j] = 1 iff there is an edge between node i and node j.
    order: List
        The order in which to eliminate the nodes.
    """
    MG = G.copy()
    
    """Obtain the the number of nodes in the graph"""
    n = G.shape[0]
    eliminated = np.zeros((1,n))
    cliques = []
    for i in range(0,n):
        """Obtain the index of the next node to be eliminated"""
        u = order[0,i]
        U = find(eliminated == 0)
        # np.intersect1d_nu was removed from NumPy; intersect1d is its replacement
        nodes = np.intersect1d(neighbours(G, u), U)
        nodes = np.union1d(nodes, np.array([u]))
        """
        Connect all uneliminated neighbours of the node to be eliminated
        together.
        """
        for i in nodes:
            for j in nodes:
                i=int(i)
                j=int(j)
                G[i, j] = 1
        G = setdiag(G, 0)
        u=int(u)
        """Mark the node as 'eliminated'"""
        eliminated[0, u] = 1

        """
        If the generated clique is a subset of an existing clique, then it is
        not a maximal clique, so it is excluded from the list of cliques.
        """
        exclude = False
        for c in range(0, len(cliques)):
            if issubset(nodes, np.array(cliques[c])):
                exclude = True
                break

        if not exclude:
            cliques.append(nodes)

    return [G, cliques]
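To see the effect on a concrete graph, here is a self-contained walk-through of the same elimination idea on a 4-cycle, in plain NumPy and without the library's find/neighbours/setdiag/issubset helpers. Eliminating node 0 first connects its neighbours 1 and 3, which is exactly the chord that makes the cycle chordal, and the surviving clusters are the maximal cliques.

import numpy as np

G = np.array([[0, 1, 0, 1],
              [1, 0, 1, 0],
              [0, 1, 0, 1],
              [1, 0, 1, 0]])          # 4-cycle: 0-1-2-3-0
order = [0, 1, 2, 3]
eliminated = np.zeros(4, dtype=bool)
cliques = []
for u in order:
    nbrs = np.flatnonzero((G[u] == 1) & ~eliminated)   # uneliminated neighbours of u
    cluster = set(nbrs.tolist()) | {u}
    for x in cluster:                                  # make the cluster complete
        for y in cluster:
            if x != y:
                G[x, y] = 1
    eliminated[u] = True
    if not any(cluster <= c for c in cliques):         # keep only maximal clusters
        cliques.append(cluster)

print(G[1, 3], G[3, 1])   # 1 1  -> the added chord
print(cliques)            # [{0, 1, 3}, {1, 2, 3}]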
Example #6
def triangulate(G, order):
    """
    This function ensures that the input graph is triangulated (chordal),
    i.e., every cycle of length > 3 has a chord. To find the maximal
    cliques, we save each induced cluster (created by adding connecting
    neighbors) that is not a subset of any previously saved cluster. (A
    cluster is a complete, but not necessarily maximal, set of nodes.)

    Parameters
    ----------
    G: Numpy ndarray
        G[i,j] = 1 iff there is an edge between node i and node j.

    order: List
        The order in which to eliminate the nodes.
    """
    MG = G.copy()
    
    """Obtain the the number of nodes in the graph"""
    n = G.shape[0]
    eliminated = np.zeros((1,n))
    cliques = []
    for i in range(0,n):
        """Obtain the index of the next node to be eliminated"""
        u = order[0,i]
        U = find(eliminated == 0)
        nodes = np.intersect1d_nu(neighbours(G, u), U)
        nodes = np.union1d(nodes, np.array([u]))
        """
        Connect all uneliminated neighbours of the node to be eliminated
        together.
        """
        for i in nodes:
            for j in nodes:
                G[i, j] = 1
        G = setdiag(G, 0)

        """Mark the node as 'eliminated'"""
        eliminated[0, u] = 1

        """
        If the generated clique is a subset of an existing clique, then it is
        not a maximal clique, so it is excluded from the list of cliques.
        """
        exclude = False
        for c in range(0, len(cliques)):
            if issubset(nodes, np.array(cliques[c])):
                exclude = True
                break

        if not exclude:
            cliques.append(nodes)

    return [G, cliques]
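This variant (and Example #10 below) still calls np.intersect1d_nu, which early NumPy releases provided for non-unique inputs and which has since been removed; Example #5 above also casts the node indices to int before indexing G, which matters when order is a float array. On a current NumPy, a small compatibility shim along these lines (an assumption about how one might patch a script, not library code) keeps such snippets running, since np.intersect1d accepts non-unique inputs by default:

import numpy as np

if not hasattr(np, "intersect1d_nu"):
    # intersect1d's default assume_unique=False handles repeated values,
    # so it can stand in for the removed intersect1d_nu.
    np.intersect1d_nu = np.intersect1d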
Example #7
    def marginalize_pot(self, onto, maximize=False):
        """
        This method marginalizes (or maximizes) a discrete potential onto
        a smaller domain.

        Parameters
        ----------
        onto: List
            The list of nodes specifying the domain onto which to
            marginalize.

        maximize: Bool
            This value is false if the function must marginalize
            the potential over a domain, and true if it must maximize
            the potential over a domain.
        """
        ns = np.zeros((1, np.max(self.domain) + 1))
        ns[0, self.domain] = self.sizes
        """Marginalize the table"""
        smallT = self.T
        """Determine which dimensions to sum/max over"""
        sum_over = np.setdiff1d(np.array(self.domain), np.array(onto))
        ndx = []
        for i in sum_over:
            temp = find(np.array([np.array(self.domain) == i]))
            if temp.shape != (1, ):
                ndx.append(temp[0, 0])
        ndx = np.array(ndx)

        maximizers = dict()
        if maximize:
            """
            Determine which variables the resulting argmax values will
            depend on. These values are used for backtracking.
            """
            dependants = np.setdiff1d(
                np.array(self.domain[:]),
                np.array(self.observed_domain[:])).squeeze().tolist()
            if type(dependants) != list:
                dependants = [dependants]

            count = 0
            for i in range(0, len(ndx)):
                if ndx[i] < smallT.ndim:
                    """
                    If this node is unobserved, save its backtracking info.
                    """
                    if sum_over[count] not in self.observed_domain[:]:
                        """Save backtracking information"""
                        if sum_over[count] in dependants:
                            dependants.remove(sum_over[count])
                        """Determine which values maximized the array"""
                        argmax = np.argmax(smallT, ndx[i]).squeeze()
                        if argmax.shape == ():
                            argmax = np.array(argmax).tolist()
                        """Save backtracking data"""
                        maximizers[sum_over[count]] = \
                                [dependants[:], argmax]
                    """Maximize out the required dimensions"""
                    smallT = np.max(smallT, ndx[i])
                    """Compensate for reduced dimensions of smallT"""
                    ndx = ndx - 1
                    count = count + 1
        else:
            for i in range(0, len(ndx)):
                if ndx[i] < smallT.ndim:
                    """Sum over the dimension ndx[i]"""
                    smallT = np.sum(smallT, ndx[i])
                    """Compensate for reduced dimensions of smallT"""
                    ndx = ndx - 1
        """Create marginalized potential"""
        smallpot = dpot(onto, ns[0, onto], smallT)
        return [smallpot, maximizers]
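A plain-NumPy sketch of what the summation branch above does: marginalizing a table whose axes correspond to domain [0, 1, 2] onto [0, 2] sums out the axis of node 1. The table and domain are made up for illustration; dpot and the class internals are not shown.

import numpy as np

T = np.arange(24, dtype=float).reshape(2, 3, 4)   # toy potential over nodes 0, 1, 2
domain, onto = [0, 1, 2], [0, 2]
sum_over = [v for v in domain if v not in onto]   # -> [1]
axes = tuple(domain.index(v) for v in sum_over)   # -> (1,)
smallT = T.sum(axis=axes)
print(smallT.shape)                               # (2, 4): a potential over nodes 0 and 2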
Example #8
def best_first_elim_order(G, node_sizes, stage=[]):
    """
    This function greedily searches for an optimal elimination order.
    Find an order in which to eliminate nodes from the graph in such a way
    as to try and minimize the weight of the resulting triangulated graph.
    The weight of a graph is the sum of the weights of each of its cliques;
    the weight of a clique is the product of the weights of each of its
    members; the weight of a node is the number of values it can take on.
    Since this is an NP-hard problem, we use the following greedy heuristic:
    At each step, eliminate that node which will result in the addition of
    the least number of fill-in edges, breaking ties by choosing the node
    that induces the lightest clique.
    For details, see
    - Kjaerulff, "Triangulation of graphs -- algorithms giving small total
        state space", Univ. Aalborg tech report, 1990 (www.cs.auc.dk/~uk)
    - C. Huang and A. Darwiche, "Inference in Belief Networks: A procedural
        guide", Intl. J. Approx. Reasoning, 11, 1994
    Parameters
    ----------
    G: Numpy ndarray
        G[i,j] = 1 iff there is an edge between node i and node j.
    node_sizes: List
        The node sizes, where ns[i] = the number of discrete values
        node i can take on [1 if observed].
    stage: List
        stage[i] is a list of the nodes that must be eliminated at i'th
        stage.
    """
    """Obtain the number of nodes in the graph"""
    n = G.shape[0]
    if stage == []:
        stage = [range(0, n)]
    MG = G.copy()
    uneliminated = np.ones((1, n))
    order = np.zeros((1, n))
    t = 0
    """For each node in the graph"""
    for i in range(0, n):
        """Find the indices of the unelminated elements"""
        U = find(uneliminated == 1)
        """Find which nodes can be removed in this stage."""
        # np.intersect1d_nu was removed from NumPy; intersect1d is its replacement
        valid = np.intersect1d(np.array(U), np.array([stage[t]]))
        """
        Determine which of the valid nodes will add the least number of fill in
        edges once eliminated. If 2 or more nodes add the same least number of
        fill in edges, then choose the one that results in the lightest clique.
        """
        min_fill = np.zeros((1, len(valid)))
        min_weight = np.zeros((1, len(valid)))
        """For each node that is valid for elimination"""
        for j in range(0, len(valid)):
            k = valid[j]
            """Obtain the uneliminated neighbours of the node to be eliminated"""
            nbrs = neighbours(G, k)
            nbrs = np.intersect1d(np.array([nbrs]), np.array(U))
            l = len(nbrs)
            M = np.zeros((l, l))
            count = 0
            for x in nbrs:
                for y in range(0, len(nbrs)):
                    M[count, y] = MG[x, nbrs[y]]
                count = count + 1
            """Save the number of fill-in edges required to eliminate node j"""
            min_fill[0, j] = l**2 - np.sum(M)
            nbrs = nbrs.tolist()
            nbrs.insert(0, k)
            """Save the clique weight obtained by eliminating node j"""
            min_weight[0, j] = np.prod(node_sizes[0, nbrs])
        """Determine which of the nodes create the lightest clique."""
        lightest_nbrs = find(min_weight == np.min(min_weight))
        """
        Determine which of the nodes found in the step above require the least
        number of fill-in edges to eliminate.
        """
        best_nbr_ndx = np.argmin(min_fill[0, lightest_nbrs.tolist()])
        j = lightest_nbrs[0, best_nbr_ndx]
        """
        Determine which of the nodes found in the step above are valid for
        elimination, these are the nodes to be eliminated.
        """
        k = valid[j]
        uneliminated[0, k] = 0
        """Add the nodes to be eliminated to the elimination order"""
        order[0, i] = k
        """Determine the nieghbours of the nodes to be eliminated"""
        ns = neighbours(G, k)
        ns = np.intersect1d(np.array([ns]), np.array(U))
        """Eliminate the nodes"""
        if len(ns) != 0:
            for x in ns:
                for y in ns:
                    G[x, y] = 1
            G = setdiag(G, 0)
        """
        If all the nodes valid for elimination in this stage have been
        eliminated, then advance to the next stage.
        """
        if np.sum(np.abs(uneliminated[0, stage[t]])) == 0:
            t = t + 1
    return order
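To make the two scores concrete, here is a self-contained sketch (made-up graph and node sizes, no library helpers) that computes, for every node, the number of fill-in edges its elimination would add and the weight of the clique it would induce:

import numpy as np

G = np.array([[0, 1, 1, 0],
              [1, 0, 0, 1],
              [1, 0, 0, 1],
              [0, 1, 1, 0]])          # 4-cycle: 0-1-3-2-0
sizes = np.array([2, 3, 2, 4])        # number of states per node

fill, weight = [], []
for k in range(4):
    nbrs = np.flatnonzero(G[k])
    # fill-in edges = pairs of neighbours of k that are not yet connected
    missing = [(x, y) for i, x in enumerate(nbrs) for y in nbrs[i + 1:] if G[x, y] == 0]
    fill.append(len(missing))
    weight.append(int(np.prod(sizes[[k, *nbrs]])))

print(fill)    # [1, 1, 1, 1]    -- every elimination adds one chord here
print(weight)  # [12, 24, 16, 24] -- node 0 would induce the lightest clique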
Example #9
    def marginalize_pot(self, onto, maximize=False, evidence=[]):
        """
        This method marginalizes (or maximizes) a discrete potential onto
        a smaller domain.

        Parameters
        ----------
        onto: List
            The list of nodes specifying the domain onto which to
            marginalize.

        maximize: Bool
            This value is false if the function must marginalize
            the potential over a domain, and true if it must maximize
            the potential over a domain.
        """        
        ns = np.zeros(np.max(self.domain)+1)
        ns[self.domain] = self.sizes

        """Marginalize the table"""
        smallT = self.T

        """Determine which dimensions to sum/max over"""
        sum_over = np.setdiff1d(np.array(self.domain), np.array(onto))
        ndx = []
        for i in sum_over:
            temp = find(np.array([np.array(self.domain) == i]))
            if temp.shape != (1,):
                ndx.append(temp[0,0])
        ndx = np.array(ndx)

        maximizers = dict()
        if maximize:
            """
            Determine which variables the resulting argmax values will
            depend on. These values are used for backtracking.
            """
            dependants = np.setdiff1d(np.array(self.domain[:]),
                np.array(self.observed_domain[:])).squeeze().tolist()
            if type(dependants) != list:
                dependants = [dependants]

            count = 0
            for i in range(0, len(ndx)):
                if ndx[i]<smallT.ndim:
                    """
                    If this node is unobserved, save its backtracking info.
                    """
                    if sum_over[count] not in self.observed_domain[:]:
                        """Save backtracking information"""
                        if sum_over[count] in dependants:
                            dependants.remove(sum_over[count])

                        """Determine which values maximized the array"""
                        argmax = np.argmax(smallT, ndx[i]).squeeze()
                        if argmax.shape == ():
                            argmax = np.array(argmax).tolist()

                        """Save backtracking data"""
                        maximizers[sum_over[count]] = \
                                [dependants[:], argmax]

                    """Maximize out the required dimensions"""
                    smallT = np.max(smallT, ndx[i])

                    """Compensate for reduced dimensions of smallT"""
                    ndx = ndx - 1
                    count = count + 1
        else:
            for i in range(0, len(ndx)):
                if ndx[i]<smallT.ndim:
                    """Sum over the dimension ndx[i]"""
                    smallT = np.sum(smallT, ndx[i])
                    """Compensate for reduced dimensions of smallT"""
                    ndx = ndx - 1

        """Create marginalized potential"""
        smallpot = DiscretePotential(onto, ns[onto], smallT)
        return [smallpot, maximizers]
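For the maximize branch, the same axis reduction uses np.max, and np.argmax is recorded so the maximizing state can be recovered later (the backtracking data stored above). A minimal illustration with a made-up two-node table:

import numpy as np

T = np.array([[0.1, 0.4],
              [0.3, 0.2]])        # toy potential over two binary nodes
argmax = np.argmax(T, axis=1)     # best state of node 1 for each state of node 0
smallT = np.max(T, axis=1)        # potential maximized onto node 0
print(smallT)                     # [0.4 0.3]
print(argmax)                     # [1 0]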
Example #10
def best_first_elim_order(G, node_sizes, stage=[]):
    """
    This function greedily searches for an optimal elimination order.

    Find an order in which to eliminate nodes from the graph in such a way
    as to try and minimize the weight of the resulting triangulated graph.
    The weight of a graph is the sum of the weights of each of its cliques;
    the weight of a clique is the product of the weights of each of its
    members; the weight of a node is the number of values it can take on.

    Since this is an NP-hard problem, we use the following greedy heuristic:
    At each step, eliminate that node which will result in the addition of
    the least number of fill-in edges, breaking ties by choosing the node
    that induces the lightest clique.

    For details, see
    - Kjaerulff, "Triangulation of graphs -- algorithms giving small total
        state space", Univ. Aalborg tech report, 1990 (www.cs.auc.dk/~uk)
    - C. Huang and A. Darwiche, "Inference in Belief Networks: A procedural
        guide", Intl. J. Approx. Reasoning, 11, 1994

    Parameters
    ----------
    G: Numpy ndarray
        G[i,j] = 1 iff there is an edge between node i and node j.

    node_sizes: List
        The node sizes, where ns[i] = the number of discrete values
        node i can take on [1 if observed].

    stage: List
        stage[i] is a list of the nodes that must be eliminated at i'th
        stage.
    """
    """Obtain the number of nodes in the graph"""
    n = G.shape[0]
    if stage == []:
        stage = [range(0, n)]
    MG = G.copy()
    uneliminated = np.ones((1, n))
    order = np.zeros((1, n))
    t = 0

    """For each node in the graph"""
    for i in range(0, n):
        """Find the indices of the unelminated elements"""
        U = find(uneliminated == 1)

        """Find which nodes can be removed in this stage."""
        valid = np.intersect1d_nu(np.array(U), np.array([stage[t]]))

        """
        Determine which of the valid nodes will add the least number of fill in
        edges once eliminated. If 2 or more nodes add the same least number of
        fill in edges, then choose the one that results in the lightest clique.
        """
        min_fill = np.zeros((1, len(valid)))
        min_weight = np.zeros((1, len(valid)))
        """For each node that is valid for elimination"""
        for j in range(0, len(valid)):
            k = valid[j]
            
            """Obtain the uneliminated neighbours of the node to be eliminated"""
            nbrs = neighbours(G, k)
            nbrs = np.intersect1d_nu(np.array([nbrs]), np.array(U))
            l = len(nbrs)
            M = np.zeros((l, l))
            count = 0
            for x in nbrs:
                for y in range(0, len(nbrs)):
                    M[count, y] = MG[x, nbrs[y]]
                count = count + 1

            """Save the number of fill-in edges required to eliminate node j"""
            min_fill[0, j] = l**2 - np.sum(M)
            nbrs = nbrs.tolist()
            nbrs.insert(0, k)
            """Save the clique weight obtained by eliminating node j"""
            min_weight[0, j] = np.prod(node_sizes[nbrs])

        """Determine which of the nodes create the lightest clique."""
        lightest_nbrs = find(min_weight == np.min(min_weight))
        
        """
        Determine which of the nodes found in the step above require the least
        number of fill-in edges to eliminate.
        """
        best_nbr_ndx = np.argmin(min_fill[0, lightest_nbrs.tolist()])
        j = lightest_nbrs[0, best_nbr_ndx]
        
        """
        Determine which of the nodes found in the step above are valid for
        elimination; these are the nodes to be eliminated.
        """
        k = valid[j]
        uneliminated[0, k] = 0
        
        """Add the nodes to be eliminated to the elimination order"""
        order[0, i] = k
        
        """Determine the nieghbours of the nodes to be eliminated"""
        ns = neighbours(G, k)
        ns = np.intersect1d_nu(np.array([ns]), np.array(U))
        
        """Eliminate the nodes"""
        if len(ns) != 0:
            for x in ns:
                for y in ns:
                    G[x, y] = 1
            G = setdiag(G, 0)

        """
        If all the nodes valid for elimination in this stage have been
        eliminated, then advance to the next stage.
        """
        if np.sum(np.abs(uneliminated[0, stage[t]])) == 0:
            t = t + 1
    return order
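One difference from Example #8 worth noting: this variant indexes node_sizes as a flat array (node_sizes[nbrs]), while Example #8 expects a (1, n) row array (node_sizes[0, nbrs]). A quick illustration of the two layouts with made-up sizes:

import numpy as np

sizes_row = np.array([[2, 3, 2, 4]])   # (1, n) layout: index as sizes_row[0, nbrs]
sizes_flat = np.array([2, 3, 2, 4])    # flat layout:   index as sizes_flat[nbrs]
nbrs = [0, 1, 3]
print(np.prod(sizes_row[0, nbrs]))     # 24
print(np.prod(sizes_flat[nbrs]))       # 24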