def test_find():
    """
    FUNCTION: find, in general.py.

    Feeds single-row boolean arrays to `general.find` and compares the
    result with the expected single-row array of column indices.
    """
    # Each entry pairs the boolean row given to find() with the index row
    # it is expected to return (an empty row when nothing is True).
    cases = [
        ([[True, False]], [[0]]),
        ([[False, True, True, False]], [[1, 2]]),
        ([[False, False]], [[]]),
    ]
    for mask, expected in cases:
        assert_array_equal(general.find(np.array(mask)), np.array(expected))
def minimum_spanning_tree(C1, C2=None):
    """
    This function finds the minimum spanning tree using Prim's algorithm.
    We assume that absent edges have 0 cost. To find the maximum spanning
    tree, use -1*C.

    We partition the nodes into those in U and those not in U.
    closest[i] is the vertex in U that is closest to i in V-U.
    lowcost[i] is the cost of the edge [i, closest[i]], or a huge sentinel
    value if i has been used. The tree is grown from node 0.

    For details see
        - Aho, Hopcroft & Ullman 1983, "Data structures and algorithms",
          p 237.

    Parameters
    ----------
    C1: Numpy matrix or 2-D ndarray
        C1[i,j] is the primary cost of connecting i to j.

    C2: Numpy matrix or 2-D ndarray, optional
        C2[i,j] is the secondary cost of connecting i to j, used to break
        ties between equal primary costs. When omitted, ties go to the
        lowest-numbered candidate node.

    Returns
    -------
    A: Numpy ndarray
        Symmetric n-by-n array of floats with A[i,j] = 1 iff the edge
        (i, j) belongs to the spanning tree. If some nodes cannot be
        reached from node 0, only the component containing node 0 is
        spanned.
    """
    # Largest finite float: the stand-in for "no edge" / "node already
    # used". Written directly instead of nan_to_num(inf * mask), which
    # produced NaNs (and a RuntimeWarning) only to clean them up again.
    big = np.finfo(float).max

    # np.array() gives private, plain float ndarrays, so the caller's
    # matrices are never modified and np.matrix input behaves like ndarray.
    cost1 = np.array(C1, dtype=float)
    n = cost1.shape[0]
    cost2 = np.zeros((n, n)) if C2 is None else np.array(C2, dtype=float)
    cost1[cost1 == 0] = big
    cost2[cost2 == 0] = big

    A = np.zeros((n, n))
    closest = np.zeros(n, dtype=int)
    used = np.zeros(n, dtype=bool)
    used[0] = True

    # Copies, not views: marking a node as used must not rewrite the costs.
    lowcost1 = cost1[0].copy()
    lowcost2 = cost2[0].copy()
    # Node 0 is already in the tree, whatever the diagonal of C1 holds.
    lowcost1[0] = big
    lowcost2[0] = big

    for _ in range(1, n):
        cheapest = lowcost1.min()
        if cheapest == big:
            # No remaining node is connected to the tree built so far.
            break

        # Among the cheapest nodes, break ties with the secondary cost.
        candidates = np.flatnonzero(lowcost1 == cheapest)
        k = int(candidates[np.argmin(lowcost2[candidates])])

        A[k, closest[k]] = 1
        A[closest[k], k] = 1
        lowcost1[k] = big
        lowcost2[k] = big
        used[k] = True

        # Unused nodes that are strictly cheaper to reach through k.
        better = ~used & (cost1[k] < lowcost1)
        lowcost1[better] = cost1[k, better]
        lowcost2[better] = cost2[k, better]
        closest[better] = k

    return A
def minimum_spanning_tree(C1, C2=None):
    """
    This function finds the minimum spanning tree using Prim's algorithm.
    We assume that absent edges have 0 cost. To find the maximum spanning
    tree, use -1*C.

    We partition the nodes into those in U and those not in U.
    closest[i] is the vertex in U that is closest to i in V-U.
    lowcost[i] is the cost of the edge [i, closest[i]], or a huge sentinel
    value if i has been used. The tree is grown from node 0.

    For details see
        - Aho, Hopcroft & Ullman 1983, "Data structures and algorithms",
          p 237.

    Parameters
    ----------
    C1: Numpy matrix or 2-D ndarray
        C1[i,j] is the primary cost of connecting i to j.

    C2: Numpy matrix or 2-D ndarray, optional
        C2[i,j] is the secondary cost of connecting i to j, used to break
        ties between equal primary costs. When omitted, ties go to the
        lowest-numbered candidate node.

    Returns
    -------
    A: Numpy ndarray
        Symmetric n-by-n array of floats with A[i,j] = 1 iff the edge
        (i, j) belongs to the spanning tree. If some nodes cannot be
        reached from node 0, only the component containing node 0 is
        spanned.
    """
    # Sentinel for "no edge" / "node already used": the largest finite
    # float, i.e. the value nan_to_num() maps infinity to.
    big = np.finfo(float).max

    # Private float ndarray copies: the inputs stay untouched and np.matrix
    # input is handled the same way as a plain 2-D array.
    cost1 = np.array(C1, dtype=float)
    n = cost1.shape[0]
    cost2 = np.zeros((n, n)) if C2 is None else np.array(C2, dtype=float)
    cost1[cost1 == 0] = big
    cost2[cost2 == 0] = big

    A = np.zeros((n, n))
    # Integer dtype: these values are used directly as array indices.
    closest = np.zeros(n, dtype=int)
    used = np.zeros(n, dtype=bool)
    used[0] = True

    # Copy row 0 so updating the running costs never alters the cost tables.
    lowcost1 = cost1[0].copy()
    lowcost2 = cost2[0].copy()
    # The start node is in the tree from the beginning.
    lowcost1[0] = big
    lowcost2[0] = big

    for _ in range(1, n):
        cheapest = lowcost1.min()
        if cheapest == big:
            # Every remaining node is unreachable from the current tree.
            break

        # Pick the cheapest node; the secondary cost decides between ties.
        candidates = np.flatnonzero(lowcost1 == cheapest)
        k = int(candidates[np.argmin(lowcost2[candidates])])

        A[k, closest[k]] = 1
        A[closest[k], k] = 1
        lowcost1[k] = big
        lowcost2[k] = big
        used[k] = True

        # Relax the unused nodes that k reaches at a strictly lower cost.
        better = ~used & (cost1[k] < lowcost1)
        lowcost1[better] = cost1[k, better]
        lowcost2[better] = cost2[k, better]
        closest[better] = k

    return A
def triangulate(G, order):
    """
    This function ensures that the input graph is triangulated (chordal),
    i.e., every cycle of length > 3 has a chord. To find the maximal
    cliques, we save each induced cluster (created by adding connecting
    neighbors) that is not a subset of any previously saved cluster. (A
    cluster is a complete, but not necessarily maximal, set of nodes.)

    Parameters
    ----------
    G: Numpy ndarray
        G[i,j] = 1 iff there is an edge between node i and node j. The
        array passed in is not modified; the triangulated graph is
        returned.

    order: Numpy ndarray
        The order in which to eliminate the nodes. It is indexed as
        order[0, i], i.e. a single-row 2-D array, and may hold floats.

    Returns
    -------
    list
        [G, cliques] where G is the triangulated adjacency matrix and
        cliques is a list of integer arrays, one per saved cluster.
    """
    # Fill-in edges go into a private copy so the caller's graph is intact.
    G = G.copy()

    # Obtain the number of nodes in the graph
    n = G.shape[0]
    eliminated = np.zeros((1, n))
    cliques = []

    for step in range(n):
        # Index of the next node to be eliminated; cast once so it can be
        # used as an array index even when `order` stores floats.
        u = int(order[0, step])

        # The cluster is u together with its uneliminated neighbours.
        U = find(eliminated == 0)
        nodes = np.intersect1d(neighbours(G, u), U)
        nodes = np.union1d(nodes, np.array([u])).astype(int)

        # Connect all members of the cluster to each other, then clear the
        # self-loops that this introduces on the diagonal.
        G[np.ix_(nodes, nodes)] = 1
        G = setdiag(G, 0)

        # Mark the node as 'eliminated'
        eliminated[0, u] = 1

        # A cluster contained in an already saved clique is not maximal,
        # so it is excluded from the list of cliques.
        if not any(issubset(nodes, np.array(c)) for c in cliques):
            cliques.append(nodes)

    return [G, cliques]
def triangulate(G, order):
    """
    This function ensures that the input graph is triangulated (chordal),
    i.e., every cycle of length > 3 has a chord. To find the maximal
    cliques, we save each induced cluster (created by adding connecting
    neighbors) that is not a subset of any previously saved cluster. (A
    cluster is a complete, but not necessarily maximal, set of nodes.)

    Parameters
    ----------
    G: Numpy ndarray
        G[i,j] = 1 iff there is an edge between node i and node j. The
        array passed in is not modified; the triangulated graph is
        returned.

    order: Numpy ndarray
        The order in which to eliminate the nodes. It is indexed as
        order[0, i], i.e. a single-row 2-D array, and may hold floats.

    Returns
    -------
    list
        [G, cliques] where G is the triangulated adjacency matrix and
        cliques is a list of integer arrays, one per saved cluster.
    """
    # Work on a private copy: the caller's adjacency matrix stays as given.
    G = G.copy()

    # Obtain the number of nodes in the graph
    n = G.shape[0]
    eliminated = np.zeros((1, n))
    cliques = []

    for step in range(n):
        # Next node to eliminate. Float node ids cannot be used as array
        # indices on current NumPy, so convert up front.
        u = int(order[0, step])

        # Uneliminated neighbours of u, plus u itself. np.intersect1d
        # replaces np.intersect1d_nu, which NumPy no longer provides.
        U = find(eliminated == 0)
        nodes = np.intersect1d(neighbours(G, u), U)
        nodes = np.union1d(nodes, np.array([u])).astype(int)

        # Make the cluster complete, then remove the diagonal entries that
        # the block assignment sets.
        G[np.ix_(nodes, nodes)] = 1
        G = setdiag(G, 0)

        # Mark the node as 'eliminated'
        eliminated[0, u] = 1

        # Only keep clusters that are not contained in a saved clique.
        if not any(issubset(nodes, np.array(c)) for c in cliques):
            cliques.append(nodes)

    return [G, cliques]
def marginalize_pot(self, onto, maximize=False):
    """
    This method marginalizes (or maximizes) a discrete potential onto
    a smaller domain.

    Parameters
    ----------
    onto: List
        The list of nodes specifying the domain onto which to
        marginalize.

    maximize: Bool
        This value is false if the function must marginalize the
        potential over a domain, and true if it must maximize the
        potential over a domain.

    Returns
    -------
    list
        [smallpot, maximizers]. smallpot is the `dpot` built over `onto`
        from the reduced table. maximizers is a dict that stays empty when
        summing; when maximizing it maps each unobserved maxed-out node to
        [remaining dependants, argmax values] for back tracking.
    """
    # Row vector mapping a node id to its size.
    ns = np.zeros((1, np.max(self.domain) + 1))
    ns[0, self.domain] = self.sizes

    # Table that is reduced one axis at a time below.
    smallT = self.T

    # Determine which dimensions to sum/max over: for every node of the
    # domain that is not kept, record its axis position in the domain.
    sum_over = np.setdiff1d(np.array(self.domain), np.array(onto))
    ndx = []
    for i in sum_over:
        temp = find(np.array([np.array(self.domain) == i]))
        if temp.shape != (1, ):
            ndx.append(temp[0, 0])
    ndx = np.array(ndx)

    maximizers = dict()
    if maximize:
        # Determine which variables the resulting argmax values will be
        # dependent on. These values are used for back tracking.
        dependants = np.setdiff1d(
            np.array(self.domain[:]),
            np.array(self.observed_domain[:])).squeeze().tolist()
        # tolist() yields a bare scalar when only one node is left.
        if not isinstance(dependants, list):
            dependants = [dependants]

        # `count` walks sum_over in step with the axis list.
        count = 0
        for i in range(0, len(ndx)):
            if ndx[i] < smallT.ndim:
                # If this node is unobserved, save its backtracking info.
                if sum_over[count] not in self.observed_domain[:]:
                    if sum_over[count] in dependants:
                        dependants.remove(sum_over[count])

                    # Determine which values maximized the array
                    argmax = np.argmax(smallT, ndx[i]).squeeze()
                    if argmax.shape == ():
                        argmax = np.array(argmax).tolist()

                    # Save a copy of the dependants: the list keeps
                    # shrinking on later iterations.
                    maximizers[sum_over[count]] = [dependants[:], argmax]

                # Maximize out the required dimension
                smallT = np.max(smallT, ndx[i])

                # Removing an axis shifts the remaining axes down by one.
                ndx = ndx - 1
            count = count + 1
    else:
        for i in range(0, len(ndx)):
            if ndx[i] < smallT.ndim:
                # Sum over the dimension ndx[i]
                smallT = np.sum(smallT, ndx[i])

                # Removing an axis shifts the remaining axes down by one.
                ndx = ndx - 1

    # Create marginalized potential
    smallpot = dpot(onto, ns[0, onto], smallT)
    return [smallpot, maximizers]
def best_first_elim_order(G, node_sizes, stage=[]):
    """
    This function greedily searches for an optimal elimination order.

    Find an order in which to eliminate nodes from the graph in such a way
    as to try and minimize the weight of the resulting triangulated graph.
    The weight of a graph is the sum of the weights of each of its cliques;
    the weight of a clique is the product of the weights of each of its
    members; the weight of a node is the number of values it can take on.

    Since this is an NP-hard problem, a greedy heuristic is used. As
    implemented here, each step eliminates the node that induces the
    lightest clique, breaking ties by choosing the node that requires the
    fewest fill-in edges.

    For details, see
    - Kjaerulff, "Triangulation of graphs -- algorithms giving small total
      state space", Univ. Aalborg tech report, 1990 (www.cs.auc.dk/~uk)
    - C. Huang and A. Darwiche, "Inference in Belief Networks: A procedural
      guide", Intl. J. Approx. Reasoning, 11, 1994

    Parameters
    ----------
    G: Numpy ndarray
        G[i,j] = 1 iff there is an edge between node i and node j. The
        array passed in is not modified.

    node_sizes: Numpy ndarray
        The node sizes, indexed as node_sizes[0, i] = the number of
        discrete values node i can take on [1 if observed].

    stage: List
        stage[i] is a list of the nodes that must be eliminated at i'th
        stage. When empty (the default, which is never mutated) or None,
        all nodes form a single stage.

    Returns
    -------
    order: Numpy ndarray
        Array of shape (1, n); order[0, i] is the i'th node to eliminate.
    """
    # Obtain the number of nodes in the graph
    n = G.shape[0]
    if stage is None or len(stage) == 0:
        stage = [list(range(n))]

    # One private working graph is used for everything: neighbour lookup,
    # fill-in counting and adding fill-in edges. Counting on a stale
    # snapshot would keep charging for fill-in edges that already exist.
    work = G.copy()
    uneliminated = np.ones((1, n))
    order = np.zeros((1, n))
    t = 0

    for i in range(n):
        # Find the indices of the uneliminated elements
        U = find(uneliminated == 1)

        # Find which nodes can be removed in this stage.
        valid = np.intersect1d(np.array(U), np.array([stage[t]]))

        # Score every valid node by the fill-in and the clique weight that
        # eliminating it would cause.
        min_fill = np.zeros((1, len(valid)))
        min_weight = np.zeros((1, len(valid)))
        for j, k in enumerate(valid):
            # Uneliminated neighbours of the candidate node
            nbrs = np.intersect1d(
                np.array([neighbours(work, k)]), np.array(U)).astype(int)

            # Fill-in score: l**2 minus the number of adjacency entries
            # already set among the neighbours.
            l = len(nbrs)
            min_fill[0, j] = l**2 - np.sum(work[np.ix_(nbrs, nbrs)])

            # Weight of the clique formed by the node and its neighbours
            clique = [k] + nbrs.tolist()
            min_weight[0, j] = np.prod(node_sizes[0, clique])

        # Determine which of the nodes create the lightest clique.
        lightest_nbrs = find(min_weight == np.min(min_weight))

        # Among those, take the one needing the fewest fill-in edges.
        best_nbr_ndx = np.argmin(min_fill[0, lightest_nbrs.tolist()])
        j = lightest_nbrs[0, best_nbr_ndx]

        # Eliminate the chosen node and record it in the order.
        k = valid[j]
        uneliminated[0, k] = 0
        order[0, i] = k

        # Connect the neighbours of the eliminated node to each other. U
        # was computed before the elimination, which is harmless as long
        # as the diagonal is zero (k is then not its own neighbour).
        ns = np.intersect1d(
            np.array([neighbours(work, k)]), np.array(U)).astype(int)
        if len(ns) != 0:
            work[np.ix_(ns, ns)] = 1
            work = setdiag(work, 0)

        # If all the nodes valid for elimination in this stage have been
        # eliminated, then advance to the next stage.
        if np.sum(np.abs(uneliminated[0, stage[t]])) == 0:
            t = t + 1

    return order
def marginalize_pot(self, onto, maximize=False, evidence=[]):
    """
    This method marginalizes (or maximizes) a discrete potential onto
    a smaller domain.

    Parameters
    ----------
    onto: List
        The list of nodes specifying the domain onto which to
        marginalize.

    maximize: Bool
        This value is false if the function must marginalize the
        potential over a domain, and true if it must maximize the
        potential over a domain.

    evidence: List
        Not read by this method; kept so existing callers that pass it
        keep working. The default list is never mutated.

    Returns
    -------
    list
        [smallpot, maximizers]. smallpot is the `DiscretePotential` built
        over `onto` from the reduced table. maximizers is a dict that
        stays empty when summing; when maximizing it maps each unobserved
        maxed-out node to [remaining dependants, argmax values] for back
        tracking.
    """
    # Vector mapping a node id to its size.
    ns = np.zeros(np.max(self.domain) + 1)
    ns[self.domain] = self.sizes

    # Table that is reduced one axis at a time below.
    smallT = self.T

    # Determine which dimensions to sum/max over: for every node of the
    # domain that is not kept, record its axis position in the domain.
    sum_over = np.setdiff1d(np.array(self.domain), np.array(onto))
    ndx = []
    for i in sum_over:
        temp = find(np.array([np.array(self.domain) == i]))
        if temp.shape != (1,):
            ndx.append(temp[0, 0])
    ndx = np.array(ndx)

    maximizers = dict()
    if maximize:
        # Determine which variables the resulting argmax values will be
        # dependent on. These values are used for back tracking.
        dependants = np.setdiff1d(
            np.array(self.domain[:]),
            np.array(self.observed_domain[:])).squeeze().tolist()
        # tolist() yields a bare scalar when only one node is left.
        if not isinstance(dependants, list):
            dependants = [dependants]

        # `count` walks sum_over in step with the axis list.
        count = 0
        for i in range(0, len(ndx)):
            if ndx[i] < smallT.ndim:
                # If this node is unobserved, save its backtracking info.
                if sum_over[count] not in self.observed_domain[:]:
                    if sum_over[count] in dependants:
                        dependants.remove(sum_over[count])

                    # Determine which values maximized the array
                    argmax = np.argmax(smallT, ndx[i]).squeeze()
                    if argmax.shape == ():
                        argmax = np.array(argmax).tolist()

                    # Save a copy of the dependants: the list keeps
                    # shrinking on later iterations.
                    maximizers[sum_over[count]] = [dependants[:], argmax]

                # Maximize out the required dimension
                smallT = np.max(smallT, ndx[i])

                # Removing an axis shifts the remaining axes down by one.
                ndx = ndx - 1
            count = count + 1
    else:
        for i in range(0, len(ndx)):
            if ndx[i] < smallT.ndim:
                # Sum over the dimension ndx[i]
                smallT = np.sum(smallT, ndx[i])

                # Removing an axis shifts the remaining axes down by one.
                ndx = ndx - 1

    # Create marginalized potential
    smallpot = DiscretePotential(onto, ns[onto], smallT)
    return [smallpot, maximizers]
def best_first_elim_order(G, node_sizes, stage=[]):
    """
    This function greedily searches for an optimal elimination order.

    Find an order in which to eliminate nodes from the graph in such a way
    as to try and minimize the weight of the resulting triangulated graph.
    The weight of a graph is the sum of the weights of each of its cliques;
    the weight of a clique is the product of the weights of each of its
    members; the weight of a node is the number of values it can take on.

    Since this is an NP-hard problem, a greedy heuristic is used. As
    implemented here, each step eliminates the node that induces the
    lightest clique, breaking ties by choosing the node that requires the
    fewest fill-in edges.

    For details, see
    - Kjaerulff, "Triangulation of graphs -- algorithms giving small total
      state space", Univ. Aalborg tech report, 1990 (www.cs.auc.dk/~uk)
    - C. Huang and A. Darwiche, "Inference in Belief Networks: A procedural
      guide", Intl. J. Approx. Reasoning, 11, 1994

    Parameters
    ----------
    G: Numpy ndarray
        G[i,j] = 1 iff there is an edge between node i and node j. The
        array passed in is not modified.

    node_sizes: Numpy ndarray
        The node sizes, indexed as node_sizes[i] = the number of discrete
        values node i can take on [1 if observed].

    stage: List
        stage[i] is a list of the nodes that must be eliminated at i'th
        stage. When empty (the default, which is never mutated) or None,
        all nodes form a single stage.

    Returns
    -------
    order: Numpy ndarray
        Array of shape (1, n); order[0, i] is the i'th node to eliminate.
    """
    # Obtain the number of nodes in the graph
    n = G.shape[0]
    if stage is None or len(stage) == 0:
        stage = [list(range(n))]

    # A single private working graph serves neighbour lookup, fill-in
    # counting and fill-in insertion, so edges added by earlier
    # eliminations are not counted as missing again.
    work = G.copy()
    uneliminated = np.ones((1, n))
    order = np.zeros((1, n))
    t = 0

    for i in range(n):
        # Find the indices of the uneliminated elements
        U = find(uneliminated == 1)

        # Find which nodes can be removed in this stage. np.intersect1d
        # replaces np.intersect1d_nu, which NumPy no longer provides.
        valid = np.intersect1d(np.array(U), np.array([stage[t]]))

        # Score every valid node by the fill-in and the clique weight that
        # eliminating it would cause.
        min_fill = np.zeros((1, len(valid)))
        min_weight = np.zeros((1, len(valid)))
        for j, k in enumerate(valid):
            # Uneliminated neighbours of the candidate node
            nbrs = np.intersect1d(
                np.array([neighbours(work, k)]), np.array(U)).astype(int)

            # Fill-in score: l**2 minus the number of adjacency entries
            # already set among the neighbours.
            l = len(nbrs)
            min_fill[0, j] = l**2 - np.sum(work[np.ix_(nbrs, nbrs)])

            # Weight of the clique formed by the node and its neighbours
            clique = [k] + nbrs.tolist()
            min_weight[0, j] = np.prod(node_sizes[clique])

        # Determine which of the nodes create the lightest clique.
        lightest_nbrs = find(min_weight == np.min(min_weight))

        # Among those, take the one needing the fewest fill-in edges.
        best_nbr_ndx = np.argmin(min_fill[0, lightest_nbrs.tolist()])
        j = lightest_nbrs[0, best_nbr_ndx]

        # Eliminate the chosen node and record it in the order.
        k = valid[j]
        uneliminated[0, k] = 0
        order[0, i] = k

        # Connect the neighbours of the eliminated node to each other.
        ns = np.intersect1d(
            np.array([neighbours(work, k)]), np.array(U)).astype(int)
        if len(ns) != 0:
            work[np.ix_(ns, ns)] = 1
            work = setdiag(work, 0)

        # If all the nodes valid for elimination in this stage have been
        # eliminated, then advance to the next stage.
        if np.sum(np.abs(uneliminated[0, stage[t]])) == 0:
            t = t + 1

    return order