def createRandomUndiGraphTree(nodeNumber):
    """
    Build a random undirected graph that is a tree and return it.

    Nodes are created one after the other; each node after the first is
    linked to one node drawn with ``random.choice`` among those already in
    the graph.

    Examples
    --------
    >>> import markovNetworkLearning as mnl
    >>> g=mnl.createRandomUndiGraphTree(15)

    Parameters
    ----------
    nodeNumber : int
        the number of nodes in the graph

    Returns
    -------
    pyAgrum.UndiGraph
        the resulting undigraph (empty when ``nodeNumber`` is not positive)
    """
    tree = gum.UndiGraph()
    if nodeNumber <= 0:
        return tree

    # First node: nothing to attach it to yet.
    tree.addNode()
    for newId in range(1, nodeNumber):
        # Draw the attachment point before the new node exists, so a node
        # can never be linked to itself.
        anchor = random.choice(list(tree.nodes()))
        tree.addNode()
        # NOTE(review): the edge targets the loop index, i.e. this relies on
        # addNode() handing out ids 0, 1, 2, ... in creation order.
        tree.addEdge(anchor, newId)
    return tree
def create_complete_undigraph(size):
    """Return an undirected graph on ids ``0 .. size-1`` where every pair of nodes is linked.

    Parameters
    ----------
    size : int
        number of nodes to create

    Returns
    -------
    gum.UndiGraph
        the complete graph (no node at all when ``size`` is 0)
    """
    complete = gum.UndiGraph()
    for new_id in range(size):
        complete.addNodeWithId(new_id)
        # Connect the freshly added node to each node created before it;
        # doing so for every node covers each unordered pair exactly once.
        for earlier_id in range(new_id):
            complete.addEdge(new_id, earlier_id)
    return complete
def testNonRegressionAddEdge(self):
    """Check that addEdge raises InvalidNode for the endpoint pairs below."""
    ug = gum.UndiGraph()
    ug.addNodes(4)

    # Ids 6 and 7 are the ones expected to be rejected after addNodes(4):
    # bad second endpoint, bad first endpoint, then both bad.
    invalid_pairs = ((1, 6), (7, 0), (6, 7))
    for first, second in invalid_pairs:
        with self.assertRaises(gum.InvalidNode):
            ug.addEdge(first, second)
def _isDSep_tech2_parents(bn: "pyAgrum.BayesNet", sx: NodeSet, sy: NodeSet, zset: NodeSet) -> bool:
    """Test of d-separation of ``sx`` and ``sy`` given ``zset``, considering only the paths
    with an arc coming into ``sx``, using the graph-moralization method

    Parameters
    ----------
    bn: pyAgrum.BayesNet
        the bayesian network
    sx: Set[int]
        source nodes
    sy: Set[int]
        destination nodes
    zset: Set[int]
        blocking set

    Returns
    -------
    bool
        ``False`` when ``_is_path_x_y`` finds a path between ``sx`` and ``sy``
        in the reduced moral graph, ``True`` otherwise
    """
    # Nodes kept in the reduced graph. NOTE(review): `ancester` is presumably
    # adding the ancestors of its first argument into the set given as third
    # argument -- confirm against its definition.
    relevant = sx | sy
    # Iterate over a frozen snapshot because `relevant` is passed to
    # `ancester` while we loop.
    for start in frozenset(relevant):
        ancester(start, bn, relevant)
    for blocker in zset:
        relevant.add(blocker)
        ancester(blocker, bn, relevant)

    moral = pyAgrum.UndiGraph()
    for nodeId in relevant:
        moral.addNodeWithId(nodeId)

    # Parent -> child arcs become edges, except those whose parent is in sx.
    for child in moral.nodes():
        for parent in (set(bn.parents(child)) - sx):
            moral.addEdge(parent, child)

    # Moralization: link every two distinct remaining parents of a node.
    for child in moral.nodes():
        keptParents = set(bn.parents(child)) - sx
        for first in keptParents:
            for second in keptParents:
                if first != second:
                    moral.addEdge(first, second)

    _removeZ(moral, zset)

    return not _is_path_x_y(moral, sx, sy)
def _isDSep_tech2_children(bn: "pyAgrum.BayesNet", sx: NodeSet, sy: NodeSet, zset: NodeSet) -> bool:
    """Test of d-separation of ``sx`` and ``sy`` given ``zset``, considering only the paths
    with an arc coming from ``sx``, using the graph-moralization method

    Parameters
    ----------
    bn: pyAgrum.BayesNet
        the bayesian network
    sx: Set[int]
        source nodes
    sy: Set[int]
        destination nodes
    zset: Set[int]
        blocking set

    Returns
    -------
    bool
        ``False`` when ``_is_path_x_y`` finds a path between ``sx`` and ``sy``
        in the reduced moral graph, ``True`` otherwise
    """
    relevant = sx | sy
    # `ancester` is only started from sy (and zset below), never from the
    # nodes of sx themselves.
    for target in sy:
        ancester(target, bn, relevant)
    for blocker in zset:
        relevant.add(blocker)
        ancester(blocker, bn, relevant)

    moral = pyAgrum.UndiGraph()
    for nodeId in relevant:
        moral.addNodeWithId(nodeId)

    # Only nodes outside sx contribute their incoming arcs.
    outsideSx = set(moral.nodes()) - sx

    for child in outsideSx:
        for parent in bn.parents(child):
            moral.addEdge(parent, child)

    # Moralization: link every two distinct parents of a node outside sx.
    for child in outsideSx:
        parentsOfChild = bn.parents(child)
        for first in parentsOfChild:
            for second in parentsOfChild:
                if first != second:
                    moral.addEdge(first, second)

    _removeZ(moral, zset)

    return not _is_path_x_y(moral, sx, sy)
def testSimpleGraph(self):
    """Two nodes joined by one edge must give a single clique {0, 1} in both tree kinds."""
    graph = gum.UndiGraph()
    graph.addNode()
    graph.addNode()
    graph.addEdge(0, 1)
    expected_clique = {0, 1}

    # Junction tree
    junction_generator = gum.JunctionTreeGenerator()
    junction_tree = junction_generator.junctionTree(graph)
    self.assertEqual(junction_tree.size(), 1)
    self.assertEqual(junction_tree.clique(0), expected_clique)

    # Binary join tree, built from a fresh generator
    binary_generator = gum.JunctionTreeGenerator()
    binary_join_tree = binary_generator.binaryJoinTree(graph)
    self.assertEqual(binary_join_tree.size(), 1)
    self.assertEqual(binary_join_tree.clique(0), expected_clique)
def _reduce_moralize(bn: "pyAgrum.BayesNet", x: NodeSet, y: NodeSet, zset: NodeSet) -> "pyAgrum.UndiGraph":
    """
    Returns the undirected graph obtained by reducing (ancestor graph) and moralizing the
    Bayesian network ``bn``

    Parameters
    ----------
    bn: pyAgrum.BayesNet
        the BayesNet
    x: Set[int|str]
        NodeSet generating the ancestor graph
    y: Set[int|str]
        Second NodeSet generating the ancestor graph
    zset: Set[int|str]
        Third NodeSet generating the ancestor graph

    Returns
    -------
    pyAgrum.UndiGraph
        The reduced moralized graph
    """
    # NOTE(review): `ancester` is presumably adding the ancestors of its first
    # argument into the set given as third argument -- confirm.
    kept = x | y
    # Frozen snapshot: `kept` is handed to `ancester` during the loop.
    for start in frozenset(kept):
        ancester(start, bn, kept)
    for extra in zset:
        kept.add(extra)
        ancester(extra, bn, kept)

    moral = pyAgrum.UndiGraph()
    for nodeId in kept:
        moral.addNodeWithId(nodeId)

    # Every parent -> child arc of bn becomes an undirected edge.
    for child in moral.nodes():
        for parent in bn.parents(child):
            moral.addEdge(parent, child)

    # Moralization: link every two distinct parents of a same node.
    for child in moral.nodes():
        parentsOfChild = bn.parents(child)
        for first in parentsOfChild:
            for second in parentsOfChild:
                if first != second:
                    moral.addEdge(first, second)

    return moral
def testConstructorFromUG(self):
    """MixedGraph(ug) must equal a MixedGraph built by hand with the same nodes and edges."""
    shared_edges = ((0, 2), (1, 2), (2, 3))

    ug = gum.UndiGraph()
    ug.addNodes(4)
    for first, second in shared_edges:
        ug.addEdge(first, second)

    # Reference graph assembled directly as a MixedGraph.
    expected = gum.MixedGraph()
    expected.addNodes(4)
    for first, second in shared_edges:
        expected.addEdge(first, second)

    converted = gum.MixedGraph(ug)
    self.assertEqual(expected, converted)
def _cDecomposition(cm: CausalModel) -> List[Set[int]]:
    """
    Partition the non-latent nodes of ``cm`` into groups linked through latent variables.

    An undirected graph is built on the non-latent nodes, with an edge between
    every two children of a same latent variable; the connected components of
    that graph are returned.

    Parameters
    ----------
    cm: CausalModel
        the causal model; read through ``nodes()``, ``latentVariablesIds()``
        and ``children()``

    Returns
    -------
    List[Set[int]]
        one set of node ids per connected component; every non-latent node
        appears in exactly one set
    """
    undi = pyAgrum.UndiGraph()
    remaining = set(cm.nodes()) - cm.latentVariablesIds()
    for n in remaining:
        undi.addNodeWithId(n)

    # Two nodes sharing a latent parent end up in the same component.
    for latent in cm.latentVariablesIds():
        for a, b in it.combinations(cm.children(latent), 2):
            undi.addEdge(a, b)

    components = []
    while remaining:
        seed = remaining.pop()
        component = {seed}
        # Explicit stack instead of recursion, so a very large component
        # cannot exhaust the interpreter's recursion limit.
        pending = [seed]
        while pending:
            current = pending.pop()
            for neighbour in undi.neighbours(current):
                if neighbour not in component:
                    component.add(neighbour)
                    pending.append(neighbour)
        remaining -= component
        components.append(component)

    return components
def read_graph(file_name):
    """
    Load a dot file and convert it into a pyAgrum graph.

    Links flagged with ``dir == 'none'`` (individually, or globally through the
    ``edge`` default statement) become undirected edges, all others become
    arcs. The graph type follows from what was found: ``gum.DAG`` when there
    is no edge, ``gum.UndiGraph`` when there is no arc, ``gum.MixedGraph``
    otherwise.

    Parameters
    ----------
    file_name : str
        path of the dot file, forwarded to ``dot.graph_from_dot_file``

    Returns
    -------
    tuple
        ``(graph, names)`` where ``names`` is the list of node names in the
        order they were first registered
    """
    print("Loading file {}".format(file_name))
    dot_graph = dot.graph_from_dot_file(file_name)

    def _has_dir_none(element):
        # .get() instead of ['dir']: an element may carry attributes
        # (label, color, ...) without any 'dir' entry.
        attributes = element.get_attributes()
        return bool(attributes) and attributes.get('dir') == 'none'

    # Cleaning nodes: the 'node' / 'edge' default statements show up as
    # pseudo-nodes; drop them, remembering a global "dir=none" on 'edge'.
    isUndiGraph = False
    for node in dot_graph.get_nodes():
        name = node.get_name()
        if name in ('node', 'edge'):
            if name == 'edge' and _has_dir_none(node):
                isUndiGraph = True
            dot_graph.del_node(node)

    # Getting node names: explicitly declared nodes first ...
    node_name_map = {}
    for i, node in enumerate(dot_graph.get_nodes()):
        node_name_map[node.get_name()] = i

    # ... then nodes that only appear as edge endpoints. `default=-1` keeps
    # this working for files that declare no node at all.
    nodeId = max(node_name_map.values(), default=-1) + 1
    for edge in dot_graph.get_edges():
        for endpoint in (edge.get_source(), edge.get_destination()):
            if endpoint not in node_name_map:
                node_name_map[endpoint] = nodeId
                nodeId += 1

    # Split the links into undirected edges and directed arcs.
    edges = []
    arcs = []
    for edge in dot_graph.get_edges():
        source_id = node_name_map[edge.get_source()]
        destination_id = node_name_map[edge.get_destination()]
        if isUndiGraph or _has_dir_none(edge):
            edges.append(gum.Edge(source_id, destination_id))
        else:
            arcs.append(gum.Arc(source_id, destination_id))

    if not edges:
        graph = gum.DAG()
    elif not arcs:
        graph = gum.UndiGraph()
    else:
        graph = gum.MixedGraph()

    # At this point a DAG has no edge to add and an UndiGraph has no arc to
    # add, so the two loops below are safe for every graph type.
    for node_id in node_name_map.values():
        graph.addNodeWithId(node_id)
    for edge in edges:
        graph.addEdge(edge.first(), edge.second())
    for arc in arcs:
        graph.addArc(arc.tail(), arc.head())

    # dicts keep insertion order since Python 3.7, so the names come out in
    # the order in which they were registered above.
    return graph, list(node_name_map.keys())
def testAddNodes(self):
    """Run the shared _testAddNodes check on a fresh instance of each graph type."""
    graph_types = (gum.DiGraph, gum.UndiGraph, gum.MixedGraph, gum.DAG)
    for graph_type in graph_types:
        self._testAddNodes(graph_type())
def _fitTAN(X, y, bn, learner, variableList, target):
    """
    Uses Tree-Augmented NaiveBayes to learn the network structure and its parameters.

    parameters:
        X: {array-like, sparse matrix} of shape (n_samples, n_features)
            training data (only ``X.shape[1]`` is read here)
        y: array-like of shape (n_samples)
            Target values (only forwarded to ``_fitNaiveBayes``)
        bn:
            network receiving the arcs; its ``dag()`` is given to the learner
        learner:
            object queried through ``pseudoCount`` and ``learnParameters``
        variableList:
            names of the input variables, ``variableList[i]`` being column ``i``
        target:
            name of the class variable
    returns:
        the result of ``learner.learnParameters(bn.dag())``; with fewer than
        two input columns, whatever ``_fitNaiveBayes`` returns
    """
    # the number of columns in our data
    d = X.shape[1]

    # If there is only one input column, TAN works exactly the same as NaiveBayes.
    # The fallback's result is forwarded so that this path does not hand back
    # None while the main path hands back the learned network.
    # NOTE(review): confirm _fitNaiveBayes returns the learned network.
    if d < 2:
        return _fitNaiveBayes(X, y, bn, learner, variableList, target, None)

    # NOTE(review): the pseudoCount queries below are hoisted / not repeated,
    # which assumes pseudoCount is a pure read of the learner's data.
    probabilityY = learner.pseudoCount([target]).normalize().tolist()

    mutualInformation = dict()
    undirectedGraph = gum.UndiGraph()

    # we calculate the conditional mutual information (given the target) of all pairs of variables
    for i in range(d):
        undirectedGraph.addNodeWithId(i)
        if i == 0:
            # no earlier variable to pair with
            continue
        # depends on i only: computed once per i rather than once per pair
        probabilityXi = learner.pseudoCount([variableList[i], target]).normalize().tolist()
        for j in range(i):
            probabilityList = learner.pseudoCount(
                [variableList[i], variableList[j], target]).normalize().tolist()
            probabilityXj = learner.pseudoCount([variableList[j], target]).normalize().tolist()
            temp = 0
            for yIndex, ySlice in enumerate(probabilityList):
                for xjIndex, xjSlice in enumerate(ySlice):
                    for xiIndex, joint in enumerate(xjSlice):
                        # zero cells contribute nothing (and log(0) is undefined)
                        if joint > 0:
                            temp = temp + joint * math.log(
                                joint * probabilityY[yIndex] / (
                                    probabilityXi[yIndex][xiIndex] * probabilityXj[yIndex][xjIndex]))
            mutualInformation[(i, j)] = temp

    # if the mutual information between two variables reaches this threshold, we add an edge between them
    # NOTE(review): the sum runs over the d*(d-1)/2 pairs but is divided by
    # d*(d-1), i.e. the threshold is half the mean -- kept as is.
    threshold = 0
    for var in mutualInformation:
        threshold = threshold + mutualInformation[var]
    threshold = float(threshold) / (d * (d - 1))

    # pairs ranked by decreasing mutual information (stable for ties)
    rankedPairs = sorted(mutualInformation.items(), key=(lambda item: item[1]), reverse=True)

    for (i, j), information in rankedPairs:
        # since the pairs are in descending order, once a value is under the
        # threshold all the following ones are under it too
        if information < threshold:
            break
        undirectedGraph.addEdge(i, j)
        # if the edge causes a cycle, we delete it and pass on to the following pair
        if undirectedGraph.hasUndirectedCycle():
            undirectedGraph.eraseEdge(i, j)

    # connectedComponents() is used as a mapping whose values are the node
    # collections of each connected part of the graph
    connectedParts = undirectedGraph.connectedComponents()

    for members in connectedParts.values():
        # the root used to orient this part is the member with the largest
        # mutual information with the target
        root = 0
        maxMutualInformation = -99999
        for x0 in members:
            mutual = 0
            probabilityList = learner.pseudoCount([variableList[x0], target]).normalize().tolist()
            probabilityX = learner.pseudoCount([variableList[x0]]).normalize().tolist()
            for yIndex, ySlice in enumerate(probabilityList):
                for xIndex, joint in enumerate(ySlice):
                    if joint > 0:
                        mutual = mutual + joint * math.log(
                            joint / (probabilityY[yIndex] * probabilityX[xIndex]))
            if mutual > maxMutualInformation:
                maxMutualInformation = mutual
                root = x0

        # Breadth-first orientation from the root: `ListOfNodes` grows while
        # it is iterated and `reached` gives O(1) membership tests.
        # Since the graph contains no cycles, a neighbour already reached is
        # the node we came from, so no arc is added back towards it.
        ListOfNodes = [root]
        reached = {root}
        for tailId in ListOfNodes:
            for headId in undirectedGraph.neighbours(tailId):
                if headId not in reached:
                    bn.addArc(variableList[tailId], variableList[headId])
                    reached.add(headId)
                    ListOfNodes.append(headId)

    # naive-Bayes part: the target is a parent of every input variable
    for i in range(d):
        bn.addArc(target, variableList[i])

    bn = learner.learnParameters(bn.dag())
    return bn
def _fitChowLiu(X, y, bn, learner, variableList, target):
    """
    Uses the Chow-Liu algorithm to learn the network structure and its parameters.

    parameters:
        X: {array-like, sparse matrix} of shape (n_samples, n_features)
            training data (only ``X.shape[1]`` is read here)
        y: array-like of shape (n_samples)
            Target values (not read here; kept for interface compatibility)
        bn:
            network receiving the arcs; its ``dag()`` is given to the learner
        learner:
            object queried through ``pseudoCount`` and ``learnParameters``
        variableList:
            names of the input variables, ``variableList[i]`` being column ``i``
        target:
            name of the class variable
    returns:
        the result of ``learner.learnParameters(bn.dag())``
    """
    # Since the Chow-Liu algorithm doesn't differentiate between input and
    # output variables, the target is treated as variable 0 and input column
    # k as variable k + 1. Only the count is needed, so no combined copy of
    # the data is built (which also keeps sparse X usable).
    d = X.shape[1] + 1
    names = [target] + [variableList[k] for k in range(d - 1)]

    # One marginal per variable, queried once.
    # NOTE(review): assumes pseudoCount is a pure read of the learner's data.
    marginals = [learner.pseudoCount([name]).normalize().tolist() for name in names]

    mutualInformation = dict()
    undirectedGraph = gum.UndiGraph()

    # we calculate the mutual information of all pairs of variables
    for i in range(d):
        undirectedGraph.addNodeWithId(i)
        probabilityXi = marginals[i]
        for j in range(i):
            probabilityList = learner.pseudoCount([names[i], names[j]]).normalize().tolist()
            probabilityXj = marginals[j]
            mutual = 0
            for xjIndex, xjSlice in enumerate(probabilityList):
                for xiIndex, joint in enumerate(xjSlice):
                    # zero cells contribute nothing (and log(0) is undefined)
                    if joint > 0:
                        mutual = mutual + joint * math.log(
                            joint / (probabilityXi[xiIndex] * probabilityXj[xjIndex]))
            mutualInformation[(i, j)] = mutual

    # pairs ranked by decreasing mutual information (stable for ties)
    rankedPairs = sorted(mutualInformation.items(), key=(lambda item: item[1]), reverse=True)

    for (i, j), _ in rankedPairs:
        undirectedGraph.addEdge(i, j)
        # if the edge causes a cycle, we delete it and pass on to the following pair
        if undirectedGraph.hasUndirectedCycle():
            undirectedGraph.eraseEdge(i, j)

    # Breadth-first orientation starting from the target (node 0):
    # `ListOfNodes` grows while it is iterated and `reached` gives O(1)
    # membership tests. Since the graph contains no cycles, a neighbour
    # already reached is the node we came from, so no arc is added back.
    ListOfNodes = [0]
    reached = {0}
    for tailId in ListOfNodes:
        for headId in undirectedGraph.neighbours(tailId):
            if headId not in reached:
                bn.addArc(names[tailId], names[headId])
                reached.add(headId)
                ListOfNodes.append(headId)

    bn = learner.learnParameters(bn.dag())
    return bn