def createRandomUndiGraphTree(nodeNumber):
    """
    Build a random undirected graph with ``nodeNumber`` nodes in which every
    node after the first is linked to exactly one previously created node.

    Examples
    --------
    >>> import markovNetworkLearning as mnl
    >>> g=mnl.createRandomUndiGraphTree(15)

    Parameters
    ----------
    nodeNumber : int
            the number of nodes in the graph

    Returns
    -------
    pyAgrum.UndiGraph
            the resulting undigraph
    """
    tree = gum.UndiGraph()
    if nodeNumber > 0:
        # First node: nothing to attach it to yet.
        tree.addNode()
    for newNode in range(1, nodeNumber):
        # Pick the attachment point before the new node exists, so a node
        # can never be linked to itself.
        existingNodes = list(tree.nodes())
        anchor = random.choice(existingNodes)
        # NOTE(review): assumes addNode() hands out the ids 0, 1, 2, ... in
        # order, so that the node just created has id ``newNode`` -- confirm.
        tree.addNode()
        tree.addEdge(anchor, newNode)
    return tree
Exemplo n.º 2
0
def create_complete_undigraph(size):
    """
    Build an undirected graph on the node ids ``0 .. size-1`` in which every
    pair of distinct nodes is joined by an edge.

    Parameters
    ----------
    size : int
            the number of nodes in the graph

    Returns
    -------
    gum.UndiGraph
            the resulting graph
    """
    complete = gum.UndiGraph()
    for newest in range(size):
        complete.addNodeWithId(newest)
        # Link the new node to every node created before it.
        for earlier in range(newest):
            complete.addEdge(newest, earlier)
    return complete
Exemplo n.º 3
0
 def testNonRegressionAddEdge(self):
     """Adding an edge must raise InvalidNode when an endpoint is not in the graph."""
     ug = gum.UndiGraph()
     ug.addNodes(4)
     # Only four nodes were added, so ids 6 and 7 are expected to be unknown:
     # second endpoint unknown, first endpoint unknown, both unknown.
     for first, second in ((1, 6), (7, 0), (6, 7)):
         with self.assertRaises(gum.InvalidNode):
             ug.addEdge(first, second)
Exemplo n.º 4
0
def _isDSep_tech2_parents(bn: "pyAgrum.BayesNet", sx: NodeSet, sy: NodeSet, zset: NodeSet) -> bool:
  """Test of d-separation of ``sx`` and ``sy`` given ``zset`` using the graph-moralization method,
  considering only the paths with an arc coming into ``sx``: parents that belong to ``sx`` are
  ignored when the undirected graph is built.

  Parameters
  ----------
  bn: pyAgrum.BayesNet
      the bayesian network
  sx: Set[int]
      source nodes
  sy: Set[int]
      destination nodes
  zset: Set[int]
      blocking set

  Returns
  -------
  bool
      True when no path between ``sx`` and ``sy`` is found in the resulting graph
  """
  # Nodes kept in the reduced graph. ``ancester`` is defined elsewhere; it is
  # presumably filling the set it receives with the ancestors of the node.
  kept = sx | sy
  # Iterate over a frozen snapshot because ``kept`` is handed to ``ancester``
  # while the loop is running.
  for start in frozenset(kept):
    ancester(start, bn, kept)
  for blocker in zset:
    kept.add(blocker)
    ancester(blocker, bn, kept)

  moral = pyAgrum.UndiGraph()
  for node_id in kept:
    moral.addNodeWithId(node_id)

  for child in moral.nodes():
    # Parents inside sx are dropped: arcs leaving sx are not represented.
    usable_parents = set(bn.parents(child)) - sx
    # parent -- child edges
    for parent in usable_parents:
      moral.addEdge(parent, child)
    # moralization: link every pair of remaining parents of the same child
    for first in usable_parents:
      for second in usable_parents:
        if first != second:
          moral.addEdge(first, second)

  _removeZ(moral, zset)

  return not _is_path_x_y(moral, sx, sy)
Exemplo n.º 5
0
def _isDSep_tech2_children(bn: "pyAgrum.BayesNet", sx: NodeSet, sy: NodeSet, zset: NodeSet) -> bool:
  """Test of d-separation of ``sx`` and ``sy`` given ``zset`` using the graph-moralization method,
  considering only the paths with an arc coming from ``sx``: the parents of the nodes of ``sx``
  are never looked at when the undirected graph is built.

  Parameters
  ----------
  bn: pyAgrum.BayesNet
      the bayesian network
  sx: Set[int]
      source nodes
  sy: Set[int]
      destination nodes
  zset: Set[int]
      blocking set

  Returns
  -------
  bool
      True when no path between ``sx`` and ``sy`` is found in the resulting graph
  """
  # Nodes kept in the reduced graph. ``ancester`` is defined elsewhere; it is
  # presumably filling the set it receives with the ancestors of the node.
  kept = sx | sy
  # Only sy is expanded here: sx is in the set but ``ancester`` is not called on it.
  for target_node in sy:
    ancester(target_node, bn, kept)
  for blocker in zset:
    kept.add(blocker)
    ancester(blocker, bn, kept)

  moral = pyAgrum.UndiGraph()
  for node_id in kept:
    moral.addNodeWithId(node_id)

  # Every node except those of sx contributes its incoming arcs.
  for child in set(moral.nodes()) - sx:
    child_parents = bn.parents(child)
    # parent -- child edges
    for parent in child_parents:
      moral.addEdge(parent, child)
    # moralization: link every pair of parents of the same child
    for first in child_parents:
      for second in child_parents:
        if first != second:
          moral.addEdge(first, second)

  _removeZ(moral, zset)

  return not _is_path_x_y(moral, sx, sy)
Exemplo n.º 6
0
    def testSimpleGraph(self):
        """A two-node graph with one edge yields a single clique {0, 1} in both tree flavours."""
        twoNodes = gum.UndiGraph()
        for _ in range(2):
            twoNodes.addNode()
        twoNodes.addEdge(0, 1)
        expectedClique = {0, 1}

        # Junction tree
        generator = gum.JunctionTreeGenerator()
        junctionTree = generator.junctionTree(twoNodes)
        self.assertEqual(junctionTree.size(), 1)
        self.assertEqual(junctionTree.clique(0), expectedClique)

        # Binary join tree, built with a fresh generator
        generator = gum.JunctionTreeGenerator()
        binaryTree = generator.binaryJoinTree(twoNodes)
        self.assertEqual(binaryTree.size(), 1)
        self.assertEqual(binaryTree.clique(0), expectedClique)
0
def _reduce_moralize(bn: "pyAgrum.BayesNet", x: NodeSet, y: NodeSet, zset: NodeSet) -> "pyAgrum.UndiGraph":
  """
  Returns the undirected graph obtained by reducing (ancestor graph) and moralizing the Bayesian network ``bn``

  Parameters
  ----------
  bn: pyAgrum.BayesNet
      the BayesNet
  x: Set[int|str]
      NodeSet generating the ancestor graph
  y: Set[int|str]
      Second NodeSet generating the ancestor graph
  zset: Set[int|str]
      Third NodeSet generating the ancestor graph

  Returns
  -------
  pyAgrum.UndiGraph
      The reduced moralized graph
  """
  # Nodes kept in the reduced graph. ``ancester`` is defined elsewhere; it is
  # presumably filling the set it receives with the ancestors of the node.
  kept = x | y
  # Iterate over a frozen snapshot because ``kept`` is handed to ``ancester``
  # while the loop is running.
  for start in frozenset(kept):
    ancester(start, bn, kept)
  for extra in zset:
    kept.add(extra)
    ancester(extra, bn, kept)

  moral = pyAgrum.UndiGraph()
  for node_id in kept:
    moral.addNodeWithId(node_id)

  for child in moral.nodes():
    child_parents = bn.parents(child)
    # parent -- child edges
    for parent in child_parents:
      moral.addEdge(parent, child)
    # moralization: link every pair of parents of the same child
    for first in child_parents:
      for second in child_parents:
        if first != second:
          moral.addEdge(first, second)

  return moral
Exemplo n.º 8
0
    def testConstructorFromUG(self):
        """A MixedGraph built from an UndiGraph equals a MixedGraph filled with the same nodes and edges."""
        links = ((0, 2), (1, 2), (2, 3))

        ug = gum.UndiGraph()
        ug.addNodes(4)
        for first, second in links:
            ug.addEdge(first, second)

        # Reference graph, filled by hand with the same content.
        mixed_graph = gum.MixedGraph()
        mixed_graph.addNodes(4)
        for first, second in links:
            mixed_graph.addEdge(first, second)

        mg = gum.MixedGraph(ug)

        self.assertEqual(mixed_graph, mg)
0
def _cDecomposition(cm: CausalModel) -> List[Set[int]]:
  """
  Partition the non-latent nodes of ``cm`` into groups: two nodes are in the same
  group when they are connected through a chain of pairs of nodes sharing a latent parent.

  Parameters
  ----------
  cm: CausalModel
      the causal model

  Returns
  -------
  List[Set[int]]
      the groups, as sets of node ids; a node sharing no latent parent with any other
      node forms a group on its own
  """
  latents = cm.latentVariablesIds()
  remaining = set(cm.nodes()) - latents

  # Undirected graph over the non-latent nodes, with an edge between every
  # two children of a same latent variable.
  siblings = pyAgrum.UndiGraph()
  for node_id in remaining:
    siblings.addNodeWithId(node_id)
  for latent in latents:
    for first, second in it.combinations(cm.children(latent), 2):
      siblings.addEdge(first, second)

  # Connected components of that graph, collected with an explicit stack.
  components = []
  while remaining:
    seed = remaining.pop()
    component = {seed}
    pending = [seed]
    while pending:
      current = pending.pop()
      for neighbour in siblings.neighbours(current):
        if neighbour not in component:
          component.add(neighbour)
          pending.append(neighbour)
    remaining -= component
    components.append(component)
  return components
Exemplo n.º 10
0
def read_graph(file_name):
    """
    Load a DOT file and convert it into a pyAgrum graph.

    Edges whose ``dir`` attribute is ``none`` (either on the edge itself or
    through an ``edge [dir=none]`` default statement) become undirected
    edges, every other edge becomes an arc. The returned graph is a
    ``gum.DAG`` when there is no undirected edge, a ``gum.UndiGraph`` when
    there is no arc, and a ``gum.MixedGraph`` otherwise.

    Parameters
    ----------
    file_name : str
            path of the DOT file, handed to ``dot.graph_from_dot_file``
            (``dot`` is imported elsewhere in the file; presumably pydot)

    Returns
    -------
    tuple
            ``(graph, names)`` where ``names`` lists the node names in the
            order in which they received their integer id
    """
    print("Loading file {}".format(file_name))
    dot_graph = dot.graph_from_dot_file(file_name)
    isUndiGraph = False

    # Cleaning nodes: 'node' and 'edge' default statements show up as
    # pseudo-nodes and must not end up in the graph.
    for node in dot_graph.get_nodes():
        name = node.get_name()
        if name in ('node', 'edge'):
            # .get() rather than ['dir']: defaults such as ``edge [color=red]``
            # carry attributes but no 'dir' key.
            if name == 'edge' and (node.get_attributes() or {}).get('dir') == 'none':
                isUndiGraph = True
            dot_graph.del_node(node)

    # Getting node names: explicitly declared nodes first ...
    node_name_map = {}
    for i, node in enumerate(dot_graph.get_nodes()):
        node_name_map[node.get_name()] = i
    # ... then the nodes that only appear as edge endpoints. default=-1 keeps
    # files that declare no node at all (edges only) from raising ValueError.
    nodeId = max(node_name_map.values(), default=-1) + 1
    for edge in dot_graph.get_edges():
        for endpoint in (edge.get_source(), edge.get_destination()):
            if endpoint not in node_name_map:
                node_name_map[endpoint] = nodeId
                nodeId += 1

    edges = []
    arcs = []
    for edge in dot_graph.get_edges():
        first = node_name_map[edge.get_source()]
        second = node_name_map[edge.get_destination()]
        if isUndiGraph or (edge.get_attributes() or {}).get('dir') == 'none':
            edges.append(gum.Edge(first, second))
        else:
            arcs.append(gum.Arc(first, second))

    # The graph type only depends on which kinds of links were found; the
    # filling code below is shared because the unused list is empty.
    if not edges:  # DAG
        graph = gum.DAG()
    elif not arcs:  # UndiGraph
        graph = gum.UndiGraph()
    else:  # MixedGraph
        graph = gum.MixedGraph()

    for node_id in node_name_map.values():
        graph.addNodeWithId(node_id)
    for edge in edges:
        graph.addEdge(edge.first(), edge.second())
    for arc in arcs:
        graph.addArc(arc.tail(), arc.head())

    # Since python3.7, dict are insertion ordered, so the names come out in
    # the order in which their ids were assigned.
    return graph, list(node_name_map.keys())
Exemplo n.º 11
0
 def testAddNodes(self):
     """Run the shared ``_testAddNodes`` check on a fresh instance of each graph class."""
     for graphClass in (gum.DiGraph, gum.UndiGraph, gum.MixedGraph, gum.DAG):
         self._testAddNodes(graphClass())
Exemplo n.º 12
0
def _fitTAN(X, y, bn, learner, variableList, target):
  """
  Uses Tree-Augmented NaiveBayes to learn the network structure and its parameters.

  parameters:
      X: {array-like, sparse matrix} of shape (n_samples, n_features)
      training data; only its number of columns is read here
      y: array-like of shape (n_samples)
      Target values; only forwarded to _fitNaiveBayes
      bn:
      the network that receives the arcs
      (NOTE(review): assumed to already contain ``target`` and the variables of ``variableList`` -- confirm)
      learner:
      object providing ``pseudoCount`` and ``learnParameters``
      variableList:
      the feature variables, indexed like the columns of X
      target:
      the class variable

  returns:
      the network returned by ``learner.learnParameters``. When X has fewer than two columns,
      the value returned by ``_fitNaiveBayes`` is forwarded instead.
  """

  # the number of columns in our data
  d = X.shape[1]

  # If there is only one input column, TAN works exactly the same as NaiveBayes.
  # The result is forwarded so that both paths hand something back to the caller.
  if d < 2:
    return _fitNaiveBayes(X, y, bn, learner, variableList, target, None)

  # distribution of the target; used by every mutual information below
  probabilityY = learner.pseudoCount([target]).normalize().tolist()
  mutualInformation = dict()
  undirectedGraph = gum.UndiGraph()

  # we calculate the mutual information, conditioned on the target, of all pairs of variables
  for i in range(d):
    undirectedGraph.addNodeWithId(i)
    # depends on i only, so it is computed once per i rather than once per pair
    probabilityXi = learner.pseudoCount([variableList[i], target]).normalize().tolist()
    for j in range(i):
      probabilityList = learner.pseudoCount([variableList[i], variableList[j], target]).normalize().tolist()
      probabilityXj = learner.pseudoCount([variableList[j], target]).normalize().tolist()
      temp = 0
      for yIndex, givenY in enumerate(probabilityList):
        for xjIndex, givenXj in enumerate(givenY):
          for xiIndex, joint in enumerate(givenXj):
            # zero cells contribute nothing and would break the logarithm
            if joint > 0:
              temp = temp + joint * math.log(
                joint * probabilityY[yIndex] / (
                      probabilityXi[yIndex][xiIndex] * probabilityXj[yIndex][xjIndex]))
      mutualInformation[(i, j)] = temp

  # if the mutual information between two variables is at least this threshold, we add an edge between them.
  # NOTE(review): the sum runs over d*(d-1)/2 pairs but is divided by d*(d-1),
  # so the threshold is half the mean pairwise value -- confirm this is intended.
  threshold = float(sum(mutualInformation.values())) / (d * (d - 1))

  rankedPairs = sorted(mutualInformation.items(), key=(lambda item: item[1]), reverse=True)

  for (i, j), value in rankedPairs:
    # since it's sorted in descending order we know that if this value is under the threshold
    # all the following values will also be under the threshold
    if value < threshold:
      break
    undirectedGraph.addEdge(i, j)

    # if the edge causes a cycle, we delete the edge and pass on to the following pair of variables
    if undirectedGraph.hasUndirectedCycle():
      undirectedGraph.eraseEdge(i, j)

  # each key is a node from one connected part of the graph; the associated value holds
  # all the nodes that are part of the same connected part
  connectedParts = undirectedGraph.connectedComponents()

  for node in connectedParts:
    # the id of the node that will be used as a root to orient this part of the undirected graph
    root = 0
    # we choose the node with the largest mutual information with the target as the root
    maxMutualInformation = -math.inf
    for x0 in connectedParts[node]:
      mutual = 0
      probabilityList = learner.pseudoCount([variableList[x0], target]).normalize().tolist()
      probabilityX = learner.pseudoCount([variableList[x0]]).normalize().tolist()
      for yIndex, givenY in enumerate(probabilityList):
        for xIndex, joint in enumerate(givenY):
          if joint > 0:
            mutual = mutual + joint * math.log(
              joint / (probabilityY[yIndex] * probabilityX[xIndex]))
      if mutual > maxMutualInformation:
        maxMutualInformation = mutual
        root = x0

    # Orient the edges away from the root. ListOfNodes is used as a queue: it grows while
    # it is being iterated. Since the graph contains no cycles, a neighbour that has already
    # been reached is the one the current node was reached from, so no arc is added back to it.
    ListOfNodes = [root]
    reached = {root}
    for tailId in ListOfNodes:
      for headId in undirectedGraph.neighbours(tailId):
        if headId not in reached:
          bn.addArc(variableList[tailId], variableList[headId])
          ListOfNodes.append(headId)
          reached.add(headId)

  # the target is a parent of every feature variable
  for i in range(d):
    bn.addArc(target, variableList[i])

  return learner.learnParameters(bn.dag())
Exemplo n.º 13
0
def _fitChowLiu(X, y, bn, learner, variableList, target):
  """
  Uses the Chow-Liu algorithm to learn the network structure and its parameters.

  parameters:
      X: {array-like, sparse matrix} of shape (n_samples, n_features)
      training data; only its number of columns is read here
      y: array-like of shape (n_samples)
      Target values; not read by this function
      bn:
      the network that receives the arcs
      (NOTE(review): assumed to already contain ``target`` and the variables of ``variableList`` -- confirm)
      learner:
      object providing ``pseudoCount`` and ``learnParameters``
      variableList:
      the feature variables, indexed like the columns of X
      target:
      the class variable

  returns:
      the network returned by ``learner.learnParameters``
  """

  # since the chow liu algorithm doesn't differentiate between input and output variables,
  # both are handled together: index 0 stands for the target and index k > 0 stands for
  # variableList[k - 1]. Only the number of variables is needed, so no combined matrix is built.
  d = X.shape[1] + 1

  mutualInformation = dict()
  undirectedGraph = gum.UndiGraph()

  # we calculate the mutual information of all pairs of variables
  for i in range(d):
    undirectedGraph.addNodeWithId(i)
    if i == 0:
      # the target has no lower-indexed variable to be paired with
      continue
    probabilityXi = learner.pseudoCount([variableList[i - 1]]).normalize().tolist()
    for j in range(i):
      if j > 0:
        probabilityList = learner.pseudoCount([variableList[i - 1], variableList[j - 1]]).normalize().tolist()
        probabilityXj = learner.pseudoCount([variableList[j - 1]]).normalize().tolist()
      else:
        # j == 0: the other variable of the pair is the target
        probabilityList = learner.pseudoCount([variableList[i - 1], target]).normalize().tolist()
        probabilityXj = learner.pseudoCount([target]).normalize().tolist()
      mutual = 0
      for xjIndex, givenXj in enumerate(probabilityList):
        for xiIndex, joint in enumerate(givenXj):
          # zero cells contribute nothing and would break the logarithm
          if joint > 0:
            mutual = mutual + joint * math.log(
              joint / (probabilityXi[xiIndex] * probabilityXj[xjIndex]))
      mutualInformation[(i, j)] = mutual

  # the pairs, from the largest mutual information to the smallest
  rankedPairs = sorted(mutualInformation.items(), key=(lambda item: item[1]), reverse=True)

  for (i, j), _ in rankedPairs:
    # tentatively add the edge between the two nodes
    undirectedGraph.addEdge(i, j)

    # if the edge causes a cycle, we delete the edge and pass on to the following pair of variables
    if undirectedGraph.hasUndirectedCycle():
      undirectedGraph.eraseEdge(i, j)

  # Orient the edges away from node 0 (the target). ListOfNodes is used as a queue: it grows
  # while it is being iterated. Since the graph contains no cycles, a neighbour that has already
  # been reached is the one the current node was reached from, so no arc is added back to it.
  ListOfNodes = [0]
  reached = {0}
  for tailId in ListOfNodes:
    for headId in undirectedGraph.neighbours(tailId):
      if headId not in reached:
        if tailId > 0:
          bn.addArc(variableList[tailId - 1], variableList[headId - 1])
        else:
          bn.addArc(target, variableList[headId - 1])
        ListOfNodes.append(headId)
        reached.add(headId)

  return learner.learnParameters(bn.dag())