示例#1
0
 def test_delete_operator_3(self):
     """Exhaustively apply ges.delete on random CPDAGs and check the output.

     For every non-zero entry (x, y) of each sampled CPDAG and every
     subset H yielded by utils.subsets(utils.na(y, x, cpdag)), checks
     that each h in H is the middle node of a v-structure between x
     and y in the output, and that the output equals an adjacency
     matrix built here independently of ges.delete.
     """
     n_graphs = 100
     p = 20
     for i in range(n_graphs):
         dag = sempler.generators.dag_avg_deg(p, 3, 1, 1)
         cpdag = utils.dag_to_cpdag(dag)
         for x in range(p):
             # The operator only applies to an edge x -> y or x - y,
             # i.e. to the non-zero entries in row x
             targets = np.where(cpdag[x, :] != 0)[0]
             for y in targets:
                 for H in utils.subsets(utils.na(y, x, cpdag)):
                     output = ges.delete(x, y, H, cpdag)
                     # Every h in H must be part of a new v-structure;
                     # the test expects the smaller endpoint first
                     found = utils.vstructures(output)
                     for h in H:
                         if x < y:
                             expected = (x, h, y)
                         else:
                             expected = (y, h, x)
                         self.assertIn(expected, found)
                     # Build the expected graph by hand
                     expected_graph = cpdag.copy()
                     # The edge between x and y is gone
                     expected_graph[x, y] = 0
                     expected_graph[y, x] = 0
                     # Edges y - h become y -> h
                     expected_graph[list(H), y] = 0
                     # Undirected edges x - h become x -> h
                     expected_graph[list(utils.neighbors(x, cpdag) & H), x] = 0
                     self.assertTrue((output == expected_graph).all())
     print("\nExhaustively checked delete operator on %i CPDAGS" % (i + 1))
示例#2
0
 def test_insert_2(self):
     """Exhaustively apply ges.insert on random CPDAGs and check the output.

     For every pair of distinct, non-adjacent nodes (x, y) of each
     sampled CPDAG and every subset T of the neighbors of y which are
     not adjacent to x, checks that each t in T forms a v-structure
     with x at y in the output, and that the output equals an
     adjacency matrix built here independently of ges.insert.
     """
     G = 100
     p = 20
     for i in range(G):
         A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
         cpdag = utils.dag_to_cpdag(A)
         for x in range(p):
             # Can only apply the operator to non-adjacent nodes. x is
             # removed explicitly so the test never inserts the
             # self-loop x -> x, which is not a valid operator
             adj_x = utils.adj(x, cpdag)
             Y = set(range(p)) - adj_x - {x}
             for y in Y:
                 for T in utils.subsets(utils.neighbors(y, cpdag) - adj_x):
                     output = ges.insert(x, y, T, cpdag)
                     # Verify the new vstructures; the test expects the
                     # smaller endpoint first
                     vstructs = utils.vstructures(output)
                     for t in T:
                         vstruct = (x, y, t) if x < t else (t, y, x)
                         self.assertIn(vstruct, vstructs)
                     # Verify whole connectivity
                     truth = cpdag.copy()
                     # Add edge x -> y
                     truth[x, y] = 1
                     # Orient t -> y
                     truth[list(T), y] = 1
                     truth[y, list(T)] = 0
                     self.assertTrue((output == truth).all())
     print("\nExhaustively checked insert operator on %i CPDAGS" % (i + 1))
示例#3
0
文件: main.py 项目: juangamella/ges
def delete(x, y, H, A):
    """
    Apply the delete operator to the edge x -> y or x - y.

    The operator
      1) removes the edge between x and y, and
      2) for every node h in H
           * orients the edge between y and h as y -> h
           * orients the edge between x and h as x -> h, if that
             edge was undirected

    H must be a subset of the neighbors of y which are adjacent to
    x; a ValueError is raised otherwise.

    Parameters
    ----------
    x : int
        the "origin" node (i.e. x -> y or x - y)
    y : int
        the "target" node
    H : iterable of ints
        a subset of the neighbors of y which are adjacent to x
    A : np.array
        the current adjacency matrix; it is copied, not modified

    Returns
    -------
    new_A : np.array
        the adjacency matrix resulting from applying the operator

    Raises
    ------
    ValueError
        if A[x, y] is zero, or if H is not a subset of the neighbors
        of y which are adjacent to x

    """
    H = set(H)
    # There must be an edge x -> y or x - y to delete
    if A[x, y] == 0:
        raise ValueError("There is no (un)directed edge from x=%d to y=%d" %
                         (x, y))
    # H may only contain neighbors of y which are adjacent to x
    na_yx = utils.na(y, x, A)
    if H - na_yx:
        raise ValueError(
            "The given set H is not valid, H=%s is not a subset of NA_yx=%s" %
            (H, na_yx))
    result = A.copy()
    # Drop the edge between x and y in both directions
    result[x, y] = 0
    result[y, x] = 0
    # Removing h -> y leaves y -> h for every h in H
    result[list(H), y] = 0
    # Removing h -> x leaves x -> h for every h in H that was a
    # neighbor of x
    x_neighbors = utils.neighbors(x, A)
    result[list(H & x_neighbors), x] = 0
    return result
示例#4
0
文件: main.py 项目: juangamella/ges
def turn(x, y, C, A):
    """
    Applies the turning operator: For an edge x - y or x <- y,
      1) orients the edge as x -> y
      2) for all c in C, orients the previously undirected edge c -> y

    Parameters
    ----------
    x : int
        the origin node (i.e. x -> y)
    y : int
        the target node
    C : iterable of ints
        a subset of the neighbors of y; must not contain x or y
    A : np.array
        the current adjacency matrix; it is copied, not modified

    Returns
    -------
    new_A : np.array
        the adjacency matrix resulting from applying the operator

    Raises
    ------
    ValueError
        if the edge x -> y already exists, if x and y are not
        connected, if C is not a subset of the neighbors of y, or if
        C contains x or y

    """
    # Accept any iterable (as documented): the subset and intersection
    # checks below need a set
    C = set(C)
    # Check inputs
    if A[x, y] != 0 and A[y, x] == 0:
        raise ValueError("The edge %d -> %d already exists" % (x, y))
    if A[x, y] == 0 and A[y, x] == 0:
        raise ValueError("x=%d and y=%d are not connected" % (x, y))
    if not C <= utils.neighbors(y, A):
        raise ValueError("Not all nodes in C=%s are neighbors of y=%d" %
                         (C, y))
    if len({x, y} & C) > 0:
        raise ValueError("C should not contain x or y")
    # Apply operator
    new_A = A.copy()
    # Turn edge x -> y
    new_A[y, x] = 0
    new_A[x, y] = 1
    # Orient edges c -> y for c in C, by removing y -> c
    new_A[y, list(C)] = 0
    return new_A
示例#5
0
文件: main.py 项目: juangamella/ges
def score_valid_turn_operators_undir(x, y, A, cache, debug=0):
    """Logic for finding and scoring the valid turn operators that can be
    applied to the edge x - y.

    Parameters
    ----------
    x : int
        the origin node (i.e. x -> y once the edge is turned)
    y : int
        the target node
    A : np.array
        the current adjacency matrix
    cache : instance of ges.scores.DecomposableScore
        the score cache to compute the score of the
        operators that are valid
    debug : int
        if larger than 0, debug traces are printed. Higher values
        correspond to increased verbosity

    Returns
    -------
    valid_operators : list of tuples
        a list of tuples, one per valid operator, of the form
        (change in score, resulting adjacency matrix, x, y, C)

    """
    # Proposition 31, condition (ii) in GIES paper (Hauser & Bühlmann
    # 2012) is violated if:
    #   1. all neighbors of y are adjacent to x, or
    #   2. y has no neighbors (besides x)
    # then there are no valid operators.
    neighbors_y = utils.neighbors(y, A)
    non_adjacents = list(neighbors_y - utils.adj(x, A) - {x})
    if len(non_adjacents) == 0:
        if debug > 1:
            print("    turn(%d,%d) : ne(y) \\ adj(x) = Ø => stopping" %
                  (x, y))
        return []
    # Otherwise, construct all the possible subsets which will satisfy
    # condition (ii), i.e. all subsets of neighbors of y with at least
    # one which is not adjacent to x. Each row of `subsets` one-hot
    # encodes one candidate set C
    p = len(A)
    C0 = sorted(neighbors_y - {x})
    subsets = np.zeros((2**len(C0), p + 1), dtype=bool)
    subsets[:, C0] = utils.cartesian([np.array([False, True])] * len(C0),
                                     dtype=bool)
    # Remove all subsets which do not contain at least one non-adjacent node to x
    to_remove = ~subsets[:, non_adjacents].any(axis=1)
    subsets = utils.delete(subsets, to_remove, axis=0)
    # The quantities below depend only on (x, y, A), none of which
    # change inside the loop, so they are computed once instead of
    # once per candidate subset.
    #   na_yx (N in the GIES paper): neighbors of y adjacent to x
    na_yx = utils.na(y, x, A)
    #   subgraph induced by the chain component of y
    subgraph = utils.induced_subgraph(utils.chain_component(y, A), A)
    pa_x, pa_y = utils.pa(x, A), utils.pa(y, A)
    # With condition (ii) guaranteed, we now check conditions (i,iii)
    # for each subset
    valid_operators = []
    if debug > 1:
        print("    turn(%d,%d) C0=" % (x, y), set(C0))
    while len(subsets) > 0:
        if debug > 1:
            print("      len(subsets)=%d, len(valid_operators)=%d" %
                  (len(subsets), len(valid_operators)))
        # Access the next subset
        C = set(np.where(subsets[0, :])[0])
        subsets = subsets[1:]
        # Condition (i): C is a clique in the subgraph induced by the
        # chain component of y. Because C is composed of neighbors of
        # y, this is equivalent to C being a clique in A. NOTE: This
        # is also how it is described in Alg. 5 of the paper
        if not utils.is_clique(C, A):
            # Remove from consideration all other sets C' which
            # contain C, as the clique condition will also not hold
            supersets = subsets[:, list(C)].all(axis=1)
            subsets = utils.delete(subsets, supersets, axis=0)
            continue
        # Condition (iii): Note that condition (iii) from proposition
        # 31 appears to be wrong in the GIES paper; instead we use the
        # definition of condition (iii) from Alg. 5 of the paper:
        # {x,y} must separate C and na_yx \ C in the subgraph induced
        # by the chain component of y, i.e. all the simple paths from
        # one set to the other contain a node in {x,y}.
        if not utils.separates({x, y}, C, na_yx - C, subgraph):
            continue
        # At this point C passes both conditions
        #   Apply operator
        new_A = turn(x, y, C, A)
        #   Compute the change in score
        x_parents = pa_x | (C & na_yx)
        new_score = cache.local_score(y, pa_y | C | {x}) + \
            cache.local_score(x, x_parents)
        old_score = cache.local_score(y, pa_y | C) + \
            cache.local_score(x, x_parents | {y})
        if debug > 1:
            print("        new score = %0.6f, old score = %0.6f, y=%d, C=%s" %
                  (new_score, old_score, y, C))
        #   Add to the list of valid operators
        valid_operators.append((new_score - old_score, new_A, x, y, C))
        if debug:
            print("    turn(%d,%d,%s) -> %0.16f" %
                  (x, y, C, new_score - old_score))
    # Return all valid operators
    return valid_operators
示例#6
0
文件: main.py 项目: juangamella/ges
def score_valid_turn_operators_dir(x, y, A, cache, debug=0):
    """Logic for finding and scoring the valid turn operators that can be
    applied to the edge x <- y.

    Parameters
    ----------
    x : int
        the origin node (i.e. x -> y once the edge is turned)
    y : int
        the target node
    A : np.array
        the current adjacency matrix
    cache : instance of ges.scores.DecomposableScore
        the score cache to compute the score of the
        operators that are valid
    debug : int
        if larger than 0, debug traces are printed. Higher values
        correspond to increased verbosity

    Returns
    -------
    valid_operators : list of tuples
        a list of tuples, one per valid operator, of the form
        (change in score, resulting adjacency matrix, x, y, C)

    """
    # One-hot encode all subsets of T0, plus one extra column to mark
    # if they pass validity condition 2 (see below). The set C passed
    # to the turn operator will be C = NAyx U T.
    p = len(A)
    T0 = sorted(utils.neighbors(y, A) - utils.adj(x, A))
    if len(T0) == 0:
        subsets = np.zeros((1, p + 1), dtype=bool)
    else:
        subsets = np.zeros((2**len(T0), p + 1), dtype=bool)
        subsets[:, T0] = utils.cartesian([np.array([False, True])] * len(T0),
                                         dtype=bool)
    # The quantities below depend only on (x, y, A), none of which
    # change inside the loop, so they are computed once instead of
    # once per candidate subset.
    na_yx = utils.na(y, x, A)
    neighbors_x = utils.neighbors(x, A)
    pa_x, pa_y = utils.pa(x, A), utils.pa(y, A)
    # Node sets of the semi-directed paths from y to x, leaving out
    # the path made up of just the edge being turned
    indirect_paths = [
        set(path) for path in utils.semi_directed_paths(y, x, A)
        if path != [y, x]
    ]
    valid_operators = []
    if debug > 1:
        print("    turn(%d,%d) T0=" % (x, y), set(T0))
    while len(subsets) > 0:
        if debug > 1:
            print("      len(subsets)=%d, len(valid_operators)=%d" %
                  (len(subsets), len(valid_operators)))
        # Access the next subset
        T = np.where(subsets[0, :-1])[0]
        passed_cond_2 = subsets[0, -1]
        subsets = subsets[1:]  # update the list of remaining subsets
        # Check that the validity conditions hold for T
        C = na_yx | set(T)
        # Condition 1: Test that C = NA_yx U T is a clique
        cond_1 = utils.is_clique(C, A)
        if not cond_1:
            # Remove from consideration all other sets T' which
            # contain T, as the clique condition will also not hold
            supersets = subsets[:, T].all(axis=1)
            subsets = utils.delete(subsets, supersets, axis=0)
        # Condition 2: Test that all semi-directed paths from y to x contain a
        # member from C U neighbors(x)
        if passed_cond_2:
            # If a subset of T satisfied condition 2, so does T
            cond_2 = True
        else:
            # otherwise, check condition 2
            blockers = C | neighbors_x
            cond_2 = all(not blockers.isdisjoint(nodes)
                         for nodes in indirect_paths)
            if cond_2:
                # If condition 2 holds for C U neighbors(x), that is,
                # for C = NAyx U T U neighbors(x), then it holds for
                # all supersets of T
                supersets = subsets[:, T].all(axis=1)
                subsets[supersets, -1] = True
        # If both conditions hold, apply operator and compute its score
        if debug > 1:
            print("      turn(%d,%d,%s)" % (x, y, C), "na_yx =", na_yx,
                  "T =", T, "validity:", cond_1, cond_2)
        if cond_1 and cond_2:
            # Apply operator
            new_A = turn(x, y, C, A)
            # Compute the change in score
            new_score = cache.local_score(y, pa_y | C | {x}) + \
                cache.local_score(x, pa_x - {y})
            old_score = cache.local_score(y, pa_y | C) + \
                cache.local_score(x, pa_x)
            if debug > 1:
                print("        new score = %0.6f, old score = %0.6f, y=%d, C=%s" %
                      (new_score, old_score, y, C))
            # Add to the list of valid operators
            valid_operators.append((new_score - old_score, new_A, x, y, C))
            if debug:
                print("    turn(%d,%d,%s) -> %0.16f" %
                      (x, y, C, new_score - old_score))
    # Return all the valid operators
    return valid_operators
示例#7
0
文件: main.py 项目: juangamella/ges
def score_valid_insert_operators(x, y, A, cache, debug=0):
    """Generate and score all valid insert(x,y,T) operators involving the edge
    x -> y, and all possible subsets T of neighbors of y which
    are NOT adjacent to x.

    Parameters
    ----------
    x : int
        the origin node (i.e. x -> y)
    y : int
        the target node
    A : np.array
        the current adjacency matrix
    cache : instance of ges.scores.DecomposableScore
        the score cache to compute the score of the
        operators that are valid
    debug : int
        if larger than 0, debug traces are printed. Higher values
        correspond to increased verbosity

    Returns
    -------
    valid_operators : list of tuples
        a list of tuples, one per valid operator, of the form
        (change in score, resulting adjacency matrix, x, y, T), where
        T is an array of node indices

    Raises
    ------
    ValueError
        if x and y are already connected in A

    """
    p = len(A)
    if A[x, y] != 0 or A[y, x] != 0:
        raise ValueError("x=%d and y=%d are already connected" % (x, y))
    # One-hot encode all subsets of T0, plus one extra column to mark
    # if they pass validity condition 2 (see below)
    T0 = sorted(utils.neighbors(y, A) - utils.adj(x, A))
    if len(T0) == 0:
        subsets = np.zeros((1, p + 1), dtype=bool)
    else:
        subsets = np.zeros((2**len(T0), p + 1), dtype=bool)
        subsets[:, T0] = utils.cartesian([np.array([False, True])] * len(T0),
                                         dtype=bool)
    # The quantities below depend only on (x, y, A), which this
    # function does not modify (insert is expected to return a new
    # matrix), so they are computed once instead of once per subset.
    na_yx = utils.na(y, x, A)
    pa_y = utils.pa(y, A)
    # Node sets of the semi-directed paths from y to x
    path_nodes = [set(path) for path in utils.semi_directed_paths(y, x, A)]
    valid_operators = []
    if debug > 1:
        print("    insert(%d,%d) T0=" % (x, y), set(T0))
    while len(subsets) > 0:
        if debug > 1:
            print("      len(subsets)=%d, len(valid_operators)=%d" %
                  (len(subsets), len(valid_operators)))
        # Access the next subset
        T = np.where(subsets[0, :-1])[0]
        passed_cond_2 = subsets[0, -1]
        subsets = subsets[1:]
        # Check that the validity conditions hold for T
        na_yxT = na_yx | set(T)
        # Condition 1: Test that NA_yx U T is a clique
        cond_1 = utils.is_clique(na_yxT, A)
        if not cond_1:
            # Remove from consideration all other sets T' which
            # contain T, as the clique condition will also not hold
            supersets = subsets[:, T].all(axis=1)
            subsets = utils.delete(subsets, supersets, axis=0)
        # Condition 2: Test that all semi-directed paths from y to x contain a
        # member from NA_yx U T
        if passed_cond_2:
            # If a subset of T satisfied condition 2, so does T
            cond_2 = True
        else:
            # Check condition 2
            cond_2 = all(not na_yxT.isdisjoint(nodes) for nodes in path_nodes)
            if cond_2:
                # If condition 2 holds for NA_yx U T, then it holds for all supersets of T
                supersets = subsets[:, T].all(axis=1)
                subsets[supersets, -1] = True
        if debug > 1:
            print("      insert(%d,%d,%s)" % (x, y, T), "na_yx U T = ", na_yxT,
                  "validity:", cond_1, cond_2)
        # If both conditions hold, apply operator and compute its score
        if cond_1 and cond_2:
            # Apply operator
            new_A = insert(x, y, T, A)
            # Compute the change in score
            aux = na_yxT | pa_y
            old_score = cache.local_score(y, aux)
            new_score = cache.local_score(y, aux | {x})
            if debug > 1:
                print("        new: s(%d, %s) = %0.6f old: s(%d, %s) = %0.6f" %
                      (y, aux | {x}, new_score, y, aux, old_score))
            # Add to the list of valid operators
            valid_operators.append((new_score - old_score, new_A, x, y, T))
            if debug:
                print("    insert(%d,%d,%s) -> %0.16f" %
                      (x, y, T, new_score - old_score))
    # Return all the valid operators
    return valid_operators