def dci_skeletons_bootstrap_multiple(
        X1,
        X2,
        alpha_skeleton_grid: list = [0.1, 0.5],
        max_set_size: int = 3,
        difference_ug: list = None,
        nodes_cond_set: set = None,
        edge_threshold: float = 0.05,
        sample_fraction: float = 0.7,
        n_bootstrap_iterations: int = 50,
        alpha_ug: float = 1.,
        max_iter: int = 1000,
        n_jobs: int = 1,
        random_state: int = None,
        verbose: int = 0,
        lam: float = 0,
        true_diff: Optional[Set] = None
):
    """
    Run `dci_skeleton_multiple` on bootstrap subsamples of two datasets and average the
    resulting skeletons for every value in `alpha_skeleton_grid`.

    Parameters
    ----------
    X1, X2: array, indexed as [rows, columns]
        The two datasets. Rows are subsampled; the number of columns of X1 fixes the size of
        the returned adjacency matrices.
    alpha_skeleton_grid: list, default = [0.1, 0.5]
        Values forwarded to `dci_skeleton_multiple`; each per-run result is indexed by them.
    max_set_size: int, default = 3
        Forwarded to `dci_skeleton_multiple`.
    difference_ug: list, default = None
    nodes_cond_set: set, default = None
        If either one is None, both are (re-)estimated with `dci_undirected_graph`.
    edge_threshold: float, default = 0.05
    alpha_ug: float, default = 1.
    max_iter: int, default = 1000
        Forwarded to `dci_undirected_graph` (as `edge_threshold`, `alpha`, `max_iter`) when it
        is called.
    sample_fraction: float, default = 0.7
    n_bootstrap_iterations: int, default = 50
    random_state: int, default = None
        Forwarded to `bootstrap_generator`. The same `random_state` is used for both datasets.
    n_jobs: int, default = 1
        Forwarded to `Parallel`.
    verbose: int, default = 0
        Verbosity level; also forwarded to the helpers and to `Parallel`.
    lam: float, default = 0
    true_diff: set, default = None
        Forwarded unchanged to `dci_skeleton_multiple`.

    Returns
    -------
    bootstrap_results: list
        One `dci_skeleton_multiple` result per bootstrap draw.
    alpha2adjacency: dict
        Maps each alpha in `alpha_skeleton_grid` to the sum over draws of
        `1 / n_bootstrap_iterations * edges2adjacency(..., undirected=True)`.
    """
    ug_available = difference_ug is not None and nodes_cond_set is not None
    if not ug_available:
        difference_ug, nodes_cond_set = dci_undirected_graph(
            X1, X2, alpha=alpha_ug, max_iter=max_iter, edge_threshold=edge_threshold, verbose=verbose
        )
        if verbose > 0:
            print(f"{len(difference_ug)} edges in the difference UG, over {len(nodes_cond_set)} nodes")

    row_draws1 = bootstrap_generator(n_bootstrap_iterations, sample_fraction, X1, random_state=random_state)
    row_draws2 = bootstrap_generator(n_bootstrap_iterations, sample_fraction, X2, random_state=random_state)

    # Arguments that are identical for every bootstrap run.
    shared_kwargs = dict(
        alpha_skeleton_grid=alpha_skeleton_grid,
        max_set_size=max_set_size,
        difference_ug=difference_ug,
        nodes_cond_set=nodes_cond_set,
        verbose=verbose,
        lam=lam,
        true_diff=true_diff,
    )
    skeleton_job = delayed(dci_skeleton_multiple)
    jobs = (
        skeleton_job(X1[safe_mask(X1, rows1), :], X2[safe_mask(X2, rows2), :], **shared_kwargs)
        for rows1, rows2 in zip(row_draws1, row_draws2)
    )
    bootstrap_results = Parallel(n_jobs, verbose=verbose)(jobs)

    n_features = X1.shape[1]
    alpha2adjacency = {}
    for alpha in alpha_skeleton_grid:
        alpha2adjacency[alpha] = np.zeros([n_features, n_features])

    # The 1/n factor is applied per run (not hoisted) so nothing is divided when there are no runs.
    for run in bootstrap_results:
        for alpha in alpha_skeleton_grid:
            alpha2adjacency[alpha] += 1 / n_bootstrap_iterations * edges2adjacency(
                n_features, run[alpha], undirected=True
            )

    return bootstrap_results, alpha2adjacency
def dci(
        X1,
        X2,
        alpha_ug: float = 1.0,
        alpha_skeleton: float = 0.1,
        alpha_orient: float = 0.1,
        max_set_size: Optional[int] = 3,
        difference_ug: list = None,
        nodes_cond_set: set = None,
        max_iter: int = 1000,
        edge_threshold: float = 0,
        verbose: int = 0,
        lam: float = 0,
        progress: bool = False,
        order_independent: bool = True
):
    """
    Uses the Difference Causal Inference (DCI) algorithm to estimate the difference-DAG between two settings.

    Parameters
    ----------
    X1: array, shape = [n_samples, n_features]
        First dataset.
    X2: array, shape = [n_samples, n_features]
        Second dataset.
    alpha_ug: float, default = 1.0
        L1 regularization parameter for estimating the difference undirected graph via KLIEP algorithm.
    alpha_skeleton: float, default = 0.1
        Significance level parameter for determining presence of edges in the skeleton of the difference graph.
        Lower alpha_skeleton results in sparser difference graph.
    alpha_orient: float, default = 0.1
        Significance level parameter for determining orientation of an edge.
        Lower alpha_orient results in more directed edges in the difference-DAG.
    max_set_size: int, default = 3
        Maximum conditioning set size used to test regression invariance.
        Smaller maximum conditioning set size results in faster computation time. For large datasets recommended max_set_size is 3.
        If None, conditioning sets of all sizes will be used.
    difference_ug: list, default = None
        List of tuples that represents edges in the difference undirected graph. If difference_ug is None,
        KLIEP algorithm for estimating the difference undirected graph will be run.
        If the number of nodes is small, difference_ug could be taken to be the complete graph between all the nodes.
    nodes_cond_set: set, default = None
        Nodes to be considered as conditioning sets. If either difference_ug or nodes_cond_set is None,
        both are (re-)estimated with dci_undirected_graph.
    max_iter: int, default = 1000
        Maximum number of iterations for gradient descent in KLIEP algorithm.
    edge_threshold: float, default = 0
        Edge weight cutoff for keeping an edge for KLIEP algorithm (all edges above or equal to this threshold are kept).
    verbose: int, default = 0
        The verbosity level of logging messages.
    lam: float, default = 0
        Amount of regularization for regression (becomes ridge regression if nonzero).
        Only forwarded to the skeleton step (dci_skeleton), not to the orientation step.
    progress: bool, default = False
        Forwarded unchanged to dci_skeleton as its `progress` argument
        (presumably toggles a progress display -- confirm against dci_skeleton).
    order_independent: bool, default = True
        If True, edges are oriented with dci_orient_order_independent; otherwise dci_orient is used.

    See Also
    --------
    dci_undirected_graph, dci_skeleton, dci_orient

    Returns
    -------
    adjacency_matrix: array, shape  = [n_features, n_features]
        Estimated difference-DAG. Edges that were found to be different between two settings but the orientation
        could not be determined, are represented by assigning 1 in both directions, i.e. adjacency_matrix[i,j] = 1
        and adjacency_matrix[j,i] = 1. Otherwise for oriented edges, only adjacency_matrix[i,j] = 1 is assigned.
        Assignment of 0 in the adjacency matrix represents no edge.

    Raises
    ------
    AssertionError
        If alpha_skeleton or alpha_orient lies outside [0, 1].

    References
    ----------
        [1] Wang, Y., Squires, C., Belyaeva, A., & Uhler, C. (2018). Direct estimation of differences in causal graphs.
        In Advances in Neural Information Processing Systems (pp. 3770-3781).
    """

    assert 0 <= alpha_skeleton <= 1, "alpha_skeleton must be in [0,1] range."
    assert 0 <= alpha_orient <= 1, "alpha_orient must be in [0,1] range."

    # obtain sufficient statistics; the regression helpers are shared by the skeleton and orientation steps
    suffstat1 = gauss_ci_suffstat(X1)
    suffstat2 = gauss_ci_suffstat(X2)
    rh1 = RegressionHelper(suffstat1)
    rh2 = RegressionHelper(suffstat2)

    # compute the difference undirected graph via KLIEP if difference_ug (or nodes_cond_set) is not provided
    if difference_ug is None or nodes_cond_set is None:
        difference_ug, nodes_cond_set = dci_undirected_graph(
            X1,
            X2,
            alpha=alpha_ug,
            max_iter=max_iter,
            edge_threshold=edge_threshold,
            verbose=verbose
        )
        if verbose > 0:
            print(f"{len(difference_ug)} edges in the difference UG, over {len(nodes_cond_set)} nodes")

    # estimate the skeleton of the difference-DAG
    skeleton = dci_skeleton(
        X1,
        X2,
        difference_ug,
        nodes_cond_set,
        rh1=rh1,
        rh2=rh2,
        alpha=alpha_skeleton,
        max_set_size=max_set_size,
        verbose=verbose,
        lam=lam,
        progress=progress
    )
    if verbose > 0:
        print(f"{len(skeleton)} edges in the difference skeleton")

    # orient edges of the skeleton of the difference-DAG
    orient_algorithm = dci_orient_order_independent if order_independent else dci_orient
    adjacency_matrix = orient_algorithm(
        X1,
        X2,
        skeleton,
        nodes_cond_set,
        rh1=rh1,
        rh2=rh2,
        alpha=alpha_orient,
        max_set_size=max_set_size,
        verbose=verbose
    )

    return adjacency_matrix
def dci_multiple(
        X1: np.ndarray,
        X2: np.ndarray,
        alpha_skeleton_grid: list = [0.1, 0.5],
        max_set_size: int = 3,
        difference_ug: list = None,
        nodes_cond_set: set = None,
        edge_threshold: float = 0.05,
        sample_fraction: float = 0.7,
        n_bootstrap_iterations: int = 50,
        alpha_ug: float = 1.,
        max_iter: int = 1000,
        alpha_orient_grid: list = [.1],
        n_jobs: int = 1,
        random_state: int = None,
        verbose: int = 0,
        lam: float = 0,
        true_diff: Optional[Set] = None,
        difference_ug_method: str = 'constraint'
):
    """
    Bootstrap the skeleton and orientation steps of DCI over grids of significance levels.

    The same bootstrap subsamples of X1 and X2 are used for the skeleton step
    (`dci_skeleton_multiple`) and for the orientation step (`dci_orient_order_independent`).

    Parameters
    ----------
    X1, X2: array, indexed as [rows, columns]
        The two datasets. Rows are subsampled; the number of columns of X1 fixes the size of
        the averaged skeleton matrices.
    alpha_skeleton_grid: list, default = [0.1, 0.5]
        Values forwarded to `dci_skeleton_multiple`; per-run results are indexed by them.
    max_set_size: int, default = 3
        Forwarded to the skeleton and orientation steps.
    difference_ug: list, default = None
    nodes_cond_set: set, default = None
        If either one is None, both are (re-)estimated using `difference_ug_method`.
    edge_threshold: float, default = 0.05
    max_iter: int, default = 1000
        Forwarded to `dci_undirected_graph`; only used when `difference_ug_method` is 'kliep'.
    alpha_ug: float, default = 1.
        Forwarded as `alpha` to whichever difference-UG estimator is run.
    sample_fraction: float, default = 0.7
    n_bootstrap_iterations: int, default = 50
    random_state: int, default = None
        Forwarded to `bootstrap_generator`. The same `random_state` is used for both datasets.
    alpha_orient_grid: list, default = [.1]
        Values passed one at a time as `alpha` to `dci_orient_order_independent`.
    n_jobs: int, default = 1
        Forwarded to `Parallel`.
    verbose: int, default = 0
        Verbosity level; also forwarded to the helpers and to `Parallel`.
    lam: float, default = 0
        Forwarded to `dci_skeleton_multiple` only.
    true_diff: set, default = None
        Pairs (i, j) of a reference difference graph. When truthy, false negative / false positive
        counts of the difference UG against it are printed (regardless of `verbose`), and it is
        forwarded to `dci_skeleton_multiple`.
    difference_ug_method: str, default = 'constraint'
        'constraint' runs `constraint_diff_ug`, 'kliep' runs `dci_undirected_graph`.

    Returns
    -------
    alpha2adjacency_skeleton: dict
        Maps each alpha in `alpha_skeleton_grid` to the sum over bootstrap runs of
        `1 / n_bootstrap_iterations * edges2adjacency(..., undirected=True)`.
    alpha2adjacency_oriented: dict
        Maps each (alpha_skeleton, alpha_orient) pair to `1 / n_bootstrap_iterations` times the
        sum of the corresponding orientation results.

    Raises
    ------
    ValueError
        If the difference UG has to be estimated and `difference_ug_method` is neither
        'constraint' nor 'kliep'.
    """
    if difference_ug is None or nodes_cond_set is None:
        if difference_ug_method == 'constraint':
            difference_ug, nodes_cond_set = constraint_diff_ug(X1, X2, alpha=alpha_ug)
        elif difference_ug_method == 'kliep':
            difference_ug, nodes_cond_set = dci_undirected_graph(
                X1,
                X2,
                alpha=alpha_ug,
                max_iter=max_iter,
                edge_threshold=edge_threshold,
                verbose=verbose
            )
        else:
            raise ValueError("`difference_ug_method` should be either 'constraint' or 'kliep'")
    if verbose > 0:
        print(f"{len(difference_ug)} edges in the difference UG, over {len(nodes_cond_set)} nodes")
    if true_diff:
        # NOTE(review): this rebinds difference_ug to a set of frozensets, and that set is what the
        # skeleton step receives below whenever true_diff is given -- confirm this is intended.
        difference_ug = {frozenset({i, j}) for i, j in difference_ug}
        true_skel = {frozenset({i, j}) for i, j in true_diff}
        print(f"in difference UG: {len(true_skel - difference_ug)} false negatives, {len(difference_ug - true_skel)} false positives")
        print(f"{len(difference_ug)} edges in the difference UG, over {len(nodes_cond_set)} nodes")

    # Materialize the subsamples: they are iterated once for the skeletons and once per alpha_orient.
    bootstrap_samples1 = list(bootstrap_generator(n_bootstrap_iterations, sample_fraction, X1, random_state=random_state))
    bootstrap_samples2 = list(bootstrap_generator(n_bootstrap_iterations, sample_fraction, X2, random_state=random_state))

    skeleton_results = Parallel(n_jobs, verbose=verbose)(
        delayed(dci_skeleton_multiple)(
            X1[safe_mask(X1, subsample1), :],
            X2[safe_mask(X2, subsample2), :],
            alpha_skeleton_grid=alpha_skeleton_grid,
            max_set_size=max_set_size,
            difference_ug=difference_ug,
            nodes_cond_set=nodes_cond_set,
            verbose=verbose,
            lam=lam,
            true_diff=true_diff)
        for subsample1, subsample2 in zip(bootstrap_samples1, bootstrap_samples2)
    )

    p = X1.shape[1]
    alpha2adjacency_skeleton = {alpha: np.zeros([p, p]) for alpha in alpha_skeleton_grid}
    for res in skeleton_results:
        for alpha in alpha_skeleton_grid:
            alpha2adjacency_skeleton[alpha] += 1 / n_bootstrap_iterations * edges2adjacency(
                p, res[alpha], undirected=True
            )

    alpha2adjacency_oriented = dict()
    for alpha_orient in alpha_orient_grid:
        # NOTE(review): each job receives the whole per-alpha skeleton result of its bootstrap run and
        # its return value is indexed by alpha_skeleton below -- confirm dci_orient_order_independent
        # accepts and returns such a per-alpha mapping.
        orientation_results = Parallel(n_jobs, verbose=verbose)(
            delayed(dci_orient_order_independent)(
                X1[safe_mask(X1, subsample1), :],
                # The mask for X2 must be derived from X2 itself (the original used X1 here).
                X2[safe_mask(X2, subsample2), :],
                skeleton,
                nodes_cond_set=nodes_cond_set,
                alpha=alpha_orient,
                max_set_size=max_set_size,
                verbose=verbose)
            for subsample1, subsample2, skeleton in zip(bootstrap_samples1, bootstrap_samples2, skeleton_results)
        )
        for alpha_skel in alpha_skeleton_grid:
            bootstrap_amat = 1 / n_bootstrap_iterations * sum(
                oriented[alpha_skel] for oriented in orientation_results
            )
            alpha2adjacency_oriented[(alpha_skel, alpha_orient)] = bootstrap_amat

    return alpha2adjacency_skeleton, alpha2adjacency_oriented