Пример #1
0
def _compute_eigens(H):
    """Compute the eigenvalues and eigenvectors of the normalized Laplacian.

    Follows the approach described in the paper:
    Zhou, Dengyong, Jiayuan Huang, and Bernhard Scholkopf.
    "Learning with hypergraphs: Clustering, classification, and embedding."
    Advances in neural information processing systems. 2006.
    (http://machinelearning.wustl.edu/mlpapers/paper_files/NIPS2006_630.pdf)

    This algorithm uses the normalized Laplacian to compute eigenvalues.

    :param H: the hypergraph to compute eigenvalues and eigenvectors on.
    :returns: array -- (..., N) of eigenvalues (these are not necessarily
              ordered).
              array -- (..., N, N) of eigenvectors, such that the
                       column ``eigenvectors[:, i]`` is the eigenvector
                       corresponding to the eigenvalue ``eigenvalues[i]``.
    :raises: TypeError -- Algorithm only applicable to undirected hypergraphs
    :warn: Algorithm may give unpredictable results if the hypergraph
           contains more than one connected component.
    """
    if not isinstance(H, UndirectedHypergraph):
        raise TypeError("Algorithm only applicable to undirected hypergraphs")

    # A disconnected input is not rejected, only reported: the caller still
    # gets a decomposition, but is told it may not be meaningful.
    if len(ucomp.connected_components(H)) > 1:
        warnings.warn("Algorithm may give unpredictable results for an "
                      "input hypergraph containing more than one "
                      "connected component")

    # Only the forward (name -> matrix index) mappings are needed to build
    # the Laplacian; the reverse mappings are discarded.
    _, nodes_to_indices = umat.get_node_mapping(H)
    _, hyperedge_ids_to_indices = umat.get_hyperedge_id_mapping(H)

    delta = _compute_normalized_laplacian(H,
                                          nodes_to_indices,
                                          hyperedge_ids_to_indices)

    # Dense decomposition, matching how normalized_hypergraph_cut consumes
    # the same Laplacian. This may be costly for large hypergraphs.
    eigenvalues, eigenvectors = np.linalg.eig(delta.todense())
    return eigenvalues, eigenvectors
Пример #2
0
def stationary_distribution(H, pi=None, P=None):
    """Computes the stationary distribution of a random walk on the given
    hypergraph using the iterative approach explained in the paper:
    (http://pages.cs.wisc.edu/~shuchi/courses/787-F09/scribe-notes/lec15.pdf)

    The distribution is repeatedly multiplied by the transition matrix
    (``pi * P``) until ``_has_converged`` reports that two successive
    iterates agree. There is no iteration cap, so the call only returns
    once that check succeeds.

    :param H: the hypergraph to find the stationary distribution on.
    :param pi: the initial distribution over the nodes. If not provided,
            it will be created with a random distribution.
    :param P: the transition matrix for the hypergraph. If not provided,
            it will be created.
    :returns: list -- list of the stationary probabilities for all nodes
            in the hypergraph.
    :raises: TypeError -- Algorithm only applicable to undirected hypergraphs

    """
    if not isinstance(H, UndirectedHypergraph):
        raise TypeError("Algorithm only applicable to undirected hypergraphs")

    if P is None:
        # The index mappings are only needed to build the transition matrix.
        _, nodes_to_indices = umat.get_node_mapping(H)
        _, hyperedge_ids_to_indices = umat.get_hyperedge_id_mapping(H)
        P = _compute_transition_matrix(H,
                                       nodes_to_indices,
                                       hyperedge_ids_to_indices)

    node_count = len(H.get_node_set())
    if pi is None:
        pi = _create_random_starter(node_count)

    # Start the iteration from `pi` itself so that a caller-supplied
    # initial distribution is actually used.
    # NOTE(review): assumes `pi * P` is a vector-matrix product for the
    # types of `pi` and `P` in use -- confirm for caller-supplied `pi`.
    pi_star = pi * P
    while not _has_converged(pi_star, pi):
        pi = pi_star
        pi_star = pi * P

    return pi
def normalized_hypergraph_cut(H, threshold=0):
    """Executes the min-cut algorithm described in the paper:
    Zhou, Dengyong, Jiayuan Huang, and Bernhard Scholkopf.
    "Learning with hypergraphs: Clustering, classification, and embedding."
    Advances in neural information processing systems. 2006.
    (http://machinelearning.wustl.edu/mlpapers/paper_files/NIPS2006_630.pdf)

    This algorithm uses the normalized Laplacian to partition the hypergraph
    into two disjoint components: the eigenvector belonging to the second
    smallest eigenvalue is thresholded, and every node whose component is
    ``>= threshold`` goes to S while all remaining nodes go to T.

    :param H: the hypergraph to perform the hypergraph-cut algorithm on.
    :param threshold: The threshold value for the partitioning algorithm.
                    Typically, the value zero is selected for this purpose.
    :returns: set -- the S set of nodes in the S-T partition
              set -- the T set of nodes in the S-T partition
    :raises: TypeError -- Algorithm only applicable to undirected hypergraphs

    """
    if not isinstance(H, UndirectedHypergraph):
        raise TypeError("Algorithm only applicable to undirected hypergraphs")

    # TODO: make sure that the hypergraph is connected

    # Only the forward (name -> matrix index) mappings are used below.
    _, nodes_to_indices = umat.get_node_mapping(H)
    _, hyperedge_ids_to_indices = umat.get_hyperedge_id_mapping(H)

    delta = _compute_normalized_laplacian(H,
                                          nodes_to_indices,
                                          hyperedge_ids_to_indices)

    # Since the eigs method in sparse.linalg library doesn't find
    # all the eigenvalues and eigenvectors, it doesn't give us an
    # exact and correct solution. Therefore, we should use the
    # numpy library which works on dense graphs. This might be
    # problematic for large graphs.
    # New note: I think we only need the 2 smallest eigenvalues, which
    # can be found with the sparse solver. Look into this if run-time
    # becomes an issue.
    eigenvalues, eigenvectors = np.linalg.eig(delta.todense())

    second_min_index = np.argsort(eigenvalues)[1]
    # Flatten the selected column so each component is a plain scalar,
    # regardless of whether the column comes back 1-D or as an (N, 1) slice.
    second_eigenvector = np.asarray(
        eigenvectors[:, second_min_index]).ravel()

    # A set keeps the per-node membership test below O(1); the previous
    # list made the partitioning loop quadratic in the number of nodes.
    s_indices = {index
                 for index, component in enumerate(second_eigenvector)
                 if component >= threshold}

    S, T = set(), set()
    for node, index in nodes_to_indices.items():
        if index in s_indices:
            S.add(node)
        else:
            T.add(node)

    return S, T
Пример #4
0
def test_laplacian():
    '''
    Check the normalized Laplacian against reference values from Matlab.

    The same computation was written in Matlab; the delta matrix was
    computed there and the sum of each of its columns recorded. This test
    compares those recorded column sums with the column sums of the matrix
    produced by the library code.

    The Matlab snippet used to produce the reference values:
    M = [1 0 1 0 0 0 0 0 0;1 1 0 0 1 0 0 0 0 ;1 0 1 0 0 0 0 0 0;
    0 1 0 1 0 0 0 0 0;0 0 0 1 0 0 0 0 0;0 0 0 0 1 1 0 0 0;
    0 0 0 0 0 1 0 0 1;0 0 0 0 0 1 1 1 0;0 0 0 0 0 0 1 0 0;
    0 0 0 0 0 0 1 0 0;0 0 0 0 0 0 0 1 1;0 0 0 0 0 0 0 0 1];
    W = diag([9.1 10 1 1 3 2 4 3.5 4.1]);
    d_v = diag(H * diag(W));
    d_e = diag(sum(M));
    d_v_sqrt = sqrtm(d_v);
    d_v_sqrt_inv = inv(d_v_sqrt);
    d_e_inv = inv(d_e);
    M_trans = M';
    theta = d_v_sqrt_inv * H * W * d_e_inv * H_trans * d_v_sqrt_inv
    [n m] = size(M);
    I = eye(n);
    delta = I-theta

    NOTE(review): the snippet refers to ``H`` and ``H_trans`` although it
    only defines ``M`` and ``M_trans``; presumably they are the same
    matrices -- confirm against the original Matlab script.
    '''
    hypergraph = UndirectedHypergraph()
    hypergraph.read('./tests/data/basic_undirected_hypergraph.txt')

    # Only the name -> index direction of each mapping is needed here.
    _, nodes_to_indices = umat.get_node_mapping(hypergraph)
    _, hyperedge_id_to_indices = umat.get_hyperedge_id_mapping(hypergraph)

    laplacian = partitioning._compute_normalized_laplacian(
        hypergraph, nodes_to_indices, hyperedge_id_to_indices)

    # Collapse the per-column sums into a flat 1-D array indexable by node.
    column_sums = np.squeeze(
        np.asarray(np.sum(laplacian.todense(), axis=0)))

    # Reference column sums from Matlab, keyed by node name.
    expected_sums = dict(
        v1=0.0973,
        v2=-0.3008,
        v3=0.0973,
        v4=0.0286,
        v5=0.3492,
        v7=0.2065,
        v8=-0.0156,
        v9=-0.2176,
        v10=0.1170,
        v11=0.1170,
        v12=-0.0616,
        v13=0.1486,
    )

    tolerance = 10e-4
    for node, expected in expected_sums.items():
        column = nodes_to_indices.get(node)
        assert fabs(column_sums[column] - expected) < tolerance
Пример #5
0
def stationary_distribution(H, pi=None, P=None):
    """Computes the stationary distribution of a random walk on the given
    hypergraph using the iterative approach explained in the paper:
    Aurelien Ducournau, Alain Bretto, Random walks in directed hypergraphs and
    application to semi-supervised image segmentation,
    Computer Vision and Image Understanding, Volume 120, March 2014,
    Pages 91-102, ISSN 1077-3142, http://dx.doi.org/10.1016/j.cviu.2013.10.012.
    (http://www.sciencedirect.com/science/article/pii/S1077314213002038)

    The distribution is repeatedly multiplied by the transition matrix
    (``pi * P``) until ``_has_converged`` reports that two successive
    iterates agree. There is no iteration cap, so the call only returns
    once that check succeeds.

    :param H: the hypergraph to find the 'Stationary Distribution'
            algorithm on.
    :param pi: the initial distribution over the nodes. If not provided,
            it will be created with a random distribution.
    :param P: the transition matrix for the hypergraph. If not provided,
            it will be created.
    :returns: list -- list of the stationary probabilities for all nodes
            in the hypergraph.
    :raises: TypeError -- Algorithm only applicable to undirected hypergraphs
    :raises: AssertionError -- Each node must have at least 1 outgoing
             hyperedge (even if it's only a self-loop).

    """
    if not isinstance(H, UndirectedHypergraph):
        raise TypeError("Algorithm only applicable to undirected hypergraphs")

    # NOTE(review): "forward star" / "outgoing" are directed-hypergraph
    # terms, yet H must be an UndirectedHypergraph -- confirm that
    # get_forward_star is the intended accessor here.
    for node in H.node_iterator():
        if len(H.get_forward_star(node)) == 0:
            raise AssertionError("Each node must have at least 1 outgoing "
                                 "hyperedge (even if it's only a self-loop).")

    if P is None:
        # The index mappings are only needed to build the transition matrix.
        _, nodes_to_indices = umat.get_node_mapping(H)
        _, hyperedge_ids_to_indices = umat.get_hyperedge_id_mapping(H)
        P = _compute_transition_matrix(H, nodes_to_indices,
                                       hyperedge_ids_to_indices)

    node_count = len(H.get_node_set())
    if pi is None:
        pi = _create_random_starter(node_count)

    # Start the iteration from `pi` itself. Previously a second random
    # vector replaced `pi` on the first pass, so a caller-supplied initial
    # distribution never influenced the walk.
    # NOTE(review): assumes `pi * P` is a vector-matrix product for the
    # types of `pi` and `P` in use -- confirm for caller-supplied `pi`.
    pi_star = pi * P
    while not _has_converged(pi_star, pi):
        pi = pi_star
        pi_star = pi * P

    return pi
def test_laplacian():
    '''
    Compare the library's normalized Laplacian with a Matlab reference.

    The delta matrix was computed independently in Matlab and the sum of
    each of its columns was recorded; the test asserts that the column
    sums of the library's matrix match those recorded values.

    Matlab snippet that produced the reference values:
    M = [1 0 1 0 0 0 0 0 0;1 1 0 0 1 0 0 0 0 ;1 0 1 0 0 0 0 0 0;
    0 1 0 1 0 0 0 0 0;0 0 0 1 0 0 0 0 0;0 0 0 0 1 1 0 0 0;
    0 0 0 0 0 1 0 0 1;0 0 0 0 0 1 1 1 0;0 0 0 0 0 0 1 0 0;
    0 0 0 0 0 0 1 0 0;0 0 0 0 0 0 0 1 1;0 0 0 0 0 0 0 0 1];
    W = diag([9.1 10 1 1 3 2 4 3.5 4.1]);
    d_v = diag(H * diag(W));
    d_e = diag(sum(M));
    d_v_sqrt = sqrtm(d_v);
    d_v_sqrt_inv = inv(d_v_sqrt);
    d_e_inv = inv(d_e);
    M_trans = M';
    theta = d_v_sqrt_inv * H * W * d_e_inv * H_trans * d_v_sqrt_inv
    [n m] = size(M);
    I = eye(n);
    delta = I-theta

    NOTE(review): ``H`` / ``H_trans`` in the snippet are not defined
    there; presumably they stand for ``M`` / ``M_trans`` -- confirm.
    '''
    graph = UndirectedHypergraph()
    graph.read('./tests/data/basic_undirected_hypergraph.txt')

    node_mapping = umat.get_node_mapping(graph)
    hyperedge_mapping = umat.get_hyperedge_id_mapping(graph)
    # Each mapping call yields an (index -> name, name -> index) pair; only
    # the second element is needed to build and index the matrix.
    _unused_nodes, nodes_to_indices = node_mapping
    _unused_edges, hyperedge_id_to_indices = hyperedge_mapping

    delta = partitioning._compute_normalized_laplacian(
        graph, nodes_to_indices, hyperedge_id_to_indices)

    dense_delta = delta.todense()
    per_column_total = np.sum(dense_delta, axis=0)
    per_column_total = np.squeeze(np.asarray(per_column_total))

    # (node name, column sum reported by Matlab) pairs.
    reference_values = (
        ('v1', 0.0973), ('v2', -0.3008), ('v3', 0.0973),
        ('v4', 0.0286), ('v5', 0.3492), ('v7', 0.2065),
        ('v8', -0.0156), ('v9', -0.2176), ('v10', 0.1170),
        ('v11', 0.1170), ('v12', -0.0616), ('v13', 0.1486),
    )

    for node_name, matlab_sum in reference_values:
        position = nodes_to_indices.get(node_name)
        difference = per_column_total[position] - matlab_sum
        assert fabs(difference) < 10e-4
Пример #7
0
def stationary_distribution(H, pi=None, P=None):
    """Computes the stationary distribution of a random walk on the given
    hypergraph using the iterative approach explained in the paper:
    (http://pages.cs.wisc.edu/~shuchi/courses/787-F09/scribe-notes/lec15.pdf)

    The distribution is repeatedly multiplied by the transition matrix
    (``pi * P``) until ``_has_converged`` reports that two successive
    iterates agree. There is no iteration cap, so the call only returns
    once that check succeeds.

    :param H: the hypergraph to find the stationary distribution on.
    :param pi: the initial distribution over the nodes. If not provided,
            it will be created with a random distribution.
    :param P: the transition matrix for the hypergraph. If not provided,
            it will be created.
    :returns: list -- list of the stationary probabilities for all nodes
            in the hypergraph.
    :raises: TypeError -- Algorithm only applicable to undirected hypergraphs

    """
    if not isinstance(H, UndirectedHypergraph):
        raise TypeError("Algorithm only applicable to undirected hypergraphs")

    if P is None:
        # The index mappings are only needed to build the transition matrix.
        _, nodes_to_indices = umat.get_node_mapping(H)
        _, hyperedge_ids_to_indices = umat.get_hyperedge_id_mapping(H)
        P = _compute_transition_matrix(H,
                                       nodes_to_indices,
                                       hyperedge_ids_to_indices)

    node_count = len(H.get_node_set())
    if pi is None:
        pi = _create_random_starter(node_count)

    # Start the iteration from `pi` itself. Previously a second random
    # vector replaced `pi` on the first pass, so a caller-supplied initial
    # distribution never influenced the walk.
    # NOTE(review): assumes `pi * P` is a vector-matrix product for the
    # types of `pi` and `P` in use -- confirm for caller-supplied `pi`.
    pi_star = pi * P
    while not _has_converged(pi_star, pi):
        pi = pi_star
        pi_star = pi * P

    return pi