Exemplo n.º 1
0
def analyzeClustering_modularity(G, n_clusters, clustering):
    """
    Return the modularity score of a clustering of graph ``G``.

    This is a thin pass-through: the three arguments are forwarded
    unchanged to
    ``spectral_clustering_wrapper.analyzeClustering_modularity`` and its
    result is returned as-is.

    Parameters
    ----------
    G : cugraph.Graph
        cuGraph graph descriptor. The graph should have edge weights.
    n_clusters : integer
        Number of clusters present in ``clustering``.
    clustering : cudf.Series
        The cluster assignment to score.

    Returns
    -------
    score : float
        The modularity score computed by the wrapper.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv',
    ...                   delimiter = ' ',
    ...                   dtype=['int32', 'int32', 'float32'],
    ...                   header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2')
    >>> df = cugraph.spectralBalancedCutClustering(G, 5)
    >>> score = cugraph.analyzeClustering_modularity(G, 5, df['cluster'])
    """
    return spectral_clustering_wrapper.analyzeClustering_modularity(
        G, n_clusters, clustering
    )
Exemplo n.º 2
0
def analyzeClustering_modularity(G,
                                 n_clusters,
                                 clustering,
                                 vertex_col_name='vertex',
                                 cluster_col_name='cluster'):
    """
    Return the modularity score of a clustering of graph ``G``.

    The clustering dataframe is ordered by its vertex column and the
    cluster column alone is handed to
    ``spectral_clustering_wrapper.analyzeClustering_modularity``.

    Parameters
    ----------
    G : cugraph.Graph
        cuGraph graph descriptor. The graph should have edge weights.
    n_clusters : integer
        Number of clusters present in ``clustering``.
    clustering : cudf.DataFrame
        The cluster assignment to score.
    vertex_col_name : str
        Name of the column in ``clustering`` holding the external
        vertex id.
    cluster_col_name : str
        Name of the column in ``clustering`` holding the cluster id.

    Returns
    -------
    score : float
        The modularity score computed by the wrapper.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv',
    ...                   delimiter = ' ',
    ...                   dtype=['int32', 'int32', 'float32'],
    ...                   header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2')
    >>> df = cugraph.spectralBalancedCutClustering(G, 5)
    >>> score = cugraph.analyzeClustering_modularity(G, 5, df,
    ...                                              'vertex', 'cluster')
    """
    assignments = clustering

    # On a renumbered graph the vertex column is rewritten in place (same
    # name in and out, original dropped) -- presumably mapping external ids
    # to the graph's internal ids; confirm against add_internal_vertex_id.
    if G.renumbered:
        assignments = G.add_internal_vertex_id(
            assignments, vertex_col_name, vertex_col_name, drop=True
        )

    ordered = assignments.sort_values(vertex_col_name)

    return spectral_clustering_wrapper.analyzeClustering_modularity(
        G, n_clusters, ordered[cluster_col_name]
    )
Exemplo n.º 3
0
def analyzeClustering_modularity(G, n_clusters, clustering,
                                 vertex_col_name='vertex',
                                 cluster_col_name='cluster'):
    """
    Compute the modularity score for a given partitioning/clustering.

    By default "clustering" is assumed to be the result of one of the
    clustering algorithms and to contain columns named "vertex" and
    "cluster"; other column names can be supplied through
    ``vertex_col_name`` and ``cluster_col_name``.

    Parameters
    ----------
    G : cugraph.Graph or networkx.Graph
        graph descriptor. This graph should have edge weights.

    n_clusters : integer
        Specifies the number of clusters in the given clustering

    clustering : cudf.DataFrame
        The cluster assignment to analyze.

    vertex_col_name : str or list of str, optional (default='vertex')
        The names of the column in the clustering dataframe identifying
        the external vertex id

    cluster_col_name : str, optional (default='cluster')
        The name of the column in the clustering dataframe identifying
        the cluster id

    Returns
    -------
    score : float
        The computed modularity score

    Raises
    ------
    TypeError
        If ``vertex_col_name`` is neither a string nor a list of strings,
        or if ``cluster_col_name`` is not a string.

    Examples
    --------
    >>> M = cudf.read_csv(datasets_path / 'karate.csv',
    ...                   delimiter = ' ',
    ...                   dtype=['int32', 'int32', 'float32'],
    ...                   header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2')
    >>> df = cugraph.spectralBalancedCutClustering(G, 5)
    >>> score = cugraph.analyzeClustering_modularity(G, 5, df)

    """
    # Validate the column-name arguments up front, before any graph work.
    # TypeError is an Exception subclass, so ``except Exception`` callers
    # are unaffected.
    if isinstance(vertex_col_name, list):
        if not all(isinstance(name, str) for name in vertex_col_name):
            raise TypeError("vertex_col_name must be list of string")
    elif not isinstance(vertex_col_name, str):
        raise TypeError("vertex_col_name must be a string")

    if not isinstance(cluster_col_name, str):
        raise TypeError("cluster_col_name must be a string")

    # The second return value (whether the input was a NetworkX graph) is
    # not needed here.
    G, _ = ensure_cugraph_obj_for_nx(G)

    if G.renumbered:
        # The id column produced here is named 'vertex' and the external
        # column(s) are dropped, so 'vertex' is the column to sort on.
        clustering = G.add_internal_vertex_id(clustering,
                                              'vertex',
                                              vertex_col_name,
                                              drop=True)
        sort_key = 'vertex'
    else:
        # No renumbering happened, so the dataframe still carries the
        # caller's own vertex column(s); a hard-coded 'vertex' would fail
        # whenever a custom vertex_col_name is used.
        sort_key = vertex_col_name

    clustering = clustering.sort_values(sort_key)

    score = spectral_clustering_wrapper.analyzeClustering_modularity(
        G, n_clusters, clustering[cluster_col_name]
    )

    return score