Пример #1
0
def analyzeClustering_ratio_cut(G, n_clusters, clustering):
    """
    Return the ratio cut score of a given clustering of a graph.

    This is a thin pass-through: the three arguments are forwarded
    unchanged to ``spectral_clustering_wrapper.analyzeClustering_ratio_cut``
    and its result is returned as-is.

    Parameters
    ----------
    G : cugraph.Graph
        cuGraph graph descriptor. The graph should carry edge weights.
    n_clusters : integer
        Number of clusters present in ``clustering``.
    clustering : cudf.Series
        Cluster assignment to be scored.

    Returns
    -------
    score : float
        The ratio cut score computed for the clustering.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv',
    ...                   delimiter = ' ',
    ...                   dtype=['int32', 'int32', 'float32'],
    ...                   header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2')
    >>> df = cugraph.spectralBalancedCutClustering(G, 5)
    >>> score = cugraph.analyzeClustering_ratio_cut(G, 5, df['cluster'])
    """
    # All of the work happens in the wrapper; nothing is validated or
    # transformed on the Python side.
    return spectral_clustering_wrapper.analyzeClustering_ratio_cut(
        G, n_clusters, clustering
    )
Пример #2
0
def analyzeClustering_ratio_cut(G,
                                n_clusters,
                                clustering,
                                vertex_col_name='vertex',
                                cluster_col_name='cluster'):
    """
    Compute the ratio cut score for a partitioning/clustering

    Parameters
    ----------
    G : cugraph.Graph
        cuGraph graph descriptor. This graph should have edge weights.
    n_clusters : integer
        Specifies the number of clusters in the given clustering
    clustering : cudf.DataFrame
        The cluster assignment to analyze.
    vertex_col_name : str
        The name of the column in the clustering dataframe identifying
        the external vertex id
    cluster_col_name : str
        The name of the column in the clustering dataframe identifying
        the cluster id

    Returns
    -------
    score : float
        The computed ratio cut score

    Raises
    ------
    TypeError
        If ``vertex_col_name`` or ``cluster_col_name`` is not a string.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv',
    ...                   delimiter = ' ',
    ...                   dtype=['int32', 'int32', 'float32'],
    ...                   header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2')
    >>> df = cugraph.spectralBalancedCutClustering(G, 5)
    >>> score = cugraph.analyzeClustering_ratio_cut(G, 5, df,
    ...                                             'vertex', 'cluster')
    """

    # Validate the column names up front, before touching G or clustering.
    # isinstance (instead of an exact type comparison) also accepts str
    # subclasses; TypeError is still caught by callers handling Exception.
    if not isinstance(vertex_col_name, str):
        raise TypeError("vertex_col_name must be a string")

    if not isinstance(cluster_col_name, str):
        raise TypeError("cluster_col_name must be a string")

    # For a renumbered graph the vertex column is rewritten in place (same
    # source and destination column name, original dropped).
    # NOTE(review): presumably this maps external vertex ids to the graph's
    # internal ids - confirm against Graph.add_internal_vertex_id.
    if G.renumbered:
        clustering = G.add_internal_vertex_id(clustering,
                                              vertex_col_name,
                                              vertex_col_name,
                                              drop=True)

    # Order rows by vertex id before extracting the cluster column.
    # NOTE(review): presumably the wrapper expects cluster labels ordered
    # by vertex id - confirm against the wrapper implementation.
    clustering = clustering.sort_values(vertex_col_name)

    score = spectral_clustering_wrapper.analyzeClustering_ratio_cut(
        G, n_clusters, clustering[cluster_col_name])

    return score