Python clustering_score примеры, wbia_lca.cluster_tools.clustering_score Python примеры использования

Пример #1

0

Показать файл

    def pprint_short(self, initial_str='', stop_after_from=False):
        """Log a compact, single-message summary of this object's clusterings.

        The message lists every "from" cluster id with its sorted members.
        Unless ``stop_after_from`` is true or no "to" clustering has been
        set, it also lists the sorted members of each "to" cluster and the
        score delta.  When the logger's effective level is DEBUG or lower,
        the stored scores are recomputed with ``ct.clustering_score`` and a
        mismatch note is appended to the message.

        Args:
            initial_str: Text placed at the very start of the message.
            stop_after_from: When true, only the "from" part is logged.
        """
        # Scores are compared with a tolerance because they may be floats;
        # the same tolerance is used for both the "from" and "to" checks.
        score_tol = 1e-7
        verify_scores = logger.getEffectiveLevel() <= logging.DEBUG

        out_str = initial_str + 'From cids:'
        for cid in sorted(self.from_clusters.keys()):
            out_str += ' %s: %a' % (cid, sorted(self.from_clusters[cid]))

        if verify_scores:
            check_score = ct.clustering_score(self.subgraph, self.from_n2c)
            if abs(check_score - self.from_score) > score_tol:
                # %a (not %d) so that a sub-integer discrepancy is visible.
                out_str += '; from score error: should be %a, but is %a' % (
                    check_score,
                    self.from_score,
                )
        if stop_after_from or self.to_clusters is None:
            logger.info(out_str)
            return

        out_str += '; to:'
        for cid in sorted(self.to_clusters.keys()):
            out_str += ' %a' % sorted(self.to_clusters[cid])

        if verify_scores:
            check_score = ct.clustering_score(self.subgraph, self.to_n2c)
            if abs(check_score - self.to_score) > score_tol:
                out_str += '\nto score error: should be %a, but is %a\n' % (
                    check_score,
                    self.to_score,
                )
        out_str += '; delta %d' % self.delta_score()
        logger.info(out_str)

Пример #2

0

Показать файл

Файл: test_cluster_tools.py Проект: WildMeOrg/wbia-tpl-lca

def test_cluster_scoring_and_weights():
    """Log the results of the cluster-scoring helpers next to expected values.

    Exercises ``ct.cid_list_score``, ``ct.clustering_score`` and
    ``ct.get_weight_lists`` on the graph returned by ``ex_graph_fig1``.
    Nothing is asserted; each result is written to the log together with
    the value it is expected to have.
    """
    G = ex_graph_fig1()

    logger.info('=====================')
    logger.info('Testing cid_list_score')
    cids = list(ct.cids_from_range(4))
    n2c_random = {
        'a': cids[0],
        'b': cids[0],
        'f': cids[0],
        'c': cids[1],
        'g': cids[1],
        'd': cids[2],
        'e': cids[2],
        'i': cids[2],
        'h': cids[3],
        'j': cids[3],
        'k': cids[3],
    }
    clustering_random = ct.build_clustering(n2c_random)
    score = ct.cid_list_score(G, clustering_random, n2c_random,
                              [cids[0], cids[2], cids[3]])
    logger.info('Score between clusters [c0, c2, c3] should be -5 and is %s' %
                (score, ))

    logger.info('=====================')
    logger.info('Testing clustering_score')

    # First clustering: all nodes together in one cluster.
    n2c_single_cluster = {n: 'c0' for n in G.nodes}
    logger.info('Score with all together should be 21.  Score = %s' %
                (ct.clustering_score(G, n2c_single_cluster), ))

    # Second clustering: every node in its own cluster.
    n2c_all_separate = {n: 'c' + str(i) for i, n in enumerate(G.nodes)}
    logger.info('Score with all separate should be -21.  Score = %s' %
                (ct.clustering_score(G, n2c_all_separate), ))

    # Third clustering: the optimal one, written out by hand.
    cids = list(ct.cids_from_range(4))
    n2c_optimal = {
        'a': cids[0],
        'b': cids[0],
        'd': cids[0],
        'e': cids[0],
        'c': cids[1],
        'h': cids[2],
        'i': cids[2],
        'f': cids[3],
        'g': cids[3],
        'j': cids[3],
        'k': cids[3],
    }
    logger.info('Optimal score should be 49. Score = %s' %
                (ct.clustering_score(G, n2c_optimal), ))

    negatives, positives = ct.get_weight_lists(G, sort_positive=True)
    logger.info('Length of negatives should be 10.  It is %s' %
                (len(negatives), ))
    logger.info('Length of positives should be 11.  It is %s' %
                (len(positives), ))
    logger.info('0th positive should be 8.  It is %s' % (positives[0], ))
    logger.info('Last positive should be 2.  It is %s' % (positives[-1], ))

Пример #3

0

Показать файл

Файл: lca_alg2.py Проект: WildMeOrg/wbia-tpl-lca

def run_lca_alg2(G, best_clustering, exp_alt_clustering, msg, trace_on=False):
    """Run ``lca_alg2`` and log whether it produced the expected alternative.

    The alternative clustering found from ``best_clustering`` is compared
    with ``exp_alt_clustering`` and its score with the score of the
    expected clustering.  On any mismatch the resulting structures are
    printed through ``ct.print_structures``.

    Args:
        G: Graph handed to ``lca_alg2`` and to the scoring helper.
        best_clustering: Current clustering to find an alternative for.
        exp_alt_clustering: The alternative clustering that is expected.
        msg: Label used in the success / failure log line.
        trace_on: Forwarded to ``lca_alg2``.
    """
    expected_score = ct.clustering_score(
        G, ct.build_node_to_cluster_mapping(exp_alt_clustering))

    found_clustering, found_score = lca_alg2(
        G,
        best_clustering,
        ct.build_node_to_cluster_mapping(best_clustering),
        trace_on=trace_on,
    )

    clusterings_match = ct.same_clustering(found_clustering,
                                           exp_alt_clustering)
    outcome_template = '%s success' if clusterings_match else '%s FAILED'
    logger.info(outcome_template % (msg, ))

    scores_differ = found_score != expected_score
    if scores_differ:
        logger.info('score %d, expected_score %d. FAILED' %
                    (found_score, expected_score))

    if scores_differ or not clusterings_match:
        logger.info('current structures with failure:')
        found_node2cid = ct.build_node_to_cluster_mapping(found_clustering)
        ct.print_structures(G, found_clustering, found_node2cid, found_score)

Пример #4

0

Показать файл

Файл: lca_alg2.py Проект: WildMeOrg/wbia-tpl-lca

def test_lca_alg1_constrained():
    """Run ``lca_alg1_constrained`` on the example graph and log the outcome.

    Nodes 'f' and 'i' are forced into the same cluster and 'd' and 'e'
    into different ones.  The resulting clustering is compared with a
    hand-written expectation, and the returned score with the score
    recomputed by ``ct.clustering_score``.  Results are only logged.
    """
    logger.info('\n=========================\n'
                'Test lca_alg1_constrained\n'
                '=========================')

    graph = tct.ex_graph_fig1()
    graph['g']['j']['weight'] = -4  # slightly larger than original: breaks a tie

    must_join = [('f', 'i')]
    must_split = [('d', 'e')]
    found, found_score = lca_alg1_constrained(graph, must_join, must_split)
    recomputed_score = ct.clustering_score(
        graph, ct.build_node_to_cluster_mapping(found))

    expected = {
        0: {'a', 'b', 'd'},
        1: {'f', 'g', 'h', 'i', 'k'},
        2: {'c'},
        3: {'e'},
        4: {'j'},
    }
    if ct.same_clustering(found, expected, output_differences=True):
        logger.info('constrained (d,e) different and (f,i) same: success')
    else:
        logger.info('constrained (d,e) different and (f,i) same: FAIL')

    if found_score != recomputed_score:
        score_template = 'scoring error:  actual %a, correct %a'
    else:
        score_template = 'scoring correct:  actual %a, correct %a'
    logger.info(score_template % (found_score, recomputed_score))

Пример #5

0

Показать файл

def build_example_LCA():
    """Build a sample LCA object over two clusters of the example graph.

    The "from" side consists of clusters 2 and 3 of a hand-written
    clustering of the graph returned by ``tct.ex_graph_fig1``; the "to"
    side is a hand-written alternative split of the same nodes.

    Returns:
        A tuple ``(lca, G)`` of the constructed LCA and the full graph.
    """
    G = tct.ex_graph_fig1()
    n2c_optimal = {
        'a': 0,
        'b': 0,
        'd': 0,
        'e': 0,
        'c': 1,
        'h': 2,
        'i': 2,
        'f': 3,
        'g': 3,
        'j': 3,
        'k': 3,
    }
    clustering_opt = ct.build_clustering(n2c_optimal)

    # The two clusters the LCA is built from; used consistently below
    # instead of repeating the literal ids.
    cid0 = 2
    cid1 = 3
    nodes_in_clusters = list(clustering_opt[cid0] | clustering_opt[cid1])
    subG = G.subgraph(nodes_in_clusters)

    score = ct.cid_list_score(subG, clustering_opt, n2c_optimal, [cid0, cid1])
    a = LCA(subG, clustering_opt, [cid0, cid1], score)

    # The alternative clustering is scored on the same subgraph, so the
    # subgraph built above is reused rather than rebuilt.
    to_clusters = {0: {'f', 'h', 'i', 'j'}, 1: {'g', 'k'}}
    to_node2cid = {
        n: cid
        for cid, members in to_clusters.items() for n in members
    }
    to_score = ct.clustering_score(subG, to_node2cid)
    a.set_to_clusters(to_clusters, to_score)

    return a, G

Пример #6

0

Показать файл

Файл: lca_alg2.py Проект: WildMeOrg/wbia-tpl-lca

def best_alternative_len2(G, clustering, node2cid):
    """Return the alternative clustering, and its score, for a two-node G.

    If the current clustering has two clusters, the alternative puts all
    nodes of G into a single cluster; otherwise each node gets a cluster
    of its own.  ``node2cid`` is accepted but not used.
    """
    nodes = G.nodes()
    if len(clustering) != 2:
        # Currently not split in two: the alternative is one node per cluster.
        alt_clustering = {}
        for idx, node in enumerate(nodes):
            alt_clustering[idx] = {node}
    else:
        # Currently split in two: the alternative is a single merged cluster.
        alt_clustering = {0: set(nodes)}

    alt_score = ct.clustering_score(
        G, ct.build_node_to_cluster_mapping(alt_clustering))
    return alt_clustering, alt_score

Пример #7

0

Показать файл

Файл: graph_algorithm.py Проект: WildMeOrg/wbia-tpl-lca

    def __init__(self, edges, clusters, aug_names, params, aug_request_cb,
                 aug_result_cb):
        """Set up the graph, the initial clustering and the LCA queues.

        Args:
            edges: Initial edges; converted to weighted edges by the
                weight manager's ``get_initial_edges``.
            clusters: Initial clusters, passed to ``self.build_clustering``.
            aug_names: Passed through to ``wm.weight_manager``.
            params: Mapping of settings.  The keys read here are
                'tries_before_edge_done', 'draw_iterations' and
                'drawing_prefix'.
            aug_request_cb: Passed through to ``wm.weight_manager``.
            aug_result_cb: Passed through to ``wm.weight_manager``.
        """
        self.params = params
        logger.info('======================================')
        logger.info('Construction of graph_algorithm object')
        logger.info(dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        self.weight_mgr = wm.weight_manager(aug_names,
                                            params['tries_before_edge_done'],
                                            aug_request_cb, aug_result_cb)
        # Build the weighted graph from the initial edges.
        self.G = nx.Graph()
        weighted_edges = self.weight_mgr.get_initial_edges(edges)
        self.G.add_weighted_edges_from(weighted_edges)
        logger.info('Initial graph has %d nodes and %d edges' %
                    (len(self.G.nodes), len(self.G.edges)))
        # _next_cid is initialised before build_clustering is called
        # (presumably build_clustering allocates cluster ids from it --
        # TODO confirm).
        self._next_cid = 0
        self.build_clustering(clusters)
        self.node2cid = ct.build_node_to_cluster_mapping(self.clustering)
        self.score = ct.clustering_score(self.G, self.node2cid)

        # Start in the 'scoring' phase with LCAs built from cluster pairs.
        self.phase = 'scoring'
        self.cid2lca = cid_to_lca.CID2LCA()
        self.queues = lca_queues.lca_queues()
        self.new_lcas(self.clustering.keys(),
                      use_pairs=True,
                      use_singles=False)
        # If no pair LCAs were queued, switch straight to the 'splitting'
        # phase and build LCAs from single clusters instead.
        if self.queues.num_lcas() == 0:
            logger.info("Phase shift immediately into 'splitting'")
            self.phase = 'splitting'
            self.new_lcas(self.clustering.keys(),
                          use_pairs=False,
                          use_singles=True)
        self.queues.info_long(max_entries=10)

        # Counters and bookkeeping, all starting empty.
        self.num_verifier_results = 0
        self.num_human_results = 0
        self.removed_nodes = set()

        # A drawing object is only created when params asks for it.
        self.draw_obj = None
        if self.params['draw_iterations']:
            self.draw_obj = draw_lca.draw_lca(self.params['drawing_prefix'])
        """  Need to set these callbacks to request and receive
        information from the verfication algorithm and to do the same
        from human reviewers. """
        # All callbacks start unset; they have to be assigned after
        # construction.
        self.remove_nodes_cb = None
        self.status_request_cb = None
        self.status_return_cb = None
        self.results_request_cb = None
        self.results_return_cb = None
        self.log_request_cb = None
        self.log_return_cb = None
        self.trace_start_human_gt_cb = None
        self.trace_iter_compare_to_gt_cb = None
        self.should_stop_cb = None
        logger.info('Completed graph algorithm initialization')

Пример #8

0

Показать файл

    def pprint(self, stop_after_from=False):
        """Log a detailed, multi-line description of this object.

        Logs the "from" node-to-cluster mapping, the subgraph nodes and the
        "from" clusters with their stored and recomputed scores.  Unless
        ``stop_after_from`` is true or no "to" clustering has been set, the
        "to" clusters, the score difference and the inconsistent pairs are
        logged as well.  A scoring-error line is logged whenever a stored
        score differs from the one recomputed by ``ct.clustering_score``.

        Args:
            stop_after_from: When true, stop after the "from" clusters.
        """
        logger.info('from_n2c: %s' % (self.from_n2c, ))
        logger.info('subgraph nodes %s' % (self.subgraph.nodes(), ))
        check_score = ct.clustering_score(self.subgraph, self.from_n2c)
        logger.info('from clusters (score = %a, checking %a):' %
                    (self.from_score, check_score))
        if self.from_score != check_score:
            logger.info('lca: SCORING ERROR in from')
        for cid in sorted(self.from_clusters.keys()):
            logger.info('    %s: %a' % (cid, self.from_clusters[cid]))

        # Without a "to" clustering there is nothing further to report;
        # going on would fail on self.to_clusters.keys().
        if stop_after_from or self.to_clusters is None:
            return

        check_score = ct.clustering_score(self.subgraph, self.to_n2c)
        logger.info('to clusters (score = %a, checking = %a):' %
                    (self.to_score, check_score))
        if self.to_score != check_score:
            logger.info('SCORING ERROR in to')
        for cid in sorted(self.to_clusters.keys()):
            # %s, as for the "from" clusters, so non-integer ids also work.
            logger.info('    %s: %a' % (cid, self.to_clusters[cid]))
        logger.info('score_difference %a' % self.delta_score())

        logger.info('inconsistent_pairs: %s' % (self.inconsistent, ))

Пример #9

0

Показать файл

def run_lca_alg1(G,
                 expected_clustering,
                 msg,
                 stop_at_two=False,
                 trace_on=False):
    """Run ``lca_alg1`` on G and log whether the result is as expected.

    Both the clustering and its score are compared with the expectation
    (the expected score is computed from ``expected_clustering``).  On any
    mismatch the resulting structures are printed through
    ``ct.print_structures``.

    Args:
        G: Graph to cluster.
        expected_clustering: The clustering ``lca_alg1`` should return.
        msg: Label used in the success / failure log line.
        stop_at_two: Forwarded to ``lca_alg1``.
        trace_on: Forwarded to ``lca_alg1``.
    """
    expected_score = ct.clustering_score(
        G, ct.build_node_to_cluster_mapping(expected_clustering))
    found_clustering, found_score = lca_alg1(G,
                                             stop_at_two=stop_at_two,
                                             trace_on=trace_on)

    clusterings_match = ct.same_clustering(found_clustering,
                                           expected_clustering)
    outcome_template = '%s success' if clusterings_match else '%s FAILED'
    logger.info(outcome_template % (msg, ))

    scores_differ = found_score != expected_score
    if scores_differ:
        logger.info('score %d, expected_score %d. FAILED' %
                    (found_score, expected_score))

    if scores_differ or not clusterings_match:
        logger.info('current structures with failure:')
        found_node2cid = ct.build_node_to_cluster_mapping(found_clustering)
        ct.print_structures(G, found_clustering, found_node2cid, found_score)

Пример #10

0

Показать файл

def lca_alg1(curr_G, stop_at_two=False, trace_on=False):
    """Cluster the nodes of curr_G by adding its positive edges one by one.

    Every node starts in its own cluster and a working graph holds only
    the negative edges.  Each positive edge is then considered in turn:
    depending on the clusters of its endpoints, the clusters are merged,
    left unchanged, or some nodes are shifted from one cluster to the
    other, whichever gives the best score.  The edge is then added to the
    working graph and the running score updated.

    Args:
        curr_G: Graph to cluster; edge weights are read through
            ``ct.get_weight_lists``.
        stop_at_two: When true, merges are only allowed while there are
            more than two clusters.
        trace_on: When true, progress is written to the log.

    Returns:
        A tuple ``(clustering, score)``: a dict mapping cluster id to the
        set of nodes in that cluster, and the accumulated score.
    """
    # Trivial graphs: nothing to cluster, score is 0.
    if len(curr_G) == 0:
        return {}, 0
    elif len(curr_G) == 1:
        clustering = {0: set(curr_G.nodes())}
        return clustering, 0

    neg_edges, pos_edges = ct.get_weight_lists(curr_G, sort_positive=True)
    # Start with every node in a singleton cluster.
    clustering = {c: {n} for c, n in enumerate(sorted(curr_G.nodes()))}
    node2cid = ct.build_node_to_cluster_mapping(clustering)

    # The working graph initially contains all nodes but only the
    # negative edges; positive edges are added at the end of each
    # iteration of the loop below.
    G_prime = nx.Graph()
    G_prime.add_nodes_from(curr_G)
    G_prime.add_weighted_edges_from(neg_edges)
    score = ct.clustering_score(G_prime, node2cid)

    if trace_on:
        logger.info('====================')
        logger.info('====  lca_alg1  ====')
        logger.info('====================')
        ct.print_structures(G_prime, clustering, node2cid, score)

    for e in pos_edges:
        if trace_on:
            logger.info('=======================')
            logger.info('Start of next iteration')
            logger.info('=======================')
        # Order the endpoints so that n0 < n1.
        if e[0] < e[1]:
            n0, n1 = e[0], e[1]
        else:
            n1, n0 = e[0], e[1]
        wgt = e[2]
        n0_cid, n1_cid = node2cid[n0], node2cid[n1]
        if trace_on:
            logger.info('n0=%s, n1=%s, wgt=%a, n0_cid=%a, n1_cid=%a' %
                        (n0, n1, wgt, n0_cid, n1_cid))

        # With stop_at_two set, merging stops once only two clusters remain.
        is_merge_allowed = not stop_at_two or len(clustering) > 2
        if trace_on:
            logger.info('is_merge_allowed %s' % (is_merge_allowed, ))

        if n0_cid == n1_cid:
            # Endpoints already share a cluster: the edge adds its weight.
            if trace_on:
                logger.info('In the same cluster')
            score += wgt
        elif is_merge_allowed and not ct.has_edges_between_them(
                G_prime, clustering[n0_cid], clustering[n1_cid]):
            # No edges between the two clusters in the working graph yet.
            if trace_on:
                logger.info('Merging disjoint clusters')
            sc_delta = ct.merge_clusters(n0_cid, n1_cid, G_prime, clustering,
                                         node2cid)
            assert sc_delta == 0
            # NOTE(review): original author's open question -- why might
            # sc_delta be non-zero here?  The assert above requires it to be 0.
            score += sc_delta + wgt  # why might sc_delta be non-zero here???
        else:
            # Compare the candidate outcomes: merge, leave unmerged, or
            # shift nodes in either direction.
            sc_merged = (ct.score_delta_after_merge(n0_cid, n1_cid, G_prime,
                                                    clustering) + wgt)
            if trace_on:
                logger.info('sc_merged=%a' % sc_merged)
            sc_unmerged = -wgt
            if trace_on:
                logger.info('sc_unmerged=%a' % sc_unmerged)
            if len(clustering[n0_cid]) == 1 or len(clustering[n1_cid]) == 1:
                # Shifts are not evaluated when a cluster is a singleton;
                # their scores are set below both other options so that
                # neither shift branch can be selected.
                sc_n0_to_n1 = sc_n1_to_n0 = min(sc_merged, sc_unmerged) - 9999
                n0_to_move = n1_to_move = []
                if trace_on:
                    logger.info('not checking moving nodes because '
                                'at least one cluster is length 1')
            else:
                sc_n0_to_n1, n0_to_move = best_shift(n0,
                                                     n1,
                                                     G_prime,
                                                     clustering,
                                                     node2cid,
                                                     trace_on=trace_on)
                sc_n0_to_n1 += wgt
                if trace_on:
                    logger.info('sc_n0_to_n1=%a, n0_to_move=%a' %
                                (sc_n0_to_n1, n0_to_move))
                sc_n1_to_n0, n1_to_move = best_shift(n1,
                                                     n0,
                                                     G_prime,
                                                     clustering,
                                                     node2cid,
                                                     trace_on=trace_on)
                sc_n1_to_n0 += wgt
                if trace_on:
                    logger.info('sc_n1_to_n0=%a, n1_to_move=%a' %
                                (sc_n1_to_n0, n1_to_move))

            # Ties are resolved in this order: merge, unmerged, shift
            # n0 -> n1, shift n1 -> n0.
            if is_merge_allowed and sc_merged >= max(sc_unmerged, sc_n0_to_n1,
                                                     sc_n1_to_n0):
                ct.merge_clusters(n0_cid, n1_cid, G_prime, clustering,
                                  node2cid)
                score += sc_merged
                if trace_on:
                    logger.info('Choose merge')
            elif sc_unmerged >= max(sc_n0_to_n1, sc_n1_to_n0):
                score += sc_unmerged
                if trace_on:
                    logger.info('Choose unmerged - unchanged')
            elif sc_n0_to_n1 >= sc_n1_to_n0:
                ct.shift_between_clusters(n0_cid, n0_to_move, n1_cid,
                                          clustering, node2cid)
                score += sc_n0_to_n1
                if trace_on:
                    logger.info('Choose to shift from cluster %a to %a' %
                                (n0_cid, n1_cid))
            else:
                ct.shift_between_clusters(n1_cid, n1_to_move, n0_cid,
                                          clustering, node2cid)
                score += sc_n1_to_n0
                if trace_on:
                    logger.info('Choose to shift from cluster %a to %a' %
                                (n1_cid, n0_cid))
        # The edge becomes part of the working graph for later iterations.
        G_prime.add_weighted_edges_from([e])
        if trace_on:
            ct.print_structures(G_prime, clustering, node2cid, score)

    return clustering, score

Пример #11

0

Показать файл

Файл: lca_alg2.py Проект: WildMeOrg/wbia-tpl-lca

def lca_alg1_constrained(curr_G,
                         in_same=None,
                         in_different=None,
                         trace_on=False):
    """
    Use algorithm 1 to find the best clustering of the current
    subgraph subject to the constraints that all pairs of nodes from
    in_same must be in the same cluster and all pairs of nodes from
    in_different must be in different clusters.

    This does not check that the constraints from in_same and
    in_different can all be satisfied. In implementation the in_same
    constraints take precedence, but in use, one of the two in_same
    and in_different lists will be empty.

    Args:
        curr_G: Graph to cluster.  Every pair in in_same must be an edge
            of curr_G, because its weight is looked up there.
        in_same: Node pairs that must share a cluster.  The order of the
            two nodes within a pair does not matter.  Defaults to no
            constraints.
        in_different: Node pairs that must be kept in different clusters.
            Defaults to no constraints.
        trace_on: When true, progress is written to the log.

    Returns:
        A tuple ``(clustering, score)``.
    """
    in_same = [] if in_same is None else in_same
    in_different = [] if in_different is None else in_different

    clustering = build_initial_from_constraints(curr_G, in_same)
    node2cid = ct.build_node_to_cluster_mapping(clustering)

    neg_edges, pos_edges = ct.get_weight_lists(curr_G, sort_positive=True)
    G_prime = nx.Graph()
    G_prime.add_nodes_from(curr_G)
    G_prime.add_weighted_edges_from(neg_edges)

    # The in_same edges go into the working graph up front, so the initial
    # score already accounts for them.
    edges = [(p[0], p[1], curr_G[p[0]][p[1]]['weight']) for p in in_same]
    G_prime.add_weighted_edges_from(edges)
    score = ct.clustering_score(G_prime, node2cid)

    # Store each in_same pair with the smaller node first, the same order
    # the loop below uses.  Otherwise a pair given as (larger, smaller)
    # would not be skipped and its weight would be counted a second time.
    same_pairs = {(a, b) if a < b else (b, a) for a, b in in_same}

    if trace_on:
        logger.info('=================================')
        logger.info('=====  lca_alg1_constrained  ====')
        logger.info('=================================')
        ct.print_structures(G_prime, clustering, node2cid, score)

    for e in pos_edges:
        if trace_on:
            logger.info('=======================')
            logger.info('Start of next iteration')
            logger.info('=======================')

        # Order the endpoints so that n0 < n1.
        if e[0] < e[1]:
            n0, n1 = e[0], e[1]
        else:
            n1, n0 = e[0], e[1]

        if (n0, n1) in same_pairs:
            if trace_on:
                logger.info('Skipping (%a, %a) because already in graph' %
                            (n0, n1))
            continue

        wgt = e[2]
        n0_cid, n1_cid = node2cid[n0], node2cid[n1]
        if trace_on:
            logger.info('n0=%s, n1=%s, wgt=%a, n0_cid=%a, n1_cid=%a' %
                        (n0, n1, wgt, n0_cid, n1_cid))

        if n0_cid == n1_cid:
            if trace_on:
                logger.info('Already in the same cluster')
            score += wgt

        elif keep_separate(clustering[n0_cid], clustering[n1_cid],
                           in_different):
            # A constraint forbids the merge: the edge stays between
            # clusters and its weight counts against the score.
            if trace_on:
                logger.info('Must be kept separate')
            score -= wgt

        elif not ct.has_edges_between_them(G_prime, clustering[n0_cid],
                                           clustering[n1_cid]):
            if trace_on:
                logger.info('Merging disjoint clusters')
            sc_delta = ct.merge_clusters(n0_cid, n1_cid, G_prime, clustering,
                                         node2cid)
            assert sc_delta == 0
            score += sc_delta + wgt

        else:
            # Clusters are already connected: merge only if that scores
            # strictly better than leaving them apart.
            sc_merged = (ct.score_delta_after_merge(n0_cid, n1_cid, G_prime,
                                                    clustering) + wgt)
            if trace_on:
                logger.info('sc_merged=%a' % sc_merged)
            sc_unmerged = -wgt
            if trace_on:
                logger.info('sc_unmerged=%a' % sc_unmerged)

            if sc_merged > sc_unmerged:
                ct.merge_clusters(n0_cid, n1_cid, G_prime, clustering,
                                  node2cid)
                score += sc_merged
                if trace_on:
                    logger.info('Merging clusters with edges between')
            else:
                score += sc_unmerged
                if trace_on:
                    logger.info('No merge of clusters with edges between ')

        # The edge becomes part of the working graph for later iterations.
        G_prime.add_weighted_edges_from([e])
        if trace_on:
            ct.print_structures(G_prime, clustering, node2cid, score)

    return clustering, score

Пример #12

0

Показать файл

Файл: lca_alg2.py Проект: WildMeOrg/wbia-tpl-lca

def lca_alg2(G, clustering, node2cid, trace_on=False):
    """
    Find the best alternative to the given clustering of G.

    If the clustering is a single cluster, the first candidate comes from
    rerunning algorithm 1 so that it stops once two clusters remain.
    Otherwise the first candidate is one cluster holding every node.

    Further candidates are produced from the inconsistent edges of the
    current clustering, strongest first: a negative edge is forced into
    different clusters, any other edge into the same cluster, using
    lca_alg1_constrained.  The highest-scoring candidate is returned.

    len(G) <= 1 is not allowed.  If len(G) is two, the alternative is
    chosen by best_alternative_len2 (merged vs. split).

    Args:
        G: Graph being clustered; must have at least two nodes.
        clustering: Current clustering, cluster id -> set of nodes.
        node2cid: Node -> cluster id mapping matching ``clustering``.
        trace_on: When true, progress is written to the log.

    Returns:
        A tuple ``(best_clustering, best_score)``.
    """
    assert len(G) >= 2

    if len(G) == 2:
        return best_alternative_len2(G, clustering, node2cid)

    # Form the first estimate of the best alternative.  If there is just
    # one cluster in the current (local) best clustering then rerun
    # Alg1 constrained to stop at at most two.  Otherwise, just form
    # a single clustering.
    if len(clustering) == 1:
        best_clustering, best_score = a1.lca_alg1(G, stop_at_two=True)
        best_node2cid = ct.build_node_to_cluster_mapping(best_clustering)
    else:
        best_clustering = {0: set(G.nodes())}
        best_node2cid = {n: 0 for n in G.nodes()}
        best_score = ct.clustering_score(G, best_node2cid)

    if trace_on:
        logger.info('In lca_alg2, before checking inconsistent\n'
                    'best_clustering %a, best_score %d, checking %d' %
                    (best_clustering, best_score,
                     ct.clustering_score(G, best_node2cid)))

    # Try the inconsistent edges in decreasing order of absolute weight.
    inconsistent = inconsistent_edges(G, clustering, node2cid)
    inconsistent.sort(key=lambda e: abs(e[2]), reverse=True)
    if trace_on:
        logger.info('In lca_alg2: clustering %s' % (clustering, ))
        logger.info('In lca_alg2: inconsistent edges %s' % (inconsistent, ))
        logger.info('Starting inconsistent edge loop')

    for e in inconsistent:
        if trace_on:
            logger.info('e = %s' % (e, ))
        if e[2] < 0:
            if trace_on:
                logger.info('Forcing edge into different clusters')
            new_clustering, new_score = lca_alg1_constrained(G,
                                                             in_same=[],
                                                             in_different=[
                                                                 (e[0], e[1])
                                                             ])
        else:
            if trace_on:
                logger.info('Forcing edge into same cluster')
            new_clustering, new_score = lca_alg1_constrained(G,
                                                             in_same=[(e[0],
                                                                       e[1])],
                                                             in_different=[])

        if trace_on:
            logger.info('Best score returned by lca_alg1_constrained is %s' %
                        (new_score, ))
            # The recomputed score is formatted into the message; passing
            # it as an extra argument with no placeholder makes logging
            # report a formatting error instead of showing the value.
            logger.info('Checking %s' % (ct.clustering_score(
                G, ct.build_node_to_cluster_mapping(new_clustering)), ))
        if new_score > best_score:
            if trace_on:
                logger.info('New best')
            best_score = new_score
            best_clustering = new_clustering

    return best_clustering, best_score

Пример #13

0

Показать файл

Файл: test_cluster_tools.py Проект: WildMeOrg/wbia-tpl-lca

def test_merge():
    """Log the results of the merge helpers on the example graph.

    ``ct.score_delta_after_merge`` and ``ct.merge_clusters`` are each run
    twice on a fresh copy of the hand-written optimal clustering, the
    second time with the cluster arguments in the opposite order.
    Nothing is asserted; values are logged next to what they should be.
    """

    def optimal_n2c():
        # A fresh mapping each time, because merge_clusters is handed the
        # mapping together with the clustering.
        return {
            'a': cids[0],
            'b': cids[0],
            'd': cids[0],
            'e': cids[0],
            'c': cids[1],
            'h': cids[2],
            'i': cids[2],
            'f': cids[3],
            'g': cids[3],
            'j': cids[3],
            'k': cids[3],
        }

    def exercise(delta_pair, delta_template, merge_pair):
        # One full round: score the possible merge, then merge and log.
        n2c = optimal_n2c()
        clustering = ct.build_clustering(n2c)

        logger.info('-------------')
        logger.info('score_delta_after_merge')
        delta = ct.score_delta_after_merge(delta_pair[0], delta_pair[1], G,
                                           clustering)
        logger.info(delta_template % (delta, ))

        logger.info('-------------')
        logger.info('merge_clusters')
        score_before = ct.clustering_score(G, n2c)
        delta = ct.merge_clusters(merge_pair[0], merge_pair[1], G,
                                  clustering, n2c)
        score_after = ct.clustering_score(G, n2c)
        logger.info('delta = %s should be %s' % (
            delta,
            score_after - score_before,
        ))

        logger.info('---')
        for c in clustering:
            logger.info('%s: %s' % (c, clustering[c]))
        logger.info('---')
        for n in G.nodes:
            logger.info('%s: %s' % (n, n2c[n]))

    logger.info('===========================')
    logger.info('test_merge')
    G = ex_graph_fig1()
    cids = list(ct.cids_from_range(4))
    logger.info(cids)

    exercise(
        (cids[2], cids[3]),
        'possible merge of 2, 3; delta should be -4, and is %s',
        (cids[0], cids[2]),
    )

    logger.info('--------')
    logger.info('Retesting merge with order of clusters reversed')
    exercise(
        (cids[3], cids[2]),
        'possible merge of 3, 2; delta should be -4, and is %s',
        (cids[2], cids[0]),
    )

Python clustering_score примеры использования