def dbscan(edgelist=None, distance_matrix=None, threshold=None, min_samples=2):
    """Cluster items with DBSCAN on a precomputed distance matrix.

    Either ``edgelist`` or ``distance_matrix`` must be supplied. When
    ``edgelist`` is given, the matrix derived from it replaces any
    ``distance_matrix`` argument.

    :param edgelist: edge list converted with
        ``utils.edgelist_to_distance_matrix``.
    :param distance_matrix: pairwise distances, handed to scikit-learn with
        ``metric='precomputed'``.
    :param threshold: value used as DBSCAN ``eps``; ``None`` or any other
        falsy value selects the default of 2.8.
    :param min_samples: value used as DBSCAN ``min_samples`` (default 2).
    :returns: the labels produced by ``sklearn.cluster.dbscan``.
    :raises ValueError: if neither ``edgelist`` nor ``distance_matrix`` is
        provided.
    """
    if edgelist is not None:
        distance_matrix, _names = utils.edgelist_to_distance_matrix(edgelist)
    if distance_matrix is None:
        raise ValueError("either edgelist or distance_matrix must be provided")

    # A falsy threshold (None, 0) deliberately falls back to the default.
    eps = threshold or 2.8

    _core_samples, labels = sklearn.cluster.dbscan(
        distance_matrix, metric='precomputed', algorithm='brute',
        eps=eps, min_samples=min_samples)
    return labels
def test_edgelist_to_distance_matrix1():
    """Check edgelist_to_distance_matrix on a small three-node edge list."""
    # The digits in a NumPy type string count bytes, so a 64-bit float is
    # spelled 'f8' (not 'f64').
    edgelist = numpy.array([('node002', 'node001', 2.0), ('node003', 'node001', 4.0),
                    ('node003', 'node002', 1.2)], dtype=[('n1', 'S7'),('n2', 'S7'),('d', 'f8')])
    matrix, names = utils.edgelist_to_distance_matrix(edgelist)
    expected = ['node001', 'node002', 'node003']
    # zip() stops at the shorter sequence, so verify the length explicitly;
    # otherwise an empty or truncated ``names`` would pass unnoticed.
    assert len(names) == len(expected)
    assert all([n == e for n,e in zip(names,expected)])
    assert matrix.shape == (3,3)
    assert matrix[0][0] == 0.0
    assert matrix[1][0] == 2.0
    assert matrix[2][0] == 4.0
def spectral(edgelist=None, distance_matrix=None, n_clusters=10):
    """Cluster items with scikit-learn's SpectralClustering.

    Either ``edgelist`` or ``distance_matrix`` must be supplied. When
    ``edgelist`` is given, the matrix derived from it replaces any
    ``distance_matrix`` argument.

    :param edgelist: edge list converted with
        ``utils.edgelist_to_distance_matrix``.
    :param distance_matrix: matrix passed to ``fit_predict`` with
        ``affinity='precomputed'``.
    :param n_clusters: number of clusters to request (default 10).
    :returns: the labels produced by ``SpectralClustering.fit_predict``.
    :raises ValueError: if neither ``edgelist`` nor ``distance_matrix`` is
        provided.
    """
    if edgelist is not None:
        distance_matrix, _names = utils.edgelist_to_distance_matrix(edgelist)
    if distance_matrix is None:
        raise ValueError("either edgelist or distance_matrix must be provided")

    # NOTE(review): scikit-learn documents a 'precomputed' affinity as a
    # similarity matrix (larger value = more similar), whereas this function
    # passes distances unchanged -- confirm whether a distance-to-similarity
    # transform is intended here.
    model = sklearn.cluster.SpectralClustering(n_clusters=n_clusters,
                                               affinity='precomputed')
    return model.fit_predict(distance_matrix)
def test_edgelist_to_distance_matrix1():
    """Check edgelist_to_distance_matrix on a small three-node edge list."""
    # The digits in a NumPy type string count bytes, so a 64-bit float is
    # spelled 'f8' (not 'f64').
    edgelist = numpy.array([('node002', 'node001', 2.0),
                            ('node003', 'node001', 4.0),
                            ('node003', 'node002', 1.2)],
                           dtype=[('n1', 'S7'), ('n2', 'S7'), ('d', 'f8')])
    matrix, names = utils.edgelist_to_distance_matrix(edgelist)
    expected = ['node001', 'node002', 'node003']
    # zip() stops at the shorter sequence, so verify the length explicitly;
    # otherwise an empty or truncated ``names`` would pass unnoticed.
    assert len(names) == len(expected)
    assert all([n == e for n, e in zip(names, expected)])
    assert matrix.shape == (3, 3)
    assert matrix[0][0] == 0.0
    assert matrix[1][0] == 2.0
    assert matrix[2][0] == 4.0
def hierarchical_clustering(edgelist=None, distance_matrix=None,
                            names=None, method='complete', threshold=None):
    """Create a flat clustering by cutting a hierarchical clustering tree.

    Either ``edgelist`` or ``distance_matrix`` must be supplied. When
    ``edgelist`` is given, the matrix derived from it replaces any
    ``distance_matrix`` argument.

    :param edgelist: edge list converted with
        ``utils.edgelist_to_distance_matrix``.
    :param distance_matrix: pairwise distances, either as a square matrix or
        as an already condensed 1-D vector.
    :param names: accepted for interface compatibility; not used in the
        computation.
    :param method: linkage method forwarded to ``sch.linkage``.
    :param threshold: distance at which the tree is cut. ``None`` selects
        0.7 times the largest merge distance in the linkage; an explicit
        ``0`` is honoured.
    :returns: flat cluster labels from ``sch.fcluster`` with
        ``criterion='distance'``.
    :raises ValueError: if neither ``edgelist`` nor ``distance_matrix`` is
        provided.
    """
    # Function-scope imports so the block does not depend on additional
    # module-level imports.
    import numpy
    from scipy.spatial.distance import squareform

    if edgelist is not None:
        distance_matrix, names = utils.edgelist_to_distance_matrix(edgelist)
    if distance_matrix is None:
        raise ValueError("either edgelist or distance_matrix must be provided")

    distances = numpy.asarray(distance_matrix, dtype=float)
    if distances.ndim == 2 and distances.shape[0] == distances.shape[1]:
        # linkage() treats a 2-D array as raw observation vectors and would
        # compute Euclidean distances between the *rows* of the distance
        # matrix. Convert to the condensed form it expects for distances.
        # checks=False: diagonal and lower triangle are ignored anyway.
        distances = squareform(distances, checks=False)

    linkage = sch.linkage(distances, method=method)

    # Compare against None so that a caller-supplied threshold of 0 is kept.
    if threshold is None:
        threshold = 0.7 * linkage[:, 2].max()
    labels = sch.fcluster(linkage, threshold, criterion='distance')

    return labels
def test_edgelist_to_distance_matrix2():
    """Check edgelist_to_distance_matrix on the Pentax PCE edge list file."""
    edgelist = utils.read_edgelist_file(test_files + 'edgelist-pentax-pce.txt')

    matrix, names = utils.edgelist_to_distance_matrix(edgelist)
    expected = ['Pentax_OptioA40_0_30521.JPG', 'Pentax_OptioA40_0_30522.JPG',
                'Pentax_OptioA40_0_30523.JPG', 'Pentax_OptioA40_0_30524.JPG',
                'Pentax_OptioA40_0_30525.JPG']
    # zip() stops at the shorter sequence, so verify the length explicitly;
    # otherwise an empty or truncated ``names`` would pass unnoticed.
    assert len(names) == len(expected)
    assert all([n == e for n,e in zip(names,expected)])
    assert matrix.shape == (5,5)
    # Row lengths are pinned by the shape assertion above, so zip() cannot
    # silently truncate the comparisons below.
    expected = [0.0, 704.17228119005165, 433.30115575430841, 154.4174623739334, 658.55780052635578]
    assert all([n == e for n,e in zip(matrix[0,:],expected)])
    expected = [704.17228119005165, 0.0, 476.46750462930402, 244.28991913458535, 585.63345061046823]
    assert all([n == e for n,e in zip(matrix[1,:],expected)])
def agglomerative_clustering(edgelist=None, distance_matrix=None, num_clusters=4, method='complete', metric='precomputed'):
    """Interactively configure and run scikit-learn's AgglomerativeClustering.

    Either ``edgelist`` or ``distance_matrix`` must be supplied. When
    ``edgelist`` is given, the matrix derived from it replaces any
    ``distance_matrix`` argument.

    The number of clusters, the linkage method and (unless the method is
    ``'ward'``) the metric are read from standard input.

    NOTE(review): the answers typed at the prompts always overwrite the
    ``num_clusters``, ``method`` and ``metric`` arguments, so those arguments
    currently have no effect. This is kept for backward compatibility --
    confirm whether the prompts should become optional.

    :param edgelist: edge list converted with
        ``utils.edgelist_to_distance_matrix``.
    :param distance_matrix: matrix used both as the data passed to ``fit``
        and as the ``connectivity`` argument.
    :param num_clusters: overwritten by the prompted value (see note).
    :param method: overwritten by the prompted value (see note).
    :param metric: overwritten by the prompted value, or by ``'euclidean'``
        when the chosen method is ``'ward'``.
    :returns: ``labels_`` of the fitted model.
    :raises ValueError: if neither ``edgelist`` nor ``distance_matrix`` is
        provided, or if the number of clusters entered is not an integer.
    """
    if edgelist is not None:
        distance_matrix, _names = utils.edgelist_to_distance_matrix(edgelist)
    if distance_matrix is None:
        # Fail before prompting rather than after the user answered everything.
        raise ValueError("either edgelist or distance_matrix must be provided")

    # int() already raises ValueError on non-numeric input.
    num_clusters = int(input("Enter the number of clusters: "))

    method_options = list(_TREE_BUILDERS.keys())
    print('The list of available methods:', method_options, file=sys.stdout)
    method = input('Input the method name:').strip()

    if method == 'ward':
        # scikit-learn only accepts the euclidean metric with ward linkage.
        metric = 'euclidean'
    else:
        metric_options = ['precomputed', 'cosine', 'euclidean', 'cityblock']
        print('The list of available metrics:', metric_options, file=sys.stdout)
        metric = input('Input the metric name:').strip()

    # NOTE(review): the distance matrix is also passed as ``connectivity``;
    # presumably unintended, since connectivity describes neighbourhood
    # structure rather than distances -- confirm before changing.
    estimator_options = dict(linkage=method, n_clusters=num_clusters,
                             connectivity=distance_matrix,
                             compute_full_tree='auto')
    try:
        # scikit-learn >= 1.2 names this parameter ``metric``; ``affinity``
        # was deprecated in 1.2 and removed in 1.4.
        model = sklearn.cluster.AgglomerativeClustering(metric=metric,
                                                        **estimator_options)
    except TypeError:
        # Older scikit-learn releases only know ``affinity``.
        model = sklearn.cluster.AgglomerativeClustering(affinity=metric,
                                                        **estimator_options)
    model = model.fit(distance_matrix)
    labels = model.labels_

    print(method, metric)

    return labels
def test_edgelist_to_distance_matrix2():
    """Check edgelist_to_distance_matrix on the Pentax PCE edge list file."""
    edgelist = utils.read_edgelist_file(test_files + 'edgelist-pentax-pce.txt')

    matrix, names = utils.edgelist_to_distance_matrix(edgelist)
    expected = [
        'Pentax_OptioA40_0_30521.JPG', 'Pentax_OptioA40_0_30522.JPG',
        'Pentax_OptioA40_0_30523.JPG', 'Pentax_OptioA40_0_30524.JPG',
        'Pentax_OptioA40_0_30525.JPG'
    ]
    # zip() stops at the shorter sequence, so verify the length explicitly;
    # otherwise an empty or truncated ``names`` would pass unnoticed.
    assert len(names) == len(expected)
    assert all([n == e for n, e in zip(names, expected)])
    assert matrix.shape == (5, 5)
    # Row lengths are pinned by the shape assertion above, so zip() cannot
    # silently truncate the comparisons below.
    expected = [
        0.0, 704.17228119005165, 433.30115575430841, 154.4174623739334,
        658.55780052635578
    ]
    assert all([n == e for n, e in zip(matrix[0, :], expected)])
    expected = [
        704.17228119005165, 0.0, 476.46750462930402, 244.28991913458535,
        585.63345061046823
    ]
    assert all([n == e for n, e in zip(matrix[1, :], expected)])