def test_scikit_vs_scipy():
    """Test scikit linkage with full connectivity (i.e. unstructured) vs scipy.

    For every builder registered in ``_TREE_BUILDERS``, five random data sets
    are clustered twice: once with the scikit builder (given a dense all-ones
    connectivity matrix) and once with ``hierarchy.linkage``.  Both merge
    lists are passed to ``_hc_cut`` with the same ``k`` and the resulting
    labellings must agree.  Finally ``_hc_cut`` is checked to raise
    ``ValueError`` when asked for ``n_leaves + 1``.
    """
    n, p, k = 10, 5, 3
    rng = np.random.RandomState(0)

    # Not using a lil_matrix here, just to check that non sparse
    # matrices are well handled
    connectivity = np.ones((n, n))
    for linkage, build_tree in _TREE_BUILDERS.items():
        for _trial in range(5):
            # Small noise around row offsets that are 4 apart, then centre
            # every row on its own mean.
            X = 0.1 * rng.normal(size=(n, p))
            X -= 4.0 * np.arange(n)[:, np.newaxis]
            X -= X.mean(axis=1)[:, np.newaxis]

            out = hierarchy.linkage(X, method=linkage)

            # The first two columns of the scipy output are the merged node
            # ids.  Use the builtin ``int``: the ``np.int`` alias was removed
            # in NumPy 1.24 and raises AttributeError there.
            children_ = out[:, :2].astype(int)
            children, _, n_leaves, _ = build_tree(X, connectivity)

            cut = _hc_cut(k, children, n_leaves)
            cut_ = _hc_cut(k, children_, n_leaves)
            assess_same_labelling(cut, cut_)

    # Test error management in _hc_cut (reuses the tree from the last
    # iteration of the loops above)
    assert_raises(ValueError, _hc_cut, n_leaves + 1, children, n_leaves)
def test_scikit_vs_scipy():
    """Test scikit linkage with full connectivity (i.e. unstructured) vs scipy.

    For every builder registered in ``_TREE_BUILDERS``, five random data sets
    are clustered twice: once with the scikit builder (given a dense all-ones
    connectivity matrix) and once with ``hierarchy.linkage``.  Both merge
    lists are passed to ``_hc_cut`` with the same ``k`` and the resulting
    labellings must agree.  Finally ``_hc_cut`` is checked to raise
    ``ValueError`` when asked for ``n_leaves + 1``.
    """
    n, p, k = 10, 5, 3
    rng = np.random.RandomState(0)

    # Not using a lil_matrix here, just to check that non sparse
    # matrices are well handled
    connectivity = np.ones((n, n))
    for linkage, build_tree in _TREE_BUILDERS.items():
        for _trial in range(5):
            # Small noise around row offsets that are 4 apart, then centre
            # every row on its own mean.
            X = 0.1 * rng.normal(size=(n, p))
            X -= 4.0 * np.arange(n)[:, np.newaxis]
            X -= X.mean(axis=1)[:, np.newaxis]

            out = hierarchy.linkage(X, method=linkage)

            # The first two columns of the scipy output are the merged node
            # ids.  Use the builtin ``int``: the ``np.int`` alias was removed
            # in NumPy 1.24 and raises AttributeError there.
            children_ = out[:, :2].astype(int)
            children, _, n_leaves, _ = build_tree(X, connectivity)

            cut = _hc_cut(k, children, n_leaves)
            cut_ = _hc_cut(k, children_, n_leaves)
            assess_same_labelling(cut, cut_)

    # Test error management in _hc_cut (reuses the tree from the last
    # iteration of the loops above)
    assert_raises(ValueError, _hc_cut, n_leaves + 1, children, n_leaves)
def test_scikit_vs_scipy():
    # Test scikit linkage with full connectivity (i.e. unstructured) vs scipy
    #
    # For every builder in _TREE_BUILDERS, five random data sets are
    # clustered with both the scikit builder (dense all-ones connectivity)
    # and hierarchy.linkage.  The two merge lists must be equal once each
    # scikit row is sorted, and passing them to _hc_cut with the same k must
    # give the same labelling.
    n, p, k = 10, 5, 3
    rng = np.random.RandomState(0)

    # Not using a lil_matrix here, just to check that non sparse
    # matrices are well handled
    connectivity = np.ones((n, n))
    for linkage, build_tree in _TREE_BUILDERS.items():
        for _trial in range(5):
            # Small noise around row offsets that are 4 apart, then centre
            # every row on its own mean.
            X = 0.1 * rng.normal(size=(n, p))
            X -= 4.0 * np.arange(n)[:, np.newaxis]
            X -= X.mean(axis=1)[:, np.newaxis]

            out = hierarchy.linkage(X, method=linkage)

            # The first two columns of the scipy output are the merged node
            # ids.  Use the builtin ``int``: the ``np.int`` alias was removed
            # in NumPy 1.24 and raises AttributeError there.
            children_ = out[:, :2].astype(int, copy=False)
            children, _, n_leaves, _ = build_tree(X, connectivity)

            # Sort the order of child nodes per row for consistency
            children.sort(axis=1)
            err_msg = ('linkage tree differs'
                       ' from scipy impl for'
                       ' linkage: ' + linkage)
            assert_array_equal(children, children_, err_msg)

            cut = _hc_cut(k, children, n_leaves)
            cut_ = _hc_cut(k, children_, n_leaves)
            assess_same_labelling(cut, cut_)

    # Test error management in _hc_cut (reuses the tree from the last
    # iteration of the loops above)
    assert_raises(ValueError, _hc_cut, n_leaves + 1, children, n_leaves)
def test_scikit_vs_scipy():
    # Test scikit linkage with full connectivity (i.e. unstructured) vs scipy
    #
    # For every builder in _TREE_BUILDERS, five random data sets are
    # clustered with both the scikit builder (dense all-ones connectivity)
    # and hierarchy.linkage.  The two merge lists must be equal once each
    # scikit row is sorted, and passing them to _hc_cut with the same k must
    # give the same labelling.
    n, p, k = 10, 5, 3
    rng = np.random.RandomState(0)

    # Not using a lil_matrix here, just to check that non sparse
    # matrices are well handled
    connectivity = np.ones((n, n))
    for linkage, build_tree in _TREE_BUILDERS.items():
        for _trial in range(5):
            # Small noise around row offsets that are 4 apart, then centre
            # every row on its own mean.
            X = 0.1 * rng.normal(size=(n, p))
            X -= 4.0 * np.arange(n)[:, np.newaxis]
            X -= X.mean(axis=1)[:, np.newaxis]

            out = hierarchy.linkage(X, method=linkage)

            # The first two columns of the scipy output are the merged node
            # ids.  Use the builtin ``int``: the ``np.int`` alias was removed
            # in NumPy 1.24 and raises AttributeError there.
            children_ = out[:, :2].astype(int, copy=False)
            children, _, n_leaves, _ = build_tree(X, connectivity)

            # Sort the order of child nodes per row for consistency
            children.sort(axis=1)
            assert_array_equal(
                children,
                children_,
                'linkage tree differs'
                ' from scipy impl for'
                ' linkage: ' + linkage,
            )

            cut = _hc_cut(k, children, n_leaves)
            cut_ = _hc_cut(k, children_, n_leaves)
            assess_same_labelling(cut, cut_)

    # Test error management in _hc_cut (reuses the tree from the last
    # iteration of the loops above)
    assert_raises(ValueError, _hc_cut, n_leaves + 1, children, n_leaves)
def agglomerative_clustering(edgelist=None, distance_matrix=None,
                             num_clusters=4, method='complete',
                             metric='precomputed'):
    """
    Interactively configure and run an agglomerative (hierarchical) clustering.

    When ``edgelist`` is given, the distance matrix is derived from it with
    ``utils.edgelist_to_distance_matrix``; otherwise the ``distance_matrix``
    argument is used as passed.

    The number of clusters and the linkage method are always read from
    standard input, and so is the metric unless the chosen method is
    ``'ward'`` (which forces ``'euclidean'``).  The ``num_clusters``,
    ``method`` and ``metric`` arguments are therefore overwritten before
    they are used.

    Returns the ``labels_`` attribute of the fitted
    ``sklearn.cluster.AgglomerativeClustering`` model.
    """
    if edgelist is not None:
        # The second returned value (names) is not needed here.
        distance_matrix, _names = utils.edgelist_to_distance_matrix(edgelist)

    # Prompted value replaces whatever the caller passed in.
    num_clusters = int(input("Enter the number of clusters: "))
    assert isinstance(num_clusters, int)

    available_methods = list(_TREE_BUILDERS)
    print('The list of available methods:', available_methods, file=sys.stdout)
    raw_method = input('Input the method name:')
    assert isinstance(raw_method, str)  # native str on Py2 and Py3
    method = raw_method.strip()

    if method != 'ward':
        available_metrics = ['precomputed', 'cosine', 'euclidean', 'cityblock']
        print('The list of available metrics:', available_metrics,
              file=sys.stdout)
        raw_metric = input('Input the metric name:')
        assert isinstance(raw_metric, str)  # native str on Py2 and Py3
        metric = raw_metric.strip()
    else:
        # No metric prompt for ward: euclidean is imposed.
        metric = 'euclidean'

    # NOTE(review): the distance matrix is passed both as ``connectivity``
    # and as the data to fit -- confirm this is intended.
    # NOTE(review): recent scikit-learn releases rename ``affinity`` to
    # ``metric`` -- confirm against the installed version.
    clusterer = sklearn.cluster.AgglomerativeClustering(
        n_clusters=num_clusters,
        linkage=method,
        affinity=metric,
        connectivity=distance_matrix,
        compute_full_tree='auto',
    )
    labels = clusterer.fit(distance_matrix).labels_
    print(method, metric)
    return labels