def test_maxinconsts_one_cluster_linkage(self):
    # maxinconsts(Z, R) on a linkage that consists of a single merge row.
    # The result must agree (to atol=1e-15) with the value that
    # calculate_maximum_inconsistencies produces for the same Z and R.
    linkage_matrix = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
    inconsistency = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
    assert_allclose(
        maxinconsts(linkage_matrix, inconsistency),
        calculate_maximum_inconsistencies(linkage_matrix, inconsistency),
        atol=1e-15,
    )
def check_maxinconsts_Q_linkage(self, method):
    # maxinconsts(Z, R) on the Q data set, clustered with the requested
    # linkage `method`. The result must agree (to atol=1e-15) with the value
    # that calculate_maximum_inconsistencies produces for the same Z and R.
    Z = linkage(hierarchy_test_data.Q_X, method)
    R = inconsistent(Z)
    assert_allclose(
        maxinconsts(Z, R),
        calculate_maximum_inconsistencies(Z, R),
        atol=1e-15,
    )
def process(self, **kwargs):
    """Compute the maximum inconsistency coefficients for a linkage.

    Expected keyword arguments:
        Linkage: the linkage matrix handed to ``hierarchy.maxinconsts``.
        IncM: a mapping whose ``'R'`` entry is the inconsistency matrix.

    Returns:
        A dict with the single key ``'MaxInc'`` holding the value returned
        by ``hierarchy.maxinconsts``.

    Raises:
        KeyError: if ``'Linkage'``, ``'IncM'`` or ``IncM['R']`` is missing.
    """
    linkage_matrix = kwargs['Linkage']
    inconsistency_matrix = kwargs['IncM']['R']
    return {'MaxInc': hierarchy.maxinconsts(linkage_matrix, inconsistency_matrix)}
def check_maxinconsts_Q_linkage(self, method):
    # Tests maxinconsts(Z, R) on the Q data set, clustered with the
    # requested linkage `method`. The result must agree (to atol=1e-15) with
    # the value that calculate_maximum_inconsistencies produces for the same
    # Z and R.
    X = eo['Q-X']
    # linkage() is given the raw observations, so no condensed distance
    # matrix needs to be computed here.
    Z = linkage(X, method)
    R = inconsistent(Z)
    MD = maxinconsts(Z, R)
    expectedMD = calculate_maximum_inconsistencies(Z, R)
    assert_allclose(MD, expectedMD, atol=1e-15)
low inconsistency is a group where the parent and children link heights are similar. It can also be said that this group reflects real similarity between children. The depth of the inconsistency cuttoff tells us how many levels for which the cutoff applies for each group. """ # Then compute the linkages (average represents the MRP dataset best) Z = linkage(dist_vec, method='average') # Compute cophenetic correlation distance between Z and flat dist_mat [c, d] = cophenet(Z, Y=dist_vec) # Compute the inconsistency matrix for non-singleton cluster (d=2) R = inconsistent(Z) # Now compute the maximum inconsistency coefficient per Cluster MI = maxinconsts(Z, R) # Cluster the events based on inconsistency threshold of 1.2 indices = fcluster(Z, t=1.0, criterion='inconsistent') # Visualize the distribution of correlation values samp_inds = numpy.random.random_integers(0, len(dist_vec), 10000) samp_corrs = [] for ind in samp_inds: samp_corrs.append(dist_vec[ind]) # Plot the dendrogram...if it's not way too huge dendrogram(Z, color_threshold=1 - corr_thresh, distance_sort='ascending') plt.show() group_ids = list(set(indices))
def get_clusters_Hierarchy_clustering(x, hier_dict):
    """Build a hierarchical linkage of ``x`` and cut it into flat clusters.

    ``x`` is passed straight to ``linkage``; every other option is read from
    ``hier_dict`` and falls back to the default below when its key is absent.

    Recognised keys of ``hier_dict``:
        'L_method': linkage method, default 'single'. Can be 'single',
            'complete', 'average', 'weighted', 'centroid', 'median', 'ward'.
        'L_metric': distance metric, default 'euclidean'. Can be
            'braycurtis', 'canberra', 'chebyshev', 'cityblock',
            'correlation', 'cosine', 'dice', 'euclidean', 'hamming',
            'jaccard', 'kulsinski', 'mahalanobis', 'matching', 'minkowski',
            'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener',
            'sokalsneath', 'sqeuclidean'. Note that 'jensenshannon' and
            'yule' may result in a condensed distance matrix which contains
            infinite values. For the 'centroid', 'median' and 'ward'
            methods any other metric is replaced by 'euclidean' and a note
            is printed.
        'optimal_ordering': forwarded to ``linkage``, default False.
        'criterionH': ``fcluster`` criterion, default 'inconsistent'.
        'depth': depth used for the inconsistency computation, default 2.
        't': threshold forwarded to ``fcluster``. Defaults to 20 for the
            'maxclust' and 'maxclust_monocrit' criteria (where it is the
            maximum number of clusters requested) and to 0.9 otherwise.
        'R': precomputed inconsistency matrix. For the 'inconsistent',
            'monocrit' and 'maxclust_monocrit' criteria it is computed with
            ``inconsistent(Z, d=depth)`` when not provided.
        'colR': column of ``R`` used as the statistic for the 'monocrit'
            criterion, default 3.

    Returns:
        The flat cluster assignment returned by ``fcluster``.
    """
    L_method = hier_dict.get('L_method', 'single')
    L_metric = hier_dict.get('L_metric', 'euclidean')
    optimal_ordering = hier_dict.get('optimal_ordering', False)
    criterionH = hier_dict.get('criterionH', 'inconsistent')
    depth = hier_dict.get('depth', 2)
    R = hier_dict.get('R')
    colR = hier_dict.get('colR', 3)

    # These methods are only defined for Euclidean distances, so override
    # any other metric instead of letting linkage() reject the request.
    if L_method in ('centroid', 'median', 'ward') and L_metric != 'euclidean':
        L_metric = 'euclidean'
        print('\n')
        print('*************Note:**************')
        print('Method ' + str(L_method) + ' requires the distance metric to be Euclidean')

    # An explicit 't' always wins; otherwise the default depends on whether
    # the criterion interprets t as a cluster count or as a threshold.
    if 't' in hier_dict:
        t = hier_dict['t']
    elif criterionH in ('maxclust', 'maxclust_monocrit'):
        t = 20
    else:
        t = 0.9

    Z = linkage(x, method=L_method, metric=L_metric, optimal_ordering=optimal_ordering)

    # Criteria that rely on the inconsistency matrix: compute it when the
    # caller did not supply one, otherwise normalise it to a C-ordered array.
    if criterionH in ('inconsistent', 'maxclust_monocrit', 'monocrit'):
        R = inconsistent(Z, d=depth) if R is None else np.asarray(R, order='c')

    if criterionH == 'monocrit':
        return fcluster(Z, criterion='monocrit', t=t, monocrit=maxRstat(Z, R, colR))
    if criterionH == 'maxclust_monocrit':
        return fcluster(Z, criterion='maxclust_monocrit', t=t, monocrit=maxinconsts(Z, R))
    return fcluster(Z, criterion=criterionH, depth=depth, R=R, t=t)