Example No. 1
 def test_maxinconsts_one_cluster_linkage(self):
     # Tests maxinconsts(Z, R) on linkage with one cluster.
     Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
     R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
     MD = maxinconsts(Z, R)
     expectedMD = calculate_maximum_inconsistencies(Z, R)
     assert_allclose(MD, expectedMD, atol=1e-15)
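In these tests, calculate_maximum_inconsistencies is a helper defined elsewhere in the test module and not shown here. A minimal sketch of such a reference computation, assuming the documented maxinconsts semantics (for each non-singleton cluster, the maximum inconsistency coefficient over that cluster and all of its non-leaf descendants), could look like the following; the function name and column index are illustrative, not the helper's actual definition:

import numpy as np

def reference_max_inconsistencies(Z, R, col=3):
    # Rows of Z are in merge order, so every child cluster's row comes
    # before its parent's row; one forward pass can propagate maxima.
    n = Z.shape[0] + 1                  # number of original observations
    maxinc = np.zeros(n - 1)
    for i in range(n - 1):
        best = R[i, col]                # inconsistency of cluster i itself
        for child in (int(Z[i, 0]), int(Z[i, 1])):
            if child >= n:              # non-leaf child: fold in its subtree max
                best = max(best, maxinc[child - n])
        maxinc[i] = best
    return maxinc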
Example No. 2
 def check_maxinconsts_Q_linkage(self, method):
     # Tests maxinconsts(Z, R) on the Q data set
     X = hierarchy_test_data.Q_X
     Z = linkage(X, method)
     R = inconsistent(Z)
     MD = maxinconsts(Z, R)
     expectedMD = calculate_maximum_inconsistencies(Z, R)
     assert_allclose(MD, expectedMD, atol=1e-15)
Example No. 3
    def process(self, **kwargs):
        Z = kwargs['Linkage']
        IncM = kwargs['IncM']
        R = IncM['R']

        MI = hierarchy.maxinconsts(Z, R)

        return {'MaxInc': MI}
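process here is a method on a pipeline-step class that is not shown. Below is a hedged, self-contained sketch of how such a step might be driven; the _MaxIncStep class is a stand-in for illustration, not the project's actual class:

import numpy as np
from scipy.cluster import hierarchy

class _MaxIncStep:
    # Stand-in for the (unshown) pipeline class that owns process().
    def process(self, **kwargs):
        Z = kwargs['Linkage']
        R = kwargs['IncM']['R']
        return {'MaxInc': hierarchy.maxinconsts(Z, R)}

X = np.random.rand(20, 3)
Z = hierarchy.linkage(X, method='average')
out = _MaxIncStep().process(Linkage=Z, IncM={'R': hierarchy.inconsistent(Z)})
print(out['MaxInc'])                    # one maximum inconsistency per cluster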
Example No. 4
 def check_maxinconsts_Q_linkage(self, method):
     # Tests maxinconsts(Z, R) on the Q data set
     X = eo['Q-X']
     Y = pdist(X)
     Z = linkage(X, method)
     R = inconsistent(Z)
     MD = maxinconsts(Z, R)
     expectedMD = calculate_maximum_inconsistencies(Z, R)
     assert_allclose(MD, expectedMD, atol=1e-15)
Example No. 5
A group with low inconsistency is one where the parent and children link
heights are similar; such a group reflects real similarity between its
children. The depth of the inconsistency cutoff tells us how many levels
below each group the cutoff applies to (a short sketch after this example
illustrates the effect of the depth argument).
"""

# Then compute the linkages (average represents the MRP dataset best)
Z = linkage(dist_vec, method='average')

# Compute the cophenetic correlation coefficient (c) and cophenetic
# distances (d) between Z and the condensed distance vector dist_vec
c, d = cophenet(Z, Y=dist_vec)

# Compute the inconsistency matrix for non-singleton clusters (default depth d=2)
R = inconsistent(Z)
# Now compute the maximum inconsistency coefficient per cluster
MI = maxinconsts(Z, R)

# Cluster the events based on an inconsistency threshold of 1.0
indices = fcluster(Z, t=1.0, criterion='inconsistent')

# Sample correlation values for visualizing their distribution
samp_inds = numpy.random.randint(0, len(dist_vec), 10000)
samp_corrs = [dist_vec[ind] for ind in samp_inds]

# Plot the dendrogram...if it's not way too huge
dendrogram(Z, color_threshold=1 - corr_thresh, distance_sort='ascending')
plt.show()

group_ids = list(set(indices))
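The docstring at the top of this example mentions the depth of the inconsistency cutoff. Here is a small self-contained sketch of how the d argument of inconsistent() changes the statistics that maxinconsts aggregates; the variable names are illustrative and not taken from the script above:

import numpy as np
from scipy.cluster.hierarchy import linkage, inconsistent, maxinconsts

rng = np.random.default_rng(42)
pts = rng.normal(size=(12, 2))
Zl = linkage(pts, method='average')

R_d2 = inconsistent(Zl, d=2)   # default: statistics over 2 levels of links
R_d4 = inconsistent(Zl, d=4)   # deeper cutoff: more links per group

# The per-cluster maxima generally differ once the depth changes.
print(maxinconsts(Zl, R_d2))
print(maxinconsts(Zl, R_d4))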
Example No. 6
import numpy as np
from scipy.cluster.hierarchy import (linkage, inconsistent, fcluster,
                                     maxRstat, maxinconsts)


def get_clusters_Hierarchy_clustering(x, hier_dict):
    # default values
    L_method = 'single'
    L_metric = 'euclidean'
    t = 0.9
    criterionH = 'inconsistent'
    depth = 2
    R = None
    colR = 3
    # L_metric can be 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
    # 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard',
    # 'kulsinski', 'mahalanobis', 'matching', 'minkowski',
    # 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener',
    # 'sokalsneath', 'sqeuclidean'
    # Note that 'jensenshannon' and 'yule' may result in a condensed
    # distance matrix that contains infinite values.
    if 'L_metric' in hier_dict.keys():
        L_metric = hier_dict['L_metric']

    # L_method can be 'single', 'complete', 'average', 'weighted',
    # 'centroid', 'median', or 'ward'
    if 'L_method' in hier_dict.keys():
        L_method = hier_dict['L_method']
    if L_method == 'centroid' or L_method == 'median' or L_method == 'ward':
        if L_metric != 'euclidean':
            L_metric = 'euclidean'
            print('\n')
            print('*************Note:**************')
            print('Method ' + str(L_method) +
                  ' requires the distance metric to be Euclidean')

    if 'optimal_ordering' in hier_dict.keys():
        optimal_ordering = hier_dict['optimal_ordering']
    else:
        optimal_ordering = False
    Z = linkage(x,
                method=L_method,
                metric=L_metric,
                optimal_ordering=optimal_ordering)
    # criterionH can be 'inconsistent', 'distance', 'maxclust',
    # 'monocrit', or 'maxclust_monocrit'
    if 'criterionH' in hier_dict.keys():
        criterionH = hier_dict['criterionH']
    else:
        criterionH = 'inconsistent'
    if 'depth' in hier_dict.keys():
        depth = hier_dict['depth']
    else:
        depth = 2
    if 't' in hier_dict.keys():
        t = hier_dict['t']
        #for 'maxclust' or 'maxclust_monocrit' criteria,
        #t would be max number of clusters requested.
    elif criterionH == 'maxclust_monocrit' or criterionH == 'maxclust':
        t = 20

    if 'R' in hier_dict.keys():
        R = hier_dict['R']
    if criterionH == 'inconsistent' or criterionH == 'maxclust_monocrit':
        #The inconsistency matrix to use for the 'inconsistent' criterion.
        #R is computed if not provided.
        if R is None:
            R = inconsistent(Z, d=depth)
        else:
            R = np.asarray(R, order='c')
    if criterionH == 'monocrit':
        if R is None:
            R = inconsistent(Z, d=depth)
        # colR is the column of R to use as the monocrit statistic
        return fcluster(Z,
                        criterion='monocrit',
                        t=t,
                        monocrit=maxRstat(Z, R, colR))
    elif criterionH == 'maxclust_monocrit':
        return fcluster(Z,
                        criterion='maxclust_monocrit',
                        t=t,
                        monocrit=maxinconsts(Z, R))
    else:
        return fcluster(Z, criterion=criterionH, depth=depth, R=R, t=t)
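A brief usage sketch for get_clusters_Hierarchy_clustering, relying on the function and imports above; the data and parameter values are illustrative:

# Illustrative call: average linkage, cut into at most 5 clusters using the
# per-cluster maximum inconsistency as the monotonic criterion.
rng = np.random.default_rng(0)
x = rng.normal(size=(30, 4))
hier_dict = {'L_method': 'average',
             'L_metric': 'euclidean',
             'criterionH': 'maxclust_monocrit',
             't': 5}
labels = get_clusters_Hierarchy_clustering(x, hier_dict)
print(labels)          # one flat-cluster id per observation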