def perform_merging(groups, sim_mad, sim_dip, data):
    """Merge the single most similar pair of clusters, if it is similar enough.

    Every pair of clusters is compared along the axis joining their medians:
    the samples of both clusters are projected onto the difference of the two
    median vectors, and a one-dimensional dissimilarity is computed on those
    projections. Only the pair with the smallest dissimilarity is a candidate,
    and it is merged when that value is strictly below the threshold.

    Args:
        groups: NumPy array of cluster labels, aligned with axis 0 of `data`.
            Entries that are not greater than -1 are ignored. It is modified
            in place when a merge happens.
        sim_mad: Threshold parameter used when `sim_dip` is not positive; the
            effective threshold is `sim_mad / 0.674`.
        sim_dip: When strictly positive, the dissimilarity is
            `dip(...) / dip_threshold(n, sim_dip)` and the threshold is 1.
            Otherwise the MAD-normalised distance between the projected
            medians is used.
        data: Array of samples, indexed along axis 0 like `groups`.

    Returns:
        A tuple `(merged, groups)`. `merged` is True when one pair was merged,
        in which case the members of the second cluster of the pair now carry
        the label of the first one.
    """
    labelled = numpy.where(groups > -1)[0]
    clusters = numpy.unique(groups[labelled])
    nb_clusters = len(clusters)
    use_dip = sim_dip > 0

    # Membership and medians depend on one cluster only, so they are computed
    # once per cluster rather than once per pair. Only the index arrays and
    # the medians are kept around, to avoid holding a full copy of `data`.
    members = [numpy.where(groups == label)[0] for label in clusters]
    medians = [
        numpy.median(numpy.take(data, idx, axis=0), 0) for idx in members
    ]

    dmin = numpy.inf
    to_merge = None

    for ic1 in range(nb_clusters):
        sd1 = numpy.take(data, members[ic1], axis=0)
        for ic2 in range(ic1 + 1, nb_clusters):
            sd2 = numpy.take(data, members[ic2], axis=0)

            # Project both clusters onto the axis joining their medians.
            v_n = medians[ic1] - medians[ic2]
            pr_1 = numpy.dot(sd1, v_n)
            pr_2 = numpy.dot(sd2, v_n)

            if use_dip:
                sub_data = numpy.concatenate([pr_1, pr_2])
                # Pairs with 10 projected samples or fewer are never merged.
                if len(sub_data) > 10:
                    # NOTE(review): dip/dip_threshold are defined elsewhere;
                    # presumably Hartigan's dip statistic and its critical
                    # value - confirm.
                    dist = dip(sub_data) / dip_threshold(
                        len(sub_data), sim_dip)
                else:
                    dist = numpy.inf
            else:
                med1 = numpy.median(pr_1)
                med2 = numpy.median(pr_2)
                mad1 = numpy.median(numpy.abs(pr_1 - med1))**2
                mad2 = numpy.median(numpy.abs(pr_2 - med2))**2
                norm = mad1 + mad2
                gap = (med1 - med2)**2
                if norm == 0:
                    # Both projections have zero spread. Dividing would give
                    # 0/0 = nan (never merged) or x/0 = inf, each with a
                    # RuntimeWarning. Coincident clusters are at distance 0;
                    # distinct zero-spread clusters are infinitely far apart.
                    dist = 0.0 if gap == 0 else numpy.inf
                else:
                    dist = numpy.sqrt(gap / norm)

            if dist < dmin:
                dmin = dist
                to_merge = (ic1, ic2)

    if use_dip:
        thr = 1
    else:
        # NOTE(review): 0.674 is presumably the MAD-to-standard-deviation
        # ratio of a normal distribution - confirm.
        thr = sim_mad / 0.674

    if dmin < thr:
        keep, drop = to_merge
        groups[members[drop]] = clusters[keep]
        return True, groups

    return False, groups
def perform_merging(groups, merging_method, merging_param, data):
    """Merge the single most similar pair of clusters, if it is similar enough.

    Every pair of clusters is scored with the dissimilarity selected by
    `merging_method`. Only the pair with the smallest score is a candidate,
    and it is merged when that score is strictly below the threshold.

    The methods 'distance', 'dip', 'folding' and 'bhatta' work on
    one-dimensional projections of both clusters onto the axis joining their
    medians. 'nd-folding' and 'nd-bhatta' work on the samples themselves
    ('nd-folding' keeps only the first three columns).

    Args:
        groups: NumPy array of cluster labels, aligned with axis 0 of `data`.
            Entries that are not greater than -1 are ignored. It is modified
            in place when a merge happens.
        merging_method: One of 'distance', 'dip', 'folding', 'nd-folding',
            'bhatta' or 'nd-bhatta'.
        merging_param: Method-dependent parameter. For 'dip' it is passed to
            `dip_threshold` and the threshold is 1. For 'distance' the
            threshold is `merging_param / 0.674`. For the other methods it
            is the threshold itself.
        data: Array of samples, indexed along axis 0 like `groups`.

    Returns:
        `(True, groups, (c1, c2), dmin)` when the cluster labelled `c2` was
        merged into the one labelled `c1`, `dmin` being the score of that
        pair. `(False, groups, None, None)` otherwise.

    Raises:
        ValueError: If `merging_method` is not one of the supported names.
    """
    projected_methods = ('distance', 'dip', 'folding', 'bhatta')
    full_space_methods = ('nd-folding', 'nd-bhatta')
    known_methods = projected_methods + full_space_methods
    if merging_method not in known_methods:
        # Fail early and explicitly: an unrecognised name would otherwise
        # only surface as an unbound local variable further down.
        raise ValueError(
            'Unknown merging_method %r; expected one of: %s'
            % (merging_method, ', '.join(known_methods)))
    use_projection = merging_method in projected_methods

    labelled = numpy.where(groups > -1)[0]
    clusters = numpy.unique(groups[labelled])
    nb_clusters = len(clusters)

    # Membership and medians depend on one cluster only, so they are computed
    # once per cluster rather than once per pair. Only the index arrays and
    # the medians are kept around, to avoid holding a full copy of `data`.
    members = [numpy.where(groups == label)[0] for label in clusters]
    if use_projection:
        medians = [
            numpy.median(numpy.take(data, idx, axis=0), 0) for idx in members
        ]

    dmin = numpy.inf
    to_merge = None

    for ic1 in range(nb_clusters):
        sd1 = numpy.take(data, members[ic1], axis=0)
        for ic2 in range(ic1 + 1, nb_clusters):
            sd2 = numpy.take(data, members[ic2], axis=0)

            if use_projection:
                # Project both clusters onto the axis joining their medians.
                v_n = medians[ic1] - medians[ic2]
                pr_1 = numpy.dot(sd1, v_n)
                pr_2 = numpy.dot(sd2, v_n)

            if merging_method == 'folding':
                sub_data = numpy.concatenate([pr_1, pr_2])
                # NOTE(review): the helper is defined elsewhere; the meaning
                # of its second argument is not visible here - confirm.
                unimodal, p_value, _, _ = batch_folding_test_with_MPA(
                    sub_data, True)
                dist = p_value if unimodal else numpy.inf
            elif merging_method == 'nd-folding':
                sub_data = numpy.vstack((sd1, sd2))[:, :3]
                unimodal, p_value, _, _ = batch_folding_test_with_MPA(
                    sub_data, True)
                dist = p_value if unimodal else numpy.inf
            elif merging_method == 'dip':
                sub_data = numpy.concatenate([pr_1, pr_2])
                # Pairs with 5 projected samples or fewer are never merged.
                if len(sub_data) > 5:
                    # NOTE(review): dip/dip_threshold are defined elsewhere;
                    # presumably Hartigan's dip statistic and its critical
                    # value - confirm.
                    dist = dip(sub_data) / dip_threshold(
                        len(sub_data), merging_param)
                else:
                    dist = numpy.inf
            elif merging_method == 'distance':
                med1 = numpy.median(pr_1)
                med2 = numpy.median(pr_2)
                mad1 = numpy.median(numpy.abs(pr_1 - med1))**2
                mad2 = numpy.median(numpy.abs(pr_2 - med2))**2
                norm = mad1 + mad2
                gap = (med1 - med2)**2
                if norm == 0:
                    # Both projections have zero spread. Dividing would give
                    # 0/0 = nan (never merged) or x/0 = inf, each with a
                    # RuntimeWarning. Coincident clusters are at distance 0;
                    # distinct zero-spread clusters are infinitely far apart.
                    dist = 0.0 if gap == 0 else numpy.inf
                else:
                    dist = numpy.sqrt(gap / norm)
            elif merging_method == 'bhatta':
                # Best effort: a pair the helper cannot score is treated as
                # not mergeable instead of aborting the whole search.
                try:
                    dist = bhatta_dist(pr_1, pr_2)
                except Exception:
                    dist = numpy.inf
            else:  # 'nd-bhatta'
                try:
                    dist = nd_bhatta_dist(sd1.T, sd2.T)
                except Exception:
                    dist = numpy.inf

            if dist < dmin:
                dmin = dist
                to_merge = (ic1, ic2)

    if merging_method == 'dip':
        thr = 1
    elif merging_method == 'distance':
        # NOTE(review): 0.674 is presumably the MAD-to-standard-deviation
        # ratio of a normal distribution - confirm.
        thr = merging_param / 0.674
    else:
        thr = merging_param

    if dmin < thr:
        keep, drop = to_merge
        c1, c2 = clusters[keep], clusters[drop]
        groups[members[drop]] = c1
        merge = (c1, c2)
        return True, groups, merge, dmin

    return False, groups, None, None