示例#1
0
    def perform_merging(groups, sim_mad, sim_dip, data):
        """Merge the single closest pair of clusters if it is close enough.

        Every pair of clusters (labels > -1 in ``groups``) is compared along
        the axis joining their per-feature medians. The pair with the smallest
        score is merged when that score is below the threshold.

        Parameters
        ----------
        groups : numpy.ndarray
            Cluster label of each row of ``data``; negative labels are ignored.
            Modified in place when a merge happens.
        sim_mad : float
            Merge threshold used by the MAD-based distance (``sim_dip <= 0``).
        sim_dip : float
            When > 0, the dip-test criterion is used instead and this value is
            forwarded to ``dip_threshold``.
        data : numpy.ndarray
            One row of features per entry of ``groups``.

        Returns
        -------
        (bool, numpy.ndarray)
            Whether a merge was performed, and ``groups`` (the same object
            that was passed in).
        """
        mask = numpy.where(groups > -1)[0]
        clusters = numpy.unique(groups[mask])
        n_clusters = len(clusters)

        # Gather each cluster's rows and median once instead of recomputing
        # them for every pair inside the nested loop.
        members = [
            numpy.take(data, numpy.where(groups == label)[0], axis=0)
            for label in clusters
        ]
        medians = [numpy.median(rows, 0) for rows in members]

        use_dip = sim_dip > 0
        dmin = numpy.inf
        to_merge = [None, None]

        # `range` (not `xrange`) so the function also runs on Python 3.
        for ic1 in range(n_clusters):
            sd1 = members[ic1]
            for ic2 in range(ic1 + 1, n_clusters):
                sd2 = members[ic2]

                # Project both clusters onto the axis joining their medians.
                v_n = medians[ic1] - medians[ic2]
                pr_1 = numpy.dot(sd1, v_n)
                pr_2 = numpy.dot(sd2, v_n)

                if use_dip:
                    sub_data = numpy.concatenate([pr_1, pr_2])
                    if len(sub_data) > 10:
                        dist = dip(sub_data) / dip_threshold(
                            len(sub_data), sim_dip)
                    else:
                        # Too few points for the dip criterion: never merge.
                        dist = numpy.inf
                else:
                    med1 = numpy.median(pr_1)
                    med2 = numpy.median(pr_2)
                    mad1 = numpy.median(numpy.abs(pr_1 - med1))**2
                    mad2 = numpy.median(numpy.abs(pr_2 - med2))**2
                    norm = mad1 + mad2
                    if norm > 0:
                        dist = numpy.sqrt((med1 - med2)**2 / norm)
                    elif med1 == med2:
                        # Zero spread and identical centres: the clusters are
                        # indistinguishable (0/0 used to yield NaN, which
                        # silently prevented the merge).
                        dist = 0.0
                    else:
                        # Zero spread but distinct centres: infinitely far.
                        dist = numpy.inf

                # Strict comparison keeps the first pair found on ties.
                if dist < dmin:
                    dmin = dist
                    to_merge = [ic1, ic2]

        if use_dip:
            thr = 1
        else:
            # NOTE(review): 0.674 presumably converts a MAD into a standard
            # deviation for Gaussian data -- confirm.
            thr = sim_mad / 0.674

        if dmin < thr:
            kept, absorbed = clusters[to_merge[0]], clusters[to_merge[1]]
            groups[numpy.where(groups == absorbed)[0]] = kept
            return True, groups

        return False, groups
示例#2
0
    def perform_merging(groups, merging_method, merging_param, data):
        """Merge the single closest pair of clusters if it is close enough.

        Every pair of clusters (labels > -1 in ``groups``) is scored with the
        criterion selected by ``merging_method``. The pair with the smallest
        score is merged when that score is below the method's threshold.

        Parameters
        ----------
        groups : numpy.ndarray
            Cluster label of each row of ``data``; negative labels are ignored.
            Modified in place when a merge happens.
        merging_method : str
            One of 'distance', 'dip', 'folding', 'bhatta', 'nd-folding' or
            'nd-bhatta'. The first four work on the 1-D projection of both
            clusters onto the axis joining their medians; the 'nd-' variants
            work on the raw feature rows.
        merging_param : float
            Threshold parameter. It is forwarded to ``dip_threshold`` for
            'dip', divided by 0.674 for 'distance', and used directly as the
            threshold otherwise.
        data : numpy.ndarray
            One row of features per entry of ``groups``.

        Returns
        -------
        tuple
            ``(True, groups, (kept_label, absorbed_label), score)`` when a
            merge was performed, otherwise ``(False, groups, None, None)``.

        Raises
        ------
        ValueError
            If ``merging_method`` is not one of the supported names.
        """
        projection_methods = ('distance', 'dip', 'folding', 'bhatta')
        known_methods = projection_methods + ('nd-folding', 'nd-bhatta')
        if merging_method not in known_methods:
            # Previously an unknown method surfaced as an UnboundLocalError
            # on `dist` or `thr`.
            raise ValueError(
                "Unknown merging_method %r; expected one of: %s"
                % (merging_method, ', '.join(known_methods)))
        use_projection = merging_method in projection_methods

        mask = numpy.where(groups > -1)[0]
        clusters = numpy.unique(groups[mask])
        n_clusters = len(clusters)

        # Gather each cluster's rows (and median, when needed) once instead of
        # recomputing them for every pair inside the nested loop.
        members = [
            numpy.take(data, numpy.where(groups == label)[0], axis=0)
            for label in clusters
        ]
        if use_projection:
            medians = [numpy.median(rows, 0) for rows in members]

        dmin = numpy.inf
        to_merge = [None, None]

        # `range` (not `xrange`) so the function also runs on Python 3.
        for ic1 in range(n_clusters):
            sd1 = members[ic1]
            for ic2 in range(ic1 + 1, n_clusters):
                sd2 = members[ic2]

                if use_projection:
                    # Project both clusters onto the axis joining the medians.
                    v_n = medians[ic1] - medians[ic2]
                    pr_1 = numpy.dot(sd1, v_n)
                    pr_2 = numpy.dot(sd2, v_n)

                if merging_method in ('folding', 'nd-folding'):
                    if merging_method == 'folding':
                        sub_data = numpy.concatenate([pr_1, pr_2])
                    else:
                        # Only the first three feature columns are tested.
                        sub_data = numpy.vstack((sd1, sd2))[:, :3]
                    unimodal, p_value, _, _ = batch_folding_test_with_MPA(
                        sub_data, True)
                    dist = p_value if unimodal else numpy.inf
                elif merging_method == 'dip':
                    sub_data = numpy.concatenate([pr_1, pr_2])
                    if len(sub_data) > 5:
                        dist = dip(sub_data) / dip_threshold(
                            len(sub_data), merging_param)
                    else:
                        # Too few points for the dip criterion: never merge.
                        dist = numpy.inf
                elif merging_method == 'distance':
                    med1 = numpy.median(pr_1)
                    med2 = numpy.median(pr_2)
                    mad1 = numpy.median(numpy.abs(pr_1 - med1))**2
                    mad2 = numpy.median(numpy.abs(pr_2 - med2))**2
                    norm = mad1 + mad2
                    if norm > 0:
                        dist = numpy.sqrt((med1 - med2)**2 / norm)
                    elif med1 == med2:
                        # Zero spread and identical centres: the clusters are
                        # indistinguishable (0/0 used to yield NaN, which
                        # silently prevented the merge).
                        dist = 0.0
                    else:
                        # Zero spread but distinct centres: infinitely far.
                        dist = numpy.inf
                elif merging_method == 'bhatta':
                    # Best effort: a failing distance estimate means the pair
                    # is simply not a merge candidate.
                    try:
                        dist = bhatta_dist(pr_1, pr_2)
                    except Exception:
                        dist = numpy.inf
                else:  # 'nd-bhatta'
                    try:
                        dist = nd_bhatta_dist(sd1.T, sd2.T)
                    except Exception:
                        dist = numpy.inf

                # Strict comparison keeps the first pair found on ties.
                if dist < dmin:
                    dmin = dist
                    to_merge = [ic1, ic2]

        if merging_method == 'dip':
            thr = 1
        elif merging_method == 'distance':
            # NOTE(review): 0.674 presumably converts a MAD into a standard
            # deviation for Gaussian data -- confirm.
            thr = merging_param / 0.674
        else:
            thr = merging_param

        if dmin < thr:
            ic1, ic2 = to_merge
            c1, c2 = clusters[ic1], clusters[ic2]
            selection = numpy.where(groups == c2)[0]
            groups[selection] = c1
            merge = (c1, c2)
            return True, groups, merge, dmin

        return False, groups, None, None