예제 #1
0
def anneal_clusters(vbParam):
    """Merge clusters step by step until every merged group is stable.

    Starting from the K columns of ``vbParam.rhat``, the number of groups
    is lowered one at a time from ``K - 1`` down to 1.  At each step the
    soft assignment of a group is the sum of its member columns, and the
    first grouping whose stability values all exceed 0.8 is returned.
    The single-group result is returned unconditionally when no earlier
    step qualifies.

    Parameters
    ----------
    vbParam : object
        Must expose ``rhat`` (2-D array, one column per cluster) and
        ``muhat`` (3-D array); it is also handed to
        ``mfm.calc_mahalonobis``.

    Returns
    -------
    labels : ndarray
        Column index of the largest soft assignment in each row.
    stability
        Output of ``calculate_stability`` for the returned grouping.
    cc
        The groups; each entry holds the original column indices that
        were merged into that group.
    """
    N, K = vbParam.rhat.shape

    stability = calculate_stability(vbParam.rhat)
    # With a single cluster there is nothing to merge.  Returning here
    # also keeps the function from falling off the end (and implicitly
    # returning None) because the merge loop below would be empty.
    if K <= 1 or np.all(stability > 0.8):
        cc = [[k] for k in range(K)]
        return vbParam.rhat.argmax(1), stability, cc

    maha = mfm.calc_mahalonobis(vbParam, vbParam.muhat.transpose((1, 0, 2)))
    # symmetrise by keeping the larger of the two directed values
    maha = np.maximum(maha, maha.T)

    maha_thresh_min = 0
    # K >= 2 here, so the loop body runs at least once
    for k_target in range(K - 1, 0, -1):
        # get connected components with k_target number of them
        cc, maha_thresh_min = get_k_cc(maha, maha_thresh_min, k_target)

        # calculate soft assignment for each cc
        rhat_cc = np.zeros([N, len(cc)])
        for i, units in enumerate(cc):
            rhat_cc[:, i] = np.sum(vbParam.rhat[:, units], axis=1)
        # drop negligible assignments, then renormalise each row
        rhat_cc[rhat_cc < 0.001] = 0.0
        rhat_cc = rhat_cc / np.sum(rhat_cc, axis=1, keepdims=True)

        # calculate stability for each component and make decision;
        # the last step (one component) is accepted regardless
        stability = calculate_stability(rhat_cc)
        if np.all(stability > 0.8) or k_target == 1:
            break

    return rhat_cc.argmax(1), stability, cc
예제 #2
0
파일: cluster.py 프로젝트: AkiHase/yass
    def recover_spikes(self, vbParam, pca, maha_dist=1):
        """Select spikes that are close to at least one cluster.

        ``vbParam`` is updated on the full data set, the distance of every
        spike to every cluster is computed with ``mfm.calc_mahalonobis``,
        and a spike is kept unless all of its distances are at or above
        ``sqrt(chi2.ppf(0.99, D))``.  ``vbParam.rhat`` is then restricted
        to the kept rows, entries below ``self.assignment_delete_threshold``
        are zeroed and every row is divided by its sum.

        Parameters
        ----------
        vbParam : object
            Model state; modified in place (``update_local`` is called and
            ``rhat`` is replaced).
        pca : ndarray
            2-D array with N rows and D columns.
        maha_dist : optional
            Not used by this method body.

        Returns
        -------
        idx_recovered : ndarray
            Row indices of ``pca`` that were kept.
        vbParam : object
            The same object that was passed in.
        """
        n_spikes, n_features = pca.shape

        # Cat: TODO: check if this maha thresholding recovering distance is good
        # NOTE(review): chi2 is presumably scipy.stats.chi2 - confirm import
        cutoff = np.sqrt(chi2.ppf(0.99, n_features))

        # update rhat on full data
        full_data = mfm.maskData(
            pca[:, :, np.newaxis],
            np.ones([n_spikes, 1]),
            np.arange(n_spikes),
        )
        vbParam.update_local(full_data)

        # calculate mahalanobis distance; a spike is rejected only when
        # every one of its distances reaches the cutoff
        maha = mfm.calc_mahalonobis(vbParam, pca[:, :, np.newaxis])
        rejected = np.all(maha >= cutoff, axis=1)
        idx_recovered = np.where(~rejected)[0]
        vbParam.rhat = vbParam.rhat[idx_recovered]

        # zero out low assignment vals, then renormalise each row
        weak = vbParam.rhat < self.assignment_delete_threshold
        vbParam.rhat[weak] = 0
        row_totals = np.sum(vbParam.rhat, 1, keepdims=True)
        vbParam.rhat = vbParam.rhat / row_totals

        return idx_recovered, vbParam
예제 #3
0
    def cluster_annealing(self, vbParam):
        """Merge clusters step by step until every merged group is stable.

        Starting from the K columns of ``vbParam.rhat``, the number of
        groups is lowered one at a time from ``K - 1`` down to 2.  At each
        step the soft assignment of a group is the sum of its member
        columns, and the first grouping whose stability values all exceed
        0.8 is returned.  The two-group result is returned unconditionally
        when no earlier step qualifies.

        Parameters
        ----------
        vbParam : object
            Must expose ``rhat`` (2-D array, one column per cluster) and
            ``muhat`` (3-D array); it is also handed to
            ``mfm.calc_mahalonobis``.

        Returns
        -------
        labels : ndarray
            Column index of the largest soft assignment in each row.
        stability
            Output of ``self.calculate_stability`` for the returned
            grouping.
        cc
            The groups; each entry holds the original column indices that
            were merged into that group.
        """
        N, K = vbParam.rhat.shape

        stability = self.calculate_stability(vbParam.rhat)
        # Merging never goes below two groups, so with two or fewer
        # clusters the input grouping is final.  Using ``K <= 2`` (rather
        # than ``K == 2``) also keeps a single unstable cluster from
        # reaching an empty merge loop and implicitly returning None.
        if K <= 2 or np.all(stability > 0.8):
            cc = [[k] for k in range(K)]
            return vbParam.rhat.argmax(1), stability, cc

        maha = mfm.calc_mahalonobis(
            vbParam, vbParam.muhat.transpose((1, 0, 2)))
        # symmetrise by keeping the larger of the two directed values
        maha = np.maximum(maha, maha.T)

        # decrease number of connected components one at a time.
        # in any step if all components are stable, stop and return;
        # otherwise, go until there are only two connected components
        maha_thresh_min = 0
        # K >= 3 here, so the loop body runs at least once
        for k_target in range(K - 1, 1, -1):
            # get connected components with k_target number of them
            cc, maha_thresh_min = self.get_k_cc(maha, maha_thresh_min,
                                                k_target)

            # calculate soft assignment for each cc
            rhat_cc = np.zeros([N, len(cc)])
            for i, units in enumerate(cc):
                rhat_cc[:, i] = np.sum(vbParam.rhat[:, units], axis=1)
            # drop negligible assignments, then renormalise each row
            rhat_cc[rhat_cc < 0.001] = 0.0
            rhat_cc = rhat_cc / np.sum(rhat_cc, axis=1, keepdims=True)

            # calculate stability for each component and make decision;
            # the last step (two components) is accepted regardless
            stability = self.calculate_stability(rhat_cc)
            if np.all(stability > 0.8) or k_target == 2:
                break

        return rhat_cc.argmax(1), stability, cc