Example No. 1
from numpy.testing import assert_array_almost_equal, assert_array_equal
from sklearn.cluster import Birch
from sklearn.datasets import make_blobs


def test_partial_fit():
    # Test that fit is equivalent to calling partial_fit multiple times
    X, y = make_blobs(n_samples=100)
    brc = Birch(n_clusters=3)
    brc.fit(X)
    brc_partial = Birch(n_clusters=None)
    brc_partial.partial_fit(X[:50])
    brc_partial.partial_fit(X[50:])
    assert_array_almost_equal(brc_partial.subcluster_centers_,
                              brc.subcluster_centers_)

    # Test that same global labels are obtained after calling partial_fit
    # with None
    brc_partial.set_params(n_clusters=3)
    brc_partial.partial_fit(None)
    assert_array_equal(brc_partial.subcluster_labels_, brc.subcluster_labels_)
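
The test above relies on Birch's partial_fit contract: incremental calls grow the CF tree, and a final call with None only recomputes the global clustering. A minimal sketch (plain scikit-learn, synthetic data, not from the test suite) of using that contract to cluster a stream in fixed-size chunks:

import numpy as np
from sklearn.cluster import Birch
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=1000, centers=3, random_state=42)
brc = Birch(n_clusters=None)
for chunk in np.array_split(X, 10):  # simulate ten incoming batches
    brc.partial_fit(chunk)           # each call grows the CF tree
brc.set_params(n_clusters=3)
brc.partial_fit(None)                # None: redo only the global step
labels = brc.predict(X)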
Example No. 2
from sklearn.cluster import Birch


def birchCluster(zD, maxd, out='dict', N=None, start=0, stop=None):
    """Cluster the particle positions in zD with BIRCH.

    maxd is the BIRCH threshold: the radius of the subcluster obtained by
    merging a new sample and its closest subcluster must stay below this
    value, otherwise a new subcluster is started. Setting it very low
    promotes splitting, and vice versa.
    """
    data = zD.dictPos
    stop = len(zD.pList) if not stop else stop
    X = [[data['x'][i], data['y'][i], data['z'][i]]
         for i in range(start, stop)]
    brc = Birch(branching_factor=50,
                n_clusters=None,
                threshold=maxd,
                compute_labels=True)
    brc.fit(X)
    if N:
        # Re-cluster the existing CF tree into N global clusters; passing
        # None avoids inserting the data a second time
        brc.set_params(n_clusters=N)
        brc.partial_fit(None)
    groups = brc.predict(X)
    if out == 'dict':
        return list2dict(zD, groups)
    elif out == 'list':
        return groups
    else:
        raise Exception("Out argument must have valus 'dict' or 'list'")
Example No. 3
import itertools

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from sklearn.cluster import Birch
from sklearn.decomposition import PCA

import func_base  # project-specific helpers (provides list_to_dict)


class Mini:

    def __init__(self, minis, mini_names, mini_finds, sample_freq):
        self.mini_names = mini_names
        self.minis = minis
        self.sample_freq = sample_freq
        self.mini_finds = mini_finds
        # Per-mini results, filled in by statis() and classify()
        self.offsets = self.fit_paras = self.event_sizes = self.amplitudes = \
            self.fast_constants = self.slow_constants = self.a_constants = \
            self.cur_labels = None
        # Per-mini attributes that _delete_mini() must keep in sync
        self.dict = ['mini_names', 'minis', 'offsets', 'fit_paras',
                     'event_sizes', 'amplitudes', 'fast_constants',
                     'slow_constants', 'a_constants', 'cur_labels',
                     'mini_finds']
        self.delete_index = set()

    def _delete_mini(self, index):
        # Truly delete: drop entry `index` from every per-mini attribute
        for name in self.dict:
            llist = getattr(self, name, None)
            if llist is None:
                continue
            if not isinstance(llist, list):
                # e.g. numpy arrays produced by classify(); convert first
                setattr(self, name, list(llist))
                llist = getattr(self, name)
            llist.pop(index)
    def mark_delete_mini(self, indexes):
        # Mark candidates for deletion; indexes may be a list, set, or tuple
        self.delete_index = self.delete_index.union(indexes)

    def truly_delete_mini(self):
        # Delete from the end so earlier indexes stay valid
        for number in sorted(self.delete_index, reverse=True):
            self._delete_mini(number)
        self.delete_index = set()  # clear the pending-delete set

    def reindex_mini(self):
        # Map each cluster label / sweep number to the indexes of its minis
        self.mini_reindex = {'label': {}, 'sweep': {}}
        self.mini_reindex['label'] = func_base.list_to_dict(
            self.cur_labels, range(len(self.cur_labels)))
        self.mini_reindex['sweep'] = func_base.list_to_dict(
            [x[0] for x in self.mini_finds], range(len(self.mini_finds)))
    def statis(self):
        if not self.minis:
            print("couldn't find any minis")
            return
        self.mini_number = len(self.minis)

        # Double-exponential synaptic-event template: a flat baseline a0
        # before t0, then a (1 - exp(-t/tau1)) rising phase multiplied by an
        # exp(-t/tau2) decay. np.exp (rather than math.exp) keeps the lambda
        # valid for the arrays that np.piecewise passes in.
        def template_func(x, a0, a1, tau1, tau2, t0):
            return np.piecewise(
                x, [x >= t0, x < t0],
                [lambda x: a0 + a1 * (1 - np.exp(-(x - t0) / tau1))
                           * np.exp(-(x - t0) / tau2),
                 a0])

        self.fit_paras = []
        self.event_sizes = []
        self.amplitudes = []
        self.offsets = []
        self.fast_constants = []
        self.slow_constants = []
        self.a_constants = []

        # Fit each mini with the double-exponential template: a1 is bounded
        # non-positive and both time constants non-negative
        param_bounds = ([-np.inf, -np.inf, 0, 0, -np.inf],
                        [np.inf, 0, np.inf, np.inf, np.inf])
        for mini in self.minis:
            self.amplitudes.append(max(mini) - min(mini))
            minilen = len(mini)
            # curve_fit struggles on very long traces; truncate
            if minilen > 10000:
                minilen = 10000
                mini = mini[:minilen]
            x_label = np.arange(0, minilen) / self.sample_freq
            try:
                params, pcov = curve_fit(template_func, x_label, mini,
                                         bounds=param_bounds)
            except Exception:
                # Plot the trace that failed to fit, then re-raise
                print("mini", mini, "label", x_label)
                plt.figure()
                plt.plot(x_label, mini)
                plt.show()
                raise

            self.fit_paras.append(params)
            self.offsets.append(params[4])         # t0
            self.fast_constants.append(params[2])  # tau1
            self.slow_constants.append(params[3])  # tau2
            self.a_constants.append(params[1])     # a1
            fit_mini = template_func(x_label, *params)
            self.event_sizes.append(max(fit_mini) - min(fit_mini))


    def mini_dim_reduce(self, dim=5):
        # PCA analysis
        pca = PCA(n_components=dim)
        # Minis have different lengths: zero-pad them into a rectangular
        # NumPy array before reducing
        minis = np.array(list(itertools.zip_longest(*self.minis,
                                                    fillvalue=0))).T
        # fit_transform returns an array of shape (n_minis, dim)
        self.proced_minis = pca.fit_transform(minis)

        print('explained variance ratio: %s'
              % str(pca.explained_variance_ratio_))

    def get_mini_info(self, index):
        mini = self.minis[index]
        x_label = np.arange(len(mini)) / self.sample_freq
        return self.mini_names[index], mini, self.cur_labels[index], x_label


    def classify(self, n_cluster=5):
        # Cluster the PCA-reduced minis with BIRCH
        self.birch = Birch(threshold=0.5, n_clusters=n_cluster)
        self.birch.fit(self.proced_minis)
        self.ori_labels = self.birch.labels_
        self.ori_centroids = self.birch.subcluster_centers_
        self.ori_n_clusters = np.unique(self.ori_labels)
        self.ori_n_cluster = np.unique(self.ori_labels).size
        self.cur_labels = self.ori_labels
        self.cur_centroids = self.ori_centroids
        self.cur_n_cluster = self.ori_n_cluster
        self.cur_n_clusters = self.ori_n_clusters

    def set_n_cluster(self, n_cluster):
        # Re-run only the global clustering step with a new cluster count
        self.birch.set_params(n_clusters=n_cluster)
        self.cur_labels = self.ori_labels = self.birch.predict(self.proced_minis)
        self.cur_n_cluster = np.unique(self.cur_labels).size
        self.cur_n_clusters = np.unique(self.cur_labels)
        self.cur_centroids = self.birch.subcluster_centers_
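
Taken together, Mini implements a fit-then-reduce-then-cluster pipeline. The sketch below drives it end to end on synthetic double-exponential events; every constant here (sampling frequency, event shape, counts) is illustrative, not from the original project:

import numpy as np

rng = np.random.default_rng(0)
freq = 10_000.0                       # assumed sampling frequency in Hz
t = np.arange(500) / freq
minis = [list(-a * (1 - np.exp(-t / 0.002)) * np.exp(-t / 0.01)
              + rng.normal(0, 0.02, t.size))
         for a in rng.uniform(0.5, 2.0, 40)]

m = Mini(minis,
         mini_names=['mini_%d' % i for i in range(len(minis))],
         mini_finds=[(0, i) for i in range(len(minis))],
         sample_freq=freq)
m.statis()                # fit every event; fills amplitudes and constants
m.mini_dim_reduce(dim=5)  # zero-pad, then PCA down to 5 components
m.classify(n_cluster=3)   # BIRCH on the PCA scores
print(m.cur_n_cluster, m.cur_labels[:10])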
Example No. 4
for item in range(len(affinity_propagation_test_performance_metric_array)):
    affinity_propagation_test_performance_metrics_for_plotting[item + 1] = affinity_propagation_test_performance_metric_array[item]
Figures.save_valid_test_performance_measures_vs_hyper_parameters_figure(affinity_propagation_parameter_search_space_for_plotting,
                                                                        affinity_propagation_valid_performance_metrics_for_plotting,
                                                                        affinity_propagation_test_performance_metrics_for_plotting,
                                                                        'Adjusted Mutual Information Score',
                                                                        'AffinityPropagation Clustering damping parameter',
                                                                        'Affinity_Propagation_Performance',
                                                                        0,
                                                                        0.5,
                                                                        left_horizontal_limit=0.5)

# Do BIRCH, optimizing number of calls to partial_fit over a validation set
current_optimal_birch_number_of_calls = 1
initial_optimal_birch_clusterer = Birch()
initial_optimal_birch_clusterer.partial_fit(train_data_set)
initial_optimal_birch_clusterer.set_params(n_clusters=number_of_classes)
initial_optimal_birch_clusterer.partial_fit(None)  # apply the new n_clusters to the global step
initial_birch_valid_predictions = initial_optimal_birch_clusterer.predict(valid_data_set)
initial_birch_test_predictions = initial_optimal_birch_clusterer.predict(test_data_set)

# Add one to the predictions to make them match up with the range of labels,
# then apply the Hungarian fix
initial_birch_valid_predictions += 1
initial_birch_test_predictions += 1
initial_birch_valid_predictions = Clustering.Hungarian_Fix(initial_birch_valid_predictions,
                                                           valid_labels).astype('int')
initial_birch_test_predictions = Clustering.Hungarian_Fix(initial_birch_test_predictions,
                                                          test_labels).astype('int')
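
Clustering.Hungarian_Fix is project-specific, but the idea it implements is standard: relabel predicted cluster ids so they overlap as much as possible with the true labels, via the Hungarian algorithm. A sketch of that idea with scipy (an illustration, not the project's implementation):

import numpy as np
from scipy.optimize import linear_sum_assignment

def hungarian_relabel(predictions, labels):
    pred_ids, true_ids = np.unique(predictions), np.unique(labels)
    # Negated contingency matrix: minimizing cost maximizes overlap
    cost = np.zeros((pred_ids.size, true_ids.size))
    for i, p in enumerate(pred_ids):
        for j, t in enumerate(true_ids):
            cost[i, j] = -np.sum((predictions == p) & (labels == t))
    rows, cols = linear_sum_assignment(cost)
    mapping = {pred_ids[r]: true_ids[c] for r, c in zip(rows, cols)}
    # Ids left unmatched (more clusters than classes) keep their value
    return np.array([mapping.get(p, p) for p in predictions])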

# Set a starting point for optimality of the initial performance metric, to be possibly adjusted later
birch_number_of_calls_integer_search_space_start = current_optimal_birch_number_of_calls + 1
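
From that starting point, the search presumably refits BIRCH with an increasing number of partial_fit calls and keeps the count that scores best on the validation set. A sketch under those assumptions (the chunking scheme and the search's upper bound are guesses; the variable names mirror the surrounding script):

import numpy as np
from sklearn.cluster import Birch
from sklearn.metrics import adjusted_mutual_info_score

best_score = adjusted_mutual_info_score(valid_labels, initial_birch_valid_predictions)
for number_of_calls in range(birch_number_of_calls_integer_search_space_start, 11):
    clusterer = Birch(n_clusters=None)
    for chunk in np.array_split(train_data_set, number_of_calls):
        clusterer.partial_fit(chunk)   # one incremental call per chunk
    clusterer.set_params(n_clusters=number_of_classes)
    clusterer.partial_fit(None)        # redo only the global clustering
    score = adjusted_mutual_info_score(valid_labels, clusterer.predict(valid_data_set))
    if score > best_score:
        best_score = score
        current_optimal_birch_number_of_calls = number_of_calls

Since adjusted mutual information is invariant to label permutations, the Hungarian fix is only needed for the final reported predictions, not inside this search loop.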