Example #1
    def test_specific_user(self):
        # Cluster only user_1's ink with linkage clustering, then train and
        # evaluate a DTW classifier on the labeled ink pairs.
        link = ClusterLinkage(self.user_ink_data, target_user_id='user_1')
        clustered_data = link.clustered_data()
        cdtw = ClassifierDTW()
        cdtw.train(clustered_data)
        accuracy,_,_ = cdtw.test(self.label_ink_pairs)
        self.assertGreater(accuracy, 91.0)
Example #2
    def test_simple(self):
        # Cluster the full ink data with linkage clustering, then train and
        # evaluate a DTW classifier on the labeled ink pairs.
        link = ClusterLinkage(self.user_ink_data)
        clustered_data = link.clustered_data()
        cdtw = ClassifierDTW()
        cdtw.train(clustered_data)
        accuracy,_,_ = cdtw.test(self.label_ink_pairs)
        self.assertGreater(accuracy, 92.0)
Example #3
    def test_state_reduction(self):
        # Train with medoid cluster centers and state reduction enabled,
        # then check that accuracy stays above the expected threshold.
        cDTW = ClassifierDTW(alpha=0.5,min_cluster_size=10)
        cDTW.train(self.clustered_data,center_type='medoid',
                   state_reduction=True)
        accuracy,_,_ = cDTW.test(self.label_ink_pairs)
        if VERBOSE: print(accuracy, 87)
        self.assertGreater(accuracy, 87.0)
Example #4
    def test_simple(self):
        # Cluster the ink data with DTW-based k-means, train a DTW classifier
        # using centroid centers, and check test accuracy.
        km = ClusterKMeans(self.user_ink_data,algorithm='dtw')
        clustered_data = km.clustered_data()
        cDTW = ClassifierDTW()
        cDTW.train(clustered_data, center_type='centroid')
        accuracy,_,_ = cDTW.test(self.label_ink_pairs)
        self.assertGreater(accuracy, 93.0)
Example #5
    def test_optimize(self):
        # Optimize the number of clusters per label before training, then
        # train and evaluate the DTW classifier as in the previous example.
        km = ClusterKMeans(self.user_ink_data,algorithm='dtw')
        km.optimize_cluster_num(self.label_ink_pairs, verbose=False)
        clustered_data = km.clustered_data()
        cDTW = ClassifierDTW()
        cDTW.train(clustered_data, center_type='centroid')
        accuracy,_,_ = cDTW.test(self.label_ink_pairs)
        self.assertGreater(accuracy, 93.0)
Example #6
    def optimize_cluster_num(self, test_data, n_iter=30, 
                             threshold=0.001, dview=None, verbose=False):
        # start with 1 prototype for each label
        temp_n_clusters = np.ones(len(self.labels), dtype=int)
        cluster_info = self._partition_data(temp_n_clusters.tolist(),
                                            dview=dview)
        distmats = [distmat for _,distmat in cluster_info]
        trained_prototypes = _train_all_prototypes(self.weighted_ink_data,
                                                   cluster_info, 
                                                   self.labels)
        curr_classifier = ClassifierDTW()
        curr_classifier.trained_prototypes = trained_prototypes

        (accuracy,_,_) = curr_classifier.test(test_data,dview=dview)
        error_rates = [1.0 - accuracy / 100.0]
        n_clusters = [temp_n_clusters.copy()]
        added_labels = []
        if verbose: print(error_rates)

        # compute all candidates
        candidates = []
        for i in range(len(self.labels)):
            partition,_ = _partition_subset((self.weighted_ink_data[i], 
                                             self.labels[i], 
                                             temp_n_clusters[i]+1),
                                            distmat=distmats[i],
                                            verbose=verbose)
            prototypes = _train_prototypes(self.weighted_ink_data[i], 
                                           partition, 
                                           self.labels[i])
            if verbose: print "candidate for %s has length %d"%(
                self.labels[i], len(prototypes))
            candidates.append(prototypes)        

        for it in range(n_iter):
            if verbose: print "Iteration %d"%it
            # find the most beneficial class to increse prototype by 1
            min_error = 1.0
            min_idx = None
            best_classifier = None
            for i in range(len(self.labels)):
                if len(candidates[i]) < temp_n_clusters[i]+1:
                    # degenerate case; no need to test
                    error = 1.0
                elif temp_n_clusters[i]+1 > self.maxclust:
                    # maxclust reached
                    error = 1.0
                else:
                    test_classifier = ClassifierDTW()
                    prototypes = curr_classifier.trained_prototypes
                    # filter out all prototypes with the label
                    prototypes = [p 
                                  for p in prototypes 
                                  if p.label != self.labels[i]]
                    prototypes += candidates[i]
                    test_classifier.trained_prototypes = prototypes
                    (accuracy,_,_) = test_classifier.test(test_data, 
                                                          dview=dview)
                    error = 1.0 - accuracy / 100.0
                    if verbose: print "> %f"%error

                if (error < min_error):
                    min_error = error
                    min_idx = i
                    best_classifier = test_classifier
                    
            # Stop if no update found or error reduction is small
            if (min_idx is None) or (error_rates[-1] - min_error < threshold):
                if verbose: print "no improvement. done."
                break
            
            # choose the min_idx
            temp_n_clusters[min_idx] += 1
            error_rates.append(min_error)
            added_labels.append(self.labels[min_idx])
            n_clusters.append(temp_n_clusters.copy())
            curr_classifier = best_classifier

            if verbose:
                print(error_rates)
                print(n_clusters)
                print("choosing %s" % self.labels[min_idx])

            # replace the candidate if needed
            if (temp_n_clusters[min_idx]+1 <= self.maxclust):
                partition,_ = _partition_subset(
                    (self.weighted_ink_data[min_idx], 
                     self.labels[min_idx], 
                     temp_n_clusters[min_idx]+1),
                    distmat=distmats[min_idx], 
                    verbose=verbose)
                prototypes = _train_prototypes(
                    self.weighted_ink_data[min_idx],
                    partition, 
                    self.labels[min_idx])
                if verbose:
                    print "candidate for %s has length %d"%(
                        self.labels[min_idx], len(prototypes))
                candidates[min_idx] = prototypes

        self.n_clusters = n_clusters[-1]
        return (error_rates, added_labels)
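
The method above implements a greedy allocation strategy: start with one prototype per label, then on each iteration grow the single class whose extra prototype reduces the test error the most, stopping once the improvement drops below threshold or maxclust is reached. Below is a minimal, self-contained sketch of that loop for reference; greedy_prototype_allocation and the evaluate_error callback are hypothetical names standing in for the re-partitioning, prototype training, and ClassifierDTW.test calls shown above, and are not part of the library's API.

def greedy_prototype_allocation(labels, evaluate_error,
                                n_iter=30, threshold=0.001, maxclust=5):
    # start with one prototype per label, as in optimize_cluster_num above
    n_clusters = {label: 1 for label in labels}
    error_rates = [evaluate_error(n_clusters)]
    added_labels = []

    for _ in range(n_iter):
        # try giving each class one extra prototype and keep the best trial
        best_error, best_label = None, None
        for label in labels:
            if n_clusters[label] + 1 > maxclust:
                continue  # cap reached for this class
            trial = dict(n_clusters)
            trial[label] += 1
            error = evaluate_error(trial)
            if best_error is None or error < best_error:
                best_error, best_label = error, label

        # stop when no candidate improves the error by at least `threshold`
        if best_label is None or error_rates[-1] - best_error < threshold:
            break

        n_clusters[best_label] += 1
        error_rates.append(best_error)
        added_labels.append(best_label)

    return n_clusters, error_rates, added_labels

if __name__ == '__main__':
    # toy error surface with diminishing returns, purely illustrative
    toy_error = lambda alloc: 1.0 / (1.0 + sum(alloc.values()))
    print(greedy_prototype_allocation(['a', 'b', 'c'], toy_error))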