Пример #1
0
def compare_true_templates():
	"""Cluster the known documents and report before/after pushing two centers apart.

	``sys.argv[2]`` is passed to ``get_data_dir`` to locate the documents.
	The documents are clustered with ``cluster.BestPerfectCONFIRM`` (lr=0.05),
	the result is printed and drawn, the center of cluster 0 is pushed away
	from the center of cluster 1, and a fresh analyzer reports again.
	"""
	data_dir = get_data_dir(sys.argv[2])
	docs = doc.get_docs_nested(data_dir)

	# cluster.PerfectCONFIRM(docs) is the alternative clusterer tried here earlier.
	confirm = cluster.BestPerfectCONFIRM(docs, lr=0.05)
	confirm.cluster()

	# Report on the clustering as produced.
	before = metric.KnownClusterAnalyzer(confirm)
	before.print_all()
	before.draw_centers()

	# Needs at least two clusters: indexes 0 and 1 are read unconditionally.
	first = before.clusters[0]
	second = before.clusters[1]
	first.center.push_away(second.center)

	# Banner followed by two blank lines.
	print("PUSHING APART!\n\n")

	# A new analyzer is built so the second report reflects the pushed center.
	after = metric.KnownClusterAnalyzer(confirm)
	after.draw_centers()
	after.print_all()

	# Two trailing blank lines.
	print("\n")
Пример #2
0
def get_acc_v_measure(clusters):
	"""Return ``(accuracy, v_measure)`` as computed by ``metric.KnownClusterAnalyzer``.

	*clusters* is wrapped in a tiny adapter object because the analyzer is
	handed an object with a ``get_clusters()`` accessor rather than the
	clusters themselves.
	"""
	class _ClusterSource:
		# Stand-in for a clusterer: only exposes the given clusters.
		def get_clusters(self):
			return clusters

	analyzer = metric.KnownClusterAnalyzer(_ClusterSource())
	# Tuple elements are evaluated left to right: accuracy first, then v-measure.
	return analyzer.accuracy(), analyzer.v_measure()
Пример #3
0
def print_cluster_analysis(clusters):
	"""Print the general, confusion-matrix, label/cluster-matrix and metric reports.

	*clusters* is wrapped in a tiny adapter object exposing ``get_clusters()``
	before being handed to ``metric.KnownClusterAnalyzer``.
	"""
	class _ClusterSource:
		# Stand-in for a clusterer: only exposes the given clusters.
		def get_clusters(self):
			return clusters

	report = metric.KnownClusterAnalyzer(_ClusterSource())
	report.print_general_info()
	report.print_label_conf_mat()
	report.print_label_cluster_mat()
	report.print_metric_info()
Пример #4
0
def print_analysis(instances, clusters):
	"""Print every ``metric.KnownClusterAnalyzer`` report for the given clustering.

	*instances* and *clusters* are exposed to the analyzer through a tiny
	adapter object offering ``get_docs()`` and ``get_clusters()``.
	"""
	class _ClusteringSource:
		# Stand-in for a clusterer: exposes the documents and their clusters.
		def get_clusters(self):
			return clusters

		def get_docs(self):
			return instances

	report = metric.KnownClusterAnalyzer(_ClusteringSource())
	report.print_general_info()
	report.print_histogram_info()
	report.print_label_conf_mat()
	report.print_label_cluster_mat()
	report.print_label_info()
	report.print_metric_info()
Пример #5
0
def main(args):
    """Load a saved clustering, recluster it with OPTICS and print an analysis.

    Args:
        args: argv-style list. ``args[0]`` is the program name and ``args[1]``
            is the path handed to ``utils.load_obj``.

    Raises:
        SystemExit: with status 2, after printing the usage line, unless
            exactly one argument follows the program name.
    """
    if len(args) != 2:
        print("Usage: mds.py clustering.pkl")
        # A usage error is a failure; a non-zero status lets shell callers
        # detect it (2 is the conventional code for command-line errors).
        sys.exit(2)

    path = args[1]

    print("Loading")
    # `clustering` keeps a reference to what was loaded so the original
    # cluster count can be reported after `clusters` is rebound below.
    clusters = clustering = utils.load_obj(path)

    for i in [5]:
        # Each pass reclusters the output of the previous pass.
        clusters = reclusterWithOPTICS(clusters, i)

        # Flatten every cluster's members into one list. Unlike a bare
        # reduce(+), this also works when there are no clusters at all and
        # does not re-copy the accumulated list for every cluster.
        _docs = [member for c in clusters for member in c.members]

        confirm = BaseCONFIRM(_docs)
        # Install the reclustered result so the analyzer reports on it.
        confirm.clusters = clusters

        print("Original Number of Clusters: %d" % len(clustering))
        print("Final Number of Clusters: %d" % len(clusters))

        analyzer = metric.KnownClusterAnalyzer(confirm)
        analyzer.print_all()

        # QueryCount is a module-level name defined outside this function.
        print("User Queries: %s" % (QueryCount,))
Пример #6
0
def double_cluster_known():
    """Cluster with ``cluster.TemplateSorter`` twice and print an analysis.

    ``sys.argv[2]`` is passed to ``get_data_dir`` to locate the documents and
    ``sys.argv[3]`` is parsed as the float threshold given to ``go``. The
    second pass is seeded with the centers of the clusters that survived
    pruning after the first pass.
    """
    data_dir = get_data_dir(sys.argv[2])
    docs = doc.get_docs_nested(data_dir)
    epsilon = float(sys.argv[3])

    organizer = cluster.TemplateSorter(docs)

    # First pass: cluster from scratch, then prune.
    organizer.go(epsilon)
    organizer.prune_clusters()
    first_pass = organizer.get_clusters()
    print("Initial Clustering Complete")
    print("Reclustering...")

    # Second pass: reuse the surviving centers as templates.
    seeds = [found.center for found in first_pass]
    organizer.go(epsilon, templates=seeds)
    organizer.prune_clusters()
    final_clusters = organizer.get_clusters()

    # Two blank lines before the report.
    print("\n")

    # NOTE(review): the analyzer is given the cluster list itself here, while
    # other callers hand it an object exposing get_clusters() -- confirm that
    # KnownClusterAnalyzer accepts a plain list.
    analyzer = metric.KnownClusterAnalyzer(final_clusters)
    analyzer.draw_centers()
    analyzer.print_all()