Пример #1
0
 def test_dbscan_clustering_with_tweets(self):
     """Smoke-test DBSCAN on documents fetched for a fixed date window.

     Requests at most 100 documents dated between 2011-01-25 12:00 and
     2011-01-26 12:30 through ``ws.get_documents_by_date``, clusters them
     with ``epsilon=0.02`` / ``min_pts=2``, then calls the clusterer's dump
     and plotting methods. The test makes no assertions: it passes as long
     as none of these calls raises.
     """
     from_date = datetime.datetime(2011, 1, 25, 12, 0, 0)
     to_date = datetime.datetime(2011, 1, 26, 12, 30, 0)
     items = ws.get_documents_by_date(from_date, to_date, limit=100)

     epsilon = 0.02
     min_pts = 2
     dbscan = DBSCANClusterer(filter_terms=False)
     dbscan.add_documents(items)
     # The return value of run() is intentionally not kept; the outcome is
     # inspected through the dump and the plots produced below.
     dbscan.run(epsilon, min_pts, pca=True)
     dbscan.dump_clusters_to_file("dbscan_with_tweets")
     dbscan.plot_scatter()
     dbscan.plot_growth_timeline(cumulative=True)
Пример #2
0
# Hand-written 2-D sample points used as the clustering input. Note that
# [15.0, 17.0] appears twice on purpose (exact duplicate).
points = [
    [1, 1],
    [1.5, 1],
    [1.8, 1.5],
    [2.1, 1],
    [3.1, 2],
    [4.1, 2],
    [5.1, 2],
    [10, 10],
    [11, 10.5],
    [9.5, 11],
    [9.9, 11.4],
    [15.0, 17.0],
    [15.0, 17.0],
    [7.5, -5.0],
]
dbscan = DBSCANClusterer()
# Test-only shortcut: in normal usage never assign td_matrix yourself and
# never fill document_dict with placeholder entries.
dbscan.td_matrix = points
# One placeholder document per point, keyed '0', '1', ... in point order.
dbscan.document_dict = OrderedDict(
    (str(index), 'dummy') for index in range(len(points))
)
  
class Test_Dbscan_clustering(unittest.TestCase):
    """Runs the module-level ``dbscan`` fixture and prints its clusters."""

    def test_dbscan_cluster(self):
        """Cluster the fixture data and print each cluster with its members.

        Uses the module-level names ``dbscan``, ``epsilon`` and ``min_pts``.
        The test makes no assertions; it only prints the result, so it
        passes whenever ``dbscan.run`` and the iteration do not raise.
        """
        # NOTE(review): epsilon and min_pts are not defined in the visible
        # part of this module -- confirm they are set at module level.
        clusters = dbscan.run(epsilon, min_pts)
        # Single-argument print(...) and .items() behave the same on
        # Python 2 and also work on Python 3.
        print('\n========== Results of Clustering =============')
        for cluster, members in clusters.items():
            print('\n--------Cluster %d---------' % cluster)
            for point in members:
                print(point)