Пример #1
0
    def test_online_clustering_with_tweets(self):
        """Run the online clusterer over top documents and print cluster sizes.

        Fetches documents between 2011-01-25 00:00 and 2011-01-26 00:00 via
        ``ws.get_top_documents_by_date`` (``threshold=1000``), feeds them one
        by one to ``OnlineClusterer(N=50, window=300)``, trims the clusters,
        passes "online_with_tweets" to ``dump_clusters_to_file``, draws the
        scatter and cumulative growth-timeline plots, and finally prints the
        id and size of every cluster.

        The method contains no assertions; it only exercises the pipeline.

        NOTE(review): this file defines more than one method with this exact
        name at the same indentation. If they live in one class, only the
        last definition is kept -- confirm and rename if all should run.
        """
        from_date = datetime.datetime(2011, 1, 25, 0, 0, 0)
        to_date = datetime.datetime(2011, 1, 26, 0, 0, 0)
        items = ws.get_top_documents_by_date(from_date, to_date, threshold=1000)

        oc = OnlineClusterer(N=50, window=300)
        for item in items:
            oc.cluster(item)

        # The return value is not needed (the report below reads oc.clusters);
        # the call is kept because it presumably trims in place -- TODO confirm.
        oc.trimclusters()
        oc.dump_clusters_to_file("online_with_tweets")
        oc.plot_scatter()
        oc.plot_growth_timeline(cumulative=True)

        # Single-argument parenthesised prints behave identically on
        # Python 2 and also parse on Python 3.
        for cluster in oc.clusters:
            print(cluster.id)
            print(cluster.get_size())
            print('-----------------')
    def test_online_clustering_with_tweets(self):
        """Run the online clusterer over 200 documents and print summaries.

        Fetches documents between 2011-01-25 00:00 and 2011-01-26 00:00 via
        ``ws.get_documents_by_date`` (``limit=200``), feeds them one by one
        to ``OnlineClusterer(N=50, window=100)``, trims the clusters, passes
        "online_with_tweets" to ``dump_clusters_to_file``, and then prints
        ``dist`` and ``raw`` for every document returned by each cluster's
        ``summarize()``, with a dashed separator after each cluster.

        The method contains no assertions; it only exercises the pipeline.

        NOTE(review): this file defines more than one method with this exact
        name at the same indentation. If they live in one class, only the
        last definition is kept -- confirm and rename if all should run.
        """
        from_date = datetime.datetime(2011, 1, 25, 0, 0, 0)
        to_date = datetime.datetime(2011, 1, 26, 0, 0, 0)
        items = ws.get_documents_by_date(from_date, to_date, limit=200)

        oc = OnlineClusterer(N=50, window=100)
        for item in items:
            oc.cluster(item)

        # The return value is not needed (the report below reads oc.clusters);
        # the call is kept because it presumably trims in place -- TODO confirm.
        oc.trimclusters()
        oc.dump_clusters_to_file("online_with_tweets")
        #oc.plot_scatter()
        #oc.plot_growth_timeline(cumulative=True)

        for cluster in oc.clusters:
            # Local renamed from `sorted` so the builtin is not shadowed.
            summary_docs = cluster.summarize()
            for doc in summary_docs:
                # One formatted argument keeps the "dist raw" output while
                # remaining valid syntax on both Python 2 and Python 3.
                print('%s %s' % (doc.dist, doc.raw))
            print('--------------------')
    def test_online_clustering_with_tweets(self):
        """Cluster top documents online, plot the result, and list clusters.

        Steps, in order:

        1. Fetch documents between 2011-01-25 00:00 and 2011-01-26 00:00 with
           ``ws.get_top_documents_by_date`` (``threshold=1000``).
        2. Feed each document to ``OnlineClusterer(N=50, window=300)``.
        3. Trim the clusters and pass "online_with_tweets" to
           ``dump_clusters_to_file``.
        4. Draw the scatter plot and the cumulative growth timeline.
        5. Print the id and size of every cluster, followed by a separator.

        The method contains no assertions; it only exercises the pipeline.

        NOTE(review): this file defines more than one method with this exact
        name at the same indentation. If they live in one class, only the
        last definition is kept -- confirm and rename if all should run.
        """
        from_date = datetime.datetime(2011, 1, 25, 0, 0, 0)
        to_date = datetime.datetime(2011, 1, 26, 0, 0, 0)
        items = ws.get_top_documents_by_date(from_date,
                                             to_date,
                                             threshold=1000)

        oc = OnlineClusterer(N=50, window=300)
        for item in items:
            oc.cluster(item)

        # Result intentionally not bound: nothing below reads it. The call
        # stays because it presumably trims oc.clusters in place -- TODO confirm.
        oc.trimclusters()
        oc.dump_clusters_to_file("online_with_tweets")
        oc.plot_scatter()
        oc.plot_growth_timeline(cumulative=True)

        # Parenthesised single-argument prints give the same output on
        # Python 2 and are also valid on Python 3.
        for cluster in oc.clusters:
            print(cluster.id)
            print(cluster.get_size())
            print('-----------------')