from json import loads as json_loads
from os import path

ft1 = open(path.abspath(path.join(tweet_output_path, 'ft1.txt')), 'w')
ft2 = open(path.abspath(path.join(tweet_output_path, 'ft2.txt')), 'w')
close_files = lambda l: [f.close() for f in l]

tweet_graph = TweetsGraph(time_window=60)
with open(tweets_incomming_path, 'r') as tweets_incomming:
    # all tweets from the api are utf-8 encoded:
    # https://dev.twitter.com/overview/api/counting-characters
    for cnt, tweet in enumerate(tweets_incomming, start=1):
        try:
            tweet_dict = json_loads(tweet)  # json.loads uses utf-8 decoding by default
            text = tweet_dict["text"]
            created_at = tweet_dict["created_at"]
            hashtags = [hashtag['text'] for hashtag in tweet_dict['entities']['hashtags']]
            tweet_graph.update_graph(Tweet(created_at, hashtags))
            # clean_text is assumed to maintain unicode_tweets_count when count_unicode=True
            cleaned_text = clean_text(text, count_unicode=True)
            # logging.debug('tweet_cnt: {}, num_graph_nodes: {}, avg_deg: {}'.format(
            #     cnt, len(tweet_graph.graph), tweet_graph.get_graph_avg_degree_of_all_nodes()))
            ft1.write('{} (timestamp: {})\n'.format(cleaned_text, created_at))
            ft2.write('{}\n'.format(tweet_graph.get_graph_avg_degree_of_all_nodes()))
        except Exception:
            # Don't normally handle exceptions this broadly in main,
            # but play it safe on unknown data: skip malformed tweets.
            # logging.exception("Tweet on ln {} failed to parse.".format(cnt))
            pass
ft1.write('\n{} tweets contained unicode.'.format(unicode_tweets_count))
close_files([ft1, ft2])
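The `Tweet` and `TweetsGraph` implementations are not shown in this section. As a reference point, here is a minimal sketch that reproduces the behavior the test below asserts; the class and method names come from the source, but the internals (a deque window, an edge-count table) are assumptions, not the actual implementation.

```python
from collections import Counter, deque
from datetime import datetime
from itertools import combinations


class Tweet(object):
    """A tweet reduced to its parsed timestamp and lowercased hashtags."""

    TIME_FORMAT = '%a %b %d %H:%M:%S +0000 %Y'  # e.g. 'Thu Oct 29 17:51:01 +0000 2015'

    def __init__(self, created_at, hashtags):
        self.created_at = datetime.strptime(created_at, self.TIME_FORMAT)
        self.hashtags = set(h.lower() for h in hashtags)


class TweetsGraph(object):
    """Rolling-window hashtag graph: nodes are hashtags, edges connect hashtags
    that co-occurred in a tweet no older than time_window seconds."""

    def __init__(self, time_window=60):
        self.time_window = time_window
        self.graph = {}                 # hashtag -> set of neighboring hashtags
        self._window = deque()          # in-window tweets, oldest first
        self._edge_counts = Counter()   # in-window tweets supporting each edge

    @staticmethod
    def _edges(tweet):
        # every unordered pair of hashtags in the tweet, in canonical order
        return [tuple(sorted(pair)) for pair in combinations(tweet.hashtags, 2)]

    def update_graph(self, tweet):
        self._window.append(tweet)
        for a, b in self._edges(tweet):
            self._edge_counts[(a, b)] += 1
            self.graph.setdefault(a, set()).add(b)
            self.graph.setdefault(b, set()).add(a)
        # Evict tweets that fell out of the window, dropping edges (and then
        # nodes) that no remaining in-window tweet supports.
        while (tweet.created_at - self._window[0].created_at).total_seconds() > self.time_window:
            old = self._window.popleft()
            for a, b in self._edges(old):
                self._edge_counts[(a, b)] -= 1
                if not self._edge_counts[(a, b)]:
                    self.graph[a].discard(b)
                    self.graph[b].discard(a)
                    for node in (a, b):
                        if not self.graph[node]:
                            del self.graph[node]

    def get_graph_avg_degree_of_all_nodes(self):
        if not self.graph:
            return '0.00'
        total_degree = sum(len(neighbors) for neighbors in self.graph.values())
        return '{:.2f}'.format(float(total_degree) / len(self.graph))
```

Whatever the real module looks like, the test pins down the external contract: lowercased hashtag nodes, pairwise edges per tweet, eviction of tweets strictly more than 60 seconds older than the newest one, and an average degree returned as a two-decimal string.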
def test_update_graph_with_example_from_instructions_manually(self):
    '''exact example from the online instructions done manually here:
    https://github.com/InsightDataScience/coding-challenge#building-the-twitter-hashtag-graph'''
    # First tweet added to the graph
    self.tweet_graph.update_graph(
        Tweet('Thu Oct 29 17:51:01 +0000 2015', hashtags=['Spark', 'Apache']))
    # graph will have each hashtag as a node, each the other's neighbor
    assert self.tweet_graph.graph == {'apache': set(['spark']),
                                      'spark': set(['apache'])}
    # get the graph avg degree
    assert self.tweet_graph.get_graph_avg_degree_of_all_nodes() == '1.00'

    # Second tweet added to the graph
    self.tweet_graph.update_graph(
        Tweet('Thu Oct 29 17:51:30 +0000 2015', hashtags=['Apache', 'Hadoop', 'Storm']))
    # graph gets updated with the new pairwise edges
    assert self.tweet_graph.graph == {'apache': set(['spark', 'hadoop', 'storm']),
                                      'spark': set(['apache']),
                                      'hadoop': set(['apache', 'storm']),
                                      'storm': set(['apache', 'hadoop'])}
    assert self.tweet_graph.get_graph_avg_degree_of_all_nodes() == '2.00'

    # Third tweet: only one hashtag, so no edges are added and the graph is unchanged
    self.tweet_graph.update_graph(
        Tweet('Thu Oct 29 17:51:55 +0000 2015', hashtags=['Apache']))
    assert self.tweet_graph.graph == {'apache': set(['spark', 'hadoop', 'storm']),
                                      'spark': set(['apache']),
                                      'hadoop': set(['apache', 'storm']),
                                      'storm': set(['apache', 'hadoop'])}
    assert self.tweet_graph.get_graph_avg_degree_of_all_nodes() == '2.00'

    # Fourth tweet added to the graph
    self.tweet_graph.update_graph(
        Tweet('Thu Oct 29 17:51:56 +0000 2015', hashtags=['Flink', 'Spark']))
    # graph gets updated accordingly
    assert self.tweet_graph.graph == {'apache': set(['spark', 'hadoop', 'storm']),
                                      'spark': set(['apache', 'flink']),
                                      'flink': set(['spark']),
                                      'hadoop': set(['apache', 'storm']),
                                      'storm': set(['apache', 'hadoop'])}
    assert self.tweet_graph.get_graph_avg_degree_of_all_nodes() == '2.00'

    # Fifth tweet added to the graph
    self.tweet_graph.update_graph(
        Tweet('Thu Oct 29 17:51:59 +0000 2015', hashtags=['HBase', 'Spark']))
    assert self.tweet_graph.graph == {'flink': set(['spark']),
                                      'hadoop': set(['apache', 'storm']),
                                      'storm': set(['apache', 'hadoop']),
                                      'apache': set(['spark', 'hadoop', 'storm']),
                                      'hbase': set(['spark']),
                                      'spark': set(['apache', 'hbase', 'flink'])}
    assert self.tweet_graph.get_graph_avg_degree_of_all_nodes() == '2.00'

    # Last tweet: the Spark-Apache edge is removed because the tweet that
    # created it is now more than 60s older than the newest tweet
    self.tweet_graph.update_graph(
        Tweet('Thu Oct 29 17:52:05 +0000 2015', hashtags=['Apache']))
    assert self.tweet_graph.graph == {'flink': set(['spark']),
                                      'hadoop': set(['apache', 'storm']),
                                      'storm': set(['apache', 'hadoop']),
                                      'apache': set(['hadoop', 'storm']),
                                      'hbase': set(['spark']),
                                      'spark': set(['hbase', 'flink'])}
    # 5 edges remain -> total degree 10 over 6 nodes -> 10/6 rounds to 1.67
    assert self.tweet_graph.get_graph_avg_degree_of_all_nodes() == '1.67'

    # now rebuild the graph we just did manually, with the same data loaded from file
    tweets_test_graph2 = TweetsGraph()
    testfile = os.path.join(tests_dir, 'test_data', 'data_for_building_hashtag_graph.txt')
    with open(testfile, 'r') as f:
        for tweet in f:
            tweet_dict = json.loads(tweet)
            hashtags = [hashtag['text'] for hashtag in tweet_dict['entities']['hashtags']]
            tweets_test_graph2.update_graph(Tweet(tweet_dict['created_at'], hashtags))
    # check that the graph built from file matches the one just built manually
    assert tweets_test_graph2.graph == self.tweet_graph.graph
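For context, each line of the test fixture (and of the live input in the main loop above) is one raw tweet JSON object. The fixture's exact contents aren't shown here; this is a hypothetical single line trimmed to the three fields the code actually reads (real API payloads carry many more, e.g. `indices` on each hashtag entity):

```python
import json

# Hypothetical input line with just the fields the parsers above touch.
line = ('{"created_at": "Thu Oct 29 17:51:01 +0000 2015",'
        ' "text": "Spark Summit East this week! #Spark #Apache",'
        ' "entities": {"hashtags": [{"text": "Spark"}, {"text": "Apache"}]}}')

tweet_dict = json.loads(line)
hashtags = [hashtag['text'] for hashtag in tweet_dict['entities']['hashtags']]
assert hashtags == ['Spark', 'Apache']
assert tweet_dict['created_at'] == 'Thu Oct 29 17:51:01 +0000 2015'
```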