class CustomStreamListener(tweepy.StreamListener):
    """Stream listener that echoes every incoming status and stores it.

    Each status is printed to stdout as a tab-separated line and then handed
    to ``TweetInterface.saveDocument`` with ``must_have_geo_tag=False``.
    Problems are reported on stderr and never propagate to the caller.
    """

    def __init__(self, db=TwitterConfig.tweet_db,
                 collection=TwitterConfig.tweet_collection):
        """Initialise the base listener and the storage interface.

        Args:
            db: forwarded to ``TweetInterface`` as ``db``.
            collection: forwarded to ``TweetInterface`` as ``collection``.
        """
        tweepy.StreamListener.__init__(self)
        self.mid_list = []  # not used inside this class
        self.ti = TweetInterface(db=db, collection=collection)

    def save_to_mongo(self, tweet):
        """Decode ``tweet.json`` and store it, using the tweet id as ``_id``."""
        document = json.loads(tweet.json)
        document['_id'] = document['id']
        self.ti.saveDocument(document, must_have_geo_tag=False)

    def on_status(self, status):
        """Echo one status to stdout and store it.

        Any exception raised while printing or saving is written to stderr
        and swallowed so that one bad status does not interrupt the caller.
        """
        print('get')
        try:
            print("%s\t%s\t%s\t%s" % (status.text,
                                      status.author.screen_name,
                                      status.created_at,
                                      status.source))
            self.save_to_mongo(status)
        except Exception as e:
            # Deliberate best effort: report and carry on.
            sys.stderr.write('Encountered Exception: %s\n' % (e,))

    def on_error(self, status_code):
        """Report an error status code on stderr and return True."""
        sys.stderr.write(
            'Encountered error with status code: %s\n' % (status_code,))
        return True  # Don't kill the stream

    def on_timeout(self):
        """Report a timeout on stderr and return True."""
        sys.stderr.write('Timeout...\n')
        return True  # Don't kill the stream
class CustomStreamListener(tweepy.StreamListener):
    """Stream listener that echoes and stores only statuses with coordinates.

    Statuses whose ``coordinates`` field is ``None`` are skipped. Problems
    are reported on stderr and never propagate to the caller.
    """

    def __init__(self):
        """Initialise the base listener and a default ``TweetInterface``."""
        tweepy.StreamListener.__init__(self)
        self.mid_list = []  # not used inside this class
        self.ti = TweetInterface()

    def save_to_mongo(self, tweet):
        """Decode ``tweet.json`` and store it, using the tweet id as ``_id``.

        Documents whose ``coordinates`` entry is ``None`` are not stored.
        """
        document = json.loads(tweet.json)
        if document['coordinates'] is None:
            return
        document['_id'] = document['id']
        self.ti.saveDocument(document)

    def on_status(self, status):
        """Echo one status with coordinates to stdout and store it.

        A status without coordinates is skipped up front. Previously the
        ``status.coordinates['coordinates']`` lookup raised ``TypeError`` for
        each such status, so every one of them was logged as an exception.
        Any other failure is written to stderr and swallowed.
        """
        print('get')
        try:
            coordinates = status.coordinates
            if coordinates is None:
                # Nothing to echo or save: save_to_mongo would drop it too.
                return
            print("%s\t%s\t%s\t%s\t%s" % (status.text,
                                          status.author.screen_name,
                                          status.created_at,
                                          status.source,
                                          coordinates['coordinates']))
            self.save_to_mongo(status)
        except Exception as e:
            # Deliberate best effort: report and carry on.
            sys.stderr.write('Encountered Exception: %s\n' % (e,))

    def on_error(self, status_code):
        """Report an error status code on stderr and return True."""
        sys.stderr.write(
            'Encountered error with status code: %s\n' % (status_code,))
        return True  # Don't kill the stream

    def on_timeout(self):
        """Report a timeout on stderr and return True."""
        sys.stderr.write('Timeout...\n')
        return True  # Don't kill the stream
def _getFiftenMiniutesData(self):
    """Load the records of the 15 minutes ending at ``self.cur_time``.

    Queries ``self.region`` through a ``TweetInterface`` or ``PhotoInterface``
    (chosen by ``self.data_source``), orders the records by their
    ``'created_time'`` entry and passes them through ``processAsPeopleCount``.
    The result is stored in ``self.data`` and its length in
    ``self.current_value``.

    Raises:
        ValueError: if ``self.data_source`` is neither ``'twitter'`` nor
            ``'instagram'`` (this used to surface as an ``AttributeError``
            on ``None``).
    """
    if self.data_source == 'twitter':
        data_interface = TweetInterface('citybeat_production', 'tweets')
    elif self.data_source == 'instagram':
        data_interface = PhotoInterface('citybeat_production', 'photos')
    else:
        raise ValueError(
            'Unsupported data source: %r' % (self.data_source,))

    window_seconds = 15 * 60
    # The period bounds are passed to rangeQuery as strings.
    period = (str(self.cur_time - window_seconds), str(self.cur_time))
    cursor = data_interface.rangeQuery(self.region, period)

    records = sorted(cursor, key=lambda record: record['created_time'])
    records = processAsPeopleCount(records)

    self.current_value = len(records)
    self.data = records
def _extractMostPopularTweet(self):
    """Find the most repeated tweet text of the last 60 minutes.

    Tweets are read from a ``TweetInterface`` on
    ``TwitterConfig.extended_tweet_collection`` and grouped by exact text.
    A text seen once counts as a single tweet; every occurrence of a text
    seen more than once counts as a retweet.

    Returns:
        A two-element list ``[most_popular_tweet, tweets_count]``:
        ``most_popular_tweet`` has keys ``'user_name'``, ``'text'`` and
        ``'count'``; ``tweets_count`` has keys ``'tweet_percentage'`` and
        ``'retweet_percentage'`` (fractions of all tweets read). When no
        tweet is found the count is 0 and both fractions are 0.0 instead of
        raising ``ZeroDivisionError``.
    """
    ti = TweetInterface(collection=TwitterConfig.extended_tweet_collection)

    end_time = int(getCurrentStampUTC())
    begin_time = end_time - 60 * 60  # 60 minutes

    occurrences = {}
    top_text = ''
    top_user = ''
    top_count = 0
    cursor = ti.rangeQuery(period=[begin_time, end_time],
                           fields=['text', 'user.screen_name'])
    for tweet in cursor:
        text = tweet['text']
        seen = occurrences.get(text, 0) + 1
        occurrences[text] = seen
        if seen > top_count:
            # Remember the author of the occurrence that set the new maximum.
            top_count = seen
            top_text = text
            top_user = tweet['user']['screen_name']

    single_tweet_count = 0
    retweet_count = 0
    for seen in occurrences.values():
        if seen == 1:
            single_tweet_count += 1
        else:
            retweet_count += seen

    total = single_tweet_count + retweet_count
    if total:
        tweet_share = 1.0 * single_tweet_count / total
        retweet_share = 1.0 * retweet_count / total
    else:
        # Empty window: avoid dividing by zero.
        tweet_share = 0.0
        retweet_share = 0.0

    most_popular_tweet = {
        'user_name': top_user,
        'text': top_text,
        'count': top_count,
    }
    tweets_count = {
        'tweet_percentage': tweet_share,
        'retweet_percentage': retweet_share,
    }
    return [most_popular_tweet, tweets_count]
def __init__(self):
    """Initialise the base listener, a default TweetInterface and ``mid_list``.

    ``self.ti`` is a ``TweetInterface`` built with its default arguments;
    ``self.mid_list`` starts out as an empty list.
    """
    tweepy.StreamListener.__init__(self)
    self.ti = TweetInterface()
    self.mid_list = list()
def __init__(self, db=TwitterConfig.tweet_db, collection=TwitterConfig.tweet_collection):
    """Initialise the base listener and a TweetInterface for *db*/*collection*.

    Args:
        db: forwarded to ``TweetInterface`` as ``db``.
        collection: forwarded to ``TweetInterface`` as ``collection``.
    """
    tweepy.StreamListener.__init__(self)
    self.ti = TweetInterface(collection=collection, db=db)
    self.mid_list = list()
def __init__(self):
    """Create the tweet and photo interfaces with their default arguments."""
    self._tweet_interface, self._photo_interface = (
        TweetInterface(),
        PhotoInterface(),
    )
class Stats(object):
    """Count and popularity statistics over recent tweets and photos.

    All time windows are derived from ``getCurrentStampUTC()`` and passed to
    the interfaces' ``rangeQuery(period=[begin, end])``.
    NOTE(review): the 60 / 3600 multipliers suggest timestamps in seconds --
    confirm against ``getCurrentStampUTC``.
    """

    def __init__(self):
        """Create the tweet and photo interfaces with default arguments."""
        self._tweet_interface = TweetInterface()
        self._photo_interface = PhotoInterface()

    def getTweetAndPhotoStats(self):
        """Assemble the full statistics document.

        Returns:
            dict with keys ``'photo_basic_count'``, ``'tweet_basic_count'``,
            ``'created_time'``, ``'tweet_top_mentions'``,
            ``'most_popular_tweet'`` and ``'tweet_vs_retweet'``.
        """
        photo_basic_count = {
            'last_minute': self._getCurrentCountStats('photos'),
            'last_24_hour': self._get24HoursCountStats('photos'),
        }
        tweet_basic_count = {
            'last_minute': self._getCurrentCountStats('tweets'),
            'last_24_hour': self._get24HoursCountStats('tweets'),
        }
        most_popular_tweet, tweet_vs_retweet = self._extractMostPopularTweet()

        stats = {}
        stats['photo_basic_count'] = photo_basic_count
        stats['tweet_basic_count'] = tweet_basic_count
        stats['created_time'] = str(getCurrentStampUTC())
        stats['tweet_top_mentions'] = self._extractTweetTopMentions()
        stats['most_popular_tweet'] = most_popular_tweet
        stats['tweet_vs_retweet'] = tweet_vs_retweet
        return stats

    def _getCurrentCountStats(self, type):
        """Return ``{'count', 'delta'}`` for the latest one-minute window.

        Args:
            type: ``'photos'`` or ``'tweets'``.
        """
        assert type in ['photos', 'tweets']
        if type == 'photos':
            count, delta = self._extractPhotoCount()
        else:
            count, delta = self._extractTweetCount()
        return {'count': count, 'delta': delta}

    def _get24HoursCountStats(self, type):
        """Return hourly counts for the last 24 hours and for a week earlier.

        Args:
            type: ``'photos'`` or ``'tweets'``.
        """
        assert type in ['photos', 'tweets']
        return {
            'current_count': self._extract24HoursCountsStats(type=type),
            'last_week_count': self._extract24HoursCountsStats(
                past_week=True, type=type),
        }

    def _countAndDelta(self, interface, latency):
        """Count one minute of items and compare it with the prior 20 minutes.

        The one-minute window ends ``latency`` before now; the baseline is
        the per-minute average of the 20 minutes just before that window.

        Returns:
            ``[current_count, delta]`` where ``delta`` is the relative change
            against the baseline, or ``stats_config.NO_BASE_LINE`` when the
            baseline is zero.
        """
        now = int(getCurrentStampUTC())
        window_end = now - latency
        window_start = window_end - 60
        current_count = interface.rangeQuery(
            period=[window_start, window_end]).count()
        baseline_count = interface.rangeQuery(
            period=[window_start - 60 * 20, window_start]).count() / 20.0
        if baseline_count == 0.0:
            return [current_count, stats_config.NO_BASE_LINE]
        return [current_count,
                (current_count - baseline_count) / baseline_count]

    def _extractTweetCount(self):
        """Return ``[count, delta]`` for tweets, allowing 5 units of latency."""
        # 5 seconds as the latency
        return self._countAndDelta(self._tweet_interface, 5)

    def _extractPhotoCount(self):
        """Return ``[count, delta]`` for photos, using a 4 * 60 offset."""
        return self._countAndDelta(self._photo_interface, 4 * 60)

    def _extract24HoursCountsStats(self, past_week=False, type='tweets'):
        """Return 24 hourly counts, most recent hour first.

        Args:
            past_week: when true, shift every window back by 7 * 24 hours.
            type: ``'tweets'`` selects the tweet interface; any other value
                selects the photo interface.
        """
        now = int(getCurrentStampUTC())
        offset = 7 * 24 if past_week else 0
        if type == 'tweets':
            interface = self._tweet_interface
        else:
            interface = self._photo_interface

        counts = []
        for hour in range(24):
            end_time = now - 3600 * (hour + offset)
            begin_time = end_time - 3600
            counts.append(
                interface.rangeQuery(period=[begin_time, end_time]).count())
        return counts

    @staticmethod
    def _rankMentions(tweets, k=10):
        """Rank the @mentions found in ``tweet['text']`` by frequency.

        Args:
            tweets: iterable of mappings with a ``'text'`` entry.
            k: maximum number of entries to return.

        Returns:
            list of ``{'user_name', 'count'}`` dicts, most mentioned first.
        """
        mention_re = re.compile(
            r'(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9_-]+)')
        users = {}
        for tweet in tweets:
            for mention in mention_re.findall(tweet['text']):
                users[mention] = users.get(mention, 0) + 1
        ranked = sorted(users.items(), key=operator.itemgetter(1),
                        reverse=True)
        return [{'user_name': name, 'count': count}
                for name, count in ranked[:max(k, 0)]]

    def _extractTweetTopMentions(self, k=10):
        """Return the ``k`` most mentioned user names of the last 60 minutes.

        ``k`` used to be ignored in favour of a hard-coded 10; the default
        keeps that behaviour.
        """
        # 60 minutes
        end_time = int(getCurrentStampUTC())
        begin_time = end_time - 60 * 60
        cursor = self._tweet_interface.rangeQuery(
            period=[begin_time, end_time], fields=['text'])
        return self._rankMentions(cursor, k)

    @staticmethod
    def _summarizeTweetTexts(tweets):
        """Group tweets by exact text and summarise repetition.

        A text seen once counts as a single tweet; every occurrence of a
        text seen more than once counts as a retweet.

        Args:
            tweets: iterable of mappings with ``'text'`` and
                ``['user']['screen_name']`` entries.

        Returns:
            ``[most_popular_tweet, tweets_count]``. For an empty input the
            count is 0 and both percentages are 0.0 (this used to raise
            ``ZeroDivisionError``).
        """
        occurrences = {}
        top_text = ''
        top_user = ''
        top_count = 0
        for tweet in tweets:
            text = tweet['text']
            seen = occurrences.get(text, 0) + 1
            occurrences[text] = seen
            if seen > top_count:
                # Keep the author of the occurrence that set the new maximum.
                top_count = seen
                top_text = text
                top_user = tweet['user']['screen_name']

        single_tweet_count = 0
        retweet_count = 0
        for seen in occurrences.values():
            if seen == 1:
                single_tweet_count += 1
            else:
                retweet_count += seen

        total = single_tweet_count + retweet_count
        if total:
            tweet_share = 1.0 * single_tweet_count / total
            retweet_share = 1.0 * retweet_count / total
        else:
            tweet_share = 0.0
            retweet_share = 0.0

        most_popular_tweet = {
            'user_name': top_user,
            'text': top_text,
            'count': top_count,
        }
        tweets_count = {
            'tweet_percentage': tweet_share,
            'retweet_percentage': retweet_share,
        }
        return [most_popular_tweet, tweets_count]

    def _extractMostPopularTweet(self):
        """Summarise the last 60 minutes of the extended tweet collection.

        Returns:
            ``[most_popular_tweet, tweets_count]`` as produced by
            ``_summarizeTweetTexts``.
        """
        ti = TweetInterface(
            collection=TwitterConfig.extended_tweet_collection)
        # 60 minutes
        end_time = int(getCurrentStampUTC())
        begin_time = end_time - 60 * 60
        cursor = ti.rangeQuery(period=[begin_time, end_time],
                               fields=['text', 'user.screen_name'])
        return self._summarizeTweetTexts(cursor)