def onInstanceLoadFunc(instanceKey, oauthToken, oauthSecret, geographicSetupString, keywords, instanceSetupCode, startTime, temporalLastTimeId, count=count):
    """Construct the TwitterInstance for ``instanceKey`` from its stored settings.

    ``temporalLastTimeId`` is a two-level mapping (provider id -> place id ->
    time id).  Every key and value is converted with ``int`` and the mapping
    is flattened into a dict keyed by
    ``GeocodeResultAbstract.buildCacheIdTuple(providerId, placeId)``; that
    dict is passed to ``TwitterInstance`` as its last argument.

    ``count`` defaults to the ``count`` object visible where this function is
    defined; its first element is incremented once per call.
    """
    lastTimeIdBySource = {}
    for rawProviderId, placeTimes in temporalLastTimeId.iteritems():
        providerId = int(rawProviderId)
        for rawPlaceId, rawTimeId in placeTimes.iteritems():
            placeId = int(rawPlaceId)
            timeId = int(rawTimeId)
            sourceKey = GeocodeResultAbstract.buildCacheIdTuple(providerId, placeId)
            lastTimeIdBySource[sourceKey] = timeId
            message = 'Loaded instance %s last temporal change source %d/%d -> %d' % (
                instanceKey, providerId, placeId, timeId)
            logger.debug(message)

    # The constructed object is not kept here.
    # NOTE(review): presumably the constructor registers itself in
    # webApplication.twitter_instances - confirm.
    instanceRegistry = webApplication.twitter_instances
    authentication = TwitterAuthentication(
        Configuration.CONSUMER_TOKEN,
        Configuration.CONSUMER_SECRET,
        oauthToken,
        oauthSecret)
    geographicSetup = unicode(geographicSetupString)
    TwitterInstance(
        instanceKey,
        instanceRegistry,
        authentication,
        geographicSetup,
        keywords,
        instanceSetupCode,
        startTime,
        lastTimeIdBySource)

    # Leftover from debugging indexes; this step can be deleted.
    if args.rebuild_instance_indexes:
        logger.info('Dropping indexes of instance %s' % instanceKey)
        for collection in (getUserCollection(instanceKey), getTweetCollection(instanceKey)):
            collection.drop_indexes()

    count[0] += 1
def addTemporalEntry(self, temporalCollection, timeId, userProviderId, userPlaceId, followerProviderId, followerPlaceId, followerPlaceType):
    """Record a temporal entry from a user's place to a follower's place.

    Does nothing once ``self.is_shutdown`` is set.  Otherwise the previous
    time id remembered for the user's (provider, place) source is passed to
    the module-level ``addTemporalEntry`` together with the new ``timeId``,
    after which the remembered time id is updated both on this object and
    through ``setInstanceTemporalSourceLastTime``.
    """
    if self.is_shutdown:
        return

    # The same source is identified by a tuple for the in-memory lookup and
    # by the buildCacheId form for the collection entry.
    sourceKey = GeocodeResultAbstract.buildCacheIdTuple(userProviderId, userPlaceId)
    sourceCacheId = GeocodeResultAbstract.buildCacheId(userProviderId, userPlaceId)
    previousTimeId = self.last_temporal_time_id_by_source.get(sourceKey)

    # Inside the method body this name resolves to the module-level function,
    # not to this method.
    addTemporalEntry(
        temporalCollection,
        previousTimeId,
        timeId,
        sourceCacheId,
        '%s_%s' % (followerPlaceType, followerPlaceId),
        followerProviderId)

    self.last_temporal_time_id_by_source[sourceKey] = timeId
    setInstanceTemporalSourceLastTime(self.instance_key, userProviderId, userPlaceId, timeId)
def addTemporalEntry(self, temporalCollection, timeId, userProviderId, userPlaceId, followerProviderId, followerPlaceId, followerPlaceType):
    """Store a temporal link from the user's place to the follower's place.

    Returns immediately when ``self.is_shutdown`` is true.  Otherwise it
    looks up the last time id stored for the user's (provider, place)
    source, forwards it with the new ``timeId`` to the module-level
    ``addTemporalEntry``, and then records ``timeId`` as the new last time id
    on this object and via ``setInstanceTemporalSourceLastTime``.
    """
    if self.is_shutdown:
        return

    lastTimeIds = self.last_temporal_time_id_by_source

    # Tuple form keys the in-memory dict; the buildCacheId form is what gets
    # handed to the collection writer.
    userTupleId = GeocodeResultAbstract.buildCacheIdTuple(userProviderId, userPlaceId)
    userCacheId = GeocodeResultAbstract.buildCacheId(userProviderId, userPlaceId)
    priorTimeId = lastTimeIds.get(userTupleId)
    followerDestination = '%s_%s' % (followerPlaceType, followerPlaceId)

    # This call reaches the module-level function of the same name; method
    # names are not in scope inside a method body.
    addTemporalEntry(temporalCollection, priorTimeId, timeId, userCacheId,
                     followerDestination, followerProviderId)

    lastTimeIds[userTupleId] = timeId
    setInstanceTemporalSourceLastTime(self.instance_key, userProviderId, userPlaceId, timeId)
def onInstanceLoadFunc(instanceKey, oauthToken, oauthSecret, geographicSetupString, keywords, instanceSetupCode, startTime, temporalLastTimeId, count = count):
    """Construct a critical TwitterInstance for ``instanceKey`` from stored settings.

    ``temporalLastTimeId`` is a two-level mapping (provider id -> place id ->
    time id).  Keys and values are converted with ``int`` and flattened into
    a dict keyed by ``GeocodeResultAbstract.buildCacheIdTuple(providerId,
    placeId)``, which is passed on to ``TwitterInstance``.

    ``count`` defaults to the ``count`` object visible where this function is
    defined; its first element is incremented once per call.
    """
    lastTimeIdBySource = {}
    for rawProviderId, placeTimes in temporalLastTimeId.iteritems():
        providerId = int(rawProviderId)
        for rawPlaceId, rawTimeId in placeTimes.iteritems():
            placeId = int(rawPlaceId)
            timeId = int(rawTimeId)
            sourceKey = GeocodeResultAbstract.buildCacheIdTuple(providerId, placeId)
            lastTimeIdBySource[sourceKey] = timeId
            message = 'Loaded instance %s last temporal change source %d/%d -> %d' % (
                instanceKey, providerId, placeId, timeId)
            logger.debug(message)

    instanceRegistry = webApplication.twitter_instances
    authentication = TwitterAuthentication(
        Configuration.CONSUMER_TOKEN,
        Configuration.CONSUMER_SECRET,
        oauthToken,
        oauthSecret)
    geographicSetup = unicode(geographicSetupString)

    # Marked critical: this configuration worked before, so a failure after
    # a restart may mean the server has lost network connectivity.
    TwitterInstance(
        instanceKey,
        instanceRegistry,
        authentication,
        geographicSetup,
        keywords,
        instanceSetupCode,
        startTime,
        lastTimeIdBySource,
        isCritical = True)

    # Leftover from debugging indexes; this step can be deleted.
    if args.rebuild_instance_indexes:
        logger.info('Dropping indexes of instance %s' % instanceKey)
        for collection in (getUserCollection(instanceKey), getTweetCollection(instanceKey)):
            collection.drop_indexes()

    count[0] += 1
def extractItemFromData(self, data, signalerKey):
    """Build the JSON throughput summary for an instance or one of its locations.

    When ``signalerKey`` carries both ``provider_id`` and ``place_id`` the
    figures are read from ``data['location_tweets']`` under the instance and
    then under the location's cache id tuple; otherwise they are read from
    ``data['instance_tweets']`` under the instance.

    Returns ``None`` when there is no entry for the instance (or for the
    location).  Otherwise returns ``(newData, payload)`` where ``newData`` is
    False only if every "updated" flag is the ``False`` object, and
    ``payload`` is the JSON text of two lists
    ``[perSecond, perMinute, perHour, perDay]``: the first for the
    ``'success'`` entry, the second for the ``'geocode_fail'`` entry.
    Rates that are unavailable are emitted as ``null``; successful rates
    below one are emitted as the string ``'< 1'`` when the daily rate is
    positive.
    """
    def belowOne(rate):
        # A missing rate counts as below one.  This is the result Python 2
        # gave for ``None < 1``; spelling it out avoids the TypeError that
        # Python 3 raises when None is ordered against a number.
        return rate is None or rate < 1

    def readRates(performance):
        # Each counter yields a (count, updated) pair.
        # NOTE(review): the meaning of the three True arguments is defined
        # by time_period_count_updated, which is not visible here.
        assert isinstance(performance, RealtimePerformance)
        perDay, dayUpdated = performance.tweets_per_day.time_period_count_updated(True, True, True)
        perHour, hourUpdated = performance.tweets_per_hour.time_period_count_updated(True, True, True)
        perMinute, minuteUpdated = performance.tweets_per_minute.time_period_count_updated(True, True, True)
        return perDay, perHour, perMinute, dayUpdated, hourUpdated, minuteUpdated

    if signalerKey.provider_id is not None and signalerKey.place_id is not None:
        data = data['location_tweets']
        locationCacheId = GeocodeResultAbstract.buildCacheIdTuple(signalerKey.provider_id, signalerKey.place_id)
    else:
        data = data['instance_tweets']
        locationCacheId = None

    instanceEntry = data.get(signalerKey.instance)
    if instanceEntry is None:
        return None

    aux = instanceEntry
    if locationCacheId is not None:
        aux = aux.get(locationCacheId)
        if aux is None:
            return None

    aux = aux.get('success', None)
    if aux is not None:
        (tweetsPerDay, tweetsPerHour, tweetsPerMinute,
         tweetsPerDayUpdated, tweetsPerHourUpdated, tweetsPerMinuteUpdated) = readRates(aux)
    else:
        tweetsPerDayUpdated = tweetsPerHourUpdated = tweetsPerMinuteUpdated = False
        tweetsPerDay = tweetsPerHour = tweetsPerMinute = None

    # Geocode failures are read directly under the instance entry, never
    # under the location level.  The entry fetched above is reused rather
    # than looked up a second time.
    aux = instanceEntry.get('geocode_fail')
    if aux is not None:
        (failGeocodeTweetsPerDay, failGeocodeTweetsPerHour, failGeocodeTweetsPerMinute,
         failGeocodeTweetsPerDayUpdated, failGeocodeTweetsPerHourUpdated,
         failGeocodeTweetsPerMinuteUpdated) = readRates(aux)
    else:
        failGeocodeTweetsPerDayUpdated = failGeocodeTweetsPerHourUpdated = failGeocodeTweetsPerMinuteUpdated = False
        failGeocodeTweetsPerDay = failGeocodeTweetsPerHour = failGeocodeTweetsPerMinute = None

    updatedFlags = (
        tweetsPerDayUpdated, tweetsPerHourUpdated, tweetsPerMinuteUpdated,
        failGeocodeTweetsPerDayUpdated, failGeocodeTweetsPerHourUpdated,
        failGeocodeTweetsPerMinuteUpdated)
    # Identity test on purpose: only the False object itself counts as
    # "not updated".
    newData = not all(flag is False for flag in updatedFlags)

    # Derive the per-second rate and back-fill missing longer periods from
    # the shorter ones.
    if tweetsPerMinute is None:
        tweetsPerSecond = None
    else:
        tweetsPerSecond = int(tweetsPerMinute / 60)
        if tweetsPerHour is None:
            tweetsPerHour = tweetsPerMinute * 60
    if tweetsPerDay is None and tweetsPerHour is not None:
        tweetsPerDay = tweetsPerHour * 24

    if failGeocodeTweetsPerMinute is None:
        failGeocodeTweetsPerSecond = None
    else:
        failGeocodeTweetsPerSecond = int(failGeocodeTweetsPerMinute / 60)
        if failGeocodeTweetsPerHour is None:
            failGeocodeTweetsPerHour = failGeocodeTweetsPerMinute * 60
    if failGeocodeTweetsPerDay is None and failGeocodeTweetsPerHour is not None:
        failGeocodeTweetsPerDay = failGeocodeTweetsPerHour * 24

    # With a positive daily rate, show sub-unit rates as '< 1'.  Only the
    # successful-tweet figures get this treatment.
    hasDailyTweets = tweetsPerDay is not None and tweetsPerDay > 0
    if hasDailyTweets:
        if belowOne(tweetsPerSecond):
            tweetsPerSecond = '< 1'
        if belowOne(tweetsPerMinute):
            tweetsPerMinute = '< 1'
        if belowOne(tweetsPerHour):
            tweetsPerHour = '< 1'

    return newData, json.dumps([[tweetsPerSecond,
                                 tweetsPerMinute,
                                 tweetsPerHour,
                                 tweetsPerDay],
                                [failGeocodeTweetsPerSecond,
                                 failGeocodeTweetsPerMinute,
                                 failGeocodeTweetsPerHour,
                                 failGeocodeTweetsPerDay]])
def continueExtractItemFromData(self, data, instanceData, signalerKey):
    """Return the set of keys stored in ``instanceData`` for the signaler's location.

    The lookup key is ``GeocodeResultAbstract.buildCacheIdTuple`` applied to
    ``signalerKey.provider`` and ``signalerKey.location``.  An empty set is
    returned when ``instanceData`` has no entry for that key.  ``data`` is
    not used.
    """
    cacheIdTuple = GeocodeResultAbstract.buildCacheIdTuple(
        signalerKey.provider, signalerKey.location)
    locationEntry = instanceData.get(cacheIdTuple, {})
    return set(locationEntry.keys())