def func(templateArguments, instance, location, provider):
    """Render the location page for one place within a search stream.

    ``instance`` is the key used to look up the running stream, while
    ``provider`` and ``location`` together identify the cached geocode.
    ``templateArguments`` is updated in place with every value gathered here
    and the result of rendering ``location.tpl`` with it is returned.

    ``abort(404, ...)`` is called when the stream is unknown or when no
    geocode is cached under the requested identifiers.
    """
    # ``self`` is not a parameter of this function; it has to be supplied by
    # the scope in which this handler is defined.
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(
        instance)
    if twitterInstance is None:
        abort(404, "No active search stream found at this address")

    geocode = geocodeFromCacheById(
        GeocodeResultAbstract.buildCacheId(provider, location))
    if geocode is None:
        # Explicit check rather than ``assert``: assertions are stripped
        # under ``python -O``. A place that cannot be resolved is treated
        # exactly like an unknown stream above.
        abort(404, "No location found at this address")

    instanceDescription = getInstanceDescription(twitterInstance)
    instanceLink = getInstanceLink(twitterInstance)
    homeLink = getHomeLink(Configuration.PROJECT_NAME)

    pageValues = {
        'home_link': homeLink,
        'instance': instance,
        'location': location,
        'provider': provider,
        'instance_link': instanceLink,
        'instance_description': instanceDescription,
        'place': geocode.display_name_short,
        'place_coord': geocode.coordinate,
        'startEpoch': twitterInstance.constructed_at,
        # Client needs to offset to this epoch in case its clock is wrong.
        'server_current_epoch': getEpochMs(),
        'max_tweets': Configuration.MAX_CLIENT_LIVE_TWEETS,
    }

    # The remaining entries are only added when the geocode carries them.
    if geocode.has_bounding_box:
        pageValues['place_bounding_box'] = geocode.bounding_box

    if geocode.has_country:
        country = geocode.country
        pageValues['place_country_link'] = LocationsPage.link_info.getPageLink(
            instance, country.place_id, country.provider_id)
        pageValues['place_country'] = country.display_name_short

    if geocode.has_continent:
        continent = geocode.continent
        pageValues['place_continent_link'] = LocationsPage.link_info.getPageLink(
            instance, continent.place_id, continent.provider_id)
        pageValues['place_continent'] = continent.display_name_short

    templateArguments.update(pageValues)
    return template('location.tpl', templateArguments)
def func(templateArguments, instance):
    """Return one page of cached tweets or users as JSON-ready rows.

    All inputs other than ``instance`` are read from ``request.GET``:
    ``type`` ('tweet' or 'user'), ``start_epoch``, ``end_epoch``, ``page``,
    ``place_id``, ``provider_id``, ``projection_type`` and ``followee``.

    Returns ``{'json': rows}``. A ``redirect_problem(...)`` result is
    returned instead when ``type`` is missing, when the projection type is
    unsupported, or when users are requested without a followee.
    ``templateArguments`` is not used.
    """
    query = request.GET
    requestedType = parseString(query.type, ['tweet', 'user'])
    rangeStart = parseInteger(query.start_epoch)
    rangeEnd = parseInteger(query.end_epoch)
    pageIndex = parseInteger(query.page)
    placeId = parseInteger(query.place_id)
    providerId = parseInteger(query.provider_id)
    projectionName = parseString(query.projection_type)
    followeeId = parseInteger(query.followee)

    placeCacheId = GeocodeResultAbstract.buildCacheId(providerId, placeId)

    if requestedType is None:
        return redirect_problem('type is a required argument')

    # A missing page number means the first page.
    if pageIndex is None:
        pageIndex = 0

    rows = []

    if requestedType == 'tweet':
        tweets = readTweetsFromCache(None,
                                     instance,
                                     placeCacheId,
                                     rangeStart,
                                     rangeEnd,
                                     pageIndex,
                                     TwitterCachePage.PAGE_SIZE_FULL_DATA)
        if tweets is None:
            tweets = []

        for tweet in tweets:
            assert isinstance(tweet, Tweet)
            authorHtml = UserInformationPage.getPageLinkImage(
                instance, tweet.user, target='_self')
            rows.append([tweet.created_at,
                         authorHtml,
                         tweet.user.location_text,
                         tweet.text])

    elif requestedType == 'user':
        # The projection decides both which user fields are loaded and the
        # page size used for the query.
        if len(projectionName) == 0:
            projection, pageSize = None, TwitterCachePage.PAGE_SIZE_FULL_DATA
        elif projectionName == 'name-only':
            projection, pageSize = (UserProjection.IdNameImage(),
                                    TwitterCachePage.PAGE_SIZE_ID_NAME_DATA)
        else:
            return redirect_problem(
                'Unsupported projection type: %s' % projectionName)

        if followeeId is None:
            return redirect_problem('Followee is required')

        users = readUsersFromCache(None,
                                   instance,
                                   placeCacheId,
                                   rangeStart,
                                   rangeEnd,
                                   pageIndex,
                                   pageSize,
                                   followeeId,
                                   userProjection=projection)
        if users is None:
            users = []

        for user in users:
            assert isinstance(user, User)
            rows.append([user.id,
                         user.name,
                         user.profile_image_url,
                         UserInformationPage.link_info.getPageLink(instance, user.id)])

    return {'json': rows}
def addTemporalEntry(self, temporalCollection, timeId, userProviderId, userPlaceId,
                     followerProviderId, followerPlaceId, followerPlaceType):
    """Store one temporal entry for a source place and remember its time id.

    Does nothing once ``self.is_shutdown`` is set. Otherwise the entry is
    written through the free function ``addTemporalEntry`` (the bare name
    used below; presumably a module-level helper sharing this method's
    name), after which ``timeId`` becomes the last known time id for the
    source place, both in ``self.last_temporal_time_id_by_source`` and via
    ``setInstanceTemporalSourceLastTime``.
    """
    if self.is_shutdown:
        return

    # The same provider/place pair is needed in two forms: the tuple form
    # keys the in-memory map, the other form is handed to the writer.
    sourceKey = GeocodeResultAbstract.buildCacheIdTuple(userProviderId, userPlaceId)
    sourceCacheId = GeocodeResultAbstract.buildCacheId(userProviderId, userPlaceId)

    lastTimeIdBySource = self.last_temporal_time_id_by_source
    previousTimeId = lastTimeIdBySource.get(sourceKey)

    # Destination is encoded as "<place type>_<place id>".
    destinationKey = '%s_%s' % (followerPlaceType, followerPlaceId)

    addTemporalEntry(temporalCollection,
                     previousTimeId,
                     timeId,
                     sourceCacheId,
                     destinationKey,
                     followerProviderId)

    lastTimeIdBySource[sourceKey] = timeId
    setInstanceTemporalSourceLastTime(self.instance_key,
                                      userProviderId,
                                      userPlaceId,
                                      timeId)
def addTemporalEntry(self, temporalCollection, timeId, userProviderId, userPlaceId,
                     followerProviderId, followerPlaceId, followerPlaceType):
    """Write a temporal entry linking a source place to a follower place.

    Returns immediately when ``self.is_shutdown`` is set. Otherwise the
    previous time id recorded for the source place is looked up, the entry
    is passed to the free function ``addTemporalEntry`` (the bare name used
    below; presumably a module-level helper with the same name as this
    method), and ``timeId`` is stored as the new last time id, both in
    memory and through ``setInstanceTemporalSourceLastTime``.
    """
    if not self.is_shutdown:
        mapKey = GeocodeResultAbstract.buildCacheIdTuple(userProviderId, userPlaceId)
        cacheId = GeocodeResultAbstract.buildCacheId(userProviderId, userPlaceId)

        # None when this source place has not produced an entry before.
        priorTimeId = self.last_temporal_time_id_by_source.get(mapKey, None)

        # The follower place is identified as "<place type>_<place id>".
        target = '%s_%s' % (followerPlaceType, followerPlaceId)

        addTemporalEntry(
            temporalCollection, priorTimeId, timeId, cacheId, target, followerProviderId)

        self.last_temporal_time_id_by_source[mapKey] = timeId
        setInstanceTemporalSourceLastTime(
            self.instance_key, userProviderId, userPlaceId, timeId)
def func(templateArguments, instance, location, provider):
    """Build the template arguments for a place and render ``location.tpl``.

    The stream is looked up by ``instance``; the geocode is looked up in the
    cache by ``provider`` and ``location``. ``templateArguments`` is updated
    in place and then passed to ``template``; the rendered result is
    returned.

    ``abort(404, ...)`` is called when the stream does not exist or when the
    geocode is not in the cache.
    """
    # ``self`` is a free variable here: it must come from the scope that
    # defines this handler.
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
    if twitterInstance is None:
        abort(404, "No active search stream found at this address")

    geocode = geocodeFromCacheById(GeocodeResultAbstract.buildCacheId(provider, location))
    if geocode is None:
        # Was an ``assert``. Assertions are removed under ``python -O``, so
        # the missing geocode is reported explicitly, in the same way as
        # the missing stream above.
        abort(404, "No location found at this address")

    instanceDescription = getInstanceDescription(twitterInstance)
    instanceLink = getInstanceLink(twitterInstance)
    homeLink = getHomeLink(Configuration.PROJECT_NAME)

    templateArguments.update({
        'home_link': homeLink,
        'instance': instance,
        'location': location,
        'provider': provider,
        'instance_link': instanceLink,
        'instance_description': instanceDescription,
        'place': geocode.display_name_short,
        'place_coord': geocode.coordinate,
        'startEpoch': twitterInstance.constructed_at,
        # Client needs to offset to this epoch in case its clock is wrong.
        'server_current_epoch': getEpochMs(),
        'max_tweets': Configuration.MAX_CLIENT_LIVE_TWEETS,
    })

    # Optional parts of the geocode are only exposed when present.
    if geocode.has_bounding_box:
        templateArguments.update({'place_bounding_box': geocode.bounding_box})

    if geocode.has_country:
        country = geocode.country
        templateArguments.update({
            'place_country_link': LocationsPage.link_info.getPageLink(
                instance, country.place_id, country.provider_id),
            'place_country': country.display_name_short,
        })

    if geocode.has_continent:
        continent = geocode.continent
        templateArguments.update({
            'place_continent_link': LocationsPage.link_info.getPageLink(
                instance, continent.place_id, continent.provider_id),
            'place_continent': continent.display_name_short,
        })

    return template('location.tpl', templateArguments)
def func(templateArguments, instance):
    """Return influence data for a source place, grouped by place type.

    Reads ``start_epoch``, ``end_epoch``, ``source_place_id`` and
    ``source_provider_id`` from ``request.GET``, fetches the temporal range
    for the source place and builds, for each of city / country / continent,
    the top records by count together with the sum of their counts.

    Returns ``{'json': {'city': part, 'country': part, 'continent': part}}``
    where each part is ``{'geocode_list': [...], 'total': n}`` and every
    list entry is ``[placeId, providerId, display_name, coordinate, count,
    bounding_box]``. Returns an empty dict when the stream is unknown or the
    source place id is missing. ``templateArguments`` is not used.
    """
    # ``self`` is a free variable supplied by the defining scope.
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(
        instance)
    if twitterInstance is None:
        return dict()

    baseEpoch = twitterInstance.constructed_at
    start_epoch = parseInteger(request.GET.start_epoch, default=None)
    end_epoch = parseInteger(request.GET.end_epoch, default=None)
    source_place_id = parseInteger(request.GET.source_place_id)
    source_provider_id = parseInteger(request.GET.source_provider_id)

    if source_place_id is None:
        # Log what the client actually sent; the parsed value is always
        # None on this path and would make the message useless.
        logger.error(
            'Invalid place ID specified while providing influence data: %s'
            % unicode(request.GET.source_place_id))
        return dict()

    source_cache_id = GeocodeResultAbstract.buildCacheId(
        source_provider_id, source_place_id)
    temporalCollection = getTemporalInfluenceCollection(instance)

    def toTimeId(epoch):
        # An absent bound stays None, i.e. that side of the range is open.
        if epoch is None:
            return None
        return getTimeIdFromTimestamp(
            baseEpoch, Configuration.TEMPORAL_STEP, epoch)

    start_time_id = toTimeId(start_epoch)
    end_time_id = toTimeId(end_epoch)

    timerMs = getEpochMs()
    cacheData = getTemporalRange(temporalCollection,
                                 start_time_id,
                                 end_time_id,
                                 source_cache_id,
                                 preciseFromBack=True,
                                 preciseFromFront=True)
    logger.info('Took %dms to read temporal range data'
                % (getEpochMs() - timerMs))

    timerMs = getEpochMs()
    recordsByPlaceType = dict()
    if cacheData is not None:
        for providerId, providerIdData in cacheData.iteritems():
            providerId = int(providerId)
            for destination, count in providerIdData.iteritems():
                # Destination keys have the form "<place type>_<place id>".
                split = destination.split('_')
                placeType = int(split[0])
                placeId = int(split[1])
                record = [placeId, providerId, None, None, count, None]
                recordsByPlaceType.setdefault(placeType, list()).append(record)

    geocodeByPlaceType = dict()
    totalsByPlaceType = dict()
    maxRecords = Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE

    # Process only the records we are going to display.
    for placeType, records in recordsByPlaceType.iteritems():
        ranked = sorted(records, key=lambda x: x[4], reverse=True)[:maxRecords]
        displayed = []
        total = 0
        for record in ranked:
            cacheId = GeocodeResultAbstract.buildCacheId(record[1], record[0])
            geocode = geocodeFromCacheById(cacheId)
            if geocode is None:
                # One unresolved place should not fail the whole response;
                # drop the record and keep going.
                logger.warning(
                    'No cached geocode for place %s of provider %s, skipping'
                    % (record[0], record[1]))
                continue

            record[2] = geocode.display_name
            record[3] = geocode.coordinate
            record[5] = geocode.bounding_box
            total += record[4]
            displayed.append(record)

        geocodeByPlaceType[placeType] = displayed
        totalsByPlaceType[placeType] = total

    def getResultPart(placeType):
        return {
            'geocode_list': geocodeByPlaceType.get(placeType, list()),
            'total': totalsByPlaceType.get(placeType, 0)
        }

    resultData = dict()
    resultData['city'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CITY)
    resultData['country'] = getResultPart(GeocodeResultAbstract.PlaceTypes.COUNTRY)
    resultData['continent'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CONTINENT)

    logger.info('Took %dms to build temporal range result data'
                % (getEpochMs() - timerMs))

    return {'json': resultData}
def func(templateArguments, instance):
    """Build the JSON payload for a page of tweets or users from the cache.

    Query parameters are taken from ``request.GET``: ``type`` selects
    between 'tweet' and 'user'; ``start_epoch``, ``end_epoch``, ``page``,
    ``place_id`` and ``provider_id`` narrow the query; ``projection_type``
    and ``followee`` apply to the user query only.

    Returns ``{'json': data}`` with one list per tweet or user, or the
    result of ``redirect_problem`` when a required argument is missing or
    the projection type is not supported. ``templateArguments`` is not
    used.
    """
    dataType = parseString(request.GET.type, ['tweet', 'user'])
    epochStart = parseInteger(request.GET.start_epoch)
    epochEnd = parseInteger(request.GET.end_epoch)
    pageNumber = parseInteger(request.GET.page)
    placeId = parseInteger(request.GET.place_id)
    providerId = parseInteger(request.GET.provider_id)
    projectionType = parseString(request.GET.projection_type)
    followee = parseInteger(request.GET.followee)
    cacheId = GeocodeResultAbstract.buildCacheId(providerId, placeId)

    if dataType is None:
        return redirect_problem('type is a required argument')

    pageNumber = 0 if pageNumber is None else pageNumber

    def tweetRow(tweet):
        # One output row per tweet: time, author link markup, author
        # location text, tweet text.
        assert isinstance(tweet, Tweet)
        userHtml = UserInformationPage.getPageLinkImage(
            instance, tweet.user, target='_self')
        return [tweet.created_at, userHtml, tweet.user.location_text, tweet.text]

    def userRow(user):
        # One output row per user: id, name, avatar URL, link to user page.
        assert isinstance(user, User)
        return [user.id,
                user.name,
                user.profile_image_url,
                UserInformationPage.link_info.getPageLink(instance, user.id)]

    data = []

    if dataType == 'tweet':
        tweets = readTweetsFromCache(
            None, instance, cacheId, epochStart, epochEnd, pageNumber,
            TwitterCachePage.PAGE_SIZE_FULL_DATA)
        if tweets is not None:
            data = [tweetRow(tweet) for tweet in tweets]

    elif dataType == 'user':
        if len(projectionType) == 0:
            # No projection requested: full user data.
            projection = None
            pageSize = TwitterCachePage.PAGE_SIZE_FULL_DATA
        elif projectionType == 'name-only':
            projection = UserProjection.IdNameImage()
            pageSize = TwitterCachePage.PAGE_SIZE_ID_NAME_DATA
        else:
            return redirect_problem(
                'Unsupported projection type: %s' % projectionType)

        if followee is None:
            return redirect_problem('Followee is required')

        users = readUsersFromCache(
            None, instance, cacheId, epochStart, epochEnd, pageNumber,
            pageSize, followee, userProjection=projection)
        if users is not None:
            data = [userRow(user) for user in users]

    return {'json': data}
def func(templateArguments, instance):
    """Return influence data for a source place, grouped by place type.

    ``start_epoch``, ``end_epoch``, ``source_place_id`` and
    ``source_provider_id`` are read from ``request.GET``. The temporal range
    for the source place is fetched and, for each destination place type,
    the highest-count records are resolved to geocodes and summed.

    Returns ``{'json': resultData}`` where ``resultData`` has the keys
    'city', 'country' and 'continent', each mapping to
    ``{'geocode_list': [...], 'total': n}``. List entries are
    ``[placeId, providerId, display_name, coordinate, count, bounding_box]``.
    An empty dict is returned when the stream is unknown or the source place
    id is missing. ``templateArguments`` is not used.
    """
    # ``self`` is a free variable supplied by the defining scope.
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
    if twitterInstance is None:
        return dict()

    baseEpoch = twitterInstance.constructed_at
    start_epoch = parseInteger(request.GET.start_epoch, default=None)
    end_epoch = parseInteger(request.GET.end_epoch, default=None)
    source_place_id = parseInteger(request.GET.source_place_id)
    source_provider_id = parseInteger(request.GET.source_provider_id)

    if source_place_id is None:
        # The parsed id is always None on this path, so report the raw
        # request value instead to keep the log entry informative.
        logger.error('Invalid place ID specified while providing influence data: %s' % unicode(request.GET.source_place_id))
        return dict()

    source_cache_id = GeocodeResultAbstract.buildCacheId(source_provider_id, source_place_id)
    temporalCollection = getTemporalInfluenceCollection(instance)

    # A missing epoch leaves the corresponding time id as None.
    start_time_id = None
    if start_epoch is not None:
        start_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, start_epoch)

    end_time_id = None
    if end_epoch is not None:
        end_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, end_epoch)

    timerMs = getEpochMs()
    cacheData = getTemporalRange(temporalCollection, start_time_id, end_time_id, source_cache_id, preciseFromBack=True, preciseFromFront=True)
    logger.info('Took %dms to read temporal range data' % (getEpochMs() - timerMs))

    timerMs = getEpochMs()
    candidatesByPlaceType = dict()
    if cacheData is not None:
        for providerId, providerIdData in cacheData.iteritems():
            providerId = int(providerId)
            for destination, count in providerIdData.iteritems():
                # Destination keys look like "<place type>_<place id>".
                split = destination.split('_')
                placeType = int(split[0])
                placeId = int(split[1])
                record = [placeId, providerId, None, None, count, None]
                candidatesByPlaceType.setdefault(placeType, list()).append(record)

    geocodeByPlaceType = dict()
    totalsByPlaceType = dict()
    limit = Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE

    # Process only the records we are going to display.
    for placeType, records in candidatesByPlaceType.iteritems():
        shown = []
        runningTotal = 0
        for record in sorted(records, key=lambda x: x[4], reverse=True)[:limit]:
            geocode = geocodeFromCacheById(GeocodeResultAbstract.buildCacheId(record[1], record[0]))
            if geocode is None:
                # Skip places that cannot be resolved rather than failing
                # the whole request on an attribute access.
                logger.warning('No cached geocode for place %s of provider %s, skipping' % (record[0], record[1]))
                continue

            record[2] = geocode.display_name
            record[3] = geocode.coordinate
            record[5] = geocode.bounding_box
            runningTotal += record[4]
            shown.append(record)

        geocodeByPlaceType[placeType] = shown
        totalsByPlaceType[placeType] = runningTotal

    def getResultPart(placeType):
        return {'geocode_list' : geocodeByPlaceType.get(placeType, list()),
                'total' : totalsByPlaceType.get(placeType, 0)}

    resultData = dict()
    resultData['city'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CITY)
    resultData['country'] = getResultPart(GeocodeResultAbstract.PlaceTypes.COUNTRY)
    resultData['continent'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CONTINENT)

    logger.info('Took %dms to build temporal range result data' % (getEpochMs() - timerMs))

    return {'json' : resultData}
def manageSocket(self, webSocket, tupleArguments, socketId):
    """Stream cached users or tweets to the client over tunnels, in batches.

    ``tupleArguments[0]`` is the instance key of the stream to read from.
    All other options are read from ``request.GET``: ``batchSizeBytes``,
    ``tweet_info``, ``follower_info_full``, ``follower_info_short``,
    ``provider_id``, ``place_id``, ``start_epoch`` and ``end_epoch``.

    Exactly one output mode is used. ``follower_info_full`` wins over
    ``tweet_info``, which wins over ``follower_info_short``; when none is
    set, full follower info is the default.

    Data is written to the open tunnel(s) until more than ``bytesPerBatch``
    has been counted or the read finishes; the tunnels are then closed and,
    unless finished, reopened with a fresh header for the next batch.

    ``socketId`` is not used. Every return path visible here returns False.
    """
    instanceId = tupleArguments[0]

    mainControl = webSocket.controls[self.key]
    assert isinstance(mainControl, DocumentControl)

    bytesPerBatch = parseInteger(request.GET.batchSizeBytes, maximum=1024 * 1024 * 256, default=1024 * 1024 * 1)
    tweetInfo = parseBoolean(request.GET.tweet_info, False)
    followerInfo = parseBoolean(request.GET.follower_info_full, False)
    followerInfoShort = parseBoolean(request.GET.follower_info_short, False)
    providerId = parseInteger(request.GET.provider_id)
    placeId = parseInteger(request.GET.place_id)
    startEpoch = parseInteger(request.GET.start_epoch)
    endEpoch = parseInteger(request.GET.end_epoch)

    # A place filter is only built when both halves of the id were supplied.
    if placeId is not None and providerId is not None:
        placeCacheId = GeocodeResultAbstract.buildCacheId(providerId, placeId)
    else:
        placeCacheId = None

    # Normalise the three flags so that exactly one of them is set.
    if followerInfo:
        tweetInfo = False
        followerInfoShort = False
    elif tweetInfo:
        followerInfo = False
        followerInfoShort = False
    elif followerInfoShort:
        followerInfo = False
        tweetInfo = False
    else:
        followerInfo = True

    userTunnelId = 'user_tunnel'
    tweetTunnelId = None

    # The tweet tunnel id is only set in tweet mode.
    if tweetInfo:
        tweetTunnelId = 'tweet_tunnel'

    def openRequiredTunnels():
        # Tweet mode opens tunnels through openTunnels(); the other modes
        # open only the user tunnel.
        if tweetInfo:
            return self.openTunnels(webSocket)
        else:
            return self.openTunnel(userTunnelId, webSocket)

    if not openRequiredTunnels():
        logger.error('Failed to open initial tunnels')
        return False

    # Per-mode settings for the projection and for the cache read below.
    if tweetInfo:
        followerIdsFlag = False
        followeeIdsFlag = False
        analysisFlag = False
        isFollowersLoadedRequirement = None
        associatedWithTweetRequirement = True
        recursiveCacheFlag = False
        followerIdsProjection = None
        outputType = 1 # for csv.
    elif followerInfo:
        followerIdsFlag = True
        followeeIdsFlag = True
        analysisFlag = True
        isFollowersLoadedRequirement = True
        associatedWithTweetRequirement = None
        recursiveCacheFlag = True
        followerIdsProjection = None # this gives us all data on each follower.
        outputType = 2
    elif followerInfoShort:
        followerIdsFlag = True
        followeeIdsFlag = True
        followerIdsProjection = NoQueryProjection()
        analysisFlag = True
        isFollowersLoadedRequirement = True
        associatedWithTweetRequirement = None
        recursiveCacheFlag = True
        outputType = 3
    else:
        # Unreachable after the normalisation above; kept as a safety net.
        raise NotImplementedError()

    # NOTE(review): the meaning of each positional argument is defined by
    # UserProjection, which is not visible here - confirm against its
    # signature before changing any of them.
    userProjection = UserProjection(True, True, None, True, followerIdsFlag, followerIdsProjection, followeeIdsFlag, UserProjection.Id(), True, False, False, True, True, False, False, False, False, analysisFlag)

    # One-element lists serve as mutable cells that the nested functions
    # below can update in place.
    isFirstIteration = [True]

    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instanceId)
    if twitterInstance is None:
        return False

    twitterSession = twitterInstance.twitter_thread.twitter_session
    progressBarTotalId = 'progress-bar-total'
    progressBarCurrentBatchId = 'progress-bar-current-batch'

    signaler = EventSignaler(self.key, [webSocket])

    # Used to rate-limit progress bar updates; see its use in updateSocket.
    updateProgressBarFreq = Timer(400,True)

    def sendData(tunnelId, data):
        # Each item is sent as text terminated by CRLF.
        self.sendDataOnTunnel(webSocket, tunnelId, (unicode(data) + '\r\n'))

    def sendHeader():
        # The user header is always sent; the tweet header only when a
        # tweet tunnel is in use.
        sendData(userTunnelId, getUserHeader(outputType))
        if tweetTunnelId is not None:
            sendData(tweetTunnelId, getTweetHeader())

    def doProgressBarChange(percentage, progressBarId):
        # Sets the width of the given progress bar element on the client.
        mainControl.executeJavascript('$("#%s").width("%.3f%%");' % (progressBarId, percentage))

    sendHeader()

    counter = [0]
    previousCounter = [0]

    def updateSocket(controls, data, bytesCounter=counter, bytesPerBatch=bytesPerBatch, previousCounter=previousCounter, isFirstIteration=isFirstIteration):
        # Handler passed to the signaler for every cache item. The shared
        # cells are bound as default arguments when this function is
        # defined.
        user = data['user_data']
        tweet = data['tweet_data']
        percentage = data['percentage']
        isFinished = data['isFinished']

        # ``control`` is only used for the type assertion below.
        control = controls[self.key]
        assert isinstance(control, DocumentControl)

        def updateProgressBars():
            previousCounter[0] = thisCounter = bytesCounter[0]
            percentageCurrentBatch = float(thisCounter) / float(bytesPerBatch) * 100
            percentageTotal = percentage

            # Once the total is complete the batch bar is shown as full.
            if percentageTotal >= 100:
                percentageCurrentBatch = 100

            # During the first batch the batch bar never trails the total.
            if isFirstIteration[0] and percentageCurrentBatch < percentageTotal:
                percentageCurrentBatch = percentageTotal

            doProgressBarChange(percentageTotal, progressBarTotalId)
            doProgressBarChange(percentageCurrentBatch, progressBarCurrentBatchId)

        # Refresh the bars only when the counter moved and the timer ticked.
        if previousCounter[0] != bytesCounter[0] and updateProgressBarFreq.ticked():
            updateProgressBars()

        dataToSendToClient = ''
        if user is not None:
            assert isinstance(user,User)
            dataToSendToClient = getUserRepresentation(user, outputType)
            sendData(userTunnelId, dataToSendToClient)

        if tweet is not None:
            assert isinstance(tweet, Tweet)
            dataToSendToClient = getTweetRepresentation(tweet)
            sendData(tweetTunnelId, dataToSendToClient)

        # NOTE(review): only the last representation sent is counted. When
        # both a user and a tweet are sent, the user's length is not added
        # to the batch counter - confirm whether that is intended.
        dataLength = len(dataToSendToClient)
        bytesCounter[0] += dataLength

        # End of batch: flush progress, close the tunnels, then either
        # start the next batch or finish.
        if bytesCounter[0] > bytesPerBatch or isFinished:
            updateProgressBars()
            isFirstIteration[0] = False

            bytesCounter[0] = 0
            mainControl.executeJavascript('onBatchEnd();')
            self.closeTunnels(webSocket)

            if not isFinished:
                logger.debug('Waiting to receive next data provider')
                if not openRequiredTunnels():
                    logger.warning('Failed to reinitialize tunnel slots')
                    webSocket.cleanup()
                    return

                sendHeader()
            else:
                mainControl.executeJavascript('onFinished();')
                webSocket.cleanup()

    def onCacheIteration(iteration, total, isFinished, data, iteratorId):
        # Passed to the cache readers below as ``onIterationFunc``. Returns
        # True while the socket has not been cleaned up.

        # Don't write followee data to output as it would duplicate a lot of data.
        if iteratorId == 'followee':
            data = None

        running = not webSocket.is_cleaned_up
        if running:
            # We need to do this so that if the client closes the socket we are notified.
            webSocket.pingFreqLimited()

            percentage = getPercentage(iteration, total)
            # ``dataId`` is only referenced by the disabled log line below.
            dataId = None
            if data is not None:
                dataId = data.id
            #logger.info('iteration %.2f of %.2f (%.1f%%) - it: %s, userId: %s' % (iteration, total, percentage,iteratorId,dataId))

            # Split the item into the user and/or tweet to send.
            user = None
            tweet = None
            if data is None:
                pass
            elif isinstance(data, User):
                user = data
            elif isinstance(data, Tweet):
                tweet = data
                if tweet.has_user:
                    user = tweet.user
            else:
                logger.error('Invalid data from cache, type: %s' % type(data))
                return running

            signaler.signalEvent({SignalActions.SOCKET: updateSocket, 'percentage' : percentage, 'user_data' : user, 'tweet_data' : tweet, 'isFinished' : isFinished})
            gevent.sleep(0)
        else:
            logger.debug('Ending cache download prematurely')

        return running

    logger.debug('Starting to read data from cache...')

    # This makes sure the search is finite.
    epochNow = getEpochMs()
    if endEpoch is None or endEpoch > epochNow:
        endEpoch = epochNow

    if followerInfo or followerInfoShort:
        readUsersFromCache(twitterSession, instanceId, placeId = placeCacheId, epochMsStartRange=startEpoch, epochMsEndRange=endEpoch, isFollowersLoadedRequirement=isFollowersLoadedRequirement, associatedWithTweetRequirement=associatedWithTweetRequirement, onIterationFunc=onCacheIteration, recursive=recursiveCacheFlag, userProjection=userProjection)
    else:
        readTweetsFromCache(twitterSession, instanceId, placeId = placeCacheId, epochMsStartRange=startEpoch, epochMsEndRange=endEpoch, onIterationFunc=onCacheIteration, retrieveUserData=True, userProjection=userProjection)

    # We want to cleanup everything now since we are done.
    return False