def func(templateArguments, instance):
    """Serve one page of cached tweets or users as JSON-ready rows.

    Query-string arguments read from ``request.GET``:

    * ``type`` -- ``'tweet'`` or ``'user'`` (required).
    * ``start_epoch`` / ``end_epoch`` -- passed to the cache readers.
    * ``page`` -- page number, 0 when absent.
    * ``place_id`` / ``provider_id`` -- combined into a cache ID.
    * ``projection_type`` -- user requests only; empty for full data or
      ``'name-only'``.
    * ``followee`` -- required for user requests.

    :param templateArguments: supplied by the caller; not used here.
    :param instance: instance key handed to the cache readers and to the
        page-link builders.
    :return: ``{'json': rows}``, or the result of ``redirect_problem``
        when a required argument is missing or unsupported.
    """
    dataType = parseString(request.GET.type, ['tweet', 'user'])
    start_epoch = parseInteger(request.GET.start_epoch)
    end_epoch = parseInteger(request.GET.end_epoch)
    page_num = parseInteger(request.GET.page)
    place_id = parseInteger(request.GET.place_id)
    provider_id = parseInteger(request.GET.provider_id)
    projection_type = parseString(request.GET.projection_type)
    followee = parseInteger(request.GET.followee)
    cache_id = GeocodeResultAbstract.buildCacheId(provider_id, place_id)

    if dataType is None:
        return redirect_problem('type is a required argument')
    if page_num is None:
        page_num = 0

    data = []
    if dataType == 'tweet':
        tweets = readTweetsFromCache(None, instance, cache_id,
                                     start_epoch, end_epoch, page_num,
                                     TwitterCachePage.PAGE_SIZE_FULL_DATA)
        if tweets is not None:
            for tweet in tweets:
                assert isinstance(tweet, Tweet)
                userHtml = UserInformationPage.getPageLinkImage(
                    instance, tweet.user, target='_self')
                data.append([tweet.created_at,
                             userHtml,
                             tweet.user.location_text,
                             tweet.text])
    elif dataType == 'user':
        # Truthiness test instead of len(): a missing argument may arrive
        # as None as well as '', and len(None) would raise TypeError.
        if not projection_type:
            projection = None
            pageSize = TwitterCachePage.PAGE_SIZE_FULL_DATA
        elif projection_type == 'name-only':
            projection = UserProjection.IdNameImage()
            pageSize = TwitterCachePage.PAGE_SIZE_ID_NAME_DATA
        else:
            return redirect_problem(
                'Unsupported projection type: %s' % projection_type)

        if followee is None:
            return redirect_problem('Followee is required')

        users = readUsersFromCache(None, instance, cache_id,
                                   start_epoch, end_epoch, page_num,
                                   pageSize, followee,
                                   userProjection=projection)
        if users is not None:
            for user in users:
                assert isinstance(user, User)
                data.append([user.id,
                             user.name,
                             user.profile_image_url,
                             UserInformationPage.link_info.getPageLink(
                                 instance, user.id)])

    return {'json': data}
def func(templateArguments):
    """Render the landing page for one page of Twitter instances.

    Fills ``templateArguments`` with the instance rows, the pagination
    entries, optional next/previous links, the thumbnail span, the
    build-instance link and the two human-readable time limits, then
    renders ``landing-page.tpl``.

    ``self`` is not a parameter; it is taken from the enclosing scope.

    :param templateArguments: dictionary updated in place and passed to
        the template.
    :return: the result of ``template('landing-page.tpl', ...)``.
    """
    allInstances = self.application.twitter_instances.getInstanceList()
    instanceCount = len(allInstances)
    perPage = Configuration.NUM_LANDING_PAGE_INSTANCES_PER_PAGE

    # One to three instances share a single row; any other count falls
    # back to the configured row width.
    if 1 <= instanceCount <= 3:
        perRow = instanceCount
    else:
        perRow = Configuration.NUM_LANDING_PAGE_INSTANCES_PER_ROW
    # NOTE(review): 12 is presumably the grid column count -- confirm.
    thumbnailSpan = 12 / perRow

    pageNum = parseInteger(request.GET.page, 0, instanceCount, 0)
    firstIndex = perPage * pageNum
    pageCount = int(ceil(float(instanceCount) / float(perPage)))

    cells = []
    for instance in allInstances[firstIndex:firstIndex + perPage]:
        assert isinstance(instance, TwitterInstance)
        link = LocationsMapPage.link_info.getPageLink(instance.instance_key)
        description = instance.getShortDescription(True)
        cells.append((link, description, instance.geographic_setup_string))

    # Split into rows.
    templateArguments.update(instances=splitList(cells, perRow))

    # Pagination: a window of up to ten consecutive pages around the
    # current one, shifted so it stays inside [0, pageCount).
    windowStart = pageNum - 5
    windowEnd = pageNum + 5
    if windowStart < 0:
        # Hand the unused leading slots to the trailing side.
        windowEnd -= windowStart
        windowStart = 0
    overshoot = windowEnd - pageCount
    if overshoot > 0:
        windowStart = max(windowStart - overshoot, 0)
        windowEnd = pageCount

    pagination = [(n + 1, LandingPage.getLandingPageLink(n), pageNum == n)
                  for n in range(windowStart, windowEnd)]

    # Sparse links to the pages after the window, about five of them.
    step = (pageCount - windowEnd) / 5
    if step > 0:
        pagination.extend(
            (n + 1, LandingPage.getLandingPageLink(n), False)
            for n in range(windowEnd, pageCount, step))

    if pageNum < pageCount - 1:
        templateArguments.update(
            pagination_next=LandingPage.getLandingPageLink(pageNum + 1))
    if pageNum > 0:
        templateArguments.update(
            pagination_previous=LandingPage.getLandingPageLink(pageNum - 1))

    inactiveLimit = Configuration.MAX_INSTANCE_INACTIVE_TIME_MS
    totalLimit = Configuration.MAX_INSTANCE_TOTAL_AGE_MS
    templateArguments.update(
        pagination=pagination,
        thumbnail_span=thumbnailSpan,
        build_instance_link=OAuthSignIn.link_info.getPageLink(),
        maxInactiveTime=self.getHumanTime(inactiveLimit),
        maxTotalTime=self.getHumanTime(totalLimit))

    return template('landing-page.tpl', templateArguments)
def func(templateArguments, instance):
    """Return influence data for a source place, grouped by place type.

    Reads ``start_epoch``, ``end_epoch``, ``source_place_id`` and
    ``source_provider_id`` from ``request.GET``, fetches the matching
    temporal range and builds, for city, country and continent, the list
    of top destination records together with the sum of their counts.

    Each record is a list laid out as
    ``[placeId, providerId, display_name, coordinate, count, bounding_box]``.

    ``self`` is not a parameter; it is taken from the enclosing scope.

    :param templateArguments: supplied by the caller; not used here.
    :param instance: instance key used to look up the Twitter instance and
        the temporal influence collection.
    :return: ``{'json': resultData}``, or an empty dict when the instance
        is unknown or no source place ID was given.
    """
    twitterInstance = \
        self.application.twitter_instances.getInstanceByInstanceKey(instance)
    if twitterInstance is None:
        return dict()

    baseEpoch = twitterInstance.constructed_at
    start_epoch = parseInteger(request.GET.start_epoch, default=None)
    end_epoch = parseInteger(request.GET.end_epoch, default=None)
    source_place_id = parseInteger(request.GET.source_place_id)
    source_provider_id = parseInteger(request.GET.source_provider_id)

    if source_place_id is None:
        logger.error('Invalid place ID specified while providing influence data: %s' % unicode(source_place_id))
        return dict()

    source_cache_id = GeocodeResultAbstract.buildCacheId(source_provider_id,
                                                         source_place_id)
    temporalCollection = getTemporalInfluenceCollection(instance)

    def toTimeId(epoch):
        # A missing bound stays None; otherwise convert it to a time ID.
        if epoch is None:
            return None
        return getTimeIdFromTimestamp(baseEpoch,
                                      Configuration.TEMPORAL_STEP,
                                      epoch)

    start_time_id = toTimeId(start_epoch)
    end_time_id = toTimeId(end_epoch)

    timerMs = getEpochMs()
    cacheData = getTemporalRange(temporalCollection,
                                 start_time_id,
                                 end_time_id,
                                 source_cache_id,
                                 preciseFromBack=True,
                                 preciseFromFront=True)
    logger.info('Took %dms to read temporal range data' % (getEpochMs() - timerMs))

    timerMs = getEpochMs()
    geocodeByPlaceType = dict()
    totalsByPlaceType = dict()

    if cacheData is not None:
        # Destination keys have the form '<placeType>_<placeId>'.
        for rawProviderId, destinations in cacheData.iteritems():
            providerId = int(rawProviderId)
            for destination, count in destinations.iteritems():
                parts = destination.split('_')
                placeType = int(parts[0])
                placeId = int(parts[1])
                geocodeByPlaceType.setdefault(placeType, list()).append(
                    [placeId, providerId, None, None, count, None])

    # Process only the records we are going to display: keep the highest
    # counts per place type and resolve geocode details for those alone.
    limit = Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE
    for placeType, records in geocodeByPlaceType.iteritems():
        shown = sorted(records, key=lambda rec: rec[4], reverse=True)[:limit]
        geocodeByPlaceType[placeType] = shown

        for record in shown:
            geocode = geocodeFromCacheById(
                GeocodeResultAbstract.buildCacheId(record[1], record[0]))
            record[2] = geocode.display_name
            record[3] = geocode.coordinate
            record[5] = geocode.bounding_box
            totalsByPlaceType[placeType] = \
                totalsByPlaceType.get(placeType, 0) + record[4]

    placeTypes = GeocodeResultAbstract.PlaceTypes
    resultData = dict()
    for label, placeType in (('city', placeTypes.CITY),
                             ('country', placeTypes.COUNTRY),
                             ('continent', placeTypes.CONTINENT)):
        resultData[label] = {
            'geocode_list': geocodeByPlaceType.get(placeType, list()),
            'total': totalsByPlaceType.get(placeType, 0)
        }

    logger.info('Took %dms to build temporal range result data' % (getEpochMs() - timerMs))
    return {'json': resultData}
def func(templateArguments, instance):
    """Serve one page of cached tweets or users as JSON-ready rows.

    Query-string arguments read from ``request.GET``:

    * ``type`` -- ``'tweet'`` or ``'user'`` (required).
    * ``start_epoch`` / ``end_epoch`` -- passed to the cache readers.
    * ``page`` -- page number, 0 when absent.
    * ``place_id`` / ``provider_id`` -- combined into a cache ID.
    * ``projection_type`` -- user requests only; empty for full data or
      ``'name-only'``.
    * ``followee`` -- required for user requests.

    :param templateArguments: supplied by the caller; not used here.
    :param instance: instance key handed to the cache readers and to the
        page-link builders.
    :return: ``{'json': rows}``, or the result of ``redirect_problem``
        when a required argument is missing or unsupported.
    """
    dataType = parseString(request.GET.type, ['tweet', 'user'])
    start_epoch = parseInteger(request.GET.start_epoch)
    end_epoch = parseInteger(request.GET.end_epoch)
    page_num = parseInteger(request.GET.page)
    place_id = parseInteger(request.GET.place_id)
    provider_id = parseInteger(request.GET.provider_id)
    projection_type = parseString(request.GET.projection_type)
    followee = parseInteger(request.GET.followee)
    cache_id = GeocodeResultAbstract.buildCacheId(provider_id, place_id)

    if dataType is None:
        return redirect_problem('type is a required argument')
    if page_num is None:
        page_num = 0

    data = []
    if dataType == 'tweet':
        tweets = readTweetsFromCache(None, instance, cache_id,
                                     start_epoch, end_epoch, page_num,
                                     TwitterCachePage.PAGE_SIZE_FULL_DATA)
        if tweets is not None:
            for tweet in tweets:
                assert isinstance(tweet, Tweet)
                userHtml = UserInformationPage.getPageLinkImage(
                    instance, tweet.user, target='_self')
                data.append([tweet.created_at,
                             userHtml,
                             tweet.user.location_text,
                             tweet.text])
    elif dataType == 'user':
        # Truthiness test instead of len(): a missing argument may arrive
        # as None as well as '', and len(None) would raise TypeError.
        if not projection_type:
            projection = None
            pageSize = TwitterCachePage.PAGE_SIZE_FULL_DATA
        elif projection_type == 'name-only':
            projection = UserProjection.IdNameImage()
            pageSize = TwitterCachePage.PAGE_SIZE_ID_NAME_DATA
        else:
            return redirect_problem(
                'Unsupported projection type: %s' % projection_type)

        if followee is None:
            return redirect_problem('Followee is required')

        users = readUsersFromCache(None, instance, cache_id,
                                   start_epoch, end_epoch, page_num,
                                   pageSize, followee,
                                   userProjection=projection)
        if users is not None:
            for user in users:
                assert isinstance(user, User)
                data.append([user.id,
                             user.name,
                             user.profile_image_url,
                             UserInformationPage.link_info.getPageLink(
                                 instance, user.id)])

    return {'json': data}
def func(templateArguments):
    """Render the landing page for one page of Twitter instances.

    Fills ``templateArguments`` with the instance rows, the pagination
    entries, optional next/previous links, the thumbnail span, the
    build-instance link and the two human-readable time limits, then
    renders ``landing-page.tpl``.

    ``self`` is not a parameter; it is taken from the enclosing scope.

    :param templateArguments: dictionary updated in place and passed to
        the template.
    :return: the result of ``template('landing-page.tpl', ...)``.
    """
    allInstances = self.application.twitter_instances.getInstanceList()
    instanceCount = len(allInstances)
    perPage = Configuration.NUM_LANDING_PAGE_INSTANCES_PER_PAGE

    # One to three instances share a single row; any other count falls
    # back to the configured row width.
    if 1 <= instanceCount <= 3:
        perRow = instanceCount
    else:
        perRow = Configuration.NUM_LANDING_PAGE_INSTANCES_PER_ROW
    # NOTE(review): 12 is presumably the grid column count -- confirm.
    thumbnailSpan = 12 / perRow

    pageNum = parseInteger(request.GET.page, 0, instanceCount, 0)
    firstIndex = perPage * pageNum
    pageCount = int(ceil(float(instanceCount) / float(perPage)))

    cells = []
    for instance in allInstances[firstIndex:firstIndex + perPage]:
        assert isinstance(instance, TwitterInstance)
        link = LocationsMapPage.link_info.getPageLink(instance.instance_key)
        description = instance.getShortDescription(True)
        cells.append((link, description, instance.geographic_setup_string))

    # Split into rows.
    templateArguments.update(instances=splitList(cells, perRow))

    # Pagination: a window of up to ten consecutive pages around the
    # current one, shifted so it stays inside [0, pageCount).
    windowStart = pageNum - 5
    windowEnd = pageNum + 5
    if windowStart < 0:
        # Hand the unused leading slots to the trailing side.
        windowEnd -= windowStart
        windowStart = 0
    overshoot = windowEnd - pageCount
    if overshoot > 0:
        windowStart = max(windowStart - overshoot, 0)
        windowEnd = pageCount

    pagination = [(n + 1, LandingPage.getLandingPageLink(n), pageNum == n)
                  for n in range(windowStart, windowEnd)]

    # Sparse links to the pages after the window, about five of them.
    step = (pageCount - windowEnd) / 5
    if step > 0:
        pagination.extend(
            (n + 1, LandingPage.getLandingPageLink(n), False)
            for n in range(windowEnd, pageCount, step))

    if pageNum < pageCount - 1:
        templateArguments.update(
            pagination_next=LandingPage.getLandingPageLink(pageNum + 1))
    if pageNum > 0:
        templateArguments.update(
            pagination_previous=LandingPage.getLandingPageLink(pageNum - 1))

    inactiveLimit = Configuration.MAX_INSTANCE_INACTIVE_TIME_MS
    totalLimit = Configuration.MAX_INSTANCE_TOTAL_AGE_MS
    templateArguments.update(
        pagination=pagination,
        thumbnail_span=thumbnailSpan,
        build_instance_link=OAuthSignIn.link_info.getPageLink(),
        maxInactiveTime=self.getHumanTime(inactiveLimit),
        maxTotalTime=self.getHumanTime(totalLimit))

    return template('landing-page.tpl', templateArguments)
def func(templateArguments, instance):
    """Return influence data for a source place, grouped by place type.

    Reads ``start_epoch``, ``end_epoch``, ``source_place_id`` and
    ``source_provider_id`` from ``request.GET``, fetches the matching
    temporal range and builds, for city, country and continent, the list
    of top destination records together with the sum of their counts.

    Each record is a list laid out as
    ``[placeId, providerId, display_name, coordinate, count, bounding_box]``.

    ``self`` is not a parameter; it is taken from the enclosing scope.

    :param templateArguments: supplied by the caller; not used here.
    :param instance: instance key used to look up the Twitter instance and
        the temporal influence collection.
    :return: ``{'json': resultData}``, or an empty dict when the instance
        is unknown or no source place ID was given.
    """
    twitterInstance = \
        self.application.twitter_instances.getInstanceByInstanceKey(instance)
    if twitterInstance is None:
        return dict()

    baseEpoch = twitterInstance.constructed_at
    start_epoch = parseInteger(request.GET.start_epoch, default=None)
    end_epoch = parseInteger(request.GET.end_epoch, default=None)
    source_place_id = parseInteger(request.GET.source_place_id)
    source_provider_id = parseInteger(request.GET.source_provider_id)

    if source_place_id is None:
        logger.error('Invalid place ID specified while providing influence data: %s' % unicode(source_place_id))
        return dict()

    source_cache_id = GeocodeResultAbstract.buildCacheId(source_provider_id,
                                                         source_place_id)
    temporalCollection = getTemporalInfluenceCollection(instance)

    def toTimeId(epoch):
        # A missing bound stays None; otherwise convert it to a time ID.
        if epoch is None:
            return None
        return getTimeIdFromTimestamp(baseEpoch,
                                      Configuration.TEMPORAL_STEP,
                                      epoch)

    start_time_id = toTimeId(start_epoch)
    end_time_id = toTimeId(end_epoch)

    timerMs = getEpochMs()
    cacheData = getTemporalRange(temporalCollection,
                                 start_time_id,
                                 end_time_id,
                                 source_cache_id,
                                 preciseFromBack=True,
                                 preciseFromFront=True)
    logger.info('Took %dms to read temporal range data' % (getEpochMs() - timerMs))

    timerMs = getEpochMs()
    geocodeByPlaceType = dict()
    totalsByPlaceType = dict()

    if cacheData is not None:
        # Destination keys have the form '<placeType>_<placeId>'.
        for rawProviderId, destinations in cacheData.iteritems():
            providerId = int(rawProviderId)
            for destination, count in destinations.iteritems():
                parts = destination.split('_')
                placeType = int(parts[0])
                placeId = int(parts[1])
                geocodeByPlaceType.setdefault(placeType, list()).append(
                    [placeId, providerId, None, None, count, None])

    # Process only the records we are going to display: keep the highest
    # counts per place type and resolve geocode details for those alone.
    limit = Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE
    for placeType, records in geocodeByPlaceType.iteritems():
        shown = sorted(records, key=lambda rec: rec[4], reverse=True)[:limit]
        geocodeByPlaceType[placeType] = shown

        for record in shown:
            geocode = geocodeFromCacheById(
                GeocodeResultAbstract.buildCacheId(record[1], record[0]))
            record[2] = geocode.display_name
            record[3] = geocode.coordinate
            record[5] = geocode.bounding_box
            totalsByPlaceType[placeType] = \
                totalsByPlaceType.get(placeType, 0) + record[4]

    placeTypes = GeocodeResultAbstract.PlaceTypes
    resultData = dict()
    for label, placeType in (('city', placeTypes.CITY),
                             ('country', placeTypes.COUNTRY),
                             ('continent', placeTypes.CONTINENT)):
        resultData[label] = {
            'geocode_list': geocodeByPlaceType.get(placeType, list()),
            'total': totalsByPlaceType.get(placeType, 0)
        }

    logger.info('Took %dms to build temporal range result data' % (getEpochMs() - timerMs))
    return {'json': resultData}
# Command-line mode: drop the stored geocode data (db.place and db.geocode).
if args.clear_geocode_data:
    logger.info('Clearing geocode data..')
    db = getDatabase()
    db.place.drop()
    db.geocode.drop()
    logger.info('Geocode data cleared')

# Command-line mode: interactively create instance codes. Keeps prompting
# until interrupted with Ctrl-C (KeyboardInterrupt), then exits with
# status 0.
if args.setup_instance_code:
    print 'Running in setup instance code mode'
    try:
        while True:
            print 'Setting up instance code...'
            result = raw_input('Enter the maximum number of instances that can consume this code at any one time: ')
            # Input that cannot be parsed falls back to a limit of 1.
            # NOTE(review): the second positional argument (0) looks like
            # a minimum -- confirm against parseInteger.
            result = parseInteger(result,0,default=1)
            code = getCode(result)
            print 'Instance code with ID: \'%s\' setup, with consume limit: %d' % (code, result)
    except KeyboardInterrupt:
        pass
    print 'Finished!'
    sys.exit(0)

# Command-line mode: report database storage usage. From here on all
# print output goes to db_results.txt, because sys.stdout is replaced.
# NOTE(review): the file is not closed in the code visible here.
if args.show_database_storage_usage:
    f = open('db_results.txt', 'w')
    sys.stdout = f
    # 15 minutes, in milliseconds.
    theStep = 1000 * 60 * 15
    print 'Running in show database storage mode, update every %dms' % theStep
def manageSocket(self, webSocket, tupleArguments, socketId):
    """Stream cached user or tweet data to the client over socket tunnels.

    Options read from ``request.GET``: ``batchSizeBytes`` (default
    1024 * 1024, maximum 1024 * 1024 * 256), ``tweet_info``,
    ``follower_info_full``, ``follower_info_short``, ``provider_id``,
    ``place_id``, ``start_epoch`` and ``end_epoch``.

    Exactly one output mode is selected, in priority order
    ``follower_info_full``, ``tweet_info``, ``follower_info_short``; full
    follower info is the fallback when none of them is set.

    Data is sent in batches: once more than ``bytesPerBatch`` characters
    have been counted (or the read has finished) the tunnels are closed,
    the client is notified through ``onBatchEnd();`` and, unless finished,
    the tunnels are reopened and the header is sent again.

    :param webSocket: socket whose ``controls`` hold this page's
        ``DocumentControl`` and on which the tunnels are opened.
    :param tupleArguments: sequence whose first element is the instance
        key.
    :param socketId: not used in this method.
    :return: ``False`` on every path visible here.
    """
    instanceId = tupleArguments[0]
    mainControl = webSocket.controls[self.key]
    assert isinstance(mainControl, DocumentControl)

    bytesPerBatch = parseInteger(request.GET.batchSizeBytes, maximum=1024 * 1024 * 256, default=1024 * 1024 * 1)
    tweetInfo = parseBoolean(request.GET.tweet_info, False)
    followerInfo = parseBoolean(request.GET.follower_info_full, False)
    followerInfoShort = parseBoolean(request.GET.follower_info_short, False)
    providerId = parseInteger(request.GET.provider_id)
    placeId = parseInteger(request.GET.place_id)
    startEpoch = parseInteger(request.GET.start_epoch)
    endEpoch = parseInteger(request.GET.end_epoch)

    # A place filter is only applied when both halves of the ID are given.
    if placeId is not None and providerId is not None:
        placeCacheId = GeocodeResultAbstract.buildCacheId(providerId, placeId)
    else:
        placeCacheId = None

    # Make the three mode flags mutually exclusive; default to full
    # follower info when none was requested.
    if followerInfo:
        tweetInfo = False
        followerInfoShort = False
    elif tweetInfo:
        followerInfo = False
        followerInfoShort = False
    elif followerInfoShort:
        followerInfo = False
        tweetInfo = False
    else:
        followerInfo = True

    # The user tunnel is always used; the tweet tunnel only in tweet mode.
    userTunnelId = 'user_tunnel'
    tweetTunnelId = None
    if tweetInfo:
        tweetTunnelId = 'tweet_tunnel'

    def openRequiredTunnels():
        # Tweet mode opens every tunnel, other modes just the user tunnel.
        if tweetInfo:
            return self.openTunnels(webSocket)
        else:
            return self.openTunnel(userTunnelId, webSocket)

    if not openRequiredTunnels():
        logger.error('Failed to open initial tunnels')
        return False

    # Per-mode settings feeding the user projection and the cache query.
    if tweetInfo:
        followerIdsFlag = False
        followeeIdsFlag = False
        analysisFlag = False
        isFollowersLoadedRequirement = None
        associatedWithTweetRequirement = True
        recursiveCacheFlag = False
        followerIdsProjection = None
        outputType = 1 # for csv.
    elif followerInfo:
        followerIdsFlag = True
        followeeIdsFlag = True
        analysisFlag = True
        isFollowersLoadedRequirement = True
        associatedWithTweetRequirement = None
        recursiveCacheFlag = True
        followerIdsProjection = None # this gives us all data on each follower.
        outputType = 2
    elif followerInfoShort:
        followerIdsFlag = True
        followeeIdsFlag = True
        followerIdsProjection = NoQueryProjection()
        analysisFlag = True
        isFollowersLoadedRequirement = True
        associatedWithTweetRequirement = None
        recursiveCacheFlag = True
        outputType = 3
    else:
        # Unreachable given the normalisation above; kept as a guard.
        raise NotImplementedError()

    # NOTE(review): positional flags -- see UserProjection's signature
    # for the meaning of each argument.
    userProjection = UserProjection(True, True, None, True, followerIdsFlag, followerIdsProjection, followeeIdsFlag, UserProjection.Id(), True, False, False, True, True, False, False, False, False, analysisFlag)

    # One-element lists act as mutable cells shared with the closures
    # below.
    isFirstIteration = [True]

    # NOTE(review): this return happens after the tunnels were opened
    # above and they are not closed here -- confirm the caller cleans up.
    twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instanceId)
    if twitterInstance is None:
        return False
    twitterSession = twitterInstance.twitter_thread.twitter_session

    progressBarTotalId = 'progress-bar-total'
    progressBarCurrentBatchId = 'progress-bar-current-batch'
    signaler = EventSignaler(self.key, [webSocket])
    # Rate-limits progress bar updates; presumably 400 ms -- TODO confirm
    # the Timer units and the meaning of the second argument.
    updateProgressBarFreq = Timer(400,True)

    def sendData(tunnelId, data):
        # Every payload is sent as unicode text terminated by CRLF.
        self.sendDataOnTunnel(webSocket, tunnelId, (unicode(data) + '\r\n'))

    def sendHeader():
        # Header line for each tunnel that is in use.
        sendData(userTunnelId, getUserHeader(outputType))
        if tweetTunnelId is not None:
            sendData(tweetTunnelId, getTweetHeader())

    def doProgressBarChange(percentage, progressBarId):
        # Sets the width of the given progress bar element client-side.
        mainControl.executeJavascript('$("#%s").width("%.3f%%");' % (progressBarId, percentage))

    sendHeader()

    counter = [0]
    previousCounter = [0]

    def updateSocket(controls, data, bytesCounter=counter, bytesPerBatch=bytesPerBatch, previousCounter=previousCounter, isFirstIteration=isFirstIteration):
        # Signal handler: sends one user and/or tweet to the client and
        # handles batch roll-over. The default arguments bind the shared
        # mutable cells defined above.
        user = data['user_data']
        tweet = data['tweet_data']
        percentage = data['percentage']
        isFinished = data['isFinished']
        # NOTE(review): `control` is only type-checked; the code below
        # uses `mainControl` from the enclosing scope.
        control = controls[self.key]
        assert isinstance(control, DocumentControl)

        def updateProgressBars():
            # Remember the count that was displayed, then refresh both
            # bars.
            previousCounter[0] = thisCounter = bytesCounter[0]
            percentageCurrentBatch = float(thisCounter) / float(bytesPerBatch) * 100
            percentageTotal = percentage
            if percentageTotal >= 100:
                percentageCurrentBatch = 100
            # During the first batch the batch bar never trails the total
            # bar.
            if isFirstIteration[0] and percentageCurrentBatch < percentageTotal:
                percentageCurrentBatch = percentageTotal
            doProgressBarChange(percentageTotal, progressBarTotalId)
            doProgressBarChange(percentageCurrentBatch, progressBarCurrentBatchId)

        # Refresh only when the count changed and the timer allows it.
        if previousCounter[0] != bytesCounter[0] and updateProgressBarFreq.ticked():
            updateProgressBars()

        dataToSendToClient = ''
        if user is not None:
            assert isinstance(user,User)
            dataToSendToClient = getUserRepresentation(user, outputType)
            sendData(userTunnelId, dataToSendToClient)
        if tweet is not None:
            assert isinstance(tweet, Tweet)
            dataToSendToClient = getTweetRepresentation(tweet)
            sendData(tweetTunnelId, dataToSendToClient)

        # NOTE(review): when both a user and a tweet were sent, only the
        # tweet's length is counted, because dataToSendToClient was
        # overwritten -- confirm whether that is intended.
        dataLength = len(dataToSendToClient)
        bytesCounter[0] += dataLength

        if bytesCounter[0] > bytesPerBatch or isFinished:
            # End of batch: final bar refresh, reset the counter, notify
            # the client and close the current tunnels.
            updateProgressBars()
            isFirstIteration[0] = False
            bytesCounter[0] = 0
            mainControl.executeJavascript('onBatchEnd();')
            self.closeTunnels(webSocket)
            if not isFinished:
                logger.debug('Waiting to receive next data provider')
                if not openRequiredTunnels():
                    logger.warning('Failed to reinitialize tunnel slots')
                    webSocket.cleanup()
                    return
                sendHeader()
            else:
                mainControl.executeJavascript('onFinished();')
                webSocket.cleanup()

    def onCacheIteration(iteration, total, isFinished, data, iteratorId):
        # Per-item callback for the cache readers. Returns whether the
        # socket is still open (False once it has been cleaned up).

        # Don't write followee data to output as it would duplicate a lot
        # of data.
        if iteratorId == 'followee':
            data = None

        running = not webSocket.is_cleaned_up
        if running:
            # We need to do this so that if the client closes the socket
            # we are notified.
            webSocket.pingFreqLimited()
            percentage = getPercentage(iteration, total)
            # dataId is only referenced by the commented-out log line
            # below.
            dataId = None
            if data is not None:
                dataId = data.id
            #logger.info('iteration %.2f of %.2f (%.1f%%) - it: %s, userId: %s' % (iteration, total, percentage,iteratorId,dataId))

            user = None
            tweet = None
            if data is None:
                pass
            elif isinstance(data, User):
                user = data
            elif isinstance(data, Tweet):
                tweet = data
                # A tweet also carries its author when one is attached.
                if tweet.has_user:
                    user = tweet.user
            else:
                logger.error('Invalid data from cache, type: %s' % type(data))
                return running

            signaler.signalEvent({SignalActions.SOCKET: updateSocket, 'percentage' : percentage, 'user_data' : user, 'tweet_data' : tweet, 'isFinished' : isFinished})
            # Yield to other greenlets.
            gevent.sleep(0)
        else:
            logger.debug('Ending cache download prematurely')
        return running

    logger.debug('Starting to read data from cache...')

    # This makes sure the search is finite: never read past "now".
    epochNow = getEpochMs()
    if endEpoch is None or endEpoch > epochNow:
        endEpoch = epochNow

    if followerInfo or followerInfoShort:
        readUsersFromCache(twitterSession, instanceId, placeId = placeCacheId, epochMsStartRange=startEpoch, epochMsEndRange=endEpoch, isFollowersLoadedRequirement=isFollowersLoadedRequirement, associatedWithTweetRequirement=associatedWithTweetRequirement, onIterationFunc=onCacheIteration, recursive=recursiveCacheFlag, userProjection=userProjection)
    else:
        readTweetsFromCache(twitterSession, instanceId, placeId = placeCacheId, epochMsStartRange=startEpoch, epochMsEndRange=endEpoch, onIterationFunc=onCacheIteration, retrieveUserData=True, userProjection=userProjection)

    # We want to cleanup everything now since we are done.
    return False
def getArguments(self):
    """Collect the provider and place IDs from the request query string.

    :return: dict with keys ``'provider_id'`` and ``'place_id'``, each
        holding the result of ``parseInteger`` on the matching
        ``request.GET`` field.
    """
    query = request.GET
    return {
        'provider_id': parseInteger(query.provider_id),
        'place_id': parseInteger(query.place_id)
    }