Пример #1
0
        def func(templateArguments, instance, location, provider):
            """Render the location page for one place of a running search instance.

            Args:
                templateArguments: dict of template variables; updated in place.
                instance: instance key of the search stream to display.
                location: place identifier, passed to buildCacheId as the place ID.
                provider: geocode provider identifier, passed to buildCacheId.

            Returns:
                The result of rendering 'location.tpl' with templateArguments.
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(
                instance)
            if twitterInstance is None:
                abort(404, "No active search stream found at this address")

            geocode = geocodeFromCacheById(
                GeocodeResultAbstract.buildCacheId(provider, location))
            if geocode is None:
                # A cache miss is reported to the client instead of tripping an
                # assert: asserts are stripped under `python -O`, and the lookup
                # key is built from caller-supplied values (presumably taken
                # from the request URL).
                abort(404, "No cached location found at this address")

            instanceDescription = getInstanceDescription(twitterInstance)
            instanceLink = getInstanceLink(twitterInstance)

            homeLink = getHomeLink(Configuration.PROJECT_NAME)

            templateArguments.update(
                {
                    'home_link': homeLink,
                    'instance': instance,
                    'location': location,
                    'provider': provider,
                    'instance_link': instanceLink,
                    'instance_description': instanceDescription,
                    'place': geocode.display_name_short,
                    'place_coord': geocode.coordinate,
                    'startEpoch': twitterInstance.constructed_at,
                    # Client needs to offset to this epoch in case its clock
                    # is wrong.
                    'server_current_epoch': getEpochMs(),
                    'max_tweets': Configuration.MAX_CLIENT_LIVE_TWEETS
                }
            )

            # The remaining template variables are optional and only set when
            # the geocode result carries the corresponding data.
            if geocode.has_bounding_box:
                templateArguments.update(
                    {'place_bounding_box': geocode.bounding_box})

            if geocode.has_country:
                templateArguments.update({
                    'place_country_link':
                    LocationsPage.link_info.getPageLink(
                        instance, geocode.country.place_id,
                        geocode.country.provider_id)
                })
                templateArguments.update(
                    {'place_country': geocode.country.display_name_short})

            if geocode.has_continent:
                templateArguments.update({
                    'place_continent_link':
                    LocationsPage.link_info.getPageLink(
                        instance, geocode.continent.place_id,
                        geocode.continent.provider_id)
                })
                templateArguments.update(
                    {'place_continent': geocode.continent.display_name_short})

            return template('location.tpl', templateArguments)
Пример #2
0
        def func(templateArguments, instance):
            """Return one page of cached tweets or users as JSON rows.

            Reads these query parameters from request.GET: type ('tweet' or
            'user'), start_epoch, end_epoch, page, place_id, provider_id,
            projection_type and followee.

            Args:
                templateArguments: unused by this handler.
                instance: instance key whose cache is read.

            Returns:
                {'json': rows}, where rows is a list of
                [created_at, user link HTML, user location text, text] for
                tweets, or [id, name, profile image URL, user page link] for
                users. Returns the result of redirect_problem(...) when a
                required argument is missing or unsupported.
            """
            dataType = parseString(request.GET.type, ['tweet', 'user'])
            start_epoch = parseInteger(request.GET.start_epoch)
            end_epoch = parseInteger(request.GET.end_epoch)
            page_num = parseInteger(request.GET.page)
            place_id = parseInteger(request.GET.place_id)
            provider_id = parseInteger(request.GET.provider_id)
            projection_type = parseString(request.GET.projection_type)
            followee = parseInteger(request.GET.followee)

            cache_id = GeocodeResultAbstract.buildCacheId(provider_id, place_id)

            if dataType is None:
                return redirect_problem('type is a required argument')

            if page_num is None:
                page_num = 0

            data = []
            if dataType == 'tweet':
                tweets = readTweetsFromCache(None, instance, cache_id, start_epoch, end_epoch, page_num, TwitterCachePage.PAGE_SIZE_FULL_DATA)
                if tweets is not None:
                    for tweet in tweets:
                        assert isinstance(tweet, Tweet)
                        userHtml = UserInformationPage.getPageLinkImage(instance, tweet.user, target='_self')

                        data.append([tweet.created_at,
                                     userHtml,
                                     tweet.user.location_text,
                                     tweet.text])

            elif dataType == 'user':
                # Truthiness instead of len(): parseString can return None
                # (see the dataType check above), and len(None) would raise
                # TypeError. An empty or missing value means "no projection".
                if not projection_type:
                    projection = None
                    pageSize = TwitterCachePage.PAGE_SIZE_FULL_DATA
                elif projection_type == 'name-only':
                    projection = UserProjection.IdNameImage()
                    pageSize = TwitterCachePage.PAGE_SIZE_ID_NAME_DATA
                else:
                    return redirect_problem('Unsupported projection type: %s' % projection_type)

                if followee is None:
                    return redirect_problem('Followee is required')

                users = readUsersFromCache(None, instance, cache_id, start_epoch, end_epoch, page_num, pageSize, followee, userProjection=projection)
                if users is not None:
                    for user in users:
                        assert isinstance(user, User)
                        data.append([user.id,
                                     user.name,
                                     user.profile_image_url,
                                     UserInformationPage.link_info.getPageLink(instance, user.id)])

            return {'json': data}
Пример #3
0
    def addTemporalEntry(self, temporalCollection, timeId, userProviderId, userPlaceId, followerProviderId, followerPlaceId, followerPlaceType):
        """Record a temporal entry from a user's place to a follower's place.

        Does nothing once the instance is shut down. Otherwise it delegates to
        the module-level addTemporalEntry, passing the last time ID seen for
        this source place, then remembers timeId as the new last time ID both
        in memory and through setInstanceTemporalSourceLastTime.
        """
        if not self.is_shutdown:
            sourceKey = GeocodeResultAbstract.buildCacheIdTuple(userProviderId, userPlaceId)
            sourceCacheId = GeocodeResultAbstract.buildCacheId(userProviderId, userPlaceId)
            previousTimeId = self.last_temporal_time_id_by_source.get(sourceKey)

            # Destination key has the form '<place type>_<place id>'.
            target = '%s_%s' % (followerPlaceType, followerPlaceId)
            addTemporalEntry(temporalCollection, previousTimeId, timeId, sourceCacheId, target, followerProviderId)

            self.last_temporal_time_id_by_source[sourceKey] = timeId
            setInstanceTemporalSourceLastTime(self.instance_key, userProviderId, userPlaceId, timeId)
Пример #4
0
    def addTemporalEntry(self, temporalCollection, timeId, userProviderId, userPlaceId, followerProviderId, followerPlaceId, followerPlaceType):
        """Store a source-place -> follower-place entry for the given time ID.

        No-op when self.is_shutdown is set. The previous time ID for the source
        place is looked up in self.last_temporal_time_id_by_source (None if the
        source has not been seen) and replaced by timeId after the entry has
        been written.
        """
        if self.is_shutdown:
            return

        tupleKey = GeocodeResultAbstract.buildCacheIdTuple(userProviderId, userPlaceId)
        cacheId = GeocodeResultAbstract.buildCacheId(userProviderId, userPlaceId)
        history = self.last_temporal_time_id_by_source

        addTemporalEntry(
            temporalCollection,
            history.get(tupleKey, None),
            timeId,
            cacheId,
            '%s_%s' % (followerPlaceType, followerPlaceId),
            followerProviderId)

        history[tupleKey] = timeId
        setInstanceTemporalSourceLastTime(self.instance_key, userProviderId, userPlaceId, timeId)
Пример #5
0
        def func(templateArguments, instance, location, provider):
            """Fill in the template variables for a location page and render it.

            Args:
                templateArguments: dict of template variables; updated in place.
                instance: instance key used to find the running search stream.
                location: passed to buildCacheId as the place ID.
                provider: passed to buildCacheId as the provider ID.

            Returns:
                The rendered 'location.tpl' template.
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
            if twitterInstance is None:
                abort(404, "No active search stream found at this address")

            geocode = geocodeFromCacheById(GeocodeResultAbstract.buildCacheId(provider, location))
            if geocode is None:
                # Previously an assert, which is removed under `python -O` and
                # otherwise turns an unknown place into a server error.
                abort(404, "No cached location found at this address")

            instanceDescription = getInstanceDescription(twitterInstance)
            instanceLink = getInstanceLink(twitterInstance)

            homeLink = getHomeLink(Configuration.PROJECT_NAME)

            templateArguments.update({'home_link': homeLink,
                                      'instance': instance,
                                      'location': location,
                                      'provider': provider,
                                      'instance_link': instanceLink,
                                      'instance_description': instanceDescription,
                                      'place': geocode.display_name_short,
                                      'place_coord': geocode.coordinate,
                                      'startEpoch': twitterInstance.constructed_at,
                                      # Client needs to offset to this epoch in case its clock is wrong.
                                      'server_current_epoch': getEpochMs(),
                                      'max_tweets': Configuration.MAX_CLIENT_LIVE_TWEETS})

            # Optional variables: only present when the geocode has the data.
            if geocode.has_bounding_box:
                templateArguments.update({'place_bounding_box': geocode.bounding_box})

            if geocode.has_country:
                templateArguments.update({'place_country_link': LocationsPage.link_info.getPageLink(instance, geocode.country.place_id, geocode.country.provider_id)})
                templateArguments.update({'place_country': geocode.country.display_name_short})

            if geocode.has_continent:
                templateArguments.update({'place_continent_link': LocationsPage.link_info.getPageLink(instance, geocode.continent.place_id, geocode.continent.provider_id)})
                templateArguments.update({'place_continent': geocode.continent.display_name_short})

            return template('location.tpl', templateArguments)
Пример #6
0
        def func(templateArguments, instance):
            """Return influence data for one source place, grouped by place type.

            Reads start_epoch, end_epoch, source_place_id and
            source_provider_id from request.GET.

            Args:
                templateArguments: unused by this handler.
                instance: instance key of the search stream.

            Returns:
                An empty dict when the instance is unknown or the source place
                ID is invalid. Otherwise {'json': result}, where result has
                the keys 'city', 'country' and 'continent', each mapping to
                {'geocode_list': records, 'total': sum of displayed counts}.
                Each record is
                [placeId, providerId, display_name, coordinate, count,
                bounding_box].
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(
                instance)
            if twitterInstance is None:
                return dict()

            baseEpoch = twitterInstance.constructed_at

            start_epoch = parseInteger(request.GET.start_epoch, default=None)
            end_epoch = parseInteger(request.GET.end_epoch, default=None)
            source_place_id = parseInteger(request.GET.source_place_id)
            source_provider_id = parseInteger(request.GET.source_provider_id)

            if source_place_id is None:
                # Log the raw query value; the parsed value is always None on
                # this branch and would hide what the client actually sent.
                logger.error(
                    'Invalid place ID specified while providing influence data: %s'
                    % unicode(request.GET.source_place_id))
                return dict()

            source_cache_id = GeocodeResultAbstract.buildCacheId(
                source_provider_id, source_place_id)

            temporalCollection = getTemporalInfluenceCollection(instance)

            # A missing epoch leaves that end of the range as None.
            if start_epoch is not None:
                start_time_id = getTimeIdFromTimestamp(
                    baseEpoch, Configuration.TEMPORAL_STEP, start_epoch)
            else:
                start_time_id = None

            if end_epoch is not None:
                end_time_id = getTimeIdFromTimestamp(
                    baseEpoch, Configuration.TEMPORAL_STEP, end_epoch)
            else:
                end_time_id = None

            timerMs = getEpochMs()
            cacheData = getTemporalRange(temporalCollection,
                                         start_time_id,
                                         end_time_id,
                                         source_cache_id,
                                         preciseFromBack=True,
                                         preciseFromFront=True)
            logger.info('Took %dms to read temporal range data' %
                        (getEpochMs() - timerMs))

            timerMs = getEpochMs()

            geocodeByPlaceType = dict()
            totalsByPlaceType = dict()

            if cacheData is not None:
                for providerId, providerIdData in cacheData.iteritems():
                    providerId = int(providerId)

                    for destination, count in providerIdData.iteritems():
                        # Destination keys have the form
                        # '<place type>_<place id>'.
                        split = destination.split('_')
                        placeType = int(split[0])
                        placeId = int(split[1])

                        # Name, coordinate and bounding box are filled in
                        # below, only for the records that get displayed.
                        record = [placeId, providerId, None, None, count, None]

                        geocodeByPlaceType.setdefault(placeType,
                                                      list()).append(record)

                # Process only the records we are going to display.
                for placeType, records in geocodeByPlaceType.iteritems():
                    # Highest counts first (index 4 is the count).
                    aux = sorted(records, key=lambda x: x[4], reverse=True)
                    aux = aux[:Configuration.
                              DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE]
                    # Replacing the value of an existing key is safe while
                    # iterating; the set of keys does not change.
                    geocodeByPlaceType[placeType] = aux

                    for record in aux:
                        cacheId = GeocodeResultAbstract.buildCacheId(
                            record[1], record[0])
                        # NOTE(review): presumably every place referenced here
                        # is in the geocode cache; a miss would raise
                        # AttributeError below - confirm.
                        geocode = geocodeFromCacheById(cacheId)

                        record[2] = geocode.display_name
                        record[3] = geocode.coordinate
                        count = record[4]
                        record[5] = geocode.bounding_box

                        totalsByPlaceType[placeType] = totalsByPlaceType.get(
                            placeType, 0) + count

            def getResultPart(placeType):
                return {
                    'geocode_list': geocodeByPlaceType.get(placeType, list()),
                    'total': totalsByPlaceType.get(placeType, 0)
                }

            resultData = dict()
            resultData['city'] = getResultPart(
                GeocodeResultAbstract.PlaceTypes.CITY)
            resultData['country'] = getResultPart(
                GeocodeResultAbstract.PlaceTypes.COUNTRY)
            resultData['continent'] = getResultPart(
                GeocodeResultAbstract.PlaceTypes.CONTINENT)

            logger.info('Took %dms to build temporal range result data' %
                        (getEpochMs() - timerMs))

            return {'json': resultData}
Пример #7
0
        def func(templateArguments, instance):
            """Serve a page of cached tweet or user rows for a place as JSON.

            Query parameters (request.GET): type ('tweet' or 'user'),
            start_epoch, end_epoch, page, place_id, provider_id,
            projection_type, followee.

            Args:
                templateArguments: unused by this handler.
                instance: instance key whose cache is read.

            Returns:
                {'json': rows} on success, or the result of
                redirect_problem(...) when 'type' is missing, the projection
                type is unsupported, or 'followee' is missing for a user
                query.
            """
            dataType = parseString(request.GET.type, ['tweet', 'user'])
            start_epoch = parseInteger(request.GET.start_epoch)
            end_epoch = parseInteger(request.GET.end_epoch)
            page_num = parseInteger(request.GET.page)
            place_id = parseInteger(request.GET.place_id)
            provider_id = parseInteger(request.GET.provider_id)
            projection_type = parseString(request.GET.projection_type)
            followee = parseInteger(request.GET.followee)

            cache_id = GeocodeResultAbstract.buildCacheId(
                provider_id, place_id)

            if dataType is None:
                return redirect_problem('type is a required argument')

            # Default to the first page when none was requested.
            if page_num is None:
                page_num = 0

            data = []
            if dataType == 'tweet':
                tweets = readTweetsFromCache(
                    None, instance, cache_id, start_epoch, end_epoch, page_num,
                    TwitterCachePage.PAGE_SIZE_FULL_DATA)
                if tweets is not None:
                    for tweet in tweets:
                        assert isinstance(tweet, Tweet)
                        userHtml = UserInformationPage.getPageLinkImage(
                            instance, tweet.user, target='_self')

                        data.append([
                            tweet.created_at, userHtml,
                            tweet.user.location_text, tweet.text
                        ])

            elif dataType == 'user':
                # `not projection_type` also covers None, which parseString
                # can return (dataType is checked against None above);
                # len(None) would raise TypeError.
                if not projection_type:
                    projection = None
                    pageSize = TwitterCachePage.PAGE_SIZE_FULL_DATA
                elif projection_type == 'name-only':
                    projection = UserProjection.IdNameImage()
                    pageSize = TwitterCachePage.PAGE_SIZE_ID_NAME_DATA
                else:
                    return redirect_problem('Unsupported projection type: %s' %
                                            projection_type)

                if followee is None:
                    return redirect_problem('Followee is required')

                users = readUsersFromCache(None,
                                           instance,
                                           cache_id,
                                           start_epoch,
                                           end_epoch,
                                           page_num,
                                           pageSize,
                                           followee,
                                           userProjection=projection)
                if users is not None:
                    for user in users:
                        assert isinstance(user, User)
                        data.append([
                            user.id, user.name, user.profile_image_url,
                            UserInformationPage.link_info.getPageLink(
                                instance, user.id)
                        ])

            return {'json': data}
Пример #8
0
        def func(templateArguments, instance):
            """Build the influence summary of a source place for the JSON API.

            Query parameters (request.GET): start_epoch, end_epoch,
            source_place_id, source_provider_id.

            Args:
                templateArguments: unused by this handler.
                instance: instance key of the search stream.

            Returns:
                dict() if the instance is not found or source_place_id does
                not parse; otherwise {'json': {'city': part, 'country': part,
                'continent': part}} where each part is
                {'geocode_list': [...], 'total': int}. Records in
                geocode_list are
                [placeId, providerId, display_name, coordinate, count,
                bounding_box]; 'total' sums the counts of the listed records
                only.
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
            if twitterInstance is None:
                return dict()

            baseEpoch = twitterInstance.constructed_at

            start_epoch = parseInteger(request.GET.start_epoch, default=None)
            end_epoch = parseInteger(request.GET.end_epoch, default=None)
            source_place_id = parseInteger(request.GET.source_place_id)
            source_provider_id = parseInteger(request.GET.source_provider_id)

            if source_place_id is None:
                # The parsed value is None here by definition, so report the
                # raw query string value the client supplied instead.
                logger.error('Invalid place ID specified while providing influence data: %s' % unicode(request.GET.source_place_id))
                return dict()

            source_cache_id = GeocodeResultAbstract.buildCacheId(source_provider_id, source_place_id)

            temporalCollection = getTemporalInfluenceCollection(instance)

            if start_epoch is not None:
                start_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, start_epoch)
            else:
                start_time_id = None

            if end_epoch is not None:
                end_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, end_epoch)
            else:
                end_time_id = None

            timerMs = getEpochMs()
            cacheData = getTemporalRange(temporalCollection, start_time_id, end_time_id, source_cache_id, preciseFromBack=True, preciseFromFront=True)
            logger.info('Took %dms to read temporal range data' % (getEpochMs() - timerMs))

            timerMs = getEpochMs()

            geocodeByPlaceType = dict()
            totalsByPlaceType = dict()

            if cacheData is not None:
                for providerId, providerIdData in cacheData.iteritems():
                    providerId = int(providerId)

                    for destination, count in providerIdData.iteritems():
                        # Keys look like '<place type>_<place id>'.
                        split = destination.split('_')
                        placeType = int(split[0])
                        placeId = int(split[1])

                        # The None slots (display name, coordinate, bounding
                        # box) are filled in later for displayed records.
                        record = [placeId,
                                  providerId,
                                  None,
                                  None,
                                  count,
                                  None]

                        geocodeByPlaceType.setdefault(placeType, list()).append(record)

                # Process only the records we are going to display.
                for placeType, records in geocodeByPlaceType.iteritems():
                    # Sort by count (index 4), largest first, then truncate.
                    aux = sorted(records, key=lambda x: x[4], reverse=True)
                    aux = aux[:Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE]
                    geocodeByPlaceType[placeType] = aux

                    for record in aux:
                        cacheId = GeocodeResultAbstract.buildCacheId(record[1], record[0])
                        # NOTE(review): assumes the place is always cached; a
                        # None result would raise AttributeError - confirm.
                        geocode = geocodeFromCacheById(cacheId)

                        record[2] = geocode.display_name
                        record[3] = geocode.coordinate
                        count = record[4]
                        record[5] = geocode.bounding_box

                        totalsByPlaceType[placeType] = totalsByPlaceType.get(placeType, 0) + count

            def getResultPart(placeType):
                return {'geocode_list': geocodeByPlaceType.get(placeType, list()), 'total': totalsByPlaceType.get(placeType, 0)}

            resultData = dict()
            resultData['city'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CITY)
            resultData['country'] = getResultPart(GeocodeResultAbstract.PlaceTypes.COUNTRY)
            resultData['continent'] = getResultPart(GeocodeResultAbstract.PlaceTypes.CONTINENT)

            logger.info('Took %dms to build temporal range result data' % (getEpochMs() - timerMs))

            return {'json': resultData}
Пример #9
0
    def manageSocket(self, webSocket, tupleArguments, socketId):
        """Stream cached users and/or tweets to the client over socket tunnels.

        Reads its options from request.GET: batchSizeBytes, tweet_info,
        follower_info_full, follower_info_short, provider_id, place_id,
        start_epoch and end_epoch. Exactly one output mode is used; full
        follower info wins over tweet info, which wins over short follower
        info, and full follower info is the default when none is requested.

        Output is written in batches: once more than bytesPerBatch characters
        have been sent (or the cache read has finished) the tunnels are
        closed, 'onBatchEnd();' is executed on the client, and - unless
        finished - the tunnels are reopened and the headers re-sent.

        Args:
            webSocket: the socket to stream to.
            tupleArguments: sequence whose first element is the instance key.
            socketId: unused here.

        Returns:
            Always False (on failure and on completion alike).
        """
        instanceId = tupleArguments[0]

        mainControl = webSocket.controls[self.key]
        assert isinstance(mainControl, DocumentControl)

        bytesPerBatch = parseInteger(request.GET.batchSizeBytes, maximum=1024 * 1024 * 256, default=1024 * 1024 * 1)
        tweetInfo = parseBoolean(request.GET.tweet_info, False)
        followerInfo = parseBoolean(request.GET.follower_info_full, False)
        followerInfoShort = parseBoolean(request.GET.follower_info_short, False)
        providerId = parseInteger(request.GET.provider_id)
        placeId = parseInteger(request.GET.place_id)
        startEpoch = parseInteger(request.GET.start_epoch)
        endEpoch = parseInteger(request.GET.end_epoch)

        # Only build a place cache ID when both halves of it were supplied.
        if placeId is not None and providerId is not None:
            placeCacheId = GeocodeResultAbstract.buildCacheId(providerId, placeId)
        else:
            placeCacheId = None

        # Normalise the mode flags so that exactly one of them is set.
        if followerInfo:
            tweetInfo = False
            followerInfoShort = False
        elif tweetInfo:
            followerInfo = False
            followerInfoShort = False
        elif followerInfoShort:
            followerInfo = False
            tweetInfo = False
        else:
            followerInfo = True

        userTunnelId = 'user_tunnel'
        tweetTunnelId = None

        if tweetInfo:
            tweetTunnelId = 'tweet_tunnel'

        def openRequiredTunnels():
            if tweetInfo:
                return self.openTunnels(webSocket)
            else:
                return self.openTunnel(userTunnelId, webSocket)

        if not openRequiredTunnels():
            logger.error('Failed to open initial tunnels')
            return False

        if tweetInfo:
            followerIdsFlag = False
            followeeIdsFlag = False
            analysisFlag = False
            isFollowersLoadedRequirement = None
            associatedWithTweetRequirement = True
            recursiveCacheFlag = False
            followerIdsProjection = None
            outputType = 1  # for csv.
        elif followerInfo:
            followerIdsFlag = True
            followeeIdsFlag = True
            analysisFlag = True
            isFollowersLoadedRequirement = True
            associatedWithTweetRequirement = None
            recursiveCacheFlag = True
            followerIdsProjection = None  # this gives us all data on each follower.
            outputType = 2
        elif followerInfoShort:
            followerIdsFlag = True
            followeeIdsFlag = True
            followerIdsProjection = NoQueryProjection()
            analysisFlag = True
            isFollowersLoadedRequirement = True
            associatedWithTweetRequirement = None
            recursiveCacheFlag = True
            outputType = 3
        else:
            # Not reachable after the flag normalisation above; kept as a
            # guard in case a new mode is added without a configuration.
            raise NotImplementedError()

        userProjection = UserProjection(True,
                                        True,
                                        None,
                                        True,
                                        followerIdsFlag,
                                        followerIdsProjection,
                                        followeeIdsFlag,
                                        UserProjection.Id(),
                                        True,
                                        False,
                                        False,
                                        True,
                                        True,
                                        False,
                                        False,
                                        False,
                                        False,
                                        analysisFlag)

        # One-element lists are used as mutable cells so the nested functions
        # below can update the values (no `nonlocal` in Python 2).
        isFirstIteration = [True]

        twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instanceId)
        if twitterInstance is None:
            return False

        twitterSession = twitterInstance.twitter_thread.twitter_session
        progressBarTotalId = 'progress-bar-total'
        progressBarCurrentBatchId = 'progress-bar-current-batch'

        signaler = EventSignaler(self.key, [webSocket])

        updateProgressBarFreq = Timer(400, True)

        def sendData(tunnelId, data):
            self.sendDataOnTunnel(webSocket, tunnelId, (unicode(data) + '\r\n'))

        def sendHeader():
            sendData(userTunnelId, getUserHeader(outputType))

            if tweetTunnelId is not None:
                sendData(tweetTunnelId, getTweetHeader())

        def doProgressBarChange(percentage, progressBarId):
            mainControl.executeJavascript('$("#%s").width("%.3f%%");' % (progressBarId, percentage))

        sendHeader()

        counter = [0]
        previousCounter = [0]

        def updateSocket(controls,
                         data,
                         bytesCounter=counter,
                         bytesPerBatch=bytesPerBatch,
                         previousCounter=previousCounter,
                         isFirstIteration=isFirstIteration):
            # The default arguments deliberately bind the shared mutable cells
            # created above, so state persists across calls.
            user = data['user_data']
            tweet = data['tweet_data']
            percentage = data['percentage']
            isFinished = data['isFinished']

            control = controls[self.key]
            assert isinstance(control, DocumentControl)

            def updateProgressBars():
                previousCounter[0] = thisCounter = bytesCounter[0]

                percentageCurrentBatch = float(thisCounter) / float(bytesPerBatch) * 100
                percentageTotal = percentage

                if percentageTotal >= 100:
                    percentageCurrentBatch = 100

                # During the first batch never show less batch progress than
                # overall progress.
                if isFirstIteration[0] and percentageCurrentBatch < percentageTotal:
                    percentageCurrentBatch = percentageTotal

                doProgressBarChange(percentageTotal, progressBarTotalId)
                doProgressBarChange(percentageCurrentBatch, progressBarCurrentBatchId)

            if previousCounter[0] != bytesCounter[0] and updateProgressBarFreq.ticked():
                updateProgressBars()

            # Count everything sent in this call. In tweet mode both the user
            # row and the tweet row are sent, so the lengths are summed rather
            # than taking only the last representation (which under-counted
            # the batch size).
            sentLength = 0
            if user is not None:
                assert isinstance(user, User)
                userRepresentation = getUserRepresentation(user, outputType)
                sendData(userTunnelId, userRepresentation)
                sentLength += len(userRepresentation)

            if tweet is not None:
                assert isinstance(tweet, Tweet)
                tweetRepresentation = getTweetRepresentation(tweet)
                sendData(tweetTunnelId, tweetRepresentation)
                sentLength += len(tweetRepresentation)

            bytesCounter[0] += sentLength

            if bytesCounter[0] > bytesPerBatch or isFinished:
                updateProgressBars()
                isFirstIteration[0] = False

                bytesCounter[0] = 0
                mainControl.executeJavascript('onBatchEnd();')

                self.closeTunnels(webSocket)

                if not isFinished:
                    logger.debug('Waiting to receive next data provider')
                    if not openRequiredTunnels():
                        logger.warning('Failed to reinitialize tunnel slots')
                        webSocket.cleanup()
                        return

                    sendHeader()
                else:
                    mainControl.executeJavascript('onFinished();')

                    webSocket.cleanup()

        def onCacheIteration(iteration, total, isFinished, data, iteratorId):
            # Don't write followee data to output as it would duplicate alot of data.
            if iteratorId == 'followee':
                data = None

            # The return value reports whether the socket is still open.
            running = not webSocket.is_cleaned_up
            if running:
                # We need to do this so that if the client closes the socket we are notified.
                webSocket.pingFreqLimited()

                percentage = getPercentage(iteration, total)

                user = None
                tweet = None
                if data is None:
                    pass
                elif isinstance(data, User):
                    user = data
                elif isinstance(data, Tweet):
                    tweet = data
                    if tweet.has_user:
                        user = tweet.user
                else:
                    logger.error('Invalid data from cache, type: %s' % type(data))
                    return running

                signaler.signalEvent({SignalActions.SOCKET: updateSocket, 'percentage': percentage, 'user_data': user, 'tweet_data': tweet, 'isFinished': isFinished})
                gevent.sleep(0)
            else:
                logger.debug('Ending cache download prematurely')

            return running

        logger.debug('Starting to read data from cache...')

        # This makes sure the search is finite.
        epochNow = getEpochMs()
        if endEpoch is None or endEpoch > epochNow:
            endEpoch = epochNow

        if followerInfo or followerInfoShort:
            readUsersFromCache(twitterSession,
                               instanceId,
                               placeId=placeCacheId,
                               epochMsStartRange=startEpoch,
                               epochMsEndRange=endEpoch,
                               isFollowersLoadedRequirement=isFollowersLoadedRequirement,
                               associatedWithTweetRequirement=associatedWithTweetRequirement,
                               onIterationFunc=onCacheIteration,
                               recursive=recursiveCacheFlag,
                               userProjection=userProjection)
        else:
            readTweetsFromCache(twitterSession,
                                instanceId,
                                placeId=placeCacheId,
                                epochMsStartRange=startEpoch,
                                epochMsEndRange=endEpoch,
                                onIterationFunc=onCacheIteration,
                                retrieveUserData=True,
                                userProjection=userProjection)

        # We want to cleanup everything now since we are done.
        return False