Exemplo n.º 1
0
def geocodeFromCacheById(cacheId, inMemoryOnly=None):
    """Resolve a cache ID (or a list of candidate cache IDs) to a geocode result.

    cacheId: a cache ID mapping, optionally carrying an 'importance_rating'
        entry, or a list of cache IDs from which the first one whose
        provider is intended for direct use is picked.
    inMemoryOnly: when truthy the database is never consulted; None is
        treated as False.

    Returns the geocode result, or None when nothing could be resolved.
    """
    memoryOnly = False if inMemoryOnly is None else inMemoryOnly

    importanceRating = None
    if 'importance_rating' in cacheId:
        importanceRating = cacheId['importance_rating']
        # Work on a copy so the caller's mapping is left untouched, and drop
        # the rating so it does not end up in the mongoDB query below.
        cacheId = dict(cacheId)
        del cacheId['importance_rating']

    if isinstance(cacheId, list):
        for candidate in cacheId:
            if isIntendedForDirectUse(GeocodeResultAbstract.getProviderIdFromCacheId(candidate)):
                cacheId = candidate
                break
        else:
            # No candidate qualified; cacheId is still the full list here.
            logger.error('Could not find useful ID from ID list %s' % (unicode(cacheId)))
            return None

    if not isIntendedForDirectUse(GeocodeResultAbstract.getProviderIdFromCacheId(cacheId)):
        # Providers not intended for direct use are resolved through GNS.
        gnsGeocode = GeocodeResultAbstract.getGnsByPlaceId(GeocodeResultAbstract.getPlaceIdFromCacheId(cacheId))
        if gnsGeocode is None:
            logger.error('Failed to retrieve GNS data with cache ID: %s' % unicode(cacheId))
        return gnsGeocode

    cacheKey = GeocodeResultAbstract.buildTupleFromCacheId(cacheId)
    geocode = inMemoryCacheGeocodeData.get(cacheKey, None)

    if geocode is None:
        if memoryOnly:
            return None

        db = getDatabase()
        assert isinstance(db, Database)
        record = db.place.find_one({'_id' : cacheId})
        if record is None:
            logger.warn('Could not find place cache ID in database: %s' % unicode(cacheId))
            return None

        geocode = buildGeocodeResult(record['place_data'], cacheId['providerId'], importanceRating)

    # Re-insert even on a hit: per the original note the container is an
    # ordered dict, so this moves the entry to the back and yields a
    # 'least recently used' cache.
    if geocode is not None:
        inMemoryCacheGeocodeData[cacheKey] = geocode

    return geocode
Exemplo n.º 2
0
    def addTemporalEntry(self, temporalCollection, timeId, userProviderId, userPlaceId, followerProviderId, followerPlaceId, followerPlaceType):
        """Record a temporal entry from a user's place towards a follower's place.

        Does nothing when the instance is shut down. Otherwise the entry is
        written through the module-level addTemporalEntry function, and timeId
        is remembered (in memory and via setInstanceTemporalSourceLastTime) as
        the last time ID seen for the user's place.
        """
        if not self.is_shutdown:
            sourceKey = GeocodeResultAbstract.buildCacheIdTuple(userProviderId, userPlaceId)
            sourceCacheId = GeocodeResultAbstract.buildCacheId(userProviderId, userPlaceId)
            # None when this source has not produced an entry before.
            previousTimeId = self.last_temporal_time_id_by_source.get(sourceKey, None)

            # Destination key has the form '<placeType>_<placeId>'.
            destination = '%s_%s' % (followerPlaceType, followerPlaceId)
            addTemporalEntry(temporalCollection, previousTimeId, timeId, sourceCacheId, destination, followerProviderId)

            self.last_temporal_time_id_by_source[sourceKey] = timeId
            setInstanceTemporalSourceLastTime(self.instance_key, userProviderId, userPlaceId, timeId)
Exemplo n.º 3
0
    def addTemporalEntry(self, temporalCollection, timeId, userProviderId, userPlaceId, followerProviderId, followerPlaceId, followerPlaceType):
        """Store one temporal entry linking a user's place to a follower's place.

        A shut-down instance ignores the call. For a live instance the entry
        is handed to the module-level addTemporalEntry function together with
        the previously recorded time ID of the same source (None if there is
        none), after which timeId becomes the new last time ID of that source.
        """
        if not self.is_shutdown:
            userKey = GeocodeResultAbstract.buildCacheIdTuple(userProviderId, userPlaceId)
            userCacheId = GeocodeResultAbstract.buildCacheId(userProviderId, userPlaceId)
            lastSeenTimeId = self.last_temporal_time_id_by_source.get(userKey, None)

            # The follower's place is addressed as '<placeType>_<placeId>'.
            followerKey = '%s_%s' % (followerPlaceType, followerPlaceId)
            addTemporalEntry(temporalCollection, lastSeenTimeId, timeId, userCacheId, followerKey, followerProviderId)

            # Keep the in-memory record and the stored record in step.
            self.last_temporal_time_id_by_source[userKey] = timeId
            setInstanceTemporalSourceLastTime(self.instance_key, userProviderId, userPlaceId, timeId)
Exemplo n.º 4
0
        def func(templateArguments):
            """Search places by the requested name and return them as JSON data.

            Reads 'place_name' from the request. Results come from the GNS
            name search first (asked for 3) and, while fewer than 10 were
            found, from geocodeSearch for the remainder.

            Returns {'json': rows} where each row is a tuple of
            (cache_id, bounding_box, coordinate, display_name); rows is empty
            when no place name was supplied.
            """
            placeName = parseString(request.GET.place_name)
            if placeName is None:
                return {'json' : list()}

            matches = list()
            matches += GeocodeResultAbstract.searchGnsByName(placeName,3)
            remaining = 10 - len(matches)

            if remaining > 0:
                extraMatches = geocodeSearch(self.provider_id,placeName,remaining)
                # Budget is kept up to date although nothing reads it afterwards.
                remaining -= len(extraMatches)
                matches += extraMatches

            def toRow(match):
                assert isinstance(match,GeocodeResultAbstract)
                return (match.cache_id,match.bounding_box,match.coordinate,match.display_name)

            return {'json' : [toRow(match) for match in matches]}
Exemplo n.º 5
0
        def func(templateArguments):
            """Look up places matching the 'place_name' request argument.

            The GNS name search is queried first (asked for 3 results); if
            that leaves room within the overall limit of 10, geocodeSearch is
            asked for the remainder.

            Returns {'json': rows}, one (cache_id, bounding_box, coordinate,
            display_name) tuple per place, or an empty list when the request
            carries no place name.
            """
            rows = list()

            placeName = parseString(request.GET.place_name)
            if placeName is None:
                return {'json': rows}

            found = list()
            found += GeocodeResultAbstract.searchGnsByName(placeName, 3)
            budget = 10 - len(found)

            if budget > 0:
                additional = geocodeSearch(self.provider_id, placeName, budget)
                # Budget bookkeeping retained; it is not read again below.
                budget -= len(additional)
                found += additional

            for place in found:
                assert isinstance(place, GeocodeResultAbstract)
                row = (place.cache_id, place.bounding_box,
                       place.coordinate, place.display_name)
                rows.append(row)

            return {'json': rows}
Exemplo n.º 6
0
        def onInstanceLoadFunc(instanceKey,
                               oauthToken,
                               oauthSecret,
                               geographicSetupString,
                               keywords,
                               instanceSetupCode,
                               startTime,
                               temporalLastTimeId,
                               count=count):
            """Rebuild a TwitterInstance from its stored settings.

            temporalLastTimeId is a two-level mapping (provider ID -> place ID
            -> time ID); its keys and values are converted with int() and
            flattened into a mapping keyed by cache-ID tuple before being
            passed to TwitterInstance. count is a one-element list whose first
            item is incremented for every instance loaded.
            """
            lastTimeIdBySource = dict()
            for rawProviderId, placeTimes in temporalLastTimeId.iteritems():
                providerId = int(rawProviderId)
                for rawPlaceId, rawTimeId in placeTimes.iteritems():
                    placeId = int(rawPlaceId)
                    timeId = int(rawTimeId)
                    sourceKey = GeocodeResultAbstract.buildCacheIdTuple(
                        providerId, placeId)
                    lastTimeIdBySource[sourceKey] = timeId
                    logger.debug(
                        'Loaded instance %s last temporal change source %d/%d -> %d'
                        % (instanceKey, providerId, placeId, timeId))

            instances = webApplication.twitter_instances
            authentication = TwitterAuthentication(
                Configuration.CONSUMER_TOKEN, Configuration.CONSUMER_SECRET,
                oauthToken, oauthSecret)
            TwitterInstance(instanceKey, instances, authentication,
                            unicode(geographicSetupString), keywords,
                            instanceSetupCode, startTime, lastTimeIdBySource)

            # Originally added while debugging indexes; may be removed.
            if args.rebuild_instance_indexes:
                logger.info('Dropping indexes of instance %s' % instanceKey)
                for getCollection in (getUserCollection, getTweetCollection):
                    getCollection(instanceKey).drop_indexes()

            count[0] = count[0] + 1
Exemplo n.º 7
0
        def func(templateArguments, instance, location, provider):
            """Render 'location.tpl' for one place of a running instance.

            instance: key of the instance to look up.
            location, provider: place ID and provider ID used to build the
                cache ID of the place.

            Aborts with HTTP 404 when no instance is registered under the key
            or when the place cannot be resolved to a geocode result;
            otherwise fills templateArguments and returns the rendered
            template.
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(
                instance)
            if twitterInstance is None:
                abort(404, "No active search stream found at this address")

            geocode = geocodeFromCacheById(
                GeocodeResultAbstract.buildCacheId(provider, location))
            if geocode is None:
                # location/provider come from the request, so an unknown place
                # is a client error. The former assert turned it into a server
                # error and vanished entirely under 'python -O'.
                abort(404, "No location found at this address")

            instanceDescription = getInstanceDescription(twitterInstance)
            instanceLink = getInstanceLink(twitterInstance)

            homeLink = getHomeLink(Configuration.PROJECT_NAME)

            templateArguments.update(
                {
                    'home_link': homeLink,
                    'instance': instance,
                    'location': location,
                    'provider': provider,
                    'instance_link': instanceLink,
                    'instance_description': instanceDescription,
                    'place': geocode.display_name_short,
                    'place_coord': geocode.coordinate,
                    'startEpoch': twitterInstance.constructed_at,
                    # Client needs to offset to this epoch in case its clock
                    # is wrong.
                    'server_current_epoch': getEpochMs(),
                    'max_tweets': Configuration.MAX_CLIENT_LIVE_TWEETS
                }
            )

            if geocode.has_bounding_box:
                templateArguments.update(
                    {'place_bounding_box': geocode.bounding_box})

            if geocode.has_country:
                templateArguments.update({
                    'place_country_link':
                    LocationsPage.link_info.getPageLink(
                        instance, geocode.country.place_id,
                        geocode.country.provider_id)
                })
                templateArguments.update(
                    {'place_country': geocode.country.display_name_short})

            if geocode.has_continent:
                templateArguments.update({
                    'place_continent_link':
                    LocationsPage.link_info.getPageLink(
                        instance, geocode.continent.place_id,
                        geocode.continent.provider_id)
                })
                templateArguments.update(
                    {'place_continent': geocode.continent.display_name_short})

            return template('location.tpl', templateArguments)
Exemplo n.º 8
0
        def func(templateArguments, instance):
            """Return one page of cached tweets or users as JSON data.

            Request arguments read: type ('tweet' or 'user', required),
            start_epoch, end_epoch, page (defaults to 0), place_id,
            provider_id, projection_type (empty or 'name-only'; only used for
            users) and followee (required for users).

            Returns {'json': rows}, or the result of redirect_problem() when
            an argument is missing or unsupported. Tweet rows are
            [created_at, user link HTML, user location text, text]; user rows
            are [id, name, profile image URL, user page link].
            """
            dataType = parseString(request.GET.type,['tweet','user'])
            start_epoch = parseInteger(request.GET.start_epoch)
            end_epoch = parseInteger(request.GET.end_epoch)
            page_num = parseInteger(request.GET.page)
            place_id = parseInteger(request.GET.place_id)
            provider_id = parseInteger(request.GET.provider_id)
            projection_type = parseString(request.GET.projection_type)
            followee = parseInteger(request.GET.followee)

            cache_id = GeocodeResultAbstract.buildCacheId(provider_id, place_id)

            if dataType is None:
                return redirect_problem('type is a required argument')

            if page_num is None:
                page_num = 0

            data = []
            if dataType == 'tweet':
                tweets = readTweetsFromCache(None, instance, cache_id, start_epoch, end_epoch, page_num, TwitterCachePage.PAGE_SIZE_FULL_DATA)
                if tweets is not None:
                    for tweet in tweets:
                        assert isinstance(tweet, Tweet)
                        userHtml = UserInformationPage.getPageLinkImage(instance, tweet.user, target='_self')

                        data.append([ tweet.created_at,
                                      userHtml,
                                      tweet.user.location_text,
                                      tweet.text ])

            elif dataType == 'user':
                # parseString can return None (see the dataType check above),
                # and len(None) raises TypeError; a truthiness test treats a
                # missing and an empty projection type the same way.
                if not projection_type:
                    projection = None
                    pageSize = TwitterCachePage.PAGE_SIZE_FULL_DATA
                elif projection_type == 'name-only':
                    projection = UserProjection.IdNameImage()
                    pageSize = TwitterCachePage.PAGE_SIZE_ID_NAME_DATA
                else:
                    return redirect_problem('Unsupported projection type: %s' % projection_type)

                if followee is None:
                    return redirect_problem('Followee is required')

                users = readUsersFromCache(None, instance, cache_id, start_epoch, end_epoch, page_num, pageSize, followee, userProjection=projection)
                if users is not None:
                    for user in users:
                        assert isinstance(user, User)
                        data.append([user.id,
                                     user.name,
                                     user.profile_image_url,
                                     UserInformationPage.link_info.getPageLink(instance, user.id)])

            return {'json' : data}
Exemplo n.º 9
0
            def addTemporalEntryForCurrentUser(follower):
                """Add a temporal entry for every (user place, follower place) pair.

                The time ID is derived once from the current epoch; every
                cache ID of the enclosing scope's user is then combined with
                every geocode result of the follower and passed to
                instance.addTemporalEntry.
                """
                nowTimeId = getTimeIdFromTimestamp(startTime,
                                                   Configuration.TEMPORAL_STEP,
                                                   getEpochMs())

                sourceCacheIds = user.location_geocode.all_geocode_results_cache_id
                targetResults = follower.location_geocode.all_geocode_results

                for sourceCacheId in sourceCacheIds:
                    sourcePlaceId = GeocodeResultAbstract.getPlaceIdFromCacheId(
                        sourceCacheId)
                    sourceProviderId = GeocodeResultAbstract.getProviderIdFromCacheId(
                        sourceCacheId)

                    for target in targetResults:
                        targetPlaceId, targetProviderId, targetPlaceType = (
                            target.place_id, target.provider_id,
                            target.place_type)

                        instance.addTemporalEntry(
                            temporalCollection, nowTimeId, sourceProviderId,
                            sourcePlaceId, targetProviderId, targetPlaceId,
                            targetPlaceType)
Exemplo n.º 10
0
        def func(templateArguments, instance, location, provider):
            """Render 'location.tpl' for one place of a running instance.

            instance: key of the instance to look up.
            location, provider: place ID and provider ID used to build the
                cache ID of the place.

            Aborts with HTTP 404 when no instance is registered under the key
            or when the place cannot be resolved to a geocode result;
            otherwise fills templateArguments and returns the rendered
            template.
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
            if twitterInstance is None:
                abort(404, "No active search stream found at this address")

            geocode = geocodeFromCacheById(GeocodeResultAbstract.buildCacheId(provider,location))
            if geocode is None:
                # location/provider come from the request, so an unknown place
                # is a client error. The former assert turned it into a server
                # error and vanished entirely under 'python -O'.
                abort(404, "No location found at this address")

            instanceDescription = getInstanceDescription(twitterInstance)
            instanceLink = getInstanceLink(twitterInstance)

            homeLink = getHomeLink(Configuration.PROJECT_NAME)

            templateArguments.update({'home_link' : homeLink,
                                      'instance' : instance,
                                      'location' : location,
                                      'provider' : provider,
                                      'instance_link' : instanceLink,
                                      'instance_description' : instanceDescription,
                                      'place' : geocode.display_name_short,
                                      'place_coord' : geocode.coordinate,
                                      'startEpoch' : twitterInstance.constructed_at,
                                      # Client needs to offset to this epoch in case its clock is wrong.
                                      'server_current_epoch' : getEpochMs(),
                                      'max_tweets' : Configuration.MAX_CLIENT_LIVE_TWEETS})

            if geocode.has_bounding_box:
                templateArguments.update({'place_bounding_box' : geocode.bounding_box})

            if geocode.has_country:
                templateArguments.update({'place_country_link' : LocationsPage.link_info.getPageLink(instance, geocode.country.place_id, geocode.country.provider_id)})
                templateArguments.update({'place_country' : geocode.country.display_name_short})

            if geocode.has_continent:
                templateArguments.update({'place_continent_link' : LocationsPage.link_info.getPageLink(instance, geocode.continent.place_id, geocode.continent.provider_id)})
                templateArguments.update({'place_continent' : geocode.continent.display_name_short})

            return template('location.tpl', templateArguments)
Exemplo n.º 11
0
        def onInstanceLoadFunc(instanceKey,
                               oauthToken,
                               oauthSecret,
                               geographicSetupString,
                               keywords,
                               instanceSetupCode,
                               startTime,
                               temporalLastTimeId,
                               count=count):
            """Rebuild a TwitterInstance (marked critical) from its stored settings.

            temporalLastTimeId is a two-level mapping (provider ID -> place ID
            -> time ID); its keys and values are converted with int() and
            flattened into a mapping keyed by cache-ID tuple before being
            passed to TwitterInstance. count is a one-element list whose first
            item is incremented for every instance loaded.
            """
            lastTimeIdBySource = dict()
            for rawProviderId, placeTimes in temporalLastTimeId.iteritems():
                providerId = int(rawProviderId)
                for rawPlaceId, rawTimeId in placeTimes.iteritems():
                    placeId = int(rawPlaceId)
                    timeId = int(rawTimeId)
                    sourceKey = GeocodeResultAbstract.buildCacheIdTuple(providerId, placeId)
                    lastTimeIdBySource[sourceKey] = timeId
                    logger.debug('Loaded instance %s last temporal change source %d/%d -> %d' % (instanceKey, providerId, placeId, timeId))

            instances = webApplication.twitter_instances
            authentication = TwitterAuthentication(Configuration.CONSUMER_TOKEN, Configuration.CONSUMER_SECRET, oauthToken, oauthSecret)

            # Critical because it once worked, if it fails when we restarted
            # then maybe our server lost network connectivity.
            TwitterInstance(instanceKey,
                            instances,
                            authentication,
                            unicode(geographicSetupString),
                            keywords,
                            instanceSetupCode,
                            startTime,
                            lastTimeIdBySource,
                            isCritical=True)

            # Originally added while debugging indexes; may be removed.
            if args.rebuild_instance_indexes:
                logger.info('Dropping indexes of instance %s' % instanceKey)
                for getCollection in (getUserCollection, getTweetCollection):
                    getCollection(instanceKey).drop_indexes()

            count[0] = count[0] + 1
Exemplo n.º 12
0
    def continueExtractItemFromData(self, data, instanceData, signalerKey):
        """Return the set of keys stored in instanceData for the signaler's place.

        The place is identified by the cache-ID tuple built from
        signalerKey.provider and signalerKey.location; an empty set is
        returned when instanceData has no entry for it. The data argument is
        not used.
        """
        placeId, providerId = signalerKey.location, signalerKey.provider
        cacheKey = GeocodeResultAbstract.buildCacheIdTuple(providerId, placeId)
        entries = instanceData.get(cacheKey, dict())
        return set(entries.keys())
Exemplo n.º 13
0
    def extractItemFromData(self, data, signalerKey):
        """Summarise tweet throughput for an instance, or one of its locations.

        When signalerKey carries both provider_id and place_id the counts are
        taken from data['location_tweets'] for that location, otherwise from
        data['instance_tweets']. Geocode-failure counts are always read from
        the instance-level entry of the selected section.

        Returns None when there is no entry for the instance (or location).
        Otherwise returns (newData, payload): newData is False only if every
        'updated' flag is False, and payload is a JSON string holding two
        rows of [perSecond, perMinute, perHour, perDay], successes first and
        geocode failures second. Missing success rates are shown as '< 1'
        when the per-day count is positive.
        """
        def readCounts(performance):
            # (perDay, perHour, perMinute, updatedFlags); all empty when the
            # performance record is absent.
            if performance is None:
                return None, None, None, (False, False, False)

            assert isinstance(performance, RealtimePerformance)
            perDay, dayUpdated = performance.tweets_per_day.time_period_count_updated(True,True,True)
            perHour, hourUpdated = performance.tweets_per_hour.time_period_count_updated(True,True,True)
            perMinute, minuteUpdated = performance.tweets_per_minute.time_period_count_updated(True,True,True)
            return perDay, perHour, perMinute, (dayUpdated, hourUpdated, minuteUpdated)

        def deriveRates(perMinute, perHour, perDay):
            # Fill the gaps: per-second from per-minute, and extrapolate a
            # missing hour/day figure from the next finer one.
            if perMinute is None:
                perSecond = None
            else:
                perSecond = int(perMinute / 60)
                if perHour is None:
                    perHour = perMinute * 60

            if perDay is None and perHour is not None:
                perDay = perHour * 24

            return perSecond, perMinute, perHour, perDay

        if signalerKey.provider_id is not None and signalerKey.place_id is not None:
            data = data['location_tweets']
            locationCacheId = GeocodeResultAbstract.buildCacheIdTuple(signalerKey.provider_id, signalerKey.place_id)
        else:
            data = data['instance_tweets']
            locationCacheId = None

        instanceData = data.get(signalerKey.instance)
        if instanceData is None:
            return None

        scopedData = instanceData
        if locationCacheId is not None:
            scopedData = instanceData.get(locationCacheId)
            if scopedData is None:
                return None

        tweetsPerDay, tweetsPerHour, tweetsPerMinute, successFlags = readCounts(scopedData.get('success',None))
        # NOTE(review): failures are looked up on the instance-level entry
        # even for a location key, as in the original code - confirm intended.
        failPerDay, failPerHour, failPerMinute, failFlags = readCounts(instanceData.get('geocode_fail'))

        # Identity test kept from the original chain of 'is' comparisons.
        newData = not all(flag is False for flag in successFlags + failFlags)

        tweetsPerSecond, tweetsPerMinute, tweetsPerHour, tweetsPerDay = deriveRates(tweetsPerMinute, tweetsPerHour, tweetsPerDay)
        failPerSecond, failPerMinute, failPerHour, failPerDay = deriveRates(failPerMinute, failPerHour, failPerDay)

        # The original compared possibly-None values with '<' and '>'. Python 2
        # orders None below every number, Python 3 raises TypeError. The checks
        # below spell out the Python 2 outcome explicitly.
        hasTraffic = tweetsPerDay is not None and tweetsPerDay > 0

        def displayRate(rate):
            if hasTraffic and (rate is None or rate < 1):
                return '< 1'
            return rate

        tweetsPerSecond = displayRate(tweetsPerSecond)
        tweetsPerMinute = displayRate(tweetsPerMinute)
        tweetsPerHour = displayRate(tweetsPerHour)

        return newData, json.dumps([[tweetsPerSecond, tweetsPerMinute, tweetsPerHour, tweetsPerDay],
                                    [failPerSecond, failPerMinute, failPerHour, failPerDay]])
Exemplo n.º 14
0
        def func(templateArguments, instance):
            """Return influence records for one source place, grouped by place type.

            Request arguments read: start_epoch, end_epoch, source_place_id
            and source_provider_id. An empty dict is returned when the
            instance is unknown or no source place ID was given.

            Otherwise returns {'json': result} where result has the keys
            'city', 'country' and 'continent', each holding 'geocode_list'
            (records of [placeId, providerId, display name, coordinate,
            count, bounding box], highest count first and capped at
            Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE)
            and 'total' (sum of the counts of the listed records).
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(
                instance)
            if twitterInstance is None:
                return dict()

            baseEpoch = twitterInstance.constructed_at

            start_epoch = parseInteger(request.GET.start_epoch, default=None)
            end_epoch = parseInteger(request.GET.end_epoch, default=None)
            source_place_id = parseInteger(request.GET.source_place_id)
            source_provider_id = parseInteger(request.GET.source_provider_id)

            if source_place_id is None:
                logger.error(
                    'Invalid place ID specified while providing influence data: %s'
                    % unicode(source_place_id))
                return dict()

            source_cache_id = GeocodeResultAbstract.buildCacheId(
                source_provider_id, source_place_id)

            temporalCollection = getTemporalInfluenceCollection(instance)

            def toTimeId(epoch):
                # An absent bound stays absent; otherwise convert it relative
                # to the instance's construction time.
                if epoch is None:
                    return None
                return getTimeIdFromTimestamp(
                    baseEpoch, Configuration.TEMPORAL_STEP, epoch)

            start_time_id = toTimeId(start_epoch)
            end_time_id = toTimeId(end_epoch)

            readStartedMs = getEpochMs()
            cacheData = getTemporalRange(temporalCollection,
                                         start_time_id,
                                         end_time_id,
                                         source_cache_id,
                                         preciseFromBack=True,
                                         preciseFromFront=True)
            logger.info('Took %dms to read temporal range data' %
                        (getEpochMs() - readStartedMs))

            buildStartedMs = getEpochMs()

            recordsByPlaceType = dict()
            totalsByPlaceType = dict()

            if cacheData is not None:
                for rawProviderId, destinations in cacheData.iteritems():
                    providerId = int(rawProviderId)

                    for destination, count in destinations.iteritems():
                        # Destination keys look like '<placeType>_<placeId>'.
                        parts = destination.split('_')
                        placeType = int(parts[0])
                        placeId = int(parts[1])

                        if placeType not in recordsByPlaceType:
                            recordsByPlaceType[placeType] = list()
                        recordsByPlaceType[placeType].append(
                            [placeId, providerId, None, None, count, None])

                # Geocode only the records that will actually be displayed.
                for placeType, records in recordsByPlaceType.iteritems():
                    records.sort(key=lambda entry: entry[4], reverse=True)
                    shown = records[:Configuration.
                                    DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE]
                    recordsByPlaceType[placeType] = shown

                    for record in shown:
                        geocode = geocodeFromCacheById(
                            GeocodeResultAbstract.buildCacheId(
                                record[1], record[0]))

                        record[2] = geocode.display_name
                        record[3] = geocode.coordinate
                        record[5] = geocode.bounding_box

                        totalsByPlaceType[placeType] = totalsByPlaceType.get(
                            placeType, 0) + record[4]

            placeTypes = GeocodeResultAbstract.PlaceTypes
            sections = (('city', placeTypes.CITY),
                        ('country', placeTypes.COUNTRY),
                        ('continent', placeTypes.CONTINENT))

            resultData = dict()
            for label, placeType in sections:
                resultData[label] = {
                    'geocode_list': recordsByPlaceType.get(placeType, list()),
                    'total': totalsByPlaceType.get(placeType, 0)
                }

            logger.info('Took %dms to build temporal range result data' %
                        (getEpochMs() - buildStartedMs))

            return {'json': resultData}
Exemplo n.º 15
0
        def func(templateArguments, instance):
            """Return one page of cached tweets or users as JSON data.

            Request arguments read: type ('tweet' or 'user', required),
            start_epoch, end_epoch, page (defaults to 0), place_id,
            provider_id, projection_type (empty or 'name-only'; only used for
            users) and followee (required for users).

            Returns {'json': rows}, or the result of redirect_problem() when
            an argument is missing or unsupported. Tweet rows are
            [created_at, user link HTML, user location text, text]; user rows
            are [id, name, profile image URL, user page link].
            """
            dataType = parseString(request.GET.type, ['tweet', 'user'])
            start_epoch = parseInteger(request.GET.start_epoch)
            end_epoch = parseInteger(request.GET.end_epoch)
            page_num = parseInteger(request.GET.page)
            place_id = parseInteger(request.GET.place_id)
            provider_id = parseInteger(request.GET.provider_id)
            projection_type = parseString(request.GET.projection_type)
            followee = parseInteger(request.GET.followee)

            cache_id = GeocodeResultAbstract.buildCacheId(
                provider_id, place_id)

            if dataType is None:
                return redirect_problem('type is a required argument')

            if page_num is None:
                page_num = 0

            data = []
            if dataType == 'tweet':
                tweets = readTweetsFromCache(
                    None, instance, cache_id, start_epoch, end_epoch, page_num,
                    TwitterCachePage.PAGE_SIZE_FULL_DATA)
                if tweets is not None:
                    for tweet in tweets:
                        assert isinstance(tweet, Tweet)
                        userHtml = UserInformationPage.getPageLinkImage(
                            instance, tweet.user, target='_self')

                        data.append([
                            tweet.created_at, userHtml,
                            tweet.user.location_text, tweet.text
                        ])

            elif dataType == 'user':
                # parseString can return None (see the dataType check above),
                # and len(None) raises TypeError; a truthiness test treats a
                # missing and an empty projection type the same way.
                if not projection_type:
                    projection = None
                    pageSize = TwitterCachePage.PAGE_SIZE_FULL_DATA
                elif projection_type == 'name-only':
                    projection = UserProjection.IdNameImage()
                    pageSize = TwitterCachePage.PAGE_SIZE_ID_NAME_DATA
                else:
                    return redirect_problem('Unsupported projection type: %s' %
                                            projection_type)

                if followee is None:
                    return redirect_problem('Followee is required')

                users = readUsersFromCache(None,
                                           instance,
                                           cache_id,
                                           start_epoch,
                                           end_epoch,
                                           page_num,
                                           pageSize,
                                           followee,
                                           userProjection=projection)
                if users is not None:
                    for user in users:
                        assert isinstance(user, User)
                        data.append([
                            user.id, user.name, user.profile_image_url,
                            UserInformationPage.link_info.getPageLink(
                                instance, user.id)
                        ])

            return {'json': data}
Exemplo n.º 16
0
    def manageSocket(self, webSocket, tupleArguments, socketId):
        instanceId = tupleArguments[0]

        mainControl = webSocket.controls[self.key]
        assert isinstance(mainControl, DocumentControl)

        bytesPerBatch       =        parseInteger(request.GET.batchSizeBytes, maximum=1024 * 1024 * 256, default=1024 * 1024 * 1)
        tweetInfo           =        parseBoolean(request.GET.tweet_info, False)
        followerInfo        =        parseBoolean(request.GET.follower_info_full, False)
        followerInfoShort   =        parseBoolean(request.GET.follower_info_short, False)
        providerId          =        parseInteger(request.GET.provider_id)
        placeId             =        parseInteger(request.GET.place_id)
        startEpoch          =        parseInteger(request.GET.start_epoch)
        endEpoch            =        parseInteger(request.GET.end_epoch)

        if placeId is not None and providerId is not None:
            placeCacheId = GeocodeResultAbstract.buildCacheId(providerId, placeId)
        else:
            placeCacheId = None

        if followerInfo:
            tweetInfo = False
            followerInfoShort = False
        elif tweetInfo:
            followerInfo = False
            followerInfoShort = False
        elif followerInfoShort:
            followerInfo = False
            tweetInfo = False
        else:
            followerInfo = True


        userTunnelId = 'user_tunnel'
        tweetTunnelId = None

        if tweetInfo:
            tweetTunnelId = 'tweet_tunnel'

        def openRequiredTunnels():
            if tweetInfo:
                return self.openTunnels(webSocket)
            else:
                return self.openTunnel(userTunnelId, webSocket)

        if not openRequiredTunnels():
            logger.error('Failed to open initial tunnels')
            return False

        if tweetInfo:
            followerIdsFlag = False
            followeeIdsFlag = False
            analysisFlag = False
            isFollowersLoadedRequirement = None
            associatedWithTweetRequirement = True
            recursiveCacheFlag = False
            followerIdsProjection = None
            outputType = 1 # for csv.
        elif followerInfo:
            followerIdsFlag = True
            followeeIdsFlag = True
            analysisFlag = True
            isFollowersLoadedRequirement = True
            associatedWithTweetRequirement = None
            recursiveCacheFlag = True
            followerIdsProjection = None # this gives us all data on each follower.
            outputType = 2
        elif followerInfoShort:
            followerIdsFlag = True
            followeeIdsFlag = True
            followerIdsProjection = NoQueryProjection()
            analysisFlag = True
            isFollowersLoadedRequirement = True
            associatedWithTweetRequirement = None
            recursiveCacheFlag = True
            outputType = 3
        else:
            raise NotImplementedError()

        userProjection = UserProjection(True,
                                        True,
                                        None,
                                        True,
                                        followerIdsFlag,
                                        followerIdsProjection,
                                        followeeIdsFlag,
                                        UserProjection.Id(),
                                        True,
                                        False,
                                        False,
                                        True,
                                        True,
                                        False,
                                        False,
                                        False,
                                        False,
                                        analysisFlag)

        isFirstIteration = [True]

        twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instanceId)
        if twitterInstance is None:
            return False

        twitterSession = twitterInstance.twitter_thread.twitter_session
        progressBarTotalId = 'progress-bar-total'
        progressBarCurrentBatchId = 'progress-bar-current-batch'

        signaler = EventSignaler(self.key, [webSocket])

        updateProgressBarFreq = Timer(400,True)

        def sendData(tunnelId, data):
            """Send data on the given tunnel as one CRLF-terminated unicode line."""
            line = unicode(data) + '\r\n'
            self.sendDataOnTunnel(webSocket, tunnelId, line)

        def sendHeader():
            """Send the header line for each tunnel that is in use.

            The user header is always sent; the tweet header is sent only
            when a tweet tunnel ID has been assigned.
            """
            sendData(userTunnelId, getUserHeader(outputType))

            if tweetTunnelId is None:
                return

            sendData(tweetTunnelId, getTweetHeader())

        def doProgressBarChange(percentage, progressBarId):
            """Set the width of the element with the given ID to percentage (in %).

            The width is applied by executing a jQuery snippet through
            mainControl, formatted with three decimal places.
            """
            script = '$("#%s").width("%.3f%%");' % (progressBarId, percentage)
            mainControl.executeJavascript(script)

        sendHeader()

        counter = [0]
        previousCounter = [0]
        def updateSocket(controls,
                         data,
                         bytesCounter=counter,
                         bytesPerBatch=bytesPerBatch,
                         previousCounter=previousCounter,
                         isFirstIteration=isFirstIteration):
            """Send one record to the client and roll over to a new batch when due.

            controls          -- mapping indexed by self.key; the entry must be
                                 a DocumentControl.
            data              -- dict with the keys 'user_data', 'tweet_data',
                                 'percentage' and 'isFinished'.
            bytesCounter      -- one-element list holding the amount of data
                                 sent in the current batch.
            bytesPerBatch     -- batch size limit.
            previousCounter   -- one-element list holding the counter value at
                                 the last progress bar refresh.
            isFirstIteration  -- one-element list, True until the first batch
                                 has been completed.

            The list arguments default to objects shared with the enclosing
            scope so that their contents persist between invocations.

            A batch ends when the counter exceeds bytesPerBatch or when the
            record is flagged as the final one. At that point the tunnels are
            closed and, unless finished, reopened with fresh headers.
            """
            user = data['user_data']
            tweet = data['tweet_data']
            percentage = data['percentage']
            isFinished = data['isFinished']

            # Only used as a sanity check that this document has a control.
            # NOTE(review): the scripts below go through mainControl, which is
            # not defined in this function - presumably it comes from the
            # enclosing scope; confirm it is the same object as control.
            control = controls[self.key]
            assert isinstance(control, DocumentControl)

            def updateProgressBars():
                previousCounter[0] = thisCounter = bytesCounter[0]

                # The counter may overshoot the batch size by the record that
                # closed the batch, so cap the bar at 100%.
                percentageCurrentBatch = min(float(thisCounter) / float(bytesPerBatch) * 100, 100.0)
                percentageTotal = percentage

                if percentageTotal >= 100:
                    percentageCurrentBatch = 100

                # During the first batch never show less progress than the
                # overall bar does.
                if isFirstIteration[0] and percentageCurrentBatch < percentageTotal:
                    percentageCurrentBatch = percentageTotal

                doProgressBarChange(percentageTotal, progressBarTotalId)
                doProgressBarChange(percentageCurrentBatch, progressBarCurrentBatchId)

            # Refresh only when something was sent since the last refresh and
            # the rate-limiting timer allows it.
            if previousCounter[0] != bytesCounter[0] and updateProgressBarFreq.ticked():
                updateProgressBars()

            # Count everything sent for this record. A tweet that carries its
            # user results in two payloads and both must count towards the
            # batch size.
            sentLength = 0
            if user is not None:
                assert isinstance(user,User)
                userRepresentation = getUserRepresentation(user, outputType)
                sendData(userTunnelId, userRepresentation)
                sentLength += len(userRepresentation)

            if tweet is not None:
                assert isinstance(tweet, Tweet)
                tweetRepresentation = getTweetRepresentation(tweet)
                sendData(tweetTunnelId, tweetRepresentation)
                sentLength += len(tweetRepresentation)

            bytesCounter[0] += sentLength

            if bytesCounter[0] > bytesPerBatch or isFinished:
                updateProgressBars()
                isFirstIteration[0] = False

                bytesCounter[0] = 0
                mainControl.executeJavascript('onBatchEnd();')

                self.closeTunnels(webSocket)

                if not isFinished:
                    logger.debug('Waiting to receive next data provider')
                    if not openRequiredTunnels():
                        logger.warning('Failed to reinitialize tunnel slots')
                        webSocket.cleanup()
                        return

                    sendHeader()
                else:
                    mainControl.executeJavascript('onFinished();')

                    webSocket.cleanup()

        def onCacheIteration(iteration, total, isFinished, data, iteratorId):
            """Handle one record produced while reading from the cache.

            iteration, total -- progress values passed to getPercentage.
            isFinished       -- forwarded unchanged in the signalled payload.
            data             -- a User, a Tweet or None.
            iteratorId       -- identifies the iterator that produced the
                                record; 'followee' records are not written out.

            Returns True while the web socket has not been cleaned up and
            False afterwards (presumably the cache reader stops iterating on
            False - confirm against the reader implementation).
            """
            # Don't write followee data to output as it would duplicate a lot of data.
            if iteratorId == 'followee':
                data = None

            running = not webSocket.is_cleaned_up
            if not running:
                logger.debug('Ending cache download prematurely')
                return running

            # We need to do this so that if the client closes the socket we are notified.
            webSocket.pingFreqLimited()

            percentage = getPercentage(iteration, total)

            # Validate the record type before touching any of its attributes,
            # so that unexpected objects are reported rather than raising.
            user = None
            tweet = None
            if data is None:
                pass
            elif isinstance(data, User):
                user = data
            elif isinstance(data, Tweet):
                tweet = data
                if tweet.has_user:
                    user = tweet.user
            else:
                logger.error('Invalid data from cache, type: %s' % type(data))
                return running

            signaler.signalEvent({SignalActions.SOCKET: updateSocket, 'percentage' : percentage, 'user_data' : user, 'tweet_data' : tweet, 'isFinished' : isFinished})

            # Yield to other greenlets between records.
            gevent.sleep(0)

            return running

        logger.debug('Starting to read data from cache...')

        # This makes sure the search is finite.
        epochNow = getEpochMs()
        if endEpoch is None or endEpoch > epochNow:
            endEpoch = epochNow

        if followerInfo or followerInfoShort:
            readUsersFromCache(twitterSession,
                               instanceId,
                               placeId = placeCacheId,
                               epochMsStartRange=startEpoch,
                               epochMsEndRange=endEpoch,
                               isFollowersLoadedRequirement=isFollowersLoadedRequirement,
                               associatedWithTweetRequirement=associatedWithTweetRequirement,
                               onIterationFunc=onCacheIteration,
                               recursive=recursiveCacheFlag,
                               userProjection=userProjection)
        else:
            readTweetsFromCache(twitterSession,
                                instanceId,
                                placeId = placeCacheId,
                                epochMsStartRange=startEpoch,
                                epochMsEndRange=endEpoch,
                                onIterationFunc=onCacheIteration,
                                retrieveUserData=True,
                                userProjection=userProjection)

        # We want to cleanup everything now since we are done.
        return False
Exemplo n.º 17
0
        def func(templateArguments, instance):
            """Build the influence data of a source place for a time range.

            Reads start_epoch, end_epoch, source_place_id and
            source_provider_id from request.GET. The temporal influence
            collection of the instance is queried for that range, the
            destinations are grouped by place type, and only the records with
            the highest counts per place type are kept and resolved to
            geocode details.

            templateArguments -- not used by this function.
            instance          -- instance key used to look up the Twitter
                                 instance and its temporal collection.

            Returns {'json': result} where result has the keys 'city',
            'country' and 'continent', each holding a 'geocode_list' of
            [placeId, providerId, display_name, coordinate, count,
            bounding_box] records and the 'total' of their counts. Returns an
            empty dict if the instance is unknown or no source place ID was
            given.
            """
            twitterInstance = self.application.twitter_instances.getInstanceByInstanceKey(instance)
            if twitterInstance is None:
                return dict()

            baseEpoch = twitterInstance.constructed_at

            start_epoch = parseInteger(request.GET.start_epoch, default=None)
            end_epoch = parseInteger(request.GET.end_epoch, default=None)
            source_place_id = parseInteger(request.GET.source_place_id)
            source_provider_id = parseInteger(request.GET.source_provider_id)

            if source_place_id is None:
                logger.error('Invalid place ID specified while providing influence data: %s' % unicode(source_place_id))
                return dict()

            # NOTE(review): source_provider_id is not validated here and may be
            # None - confirm that buildCacheId copes with that.
            source_cache_id = GeocodeResultAbstract.buildCacheId(source_provider_id, source_place_id)

            temporalCollection = getTemporalInfluenceCollection(instance)

            if start_epoch is not None:
                start_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, start_epoch)
            else:
                start_time_id = None

            if end_epoch is not None:
                end_time_id = getTimeIdFromTimestamp(baseEpoch, Configuration.TEMPORAL_STEP, end_epoch)
            else:
                end_time_id = None

            timerMs = getEpochMs()
            cacheData = getTemporalRange(temporalCollection, start_time_id, end_time_id, source_cache_id, preciseFromBack=True, preciseFromFront=True)
            logger.info('Took %dms to read temporal range data' % (getEpochMs() - timerMs))

            timerMs = getEpochMs()

            geocodeByPlaceType = dict()
            totalsByPlaceType = dict()

            if cacheData is not None:
                for providerId, providerIdData in cacheData.iteritems():
                    providerId = int(providerId)

                    for destination, count in providerIdData.iteritems():
                        # Destination keys have the form '<placeType>_<placeId>'.
                        split = destination.split('_')
                        placeType = int(split[0])
                        placeId = int(split[1])

                        # Slots 2, 3 and 5 (display name, coordinate and
                        # bounding box) are filled in below, and only for the
                        # records that end up being displayed.
                        record = [placeId,
                                  providerId,
                                  None,
                                  None,
                                  count,
                                  None]

                        geocodeByPlaceType.setdefault(placeType,list()).append(record)

                # Process only the records we are going to display.
                for placeType, records in geocodeByPlaceType.iteritems():
                    aux = sorted(records, key=lambda x: x[4], reverse=True)
                    aux = aux[:Configuration.DISPLAY_MAX_NUM_INFLUENCE_RECORDS_PER_PLACE_TYPE]

                    resolvedRecords = list()
                    for record in aux:
                        cacheId = GeocodeResultAbstract.buildCacheId(record[1], record[0])
                        geocode = geocodeFromCacheById(cacheId)

                        # The lookup returns None when the place is in neither
                        # the in-memory cache nor the database. Leave such a
                        # record out instead of failing the whole request.
                        if geocode is None:
                            logger.warning('Could not resolve geocode for influence record, cache ID: %s' % unicode(cacheId))
                            continue

                        record[2] = geocode.display_name
                        record[3] = geocode.coordinate
                        count = record[4]
                        record[5] = geocode.bounding_box

                        resolvedRecords.append(record)
                        totalsByPlaceType[placeType] = totalsByPlaceType.get(placeType,0) + count

                    # Replacing the value of an existing key is safe while
                    # iterating; the set of keys does not change.
                    geocodeByPlaceType[placeType] = resolvedRecords

            def getResultPart(placeType):
                return {'geocode_list' : geocodeByPlaceType.get(placeType,list()), 'total' : totalsByPlaceType.get(placeType, 0)}

            resultData = dict()
            resultData['city'] =        getResultPart(GeocodeResultAbstract.PlaceTypes.CITY)
            resultData['country'] =     getResultPart(GeocodeResultAbstract.PlaceTypes.COUNTRY)
            resultData['continent'] =   getResultPart(GeocodeResultAbstract.PlaceTypes.CONTINENT)

            logger.info('Took %dms to build temporal range result data' % (getEpochMs() - timerMs))

            return {'json' : resultData}
Exemplo n.º 18
0
        getInstanceCodeCollection().drop()

    if Configuration.PROXIES_ENABLED:
        httpProxy = Configuration.PROXIES.get('http',None)
        httpsProxy = Configuration.PROXIES.get('https',None)

        # Requests API will use these environment variables.
        if httpProxy is not None:
            os.environ['HTTP_PROXY'] = Configuration.PROXIES['http']

        if httpsProxy is not None:
            os.environ['HTTPS_PROXY'] = Configuration.PROXIES['https']

    bottle.debug(Configuration.BOTTLE_DEBUG)

    GeocodeResultAbstract.initializeCountryContinentDataFromCsv()

    dataCollection = DataCollection()
    webApplication = WebApplicationTwitter(None, Configuration.MAX_INSTANCE_INACTIVE_TIME_MS, dataCollection)

    landingPage = LandingPage(webApplication)

    oauthSignIn = OAuthSignIn(webApplication, Configuration.CONSUMER_TOKEN, Configuration.CONSUMER_SECRET)
    oauthCallback = OAuthCallback(webApplication, Configuration.CONSUMER_TOKEN, Configuration.CONSUMER_SECRET, GateInstance.link_info.getPageLink())

    mapWebSocketGroup = LocationMapWsg(application=webApplication,locations=dataCollection.tweets_by_location)
    tweetsByLocationWebSocketGroup = TweetsByLocationWsg(application=webApplication,signaler=dataCollection.tweets_by_location)
    userInformationWebSocketGroup = UserWsg(application=webApplication, dataCollection=dataCollection, signaler=dataCollection.all_users)

    instanceGate = GateInstance(webApplication)
    startInstance = StartInstancePost(webApplication, Configuration.CONSUMER_TOKEN, Configuration.CONSUMER_SECRET, None)