Пример #1
0
 def get(self):
         """Queue a duplicate-cleanup task for every city not yet queued today.

         Fetches up to 5000 CityStats entities ordered by descending count.
         For each one, a memcache marker built from the city, the country
         code and the current date decides whether the city is skipped or
         a task is posted to '/stats/city/redundant/task' on the 'default'
         queue. The marker is stored after the task has been added.
         """
         for stat in CityStats.gql('order by count desc').fetch(5000):
                 today = str(datetime.datetime.now().date())
                 marker_key = 'city_queue' + stat.city + '_' + stat.countryCode + today
                 if not memcache.get(marker_key):
                         logging.info('adding to queue %s %s' % (stat.countryCode, stat.city))
                         task_params = {'city': stat.city, 'country': stat.countryCode}
                         taskqueue.add(queue_name='default', url='/stats/city/redundant/task', params=task_params)
                         # Remember that this city has already been queued today.
                         memcache.set(marker_key, 1)
                 else:
                         logging.info('processed city %s %s skipping' % (stat.city, stat.countryCode))
Пример #2
0
	def cityFeed(self, reqId):
		"""Write city statistics as a gviz_api DataTable JSON response.

		Fetches up to 80 CityStats entities with count > 100 (highest count
		first), drops entries whose city name contains "unknown" and entries
		whose country code is 'US' or 'XX', and writes the remaining
		"city, countryCode" / count pairs to the response as text/plain.

		Args:
			reqId: request id passed through to ToJSonResponse as req_id.

		If the query returns nothing, the plain message
		'Not enough data for graph' is written instead.
		"""
		logging.info('City stats feed')
		cityStatsQ = CityStats.gql('WHERE count > 100 ORDER BY count desc ').fetch(80)
		# fetch() yields a (possibly empty) list rather than None, so test
		# for emptiness; otherwise this branch could never run.
		if not cityStatsQ:
			logging.info('Not enough data for graph')
			self.response.out.write('Not enough data for graph')
			return
		cityStats = [
			x for x in cityStatsQ
			if "unknown" not in x.city.lower() and x.countryCode not in ('US', 'XX')
		]
		logging.info('retrieved %s stats' % len(cityStats))
		description = {"city_countryCode": ("string", "City Code"),
				"count": ("number", "Count")}
		columnnames = ["city_countryCode", "count"]
		data_table = gviz_api.DataTable(description)
		data_table.LoadData([
			{"city_countryCode": ctCnt.city + ', ' + ctCnt.countryCode, "count": ctCnt.count}
			for ctCnt in cityStats
		])

		self.response.headers['Content-Type'] = 'text/plain'
		self.response.out.write(data_table.ToJSonResponse(columns_order=columnnames, req_id=reqId))
Пример #3
0
 def post(self):
         """Merge duplicate CityStats entities for one city/country pair.

         Reads the 'city' and 'country' request parameters. If a memcache
         marker for that pair and today's date exists, nothing is done.
         Otherwise up to 1000 matching CityStats entities are fetched; when
         more than one exists, the entity with the highest count is kept,
         the counts of the others are added to it, its dateUpdated is set
         to the most recent dateUpdated among them, and the others are
         deleted. The memcache marker is set after the merge.
         """
         city = self.request.get('city', None)
         countryCode = self.request.get('country', None)
         if city is None or countryCode is None:
                 # Concatenating None into the memcache key would raise TypeError.
                 logging.info('missing city or country parameter, skipping')
                 return
         memcache_key = 'reduncant_city' + city + '_' + countryCode + '_' + str(datetime.datetime.now().date())
         if memcache.get(memcache_key):
                 logging.info('already processed %s %s' % (city, countryCode))
                 return
         redundant = CityStats.gql('WHERE city = :1 and countryCode = :2', city, countryCode).fetch(1000)
         # Zero or one match means there is nothing to merge; an empty list
         # must not reach max() below.
         if len(redundant) <= 1:
                 logging.info('no duplicates for %s %s' % (city, countryCode))
                 return
         logging.info('city: %s country: %s has duplidates: %s' % (city, countryCode, len(redundant)))
         # Rank by the entity's own count attribute (a missing count ranks as
         # 0), not by list.count, which only counts occurrences in the list.
         pivot = max(redundant, key=lambda stat: stat.count or 0)
         logging.info('max stats:%s' % pivot.count)
         if pivot.count is None:
                 pivot.count = 0
         for cc in redundant:
                 if cc is pivot:
                         continue
                 pivot.count += cc.count or 0
                 if cc.dateUpdated is not None and (pivot.dateUpdated is None or cc.dateUpdated > pivot.dateUpdated):
                         pivot.dateUpdated = cc.dateUpdated
                 cc.delete()
         pivot.put()
         memcache.set(memcache_key, '1')
         logging.info('new count %s' % pivot.count)
Пример #4
0
    def aggregateData(self, currentSession, upper_limit_date):
        """Update location, country, city and link statistics for one session.

        Steps, in order:
          1. Resolve currentSession.ip via RequestUtils.ipResolverAPI. When
             the result has exactly two items they are taken as
             (city, countryCode): a UserLocationModel is stored and the
             matching CountryStats and CityStats counters are created or
             incremented, stamped with upper_limit_date.
          2. Create or increment the LinkStats entity for currentSession.url.
          3. Fill in a missing session domain (from the URL) and a missing
             session date (fixed default 2009-11-15), saving the session if
             either was changed.

        Args:
            currentSession: session entity; the attributes read are ip,
                date, url, domain and instaright_account.
            upper_limit_date: value stored as dateUpdated on the country
                and city statistics.
        """
        logging.info("agregate data for %s" % currentSession.date)
        locationData = RequestUtils.ipResolverAPI(currentSession.ip)
        if len(locationData) == 2:
            logging.info("updating location data")
            city = locationData[0]
            countryCode = locationData[1]

            logging.info("location api response:  %s " % locationData)
            userLocation = UserLocationModel()
            userLocation.user = currentSession.instaright_account
            userLocation.city = city
            userLocation.countryCode = countryCode
            userLocation.date = currentSession.date
            userLocation.put()

            # update country stats and city stats
            logging.info("country update")
            existingCountryStat = CountryStats.gql("WHERE countryCode = :1 ", countryCode).get()
            if existingCountryStat:
                # A stored count may be None; treat it as zero before incrementing.
                if existingCountryStat.count is None:
                    existingCountryStat.count = 1
                else:
                    existingCountryStat.count += 1
                logging.info("updating count %s" % existingCountryStat.count)
                existingCountryStat.dateUpdated = upper_limit_date
                existingCountryStat.put()
            else:
                logging.info("new country")
                countryStat = CountryStats()
                countryStat.countryCode = countryCode
                countryStat.count = 1
                # The date belongs in dateUpdated; assigning it to count
                # would overwrite the counter set on the previous line.
                countryStat.dateUpdated = upper_limit_date
                countryStat.put()

            logging.info("city update")
            existingCityStat = CityStats.gql("WHERE city = :1 and countryCode = :2", city, countryCode).get()
            if existingCityStat:
                # A stored count may be None; treat it as zero before incrementing.
                if existingCityStat.count is None:
                    existingCityStat.count = 1
                else:
                    existingCityStat.count += 1
                existingCityStat.dateUpdated = upper_limit_date
                logging.info("updating count %s" % existingCityStat.count)
                existingCityStat.put()
            else:
                logging.info("new city")
                cityStat = CityStats()
                cityStat.countryCode = countryCode
                cityStat.city = city
                cityStat.count = 1
                # Same attribute name as the update branch above.
                cityStat.dateUpdated = upper_limit_date
                cityStat.put()

        existingLinkStat = LinkStats.gql("WHERE link = :1", currentSession.url).get()
        logging.info("link stats update")
        if existingLinkStat:
            logging.info("updating link stats: %s" % currentSession.url)
            # Same None guard as the country and city counters.
            if existingLinkStat.count is None:
                existingLinkStat.count = 1
            else:
                existingLinkStat.count += 1
            existingLinkStat.countUpdated = currentSession.date
            existingLinkStat.lastUpdatedBy = currentSession.instaright_account
            existingLinkStat.put()
        else:
            logging.info("new link %s" % currentSession.url)
            linkStat = LinkStats()
            linkStat.link = currentSession.url
            linkStat.count = 1
            linkStat.countUpdated = currentSession.date
            linkStat.lastUpdatedBy = currentSession.instaright_account
            linkStat.put()

        # Repair missing session fields, recording each change so the log
        # reports all of them rather than only the last one.
        changes = []
        if currentSession.domain is None or currentSession.domain == "":
            currentSession.domain = RequestUtils.getDomain(currentSession.url)
            changes.append("domain change: %s" % currentSession.domain)
        if currentSession.date is None or currentSession.date == "":
            date = datetime.date(2009, 11, 15)
            currentSession.date = date
            changes.append("date change: %s" % date)
        if changes:
            logging.info("updating session mode: %s" % "; ".join(changes))
            currentSession.put()
        else:
            logging.info("unchanged session")
        logging.info("done data aggregation")