def get(self):
    """Cron entry point: enqueue one dedup task per known city.

    Walks every CityStats row (most-frequent first) and schedules a
    /stats/city/redundant/task worker for each city/country pair,
    using a per-day memcache flag so a pair is queued at most once per day.
    """
    for stat in CityStats.gql('order by count desc').fetch(5000):
        cache_key = ('city_queue' + stat.city + '_' + stat.countryCode
                     + str(datetime.datetime.now().date()))
        # Flag present means this pair was already queued today.
        if memcache.get(cache_key):
            logging.info('processed city %s %s skipping' % (stat.city, stat.countryCode))
            continue
        logging.info('adding to queue %s %s' % (stat.countryCode, stat.city))
        taskqueue.add(queue_name='default',
                      url='/stats/city/redundant/task',
                      params={'city': stat.city, 'country': stat.countryCode})
        memcache.set(cache_key, 1)
def cityFeed(self, reqId):
    """Serve a gviz JSON response of per-city usage counts.

    Args:
        reqId: gviz request id echoed back in the JSON response.

    Emits the top cities with more than 100 hits, excluding "unknown"
    cities and the US/XX country codes, as a two-column data table.
    """
    logging.info('City stats feed')
    cityStatsQ = CityStats.gql('WHERE count > 100 ORDER BY count desc ').fetch(80)
    # BUG FIX: fetch() returns a (possibly empty) list, never None, so the
    # original `is None` check could never fire; test truthiness instead.
    if not cityStatsQ:
        logging.info('Not enough data for graph')
        # BUG FIX: was `self.repsonse` (typo) -- raised AttributeError here.
        self.response.out.write('Not enough data for graph')
        return
    cityStats = [x for x in cityStatsQ
                 if "unknown" not in x.city.lower()
                 and x.countryCode != 'US'
                 and x.countryCode != 'XX']
    logging.info('retrieved %s stats' % len(cityStats))
    description = {"city_countryCode": ("string", "City Code"),
                   "count": ("number", "Count")}
    columnnames = ["city_countryCode", "count"]
    data_table = gviz_api.DataTable(description)
    cityCnt = [{"city_countryCode": ctCnt.city + ', ' + ctCnt.countryCode,
                "count": ctCnt.count}
               for ctCnt in cityStats]
    data_table.LoadData(cityCnt)
    self.response.headers['Content-Type'] = 'text/plain'
    self.response.out.write(
        data_table.ToJSonResponse(columns_order=columnnames, req_id=reqId))
def post(self):
    """Task worker: merge duplicate CityStats rows for one city/country.

    Expects 'city' and 'country' POST params. Folds the counts of all
    duplicate rows into the row with the highest count, keeps the most
    recent dateUpdated, deletes the merged rows, and marks the pair as
    processed in memcache for the rest of the day.
    """
    city = self.request.get('city', None)
    countryCode = self.request.get('country', None)
    memcache_key = ('reduncant_city' + city + '_' + countryCode + '_'
                    + str(datetime.datetime.now().date()))
    if memcache.get(memcache_key):
        logging.info('already processed %s %s' % (city, countryCode))
        return
    redundant = CityStats.gql('WHERE city = :1 and countryCode = :2',
                              city, countryCode).fetch(1000)
    # ROBUSTNESS: also bail on 0 rows (row deleted since enqueue) -- the
    # original `== 1` check would let max() crash on an empty list.
    if len(redundant) <= 1:
        logging.info('no duplicates for %s %s' % (city, countryCode))
        return
    logging.info('city: %s country: %s has duplicates: %s'
                 % (city, countryCode, len(redundant)))
    # BUG FIX: `key=redundant.count` called list.count(entity), which is 1
    # for every element, so the pivot was arbitrary. Compare the entities'
    # own `count` attribute instead.
    pivot = max(redundant, key=lambda s: s.count)
    logging.info('max stats:%s' % pivot.count)
    redundant.remove(pivot)
    for cc in redundant:
        pivot.count += cc.count
        # Keep the most recent update timestamp among the merged rows.
        if cc.dateUpdated is not None and (pivot.dateUpdated is None
                                           or cc.dateUpdated > pivot.dateUpdated):
            pivot.dateUpdated = cc.dateUpdated
        cc.delete()
    pivot.put()
    memcache.set(memcache_key, '1')
    logging.info('new count %s' % pivot.count)
def aggregateData(self, currentSession, upper_limit_date):
    """Aggregate one session into location, country, city and link stats.

    Args:
        currentSession: session entity with ip, url, domain, date and
            instaright_account attributes.
        upper_limit_date: timestamp recorded as the stats' dateUpdated.

    Resolves the session IP to a city/country, bumps the per-country,
    per-city and per-link counters, and backfills missing domain/date
    on the session itself.
    """
    logging.info("agregate data for %s" % currentSession.date)
    locationData = RequestUtils.ipResolverAPI(currentSession.ip)
    if len(locationData) == 2:
        logging.info("updating location data")
        city = locationData[0]
        countryCode = locationData[1]
        logging.info("location api response: %s " % locationData)
        self._storeUserLocation(currentSession, city, countryCode)
        self._updateCountryStats(countryCode, upper_limit_date)
        self._updateCityStats(city, countryCode, upper_limit_date)
    # NOTE(review): the link and session updates only touch currentSession
    # fields, so they are assumed independent of the location lookup --
    # confirm against the original (un-flattened) source if available.
    self._updateLinkStats(currentSession)
    self._normalizeSession(currentSession)
    logging.info("done data aggregation")

def _storeUserLocation(self, currentSession, city, countryCode):
    """Persist a UserLocationModel row linking this session to a location."""
    userLocation = UserLocationModel()
    userLocation.user = currentSession.instaright_account
    userLocation.city = city
    userLocation.countryCode = countryCode
    userLocation.date = currentSession.date
    userLocation.put()

def _updateCountryStats(self, countryCode, upper_limit_date):
    """Increment (or create) the CountryStats counter for countryCode."""
    logging.info("country update")
    existingCountryStat = CountryStats.gql("WHERE countryCode = :1 ", countryCode).get()
    if existingCountryStat:
        # hack to avoid exception: legacy rows may have count == None
        if existingCountryStat.count is None:
            existingCountryStat.count = 1
        else:
            existingCountryStat.count += 1
        logging.info("updating count %s" % existingCountryStat.count)
        existingCountryStat.dateUpdated = upper_limit_date
        existingCountryStat.put()
    else:
        logging.info("new country")
        countryStat = CountryStats()
        countryStat.countryCode = countryCode
        countryStat.count = 1
        # BUG FIX: original did `countryStat.count = upper_limit_date`,
        # clobbering the just-set counter with a date; set dateUpdated.
        countryStat.dateUpdated = upper_limit_date
        countryStat.put()

def _updateCityStats(self, city, countryCode, upper_limit_date):
    """Increment (or create) the CityStats counter for city/countryCode."""
    logging.info("city update")
    existingCityStat = CityStats.gql("WHERE city = :1 and countryCode = :2",
                                     city, countryCode).get()
    if existingCityStat:
        # hack to avoid exception: legacy rows may have count == None
        if existingCityStat.count is None:
            existingCityStat.count = 1
        else:
            existingCityStat.count += 1
        existingCityStat.dateUpdated = upper_limit_date
        logging.info("updating count %s" % existingCityStat.count)
        existingCityStat.put()
    else:
        logging.info("new city")
        cityStat = CityStats()
        cityStat.countryCode = countryCode
        cityStat.city = city
        cityStat.count = 1
        # BUG FIX: was `cityStat.updateDate`; every other use of this model
        # (merge task, existing-stat branch) reads/writes `dateUpdated`, so
        # the expando attribute name was wrong and new rows never sorted by date.
        cityStat.dateUpdated = upper_limit_date
        cityStat.put()

def _updateLinkStats(self, currentSession):
    """Increment (or create) the LinkStats counter for the session URL."""
    existingLinkStat = LinkStats.gql("WHERE link = :1", currentSession.url).get()
    logging.info("link stats update")
    if existingLinkStat:
        # BUG FIX: the new/existing log messages were swapped in the original.
        logging.info("updating link stats: %s" % currentSession.url)
        existingLinkStat.count = existingLinkStat.count + 1
        existingLinkStat.countUpdated = currentSession.date
        existingLinkStat.lastUpdatedBy = currentSession.instaright_account
        existingLinkStat.put()
    else:
        logging.info("new link %s" % currentSession.url)
        linkStat = LinkStats()
        linkStat.link = currentSession.url
        linkStat.count = 1
        linkStat.countUpdated = currentSession.date
        linkStat.lastUpdatedBy = currentSession.instaright_account
        linkStat.put()

def _normalizeSession(self, currentSession):
    """Backfill missing domain/date on the session; persist only if changed."""
    shouldUpdateSession = 0
    mode = ""
    if currentSession.domain is None or currentSession.domain == "":
        currentSession.domain = RequestUtils.getDomain(currentSession.url)
        shouldUpdateSession = 1
        mode = "domain change: %s" % currentSession.domain
    if currentSession.date is None or currentSession.date == "":
        # Sessions predating date tracking get this fixed sentinel date.
        date = datetime.datetime.strptime("2009-11-15", "%Y-%m-%d").date()
        currentSession.date = date
        shouldUpdateSession = 2
        mode = "date change: %s" % date
    if shouldUpdateSession > 0:
        logging.info("updating session mode: %s" % mode)
        currentSession.put()
    else:
        logging.info("unchanged session")