def getsportbadge(self):
    """Return 'sport' when the user's combined sport activity since midnight
    (sessions on sport domains + sport-category link entries) reaches the
    configured threshold; otherwise return None."""
    midnight = datetime.datetime.now().date()
    # Sessions on any sport domain since midnight, for this user only.
    currentCount = SessionModel.gql('where domain in :1 and date >= :2 and instaright_account = :3',
                                    self.sportDomains, midnight, self.user).count()
    # Sport-category link entries since midnight, narrowed to this user via
    # the back-referenced session model.
    categoryCount = LinkCategory.gql('WHERE category in :1 and date >= :2',
                                     self.sportCategories, midnight).fetch(1000)
    categoryRefined = [lc for lc in categoryCount if lc.model_details.instaright_account == self.user]
    cat_user_count = len(categoryRefined)
    logging.info('site specific badger(sport): fetched stats %s and category count %s' % (currentCount, cat_user_count))
    total = currentCount + cat_user_count
    if total >= self.sport_tresshold:
        # BUG FIX: message previously said 'news badge' inside the sport badger.
        logging.info('setting sport badge for user %s ' % self.user)
        return 'sport'
    else:
        # BUG FIX: log the combined count that was actually compared against
        # the threshold (previously logged only currentCount).
        logging.info('for user %s still tresshold of %s still not reached %s' % (self.user, self.sport_tresshold, total))
        return None
def get(self, category):
    """Serve the most recent links for *category*.

    With ?format=json: fetch the 50 most recently updated LinkCategory
    rows for the category, keep those whose model_details back-reference
    resolved, and emit up to 10 of them as a JSON array. Any other (or
    missing) format gets an empty JSON list.
    """
    format=self.request.get('format',None)
    # Guard: category comes from the route; bail out on missing/zero value.
    if category is None or category == 0:
        logging.info('not category in request. return empty')
        return
    if format == 'json':
        logging.info('catefory %s json feed' % category)
        userUtil = UserUtils()
        allentries = LinkCategory.gql('WHERE category = :1 order by updated desc', category).fetch(50)
        # Only rows with a resolved session-model reference are renderable.
        entries= [ e for e in allentries if hasattr(e,'model_details') and e.model_details is not None ]
        entries = entries[:10]
        self.response.headers['Content-Type'] = "application/json"
        # Serialize each LinkCategory through the default= lambda: the outer
        # 'u' key wraps a dict of link metadata (title, links, domain, user,
        # avatar, rendered category HTML, etc.).
        # NOTE(review): the timestamp format uses %I (12-hour clock) with a
        # 'Z' suffix — looks like it should be %H for ISO-8601; confirm with
        # consumers before changing.
        self.response.out.write(simplejson.dumps(entries, default=lambda o: {'u':{'id':str(o.model_details.key()), 't':unicode(o.model_details.title), 'dd': LinkUtils.generate_domain_link(o.model_details.domain),'l':LinkUtils.generate_instaright_link(o.model_details.url_encode26, LinkUtils.make_title(o.model_details.title), o.model_details.url), 'd':o.model_details.domain, 'user': urllib.unquote(o.model_details.instaright_account), 'source': o.model_details.client, 'html_lc':LinkUtils.getLinkCategoryHTML(o), 'u': o.updated.strftime("%Y-%m-%dT%I:%M:%SZ"), 'a':userUtil.getAvatar(o.model_details.instaright_account),'ol':o.url,'c':category, 'lc':LinkUtils.getLinkCategory(o.model_details)}}))
        return
    # Fallback: empty JSON list for non-json requests.
    self.response.headers['Content-Type'] = "application/json"
    self.response.out.write("[{}]")
def get(self, c):
    """Backfill the model_details reference on the 50 most recently
    updated LinkCategory rows of category *c*, resolving each url against
    SessionModel first by url, then by feed_url."""
    logging.info('updates for category %s' % c)
    lc=LinkCategory.gql('WHERE category = :1 order by updated desc', c).fetch(50)
    for l in lc:
        if hasattr(l,'model_details') and l.model_details is not None:
            logging.info('url %s already has details, skipping update' %l.url)
            continue
        logging.info('updating url details %s ' %l.url)
        s=SessionModel.gql('WHERE url = :1', l.url).get()
        if s is None:
            # Fall back to matching on the feed url.
            s=SessionModel.gql('WHERE feed_url = :1', l.url).get()
        # BUG FIX: a third query byte-identical to the feed_url lookup above
        # was issued here; it could never return anything new (probably
        # intended a short-url field — see the sibling cron handler), so it
        # has been dropped.
        if s is None:
            logging.info('ERROR: no session model url for %s' % l.url)
            continue
        logging.info('session model for url %s FOUND' %l.url)
        l.model_details=s.key()
        l.put()
def processLinkCategoriesFromJson(cls, categories, url):
    """Upsert one LinkCategory row per category in *categories*.

    *categories* is a serialized dict mapping category name -> count.
    Existing (category, url) pairs get their updated timestamp refreshed;
    unseen pairs are created.
    """
    if categories is None or len(categories) == 0:
        logging.info('missing categories. skipping')
        return
    # NOTE(review): eval() on externally supplied text is unsafe — this
    # should be a JSON parse. Flagged only; behavior left unchanged.
    cat_dict = eval(categories)
    if len(cat_dict) == 0:
        logging.info('no categories. skipping')
        return
    for cat, cnt in cat_dict.iteritems():
        existingCategory = LinkCategory.gql('WHERE category = :1 and url = :2' , cat, url).get()
        if existingCategory is not None:
            # Known pair: just refresh its timestamp.
            logging.info('updated time for category %s [ %s ]' % (cat, existingCategory.url))
            existingCategory.updated = datetime.datetime.now()
            existingCategory.put()
        else:
            # First sighting of this (category, url) pair.
            logging.info('new category %s , init url %s' % (cat, url))
            fresh = LinkCategory()
            fresh.category = cat
            fresh.url = url
            fresh.put()
def getLinkCategory(cls, link_model):
    """Return a comma-joined, de-duplicated category string for
    *link_model*, served from memcache when possible with a LinkCategory
    query as fallback. Returns '' when the model or its url_hash is
    missing."""
    category=''
    # BUG FIX: this guard must run before ANY attribute access — the log
    # line below used to dereference link_model first, raising
    # AttributeError on None input.
    if link_model is None or link_model.url_hash is None:
        return category
    logging.info('looking category cache for url hash %s ( %s )' %(link_model.url_hash, link_model.url))
    mem_key = link_model.url_hash+'_category'
    cached_category=memcache.get(mem_key)
    if cached_category is not None:
        logging.info('got category from cache %s' %cached_category)
        return ','.join(cached_category)
    linkCategory=None
    try:
        linkCategory=LinkCategory.gql('WHERE category != NULL and url_hash = :1 ' , link_model.url_hash).fetch(1000)
    except NotSavedError:
        logging.info('not saved key for url hash %s' % link_model.url_hash)
    if linkCategory is not None:
        logging.info('got %s categories for %s' %( len(linkCategory), link_model.url))
        # Keep only meaningful tags (longer than 2 chars), de-duplicated.
        cats_tag=[ l.category for l in linkCategory if l.category is not None and len(l.category) > 2 ]
        category=list(set(cats_tag))
        logging.info('got category from query %s' %category)
        memcache.set(mem_key, category)
    return ','.join(category)
def getLinkCategoryHTML(cls, link_model):
    """Render *link_model*'s categories as HTML "bubble" links.

    Returns None when the model or its url_hash is missing, '' when no
    categories are found, otherwise space-joined anchor spans."""
    category=None
    # BUG FIX: guard before any attribute access (the log line used to
    # dereference link_model first, raising AttributeError on None).
    if link_model is None or link_model.url_hash is None:
        return category
    logging.info('looking category cache for url hash %s ( %s )' %(link_model.url_hash, link_model.url))
    mem_key = link_model.url_hash+'_category'
    cached_category=memcache.get(mem_key)
    if cached_category is not None:
        logging.info('got category from cache %s' %cached_category)
        category=cached_category
    if category is None:
        # PERF FIX: only hit the datastore on a cache miss; the query used
        # to run unconditionally and its result was discarded on cache hits.
        linkCategory=None
        try:
            linkCategory=LinkCategory.gql('WHERE category != NULL and url_hash = :1 ' , link_model.url_hash).fetch(1000)
        except NotSavedError:
            logging.info('not saved key for url hash %s' % link_model.url_hash)
        if linkCategory is not None:
            logging.info('got %s categories for %s' %( len(linkCategory), link_model.url))
            cats_tag=[ l.category for l in linkCategory if l.category is not None and len(l.category) > 2 ]
            category=list(set(cats_tag))
            logging.info('got category from query %s' %category)
            memcache.set(mem_key, category)
    if not category:
        # BUG FIX: previously fell through and iterated None (TypeError)
        # when neither cache nor query produced categories.
        return ''
    #NOTE: static css , error
    html = [ "<span class=\"text_bubble_cats\"><a href=\"/category/"+c+"\">"+c+"</a></span>" for c in category ]
    return " ".join(html)
def get(self):
    """Cron sweep: for every distinct category, backfill the
    model_details reference on the 50 most recently updated LinkCategory
    rows, resolving by url then feed_url."""
    allData=LinkCategory.getAll()
    all_categories= [ c.category for c in allData if c is not None ]
    uniq_categories = set(all_categories)
    for c in uniq_categories:
        logging.info('updates for category %s' % c)
        lc=LinkCategory.gql('WHERE category = :1 order by updated desc', c).fetch(50)
        for l in lc:
            if hasattr(l,'model_details') and l.model_details is not None:
                # Already resolved — nothing to do.
                continue
            logging.info('updating url details %s ' %l.url)
            s=SessionModel.gql('WHERE url = :1 order by date desc', l.url).get()
            if s is None:
                logging.info('no session model for url %s trying feed url' %l.url)
                s=SessionModel.gql('WHERE feed_url = :1', l.url).get()
            # BUG FIX: dropped a third query that was byte-identical to the
            # feed_url lookup above (its log line said 'trying shprt url' —
            # TODO: probably intended a short-url field; re-running the same
            # feed_url query was a guaranteed no-op).
            if s is None:
                logging.info('ERROR: no session model url for %s' % l.url)
                continue
            l.model_details=s.key()
            l.put()
def post(self):
    """Fetch a category for *url* from the Alchemy API, merge it with any
    cached/stored categories, persist LinkCategory pairs, broadcast each
    category to the stream queue, and update the Links row's language.
    """
    url=self.request.get('url',None)
    # BUG FIX: the None guard must run before hashing the url (getUrlHash
    # used to be called on a possibly-None value).
    if url is None:
        logging.info('no link in request. skipping')
        return
    url_hash = LinkUtils.getUrlHash(url)
    category_api='http://access.alchemyapi.com/calls/url/URLGetCategory?apikey=%s&url=%s&outputMode=json' %(self.alchemy_key, urllib2.quote(url.encode('utf-8')))
    logging.info('trying to fetch shared count info %s' %category_api)
    link=None
    language=None
    category=None
    # Locate an existing Links row by hash, then by raw url.
    try:
        link = Links.gql('WHERE url_hash = :1', url_hash).get()
        if link is None:
            link = Links.gql('WHERE url = :1', url).get()
    except BadValueError:
        logging.info('url property too long')
    if link is None:
        link = Links()
    else:
        link.date_updated = datetime.datetime.now().date()
    json = LinkUtils.getJsonFromApi(category_api)
    if json is None:
        logging.info('alchemy api returned no category.skipping')
        return
    try:
        language=json['language']
        category=json['category']
        score=Cast.toFloat(json['score'],0)
        # Only trust sufficiently confident categorizations.
        if score is not None and score > 0.5 and category is not None:
            logging.info('category %s score %s' %(category, score))
            cats=category.split("_")
            if cats is None:
                logging.info('no categories. exit')
                return
            memcache_key=url_hash+'_category'
            current_categories=memcache.get(memcache_key)
            merge_cat=[]
            if current_categories is not None:
                logging.info('merging with existing cats %s' %current_categories)
                merge_cat.extend(current_categories)
                merge_cat.extend(cats)
            else:
                merge_cat=cats
            model=None
            try:
                # BUG FIX: the url_hash filter was bound to `url` instead of
                # `url_hash`, so this lookup could never match.
                model=SessionModel.gql('WHERE url_hash = :1 order by date desc', url_hash).get()
                if model is None:
                    model=SessionModel.gql('WHERE url = :1 order by date desc', url).get()
            except BadValueError:
                logging.info('url too long ... %s' %url)
            if model is None:
                logging.info('model not defined ... skipping')
                return
            # Merge in any categories already stored on the Links row.
            # NOTE(review): eval() on stored text — should be a JSON parse;
            # flagged, behavior unchanged.
            linkDetail=Links.gql('WHERE url_hash = :1' , url_hash).get()
            if linkDetail is None:
                linkDetail=Links.gql('WHERE url = :1' , url).get()
            if linkDetail is not None and linkDetail.categories is not None:
                logging.info('category found from link details %s' % linkDetail.categories)
                delic_cats=eval(linkDetail.categories)
                d_cats=[ c for c in delic_cats ]
                merge_cat.extend(d_cats)
            merge_cat=set(merge_cat)
            logging.info('caching cats %s for url %s' %(merge_cat, url))
            memcache.set(memcache_key, list(set(merge_cat))[:4])
            for c in merge_cat:
                # Broadcast the category/url pair, then upsert LinkCategory.
                taskqueue.add(queue_name='message-broadcast-queue', url='/category/stream', params={'category':c, 'url': url_hash})
                existingLinkCat = LinkCategory.gql('WHERE url_hash = :1 and category = :2', url_hash, c).get()
                if existingLinkCat is None:
                    existingLinkCat = LinkCategory.gql('WHERE url = :1 and category = :2', url, c).get()
                if existingLinkCat is not None:
                    existingLinkCat.updated=datetime.datetime.now()
                    # Backfill the hash on legacy rows found via raw url.
                    if existingLinkCat.url_hash is None:
                        existingLinkCat.url_hash = url_hash
                    existingLinkCat.put()
                    logging.info('updated exisitng url(%s) category(%s) update time %s' % (url, c, existingLinkCat.updated))
                else:
                    logging.info('new pair: url%s) category(%s) ' % (url, c))
                    linkCategory=LinkCategory()
                    linkCategory.url=url
                    linkCategory.url_hash = url_hash
                    linkCategory.category=c
                    if model is not None:
                        linkCategory.model_details=model.key()
                    linkCategory.put()
        if language is not None:
            link.language = language
        link.url=url
        link.url_hash=url_hash
        link.put()
    except KeyError:
        e0, e1 = sys.exc_info()[0],sys.exc_info()[1]
        logging.info('key error [[%s, %s]] in %s' %(e0, e1, json))