Пример #1
0
        def getsportbadge(self):
            """Return 'sport' once today's sport activity reaches the threshold.

            Two counts, both restricted to entries dated today or later, are
            added together:

            * SessionModel rows of ``self.user`` whose domain is listed in
              ``self.sportDomains``;
            * LinkCategory rows (at most 1000 are fetched) whose category is
              listed in ``self.sportCategories`` and whose ``model_details``
              belongs to ``self.user``.

            Returns:
                'sport' when the combined count is at least
                ``self.sport_tresshold``, otherwise None.
            """
            today = datetime.datetime.now().date()
            domain_count = SessionModel.gql(
                'where domain in :1 and date >= :2 and instaright_account = :3',
                self.sportDomains, today, self.user).count()
            category_rows = LinkCategory.gql(
                'WHERE category in :1 and date >= :2',
                self.sportCategories, today).fetch(1000)

            # Rows without linked session details cannot be attributed to a
            # user, so skip them instead of failing on the attribute access.
            cat_user_count = 0
            for row in category_rows:
                details = getattr(row, 'model_details', None)
                if details is not None and details.instaright_account == self.user:
                    cat_user_count += 1

            total = domain_count + cat_user_count
            logging.info('site specific badger(sport): fetched stats %s and category count %s' % (domain_count, cat_user_count))
            if total >= self.sport_tresshold:
                logging.info('setting sport badge for user %s ' % self.user)
                return 'sport'

            # Report the same combined value that was compared above.
            logging.info('for user %s still tresshold of %s still not reached %s' % (self.user, self.sport_tresshold, total))
            return None
Пример #2
0
        def get(self, category):
            """Serve the most recently updated links of ``category`` as JSON.

            Nothing is written when ``category`` is None or 0. When the request
            parameter ``format`` equals 'json', the 50 most recently updated
            LinkCategory rows are fetched, rows without ``model_details`` are
            dropped and the first ten remaining rows are serialized. For any
            other format the placeholder body ``[{}]`` is written.
            """
            format = self.request.get('format', None)
            if category is None or category == 0:
                logging.info('not category in request. return empty')
                return

            if format != 'json':
                self.response.headers['Content-Type'] = "application/json"
                self.response.out.write("[{}]")
                return

            logging.info('catefory %s json feed' % category)
            userUtil = UserUtils()
            allentries = LinkCategory.gql('WHERE category = :1 order by updated desc', category).fetch(50)
            entries = [e for e in allentries
                       if hasattr(e, 'model_details') and e.model_details is not None]
            entries = entries[:10]

            def serialize(o):
                # Called by simplejson for every object it cannot encode itself,
                # i.e. for each LinkCategory row in ``entries``.
                details = o.model_details
                return {'u': {
                    'id': str(details.key()),
                    't': unicode(details.title),
                    'dd': LinkUtils.generate_domain_link(details.domain),
                    'l': LinkUtils.generate_instaright_link(details.url_encode26, LinkUtils.make_title(details.title), details.url),
                    'd': details.domain,
                    'user': urllib.unquote(details.instaright_account),
                    'source': details.client,
                    'html_lc': LinkUtils.getLinkCategoryHTML(o),
                    # %H (24-hour clock): the previous %I emitted a 12-hour
                    # value without AM/PM, making 01:05 and 13:05 identical.
                    'u': o.updated.strftime("%Y-%m-%dT%H:%M:%SZ"),
                    'a': userUtil.getAvatar(details.instaright_account),
                    'ol': o.url,
                    'c': category,
                    'lc': LinkUtils.getLinkCategory(details),
                }}

            self.response.headers['Content-Type'] = "application/json"
            self.response.out.write(simplejson.dumps(entries, default=serialize))
Пример #3
0
 def get(self, c):
     """Attach SessionModel details to recent LinkCategory rows of category ``c``.

     The 50 most recently updated rows of the category are inspected. Rows
     that already reference session details are skipped; for the others a
     SessionModel is looked up by ``url`` and then by ``feed_url``, and its
     key is stored in the row's ``model_details``.
     """
     logging.info('updates for category %s' % c)
     rows = LinkCategory.gql('WHERE category = :1 order by updated desc', c).fetch(50)
     for row in rows:
         if hasattr(row, 'model_details') and row.model_details is not None:
             logging.info('url %s already has details, skipping update' % row.url)
             continue
         logging.info('updating url details %s ' % row.url)
         session = SessionModel.gql('WHERE url = :1', row.url).get()
         if session is None:
             session = SessionModel.gql('WHERE feed_url = :1', row.url).get()
         # NOTE(review): the original repeated the feed_url query a second
         # time here; presumably a lookup on another field (short url?) was
         # intended - confirm against the SessionModel schema.
         if session is None:
             logging.info('ERROR: no session model url for %s' % row.url)
             continue
         logging.info('session model for url %s FOUND' % row.url)
         row.model_details = session.key()
         row.put()
Пример #4
0
        def processLinkCategoriesFromJson(cls, categories, url):
            """Create or refresh one LinkCategory row per category of ``url``.

            Args:
                categories: string that evaluates to a mapping keyed by
                    category name (the values are not used here).
                url: the link the categories belong to.

            For each category an existing (category, url) row gets its
            ``updated`` timestamp refreshed; otherwise a new row is stored.
            Nothing happens when ``categories`` is missing or evaluates to an
            empty value.
            """
            if not categories:
                logging.info('missing categories. skipping')
                return
            # SECURITY NOTE(review): eval() executes arbitrary expressions. If
            # ``categories`` can come from an external service or a request,
            # switch to a real parser (ast.literal_eval / a JSON decoder).
            cat_dict = eval(categories)
            # Also covers payloads such as "None", on which len() used to fail.
            if not cat_dict:
                logging.info('no categories. skipping')
                return
            for cat in cat_dict:
                existingCategory = LinkCategory.gql('WHERE category = :1 and url = :2', cat, url).get()
                if existingCategory is None:
                    logging.info('new category %s , init url %s' % (cat, url))
                    linkCategory = LinkCategory()
                    linkCategory.category = cat
                    linkCategory.url = url
                    linkCategory.put()
                else:
                    logging.info('updated time for category %s [ %s ]' % (cat, existingCategory.url))
                    existingCategory.updated = datetime.datetime.now()
                    existingCategory.put()
Пример #5
0
 def getLinkCategory(cls, link_model):
     """Return the categories of a link as a comma-separated string.

     The list cached in memcache under ``<url_hash>_category`` is used when
     present. Otherwise LinkCategory rows for the url hash are queried (at
     most 1000), category names longer than two characters are de-duplicated,
     the resulting list is cached and then joined.

     Returns:
         '' when ``link_model`` or its ``url_hash`` is None, or when the
         query raised NotSavedError; otherwise the joined category names.
     """
     # Validate before touching any attribute: the original logged
     # link_model.url_hash first and therefore crashed on None.
     if link_model is None or link_model.url_hash is None:
         return ''
     logging.info('looking category cache for url hash %s ( %s )' % (link_model.url_hash, link_model.url))
     mem_key = link_model.url_hash + '_category'
     cached_category = memcache.get(mem_key)
     if cached_category is not None:
         logging.info('got category from cache %s' % cached_category)
         return ','.join(cached_category)

     rows = None
     try:
         rows = LinkCategory.gql('WHERE category != NULL and url_hash = :1 ', link_model.url_hash).fetch(1000)
     except NotSavedError:
         logging.info('not saved key for url hash %s' % link_model.url_hash)
     if rows is None:
         return ''

     logging.info('got %s categories for %s' % (len(rows), link_model.url))
     names = [row.category for row in rows
              if row.category is not None and len(row.category) > 2]
     category = list(set(names))
     logging.info('got category from query %s' % category)
     memcache.set(mem_key, category)
     return ','.join(category)
Пример #6
0
 def getLinkCategoryHTML(cls, link_model):
     """Render the categories of a link as space-separated HTML bubbles.

     Categories come from memcache (key ``<url_hash>_category``) when cached;
     otherwise LinkCategory rows for the url hash are queried (at most 1000),
     names longer than two characters are de-duplicated and cached.

     Returns:
         None when ``link_model`` or its ``url_hash`` is None; otherwise a
         string with one ``<span>`` link per category (empty when there are
         no categories).
     """
     # Validate before touching any attribute: the original logged
     # link_model.url_hash first and therefore crashed on None.
     if link_model is None or link_model.url_hash is None:
         return None
     logging.info('looking category cache for url hash %s ( %s )' % (link_model.url_hash, link_model.url))
     mem_key = link_model.url_hash + '_category'
     category = memcache.get(mem_key)
     if category is not None:
         logging.info('got category from cache %s' % category)
     else:
         # Only hit the datastore on a cache miss.
         rows = None
         try:
             rows = LinkCategory.gql('WHERE category != NULL and url_hash = :1 ', link_model.url_hash).fetch(1000)
         except NotSavedError:
             logging.info('not saved key for url hash %s' % link_model.url_hash)
         if rows is None:
             # Nothing cached and nothing fetched: render no bubbles instead
             # of iterating over None.
             category = []
         else:
             logging.info('got %s categories for %s' % (len(rows), link_model.url))
             names = [row.category for row in rows
                      if row.category is not None and len(row.category) > 2]
             category = list(set(names))
             logging.info('got category from query %s' % category)
             memcache.set(mem_key, category)
     #NOTE: static css , error
     # NOTE(review): category names are inserted into the markup unescaped;
     # confirm they can never contain HTML-special characters.
     html = ["<span class=\"text_bubble_cats\"><a href=\"/category/" + c + "\">" + c + "</a></span>" for c in category]
     return " ".join(html)
Пример #7
0
 def get(self):
     """Attach SessionModel details to recent LinkCategory rows of every category.

     For each distinct category returned by ``LinkCategory.getAll()`` the 50
     most recently updated rows are inspected. Rows that already reference
     session details are skipped; for the others a SessionModel is looked up
     by ``url`` (newest first) and, only if that fails, by ``feed_url``. The
     key of the match is stored in the row's ``model_details``.
     """
     allData = LinkCategory.getAll()
     uniq_categories = set(entry.category for entry in allData if entry is not None)
     for c in uniq_categories:
         logging.info('updates for category %s' % c)
         rows = LinkCategory.gql('WHERE category = :1 order by updated desc', c).fetch(50)
         for row in rows:
             if hasattr(row, 'model_details') and row.model_details is not None:
                 continue
             logging.info('updating url details %s ' % row.url)
             session = SessionModel.gql('WHERE url = :1 order by date desc', row.url).get()
             if session is None:
                 # The fallback must live inside this branch: previously it ran
                 # unconditionally and overwrote a successful url match.
                 logging.info('no session model for url %s trying feed url' % row.url)
                 session = SessionModel.gql('WHERE feed_url = :1', row.url).get()
             # NOTE(review): the original followed up with a second, identical
             # feed_url query announced as a "short url" lookup; the intended
             # field is not visible here - confirm and add it if it exists.
             if session is None:
                 logging.info('ERROR: no session model url for %s' % row.url)
                 continue
             row.model_details = session.key()
             row.put()
Пример #8
0
        def post(self):
            """Categorize the posted ``url`` via the Alchemy API and store the result.

            Steps:
              1. Load the Links row for the url (by hash, then by url) or start
                 a new one.
              2. Ask the Alchemy category API for language, category and score.
              3. When the score is above 0.5, split the category on ``_``,
                 merge it with categories cached in memcache and with those
                 stored on the Links row, cache up to four of them, enqueue a
                 broadcast task per category and create/refresh the matching
                 LinkCategory rows.
              4. Save language, url and url hash on the Links row.

            The method returns early (without saving the Links row) when the
            url is missing, the API gives no answer, or no SessionModel exists
            for the url. A KeyError raised while processing the API answer is
            logged and swallowed.
            """
            url = self.request.get('url', None)
            if url is None:
                logging.info('no link in request. skipping')
                return
            # Hash only after the url is known to be present.
            url_hash = LinkUtils.getUrlHash(url)
            category_api = 'http://access.alchemyapi.com/calls/url/URLGetCategory?apikey=%s&url=%s&outputMode=json' % (self.alchemy_key, urllib2.quote(url.encode('utf-8')))
            logging.info('trying to fetch shared count info %s' % category_api)

            link = None
            try:
                link = Links.gql('WHERE url_hash = :1', url_hash).get()
                if link is None:
                    link = Links.gql('WHERE url = :1', url).get()
            except BadValueError:
                logging.info('url property too long')
            if link is None:
                link = Links()
            else:
                link.date_updated = datetime.datetime.now().date()

            api_response = LinkUtils.getJsonFromApi(category_api)
            if api_response is None:
                logging.info('alchemy api returned no category.skipping')
                return
            try:
                language = api_response['language']
                category = api_response['category']
                score = Cast.toFloat(api_response['score'], 0)
                if score is not None and score > 0.5 and category is not None:
                    logging.info('category %s score %s' % (category, score))
                    cats = category.split("_")

                    memcache_key = url_hash + '_category'
                    current_categories = memcache.get(memcache_key)
                    merge_cat = []
                    if current_categories is not None:
                        logging.info('merging with existing cats %s' % current_categories)
                        merge_cat.extend(current_categories)
                    merge_cat.extend(cats)

                    model = None
                    try:
                        # Look up by hash with the hash itself (the url was
                        # passed here before, so this query never matched).
                        model = SessionModel.gql('WHERE url_hash = :1 order by date desc', url_hash).get()
                        if model is None:
                            model = SessionModel.gql('WHERE url = :1 order by date desc', url).get()
                    except BadValueError:
                        logging.info('url too long ... %s' % url)
                    if model is None:
                        logging.info('model not defined ... skipping')
                        return

                    linkDetail = Links.gql('WHERE url_hash = :1', url_hash).get()
                    if linkDetail is None:
                        linkDetail = Links.gql('WHERE url = :1', url).get()
                    if linkDetail is not None and linkDetail.categories is not None:
                        logging.info('category found from link details %s' % linkDetail.categories)
                        # SECURITY NOTE(review): eval() on stored text runs
                        # arbitrary expressions; replace with a real parser if
                        # this field can hold externally supplied data.
                        delic_cats = eval(linkDetail.categories)
                        merge_cat.extend(delic_cats)
                    merge_cat = set(merge_cat)
                    logging.info('caching cats %s for url %s' % (merge_cat, url))
                    memcache.set(memcache_key, list(merge_cat)[:4])

                    for c in merge_cat:
                        taskqueue.add(queue_name='message-broadcast-queue', url='/category/stream', params={'category': c, 'url': url_hash})
                        existingLinkCat = LinkCategory.gql('WHERE url_hash = :1 and category = :2', url_hash, c).get()
                        if existingLinkCat is None:
                            existingLinkCat = LinkCategory.gql('WHERE url = :1 and category = :2', url, c).get()
                        if existingLinkCat is not None:
                            existingLinkCat.updated = datetime.datetime.now()
                            if existingLinkCat.url_hash is None:
                                existingLinkCat.url_hash = url_hash
                            existingLinkCat.put()
                            logging.info('updated exisitng url(%s) category(%s) update time %s' % (url, c, existingLinkCat.updated))
                        else:
                            logging.info('new pair: url%s) category(%s) ' % (url, c))
                            linkCategory = LinkCategory()
                            linkCategory.url = url
                            linkCategory.url_hash = url_hash
                            linkCategory.category = c
                            # model is never None here (see early return above).
                            linkCategory.model_details = model.key()
                            linkCategory.put()

                if language is not None:
                    link.language = language
                link.url = url
                link.url_hash = url_hash
                link.put()
            except KeyError:
                e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
                logging.info('key error [[%s, %s]] in %s' % (e0, e1, api_response))