Example #1
def post(self):
    url = self.request.get('url', None)
    if url is None:
        logging.info('no url, no recommendations')
        return
    url = url.encode('utf-8')
    logging.info('getting url hash %s' % url)
    url_hash = LinkUtils.getUrlHash(url)
    if url_hash is None:
        logging.error("can't determine url hash %s" % url)
        return
    try:
        l = Links.gql('WHERE url_hash = :1', url_hash).get()
        if l is None:
            l = Links.gql('WHERE url = :1', url).get()
    except:
        l = None
    if l is None:
        logging.info('no link saved with url %s' % url)
        l = Links()
        l.url = url
        l.url_hash = url_hash
        l.put()
    api_call = 'http://api.zemanta.com/services/rest/0.0/'
    args = {'method': 'zemanta.suggest',
            'api_key': self.z_key,
            'text': url,
            'return_categories': 'dmoz',
            'format': 'json'}
    args_enc = urllib.urlencode(args)
    json = None
    result = None
    try:
        result = urlfetch.fetch(url=api_call, payload=args_enc, method=urlfetch.POST,
                                headers={'Content-Type': 'application/x-www-form-urlencoded'})
        json = simplejson.loads(result.content)
    except:
        logging.info('bad json data from zemanta: %s' % result)

    if json is None or json['status'] != 'ok':
        logging.info('error while fetching recommendations')
        return
    articles = json['articles']
    # TODO: apply DMOZ categories
    categories = json['categories']
    #relevant_articles = [ (c["title"], c["url"]) for c in articles if c["confidence"] > 0.01 ]
    relevant_articles = [(c["title"], c["url"]) for c in articles]
    # store the top four (title, url) pairs as a JSON string
    l.recommendation = str(simplejson.dumps(relevant_articles[0:4]))
    if l.url_hash is None:
        l.url_hash = url_hash
    l.put()
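
LinkUtils.getUrlHash is used by most of these examples but is not shown. A minimal sketch of what such a helper could look like, assuming it is simply an MD5 hex digest of the UTF-8 encoded URL (the real implementation may normalize the URL first):

import hashlib

def getUrlHash(url):
    # hypothetical sketch: hash the UTF-8 bytes of the URL; None for empty input
    if not url:
        return None
    if isinstance(url, unicode):
        url = url.encode('utf-8')
    return hashlib.md5(url).hexdigest()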
Example #2
def get(self, url_hash, title):
    try:
        self.redirect_perm()
        self.get_user()
        url_hash = urllib.unquote(url_hash)
        logging.info('url hash: %s' % url_hash)
        logging.info('category screen_name %s' % self.screen_name)
        category = None
        if self.avatar is None:
            self.avatar = '/static/images/noavatar.png'

        sessionModel = SessionModel.gql('where url_encode26 = :1', url_hash).get()
        if sessionModel is None:
            logging.info('no article with hash %s ... redirecting' % url_hash)
            self.redirect('/')
            return
        generated_title = LinkUtils.make_title(sessionModel.title)
        if title != generated_title:
            self.redirect('/article/' + url_hash + '/' + generated_title)
            return
        instaright_link = LinkUtils.generate_instaright_link(url_hash, generated_title)
        links = Links.gql('where url_hash = :1', url_hash).get()
        userUtil = UserUtils()
        if links is not None:
            category = links.categories
        sessionTitle = LinkUtils.generateUrlTitle(sessionModel.title)
        template_variables = {'page_footer': PageUtils.get_footer(),
                              'user': self.screen_name,
                              'logout_url': '/account/logout',
                              'avatar': self.avatar,
                              'story_avatar': userUtil.getAvatar(sessionModel.instaright_account),
                              'story_user': sessionModel.instaright_account,
                              'domain': sessionModel.domain,
                              'title': sessionModel.title,
                              'link': sessionModel.url,
                              'updated': sessionModel.date,
                              'id': str(sessionModel.key()),
                              'instaright_link': instaright_link,
                              'category': LinkUtils.getLinkCategoryHTML(sessionModel),
                              'dd': LinkUtils.generate_domain_link(sessionModel.domain)}
        path = os.path.join(os.path.dirname(__file__), 'templates/article.html')
        self.response.headers["Content-Type"] = "text/html; charset=utf-8"
        self.response.out.write(template.render(path, template_variables))
    except:
        e, e0 = sys.exc_info()[0], sys.exc_info()[1]
        logging.error('handled error: %s, %s' % (e, e0))
        self.redirect('/')
Example #3
def get(self):
    url_hash = self.request.get('url_hash', None)
    if url_hash is None:
        logging.info('no url hash, cannot provide recommendations')
        return
    link = Links.gql('WHERE url_hash = :1', url_hash).get()
    self.response.headers['Content-Type'] = "application/json"
    if link is None:
        logging.info('nonexistent link_hash %s, no recommendations' % url_hash)
        self.response.out.write("{}")
        return
    if link.recommendation is None:
        logging.info('no recommendations, started new job')
        taskqueue.add(url='/link/recommendation/task', queue_name='default', params={'url_hash': url_hash})
        self.response.out.write("{}")
    else:
        logging.info('transforming %s to json output' % link.recommendation)
        #self.response.out.write(link.recommendation)
        # recommendation is stored as a JSON string of (title, url) pairs (see Example #1),
        # so decode it first and re-encode each pair as an object
        pairs = simplejson.loads(link.recommendation)
        self.response.out.write(simplejson.dumps([{'title': p[0], 'url': p[1]} for p in pairs]))
Example #4
def post(self):
    url = self.request.get('url', None)
    domain = self.request.get('domain', None)
    if url is None:
        logging.info('no url, giving up')
        return
    if domain is None:
        logging.info('no domain provided, giving up')
        return
    link = None
    try:
        link = Links.gql('WHERE url = :1', url).get()
    except:
        logging.info('error while fetching url from db')
    if link is None:
        link = Links()
    if link.categories is None:
        lh = LinkHandler()
        link = lh.delicious_data(url)
        if link is None or link.categories is None:
            logging.info('no categories for link %s' % url)
            return
    CategoryUtil.processDomainCategories(link.categories, domain)
Example #5
def get(self):
    order = self.request.get('order', None)
    l = Links()
    self.response.headers["Content-Type"] = 'text/plain'
    logging.info('get links')
    if order and hasattr(l, order):
        if order == 'diggs':
            links = Links.gql('ORDER BY diggs DESC').fetch(100)
        elif order == 'redditups':
            links = Links.gql('ORDER BY redditups DESC').fetch(100)
        elif order == 'all_score':
            links = Links.gql('ORDER BY all_score DESC').fetch(100)
        elif order == 'influence_score':
            links = Links.gql('ORDER BY influence_score DESC').fetch(100)
        elif order == 'facebook_like':
            links = Links.gql('ORDER BY facebook_like DESC').fetch(100)
        elif order == 'instaright_count':
            links = Links.gql('ORDER BY instaright_count DESC').fetch(100)
        elif order == 'overall_score':
            links = Links.gql('ORDER BY overall_score DESC').fetch(100)
        elif order == 'redditdowns':
            links = Links.gql('ORDER BY redditdowns DESC').fetch(100)
        elif order == 'tweets':
            links = Links.gql('ORDER BY tweets DESC').fetch(100)
        elif order == 'delicious_count':
            links = Links.gql('ORDER BY delicious_count DESC').fetch(100)
        else:
            links = Links.gql('ORDER BY overall_score DESC').fetch(100)
    else:
        links = Links.gql('ORDER BY date_added DESC, overall_score DESC').fetch(100)
        logging.info('pre link count: %s' % len(links))
        order = 'overall_score'
    urls = [(l.url, str(getattr(l, order)), str(l.date_updated)) for l in links if l.url != RequestUtils.getDomain(l.url)]
    logging.info('link count: %s' % len(urls))
    if order and hasattr(l, order):
        template_variables = {'links': urls}
        path = os.path.join(os.path.dirname(__file__), 'templates/top_links.html')
        self.response.headers["Content-Type"] = "text/html"
        self.response.out.write(template.render(path, template_variables))
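
Every branch above runs the same query with only the sort field changing, and hasattr(l, order) has already been checked. The chain could arguably be collapsed by interpolating a whitelisted field name into the GQL string; a minimal sketch under that assumption (the explicit whitelist keeps arbitrary model attributes out of the query):

ALLOWED_ORDERS = frozenset(['diggs', 'redditups', 'all_score', 'influence_score',
                            'facebook_like', 'instaright_count', 'overall_score',
                            'redditdowns', 'tweets', 'delicious_count'])

def fetch_top_links(order):
    # hypothetical helper over the Links model used above; falls back to
    # overall_score for anything outside the whitelist, mirroring the else branch
    field = order if order in ALLOWED_ORDERS else 'overall_score'
    return Links.gql('ORDER BY %s DESC' % field).fetch(100)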
Example #6
@classmethod
def getEmbededInfo(cls, url_hash):
    l = Links.gql('WHERE url_hash = :1', url_hash).get()
    if l is None or l.embeded is None:
        return None
    return l.embeded
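
A short usage sketch from inside a request handler, assuming the method lives on LinkUtils (the cls parameter suggests a classmethod) and that embeded holds ready-to-serve embed markup:

embeded = LinkUtils.getEmbededInfo(url_hash)
if embeded is None:
    logging.info('no embedded info for %s' % url_hash)
else:
    self.response.out.write(embeded)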
Example #7
def getAllData(self, url, count=0):
    domain = RequestUtils.getDomain(url)
    logging.info('from %s domain %s' % (url, domain))
    url = urllib2.quote(url.encode('utf-8'))
    url_hash = LinkUtils.getUrlHash(url)

    topsy_api = 'http://otter.topsy.com/stats.json?url=%s' % url
    tweet_meme_api = 'http://api.tweetmeme.com/url_info.json?url=%s' % url
    delicious_api = 'http://feeds.delicious.com/v2/json/urlinfo/data?url=%s&type=json' % url
    digg_api = 'http://services.digg.com/1.0/endpoint?method=story.getAll&link=%s&type=json' % url
    reddit_api = 'http://www.reddit.com/api/info.json?url=%s' % url
    facebook_api = 'https://api.facebook.com/method/fql.query?query=select%20%20like_count,share_count%20from%20link_stat%20where%20url=%22' + url + '%22&format=json'
    linkedin_api = 'http://www.linkedin.com/cws/share-count?url=%s' % url
    stumble_upon_api = 'http://www.stumbleupon.com/services/1.01/badge.getinfo?url=%s' % url
    buzz_api = 'https://www.googleapis.com/buzz/v1/activities/count?alt=json&url=%s' % url
    alternate_api = 'http://api.sharedcount.com/?url=%s' % url

    link = None
    alternate_twitter_score = None
    alternate_buzz_score = None
    alternate_digg_score = None
    alternate_facebook_share_score = None
    alternate_facebook_like_score = None
    alternate_su_score = None
    alternate_linkedin_score = None

    try:
        link = Links.gql('WHERE url_hash = :1', url_hash).get()
        if link is None:
            link = Links.gql('WHERE url = :1', url).get()
    except BadValueError:
        logging.info('url property too long')
    if link is None:
        link = Links()
        link.domain = domain
        link.instapaper_count = Cast.toInt(count, 0)
        link.url = urllib2.unquote(url).decode('utf-8')
        link.url_hash = LinkUtils.getUrlHash(link.url)
        link.redditups = 0
        link.redditdowns = 0
        link.tweets = 0
        link.diggs = 0
        link.delicious_count = 0
        link.overall_score = 0
        link.shared = False
    else:
        link.date_updated = datetime.datetime.now().date()
        link.domain = domain
        if link.title:
            link.title = link.title.strip()[:199]
        if link.url_hash is None:
            link.url_hash = url_hash

    # relaxation
    link.relaxation = 0

    logging.info('trying to fetch shared count info %s' % alternate_api)
    json = LinkUtils.getJsonFromApi(alternate_api)
    if json:
        try:
            alternate_twitter_score = Cast.toInt(json['Twitter'], 0)
            alternate_buzz_score = Cast.toInt(json['Buzz'], 0)
            alternate_digg_score = Cast.toInt(json['Diggs'], 0)
            facebook_info = LinkUtils.getJsonFieldSimple(json, "Facebook")
            logging.info('facebook alternate info %s' % facebook_info)
            if type(facebook_info) is int:
                alternate_facebook_share_score = Cast.toInt(facebook_info, 0)
            elif type(facebook_info) is dict:
                logging.info('likes: %s' % LinkUtils.getJsonFieldSimple(facebook_info, "like_count"))
                logging.info('shares: %s' % LinkUtils.getJsonFieldSimple(facebook_info, "share_count"))
                alternate_facebook_like_score = Cast.toInt(LinkUtils.getJsonFieldSimple(facebook_info, "like_count"), 0)
                alternate_facebook_share_score = Cast.toInt(LinkUtils.getJsonFieldSimple(facebook_info, "share_count"), 0)
            logging.info('alternate fb likes %s fb share %s' % (alternate_facebook_like_score, alternate_facebook_share_score))
            alternate_su_score = Cast.toInt(json['StumbleUpon'], 0)
            alternate_linkedin_score = Cast.toInt(json['LinkedIn'], 0)
        except KeyError:
            e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
            logging.info('key error [[%s, %s]] in %s' % (e0, e1, json))

    logging.info('trying to fetch topsy info %s' % topsy_api)
    json = LinkUtils.getJsonFromApi(topsy_api)
    if json:
        try:
            link.influence_score = Cast.toInt(json['response']['influential'], 0)
            link.all_score = Cast.toInt(json['response']['all'], 0)
        except KeyError:
            e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
            logging.info('key error [[%s, %s]] in %s' % (e0, e1, json))

    logging.info('trying to fetch digg info %s' % digg_api)
    json = LinkUtils.getJsonFromApi(digg_api)
    if json:
        try:
            link.diggs = Cast.toInt(json['count'], 0)
            logging.info('diggs %s' % link.diggs)
        except KeyError:
            e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
            logging.info('key error [[%s, %s]] in %s' % (e0, e1, json))
    elif alternate_digg_score is not None:
        logging.info('using alternate digg score %s' % alternate_digg_score)
        link.diggs = alternate_digg_score
    if link.diggs is not None:
        link.overall_score += link.diggs

    logging.info('trying to fetch tweet_meme info %s' % tweet_meme_api)
    json = LinkUtils.getJsonFromApi(tweet_meme_api)
    if json and 'story' in json:
        try:
            link.tweets = Cast.toInt(json['story']['url_count'], 0)
            if json['story']['title'] is not None:
                link.title = json['story']['title'].strip()[:199]
            if 'excerpt' in json['story']:
                logging.info('getting excerpt')
                link.excerpt = db.Text(unicode(json['story']['excerpt']))
            logging.info('tweets %s' % link.tweets)
        except KeyError:
            link.relaxation = link.relaxation + 1
            e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
            logging.info('key error [[%s, %s]] in %s' % (e0, e1, json))
    elif alternate_twitter_score is not None:
        logging.info('using alternate twitter score %s' % alternate_twitter_score)
        link.tweets = alternate_twitter_score
    if link.tweets is not None:
        link.overall_score += self.tw_factor * link.tweets

    logging.info('trying to fetch delicious info %s' % delicious_api)
    json = LinkUtils.getJsonFromApi(delicious_api)
    if json:
        try:
            if not link.title and json[0]['title']:
                link.title = json[0]['title'].strip()[:199]
            link.categories = db.Text(unicode(simplejson.dumps(json[0]['top_tags'])))
            link.delicious_count = Cast.toInt(json[0]['total_posts'], 0)
            logging.info('delicious count %s' % link.delicious_count)
            if link.delicious_count is not None:
                link.overall_score += link.delicious_count
        except KeyError:
            e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
            logging.info('key error [[%s, %s]] in %s' % (e0, e1, json))

    logging.info('trying to fetch reddit info %s' % reddit_api)
    json = LinkUtils.getJsonFromApi(reddit_api)
    if json and 'data' in json:
        try:
            data = [x for x in json['data']['children']]
            top_upped = sorted(data, key=lambda ups: ups['data']['ups'], reverse=True)
            if top_upped:
                link.redditups = Cast.toInt(top_upped[0]['data']['ups'], 0)
                link.redditdowns = Cast.toInt(top_upped[0]['data']['downs'], 0)
                link.created = Cast.toInt(top_upped[0]['data']['created'], 0)
                logging.info('reddit ups %s' % link.redditups)
                if link.redditups is not None:
                    link.overall_score += link.redditups
                if link.redditdowns is not None:
                    link.overall_score -= link.redditdowns
        except KeyError:
            link.relaxation = link.relaxation + 1
            e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
            logging.info('key error [[%s, %s]] in %s' % (e0, e1, json))

    logging.info('trying to fetch facebook info %s' % facebook_api)
    json = LinkUtils.getJsonFromApi(facebook_api)
    if json:
        try:
            link.facebook_like = Cast.toInt(json[0]['like_count'], 0)
            link.facebook_share = Cast.toInt(json[0]['share_count'], 0)
            logging.info('facebook likes %s' % link.facebook_like)
            logging.info('facebook share %s' % link.facebook_share)
        except KeyError:
            e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
            logging.info('request: %s == more info: key error [[%s, %s]] in %s' % (facebook_api, e0, e1, json))
    elif alternate_facebook_like_score is not None:
        logging.info('using alternate facebook like count %s' % alternate_facebook_like_score)
        link.facebook_like = alternate_facebook_like_score
    elif alternate_facebook_share_score is not None:
        logging.info('using alternate facebook share count %s' % alternate_facebook_share_score)
        link.facebook_share = alternate_facebook_share_score
    if link.facebook_like is not None:
        link.overall_score += self.fb_factor * link.facebook_like
    if link.facebook_share is not None:
        link.overall_score += link.facebook_share

    logging.info('trying to fetch stumble upon info %s' % stumble_upon_api)
    json = LinkUtils.getJsonFromApi(stumble_upon_api)
    if json:
        try:
            link.stumble_upons = Cast.toInt(json['result']['views'], 0)
            logging.info('stumble score %s' % link.stumble_upons)
            if not link.title and json['result']['title']:
                link.title = json['result']['title'].strip()[:199]
                logging.info('setting stumble title: %s' % link.title)
        except KeyError:
            e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
            logging.info('request: %s == more info: key error [[%s, %s]] in %s' % (stumble_upon_api, e0, e1, json))
    elif alternate_su_score is not None:
        logging.info('using alternate su score %s' % alternate_su_score)
        link.stumble_upons = alternate_su_score
    if link.stumble_upons is not None:
        link.overall_score += link.stumble_upons

    # specific handling for linkedin since the response is JSONP
    logging.info('trying to fetch linkedin info %s' % linkedin_api)
    try:
        dta = urllib2.urlopen(linkedin_api)
        res = dta.read()
        res = res.replace('IN.Tags.Share.handleCount(', '')
        res = res.replace(');', '')
        json = simplejson.loads(res)

        link.linkedin_share = Cast.toInt(json['count'], 0)
        logging.info('linkedin shares %s' % link.linkedin_share)
        if link.linkedin_share is not None:
            link.overall_score += link.linkedin_share
    except:
        e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
        logging.info('request: %s == more info: [[%s, %s]] in %s' % (linkedin_api, e0, e1, json))

    if link.linkedin_share is None and alternate_linkedin_score is not None:
        logging.info('using alternate linkedin score %s' % alternate_linkedin_score)
        link.linkedin_share = alternate_linkedin_score
        link.overall_score += alternate_linkedin_score

    logging.info('trying to fetch buzz info %s' % buzz_api)
    json = LinkUtils.getJsonFromApi(buzz_api)
    if json:
        try:
            # counts is keyed by the url itself
            link.buzz_count = Cast.toInt(json['data']['counts']['%s' % url][0]['count'], 0)
            logging.info('buzz share %s' % link.buzz_count)
        except KeyError:
            e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
            logging.info('request: %s == more info: key error [[%s, %s]] in %s' % (buzz_api, e0, e1, json))
    elif alternate_buzz_score is not None:
        logging.info('using alternate buzz score %s' % alternate_buzz_score)
        link.buzz_count = alternate_buzz_score
    if link.buzz_count is not None:
        link.overall_score += link.buzz_count

    return link
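
LinkUtils.getJsonFromApi backs most of the fetches above but is not shown. A minimal sketch of such a helper, assuming it returns the decoded JSON body or None on any fetch/parse failure so callers can fall back to the alternate scores (the real helper may differ):

from google.appengine.api import urlfetch

def getJsonFromApi(api_url):
    # hypothetical sketch: fetch an endpoint and decode its JSON body;
    # None signals the caller to use an alternate source
    # (assumes simplejson imported as in the handlers above)
    try:
        result = urlfetch.fetch(api_url, deadline=10)
        if result.status_code != 200:
            logging.info('api %s returned status %s' % (api_url, result.status_code))
            return None
        return simplejson.loads(result.content)
    except:
        logging.info('failed to fetch or parse %s' % api_url)
        return None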
Example #8
def post(self):
    url = self.request.get('url', None)
    if url is None:
        logging.info('no link in request. skipping')
        return
    url_hash = LinkUtils.getUrlHash(url)
    category_api = 'http://access.alchemyapi.com/calls/url/URLGetCategory?apikey=%s&url=%s&outputMode=json' % (self.alchemy_key, urllib2.quote(url.encode('utf-8')))
    logging.info('trying to fetch category info %s' % category_api)
    link = None
    language = None
    category = None

    try:
        link = Links.gql('WHERE url_hash = :1', url_hash).get()
        if link is None:
            link = Links.gql('WHERE url = :1', url).get()
    except BadValueError:
        logging.info('url property too long')
    if link is None:
        link = Links()
    else:
        link.date_updated = datetime.datetime.now().date()
    json = LinkUtils.getJsonFromApi(category_api)
    if json is None:
        logging.info('alchemy api returned no category. skipping')
        return
    try:
        language = json['language']
        category = json['category']
        score = Cast.toFloat(json['score'], 0)
        if score is not None and score > 0.5 and category is not None:
            logging.info('category %s score %s' % (category, score))
            cats = category.split("_")
            if cats is None:
                logging.info('no categories. exit')
                return
            memcache_key = url_hash + '_category'
            current_categories = memcache.get(memcache_key)
            merge_cat = []
            if current_categories is not None:
                logging.info('merging with existing cats %s' % current_categories)
                merge_cat.extend(current_categories)
                merge_cat.extend(cats)
            else:
                merge_cat = cats
            model = None
            try:
                model = SessionModel.gql('WHERE url_hash = :1 order by date desc', url_hash).get()
                if model is None:
                    model = SessionModel.gql('WHERE url = :1 order by date desc', url).get()
            except BadValueError:
                logging.info('url too long ... %s' % url)
            if model is None:
                logging.info('model not defined ... skipping')
                return

            linkDetail = Links.gql('WHERE url_hash = :1', url_hash).get()
            if linkDetail is None:
                linkDetail = Links.gql('WHERE url = :1', url).get()
            if linkDetail is not None and linkDetail.categories is not None:
                logging.info('category found from link details %s' % linkDetail.categories)
                # categories is stored as a JSON string, so parse it rather than eval it
                delic_cats = simplejson.loads(linkDetail.categories)
                d_cats = [c for c in delic_cats]
                merge_cat.extend(d_cats)
            merge_cat = set(merge_cat)
            logging.info('caching cats %s for url %s' % (merge_cat, url))
            memcache.set(memcache_key, list(set(merge_cat))[:4])

            for c in merge_cat:
                taskqueue.add(queue_name='message-broadcast-queue', url='/category/stream', params={'category': c, 'url': url_hash})
                existingLinkCat = LinkCategory.gql('WHERE url_hash = :1 and category = :2', url_hash, c).get()
                if existingLinkCat is None:
                    existingLinkCat = LinkCategory.gql('WHERE url = :1 and category = :2', url, c).get()
                if existingLinkCat is not None:
                    existingLinkCat.updated = datetime.datetime.now()
                    if existingLinkCat.url_hash is None:
                        existingLinkCat.url_hash = url_hash
                    existingLinkCat.put()
                    logging.info('updated existing url(%s) category(%s) update time %s' % (url, c, existingLinkCat.updated))
                else:
                    logging.info('new pair: url(%s) category(%s)' % (url, c))
                    linkCategory = LinkCategory()
                    linkCategory.url = url
                    linkCategory.url_hash = url_hash
                    linkCategory.category = c
                    if model is not None:
                        linkCategory.model_details = model.key()
                    linkCategory.put()

        if language is not None:
            link.language = language
        link.url = url
        link.url_hash = url_hash
        link.put()
    except KeyError:
        e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
        logging.info('key error [[%s, %s]] in %s' % (e0, e1, json))
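
Cast.toInt and Cast.toFloat appear throughout these examples; a plausible sketch, assuming they coerce arbitrary JSON values and fall back to the supplied default on failure:

class Cast(object):
    @staticmethod
    def toInt(value, default=None):
        # hypothetical: best-effort int coercion with a fallback default
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def toFloat(value, default=None):
        try:
            return float(value)
        except (TypeError, ValueError):
            return default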
Example #9
def update_link(self, url, link):
    existingLink = None
    url_hash = LinkUtils.getUrlHash(url)
    link.url_hash = url_hash
    # qfix for title TODO: find proper solution
    if link.title is not None:
        link.title = link.title.strip()[:199]
    try:
        existingLink = Links.gql("WHERE url_hash = :1", url_hash).get()
        if existingLink is None:
            existingLink = Links.gql("WHERE url = :1", url).get()
    except BadValueError:
        logging.info("bad value for url %s" % url)
    if existingLink is not None:
        existingLink.date_updated = link.date_updated
        existingLink.influence_score = link.influence_score
        existingLink.instapaper_count = link.instapaper_count
        existingLink.instaright_count = link.instaright_count
        existingLink.redditups = link.redditups
        existingLink.redditdowns = link.redditdowns
        existingLink.tweets = link.tweets
        existingLink.diggs = link.diggs
        existingLink.excerpt = link.excerpt
        existingLink.categories = link.categories
        existingLink.delicious_count = link.delicious_count
        existingLink.facebook_like = link.facebook_like
        existingLink.domain = link.domain
        if existingLink.url_hash is None:
            existingLink.url_hash = url_hash
        if link.title is not None:
            existingLink.title = link.title.strip()[:199]
        # re-share if the increase in score is more than 20%
        # (float() avoids Python 2 integer division truncating the ratio)
        if (
            existingLink.overall_score is None
            or existingLink.overall_score == 0
            or float(link.overall_score) / existingLink.overall_score >= 1.2
        ):
            existingLink.shared = False
        existingLink.overall_score = link.overall_score
        existingLink.put()
    else:
        # greater probability of db timeout for new links: retry with exponential
        # backoff (initialized outside the loop so the delay actually grows)
        timeout_ms = 100
        try:
            while True:
                try:
                    link.put()
                    break
                except datastore_errors.Timeout:
                    time.sleep(timeout_ms / 1000.0)  # sleep takes seconds
                    timeout_ms *= 2
        except apiproxy_errors.DeadlineExceededError:
            logging.info("ran out of retries for writing to db")
    logging.info(
        "url %s : influence_score %s, instapaper_count %s, redditups %s, redditdowns %s, tweets %s, diggs %s, delicious count %s facebook like %s"
        % (
            url,
            link.influence_score,
            link.instapaper_count,
            link.redditups,
            link.redditdowns,
            link.tweets,
            link.diggs,
            link.delicious_count,
            link.facebook_like,
        )
    )
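
The write-retry pattern above generalizes to any datastore entity; a minimal standalone sketch with the hypothetical name retry_put:

import time
from google.appengine.api import datastore_errors

def retry_put(entity, initial_delay_ms=100):
    # hypothetical helper: exponential backoff around datastore writes;
    # App Engine's request deadline (DeadlineExceededError) bounds total retries
    delay_ms = initial_delay_ms
    while True:
        try:
            entity.put()
            return
        except datastore_errors.Timeout:
            time.sleep(delay_ms / 1000.0)
            delay_ms *= 2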
Example #10
def post(self):
    url = self.request.get("url", None)
    user = self.request.get("user", None)
    title = self.request.get("title", None)

    if url is None:
        logging.info("no url detected. skipping...")
        return
    url_hash = LinkUtils.getUrlHash(url)
    count = 1
    url = urllib2.unquote(url)
    domain = RequestUtils.getDomain(url)
    if not domain or len(domain) == 0:
        self.response.out.write("not url: %s skipping!\n" % url)
        return
    if domain in self.skip_domains:
        logging.info("filtering out %s" % url)
        return
    lu = LinkUtils()
    link = lu.getAllData(url, count)
    logging.info("link overall score: %s" % link.overall_score)
    existingLink = None
    try:
        existingLink = Links.gql("WHERE url_hash = :1", url_hash).get()
        if existingLink is None:
            existingLink = Links.gql("WHERE url = :1", url).get()
    except BadValueError:
        logging.info("bad value url %s" % url)
    klout_score = UserUtils.getKloutScore(user, self.klout_api_key)
    share_margin = self.tw_margin
    if klout_score is not None:
        link.overall_score = link.overall_score * int(klout_score)
        logging.info("adjusted overall score %s" % link.overall_score)
        share_margin = share_margin * self.klout_correction
        logging.info("adjusting twit margin: %s" % share_margin)

    logging.info("link score %s tweet margin %s ( existing %s )" % (link.overall_score, share_margin, existingLink))
    if link.overall_score > share_margin and (existingLink is None or not existingLink.shared):
        t = Twit()
        t.generate_content(link, title, "")
        # skip tweets if the text is empty and for root domains
        if t.text is None or LinkUtils.isRootDomain(link.url):
            logging.info("twit with no body. aborting")
            return
        execute_time = TaskUtil.execution_time()
        logging.info("scheduling tweet for %s" % str(execute_time))
        mail.send_mail(
            sender="*****@*****.**",
            to="*****@*****.**",
            subject="Twit to queue!",
            html="Twitt: %s <br> score: %s" % (t.text, link.overall_score),
            body="Twitt: %s <br> score: %s" % (t.text[:500], link.overall_score),
        )

        # taskqueue.add(url='/util/twitter/twit/task', eta=execute_time, queue_name='twit-queue', params={'twit':t.text})
        taskqueue.add(url="/util/twitter/twit/task", queue_name="twit-queue", params={"twit": t.text})
        # update article shared status
        if existingLink is not None:
            existingLink.shared = True
            existingLink.put()
        logging.info("updated link share status")
    else:
        logging.info("not scheduled for tweeting")
    lh = LinkHandler()
    lh.update_link(url, link)