def post(self):
    payloads = [Payload(simple_url['url'], simple_url['user_id'])
                for simple_url in eval(self.request.get('data'))]
    cached_urls = Url.get_by_key_name([payload.url for payload in payloads],
                                      _storage=[LOCAL, MEMCACHE],
                                      _result_type=NAME_DICT)
    user_ban_list = Banlist.retrieve(
        _storage=[LOCAL, MEMCACHE, DATASTORE],
        _local_expiration=time_util.minute_expiration(minutes=10)).users

    fetch_targets = []  #Urls that are not in lookup list
    counter_targets = []  #Product urls that were fetched before

    for payload in payloads:
        if payload.user_id in user_ban_list:
            #Don't take banned users' URLs into account
            continue
        #Look for existing cached instance with same short_url
        cached_url = cached_urls[payload.url]
        if cached_url is not None:
            if cached_url.is_product:
                #cached url points to a valid product page
                counter_targets.append(
                    Payload(cached_url.product_url, payload.user_id))
        else:
            fetch_targets.append(payload)

    if len(fetch_targets):
        urlfetch_payload = Payload.serialize(fetch_targets)
        enqueue_url_fetch(urlfetch_payload)
    if len(counter_targets):
        counter_payload = Payload.serialize(counter_targets)
        enqueue_counter(counter_payload)
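
#Both handlers round-trip Payload objects through the task queue via
#Payload.serialize/deserialize. The class itself is not shown in this
#section; a minimal sketch of what it might look like, assuming a plain
#JSON encoding (the attribute names match the usage above, the encoding
#choice is an assumption):
import json

class Payload(object):
    """A (url, user_id) pair shuttled between task queue workers."""

    def __init__(self, url, user_id):
        self.url = url
        self.user_id = user_id

    @staticmethod
    def serialize(payloads):
        #Hypothetical: encode as a JSON list of [url, user_id] pairs
        return json.dumps([[p.url, p.user_id] for p in payloads])

    @staticmethod
    def deserialize(data):
        #Hypothetical inverse of serialize
        return [Payload(url, user_id) for url, user_id in json.loads(data)]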
def post(self):
    logging.info('UrlFetchWorker started')
    payloads = Payload.deserialize(self.request.get('payload'))
    product_ban_list = Banlist.retrieve(
        _storage=[LOCAL, MEMCACHE, DATASTORE],
        _local_expiration=time_util.minute_expiration(minutes=10)).products

    fetch_targets = list(set([payload.url for payload in payloads]))
    result_dict = UrlFetcher.fetch_urls(fetch_targets)
    urls = []
    counter_targets = []

    for payload in payloads:
        request_url = payload.url
        final_url = result_dict[request_url]
        user_id = payload.user_id
        urls.append(Url(key_name=request_url,
                        final_url=final_url,
                        user_id=user_id))

    for url in urls:
        if url.final_url is not None:
            try:
                product_url = AmazonURLParser.product_url(url.final_url)
                if product_url in product_ban_list:
                    logging.info('Mention creation prevented for banned product url: %s'
                                 % product_url)
                    continue  #no action for banned product
                url.is_product = True  #No exceptions for product_url => valid product reference
                counter_targets.append(Payload(product_url, url.user_id))
            except ParserException:
                pass

    logging.info('UrlFetchWorker finished, counter targets: %s' % counter_targets)
    pdb.put(urls, _storage=[LOCAL, MEMCACHE])  #Urls are stored in cache only
    if len(counter_targets):
        enqueue_counter(Payload.serialize(counter_targets))
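
#UrlFetcher.fetch_urls is also not shown here. On App Engine it would
#presumably wrap google.appengine.api.urlfetch; a rough sketch under that
#assumption, mapping each request URL to the final URL it redirects to
#(or None when the fetch fails):
from google.appengine.api import urlfetch

class UrlFetcher(object):

    MAX_REDIRECTS = 5  #assumed limit, not from the original code

    @classmethod
    def fetch_urls(cls, urls):
        return dict((url, cls._resolve(url)) for url in urls)

    @classmethod
    def _resolve(cls, url):
        #Follow redirects by hand so the final location stays observable
        current = url
        for _ in range(cls.MAX_REDIRECTS):
            try:
                response = urlfetch.fetch(current, method=urlfetch.HEAD,
                                          follow_redirects=False)
            except urlfetch.Error:
                return None
            location = response.headers.get('Location')
            if location is None:
                return current  #no further redirect: this is the final url
            current = location
        return current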
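
#AmazonURLParser.product_url raises ParserException for anything that is
#not a product page. A plausible sketch, assuming Amazon's /dp/ASIN and
#/gp/product/ASIN URL patterns; the regex and the canonical form are
#assumptions, not the original implementation:
import re

class ParserException(Exception):
    pass

class AmazonURLParser(object):

    #ASINs are 10 alphanumeric characters after /dp/ or /gp/product/
    _ASIN_RE = re.compile(r'amazon\.(com|co\.uk|de|fr|co\.jp)/'
                          r'(?:.*?/)?(?:dp|gp/product)/([A-Z0-9]{10})')

    @classmethod
    def product_url(cls, url):
        match = cls._ASIN_RE.search(url)
        if match is None:
            raise ParserException('Not an Amazon product url: %s' % url)
        domain, asin = match.groups()
        #Canonical form so the same product always maps to one key
        return 'http://www.amazon.%s/dp/%s' % (domain, asin)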