def post(self):
    """Resolve the final URL for each queued payload, mark valid Amazon
    product references, and enqueue counter updates for non-banned products.

    Reads the serialized payload list from the request, fetches each unique
    URL once, persists the resulting Url entities to cache, and enqueues a
    counter task when at least one valid product reference was found.
    """
    logging.info('UrlFetchWorker started')
    payloads = Payload.deserialize(self.request.get('payload'))
    product_ban_list = Banlist.retrieve(
        _storage=[LOCAL, MEMCACHE, DATASTORE],
        _local_expiration=time_util.minute_expiration(minutes=10)).products

    # Deduplicate so each URL is fetched only once even if several
    # payloads reference it.
    fetch_targets = list({payload.url for payload in payloads})
    result_dict = UrlFetcher.fetch_urls(fetch_targets)

    # One Url entity per payload, keyed by the originally requested URL.
    urls = [
        Url(key_name=payload.url,
            final_url=result_dict[payload.url],
            user_id=payload.user_id)
        for payload in payloads
    ]

    counter_targets = []
    for url in urls:
        if url.final_url is None:
            continue  # fetch failed / unresolved; nothing to parse
        try:
            product_url = AmazonURLParser.product_url(url.final_url)
        except ParserException:
            continue  # final_url is not an Amazon product reference
        if product_url in product_ban_list:
            # Lazy %-args: string is only built if this level is enabled.
            logging.info(
                'Mention creation prevented for banned product url: %s',
                product_url)
            continue  # no action for banned product
        url.is_product = True  # parser succeeded => valid product reference
        counter_targets.append(Payload(product_url, url.user_id))

    logging.info('UrlFetchWorker finished, counter targets: %s',
                 counter_targets)
    pdb.put(urls, _storage=[LOCAL, MEMCACHE])  # urls are stored in cache only
    if counter_targets:
        enqueue_counter(Payload.serialize(counter_targets))
def post(self):
    """Fetch final URLs for queued payloads, flag Amazon product links,
    and enqueue counter updates for products not on the ban list.

    Deserializes the request payload, resolves each distinct URL once,
    caches the resulting Url entities, and enqueues a counter task when
    any valid product references were collected.
    """
    logging.info('UrlFetchWorker started')
    payloads = Payload.deserialize(self.request.get('payload'))
    product_ban_list = Banlist.retrieve(
        _storage=[LOCAL, MEMCACHE, DATASTORE],
        _local_expiration=time_util.minute_expiration(minutes=10)).products

    # Fetch each distinct URL a single time.
    fetch_targets = list({payload.url for payload in payloads})
    result_dict = UrlFetcher.fetch_urls(fetch_targets)

    # Build one Url entity per payload, keyed by the requested URL.
    urls = [
        Url(key_name=payload.url,
            final_url=result_dict[payload.url],
            user_id=payload.user_id)
        for payload in payloads
    ]

    counter_targets = []
    for url in urls:
        if url.final_url is None:
            continue  # unresolved fetch; skip
        try:
            product_url = AmazonURLParser.product_url(url.final_url)
        except ParserException:
            continue  # not a parseable Amazon product URL
        if product_url in product_ban_list:
            # %s args are formatted lazily by the logging module.
            logging.info(
                'Mention creation prevented for banned product url: %s',
                product_url)
            continue  # no action for banned product
        url.is_product = True  # no parser exception => valid product reference
        counter_targets.append(Payload(product_url, url.user_id))

    logging.info('UrlFetchWorker finished, counter targets: %s',
                 counter_targets)
    pdb.put(urls, _storage=[LOCAL, MEMCACHE])  # urls are stored in cache only
    if counter_targets:
        enqueue_counter(Payload.serialize(counter_targets))
def product_url(self):
    """Return the canonical Amazon product URL parsed from this entity's
    final_url (delegates to AmazonURLParser; may raise ParserException
    if final_url is not a product reference — confirm with parser docs).
    """
    parsed = AmazonURLParser.product_url(self.final_url)
    return parsed
def new(cls, *args, **kwds):
    """Build a ProductRenderer via the parent factory, then derive and
    attach the canonical product url from the first positional argument.
    """
    entity = super(ProductRenderer, cls).new(*args, **kwds)
    # args[0] is assumed to be a URL string — parsed into the canonical
    # Amazon product URL before being stored on the entity.
    entity.url = AmazonURLParser.product_url(args[0])
    return entity
def new(cls, *args, **kwds):
    """Factory: delegate construction to the superclass, then set the
    entity's url to the parsed product URL of the first positional arg.
    """
    entity = super(ProductRenderer, cls).new(*args, **kwds)
    product_link = AmazonURLParser.product_url(args[0])
    entity.url = product_link
    return entity