def post(self):
    """Apply a batch of counter increments delivered in the task payload.

    Deserializes Payload items, aggregates duplicate key names locally so
    each counter entity is read and written at most once per batch, then
    bulk-loads existing counters, applies the deltas (creating missing
    counters), and pushes the results through ``filtered_update``.
    """
    from collections import defaultdict

    payload_string = self.request.get('payload')
    counter_targets = Payload.deserialize(payload_string)
    today = time_util.today()

    # Local aggregation: counter key name -> increment delta for this batch.
    product_targets = defaultdict(int)
    user_targets = defaultdict(int)
    for payload in counter_targets:
        # Each mention bumps the product's daily/weekly/monthly buckets
        # and the mentioning user's daily bucket.
        for frequency in (DAILY, WEEKLY, MONTHLY):
            key = ProductCounter.build_key_name(payload.url, frequency, today)
            product_targets[key] += 1
        user_key = UserCounter.build_key_name(payload.user_id, DAILY, today)
        user_targets[user_key] += 1

    # Bulk fetch: missing entities come back as None in the name dict.
    product_counters = ProductCounter.get_by_key_name(
        product_targets.keys(),
        _storage=[MEMCACHE, DATASTORE],
        _result_type=NAME_DICT)
    user_counters = UserCounter.get_by_key_name(
        user_targets.keys(), _result_type=NAME_DICT)

    for key_name, delta in product_targets.iteritems():
        counter = product_counters[key_name]
        if counter is None:
            # No existing entity: create one seeded with this batch's delta.
            # (Explicit None test instead of catching AttributeError, which
            # would also mask genuine attribute bugs on the counter object.)
            frequency = ProductCounter.frequency_from_key_name(key_name)
            product_counters[key_name] = ProductCounter.new(
                key_name, frequency, today, count=delta,
                _build_key_name=False)
        else:
            counter.count += delta

    for key_name, delta in user_targets.iteritems():
        counter = user_counters[key_name]
        if counter is None:
            user_counters[key_name] = UserCounter.new(
                key_name, DAILY, today, count=delta, _build_key_name=False)
        else:
            counter.count += delta

    ProductCounter.filtered_update(product_counters.values())
    UserCounter.filtered_update(user_counters.values())
def post(self):
    """Resolve payload URLs, persist Url entities, and enqueue counter work.

    Fetches each distinct URL once, records the final (redirect-resolved)
    URL per payload, marks valid Amazon product references, skips banned
    product URLs, and enqueues a counter task when any product mentions
    were produced.
    """
    logging.info('UrlFetchWorker started')
    payloads = Payload.deserialize(self.request.get('payload'))
    product_ban_list = Banlist.retrieve(
        _storage=[LOCAL, MEMCACHE, DATASTORE],
        _local_expiration=time_util.minute_expiration(minutes=10)).products

    # De-duplicate so each distinct URL is fetched exactly once.
    fetch_targets = list({payload.url for payload in payloads})
    result_dict = UrlFetcher.fetch_urls(fetch_targets)

    urls = [
        Url(key_name=payload.url,
            final_url=result_dict[payload.url],
            user_id=payload.user_id)
        for payload in payloads
    ]

    counter_targets = []
    for url in urls:
        if url.final_url is None:
            continue  # Fetch failed / unresolved; nothing to parse.
        try:
            product_url = AmazonURLParser.product_url(url.final_url)
        except ParserException:
            continue  # Not an Amazon product reference.
        if product_url in product_ban_list:
            # Lazy %-args: logging formats only if the record is emitted.
            logging.info(
                'Mention creation prevented for banned product url: %s',
                product_url)
            continue  # no action for banned product
        url.is_product = True  # No exceptions for product_url => valid product reference
        counter_targets.append(Payload(product_url, url.user_id))

    logging.info('UrlFetchWorker finished, counter targets: %s',
                 counter_targets)
    pdb.put(urls, _storage=[LOCAL, MEMCACHE])  # Urls are stored in cache only
    if counter_targets:
        enqueue_counter(Payload.serialize(counter_targets))
def post(self):
    """Increment product and user counters for a deserialized payload batch.

    Deltas are accumulated per key name first, so storage is touched once
    per distinct counter rather than once per mention.
    """
    from collections import defaultdict

    targets = Payload.deserialize(self.request.get('payload'))
    today = time_util.today()

    # Collapse the batch into per-key increments before any storage reads.
    product_deltas = defaultdict(int)
    user_deltas = defaultdict(int)
    for item in targets:
        product_deltas[ProductCounter.build_key_name(item.url, DAILY, today)] += 1
        product_deltas[ProductCounter.build_key_name(item.url, WEEKLY, today)] += 1
        product_deltas[ProductCounter.build_key_name(item.url, MONTHLY, today)] += 1
        user_deltas[UserCounter.build_key_name(item.user_id, DAILY, today)] += 1

    product_counters = ProductCounter.get_by_key_name(
        product_deltas.keys(),
        _storage=[MEMCACHE, DATASTORE],
        _result_type=NAME_DICT)
    user_counters = UserCounter.get_by_key_name(
        user_deltas.keys(), _result_type=NAME_DICT)

    for name, delta in product_deltas.iteritems():
        try:
            product_counters[name].count += delta
        except AttributeError:  # Value is None in dict
            freq = ProductCounter.frequency_from_key_name(name)
            product_counters[name] = ProductCounter.new(
                name, freq, today, count=delta, _build_key_name=False)

    for name, delta in user_deltas.iteritems():
        try:
            user_counters[name].count += delta
        except AttributeError:  # Value is None in dict
            user_counters[name] = UserCounter.new(
                name, DAILY, today, count=delta, _build_key_name=False)

    ProductCounter.filtered_update(product_counters.values())
    UserCounter.filtered_update(user_counters.values())
def post(self):
    """Fetch final URLs for a payload batch and queue counter updates.

    Stores one Url entity per payload (cache only), flags valid Amazon
    product references, skips banned products, and enqueues a counter
    task when any targets were collected.
    """
    logging.info('UrlFetchWorker started')
    payloads = Payload.deserialize(self.request.get('payload'))
    product_ban_list = Banlist.retrieve(
        _storage=[LOCAL, MEMCACHE, DATASTORE],
        _local_expiration=time_util.minute_expiration(minutes=10)).products

    # Fetch each distinct URL once, even if several payloads share it.
    fetch_targets = list(set([p.url for p in payloads]))
    result_dict = UrlFetcher.fetch_urls(fetch_targets)

    urls = []
    counter_targets = []
    for p in payloads:
        urls.append(Url(key_name=p.url,
                        final_url=result_dict[p.url],
                        user_id=p.user_id))

    for url in urls:
        if url.final_url is None:
            continue
        try:
            product_url = AmazonURLParser.product_url(url.final_url)
            if product_url in product_ban_list:
                logging.info('Mention creation prevented for banned product url: %s' % product_url)
                continue  # no action for banned product
            url.is_product = True  # No exceptions for product_url => valid product reference
            counter_targets.append(Payload(product_url, url.user_id))
        except ParserException:
            pass

    logging.info('UrlFetchWorker finished, counter targets: %s' % counter_targets)
    pdb.put(urls, _storage=[LOCAL, MEMCACHE])  # Urls are stored in cache only
    if len(counter_targets):
        enqueue_counter(Payload.serialize(counter_targets))