Example #1
    def post(self):
        # NOTE: eval() executes whatever the request body contains; the task
        # producer is trusted here, but json.loads() would be the safer choice.
        payloads = [
            Payload(simple_url['url'], simple_url['user_id'])
            for simple_url in eval(self.request.get('data'))
        ]

        cached_urls = Url.get_by_key_name(
            [payload.url for payload in payloads],
            _storage=[LOCAL, MEMCACHE],
            _result_type=NAME_DICT)

        user_ban_list = Banlist.retrieve(
            _storage=[LOCAL, MEMCACHE, DATASTORE],
            _local_expiration=time_util.minute_expiration(minutes=10)).users

        fetch_targets = []  # URLs that are not in the lookup list
        counter_targets = []  # product URLs that were fetched before

        for payload in payloads:
            if payload.user_id in user_ban_list:
                # Don't take banned users' URLs into account
                continue

            # Look for an existing cached instance with the same short URL
            cached_url = cached_urls[payload.url]
            if cached_url is not None:
                if cached_url.is_product:  # cached URL points to a valid product page
                    counter_targets.append(
                        Payload(cached_url.product_url, payload.user_id))
            else:
                fetch_targets.append(payload)

        if fetch_targets:
            urlfetch_payload = Payload.serialize(fetch_targets)
            enqueue_url_fetch(urlfetch_payload)
        if counter_targets:
            counter_payload = Payload.serialize(counter_targets)
            enqueue_counter(counter_payload)
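The Payload helper that both handlers pass around is not shown on this page. Below is a minimal sketch of what it could look like, assuming a JSON wire format (the real project may serialize differently); the url and user_id attributes are the ones the handlers actually read.

import json


class Payload(object):
    """Pairs a URL with the id of the user who posted it (assumed shape)."""

    def __init__(self, url, user_id):
        self.url = url
        self.user_id = user_id

    @staticmethod
    def serialize(payloads):
        # Pack a list of payloads into one JSON string, small enough to
        # travel as a single task queue parameter.
        return json.dumps([{'url': p.url, 'user_id': p.user_id}
                           for p in payloads])

    @staticmethod
    def deserialize(data):
        # Inverse of serialize(): rebuild Payload objects from the JSON string.
        return [Payload(item['url'], item['user_id'])
                for item in json.loads(data)]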
Example #2
    def post(self):
        logging.info('UrlFetchWorker started')
        payloads = Payload.deserialize(self.request.get('payload'))
        product_ban_list = Banlist.retrieve(
            _storage=[LOCAL, MEMCACHE, DATASTORE],
            _local_expiration=time_util.minute_expiration(minutes=10)).products

        # Deduplicate URLs so each one is fetched only once
        fetch_targets = list(set(payload.url for payload in payloads))
        result_dict = UrlFetcher.fetch_urls(fetch_targets)
        urls = []
        counter_targets = []

        for payload in payloads:
            request_url = payload.url
            final_url = result_dict[request_url]
            user_id = payload.user_id

            urls.append(
                Url(key_name=request_url, final_url=final_url,
                    user_id=user_id))

        for url in urls:
            if url.final_url is not None:
                try:
                    product_url = AmazonURLParser.product_url(url.final_url)

                    if product_url in product_ban_list:
                        logging.info(
                            'Mention creation prevented for banned product url: %s'
                            % product_url)
                        continue  # no action for banned products

                    # product_url() parsed without raising, so the final URL
                    # is a valid product reference.
                    url.is_product = True
                    counter_targets.append(Payload(product_url, url.user_id))
                except ParserException:
                    pass  # final_url is not a product page; leave is_product unset

        logging.info('UrlFetchWorker finished, counter targets: %s'
                     % counter_targets)
        pdb.put(urls, _storage=[LOCAL, MEMCACHE])  # URLs are stored in cache only

        if counter_targets:
            enqueue_counter(Payload.serialize(counter_targets))
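enqueue_url_fetch and enqueue_counter are also defined elsewhere. On App Engine the natural implementation is a push task queue; the sketch below uses the real google.appengine.api.taskqueue API, but the worker URLs and queue names are invented for illustration.

from google.appengine.api import taskqueue


def enqueue_url_fetch(payload):
    # Hands the serialized payloads to UrlFetchWorker
    # ('/tasks/url_fetch' and 'url-fetch' are hypothetical names).
    taskqueue.add(url='/tasks/url_fetch', queue_name='url-fetch',
                  params={'payload': payload})


def enqueue_counter(payload):
    # Same pattern for the worker that bumps product mention counters.
    taskqueue.add(url='/tasks/counter', queue_name='counter',
                  params={'payload': payload})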
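Finally, AmazonURLParser.product_url and ParserException are external to these snippets. From the control flow above, a successful parse returns a canonical product URL, while a ParserException means the final URL is not a product page. The stand-in below is a guess under that assumption, keyed on the ASIN that Amazon product URLs carry after /dp/ or /gp/product/.

import re


class ParserException(Exception):
    """Raised when a URL does not point to an Amazon product page."""


class AmazonURLParser(object):
    # Amazon product pages carry a 10-character ASIN after /dp/ or /gp/product/.
    _ASIN_RE = re.compile(r'amazon\.[a-z.]+/(?:dp|gp/product)/([A-Z0-9]{10})')

    @classmethod
    def product_url(cls, url):
        match = cls._ASIN_RE.search(url)
        if match is None:
            raise ParserException('not an Amazon product URL: %s' % url)
        # Canonicalize so different links to the same product compare equal.
        return 'http://www.amazon.com/dp/%s' % match.group(1)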