Пример #1
0
    def test_delete_product_not_booked(self):
        """Delete an existing product, then check a repeated delete gives 404.

        The orders-service lookup for the product is stubbed to answer 404
        before the first delete request is sent.
        """
        created = ProductRepository.create({'price': 1.5, 'name': 'sample 1'})
        orders_lookup_url = (self.app.config['ORDERS_SERVICE_URL'] +
                             f'/?product_uuid={created["uuid"]}')
        responses.add(responses.GET, orders_lookup_url, status=404)

        product_url = f'/products/{created["uuid"]}'
        auth_headers = {'Authorization': f'Bearer {self.test_jwt_token}'}

        # First delete: the product exists, a 204 is expected.
        first_response = self.client.delete(
            product_url,
            content_type='application/json',
            headers=auth_headers,
        )
        self.assert_status(first_response, 204)

        # The repository lookup must now fail for the deleted product.
        with self.assertRaises(NoResultFound):
            ProductRepository.retrieve(created['uuid'])

        # Second delete of the same uuid: a 404 is expected.
        second_response = self.client.delete(
            product_url,
            content_type='application/json',
            headers=auth_headers,
        )
        self.assert_status(second_response, 404)
Пример #2
0
 def patch(self, uuid):
     """
     Updates a product.
     Use this method to change the name of a Product.
     * Specify the UUID of the Product to modify in the request URL path.
     """
     changes = marshal(request.json, product_update, skip_none=True)
     if not changes:
         # Marshalling left nothing to apply: skip the repository call.
         return None, 204
     ProductRepository.update(uuid, changes)
     return None, 204
Пример #3
0
    def test_list_products(self):
        """Create two products and expect a 200 JSON listing of length two."""
        for price, name in ((1.5, 'sample 1'), (2.5, 'sample 2')):
            ProductRepository.create({'price': price, 'name': name})

        auth_headers = {'Authorization': f'Bearer {self.test_jwt_token}'}
        listing = self.client.get('/products/',
                                  content_type='application/json',
                                  headers=auth_headers)

        self.assertStatus(listing, 200)
        self.assertTrue(listing.is_json)
        self.assertEqual(len(listing.get_json()), 2)
 def __init__(self, jobEntity: ProductAddJobEntity, http: Http):
     """Look up the job's product and site and prepare the crawl URL.

     :param jobEntity: job whose ``product_id`` and ``site_id`` are looked up.
     :param http: forwarded to ``BaseAmazonCrawler.__init__``.

     ``self.url`` is assigned and the base initialiser is invoked only when
     both lookups return a truthy value.
     """
     self.productItemRepository = ProductItemRepository()
     self.productRepository = ProductRepository()
     self.siteRepository = SiteRepository()
     self.base_url = '{}/dp/{}'  # Amazon product page URL template
     self.jobEntity = jobEntity

     product = self.productRepository.show(self.jobEntity.product_id)
     self.product = product
     site = self.siteRepository.show(self.jobEntity.site_id)
     self.site = site
     self.productItem = None

     if not (product and site):
         # Missing product or site: leave the crawler without a URL.
         return
     self.url = self.base_url.format(site.domain, product.asin)
     BaseAmazonCrawler.__init__(self, http=http, site=site)
Пример #5
0
 def post(self):
     """
     Creates a new product.
     """
     payload = marshal(request.json, product, skip_none=True)
     created = ProductRepository.create(payload)
     return created, 201
Пример #6
0
    def __init__(self, job_entity: ProductJobEntity, http: Http):
        """Load the job's product item and prepare the product page URL.

        :param job_entity: job whose ``product_item_id`` is looked up.
        :param http: forwarded to ``BaseAmazonCrawler.__init__``.

        ``self.product`` and ``self.url`` are assigned, and the base
        initialiser is invoked, only when the product item, its product and
        its site are all truthy.
        """
        self.product_item_repository = ProductItemRepository()
        self.product_repository = ProductRepository()
        self.product_service = ProductService()
        self.base_url = '{}/dp/{}'  # Amazon product page URL template
        self.job_entity = job_entity

        item = self.product_item_repository.show(
            self.job_entity.product_item_id)
        self.product_item = item

        if not (item and item.product and item.site):
            return
        self.product = item.product
        self.url = self.base_url.format(item.site.domain, self.product.asin)
        BaseAmazonCrawler.__init__(self, http=http, site=item.site)
Пример #7
0
    def test_update_product(self):
        """Exercise PATCH /products/<uuid> for several payloads.

        Covers: a price-only update, a payload containing only ``uuid``
        (the stored price must be unchanged), a combined uuid/price/name
        payload (the stored uuid must stay the original one), and a PATCH
        against a uuid that was never created (404 expected).
        """
        # create product
        product = ProductRepository.create({'price': 1.5, 'name': 'sample 1'})
        product_uuid = product['uuid']
        auth_headers = {'Authorization': f'Bearer {self.test_jwt_token}'}

        # update price
        response = self.client.patch(
            f'/products/{product_uuid}',
            content_type='application/json',
            headers=auth_headers,
            data=json.dumps({'price': 2}))
        self.assert_status(response, 204)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(
            ProductRepository.retrieve(product_uuid)['price'], '2')

        # update product_uuid: the request succeeds and the price is unchanged
        response = self.client.patch(
            f'/products/{product_uuid}',
            content_type='application/json',
            headers=auth_headers,
            data=json.dumps({'uuid': '42f616dd-ea9f-41c0-a4d2-389be68b2a99'}))
        self.assert_status(response, 204)
        self.assertEqual(
            ProductRepository.retrieve(product_uuid)['price'], '2')

        # update name and price; the uuid in the payload must not be applied
        response = self.client.patch(
            f'/products/{product_uuid}',
            content_type='application/json',
            headers=auth_headers,
            data=json.dumps({
                'uuid': '42f616dd-ea9f-41c0-a4d2-389be68b2a99',
                'price': 3,
                'name': 'sample 2',
            }))
        self.assert_status(response, 204)
        product = ProductRepository.list_()[0]
        self.assertEqual(product['price'], '3')
        self.assertEqual(product['uuid'], product_uuid)
        self.assertEqual(product['name'], 'sample 2')

        # update product not found
        response = self.client.patch(
            '/products/42f616dd-ea9f-41c0-a4d2-389be68b2a99',
            content_type='application/json',
            headers=auth_headers,
            data=json.dumps({'price': 2}))
        self.assert_status(response, 404)
Пример #8
0
    def test_get_product(self):
        """Expect 404 for an unknown uuid and 200 for a created product."""
        created = ProductRepository.create({'price': 1.5, 'name': 'sample 1'})
        auth_headers = {'Authorization': f'Bearer {self.test_jwt_token}'}

        # Lookup with a uuid that was never created.
        missing = self.client.get('/products/uuid_not_found',
                                  content_type='application/json',
                                  headers=auth_headers)
        self.assert404(missing)

        # Lookup with the uuid of the product created above.
        found = self.client.get(f'/products/{created["uuid"]}',
                                content_type='application/json',
                                headers=auth_headers)
        self.assert200(found)
class ProductReviewProducer(BaseProducer):
    """Producer that submits one review-crawl job per product item with a site.

    Jobs are submitted through ``self.set_job`` under the key returned by
    ``set_job_key``; ``start`` is registered to run daily by ``_schedule``.
    """

    def __init__(self):
        self.job_count = 0
        self.productRepository = ProductRepository()
        self.productTypeRepository = ProductTypeRepository()
        BaseProducer.__init__(self)

    def start(self):
        """Submit a page-1 review job for every sited item of CPA/shop products.

        ``self.job_count`` holds the number of jobs submitted by the current
        run and is logged together with the number of products processed.
        """
        # start() is scheduled to run every day on the same instance, so the
        # counter is reset per run; otherwise the log line below would pair a
        # cumulative job count with a per-run product count.
        self.job_count = 0

        # Only CPA and shop type products are produced here; search-type
        # products do not need to be crawled.
        products = self.productRepository.getProductsByType(
            [ProductTypeEnum.TYPE_ID_CPA, ProductTypeEnum.TYPE_ID_SHOP])
        with Bar('product-review-producer...',
                 max=len(products),
                 fill='#',
                 suffix='%(percent)d%%') as bar:
            for product in products:
                for product_item in product.product_items:
                    if not product_item.site:
                        continue
                    # No objects are passed, only IDs are stored: after a job
                    # is taken off the queue the consumer has to load the
                    # objects again by ID. The consumer looks things up via
                    # product_item_id; the other fields are optional.
                    entity = CurrentJobEntity.instance({
                        'product_id': product.id,
                        'product_asin': product.asin,
                        'site_id': product_item.site.id,
                        'product_item_id': product_item.id,
                        'page': 1
                    })
                    self.set_job(entity)
                    self.job_count += 1
                # Advance the progress bar once per product, not per item.
                bar.next()

        Logger().info('product 开始投放任务,{}个产品, 共添加{}个任务'.format(
            len(products), self.job_count))

    def set_job_key(self) -> str:
        """Return the Redis list key used for product review crawl jobs."""
        return RedisListKeyEnum.product_review_crawl_job

    def _schedule(self):
        """Register ``start`` to run every day at 00:00."""
        # Run the job every day at midnight.
        self.schedule.every().day.at('00:00').do(self.start)
class ProductAddCrawler(BaseAmazonCrawler):
    """
    Can insert the corresponding queue-related jobs when an ASIN is added.
    """
    def __init__(self, jobEntity: ProductAddJobEntity, http: Http):
        """Look up the job's product and site and prepare the crawl URL.

        :param jobEntity: job whose ``product_id`` and ``site_id`` are looked up.
        :param http: forwarded to ``BaseAmazonCrawler.__init__``.

        ``self.url`` is assigned and the base initialiser is invoked only
        when both lookups return a truthy value.
        """
        self.productItemRepository = ProductItemRepository()
        self.productRepository = ProductRepository()
        self.siteRepository = SiteRepository()
        self.base_url = '{}/dp/{}'  # Amazon product page URL template
        self.jobEntity = jobEntity

        product = self.productRepository.show(self.jobEntity.product_id)
        self.product = product
        site = self.siteRepository.show(self.jobEntity.site_id)
        self.site = site
        self.productItem = None

        if not (product and site):
            return
        self.url = self.base_url.format(site.domain, product.asin)
        BaseAmazonCrawler.__init__(self, http=http, site=site)

    def run(self):
        """Fetch the product page and create a product item if it has a title.

        :raises CrawlErrorException: when the parsed page has no title, or
            when the request raises ``requests.exceptions.RequestException``.

        A ``NotFoundException`` is swallowed: ``run`` then returns without
        creating a product item.
        """
        try:
            if self.site_config_entity.has_en_translate:
                self.url += '?language=en_US'
            Logger().debug(
                '新增asin{}开始抓取,地址 {}'.format(self.product.asin, self.url))
            response = self.get(url=self.url)
            page = ProductElement(content=response.content,
                                  site_config=self.site_config_entity)
            if not page.title:
                # A page without a title is treated as a failed request.
                raise CrawlErrorException('页面请求异常, 地址 {}'.format(self.url))
            new_item_fields = {
                'product_id': self.product.id,
                'site_id': self.site.id,
            }
            self.productItem = self.productItemRepository.create(
                new_item_fields)
        except requests.exceptions.RequestException:
            raise CrawlErrorException('product ' + self.url + '请求异常')
        except NotFoundException:
            pass
 def __init__(self, job_entity: ProductReviewJobEntity, http: Http):
     """Load the job's product item and prepare the review-listing URL.

     :param job_entity: job providing ``product_item_id`` and ``page``.
     :param http: forwarded to ``BaseAmazonCrawler.__init__``.

     ``self.product``, ``self.url`` and (when the item has one) a
     ``%Y-%m-%d`` formatted ``self.crawl_date`` are assigned, and the base
     initialiser is invoked, only when the product item, its product and
     its site are all truthy.
     """
     self.crawl_next_page = True
     self.crawl_date = None
     self.review_count = 0
     self.base_url = "{}/product-reviews/{}?reviewerType=all_reviews&pageNumber={}&sortBy=recent"
     self.productItemReviewRepository = ProductItemReviewRepository()
     self.productItemRepository = ProductItemRepository()
     self.productRepository = ProductRepository()
     self.productService = ProductService()
     self.job_entity = job_entity

     item = self.productItemRepository.show(self.job_entity.product_item_id)
     self.productItem = item
     if not (item and item.product and item.site):
         return

     self.product = item.product
     if item.crawl_date:
         self.crawl_date = item.crawl_date.strftime('%Y-%m-%d')
     self.url = self.base_url.format(item.site.domain,
                                     self.product.asin,
                                     self.job_entity.page)
     BaseAmazonCrawler.__init__(self, http=http, site=item.site)
Пример #12
0
 def delete(self, uuid):
     """
     Deletes the Product identified by ``uuid``.
     """
     ProductRepository.delete(uuid)
     empty_body, status_code = None, 204
     return empty_body, status_code
Пример #13
0
 def get(self, uuid):
     """
     Returns the product identified by ``uuid``.
     """
     found = ProductRepository.retrieve(uuid)
     return found
Пример #14
0
 def get(self):
     """
     Returns the list of products.
     """
     return ProductRepository.list_()
 def __init__(self):
     """Create the repositories, zero the job counter, then run the base initialiser."""
     self.productRepository = ProductRepository()
     self.productTypeRepository = ProductTypeRepository()
     self.job_count = 0
     # The base initialiser runs last, after all attributes are in place.
     BaseProducer.__init__(self)
Пример #16
0
class ProductCrawler(BaseAmazonCrawler):
    """
    Crawl and save product data.
    """
    def __init__(self, job_entity: ProductJobEntity, http: Http):
        """Load the job's product item and prepare the product page URL.

        :param job_entity: job whose ``product_item_id`` is looked up.
        :param http: forwarded to ``BaseAmazonCrawler.__init__``.

        ``self.product`` and ``self.url`` are assigned, and the base
        initialiser is invoked, only when the product item, its product and
        its site are all truthy.
        """
        self.product_item_repository = ProductItemRepository()
        self.product_repository = ProductRepository()
        self.product_service = ProductService()
        self.base_url = '{}/dp/{}'  # Amazon product page URL template
        self.job_entity = job_entity

        item = self.product_item_repository.show(
            self.job_entity.product_item_id)
        self.product_item = item

        if not (item and item.product and item.site):
            return
        self.product = item.product
        self.url = self.base_url.format(item.site.domain, self.product.asin)
        BaseAmazonCrawler.__init__(self, http=http, site=item.site)

    def run(self):
        """Fetch the product page and save every non-empty parsed element.

        :raises CrawlErrorException: when the parsed page has no title, or
            when the request raises ``requests.exceptions.RequestException``.
        """
        try:
            if self.site_config_entity.has_en_translate:
                self.url += '?language=en_US'
            Logger().debug(
                '开始抓取{}产品,地址 {}'.format(self.product.asin, self.url))
            response = self.get(url=self.url)
            page = ProductElement(content=response.content,
                                  site_config=self.site_config_entity)
            if not page.title:
                # A page without a title is treated as a failed request.
                raise CrawlErrorException('页面请求异常, 地址 {}'.format(self.url))
            scraped = page.get_all_element()
            # Keep only the entries whose value is truthy.
            self.save_data(
                {key: value for key, value in scraped.items() if value})
        except requests.exceptions.RequestException:
            raise CrawlErrorException('product ' + self.url + '请求异常')

    def save_data(self, no_empty_data: dict):
        """Write the scraped fields to the product and the product item.

        ``rating`` and ``available_date`` go to both records; ``price``,
        ``feature_rate`` and the formatted ``classify_rank`` go to the
        product item only. Falsy values are not written. The product
        service's daily data and daily rank updates are called afterwards.

        :param no_empty_data: scraped fields keyed by field name.
        """
        classify_rank = no_empty_data.get('classify_rank', {})

        product_dict = {}
        for field in ('rating', 'available_date'):
            value = no_empty_data.get(field)
            if value:
                product_dict[field] = value
        if self.product:
            self.product_repository.update_by_id(self.product.id, product_dict)

        if self.product_item:
            # Copy so the item-only fields never leak into product_dict.
            product_item_dict = deepcopy(product_dict)
            for field in ('price', 'feature_rate'):
                value = no_empty_data.get(field)
                if value:
                    product_item_dict[field] = value
            if classify_rank:
                product_item_dict['classify_rank'] = self.handle_ranks_dict(
                    classify_rank)
            self.product_item.update(product_item_dict)

        self.product_service.update_product_item_daily_data(self.product_item)
        self.product_service.update_product_item_daily_rank(
            self.product_item, ranks=classify_rank)

    @staticmethod
    def handle_ranks_dict(classify_rank: dict):
        """Format a ``{name: rank}`` mapping as ``"<rank> in <name>"`` strings.

        :param classify_rank: mapping of category name to rank.
        :return: list of formatted strings; an empty list for a falsy input.
        """
        if not classify_rank:
            return []
        return [
            "{} in {}".format(position, category)
            for category, position in classify_rank.items()
        ]