def test_delete_product_not_booked(self):
    """
    Delete a product whose order lookup is stubbed to 404, then delete it again.

    The first DELETE must answer 204 and leave the product unretrievable;
    the second DELETE against the same UUID must answer 404.
    """
    created = ProductRepository.create({'price': 1.5, 'name': 'sample 1'})
    product_uuid = created["uuid"]
    product_url = f'/products/{product_uuid}'

    # Register a stubbed GET on the orders service for this product (status 404).
    responses.add(
        responses.GET,
        self.app.config['ORDERS_SERVICE_URL'] + f'/?product_uuid={product_uuid}',
        status=404,
    )

    # First delete: the product exists.
    first_response = self.client.delete(
        product_url,
        content_type='application/json',
        headers={'Authorization': f'Bearer {self.test_jwt_token}'},
    )
    self.assert_status(first_response, 204)
    with self.assertRaises(NoResultFound):
        ProductRepository.retrieve(product_uuid)

    # Second delete: the product is already gone.
    second_response = self.client.delete(
        product_url,
        content_type='application/json',
        headers={'Authorization': f'Bearer {self.test_jwt_token}'},
    )
    self.assert_status(second_response, 404)
def patch(self, uuid):
    """
    Updates a product.

    Use this method to change the fields of a Product that the
    ``product_update`` model accepts.
    * Specify the UUID of the Product to modify in the request URL path.

    The request JSON is marshalled with ``skip_none=True``; when the
    marshalled result is empty no update is issued. Returns ``(None, 204)``
    in both cases.
    """
    changes = marshal(request.json, product_update, skip_none=True)
    if not changes:
        # Nothing to apply after marshalling.
        return None, 204
    ProductRepository.update(uuid, changes)
    return None, 204
def test_list_products(self):
    """Create two products and check that GET /products/ returns both as JSON."""
    fixtures = (
        {'price': 1.5, 'name': 'sample 1'},
        {'price': 2.5, 'name': 'sample 2'},
    )
    for fixture in fixtures:
        ProductRepository.create(fixture)

    listing = self.client.get(
        '/products/',
        content_type='application/json',
        headers={'Authorization': f'Bearer {self.test_jwt_token}'},
    )

    self.assertStatus(listing, 200)
    self.assertTrue(listing.is_json)
    body = listing.get_json()
    self.assertEqual(len(body), 2)
def __init__(self, jobEntity: ProductAddJobEntity, http: Http):
    """
    Set up repositories and look up the product and site named by the job.

    :param jobEntity: job whose ``product_id`` and ``site_id`` are looked up
    :param http: passed through to ``BaseAmazonCrawler.__init__``

    ``self.url`` is built and the base crawler initialised only when both
    the product and the site were found.
    """
    self.productItemRepository = ProductItemRepository()
    self.productRepository = ProductRepository()
    self.siteRepository = SiteRepository()
    self.base_url = '{}/dp/{}'  # Amazon product page URL template
    self.jobEntity = jobEntity
    self.product = self.productRepository.show(self.jobEntity.product_id)
    self.site = self.siteRepository.show(self.jobEntity.site_id)
    self.productItem = None

    if not (self.product and self.site):
        return

    domain, asin = self.site.domain, self.product.asin
    self.url = self.base_url.format(domain, asin)
    BaseAmazonCrawler.__init__(self, http=http, site=self.site)
def post(self):
    """
    Creates a new product.

    The request JSON is marshalled through the ``product`` model with
    ``skip_none=True`` before being handed to the repository. Returns the
    repository's result together with status 201.
    """
    payload = marshal(request.json, product, skip_none=True)
    created = ProductRepository.create(payload)
    return created, 201
def __init__(self, job_entity: ProductJobEntity, http: Http):
    """
    Set up repositories/services and load the product item named by the job.

    :param job_entity: job whose ``product_item_id`` is looked up
    :param http: passed through to ``BaseAmazonCrawler.__init__``

    ``self.product`` and ``self.url`` are set, and the base crawler is
    initialised, only when the product item exists and has both a product
    and a site.
    """
    self.product_item_repository = ProductItemRepository()
    self.product_repository = ProductRepository()
    self.product_service = ProductService()
    self.base_url = '{}/dp/{}'  # Amazon product page URL template
    self.job_entity = job_entity
    self.product_item = self.product_item_repository.show(
        self.job_entity.product_item_id)

    item = self.product_item
    if not (item and item.product and item.site):
        return

    self.product = item.product
    self.url = self.base_url.format(item.site.domain, self.product.asin)
    BaseAmazonCrawler.__init__(self, http=http, site=item.site)
def test_update_product(self):
    """
    Exercise PATCH ``/products/<uuid>``.

    Covers a price-only update, a payload that carries a different ``uuid``,
    a combined uuid/price/name payload, and a PATCH against a UUID that was
    never created (expects 404).

    Uses ``assertEqual``: the ``assertEquals`` alias is deprecated and was
    removed in Python 3.12.
    """
    other_uuid = '42f616dd-ea9f-41c0-a4d2-389be68b2a99'

    def send_patch(target_uuid, payload):
        # All requests in this test share the content type and auth header.
        return self.client.patch(
            f'/products/{target_uuid}',
            content_type='application/json',
            headers={'Authorization': f'Bearer {self.test_jwt_token}'},
            data=json.dumps(payload))

    # create product
    product = ProductRepository.create({'price': 1.5, 'name': 'sample 1'})
    product_uuid = product['uuid']

    # update price
    response = send_patch(product_uuid, {'price': 2})
    self.assert_status(response, 204)
    self.assertEqual(
        ProductRepository.retrieve(product_uuid)['price'], '2')

    # send a different uuid in the payload: the product must still be
    # retrievable under its original uuid
    response = send_patch(product_uuid, {'uuid': other_uuid})
    self.assert_status(response, 204)
    self.assertEqual(
        ProductRepository.retrieve(product_uuid)['price'], '2')

    # update name and price (uuid in the payload must again be ignored)
    response = send_patch(product_uuid, {
        'uuid': other_uuid,
        'price': 3,
        'name': 'sample 2',
    })
    self.assert_status(response, 204)
    product = ProductRepository.list_()[0]
    self.assertEqual(product['price'], '3')
    self.assertEqual(product['uuid'], product_uuid)
    self.assertEqual(product['name'], 'sample 2')

    # update product not found
    response = send_patch(other_uuid, {'price': 2})
    self.assert_status(response, 404)
def test_get_product(self):
    """GET on an unknown UUID must give 404; GET on a stored product must give 200."""
    product = ProductRepository.create({'price': 1.5, 'name': 'sample 1'})

    # (path, assertion) pairs, checked in this order.
    cases = (
        ('/products/uuid_not_found', self.assert404),
        (f'/products/{product["uuid"]}', self.assert200),
    )
    for path, check in cases:
        response = self.client.get(
            path,
            content_type='application/json',
            headers={'Authorization': f'Bearer {self.test_jwt_token}'},
        )
        check(response)
class ProductReviewProducer(BaseProducer):
    """Producer that enqueues one review-crawl job per product item that has a site."""

    def __init__(self):
        """Create the repositories, zero the job counter and initialise the base producer."""
        product_repository = ProductRepository()
        product_type_repository = ProductTypeRepository()
        self.job_count = 0
        self.productRepository = product_repository
        self.productTypeRepository = product_type_repository
        BaseProducer.__init__(self)

    def start(self):
        """
        Enqueue a job for every product item (with a site) of CPA/shop products.

        A progress bar advances once per product, and a summary line is
        logged afterwards.
        """
        # Only CPA and shop type products are produced here; search-type
        # products do not need to be crawled.
        wanted_types = [ProductTypeEnum.TYPE_ID_CPA, ProductTypeEnum.TYPE_ID_SHOP]
        products = self.productRepository.getProductsByType(wanted_types)

        with Bar('product-review-producer...',
                 max=len(products),
                 fill='#',
                 suffix='%(percent)d%%') as bar:
            for product in products:
                for product_item in product.product_items:
                    if not product_item.site:
                        continue
                    # Objects are not passed along, only their IDs. After a
                    # job is taken from the queue the consumer has to load
                    # the object by ID before working on it. The consumer
                    # looks it up via product_item_id; the other fields are
                    # optional.
                    job_fields = {
                        'product_id': product.id,
                        'product_asin': product.asin,
                        'site_id': product_item.site.id,
                        'product_item_id': product_item.id,
                        'page': 1
                    }
                    self.set_job(CurrentJobEntity.instance(job_fields))
                    self.job_count += 1
                bar.next()

        # NOTE(review): job_count is not reset here, so across repeated runs
        # the logged figure looks cumulative - confirm that is intended.
        summary = 'product 开始投放任务,{}个产品, 共添加{}个任务'.format(
            len(products), self.job_count)
        Logger().info(summary)

    def set_job_key(self) -> str:
        """Return the Redis list key used for product review crawl jobs."""
        return RedisListKeyEnum.product_review_crawl_job

    def _schedule(self):
        """Register ``start`` to run every day at 00:00."""
        # Run the job every day at midnight.
        daily_at_midnight = self.schedule.every().day.at('00:00')
        daily_at_midnight.do(self.start)
class ProductAddCrawler(BaseAmazonCrawler):
    """
    When an ASIN is added, the corresponding queue-related jobs can be inserted.
    """

    def __init__(self, jobEntity: ProductAddJobEntity, http: Http):
        """
        Set up repositories and look up the product and site named by the job.

        :param jobEntity: job whose ``product_id`` and ``site_id`` are looked up
        :param http: passed through to ``BaseAmazonCrawler.__init__``

        ``self.url`` is built and the base crawler initialised only when both
        the product and the site were found.
        """
        self.productItemRepository = ProductItemRepository()
        self.productRepository = ProductRepository()
        self.siteRepository = SiteRepository()
        self.base_url = '{}/dp/{}'  # Amazon product page URL template
        self.jobEntity = jobEntity
        self.product = self.productRepository.show(self.jobEntity.product_id)
        self.site = self.siteRepository.show(self.jobEntity.site_id)
        self.productItem = None
        if self.product and self.site:
            self.url = self.base_url.format(self.site.domain,
                                            self.product.asin)
            BaseAmazonCrawler.__init__(self, http=http, site=self.site)

    def run(self):
        """
        Fetch ``self.url`` and create a product item when the page has a title.

        :raises CrawlErrorException: when the parsed page has no title, or
            when ``requests`` raises a ``RequestException`` (chained as the
            cause so the original error stays visible in the traceback).
        """
        try:
            if self.site_config_entity.has_en_translate:
                self.url = self.url + '?language=en_US'
            Logger().debug('新增asin{}开始抓取,地址 {}'.format(self.product.asin,
                                                       self.url))
            rs = self.get(url=self.url)
            product_element = ProductElement(
                content=rs.content, site_config=self.site_config_entity)
            if not product_element.title:
                raise CrawlErrorException('页面请求异常, 地址 {}'.format(self.url))
            self.productItem = self.productItemRepository.create({
                'product_id': self.product.id,
                'site_id': self.site.id
            })
        except requests.exceptions.RequestException as exc:
            raise CrawlErrorException('product ' + self.url + '请求异常') from exc
        except NotFoundException:
            # NOTE(review): deliberately ignored in the original code;
            # presumably a missing page just means no product item is
            # created - confirm.
            pass
def __init__(self, job_entity: ProductReviewJobEntity, http: Http):
    """
    Set up crawl state and repositories and load the product item named by the job.

    :param job_entity: job whose ``product_item_id`` and ``page`` are used
    :param http: passed through to ``BaseAmazonCrawler.__init__``

    ``self.product``, ``self.url`` and (when the item has one) the formatted
    ``self.crawl_date`` are set, and the base crawler is initialised, only
    when the product item exists and has both a product and a site.
    """
    # Crawl state defaults.
    self.crawl_next_page = True
    self.crawl_date = None
    self.review_count = 0
    self.base_url = "{}/product-reviews/{}?reviewerType=all_reviews&pageNumber={}&sortBy=recent"

    # Collaborators.
    self.productItemRepository = ProductItemRepository()
    self.productItemReviewRepository = ProductItemReviewRepository()
    self.productRepository = ProductRepository()
    self.productService = ProductService()

    self.job_entity = job_entity
    self.productItem = self.productItemRepository.show(
        self.job_entity.product_item_id)

    item = self.productItem
    if not (item and item.product and item.site):
        return

    self.product = item.product
    if item.crawl_date:
        self.crawl_date = item.crawl_date.strftime('%Y-%m-%d')
    self.url = self.base_url.format(
        item.site.domain, self.product.asin, self.job_entity.page)
    BaseAmazonCrawler.__init__(self, http=http, site=item.site)
def delete(self, uuid):
    """
    Deletes the Product identified by ``uuid``.

    Returns ``(None, 204)``.
    """
    ProductRepository.delete(uuid)
    no_content = (None, 204)
    return no_content
def get(self, uuid):
    """
    Returns the product identified by ``uuid``.
    """
    found = ProductRepository.retrieve(uuid)
    return found
def get(self):
    """
    Returns the list of products.
    """
    return ProductRepository.list_()
def __init__(self):
    """Create the repositories, zero the job counter and initialise the base producer."""
    product_repository = ProductRepository()
    product_type_repository = ProductTypeRepository()
    self.job_count = 0
    self.productRepository = product_repository
    self.productTypeRepository = product_type_repository
    BaseProducer.__init__(self)
class ProductCrawler(BaseAmazonCrawler):
    """
    Crawl and save product data.
    """

    def __init__(self, job_entity: ProductJobEntity, http: Http):
        """
        Set up repositories/services and load the product item named by the job.

        :param job_entity: job whose ``product_item_id`` is looked up
        :param http: passed through to ``BaseAmazonCrawler.__init__``

        ``self.product`` and ``self.url`` are set, and the base crawler is
        initialised, only when the product item exists and has both a
        product and a site.
        """
        self.product_item_repository = ProductItemRepository()
        self.product_repository = ProductRepository()
        self.product_service = ProductService()
        self.base_url = '{}/dp/{}'  # Amazon product page URL template
        self.job_entity = job_entity
        self.product_item = self.product_item_repository.show(
            self.job_entity.product_item_id)
        if self.product_item and self.product_item.product and self.product_item.site:
            self.product = self.product_item.product
            self.url = self.base_url.format(self.product_item.site.domain,
                                            self.product.asin)
            BaseAmazonCrawler.__init__(self,
                                       http=http,
                                       site=self.product_item.site)

    def run(self):
        """
        Fetch ``self.url``, parse it and save every non-empty parsed field.

        :raises CrawlErrorException: when the parsed page has no title, or
            when ``requests`` raises a ``RequestException`` (chained as the
            cause so the original error stays visible in the traceback).
        """
        try:
            if self.site_config_entity.has_en_translate:
                self.url = self.url + '?language=en_US'
            Logger().debug('开始抓取{}产品,地址 {}'.format(self.product.asin,
                                                    self.url))
            rs = self.get(url=self.url)
            product_element = ProductElement(
                content=rs.content, site_config=self.site_config_entity)
            if not product_element.title:
                raise CrawlErrorException('页面请求异常, 地址 {}'.format(self.url))
            data = product_element.get_all_element()
            # Keep only the fields that were actually parsed (truthy values).
            self.save_data({k: v for k, v in data.items() if v})
        except requests.exceptions.RequestException as exc:
            raise CrawlErrorException('product ' + self.url + '请求异常') from exc

    def save_data(self, no_empty_data: dict):
        """
        Persist parsed fields on the product and on the product item.

        :param no_empty_data: parsed fields; the keys read here are
            ``rating``, ``available_date``, ``price``, ``feature_rate`` and
            ``classify_rank``

        ``rating`` and ``available_date`` go to both the product and the
        product item; the remaining fields go to the product item only. The
        product item's daily data and daily rank are then updated through
        ``product_service``.
        """
        price = no_empty_data.get('price', '')
        feature_rate = no_empty_data.get('feature_rate', {})
        classify_rank = no_empty_data.get('classify_rank', {})

        # Fields shared by the product and the product item; falsy values are skipped.
        product_dict = {
            key: no_empty_data[key]
            for key in ('rating', 'available_date')
            if no_empty_data.get(key)
        }
        if self.product:
            self.product_repository.update_by_id(self.product.id, product_dict)

        if self.product_item:
            product_item_dict = deepcopy(product_dict)
            if price:
                product_item_dict['price'] = price
            if feature_rate:
                product_item_dict['feature_rate'] = feature_rate
            if classify_rank:
                product_item_dict['classify_rank'] = self.handle_ranks_dict(
                    classify_rank)
            self.product_item.update(product_item_dict)
            self.product_service.update_product_item_daily_data(
                self.product_item)
            # The raw rank mapping (not the formatted list) is passed on here.
            self.product_service.update_product_item_daily_rank(
                self.product_item, ranks=classify_rank)

    @staticmethod
    def handle_ranks_dict(classify_rank: dict):
        """
        Format a ``{name: rank}`` mapping as a list of ``"<rank> in <name>"`` strings.

        :param classify_rank: mapping of name to rank; may be empty or falsy
        :return: list of formatted strings, ``[]`` for empty/falsy input
        """
        if not classify_rank:
            return []
        return [
            "{} in {}".format(rank, name)
            for name, rank in classify_rank.items()
        ]