def _create_product_dictionary(
    self,
    response: HtmlResponse,
    data: Optional[Dict] = None,
) -> Optional[product.Product]:
    """Build the product item for a page from its markup and data dictionary.

    Args:
        response: Product page response. The product name is read from the
            ``og:title`` meta tag (text before the first ``|``), falling
            back to the ``<title>`` text.
        data: Product data dictionary. ``ProductId`` (with underscores
            removed) is used as the UPC and ``BrandName`` as the brand.

    Returns:
        The item produced by the product item loader, or ``None`` when the
        UPC could not be built (including when ``data`` is ``None``).
    """
    # Function-scope import so this method does not rely on a module-level
    # ``logging`` import being present.
    import logging

    try:
        upc = universal_product_code.UniversalProductCode(
            upc=data.get('ProductId').replace('_', ''),
        ).value
    except Exception as exception:
        # Covers a missing ``data`` / ``ProductId`` as well as anything
        # raised while constructing the UniversalProductCode.
        logging.exception(msg='Unable to get UPC.', exc_info=exception)
        return None

    # ``.get()`` returns None when the meta tag is absent (``.extract()[0]``
    # raised IndexError), which keeps the <title> fallback reachable.
    og_title = response.css(
        'meta[property="og:title"]::attr(content)'
    ).get()
    primary_name = og_title.split('|')[0] if og_title else None
    fallback_name = response.css('title::text').get()
    name = primary_name or fallback_name
    # TODO: Log error and return None when ``name`` is empty or is one of
    # the placeholder titles 'Grocery Product' /
    # 'Produit épicerie en ligne'.

    brand = data.get('BrandName')
    # TODO: Log error and return None when the brand is missing.
    # NOTE(review): the original re-checked ``name`` here with a no-op
    # branch; presumably ``brand`` was intended - confirm.

    item_loader = (
        product_item_loader.ProductItemLoader(response=response)
        .add_name(
            response=response,
            name=name,  # TODO: What about if it's none.
            language=self.language,
        )
        .add_brand(
            response=response,
            brand=brand,  # TODO: What about if it's none.
            language=self.language,
        )
        .add_upc(response=response, upc=upc)
        .add_product_data_dictionary(
            product_data_dictionary=self._create_product_data_dictionary(
                response=response,
                data=data,
                name=name,
                brand=brand,
                upc=upc,
            ),
        )
        .add_offer_dictionary(
            offer_dictionary=self._create_offer_dictionary(
                response=response,
                data=data,
            ),
        )
        .add_store_dictionary(
            store_dictionary=self._create_store_dictionary(
                response=response,
            ),
        )
        .add_supported_language(language=self.language)
    )
    return item_loader.load_item()
def _create_product_dictionary(
    self,
    response: HtmlResponse,
    data: Optional[Dict] = None,
) -> Optional[product.Product]:
    """Build the product item for a page using values scraped from its markup.

    Args:
        response: Product page response. The UPC is read from the
            ``span[itemprop="sku"]`` text, the name from the add-to-cart
            link text (falling back to the ``<title>`` text before the
            first ``|``), and the brand from the schema.org product markup.
        data: Product data dictionary; only forwarded to the product-data
            and offer dictionary builders.

    Returns:
        The item produced by the product item loader, or ``None`` when the
        UPC could not be built.
    """
    try:
        upc = universal_product_code.UniversalProductCode(
            upc=response.css('span[itemprop="sku"]::text').get(),
        ).value
    except Exception as exception:
        logging.exception(msg='Unable to get UPC.', exc_info=exception)
        return None

    # NOTE(review): ``.extract()`` returns a list, so ``name`` is a list
    # when this selector matches and a string when the <title> fallback is
    # used - confirm the loader accepts both.
    name = response.css(
        "div.product-info.item-addToCart > a.invisible-text::text"
    ).extract()
    if not name:
        # Only consult <title> when needed, and tolerate its absence
        # instead of raising IndexError on ``.extract()[0]``.
        title_text = response.css('title::text').get()
        name = title_text.split('|')[0] if title_text else None
    # TODO: Log error and return None when no name could be found.

    # Implicit string concatenation replaces the backslash continuation
    # inside the literal; the selector is the same apart from whitespace
    # around the ``>`` combinator.
    brand = response.css(
        'div[itemtype="http://schema.org/Product"] '
        '> span[itemprop="brand"]::text'
    ).extract()

    item_loader = (
        product_item_loader.ProductItemLoader(response=response)
        .add_name(
            response=response,
            name=name,
            language=self.language,
        )
        .add_brand(
            response=response,
            brand=brand,
            language=self.language,
        )
        .add_upc(response=response, upc=upc)
        .add_product_data_dictionary(
            product_data_dictionary=self._create_product_data_dictionary(
                response=response,
                data=data,
                name=name,
                brand=brand,
                upc=upc,
            ),
        )
        .add_offer_dictionary(
            offer_dictionary=self._create_offer_dictionary(
                response=response,
                data=data,
            ),
        )
        .add_store_dictionary(
            store_dictionary=self._create_store_dictionary(
                response=response,
            ),
        )
        .add_supported_language(language=self.language)
    )
    return item_loader.load_item()
def __load_with_dictionary(self, response, data):
    """Load a product item from the nested ``data['product']['product']`` dict.

    Args:
        response: Page response the loader is bound to.
        data: Dictionary expected to hold the product details under
            ``data['product']['product']`` (keys read here: ``modelNumber``
            and ``brandName``).

    Returns:
        The item produced by the product item loader. When ``data`` or the
        nested product details are missing or empty, the item is loaded
        without any values added.
    """
    product_loader = product_item_loader.ProductItemLoader(
        response=response,
    )

    # The conditions must be OR-ed: with ``and`` the guard never returned
    # early and raised AttributeError for ``None`` or ``{}``.
    if (
        not data
        or not data.get('product')
        or not data.get('product').get('product')
    ):
        return product_loader.load_item()

    # TODO: Verify product.
    product_details = data['product']['product']
    model_number = product_details['modelNumber']

    try:
        upc = universal_product_code.UniversalProductCode(
            model_number
        ).value
    except Exception:
        # The model number could not be turned into a UPC; the GTIN field
        # is skipped below.
        upc = None

    product_loader.add_value(
        product.Product.KEY_BRAND,
        product_details['brandName'],
    )
    product_loader.add_value(
        product.Product.KEY_MODEL_NUMBER,
        model_number,
    )

    if upc:
        product_loader.add_value(
            product.Product.KEY_GTIN,
            super()._create_gtin_field(
                response=response,
                type=global_trade_item_number
                .GlobalTradeItemNumber.UPCA.value,
                value=upc,
            ),
        )

    product_loader.add_value(
        product.Product.KEY_CURRENT_OFFER,
        self.__get_offer_with_dictionary(response, data),
    )
    product_loader.add_value(
        product.Product.KEY_STORE,
        self.__create_store_dictionary(response),
    )
    product_loader.add_value(
        product.Product.KEY_PRODUCT_DATA,
        self.__create_product_data_dictionary(response, data, upc),
    )

    return product_loader.load_item()
def process_item(self, item, spider):
    """Replace the item's UPC-derived fields with the UniversalProductCode value.

    When the item has a truthy UPC, it is passed through
    ``UniversalProductCode`` and the resulting ``.value`` is written to the
    item's UPC, model number and SKU fields and to the current offer's SKU.
    If the conversion fails, those fields are all set to ``None``. Items
    without a UPC are returned untouched.

    Args:
        item: The scraped product item; modified in place.
        spider: The spider that produced the item (unused).

    Returns:
        The same ``item`` instance.
    """
    if item.get(product.Product.KEY_UPC):
        try:
            # Read the same key the guard checked. The original looked it
            # up through ``product.FieldProduct``; any AttributeError from
            # that name was hidden by the bare ``except``.
            upc = universal_product_code.UniversalProductCode(
                item[product.Product.KEY_UPC]
            ).value
        # TODO: Check for nulls in mongodb pipeline.
        except Exception:
            upc = None

        item[product.Product.KEY_UPC] = upc
        item[product.Product.KEY_CURRENT_OFFER][offer.Offer.KEY_SKU] = upc
        item[product.Product.KEY_MODEL_NUMBER] = upc
        item[product.Product.KEY_SKU] = upc

    return item
def __load_with_dictionary(self, response, data):
    """Load a product item from a data dictionary with ``additionalInfo``.

    ``data['additionalInfo']`` is replaced in place by its parsed form
    before any value is read from it.

    Args:
        response: Page response the loader is bound to.
        data: Product dictionary; keys read here are ``additionalInfo``
            (then its ``UPC`` and ``Numéro fabricant`` entries) and
            ``brand``.

    Returns:
        The item produced by the product item loader.
    """
    product_loader = product_item_loader.ProductItemLoader(
        response=response,
    )

    data['additionalInfo'] = self.__parse_aditional_info(
        data['additionalInfo'],
    )
    additional_info = data['additionalInfo']

    try:
        upc = universal_product_code.UniversalProductCode(
            additional_info['UPC']
        ).value
    except Exception:
        # No usable UPC entry; the GTIN field is skipped below.
        upc = None

    product_loader.add_value(product.Product.KEY_BRAND, data['brand'])
    product_loader.add_value(
        product.Product.KEY_MODEL_NUMBER,
        additional_info['Numéro fabricant'],
    )

    if upc:
        product_loader.add_value(
            product.Product.KEY_GTIN,
            super()._create_gtin_field(
                response=response,
                type=global_trade_item_number
                .GlobalTradeItemNumber.UPCA.value,
                value=upc,
            ),
        )

    product_loader.add_value(
        product.Product.KEY_CURRENT_OFFER,
        self.__get_offer_with_dictionary(response, data),
    )
    product_loader.add_value(
        product.Product.KEY_STORE,
        self.__create_store_dictionary(response),
    )
    product_loader.add_value(
        product.Product.KEY_PRODUCT_DATA,
        self.__create_product_data_dictionary(
            response=response,
            data=data,
            upc=upc,
        ),
    )

    return product_loader.load_item()
def __load_with_dictionary(self, response, data):
    """Load a product item from a data dictionary with ``tags`` and ``variants``.

    ``data['tags']`` is replaced in place by its parsed form before any
    value is read from it.

    Args:
        response: Page response the loader is bound to.
        data: Product dictionary; keys read here are ``tags`` (then its
            ``upc`` and ``vsn`` entries), ``vendor`` and ``variants``
            (first element's ``sku``).

    Returns:
        The item produced by the product item loader.
    """
    product_loader = product_item_loader.ProductItemLoader(
        response=response,
    )

    data['tags'] = self.__parse_tags(data['tags'])

    try:
        upc = universal_product_code.UniversalProductCode(
            data['tags']['upc']
        ).value
    except Exception:
        # No usable ``upc`` tag; the GTIN field is skipped below.
        upc = None

    if upc:
        product_loader.add_value(
            product.Product.KEY_GTIN,
            super()._create_gtin_field(
                response=response,
                type=global_trade_item_number
                .GlobalTradeItemNumber.UPCA.value,
                value=upc,
            ),
        )

    product_loader.add_value(
        field_name=product.Product.KEY_BRAND,
        value=data['vendor'],
    )
    product_loader.add_value(
        field_name=product.Product.KEY_CURRENT_OFFER,
        value=self.__create_offer_dictionary(response, data),
    )

    # Prefer the ``vsn`` tag; otherwise fall back to the first variant's
    # SKU. Missing or empty ``variants`` now yields None instead of raising
    # TypeError / IndexError.
    model_number = data.get('tags').get('vsn')
    if not model_number:
        variants = data.get('variants') or []
        model_number = variants[0].get('sku') if variants else None
    product_loader.add_value(
        field_name=product.Product.KEY_MODEL_NUMBER,
        value=model_number,
    )

    product_loader.add_value(
        field_name=product.Product.KEY_PRODUCT_DATA,
        value=self._create_product_data_dictionary(response, data, upc),
    )
    product_loader.add_value(
        field_name=product.Product.KEY_STORE,
        value=self.__create_store_dictionary(response),
    )

    return product_loader.load_item()
def __create_product_data_dictionary(self, response, data, upc):
    """Build the product-data dictionary from ``data['product']['product']``.

    Args:
        response: Page response; its URL is stored and the loader is bound
            to it.
        data: Dictionary expected to hold the product details under
            ``data['product']['product']`` (keys read: ``name``,
            ``shortDescription``, ``preorderReleaseDate``, ``modelNumber``,
            ``brandName``, ``sku``) and optionally the language under
            ``data['intl']['language']``.
        upc: Not used as passed; the UPC is recomputed from the model
            number below. NOTE(review): confirm whether the caller's value
            should take precedence.

    Returns:
        The result of ``get_dictionary()`` on the loaded product-data item.
        When ``data`` or the nested product details are missing or empty,
        the item is loaded without any values added.

    Raises:
        ValueError: If ``preorderReleaseDate`` is set but matches neither
            the English nor the French date format.
    """
    product_data_value_loader = (
        product_data_item_loader.ProductDataItemLoader(response=response)
    )

    # The conditions must be OR-ed: with ``and`` the guard never returned
    # early and raised AttributeError for ``None`` or ``{}``. The early
    # return uses ``get_dictionary()`` so both exits yield the same type.
    if (
        not data
        or not data.get('product')
        or not data.get('product').get('product')
    ):
        return product_data_value_loader.load_item().get_dictionary()

    # Deliberately not named ``product``: that would shadow the ``product``
    # module used for ``product.Product.KEY_GTIN`` further down.
    product_details = data['product']['product']

    lang = (
        data['intl']['language']
        if data.get('intl') and data.get('intl').get('language')
        else None
    )

    # Try the format matching the page language first, then the other one.
    # TODO: Check best approach to comparing enums
    # NOTE(review): ``lang`` comes straight from ``data``; if it is a plain
    # string this comparison with the enum member may never be true, which
    # only changes the order in which the two formats are tried.
    if language.Language.EN == lang:
        primary_date_format = self.ENGLISH_DATE_FORMAT
        fallback_date_format = self.FRENCH_DATE_FORMAT
    else:
        primary_date_format = self.FRENCH_DATE_FORMAT
        fallback_date_format = self.ENGLISH_DATE_FORMAT

    product_data_value_loader.add_value(
        product_data.ProductData.KEY_URL,
        response.url,
    )
    product_data_value_loader.add_value(
        product_data.ProductData.KEY_NAME,
        {lang: product_details['name']},
    )
    product_data_value_loader.add_value(
        product_data.ProductData.KEY_DESCRIPTION,
        {lang: product_details['shortDescription']},
    )

    release_date_text = product_details['preorderReleaseDate']
    if release_date_text:
        try:
            release_date = datetime.datetime.strptime(
                release_date_text,
                primary_date_format,
            )
        except (ValueError, TypeError):
            # TODO: Test the enum evaluation.
            # A failure of the second attempt propagates to the caller.
            release_date = datetime.datetime.strptime(
                release_date_text,
                fallback_date_format,
            )

        # Store the date truncated to midnight, in ISO 8601 form.
        product_data_value_loader.add_value(
            product_data.ProductData.KEY_RELEASE_DATE,
            release_date.replace(
                hour=0, minute=0, second=0, microsecond=0,
            ).isoformat(),
        )

    model_number = product_details['modelNumber']

    try:
        upc = universal_product_code.UniversalProductCode(
            model_number
        ).value
    except Exception:
        # The model number could not be turned into a UPC; the GTIN field
        # is skipped below.
        upc = None

    product_data_value_loader.add_value(
        product_data.ProductData.KEY_BRAND,
        product_details['brandName'],
    )
    product_data_value_loader.add_value(
        product_data.ProductData.KEY_SKU,
        product_details['sku'],
    )
    product_data_value_loader.add_value(
        product_data.ProductData.KEY_MODEL_NUMBER,
        model_number,
    )
    product_data_value_loader.add_value(
        offer.Offer.KEY_SOLD_BY,
        self.sold_by,
    )
    product_data_value_loader.add_value(
        offer.Offer.KEY_STORE_ID,
        self.store_id,
    )
    # product_data_value_loader.add_value(
    #     field_name=product_data.ProductData.KEY_SUPPORTED_LANGUAGES,
    #     value={language.Language.EN.value: {}} # TODO: Fixed.
    # )

    if upc:
        product_data_value_loader.add_value(
            product.Product.KEY_GTIN,
            super()._create_gtin_field(
                response=response,
                type=global_trade_item_number
                .GlobalTradeItemNumber.UPCA.value,
                value=upc,
            ),
        )

    return product_data_value_loader.load_item().get_dictionary()