示例#1
0
    def _create_product_dictionary(
        self,
        response: HtmlResponse,
        data: Optional[Dict] = None,
    ) -> Optional[product.Product]:
        """Build the product item for the page held by ``response``.

        The UPC is built from ``data['ProductId']`` with underscores
        removed. The name is the ``og:title`` meta content up to the first
        ``|``, falling back to the ``<title>`` text. The brand is read from
        ``data['BrandName']``.

        Args:
            response: Page response the CSS selectors are run against.
            data: Product dictionary; the keys 'ProductId' and 'BrandName'
                are read here and the whole dictionary is forwarded to the
                product-data and offer builders.

        Returns:
            The loaded item, or ``None`` when the UPC cannot be built or
            when the page has no usable product name.
        """
        # Imported locally so this method does not rely on a module-level
        # ``logging`` import being present.
        import logging

        try:
            upc = universal_product_code.UniversalProductCode(
                upc=data.get('ProductId').replace('_', '')).value
        except Exception:
            # Covers ``data`` being None, a missing 'ProductId' and any
            # error raised while building the UniversalProductCode.
            logging.exception('Unable to get UPC.')
            return None

        # ``.get()`` yields None instead of raising IndexError when the
        # meta tag is absent, so the <title> fallback can actually be used.
        og_title = response.css(
            'meta[property="og:title"]::attr(content)').get()
        title1 = og_title.split('|')[0] if og_title else None
        name = title1 or response.css('title::text').get()

        if not name:
            logging.error('Unable to get product name: %s', response.url)
            return None

        # These generic titles are treated as "no real product name".
        if name in ('Grocery Product', 'Produit épicerie en ligne'):
            logging.error(
                'Placeholder product name %r: %s', name, response.url)
            return None

        brand = data.get('BrandName')

        item_loader = product_item_loader.ProductItemLoader(
            response=response
        ).add_name(
            response=response,
            name=name,
            language=self.language,
        ).add_brand(
            response=response,
            brand=brand,  # TODO: Decide how a missing brand is handled.
            language=self.language,
        ).add_upc(
            response=response,
            upc=upc,
        ).add_product_data_dictionary(
            product_data_dictionary=self._create_product_data_dictionary(
                response=response,
                data=data,
                name=name,
                brand=brand,
                upc=upc,
            ),
        ).add_offer_dictionary(
            offer_dictionary=self._create_offer_dictionary(
                response=response,
                data=data,
            ),
        ).add_store_dictionary(
            store_dictionary=self._create_store_dictionary(
                response=response,
            ),
        ).add_supported_language(language=self.language)

        return item_loader.load_item()
示例#2
0
    def _create_product_dictionary(
        self,
        response: HtmlResponse,
        data: Optional[Dict] = None,
    ) -> Optional[product.Product]:
        """Build the product item for the page held by ``response``.

        The UPC is built from the ``span[itemprop="sku"]`` text. The name
        comes from the add-to-cart link text, falling back to the
        ``<title>`` text up to the first ``|``. The brand comes from the
        schema.org product markup.

        Args:
            response: Page response the CSS selectors are run against.
            data: Optional dictionary forwarded unchanged to the
                product-data and offer builders.

        Returns:
            The loaded item, or ``None`` when the UPC cannot be built or
            no product name can be found.
        """
        try:
            upc = (universal_product_code.UniversalProductCode(
                upc=response.css('span[itemprop="sku"]::text').get())).value
        except Exception as exception:
            logging.exception(msg='Unable to get UPC.', exc_info=exception)
            return None

        # Primary source; ``extract()`` returns a (possibly empty) list.
        name = response.css(
            "div.product-info.item-addToCart > a.invisible-text::text"
        ).extract()

        if not name:
            # Only consult <title> when needed, and use ``.get()`` so a
            # page without a <title> does not raise IndexError.
            page_title = response.css('title::text').get()
            name = page_title.split('|')[0] if page_title else None

        if not name:
            logging.error('Unable to get product name: %s', response.url)
            return None

        # The line continuation sits inside the string literal, so the
        # selector contains extra spaces before ">"; the literal is kept
        # exactly as it was.
        brand = response.css('div[itemtype="http://schema.org/Product"] \
            > span[itemprop="brand"]::text').extract()

        item_loader = product_item_loader.ProductItemLoader(
            response=response
        ).add_name(
            response=response,
            name=name,
            language=self.language,
        ).add_brand(
            response=response,
            brand=brand,
            language=self.language,
        ).add_upc(
            response=response,
            upc=upc,
        ).add_product_data_dictionary(
            product_data_dictionary=self._create_product_data_dictionary(
                response=response,
                data=data,
                name=name,
                brand=brand,
                upc=upc,
            ),
        ).add_offer_dictionary(
            offer_dictionary=self._create_offer_dictionary(
                response=response,
                data=data,
            ),
        ).add_store_dictionary(
            store_dictionary=self._create_store_dictionary(
                response=response,
            ),
        ).add_supported_language(language=self.language)

        return item_loader.load_item()
示例#3
0
    def __load_with_dictionary(self, response, data):
        """Load a product item from the page's embedded data dictionary.

        Args:
            response: Page response handed to the item loader and helpers.
            data: Dictionary expected to hold the product details under
                ``data['product']['product']``.

        Returns:
            The loaded item. When the product details are missing, the
            item is loaded without any values added.
        """
        product_loader = product_item_loader \
            .ProductItemLoader(response=response)

        # Bail out when any level of the nesting is missing. The previous
        # guard chained the checks with ``and``, which raised
        # AttributeError on empty data and never fired otherwise.
        product_details = ((data or {}).get('product') or {}).get('product')
        if not product_details:
            return product_loader.load_item()

        # TODO: Verify product.
        model_number = product_details['modelNumber']

        # The model number is tried as a UPC; when it cannot be turned
        # into one, the GTIN field is simply left out.
        try:
            upc = universal_product_code.UniversalProductCode(
                model_number
            ).value
        except Exception:
            upc = None

        product_loader.add_value(
            product.Product.KEY_BRAND,
            product_details['brandName']
        )
        product_loader.add_value(
            product.Product.KEY_MODEL_NUMBER,
            model_number
        )

        if upc:
            product_loader.add_value(
                product.Product.KEY_GTIN,
                super()._create_gtin_field(
                    response=response,
                    type=(global_trade_item_number
                          .GlobalTradeItemNumber.UPCA.value),
                    value=upc
                )
            )

        product_loader.add_value(
            product.Product.KEY_CURRENT_OFFER,
            self.__get_offer_with_dictionary(response, data)
        )
        product_loader.add_value(
            product.Product.KEY_STORE,
            self.__create_store_dictionary(response)
        )
        product_loader.add_value(
            product.Product.KEY_PRODUCT_DATA,
            self.__create_product_data_dictionary(response, data, upc)
        )

        return product_loader.load_item()
    def process_item(self, item, spider):
        """Normalise the item's UPC and copy it to the related fields.

        The value stored under ``product.Product.KEY_UPC`` is passed
        through ``UniversalProductCode``. The result (``None`` when the
        item has no UPC or it cannot be converted) is written to the UPC,
        model number and SKU fields and to the current offer's SKU.

        Args:
            item: Item being processed; it must contain a current-offer
                mapping.
            spider: Spider that produced the item (unused).

        Returns:
            The same item, updated in place.
        """
        # Start from None so the assignments below never hit an unbound
        # local when the item carries no UPC.
        upc = None

        # Read the value through the same key that is checked; the
        # previous code looked it up via ``product.FieldProduct.KEY_UPC``.
        raw_upc = item.get(product.Product.KEY_UPC)
        if raw_upc:
            try:
                # TODO: Check for nulls in mongodb pipeline.
                upc = universal_product_code.UniversalProductCode(
                    raw_upc
                ).value
            except Exception:
                upc = None

        item[product.Product.KEY_UPC] = upc
        item[product.Product.KEY_CURRENT_OFFER][offer.Offer.KEY_SKU] = upc
        item[product.Product.KEY_MODEL_NUMBER] = upc
        item[product.Product.KEY_SKU] = upc

        return item
示例#5
0
    def __load_with_dictionary(self, response, data):
        """Load a product item from the page's data dictionary.

        ``data['additionalInfo']`` is replaced in place by its parsed
        form before any value is read from it.

        Args:
            response: Page response handed to the item loader and helpers.
            data: Dictionary with at least the keys 'additionalInfo' and
                'brand'; the parsed additional info must contain
                'Numéro fabricant'.

        Returns:
            The loaded item.
        """
        product_loader = product_item_loader \
            .ProductItemLoader(response=response)

        data['additionalInfo'] = self.__parse_aditional_info(
            data['additionalInfo'])
        additional_info = data['additionalInfo']

        # A missing or unusable 'UPC' entry just means no GTIN is added.
        # ``Exception`` (rather than a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        try:
            upc = universal_product_code.UniversalProductCode(
                additional_info['UPC']).value
        except Exception:
            upc = None

        product_loader.add_value(product.Product.KEY_BRAND, data['brand'])
        product_loader.add_value(product.Product.KEY_MODEL_NUMBER,
                                 additional_info['Numéro fabricant'])

        if upc:
            product_loader.add_value(
                product.Product.KEY_GTIN,
                super()._create_gtin_field(
                    response=response,
                    type=(global_trade_item_number
                          .GlobalTradeItemNumber.UPCA.value),
                    value=upc
                )
            )

        product_loader.add_value(
            product.Product.KEY_CURRENT_OFFER,
            self.__get_offer_with_dictionary(response, data))
        product_loader.add_value(product.Product.KEY_STORE,
                                 self.__create_store_dictionary(response))
        product_loader.add_value(
            product.Product.KEY_PRODUCT_DATA,
            self.__create_product_data_dictionary(response=response,
                                                  data=data,
                                                  upc=upc))

        return product_loader.load_item()
示例#6
0
    def __load_with_dictionary(self, response, data):
        """Load a product item from the page's data dictionary.

        ``data['tags']`` is replaced in place by its parsed form. The
        model number is the 'vsn' tag, falling back to the SKU of the
        first variant.

        Args:
            response: Page response handed to the item loader and helpers.
            data: Dictionary with at least the keys 'tags' and 'vendor';
                'variants' is consulted only when there is no 'vsn' tag.

        Returns:
            The loaded item.
        """
        product_loader = product_item_loader.ProductItemLoader(
            response=response)

        data['tags'] = self.__parse_tags(data['tags'])
        tags = data['tags']

        # A missing or unusable 'upc' tag just means no GTIN is added.
        try:
            upc = universal_product_code.UniversalProductCode(
                tags['upc']).value
        except Exception:
            upc = None

        if upc:
            product_loader.add_value(
                product.Product.KEY_GTIN,
                super()._create_gtin_field(
                    response=response,
                    type=(global_trade_item_number
                          .GlobalTradeItemNumber.UPCA.value),
                    value=upc))

        product_loader.add_value(field_name=product.Product.KEY_BRAND,
                                 value=data['vendor'])
        product_loader.add_value(field_name=product.Product.KEY_CURRENT_OFFER,
                                 value=self.__create_offer_dictionary(
                                     response, data))

        model_number = tags.get('vsn')
        if not model_number:
            # Guard the fallback: 'variants' may be absent or empty, which
            # previously raised TypeError / IndexError.
            variants = data.get('variants') or []
            model_number = variants[0].get('sku') if variants else None

        product_loader.add_value(field_name=product.Product.KEY_MODEL_NUMBER,
                                 value=model_number)
        product_loader.add_value(field_name=product.Product.KEY_PRODUCT_DATA,
                                 value=self._create_product_data_dictionary(
                                     response, data, upc))
        product_loader.add_value(
            field_name=product.Product.KEY_STORE,
            value=self.__create_store_dictionary(response))

        return product_loader.load_item()
示例#7
0
    def __create_product_data_dictionary(self, response, data, upc):
        """Build the product-data dictionary for one product page.

        Args:
            response: Page response; its URL is stored and it is handed
                to the item loader and the GTIN helper.
            data: Dictionary expected to hold the product details under
                ``data['product']['product']`` and, optionally, the page
                language under ``data['intl']['language']``.
            upc: UPC already derived by the caller. When falsy, the
                model number is tried as a UPC instead.

        Returns:
            The dictionary form of the loaded product-data item. When the
            product details are missing it is built from an otherwise
            empty loader.

        Raises:
            ValueError: If 'preorderReleaseDate' is set but matches
                neither the English nor the French date format.
        """
        product_data_value_loader = \
            product_data_item_loader.ProductDataItemLoader(response=response)

        # Bail out when any level of the nesting is missing. The previous
        # guard chained the checks with ``and``, which raised
        # AttributeError on empty data and never fired otherwise. The
        # early exit now returns the same type as the normal exit.
        product_details = ((data or {}).get('product') or {}).get('product')
        if not product_details:
            return product_data_value_loader.load_item().get_dictionary()

        lang = (data.get('intl') or {}).get('language') or None

        # ``lang`` comes from the data dictionary, so accept both the enum
        # member and its raw value when checking for English.
        english = language.Language.EN
        is_english = lang == english or lang == english.value
        if is_english:
            primary_format = self.ENGLISH_DATE_FORMAT
            fallback_format = self.FRENCH_DATE_FORMAT
        else:
            primary_format = self.FRENCH_DATE_FORMAT
            fallback_format = self.ENGLISH_DATE_FORMAT

        product_data_value_loader.add_value(
            product_data.ProductData.KEY_URL,
            response.url
        )
        product_data_value_loader.add_value(
            product_data.ProductData.KEY_NAME,
            {lang: product_details['name']}
        )
        product_data_value_loader.add_value(
            product_data.ProductData.KEY_DESCRIPTION,
            {lang: product_details['shortDescription']}
        )

        release_date_text = product_details.get('preorderReleaseDate')
        if release_date_text:
            # Try the format matching the page language first, then the
            # other one; a failure of the second attempt propagates.
            try:
                release_date = datetime.datetime.strptime(
                    release_date_text,
                    primary_format
                )
            except (ValueError, TypeError):
                release_date = datetime.datetime.strptime(
                    release_date_text,
                    fallback_format
                )

            # Only the calendar date is kept.
            product_data_value_loader.add_value(
                product_data.ProductData.KEY_RELEASE_DATE,
                release_date.replace(
                    hour=0,
                    minute=0,
                    second=0,
                    microsecond=0
                ).isoformat()
            )

        model_number = product_details['modelNumber']

        # Honour the caller's UPC; only fall back to deriving one from
        # the model number when none was supplied.
        if not upc:
            try:
                upc = universal_product_code.UniversalProductCode(
                    model_number
                ).value
            except Exception:
                upc = None

        product_data_value_loader.add_value(
            product_data.ProductData.KEY_BRAND,
            product_details['brandName']
        )
        product_data_value_loader.add_value(
            product_data.ProductData.KEY_SKU,
            product_details['sku']
        )
        product_data_value_loader.add_value(
            product_data.ProductData.KEY_MODEL_NUMBER,
            model_number
        )
        product_data_value_loader.add_value(
            offer.Offer.KEY_SOLD_BY,
            self.sold_by
        )
        product_data_value_loader.add_value(
            offer.Offer.KEY_STORE_ID,
            self.store_id
        )
        # TODO: Populate ProductData.KEY_SUPPORTED_LANGUAGES.

        if upc:
            # No local variable is named ``product`` any more, so this
            # resolves to the ``product`` module rather than a dict.
            product_data_value_loader.add_value(
                product.Product.KEY_GTIN,
                super()._create_gtin_field(
                    response=response,
                    type=(global_trade_item_number
                          .GlobalTradeItemNumber.UPCA.value),
                    value=upc
                )
            )

        return product_data_value_loader.load_item().get_dictionary()