def parse_item(self, response):
    """Build a ``Product`` item from a product detail page.

    Category, title, URL, availability and prices are collected through an
    ``ItemLoader`` bound to ``response``.

    Args:
        response: The product page response to extract from.

    Returns:
        The item produced by ``ItemLoader.load_item()``.
    """
    il = ItemLoader(item=Product(), response=response)

    cat = il.get_xpath('//div[contains(@id, "ctl00_pnlBreadCrumbs")]/a[last()]/text()')
    availability = il.get_xpath('//a[contains(@id,"hplddToCart") or contains(@class,"addToCart")]/text()')
    price = il.get_css('span#ctl00_ContentPlaceHolder1_ctrlProdDetailUC_lblRegprice > font::text')
    sale = il.get_css('span#ctl00_ContentPlaceHolder1_ctrlProdDetailUC_lblSaleprice > font::text')

    # If the XPath doesn't return a category, the product belongs to the
    # "Bundle" category.
    il.add_value("category", cat or "Bundle")

    il.add_css("title", "span#ctl00_ContentPlaceHolder1_ctrlProdDetailUC_lblProdTitle::text")
    il.add_value("url", response.url)

    # If a product can be added to the cart it is available online, otherwise
    # it is not. This is a list-membership test, so it only succeeds when one
    # extracted text node equals "ADD TO CART" exactly.
    if "ADD TO CART" in availability:
        il.add_value("availability", "Product is available online")
    else:
        il.add_value("availability", "Product is not available online")

    # If there's a sale price but no regular price, the sale price is stored
    # as the regular price, as shown on the website.
    if price:
        il.add_value("regPrice", price)
        il.add_value("salePrice", sale)
    else:
        il.add_value("regPrice", sale)
        il.add_value("salePrice", None)
    return il.load_item()
示例#2
0
 def parse_product(self, response: HtmlResponse):
     """Yield one ``LeroyItem`` populated from a product page.

     Name, URL, price and photo URLs are added directly to the loader; the
     option terms and definitions are extracted separately, stripped, and
     combined into a single ``options`` mapping.
     """
     strip_text = MapCompose(str.strip)
     item_loader = ItemLoader(item=LeroyItem(), response=response)

     item_loader.add_css('name', 'h1.header-2::text')
     item_loader.add_value('url', response.url)
     item_loader.add_css(
         'price',
         'uc-pdp-price-view.primary-price meta[itemprop="price"]::attr(content)',
     )
     item_loader.add_css(
         'photos',
         'picture[slot="pictures"] img::attr(data-origin)',
     )

     # Pair each <dt> text with the <dd> text at the same position; zip stops
     # at the shorter list, and a repeated term keeps its last definition.
     terms = item_loader.get_css('dt.def-list__term::text', strip_text)
     definitions = item_loader.get_css('dd.def-list__definition::text',
                                       strip_text)
     options = {term: definition for term, definition in zip(terms, definitions)}
     item_loader.add_value('options', options)

     yield item_loader.load_item()
    def parse_detail(self, response, char):
        """Populate ``char`` from a character detail page and yield it.

        Args:
            response: The detail-page response to extract from.
            char: The item instance handed to the ``ItemLoader`` to fill.

        Yields:
            The item returned by ``loader.load_item()``.
        """
        # Every field ``x`` listed here is extracted with the CSS selector
        # stored under ``selectors["CHARACTER_X"]``.
        character_fields = (
            "image",
            "name",
            "feature_films",
            "short_films",
            "shows",
            "games",
            "rides",
            "animator",
            "designer",
            "voice",
            "portrayed_by",
            "performance_model",
            "inspiration",
            "awards",
            "fullname",
            "other_names",
            "occupation",
            "affiliations",
            "home",
            "likes",
            "dislikes",
            "powers",
            "paraphernalia",
            "status",
            "parents",
            "siblings",
            "family",
            "partner",
            "children",
            "pets",
        )

        loader = ItemLoader(item=char, response=response)

        loader.add_value("url", response.url)
        for field in character_fields:
            loader.add_css(field, selectors["CHARACTER_" + field.upper()])

        # When the primary selector matches nothing, fall back to the
        # alternative selector for the same field.
        if not loader.get_css(selectors["CHARACTER_NAME"]):
            loader.add_css("name", selectors["PAGE_HEADER_TITLE"])

        if not loader.get_css(selectors["CHARACTER_IMAGE"]):
            loader.add_css("image", selectors["CHARACTER_THUMB_IMAGE"])

        # Lazy %-args: the message is only formatted if INFO is enabled.
        logging.info("Crawl %s", loader.get_collected_values("name"))

        yield loader.load_item()
    def parse_item(self, response):
        """Build a ``Product`` item from a product detail page.

        Category, title, URL, availability and prices are collected through
        an ``ItemLoader`` bound to ``response``.

        Args:
            response: The product page response to extract from.

        Returns:
            The item produced by ``ItemLoader.load_item()``.
        """
        il = ItemLoader(item=Product(), response=response)

        cat = il.get_xpath(
            '//div[contains(@id, "ctl00_pnlBreadCrumbs")]/a[last()]/text()')
        availability = il.get_xpath(
            '//a[contains(@id,"hplddToCart") or contains(@class,"addToCart")]/text()'
        )
        price = il.get_css(
            'span#ctl00_ContentPlaceHolder1_ctrlProdDetailUC_lblRegprice > font::text'
        )
        sale = il.get_css(
            'span#ctl00_ContentPlaceHolder1_ctrlProdDetailUC_lblSaleprice > font::text'
        )

        # If the XPath doesn't return a category, the product belongs to the
        # "Bundle" category.
        il.add_value("category", cat or "Bundle")

        il.add_css(
            "title",
            "span#ctl00_ContentPlaceHolder1_ctrlProdDetailUC_lblProdTitle::text"
        )
        il.add_value("url", response.url)

        # If a product can be added to the cart it is available online,
        # otherwise it is not. This is a list-membership test, so it only
        # succeeds when one extracted text node equals "ADD TO CART" exactly.
        if "ADD TO CART" in availability:
            il.add_value("availability", "Product is available online")
        else:
            il.add_value("availability", "Product is not available online")

        # If there's a sale price but no regular price, the sale price is
        # stored as the regular price, as shown on the website.
        if price:
            il.add_value("regPrice", price)
            il.add_value("salePrice", sale)
        else:
            il.add_value("regPrice", sale)
            il.add_value("salePrice", None)
        return il.load_item()
示例#5
0
    def parse_image_url(self, response):
        """Add ``image_url`` to the item carried in ``response.meta``.

        The item stored under ``response.meta['current_item']`` is loaded
        with the ``src`` attribute of the first matching ``<img>`` and
        returned.
        """
        img_css = 'div.main > section.section > div.container > div > div > div > img'

        # get_css yields the matched elements as markup strings; only the
        # first is used, and [0] raises IndexError when nothing matches.
        img_markup = ItemLoader(response=response).get_css(img_css)[0]

        # Presumably re-parsed on its own so that the relative '@src' XPath
        # is evaluated against the <img> element itself.
        img_selector = Selector(text=img_markup, type="xml")
        item_loader = ItemLoader(item=response.meta['current_item'],
                                 selector=img_selector)
        item_loader.add_xpath('image_url', '@src')

        return item_loader.load_item()
示例#6
0
    def parse(self, response):
        """Yield one detail-page request per sign link on a category page.

        For every matching ``<a>`` element a ``Sign`` item is loaded with the
        category taken from ``response.meta['current_category']`` plus the
        link's ``href``, ``title`` and nested image ``src``. The item is then
        forwarded to ``parse_image_url`` through the request's ``meta``.

        Args:
            response: The category listing response.

        Yields:
            ``scrapy.Request`` objects targeting each sign's detail page.
        """
        category = response.meta['current_category']

        anchors = ItemLoader(response=response).get_css(
            'div.main > section.section > div.container > div > div > div > a')

        for anchor in anchors:
            # Each anchor arrives as a markup string and is parsed on its own
            # so the relative XPaths below apply to that single <a> element.
            anchor_selector = Selector(text=anchor, type="xml")
            sign_loader = ItemLoader(item=Sign(), selector=anchor_selector)

            sign_loader.add_value('category', category)
            sign_loader.add_xpath('detail_url', '@href')
            sign_loader.add_xpath('meaning', '@title')
            sign_loader.add_xpath('miniature_url', 'img/@src')

            sign = sign_loader.load_item()
            # Resolve the href against the page URL: scrapy.Request rejects
            # URLs without a scheme, and absolute URLs pass through unchanged.
            yield scrapy.Request(url=response.urljoin(sign['detail_url']),
                                 callback=self.parse_image_url,
                                 meta={'current_item': sign})
示例#7
0
loader.get_xpath('//p[@id="price"]', TakeFirst(), re='the price is (.*)')
__________________________________________________________
'add_xpath(field_name, xpath, *processors, **kwargs)'
 - Giống add_value, nhưng nó nhận 1 biểu thức XPath thay cho 1 giá trị; biểu thức XPath này dùng để trích xuất ra 1 list các chuỗi unicode.
 - Vd:
# HTML snippet: <p class="product-name">Color TV</p>
loader.add_xpath('name', '//p[@class="product-name"]')
# HTML snippet: <p id="price">the price is $1200</p>
loader.add_xpath('price', '//p[@id="price"]', re='the price is (.*)')
__________________________________________________________
'replace_xpath(field_name, xpath, *processors, **kwargs)'
 - Giống add_xpath(), nó thay thế dữ liệu cũ thay vì thêm dữ liệu mới vào nó.
__________________________________________________________
'get_css(css, *processors, **kwargs)'
 - Tương tự get_xpath, nhưng nó nhận 1 CSS selector thay cho biểu thức XPath, và cũng trích xuất ra 1 list các chuỗi unicode.
 - Vd : 
# HTML snippet: <p class="product-name">Color TV</p>
loader.get_css('p.product-name')
# HTML snippet: <p id="price">the price is $1200</p>
loader.get_css('p#price', TakeFirst(), re='the price is (.*)')
__________________________________________________________
'add_css(field_name, css, *processors, **kwargs)'
- Giống add_value, nhưng nó nhận 1 CSS selector thay cho 1 giá trị; CSS selector này dùng để trích xuất ra 1 list các chuỗi unicode.
 - Vd:
# HTML snippet: <p class="product-name">Color TV</p>
loader.add_css('name', 'p.product-name')
# HTML snippet: <p id="price">the price is $1200</p>
loader.add_css('price', 'p#price', re='the price is (.*)')
__________________________________________________________
'replace_css(field_name, css, *processors, **kwargs)'
- Giống add_css(), nhưng nó thay thế dữ liệu cũ thay vì thêm dữ liệu mới vào nó.
__________________________________________________________