Пример #1
0
 def get_url_from_urls(self, urls):
     """Collapse several brand-filtered URLs into a single URL.

     ``get_extracted(urls)`` selects the base URL (presumably the first
     entry -- confirm against the helper).  The ``brandid`` value found in
     each non-empty URL is collected and the values are joined with ``|``
     and passed to ``add_query_parameters`` as the ``brandid`` parameter of
     the base URL.

     :param urls: iterable of URL strings; falsy entries are ignored.
     :return: the result of ``add_query_parameters`` for the base URL.
     """
     base_url = get_extracted(urls)
     # The loop variable deliberately has its own name: on Python 2 a list
     # comprehension variable leaks into the enclosing scope, so reusing
     # ``url`` would silently replace the base URL with the last entry.
     # The pattern is a raw string because '\d' in a plain literal is an
     # invalid escape sequence.
     brand_ids = [
         get_extracted(re.findall(r'brandid=(\d+)', candidate))
         for candidate in urls
         if candidate
     ]
     return add_query_parameters(base_url, 'brandid', "|".join(brand_ids))
Пример #2
0
 def parse_item(self, response):
     """Fill an ``MScrapyItems`` from a product detail response.

     ``make`` and ``category`` are taken from the breadcrumb link texts via
     ``get_extracted``; title, price and description come from the product
     name, price and specs nodes of the page.

     :param response: response object of the product page.
     :return: the populated item.
     """
     product = MScrapyItems()
     crumbs = response.css('.breadcrumbs li a[title=""]::text').extract()
     # Pairs are listed in the order the fields are written to the item.
     field_values = (
         ("store_name", self.store_name),
         ("store_url", "".join(self.start_urls)),
         ("url", response.url),
         ("title", "".join(response.css('.product-name h1::text').extract())),
         ("price", get_extracted(response.css('.price *::text').extract())),
         ("make", get_extracted(crumbs)),
         ("category", get_extracted(crumbs, 2)),
         ("description", "".join(response.css('.product-specs').extract())),
     )
     for field, value in field_values:
         product[field] = value
     return product
Пример #3
0
 def parse_item(self, response):
     """Fill an ``MScrapyItems`` from a product detail response.

     Title, price and description are the concatenation of every match of
     their CSS query; ``make`` and ``category`` are picked from the
     breadcrumb link texts with ``get_extracted`` (positions 1 and 2).

     :param response: response object of the product page.
     :return: the populated item.
     """
     def joined(query):
         # Concatenate all matches of a CSS query into one string.
         return "".join(response.css(query).extract())

     record = MScrapyItems()
     crumbs = response.css('.ty-breadcrumbs a::text').extract()

     # Store-level fields.
     record["store_name"] = self.store_name
     record["store_url"] = "".join(self.start_urls)

     # Page-level fields.
     record["url"] = response.url
     record["title"] = joined('.ty-product-block-title::text')
     record["price"] = joined('.ty-price-num::text')
     record["make"] = get_extracted(crumbs, 1)
     record["category"] = get_extracted(crumbs, 2)
     record["description"] = joined('#content_description')
     return record
Пример #4
0
 def parse(self, response):
     """Yield a request for each category link of the ``#vmenu_69`` menu.

     When ``self.categories`` is set, only ``li`` entries whose ``div/a``
     text equals one of the configured names are followed; otherwise every
     ``li`` of the menu is followed.  Each request is handled by
     ``self.parse_items``.

     :param response: response object of the page containing the menu.
     """
     menu = get_extracted(response.css('#vmenu_69'))
     if self.categories:
         # The ``if cat`` guard comes before the inner loop so that empty
         # or ``None`` category names are dropped before the XPath string
         # is built (concatenating ``None`` would raise TypeError) and no
         # query is run for them.
         lis = SelectorList([
             li
             for cat in self.categories
             if cat
             for li in menu.xpath('li[div/a[text() = "' + cat + '"]]')
         ])
     else:
         lis = menu.xpath('li')
     for url in lis.xpath('div/a/@href').extract():
         yield Request(url, callback=self.parse_items)
Пример #5
0
 def parse(self, response):
     """Yield a request for each category link of the left-column menu.

     When ``self.categories`` is set, only ``li/a`` links whose ``strong``
     text equals one of the configured names are followed; otherwise every
     ``li/a`` link under ``#column-left .nav`` is followed.  Each request is
     handled by ``self.parse_items``.

     :param response: response object of the page containing the menu.
     """
     menu = response.css('#column-left .nav')
     if self.categories:
         urls = [
             get_extracted(
                 menu.xpath('li/a[strong[text() = "' + cat + '"]]/@href').extract())
             for cat in self.categories
             if cat
         ]
     else:
         urls = menu.xpath('li/a/@href').extract()
     for url in urls:
         # NOTE(review): get_extracted presumably returns a falsy value when
         # a configured category has no matching menu entry -- confirm.
         # Such a value is skipped instead of being passed to Request, so
         # one unknown category no longer aborts the remaining ones.
         if url:
             yield Request(url, callback=self.parse_items)
Пример #6
0
 def get_urls(self, menu, categories):
     """Return the menu link URLs, optionally restricted to categories.

     Without ``categories`` every ``a/@href`` under ``menu`` is returned.
     With ``categories``, one URL is looked up per non-empty category name
     (the first link whose text contains the name, as chosen by
     ``get_extracted``) and lookups that produced nothing are dropped.

     :param menu: selector-like object exposing ``xpath(...).extract()``.
     :param categories: iterable of category names, or a falsy value to
         select every link.
     :return: a list of URLs on both paths (the previous ``filter`` call
         produced a lazy iterator on Python 3, unlike the other path).
     """
     if not categories:
         return menu.xpath('a/@href').extract()
     matches = (
         get_extracted(
             menu.xpath('a[contains(text(), "' + cat + '")]/@href').extract())
         for cat in categories
         if cat
     )
     # Keep only lookups that actually yielded a URL.
     return [url for url in matches if url]
Пример #7
0
 def parse_item(self, response):
     """Fill an ``MScrapyItems`` from a laptop product page response.

     Title, price and make are read from the ``#laptop_header`` area, the
     product code and category from ``#navigator``, and the description is
     the markup of ``#tab_detail2``.

     :param response: response object of the product page.
     :return: the populated item.
     """
     item = MScrapyItems()
     item["store_name"] = self.store_name
     item["store_url"] = "".join(self.start_urls)
     item["url"] = response.url
     item["title"] = "".join(response.css('#laptop_header h1 a::text').extract())
     # str.strip("SKU: ") removes any of the characters S, K, U, ':' and
     # ' ' from both ends, so it would also eat the start or end of a code
     # such as "SKU: K55U".  Remove the label as a prefix instead.
     code = "".join(response.css('#navigator .share span::text').extract()).strip()
     if code.startswith("SKU:"):
         code = code[len("SKU:"):].strip()
     item["code"] = code
     item["price"] = "".join(response.css('#laptop_header .desc-price::text').extract()).strip()
     item["make"] = "".join(response.css('#laptop_header > div > a::text').extract())
     item["category"] = get_extracted(response.css('#navigator > a::text').extract(), 1)
     item['description'] = "".join(response.css('#tab_detail2').extract())
     return item
Пример #8
0
 def parse(self, response):
     """Yield a request for each category link of the left-column menu.

     If ``self.categories`` is set, only ``li/a`` links whose ``strong``
     text equals a configured name are requested; otherwise all ``li/a``
     links under ``#column-left .nav`` are requested.  Responses are passed
     to ``self.parse_items``.

     :param response: response object of the page containing the menu.
     """
     menu = response.css('#column-left .nav')
     urls = menu.xpath('li/a/@href').extract()
     if self.categories:
         urls = [
             get_extracted(
                 menu.xpath('li/a[strong[text() = "' + cat +
                            '"]]/@href').extract())
             for cat in self.categories if cat
         ]
     for url in urls:
         # NOTE(review): get_extracted presumably returns a falsy value for
         # a category that is missing from the menu -- confirm.  Skip such
         # values rather than building a Request from an empty URL, which
         # would stop the remaining categories from being requested.
         if not url:
             continue
         yield Request(url, callback=self.parse_items)
Пример #9
0
 def parse_item(self, response):
     """Fill an ``MScrapyItems`` from a product detail response.

     Title, price and make are read from the ``.product-info .right``
     panel; the category is picked from the breadcrumb link texts with
     ``get_extracted`` and the description is the ``#sec-description``
     markup.

     :param response: response object of the product page.
     :return: the populated item.
     """
     def panel_text(scope, query):
         # Concatenate every match of ``query`` under ``scope``.
         return "".join(scope.css(query).extract())

     product = MScrapyItems()
     panel = response.css('.product-info .right')

     product["store_name"] = self.store_name
     product["store_url"] = "".join(self.start_urls)
     product["url"] = response.url
     product["title"] = panel_text(panel, 'h1::text')
     product["price"] = panel_text(panel, 'div[class^="price"]::text').strip()
     product["make"] = panel_text(panel, '.manf a img::attr(alt)')

     crumbs = response.css('.breadcrumb a::text').extract()
     product["category"] = get_extracted(crumbs, 1)
     product["description"] = panel_text(response, '#sec-description')
     return product
Пример #10
0
 def parse_item(self, response):
     """Fill an ``MScrapyItems`` from a product detail response.

     The ``.product-info .right`` panel supplies title, price (stripped of
     surrounding whitespace) and make; category comes from the breadcrumb
     link texts and description from the ``#sec-description`` markup.

     :param response: response object of the product page.
     :return: the populated item.
     """
     entry = MScrapyItems()
     details = response.css('.product-info .right')

     entry["store_name"] = self.store_name
     entry["store_url"] = "".join(self.start_urls)
     entry["url"] = response.url

     heading_parts = details.css('h1::text').extract()
     entry["title"] = "".join(heading_parts)

     price_text = "".join(details.css('div[class^="price"]::text').extract())
     entry["price"] = price_text.strip()

     maker_alts = details.css('.manf a img::attr(alt)').extract()
     entry["make"] = "".join(maker_alts)

     breadcrumb_texts = response.css('.breadcrumb a::text').extract()
     entry["category"] = get_extracted(breadcrumb_texts, 1)

     description_html = response.css('#sec-description').extract()
     entry["description"] = "".join(description_html)
     return entry
Пример #11
0
 def parse_item(self, response):
     """Fill an ``MScrapyItems`` from a laptop product page response.

     Title, price and make come from the ``#laptop_header`` area, the
     product code and category from ``#navigator``, and the description is
     the markup of ``#tab_detail2``.

     :param response: response object of the product page.
     :return: the populated item.
     """
     item = MScrapyItems()
     item["store_name"] = self.store_name
     item["store_url"] = "".join(self.start_urls)
     item["url"] = response.url
     item["title"] = "".join(
         response.css('#laptop_header h1 a::text').extract())
     # The "SKU:" label is removed as a prefix.  str.strip("SKU: ") would
     # treat the argument as a set of characters and also trim any leading
     # or trailing S, K or U that belongs to the code itself.
     code = "".join(
         response.css('#navigator .share span::text').extract()).strip()
     if code.startswith("SKU:"):
         code = code[len("SKU:"):].strip()
     item["code"] = code
     item["price"] = "".join(
         response.css(
             '#laptop_header .desc-price::text').extract()).strip()
     item["make"] = "".join(
         response.css('#laptop_header > div > a::text').extract())
     item["category"] = get_extracted(
         response.css('#navigator > a::text').extract(), 1)
     item['description'] = "".join(response.css('#tab_detail2').extract())
     return item