def get_url_from_urls(self, urls):
    """Build one URL carrying every brand id found in *urls*.

    Uses the first entry of *urls* as the base URL, pulls the ``brandid``
    query value out of each non-empty URL, and appends them all as a
    single pipe-separated ``brandid`` query parameter.
    """
    base_url = get_extracted(urls)
    # Raw string: '\d' in a plain literal is an invalid escape sequence
    # (SyntaxWarning on modern Python). Loop variable renamed so it no
    # longer shadows the base URL computed above.
    ids = [
        get_extracted(re.findall(r'brandid=(\d+)', u))
        for u in urls if u
    ]
    return add_query_parameters(base_url, 'brandid', "|".join(ids))
def parse_item(self, response):
    """Populate an MScrapyItems record from a product detail page."""
    # Breadcrumb anchor texts; make/category are picked out of this list
    # via get_extracted below.
    crumbs = response.css('.breadcrumbs li a[title=""]::text').extract()

    def joined(selector):
        # Concatenate every text fragment the selector matches.
        return "".join(response.css(selector).extract())

    item = MScrapyItems()
    item["store_name"] = self.store_name
    item["store_url"] = "".join(self.start_urls)
    item["url"] = response.url
    item["title"] = joined('.product-name h1::text')
    item["price"] = get_extracted(response.css('.price *::text').extract())
    item["make"] = get_extracted(crumbs)
    item["category"] = get_extracted(crumbs, 2)
    item["description"] = joined('.product-specs')
    return item
def parse_item(self, response):
    """Build an MScrapyItems record from a product page."""
    item = MScrapyItems()
    breadcrumbs = response.css('.ty-breadcrumbs a::text').extract()

    def text_of(selector):
        # Join all text fragments matched by *selector* into one string.
        return "".join(response.css(selector).extract())

    item["store_name"] = self.store_name
    item["store_url"] = "".join(self.start_urls)
    item["url"] = response.url
    item["title"] = text_of('.ty-product-block-title::text')
    item["price"] = text_of('.ty-price-num::text')
    item["make"] = get_extracted(breadcrumbs, 1)
    item["category"] = get_extracted(breadcrumbs, 2)
    item["description"] = text_of('#content_description')
    return item
def parse(self, response):
    """Follow the category links found in the #vmenu_69 menu."""
    menu = get_extracted(response.css('#vmenu_69'))
    entries = menu.xpath('li')
    if self.categories:
        # Restrict to menu entries whose link text equals a configured
        # category name. Note: the xpath lookup runs before the truthiness
        # check, exactly as in the original comprehension, so falsy
        # category names are queried but their matches discarded.
        matched = []
        for cat in self.categories:
            for li in menu.xpath('li[div/a[text() = "' + cat + '"]]'):
                if cat:
                    matched.append(li)
        entries = SelectorList(matched)
    for href in entries.xpath('div/a/@href').extract():
        yield Request(href, callback=self.parse_items)
def parse(self, response):
    """Yield one Request per category link in the left-column menu.

    When ``self.categories`` is set, only links whose <strong> label
    matches a configured category are followed. Categories that match no
    menu entry are dropped — previously get_extracted() returned a falsy
    value for them and this method yielded Request(None).
    """
    menu = response.css('#column-left .nav')
    urls = menu.xpath('li/a/@href').extract()
    if self.categories:
        # Drop falsy lookup results, mirroring the filter(None, ...)
        # guard already used by get_urls elsewhere in this file.
        urls = [
            url for url in (
                get_extracted(
                    menu.xpath(
                        'li/a[strong[text() = "' + cat + '"]]/@href'
                    ).extract())
                for cat in self.categories if cat)
            if url
        ]
    for url in urls:
        yield Request(url, callback=self.parse_items)
def get_urls(self, menu, categories):
    """Return the hrefs of links under *menu*, optionally filtered.

    When *categories* is non-empty, only links whose text contains one of
    the (truthy) category names are kept; categories matching no link are
    skipped. Always returns a list — the original returned a lazy
    ``filter`` object on Python 3 in the filtered branch, giving the
    method an inconsistent return type.
    """
    urls = menu.xpath('a/@href').extract()
    if categories:
        # Materialized list comprehension; 'if url' replicates the old
        # filter(None, ...) removal of falsy lookup results.
        urls = [
            url for url in (
                get_extracted(
                    menu.xpath(
                        'a[contains(text(), "' + cat + '")]/@href'
                    ).extract())
                for cat in categories if cat)
            if url
        ]
    return urls
def parse_item(self, response):
    """Populate an MScrapyItems record from a product page.

    Fixes the SKU extraction: ``str.strip("SKU: ")`` removes ANY of the
    characters ``S K U : <space>`` from both ends of the string — it can
    eat leading/trailing letters of the code itself — rather than the
    literal ``"SKU: "`` prefix that was intended.
    """
    item = MScrapyItems()
    item["store_name"] = self.store_name
    item["store_url"] = "".join(self.start_urls)
    item["url"] = response.url
    item["title"] = "".join(
        response.css('#laptop_header h1 a::text').extract())
    # Remove only a literal "SKU:" prefix, then surrounding whitespace.
    code = "".join(
        response.css('#navigator .share span::text').extract()).strip()
    if code.startswith("SKU:"):
        code = code[len("SKU:"):].strip()
    item["code"] = code
    item["price"] = "".join(
        response.css('#laptop_header .desc-price::text').extract()).strip()
    item["make"] = "".join(
        response.css('#laptop_header > div > a::text').extract())
    item["category"] = get_extracted(
        response.css('#navigator > a::text').extract(), 1)
    item['description'] = "".join(response.css('#tab_detail2').extract())
    return item
def parse(self, response):
    """Yield one Request per category link in the left-column menu.

    When ``self.categories`` is set, only links whose <strong> label
    matches a configured category are followed. Categories that match no
    menu entry are dropped — previously get_extracted() returned a falsy
    value for them and this method yielded Request(None).
    """
    menu = response.css('#column-left .nav')
    urls = menu.xpath('li/a/@href').extract()
    if self.categories:
        # Drop falsy lookup results, mirroring the filter(None, ...)
        # guard already used by get_urls elsewhere in this file.
        urls = [
            url for url in (
                get_extracted(
                    menu.xpath(
                        'li/a[strong[text() = "' + cat + '"]]/@href'
                    ).extract())
                for cat in self.categories if cat)
            if url
        ]
    for url in urls:
        yield Request(url, callback=self.parse_items)
def parse_item(self, response):
    """Extract product fields into an MScrapyItems record."""
    item = MScrapyItems()
    details = response.css('.product-info .right')
    item["store_name"] = self.store_name
    item["store_url"] = "".join(self.start_urls)
    item["url"] = response.url
    item["title"] = "".join(details.css('h1::text').extract())
    price_parts = details.css('div[class^="price"]::text').extract()
    item["price"] = "".join(price_parts).strip()
    item["make"] = "".join(details.css('.manf a img::attr(alt)').extract())
    crumbs = response.css('.breadcrumb a::text').extract()
    item["category"] = get_extracted(crumbs, 1)
    item["description"] = "".join(
        response.css('#sec-description').extract())
    return item
def parse_item(self, response):
    """Fill an MScrapyItems record from the product-info panel."""
    def all_text(sel, query):
        # Concatenate every fragment *query* matches on *sel*.
        return "".join(sel.css(query).extract())

    panel = response.css('.product-info .right')
    item = MScrapyItems()
    item["store_name"] = self.store_name
    item["store_url"] = "".join(self.start_urls)
    item["url"] = response.url
    item["title"] = all_text(panel, 'h1::text')
    item["price"] = all_text(panel, 'div[class^="price"]::text').strip()
    item["make"] = all_text(panel, '.manf a img::attr(alt)')
    item["category"] = get_extracted(
        response.css('.breadcrumb a::text').extract(), 1)
    item["description"] = all_text(response, '#sec-description')
    return item
def parse_item(self, response):
    """Populate an MScrapyItems record from a product page.

    Fixes the SKU extraction: ``str.strip("SKU: ")`` removes ANY of the
    characters ``S K U : <space>`` from both ends of the string — it can
    eat leading/trailing letters of the code itself — rather than the
    literal ``"SKU: "`` prefix that was intended.
    """
    item = MScrapyItems()
    item["store_name"] = self.store_name
    item["store_url"] = "".join(self.start_urls)
    item["url"] = response.url
    item["title"] = "".join(
        response.css('#laptop_header h1 a::text').extract())
    # Remove only a literal "SKU:" prefix, then surrounding whitespace.
    code = "".join(
        response.css('#navigator .share span::text').extract()).strip()
    if code.startswith("SKU:"):
        code = code[len("SKU:"):].strip()
    item["code"] = code
    item["price"] = "".join(
        response.css(
            '#laptop_header .desc-price::text').extract()).strip()
    item["make"] = "".join(
        response.css('#laptop_header > div > a::text').extract())
    item["category"] = get_extracted(
        response.css('#navigator > a::text').extract(), 1)
    item['description'] = "".join(response.css('#tab_detail2').extract())
    return item