Пример #1
0
 def parse(self, response):
     """Yield one ``ProductItem`` per product node in the first result list.

     Every field is built by joining all values matched by an XPath query
     evaluated relative to the product node and stripping the surrounding
     whitespace, so a query that matches nothing produces ``''``.

     Args:
         response: Response object exposing an ``xpath()`` method.

     Yields:
         ProductItem: populated with ``price``, ``title``, ``shop``,
         ``image``, ``deal`` and ``location``.
     """
     products = response.xpath(
         '//div[@id="mainsrp-itemlist"]//div[@class="items"][1]//div[contains(@class,"item")]'
     )
     for product in products:
         item = ProductItem()
         item["price"] = ''.join(
             product.xpath('.//div[contains(@class,"price")]//text()').
             extract()).strip()
         item["title"] = ''.join(
             product.xpath('.//div[contains(@class,"title")]//text()').
             extract()).strip()
         item["shop"] = ''.join(
             product.xpath('.//div[contains(@class,"shop")]//text()').
             extract()).strip()
         # The image URL is read from the ``data-src`` attribute of the <img>.
         item["image"] = ''.join(
             product.xpath(
                 './/div[@class="pic"]//img[contains(@class,"img")]/@data-src'
             ).extract()).strip()
         item["deal"] = ''.join(
             product.xpath('.//div[contains(@class,"deal-cnt")]//text()').
             extract()).strip()
         item["location"] = ''.join(
             product.xpath('.//div[contains(@class,"location")]//text()').
             extract()).strip()
         yield item
Пример #2
0
    def parse(self, response):
        """Yield a ``ProductItem`` for each product node of the first result list.

        ``price``, ``title``, ``shop`` and ``image`` hold the joined, stripped
        matches of their queries; ``deal`` and ``location`` hold the first
        match only (``None`` when nothing matches).  After each item has been
        handed to the consumer, the item is printed as a dict followed by a
        separator line.
        """

        def joined(node, query):
            # Concatenate every match of ``query`` under ``node`` and trim it.
            return ''.join(node.xpath(query).extract()).strip()

        rows = response.xpath(
            '//div[@id="mainsrp-itemlist"]//div[@class="items"][1]//div[contains(@class, "item")]'
        )

        for row in rows:
            item = ProductItem()

            item['price'] = joined(
                row, './/div[contains(@class, "price")]//text()')
            item['title'] = joined(
                row, './/div[contains(@class, "title")]//text()')
            item['shop'] = joined(
                row, './/div[contains(@class, "shop")]//text()')
            item['image'] = joined(
                row,
                './/div[@class="pic"]//img[contains(@class, "img")]/@data-src')

            deal_nodes = row.xpath(
                './/div[contains(@class, "deal-cnt")]//text()')
            item['deal'] = deal_nodes.extract_first()

            location_nodes = row.xpath(
                './/div[contains(@class, "location")]//text()')
            item['location'] = location_nodes.extract_first()

            yield item

            # Executed only once the consumer requests the next item.
            print(dict(item))
            print('ok' * 30)
Пример #3
0
    def parse(self, response):
        """Yield one ``ProductItem`` per product node of the result list.

        All field queries are relative to the current product node (they
        start with ``.//``); a query starting with ``//`` would search the
        whole document and return the same first match for every product.
        Fields whose node is missing are set to ``''`` instead of raising.

        Args:
            response: Response object exposing ``xpath()`` and ``urljoin()``.

        Yields:
            ProductItem: populated with ``image``, ``price``, ``deal``,
            ``title``, ``shop`` and ``location``.
        """
        products = response.xpath(
            '//*[@id="mainsrp-itemlist"]/div/div/div[1]/div')
        for product in products:
            item = ProductItem()

            image_src = product.xpath(
                './/div[@class="pic"]//a//img/@data-src'
            ).extract_first(default='').strip()
            # Only resolve a real value: urljoin('') would return the page URL.
            item['image'] = response.urljoin(image_src) if image_src else ''

            item['price'] = product.xpath(
                './/div[contains(@class, "price")]/strong/text()'
            ).extract_first(default='').strip()

            # The slice drops the last three characters of the deal text
            # (presumably a fixed trailing label -- confirm against the page).
            item['deal'] = product.xpath(
                './/div[@class="deal-cnt"]/text()'
            ).extract_first(default='').strip()[:-3]

            item['title'] = ''.join(
                product.xpath(
                    './/*[contains(@class, "title")]/a/span/text()'
                ).extract()).strip()
            item['shop'] = ''.join(
                product.xpath(
                    './/div[@class="shop"]/a/span/text()').extract()).strip()
            item['location'] = product.xpath(
                './/div[@class="location"]/text()'
            ).extract_first(default='').strip()

            print('item: ', item)

            yield item
Пример #4
0
    def parse(self, response):
        """Yield one ``ProductItem`` per ``<li>`` of the product list.

        Args:
            response: Response object exposing a ``css()`` method.

        Yields:
            ProductItem: populated with ``name``, ``price``, ``shop``,
            ``commit`` and ``image``.  ``name`` is ``''`` when its node is
            missing; the other fields are ``None`` when nothing matches.
        """
        # Collect every product entry on this page.
        products = response.css('ul.gl-warp li')
        for product in products:
            item = ProductItem()
            item['name'] = product.css(
                'div.p-name a em::text').extract_first(default='').strip()
            item['price'] = product.css(
                'div.p-price strong.J_price i::text').extract_first()
            item['shop'] = product.css(
                'div.p-shop span a::attr(title)').extract_first()
            item['commit'] = product.css(
                'strong a.comment::text').extract_first()

            # Prefer the ``src`` attribute and fall back to ``data-lazy-img``
            # when the <img> carries no ``src``.
            image = product.xpath(
                './/div[@class="p-img"]/a/img/@src').extract_first()
            if image is None:
                image = product.xpath(
                    './/div[@class="p-img"]/a/img/@data-lazy-img'
                ).extract_first()
            item['image'] = image

            yield item
Пример #5
0
 def parse(self, response):
     """Yield a ``ProductItem`` for each product node of the first result list.

     Each of the six fields is the concatenation of everything its XPath
     query matches beneath the product node, stripped of surrounding
     whitespace.
     """
     # (field name, query relative to the product node), in assignment order.
     field_queries = (
         ("price", ".//div[contains(@class, 'price')]//text()"),
         ("title", ".//div[contains(@class, 'title')]//text()"),
         ("shop", ".//div[contains(@class, 'shop')]//text()"),
         ("image",
          ".//div[contains(@class, 'pic')]//img[contains(@class, 'img')]/@data-src"),
         ("deal", ".//div[contains(@class, 'deal-cnt')]//text()"),
         ("location", ".//div[contains(@class, 'location')]//text()"),
     )

     nodes = response.xpath(
         "//div[@id='mainsrp-itemlist']//div[@class='items'][1]//div[contains(@class, 'item')]"
     )
     for node in nodes:
         item = ProductItem()
         for field, query in field_queries:
             matches = node.xpath(query).extract()
             item[field] = ''.join(matches).strip()
         yield item
Пример #6
0
 def parse(self, response):
     """Yield one ``ProductItem`` per product node of the first result list.

     ``title`` and ``shop`` are the joined, stripped text of their nodes.
     ``location``, ``price`` and ``deal`` are the first matching text node,
     or ``None`` when nothing matches.  ``href`` is the title link prefixed
     with ``'https:'``, or ``None`` when the link is missing.

     Args:
         response: Response object exposing an ``xpath()`` method.

     Yields:
         ProductItem: populated with the fields described above.
     """
     products = response.xpath(
         '//div[@id="mainsrp-itemlist"]//div[@class="items"][1]//div[contains(@class,"item")]')
     for product in products:
         item = ProductItem()
         item['title'] = ''.join(
             product.xpath('.//div[contains(@class,"title")]//text()').extract()
         ).strip()
         item['shop'] = ''.join(
             product.xpath('.//div[contains(@class,"shop")]//text()').extract()
         ).strip()
         item['location'] = product.xpath(
             './/div[contains(@class,"location")]//text()').extract_first()
         item['price'] = product.xpath(
             './/div[contains(@class,"price")]//strong/text()').extract_first()
         item['deal'] = product.xpath(
             './/div[contains(@class,"deal-cnt")]//text()').extract_first()

         # Concatenating 'https:' with None would raise TypeError and stop
         # the generator, so a missing link is stored as None instead.
         href = product.xpath(
             './/div[contains(@class,"title")]/a/@href').extract_first()
         item['href'] = 'https:' + href if href is not None else None
         yield item
Пример #7
0
def parse(self, response):
    """Yield one ``ProductItem`` per product node of the first result list.

    ``price``, ``item``, ``shop`` and ``image`` are the joined, stripped
    matches of their queries; ``deal`` and ``location`` are the first
    matching text node, or ``None`` when nothing matches.

    Args:
        response: Response object exposing an ``xpath()`` method.

    Yields:
        ProductItem: populated with the fields described above.
    """
    # Select every product node by calling xpath() on the response.
    products = response.xpath(
        '//div[@id="mainsrp-itemlist"]//div[@class="items"][1]//div[contains(@class, "item")]')
    for product in products:    # Handle the matched products one by one.
        item = ProductItem()
        item['price'] = ''.join(product.xpath('.//div[contains(@class, "price")]//text()').extract()).strip()   # price
        # NOTE(review): this field was labelled as the product name, yet the
        # query selects nested divs whose class contains "item" -- confirm
        # whether a "title" query was intended.
        item['item'] = ''.join(product.xpath('.//div[contains(@class, "item")]//text()').extract()).strip()
        item['shop'] = ''.join(product.xpath('.//div[contains(@class, "shop")]//text()').extract()).strip()
        # Read the image URL from the <img> attribute; the text nodes of the
        # "pic" container do not hold a URL.
        item['image'] = ''.join(product.xpath(
            './/div[contains(@class, "pic")]//img[contains(@class, "img")]/@data-src').extract()).strip()   # image
        item['deal'] = product.xpath('.//div[contains(@class, "deal-cnt")]//text()').extract_first()
        item['location'] = product.xpath('.//div[contains(@class, "location")]//text()').extract_first()
        yield item  # Hand the populated ProductItem to the caller.
 def parse(self, response):
     """Yield a ``ProductItem`` for each ``<li>`` under ``#J_goodsList``.

     ``price``, ``title``, ``shop`` and ``image`` are the joined, stripped
     matches of positional XPath queries; ``deal`` is the first matching
     text node, or ``None`` when nothing matches.
     """

     def joined(node, query):
         # Concatenate all matches of ``query`` under ``node`` and trim them.
         return ''.join(node.xpath(query).extract()).strip()

     entries = response.xpath('//*[@id="J_goodsList"]/ul/li')
     for entry in entries:
         item = ProductItem()
         item['price'] = joined(entry, './/div/div[3]/strong/i/text()')
         item['title'] = joined(entry, './/div/div[4]/a/em//text()')
         item['shop'] = joined(entry, './/div/div[7]/span/a//text()')
         item['image'] = joined(entry, './/div/div[1]/a/img/@src')

         deal_nodes = entry.xpath('.//div/div[5]/strong/a//text()')
         item['deal'] = deal_nodes.extract_first()
         yield item
Пример #9
0
    def parse(self, response):
        """Yield one ``ProductItem`` per ``<li>`` under ``#component_59``.

        A missing node yields ``''`` for ``image``, ``price`` and ``deal``
        instead of raising ``AttributeError`` on ``None.strip()``, which
        would end the generator and lose the remaining products of the page.

        Args:
            response: Response object exposing ``xpath()`` and ``urljoin()``.

        Yields:
            ProductItem: populated with ``image``, ``price``, ``title``,
            ``deal``, ``shop`` and ``location``.
        """
        products = response.xpath('//*[@id="component_59"]/li')
        for product in products:
            item = ProductItem()

            image_src = product.xpath(
                './/a[@class="pic"]/img/@src'
            ).extract_first(default='').strip()
            # Only resolve a real value: urljoin('') would return the page URL.
            item['image'] = response.urljoin(image_src) if image_src else ''

            # The slice drops the first character of the price text
            # (presumably a currency sign -- confirm against the page).
            item['price'] = product.xpath(
                './/p[@class="price"]/span/text()'
            ).extract_first(default='').strip()[1:]
            item['title'] = ''.join(
                product.xpath(
                    './/p[@class="name"]/a/text()').extract()).strip()
            # The slice drops the last three characters of the text
            # (presumably a fixed trailing label -- confirm against the page).
            item['deal'] = product.xpath(
                './/p[@class="star"]/a/text()'
            ).extract_first(default='').strip()[:-3]
            item['shop'] = product.xpath(
                './/p[@class="link"]/a/text()').extract_first()
            # No location is scraped here; a fixed placeholder is stored.
            item['location'] = 'no localtion'

            yield item
Пример #10
0
    def parse(self, response):
        """Yield one ``ProductItem`` per product node of the first result list.

        The item is printed and yielded inside the loop, so every product is
        emitted and an empty result list yields nothing.

        Args:
            response: Response object exposing an ``xpath()`` method.

        Yields:
            ProductItem: populated with ``price``, ``title``, ``shop`` and
            ``image``, each the joined, stripped matches of its query.
        """
        print('##############3.###################')
        products = response.xpath(
            '//div[@id="mainsrp-itemlist"]//div[@class="items"][1]//div[contains(@class,"item")]'
        )
        for product in products:
            item = ProductItem()
            item['price'] = ''.join(
                product.xpath('.//div[contains(@class,"price")]//text()').
                extract()).strip()
            item['title'] = ''.join(
                product.xpath('.//div[contains(@class,"title")]//text()').
                extract()).strip()
            item['shop'] = ''.join(
                product.xpath('.//div[contains(@class,"shop")]//text()').
                extract()).strip()
            item['image'] = ''.join(
                product.xpath(
                    './/div[@class="pic"]/a/img/@data-src').extract()).strip()

            print(item)
            yield item
Пример #11
0
    def parse(self, response):
        """Yield ``ProductItem`` objects parsed from data embedded in the page.

        The body is decoded as UTF-8, the text between ``"itemlist":`` and
        ``g_srp_loadCss`` is isolated, the ``"auctions"`` array is cut out of
        it, and the individual fields are collected with regular expressions.

        Args:
            response: Response object exposing the raw ``body`` bytes.

        Yields:
            ProductItem: populated with ``image``, ``price``, ``title``,
            ``shop`` and ``location``.

        Raises:
            IndexError: if the ``"itemlist"`` or ``"auctions"`` section is
                not found in the body.
        """
        res = str(response.body, encoding='utf-8')
        result = re.findall(r'"itemlist":(.*?);\n\s+g_srp_loadCss', res,
                            re.S)[0]
        data = re.findall(r'"auctions":\[(.*?)\],"recommendAuctions"', result,
                          re.S)[0]

        titles = re.findall(r'"pid":"","title":"(.*?)","raw_title"', data, re.S)
        pic_urls = re.findall(r'"pic_url":"(.*?)","detail_url"', data, re.S)
        prices = re.findall(r'"view_price":"(.*?)","view_fee"', data, re.S)
        locations = re.findall(r'"item_loc":"(.*?)","', data, re.S)
        shops = re.findall(r'"nick":"(.*?)","', data, re.S)

        # NOTE(review): zip() stops at the shortest list and pairs values by
        # position only; if one field is absent for an auction the remaining
        # values shift -- confirm every auction carries all five fields.
        for title, pic_url, price, location, shop in zip(
                titles, pic_urls, prices, locations, shops):
            item = ProductItem()
            item['image'] = 'https:' + pic_url.strip()
            item['price'] = price.strip()
            # ``[^>]*`` keeps the match inside a single opening tag; a greedy
            # ``.*`` would also swallow the text between two tags.
            item['title'] = re.sub(r'<span[^>]*>|</span>', '', title).strip()
            item['shop'] = shop.strip()
            item['location'] = location.strip()
            yield item