Пример #1
0
    def parse(self, response):
        """Build one ``XiaozhuItem`` from a listing detail page and yield it.

        Each field is the first text node matched by its XPath query; an
        ``IndexError`` is raised when a query matches nothing.  ``title`` and
        ``address`` are stripped of surrounding whitespace, the other fields
        are stored exactly as extracted.
        """
        item = XiaozhuItem()
        page = Selector(response)

        def first_text(query):
            # First extracted match of ``query``; raises IndexError if none.
            return page.xpath(query).extract()[0]

        # Queries are evaluated in the same order as the item keys below.
        item['title'] = first_text('//h4/em/text()').strip()
        item['address'] = first_text(
            '//div[@class="pho_info"]/p/span[@class="pr5"]/text()').strip()
        item['price'] = first_text('//div[@id="pricePart"]/div/span/text()')
        item['lease_type'] = first_text('//*[@id="introduce"]/li[1]/h6/text()')
        item['suggestion'] = first_text('//*[@id="introduce"]/li[2]/h6/text()')
        item['bed'] = first_text('//*[@id="introduce"]/li[3]/h6/text()')

        yield item
Пример #2
0
    def parse(self, response):
        """Scrape every listing on a search-results page, then queue more pages.

        For each ``<li>`` under ``ul.pic_list.clearfix`` a fresh
        ``XiaozhuItem`` is yielded with the keys ``url``, ``price``,
        ``rent_type``, ``beds``, ``num_of_people``, ``star`` and
        ``comment_num``.  Afterwards a ``Request`` for each of the result
        pages 2..13 is yielded with this method as the callback.

        Raises:
            IndexError: if a listing lacks one of the queried nodes, or the
                slash-separated house description has fewer than three parts.
            ValueError: if the price text is not an integer.
        """

        def digits_as_int(text, default=0):
            # Keep only the digit characters of ``text`` and convert them to
            # an int.  Works on both Python 2 and 3 (``int(filter(...))``
            # breaks on Python 3, where ``filter`` returns an iterator).
            # Falls back to ``default`` when there are no digits at all.
            digits = ''.join(ch for ch in text if ch.isdigit())
            return int(digits) if digits else default

        selector = Selector(response)

        for info in selector.xpath("//ul[@class='pic_list clearfix']/li"):
            # A new item per listing: re-yielding one shared, mutated item
            # would make every yielded result alias the same object.
            item = XiaozhuItem()

            # Query the current <li> (``info``), not the whole list, so each
            # item gets its own link rather than the first listing's link.
            item['url'] = info.xpath('a/@href').extract()[0]
            item['price'] = int(
                info.xpath('div[2]/span[1]/i/text()').extract()[0])

            # Slash-separated description: part 0 is stored verbatim as the
            # rent type, parts 1 and 2 supply the bed and guest counts.
            house = info.xpath(
                'div[2]/div/em/text()').extract()[0].strip().split('/')
            item['rent_type'] = house[0]
            item['beds'] = digits_as_int(house[1])
            item['num_of_people'] = digits_as_int(house[2])

            comment = info.xpath(
                'div[2]/div/em/span/text()').extract()[0].strip()
            # ``> 0`` (not ``>= 0``) is kept from the original: a leading
            # slash is treated the same as no slash at all.
            if comment.find('/') > 0:
                parts = comment.split('/')
                item['star'] = digits_as_int(parts[0])
                item['comment_num'] = digits_as_int(parts[1])
            else:
                item['comment_num'] = digits_as_int(comment)
                item['star'] = 0

            yield item

        # Every parsed page re-queues pages 2..13.
        for i in range(2, 14):
            nexturl = 'http://sh.xiaozhu.com/search-duanzufang-p%s-0/' % i
            yield Request(nexturl, callback=self.parse)
Пример #3
0
 def parse(self,response):
     """Create a ``XiaozhuItem`` and fill its ``title`` from the response.

     Only the lines visible here are documented; no ``yield``/``return`` is
     visible, so the method may continue beyond this excerpt.
     """
     item = XiaozhuItem()
     selector = Selector(response)
     # Absolute XPath from the document root down to the <em> inside the <h4>.
     title = selector.xpath('/html/body/div[3]/div[1]/div[1]/h4/em/text()')
     # NOTE(review): this stores the raw result of ``xpath()``; no
     # ``.extract()`` is applied here -- confirm whether the extracted text
     # was intended.
     item['title'] = title