Exemplo n.º 1
0
    def parse_expose_data(self, response):
        """Build a ``Home`` item from a coming-home expose page.

        The page is read positionally: the first ``<th>`` text node is
        expected to contain ``'Offer no. '`` followed by the offer id, and
        the ``<td>`` text nodes, in document order, supply the other fields.

        Args:
            response: Response for the expose page; its ``url`` and ``body``
                are read.

        Returns:
            The populated ``Home`` item.

        Raises:
            IndexError: If the page yields fewer text nodes than the
                positional lookups need, or one of the ``'Offer no. '`` /
                ``'min. '`` markers is absent.
            ValueError: If the price text cannot be converted to ``float``.
        """
        sel = Selector(response)
        home = Home()

        home['url'] = response.url
        home['source'] = 'coming-home'
        home['furnished'] = True
        home['title'] = sel.xpath('//h2//text()').extract()[0]
        home['source_id'] = str(
            sel.xpath('//table//tr//th//text()').extract()[0]).strip().split(
                'Offer no. ')[1]

        # All remaining fields index into the same list of <td> text nodes,
        # so evaluate the query once instead of once per field.
        cells = sel.xpath('//table//tr//td//text()').extract()

        home['dimensions'] = cells[0]
        home['available_at'] = parse_date(str(cells[1]))
        home['min_time_to_stay'] = str(cells[2]).split('min. ')[1]
        # Keep the part before ",00" and drop the "." separators before
        # converting (presumably "1.200,00<nbsp>..." style prices).
        # NOTE(review): splitting encoded bytes with a str separator only
        # works on Python 2 -- confirm the target interpreter.
        home['rent_price'] = float(
            cells[3].encode('utf-8').split(',00\xc2')[0].replace('.', ''))
        home['deposit'] = cells[5]
        home['location'] = cells[8].strip()
        home['description'] = ' '.join(x.strip() for x in cells)
        home['updated_at'] = date.today().isoformat()
        home['address'] = self.address(response.body)
        home['geolocation'] = coord(home['address'])
        home['allows_pets'] = self.format_allows_pets(home['description'])

        return home
Exemplo n.º 2
0
    def parse_expose_data(self, response):
        """Build a ``Home`` item from an immobilienscout24 expose page.

        Title, id, rooms, availability, rent, geolocation and address are
        always read. Pet policy, minimum stay and dimensions are optional:
        each is filled on a best-effort basis and left unset if reading it
        fails.

        Args:
            response: Response for the expose page; its ``url`` is read and
                its content is queried with XPath.

        Returns:
            The populated ``Home`` item.

        Raises:
            AttributeError: If ``response.url`` does not contain
                ``expose/<digits>`` (``re.match`` returns ``None``).
        """
        sel = Selector(response)
        home = Home()

        home['url'] = response.url
        home['source'] = 'immobilienscout24'
        home['furnished'] = True
        home['title'] = self.to_str(
            sel.xpath("//h1[@id='expose-title']//text()"))
        home['source_id'] = re.match(r'.*expose/(\d+)', response.url).group(1)

        attributes = sel.xpath(
            "//div[@class='criteriagroup print-two-columns']")

        # NOTE(review): the queries below start with "//", which XPath
        # evaluates from the document root rather than relative to
        # `attributes` -- confirm whether ".//" was intended.
        def dd_text(css_class):
            # Text of the <dd> whose class attribute contains `css_class`.
            return self.to_str(
                attributes.xpath(
                    "//dl//dd[contains(@class, '%s')]//text()" % css_class))

        home['rooms'] = self.to_float(
            attributes.xpath("//dl//dd[contains(@class, 'zimmer')]//text()"))
        home['available_at'] = parse_date(dd_text('bezugsfrei'))
        home['rent_price'] = self.format_price(
            sel.xpath("//div[contains(@class, 'mietemonat')]//text()"))
        home['updated_at'] = date.today().isoformat()
        home['geolocation'] = self.geo(
            self.to_str(
                sel.xpath(
                    "//div[@id='half-page-ad-stick-stopper']//script//text()"))
        )
        home['address'] = address(home['geolocation'])

        # Optional fields, in the original assignment order. Each one is
        # attempted independently so a single missing attribute does not
        # discard the others.
        optional_fields = (
            ('allows_pets',
             lambda: self.format_allows_pets(dd_text('haustiere'))),
            ('min_time_to_stay', lambda: dd_text('mindestmietdauer')),
            ('dimensions', lambda: dd_text('wohnflaeche')),
        )
        for field, read_value in optional_fields:
            try:
                home[field] = read_value()
            except Exception:
                # Deliberate best-effort: leave the field unset. Catching
                # Exception (not a bare except) lets SystemExit and
                # KeyboardInterrupt propagate.
                continue

        return home
Exemplo n.º 3
0
 def test_date_range(self):
     """A "start - end" range parses to the ISO form of its start date."""
     parsed = parse_date("01.12.2016 - 31.05.2017")
     self.assertEqual("2016-12-01", parsed)
Exemplo n.º 4
0
 def test_date_parse_from(self):
     """A date prefixed with "from" parses to that date in ISO form."""
     parsed = parse_date("from 30.10.2016")
     self.assertEqual("2016-10-30", parsed)
Exemplo n.º 5
0
 def test_fix_wrong_formats(self):
     """A padded date with a single-digit month is expected to yield today."""
     today_iso = date.today().isoformat()
     parsed = parse_date("  15.9.2018  ")
     self.assertEqual(today_iso, parsed)
Exemplo n.º 6
0
 def test_invalid_date_are_immediately(self):
     """A date string without a year is expected to yield today's date."""
     today_iso = date.today().isoformat()
     parsed = parse_date(" 1.10. ")
     self.assertEqual(today_iso, parsed)
Exemplo n.º 7
0
 def test_date_parse_german_break_format(self):
     """A padded, slash-separated date is read day-first (09/01 -> 9 Jan)."""
     parsed = parse_date(" 09/01/2018 ")
     self.assertEqual("2018-01-09", parsed)
Exemplo n.º 8
0
 def test_date_parse_german_format_not_usual(self):
     """A padded, slash-separated date is read day-first (01/12 -> 1 Dec)."""
     parsed = parse_date(" 01/12/2018 ")
     self.assertEqual("2018-12-01", parsed)
Exemplo n.º 9
0
 def test_date_parse_german_format(self):
     """A padded, dot-separated DD.MM.YYYY date parses to its ISO form."""
     parsed = parse_date(" 01.11.2017 ")
     self.assertEqual("2017-11-01", parsed)
Exemplo n.º 10
0
 def test_date_immediately(self):
     """The literal "immediately" is expected to yield today's date."""
     today_iso = date.today().isoformat()
     parsed = parse_date("immediately")
     self.assertEqual(today_iso, parsed)