Exemplo n.º 1
0
            def parse(self, el):
                """Fill ``rooms``, ``bedrooms`` and ``area`` in ``self.env``.

                Every ``<li>`` of the ``item-tags`` list found under ``el`` is
                classified by its text: a tag mentioning "chambre" provides the
                bedrooms value, a tag mentioning "pièce" provides the rooms
                value, and any other tag is parsed as the area. Keys for which
                no tag is found keep the ``NotLoaded`` placeholder.
                """
                tags = el.xpath(
                    './/div[@class="clearfix"]/ul[has-class("item-tags")]/li'
                )
                for key in ('rooms', 'bedrooms', 'area'):
                    self.env[key] = NotLoaded

                for tag in tags:
                    text = CleanText('.')(tag).lower()
                    if 'chambre' in text:
                        self.env['bedrooms'] = CleanDecimal('./strong')(tag)
                    elif 'pièce' in text:
                        self.env['rooms'] = CleanDecimal('./strong')(tag)
                    else:
                        # Area: keep only the number leading the tag text.
                        leading_number = Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
                        self.env['area'] = CleanDecimal(leading_number)(tag)
Exemplo n.º 2
0
            def parse(self, el):
                """Fill ``rooms``, ``bedrooms`` and ``area`` in ``self.env``.

                The three keys start as ``NotAvailable``; each ``<li>`` of the
                ``item-tags`` list under ``el`` then overrides one of them:
                "chambre" tags set bedrooms, "pièce" tags set rooms, and every
                other tag is parsed as the area.
                """
                tags = el.xpath(
                    './/div[@class="clearfix"]/ul[has-class("item-tags")]/li'
                )
                for key in ('rooms', 'bedrooms', 'area'):
                    self.env[key] = NotAvailable

                for tag in tags:
                    text = CleanText('.')(tag).lower()
                    if 'chambre' in text:
                        self.env['bedrooms'] = CleanDecimal('./strong')(tag)
                        continue
                    if 'pièce' in text:
                        self.env['rooms'] = CleanDecimal('./strong')(tag)
                        continue
                    # Anything else is treated as the area: parse the number
                    # that leads the tag text.
                    leading_number = Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
                    self.env['area'] = CleanDecimal(leading_number)(tag)
Exemplo n.º 3
0
        def parse(self, el):
            """Fill ``rooms``, ``bedrooms`` and ``area`` in ``self.env``.

            The three keys default to ``NotAvailable``. Each ``<li>`` of the
            ``item-tags`` list under ``el`` is classified by its text:

            * contains "chambre" -> ``bedrooms`` (number in ``<strong>``)
            * contains "pièce"   -> ``rooms`` (number in ``<strong>``)
            * contains " m²" but not "le m²" -> ``area`` (leading number)

            Tags matching none of these rules are ignored.
            """
            rooms_bedrooms_area = el.xpath(
                './/ul[has-class("item-tags")]/li'
            )
            self.env['rooms'] = NotAvailable
            self.env['bedrooms'] = NotAvailable
            self.env['area'] = NotAvailable

            for item in rooms_bedrooms_area:
                text = CleanText('.')(item)
                lowered = text.lower()
                if 'chambre' in lowered:
                    name = 'bedrooms'
                    value = CleanDecimal('./strong')(item)
                elif 'pièce' in lowered:
                    name = 'rooms'
                    value = CleanDecimal('./strong')(item)
                elif ' m²' in text and 'le m²' not in text:
                    name = 'area'
                    value = CleanDecimal(
                        Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
                    )(item)
                else:
                    # Unrecognised tag (for instance one containing "le m²"):
                    # skip it instead of storing an unbound or stale value
                    # under the raw tag text.
                    continue
                self.env[name] = value
Exemplo n.º 4
0
 def obj_type(self):
     """Derive the post type from the housing URL and page details.

     "colocation" URLs are sharings; "location" URLs are rentals, furnished
     when the "meublé" criterion of the description list reads "oui";
     "vente" URLs are sales, or viager when the contact button's
     ``data-offertransactiontype`` is ``'4'``. Any other URL yields
     ``NotAvailable``.
     """
     url = BrowserURL('housing', _id=Env('_id'))(self)

     if 'colocation' in url:
         return POSTS_TYPES.SHARING

     if 'location' in url:
         furnished = False
         for criterion in XPath('//ul[@itemprop="description"]/li')(self):
             label = CleanText('./div[has-class("criteria-label")]')(criterion)
             if label.lower() != "meublé":
                 continue
             # No early exit: the last "meublé" criterion wins.
             answer = CleanText('./div[has-class("criteria-value")]')(criterion)
             furnished = answer.lower() == 'oui'
         return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT

     if 'vente' in url:
         transaction = Attr(
             '//button[has-class("offer-contact-vertical-phone")][1]',
             'data-offertransactiontype'
         )(self)
         return POSTS_TYPES.VIAGER if transaction == '4' else POSTS_TYPES.SALE

     return NotAvailable
Exemplo n.º 5
0
            def obj_type(self):
                """Resolve the account type from the URL, label, category or details page.

                Lookups go through ``self.page.ACCOUNT_TYPES`` in this order:
                URL path segments (last to first), words of the label, then the
                ``<h4>`` text of the preceding "master" row. If none matches,
                a details page that is a ``LoanPage`` means a loan; otherwise
                the type is unknown.
                """
                # card url is /compte/cav/xxx/carte/yyy so reverse to match "carte" before "cav"
                for segment in reversed(Field('url')(self).lower().split('/')):
                    found = self.page.ACCOUNT_TYPES.get(segment)
                    if found:
                        return found

                label = Field('label')(self)
                for token in label.replace('_', ' ').lower().split():
                    found = self.page.ACCOUNT_TYPES.get(token)
                    if found:
                        return found

                heading = CleanText(
                    './preceding-sibling::tr[has-class("list--accounts--master")]//h4'
                )(self)
                found = self.page.ACCOUNT_TYPES.get(heading.lower())
                if found:
                    return found

                details = Async('details').loaded_page(self)
                return Account.TYPE_LOAN if isinstance(details, LoanPage) else Account.TYPE_UNKNOWN
Exemplo n.º 6
0
 def obj_utilities(self):
     """Return ``UTILITIES.INCLUDED`` when the teaser price text mentions
     "charges comprises" (case-insensitive), ``UTILITIES.EXCLUDED`` otherwise.
     """
     price_text = CleanText(
         './/strong[has-class("TeaserOffer-price-num")]')(self).lower()
     included = "charges comprises" in price_text
     return UTILITIES.INCLUDED if included else UTILITIES.EXCLUDED
Exemplo n.º 7
0
 def obj_type(self):
     """Derive the post type from the housing URL and page details.

     "colocation" URLs are sharings; "location" URLs are rentals, furnished
     when the "meublé" criterion (``<span>`` label/value pair) reads "oui";
     "vente" URLs are sales, or viager when the contact button's
     ``data-offertransactiontype`` is ``'4'``. Any other URL yields
     ``NotAvailable``.
     """
     url = BrowserURL('housing', _id=Env('_id'))(self)

     if 'colocation' in url:
         return POSTS_TYPES.SHARING

     if 'location' in url:
         furnished = False
         for criterion in XPath('//ul[@itemprop="description"]/li')(self):
             label = CleanText('./span[has-class("criteria-label")]')(criterion)
             if label.lower() != "meublé":
                 continue
             # No early exit: the last "meublé" criterion wins.
             answer = CleanText('./span[has-class("criteria-value")]')(criterion)
             furnished = answer.lower() == 'oui'
         return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT

     if 'vente' in url:
         transaction = Attr(
             '//button[has-class("offer-contact-vertical-phone")][1]',
             'data-offertransactiontype')(self)
         return POSTS_TYPES.VIAGER if transaction == '4' else POSTS_TYPES.SALE

     return NotAvailable
Exemplo n.º 8
0
    def find_account(self, acclabel, accowner):
        """Find the identifiers of the account matching a label and an owner.

        Returns ``False`` when the page shows the "indisponible pour le
        moment" notice. Otherwise returns, for the first matching link, a
        list made of the two numbers captured after ``indiceCompte`` and
        ``idRacine`` plus the text of the preceding cells. ``onclick`` links
        are matched on label and owner; ``href`` links on label only.
        Returns ``None`` when no link matches.
        """
        # first name and last name may not be ordered the same way on market site...
        accowner = sorted(accowner.lower().split())

        # Check if history is present
        notice = self.doc.xpath('//body/p[contains(text(), "indisponible pour le moment")]')
        if CleanText(default=None).filter(notice):
            return False

        for link in self.doc.xpath('//a[contains(@onclick, "indiceCompte")]'):
            self.logger.debug("get investment from onclick")

            label = CleanText('.')(link)
            owner_text = CleanText('./ancestor::tr/preceding-sibling::tr[@class="LnMnTiers"][1]')(link)
            owner_words = sorted(owner_text.lower().split())
            if label != acclabel or owner_words != accowner:
                continue

            found = re.search(r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)', Attr('.', 'onclick')(link))
            ids = list(found.groups())
            ids.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
            self.logger.debug("assign value to ids: {}".format(ids))
            return ids

        for link in self.doc.xpath('//a[contains(@href, "indiceCompte")]'):
            self.logger.debug("get investment from href")
            if CleanText('.')(link) != acclabel:
                continue

            found = re.search(r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)', Attr('.', 'href')(link))
            ids = list(found.groups())
            ids.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
            self.logger.debug("assign value to ids: {}".format(ids))
            return ids

        return None
Exemplo n.º 9
0
 def obj_valuation(self):
     """Return the row valuation, negated for debit sections.

     The valuation is negated when it is truthy and the ``<h2>`` preceding
     the enclosing "Histo" div contains one of ``self.page.DEBIT_WORDS``.
     """
     amount = MyDecimal(TableCell('valuation', default=None))(self)
     heading = CleanText(
         './ancestor::div[contains(@id, "Histo")][1]/preceding-sibling::h2[1]'
     )(self)
     if not amount:
         return amount

     heading = heading.lower()
     for word in self.page.DEBIT_WORDS:
         if word in heading:
             return -amount
     return amount
Exemplo n.º 10
0
 def on_load(self):
     """Raise if the page displays a global error message.

     Raises ``BrowserIncorrectPassword`` when the message contains one of
     the known wrong-password phrases, ``BrowserUnavailable`` for any other
     message. Does nothing when there is no message.
     """
     error_msg = CleanText('//li[@class="globalErreurMessage"]')(self.doc)
     if not error_msg:
         return

     # Catch wrongpass accordingly
     lowered = error_msg.lower()
     for phrase in ("mot de passe incorrect", "votre compte n'est plus utilisable"):
         if phrase in lowered:
             raise BrowserIncorrectPassword(error_msg)

     raise BrowserUnavailable(error_msg)
Exemplo n.º 11
0
 def obj_utilities(self):
     """Return ``UTILITIES.INCLUDED`` when the offer price paragraph mentions
     "charges comprises" (case-insensitive), ``UTILITIES.EXCLUDED`` otherwise.
     """
     price_text = CleanText('//p[has-class("OfferTop-price")]')(self).lower()
     included = "charges comprises" in price_text
     return UTILITIES.INCLUDED if included else UTILITIES.EXCLUDED
Exemplo n.º 12
0
 def parse(self, el):
     """Compute availability date, condition and raw text for the row.

     Stores in ``self.env``:

     * ``availability_date``: the availability text parsed as a day-first
       date, or ``NotAvailable`` when it cannot be parsed;
     * ``condition``: ``Pocket.CONDITION_DATE`` when a date was parsed,
       otherwise the ``self.page.CONDITIONS`` entry for the first word of
       the lower-cased text, defaulting to ``Pocket.CONDITION_UNKNOWN``;
     * ``matching_txt``: the raw availability text.
     """
     txt = CleanText(
         TableCell('availability')(self)[0].xpath('./span'))(self)
     availability_date = Date(
         dayfirst=True, default=NotAvailable).filter(txt)

     if availability_date:
         condition = Pocket.CONDITION_DATE
     else:
         words = txt.lower().split()
         # An empty availability text has no first word: report the
         # condition as unknown instead of raising IndexError.
         if words:
             condition = self.page.CONDITIONS.get(words[0], Pocket.CONDITION_UNKNOWN)
         else:
             condition = Pocket.CONDITION_UNKNOWN

     self.env['availability_date'] = availability_date
     self.env['condition'] = condition
     self.env['matching_txt'] = txt
Exemplo n.º 13
0
 def obj_utilities(self):
     """Tell whether utilities are included, based on the teaser price text.

     ``UTILITIES.INCLUDED`` if the text contains "charges comprises"
     (case-insensitive), ``UTILITIES.EXCLUDED`` otherwise.
     """
     price_filter = CleanText('.//strong[has-class("TeaserOffer-price-num")]')
     if "charges comprises" not in price_filter(self).lower():
         return UTILITIES.EXCLUDED
     return UTILITIES.INCLUDED
Exemplo n.º 14
0
 def condition(self):
     """Decide whether this list item should be kept.

     The item needs a link matching ``/annonces/(.*)``. When the query type
     is ``POSTS_TYPES.RENT``, items whose title mentions "meublé" are
     rejected as well. Returns the falsy regexp result when there is no
     matching link, otherwise the furnished check as a boolean.
     """
     title = CleanText('./div[has-class("box-header")]/a[@class="title-item"]')(self)
     if self.env['query_type'] == POSTS_TYPES.RENT:
         furnished_ok = 'meublé' not in title.lower()
     else:
         furnished_ok = True

     annonce_id = Regexp(Link('./div/a[@class="item-title"]'), '/annonces/(.*)', default=None)(self)
     return annonce_id and furnished_ok
Exemplo n.º 15
0
 def parse(self, el):
     """Load the details page and derive the account type from it.

     Reads the cell following the "Cadre fiscal" header on the details
     page, maps its lower-cased text through ``self.page.TYPES``
     (defaulting to ``Account.TYPE_UNKNOWN``) into ``self.env['type']``,
     and stores the details page in ``self.env['page']``.

     Raises ``SkipItem`` when the "Cadre fiscal" cell is missing or empty.
     """
     page = Async('details').loaded_page(self)
     # Single-line literal: the former backslash continuation inside the
     # string embedded source indentation in the XPath expression.
     fiscal_cell = page.doc.xpath(
         '//th[contains(text(), "Cadre fiscal")]/following-sibling::td[1]')
     # Named ``account_type`` so the ``type`` builtin is not shadowed.
     account_type = CleanText().filter(fiscal_cell)
     if not account_type:
         raise SkipItem()
     self.env['type'] = self.page.TYPES.get(account_type.lower(),
                                            Account.TYPE_UNKNOWN)
     self.env['page'] = page
Exemplo n.º 16
0
            def parse(self, el):
                """Fill ``rooms``, ``bedrooms`` and ``area`` in ``self.env``.

                Every ``<li>`` of the ``item-tags`` list inside the item title
                link is classified by its text: "chambre" tags give bedrooms,
                "pièce" tags give rooms, and any other tag is parsed as the
                area. Keys with no matching tag keep ``NotLoaded``.
                """
                tags = el.xpath(
                    './div/a[has-class("item-title")]/ul[has-class("item-tags")]/li'
                )
                for key in ('rooms', 'bedrooms', 'area'):
                    self.env[key] = NotLoaded

                for tag in tags:
                    text = CleanText('.')(tag).lower()
                    if 'chambre' in text:
                        self.env['bedrooms'] = CleanDecimal('.')(tag)
                    elif 'pièce' in text:
                        self.env['rooms'] = CleanDecimal('.')(tag)
                    else:
                        # Area: keep only the number leading the tag text.
                        leading_number = Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
                        self.env['area'] = CleanDecimal(leading_number)(tag)
Exemplo n.º 17
0
 def get_ids(ref, acclabel, accowner):
     """Return the identifiers of the last link matching label and owner.

     ``ref`` names the anchor attribute (interpolated into the XPath and
     read with ``Attr``) that carries ``indiceCompte``/``idRacine``.
     ``accowner`` is compared with the sorted, lower-cased words of the
     owner row, after dropping everything from the first " (" on.
     Returns ``None`` when no link matches. Uses ``self`` from the
     enclosing scope.
     """
     matched = None
     selector = '//a[contains(@%s, "indiceCompte")]' % ref
     for link in self.doc.xpath(selector):
         self.logger.debug("get investment from %s" % ref)
         label = CleanText('.')(link)
         holder = CleanText('./ancestor::tr/preceding-sibling::tr[@class="LnMnTiers"][1]')(link)
         holder_words = sorted(re.sub(r' \(.+', '', holder).lower().split())
         if label != acclabel or holder_words != accowner:
             continue

         # No early exit: a later matching link overrides an earlier one.
         numbers = re.search(r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)', Attr('.', ref)(link))
         matched = list(numbers.groups())
         matched.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
         self.logger.debug("assign value to ids: {}".format(matched))
     return matched
Exemplo n.º 18
0
 def obj_type(self):
     """Derive the post type from the first breadcrumb link.

     "location" links are rentals (furnished when the page title mentions
     "meublé"), "vente" links are sales, "viager" links are viagers;
     anything else yields ``NotAvailable``.
     """
     breadcrumb = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)

     if 'location' in breadcrumb:
         heading = CleanText(
             '//div[has-class("box-header")]/h1[@class="clearfix"]')(self)
         furnished = 'meublé' in heading.lower()
         return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT

     if 'vente' in breadcrumb:
         return POSTS_TYPES.SALE

     if 'viager' in breadcrumb:
         return POSTS_TYPES.VIAGER

     return NotAvailable
Exemplo n.º 19
0
            def parse(self, el):
                """Compute label, signed amount and investments for the row.

                The amount is the first truthy value found in the cells
                following a "Total" cell of the details page, falling back to
                the row's own ``amount`` column. It is negated when the label
                contains one of ``self.page.DEBIT_WORDS``.
                """
                details = Async('details').loaded_page(self)
                label = CleanText(TableCell('label')(self)[0].xpath('./a[1]'))(self)

                # Try to get gross amount
                gross = None
                for cell in details.doc.xpath('//td[em[1][contains(text(), "Total")]]/following-sibling::td'):
                    gross = CleanDecimal('.', default=None)(cell)
                    if gross:
                        break

                if not gross:
                    gross = MyDecimal(TableCell('amount'))(self)

                lowered = label.lower()
                is_debit = any(word in lowered for word in self.page.DEBIT_WORDS)

                self.env['label'] = label
                self.env['amount'] = -gross if is_debit else gross
                self.env['investments'] = list(details.get_investments())
Exemplo n.º 20
0
        def obj_type(self):
            """Map the ad's ``adview/category_id`` to a post type.

            Category 11 is a sharing; category 10 is a rental, furnished when
            the ``furnished`` detail reads "meublé"; every other value,
            including an id that is not a valid integer, is a sale.
            """
            try:
                category = int(Dict('adview/category_id')(self))
            except ValueError:
                category = None

            if category == 11:
                return POSTS_TYPES.SHARING
            if category != 10:
                return POSTS_TYPES.SALE

            furnished = CleanText(PopDetail('furnished', default=' '))(self)
            if furnished.lower() == u'meublé':
                return POSTS_TYPES.FURNISHED_RENT
            return POSTS_TYPES.RENT
Exemplo n.º 21
0
        def obj_type(self):
            """Map the ad's ``adview/category_id`` to a post type.

            Category 11 is a sharing; category 10 is a rental, furnished when
            the ``furnished`` detail reads "meublé"; every other value,
            including an id that is not a valid integer, is a sale.
            """
            try:
                category = int(Dict('adview/category_id')(self))
            except ValueError:
                category = None

            if category == 11:
                return POSTS_TYPES.SHARING

            if category == 10:
                detail = PopDetail('furnished', default=' ')
                is_furnished = CleanText(detail)(self).lower() == u'meublé'
                return POSTS_TYPES.FURNISHED_RENT if is_furnished else POSTS_TYPES.RENT

            return POSTS_TYPES.SALE
Exemplo n.º 22
0
 def get_ids(ref, acclabel, accowner):
     """Return the identifiers of the last link matching label and owner.

     ``ref`` names the anchor attribute (interpolated into the XPath and
     read with ``Attr``) that carries ``indiceCompte``/``idRacine``.
     ``accowner`` is compared with the sorted, lower-cased words of the
     owner row, after dropping everything from the first " (" on.
     Returns ``None`` when no link matches. Uses ``self`` from the
     enclosing scope.
     """
     matched = None
     selector = '//a[contains(@%s, "indiceCompte")]' % ref
     for link in self.doc.xpath(selector):
         self.logger.debug("get investment from %s" % ref)
         label = CleanText('.')(link)
         holder = CleanText(
             './ancestor::tr/preceding-sibling::tr[@class="LnMnTiers"][1]'
         )(link)
         holder_words = sorted(re.sub(r' \(.+', '', holder).lower().split())
         if label != acclabel or holder_words != accowner:
             continue

         # No early exit: a later matching link overrides an earlier one.
         numbers = re.search(
             r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)',
             Attr('.', ref)(link))
         matched = list(numbers.groups())
         matched.append(
             CleanText('./ancestor::td/preceding-sibling::td')(link))
         self.logger.debug("assign value to ids: {}".format(matched))
     return matched
Exemplo n.º 23
0
    def find_account(self, acclabel, accowner):
        """Find the identifiers of the account matching a label and an owner.

        Returns ``False`` when the page shows the "indisponible pour le
        moment" notice. Otherwise returns, for the first matching link, a
        list made of the two numbers captured after ``indiceCompte`` and
        ``idRacine`` plus the text of the preceding cells. ``onclick`` links
        are matched on label and owner; ``href`` links on label only.
        Returns ``None`` when no link matches.
        """
        # first name and last name may not be ordered the same way on market site...
        accowner = sorted(accowner.lower().split())

        # Check if history is present
        notice = self.doc.xpath(
            '//body/p[contains(text(), "indisponible pour le moment")]')
        if CleanText(default=None).filter(notice):
            return False

        for link in self.doc.xpath('//a[contains(@onclick, "indiceCompte")]'):
            self.logger.debug("get investment from onclick")

            label = CleanText('.')(link)
            owner_text = CleanText(
                './ancestor::tr/preceding-sibling::tr[@class="LnMnTiers"][1]')(link)
            owner_words = sorted(owner_text.lower().split())
            if label != acclabel or owner_words != accowner:
                continue

            found = re.search(r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)',
                              Attr('.', 'onclick')(link))
            ids = list(found.groups())
            ids.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
            self.logger.debug("assign value to ids: {}".format(ids))
            return ids

        for link in self.doc.xpath('//a[contains(@href, "indiceCompte")]'):
            self.logger.debug("get investment from href")
            if CleanText('.')(link) != acclabel:
                continue

            found = re.search(r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)',
                              Attr('.', 'href')(link))
            ids = list(found.groups())
            ids.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
            self.logger.debug("assign value to ids: {}".format(ids))
            return ids

        return None
Exemplo n.º 24
0
            def parse(self, el):
                """Compute label, signed amount and investments for the row.

                The amount is the first truthy value found in the cells
                following a "Total" cell of the details page, falling back to
                the row's own ``amount`` column. It is negated when the label
                contains one of ``self.page.DEBIT_WORDS``.
                """
                details = Async('details').loaded_page(self)
                label_cell = TableCell('label')(self)[0]
                label = CleanText(label_cell.xpath('./a[1]'))(self)

                # Try to get gross amount
                gross = None
                total_cells = details.doc.xpath(
                    '//td[em[1][contains(text(), "Total")]]/following-sibling::td'
                )
                for cell in total_cells:
                    gross = CleanDecimal('.', default=None)(cell)
                    if gross:
                        break

                if not gross:
                    gross = MyDecimal(TableCell('amount'))(self)

                lowered = label.lower()
                if any(word in lowered for word in self.page.DEBIT_WORDS):
                    gross = -gross

                self.env['label'] = label
                self.env['amount'] = gross
                self.env['investments'] = list(details.get_investments())
Exemplo n.º 25
0
            def obj_type(self):
                """Resolve the account type from the URL, label, category or details page.

                Lookups go through ``self.page.ACCOUNT_TYPES`` in this order:
                URL path segments (last to first), words of the label, then the
                ``<h4>`` text of the preceding "master" row. If none matches,
                a details page that is a ``LoanPage`` means a loan; otherwise
                the type is unknown.
                """
                # card url is /compte/cav/xxx/carte/yyy so reverse to match "carte" before "cav"
                for segment in reversed(Field('url')(self).lower().split('/')):
                    found = self.page.ACCOUNT_TYPES.get(segment)
                    if found:
                        return found

                label = Field('label')(self)
                for token in label.replace('_', ' ').lower().split():
                    found = self.page.ACCOUNT_TYPES.get(token)
                    if found:
                        return found

                heading = CleanText('./preceding-sibling::tr[has-class("list--accounts--master")]//h4')(self)
                found = self.page.ACCOUNT_TYPES.get(heading.lower())
                if found:
                    return found

                details = Async('details').loaded_page(self)
                if not isinstance(details, LoanPage):
                    return Account.TYPE_UNKNOWN
                return Account.TYPE_LOAN
Exemplo n.º 26
0
 def obj_type(self):
     """Derive the post type from the housing URL and page details.

     "colocation" URLs are sharings; "location" URLs are rentals, furnished
     when the "meublé" criterion of the description list reads "oui";
     "vente" URLs are viagers when the ad text mentions both "viager" and
     "rente", plain sales otherwise. Any other URL yields ``NotAvailable``.
     """
     url = BrowserURL('housing', _id=Env('_id'))(self)

     if 'colocation' in url:
         return POSTS_TYPES.SHARING

     if 'location' in url:
         furnished = False
         for criterion in XPath('//ul[@itemprop="description"]/li')(self):
             label = CleanText('./div[has-class("criteria-label")]')(criterion)
             if label.lower() != "meublé":
                 continue
             # No early exit: the last "meublé" criterion wins.
             answer = CleanText('./div[has-class("criteria-value")]')(criterion)
             furnished = answer.lower() == 'oui'
         return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT

     if 'vente' in url:
         if 'viager' not in self.obj_text(self).lower():
             return POSTS_TYPES.SALE
         if 'rente' in self.obj_text(self).lower():
             return POSTS_TYPES.VIAGER
         return POSTS_TYPES.SALE

     return NotAvailable
Exemplo n.º 27
0
 def obj_valuation(self):
     """Return the row valuation, negated for debit sections.

     The valuation is negated when it is truthy and the ``<h2>`` preceding
     the enclosing "Histo" div contains one of ``self.page.DEBIT_WORDS``.
     """
     amount = MyDecimal(TableCell('valuation', default=None))(self)
     heading = CleanText('./ancestor::div[contains(@id, "Histo")][1]/preceding-sibling::h2[1]')(self)
     if not amount:
         return amount

     heading = heading.lower()
     is_debit = any(word in heading for word in self.page.DEBIT_WORDS)
     return -amount if is_debit else amount
Exemplo n.º 28
0
 def is_agency(self):
     """Return ``True`` unless the agency-name span reads "annonce de
     particulier" (case-insensitive)."""
     agency_name = CleanText('.//span[has-class("item-agency-name")]')(self.el).lower()
     from_individual = 'annonce de particulier' in agency_name
     return not from_individual
Exemplo n.º 29
0
 def is_here(self):
     """Return ``True`` when the "alert-random" div text contains
     "compte inconnu" (case-insensitive)."""
     alert_text = CleanText('//div[contains(@id, "alert-random")]/text()', children=False)
     return "compte inconnu" in alert_text(self.doc).lower()
Exemplo n.º 30
0
 def parse(self, el):
     """Compute availability date, condition and raw text for the row.

     Stores in ``self.env``:

     * ``availability_date``: the availability text parsed as a day-first
       date, or ``NotAvailable`` when it cannot be parsed;
     * ``condition``: ``Pocket.CONDITION_DATE`` when a date was parsed,
       otherwise the ``self.page.CONDITIONS`` entry for the first word of
       the lower-cased text, defaulting to ``Pocket.CONDITION_UNKNOWN``;
     * ``matching_txt``: the raw availability text.
     """
     txt = CleanText(TableCell('availability')(self)[0].xpath('./span'))(self)
     availability_date = Date(dayfirst=True, default=NotAvailable).filter(txt)

     if availability_date:
         condition = Pocket.CONDITION_DATE
     else:
         words = txt.lower().split()
         # An empty availability text has no first word: report the
         # condition as unknown instead of raising IndexError.
         if words:
             condition = self.page.CONDITIONS.get(words[0], Pocket.CONDITION_UNKNOWN)
         else:
             condition = Pocket.CONDITION_UNKNOWN

     self.env['availability_date'] = availability_date
     self.env['condition'] = condition
     self.env['matching_txt'] = txt
Exemplo n.º 31
0
 def is_agency(self):
     """Tell whether the item comes from an agency.

     ``False`` only when the agency-name span contains "annonce de
     particulier" (case-insensitive).
     """
     name_filter = CleanText('.//span[has-class("item-agency-name")]')
     if 'annonce de particulier' in name_filter(self.el).lower():
         return False
     return True
Exemplo n.º 32
0
 def obj_utilities(self):
     """Tell whether utilities are included, based on the offer price text.

     ``UTILITIES.INCLUDED`` if the price paragraph contains "charges
     comprises" (case-insensitive), ``UTILITIES.EXCLUDED`` otherwise.
     """
     price_filter = CleanText('//p[has-class("OfferTop-price")]')
     if "charges comprises" not in price_filter(self).lower():
         return UTILITIES.EXCLUDED
     return UTILITIES.INCLUDED