def obj_photos(self):
    """Build the list of photos attached to the current item.

    The number of photos is read from ``//NbPhotos``; for each 1-based index
    ``i`` the text of ``//LienImage<i>`` is fetched, with the size parameters
    ``w=69&h=52`` rewritten to ``w=786&h=481``. Links that do not start with
    ``http`` are prefixed with the entreparticuliers.com host.

    :return: list of ``HousingPhoto`` objects, one per photo link.
    """
    # CleanDecimal is expected to yield a Decimal (see the woob filter docs),
    # which range() refuses on Python 3; convert to a plain int first.
    count = int(CleanDecimal('//NbPhotos')(self))

    photos = []
    for i in range(1, count + 1):
        img = CleanText('//LienImage%s' % i,
                        replace=[(u'w=69&h=52', u'w=786&h=481')])(self)
        # Relative links are resolved against the site host.
        url = img if img.startswith('http') else u'http://www.entreparticuliers.com%s' % img
        photos.append(HousingPhoto(url))
    return photos
def obj__sublabel(self):
    """Return the second part of the label, without its leading ``'N '``.

    The value is taken from the own text of ``./td[2]`` (children excluded).
    It is used later on InvestmentPage to determine the account index.
    """
    prefix = 'N '
    text = CleanText('./td[2]', children=False)(self)
    # Drop the 'N ' marker at the beginning when present.
    return text[len(prefix):] if text.startswith(prefix) else text
def parse(self, el):
    """Fill ``self.env`` with the recipient fields parsed from the element.

    Sets ``category``, ``id``, ``iban``, ``bank_name`` and ``label``.
    Three layouts are handled, selected from the ``value`` attribute:

    * first character ``'I'``: internal recipient, resolved against the
      browser's accounts and loans lists;
    * second character ``'-'``: usual external recipient, whose text reads
      ``<label> - <id> - <bank>``;
    * anything else: corner case whose text reads ``<id> - <label> - ...``.

    :param el: not used by this method.
    :raises SkipItem: when a usual external recipient has no label.
    """
    value = Attr('.', 'value')(self)
    is_internal = value[0] == 'I'
    self.env['category'] = u'Interne' if is_internal else u'Externe'

    if is_internal:
        # TODO use after 'I'?
        # The pattern requires at least one digit in the identifier.
        wanted = Regexp(CleanText('.'), r'- (\w+\d\w+)')(self)
        browser = self.page.browser
        candidates = list(browser.get_accounts_list())
        candidates += list(browser.get_loans_list())
        found = [acc for acc in candidates if wanted in acc.id]
        # Exactly one known account must contain the extracted identifier.
        assert len(found) == 1
        account = found[0]
        self.env['id'] = account.id
        self.env['iban'] = account.iban
        self.env['bank_name'] = u"Caisse d'Épargne"
        self.env['label'] = account.label
        return

    if value[1] == '-':
        # Usual case
        text = CleanText('.')(self)
        if text.startswith('- '):
            self.logger.warning('skipping recipient without a label: %r', text)
            raise SkipItem()

        # <recipient name> - <account number or iban> - <bank name (optional)> <optional last dash>
        parsed = re.match('(?P<label>.+) - (?P<id>[^-]+) -(?P<bank> [^-]*)?-?$', text)
        assert parsed
        identifier = parsed.group('id')
        bank = parsed.group('bank')
        if bank:
            bank = bank.strip()
        self.env['id'] = identifier
        self.env['iban'] = identifier
        # A missing or blank bank name is reported as NotAvailable.
        self.env['bank_name'] = bank or NotAvailable
        self.env['label'] = parsed.group('label')
        return

    # Corner case: the identifier comes first and the label second.
    parsed = re.match('(?P<id>.+) - (?P<label>[^-]+) -( [^-]*)?-?$', CleanText('.')(self))
    assert parsed
    self.env['id'] = parsed.group('id')
    self.env['iban'] = NotAvailable
    self.env['bank_name'] = NotAvailable
    self.env['label'] = parsed.group('label')
def obj_url(self):
    """Return the URL of the current item.

    The ``og:url`` meta tag is used when available; otherwise the value of
    the ``_JobUrl`` JavaScript variable found in the page scripts is used.
    A value that does not start with ``http`` is treated as a path on
    www.adecco.fr and turned into an absolute URL.
    """
    url = CleanText('//meta[@property="og:url"]/@content', default=None)(self)
    if url is None:
        url = JSVar(CleanText('//script'), var='_JobUrl')(self)
    if not url.startswith('http'):
        # Include the scheme: a bare 'www.adecco.fr/...' string is not an
        # absolute URL and would still fail the startswith('http') check.
        url = 'https://www.adecco.fr%s' % url
    return url
def on_load(self):
    """Inspect the loaded page and raise the matching browser error.

    :raises BrowserUnavailable: when the first ``h1`` starts with
        'Le service est moment'; the message is the first ``h4`` text,
        or the ``h1`` text when the ``h4`` text is empty.
    :raises LoggedOut: otherwise, unless ``self.browser.no_login`` is truthy.
    """
    doc = self.doc
    heading = CleanText('//h1[1]')(doc)
    if heading.startswith('Le service est moment'):
        detail = CleanText('//h4[1]')(doc)
        raise BrowserUnavailable(detail if detail else heading)

    if self.browser.no_login:
        return
    raise LoggedOut()
def obj_photos(self):
    """Collect the photos referenced by the current item.

    ``//NbPhotos`` gives the number of photos. Each ``//LienImage<i>``
    (``i`` starting at 1) provides a link whose ``w=69&h=52`` size
    parameters are replaced by ``w=786&h=481``. A link not starting with
    ``http`` is prefixed with ``http://www.entreparticuliers.com``.

    :return: list of ``HousingPhoto`` objects in index order.
    """
    # range() needs a real integer on Python 3, while CleanDecimal is
    # expected to return a Decimal (see the woob filter docs).
    nb_photos = int(CleanDecimal('//NbPhotos')(self))

    photos = []
    for i in range(1, nb_photos + 1):
        img = CleanText('//LienImage%s' % i,
                        replace=[(u'w=69&h=52', u'w=786&h=481')])(self)
        if img.startswith('http'):
            url = img
        else:
            # Site-relative link: prepend the host.
            url = u'http://www.entreparticuliers.com%s' % img
        photos.append(HousingPhoto(url))
    return photos
def get_author(self):
    """Return the author text, without a leading ``'Par '``.

    The text is read from the element returned by
    ``self.get_element_author()``. An empty string is returned when any
    step raises ``AttributeError``.
    """
    prefix = 'Par '
    try:
        name = CleanText('.')(self.get_element_author())
        if not name.startswith(prefix):
            return name
        # Keep everything after the first 'Par '.
        return name[len(prefix):]
    except AttributeError:
        return ''
def on_load(self):
    """Raise the browser error that corresponds to the loaded page.

    :raises BrowserUnavailable: if the first ``h1`` text begins with
        'Le service est moment'. The first ``h4`` text is used as the
        message, falling back to the ``h1`` text when it is empty.
    :raises LoggedOut: in the other case, when ``self.browser.no_login``
        is falsy.
    """
    title = CleanText('//h1[1]')(self.doc)
    service_down = title.startswith('Le service est moment')

    if service_down:
        message = CleanText('//h4[1]')(self.doc) or title
        raise BrowserUnavailable(message)

    login_expected = not self.browser.no_login
    if login_expected:
        raise LoggedOut()
def obj_url(self):
    """Return the absolute URL of the current item.

    Sources, in order of preference: the ``og:url`` meta tag, then the
    ``_JobUrl`` JavaScript variable from the page scripts. When the value
    does not start with ``http`` it is treated as a path on www.adecco.fr.
    """
    url = CleanText('//meta[@property="og:url"]/@content', default=None)(self)
    if url is None:
        url = JSVar(CleanText('//script'), var='_JobUrl')(self)
    if url.startswith('http'):
        return url
    # The scheme is required for the result to be an absolute URL;
    # 'www.adecco.fr/...' alone is only a scheme-less string.
    return 'https://www.adecco.fr%s' % url
def obj_type(self):
    """Return the account type matching the row.

    The text of the 'type' column is matched against ``self.TYPE``. When
    that column does not exist (``ColumnNotFound``), the 'label' field is
    matched against ``self.TYPE_BY_LABELS`` instead. The value of the first
    key that the text starts with is returned, or
    ``Account.TYPE_UNKNOWN`` when no key matches.
    """
    try:
        text, mapping = CleanText(TableCell('type'))(self), self.TYPE
    except ColumnNotFound:
        text, mapping = Field('label')(self), self.TYPE_BY_LABELS

    matches = (kind for prefix, kind in mapping.items() if text.startswith(prefix))
    return next(matches, Account.TYPE_UNKNOWN)
def obj_type(self):
    """Determine the account type from the row.

    Uses the 'type' column text with the ``self.TYPE`` mapping, or, if the
    column is missing (``ColumnNotFound``), the 'label' field with the
    ``self.TYPE_BY_LABELS`` mapping. The first mapping key that is a prefix
    of the text selects the result; ``Account.TYPE_UNKNOWN`` is returned
    when none is.
    """
    try:
        candidate, table = CleanText(TableCell('type'))(self), self.TYPE
    except ColumnNotFound:
        candidate, table = Field('label')(self), self.TYPE_BY_LABELS

    result = Account.TYPE_UNKNOWN
    for prefix, account_type in table.items():
        if candidate.startswith(prefix):
            result = account_type
            break
    return result
def obj_income(self):
    """Return True unless the ``nbPrice`` span text starts with '−'."""
    amount_text = CleanText('.//span[has-class("nbPrice")]')(self)
    is_negative = amount_text.startswith('−')
    return not is_negative
def condition(self):
    """Return False when the ``td`` text starts with the no-information notice.

    The notice is the text 'Aucune information disponible'; True is returned
    for any other text.
    """
    cell_text = CleanText('td')(self)
    if cell_text.startswith('Aucune information disponible'):
        return False
    return True
def obj_url(self):
    """Return the detail link of the item as an absolute URL.

    The link is the text of ``./LienDetail``; when it does not start with
    ``http`` it is prefixed with ``http://www.entreparticuliers.com``.
    """
    link = CleanText('./LienDetail')(self)
    if link.startswith('http'):
        return link
    return u'http://www.entreparticuliers.com%s' % link
def obj_income(self):
    """Tell whether the ``nbPrice`` span text lacks a leading '−'.

    :return: False when the text starts with '−', True otherwise.
    """
    text = CleanText('.//span[has-class("nbPrice")]')(self)
    if text.startswith('−'):
        return False
    return True
def obj_url(self):
    """Build the absolute URL from the ``./LienDetail`` text.

    A value already starting with ``http`` is returned unchanged; any other
    value is appended to ``http://www.entreparticuliers.com``.
    """
    detail = CleanText('./LienDetail')(self)
    is_absolute = detail.startswith('http')
    return detail if is_absolute else u'http://www.entreparticuliers.com%s' % detail
def condition(self):
    """Return True unless the ``td`` text starts with 'Aucune information disponible'."""
    content = CleanText('td')(self)
    has_no_information = content.startswith('Aucune information disponible')
    return not has_no_information