class item(ItemElement):
    """Build a ``Housing`` object from one entry of a result list.

    Every XPath below is relative to the entry element.
    """

    def condition(self):
        """Keep the entry only if it contains a ``spanInfosEpc`` div."""
        return bool(XPath('.//div[@id="spanInfosEpc"]', default=False)(self))

    klass = Housing

    # The id is the link target without its site prefixes and extension.
    obj_id = Regexp(
        CleanText('./a/@href',
                  replace=[('/annonces-immobilieres/', ''),
                           ('/location/', '')]),
        '(.*).html')
    obj_type = Env('query_type')
    obj_advert_type = ADVERT_TYPES.PERSONAL

    def obj_house_type(self):
        """Map the alt text of the type icon to a ``HOUSE_TYPES`` value."""
        # Named ``label`` rather than ``type`` so the builtin is not shadowed.
        label = Attr('./a/div/p/span[@class="item type"]/img', 'alt')(self)
        if label == 'Appartement':
            return HOUSE_TYPES.APART
        elif label == 'Maison /villa':
            return HOUSE_TYPES.HOUSE
        elif label == 'Terrain / autreinfosaccesepc':
            return HOUSE_TYPES.LAND
        else:
            return HOUSE_TYPES.OTHER

    def obj_title(self):
        """Return the entry title, or the location text when it is empty."""
        title = CleanText('./a/div/p/span[@class="item title"]')(self)
        if title == "":
            title = CleanText('./a/div/p/span[@class="item loc"]')(self)
        return title

    obj_cost = CleanDecimal(
        CleanText('./a/div/p/span[@class="item prix"]', children=False))
    # Declared once: the original assigned this same constant a second time
    # further down the class body.
    obj_currency = Currency.get_currency(u'€')
    obj_text = Format(
        '%s / %s / %s / %s',
        CleanText('./a/div/p/span[@class="item type"]/img/@alt'),
        CleanText('./a/div/p/span[@id="divnbpieces"]', children=False),
        CleanText('./a/div/p/span[@id="divsurface"]', children=False),
        CleanText('./a/div/p/span[@class="item prix"]/span'))
    obj_location = CleanText(
        './a/div/p/span[@class="item loc"]/text()[position() > 1]')
    obj_area = CleanDecimal(
        './a/div/p/span[@class="item surf"]/text()[last()]')
    obj_rooms = CleanDecimal(
        './a/div/p/span[@class="item nb"]/text()[last()]',
        default=NotAvailable)
    obj_utilities = UTILITIES.UNKNOWN
    obj_url = AbsoluteLink('./a')
def populate_balances(self, accounts):
    """Set balance, currency and coming on each account, then yield it.

    The figures are read from ``self.doc['donnees']['compteSoldesMap']``
    under the key ``account._id``. ``soldeComptable`` and
    ``deviseSoldeComptable`` are used when present; otherwise the
    ``soldeInstantane`` / ``deviseSoldeInstantane`` entries are taken.
    """
    for account in accounts:
        figures = self.doc['donnees']['compteSoldesMap'][account._id]

        raw_balance = figures.get('soldeComptable',
                                  figures.get('soldeInstantane'))
        account.balance = CleanDecimal(replace_dots=True).filter(raw_balance)

        currency_text = figures.get('deviseSoldeComptable',
                                    figures.get('deviseSoldeInstantane'))
        account.currency = Currency.get_currency(currency_text)

        # This filter is built with NotAvailable as its default value.
        coming_filter = CleanDecimal(replace_dots=True, default=NotAvailable)
        account.coming = coming_filter.filter(
            figures.get('montantOperationJour'))

        yield account
def iter_results(self):
    """Yield one dict per ``div.train_info`` element of the document.

    Each dict has the keys ``type``, ``time``, ``departure``, ``arrival``,
    ``arrival_time``, ``price``, ``currency`` and ``price_info``. The three
    price fields describe the lowest ``td.price`` cell of the element and
    stay ``None`` when the element has no such cell.
    """
    for div in self.document.getroot().cssselect('div.train_info'):
        info = None
        price = None
        currency = None
        for td in div.cssselect('td.price'):
            txt = self.parser.tocleanstring(td)
            # Raw string: ``\d`` and ``\.`` are regex escapes. In a plain
            # literal they are invalid string escapes, which recent Python
            # versions warn about.
            p = Decimal(re.sub(r'([^\d\.]+)', '', txt))
            if price is None or p < price:
                # The label is the last text node of the first
                # ``strong.price_label`` element, without its colon.
                label_node = div.cssselect('strong.price_label')[0]
                info = list(label_node.itertext())[-1].strip().strip(':')
                price = p
                currency = Currency.get_currency(txt)
        yield {
            'type': self.get_value(div, 'div.transporteur-txt'),
            'time': self.parse_hour(div, 'div.departure div.hour'),
            'departure': self.get_value(div, 'div.departure div.station'),
            'arrival': self.get_value(div, 'div.arrival div.station',
                                      last=True),
            'arrival_time': self.parse_hour(div, 'div.arrival div.hour',
                                            last=True),
            'price': price,
            'currency': currency,
            'price_info': info,
        }
def parse_recipients(self, items, assume_internal=False):
    """Yield a ``Recipient`` for each usable option element in *items*.

    :param items: iterable of elements exposing ``attrib['value']``; their
        text lines are obtained with ``get_text_lines``.
    :param assume_internal: treat every option as an internal account,
        whatever its value prefix.

    Options whose value starts with ``I`` (or all of them when
    *assume_internal* is set) are internal: the first line starting with
    ``n°`` supplies the id. Options whose value starts with ``E`` are
    external and need at least two lines (label, then IBAN); otherwise a
    warning is logged and the option is skipped.
    """
    for opt in items:
        lines = get_text_lines(opt)
        index = opt.attrib['value']

        if index.startswith('I') or assume_internal:
            for n, line in enumerate(lines):
                # Test and slice the same stripped text: slicing the raw line
                # cut the wrong characters when it had leading whitespace.
                stripped = line.strip()
                if stripped.startswith('n°'):
                    rcpt = Recipient()
                    rcpt._index = index
                    rcpt._raw_label = ' '.join(lines)
                    rcpt.category = 'Interne'
                    # we don't have iban here, use account number
                    rcpt.id = CleanText().filter(stripped[2:].strip())
                    rcpt.label = ' '.join(lines[:n])
                    rcpt.currency = Currency.get_currency(lines[-1])
                    rcpt.enabled_at = datetime.now().replace(microsecond=0)
                    yield rcpt
                    break
        elif index.startswith('E'):
            if len(lines) > 1:
                # In some cases we observed beneficiaries without label,
                # we skip them
                rcpt = Recipient()
                rcpt._index = index
                rcpt._raw_label = ' '.join(lines)
                rcpt.category = 'Externe'
                rcpt.label = lines[0]
                rcpt.iban = lines[1].upper()
                rcpt.id = rcpt.iban
                rcpt.enabled_at = datetime.now().replace(microsecond=0)
                yield rcpt
            else:
                # ``lines`` may be empty here, so do not index it blindly.
                self.logger.warning(
                    'The recipient associated with the iban %s has got no label',
                    lines[0] if lines else '')
def populate_balances(self, accounts):
    """Set balance and currency on each account, yielding them one by one.

    Values come from ``self.doc['donnees']['compteSoldesMap']``, looked up
    with ``account._id``.
    """
    for account in accounts:
        figures = self.doc['donnees']['compteSoldesMap'][account._id]

        to_decimal = CleanDecimal(replace_dots=True)
        account.balance = to_decimal.filter(figures['soldeComptable'])

        currency_text = figures['deviseSoldeComptable']
        account.currency = Currency.get_currency(currency_text)

        yield account
class get_housing(ItemElement):
    """Fill a ``Housing`` object from an advert detail page."""

    klass = Housing

    obj_id = Regexp(CleanText('//p[has-class("property-reference")]'), r'\:(.*)$')

    def obj_url(self):
        """Return the URL of the page being parsed."""
        return self.page.url

    obj_area = CleanDecimal(
        Regexp(
            CleanText('//table[@id="table"]//span[contains(text(), "Surface")]//following-sibling::span[has-class("r")]'),
            r'([\d\ ]+)m'
        ),
        default=NotAvailable
    )
    obj_title = CleanText('//span[has-class("mainh1")]')
    obj_cost = CleanDecimal('//span[has-class("price-info")]')
    obj_currency = Currency.get_currency(u'€')
    obj_rooms = CleanDecimal('//table[@id="table"]//span[contains(text(), "Pièce")]//following-sibling::span[has-class("r")]')
    obj_bedrooms = CleanDecimal('//table[@id="table"]//span[contains(text(), "Chambre")]//following-sibling::span[has-class("r")]')
    # The location is whatever follows the comma in the main heading.
    obj_location = CleanText(Regexp(CleanText('//span[has-class("mainh1")]'), r',(.+)$'))
    obj_text = CleanText('//div[has-class("property-description-main")]')
    # NOTE(review): ``datetime.today()`` is evaluated once, when the class
    # body runs, so the fallback date is not refreshed afterwards.
    # NOTE(review): the character class ``[\d\\]`` accepts digits and
    # backslashes only; if the page prints dates with ``/`` this captures
    # just the leading digits - confirm against a real page.
    obj_date = Date(
        Regexp(
            CleanText('//div[has-class("property-description-main")]'),
            r'Mise à jour le ([\d\\]+)',
            default=datetime.today()
        )
    )
    obj_phone = Attr('//button[@id="display-phonenumber-1"]', 'data-phone-number')

    def obj_photos(self):
        """Return the slider images whose ``src`` does not start with ``/``."""
        photos = []
        for photo in self.xpath('//div[@id="bxSliderContainer"]//ul//li//img'):
            url = Attr('.', 'src')(photo)
            # An empty ``src`` used to raise IndexError on ``url[0]``; it is
            # now skipped, like the sources starting with ``/``.
            if url and not url.startswith('/'):
                photos.append(HousingPhoto(url))
        return photos

    def obj_details(self):
        """Return the texts of the gas and energy symbols as GES and DPE."""
        return {
            'GES': CleanText('//span[@id="gassymbol"]', '')(self),
            'DPE': CleanText('//span[@id="energysymbol"]', '')(self),
        }

    def obj_utilities(self):
        """Derive the utilities status from a CC / HC marker in the price."""
        price = CleanText('//span[has-class("price-info")]')(self)
        if 'CC' in price:
            return UTILITIES.INCLUDED
        elif 'HC' in price:
            return UTILITIES.EXCLUDED
        else:
            return UTILITIES.UNKNOWN

    obj_station = NotAvailable
    obj_price_per_meter = PricePerMeterFilter()
class get_housing(ItemElement):
    """Fill a ``Housing`` object from the elements of an advert document."""

    klass = Housing

    obj_id = Env('_id')
    obj_type = EPAdvertType(CleanText('//rubrique'))
    obj_advert_type = ADVERT_TYPES.PERSONAL
    obj_house_type = EPHouseType(CleanText('//tbien'))
    obj_title = CleanText('//titre')
    obj_rooms = CleanDecimal('//pieces')
    obj_cost = CleanDecimal('//prix')
    obj_currency = Currency.get_currency(u'€')
    obj_utilities = UTILITIES.UNKNOWN
    # The title element is also used as the advert text.
    obj_text = CleanText('//titre')
    obj_location = CleanText('//ville')
    obj_url = CleanText('//urlDetailAnnonce')
    obj_area = CleanDecimal('//surface')
    obj_price_per_meter = PricePerMeterFilter()
    obj_phone = CleanText('//telephone1')
    obj_date = DateTime(CleanText('//DateCheck'))

    def obj_GES(self):
        """Look up the upper-cased ``GSE`` text on ``ENERGY_CLASS``."""
        grade = CleanText('//GSE')(self).upper()
        return getattr(ENERGY_CLASS, grade, NotAvailable)

    def obj_photos(self):
        """Return a ``HousingPhoto`` for each non-empty ``UrlImageN`` element."""
        image_paths = ('//UrlImage1', '//UrlImage2', '//UrlImage3')
        urls = [CleanText(path)(self) for path in image_paths]
        return [HousingPhoto(url) for url in urls if url]

    def obj_DPE(self):
        """Look up the upper-cased ``DPE`` text on ``ENERGY_CLASS``."""
        grade = CleanText('//DPE')(self).upper()
        return getattr(ENERGY_CLASS, grade, NotAvailable)

    def obj_details(self):
        """Collect the optional elements that carry text, keyed by label."""
        labelled_paths = (
            ('//Nb_Etage', 'Nombre d\'etages'),
            ('//Neuf', 'Neuf'),
            ('//Ancien_avec_du_Charme', 'Ancien avec charme'),
            ('//Avec_terasse', 'Avec Terrasse'),
            ('//latitude', 'Latitude'),
            ('//longitude', 'Longitude'),
            ('//loyer', 'Loyer'),
            ('//piscine', 'Piscine'),
            ('//surface_balcon', 'Surface du balcon'),
            ('//surface_exp', 'Surface exploitable'),
            ('//surface_terrain', 'Surface du Terrain'),
            ('//Meuble', 'furnished'),
        )
        details = {}
        for xpath, label in labelled_paths:
            text = CleanText(xpath)(self)
            if text:
                details[label] = text
        return details
def get_accounts(self):
    """Return the accounts found on the money page, keyed by account id.

    One checking account is built per line of the multi-currency list. If
    that gives nothing, a single primary account is built from the
    "available" balance block, or an ``EUR`` account with no balance when
    that block is absent as well.
    """
    accounts = {}
    content = self.doc.xpath(
        '//div[@id="moneyPage" or @id="MoneyPage"]')[0]

    # Multiple accounts
    for li in content.xpath(
            '(//div[@class="col-md-8 multi-currency"])[1]/ul/li'):
        account = Account()
        account.iban = NotAvailable
        account.type = Account.TYPE_CHECKING

        unit_nodes = (li.xpath('./span[@class="currencyUnit"]/span')
                      or li.xpath('./span[1]'))
        currency_code = CleanText().filter(unit_nodes[0])
        currency = Currency.get_currency(currency_code)
        if not currency:
            self.logger.warning('Unable to find currency %r', currency_code)
            continue

        # The currency doubles as the account id.
        account.id = account.currency = currency
        account.balance = CleanDecimal(replace_dots=True).filter(
            li.xpath('./span[@class="amount"]/text()'))
        account.label = u'%s %s*' % (self.browser.username, account.currency)
        accounts[account.id] = account
        self.browser.account_currencies.append(account.currency)

    if accounts:
        return accounts

    # Primary currency account
    primary_account = Account()
    primary_account.iban = NotAvailable
    primary_account.type = Account.TYPE_CHECKING
    try:
        available = content.xpath(
            '//div[contains(@class, "col-md-6")][contains(@class, "available")]'
        )[0]
        balance = CleanText('.')(available)
    except IndexError:
        primary_account.id = 'EUR'
        primary_account.currency = u'EUR'
        primary_account.balance = NotAvailable
        primary_account.label = u'%s' % (self.browser.username)
    else:
        primary_account.currency = Account.get_currency(balance)
        primary_account.id = unicode(primary_account.currency)
        primary_account.balance = Decimal(
            FrenchTransaction.clean_amount(balance))
        primary_account.label = u'%s %s*' % (self.browser.username,
                                             primary_account.currency)

    accounts[primary_account.id] = primary_account
    return accounts
def populate_balances(self, accounts):
    """Set balance, currency and coming on each account, then yield it.

    The figures are read from ``self.doc['donnees']['compteSoldesMap']``
    under the key ``account._id``. ``soldeComptable`` and
    ``deviseSoldeComptable`` are used when present; otherwise the
    ``soldeInstantane`` / ``deviseSoldeInstantane`` entries are taken.
    """
    for account in accounts:
        figures = self.doc['donnees']['compteSoldesMap'][account._id]

        raw_balance = figures.get('soldeComptable',
                                  figures.get('soldeInstantane'))
        account.balance = CleanDecimal(replace_dots=True).filter(raw_balance)

        currency_text = figures.get('deviseSoldeComptable',
                                    figures.get('deviseSoldeInstantane'))
        account.currency = Currency.get_currency(currency_text)

        # This filter is built with NotAvailable as its default value.
        coming_filter = CleanDecimal(replace_dots=True, default=NotAvailable)
        account.coming = coming_filter.filter(
            figures.get('montantOperationJour'))

        yield account
def get_shop_data(self, backend):
    """Scrape every order of *backend* and return it as a list of text lines.

    :param backend: object providing ``get_currency()``, ``iter_orders()``,
        ``iter_payments(order)``, ``iter_items(order)`` and ``name``.
    :returns: lines describing a ``Shop`` record with its ``ShopOrder``,
        ``ShopPayment`` and ``ShopItem`` sub-records.
    :raises AssertionError: if an order has no item, no payment, or if its
        payments do not add up to items + shipping + discount + tax.

    A progress line is printed whenever more than ``self.REPORT_TIME``
    seconds have passed since the previous one, and once at the end.
    """
    last_report = time()
    sorders = []
    currency = Currency.get_currency(backend.get_currency())
    for o in backend.iter_orders():
        # Fetch payments and items once per order; they are needed for the
        # output and for the consistency checks below.
        payments = list(backend.iter_payments(o))
        items = list(backend.iter_items(o))

        sopayments = [[
            u'ShopPayment',
            u'{ sptime = %i' % totime(p.date),
            u', spamount = %i' % tocent(p.amount),
            u', spmethod = %s' % tostr(p.method),
            u'}'] for p in payments]
        soitems = [[
            u'ShopItem',
            u'{ silabel = %s' % tostr(i.label),
            u', siprice = %i' % tocent(i.price),
            u', siurl = %s' % tostr(i.url),
            u'}'] for i in items]
        sorders.append([
            u'ShopOrder',
            u'{ soid = %s' % tostr(o.id),
            u', sotime = %i' % totime(o.date),
            u', sodiscount = %i' % tocent(o.discount),
            u', soshipping = %i' % tocent(o.shipping),
            u', sotax = %i' % tocent(o.tax),
            u', sopayments ='] + [
            u' %s' % s for s in r_list(sopayments)] + [
            u', soitems ='] + [
            u' %s' % s for s in r_list(soitems)] + [
            u'}'])

        if time() - last_report > self.REPORT_TIME:
            # Parenthesised single argument: valid as a Python 2 print
            # statement and as a Python 3 function call.
            print(u'Scraped %i orders' % len(sorders))
            stdout.flush()
            last_report = time()

        payments_total = sum(p.amount for p in payments)
        items_total = sum(i.price for i in items)
        order_total = o.shipping + o.discount + o.tax
        assert payments_total == items_total + order_total, \
            u'%s != %s + %s' % (payments_total, items_total, order_total)
        assert items
        assert payments

    print(u'Scraped %i orders' % len(sorders))
    stdout.flush()
    return [
        u'Shop',
        u'{ sid = %s' % tostr(backend.name),
        u', scurrency = %s' % currency,
        u', sorders ='] + [
        u' %s' % s for s in r_list(sorders)] + [
        u'}']
class get_housing(ItemElement):
    """Fill a ``Housing`` object from an ``annonceTerrain`` detail page."""

    klass = Housing

    obj_id = Attr('//article//a[has-class("add-to-selection")]', 'data-id')

    def obj_url(self):
        """Return the URL of the page being parsed."""
        page = self.page
        return page.url

    # These three values are the same for every advert parsed here.
    obj_type = POSTS_TYPES.SALE
    obj_advert_type = ADVERT_TYPES.PROFESSIONAL
    obj_house_type = HOUSE_TYPES.LAND

    obj_title = CleanText('//article[@id="annonceTerrain"]/header/h1')
    obj_area = CleanDecimal(
        CleanText('//table[@id="price-list"]/tbody/tr/td[1]'))
    # Dots are removed from the price cell before it is parsed.
    obj_cost = CleanDecimal(
        CleanText('//table[@id="price-list"]/tbody/tr/td[2]',
                  replace=[(".", "")]))
    obj_currency = Currency.get_currency(u'€')
    obj_date = Date(
        CleanText(
            '//section[@id="photos-details"]/div[@class="right-bloc"]/div/div[3]/div[2]/strong'
        ))
    obj_location = CleanText(
        '//article[@id="annonceTerrain"]/div[@class="btn-vert-bloc"]/a/strong'
    )
    obj_text = CleanText('//div[@id="informationsTerrain"]/p[2]')
    obj_phone = CleanText(
        '//div[@id="infos-annonceur"]/div/div/div[@class="phone-numbers-bloc"]/p/strong'
    )

    def obj_photos(self):
        """Return one ``HousingPhoto`` per image of the miniature carousel."""
        thumbnails = self.xpath('//div[@id="miniatures-carousel"]//img')
        return [
            HousingPhoto(BASE_URL + '/' + Attr('.', 'data-big-photo')(thumb))
            for thumb in thumbnails
        ]

    obj_utilities = UTILITIES.UNKNOWN
class item(ItemElement):
    """Build a ``Housing`` object from one entry of a result list."""

    klass = Housing

    # The id is the ``idter`` query parameter of the entry link.
    obj_id = QueryValue(
        Attr('.//div[has-class("presentationItem")]/h2/a', 'href'),
        'idter')
    obj_url = AbsoluteLink('.//h2/a')
    obj_type = POSTS_TYPES.SALE
    obj_advert_type = ADVERT_TYPES.PROFESSIONAL
    obj_house_type = HOUSE_TYPES.LAND
    obj_title = CleanText('.//div[@class="presentationItem"]/h2/a')
    # Raw string so that ``\d`` reaches the regex engine as written.
    obj_area = CleanDecimal(
        Regexp(CleanText('.//div[@class="presentationItem"]/h3'),
               r'surface de (\d+) m²'))
    obj_cost = CleanDecimal(
        CleanText('.//div[@class="presentationItem"]/h3/span[1]',
                  replace=[(".", ""), (" €", "")]))
    obj_currency = Currency.get_currency(u'€')
    obj_date = Date(
        CleanText(
            './/div[@class="presentationItem"]//span[@class="majItem"]',
            replace=[("Mise à jour : ", "")]))
    obj_text = CleanText('.//div[@class="presentationItem"]/p')
    obj_phone = CleanText(
        './/div[@class="divBoutonContact"]/div[@class="phone-numbers-bloc"]/p[1]/strong'
    )

    def obj_photos(self):
        """Return a one-element list with the entry photo, or ``[]``.

        The first non-empty ``data-src`` value is used. The original
        returned ``None`` when no image matched and gave up as soon as the
        first value was empty.
        """
        for photo in self.xpath(
                './/div[has-class("photoItemListe")]/img/@data-src'):
            if photo:
                return [HousingPhoto(BASE_URL + '/' + photo)]
        return []

    obj_utilities = UTILITIES.UNKNOWN
def iter_investment(self, account):
    """Return the investments of *account*, fetching them on first use.

    Results are cached in ``self.invs`` under ``account.id``. On a miss the
    accounts page is opened and its investments are listed; those with a
    label shorter than four characters that ``Currency.get_currency``
    recognises are replaced by ``create_french_liquidity(valuation)``.
    """
    if account.id in self.invs:
        return self.invs[account.id]

    staging = '_s' if 'staging' in self.sessionId else ''
    self.accounts.stay_or_go(staging=staging,
                             accountId=self.intAccount,
                             sessionId=self.sessionId)

    fetched = list(self.page.iter_investment(currency=account.currency))

    # Replace as liquidities investments that are cash
    investments = []
    for inv in fetched:
        if len(inv.label) < 4 and Currency.get_currency(inv.label):
            investments.append(create_french_liquidity(inv.valuation))
        else:
            investments.append(inv)

    self.invs[account.id] = investments
    return self.invs[account.id]
def obj_currency(self):
    """Return the contract currency read from its detail page.

    The detail page is opened through the row link, or through a URL built
    from the two ``-`` separated parts of the link id when there is one.
    ``NotAvailable`` is returned when the cell is missing or empty, or when
    no currency is recognised.
    """
    _id = CleanText('.//td/a/@id')(self)
    if not _id:
        ac_details_page = self.page.browser.open(
            Link('.//td/a')(self)).page
    else:
        split = _id.split('-')
        ac_details_page = self.page.browser.open(
            '/outil/UWVI/AssuranceVie/accesDetail?ID_CONTRAT=%s&PRODUCTEUR=%s'
            % (split[0], split[1])).page

    cell_text = CleanText('(//tr[8])/td[2]', default=NotAvailable)(
        ac_details_page.doc)
    # ``re.sub`` only accepts strings, so never hand it the NotAvailable
    # default (presumably not a string - confirm). An empty text cannot
    # name a currency either.
    if cell_text is NotAvailable or not cell_text:
        return NotAvailable

    # Drop digits, commas and spaces; what remains is the currency text.
    currency_text = re.sub(r'[\d\,\ ]', '', cell_text)
    return Currency.get_currency(currency_text) or NotAvailable
def get_payback(self):
    """Return ``(id, raw, amount, currency)`` of the associated funding.

    A 4-tuple of ``None`` is returned when the page has no "Transaction
    associée" cell, no "Approvisionnement à" row or cell, or when no
    transaction number can be extracted.
    """
    nothing = (None, None, None, None)

    if not self.doc.xpath(u'//td[contains(text(), "Transaction associée")]'):
        return nothing

    rows = self.doc.xpath(u'//tr[td[contains(text(),"Approvisionnement à")]]')
    cells = self.doc.xpath(u'//td[span[contains(text(),"Approvisionnement à")]]')
    if len(rows) == 0 or len(cells) == 0:
        return nothing

    row = rows[0]
    match = re.search(u'Nº de transaction unique ([a-zA-Z0-9_]*)',
                      CleanText().filter(cells[0]))
    if not match:
        return nothing

    transaction_id = match.group(1)
    raw = CleanText().filter(row.xpath('./td[2]')[0])
    # The fifth cell provides both the amount and the currency.
    amount_text = CleanText().filter(row.xpath('./td[5]')[0])
    amount = Decimal(FrenchTransaction.clean_amount(amount_text))
    currency = Currency.get_currency(amount_text)
    return transaction_id, raw, amount, currency
def get_shop_data(self, backend):
    """Scrape every order of *backend* and return it as a list of text lines.

    :param backend: object providing ``get_currency()``, ``iter_orders()``,
        ``iter_payments(order)``, ``iter_items(order)`` and ``name``.
    :returns: lines describing a ``Shop`` record with its ``ShopOrder``,
        ``ShopPayment`` and ``ShopItem`` sub-records.
    :raises AssertionError: if an order has no item, no payment, or if its
        payments do not add up to items + shipping + discount + tax.

    A progress line is printed whenever more than ``self.REPORT_TIME``
    seconds have passed since the previous one, and once at the end.
    """
    last_report = time()
    sorders = []
    currency = Currency.get_currency(backend.get_currency())
    for o in backend.iter_orders():
        # Fetch payments and items once per order; they are needed for the
        # output and for the consistency checks below.
        payments = list(backend.iter_payments(o))
        items = list(backend.iter_items(o))

        sopayments = [[
            u'ShopPayment',
            u'{ sptime = %i' % totime(p.date),
            u', spamount = %i' % tocent(p.amount),
            u', spmethod = %s' % tostr(p.method),
            u'}'
        ] for p in payments]
        soitems = [[
            u'ShopItem',
            u'{ silabel = %s' % tostr(i.label),
            u', siprice = %i' % tocent(i.price),
            u', siurl = %s' % tostr(i.url),
            u'}'
        ] for i in items]
        sorders.append([
            u'ShopOrder',
            u'{ soid = %s' % tostr(o.id),
            u', sotime = %i' % totime(o.date),
            u', sodiscount = %i' % tocent(o.discount),
            u', soshipping = %i' % tocent(o.shipping),
            u', sotax = %i' % tocent(o.tax),
            u', sopayments ='
        ] + [u' %s' % s for s in r_list(sopayments)] +
            [u', soitems ='] +
            [u' %s' % s for s in r_list(soitems)] + [u'}'])

        if time() - last_report > self.REPORT_TIME:
            # Parenthesised single argument: valid as a Python 2 print
            # statement and as a Python 3 function call.
            print(u'Scraped %i orders' % len(sorders))
            stdout.flush()
            last_report = time()

        payments_total = sum(p.amount for p in payments)
        items_total = sum(i.price for i in items)
        order_total = o.shipping + o.discount + o.tax
        assert payments_total == items_total + order_total, \
            u'%s != %s + %s' % (payments_total, items_total, order_total)
        assert items
        assert payments

    print(u'Scraped %i orders' % len(sorders))
    stdout.flush()
    return [
        u'Shop',
        u'{ sid = %s' % tostr(backend.name),
        u', scurrency = %s' % currency,
        u', sorders ='
    ] + [u' %s' % s for s in r_list(sorders)] + [u'}']
def get_title(self, obj):
    """Return the title line for *obj*.

    The line starts with ``obj.type``. A coloured chunk is appended for the
    price, for a lateness greater than ``datetime.time()`` and for a
    non-blank information text, whenever the attribute exists and is not
    empty.
    """
    def is_set(name):
        # Attribute exists and holds a non-empty value.
        return hasattr(obj, name) and not empty(getattr(obj, name))

    def chunk(text, *style):
        # Cyan dash separator followed by the styled text.
        return u' %s %s' % (self.colored(u'—', 'cyan'),
                            self.colored(text, *style))

    title = obj.type

    if is_set('price'):
        price_text = '%6.2f %s' % (obj.price,
                                   Currency.currency2txt(obj.currency))
        title += chunk(price_text, 'green')

    if is_set('late') and obj.late > datetime.time():
        title += chunk('Late: %s' % obj.late, 'red', 'bold')

    if is_set('information') and obj.information.strip() != '':
        title += chunk(obj.information, 'red')

    return title
def get_accounts(self):
    """Return the accounts found on the money page, keyed by account id.

    One checking account is built per line of the multi-currency list. If
    that gives nothing, a single primary account is built from the
    "available" balance block, or an ``EUR`` account with no balance when
    that block is absent as well.
    """
    accounts = {}
    content = self.doc.xpath('//div[@id="moneyPage" or @id="MoneyPage"]')[0]

    # Multiple accounts
    for li in content.xpath('(//div[@class="col-md-8 multi-currency"])[1]/ul/li'):
        account = Account()
        account.iban = NotAvailable
        account.type = Account.TYPE_CHECKING

        unit_nodes = (li.xpath('./span[@class="currencyUnit"]/span')
                      or li.xpath('./span[1]'))
        currency_code = CleanText().filter(unit_nodes[0])
        currency = Currency.get_currency(currency_code)
        if not currency:
            self.logger.warning('Unable to find currency %r', currency_code)
            continue

        # The currency doubles as the account id.
        account.id = account.currency = currency
        account.balance = CleanDecimal(replace_dots=True).filter(
            li.xpath('./span[@class="amount"]/text()'))
        account.label = u'%s %s*' % (self.browser.username, account.currency)
        accounts[account.id] = account
        self.browser.account_currencies.append(account.currency)

    if accounts:
        return accounts

    # Primary currency account
    primary_account = Account()
    primary_account.iban = NotAvailable
    primary_account.type = Account.TYPE_CHECKING
    try:
        available = content.xpath(
            '//div[contains(@class, "col-md-6")][contains(@class, "available")]')[0]
        balance = CleanText('.')(available)
    except IndexError:
        primary_account.id = 'EUR'
        primary_account.currency = u'EUR'
        primary_account.balance = NotAvailable
        primary_account.label = u'%s' % (self.browser.username)
    else:
        primary_account.currency = Account.get_currency(balance)
        primary_account.id = unicode(primary_account.currency)
        primary_account.balance = Decimal(FrenchTransaction.clean_amount(balance))
        primary_account.label = u'%s %s*' % (self.browser.username,
                                             primary_account.currency)

    accounts[primary_account.id] = primary_account
    return accounts
def iter_results(self):
    """Yield one dict per ``div.train_info`` element of the document.

    Each dict has the keys ``type``, ``time``, ``departure``, ``arrival``,
    ``arrival_time``, ``price``, ``currency`` and ``price_info``. The three
    price fields describe the lowest ``td.price`` cell of the element and
    stay ``None`` when the element has no such cell.
    """
    for div in self.document.getroot().cssselect('div.train_info'):
        info = None
        price = None
        currency = None
        for td in div.cssselect('td.price'):
            txt = self.parser.tocleanstring(td)
            # Raw string: ``\d`` and ``\.`` are regex escapes. In a plain
            # literal they are invalid string escapes, which recent Python
            # versions warn about.
            p = Decimal(re.sub(r'([^\d\.]+)', '', txt))
            if price is None or p < price:
                # The label is the last text node of the first
                # ``strong.price_label`` element, without its colon.
                label_node = div.cssselect('strong.price_label')[0]
                info = list(label_node.itertext())[-1].strip().strip(':')
                price = p
                currency = Currency.get_currency(txt)
        yield {'type': self.get_value(div, 'div.transporteur-txt'),
               'time': self.parse_hour(div, 'div.departure div.hour'),
               'departure': self.get_value(div, 'div.departure div.station'),
               'arrival': self.get_value(div, 'div.arrival div.station', last=True),
               'arrival_time': self.parse_hour(div, 'div.arrival div.hour', last=True),
               'price': price,
               'currency': currency,
               'price_info': info,
               }
class get_housing(ItemElement):
    """Fill a ``Housing`` object from an advert detail page."""

    klass = Housing

    obj_id = Env('_id')
    obj_type = NotAvailable  # TODO
    obj_advert_type = ADVERT_TYPES.PERSONAL
    obj_house_type = NotAvailable  # TODO
    obj_title = CleanText('h1')
    obj_rooms = CleanDecimal(
        '//div[@class="stats"]/section/div[@id="divpieces"]/span[@class="stat"]',
        default=0)
    obj_cost = CleanDecimal('(//div[@class="stats"]/div/h2)[2]/em')
    obj_currency = Currency.get_currency(u'€')
    obj_utilities = UTILITIES.UNKNOWN
    obj_text = CleanHTML('//div[@class="textes"]')
    # Location, URL and phone are read from input values of the page.
    obj_location = CleanText('//input[@id="adressegeo"]/@value')
    obj_url = CleanText('//input[@id="hfurldetail"]/@value')
    obj_area = CleanDecimal(
        Regexp(
            CleanText(
                '//div[@class="stats"]/section/div[@id="divsurface"]/span[@class="stat"]'
            ),
            u'\s?(\d+)\sm\s2',
            default=NotAvailable),
        default=NotAvailable)
    obj_price_per_meter = PricePerMeterFilter()
    obj_phone = CleanText('//input[@id="hftelA"]/@value')
    # The function itself is assigned here, not the result of calling it.
    obj_date = datetime.now

    def obj_photos(self):
        """Return one ``HousingPhoto`` per ``urlbig`` link of the image list."""
        big_urls = self.xpath('//div[@id="plistimage"]/a/@urlbig')
        return [
            HousingPhoto(u"http://www.entreparticuliers.com/%s" % big_url)
            for big_url in big_urls
        ]
class item(ItemElement):
    """Build a ``Housing`` object from one advert dictionary."""

    klass = Housing

    # The id joins the ``rubrique`` and ``idannonce`` values with ``#``.
    obj_id = Format("%s#%s", Dict('rubrique'), Dict('idannonce'))
    obj_type = EPAdvertType(Dict('rubrique'))
    obj_advert_type = ADVERT_TYPES.PERSONAL
    obj_house_type = EPHouseType(Dict('tbien'))
    obj_title = Dict('titre')
    obj_cost = CleanDecimal(Dict('prix'))
    obj_currency = Currency.get_currency(u'€')
    # The title is also used as the advert text.
    obj_text = Dict('titre')
    obj_location = Dict('ville')
    obj_area = CleanDecimal(Dict('surface'))
    obj_rooms = CleanDecimal(Dict('pieces'))
    obj_date = DateTime(Dict('creationdate'))
    obj_utilities = UTILITIES.UNKNOWN
    obj_price_per_meter = PricePerMeterFilter()

    def obj_photos(self):
        """Return the ``UrlImage`` photo in a list, or ``[]`` when empty."""
        image_url = Dict('UrlImage', default=NotAvailable)(self)
        if empty(image_url):
            return []
        return [HousingPhoto(image_url)]
def populate_balances(self, accounts):
    """Set balance and currency on each account, yielding them one by one.

    Values come from ``self.doc['donnees']['compteSoldesMap']``, looked up
    with ``account._id``.
    """
    for account in accounts:
        figures = self.doc['donnees']['compteSoldesMap'][account._id]

        to_decimal = CleanDecimal(replace_dots=True)
        account.balance = to_decimal.filter(figures['soldeComptable'])

        currency_text = figures['deviseSoldeComptable']
        account.currency = Currency.get_currency(currency_text)

        yield account
def obj_currency(self):
    """Resolve the currency from the text of the ``location`` div."""
    # The cleaned text of the itemprop="location" div is handed as a whole
    # to ``Currency.get_currency``.
    return Currency.get_currency(
        CleanText('./article/div/div[@itemprop="location"]')(self))
def get_currency(self):
    """Return what ``Currency.get_currency`` gives for the dollar sign."""
    # Amazon uses only U.S. dollars.
    dollar_sign = u'$'
    return Currency.get_currency(dollar_sign)
def obj_currency(self):
    """Resolve the currency from the text of the ``location`` div."""
    # The cleaned text of the itemprop="location" div is handed as a whole
    # to ``Currency.get_currency``.
    return Currency.get_currency(
        CleanText('./article/div/div[@itemprop="location"]')(self))
def obj_currency(self):
    """Resolve the currency from the text of the fifth cell of the row."""
    cell_text = CleanText('./td[5]')(self)
    return Currency.get_currency(cell_text)
def get_currency(self):
    """Return what ``Currency.get_currency`` gives for the dollar sign."""
    # MyHabit uses only U.S. dollars.
    dollar_sign = u'$'
    return Currency.get_currency(dollar_sign)
def obj_currency(self):
    """Resolve the currency from the text of the ``titres`` table cell."""
    cell_text = CleanText(TableCell('titres'))(self)
    return Currency.get_currency(cell_text)
class get_housing(ItemElement):
    """Fill a ``Housing`` object from an advert dictionary.

    Most values live under the ``characteristics`` key; ``agency`` and
    ``location`` are read as well.
    """

    klass = Housing

    def is_agency(self):
        """Tell whether ``agency/isParticulier`` is the string ``'false'``."""
        return Dict('agency/isParticulier')(self) == 'false'

    obj_id = Env('_id')

    def obj_type(self):
        """Derive the post type from the transaction and its attributes."""
        transaction = Dict('characteristics/transaction')(self)
        if transaction == 'location':
            if Dict('characteristics/isFurnished')(self):
                return POSTS_TYPES.FURNISHED_RENT
            else:
                return POSTS_TYPES.RENT
        elif transaction == 'vente':
            estate_type = Dict('characteristics/estateType')(self).lower()
            if 'viager' in estate_type:
                return POSTS_TYPES.VIAGER
            else:
                return POSTS_TYPES.SALE
        else:
            return NotAvailable

    def obj_advert_type(self):
        """Return PROFESSIONAL for agency adverts, PERSONAL otherwise."""
        # The method must be called: the bare bound method is always truthy,
        # which made every advert PROFESSIONAL.
        if self.is_agency():
            return ADVERT_TYPES.PROFESSIONAL
        else:
            return ADVERT_TYPES.PERSONAL

    def obj_house_type(self):
        """Map the lower-cased estate type to a ``HOUSE_TYPES`` value."""
        estate_type = Dict('characteristics/estateType')(self).lower()
        if 'appartement' in estate_type:
            return HOUSE_TYPES.APART
        elif 'maison' in estate_type:
            return HOUSE_TYPES.HOUSE
        elif 'parking' in estate_type:
            return HOUSE_TYPES.PARKING
        elif 'terrain' in estate_type:
            return HOUSE_TYPES.LAND
        else:
            return HOUSE_TYPES.OTHER

    obj_title = Dict('characteristics/titleWithTransaction')
    obj_location = Format('%s %s %s', Dict('location/address'),
                          Dict('location/cityLabel'),
                          Dict('location/postalCode'))

    def obj_cost(self):
        """Return the price, or the minimum price when the price is 0."""
        cost = TypeDecimal(Dict('characteristics/price'))(self)
        if cost == 0:
            cost = TypeDecimal(Dict('characteristics/priceMin'))(self)
        return cost

    obj_currency = BaseCurrency.get_currency('€')

    def obj_utilities(self):
        """Return INCLUDED when ``areFeesIncluded`` is truthy, else EXCLUDED."""
        are_fees_included = Dict('characteristics/areFeesIncluded',
                                 default=None)(self)
        if are_fees_included:
            return UTILITIES.INCLUDED
        else:
            return UTILITIES.EXCLUDED

    obj_text = CleanHTML(Dict('characteristics/description'))
    obj_url = BrowserURL('housing_html', _id=Env('_id'))

    def obj_area(self):
        """Return the area, or the minimum area when the area is 0."""
        area = TypeDecimal(Dict('characteristics/area'))(self)
        if area == 0:
            area = TypeDecimal(Dict('characteristics/areaMin'))(self)
        return area

    obj_date = FromTimestamp(Dict('characteristics/date'))
    obj_bedrooms = TypeDecimal(Dict('characteristics/bedroomCount'))

    def obj_rooms(self):
        """Return the first entry of ``roomCount``, or ``NotAvailable``."""
        # TODO: Why is roomCount a list?
        rooms = Dict('characteristics/roomCount', default=[])(self)
        if rooms:
            return TypeDecimal(rooms[0])(self)
        return NotAvailable

    obj_price_per_meter = PricePerMeterFilter()

    def obj_photos(self):
        """Return one ``HousingPhoto`` per image that has an ``xl`` URL.

        When the URL goes through ``thbr.figarocms.net`` and embeds a second
        ``http://`` URL, the embedded URL is used.
        """
        photos = []
        for img in Dict('characteristics/images')(self):
            url = img.get('xl')
            if not url:
                # No ``xl`` rendition: ``re.search`` would fail on None.
                continue
            m = re.search(r'http://thbr\.figarocms\.net.*(http://.*)', url)
            photos.append(HousingPhoto(m.group(1) if m else url))
        return photos

    def obj_DPE(self):
        """Look up the energy consumption category on ``ENERGY_CLASS``."""
        DPE = Dict('characteristics/energyConsumptionCategory',
                   default="")(self)
        return getattr(ENERGY_CLASS, DPE, NotAvailable)

    def obj_GES(self):
        """Look up the greenhouse gas category on ``ENERGY_CLASS``."""
        GES = Dict('characteristics/greenhouseGasEmissionCategory',
                   default="")(self)
        return getattr(ENERGY_CLASS, GES, NotAvailable)

    def obj_details(self):
        """Collect secondary characteristics and the agency address."""
        details = {}
        details['fees'] = Dict('characteristics/fees',
                               default=NotAvailable)(self)
        details['agencyFees'] = Dict('characteristics/agencyFees',
                                     default=NotAvailable)(self)
        details['guarantee'] = Dict('characteristics/guarantee',
                                    default=NotAvailable)(self)
        details['bathrooms'] = Dict('characteristics/bathroomCount',
                                    default=NotAvailable)(self)
        details['creationDate'] = FromTimestamp(
            Dict('characteristics/creationDate', default=NotAvailable),
            default=NotAvailable)(self)
        details['availabilityDate'] = Dict(
            'characteristics/estateAvailabilityDate',
            default=NotAvailable)(self)
        details['exposure'] = Dict('characteristics/exposure',
                                   default=NotAvailable)(self)
        details['heatingType'] = Dict('characteristics/heatingType',
                                      default=NotAvailable)(self)
        details['floor'] = Dict('characteristics/floor',
                                default=NotAvailable)(self)
        details['bedrooms'] = Dict('characteristics/bedroomCount',
                                   default=NotAvailable)(self)
        details['isFurnished'] = Dict('characteristics/isFurnished',
                                      default=NotAvailable)(self)
        rooms = Dict('characteristics/roomCount', default=[])(self)
        if rooms:
            details['rooms'] = rooms[0]
        details['available'] = Dict('characteristics/isAvailable',
                                    default=NotAvailable)(self)
        agency = Dict('agency', default=NotAvailable)(self)
        if not hasattr(agency, 'get'):
            # Missing agency: the NotAvailable default has no ``get``
            # (presumably - confirm), so fall back to an empty mapping.
            agency = {}
        details['agency'] = ', '.join([
            x for x in [
                agency.get('corporateName', ''),
                agency.get('corporateAddress', ''),
                agency.get('corporatePostalCode', ''),
                agency.get('corporateCity', '')
            ] if x
        ])
        return details
def filter(self, txt):
    """Run the parent filter on *txt*, then resolve the result as a currency.

    The resolution is delegated to ``BaseCurrency.get_currency``.
    """
    cleaned = super(Currency, self).filter(txt)
    return BaseCurrency.get_currency(cleaned)
def get_currency(self):
    """Return what ``Currency.get_currency`` gives for the dollar sign."""
    # Victoria's Secret uses only U.S. dollars.
    dollar_sign = u'$'
    return Currency.get_currency(dollar_sign)
def obj_currency(self):
    """Resolve the currency from the text of the ``titres`` table cell."""
    cell_text = CleanText(TableCell('titres'))(self)
    return Currency.get_currency(cell_text)
def get_currency(self):
    """Return what ``Currency.get_currency`` gives for the dollar sign."""
    # Ideel uses only U.S. dollars.
    dollar_sign = u'$'
    return Currency.get_currency(dollar_sign)
class get_housing(ItemElement):
    """Fill a ``Housing`` object from an ``adview`` dictionary."""

    klass = Housing

    def parse(self, el):
        """Store the advert attributes in the environment as ``details``.

        Each attribute contributes its ``key`` mapped to its ``value_label``.
        """
        details = {}
        for attribute in self.el['adview']['attributes']:
            details[attribute['key']] = attribute['value_label']
        self.env['details'] = details

    obj_id = Env('_id')

    obj_area = CleanDecimal(PopDetail('square', default=0),
                            default=NotAvailable)
    obj_rooms = CleanDecimal(PopDetail('rooms', default=0),
                             default=NotAvailable)

    def obj_GES(self):
        """Look up the first character of the ``ges`` detail on ``ENERGY_CLASS``."""
        ges_text = CleanText(PopDetail('ges', default='|'))(self)
        grade = ges_text[0]
        return getattr(ENERGY_CLASS, grade, NotAvailable)

    def obj_DPE(self):
        """Look up the first character of ``energy_rate`` on ``ENERGY_CLASS``."""
        dpe_text = CleanText(PopDetail('energy_rate', default='|'))(self)
        grade = dpe_text[0]
        return getattr(ENERGY_CLASS, grade, NotAvailable)

    def obj_house_type(self):
        """Map the lower-cased ``real_estate_type`` detail to ``HOUSE_TYPES``."""
        kind = CleanText(PopDetail('real_estate_type'),
                         default=' ')(self).lower()
        known_types = {
            'parking': HOUSE_TYPES.PARKING,
            'appartement': HOUSE_TYPES.APART,
            'maison': HOUSE_TYPES.HOUSE,
            'terrain': HOUSE_TYPES.LAND,
        }
        return known_types.get(kind, HOUSE_TYPES.OTHER)

    def obj_utilities(self):
        """Return INCLUDED when ``charges_included`` is "Oui", else EXCLUDED."""
        answer = CleanText(PopDetail('charges_included', default='Non'),
                           default=NotAvailable)(self)
        return UTILITIES.INCLUDED if answer == "Oui" else UTILITIES.EXCLUDED

    obj_title = Dict('adview/subject')
    obj_cost = CleanDecimal(Dict('adview/price/0', default=NotAvailable),
                            default=Decimal(0))
    obj_currency = BaseCurrency.get_currency(u'€')
    obj_text = Dict('adview/body')
    obj_location = Dict('adview/location/city_label')

    def obj_advert_type(self):
        """Return PROFESSIONAL when the owner type is ``pro``, else PERSONAL."""
        owner_type = Dict('adview/owner/type')(self)
        if owner_type == u'pro':
            return ADVERT_TYPES.PROFESSIONAL
        return ADVERT_TYPES.PERSONAL

    obj_date = DateTime(Dict('adview/first_publication_date'))

    def obj_photos(self):
        """Return one ``HousingPhoto`` per large image URL."""
        large_urls = Dict('adview/images/urls_large', default=[])(self)
        return [HousingPhoto(url) for url in large_urls]

    def obj_type(self):
        """Derive the post type from the advert category id.

        Category 11 is a sharing; category 10 is a rent, furnished when the
        ``furnished`` detail says so; anything else is a sale.
        """
        try:
            category = int(Dict('adview/category_id')(self))
        except ValueError:
            category = None

        if category == 11:
            return POSTS_TYPES.SHARING
        if category != 10:
            return POSTS_TYPES.SALE

        # The ``furnished`` detail is only consumed for category 10.
        furnished = CleanText(PopDetail('furnished', default=' '))(self)
        if furnished.lower() == u'meublé':
            return POSTS_TYPES.FURNISHED_RENT
        return POSTS_TYPES.RENT

    obj_price_per_meter = PricePerMeterFilter()
    obj_url = Dict('adview/url')
    obj_details = Env('details')
def get_title(self, obj):
    """Return the title line for *obj*.

    The line starts with ``obj.type``. A coloured chunk is appended for the
    price, for a lateness greater than ``datetime.time()`` and for a
    non-blank information text, whenever the attribute exists and is not
    empty.
    """
    def is_set(name):
        # Attribute exists and holds a non-empty value.
        return hasattr(obj, name) and not empty(getattr(obj, name))

    def chunk(text, *style):
        # Cyan dash separator followed by the styled text.
        return u' %s %s' % (self.colored(u'—', 'cyan'),
                            self.colored(text, *style))

    title = obj.type

    if is_set('price'):
        price_text = '%6.2f %s' % (obj.price,
                                   Currency.currency2txt(obj.currency))
        title += chunk(price_text, 'green')

    if is_set('late') and obj.late > datetime.time():
        title += chunk('Late: %s' % obj.late, 'red', 'bold')

    if is_set('information') and obj.information.strip() != '':
        title += chunk(obj.information, 'red')

    return title