def parse(self, el):
    """Extract the ``ava_data`` script object and keep its first product.

    The text of the page's ``<script>`` elements is searched for the
    ``var ava_data = {...};`` assignment. The captured text is then patched
    so that ``json.loads`` can read it, and the first entry of its
    ``products`` list is stored on ``self.house_json_datas``.

    ``el`` is not used by this method.
    """
    payload = Regexp(CleanText('//script'), "var ava_data = ({.+?});")(self)

    # Wrap these bare words in double quotes (presumably unquoted object
    # keys in the page's JavaScript -- confirm against a live page).
    for bare_key in ("logged", "lengthcarrousel", "products"):
        payload = payload.replace(bare_key, '"%s"' % bare_key)

    # Drop the inline JavaScript comment that would break JSON parsing.
    payload = payload.replace("// // ANNONCES_SIMILAIRE / RECO", "")

    decoded = json.loads(payload)
    self.house_json_datas = decoded['products'][0]
def obj_rdate(self):
    """Build the ``rdate`` value from a date embedded in the ``raw`` field.

    The raw text is searched for either a ``dd/mm/yy`` token or a bare
    six-digit token, each delimited by spaces. When nothing is found, the
    value of the ``date`` field is returned instead.
    """
    # Raw string literal: a backslash-d in a plain literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    # NOTE(review): in the second alternative the (?!NUM) lookahead is
    # immediately followed by a literal space, so it cannot reject anything
    # as written; a lookbehind may have been intended -- confirm.
    found = Regexp(Field('raw'),
                   r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
                   default=NotAvailable)(self)
    if not found:
        return Field('date')(self)

    digits = found.replace('/', '')
    # Re-assemble as dd-mm-yy and parse it day-first.
    return Date(dayfirst=True).filter(
        '%s-%s-%s' % (digits[:2], digits[2:4], digits[4:]))
def obj_rdate(self):
    """Build the ``rdate`` value from a date embedded in the ``raw`` field.

    If ``self.obj.rdate`` is already set it is returned unchanged.
    Otherwise the raw text is searched for either a ``dd/mm/yy`` token or a
    bare six-digit token, each delimited by spaces. When nothing is found,
    the value of the ``date`` field is returned instead.
    """
    if self.obj.rdate:
        # Transaction.Raw may have already set it
        return self.obj.rdate

    # Raw string literal: a backslash-d in a plain literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    # NOTE(review): in the second alternative the (?!NUM) lookahead is
    # immediately followed by a literal space, so it cannot reject anything
    # as written; a lookbehind may have been intended -- confirm.
    found = Regexp(Field('raw'),
                   r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
                   default=NotAvailable)(self)
    if not found:
        return Field('date')(self)

    digits = found.replace('/', '')
    # Re-assemble as dd-mm-yy and parse it day-first.
    return Date(dayfirst=True).filter(
        '%s-%s-%s' % (digits[:2], digits[2:4], digits[4:]))
def obj_rdate(self):
    """Build the ``rdate`` value from a date embedded in the ``raw`` field.

    If ``self.obj.rdate`` is already set it is returned unchanged.
    Otherwise the raw text is searched for either a ``dd/mm/yy`` token or a
    bare six-digit token, each delimited by spaces. When nothing is found,
    the value of the ``date`` field is returned instead.
    """
    if self.obj.rdate:
        # Transaction.Raw may have already set it
        return self.obj.rdate

    # Raw string literal: a backslash-d in a plain literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    # NOTE(review): in the second alternative the (?!NUM) lookahead is
    # immediately followed by a literal space, so it cannot reject anything
    # as written; a lookbehind may have been intended -- confirm.
    found = Regexp(Field('raw'),
                   r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
                   default=NotAvailable)(self)
    if not found:
        return Field('date')(self)

    digits = found.replace('/', '')
    # Sometimes the user enters an invalid date 16/17/19 for example
    # (hence the NotAvailable default passed to the Date filter).
    return Date(dayfirst=True, default=NotAvailable).filter(
        '%s-%s-%s' % (digits[:2], digits[2:4], digits[4:]))
def obj_rdate(self):
    """Build the ``rdate`` value from a date embedded in the ``raw`` field.

    If ``self.obj.rdate`` is already set it is returned unchanged.
    Otherwise the raw text is searched for either a ``dd/mm/yy`` token or a
    bare six-digit token, each delimited by spaces. When nothing is found,
    the value of the ``date`` field is returned instead.
    """
    if self.obj.rdate:
        # Transaction.Raw may have already set it
        return self.obj.rdate

    # Raw string literal: a backslash-d in a plain literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    # NOTE(review): in the second alternative the (?!NUM) lookahead is
    # immediately followed by a literal space, so it cannot reject anything
    # as written; a lookbehind may have been intended -- confirm.
    found = Regexp(Field('raw'),
                   r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
                   default=NotAvailable)(self)
    if not found:
        return Field('date')(self)

    digits = found.replace('/', '')
    # Sometimes the user enters an invalid date 16/17/19 for example
    # (hence the NotAvailable default passed to the Date filter).
    return Date(dayfirst=True, default=NotAvailable).filter(
        '%s-%s-%s' % (digits[:2], digits[2:4], digits[4:]))
def obj_url(self):
    """Return the "Download torrent" link without its query string, as https.

    The link is taken from the ``dltorrent`` block inside ``torrentinfo``.
    Everything from the final ``?`` onwards is dropped, and every
    ``http://`` occurrence is rewritten to ``https://``.
    """
    link = AbsoluteLink(
        '//div[has-class("torrentinfo")]//div[has-class("dltorrent")]'
        '//a[text()="Download torrent"]')
    # Raw strings: a backslash before "?" in a plain literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    # NOTE(review): no default is supplied, so a link without any "?"
    # presumably fails to match -- confirm that is intended.
    bare_url = Regexp(link, r'(^.*)\?.*', r'\1')(self)
    return bare_url.replace('http://', 'https://')
def obj_url(self):
    """Return the entry's link without its query string, as https.

    The link is the first ``<a>`` inside the ``tt-name`` block, relative to
    the current element. Everything from the final ``?`` onwards is dropped,
    and every ``http://`` occurrence is rewritten to ``https://``.
    """
    link = AbsoluteLink('.//div[has-class("tt-name")]/a[1]')
    # Raw strings: a backslash before "?" in a plain literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    # NOTE(review): no default is supplied, so a link without any "?"
    # presumably fails to match -- confirm that is intended.
    bare_url = Regexp(link, r'(^.*)\?.*', r'\1')(self)
    return bare_url.replace('http://', 'https://')
def obj_rdate(self):
    """Build the ``rdate`` value from a date embedded in the ``raw`` field.

    The raw text is searched for either a ``dd/mm/yy`` token or a bare
    six-digit token, each delimited by spaces. When nothing is found, the
    value of the ``date`` field is returned instead.
    """
    # Raw string literal: a backslash-d in a plain literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    # NOTE(review): in the second alternative the (?!NUM) lookahead is
    # immediately followed by a literal space, so it cannot reject anything
    # as written; a lookbehind may have been intended -- confirm.
    found = Regexp(Field('raw'),
                   r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
                   default=NotAvailable)(self)
    if not found:
        return Field('date')(self)

    digits = found.replace('/', '')
    # Re-assemble as dd-mm-yy and parse it day-first.
    return Date(dayfirst=True).filter(
        '%s-%s-%s' % (digits[:2], digits[2:4], digits[4:]))
def obj_split_path(self):
    """Return the two-element split path for this link.

    The first element is ``SITE.CREATIVE.get('id')``. The second is the part
    of ``@href`` that follows a two-character leading path segment, with
    every ``/`` replaced by ``^``. When the href does not match, the second
    element falls back to ``accueil``.
    """
    # Raw string literal: a backslash-w in a plain literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    sub_path = Regexp(CleanText('./@href'), r'/\w{2}/(.*)',
                      default=u'accueil')(self)
    return [SITE.CREATIVE.get('id'), sub_path.replace('/', '^')]
def obj_split_path(self):
    """Return the two-element split path for this link.

    The first element is ``SITE.CREATIVE.get('id')``. The second is the part
    of ``@href`` that follows a two-character leading path segment, with
    every ``/`` replaced by ``^``. When the href does not match, the second
    element falls back to ``accueil``.
    """
    # Raw string literal: a backslash-w in a plain literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    sub_path = Regexp(CleanText('./@href'), r'/\w{2}/(.*)',
                      default=u'accueil')(self)
    return [SITE.CREATIVE.get('id'), sub_path.replace('/', '^')]