Пример #1
0
 def parse(self, el):
     """Load the first ``products`` entry of the inline ``ava_data`` object.

     The text captured after ``var ava_data =`` in the page's script
     content is patched into something ``json.loads`` accepts, and the
     first element of its ``products`` list is stored on
     ``self.house_json_datas``. ``el`` is not used by this method.
     """
     raw_js = Regexp(CleanText('//script'),
                     "var ava_data = ({.+?});")(self)
     # Wrap the bare words in double quotes, in this exact order.
     # NOTE(review): str.replace touches every occurrence, including any
     # that might appear inside values -- confirm the payload never does.
     for bare_key in ("logged", "lengthcarrousel", "products"):
         raw_js = raw_js.replace(bare_key, '"%s"' % bare_key)
     # Remove the inline JavaScript comment before parsing.
     raw_js = raw_js.replace("// // ANNONCES_SIMILAIRE / RECO", "")
     first_product = json.loads(raw_js)['products'][0]
     self.house_json_datas = first_product
Пример #2
0
 def obj_rdate(self):
     """Return a date found in the ``raw`` field, else the ``date`` field.

     The ``raw`` field is searched for either ``dd/mm/yy`` or six bare
     digits, each surrounded by spaces. When nothing is found the value
     of the ``date`` field is returned instead.
     """
     # Raw string: the pattern contains regex escapes that are invalid
     # escape sequences in a plain string literal.
     # NOTE(review): the (?!NUM) lookahead sits between two literal
     # spaces, so it cannot reject anything -- confirm the intent.
     s = Regexp(Field('raw'),
                r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
                default=NotAvailable)(self)
     if not s:
         return Field('date')(self)
     # Normalize both accepted shapes to six digits, then re-split them.
     s = s.replace('/', '')
     return Date(dayfirst=True).filter(
         '%s-%s-%s' % (s[:2], s[2:4], s[4:]))
Пример #3
0
            def obj_rdate(self):
                """Return the object's ``rdate``, deriving it when still unset.

                An ``rdate`` already present on ``self.obj`` is returned as
                is. Otherwise the ``raw`` field is searched for ``dd/mm/yy``
                or six bare digits (each surrounded by spaces); when nothing
                is found the ``date`` field is returned instead.
                """
                if self.obj.rdate:
                    # Transaction.Raw may have already set it
                    return self.obj.rdate

                # Raw string: the pattern contains regex escapes that are
                # invalid escape sequences in a plain string literal.
                # NOTE(review): the (?!NUM) lookahead sits between two
                # literal spaces, so it cannot reject anything -- confirm.
                s = Regexp(Field('raw'),
                           r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
                           default=NotAvailable)(self)
                if not s:
                    return Field('date')(self)
                # Normalize both accepted shapes to six digits.
                s = s.replace('/', '')
                return Date(dayfirst=True).filter(
                    '%s-%s-%s' % (s[:2], s[2:4], s[4:]))
Пример #4
0
            def obj_rdate(self):
                """Return the object's ``rdate``, deriving it when still unset.

                An ``rdate`` already present on ``self.obj`` is returned as
                is. Otherwise the ``raw`` field is searched for ``dd/mm/yy``
                or six bare digits (each surrounded by spaces); when nothing
                is found the ``date`` field is returned instead. The final
                ``Date`` filter is given ``default=NotAvailable``.
                """
                if self.obj.rdate:
                    # Transaction.Raw may have already set it
                    return self.obj.rdate

                # Raw string: the pattern contains regex escapes that are
                # invalid escape sequences in a plain string literal.
                # NOTE(review): the (?!NUM) lookahead sits between two
                # literal spaces, so it cannot reject anything -- confirm.
                s = Regexp(Field('raw'),
                           r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
                           default=NotAvailable)(self)
                if not s:
                    return Field('date')(self)
                # Normalize both accepted shapes to six digits.
                s = s.replace('/', '')
                # Sometimes the user enters an invalid date 16/17/19 for example
                return Date(dayfirst=True, default=NotAvailable).filter(
                    '%s-%s-%s' % (s[:2], s[2:4], s[4:]))
Пример #5
0
            def obj_rdate(self):
                """Return the object's ``rdate``, deriving it when still unset.

                An ``rdate`` already present on ``self.obj`` is returned as
                is. Otherwise the ``raw`` field is searched for ``dd/mm/yy``
                or six bare digits (each surrounded by spaces); when nothing
                is found the ``date`` field is returned instead. The final
                ``Date`` filter is given ``default=NotAvailable``.
                """
                if self.obj.rdate:
                    # Transaction.Raw may have already set it
                    return self.obj.rdate

                # Raw string: the pattern contains regex escapes that are
                # invalid escape sequences in a plain string literal.
                # NOTE(review): the (?!NUM) lookahead sits between two
                # literal spaces, so it cannot reject anything -- confirm.
                s = Regexp(Field('raw'),
                           r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
                           default=NotAvailable)(self)
                if not s:
                    return Field('date')(self)
                # Normalize both accepted shapes to six digits.
                s = s.replace('/', '')
                # Sometimes the user enters an invalid date 16/17/19 for example
                return Date(dayfirst=True, default=NotAvailable).filter(
                    '%s-%s-%s' % (s[:2], s[2:4], s[4:]))
Пример #6
0
 def obj_url(self):
     """Return the "Download torrent" link rewritten to use ``https://``.

     The absolute link is passed through a regex that requires a literal
     ``?`` and keeps its first capture group (presumably everything before
     the query string -- confirm against the Regexp filter's template
     handling).
     """
     link = AbsoluteLink('//div[has-class("torrentinfo")]//div[has-class("dltorrent")]//a[text()="Download torrent"]')
     # Raw strings: both the pattern and the template contain backslashes.
     # NOTE(review): no default is given, so a link without "?" does not
     # match -- confirm how Regexp reports that.
     url = Regexp(link, r'(^.*)\?.*', r'\1')(self)
     return url.replace('http://', 'https://')
Пример #7
0
 def obj_url(self):
     """Return the first ``tt-name`` link rewritten to use ``https://``.

     The absolute link is passed through a regex that requires a literal
     ``?`` and keeps its first capture group (presumably everything before
     the query string -- confirm against the Regexp filter's template
     handling).
     """
     link = AbsoluteLink('.//div[has-class("tt-name")]/a[1]')
     # Raw strings: both the pattern and the template contain backslashes.
     # NOTE(review): no default is given, so a link without "?" does not
     # match -- confirm how Regexp reports that.
     url = Regexp(link, r'(^.*)\?.*', r'\1')(self)
     return url.replace('http://', 'https://')
Пример #8
0
 def obj_rdate(self):
     """Return a date found in the ``raw`` field, else the ``date`` field.

     The ``raw`` field is searched for either ``dd/mm/yy`` or six bare
     digits, each surrounded by spaces. When nothing is found the value
     of the ``date`` field is returned instead.
     """
     # Raw string: the pattern contains regex escapes that are invalid
     # escape sequences in a plain string literal.
     # NOTE(review): the (?!NUM) lookahead sits between two literal
     # spaces, so it cannot reject anything -- confirm the intent.
     s = Regexp(Field('raw'),
                r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
                default=NotAvailable)(self)
     if not s:
         return Field('date')(self)
     # Normalize both accepted shapes to six digits, then re-split them.
     s = s.replace('/', '')
     return Date(dayfirst=True).filter(
         '%s-%s-%s' % (s[:2], s[2:4], s[4:]))
Пример #9
0
 def obj_split_path(self):
     """Return a two-element path: the CREATIVE site id and a page id.

     The page id is the part of the element's ``href`` captured after a
     two-character first path segment, with ``/`` replaced by ``^``; it
     defaults to ``accueil`` when the pattern yields nothing.
     """
     # Raw string: the pattern contains a regex escape that is an invalid
     # escape sequence in a plain string literal.
     _id = Regexp(CleanText('./@href'),
                  r'/\w{2}/(.*)',
                  default=u'accueil')(self)
     return [SITE.CREATIVE.get('id'), _id.replace('/', '^')]
Пример #10
0
 def obj_split_path(self):
     """Return a two-element path: the CREATIVE site id and a page id.

     The page id is the part of the element's ``href`` captured after a
     two-character first path segment, with ``/`` replaced by ``^``; it
     defaults to ``accueil`` when the pattern yields nothing.
     """
     # Raw string: the pattern contains a regex escape that is an invalid
     # escape sequence in a plain string literal.
     _id = Regexp(CleanText('./@href'),
                  r'/\w{2}/(.*)',
                  default=u'accueil')(self)
     return [SITE.CREATIVE.get('id'), _id.replace('/', '^')]