Пример #1
0
 def filter(self, el):
     """Extract a time of day from the schedule text of ``el``.

     The second item of ``el`` is used when there is more than one item,
     otherwise the first. Its cleaned text is split on ``' - '`` and the
     second-to-last field is searched for a time such as ``20h30`` or
     ``20h`` (minutes default to 0).

     :param el: indexable collection of elements to read the text from
     :return: ``time(hours, minutes)``
     :raises IndexError: if the text has fewer than two ``' - '`` fields
     :raises AttributeError: if the field contains no recognisable time
     """
     index = 1 if len(el) > 1 else 0
     content = CleanText.clean(CleanText('.', ['HORAIRES'])(el[index]))
     a_time = content.split(' - ')[-2]

     # Look for an explicit "<hours>h[<minutes>]" first. A single pattern
     # demanding two digit groups backtracks on "20h" and reads it as
     # hh=2, mm=0, and it does not match "9h" at all.
     m = re.search(r'(?P<hh>\d+)h(?P<mm>\d*)', a_time)
     if m is None:
         # Fallback kept for text without an "h" separator.
         m = re.search(r'(?P<hh>\d+)h?(?P<mm>\d+)', a_time)

     # ``mm`` is an empty string when only the hour was given.
     return time(int(m.group('hh')), int(m.group('mm') or 0))
Пример #2
0
    def filter(self, el):
        """Extract a price from the schedule text of ``el``.

        The second item of ``el`` is used when there is more than one item,
        otherwise the first. Its cleaned text is split on ``' - '`` and the
        last field is searched for the first number, which may use a comma
        as decimal separator (``12,50``).

        :param el: indexable collection of elements to read the text from
        :return: the price as a ``float``, or ``0.0`` when the field holds
                 no number
        """
        index = 1 if len(el) > 1 else 0
        content = CleanText.clean(CleanText('.', ['HORAIRES'])(el[index]))
        a_price = content.split(' - ')[-1]

        # Search the field itself. ``" ".join(a_price)`` on a string puts a
        # space between every character, which breaks "12,50" into single
        # digits and yields 1.0.
        m = re.search(r"\d*,\d+|\d+", a_price)
        if m is None:
            return float(0)

        return float(m.group(0).replace(',', '.'))
Пример #3
0
    def filter(self, el):
        """Parse the date that starts the cleaned text of ``el[0]``.

        The text before the first ``' - '`` is taken as the date. Each
        ``(fr, en)`` pair of ``date_util.DATE_TRANSLATE_FR`` is applied with
        ``fr.sub(en, ...)`` before parsing (presumably French to English
        day/month names; confirm against ``date_util``).

        When the text has no year, the current year is appended, or the
        next year if the parsed month is earlier than the current month.

        :param el: indexable collection; only ``el[0]`` is read
        :return: ``datetime`` parsed with the format ``"%A %d %B %Y"``
        :raises ValueError: if the text has no ``' - '`` separator or the
                            date cannot be parsed
        """
        content = CleanText.clean(CleanText(CleanHTML('.'), ['*'])(el[0]))
        a_date = content[0:content.index(' - ')]

        for fr, en in date_util.DATE_TRANSLATE_FR:
            a_date = fr.sub(en, a_date)

        # Take one clock sample so the month and year cannot disagree
        # across a year boundary.
        now = datetime.now()

        try:
            # Probe with a leap year appended. A year-less strptime
            # defaults to 1900, which rejects 29 February.
            _month = datetime.strptime(a_date + u' 2000', "%A %d %B %Y").month
        except ValueError:
            # The text already carries a year or is malformed. The final
            # strptime below accepts or rejects it.
            pass
        else:
            year = now.year + 1 if now.month > _month else now.year
            a_date += u' %i' % year

        return datetime.strptime(a_date, "%A %d %B %Y")
Пример #4
0
    def next_page(self):
        """Return the request for the next page of results, if there is one.

        The form matching ``//form[@id="paginationForm"]`` is looked up on
        ``self.page``. Its cleaned text is searched for a ``"<current> /
        <last>"`` counter. When another page remains, the form's ``page``
        field is set to the next page number.

        :return: ``form.request`` for the next page, or ``None`` when the
                 form is missing, the counter is not found, or the current
                 page is already the last one
        """
        try:
            form = self.page.get_form('//form[@id="paginationForm"]')
        except FormNotFound:
            return None

        text = CleanText.clean(form.el)
        # Raw string: "\d" in a plain literal is an invalid escape sequence
        # on modern Python. The pattern has no anchors, so re.MULTILINE is
        # not needed.
        m = re.search(r"(\d+) / (\d+)", text or "")
        if not m:
            return None

        cur = int(m.group(1))
        last = int(m.group(2))

        # ``>=`` also stops when the counter reads past the total.
        if cur >= last:
            return None

        form["page"] = str(cur + 1)
        return form.request