示例#1
0
    def _parse_row(self, row, tags=None):
        """Turn one programme row into a ``Showtime``.

        :param row: element exposing ``cssselect_first``; the code reads
            its ``.movie a:not(.tag)`` link and its ``.date`` cell.
        :param tags: handed to ``self._parse_tags`` together with ``row``.
        :returns: a ``Showtime`` built for the module-level ``cinema``.
        """
        link_el = row.cssselect_first('.movie a:not(.tag)')
        url = link_el.link()
        title = link_el.text_content()

        # The ``.date`` cell text is split on line breaks and must yield
        # exactly two pieces: the date and the colon-separated time.
        date_cell = row.cssselect_first('.date')
        date_text, time_text = re.split(
            r'[\r\n]+',
            date_cell.text_content(whitespace=True),
        )

        day = parsers.date_cs(date_text)
        clock = datetime.time(*[int(part) for part in time_text.split(':')])
        local_start = datetime.datetime.combine(day, clock)
        # NOTE(review): presumably converts Prague local time to UTC --
        # confirm against ``times.to_universal``.
        starts_at = times.to_universal(local_start, 'Europe/Prague')

        row_tags = self._parse_tags(row, tags)
        details = self._parse_details(url)

        film = ScrapedFilm(title_main_scraped=title, url=url, **details)
        return Showtime(
            cinema=cinema,
            film_scraped=film,
            starts_at=starts_at,
            tags=row_tags,
            url=self.url,
        )
示例#2
0
    def _parse_row(self, row, tags=None):
        """Build the ``Showtime`` described by a single row element.

        The film link (``.movie a:not(.tag)``) supplies the title and the
        detail URL; the ``.date`` cell supplies the start date and time.
        ``tags`` is forwarded to ``self._parse_tags`` along with ``row``.
        """
        film_link = row.cssselect_first('.movie a:not(.tag)')
        url = film_link.link()
        title = film_link.text_content()

        # Date and time sit on separate lines of the same cell; the
        # unpacking below requires exactly two pieces.
        cell_text = row.cssselect_first('.date').text_content(whitespace=True)
        pieces = re.split(r'[\r\n]+', cell_text)
        date_part, time_part = pieces

        show_date = parsers.date_cs(date_part)
        time_fields = [int(field) for field in time_part.split(':')]
        show_time = datetime.time(*time_fields)
        # NOTE(review): ``times.to_universal`` presumably normalises the
        # Prague wall-clock time to UTC -- confirm.
        starts_at = times.to_universal(
            datetime.datetime.combine(show_date, show_time),
            'Europe/Prague',
        )

        parsed_tags = self._parse_tags(row, tags)
        details = self._parse_details(url)

        showtime_kwargs = dict(
            cinema=cinema,
            film_scraped=ScrapedFilm(
                title_main_scraped=title,
                url=url,
                **details
            ),
            starts_at=starts_at,
            tags=parsed_tags,
            url=self.url,
        )
        return Showtime(**showtime_kwargs)
示例#3
0
 def __call__(self):
     """Yield the result of ``self._parse_row`` for every non-day row.

     Rows whose element has the ``day`` class are not yielded; their text
     is parsed with ``parsers.date_cs`` and the result is passed to
     ``_parse_row`` for the rows that follow, until the next ``day`` row.
     """
     current_date = None
     for row in self._scrape_rows():
         if not row.element.has_class('day'):
             # Regular row: parse it using the most recent day heading
             # (``None`` if no heading has been seen yet).
             yield self._parse_row(row.element, current_date, row.url)
             continue
         current_date = parsers.date_cs(row.element.text_content())
示例#4
0
 def __call__(self):
     """Iterate over scraped rows and yield one parsed item per showtime row.

     A row carrying the ``day`` class only updates the date that is handed
     to ``self._parse_row`` for subsequent rows; it produces no output.
     """
     day = None
     for scraped in self._scrape_rows():
         is_day_heading = scraped.element.has_class('day')
         if is_day_heading:
             day = parsers.date_cs(scraped.element.text_content())
             continue
         # ``day`` is still ``None`` here if no heading preceded this row.
         yield self._parse_row(scraped.element, day, scraped.url)
示例#5
0
    def __call__(self):
        """Fetch ``self.url`` and yield a ``Showtime`` per ``.event`` element.

        For each event the heading (``h2``) gives the URL and the title,
        ``self.tag_re`` (pairs of compiled pattern and tag) is matched
        against the titles and the short description, and the
        ``.nextdate`` element gives the start date and time.
        """
        response = self.session.get(self.url)
        document = parsers.html(response.content, base_url=response.url)

        for event in document.cssselect('.event'):
            heading = event.cssselect_first('h2')
            url = heading.link()
            title = heading.text_content()

            # naive, but for now good enough: exactly one slash means
            # "main title / original title"
            parts = title.split('/')
            title_main, title_orig = (
                parts if len(parts) == 2 else (title, None)
            )

            details = event.cssselect_first('.descshort').text_content()
            cat = event.cssselect_first('.title-cat').text_content().lower()

            tags = []
            for pattern, tag in self.tag_re:
                # A match inside a title is both recorded as a tag and
                # removed from that title.
                if pattern.search(title_main):
                    tags.append(tag)
                    title_main = pattern.sub('', title_main).strip()
                if title_orig and pattern.search(title_orig):
                    tags.append(tag)
                    title_orig = pattern.sub('', title_orig).strip()
                # A match in the description only adds the tag.
                if pattern.search(details):
                    tags.append(tag)
            # Every category except 'filmy' is kept as a tag as well.
            if cat != 'filmy':
                tags.append(cat)

            day = parsers.date_cs(
                event.cssselect_first('.nextdate strong').text
            )
            clock_text = (
                event.cssselect_first('.nextdate .evttime').text_content()
            )
            clock = time(*[int(field) for field in clock_text.split(':')])

            starts_at = times.to_universal(
                datetime.combine(day, clock), self.tz
            )

            film = ScrapedFilm(
                title_main_scraped=title_main,
                # an original title stripped down to '' becomes None
                title_orig=title_orig or None,
            )
            yield Showtime(
                cinema=cinema,
                film_scraped=film,
                starts_at=starts_at,
                url=url,
                url_booking=self.url_booking,
                # duplicates collapse; every tag maps to None
                tags=dict.fromkeys(tags),
            )
示例#6
0
    def __call__(self):
        """Download the programme page and yield one ``Showtime`` per event.

        Every ``.event`` element contributes its ``h2`` heading (link and
        title), its ``.descshort`` text, its ``.title-cat`` category and
        the date/time found under ``.nextdate``. Tags are collected by
        running each ``(pattern, tag)`` pair of ``self.tag_re`` over the
        titles and the description.
        """
        resp = self.session.get(self.url)
        page = parsers.html(resp.content, base_url=resp.url)

        for event_el in page.cssselect('.event'):
            header = event_el.cssselect_first('h2')
            url = header.link()
            full_title = header.text_content()

            # naive, but for now good enough
            pieces = full_title.split('/')
            if len(pieces) != 2:
                title_main, title_orig = full_title, None
            else:
                title_main, title_orig = pieces

            description = event_el.cssselect_first('.descshort').text_content()
            category = (
                event_el.cssselect_first('.title-cat').text_content().lower()
            )

            found_tags = []
            for tag_pattern, tag_name in self.tag_re:
                # Title matches are recorded and cut out of the title.
                if tag_pattern.search(title_main):
                    found_tags.append(tag_name)
                    title_main = tag_pattern.sub('', title_main).strip()
                if title_orig and tag_pattern.search(title_orig):
                    found_tags.append(tag_name)
                    title_orig = tag_pattern.sub('', title_orig).strip()
                # Description matches are recorded only.
                if tag_pattern.search(description):
                    found_tags.append(tag_name)
            # The 'filmy' category is the only one not turned into a tag.
            if category != 'filmy':
                found_tags.append(category)

            date_el = event_el.cssselect_first('.nextdate strong')
            show_date = parsers.date_cs(date_el.text)

            time_text = (
                event_el.cssselect_first('.nextdate .evttime').text_content()
            )
            show_time = time(*(int(n) for n in time_text.split(':')))

            local_start = datetime.combine(show_date, show_time)
            starts_at = times.to_universal(local_start, self.tz)

            yield Showtime(
                cinema=cinema,
                film_scraped=ScrapedFilm(
                    title_main_scraped=title_main,
                    # falsy original title (None or '') is passed as None
                    title_orig=title_orig or None,
                ),
                starts_at=starts_at,
                url=url,
                url_booking=self.url_booking,
                # repeated tags collapse into a single key mapped to None
                tags=dict.fromkeys(found_tags),
            )
示例#7
0
    def _parse_item(self, item):
        """Turn one programme item into a ``Showtime``.

        :param item: element exposing ``cssselect_first``; the code reads
            its ``.program-title`` and ``.program-date`` children.
        :returns: a ``Showtime`` built for the module-level ``cinema``.
        """
        title_selector = '.program-title'
        title_main = item.cssselect_first(title_selector).text_content()
        url = item.cssselect_first(title_selector).link()

        # The date text is split on a whitespace-delimited "v" or "ve";
        # the unpacking requires exactly two pieces (date, then time).
        date_text = item.cssselect_first('.program-date').text_content()
        day_text, clock_text = re.split(r'\s+ve?\s+', date_text)

        day = parsers.date_cs(day_text)
        clock = datetime.time(*[int(part) for part in clock_text.split(':')])
        local_start = datetime.datetime.combine(day, clock)
        # NOTE(review): presumably converts Prague local time to UTC --
        # confirm against ``times.to_universal``.
        starts_at = times.to_universal(local_start, 'Europe/Prague')

        details = self._parse_details(url)

        film = ScrapedFilm(title_main_scraped=title_main, url=url, **details)
        return Showtime(
            cinema=cinema,
            film_scraped=film,
            starts_at=starts_at,
            url=self.url,
        )
示例#8
0
    def _parse_item(self, item):
        """Build the ``Showtime`` described by a single programme item.

        The ``.program-title`` element supplies the title and the detail
        URL; the ``.program-date`` text supplies the start date and time.
        Extra film fields come from ``self._parse_details(url)``.
        """
        title_main = item.cssselect_first('.program-title').text_content()
        url = item.cssselect_first('.program-title').link()

        # Date and time are separated by " v " / " ve " in the source
        # text; anything but exactly two pieces fails the unpacking.
        raw_when = item.cssselect_first('.program-date').text_content()
        pieces = re.split(r'\s+ve?\s+', raw_when)
        date_part, time_part = pieces

        show_date = parsers.date_cs(date_part)
        time_fields = [int(field) for field in time_part.split(':')]
        show_time = datetime.time(*time_fields)
        # NOTE(review): ``times.to_universal`` presumably normalises the
        # Prague wall-clock time to UTC -- confirm.
        starts_at = times.to_universal(
            datetime.datetime.combine(show_date, show_time),
            'Europe/Prague',
        )

        details = self._parse_details(url)

        showtime_kwargs = dict(
            cinema=cinema,
            film_scraped=ScrapedFilm(
                title_main_scraped=title_main,
                url=url,
                **details
            ),
            starts_at=starts_at,
            url=self.url,
        )
        return Showtime(**showtime_kwargs)