Пример #1
0
    def parse(self, response):
        """Yield a ``Show`` item for every kinpri screening on the page.

        The theater name is looked up in ``self.db`` by the originally
        requested URL: the first entry of ``redirect_urls`` when the request
        was redirected, otherwise ``response.url``.

        Seat availability is not scraped by this callback. Every item is
        emitted with ``remaining_seats_num`` 0, an empty ``remaining_seats``
        list, and ``None`` for ``total_seats_num``, ``reserved_seats`` and
        ``reservation_url``.
        """
        # get theater name
        if 'redirect_urls' in response.request.meta:
            request_url = response.request.meta['redirect_urls'][0]
        else:
            request_url = response.url
        # NOTE(review): if find_one() returns None for an unknown link this
        # line raises AttributeError -- confirm that is acceptable.
        theater = self.db.find_one({'link': request_url}).get('name')

        date = response.css('.schedule-body-day::text').extract_first()
        for movie in response.css('.schedule-body-section-item'):
            title = movie.css('.schedule-body-title::text').extract_first()

            # skip movies that are not kinpri
            if not utils.is_title_kinpri(title):
                continue

            for screen_node in movie.css('.schedule-screen'):
                screen = screen_node.css(
                    '.schedule-screen-title::text').extract_first()

                # A distinct loop variable keeps the screen node from being
                # shadowed by the individual show nodes.
                for show_node in screen_node.css('.schedule-item'):
                    show = Show()
                    show['updated'] = datetime.datetime.now()
                    show['theater'] = theater
                    show['schedule_url'] = response.url
                    show['date'] = date
                    show['title'] = title
                    show['movie_types'] = utils.get_kinpri_types(title)
                    show['screen'] = screen
                    show['start_time'] = show_node.css(
                        '.time .start::text').extract_first()
                    show['end_time'] = show_node.css(
                        '.time .end::text').extract_first()
                    show['ticket_state'] = show_node.css(
                        '.status::attr(class)').extract_first()
                    # TODO: follow the show's reservation link and scrape
                    # seat availability; until then emit placeholder values.
                    show['remaining_seats_num'] = 0
                    show['total_seats_num'] = None
                    show['reserved_seats'] = None
                    show['remaining_seats'] = []
                    show['reservation_url'] = None
                    yield show
    def parse_schedule(self, response):
        """Yield kinpri shows, or seat-check requests, from a daily schedule.

        For every show that has a reservation link a ``scrapy.Request`` to
        that link is yielded with the partially filled item in
        ``meta['show']`` and ``self.parse_check_continue`` as callback.
        Shows without a link are yielded directly with
        ``remaining_seats_num`` 0 and the other seat fields empty.

        The theater name is read from ``response.meta['theater']``.
        """
        # NOTE(review): the page URL itself is stored as the date --
        # presumably the URL identifies the day; confirm against the caller.
        date = response.url
        for movie in response.css('#dailyList>li'):
            title = movie.css('.movieTitle a::text').extract_first()

            # skip movies that are not kinpri
            if not utils.is_title_kinpri(title):
                continue

            for shows_row in movie.css('.tl>li'):
                # re_first() gives None instead of raising IndexError when
                # the row carries no screen number.
                screen = shows_row.css(
                    '.screenNumber img::attr(alt)').re_first(r'(\d+)')
                for s in shows_row.css('div'):
                    show = Show()
                    show['updated'] = datetime.datetime.now()
                    show['theater'] = response.meta['theater']
                    show['schedule_url'] = response.url
                    show['date'] = date
                    show['title'] = title
                    show['screen'] = screen
                    show['movie_types'] = utils.get_kinpri_types(title)
                    show['start_time'] = s.css(
                        '.startTime::text').extract_first()
                    show['end_time'] = s.css('.endTime::text').extract_first()
                    # Single character between square brackets in the icon's
                    # alt text, or None when there is no such icon.
                    show['ticket_state'] = s.css(
                        '.uolIcon .scheduleIcon::attr(alt)').re_first(
                            r'\[(.)\]')
                    # Look the link up on this show's own element; querying
                    # the whole movie element would hand the first link of
                    # the movie to every show.
                    reservation_url = s.css(
                        '.uolIcon a::attr(href)').extract_first()
                    if reservation_url:
                        show['reservation_url'] = reservation_url
                        yield scrapy.Request(
                            url=reservation_url,
                            callback=self.parse_check_continue,
                            meta={'show': show},
                            dont_filter=True,
                        )
                    else:
                        show['remaining_seats_num'] = 0
                        show['total_seats_num'] = None
                        show['reserved_seats'] = None
                        show['remaining_seats'] = []
                        show['reservation_url'] = None
                        yield show
Пример #3
0
    def parse(self, response):
        """Yield kinpri shows, or seat-check requests, from a schedule page.

        The theater name is looked up in ``self.db`` by the originally
        requested URL: the first entry of ``redirect_urls`` when the request
        was redirected, otherwise ``response.url``.

        A cell whose ``onclick`` attribute contains a quoted ``https://`` URL
        produces a ``scrapy.Request`` to that URL with the item in
        ``meta['show']`` and ``self.parse_reservation`` as callback. Any
        other cell produces the item directly with ``remaining_seats_num`` 0
        and the other seat fields empty.
        """
        # get theater name
        if 'redirect_urls' in response.request.meta:
            request_url = response.request.meta['redirect_urls'][0]
        else:
            request_url = response.url
        # NOTE(review): if find_one() returns None for an unknown link this
        # line raises AttributeError -- confirm that is acceptable.
        theater = self.db.find_one({'link': request_url}).get('name')

        date = response.css('#Day_schedule h1::text').extract_first()
        for movie in response.css('.scheduleBox'):
            title = movie.css('h2 ::text').extract_first()

            # skip movies that are not kinpri
            if not utils.is_title_kinpri(title):
                continue

            for cell in movie.css('.scheduleBox>table>tbody>tr>td'):
                # The first cell without a screen number ends this movie's
                # list of shows; check before building the item.
                screen = cell.css('p::text').re(r'\d+')
                if not screen:
                    break

                show = Show()
                show['updated'] = datetime.datetime.now()
                show['theater'] = theater
                show['schedule_url'] = response.url
                show['date'] = date
                show['title'] = title
                show['movie_types'] = utils.get_kinpri_types(title)
                show['screen'] = screen[0]
                show['start_time'] = cell.css('span::text').extract_first()
                # re_first() yields None rather than IndexError when the
                # cell has no HH:MM end time.
                show['end_time'] = cell.css(
                    'tr>td::text').re_first(r'\d+:\d+')
                show['ticket_state'] = cell.css(
                    'img::attr(alt)').extract_first()

                # None when there is no onclick attribute or it holds no
                # quoted https URL.
                reservation_url = cell.css(
                    'td[onclick]::attr(onclick)').re_first(r"'(https://.+?)'")
                if reservation_url:
                    # Record the link on the item, as the sibling schedule
                    # callbacks do before requesting the reservation page.
                    show['reservation_url'] = reservation_url
                    yield scrapy.Request(
                        url=reservation_url,
                        callback=self.parse_reservation,
                        meta={'show': show},
                    )
                else:
                    show['remaining_seats_num'] = 0
                    show['total_seats_num'] = None
                    show['reserved_seats'] = None
                    show['remaining_seats'] = []
                    show['reservation_url'] = None
                    yield show
Пример #4
0
    def parse_schedule(self, response):
        """Yield kinpri shows for one day, then follow the next-day link.

        Items carry no screen (``None``) and no seat or reservation
        information. The theater name is read from
        ``response.meta['theater']``.

        After the items, the current request is re-issued for the URL found
        in the ``.schehead .b-next`` link, provided such a link exists and
        does not point back at the current page.
        """
        date = response.css('.today::text').extract_first()
        for movie in response.css('.timeschedule'):
            raw_title = movie.css('.mtitle span.fontm::text').extract_first()
            # A block without a title cannot be a kinpri movie; skip it
            # rather than crash on None.strip().
            if raw_title is None:
                continue
            title = raw_title.strip()

            # skip not kinpri
            if not utils.is_title_kinpri(title):
                continue

            for cell in movie.css('td'):
                # The first cell without a start time ends this movie's
                # list of shows.
                start_time = cell.css('.start ::text').extract_first()
                if not start_time:
                    break

                show = Show()
                show['updated'] = datetime.datetime.now()
                show['title'] = title
                show['movie_types'] = utils.get_kinpri_types(title)
                show['date'] = date
                show['theater'] = response.meta['theater']
                show['schedule_url'] = response.url
                show['start_time'] = start_time
                show['end_time'] = cell.css('.end ::text').extract_first()
                show['screen'] = None
                show['ticket_state'] = cell.css(
                    '.icon_kuuseki ::text').extract_first()

                # TODO: no reservation URL is extracted yet, so the item is
                # emitted without seat information.
                yield show

        next_day_url = response.css(
            '.schehead .b-next a::attr(href)').extract_first()
        # Without a next-day link there is nothing to follow; passing None
        # to Request.replace() would raise.
        if next_day_url:
            next_day_url = response.urljoin(next_day_url)
            if next_day_url != response.url:
                self.logger.debug('next day schedule: %s', next_day_url)
                yield response.request.replace(url=next_day_url)
Пример #5
0
    def parse_schedule(self, response):
        """Yield a ``Show`` item for every kinpri screening on the page.

        Theater name and date are read from ``response.meta``. Seat
        availability is not scraped: every item is emitted with
        ``remaining_seats_num`` 0, an empty ``remaining_seats`` list, and
        ``None`` for ``total_seats_num``, ``reserved_seats`` and
        ``reservation_url``.
        """
        for movie in response.css('table.movietitle'):
            title = movie.css('.item1 ::text').extract_first()

            # skip movies that are not kinpri
            if not utils.is_title_kinpri(title):
                continue

            # re_first() gives None instead of raising IndexError when the
            # theater link carries no number.
            screen = movie.css('a.theaterlink::text').re_first(r'\d+')
            for cell in movie.css('.time1, .time2'):
                # The first cell without any HH:MM text ends this movie's
                # list of shows.
                times = cell.css('span::text').re(r'\d+:\d+')
                if not times:
                    break
                icon_sources = cell.css('img::attr(src)').extract()

                show = Show()
                show['updated'] = datetime.datetime.now()
                show['theater'] = response.meta['theater']
                show['schedule_url'] = response.url
                show['date'] = response.meta['date']
                show['title'] = title
                show['screen'] = screen
                show['movie_types'] = utils.get_kinpri_types(title)
                show['start_time'] = times[0]
                # Tolerate a cell that lists only the start time.
                show['end_time'] = times[1] if len(times) > 1 else None
                # The src of the last image in the cell is used as the
                # ticket state; None when the cell has no image.
                show['ticket_state'] = (
                    icon_sources[-1] if icon_sources else None)
                # NOTE(review): the link in the cell used to be stored here
                # and then overwritten with None further down, so items have
                # always carried None -- confirm whether the link should be
                # kept once seat checking is implemented.
                show['reservation_url'] = None
                # TODO: check seats by following the reservation link.
                show['remaining_seats_num'] = 0
                show['total_seats_num'] = None
                show['reserved_seats'] = None
                show['remaining_seats'] = []
                yield show
Пример #6
0
    def parse_schedule(self, response):
        """Yield shows, or seat-check requests, from a multi-day schedule.

        Day headers (``#schedule p[id^="day_"]``) are paired positionally
        with the ``.schedule_list ul`` lists. Entries with no class
        attribute or class ``noSchedule`` are skipped. The theater name is
        read from ``response.meta['theater']``.

        Entries in state ``sec05`` (sold out), and entries without a
        reservation link, are yielded directly with ``remaining_seats_num``
        0 and the other seat fields empty. All other entries produce a
        ``scrapy.Request`` to the reservation link with the item in
        ``meta['show']`` and ``self.parse_reservation`` as callback.
        """
        schedule_days = response.css('#schedule p[id^="day_"]')
        schedule_list = response.css('.schedule_list ul')
        # The title is taken from the page as a whole, so build it once
        # instead of once per schedule entry.
        title = ' '.join(response.css('.text span::text').extract())

        for day, ul in zip(schedule_days, schedule_list):
            date = day.css('span::text').extract_first()
            for li in ul.css('li'):
                # skip no schedule day
                state = li.css('::attr(class)').extract_first()
                if not state or state == 'noSchedule':
                    continue

                show = Show()
                show['updated'] = datetime.datetime.now()
                show['title'] = title
                show['movie_types'] = utils.get_kinpri_types(title)
                show['date'] = date
                show['ticket_state'] = state
                show['theater'] = response.meta['theater']
                # Exactly two text nodes are expected: the screen and a
                # "start - end" time range.
                show['screen'], time_range = li.css('::text').extract()
                show['start_time'], show['end_time'] = time_range.split(' - ')
                show['schedule_url'] = response.url

                reservation_url = li.css('a::attr(href)').extract_first()
                # Sold out, or nothing to follow: emit the item as-is.
                # Requesting a None URL would raise inside scrapy.Request.
                if state == 'sec05' or not reservation_url:
                    show['remaining_seats_num'] = 0
                    show['total_seats_num'] = None
                    show['reserved_seats'] = None
                    show['remaining_seats'] = []
                    show['reservation_url'] = None
                    yield show
                else:
                    show['reservation_url'] = reservation_url
                    yield scrapy.Request(url=reservation_url,
                                         callback=self.parse_reservation,
                                         meta={'show': show})
    def parse_schedule(self, response):
        """Yield a ``Show`` item for every kinpri screening on the page.

        The first ``.timetbl [class^="tbl"]`` element of each movie is
        skipped; each remaining one becomes a show. Start and end times are
        rebuilt by joining the 1-2 digit groups found in the time text with
        ``:``. The theater name is read from ``response.meta['theater']``.

        Items carry no seat or reservation information.
        """
        date = response.css('.today::text').extract_first()
        for movie in response.css('.movielist'):
            raw_title = movie.css('.main a::text').extract_first()
            # A block without a title cannot be a kinpri movie; skip it
            # rather than crash on None.strip().
            if raw_title is None:
                continue
            title = raw_title.strip()

            # skip not kinpri
            if not utils.is_title_kinpri(title):
                continue

            for slot in movie.css('.timetbl [class^="tbl"]')[1:]:
                show = Show()

                show['updated'] = datetime.datetime.now()
                show['title'] = title
                show['movie_types'] = utils.get_kinpri_types(title)
                show['date'] = date
                show['theater'] = response.meta['theater']
                show['schedule_url'] = response.url

                # Exactly two text nodes are expected under .time: the
                # start and the end of the show.
                start_text, end_text = slot.css('.time ::text').extract()
                show['start_time'] = ':'.join(
                    re.findall(r'(\d{1,2})', start_text))
                show['end_time'] = ':'.join(
                    re.findall(r'(\d{1,2})', end_text))
                show['screen'] = slot.css('.screen ::text').extract_first()
                show['ticket_state'] = slot.css(
                    '.icon_kuuseki ::text').extract_first()

                # TODO: run javascript via splash to obtain the reservation
                # URL; until then the item is emitted without seat data.
                yield show