def parse(self, response):
    """Yield one ``Show`` per kinpri screening found on a schedule page.

    The theater name is read from ``self.db``, looked up by the originally
    requested URL: the first entry of ``redirect_urls`` when the request
    meta has one, otherwise ``response.url``.

    Reservation pages are not followed here, so every yielded item carries
    the placeholder seat values (0 remaining seats, no seat lists, no
    reservation URL).
    """
    request_meta = response.request.meta
    if 'redirect_urls' in request_meta:
        request_url = request_meta['redirect_urls'][0]
    else:
        request_url = response.url
    # NOTE(review): if no document matches the link, find_one presumably
    # returns None and .get() raises here -- confirm the DB always has it.
    theater = self.db.find_one({'link': request_url}).get('name')

    date = response.css('.schedule-body-day::text').extract_first()

    for movie in response.css('.schedule-body-section-item'):
        title = movie.css('.schedule-body-title::text').extract_first()
        # Only kinpri titles are collected.
        if not utils.is_title_kinpri(title):
            continue

        for screen_sel in movie.css('.schedule-screen'):
            screen = screen_sel.css(
                '.schedule-screen-title::text').extract_first()

            # Distinct names for the two loop levels: the original reused
            # ``s`` for both the screen and the show selector.
            for show_sel in screen_sel.css('.schedule-item'):
                show = Show()
                show['updated'] = datetime.datetime.now()
                show['theater'] = theater
                show['schedule_url'] = response.url
                show['date'] = date
                show['title'] = title
                show['movie_types'] = utils.get_kinpri_types(title)
                show['screen'] = screen
                show['start_time'] = show_sel.css(
                    '.time .start::text').extract_first()
                show['end_time'] = show_sel.css(
                    '.time .end::text').extract_first()
                show['ticket_state'] = show_sel.css(
                    '.status::attr(class)').extract_first()

                # Seat information is not scraped for this page layout.
                show['remaining_seats_num'] = 0
                show['total_seats_num'] = None
                show['reserved_seats'] = None
                show['remaining_seats'] = []
                show['reservation_url'] = None
                yield show
def parse_schedule(self, response):
    """Yield kinpri shows, or reservation-page requests, for a daily list.

    For each show that has its own reservation link, a ``scrapy.Request``
    to that link is yielded with the partially filled ``Show`` in
    ``meta['show']`` and ``self.parse_check_continue`` as callback.  Shows
    without a link are yielded directly with placeholder seat values.

    ``response.meta['theater']`` must be set by the caller.
    """
    # The page URL is stored as the show date.
    # NOTE(review): presumably the URL encodes the day -- confirm.
    date = response.url

    for movie in response.css('#dailyList>li'):
        title = movie.css('.movieTitle a::text').extract_first()
        # Only kinpri titles are collected.
        if not utils.is_title_kinpri(title):
            continue

        for shows_row in movie.css('.tl>li'):
            # The screen number is taken from the digits in the image alt
            # text; a row without digits yields None rather than raising.
            screen_numbers = shows_row.css(
                '.screenNumber img::attr(alt)').re(r'(\d+)')
            screen = screen_numbers[0] if screen_numbers else None

            for show_sel in shows_row.css('div'):
                show = Show()
                show['updated'] = datetime.datetime.now()
                show['theater'] = response.meta['theater']
                show['schedule_url'] = response.url
                show['date'] = date
                show['title'] = title
                show['screen'] = screen
                show['movie_types'] = utils.get_kinpri_types(title)
                show['start_time'] = show_sel.css(
                    '.startTime::text').extract_first()
                show['end_time'] = show_sel.css(
                    '.endTime::text').extract_first()

                state = show_sel.css(
                    '.uolIcon .scheduleIcon::attr(alt)').re(r'\[(.)\]')
                show['ticket_state'] = state[0] if state else None

                # Look the link up on this show, not on the whole movie:
                # searching the movie element returned the movie's first
                # link for every show, including shows without one.
                reservation_url = show_sel.css(
                    '.uolIcon a::attr(href)').extract_first()
                if reservation_url:
                    show['reservation_url'] = reservation_url
                    yield scrapy.Request(
                        url=reservation_url,
                        callback=self.parse_check_continue,
                        meta={'show': show},
                        dont_filter=True,
                    )
                else:
                    show['remaining_seats_num'] = 0
                    show['total_seats_num'] = None
                    show['reserved_seats'] = None
                    show['remaining_seats'] = []
                    show['reservation_url'] = None
                    yield show
def parse(self, response):
    """Yield kinpri shows, or reservation-page requests, for a schedule page.

    The theater name is read from ``self.db``, looked up by the originally
    requested URL: the first entry of ``redirect_urls`` when the request
    meta has one, otherwise ``response.url``.

    A table cell whose ``onclick`` attribute contains a quoted ``https://``
    URL produces a ``scrapy.Request`` to that URL, with the ``Show`` in
    ``meta['show']`` and ``self.parse_reservation`` as callback.  Any other
    cell produces the ``Show`` itself with placeholder seat values.
    """
    request_meta = response.request.meta
    if 'redirect_urls' in request_meta:
        request_url = request_meta['redirect_urls'][0]
    else:
        request_url = response.url
    theater = self.db.find_one({'link': request_url}).get('name')

    date = response.css('#Day_schedule h1::text').extract_first()

    for movie in response.css('.scheduleBox'):
        title = movie.css('h2 ::text').extract_first()
        # Only kinpri titles are collected.
        if not utils.is_title_kinpri(title):
            continue

        for cell in movie.css('.scheduleBox>table>tbody>tr>td'):
            # A cell without a screen number ends this movie's show list,
            # so check it before building the item.
            screen_numbers = cell.css('p::text').re(r'\d+')
            if not screen_numbers:
                break

            show = Show()
            show['updated'] = datetime.datetime.now()
            show['theater'] = theater
            show['schedule_url'] = response.url
            show['date'] = date
            show['title'] = title
            show['movie_types'] = utils.get_kinpri_types(title)
            show['screen'] = screen_numbers[0]
            show['start_time'] = cell.css('span::text').extract_first()

            # No HH:MM match gives None instead of an IndexError.
            end_times = cell.css('tr>td::text').re(r'\d+:\d+')
            show['end_time'] = end_times[0] if end_times else None
            show['ticket_state'] = cell.css('img::attr(alt)').extract_first()

            # .re() on an empty selection returns an empty list, so a cell
            # without onclick, or with an onclick holding no https URL,
            # falls through to the no-reservation branch.
            url_matches = cell.css('td[onclick]::attr(onclick)').re(
                r"'(https://.+?)'")
            if url_matches:
                reservation_url = url_matches[0]
                # Recorded on the item before the request, as the other
                # schedule callbacks do.
                show['reservation_url'] = reservation_url
                yield scrapy.Request(
                    url=reservation_url,
                    callback=self.parse_reservation,
                    meta={'show': show},
                )
            else:
                show['remaining_seats_num'] = 0
                show['total_seats_num'] = None
                show['reserved_seats'] = None
                show['remaining_seats'] = []
                show['reservation_url'] = None
                yield show
def parse_schedule(self, response):
    """Yield kinpri shows for one day, then request the next day's page.

    Each table cell with a start time becomes a ``Show``; the first cell
    without one ends that movie's list.  ``screen`` is always None here.
    After all movies, the "next" link is followed by re-issuing the current
    request with the new URL, unless the link is missing or equals the
    current URL.

    ``response.meta['theater']`` must be set by the caller.
    """
    date = response.css('.today::text').extract_first()

    for movie in response.css('.timeschedule'):
        raw_title = movie.css('.mtitle span.fontm::text').extract_first()
        # A block without a title node cannot be a kinpri entry; skip it
        # instead of failing on None.strip().
        if raw_title is None:
            continue
        title = raw_title.strip()
        # Only kinpri titles are collected.
        if not utils.is_title_kinpri(title):
            continue

        for cell in movie.css('td'):
            show = Show()
            show['updated'] = datetime.datetime.now()
            show['title'] = title
            show['movie_types'] = utils.get_kinpri_types(title)
            show['date'] = date
            show['theater'] = response.meta['theater']
            show['schedule_url'] = response.url
            show['start_time'] = cell.css('.start ::text').extract_first()
            if not show['start_time']:
                break
            show['end_time'] = cell.css('.end ::text').extract_first()
            show['screen'] = None
            show['ticket_state'] = cell.css(
                '.icon_kuuseki ::text').extract_first()

            # No reservation URL is extracted yet, so only the else branch
            # runs; the request branch is kept as the hook for it.
            reservation_url = None
            if reservation_url:
                show['reservation_url'] = reservation_url
                yield scrapy.Request(url=reservation_url,
                                     callback=self.parse_reservation,
                                     meta={'show': show})
            else:
                yield show

    next_day_url = response.css(
        '.schehead .b-next a::attr(href)').extract_first()
    # A missing or empty link must not reach request.replace(): a request
    # cannot be built without a URL.
    # NOTE(review): a relative href would never equal response.url --
    # confirm the site emits absolute links.
    if next_day_url and next_day_url != response.url:
        print(next_day_url)
        yield response.request.replace(url=next_day_url)
def parse_schedule(self, response):
    """Yield one ``Show`` per kinpri screening in the movie tables.

    The first time cell without any HH:MM text ends that movie's list.
    Seat details are not fetched, so the seat fields carry placeholder
    values; the reservation link found in the cell, if any, is kept.

    ``response.meta['theater']`` and ``response.meta['date']`` must be set
    by the caller.
    """
    for movie in response.css('table.movietitle'):
        title = movie.css('.item1 ::text').extract_first()
        # Only kinpri titles are collected.
        if not utils.is_title_kinpri(title):
            continue

        # A theater link without digits gives None instead of IndexError.
        screen_numbers = movie.css('a.theaterlink::text').re(r'\d+')
        screen = screen_numbers[0] if screen_numbers else None

        for cell in movie.css('.time1, .time2'):
            times = cell.css('span::text').re(r'\d+:\d+')
            if not times:
                break

            show = Show()
            show['updated'] = datetime.datetime.now()
            show['theater'] = response.meta['theater']
            show['schedule_url'] = response.url
            show['date'] = response.meta['date']
            show['title'] = title
            show['screen'] = screen
            show['movie_types'] = utils.get_kinpri_types(title)
            show['start_time'] = times[0]
            # A cell with a single time has no end time.
            show['end_time'] = times[1] if len(times) > 1 else None

            # The last image source in the cell is used as the ticket state.
            icon_sources = cell.css('img::attr(src)').extract()
            show['ticket_state'] = icon_sources[-1] if icon_sources else None

            # Keep the extracted link: it used to be overwritten with None
            # right after being read.
            show['reservation_url'] = cell.css(
                'a::attr(href)').extract_first()

            # TODO: check seats
            show['remaining_seats_num'] = 0
            show['total_seats_num'] = None
            show['reserved_seats'] = None
            show['remaining_seats'] = []
            yield show
def parse_schedule(self, response):
    """Yield shows, or reservation-page requests, for a multi-day schedule.

    Day headers and schedule lists are paired positionally with ``zip``.
    List items with no class, or with class ``noSchedule``, are skipped.
    Items with class ``sec05`` (sold out) or without a link are yielded
    directly with placeholder seat values.  Every other item produces a
    ``scrapy.Request`` to its link, with the ``Show`` in ``meta['show']``
    and ``self.parse_reservation`` as callback.

    ``response.meta['theater']`` must be set by the caller.
    """
    schedule_days = response.css('#schedule p[id^="day_"]')
    schedule_list = response.css('.schedule_list ul')

    # The title comes from the page, not the list item, so build it once.
    title = ' '.join(response.css('.text span::text').extract())

    for day, ul in zip(schedule_days, schedule_list):
        date = day.css('span::text').extract_first()

        for li in ul.css('li'):
            # The item's class attribute doubles as the ticket state.
            state = li.css('::attr(class)').extract_first()
            if not state or state == 'noSchedule':
                continue

            show = Show()
            show['updated'] = datetime.datetime.now()
            show['title'] = title
            show['movie_types'] = utils.get_kinpri_types(title)
            show['date'] = date
            show['ticket_state'] = state
            show['theater'] = response.meta['theater']
            # Exactly two text nodes are expected: the screen, then a
            # "start - end" time range.
            show['screen'], time_range = li.css('::text').extract()
            show['start_time'], show['end_time'] = time_range.split(' - ')
            show['schedule_url'] = response.url

            reservation_url = li.css('a::attr(href)').extract_first()
            # 'sec05' marks a sold-out show.  An item without a link is
            # handled the same way, because a request cannot be built
            # from a missing URL.
            if state == 'sec05' or not reservation_url:
                show['remaining_seats_num'] = 0
                show['total_seats_num'] = None
                show['reserved_seats'] = None
                show['remaining_seats'] = []
                show['reservation_url'] = None
                yield show
            else:
                show['reservation_url'] = reservation_url
                yield scrapy.Request(url=reservation_url,
                                     callback=self.parse_reservation,
                                     meta={'show': show})
def parse_schedule(self, response):
    """Yield one ``Show`` per kinpri screening in the movie list.

    The first ``tbl*`` element of each time table is skipped.  Start and
    end times are rebuilt by joining the one- or two-digit groups found in
    each time text with ``:``.

    ``response.meta['theater']`` must be set by the caller.
    """
    date = response.css('.today::text').extract_first()

    for movie in response.css('.movielist'):
        raw_title = movie.css('.main a::text').extract_first()
        # A block without a title node cannot be a kinpri entry; skip it
        # instead of failing on None.strip().
        if raw_title is None:
            continue
        title = raw_title.strip()
        # Only kinpri titles are collected.
        if not utils.is_title_kinpri(title):
            continue

        for cell in movie.css('.timetbl [class^="tbl"]')[1:]:
            show = Show()
            show['updated'] = datetime.datetime.now()
            show['title'] = title
            show['movie_types'] = utils.get_kinpri_types(title)
            show['date'] = date
            show['theater'] = response.meta['theater']
            show['schedule_url'] = response.url

            # Exactly two text nodes are expected under .time.
            start_time, end_time = cell.css('.time ::text').extract()
            show['start_time'] = ':'.join(
                re.findall(r'(\d{1,2})', start_time))
            show['end_time'] = ':'.join(
                re.findall(r'(\d{1,2})', end_time))
            show['screen'] = cell.css('.screen ::text').extract_first()
            show['ticket_state'] = cell.css(
                '.icon_kuuseki ::text').extract_first()

            # TODO: run javascript via splash
            # Until then reservation_url stays None and only the else
            # branch runs.
            reservation_url = None
            if reservation_url:
                show['reservation_url'] = reservation_url
                yield scrapy.Request(url=reservation_url,
                                     callback=self.parse_reservation,
                                     meta={'show': show})
            else:
                yield show