Пример #1
0
 def _get_episode_url(self, show_url, video):
     """Locate the URL of the requested episode on a show page.

     Matching is attempted in this order:
       1. season/episode pattern, via ``self._default_get_episode_url``
       2. air date, when title matching is not forced, the
          'airdate-fallback' setting is 'true' and the video has an air date
       3. episode title, when title matching is forced or the
          'title-fallback' setting is 'true', and the video has a title

     :param show_url: show page URL, joined onto ``self.base_url``
     :param video: object providing ``season``, ``episode``, ``ep_airdate``
         and ``ep_title``
     :return: the default matcher's result or a pathified episode URL;
         None when nothing matches
     """
     url = scraper_utils.urljoin(self.base_url, show_url)
     html = self._http_get(url, cache_limit=2)
     # Links such as "...-s01e02..."; (?!\d) stops e2 from matching e20.
     episode_pattern = r'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
     parts = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
     fragment = '\n'.join(part.content for part in parts)
     result = self._default_get_episode_url(fragment, video, episode_pattern)
     if result:
         return result

     # NOTE(review): the zip() pairings below assume links, air dates and
     # names appear once per episode and in the same order -- confirm
     # against the site markup.
     ep_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(fragment, 'a', req='href')]
     ep_dates = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
     ep_titles = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]
     force_title = scraper_utils.force_title(video)
     if not force_title and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
         for ep_url, ep_date in zip(ep_urls, ep_dates):
             logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
             if video.ep_airdate == scraper_utils.to_datetime(ep_date, '%Y-%m-%d').date():
                 return scraper_utils.pathify_url(ep_url)

     # Only match on title when there is a title to match; otherwise an
     # empty normalized title could match an untitled episode.
     if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
         norm_title = scraper_utils.normalize_title(video.ep_title)
         for ep_url, ep_title in zip(ep_urls, ep_titles):
             # Drop a nested <span>...</span> prefix from the name markup.
             ep_title = re.sub(r'<span>.*?</span>\s*', '', ep_title)
             logger.log('Quikr Ep Title Matching: %s - %s - %s' % (ep_url.encode('utf-8'), ep_title.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
             if norm_title == scraper_utils.normalize_title(ep_title):
                 return scraper_utils.pathify_url(ep_url)

     return None
Пример #2
0
 def _get_episode_url(self, show_url, video):
     """Resolve an episode URL from the show's JSON episode listing.

     The show id is read from the query string of ``show_url`` and the
     listing is fetched from ``/api/v2/shows/<id>``. Unless title matching
     is forced, episodes are matched by season/number first and then, if
     the 'airdate-fallback' setting is 'true', by air date (the listed
     air date minus one day must equal ``video.ep_airdate``). Title
     matching runs last when forced or when 'title-fallback' is 'true'.

     :return: a pathified ``?id=<episode id>`` URL, or None if no match
     """
     params = scraper_utils.parse_query(show_url)
     if 'id' not in params:
         return None

     api_url = scraper_utils.urljoin(self.base_url, '/api/v2/shows/%s' % (params['id']))
     show_data = self._http_get(api_url, cache_limit=.5)
     if 'episodes' not in show_data:
         return None

     def episode_path(item):
         # Every match is reported as a query-style URL on the episode id.
         return scraper_utils.pathify_url('?id=%s' % (item['id']))

     title_forced = scraper_utils.force_title(video)
     if title_forced:
         logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
     else:
         for item in show_data['episodes']:
             same_season = int(video.season) == int(item['season'])
             if same_season and int(video.episode) == int(item['number']):
                 return episode_path(item)

         if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
             one_day = datetime.timedelta(days=1)
             for item in show_data['episodes']:
                 if 'airdate' not in item:
                     continue
                 aired = scraper_utils.to_datetime(item['airdate'], "%Y-%m-%d").date()
                 if video.ep_airdate == (aired - one_day):
                     return episode_path(item)

     if (title_forced or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
         wanted = scraper_utils.normalize_title(video.ep_title)
         for item in show_data['episodes']:
             # Substring match: the wanted title may be part of a longer name.
             if 'name' in item and wanted in scraper_utils.normalize_title(item['name']):
                 return episode_path(item)

     return None
Пример #3
0
 def __too_old(self, post):
     """Tell whether a post is older than this scraper's age filter.

     The limit, in days, is read from the '<scraper name>-filter' setting.
     A limit of 0, an unset or non-numeric setting, a missing 'post_date'
     or an unparseable date all mean the post is NOT considered too old.

     :param post: mapping that may carry 'post_date' formatted as
         '%Y-%m-%d %H:%M:%S'
     :return: True if the post's age exceeds the limit, otherwise False
     """
     try:
         max_days = int(kodi.get_setting('%s-filter' % (self.get_name())))
     except (TypeError, ValueError):
         # An empty or non-numeric setting is treated as "no filter"
         # instead of crashing the scrape.
         return False

     post_date = post.get('post_date', '')
     if not max_days or not post_date:
         return False

     try:
         posted_on = scraper_utils.to_datetime(
             post_date, '%Y-%m-%d %H:%M:%S').date()
     except ValueError:
         return False

     return datetime.date.today() - posted_on > datetime.timedelta(days=max_days)
Пример #4
0
    def __too_old(self, post):
        """Tell whether a post's HTML shows a date older than the age filter.

        The limit, in days, is read from the '<scraper name>-filter'
        setting. The date is scraped from the "postMonth" element (year in
        its title attribute, month name as its text) and the "postDay"
        element. This is a best-effort check: any failure to read the
        setting or parse the date results in False.

        :param post: HTML fragment of a single post
        :return: True if the post is older than the limit, otherwise False
        """
        try:
            filter_days = datetime.timedelta(
                days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
            if not filter_days:
                return False

            found = re.search(
                r'class="postMonth"\s+title="(\d+)[^>]*>([^<]+).*?class="postDay"[^>]*>(\d+)',
                post, re.DOTALL)
            if found is None:
                # No recognizable date markup: cannot judge the post's age.
                return False

            year, mon_name, day = found.groups()
            # presumably SHORT_MONS lists month abbreviations starting with
            # January, so index + 1 is the month number -- verify.
            post_date = '%s-%s-%s' % (year, SHORT_MONS.index(mon_name) + 1,
                                      day)
            post_date = scraper_utils.to_datetime(post_date,
                                                  '%Y-%m-%d').date()
            return datetime.date.today() - post_date > filter_days
        except Exception:
            # Deliberately best-effort, but unlike a bare ``except`` this
            # lets KeyboardInterrupt/SystemExit propagate.
            return False
Пример #5
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search for releases matching ``title``.

        Queries the (base64-encoded) SEARCH_URL endpoint and parses the
        JSON reply. Entries with a publish time are skipped when
        ``self.__too_old`` rejects them; the rest are kept if
        ``scraper_utils.meta_release_check`` accepts their parsed title.
        When nothing is found, the result of ``self.__site_search`` is
        returned instead.

        :return: list of dicts with 'title', 'year' (always '') and 'url'
        """
        found = []
        query_url = base64.decodestring(SEARCH_URL) % (
            urllib.quote_plus(title))
        page = self._http_get(query_url, cache_limit=2)
        if page:
            payload = scraper_utils.parse_json(page)
            wanted_meta = scraper_utils.parse_episode_link(title)
            for entry in payload.get('results', []):
                tags = entry.get('richSnippet', {}).get('metatags', {})
                published = tags.get('articlePublishedTime')
                if published:
                    # Strip a trailing UTC offset such as "+02:00".
                    published = re.sub('[+-]\d+:\d+$', '', published)
                    published = scraper_utils.to_datetime(
                        published, '%Y-%m-%dT%H:%M:%S').date()
                    if self.__too_old(published):
                        continue

                candidate = tags.get('ogTitle', '')
                if not candidate:
                    # Fall back to the plain result title, minus the
                    # trailing "- Release Scene" suffix.
                    suffix_re = re.compile('\s*-\s*Release\s*Scene$', re.I)
                    candidate = suffix_re.sub('', entry['titleNoFormatting'])
                link = entry['url']
                entry_meta = scraper_utils.parse_episode_link(candidate)
                if not scraper_utils.meta_release_check(video_type, wanted_meta,
                                                        entry_meta):
                    continue

                found.append({
                    'title': scraper_utils.cleanse_title(candidate),
                    'year': '',
                    'url': scraper_utils.pathify_url(link)
                })

        if not found:
            found = self.__site_search(video_type, title, year)

        return found
Пример #6
0
    def _blog_proc_results(self, html, post_pattern, date_format, video_type,
                           title, year):
        """Extract matching search results from blog-style HTML.

        ``title`` is split into a show title plus either an ``SxxEyy`` tag
        or a ``YYYY MM DD`` air date (episodes only). Every post found by
        ``post_pattern`` is then filtered by age (the
        '<scraper name>-filter' setting, in days) and compared against the
        title, year, S/E tag and air date.

        :param html: page HTML to scan
        :param post_pattern: regex with named groups 'post_title' and 'url',
            optionally 'quality' and 'date'
        :param date_format: strptime format of the 'date' group; falsy
            disables age filtering
        :param video_type: a VIDEO_TYPES value
        :param title: search title, possibly carrying an S/E tag or a date
        :param year: wanted year; falsy matches any year
        :return: list of dicts with 'url', 'title', 'year' and 'quality'
        """
        results = []
        search_date = ''
        search_sxe = ''
        show_title = title
        if video_type == VIDEO_TYPES.EPISODE:
            match = re.search(r'(.*?)\s*(S\d+E\d+)\s*', title)
            if match:
                show_title, search_sxe = match.groups()
            else:
                match = re.search(
                    r'(.*?)\s*(\d{4})[._ -]?(\d{2})[._ -]?(\d{2})\s*', title)
                if match:
                    show_title, search_year, search_month, search_day = match.groups()
                    search_date = '%s-%s-%s' % (search_year, search_month,
                                                search_day)
                    search_date = scraper_utils.to_datetime(
                        search_date, "%Y-%m-%d").date()

        # Loop-invariant values: the age limit and the normalized title are
        # the same for every post, so compute them once.
        try:
            filter_days = int(
                kodi.get_setting('%s-filter' % (self.get_name())))
        except ValueError:
            filter_days = 0
        norm_title = scraper_utils.normalize_title(show_title)

        today = datetime.date.today()
        for match in re.finditer(post_pattern, html, re.DOTALL):
            post_data = match.groupdict()
            # Strip any markup embedded in the post title.
            post_title = re.sub('<[^>]*>', '', post_data['post_title'])
            if 'quality' in post_data:
                post_title += '- [%s]' % (post_data['quality'])

            if filter_days and date_format and 'date' in post_data:
                post_data['date'] = post_data['date'].strip()
                post_date = scraper_utils.to_datetime(post_data['date'],
                                                      date_format).date()
                if not post_date:
                    # The format string now has one placeholder per argument;
                    # the previous four-placeholder version raised TypeError.
                    logger.log(
                        'Failed date Check in %s: |%s|%s|' %
                        (self.get_name(), post_data['date'], date_format),
                        log_utils.LOGWARNING)
                    post_date = today

                if today - post_date > datetime.timedelta(days=filter_days):
                    continue

            match_year = ''
            match_date = ''
            match_sxe = ''
            if video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(post_title)
                match_year = meta['year']
            else:
                meta = scraper_utils.parse_episode_link(post_title)
                match_sxe = 'S%02dE%02d' % (int(
                    meta['season']), int(meta['episode']))
                match_date = meta['airdate']

            match_title = meta['title']
            full_title = '%s (%sp) [%s]' % (meta['title'], meta['height'],
                                            meta['extra'])
            match_norm_title = scraper_utils.normalize_title(match_title)
            # Titles match when either normalized title contains the other.
            title_match = norm_title and (match_norm_title in norm_title
                                          or norm_title in match_norm_title)
            # An unknown value on either side counts as a match.
            year_match = not year or not match_year or year == match_year
            sxe_match = not search_sxe or (search_sxe == match_sxe)
            date_match = not search_date or (search_date == match_date)
            logger.log(
                'Blog Results: |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| (%s)'
                % (match_norm_title, norm_title, title_match, year, match_year,
                   year_match, search_date, match_date, date_match, search_sxe,
                   match_sxe, sxe_match, self.get_name()), log_utils.LOGDEBUG)
            if title_match and year_match and date_match and sxe_match:
                quality = scraper_utils.height_get_quality(meta['height'])
                result = {
                    'url': scraper_utils.pathify_url(post_data['url']),
                    'title': scraper_utils.cleanse_title(full_title),
                    'year': match_year,
                    'quality': quality
                }
                results.append(result)
        return results