Exemplo n.º 1
0
 def _get_episode_url(self, show_url, video):
     """Resolve *video* to an episode URL via the show's JSON API listing.

     Matching order: season/episode numbers first, then (when enabled)
     airdate, then (when enabled or forced) episode title.  Returns the
     pathified '?id=...' URL on a match, otherwise None.
     """
     params = scraper_utils.parse_query(show_url)
     if 'id' not in params:
         return

     api_url = scraper_utils.urljoin(self.base_url, '/api/v2/shows/%s' % (params['id']))
     show_data = self._http_get(api_url, cache_limit=.5)
     if 'episodes' not in show_data:
         return

     episodes = show_data['episodes']
     title_forced = scraper_utils.force_title(video)
     if title_forced:
         logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
     else:
         want_season, want_episode = int(video.season), int(video.episode)
         for ep in episodes:
             if want_season == int(ep['season']) and want_episode == int(ep['number']):
                 return scraper_utils.pathify_url('?id=%s' % (ep['id']))

         if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
             for ep in episodes:
                 if 'airdate' not in ep:
                     continue
                 aired = scraper_utils.to_datetime(ep['airdate'], "%Y-%m-%d").date()
                 # NOTE(review): matches one day BEFORE the listed airdate —
                 # presumably the site lists dates a day ahead; confirm
                 if video.ep_airdate == (aired - datetime.timedelta(days=1)):
                     return scraper_utils.pathify_url('?id=%s' % (ep['id']))

     if (title_forced or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
         wanted = scraper_utils.normalize_title(video.ep_title)
         for ep in episodes:
             if 'name' in ep and wanted in scraper_utils.normalize_title(ep['name']):
                 return scraper_utils.pathify_url('?id=%s' % (ep['id']))
Exemplo n.º 2
0
 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     """Search via the site's JSON endpoint; fall back to on-site search.

     Skips results that fail the age filter, strips a trailing site tag
     from untagged titles, and keeps only entries passing the release
     metadata check.  Returns a list of result dicts.
     """
     found = []
     search_url = base64.decodestring(SEARCH_URL) % (urllib.quote_plus(title))
     html = self._http_get(search_url, cache_limit=2)
     if html:
         payload = scraper_utils.parse_json(html)
         wanted_meta = scraper_utils.parse_episode_link(title)
         for entry in payload.get('results', []):
             tags = entry.get('richSnippet', {}).get('metatags', {})
             published = tags.get('articlePublishedTime')
             if published:
                 # drop the timezone offset before parsing
                 published = re.sub('[+-]\d+:\d+$', '', published)
                 published = scraper_utils.to_datetime(published, '%Y-%m-%dT%H:%M:%S').date()
                 if self.__too_old(published):
                     continue

             entry_title = tags.get('ogTitle', '')
             if not entry_title:
                 entry_title = re.sub(re.compile('\s*-\s*Scene\s*Down$', re.I), '', entry['titleNoFormatting'])
             entry_url = entry['url']
             entry_meta = scraper_utils.parse_episode_link(entry_title)
             if scraper_utils.meta_release_check(video_type, wanted_meta, entry_meta):
                 found.append({'title': scraper_utils.cleanse_title(entry_title), 'year': '', 'url': scraper_utils.pathify_url(entry_url)})

     if not found:
         found = self.__site_search(video_type, title, year)

     return found
Exemplo n.º 3
0
 def _get_episode_url(self, show_url, video):
     """Find the episode page URL: s/e regex first, then airdate, then title.

     Scrapes the show page's episode list and returns a pathified episode
     URL, or None when nothing matches.
     """
     page_url = scraper_utils.urljoin(self.base_url, show_url)
     html = self._http_get(page_url, cache_limit=2)
     episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
     lists = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
     fragment = '\n'.join(item.content for item in lists)
     matched = self._default_get_episode_url(fragment, video, episode_pattern)
     if matched:
         return matched

     links = [a.attrs['href'] for a in dom_parser2.parse_dom(fragment, 'a', req='href')]
     airdates = [s.content for s in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
     names = [s.content for s in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]
     title_forced = scraper_utils.force_title(video)
     if not title_forced and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
         for link, aired in zip(links, airdates):
             logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (link, aired, video.ep_airdate), log_utils.LOGDEBUG)
             if video.ep_airdate == scraper_utils.to_datetime(aired, '%Y-%m-%d').date():
                 return scraper_utils.pathify_url(link)

     if title_forced or kodi.get_setting('title-fallback') == 'true':
         wanted = scraper_utils.normalize_title(video.ep_title)
         for link, name in zip(links, names):
             # strip the leading episode-number span from the name
             name = re.sub('<span>.*?</span>\s*', '', name)
             logger.log('Quikr Ep Title Matching: %s - %s - %s' % (link.encode('utf-8'), name.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
             if wanted == scraper_utils.normalize_title(name):
                 return scraper_utils.pathify_url(link)
Exemplo n.º 4
0
    def _get_episode_url(self, show_url, video):
        """Resolve *video* to an episode URL on the show page.

        Tries the default s/e regex match first, then falls back to
        airdate and episode-title matching against the episode list.
        """
        episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
        matched = self._default_get_episode_url(show_url, video, episode_pattern)
        if matched:
            return matched

        page_url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(page_url, cache_limit=2)
        fragment = dom_parser.parse_dom(html, "ul", {"class": "episode_list"})
        if not fragment:
            return

        links = dom_parser.parse_dom(fragment[0], "a", ret="href")
        airdates = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_air_d"})
        names = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_name"})
        title_forced = scraper_utils.force_title(video)
        if not title_forced and kodi.get_setting("airdate-fallback") == "true" and video.ep_airdate:
            for link, aired in zip(links, airdates):
                log_utils.log("Quikr Ep Airdate Matching: %s - %s - %s" % (link, aired, video.ep_airdate), log_utils.LOGDEBUG)
                if video.ep_airdate == scraper_utils.to_datetime(aired, "%Y-%m-%d").date():
                    return scraper_utils.pathify_url(link)

        if title_forced or kodi.get_setting("title-fallback") == "true":
            wanted = scraper_utils.normalize_title(video.ep_title)
            for link, name in zip(links, names):
                # strip the leading episode-number span from the name
                name = re.sub("<span>.*?</span>\s*", "", name)
                log_utils.log("Quikr Ep Title Matching: %s - %s - %s" % (link, wanted, video.ep_title), log_utils.LOGDEBUG)
                if wanted == scraper_utils.normalize_title(name):
                    return scraper_utils.pathify_url(link)
Exemplo n.º 5
0
 def __too_old(self, post):
     """Return True when the post's date is older than the user's day filter.

     Unparseable dates and an unset/zero filter are treated as "not too old".
     """
     cutoff = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
     raw_date = post.get('post_date', '')
     if not (cutoff and raw_date):
         return False

     today = datetime.date.today()
     try:
         posted = scraper_utils.to_datetime(raw_date, '%Y-%m-%d %H:%M:%S').date()
     except ValueError:
         return False
     return today - posted > cutoff
Exemplo n.º 6
0
 def __too_old(self, post):
     """True if this post predates the configured '<name>-filter' day window."""
     max_age = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
     raw_date = post.get('post_date', '')
     if max_age and raw_date:
         try:
             when = scraper_utils.to_datetime(raw_date, '%Y-%m-%d %H:%M:%S').date()
             if datetime.date.today() - when > max_age:
                 return True
         except ValueError:
             # unparseable date: treat the post as recent enough
             pass
     return False
Exemplo n.º 7
0
 def __too_old(self, post):
     """Return True when the post's scraped date is older than the user's
     '<scraper>-filter' setting (in days); False otherwise or on any
     scrape/parse failure.

     :param post: HTML fragment containing postMonth/postDay markup
     """
     try:
         filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
         if filter_days:
             post_date = re.search('class="postMonth"\s+title="(\d+)[^>]*>([^<]+).*?class="postDay"[^>]*>(\d+)', post, re.DOTALL)
             year, mon_name, day = post_date.groups()
             post_date = '%s-%s-%s' % (year, SHORT_MONS.index(mon_name) + 1, day)
             post_date = scraper_utils.to_datetime(post_date, '%Y-%m-%d').date()
             if datetime.date.today() - post_date > filter_days:
                 return True
     except Exception:  # was a bare except: must not swallow SystemExit/KeyboardInterrupt
         return False
     
     return False
Exemplo n.º 8
0
    def _get_episode_url(self, show_url, video):
        """Find the episode link on the show page.

        Order: default s/e regex match over the episode list, then
        airdate matching, then episode-title matching.  Returns the
        pathified URL or None.
        """
        html = self._http_get(scraper_utils.urljoin(self.base_url, show_url), cache_limit=2)
        pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
        fragment = '\n'.join(ul.content for ul in dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'}))
        found = self._default_get_episode_url(fragment, video, pattern)
        if found:
            return found

        hrefs = [a.attrs['href'] for a in dom_parser2.parse_dom(fragment, 'a', req='href')]
        dates = [s.content for s in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
        labels = [s.content for s in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]
        forced = scraper_utils.force_title(video)
        if not forced and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
            for href, aired in zip(hrefs, dates):
                logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (href, aired, video.ep_airdate), log_utils.LOGDEBUG)
                if scraper_utils.to_datetime(aired, '%Y-%m-%d').date() == video.ep_airdate:
                    return scraper_utils.pathify_url(href)

        if forced or kodi.get_setting('title-fallback') == 'true':
            target = scraper_utils.normalize_title(video.ep_title)
            for href, label in zip(hrefs, labels):
                # drop the leading episode-number span before comparing titles
                label = re.sub('<span>.*?</span>\s*', '', label)
                logger.log('Quikr Ep Title Matching: %s - %s - %s' % (href.encode('utf-8'), label.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
                if scraper_utils.normalize_title(label) == target:
                    return scraper_utils.pathify_url(href)
    def __too_old(self, post):
        """Return True when the post's scraped date falls outside the user's
        '<scraper>-filter' day window; False otherwise or on any
        scrape/parse failure.

        :param post: HTML fragment containing postMonth/postDay markup
        """
        try:
            filter_days = datetime.timedelta(
                days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
            if filter_days:
                post_date = re.search(
                    'class="postMonth"\s+title="(\d+)[^>]*>([^<]+).*?class="postDay"[^>]*>(\d+)',
                    post, re.DOTALL)
                year, mon_name, day = post_date.groups()
                post_date = '%s-%s-%s' % (year, SHORT_MONS.index(mon_name) + 1,
                                          day)
                post_date = scraper_utils.to_datetime(post_date,
                                                      '%Y-%m-%d').date()
                if datetime.date.today() - post_date > filter_days:
                    return True
        except Exception:  # narrowed from bare except: don't trap SystemExit/KeyboardInterrupt
            return False

        return False
Exemplo n.º 10
0
 def __too_old(self, post):
     """True when the post is older than the configured day filter.

     The date is taken from the /YYYY/MM/DD/ portion of the post link
     when present, otherwise from the pre-formatted date field.
     """
     log_utils.log(post)
     max_age = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
     link_date = re.search('(/\d{4}/\d{2}/\d{2}/)', post['post_link'])
     if link_date:
         raw_date, date_format = link_date.group(1), '/%Y/%m/%d/'
     else:
         raw_date, date_format = post['post_date_formatted'], '%B %d, %Y'
         
     if max_age and raw_date:
         try:
             posted = scraper_utils.to_datetime(raw_date, date_format).date()
             if datetime.date.today() - posted > max_age:
                 return True
         except ValueError:
             # unparseable date: treat the post as recent enough
             pass
     return False
Exemplo n.º 11
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Query the site's JSON search API, falling back to on-site search.

        Age-filtered results are skipped; a trailing site tag is stripped
        from untagged titles; only entries passing the release metadata
        check are kept.  Returns a list of result dicts.
        """
        hits = []
        search_url = base64.decodestring(SEARCH_URL) % (urllib.quote_plus(title))
        html = self._http_get(search_url, cache_limit=2)
        if html:
            payload = scraper_utils.parse_json(html)
            target_meta = scraper_utils.parse_episode_link(title)
            for record in payload.get('results', []):
                meta_tags = record.get('richSnippet', {}).get('metatags', {})
                stamp = meta_tags.get('articlePublishedTime')
                if stamp:
                    # drop the timezone offset before parsing
                    stamp = re.sub('[+-]\d+:\d+$', '', stamp)
                    stamp = scraper_utils.to_datetime(stamp, '%Y-%m-%dT%H:%M:%S').date()
                    if self.__too_old(stamp):
                        continue

                record_title = meta_tags.get('ogTitle', '')
                if not record_title:
                    record_title = re.sub(re.compile('\s*-\s*Scene\s*Down$', re.I), '', record['titleNoFormatting'])
                record_url = record['url']
                record_meta = scraper_utils.parse_episode_link(record_title)
                if scraper_utils.meta_release_check(video_type, target_meta, record_meta):
                    hits.append({'title': scraper_utils.cleanse_title(record_title), 'year': '', 'url': scraper_utils.pathify_url(record_url)})

        if not hits:
            hits = self.__site_search(video_type, title, year)

        return hits
Exemplo n.º 12
0
    def _get_episode_url(self, show_url, video):
        """Look up the episode id through the show's /api/v2 JSON listing.

        Matching order: season/episode numbers first, then (when enabled)
        airdate, then (when enabled or forced) episode title.  Returns a
        pathified '?id=...' URL or None.
        """
        params = scraper_utils.parse_query(show_url)
        if 'id' not in params:
            return

        api_url = scraper_utils.urljoin(self.base_url, '/api/v2/shows/%s' % (params['id']))
        show_data = self._http_get(api_url, cache_limit=.5)
        if 'episodes' not in show_data:
            return

        episodes = show_data['episodes']
        forced = scraper_utils.force_title(video)
        if forced:
            logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
        else:
            want_s, want_e = int(video.season), int(video.episode)
            for ep in episodes:
                if want_s == int(ep['season']) and want_e == int(ep['number']):
                    return scraper_utils.pathify_url('?id=%s' % (ep['id']))

            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                for ep in episodes:
                    if 'airdate' in ep:
                        aired = scraper_utils.to_datetime(ep['airdate'], "%Y-%m-%d").date()
                        # NOTE(review): matches one day BEFORE the listed
                        # airdate — presumably the site lists dates a day
                        # ahead; confirm
                        if video.ep_airdate == (aired - datetime.timedelta(days=1)):
                            return scraper_utils.pathify_url('?id=%s' % (ep['id']))

        if (forced or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            wanted = scraper_utils.normalize_title(video.ep_title)
            for ep in episodes:
                if 'name' in ep and wanted in scraper_utils.normalize_title(ep['name']):
                    return scraper_utils.pathify_url('?id=%s' % (ep['id']))
Exemplo n.º 13
0
    def _blog_proc_results(self, html, post_pattern, date_format, video_type, title, year):
        """Match blog-style post entries in *html* against the wanted video.

        :param html: page HTML to scan
        :param post_pattern: regex with named groups ``post_title`` and ``url``,
            and optionally ``quality`` and ``date``
        :param date_format: strptime format for the post's date group
        :param video_type: VIDEO_TYPES member being searched for
        :param title: search title; for episodes may embed SxxExx or a date
        :param year: expected year ('' disables the year check)
        :returns: list of result dicts (url, title, year, quality)
        """
        results = []
        search_date = ''
        search_sxe = ''
        show_title = title
        if video_type == VIDEO_TYPES.EPISODE:
            match = re.search('(.*?)\s*(S\d+E\d+)\s*', title)
            if match:
                show_title, search_sxe = match.groups()
            else:
                match = re.search('(.*?)\s*(\d{4})[._ -]?(\d{2})[._ -]?(\d{2})\s*', title)
                if match:
                    show_title, search_year, search_month, search_day = match.groups()
                    search_date = '%s-%s-%s' % (search_year, search_month, search_day)
                    search_date = scraper_utils.to_datetime(search_date, "%Y-%m-%d").date()

        # the age filter is loop-invariant; read the setting once, not per post
        try: filter_days = int(kodi.get_setting('%s-filter' % (self.get_name())))
        except ValueError: filter_days = 0
        if filter_days: filter_days = datetime.timedelta(days=filter_days)

        today = datetime.date.today()
        for match in re.finditer(post_pattern, html, re.DOTALL):
            post_data = match.groupdict()
            post_title = re.sub('<[^>]*>', '', post_data['post_title'])
            if 'quality' in post_data:
                post_title += '- [%s]' % (post_data['quality'])

            if filter_days and date_format and 'date' in post_data:
                post_data['date'] = post_data['date'].strip()
                post_date = scraper_utils.to_datetime(post_data['date'], date_format).date()
                if not post_date:
                    # BUG FIX: the format string had four %s placeholders but
                    # only three arguments, raising TypeError instead of logging
                    logger.log('Failed date Check in %s: |%s|%s|' % (self.get_name(), post_data['date'], date_format), log_utils.LOGWARNING)
                    post_date = today
                        
                if today - post_date > filter_days:
                    continue

            match_year = ''
            match_date = ''
            match_sxe = ''
            if video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(post_title)
                match_year = meta['year']
            else:
                meta = scraper_utils.parse_episode_link(post_title)
                match_sxe = 'S%02dE%02d' % (int(meta['season']), int(meta['episode']))
                match_date = meta['airdate']

            match_title = meta['title']
            full_title = '%s (%sp) [%s]' % (meta['title'], meta['height'], meta['extra'])
            norm_title = scraper_utils.normalize_title(show_title)
            match_norm_title = scraper_utils.normalize_title(match_title)
            title_match = norm_title and (match_norm_title in norm_title or norm_title in match_norm_title)
            year_match = not year or not match_year or year == match_year
            sxe_match = not search_sxe or (search_sxe == match_sxe)
            date_match = not search_date or (search_date == match_date)
            logger.log('Blog Results: |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| (%s)' %
                          (match_norm_title, norm_title, title_match, year, match_year, year_match,
                           search_date, match_date, date_match, search_sxe, match_sxe, sxe_match, self.get_name()),
                          log_utils.LOGDEBUG)
            if title_match and year_match and date_match and sxe_match:
                quality = scraper_utils.height_get_quality(meta['height'])
                result = {'url': scraper_utils.pathify_url(post_data['url']), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year, 'quality': quality}
                results.append(result)
        return results
Exemplo n.º 14
0
    def _blog_proc_results(self, html, post_pattern, date_format, video_type,
                           title, year):
        """Match blog-style post entries in *html* against the wanted video.

        :param html: page HTML to scan
        :param post_pattern: regex with named groups ``post_title`` and
            ``url``, and optionally ``quality`` and ``date``
        :param date_format: strptime format for the post's date group
        :param video_type: VIDEO_TYPES member being searched for
        :param title: search title; for episodes may embed SxxExx or a date
        :param year: expected year ('' disables the year check)
        :returns: list of result dicts (url, title, year, quality)
        """
        results = []
        search_date = ''
        search_sxe = ''
        show_title = title
        if video_type == VIDEO_TYPES.EPISODE:
            match = re.search('(.*?)\s*(S\d+E\d+)\s*', title)
            if match:
                show_title, search_sxe = match.groups()
            else:
                match = re.search(
                    '(.*?)\s*(\d{4})[._ -]?(\d{2})[._ -]?(\d{2})\s*', title)
                if match:
                    show_title, search_year, search_month, search_day = match.groups()
                    search_date = '%s-%s-%s' % (search_year, search_month,
                                                search_day)
                    search_date = scraper_utils.to_datetime(
                        search_date, "%Y-%m-%d").date()

        # the age filter is loop-invariant; read the setting once, not per post
        try:
            filter_days = int(kodi.get_setting('%s-filter' % (self.get_name())))
        except ValueError:
            filter_days = 0
        if filter_days:
            filter_days = datetime.timedelta(days=filter_days)

        today = datetime.date.today()
        for match in re.finditer(post_pattern, html, re.DOTALL):
            post_data = match.groupdict()
            post_title = re.sub('<[^>]*>', '', post_data['post_title'])
            if 'quality' in post_data:
                post_title += '- [%s]' % (post_data['quality'])

            if filter_days and date_format and 'date' in post_data:
                post_data['date'] = post_data['date'].strip()
                post_date = scraper_utils.to_datetime(post_data['date'],
                                                      date_format).date()
                if not post_date:
                    # BUG FIX: the format string had four %s placeholders for
                    # three arguments, raising TypeError instead of logging
                    logger.log(
                        'Failed date Check in %s: |%s|%s|' %
                        (self.get_name(), post_data['date'], date_format),
                        log_utils.LOGWARNING)
                    post_date = today

                if today - post_date > filter_days:
                    continue

            match_year = ''
            match_date = ''
            match_sxe = ''
            if video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(post_title)
                match_year = meta['year']
            else:
                meta = scraper_utils.parse_episode_link(post_title)
                match_sxe = 'S%02dE%02d' % (int(
                    meta['season']), int(meta['episode']))
                match_date = meta['airdate']

            match_title = meta['title']
            full_title = '%s (%sp) [%s]' % (meta['title'], meta['height'],
                                            meta['extra'])
            norm_title = scraper_utils.normalize_title(show_title)
            match_norm_title = scraper_utils.normalize_title(match_title)
            title_match = norm_title and (match_norm_title in norm_title
                                          or norm_title in match_norm_title)
            year_match = not year or not match_year or year == match_year
            sxe_match = not search_sxe or (search_sxe == match_sxe)
            date_match = not search_date or (search_date == match_date)
            logger.log(
                'Blog Results: |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| (%s)'
                % (match_norm_title, norm_title, title_match, year, match_year,
                   year_match, search_date, match_date, date_match, search_sxe,
                   match_sxe, sxe_match, self.get_name()), log_utils.LOGDEBUG)
            if title_match and year_match and date_match and sxe_match:
                quality = scraper_utils.height_get_quality(meta['height'])
                result = {
                    'url': scraper_utils.pathify_url(post_data['url']),
                    'title': scraper_utils.cleanse_title(full_title),
                    'year': match_year,
                    'quality': quality
                }
                results.append(result)
        return results