Exemplos de pathify_url em Python, exemplos de transistortv_lib.scraper_utils.pathify_url em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: tvhd_scraper.py Projeto: hpduong/retropie_configs

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     """Search the site for a TV show or a movie.

     For TV shows the show page is requested directly from the slugged
     title and counts as a hit when it contains at least one post div.
     For movies the site search is queried and every post is filtered on
     normalized title and year.

     Returns a list of dicts with 'url', 'title' and 'year' keys.
     """
     found = []
     if video_type == VIDEO_TYPES.TVSHOW and title:
         show_page = '/tv-show/%s/' % (scraper_utils.to_slug(title))
         show_page = scraper_utils.urljoin(self.base_url, show_page)
         html = self._http_get(show_page, require_debrid=True, cache_limit=24)
         # Any post on the show page is taken as proof the show exists.
         if dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
             found.append({'url': scraper_utils.pathify_url(show_page), 'title': scraper_utils.cleanse_title(title), 'year': ''})
         return found

     if video_type != VIDEO_TYPES.MOVIE:
         return found

     query = re.sub('[^A-Za-z0-9 ]', '', title.lower())
     html = self._http_get(self.base_url, params={'s': query}, require_debrid=True, cache_limit=1)
     wanted = scraper_utils.normalize_title(title)
     for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
         heading = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
         if not heading:
             continue

         post_url, raw_title = heading.groups()
         # TV-show posts and posts rejected by the age check are ignored.
         if '/tv-show/' in post or self.__too_old(post):
             continue

         # Drop any markup nested inside the heading link before parsing it.
         meta = scraper_utils.parse_movie_link(re.sub('<[^>]*>', '', raw_title))
         full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
         match_year = meta['year']

         candidate = scraper_utils.normalize_title(meta['title'])
         titles_overlap = candidate in wanted or wanted in candidate
         years_agree = not year or not match_year or year == match_year
         if titles_overlap and years_agree:
             found.append({'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year})

     return found

Exemplo n.º 2

0

Exibir arquivo

    def _get_episode_url(self, show_url, video):
        """Find the URL of the episode described by video on the show page.

        Tries, in order: an SxxEyy pattern inside any href (skipped when
        title matching is forced), the air date when the 'airdate-fallback'
        setting is 'true', and the normalized episode title when title
        matching is forced or the 'title-fallback' setting is 'true'.

        Returns the pathified episode URL, or None when nothing matches.
        """
        page = scraper_utils.urljoin(self.base_url, show_url)
        html = self._http_get(page, cache_limit=2)
        if not html:
            return

        force_title = scraper_utils.force_title(video)
        episodes = dom_parser2.parse_dom(html, 'div', {'class': 'el-item'})
        if not force_title:
            sxe_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
            sxe_hit = re.search(sxe_pattern, html)
            if sxe_hit:
                return scraper_utils.pathify_url(sxe_hit.group(1))

            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                # Compared verbatim against the stripped text of each 'date' div.
                wanted_date = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                for entry in episodes:
                    body = entry.content
                    links = dom_parser2.parse_dom(body, 'a', req='href')
                    dates = dom_parser2.parse_dom(body, 'div', {'class': 'date'})
                    if links and dates and wanted_date == dates[0].content.strip():
                        return scraper_utils.pathify_url(links[0].attrs['href'])

        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            wanted_title = scraper_utils.normalize_title(video.ep_title)
            for entry in episodes:
                body = entry.content
                links = dom_parser2.parse_dom(body, 'a', req='href')
                names = dom_parser2.parse_dom(body, 'div', {'class': 'e-name'})
                if not (links and names):
                    continue
                if wanted_title == scraper_utils.normalize_title(names[0].content):
                    return scraper_utils.pathify_url(links[0].attrs['href'])

Exemplo n.º 3

0

Exibir arquivo

    def _get_episode_url(self, show_url, video):
        """Walk the paginated post list looking for the requested episode.

        Each page is scanned for post divs that mention show_url. Paging
        stops as soon as a post is rejected by the age check or no
        'nextpostslink' anchor is present. When title matching is not
        forced a post is accepted through scraper_utils.release_check;
        otherwise the text between '</strong>' and '</p>' is compared to
        the normalized episode title (only when 'title-fallback' is on).

        Returns the pathified URL of the first accepted post, else None.
        """
        force_title = scraper_utils.force_title(video)
        title_fallback = kodi.get_setting('title-fallback') == 'true'
        norm_title = scraper_utils.normalize_title(video.ep_title)
        pending = [show_url]
        hit_old_post = False
        while pending and not hit_old_post:
            listing_url = scraper_utils.urljoin(self.base_url, pending[0])
            html = self._http_get(listing_url, require_debrid=True, cache_limit=1)
            for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
                if self.__too_old(post):
                    hit_old_post = True
                    break

                if show_url not in post:
                    continue

                anchors = dom_parser2.parse_dom(post, 'a', req='href')
                if not anchors:
                    continue

                link, link_text = anchors[0].attrs['href'], anchors[0].content
                if force_title:
                    if title_fallback and norm_title:
                        title_hit = re.search('</strong>(.*?)</p>', post)
                        if title_hit and norm_title == scraper_utils.normalize_title(title_hit.group(1)):
                            return scraper_utils.pathify_url(link)
                elif scraper_utils.release_check(video, link_text, require_title=False):
                    return scraper_utils.pathify_url(link)

            # Follow the pager; an empty parse result ends the loop.
            pending = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
            if pending:
                pending = [pending[0].attrs['href']]

Exemplo n.º 4

0

Exibir arquivo

Arquivo: scraper.py Projeto: hpduong/retropie_configs

    def _default_get_episode_url(self,
                                 html,
                                 video,
                                 episode_pattern,
                                 title_pattern='',
                                 airdate_pattern=''):
        """Generic episode lookup driven by caller-supplied regex patterns.

        html may be a string or a parse result whose first element exposes
        a 'content' attribute. Unless title matching is forced, the first
        capture group of episode_pattern is tried, then airdate_pattern
        (after its {year}/{month}/{p_month}/{month_name}/{short_month}/
        {day}/{p_day} placeholders are filled in) when the
        'airdate-fallback' setting is 'true'. Finally title_pattern, which
        must define named groups 'title' and 'url', is matched against the
        normalized episode title when forced or 'title-fallback' is 'true'.

        Returns the pathified URL of the first match, or None.
        """
        logger.log('Default Episode Url: |%s|%s|' % (self.get_name(), video), log_utils.LOGDEBUG)
        if not html:
            return

        # Unwrap a parse result; plain strings raise AttributeError and stay as-is.
        try:
            html = html[0].content
        except AttributeError:
            pass

        flags = re.DOTALL | re.I
        force_title = scraper_utils.force_title(video)
        if force_title:
            logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
        else:
            if episode_pattern:
                found = re.search(episode_pattern, html, flags)
                if found:
                    return scraper_utils.pathify_url(found.group(1))

            if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
                aired = video.ep_airdate
                # Applied in this order, one placeholder at a time.
                substitutions = (
                    ('{year}', str(aired.year)),
                    ('{month}', str(aired.month)),
                    ('{p_month}', '%02d' % (aired.month)),
                    ('{month_name}', MONTHS[aired.month - 1]),
                    ('{short_month}', SHORT_MONS[aired.month - 1]),
                    ('{day}', str(aired.day)),
                    ('{p_day}', '%02d' % (aired.day)),
                )
                for placeholder, value in substitutions:
                    airdate_pattern = airdate_pattern.replace(placeholder, value)
                logger.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)

                found = re.search(airdate_pattern, html, flags)
                if found:
                    return scraper_utils.pathify_url(found.group(1))

        use_title = force_title or kodi.get_setting('title-fallback') == 'true'
        if not (use_title and video.ep_title and title_pattern):
            return

        wanted_title = scraper_utils.normalize_title(video.ep_title)
        for candidate in re.finditer(title_pattern, html, flags):
            fields = candidate.groupdict()
            if wanted_title == scraper_utils.normalize_title(fields['title']):
                return scraper_utils.pathify_url(fields['url'])

Exemplo n.º 5

0

Exibir arquivo

Arquivo: 2ddl_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Query the site search and turn matching posts into results.

        TV shows: each post in the show category contributes the URL and
        name of its TAGS link, de-duplicated by URL. Movies: every h2
        heading link is paired positionally with a post div and filtered on
        category, the age check, normalized title and year.

        Returns a list of dicts with 'url', 'title' and 'year' keys.
        """
        found = []
        html = self._http_get('/search/' + urllib.quote_plus(title), require_debrid=True, cache_limit=1)
        post_filter = {'id': re.compile('post-\d+')}
        if video_type == VIDEO_TYPES.TVSHOW:
            known_urls = set()
            for _attr, post in dom_parser2.parse_dom(html, 'div', post_filter):
                if CATEGORIES[video_type] not in post:
                    continue

                tag_link = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
                if not tag_link:
                    continue

                show_url, show_name = tag_link.groups()
                if show_url in known_urls:
                    continue

                known_urls.add(show_url)
                found.append({
                    'url': scraper_utils.pathify_url(show_url),
                    'title': scraper_utils.cleanse_title(show_name),
                    'year': ''
                })
        elif video_type == VIDEO_TYPES.MOVIE:
            wanted = scraper_utils.normalize_title(title)
            headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
            bodies = [parsed.content for parsed in dom_parser2.parse_dom(html, 'div', post_filter)]
            # Headings and post bodies are matched up purely by position.
            for (post_url, post_title), body in zip(headings, bodies):
                if CATEGORIES[video_type] not in body or self.__too_old(body):
                    continue

                meta = scraper_utils.parse_movie_link(post_title)
                full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                match_year = meta['year']

                candidate = scraper_utils.normalize_title(meta['title'])
                titles_overlap = candidate in wanted or wanted in candidate
                years_agree = not year or not match_year or year == match_year
                if titles_overlap and years_agree:
                    found.append({
                        'url': scraper_utils.pathify_url(post_url),
                        'title': scraper_utils.cleanse_title(full_title),
                        'year': match_year
                    })

        return found

Exemplo n.º 6

0

Exibir arquivo

Arquivo: movytvy_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search through the site's typeahead JSON endpoint.

        Items are kept when their 'type' equals 'series' (TV shows) or
        'movie' (anything else) and they carry both a title and a link.
        The endpoint supplies no year here, so 'year' is always ''.

        Returns a list of dicts with 'title', 'year' and 'url' keys.
        """
        found = []
        if video_type == VIDEO_TYPES.TVSHOW:
            wanted_type = 'series'
        else:
            wanted_type = 'movie'

        search_url = scraper_utils.urljoin(self.base_url, '/typeahead/%s' % (urllib.quote(title)))
        headers = {'Referer': self.base_url}
        headers.update(XHR)
        html = self._http_get(search_url, headers=headers, require_debrid=True, cache_limit=.5)
        for entry in scraper_utils.parse_json(html, search_url):
            match_title = entry.get('title')
            match_url = entry.get('link')
            match_year = ''
            if entry.get('type') != wanted_type or not match_title or not match_url:
                continue

            if not year or not match_year or year == match_year:
                found.append({
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                })

        return found

Exemplo n.º 7

0

Exibir arquivo

Arquivo: streamdor_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search through the site's suggestion-box JSON endpoint.

        Each suggestion's 'Value' is split into title and year with
        scraper_utils.extra_year; a non-empty 'ReleaseYear' field takes
        precedence over the year parsed from the value. The result URL is
        an EntityDetails query built from the raw 'Value'.

        Returns a list of dicts with 'title', 'year' and 'url' keys.
        """
        found = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/searchBoxSuggestion')
        html = self._http_get(search_url, params={'top': 8, 'query': title}, cache_limit=8)
        for suggestion in scraper_utils.parse_json(html, search_url):
            entity = suggestion.get('Value', '')
            if not entity:
                continue

            match_title, parsed_year = scraper_utils.extra_year(entity)
            match_year = str(suggestion.get('ReleaseYear', '')) or parsed_year

            query = urllib.urlencode({
                'entityName': entity,
                'ignoreMediaLinkError': 'false'
            })
            match_url = '/ontology/EntityDetails?' + query
            if not year or not match_year or year == match_year:
                found.append({
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                })

        return found

Exemplo n.º 8

0

Exibir arquivo

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Filter the site's full listing down to titles containing title.

        The listing at '/ajax_submit.php' is fetched and every
        'category-post' div with a link and an h3 is considered once per
        pathified URL. An entry is kept when the normalized search title is
        a substring of the normalized entry title.

        Returns a list of dicts with 'url', 'title' and 'year' ('') keys.
        """
        found = []
        visited = set()
        listing_url = scraper_utils.urljoin(self.base_url, '/ajax_submit.php')
        html = self._http_get(listing_url, cache_limit=48)
        wanted = scraper_utils.normalize_title(title)
        for _attrs, entry in dom_parser2.parse_dom(html, 'div', {'class': 'category-post'}):
            links = dom_parser2.parse_dom(entry, 'a', req='href')
            headings = dom_parser2.parse_dom(entry, 'h3')
            if not (links and headings):
                continue

            entry_url = scraper_utils.pathify_url(links[0].attrs['href'])
            entry_title = headings[0].content
            if entry_url in visited:
                continue

            visited.add(entry_url)
            if wanted in scraper_utils.normalize_title(entry_title):
                found.append({
                    'url': entry_url,
                    'title': scraper_utils.cleanse_title(entry_title),
                    'year': ''
                })

        return found

Exemplo n.º 9

0

Exibir arquivo

Arquivo: xmovies8v2_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):
        """Search the site for a movie or a season of a show.

        Each 'item_movie' div supplies a link whose 'title' attribute holds
        the name (and, for seasons, an 'S<n>' / 'Season <n>' marker).

        * Movies: only entries without a season marker are accepted, and the
          year is parsed from the entry title.
        * Seasons: entries with a season marker must match the requested
          season number; entries without one are only accepted as season 1
          when their title and year agree with the request.

        Returns a list of dicts with 'url', 'title' and 'year' keys; the URL
        always points at the entry's 'watching.html' page.
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/movies/search')
        html = self._http_get(search_url, params={'s': title}, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'item_movie'}):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match: continue

            match_title_year = match[0].attrs['title']
            match_url = match[0].attrs['href']
            is_season = re.search('S(?:eason\s+)?(\d+)', match_title_year, re.I)
            # Does the entry's kind (movie vs. season) agree with the request?
            # The previous form compared video_type against a boolean
            # ("video_type == (MOVIE and not is_season)"), which is never true,
            # so movie searches always came back empty.
            match_vt = (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season)
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                if not season and not match_vt: continue
                if match_vt:
                    if season and int(is_season.group(1)) != int(season): continue
                else:
                    # No season marker: treat the entry as season 1 of the show,
                    # but only when its title and year line up with the request.
                    if season and int(season) != 1: continue
                    site_title, site_year = scraper_utils.extra_year(match_title_year)
                    if scraper_utils.normalize_title(site_title) not in scraper_utils.normalize_title(title) or year != site_year: continue

                match_title = match_title_year
            else:
                if not match_vt: continue
                match_title, match_year = scraper_utils.extra_year(match_title_year)

            match_url = scraper_utils.urljoin(match_url, 'watching.html')
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
        return results

Exemplo n.º 10

0

Exibir arquivo

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search via the site's slug-based 'bestmatch' page.

        The title is turned into a lowercase, dash-separated slug limited to
        letters, digits and dashes. Each 'thumbsTitle' div with a link
        yields one candidate whose link text is split into title and year.

        Returns a list of dicts with 'url', 'title' and 'year' keys.
        """
        found = []
        url_template = scraper_utils.urljoin(self.base_url, '/bestmatch-fund-movies-%s.html')
        slug = re.sub('[^A-Za-z0-9-]', '', title.replace(' ', '-')).lower()
        html = self._http_get(url_template % (slug), cache_limit=1)
        for _attrs, thumb in dom_parser2.parse_dom(html, 'div', {'class': 'thumbsTitle'}):
            links = dom_parser2.parse_dom(thumb, 'a', req='href')
            if not links:
                continue

            first = links[0]
            match_url = first.attrs['href']
            match_title, match_year = scraper_utils.extra_year(first.content)
            if year and match_year and year != match_year:
                continue

            found.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })

        return found

Exemplo n.º 11

0

Exibir arquivo

Arquivo: downloadtube_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site and return at most one result.

        Only the first link and first image inside the 'who-likes' div are
        inspected; the image's alt text is split into title and year.

        Returns a list (empty or one element) of dicts with 'title', 'year'
        and 'url' keys.
        """
        found = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/%s' % (urllib.quote(title)))
        html = self._http_get(search_url, cache_limit=8)
        container = dom_parser2.parse_dom(html, 'div', {'id': 'who-likes'})
        if not container:
            return found

        inner = container[0].content
        links = dom_parser2.parse_dom(inner, 'a', req='href')
        images = dom_parser2.parse_dom(inner, 'img', req='alt')
        if not (links and images):
            return found

        match_url = links[0].attrs['href']
        match_title, match_year = scraper_utils.extra_year(images[0].attrs['alt'])
        if not year or not match_year or year == match_year:
            found.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })

        return found

Exemplo n.º 12

0

Exibir arquivo

Arquivo: moviewatcher_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site for movies or series.

        Each 'one_movie-item' div is classified from its 'movie-series'
        badge: no badge means movie, a badge reading 'TV SERIE' means TV
        show. Any other badge leaves the entry unclassified, so it never
        equals video_type and is dropped. The year is taken from a
        '-YYYY-' segment of the entry URL when present.

        Returns a list of dicts with 'url', 'title' and 'year' keys.
        """
        found = []
        search_url = scraper_utils.urljoin(self.base_url, '/search')
        if video_type == VIDEO_TYPES.MOVIE:
            search_type = 'movies'
        else:
            search_type = 'series'
        html = self._http_get(search_url, params={'query': title.lower(), 'type': search_type}, cache_limit=8)
        for _attrs, entry in dom_parser2.parse_dom(html, 'div', {'class': 'one_movie-item'}):
            links = dom_parser2.parse_dom(entry, 'a', req='href')
            images = dom_parser2.parse_dom(entry, 'img', req='alt')
            entry_kind = dom_parser2.parse_dom(entry, 'div', {'class': 'movie-series'})
            if not entry_kind:
                entry_kind = VIDEO_TYPES.MOVIE
            elif entry_kind[0].content == 'TV SERIE':
                entry_kind = VIDEO_TYPES.TVSHOW

            if not (links and images and video_type == entry_kind):
                continue

            match_url = links[0].attrs['href']
            match_title = images[0].attrs['alt']

            year_hit = re.search('-(\d{4})-', match_url)
            match_year = year_hit.group(1) if year_hit else ''

            if not year or not match_year or year == match_year:
                found.append({'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year})

        return found

Exemplo n.º 13

0

Exibir arquivo

Arquivo: movietube_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Submit the site's search form and collect the listed films.

        The form data is sent to '/index.php' (with the page itself as
        Referer); every 'short-film' div inside the 'dle-content' container
        contributes the href and title attribute of its h5 link. No year is
        available, so 'year' is always ''.

        Returns a list of dicts with 'url', 'title' and 'year' keys.
        """
        found = []
        search_url = scraper_utils.urljoin(self.base_url, '/index.php')
        form = {'subaction': 'search', 'story': title, 'do': 'search'}
        html = self._http_get(search_url,
                              params={'do': 'search'},
                              data=form,
                              headers={'Referer': search_url},
                              cache_limit=1)
        container = dom_parser2.parse_dom(html, 'div', {'id': 'dle-content'})
        if not container:
            return found

        for _attrs, film in dom_parser2.parse_dom(container[0].content, 'div', {'class': 'short-film'}):
            link = re.search('<h5><a\s+href="([^"]+)[^>]+title="([^"]+)', film)
            if link:
                film_url, film_title = link.groups('')
                found.append({
                    'url': scraper_utils.pathify_url(film_url),
                    'title': scraper_utils.cleanse_title(film_title),
                    'year': ''
                })

        return found

Exemplo n.º 14

0

Exibir arquivo

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site and build results from the links inside h2 tags.

        The link text (with non-ASCII characters removed) is expected to end
        in a four-digit year. When it does not, the year is taken from a
        trailing '-YYYY' in the link URL if present, and the whole link text
        is used as the title.

        Returns a list of dicts with 'title', 'year' and 'url' keys.
        """
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'h2'):
            for attrs, match_title_year in dom_parser2.parse_dom(item,
                                                                 'a',
                                                                 req=['href']):
                match_url = attrs['href']
                match_title_year = re.sub('[^\x00-\x7F]', '', match_title_year)
                match = re.search('(.*?)\s+(\d{4})$', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    # No year in the text: the text is the title. Previously
                    # match_title was left unset on the URL-year path and the
                    # year was a tuple (match.groups(1)) that could never
                    # equal the requested year string.
                    match_title = match_title_year
                    url_year = re.search('-(\d{4})/?$', match_url)
                    match_year = url_year.group(1) if url_year else ''

                if not year or not match_year or year == match_year:
                    result = {
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year,
                        'url': scraper_utils.pathify_url(match_url)
                    }
                    results.append(result)

        return results

Exemplo n.º 15

0

Exibir arquivo

Arquivo: vumoo_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site and list every 'movie_item' article.

        Each article's first link carrying both 'href' and 'data-title'
        becomes a result. The page gives no year here, so 'year' is always
        '' and no year filtering takes effect.

        Returns a list of dicts with 'title', 'year' and 'url' keys.
        """
        found = []
        search_url = scraper_utils.urljoin(self.base_url, '/search?q=')
        html = self._http_get(search_url,
                              params={'search': title, 've': 1},
                              headers={'Referer': ''},
                              cache_limit=8)
        for _attrs, article in dom_parser2.parse_dom(html, 'article', {'class': 'movie_item'}):
            links = dom_parser2.parse_dom(article, 'a', req=['href', 'data-title'])
            if not links:
                continue

            link_attrs = links[0].attrs
            match_url = link_attrs['href']
            match_title = link_attrs['data-title']
            match_year = ''
            if not year or not match_year or year == match_year:
                found.append({
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                })

        return found

Exemplo n.º 16

0

Exibir arquivo

    def get_sources(self, video):
        """Collect direct stream links from the page resolved for video.

        Every anchor whose href contains MOVIE_URL becomes one hoster dict.
        Quality is derived from the height that
        scraper_utils.parse_movie_link reads from the link, and the stream
        URL is pathified with a User-Agent header suffix appended.

        Returns a list of hoster dicts (empty when no URL is known or the
        URL is FORCE_NO_MATCH).
        """
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return sources

        html = self._http_get(scraper_utils.urljoin(self.base_url, source_url), cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
            link = attrs['href']
            if MOVIE_URL not in link:
                continue

            # Parse the metadata from the raw link before it is rewritten.
            meta = scraper_utils.parse_movie_link(link)
            ua_suffix = scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            stream_url = scraper_utils.pathify_url(link) + ua_suffix
            quality = scraper_utils.height_get_quality(meta['height'])
            source = {
                'multi-part': False,
                'host': scraper_utils.get_direct_hostname(self, stream_url),
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            }
            if 'format' in meta:
                source['format'] = meta['format']
            sources.append(source)

        return sources

Exemplo n.º 17

0

Exibir arquivo

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search through the site's AJAX endpoint and filter by media kind.

        The query is posted with a millisecond timestamp. Items are kept
        when their upper-cased 'meta' field starts with 'TV SHOW' (for TV
        show and episode searches) or 'MOVIE' (for everything else).

        Returns a list of dicts with 'title', 'url' and 'year' ('') keys.
        """
        found = []
        search_url = scraper_utils.urljoin(self.base_url, '/ajax/search.php')
        payload = {
            'q': title,
            'limit': 100,
            'timestamp': int(time.time() * 1000),
            'verifiedCheck': ''
        }
        html = self._http_get(search_url, data=payload, headers=XHR, cache_limit=1)
        is_tv = video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]
        wanted_prefix = 'TV SHOW' if is_tv else 'MOVIE'

        for entry in scraper_utils.parse_json(html, search_url):
            if entry['meta'].upper().startswith(wanted_prefix):
                found.append({
                    'title': scraper_utils.cleanse_title(entry['title']),
                    'url': scraper_utils.pathify_url(entry['permalink']),
                    'year': ''
                })

        return found

Exemplo n.º 18

0

Exibir arquivo

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site's filter page for movies or series.

        Each 'recent-item' div with a 'title' attribute and a link is a
        candidate. Links containing '/series/' (case-insensitive) are
        treated as series: they are dropped for movie searches, and
        non-series links are dropped for TV-show searches. The div's
        'title' attribute is split into title and year.

        Returns a list of dicts with 'title', 'year' and 'url' keys.
        """
        found = []
        query = {'c': 'movie', 'm': 'filter', 'keyword': title}
        html = self._http_get(self.base_url, params=query, cache_limit=8)
        candidates = dom_parser2.parse_dom(html, 'div', {'class': 'recent-item'}, req='title')
        for attrs, body in candidates:
            links = dom_parser2.parse_dom(body, 'a', req='href')
            if not links:
                continue

            match_url = links[0].attrs['href']
            is_series = re.search('/series/', match_url, re.I)
            wrong_for_movie = video_type == VIDEO_TYPES.MOVIE and is_series
            wrong_for_show = video_type == VIDEO_TYPES.TVSHOW and not is_series
            if wrong_for_movie or wrong_for_show:
                continue

            match_title, match_year = scraper_utils.extra_year(attrs['title'])
            if year and match_year and year != match_year:
                continue

            found.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })

        return found

Exemplo n.º 19

0

Exibir arquivo

Arquivo: filmovizjia_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site via '/search.php' for movies or series.

        Movie searches send the extended movie parameter set; every other
        video type sends the series parameters. Each li inside the
        'cbp-rfgrid' list that has a link with 'title' and 'href' becomes a
        candidate, with the link's 'title' split into title and year.

        Returns a list of dicts with 'url', 'title' and 'year' keys.
        """
        found = []
        search_url = scraper_utils.urljoin(self.base_url, '/search.php')
        if video_type != VIDEO_TYPES.MOVIE:
            query = {'all': 'all', 'vselect': 'ser', 'keywords': title}
        else:
            query = {
                'all': 'all',
                'searchin': 'mov',
                'subtitles': '',
                'imdbfrom': '',
                'yearrange': '',
                'keywords': title
            }
        html = self._http_get(search_url, params=query, cache_limit=8)
        grid = dom_parser2.parse_dom(html, 'ul', {'class': 'cbp-rfgrid'})
        if not grid:
            return found

        # The parse results are handed to parse_dom as-is, not unwrapped.
        for cell in dom_parser2.parse_dom(grid, 'li'):
            links = dom_parser2.parse_dom(cell, 'a', req=['title', 'href'])
            if not links:
                continue

            link_attrs = links[0].attrs
            match_url = link_attrs['href']
            match_title, match_year = scraper_utils.extra_year(link_attrs['title'])
            if year and match_year and year != match_year:
                continue

            found.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })

        return found

Exemplo n.º 20

0

Exibir arquivo

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site and return the movie entries among the results.

        Each 'result-item' div must carry a 'movies' span and a 'title' div
        containing a link; items that do not are skipped. The link text is
        split into title and year, falling back to the item's 'year' span
        when the text has no year.

        Returns a list of dicts with 'title', 'year' and 'url' keys.
        """
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'result-item'}):
            match = dom_parser2.parse_dom(item, 'div', {'class': 'title'})
            is_movie = dom_parser2.parse_dom(item, 'span', {'class': 'movies'})
            # Skip (rather than stop at) non-movie or malformed items so that
            # one such entry does not hide every result listed after it.
            if not is_movie or not match: continue

            match = dom_parser2.parse_dom(match[0].content, 'a', req='href')
            if not match: continue

            match_url, match_title_year = match[0].attrs['href'], match[
                0].content
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if not match_year:
                match_year = dom_parser2.parse_dom(item, 'span',
                                                   {'class': 'year'})
                match_year = match_year[0].content if match_year else ''

            if not year or not match_year or year == match_year:
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)

        return results

Exemplo n.º 21

0

Exibir arquivo

Arquivo: movienight_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site for ``title`` and return the matching entries.

        Fetches ``self.base_url`` with ``s=title`` and inspects every
        ``div.movie``. The URL is taken from the first link with ``href``
        and the title/year text from the first image's ``alt``. Links whose
        URL matches ``season-<n>-episode<n>`` are ignored.

        Returns a list of dicts with 'url', 'title' and 'year' keys. An
        entry is kept when ``year`` is empty, the entry has no year, or
        both years are equal.
        """
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=4)
        for _attrs, movie in dom_parser2.parse_dom(html, 'div',
                                                   {'class': 'movie'}):
            match_url = dom_parser2.parse_dom(movie, 'a', req='href')
            match_title_year = dom_parser2.parse_dom(movie, 'img', req='alt')
            if not match_url or not match_title_year:
                continue

            match_url = match_url[0].attrs['href']
            # Episode links are not search results for a title.
            if re.search(r'season-\d+-episode\d+', match_url): continue
            match_title_year = match_title_year[0].attrs['alt']

            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if not match_year:
                # Fall back to the dedicated year element. An explicit
                # emptiness check replaces the former bare ``except:``,
                # which would also have hidden unrelated errors.
                year_div = dom_parser2.parse_dom(movie, 'div',
                                                 {'class': 'year'})
                match_year = year_div[0].content if year_div else ''

            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results

Exemplo n.º 22

0

Exibir arquivo

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Run the site's ``/advanced-search/`` and filter the hits.

        Every ``div.datos`` with a link is considered. A link containing
        ``/tvshows/`` is treated as a TV show; entries whose kind conflicts
        with ``video_type`` (show for a movie search, non-show for a TV
        show search) are dropped.

        An entry is kept when its normalized title is contained in the
        normalized ``title`` and ``year`` is empty, the entry has no year,
        or both years are equal. Returns a list of dicts with 'title',
        'year' and 'url' keys.
        """
        endpoint = scraper_utils.urljoin(self.base_url, '/advanced-search/')
        referer = {'Referer': self.base_url}
        query = {'search_query': title, 'orderby': '', 'order': '', 'wpas': 1}
        page = self._http_get(endpoint, params=query, headers=referer,
                              cache_limit=8)
        wanted = scraper_utils.normalize_title(title)

        hits = []
        for _attrs, block in dom_parser2.parse_dom(page, 'div',
                                                   {'class': 'datos'}):
            anchors = dom_parser2.parse_dom(block, 'a', req='href')
            if not anchors:
                continue

            link = anchors[0]
            match_url = link.attrs['href']
            is_tvshow = '/tvshows/' in match_url
            # Decide whether the entry's kind clashes with the request.
            if is_tvshow:
                wrong_kind = video_type == VIDEO_TYPES.MOVIE
            else:
                wrong_kind = video_type == VIDEO_TYPES.TVSHOW
            if wrong_kind:
                continue

            match_title, match_year = scraper_utils.extra_year(link.content)
            if scraper_utils.normalize_title(match_title) not in wanted:
                continue
            if year and match_year and year != match_year:
                continue

            hits.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })

        return hits

Exemplo n.º 23

0

Exibir arquivo

Arquivo: mvl_proxy.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Scan the site's listing pages for links matching ``title``.

        The '/latest-added/', '/popular-today/' and '/most-popular/' pages
        are fetched in turn; within each page's first ``div.home`` every
        link with ``href`` is compared against the request.

        A link is kept when the normalized ``title`` is contained in the
        link's normalized title and ``year`` is empty, the link has no
        year, or both years are equal. A URL already produced by an
        earlier page is not added again.

        Returns a list of dicts with 'url', 'title' and 'year' keys.
        """
        results = []
        seen_urls = set()
        # Depends only on the argument, so compute it once rather than
        # once per listing page.
        norm_title = scraper_utils.normalize_title(title)
        for page in ['/latest-added/', '/popular-today/', '/most-popular/']:
            url = scraper_utils.urljoin(self.base_url, page)
            html = self._http_get(url, cache_limit=24)
            fragment = dom_parser2.parse_dom(html, 'div', {'class': 'home'})
            if not fragment:
                continue

            for attrs, match_title_year in dom_parser2.parse_dom(
                    fragment[0].content, 'a', req='href'):
                match_url = attrs['href']
                match_title, match_year = scraper_utils.extra_year(
                    match_title_year)
                if norm_title not in scraper_utils.normalize_title(
                        match_title):
                    continue
                if year and match_year and year != match_year:
                    continue

                # De-duplicate on the pathified URL across listing pages.
                match_url = scraper_utils.pathify_url(match_url)
                if match_url in seen_urls: continue
                seen_urls.add(match_url)
                result = {
                    'url': match_url,
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results

Exemplo n.º 24

0

Exibir arquivo

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """POST ``title`` to ``/searchtest.php`` and collect matching links.

        Within each ``<a href="#">`` element of the response, the first
        ``href`` starting with 'watch-movie-' (movie searches) or
        'watch-tvshow-' (anything else) is taken as the result link, and
        its title is derived via ``self.__make_title``.

        The match year is always the empty string here, so only the title
        containment test actually filters. Returns a list of dicts with
        'url', 'title' and 'year' keys.
        """
        endpoint = scraper_utils.urljoin(self.base_url, '/searchtest.php')
        form = {'searchapi': title}
        referer = {'Referer': self.base_url}
        html = self._http_get(endpoint, data=form, headers=referer,
                              cache_limit=2)
        query_type = ('watch-movie-' if video_type == VIDEO_TYPES.MOVIE
                      else 'watch-tvshow-')

        hits = []
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, item in dom_parser2.parse_dom(html, 'a', {'href': '#'}):
            found = re.search('href="(%s[^"]+)' % (query_type), item)
            if not found:
                continue

            link = found.group(1)
            match_title = self.__make_title(link, query_type)
            match_year = ''
            if norm_title not in scraper_utils.normalize_title(match_title):
                continue
            # With match_year fixed to '', this year test never rejects.
            if year and match_year and int(year) != int(match_year):
                continue

            hits.append({
                'url': scraper_utils.pathify_url(link),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })

        return hits

Exemplo n.º 25

0

Exibir arquivo

    def _get_episode_url(self, show_url, video):
        """Find the URL of the episode described by ``video``.

        First pass: on the show page, look for a link whose text is
        "<season>. Sezon", fetch that page and hand it to
        ``_default_get_episode_url`` with a
        "-<season>-sezon-<episode>-bolum" link pattern. A truthy result is
        returned as-is.

        Fallback: search the site's front page for a link made of the last
        non-empty segment of the show URL followed by
        "-<season>-sezon-<episode>-bolum" and return it pathified.

        Returns None when neither pass finds a link.
        """
        show_page_url = scraper_utils.urljoin(self.base_url, show_url)
        show_html = self._http_get(show_page_url, cache_limit=8)
        season_link = re.search(
            '''href=['"]([^'"]+)[^>]+>\s*%s\.\s*Sezon<''' % (video.season),
            show_html)
        if season_link:
            episode_pattern = '''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (
                video.season, video.episode)
            season_url = scraper_utils.urljoin(self.base_url,
                                               season_link.group(1))
            season_html = self._http_get(season_url, cache_limit=2)
            ep_url = self._default_get_episode_url(season_html, video,
                                                   episode_pattern)
            if ep_url:
                return ep_url

        # front page fallback
        front_html = self._http_get(self.base_url, cache_limit=2)
        # Last non-empty path segment of the show URL ('' if none).
        segments = [part for part in show_page_url.split('/') if part]
        slug = segments[-1] if segments else ''

        ep_url_frag = 'href="([^"]+/{slug}-{season}-sezon-{episode}-bolum[^"]*)'.format(
            slug=slug, season=video.season, episode=video.episode)
        front_match = re.search(ep_url_frag, front_html)
        if not front_match:
            return None
        return scraper_utils.pathify_url(front_match.group(1))

Exemplo n.º 26

0

Exibir arquivo

Arquivo: rmz_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site for ``title`` via ``/search/<quoted title>``.

        Only ``div.list`` fragments that contain a ``div.lists_titles`` are
        examined. Within them, each ``a.title`` link with ``href`` is a
        candidate; markup is stripped from its text before matching.

        A candidate whose text contains an open-ended year range such as
        "(2015-)" is treated as a TV show. Candidates whose kind conflicts
        with ``video_type`` are dropped.

        Returns a list of dicts with 'url', 'title' and 'year' keys. An
        entry is kept when ``year`` is empty, the entry has no year, or
        both years are equal.
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/')
        search_url = scraper_utils.urljoin(search_url,
                                           urllib.quote_plus(title))
        html = self._http_get(search_url, require_debrid=True, cache_limit=8)
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div',
                                                      {'class': 'list'}):
            if not dom_parser2.parse_dom(fragment, 'div',
                                         {'class': 'lists_titles'}):
                continue
            for attrs, match_title_year in dom_parser2.parse_dom(
                    fragment, 'a', {'class': 'title'}, req='href'):
                match_url = attrs['href']
                match_title_year = re.sub(r'</?[^>]*>', '', match_title_year)
                # The previous pattern '\(d{4|-\)' was a typo: it parsed as
                # the alternation "(d{4" | "-)" and never tested for four
                # digits. Match a literal "(YYYY-)" instead.
                is_show = re.search(r'\(\d{4}-\)', match_title_year)
                if (is_show and video_type == VIDEO_TYPES.MOVIE) or (
                        not is_show and video_type == VIDEO_TYPES.TVSHOW):
                    continue

                match_title, match_year = scraper_utils.extra_year(
                    match_title_year)
                if not year or not match_year or year == match_year:
                    result = {
                        'url': scraper_utils.pathify_url(match_url),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year
                    }
                    results.append(result)

        return results

Exemplo n.º 27

0

Exibir arquivo

Arquivo: iomovies_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site's ``/search`` page for ``title``.

        Each ``div.movie-item`` is inspected for a link with
        ``itemprop="url"`` and an ``href``; items without one are skipped.
        The year comes from the link text when ``extra_year`` finds one,
        otherwise from the item's ``div.overlay-year`` (empty string if
        that element is absent).

        Returns a list of dicts with 'title', 'year' and 'url' keys. An
        entry is kept when ``year`` is empty, the entry has no year, or
        both years are equal.
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search')
        html = self._http_get(search_url, params={'q': title}, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'movie-item'}):
            match = dom_parser2.parse_dom(item,
                                          'a', {'itemprop': 'url'},
                                          req='href')
            if not match: continue

            match_url, match_title_year = match[0].attrs['href'], match[
                0].content
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if not match_year:
                # Explicit emptiness check instead of the former bare
                # ``except:``, which would also have hidden unrelated
                # errors.
                year_div = dom_parser2.parse_dom(
                    item, 'div', {'class': 'overlay-year'})
                match_year = year_div[0].content if year_div else ''

            if not year or not match_year or year == match_year:
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)

        return results

Exemplo n.º 28

0

Exibir arquivo

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site for ``title`` and return the matching entries.

        Fetches ``self.base_url`` with ``s=title`` and inspects every
        ``div.browse-movie-top`` that has a link with ``href``. The year
        comes from the link text when ``extra_year`` finds one, otherwise
        from the stripped text of the item's ``div.browse-movie-year``.

        Every result URL gets '?watching' appended before it is pathified.
        Returns a list of dicts with 'title', 'year' and 'url' keys. An
        entry is kept when ``year`` is empty, the entry has no year, or
        both years are equal.
        """
        hits = []
        page = self._http_get(self.base_url, params={'s': title},
                              cache_limit=8)
        for _attrs, tile in dom_parser2.parse_dom(
                page, 'div', {'class': 'browse-movie-top'}):
            anchors = dom_parser2.parse_dom(tile, 'a', req='href')
            if not anchors:
                continue

            link = anchors[0]
            match_url = link.attrs['href']
            match_title, match_year = scraper_utils.extra_year(link.content)
            if not match_year:
                year_div = dom_parser2.parse_dom(
                    tile, 'div', {'class': 'browse-movie-year'})
                if year_div:
                    match_year = year_div[0].content.strip()

            if year and match_year and year != match_year:
                continue

            hits.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url + '?watching')
            })

        return hits

Exemplo n.º 29

0

Exibir arquivo

Arquivo: iwatch_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """POST a search for ``title`` to the site's ``/search`` page.

        The form's ``searchin`` field is 'm' for movie searches and 't'
        for everything else. Results are read from the links (with
        ``href``) in the first ``table`` inside the first
        ``div.search-page``; an empty list is returned if either container
        is missing.

        Returns a list of dicts with 'url', 'title' and 'year' keys. An
        entry is kept when ``year`` is empty, the entry has no year, or
        both years are equal.
        """
        if video_type == VIDEO_TYPES.MOVIE:
            search_in = 'm'
        else:
            search_in = 't'
        endpoint = scraper_utils.urljoin(self.base_url, '/search')
        form = {'searchquery': title, 'searchin': search_in}
        page = self._http_get(endpoint, data=form, cache_limit=8)

        hits = []
        container = dom_parser2.parse_dom(page, 'div',
                                          {'class': 'search-page'})
        if not container:
            return hits
        table = dom_parser2.parse_dom(container[0].content, 'table')
        if not table:
            return hits

        for attrs, match_title_year in dom_parser2.parse_dom(
                table[0].content, 'a', req='href'):
            match_url = attrs['href']
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if year and match_year and year != match_year:
                continue
            hits.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })

        return hits

Exemplo n.º 30

0

Exibir arquivo

Arquivo: watchseries_scraper.py Projeto: hpduong/retropie_configs

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Query the site's ``/suggest.php`` endpoint for ``title``.

        The request always asks for ``type=TVShows`` with ``ajax=1`` and is
        sent with a Referer of ``self.base_url`` plus the ``XHR`` headers.
        Every link with ``href`` in the response is a candidate; markup is
        stripped from its text to form the title, and the year is read
        from a trailing "(YYYY)" in the link URL (empty string if absent).

        Returns a list of dicts with 'url', 'title' and 'year' keys. An
        entry is kept when ``year`` is empty, the entry has no year, or
        both years are equal.
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/suggest.php')
        headers = {'Referer': self.base_url}
        headers.update(XHR)
        params = {'ajax': 1, 's': title, 'type': 'TVShows'}
        # The headers were previously built but never sent; pass them so
        # the request carries the Referer and XHR headers.
        html = self._http_get(search_url,
                              params=params,
                              headers=headers,
                              cache_limit=8)
        for attrs, match_title in dom_parser2.parse_dom(html, 'a', req='href'):
            match_url = attrs['href']
            match_title = re.sub(r'</?[^>]*>', '', match_title)
            match = re.search(r'\((\d{4})\)$', match_url)
            match_year = match.group(1) if match else ''

            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results