def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site's /index.php endpoint for *title*.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'url', 'title' and 'year' keys
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/index.php')
        params = {'search': title, 'image.x': 0, 'image.y': 0}
        html = self._http_get(search_url, params=params, cache_limit=1)

        # Are we on a results page?  A 'window.location' snippet means the
        # site emitted a JS redirect straight to a single match instead.
        if not re.search('window\.location', html):
            pattern = '<td[^>]+class="movieText"[^>]*>(.*?)</p>.*?href="(/watch/[^"]+)'
            for match in re.finditer(pattern, html, re.DOTALL):
                # groups('') substitutes '' for any non-participating group
                match_title_year, match_url = match.groups('')
                # skip p**n
                if '-XXX-' in match_url.upper() or ' XXX:' in match_title_year: continue
                # strip any residual HTML tags from the label
                match_title_year = re.sub('</?.*?>', '', match_title_year)
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if not year or not match_year or year == match_year:
                    result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        else:
            # Redirect page: extract the target URL from the JS assignment.
            match = re.search('window\.location\s+=\s+"([^"]+)', html)
            if not match: return results
            url = match.group(1)
            # 'movies.php' is the site's generic "no match" landing page.
            if url != 'movies.php':
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(title), 'year': year}
                results.append(result)
        return results
Пример #2
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site front page ('s' query parameter) for *title*.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'url', 'title' and 'year' keys
        """
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=4)
        for _attrs, movie in dom_parser2.parse_dom(html, 'div',
                                                   {'class': 'movie'}):
            match_url = dom_parser2.parse_dom(movie, 'a', req='href')
            match_title_year = dom_parser2.parse_dom(movie, 'img', req='alt')
            if not (match_url and match_title_year): continue

            match_url = match_url[0].attrs['href']
            # Skip per-episode links.  The dash before the episode number is
            # optional so both 'episodeN' and 'episode-N' style URLs are
            # caught (the original pattern missed the dashed form).
            if re.search(r'season-\d+-episode-?\d+', match_url): continue

            match_title_year = match_title_year[0].attrs['alt']
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not match_year:
                # Fall back to a dedicated year <div> when the alt text
                # carries no year.  Narrowed from a bare except: only a
                # missing element (IndexError) is expected here.
                match_year = dom_parser2.parse_dom(movie, 'div',
                                                   {'class': 'year'})
                try:
                    match_year = match_year[0].content
                except IndexError:
                    match_year = ''

            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results
Пример #3
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """POST the site's /search form and collect matching title links.

        :param video_type: selects movie ('m') vs TV ('t') search mode
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'url', 'title' and 'year' keys
        """
        results = []
        search_in = 'm' if video_type == VIDEO_TYPES.MOVIE else 't'
        search_url = scraper_utils.urljoin(self.base_url, '/search')
        post_data = {'searchquery': title, 'searchin': search_in}
        html = self._http_get(search_url, data=post_data, cache_limit=8)

        page = dom_parser2.parse_dom(html, 'div', {'class': 'search-page'})
        if not page: return results
        table = dom_parser2.parse_dom(page[0].content, 'table')
        if not table: return results

        for link_attrs, link_label in dom_parser2.parse_dom(
                table[0].content, 'a', req='href'):
            match_title, match_year = scraper_utils.extra_year(link_label)
            if year and match_year and year != match_year:
                continue
            results.append({
                'url': scraper_utils.pathify_url(link_attrs['href']),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })

        return results
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """POST a DLE-style (do=search) query and parse the result blocks.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        # Form fields expected by the DataLife Engine search handler.
        data = {
            'hash': 'indexert',
            'do': 'search',
            'subaction': 'search',
            'search_start': 0,
            'full_search': 0,
            'result_from': 1,
            'story': title
        }
        search_url = scraper_utils.urljoin(self.base_url, 'index.php')
        html = self._http_get(search_url,
                              params={'do': 'search'},
                              data=data,
                              cache_limit=8)
        # Only parse items when the page actually flags search results.
        if dom_parser2.parse_dom(html, 'div', {'class': 'sresult'}):
            for _attrs, item in dom_parser2.parse_dom(
                    html, 'div', {'class': 'short_content'}):
                match_url = dom_parser2.parse_dom(item, 'a', req='href')
                match_title_year = dom_parser2.parse_dom(
                    item, 'div', {'class': 'short_header'})
                if match_url and match_title_year:
                    match_url = match_url[0].attrs['href']
                    match_title, match_year = scraper_utils.extra_year(
                        match_title_year[0].content)
                    if not year or not match_year or year == match_year:
                        result = {
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(match_url)
                        }
                        results.append(result)

        return results
Пример #5
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search via the WordPress 's' parameter and parse the listing.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=1)
        # The site renders this phrase when the search comes back empty.
        if re.search('Sorry, but nothing matched', html, re.I):
            return results

        listing = dom_parser2.parse_dom(html, 'ul',
                                        {'class': 'listing-videos'})
        if not listing:
            return results

        for link_attrs, label in dom_parser2.parse_dom(
                listing[0].content, 'a', req='href'):
            clean_label = re.sub('</?[^>]*>', '', label)
            match_title, match_year = scraper_utils.extra_year(clean_label)
            if year and match_year and year != match_year:
                continue
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(link_attrs['href'])
            })

        return results
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Query the site's advanced-search page and filter by type/title.

        :param video_type: VIDEO_TYPES.MOVIE or VIDEO_TYPES.TVSHOW
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/advanced-search/')
        headers = {'Referer': self.base_url}
        params = {'search_query': title, 'orderby': '', 'order': '', 'wpas': 1}
        html = self._http_get(search_url,
                              params=params,
                              headers=headers,
                              cache_limit=8)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'datos'}):
            match = dom_parser2.parse_dom(item, 'a', req='href')
            if not match: continue

            match_url = match[0].attrs['href']
            # '/tvshows/' in the path is what distinguishes shows from movies.
            is_tvshow = '/tvshows/' in match_url
            if is_tvshow and video_type == VIDEO_TYPES.MOVIE or not is_tvshow and video_type == VIDEO_TYPES.TVSHOW:
                continue

            match_title = match[0].content
            match_title, match_year = scraper_utils.extra_year(match_title)
            # NOTE(review): the containment test is site-title *inside* the
            # query title, the reverse of most sibling scrapers -- confirm
            # this is the intended fuzzy match for this site.
            if scraper_utils.normalize_title(match_title) in norm_title and (
                    not year or not match_year or year == match_year):
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)

        return results
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the front page and return movie matches, skipping episodes.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=1)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'item'}):
            anchors = dom_parser2.parse_dom(item, 'a', req='href')
            labels = dom_parser2.parse_dom(item, 'span', {'class': 'tt'})
            year_spans = dom_parser2.parse_dom(item, 'span', {'class': 'year'})
            if not anchors or not labels:
                continue

            match_url = anchors[0].attrs['href']
            label = labels[0].content
            # Labels like '3 x 7' are individual episodes.
            if re.search('\d+\s*x\s*\d+', label):
                continue  # exclude episodes
            match_title, match_year = scraper_utils.extra_year(label)
            if not match_year and year_spans:
                match_year = year_spans[0].content

            if not year or not match_year or year == match_year:
                results.append({
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                })

        return results
Пример #8
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Query the JSON suggestion endpoint and build EntityDetails URLs.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url,
                                           '/search/searchBoxSuggestion')
        html = self._http_get(search_url,
                              params={
                                  'top': 8,
                                  'query': title
                              },
                              cache_limit=8)
        js_data = scraper_utils.parse_json(html, search_url)
        for item in js_data:
            # 'Value' carries the display name, e.g. 'Title (2012)'.
            entityName = match_title_year = item.get('Value', '')
            if entityName:
                match_title, match_year2 = scraper_utils.extra_year(
                    match_title_year)
                # Prefer the structured ReleaseYear field; fall back to the
                # year parsed out of the display name.
                match_year = str(item.get('ReleaseYear', ''))
                if not match_year: match_year = match_year2

                # urllib.urlencode is Python 2; urllib.parse.urlencode on 3.
                match_url = '/ontology/EntityDetails?' + urllib.urlencode(
                    {
                        'entityName': entityName,
                        'ignoreMediaLinkError': 'false'
                    })
                if not year or not match_year or year == match_year:
                    result = {
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year,
                        'url': scraper_utils.pathify_url(match_url)
                    }
                    results.append(result)

        return results
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Query the site's /search endpoint ('q' parameter) for *title*.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search')
        html = self._http_get(search_url, params={'q': title}, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'movie-item'}):
            match = dom_parser2.parse_dom(item,
                                          'a', {'itemprop': 'url'},
                                          req='href')
            if not match: continue

            match_url, match_title_year = match[0].attrs['href'], match[
                0].content
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if not match_year:
                # Fall back to the overlay badge when the link text has no
                # year.  Narrowed from a bare except: only a missing element
                # (IndexError from [0]) is expected here.
                try:
                    match_year = dom_parser2.parse_dom(
                        item, 'div', {'class': 'overlay-year'})[0].content
                except IndexError:
                    match_year = ''

            if not year or not match_year or year == match_year:
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)

        return results
Пример #10
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the /tvseries/ section of the site for *title*.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'url', 'title' and 'year' keys
        """
        results = []
        page_url = scraper_utils.urljoin(self.base_url, '/tvseries/search.php')
        html = self._http_get(page_url, params={'dayq': title}, cache_limit=48)
        # Strip HTML comments first; the page hides markup inside them.
        html = re.sub('<!--.*?-->', '', html)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, td in dom_parser2.parse_dom(html, 'td',
                                                {'class': 'topic_content'}):
            match_url = dom_parser2.parse_dom(td, 'a', req='href')
            match_title_year = dom_parser2.parse_dom(td, 'img', req='alt')
            if not match_url or not match_title_year: continue

            match_url = match_url[0].attrs['href']
            match_title_year = match_title_year[0].attrs['alt']
            # Relative links need the section prefix to be resolvable.
            if not match_url.startswith('/'):
                match_url = '/tvseries/' + match_url
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            # Require the query title to appear in the result title (fuzzy
            # match) in addition to the usual year filter.
            if (norm_title in scraper_utils.normalize_title(match_title)) and (
                    not year or not match_year or year == match_year):
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search via the site's 'bestmatch' URL built from a slugified title.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'url', 'title' and 'year' keys
        """
        results = []
        # Build the slug: spaces to dashes, drop anything non [A-Za-z0-9-],
        # then lowercase -- the URL scheme the site expects.
        slug = re.sub('[^A-Za-z0-9-]', '', title.replace(' ', '-')).lower()
        search_url = scraper_utils.urljoin(self.base_url,
                                           '/bestmatch-fund-movies-%s.html')
        html = self._http_get(search_url % (slug), cache_limit=1)

        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'thumbsTitle'}):
            links = dom_parser2.parse_dom(item, 'a', req='href')
            if not links: continue

            link = links[0]
            match_title, match_year = scraper_utils.extra_year(link.content)
            if year and match_year and year != match_year:
                continue
            results.append({
                'url': scraper_utils.pathify_url(link.attrs['href']),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })

        return results
Пример #12
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the front page and collect cover links, skipping episodes.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'cover'}):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match: continue

            match_url, match_title_year = match[0].attrs['href'], match[
                0].attrs['title']
            # 'S01E02'-style titles are individual episodes -- skip them.
            if re.search('S\d+E\d+', match_title_year, re.I): continue
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if not match_year:
                # Fall back to a '-YYYY-' fragment embedded in the URL.
                match = re.search('-(\d{4})-', match_url)
                if match:
                    match_year = match.group(1)

            if not year or not match_year or year == match_year:
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)

        return results
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search /search/<title> and parse the single 'who-likes' match.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        search_url = scraper_utils.urljoin(
            self.base_url, '/search/%s' % (urllib.quote(title)))
        html = self._http_get(search_url, cache_limit=8)
        fragment = dom_parser2.parse_dom(html, 'div', {'id': 'who-likes'})
        if not fragment: return results

        inner = fragment[0].content
        links = dom_parser2.parse_dom(inner, 'a', req='href')
        labels = dom_parser2.parse_dom(inner, 'img', req='alt')
        if not (links and labels):
            return results

        match_title, match_year = scraper_utils.extra_year(
            labels[0].attrs['alt'])
        if not year or not match_year or year == match_year:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(links[0].attrs['href'])
            })

        return results
Пример #14
0
    def search(self, video_type, title, year, season=''):
        """Search /search-movies/<title>.html and filter by type and season.

        :param video_type: VIDEO_TYPES.MOVIE or VIDEO_TYPES.SEASON
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: season number as a string; '' matches any season
        :returns: list of dicts with 'url', 'title' and 'year' keys
        """
        results = []
        search_url = scraper_utils.urljoin(
            self.base_url, '/search-movies/%s.html' % urllib.quote_plus(title))
        html = self._http_get(search_url, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'movie_about'}):
            match = dom_parser2.parse_dom(item, 'a', req='href')
            if match:
                match_url = match[0].attrs['href']
                match_title_year = match[0].content
                # 'Season N' in the link text marks a season page.
                is_season = re.search('Season\s+(\d+)\s*', match_title_year,
                                      re.I)
                # Keep only entries matching the requested video type.
                if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (
                        is_season and video_type == VIDEO_TYPES.SEASON):
                    match_title, match_year = scraper_utils.extra_year(
                        match_title_year)
                    if video_type == VIDEO_TYPES.SEASON:
                        # Seasons carry no year; require the season number
                        # to match when one was requested.
                        match_year = ''
                        if season and int(season) != int(is_season.group(1)):
                            continue

                    if (not year or not match_year or year == match_year):
                        result = {
                            'url': scraper_utils.pathify_url(match_url),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year
                        }
                        results.append(result)

        return results
Пример #15
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the front page and return browse-list matches.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(
                html, 'div', {'class': 'browse-movie-bottom'}):
            match = dom_parser2.parse_dom(item, 'a', req='href')
            if match:
                match_url, match_title_year = match[0].attrs['href'], match[
                    0].content
                match_title, match_year = scraper_utils.extra_year(
                    match_title_year)
                if not match_year:
                    # Fall back to the adjacent year <div> when the link
                    # text carries no year.
                    div = dom_parser2.parse_dom(item, 'div',
                                                {'class': 'browse-movie-year'})
                    if div: match_year = div[0].content.strip()

                # The site serves the stream page under this query suffix.
                match_url += '?watching'
                if not year or not match_year or year == match_year:
                    result = {
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year,
                        'url': scraper_utils.pathify_url(match_url)
                    }
                    results.append(result)

        return results
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search via the c=movie filter endpoint; keep only the wanted type.

        :param video_type: VIDEO_TYPES.MOVIE or VIDEO_TYPES.TVSHOW
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        params = {'c': 'movie', 'm': 'filter', 'keyword': title}
        html = self._http_get(self.base_url, params=params, cache_limit=8)
        for attrs, item in dom_parser2.parse_dom(html,
                                                 'div',
                                                 {'class': 'recent-item'},
                                                 req='title'):
            links = dom_parser2.parse_dom(item, 'a', req='href')
            if not links: continue

            match_url = links[0].attrs['href']
            # '/series/' in the path marks a TV entry.
            is_series = re.search('/series/', match_url, re.I)
            wrong_type = ((video_type == VIDEO_TYPES.MOVIE and is_series)
                          or (video_type == VIDEO_TYPES.TVSHOW
                              and not is_series))
            if wrong_type: continue

            match_title, match_year = scraper_utils.extra_year(attrs['title'])
            if year and match_year and year != match_year:
                continue
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })

        return results
Пример #17
0
    def search(self, video_type, title, year, season=''):
        """POST the Ajax Search Pro endpoint and filter by type and season.

        :param video_type: VIDEO_TYPES.MOVIE or VIDEO_TYPES.SEASON
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: season number as a string; '' matches any season
        :returns: list of dicts with 'url', 'title' and 'year' keys
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/wp-content/plugins/ajax-search-pro/ajax_search.php')
        # Form fields expected by the Ajax Search Pro WordPress plugin.
        data = {'action': 'ajaxsearchpro_search', 'aspp': title, 'asid': '1', 'asp_inst_id': '1_1',
                'options': 'current_page_id=7&qtranslate_lang=0&set_intitle=None&customset%5B%5D=post'}
        html = self._http_get(search_url, data=data, headers=XHR, cache_limit=8)
        norm_title = scraper_utils.normalize_title(title)
        for attrs, match_title_year in dom_parser2.parse_dom(html, 'a', {'class': 'asp_res_url'}):
            match_url = attrs['href']
            match_title_year = re.sub('</?[^>]*>', '', match_title_year)
            # 'Season N' in the label marks a season page.
            is_season = re.search('Season\s+(\d+)\s*', match_title_year, re.I)
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match_year = ''
                if video_type == VIDEO_TYPES.SEASON:
                    if season and int(is_season.group(1)) != int(season):
                        continue

                    # Season labels keep the show title; just drop any year.
                    match_title = match_title_year
                    match_title = re.sub('\s*\d{4}', '', match_title)
                else:
                    match_title, match_year = scraper_utils.extra_year(match_title_year)

                # Fuzzy match: accept containment in either direction.
                match_norm_title = scraper_utils.normalize_title(match_title)
                title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
                if title_match and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        return results
Пример #18
0
    def search(self, video_type, title, year, season=''):
        """Search /movies/search and filter results by video type and season.

        :param video_type: VIDEO_TYPES.MOVIE or VIDEO_TYPES.SEASON
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: season number as a string; '' matches any season
        :returns: list of dicts with 'url', 'title' and 'year' keys
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/movies/search')
        html = self._http_get(search_url, params={'s': title}, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'item_movie'}):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match: continue

            match_title_year = match[0].attrs['title']
            match_url = match[0].attrs['href']
            is_season = re.search(r'S(?:eason\s+)?(\d+)', match_title_year,
                                  re.I)
            # BUG FIX: the original read
            #   video_type == (VIDEO_TYPES.MOVIE and not is_season) or ...
            # which, by operator precedence, compares video_type against a
            # boolean.  Parenthesize so each comparison stands on its own.
            match_vt = (video_type == VIDEO_TYPES.MOVIE and not is_season) or \
                       (video_type == VIDEO_TYPES.SEASON and is_season)
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                if not season and not match_vt: continue
                if match_vt:
                    # Explicit season marker: require the number to match.
                    if season and int(is_season.group(1)) != int(season):
                        continue
                else:
                    # No season marker: treat as season 1 and verify the
                    # title/year actually belong to the requested show.
                    if season and int(season) != 1: continue
                    site_title, site_year = scraper_utils.extra_year(
                        match_title_year)
                    if scraper_utils.normalize_title(
                            site_title) not in scraper_utils.normalize_title(
                                title) or year != site_year:
                        continue

                match_title = match_title_year
            else:
                if not match_vt: continue
                match_title, match_year = scraper_utils.extra_year(
                    match_title_year)

            # The site serves the stream under this relative page.
            match_url = scraper_utils.urljoin(match_url, 'watching.html')
            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)
        return results
Пример #19
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the front page (debrid-only site) for matching posts.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        html = self._http_get(self.base_url, params={'s': title}, require_debrid=True, cache_limit=8)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'class': 'post'}):
            links = dom_parser2.parse_dom(post, 'a', req='href')
            if not links: continue

            link = links[0]
            match_title, match_year = scraper_utils.extra_year(link.content)
            if year and match_year and year != match_year:
                continue
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(link.attrs['href'])
            })

        return results
Пример #20
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """POST the theme's AJAX search endpoint and parse the <li> results.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'url', 'title' and 'year' keys
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/wp-content/themes/afdah/ajax-search.php')
        html = self._http_get(search_url, data={'test1': title, 'test2': 'title'}, headers=XHR, cache_limit=1)
        for _attrs, item in dom_parser2.parse_dom(html, 'li'):
            links = dom_parser2.parse_dom(item, 'a', req='href')
            if not links: continue

            link = links[0]
            match_title, match_year = scraper_utils.extra_year(link.content)
            if not year or not match_year or year == match_year:
                results.append({
                    'url': scraper_utils.pathify_url(link.attrs['href']),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                })
        return results
Пример #21
0
 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     """Query the /search endpoint (q/s params) and collect title links.

     :param video_type: unused; kept for the common scraper interface
     :param title: title to search for
     :param year: release year as a string; '' disables year filtering
     :param season: unused; kept for the common scraper interface
     :returns: list of dicts with 'url', 'title' and 'year' keys
     """
     results = []
     search_url = scraper_utils.urljoin(self.base_url, '/search')
     params = {'q': title, 's': 't'}
     html = self._http_get(search_url, params=params, cache_limit=1)
     for _attrs, content in dom_parser2.parse_dom(html, 'span', {'class': 'title_list'}):
         match = dom_parser2.parse_dom(content, 'a', req=['href', 'title'])
         if match:
             attrs = match[0].attrs
             match_url, match_title_year = attrs['href'], attrs['title']
             match_title, match_year = scraper_utils.extra_year(match_title_year)
             if not year or not match_year or year == match_year:
                 result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                 results.append(result)

     return results
Пример #22
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search by tag URL built from a slugified *title*; skips TV shows.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        # Slugify: drop punctuation, then collapse whitespace into dashes.
        slug = re.sub('\s+', '-', re.sub('[^A-Za-z0-9 ]', '', title))
        search_url = scraper_utils.urljoin(self.base_url, '/tag/%s' % (slug))
        html = self._http_get(search_url, cache_limit=1)
        for attrs, label in dom_parser2.parse_dom(html, 'a', {'class': 'top-item'}, req='href'):
            match_url = attrs['href']
            if '-tvshow-' in match_url: continue
            label = re.sub('</?[^>]*>', '', label)
            label = re.sub('^Watch\s*', '', label)
            match_title, match_year = scraper_utils.extra_year(label)
            if year and match_year and year != match_year:
                continue
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })

        return results
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the front page, skipping episode and TV-series entries.

        :param video_type: unused; kept for the common scraper interface
        :param title: title to search for
        :param year: release year as a string; '' disables year filtering
        :param season: unused; kept for the common scraper interface
        :returns: list of dicts with 'title', 'year' and 'url' keys
        """
        results = []
        html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
        # The site renders this phrase when the search comes back empty.
        if re.search('Sorry, but nothing matched', html, re.I): return results

        norm_title = scraper_utils.normalize_title(title)
        for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'box-shadow'}):
            for attrs, _content in dom_parser2.parse_dom(item, 'a', req=['href', 'title']):
                match_url, match_title_year = attrs['href'], attrs['title']
                if re.search('S\d{2}E\d{2}', match_title_year): continue  # skip episodes
                if re.search('TV\s*SERIES', match_title_year, re.I): continue  # skip shows
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                # Year filter plus fuzzy title containment.
                if (not year or not match_year or year == match_year) and norm_title in scraper_utils.normalize_title(match_title):
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results
Пример #24
0
 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     """Search /search/<title> (debrid-only) and filter by video type.

     :param video_type: VIDEO_TYPES.MOVIE or VIDEO_TYPES.TVSHOW
     :param title: title to search for
     :param year: release year as a string; '' disables year filtering
     :param season: unused; kept for the common scraper interface
     :returns: list of dicts with 'url', 'title' and 'year' keys
     """
     results = []
     search_url = scraper_utils.urljoin(self.base_url, '/search/')
     search_url = scraper_utils.urljoin(search_url, urllib.quote_plus(title))
     html = self._http_get(search_url, require_debrid=True, cache_limit=8)
     for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'list'}):
         if not dom_parser2.parse_dom(fragment, 'div', {'class': 'lists_titles'}): continue
         for attrs, match_title_year in dom_parser2.parse_dom(fragment, 'a', {'class': 'title'}, req='href'):
             match_url = attrs['href']
             match_title_year = re.sub('</?[^>]*>', '', match_title_year)
             # BUG FIX: the original pattern '\(d{4|-\)' was garbled and
             # could never match a year marker.  Shows appear to be listed
             # like '(2013-)' / '(2013-2016)' while movies are '(2013)', so
             # detect an open paren, four digits and a dash -- TODO confirm
             # against live site markup.
             is_show = re.search(r'\(\d{4}-', match_title_year)
             if (is_show and video_type == VIDEO_TYPES.MOVIE) or (not is_show and video_type == VIDEO_TYPES.TVSHOW): continue

             match_title, match_year = scraper_utils.extra_year(match_title_year)
             if not year or not match_year or year == match_year:
                 result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                 results.append(result)

     return results
Пример #25
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search /search/<quoted title>.

        Extracts the title/year from each result's name anchor, cleaning up
        "Watch Movie" prefixes and HTML entities; when the anchor text carries
        no year, falls back to a separate <span class="year"> element.
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/') + urllib.quote_plus(title)
        html = self._http_get(search_url, cache_limit=1)
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'inner'}):
            name = dom_parser2.parse_dom(fragment, 'div', {'class': 'name'})
            if not name: continue

            anchor = dom_parser2.parse_dom(name[0].content, 'a', req='href')
            if not anchor: continue

            match_url = anchor[0].attrs['href']
            # Series URLs contain 'tv-series'; skip them for movie searches.
            if video_type == VIDEO_TYPES.MOVIE and 'tv-series' in match_url:
                continue

            title_year = re.sub('</?[^>]*>', '', anchor[0].content)
            title_year = re.sub('[Ww]atch\s+[Mm]ovie\s*', '', title_year)
            title_year = title_year.replace('&#8217;', "'")
            match_title, match_year = scraper_utils.extra_year(title_year)
            if not match_year:
                # Year may live in <span class="year"><a>YYYY</a></span>.
                year_span = dom_parser2.parse_dom(fragment, 'span', {'class': 'year'})
                if year_span:
                    year_link = dom_parser2.parse_dom(year_span[0].content, 'a')
                    if year_link:
                        match_year = year_link[0].content.strip()

            if not year or not match_year or year == match_year:
                results.append({
                    'title': scraper_utils.cleanse_title(match_title),
                    'url': scraper_utils.pathify_url(match_url),
                    'year': match_year,
                })

        return results
Пример #26
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Crawl the add-on's XML catalog and match items by title/year.

        Starts from base.xml and expands any <folder> entries it discovers,
        so ``folders`` is intentionally appended to while being iterated
        (a breadth-first traversal). Result URLs are synthetic query strings
        encoding both the source xml file and the item link.
        """
        results = []
        folders = ['/addons/real-movies/base.xml']
        norm_title = scraper_utils.normalize_title(title)
        for page_url in folders:
            xml_file = os.path.basename(page_url)
            page_url = scraper_utils.urljoin(self.base_url, page_url)
            xml = self._http_get(page_url, require_debrid=True, cache_limit=48)
            # Queue nested folders for a later pass; skip empty matches.
            new_folders = re.findall('<folder>(.*?)</folder>', xml, re.I)
            if new_folders:
                folders += [folder for folder in new_folders if folder]

            for match in re.finditer('<item>(.*?)</item>', xml,
                                     re.I | re.DOTALL):
                item = match.group(1)
                match_title_year = re.search('<title>(.*?)</title>', item,
                                             re.I)
                match_url = re.search('<link>(.*?)</link>', item, re.I)
                if match_title_year and match_url:
                    match_title_year = match_title_year.group(1)
                    match_url = match_url.group(1)
                    if match_title_year and match_url:
                        match_title, match_year = scraper_utils.extra_year(
                            match_title_year)
                        # Normalize the file name so it can key into XML_META.
                        xml_file = xml_file.replace(' ', '').lower()
                        match_url = 'xml_file=%s&link=%s' % (xml_file,
                                                             match_url)
                        if norm_title in scraper_utils.normalize_title(
                                match_title) and (not year or not match_year
                                                  or year == match_year):
                            # Tag the title with the source's format when
                            # XML_META defines one for this file.
                            if 'format' in XML_META.get(xml_file, {}):
                                match_title += ' (%s)' % (
                                    XML_META[xml_file]['format'])
                            result = {
                                'title':
                                scraper_utils.cleanse_title(match_title),
                                'year': match_year,
                                'url': match_url
                            }
                            results.append(result)

        return results
Пример #27
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search /search-movies/<title>.html.

        The real title/year live in each item's onmouseover tooltip markup
        rather than in the anchor text, so that attribute is parsed for the
        bolded title, a "Season N" marker, and a "Release: YYYY" fallback.
        """
        results = []
        search_url = scraper_utils.urljoin(
            self.base_url,
            '/search-movies/%s.html' % (urllib.quote_plus(title)))
        html = self._http_get(search_url, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'li',
                                                  {'class': 'item'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            # Tooltip HTML embedded in the onmouseover attribute.
            match_title_year = re.search('onmouseover="([^"]+)', item)
            if match_url and match_title_year:
                match_url = match_url[0].attrs['href']
                match_title_year = match_title_year.group(1)
                match = re.search('<b>(?:<i>)?\s*(.*?)\s*(?:</i>)?</b>',
                                  match_title_year)
                if not match: continue

                match_title, match_year = scraper_utils.extra_year(
                    match.group(1))
                # "Season N" in the tooltip marks a season listing, not a movie.
                is_season = re.search('season\s+(\d+)', match_title_year, re.I)
                if (is_season and video_type == VIDEO_TYPES.MOVIE) or (
                        not is_season and video_type == VIDEO_TYPES.SEASON):
                    continue

                if video_type == VIDEO_TYPES.MOVIE:
                    # Fall back to the "Release: YYYY" text for the year.
                    if not match_year:
                        match_year = re.search('>Release:\s*(\d{4})',
                                               match_title_year)
                        match_year = match_year.group(1) if match_year else ''
                else:
                    # When a specific season was requested, keep only that one.
                    if season and int(season) != int(is_season.group(1)):
                        continue

                if not year or not match_year or year == match_year:
                    result = {
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year,
                        'url': scraper_utils.pathify_url(match_url)
                    }
                    results.append(result)

        return results
Пример #28
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """No native search on this site: scan the listing pages and keep
        links whose normalized title contains the normalized query, deduping
        by pathified URL across pages."""
        results = []
        seen_urls = set()
        norm_title = scraper_utils.normalize_title(title)
        for page in ['/latest-added/', '/popular-today/', '/most-popular/']:
            page_url = scraper_utils.urljoin(self.base_url, page)
            html = self._http_get(page_url, cache_limit=24)
            fragment = dom_parser2.parse_dom(html, 'div', {'class': 'home'})
            if not fragment:
                continue
            for attrs, label in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
                match_title, match_year = scraper_utils.extra_year(label)
                if norm_title not in scraper_utils.normalize_title(match_title): continue
                if year and match_year and year != match_year: continue
                match_url = scraper_utils.pathify_url(attrs['href'])
                if match_url in seen_urls: continue
                seen_urls.add(match_url)
                results.append({'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': match_year})

        return results
Пример #29
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Keyword search: the first anchor's title attribute carries the
        movie title, and its text content may carry the year."""
        results = []
        search_url = scraper_utils.urljoin(self.base_url,
                                           '/keywords/%s/' % (title))
        html = self._http_get(search_url, cache_limit=4)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'movie_about'}):
            links = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not links: continue

            link = links[0]
            match_url = link.attrs['href']
            match_title = link.attrs['title']
            # Only the year matters from the anchor text; the title
            # attribute already has the clean name.
            _discard, match_year = scraper_utils.extra_year(link.content)
            if year and match_year and year != match_year: continue
            results.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })
        return results
Пример #30
0
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site's ?s= endpoint for movies.

        Bug fix: the original executed ``return results`` when a single
        result item was not a movie or failed to parse, silently discarding
        every remaining item on the page. Those cases now ``continue`` to
        the next item, matching the convention used by the sibling scrapers.
        """
        results = []
        html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'result-item'}):
            match = dom_parser2.parse_dom(item, 'div', {'class': 'title'})
            is_movie = dom_parser2.parse_dom(item, 'span', {'class': 'movies'})
            # Skip non-movie or malformed items instead of aborting the scan.
            if not is_movie or not match: continue
            
            match = dom_parser2.parse_dom(match[0].content, 'a', req='href')
            if not match: continue
            
            match_url, match_title_year = match[0].attrs['href'], match[0].content
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not match_year:
                # Fall back to the dedicated year span when the title has none.
                match_year = dom_parser2.parse_dom(item, 'span', {'class': 'year'})
                match_year = match_year[0].content if match_year else ''
                
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)

        return results