Python get_quality示例，scraper_utils.get_quality Python示例

示例#1

0

显示文件

 def get_sources(self, video, video_type):
     """Build hoster entries from the rows of the page's ``linktable`` div.

     The page for ``video`` is fetched relative to ``self.base_url`` and
     every ``<tr>`` in the first ``div.linktable`` is inspected. A row is
     used when it has at least two ``<span>`` elements, at least one link,
     and none of its spans contains the word "sponsored".

     :param video: object passed to ``self.get_url`` and
         ``scraper_utils.get_quality``.
     :param video_type: accepted for interface compatibility; not read here.
     :returns: list of hoster dicts (possibly empty).
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = urlparse.urljoin(self.base_url, source_url)
     # The HTTP header is spelled "Referer"; the former 'Refer' key was a
     # typo and did not name a real request header.
     headers = {'Referer': self.base_url}
     html = self._http_get(page_url, headers=headers, cache_limit=.5)
     table = dom_parser.parse_dom(html, 'div', {'class': 'linktable'})
     if not table:
         return hosters

     for row in dom_parser.parse_dom(table[0], 'tr'):
         spans = dom_parser.parse_dom(row, 'span')
         stream_url = dom_parser.parse_dom(row, 'a', ret='href')
         if any('sponsored' in span.lower() for span in spans):
             continue
         if len(spans) < 2 or not stream_url:
             continue

         # First span is used as the host label, second as the rating text.
         host, rating = spans[0], spans[1]
         hoster = {
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': scraper_utils.get_quality(
                 video, host, QUALITIES.HIGH),
             'views': None,
             'rating': None,
             'url': stream_url[0],
             'direct': False,
         }
         # Check the scraped rating text (not the literal string 'rating',
         # which can never end with '%') and keep only its digits.
         if rating.endswith('%') and rating[:-1].isdigit():
             hoster['rating'] = rating[:-1]
         hosters.append(hoster)
     return hosters

示例#2

0

显示文件

    def get_sources(self, video):
        """Collect hosters from embed pages and direct "play video" links.

        The base quality is ``QUALITIES.LOW`` when the page contains the
        text "This movie is of poor quality" and ``QUALITIES.HIGH``
        otherwise. Embed pages are fetched and handed to
        ``self.__get_links``; direct links are turned into hoster dicts here.

        :param video: object passed to ``self.get_url`` and
            ``scraper_utils.get_quality``.
        :returns: list of hoster dicts (possibly empty).
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        if re.search('This movie is of poor quality', html, re.I):
            base_quality = QUALITIES.LOW
        else:
            base_quality = QUALITIES.HIGH

        for match in re.finditer(r'href="([^"]+/embed\d*/[^"]+)', html):
            embed_html = self._http_get(match.group(1), cache_limit=.5)
            hosters += self.__get_links(embed_html)

        pattern = r'href="([^"]+)[^>]*>\s*<[^>]+play_video.gif'
        for match in re.finditer(pattern, html, re.I):
            stream_url = match.group(1)
            host = urlparse.urlparse(stream_url).hostname
            # Always start from the page-level base quality. Previously the
            # result was written back into the same variable, so one host's
            # resolved quality became the base for every following host.
            quality = scraper_utils.get_quality(video, host, base_quality)
            hoster = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'rating': None,
                'views': None,
                'direct': False,
            }
            hosters.append(hoster)
        return hosters

示例#3

0

显示文件

文件： iwatchonline.py 项目： krzysztofuu/Bonitillonew

    def get_sources(self, video, video_type):
        """Scrape the ``streamlinks`` table and return one hoster per row.

        Each ``<tr>`` whose id matches ``pt\\d+`` is parsed with a regex
        that depends on whether ``video_type`` equals ``'movies'``. The age
        text of every row is converted via ``self.__get_age``; afterwards,
        when the spread between the largest and smallest age is positive,
        each hoster's ``rating`` is derived from its age relative to that
        spread. ``main_scrape.apply_urlresolver`` is always called on the
        list before it is returned.

        :param video: object passed to ``self.get_url`` and
            ``scraper_utils.get_quality``.
        :param video_type: selects the row-parsing pattern.
        :returns: list of hoster dicts (possibly empty).
        """
        hosters = []
        source_url = self.get_url(video)
        if source_url and source_url != FORCE_NO_MATCH:
            page_url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(page_url, cache_limit=.5)

            tables = dom_parser.parse_dom(html, 'table',
                                          {'id': 'streamlinks'})
            if tables:
                # The pattern only depends on video_type, so pick it once.
                if video_type == 'movies':
                    row_pattern = 'href="([^"]+).*?/>([^<]+).*?(?:<td>.*?</td>\s*){1}<td>(.*?)</td>\s*<td>(.*?)</td>'
                else:
                    row_pattern = 'href="([^"]+).*?/>([^<]+).*?(<span class="linkdate">.*?)</td>\s*<td>(.*?)</td>'

                now = int(time.time())
                max_age = 0
                min_age = now
                rows = dom_parser.parse_dom(tables[0], 'tr',
                                            {'id': 'pt\d+'})
                for row in rows:
                    found = re.search(row_pattern, row, re.DOTALL)
                    if not found:
                        continue

                    link, host, age_text, quality_text = found.groups()
                    age = self.__get_age(now, age_text)
                    max_age = age if age > max_age else max_age
                    min_age = age if age < min_age else min_age
                    host = host.strip()
                    base_quality = QUALITY_MAP.get(quality_text.upper(),
                                                   QUALITIES.HIGH)
                    hosters.append({
                        'hostname': 'iWatchOnline',
                        'multi-part': False,
                        'class': '',
                        'url': self.resolve_link(link),
                        'host': host,
                        'age': age,
                        'views': None,
                        'rating': None,
                        'direct': False,
                        'quality': scraper_utils.get_quality(
                            video, host, base_quality),
                    })

                # One "unit" is a hundredth of the observed age spread.
                unit = (max_age - min_age) / 100
                if unit > 0:
                    for entry in hosters:
                        entry['rating'] = (entry['age'] - min_age) / unit

        main_scrape.apply_urlresolver(hosters)
        return hosters

示例#4

0

显示文件

文件： watchepisodes.py 项目： ItsMYZTIK/tdbaddon

    def get_sources(self, video, video_type):
        """Return one hoster per ``ldr-item`` div that carries a stream link.

        For each item the stream URL is read from the ``data-actuallink``
        attribute of its anchor. The view count comes from the first
        space-delimited number in the ``click-count`` div, and the rating
        is ten times the integer found in the ``point`` div. Both are
        ``None`` when missing. ``main_scrape.apply_urlresolver`` is always
        called on the list before it is returned.

        :param video: object passed to ``self.get_url`` and
            ``scraper_utils.get_quality``.
        :param video_type: accepted for interface compatibility; not read here.
        :returns: list of hoster dicts (possibly empty).
        """
        source_url = self.get_url(video)
        hosters = []
        if source_url and source_url != FORCE_NO_MATCH:
            page_url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(page_url, cache_limit=.25)
            for link in dom_parser.parse_dom(html, 'div',
                                             {'class': '[^"]*ldr-item[^"]*'}):
                stream_url = dom_parser.parse_dom(link,
                                                  'a',
                                                  ret='data-actuallink')

                views = None
                watched = dom_parser.parse_dom(link, 'div',
                                               {'class': 'click-count'})
                if watched:
                    match = re.search(r' (\d+) ', watched[0])
                    if match:
                        views = match.group(1)

                # Reset per item: previously ``rating`` was only bound when a
                # score div existed, which raised NameError on the first
                # unscored item and leaked a stale value into later ones.
                rating = None
                score = dom_parser.parse_dom(link, 'div',
                                             {'class': r'\s*point\s*'})
                if score:
                    try:
                        points = int(score[0])
                    except ValueError:
                        # Non-numeric score text: leave this item unrated
                        # rather than aborting the whole scrape.
                        points = 0
                    rating = points * 10 if points else None

                if stream_url:
                    stream_url = stream_url[0].strip()
                    host = urlparse.urlparse(stream_url).hostname
                    quality = scraper_utils.get_quality(
                        video, host, QUALITIES.HIGH)
                    hoster = {
                        'hostname': 'WatchEpisodes',
                        'multi-part': False,
                        'host': host,
                        'class': self,
                        'quality': quality,
                        'views': views,
                        'rating': rating,
                        'url': stream_url,
                        'direct': False,
                    }
                    hosters.append(hoster)
        main_scrape.apply_urlresolver(hosters)
        return hosters

示例#5

0

显示文件

文件： afdah_scraper.py 项目： krzysztofuu/Bonitillonew

    def get_sources(self, video):
        """Collect hosters from embed pages and direct "play video" links.

        The base quality is ``QUALITIES.LOW`` when the page contains the
        text "This movie is of poor quality" and ``QUALITIES.HIGH``
        otherwise. Each embed page is fetched; when it contains a
        ``write("...")`` payload, the payload is decoded with
        ``self._caesar`` and base-64 before being handed to
        ``self._get_links``, otherwise the raw embed HTML is used.

        :param video: object passed to ``self.get_url`` and
            ``scraper_utils.get_quality``.
        :returns: list of hoster dicts (possibly empty).
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        if re.search('This movie is of poor quality', html, re.I):
            quality = QUALITIES.LOW
        else:
            quality = QUALITIES.HIGH

        for match in re.finditer(r'href="([^"]+/embed\d*/[^"]+)', html):
            embed_html = self._http_get(match.group(1), cache_limit=.5)
            payload = re.search(r'{\s*write\("([^"]+)', embed_html)
            if payload:
                # NOTE(review): str.decode('base-64') only exists on
                # Python 2 byte strings; this block assumes a Python 2 runtime.
                plaintext = self._caesar(payload.group(1),
                                         13).decode('base-64')
                if 'http' not in plaintext:
                    # Second attempt with the base-64 layer applied first.
                    plaintext = self._caesar(
                        payload.group(1).decode('base-64'),
                        13).decode('base-64')
            else:
                plaintext = embed_html
            hosters += self._get_links(plaintext)

        pattern = r'href="([^"]+)"[^>]*><[^>]+play_video.gif'
        for match in re.finditer(pattern, html, re.I):
            stream_url = match.group(1)
            host = urlparse.urlparse(stream_url).hostname
            hosters.append({
                'hostname': 'Afdah',
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': '',
                'quality': scraper_utils.get_quality(video, host, quality),
                'rating': None,
                'views': None,
                'direct': False,
            })

        # Run the resolver pass once over the complete list. It used to sit
        # inside the loop above, so it ran once per direct link and was
        # skipped entirely when only embed links were found.
        main_scrape.apply_urlresolver(hosters)
        return hosters

示例#6

0

显示文件

    def get_sources(self, video):
        """Gather hosters from iframes and "Version N" anchors on the page.

        Non-YouTube iframe sources are tagged with the version label
        ``'embedded'`` and get ``QUALITIES.HD720`` as base quality; links
        found by the "Version N" regex get ``QUALITIES.HIGH``. Stream URLs
        not starting with ``http`` are appended to ``source_url``.

        :param video: object passed to ``self.get_url`` and
            ``scraper_utils.get_quality``.
        :returns: whatever ``main_scrape.apply_urlresolver`` returns for the
            collected hoster list.
        """
        hosters = []
        source_url = self.get_url(video)
        if source_url and source_url != FORCE_NO_MATCH:
            page_url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(page_url,
                                  headers={'Referer': ''},
                                  cache_limit=.5)

            # (url, version label, host) triples; iframes first.
            page_links = [
                (src, 'embedded', urlparse.urlparse(src).hostname)
                for src in dom_parser.parse_dom(html, 'iframe', ret='src')
                if 'youtube' not in src
            ]
            page_links.extend(re.findall(
                '<a[^>]+href="([^"]+)[^>]+>(Version \d+)</a>([^<]+)', html))

            for stream_url, version, host in page_links:
                if stream_url.startswith('http'):
                    url = stream_url
                    host = urlparse.urlparse(stream_url).hostname
                else:
                    url = source_url + stream_url
                    host = host.replace('&nbsp;', '')

                if version == 'embedded':
                    base_quality = QUALITIES.HD720
                else:
                    base_quality = QUALITIES.HIGH

                hosters.append({
                    'hostname': 'Putlocker',
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': scraper_utils.get_quality(
                        video, host, base_quality),
                    'views': None,
                    'rating': None,
                    'url': url,
                    'direct': False,
                    'version': '(%s)' % (version),
                })

        return main_scrape.apply_urlresolver(hosters)

示例#7

0

显示文件

文件： putlocker_both.py 项目： ItsMYZTIK/tdbaddon

 def get_sources(self, video, video_type):
     """Return one hoster per ``altercolumn`` entry that has a link and host.

     Entries are read from the first ``div.alternativesc`` on the page.
     Links not starting with ``http`` are appended to ``source_url``.
     ``main_scrape.apply_urlresolver`` is always called on the list before
     it is returned.

     :param video: object passed to ``self.get_url`` and
         ``scraper_utils.get_quality``.
     :param video_type: accepted for interface compatibility; not read here.
     :returns: list of hoster dicts (possibly empty).
     """
     hosters = []
     source_url = self.get_url(video)
     if source_url and source_url != FORCE_NO_MATCH:
         page_url = urlparse.urljoin(self.base_url, source_url)
         html = self._http_get(page_url, cache_limit=.5)
         fragment = dom_parser.parse_dom(html, 'div',
                                         {'class': 'alternativesc'})
         columns = []
         if fragment:
             columns = dom_parser.parse_dom(fragment[0], 'div',
                                            {'class': 'altercolumn'})
         for item in columns:
             links = dom_parser.parse_dom(item,
                                          'a',
                                          {'class': 'altercolumnlink'},
                                          ret='href')
             hosts = dom_parser.parse_dom(item, 'span')
             if not (links and hosts):
                 continue

             link = links[0]
             if not link.startswith('http'):
                 link = source_url + link
             host = hosts[0]
             hosters.append({
                 'hostname': 'PutLocker',
                 'multi-part': False,
                 'host': host,
                 'class': '',
                 'quality': scraper_utils.get_quality(
                     video, host, QUALITIES.HIGH),
                 'views': None,
                 'rating': None,
                 'url': link,
                 'direct': False,
             })
     main_scrape.apply_urlresolver(hosters)
     return hosters