Python height_get_quality示例，deaths_lib.scraper_utils.height_get_quality Python示例

示例#1

0

显示文件

文件： ymovies_scraper.py 项目： uguer30/Project

 def __get_links_from_json2(self, url, page_url, video_type):
     sources = {}
     headers = {'Referer': page_url}
     headers.update(XHR)
     html = self._http_get(url, headers=headers, cache_limit=0)
     js_data = scraper_utils.parse_json(html, url)
     try:
         playlist = js_data.get('playlist', [])
         for source in playlist[0].get('sources', []):
             stream_url = source['file']
             if scraper_utils.get_direct_hostname(self,
                                                  stream_url) == 'gvideo':
                 quality = scraper_utils.gv_get_quality(stream_url)
             elif 'label' in source:
                 quality = scraper_utils.height_get_quality(source['label'])
             else:
                 if video_type == VIDEO_TYPES.MOVIE:
                     meta = scraper_utils.parse_movie_link(stream_url)
                 else:
                     meta = scraper_utils.parse_episode_link(stream_url)
                 quality = scraper_utils.height_get_quality(meta['height'])
             sources[stream_url] = {'quality': quality, 'direct': True}
             logger.log(
                 'Adding stream: %s Quality: %s' % (stream_url, quality),
                 log_utils.LOGDEBUG)
     except Exception as e:
         logger.log('Exception during ymovies extract: %s' % (e),
                    log_utils.LOGDEBUG)
     return sources

示例#2

0

显示文件

文件： premiumizev2_scraper.py 项目： uguer30/Project

 def __get_quality(self, item, video):
     if item.get('width'):
         return scraper_utils.width_get_quality(item['width'])
     elif item.get('height'):
         return scraper_utils.height_get_quality(item['height'])
     elif 'name' in item:
         if video.video_type == VIDEO_TYPES.MOVIE:
             meta = scraper_utils.parse_movie_link(item['name'])
         else:
             meta = scraper_utils.parse_episode_link(item['name'])
         return scraper_utils.height_get_quality(meta['height'])
     else:
         return QUALITIES.HIGH

示例#3

0

显示文件

文件： tvshow_scraper.py 项目： uguer30/Project

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=.5)
        title = dom_parser2.parse_dom(html, 'title')
        if title:
            title = title[0].content
            title = re.sub('^\[ST\]\s*&#8211;\s*', '', title)
            meta = scraper_utils.parse_episode_link(title)
            page_quality = scraper_utils.height_get_quality(meta['height'])
        else:
            page_quality = QUALITIES.HIGH

        fragment = dom_parser2.parse_dom(html, 'section',
                                         {'class': 'entry-content'})
        if fragment:
            for _attrs, section in dom_parser2.parse_dom(
                    fragment[0].content, 'p'):
                match = re.search('([^<]*)', section)
                meta = scraper_utils.parse_episode_link(match.group(1))
                if meta['episode'] != '-1' or meta['airdate']:
                    section_quality = scraper_utils.height_get_quality(
                        meta['height'])
                else:
                    section_quality = page_quality

                if Q_ORDER[section_quality] < Q_ORDER[page_quality]:
                    quality = section_quality
                else:
                    quality = page_quality

                for attrs, _content in dom_parser2.parse_dom(section,
                                                             'a',
                                                             req='href'):
                    stream_url = attrs['href']
                    host = urlparse.urlparse(stream_url).hostname
                    hoster = {
                        'multi-part': False,
                        'host': host,
                        'class': self,
                        'views': None,
                        'url': stream_url,
                        'rating': None,
                        'quality': quality,
                        'direct': False
                    }
                    hosters.append(hoster)

        return hosters

示例#4

0

显示文件

文件： yify_proxy.py 项目： uguer30/Project

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        headers = {
            'User-Agent': scraper_utils.get_ua(),
            'Referer': self.base_url + source_url
        }
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(source_url)
            stream_url = source_url + scraper_utils.append_headers(headers)
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {
                'multi-part': False,
                'host': scraper_utils.get_direct_hostname(self, stream_url),
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            }
            if 'format' in meta: hoster['format'] = meta['format']
            hosters.append(hoster)
        else:
            for episode in self.__match_episode(source_url, video):
                meta = scraper_utils.parse_episode_link(episode['title'])
                stream_url = episode['url'] + scraper_utils.append_headers(
                    headers)
                stream_url = stream_url.replace(self.base_url, '')
                quality = scraper_utils.height_get_quality(meta['height'])
                hoster = {
                    'multi-part': False,
                    'host':
                    scraper_utils.get_direct_hostname(self, stream_url),
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': True
                }
                if 'format' in meta: hoster['format'] = meta['format']
                if 'size' in episode:
                    hoster['size'] = scraper_utils.format_size(
                        int(episode['size']))
                hosters.append(hoster)

        return hosters

示例#5

0

显示文件

文件： heydl_scraper.py 项目： uguer30/Project

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
            stream_url = attrs['href']
            if MOVIE_URL in stream_url:
                meta = scraper_utils.parse_movie_link(stream_url)
                stream_url = scraper_utils.pathify_url(
                    stream_url) + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                quality = scraper_utils.height_get_quality(meta['height'])
                hoster = {
                    'multi-part': False,
                    'host':
                    scraper_utils.get_direct_hostname(self, stream_url),
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': True
                }
                if 'format' in meta: hoster['format'] = meta['format']
                hosters.append(hoster)

        return hosters

示例#6

0

显示文件

    def __get_gk_links(self, html, url):
        hosters = []
        for match in re.finditer('gkpluginsphp.*?link\s*:\s*"([^"]+)', html):
            data = {'link': match.group(1)}
            headers = XHR
            headers['Referer'] = url
            gk_url = urlparse.urljoin(self.base_url, GK_URL)
            html = self._http_get(gk_url,
                                  data=data,
                                  headers=headers,
                                  cache_limit=.5)
            js_result = scraper_utils.parse_json(html, gk_url)
            if 'link' in js_result and 'func' not in js_result:
                if isinstance(js_result['link'], list):
                    sources = dict((link['link'],
                                    scraper_utils.height_get_quality(
                                        link.get('label', 700)))
                                   for link in js_result['link'])
                else:
                    sources = {js_result['link']: QUALITIES.HD720}

                for source in sources:
                    if source:
                        hoster = {
                            'multi-part': False,
                            'url': source,
                            'class': self,
                            'quality': sources[source],
                            'host': self._get_direct_hostname(source),
                            'rating': None,
                            'views': None,
                            'direct': True
                        }
                        hosters.append(hoster)
        return hosters

示例#7

0

显示文件

文件： 123hub.py 项目： uguer30/Project

    def __get_links_from_xml(self, xml, video):
        sources = {}
        try:
            root = ET.fromstring(xml)
            for item in root.findall('.//item'):
                title = item.find('title').text
                for source in item.findall('{http://rss.jwpcdn.com/}source'):
                    stream_url = source.get('file')
                    label = source.get('label')
                    if self._get_direct_hostname(stream_url) == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    elif label:
                        quality = scraper_utils.height_get_quality(label)
                    else:
                        quality = scraper_utils.blog_get_quality(
                            video, title, '')
                    sources[stream_url] = {'quality': quality, 'direct': True}
                    log_utils.log(
                        'Adding stream: %s Quality: %s' %
                        (stream_url, quality), log_utils.LOGDEBUG)
        except Exception as e:
            log_utils.log('Exception during 123Movies XML Parse: %s' % (e),
                          log_utils.LOGWARNING)

        return sources

示例#8

0

显示文件

 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if source_url and source_url != FORCE_NO_MATCH:
         page_url = urlparse.urljoin(self.base_url, source_url)
         html = self._http_get(page_url, cache_limit=.5)
         match = re.search('var\s*video_id="([^"]+)', html)
         if match:
             video_id = match.group(1)
             data = {'v': video_id}
             headers = {'Referer': page_url}
             headers.update(XHR)
             html = self._http_get(self.info_url, data=data, headers=headers, cache_limit=0)
             sources = scraper_utils.parse_json(html, self.info_url)
             for source in sources:
                 match = re.search('url=(.*)', sources[source])
                 if match:
                     stream_url = urllib.unquote(match.group(1))
                     host = self._get_direct_hostname(stream_url)
                     if host == 'gvideo':
                         quality = scraper_utils.gv_get_quality(stream_url)
                     else:
                         quality = scraper_utils.height_get_quality(source)
                     stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                     hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                     hosters.append(hoster)
     return hosters

示例#9

0

显示文件

文件： pubfilm_scraper.py 项目： uguer30/Project

 def __get_gk_links(self, link, iframe_url):
     sources = {}
     data = {'link': link}
     headers = XHR
     headers.update({'Referer': iframe_url, 'User-Agent': USER_AGENT})
     html = self._http_get(GK_URL, data=data, headers=headers, cache_limit=.25)
     js_data = scraper_utils.parse_json(html, GK_URL)
     if 'link' in js_data:
         if isinstance(js_data['link'], basestring):
             stream_url = js_data['link']
             if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                 for source in scraper_utils.parse_google(self, stream_url):
                     sources[source] = {'quality': scraper_utils.gv_get_quality(source), 'direct': True}
             else:
                 sources[stream_url] = {'quality': QUALITIES.HIGH, 'direct': False}
         else:
             for link in js_data['link']:
                 stream_url = link['link']
                 if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                     quality = scraper_utils.gv_get_quality(stream_url)
                 elif 'label' in link:
                     quality = scraper_utils.height_get_quality(link['label'])
                 else:
                     quality = QUALITIES.HIGH
                 sources[stream_url] = {'quality': quality, 'direct': True}
     return sources

示例#10

0

显示文件

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        headers = {'User-Agent': LOCAL_UA}
        html = self._http_get(url,
                              require_debrid=True,
                              headers=headers,
                              cache_limit=.5)
        for match in re.finditer(
                "<span\s+class='info2'(.*?)(<span\s+class='info|<hr\s*/>)",
                html, re.DOTALL):
            for match2 in re.finditer('href="([^"]+)', match.group(1)):
                stream_url = match2.group(1)
                meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
                host = urlparse.urlparse(stream_url).hostname
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'views': None,
                    'url': stream_url,
                    'rating': None,
                    'quality': quality,
                    'direct': False
                }
                hosters.append(hoster)

        return hosters

示例#11

0

显示文件

    def __get_links_from_xml(self, url, video, page_url, cookies):
        sources = {}
        try:
            headers = {'Referer': page_url}
            xml = self._http_get(url, cookies=cookies, headers=headers, cache_limit=.5)
            root = ET.fromstring(xml)
            for item in root.findall('.//item'):
                title = item.find('title').text
                if title and title.upper() == 'OOPS!': continue
                for source in item.findall('{http://www2.0123movies.com/ajax/movie_sources/'):
                    stream_url = source.get('file')
                    label = source.get('label')
                    if scraper_utils.get_direct_hostname(self, stream_url) == 'openload':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    elif label:
                        quality = scraper_utils.height_get_quality(label)
                    elif title:
                        quality = scraper_utils.blog_get_quality(video, title, '')
                    else:
                        quality = scraper_utils.blog_get_quality(video, stream_url, '')
                    sources[stream_url] = {'quality': quality, 'direct': True}
                    logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
        except Exception as e:
            logger.log('Exception during 123Movies XML Parse: %s' % (e), log_utils.LOGWARNING)

        return sources

示例#12

0

显示文件

文件： dizilab_scraper.py 项目： uguer30/Project

    def __get_json_links(self, html, sub):
        hosters = []
        js_data = scraper_utils.parse_json(html)
        if 'sources' in js_data:
            for source in js_data.get('sources', []):
                stream_url = source.get('file')
                if stream_url is None: continue

                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in source:
                    quality = scraper_utils.height_get_quality(source['label'])
                else:
                    quality = QUALITIES.HIGH
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': True
                }
                hoster['subs'] = sub
                hosters.append(hoster)
        return hosters

示例#13

0

显示文件

文件： tvwtvs_scraper.py 项目： uguer30/Project

 def __get_gk_links(self, html, page_url):
     sources = {}
     match = re.search('{link\s*:\s*"([^"]+)', html)
     if match:
         data = {'link': match.group(1)}
         url = urlparse.urljoin(self.base_url, LINK_URL)
         headers = {'Referer': page_url}
         html = self._http_get(url,
                               data=data,
                               headers=headers,
                               cache_limit=.25)
         js_data = scraper_utils.parse_json(html, url)
         if 'link' in js_data:
             for link in js_data['link']:
                 if 'type' in link and link[
                         'type'] == 'mp4' and 'link' in link:
                     if self._get_direct_hostname(link['link']) == 'gvideo':
                         quality = scraper_utils.gv_get_quality(
                             link['link'])
                     elif 'label' in link:
                         quality = scraper_utils.height_get_quality(
                             link['label'])
                     else:
                         quality = QUALITIES.HIGH
                     sources[link['link']] = quality
     return sources

示例#14

0

显示文件

文件： miradetodo_scraper.py 项目： uguer30/Project

 def __get_gk_links(self, html):
     sources = {}
     match = re.search('{link\s*:\s*"([^"]+)', html)
     if match:
         iframe_url = match.group(1)
         data = {'link': iframe_url}
         headers = {'Referer': iframe_url}
         html = self._http_get(self.gk_url, data=data, headers=headers, cache_limit=.5)
         js_data = scraper_utils.parse_json(html, self.gk_url)
         links = js_data.get('link', [])
         if isinstance(links, basestring):
             links = [{'link': links}]
             
         for link in links:
             stream_url = link['link']
             if scraper_utils.get_direct_hostname(self, stream_url) == 'openload.co':
                 quality = scraper_utils.gv_get_quality(stream_url)
                 direct = True
             elif 'label' in link:
                 quality = scraper_utils.height_get_quality(link['label'])
                 direct = True
             else:
                 quality = QUALITIES.HIGH
                 direct = False
             sources[stream_url] = {'quality': quality, 'direct': direct}
     return sources

示例#15

0

显示文件

文件： bestmoviez_scraper.py 项目： uguer30/Project

 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     post = dom_parser2.parse_dom(html, 'div', {'class': 'entry-content'})
     if not post: return hosters
     for match in re.finditer('(?:href="|>)(https?://[^"<]+)',
                              post[0].content):
         stream_url = match.group(1)
         if scraper_utils.excluded_link(
                 stream_url) or 'imdb.com' in stream_url:
             continue
         host = urlparse.urlparse(stream_url).hostname
         if video.video_type == VIDEO_TYPES.MOVIE:
             meta = scraper_utils.parse_movie_link(stream_url)
         else:
             meta = scraper_utils.parse_episode_link(stream_url)
         quality = scraper_utils.height_get_quality(meta['height'])
         hoster = {
             'multi-part': False,
             'host': host,
             'class': self,
             'views': None,
             'url': stream_url,
             'rating': None,
             'quality': quality,
             'direct': False
         }
         hosters.append(hoster)
     return hosters

示例#16

0

显示文件

文件： hevcbluray_scraper.py 项目： uguer30/Project

 def get_sources(self, video):
     source_url = self.get_url(video)
     sources = []
     if not source_url or source_url == FORCE_NO_MATCH: return sources
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, cache_limit=.5)
     is_3d = False
     page_quality = QUALITIES.HD720
     title = dom_parser2.parse_dom(html, 'title')
     if title:
         title = title[0].content
         match = re.search('(\d{3,})p', title)
         if match:
             page_quality = scraper_utils.height_get_quality(match.group(1))
         
         is_3d = True if re.search('\s+3D\s+', title) else False
     
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
     if fragment:
         for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'h3'):
             for attrs, _content in dom_parser2.parse_dom(item, 'a', req='href'):
                 stream_url = attrs['href']
                 host = urlparse.urlparse(stream_url).hostname
                 source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': page_quality, 'views': None, 'rating': None, 'direct': False}
                 source['format'] = 'x265'
                 source['3D'] = is_3d
                 sources.append(source)
                 
     return sources

示例#17

0

显示文件

文件： dizilab_scraper.py 项目： Lhse44/repository.deallen

 def __get_cloud_links(self, html, page_url, sub):
     hosters = []
     html = html.replace('\\"', '"').replace('\\/', '/')
     match = re.search("dizi_kapak_getir\('([^']+)", html)
     if match:
         ep_id = match.group(1)
         for attrs, _content in dom_parser2.parse_dom(html, 'script', {'data-cfasync': 'false'}, req='src'):
             script_url = attrs['src']
             html = self._http_get(script_url, cache_limit=24)
             match1 = re.search("var\s+kapak_url\s*=\s*'([^']+)", html)
             match2 = re.search("var\s+aCtkp\s*=\s*'([^']+)", html)
             if match1 and match2:
                 link_url = '%s?fileid=%s&access_token=%s' % (match1.group(1), ep_id, match2.group(1))
                 headers = {'Referer': page_url}
                 html = self._http_get(link_url, headers=headers, cache_limit=.5)
                 js_data = scraper_utils.parse_json(html, link_url)
                 for variant in js_data.get('variants', {}):
                     stream_host = random.choice(variant.get('hosts', []))
                     if stream_host:
                         stream_url = stream_host + variant['path'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                         if not stream_url.startswith('http'):
                             stream_url = 'http://' + stream_url
                         host = scraper_utils.get_direct_hostname(self, stream_url)
                         if 'width' in variant:
                             quality = scraper_utils.width_get_quality(variant['width'])
                         elif 'height' in variant:
                             quality = scraper_utils.height_get_quality(variant['height'])
                         else:
                             quality = QUALITIES.HIGH
                         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                         hoster['subs'] = sub
                         hosters.append(hoster)
     return hosters

示例#18

0

显示文件

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)
            q_str = dom_parser.parse_dom(html, 'span', {'class': 'calidad\d*'})
            if q_str:
                if q_str[0].upper() == 'COMING SOON':
                    return hosters

                try:
                    quality = scraper_utils.height_get_quality(q_str[0])
                except:
                    quality = QUALITIES.HIGH
            else:
                quality = QUALITIES.HIGH
            fragment = dom_parser.parse_dom(html, 'div', {'id': 'player\d+'})
            if fragment:
                for match in re.finditer('<iframe[^>]+src="([^"]+)',
                                         fragment[0], re.I):
                    stream_url = match.group(1)
                    host = urlparse.urlparse(stream_url).hostname
                    hoster = {
                        'multi-part': False,
                        'url': stream_url,
                        'host': host,
                        'class': self,
                        'quality': quality,
                        'views': None,
                        'rating': None,
                        'direct': False
                    }
                    hosters.append(hoster)
        return hosters

示例#19

0

显示文件

 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     sources = self.__get_post_links(html)
     for source, value in sources.iteritems():
         if scraper_utils.excluded_link(source): continue
         host = urlparse.urlparse(source).hostname
         if video.video_type == VIDEO_TYPES.MOVIE:
             meta = scraper_utils.parse_movie_link(value['release'])
         else:
             meta = scraper_utils.parse_episode_link(value['release'])
         quality = scraper_utils.height_get_quality(meta['height'])
         hoster = {
             'multi-part': False,
             'host': host,
             'class': self,
             'views': None,
             'url': source,
             'rating': None,
             'quality': quality,
             'direct': False
         }
         if 'format' in meta: hoster['format'] = meta['format']
         hosters.append(hoster)
     return hosters

示例#20

0

显示文件

文件： 9movies_scraper.py 项目： uguer30/Project

    def __grab_links(self, grab_url, query, referer):
        try:
            sources = {}
            query['mobile'] = '0'
            query.update(self.__get_token(query))
            grab_url = grab_url + '?' + urllib.urlencode(query)
            headers = XHR
            headers['Referer'] = referer
            html = self._http_get(grab_url, headers=headers, cache_limit=.5)
            js_data = scraper_utils.parse_json(html, grab_url)
            if 'data' in js_data:
                for link in js_data['data']:
                    stream_url = link['file']
                    if self._get_direct_hostname(stream_url) == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    elif 'label' in link:
                        quality = scraper_utils.height_get_quality(
                            link['label'])
                    else:
                        quality = QUALITIES.HIGH
                    sources[stream_url] = {'direct': False, 'quality': quality}
        except Exception as e:
            log_utils.log('9Movies Link Parse Error: %s' % (e),
                          log_utils.LOGWARNING)

        return sources

示例#21

0

显示文件

文件： dizimag_scraper.py 项目： Lhse44/repository.deallen

    def __create_source(self,
                        stream_url,
                        height,
                        page_url,
                        subs=False,
                        direct=True):
        if direct:
            stream_url = stream_url.replace('\\/', '/')
            if self.get_name().lower() in stream_url:
                headers = {'Referer': page_url}
                redir_url = self._http_get(stream_url,
                                           headers=headers,
                                           method='HEAD',
                                           allow_redirect=False,
                                           cache_limit=.25)
                if redir_url.startswith('http'):
                    stream_url = redir_url
                    stream_url += scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                else:
                    stream_url += scraper_utils.append_headers({
                        'User-Agent':
                        scraper_utils.get_ua(),
                        'Referer':
                        page_url,
                        'Cookie':
                        self._get_stream_cookies()
                    })
            else:
                stream_url += scraper_utils.append_headers({
                    'User-Agent':
                    scraper_utils.get_ua(),
                    'Referer':
                    page_url
                })

            host = scraper_utils.get_direct_hostname(self, stream_url)
        else:
            host = urlparse.urlparse(stream_url).hostname

        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(height)

        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': direct
        }
        if subs: hoster['subs'] = 'Turkish Subtitles'
        return hoster

示例#22

0

显示文件

    def get_sources(self, video):
        hosters = []
        sources = {}
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        match = re.search("load_player\('([^']+)", html)
        if not match: return hosters
        
        headers = {'Referer': page_url, 'Server': 'cloudflare-nginx', 'Accept': 'text/html, */*; q=0.01',
                   'Accept-Language': 'en-US,en;q=0.5', 'Accept-Formating': 'application/json, text/javascript', 'Accept-Encoding': 'gzip, deflate'}
        headers.update(XHR)
        params = {'id': match.group(1)}
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        html = self._http_get(player_url, params=params, headers=headers, cache_limit=1)
        js_data = scraper_utils.parse_json(html, player_url)
        pl_url = js_data.get('value') or js_data.get('download')
        if not pl_url: return hosters
        
        headers = {'Referer': page_url}
        if pl_url.startswith('//'): pl_url = 'https:' + pl_url
        html = self._http_get(pl_url, headers=headers, allow_redirect=False, cache_limit=0)
        if html.startswith('http'):
            streams = [(html, '')]
        else:
            js_data = scraper_utils.parse_json(html, pl_url)
            try: streams = [(source['file'], source.get('label', '')) for source in js_data['playlist'][0]['sources']]
            except: streams = []
            
        for stream in streams:
            stream_url, label = stream
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
            else:
                if label:
                    quality = scraper_utils.height_get_quality(label)
                else:
                    quality = QUALITIES.HIGH
                sources[stream_url] = {'quality': quality, 'direct': False}
                    
        for source, value in sources.iteritems():
            direct = value['direct']
            quality = value['quality']
            if direct:
                host = scraper_utils.get_direct_hostname(self, source)
            else:
                host = urlparse.urlparse(source).hostname

            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)
            
        return hosters

示例#23

0

显示文件

文件： easynews_scraper.py 项目： uguer30/Project

    def __get_links(self, url, video):
        hosters = []
        search_url, params = self.__translate_search(url)
        html = self._http_get(search_url, params=params, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, search_url)
        down_url = js_result.get('downURL')
        dl_farm = js_result.get('dlFarm')
        dl_port = js_result.get('dlPort')
        for item in js_result.get('data', []):
            post_hash, size, post_title, ext, duration = item['0'], item['4'], item['10'], item['11'], item['14']
            checks = [False] * 6
            if not scraper_utils.release_check(video, post_title): checks[0] = True
            if 'alangs' in item and item['alangs'] and 'eng' not in item['alangs']: checks[1] = True
            if re.match('^\d+s', duration) or re.match('^[0-5]m', duration): checks[2] = True
            if 'passwd' in item and item['passwd']: checks[3] = True
            if 'virus' in item and item['virus']: checks[4] = True
            if 'type' in item and item['type'].upper() != 'VIDEO': checks[5] = True
            if any(checks):
                logger.log('EasyNews Post excluded: %s - |%s|' % (checks, item), log_utils.LOGDEBUG)
                continue
            
            stream_url = down_url + urllib.quote('/%s/%s/%s%s/%s%s' % (dl_farm, dl_port, post_hash, ext, post_title, ext))
            stream_url = stream_url + '|Authorization=%s' % (urllib.quote(self.auth))
            host = scraper_utils.get_direct_hostname(self, stream_url)
            quality = None
            if 'width' in item:
                try: width = int(item['width'])
                except: width = 0
                if width:
                    quality = scraper_utils.width_get_quality(width)
            
            if quality is None:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(post_title)
                else:
                    meta = scraper_utils.parse_episode_link(post_title)
                quality = scraper_utils.height_get_quality(meta['height'])
                
            if self.max_bytes:
                match = re.search('([\d.]+)\s+(.*)', size)
                if match:
                    size_bytes = scraper_utils.to_bytes(*match.groups())
                    if size_bytes > self.max_bytes:
                        logger.log('Result skipped, Too big: |%s| - %s (%s) > %s (%s GB)' % (post_title, size_bytes, size, self.max_bytes, self.max_gb))
                        continue

            hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True}
            if any(i for i in ['X265', 'HEVC'] if i in post_title.upper()): hoster['format'] = 'x265'
            if size: hoster['size'] = size
            if post_title: hoster['extra'] = post_title
            hosters.append(hoster)
        return hosters

示例#24

0

显示文件

文件： tvhd_scraper.py 项目： uguer30/Project

 def __get_sources(self, video, html):
     sources = {}
     for match in re.finditer(
             '<center>\s*<b>\s*(.*?)\s*</b>.*?<tr>(.*?)</tr>', html,
             re.DOTALL):
         release, links = match.groups()
         release = re.sub('</?[^>]*>', '', release)
         if scraper_utils.release_check(video, release):
             meta = scraper_utils.parse_episode_link(release)
             for match in re.finditer('href="([^"]+)', links):
                 sources[match.group(1)] = scraper_utils.height_get_quality(
                     meta['height'])
     return sources

示例#25

0

显示文件

    def get_sources(self, video):
        source_url = self.get_url(video)
        sources = []
        if not source_url or source_url == FORCE_NO_MATCH: return sources
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'img', req=['data-id', 'data-name']):
            film_id, data_name = attrs['data-id'], attrs['data-name']
            data = {'id': film_id, 'n': data_name}
            server_url = scraper_utils.urljoin(self.base_url, SERVER_URL)
            server_url = server_url % (film_id)
            headers = {'Referer': page_url}
            headers.update(XHR)
            html = self._http_get(server_url, data=data, headers=headers, cache_limit=.5)
            for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-id'):
                data = {'epid': attrs['data-id']}
                ep_url = scraper_utils.urljoin(self.base_url, EP_URL)
                ep_url = ep_url % (attrs['data-id'])
                headers = {'Referer': page_url}
                headers.update(XHR)
                html = self._http_get(ep_url, data=data, headers=headers, cache_limit=.5)
                js_data = scraper_utils.parse_json(html, ep_url)
                try:
                    links = [r.attrs['src'] for r in dom_parser2.parse_dom(js_data['link']['embed'], 'iframe', req='src')]
                except:
                    try: links = js_data['link']['l']
                    except: links = []
                try: heights = js_data['link']['q']
                except: heights = []
                for stream_url, height in map(None, links, heights):
                    match = re.search('movie_url=(.*)', stream_url)
                    if match:
                        stream_url = match.group(1)
                        
                    host = scraper_utils.get_direct_hostname(self, stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                        direct = True
                    else:
                        host = urlparse.urlparse(stream_url).hostname
                        if height:
                            quality = scraper_utils.height_get_quality(height)
                        else:
                            quality = QUALITIES.HD720
                        direct = False
                    source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                    sources.append(source)

        return sources

示例#26

0

显示文件

文件： movytvy_scraper.py 项目： uguer30/Project

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
        fragment = dom_parser2.parse_dom(html, 'table',
                                         {'class': 'links-table'})
        if not fragment: return hosters
        for _attrs, row in dom_parser2.parse_dom(fragment[0].content, 'tr'):
            match = re.search(
                "playVideo\.bind\(.*?'([^']+)(?:[^>]*>){2}(.*?)</td>", row,
                re.DOTALL)
            if not match: continue

            stream_url, release = match.groups()
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                sources = scraper_utils.parse_google(self, stream_url)
            else:
                sources = [stream_url]

            for source in sources:
                host = scraper_utils.get_direct_hostname(self, source)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(source)
                    direct = True
                else:
                    host = urlparse.urlparse(source).hostname
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        meta = scraper_utils.parse_movie_link(release)
                    else:
                        meta = scraper_utils.parse_episode_link(release)
                    base_quality = scraper_utils.height_get_quality(
                        meta['height'])
                    quality = scraper_utils.get_quality(
                        video, host, base_quality)
                    direct = False
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': source,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters

示例#27

0

显示文件

 def __get_post_links(self, html, video):
     sources = {}
     post = dom_parser.parse_dom(html, 'article', {'id': 'post-\d+'})
     if post:
         for fragment in dom_parser.parse_dom(post[0], 'h2'):
             for match in re.finditer('href="([^"]+)', fragment):
                 stream_url = match.group(1)
                 meta = scraper_utils.parse_episode_link(stream_url)
                 release_quality = scraper_utils.height_get_quality(
                     meta['height'])
                 host = urlparse.urlparse(stream_url).hostname
                 quality = scraper_utils.get_quality(
                     video, host, release_quality)
                 sources[stream_url] = quality
     return sources

示例#28

0

显示文件

文件： pubfilmto_scraper.py 项目： uguer30/Project

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        iframe_url = dom_parser2.parse_dom(html,
                                           'iframe', {'id': 'myiframe'},
                                           req='src',
                                           exclude_comments=True)
        if not iframe_url: return hosters
        iframe_url = iframe_url[0].attrs['src']
        html = self._http_get(iframe_url,
                              headers={'Referer': page_url},
                              cache_limit=.5)

        for source in dom_parser2.parse_dom(html,
                                            'source', {'type': 'video/mp4'},
                                            req=['src', 'data-res']):
            stream_url = source.attrs['src']
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
                stream_url += scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
            else:
                quality = scraper_utils.height_get_quality(
                    source.attrs['data-res'])
                stream_url += scraper_utils.append_headers({
                    'User-Agent':
                    scraper_utils.get_ua(),
                    'Referer':
                    page_url
                })

            source = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True
            }
            hosters.append(source)

        return hosters

示例#29

0

显示文件

文件： tunemovie_scraper.py 项目： Lhse44/repository.deallen

 def __get_gk_links(self, html, page_url):
     sources = {}
     for link in dom_parser.parse_dom(html, 'div',
                                      {'class': '[^"]*server_line[^"]*'}):
         film_id = dom_parser.parse_dom(link, 'a', ret='data-film')
         name_id = dom_parser.parse_dom(link, 'a', ret='data-name')
         server_id = dom_parser.parse_dom(link, 'a', ret='data-server')
         if film_id and name_id and server_id:
             data = {
                 'ipplugins': 1,
                 'ip_film': film_id[0],
                 'ip_server': server_id[0],
                 'ip_name': name_id[0]
             }
             headers = XHR
             headers['Referer'] = page_url
             url = urlparse.urljoin(self.base_url, LINK_URL)
             html = self._http_get(url,
                                   data=data,
                                   headers=headers,
                                   cache_limit=.25)
             js_data = scraper_utils.parse_json(html, url)
             if 's' in js_data:
                 url = urlparse.urljoin(self.base_url, LINK_URL2)
                 params = {'u': js_data['s'], 'w': '100%', 'h': 420}
                 html = self._http_get(url,
                                       params=params,
                                       data=data,
                                       headers=headers,
                                       cache_limit=.25)
                 js_data = scraper_utils.parse_json(html, url)
                 if 'data' in js_data and js_data['data']:
                     if isinstance(js_data['data'], basestring):
                         sources[js_data['data']] = QUALITIES.HIGH
                     else:
                         for link in js_data['data']:
                             stream_url = link['files']
                             if self._get_direct_hostname(
                                     stream_url) == 'gvideo':
                                 quality = scraper_utils.gv_get_quality(
                                     stream_url)
                             elif 'quality' in link:
                                 quality = scraper_utils.height_get_quality(
                                     link['quality'])
                             else:
                                 quality = QUALITIES.HIGH
                             sources[stream_url] = quality
     return sources

示例#30

0

显示文件

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=0)
        match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
        if not match: return hosters

        video_id = match.group(1)
        headers = {'Referer': page_url}
        headers.update(XHR)
        _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'),
                               headers=headers,
                               method='POST',
                               cache_limit=0)

        vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
        html = self._http_get(vid_url,
                              data={'v': video_id},
                              headers=headers,
                              cache_limit=0)
        for source, value in scraper_utils.parse_json(html,
                                                      vid_url).iteritems():
            match = re.search('url=(.*)', value)
            if not match: continue
            stream_url = urllib.unquote(match.group(1))

            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(source)
            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            }
            hosters.append(hoster)
        return hosters