Python height_get_quality示例，dsrd_lib.scraper_utils.height_get_quality Python示例

示例#1

0

显示文件

 def __get_links_from_json2(self, url, page_url, video_type):
     """Collect direct stream links from the JSON playlist served at ``url``.

     The request carries ``page_url`` as Referer plus the XHR headers and is
     sent with ``cache_limit=0``.  Only the first playlist entry is examined.

     Returns a dict mapping each stream URL to
     ``{'quality': <quality>, 'direct': True}``.  Any exception raised while
     walking the playlist is logged at debug level and whatever was gathered
     up to that point is returned.
     """
     request_headers = {'Referer': page_url}
     request_headers.update(XHR)
     response = self._http_get(url, headers=request_headers, cache_limit=0)
     js_data = scraper_utils.parse_json(response, url)

     sources = {}
     try:
         first_entry = js_data.get('playlist', [])[0]
         for item in first_entry.get('sources', []):
             stream_url = item['file']
             host = scraper_utils.get_direct_hostname(self, stream_url)
             if host == 'gvideo':
                 quality = scraper_utils.gv_get_quality(stream_url)
             elif 'label' in item:
                 quality = scraper_utils.height_get_quality(item['label'])
             else:
                 # No label available: use the height parsed from the URL.
                 if video_type == VIDEO_TYPES.MOVIE:
                     parse_link = scraper_utils.parse_movie_link
                 else:
                     parse_link = scraper_utils.parse_episode_link
                 quality = scraper_utils.height_get_quality(
                     parse_link(stream_url)['height'])
             sources[stream_url] = {'quality': quality, 'direct': True}
             logger.log(
                 'Adding stream: %s Quality: %s' % (stream_url, quality),
                 log_utils.LOGDEBUG)
     except Exception as e:
         logger.log('Exception during yesmovies extract: %s' % (e),
                    log_utils.LOGDEBUG)
     return sources

示例#2

0

显示文件

    def get_sources(self, video):
        """Return the list of hoster dicts found for ``video``.

        The page's ``view_id`` is posted to ``self.ajax_url`` once per
        language code ('or', then 'tr'); the first language that yields any
        source wins.  Sources found via the 'tr' request are flagged as
        carrying Turkish subtitles.  An empty list is returned when the
        video has no URL or the page exposes no ``view_id``.
        """
        hosters = []
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        page_html = self._http_get(page_url, cache_limit=.25)
        id_match = re.search(r'var\s+view_id\s*=\s*"([^"]+)', page_html)
        if not id_match:
            return hosters
        view_id = id_match.group(1)

        for lang in ('or', 'tr'):
            subs = lang == 'tr'
            view_data = {'id': view_id, 'tip': 'view', 'dil': lang}
            reply = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
            # Drop literal "\n" / "\t" escape sequences left in the reply.
            reply = re.sub(r'\\n|\\t', '', reply.strip())
            list_match = re.search(r'var\s+sources\s*=\s*(\[.*?\])', reply)
            if list_match:
                raw_data = list_match.group(1).replace('\\', '')
            else:
                raw_data = reply

            js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
            if 'data' not in js_data:
                continue

            frames = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src')
            if not frames:
                continue

            embed_html = self._http_get(frames[0].attrs['src'], cache_limit=.25)
            for attrs, _content in dom_parser2.parse_dom(embed_html, 'iframe', req='src'):
                frame_src = attrs['src']
                if frame_src.startswith('http'):
                    # Nested iframes are non-direct links with a fixed 720p label.
                    sources.append({'label': '720p', 'file': frame_src, 'direct': False, 'subs': subs})

            sources += [{'file': url, 'subs': subs} for url in scraper_utils.parse_sources_list(self, embed_html).iterkeys()]

            if sources:
                break

        for source in sources:
            direct = source.get('direct', True)
            stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            if not direct:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in source:
                    quality = scraper_utils.height_get_quality(source['label'])
                else:
                    # Direct link with no way to determine quality: skip it.
                    continue

            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            if source.get('subs'):
                hoster['subs'] = 'Turkish Subtitles'
            hosters.append(hoster)

        return hosters

示例#3

0

显示文件

文件： premiumizev2_scraper.py 项目： Lhse44/repository.deallen

 def __get_quality(self, item, video):
     """Derive a quality value for ``item``.

     Preference order: a truthy ``width``, then a truthy ``height``, then
     the height parsed from ``name`` (movie or episode parser depending on
     ``video.video_type``).  Falls back to ``QUALITIES.HIGH`` when none of
     those keys is usable.
     """
     if item.get('width'):
         return scraper_utils.width_get_quality(item['width'])
     if item.get('height'):
         return scraper_utils.height_get_quality(item['height'])
     if 'name' not in item:
         return QUALITIES.HIGH

     if video.video_type == VIDEO_TYPES.MOVIE:
         parse_link = scraper_utils.parse_movie_link
     else:
         parse_link = scraper_utils.parse_episode_link
     meta = parse_link(item['name'])
     return scraper_utils.height_get_quality(meta['height'])

示例#4

0

显示文件

文件： tvshow_scraper.py 项目： Lhse44/repository.deallen

    def get_sources(self, video):
        """Return non-direct hoster dicts for every link in the post body.

        A page-wide quality is parsed from the <title>; each <p> section may
        carry its own release text.  The quality assigned to a section's
        links is whichever of the two has the lower ``Q_ORDER`` value (the
        page quality wins ties).
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=.5)

        titles = dom_parser2.parse_dom(html, 'title')
        if not titles:
            page_quality = QUALITIES.HIGH
        else:
            # Strip the leading "[ST] –" marker before parsing the title.
            page_title = re.sub(r'^\[ST\]\s*&#8211;\s*', '', titles[0].content)
            page_meta = scraper_utils.parse_episode_link(page_title)
            page_quality = scraper_utils.height_get_quality(page_meta['height'])

        fragment = dom_parser2.parse_dom(html, 'section',
                                         {'class': 'entry-content'})
        if not fragment:
            return hosters

        for _attrs, section in dom_parser2.parse_dom(fragment[0].content, 'p'):
            # Text preceding the first tag of the paragraph (may be empty).
            lead_text = re.search('([^<]*)', section).group(1)
            meta = scraper_utils.parse_episode_link(lead_text)
            if meta['episode'] != '-1' or meta['airdate']:
                section_quality = scraper_utils.height_get_quality(
                    meta['height'])
            else:
                section_quality = page_quality

            if Q_ORDER[section_quality] < Q_ORDER[page_quality]:
                quality = section_quality
            else:
                quality = page_quality

            for attrs, _content in dom_parser2.parse_dom(section, 'a',
                                                         req='href'):
                stream_url = attrs['href']
                hosters.append({
                    'multi-part': False,
                    'host': urlparse.urlparse(stream_url).hostname,
                    'class': self,
                    'views': None,
                    'url': stream_url,
                    'rating': None,
                    'quality': quality,
                    'direct': False
                })

        return hosters

示例#5

0

显示文件

文件： dizibox_scraper.py 项目： Lhse44/repository.deallen

 def __get_king_links(self, iframe_url):
     """Return direct hoster dicts for the video referenced by ``iframe_url``.

     The value after ``v=`` in ``iframe_url`` is posted as ``ID`` to the
     iframe's base URL (query string removed) with ``p=GetVideoSources``.
     Each entry of the reply's ``VideoSources`` list becomes one hoster
     flagged with Turkish subtitles.

     Extraction is best-effort: if the reply is malformed, the hosters
     collected so far are returned.  Returns an empty list when
     ``iframe_url`` carries no ``v=`` value.
     """
     hosters = []
     match = re.search('v=(.*)', iframe_url)
     if not match:
         return hosters

     data = {'ID': match.group(1)}
     headers = {'Referer': iframe_url}
     headers.update(XHR)
     xhr_url = iframe_url.split('?')[0]
     html = self._http_get(xhr_url, params={'p': 'GetVideoSources'}, data=data, headers=headers, cache_limit=.5)
     js_data = scraper_utils.parse_json(html, xhr_url)
     try:
         for source in js_data['VideoSources']:
             stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
             host = scraper_utils.get_direct_hostname(self, source['file'])
             label = source.get('label', '')
             if host == 'gvideo':
                 quality = scraper_utils.gv_get_quality(source['file'])
             elif re.search(r'\d+p?', label):
                 quality = scraper_utils.height_get_quality(label)
             else:
                 quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
             hosters.append(hoster)
     except Exception:
         # Deliberately best-effort: a malformed reply just ends extraction.
         # Catching Exception (instead of a bare except) lets SystemExit and
         # KeyboardInterrupt propagate.
         pass

     return hosters

示例#6

0

显示文件

 def __get_cloud_links(self, html, page_url, sub):
     """Return direct hoster dicts for the cloud-hosted variants of an episode.

     The episode id is read from the ``dizi_kapak_getir('...')`` call in
     ``html``.  Every ``data-cfasync="false"`` script is downloaded and
     searched for ``kapak_url`` and ``aCtkp``; when both are present they
     are combined with the episode id into a link URL whose JSON reply
     lists the ``variants``.  One host is picked at random per variant.

     ``sub`` is stored unchanged under each hoster's ``subs`` key.
     Returns an empty list when no episode id is found.
     """
     hosters = []
     html = html.replace('\\"', '"').replace('\\/', '/')
     match = re.search(r"dizi_kapak_getir\('([^']+)", html)
     if not match:
         return hosters

     ep_id = match.group(1)
     for attrs, _content in dom_parser2.parse_dom(html, 'script', {'data-cfasync': 'false'}, req='src'):
         script_html = self._http_get(attrs['src'], cache_limit=24)
         url_match = re.search(r"var\s+kapak_url\s*=\s*'([^']+)", script_html)
         token_match = re.search(r"var\s+aCtkp\s*=\s*'([^']+)", script_html)
         if not (url_match and token_match):
             continue

         link_url = '%s?fileid=%s&access_token=%s' % (url_match.group(1), ep_id, token_match.group(1))
         headers = {'Referer': page_url}
         link_html = self._http_get(link_url, headers=headers, cache_limit=.5)
         js_data = scraper_utils.parse_json(link_html, link_url)
         for variant in js_data.get('variants', {}):
             # random.choice() raises IndexError on an empty sequence, so a
             # variant without hosts must be skipped before choosing.
             hosts = variant.get('hosts', [])
             if not hosts:
                 continue
             stream_host = random.choice(hosts)
             if not stream_host:
                 continue

             stream_url = stream_host + variant['path'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
             if not stream_url.startswith('http'):
                 stream_url = 'http://' + stream_url
             host = scraper_utils.get_direct_hostname(self, stream_url)
             if 'width' in variant:
                 quality = scraper_utils.width_get_quality(variant['width'])
             elif 'height' in variant:
                 quality = scraper_utils.height_get_quality(variant['height'])
             else:
                 quality = QUALITIES.HIGH
             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
             hoster['subs'] = sub
             hosters.append(hoster)
     return hosters

示例#7

0

显示文件

文件： tvwtvs_scraper.py 项目： Lhse44/repository.deallen

 def __get_gk_links(self, html, page_url):
     """Resolve the page's ``{link: "..."}`` token into mp4 stream links.

     The token is posted to ``LINK_URL`` with ``page_url`` as Referer.
     Returns a dict mapping each mp4 link in the JSON reply to its quality;
     the dict is empty when the token or the reply's ``link`` key is absent.
     """
     sources = {}
     token = re.search(r'{link\s*:\s*"([^"]+)', html)
     if not token:
         return sources

     data = {'link': token.group(1)}
     url = urlparse.urljoin(self.base_url, LINK_URL)
     headers = {'Referer': page_url}
     reply = self._http_get(url, data=data, headers=headers, cache_limit=.25)
     js_data = scraper_utils.parse_json(reply, url)
     if 'link' not in js_data:
         return sources

     for entry in js_data['link']:
         # Only entries typed as mp4 that actually carry a link are usable.
         if 'type' not in entry or entry['type'] != 'mp4' or 'link' not in entry:
             continue

         stream_url = entry['link']
         if self._get_direct_hostname(stream_url) == 'gvideo':
             quality = scraper_utils.gv_get_quality(stream_url)
         elif 'label' in entry:
             quality = scraper_utils.height_get_quality(entry['label'])
         else:
             quality = QUALITIES.HIGH
         sources[stream_url] = quality
     return sources

示例#8

0

显示文件

文件： heydl_scraper.py 项目： Lhse44/repository.deallen

    def get_sources(self, video):
        """Return direct hoster dicts for every anchor containing ``MOVIE_URL``.

        Quality (and, when present, format) comes from parsing the raw href
        as a movie link; the stored URL is the pathified href with a
        User-Agent header appended.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
            href = attrs['href']
            if MOVIE_URL not in href:
                continue

            # Parse metadata from the untouched href before rewriting it.
            meta = scraper_utils.parse_movie_link(href)
            ua_headers = scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            stream_url = scraper_utils.pathify_url(href) + ua_headers
            quality = scraper_utils.height_get_quality(meta['height'])
            host = scraper_utils.get_direct_hostname(self, stream_url)
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            }
            if 'format' in meta:
                hoster['format'] = meta['format']
            hosters.append(hoster)

        return hosters

示例#9

0

显示文件

文件： filmstreaming_scraper.py 项目： Lhse44/repository.deallen

    def get_sources(self, video):
        """Return non-direct hoster dicts for each iframe in the player block.

        The quality is read from the first ``calidad*`` span; when that span
        is missing or cannot be converted, ``QUALITIES.HIGH`` is used.  A
        span reading "COMING SOON" (case-insensitive) yields no hosters.
        An empty list is also returned when the video has no URL.
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        quality = QUALITIES.HIGH
        q_str = dom_parser.parse_dom(html, 'span', {'class': r'calidad\d*'})
        if q_str:
            if q_str[0].upper() == 'COMING SOON':
                return hosters

            try:
                quality = scraper_utils.height_get_quality(q_str[0])
            except Exception:
                # Unparseable label: keep the HIGH default.  Exception is
                # caught instead of a bare except so SystemExit and
                # KeyboardInterrupt are not swallowed.
                quality = QUALITIES.HIGH

        fragment = dom_parser.parse_dom(html, 'div', {'id': r'player\d+'})
        if fragment:
            for match in re.finditer('<iframe[^>]+src="([^"]+)',
                                     fragment[0], re.I):
                stream_url = match.group(1)
                host = urlparse.urlparse(stream_url).hostname
                hoster = {
                    'multi-part': False,
                    'url': stream_url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': False
                }
                hosters.append(hoster)
        return hosters

示例#10

0

显示文件

文件： bestmoviez_scraper.py 项目： Lhse44/repository.deallen

 def get_sources(self, video):
     """Return non-direct hoster dicts for the links in the post body.

     Links are taken from the ``entry-content`` div, either from ``href``
     attributes or from bare URLs that directly follow a ``>``.  Excluded
     links and imdb.com links are skipped.  Quality is parsed from the
     link itself using the movie or episode parser depending on
     ``video.video_type``.

     Returns an empty list when the video has no URL or the page has no
     ``entry-content`` div.
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     post = dom_parser2.parse_dom(html, 'div', {'class': 'entry-content'})
     if not post:
         return hosters

     # The URL is captured up to the next quote, tag start or whitespace.
     # The previous pattern ended in a lazy ".+?" with nothing after it,
     # which always matched exactly one character and truncated every URL.
     link_pattern = r'(?:href="|>)(https?://[^"<\s]+)'
     for match in re.finditer(link_pattern, post[0].content):
         stream_url = match.group(1)
         if scraper_utils.excluded_link(
                 stream_url) or 'imdb.com' in stream_url:
             continue
         host = urlparse.urlparse(stream_url).hostname
         if video.video_type == VIDEO_TYPES.MOVIE:
             meta = scraper_utils.parse_movie_link(stream_url)
         else:
             meta = scraper_utils.parse_episode_link(stream_url)
         quality = scraper_utils.height_get_quality(meta['height'])
         hoster = {
             'multi-part': False,
             'host': host,
             'class': self,
             'views': None,
             'url': stream_url,
             'rating': None,
             'quality': quality,
             'direct': False
         }
         hosters.append(hoster)
     return hosters

示例#11

0

显示文件

    def get_sources(self, video):
        """Return non-direct hoster dicts for the links inside ``info2`` spans.

        Each block runs from a ``<span class='info2'`` to the next
        ``<span class='info`` or ``<hr/>``; every ``href`` inside it becomes
        one hoster whose quality is parsed from the link as an episode link.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url,
                              require_debrid=True,
                              headers={'User-Agent': LOCAL_UA},
                              cache_limit=.5)
        block_pattern = r"<span\s+class='info2'(.*?)(<span\s+class='info|<hr\s*/>)"
        for block in re.finditer(block_pattern, html, re.DOTALL):
            for link in re.finditer('href="([^"]+)', block.group(1)):
                stream_url = link.group(1)
                meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
                hosters.append({
                    'multi-part': False,
                    'host': urlparse.urlparse(stream_url).hostname,
                    'class': self,
                    'views': None,
                    'url': stream_url,
                    'rating': None,
                    'quality': quality,
                    'direct': False
                })

        return hosters

示例#12

0

显示文件

文件： scenerls_scraper.py 项目： Lhse44/repository.deallen

 def get_sources(self, video):
     """Return non-direct hoster dicts for the links gathered from the post.

     Links come from ``__get_post_links``; excluded links are skipped.
     Quality (and format, when present) is parsed from each link's
     ``release`` value with the movie or episode parser, depending on
     ``video.video_type``.
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     for stream_url, info in self.__get_post_links(html).iteritems():
         if scraper_utils.excluded_link(stream_url):
             continue

         host = urlparse.urlparse(stream_url).hostname
         if video.video_type == VIDEO_TYPES.MOVIE:
             parse_link = scraper_utils.parse_movie_link
         else:
             parse_link = scraper_utils.parse_episode_link
         meta = parse_link(info['release'])
         hoster = {
             'multi-part': False,
             'host': host,
             'class': self,
             'views': None,
             'url': stream_url,
             'rating': None,
             'quality': scraper_utils.height_get_quality(meta['height']),
             'direct': False
         }
         if 'format' in meta:
             hoster['format'] = meta['format']
         hosters.append(hoster)
     return hosters

示例#13

0

显示文件

    def __get_links_from_xml(self, xml, video):
        """Extract direct stream links from an RSS-style XML document.

        Every ``{http://rss.jwpcdn.com/}source`` element under each
        ``item`` contributes one entry.  Returns a dict mapping the stream
        URL to ``{'quality': <quality>, 'direct': True}``.  Any exception
        is logged as a warning and the entries collected so far are
        returned.
        """
        sources = {}
        try:
            for item in ET.fromstring(xml).findall('.//item'):
                title = item.find('title').text
                for node in item.findall('{http://rss.jwpcdn.com/}source'):
                    stream_url = node.get('file')
                    label = node.get('label')
                    if self._get_direct_hostname(stream_url) == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    elif label:
                        quality = scraper_utils.height_get_quality(label)
                    else:
                        # No label: derive the quality from the item title.
                        quality = scraper_utils.blog_get_quality(
                            video, title, '')
                    sources[stream_url] = {'quality': quality, 'direct': True}
                    message = 'Adding stream: %s Quality: %s' % (
                        stream_url, quality)
                    log_utils.log(message, log_utils.LOGDEBUG)
        except Exception as e:
            log_utils.log('Exception during 123Movies XML Parse: %s' % (e),
                          log_utils.LOGWARNING)

        return sources

示例#14

0

显示文件

文件： rmz_scraper.py 项目： Lhse44/repository.deallen

    def get_sources(self, video):
        """Return non-direct hoster dicts for the links on a release page.

        For movies the release page is first chosen via ``__get_release``
        and fetched.  Quality comes from the first ``releaselabel`` span
        containing a ``<width>x<height>`` pattern (``QUALITIES.HIGH`` if
        none does); labels scanned up to that point that mention hevc/x265
        mark every hoster with ``format = 'x265'``.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.MOVIE:
            release_url = self.__get_release(html, video)
            if release_url is None:
                return hosters

            page_url = scraper_utils.urljoin(self.base_url, release_url)
            html = self._http_get(page_url,
                                  require_debrid=True,
                                  cache_limit=.5)

        hevc = False
        quality = QUALITIES.HIGH
        labels = dom_parser2.parse_dom(html, 'span', {'class': 'releaselabel'})
        for _attrs, label in labels:
            if re.search('(hevc|x265)', label, re.I):
                hevc = 'x265'

            dimensions = re.search(r'(\d+)x(\d+)', label)
            if dimensions:
                # Scanning stops at the first label that states a resolution.
                quality = scraper_utils.height_get_quality(dimensions.group(2))
                break

        streams = []
        for attrs, _content in dom_parser2.parse_dom(
                html, 'a', {'class': 'links'}, req='href'):
            streams.append(attrs['href'])
        for _attrs, content in dom_parser2.parse_dom(
                html, 'pre', {'class': 'links'}):
            streams.append(content)

        for stream_url in streams:
            if scraper_utils.excluded_link(stream_url):
                continue
            hoster = {
                'multi-part': False,
                'host': urlparse.urlparse(stream_url).hostname,
                'class': self,
                'views': None,
                'url': stream_url,
                'rating': None,
                'quality': quality,
                'direct': False
            }
            if hevc:
                hoster['format'] = hevc
            hosters.append(hoster)

        return hosters

示例#15

0

显示文件

文件： rmz_scraper.py 项目： Lhse44/repository.deallen

    def __get_release(self, html, video):
        """Pick the page URL of one release listed in ``html``.

        The list is the ``<ul>`` with id ``releases`` (movies) or
        ``episodes`` (anything else).  The ``<scraper name>-select`` setting
        controls the choice: 0 returns the first acceptable entry, any
        other value returns the entry with the highest ``Q_ORDER`` quality.

        Returns the chosen ``href`` or None when no entry qualifies.  When
        the list element is missing the function falls off the end and
        implicitly returns None as well.
        """
        try:
            select = int(kodi.get_setting('%s-select' % (self.get_name())))
        except:
            # Any failure reading/converting the setting falls back to 0.
            select = 0
        ul_id = 'releases' if video.video_type == VIDEO_TYPES.MOVIE else 'episodes'
        fragment = dom_parser2.parse_dom(html, 'ul', {'id': ul_id})
        if fragment:
            best_qorder = 0
            best_page = None
            for _attrs, item in dom_parser2.parse_dom(fragment[0].content,
                                                      'li'):
                # Prefer a <span> carrying href/title; otherwise try an <a>.
                match = dom_parser2.parse_dom(item,
                                              'span',
                                              req=['href', 'title'])
                if not match:
                    match = dom_parser2.parse_dom(item,
                                                  'a',
                                                  req=['href', 'title'])
                    if not match: continue

                page_url, release = match[0].attrs['href'], match[0].attrs[
                    'title']
                match = dom_parser2.parse_dom(item, 'span', {'class': 'time'})
                # The first too-old entry ends the scan entirely
                # (presumably the list is ordered newest first; confirm).
                if match and self.__too_old(match[0].content): break

                # Drop a leading "[...]" tag from the release title.
                release = re.sub('^\[[^\]]*\]\s*', '', release)
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(release)
                else:
                    if not scraper_utils.release_check(
                            video, release, require_title=False):
                        continue
                    meta = scraper_utils.parse_episode_link(release)

                if select == 0:
                    # select == 0: take the first acceptable entry and stop.
                    best_page = page_url
                    break
                else:
                    # Otherwise keep the entry with the highest Q_ORDER seen.
                    quality = scraper_utils.height_get_quality(meta['height'])
                    logger.log(
                        'result: |%s|%s|%s|' %
                        (page_url, quality, Q_ORDER[quality]),
                        log_utils.LOGDEBUG)
                    if Q_ORDER[quality] > best_qorder:
                        logger.log(
                            'Setting best as: |%s|%s|%s|' %
                            (page_url, quality, Q_ORDER[quality]),
                            log_utils.LOGDEBUG)
                        best_page = page_url
                        best_qorder = Q_ORDER[quality]

            return best_page

示例#16

0

显示文件

文件： rlshd_scraper.py 项目： Lhse44/repository.deallen

 def __get_post_links(self, html, video):
     """Map every link in the post's <h2> headings to a quality.

     Only the first ``<article id="post-N">`` is examined.  The height
     parsed from each link (as an episode link) is combined with the
     link's hostname through ``scraper_utils.get_quality``.
     """
     sources = {}
     post = dom_parser.parse_dom(html, 'article', {'id': r'post-\d+'})
     if not post:
         return sources

     for heading in dom_parser.parse_dom(post[0], 'h2'):
         for link in re.finditer('href="([^"]+)', heading):
             stream_url = link.group(1)
             meta = scraper_utils.parse_episode_link(stream_url)
             release_quality = scraper_utils.height_get_quality(meta['height'])
             host = urlparse.urlparse(stream_url).hostname
             sources[stream_url] = scraper_utils.get_quality(video, host, release_quality)
     return sources

示例#17

0

显示文件

文件： tvhd_scraper.py 项目： Lhse44/repository.deallen

 def __get_sources(self, video, html):
     """Map each link of every matching release block to its quality.

     A block is a bold, centred release title followed by a table row of
     links.  Blocks whose tag-stripped title fails
     ``scraper_utils.release_check`` are ignored; the quality of the
     remaining links is the height parsed from the release title.
     """
     sources = {}
     block_pattern = r'<center>\s*<b>\s*(.*?)\s*</b>.*?<tr>(.*?)</tr>'
     for block in re.finditer(block_pattern, html, re.DOTALL):
         release, links = block.groups()
         release = re.sub('</?[^>]*>', '', release)
         if not scraper_utils.release_check(video, release):
             continue

         meta = scraper_utils.parse_episode_link(release)
         for link in re.finditer('href="([^"]+)', links):
             stream_url = link.group(1)
             sources[stream_url] = scraper_utils.height_get_quality(
                 meta['height'])
     return sources

示例#18

0

显示文件

 def __get_gk_links(self, html, page_url):
     """Resolve every ``server_line`` entry of the page into stream links.

     For each server entry that has film, name and server ids, the ids are
     posted to ``LINK_URL``.  If the reply contains ``s``, it is passed on
     to ``LINK_URL2``, whose ``data`` field is either a single URL string
     (stored with ``QUALITIES.HIGH``) or a list of dicts with ``files`` and
     optionally ``quality``.

     Returns a dict mapping stream URL to quality.
     """
     sources = {}
     for server in dom_parser.parse_dom(html, 'div',
                                        {'class': '[^"]*server_line[^"]*'}):
         film_id = dom_parser.parse_dom(server, 'a', ret='data-film')
         name_id = dom_parser.parse_dom(server, 'a', ret='data-name')
         server_id = dom_parser.parse_dom(server, 'a', ret='data-server')
         if not (film_id and name_id and server_id):
             continue

         data = {
             'ipplugins': 1,
             'ip_film': film_id[0],
             'ip_server': server_id[0],
             'ip_name': name_id[0]
         }
         # Copy XHR instead of aliasing it: writing 'Referer' into the
         # shared module-level dict would leak this page's Referer into
         # every later request that uses XHR.
         headers = dict(XHR)
         headers['Referer'] = page_url
         url = urlparse.urljoin(self.base_url, LINK_URL)
         reply = self._http_get(url,
                                data=data,
                                headers=headers,
                                cache_limit=.25)
         js_data = scraper_utils.parse_json(reply, url)
         if 's' not in js_data:
             continue

         url = urlparse.urljoin(self.base_url, LINK_URL2)
         params = {'u': js_data['s'], 'w': '100%', 'h': 420}
         reply = self._http_get(url,
                                params=params,
                                data=data,
                                headers=headers,
                                cache_limit=.25)
         js_data = scraper_utils.parse_json(reply, url)
         if 'data' not in js_data or not js_data['data']:
             continue

         if isinstance(js_data['data'], basestring):
             # A bare string is a single stream URL with no quality hint.
             sources[js_data['data']] = QUALITIES.HIGH
             continue

         for entry in js_data['data']:
             stream_url = entry['files']
             if self._get_direct_hostname(stream_url) == 'gvideo':
                 quality = scraper_utils.gv_get_quality(stream_url)
             elif 'quality' in entry:
                 quality = scraper_utils.height_get_quality(
                     entry['quality'])
             else:
                 quality = QUALITIES.HIGH
             sources[stream_url] = quality
     return sources

示例#19

0

显示文件

文件： xmovies8_scraper.py 项目： Lhse44/repository.deallen

    def get_sources(self, video):
        """Return direct hoster dicts for the streams of ``video``.

        The page's ``video_id`` is posted to ``VIDEO_URL``; each value of
        the JSON reply that contains ``url=...`` yields one stream (URL
        unquoted).  Non-gvideo streams take their quality from the reply's
        key.  A User-Agent header is appended to every stored URL.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        page_html = self._http_get(page_url, cache_limit=0)
        id_match = re.search(r'var\s*video_id\s*=\s*"([^"]+)', page_html)
        if not id_match:
            return hosters

        headers = {'Referer': page_url}
        headers.update(XHR)
        vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
        reply = self._http_get(vid_url,
                               data={'v': id_match.group(1)},
                               headers=headers,
                               cache_limit=0)
        streams = scraper_utils.parse_json(reply, vid_url)
        for label, value in streams.iteritems():
            url_match = re.search('url=(.*)', value)
            if not url_match:
                continue

            stream_url = urllib.unquote(url_match.group(1))
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(label)
            # Headers are appended only after host/quality were derived
            # from the bare URL.
            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            })
        return hosters

示例#20

0

显示文件

文件： tvwtvs_scraper.py 项目： Lhse44/repository.deallen

 def __get_iframe_links(self, html, page_url):
     """Collect stream links from every iframe embedded in ``html``.

     Iframe URLs come from ``data-lazy-src`` attributes first, then
     ``src``.  Each iframe is fetched with ``page_url`` as Referer and
     scanned for ``"file": "...", "label": "..."`` pairs.  Returns a dict
     mapping stream URL to quality.
     """
     sources = {}
     lazy_urls = dom_parser.parse_dom(html, 'iframe', ret='data-lazy-src')
     plain_urls = dom_parser.parse_dom(html, 'iframe', ret='src')
     pair_pattern = r'"file"\s*:\s*"([^"]+)"\s*,\s*"label"\s*:\s*"([^"]+)'
     for iframe_url in lazy_urls + plain_urls:
         frame_html = self._http_get(iframe_url,
                                     headers={'Referer': page_url},
                                     cache_limit=0)
         for pair in re.finditer(pair_pattern, frame_html, re.DOTALL):
             stream_url, height = pair.groups()
             # Discard anything from the first "; " onwards.
             stream_url = re.sub('; .*', '', stream_url)
             if self._get_direct_hostname(stream_url) == 'gvideo':
                 sources[stream_url] = scraper_utils.gv_get_quality(stream_url)
             else:
                 sources[stream_url] = scraper_utils.height_get_quality(height)
     return sources

示例#21

0

显示文件

 def __get_json_links(self, html, sub):
     """Build direct hoster dicts from the ``sources`` list of a JSON reply.

     Entries without a ``file`` are skipped.  Quality comes from the
     gvideo URL, else the entry's ``label``, else ``QUALITIES.HIGH``.
     ``sub`` is stored unchanged under each hoster's ``subs`` key.
     """
     hosters = []
     js_data = scraper_utils.parse_json(html)
     if 'sources' not in js_data:
         return hosters

     for entry in js_data.get('sources', []):
         stream_url = entry.get('file')
         if stream_url is None:
             continue

         host = scraper_utils.get_direct_hostname(self, stream_url)
         if host == 'gvideo':
             quality = scraper_utils.gv_get_quality(stream_url)
         elif 'label' in entry:
             quality = scraper_utils.height_get_quality(entry['label'])
         else:
             quality = QUALITIES.HIGH
         hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': sub})
     return hosters

示例#22

0

显示文件

    def __get_ajax(self, html, page_url):
        """Resolve streams published through the page's first ``$.ajax("...")`` call.

        The quoted URL of that call is fetched with ``page_url`` as the
        Referer and the response is parsed as JSON. Each value of the parsed
        object is taken as a stream URL; its key is handed to
        ``height_get_quality`` unless the stream is a gvideo link.

        Returns:
            dict mapping stream URL -> quality (empty when no call is found).
        """
        found = re.search('\$\.ajax\(\s*"([^"]+)', html)
        if found is None:
            return {}

        ajax_url = found.group(1)
        reply = self._http_get(ajax_url,
                               headers={'Referer': page_url},
                               cache_limit=.5)
        payload = scraper_utils.parse_json(reply, ajax_url)

        links = {}
        for label in payload:
            link = payload[label]
            if scraper_utils.get_direct_hostname(self, link) == 'gvideo':
                links[link] = scraper_utils.gv_get_quality(link)
            else:
                links[link] = scraper_utils.height_get_quality(label)
        return links

示例#23

0

显示文件

    def __get_posts(self, html):
        """Resolve streams published through the page's first ``$.post("...", {...})`` call.

        The URL and the raw text between the braces are pulled out of
        ``html``; the brace text is converted by ``self.__get_data`` and sent
        as the request data. Each value of the JSON reply is taken as a
        stream URL; its key is handed to ``height_get_quality`` unless the
        stream is a gvideo link.

        Returns:
            dict mapping stream URL -> quality (empty when no call is found).
        """
        found = re.search('\$\.post\("([^"]+)"\s*,\s*\{(.*?)\}', html)
        if found is None:
            return {}

        target_url = found.group(1)
        form = self.__get_data(found.group(2))
        reply = self._http_get(target_url, data=form, cache_limit=.5)
        payload = scraper_utils.parse_json(reply, target_url)

        links = {}
        for label in payload:
            link = payload[label]
            if scraper_utils.get_direct_hostname(self, link) == 'gvideo':
                links[link] = scraper_utils.gv_get_quality(link)
            else:
                links[link] = scraper_utils.height_get_quality(label)
        return links

示例#24

0

显示文件

文件： downloadtube_scraper.py 项目： Lhse44/repository.deallen

 def __get_direct(self, html, page_url):
     """Extract direct streams from an inline ``sources: [...]`` player setup.

     Args:
         html: page markup to scan for the ``sources`` array.
         page_url: sent as the Referer header appended to every stream URL.

     Returns:
         Tuple ``(best_quality, sources)``: ``sources`` is a list of hoster
         dicts and ``best_quality`` is the highest quality among them by
         ``Q_ORDER`` (``QUALITIES.HIGH`` when nothing ranks above it or
         nothing is found).
     """
     sources = []
     best_quality = QUALITIES.HIGH
     match = re.search(r'''['"]?sources["']?\s*:\s*\[(.*?)\}\s*,?\s*\]''', html, re.DOTALL)
     if not match:
         return best_quality, sources

     fragment = match.group(1)
     files = re.findall(r'''['"]?file['"]?\s*:\s*(.*?)['"]([^'"]+)''', fragment, re.DOTALL)
     labels = re.findall(r'''['"]?label['"]?\s*:\s*['"]([^'"]*)''', fragment, re.DOTALL)
     # Files and labels are matched independently, so the lists can differ in
     # length. Pad missing labels with None and let zip() drop surplus labels;
     # the former map(None, ...) pairing crashed on either mismatch.
     labels.extend([None] * (len(files) - len(labels)))
     for (func, stream_url), label in zip(files, labels):
         # ``func`` is whatever precedes the opening quote of the file value;
         # an atob(...) wrapper means the URL is base64 encoded.
         if 'atob' in func:
             stream_url = base64.b64decode(stream_url)
         stream_url = stream_url.replace(' ', '%20')
         host = scraper_utils.get_direct_hostname(self, stream_url)
         if label is None:
             # No label for this file: use the same default as best_quality.
             quality = QUALITIES.HIGH
         else:
             label = re.sub(r'\s*HD', '', label, flags=re.I)
             quality = scraper_utils.height_get_quality(label)
         if Q_ORDER[quality] > Q_ORDER[best_quality]:
             best_quality = quality
         stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
         source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
         sources.append(source)
     return best_quality, sources

示例#25

0

显示文件

文件： hevcbluray_scraper.py 项目： Lhse44/repository.deallen

    def get_sources(self, video):
        """Build the list of (non-direct) hosters for ``video``.

        One quality is derived for the whole page from a ``<digits>p`` token
        in the <title> (default ``QUALITIES.HD720``), and the title is also
        checked for a whitespace-delimited ``3D`` marker. Every link found
        inside an <h3> of the 'entry' div becomes a hoster tagged with
        format 'x265'.

        Returns:
            list of hoster dicts; empty when the video has no usable URL.
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)

        # Defaults used when the title is missing or carries no hints.
        is_3d = False
        page_quality = QUALITIES.HD720
        titles = dom_parser2.parse_dom(html, 'title')
        if titles:
            title_text = titles[0].content
            height = re.search('(\d{3,})p', title_text)
            if height:
                page_quality = scraper_utils.height_get_quality(height.group(1))
            is_3d = bool(re.search('\s+3D\s+', title_text))

        entries = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
        if not entries:
            return hosters

        for _attrs, heading in dom_parser2.parse_dom(entries[0].content, 'h3'):
            anchors = dom_parser2.parse_dom(heading, 'a', req='href')
            for attrs, _content in anchors:
                link = attrs['href']
                hosters.append({
                    'multi-part': False,
                    'url': link,
                    'host': urlparse.urlparse(link).hostname,
                    'class': self,
                    'quality': page_quality,
                    'views': None,
                    'rating': None,
                    'direct': False,
                    'format': 'x265',
                    '3D': is_3d,
                })

        return hosters

示例#26

0

显示文件

    def __get_linked(self, html):
        """Look up the streams linked to the page's ``dizi=`` identifier.

        The identifier is sent as the 'dizi' parameter to ``AJAX_URL`` with
        the ``XHR`` headers, and the 'success' list of the JSON reply is
        scanned for entries carrying a 'src' URL.

        Returns:
            dict mapping stream URL -> quality (empty when no identifier is
            found in ``html``).
        """
        found = re.search('dizi=([^"]+)', html)
        if found is None:
            return {}

        reply = self._http_get(AJAX_URL,
                               params={'dizi': found.group(1)},
                               headers=XHR,
                               cache_limit=.5)
        payload = scraper_utils.parse_json(reply, AJAX_URL)

        links = {}
        for item in payload.get('success', []):
            link = item.get('src')
            if link is None:
                continue

            if scraper_utils.get_direct_hostname(self, link) == 'gvideo':
                links[link] = scraper_utils.gv_get_quality(link)
            elif 'label' in item:
                links[link] = scraper_utils.height_get_quality(item['label'])
            else:
                # Entry has no label to derive a quality from.
                links[link] = QUALITIES.HIGH
        return links

示例#27

0

显示文件

文件： afdahorg_scraper.py 项目： Lhse44/repository.deallen

 def get_sources(self, video):
     """Build the list of direct hosters for ``video``.

     The page's ``video_id`` is posted as 'v' to ``self.info_url`` (with the
     page as Referer plus the ``XHR`` headers). Each value of the JSON reply
     is searched for a ``url=`` part, which is unquoted and used as the
     stream URL; the corresponding key is handed to ``height_get_quality``
     unless the stream is a gvideo link. A User-Agent header is appended to
     every stream URL.

     Returns:
         list of hoster dicts; empty when any lookup step finds nothing.
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     id_match = re.search('var\s*video_id="([^"]+)', html)
     if not id_match:
         return hosters

     headers = {'Referer': page_url}
     headers.update(XHR)
     info = self._http_get(self.info_url,
                           data={'v': id_match.group(1)},
                           headers=headers,
                           cache_limit=0)
     sources = scraper_utils.parse_json(info, self.info_url)
     for label in sources:
         url_match = re.search('url=(.*)', sources[label])
         if not url_match:
             continue

         stream_url = urllib.unquote(url_match.group(1))
         host = self._get_direct_hostname(stream_url)
         if host == 'gvideo':
             quality = scraper_utils.gv_get_quality(stream_url)
         else:
             quality = scraper_utils.height_get_quality(label)
         stream_url += scraper_utils.append_headers(
             {'User-Agent': scraper_utils.get_ua()})
         hosters.append({
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': quality,
             'views': None,
             'rating': None,
             'url': stream_url,
             'direct': True,
         })
     return hosters

示例#28

0

显示文件

 def get_sources(self, video):
     """Build the list of (non-direct) hosters for ``video``.

     The page is requested with ``require_debrid=True``. Inside the first
     'entry-content' div, every link in a <p> is considered: the link text
     is parsed as a movie or episode release name (depending on
     ``video.video_type``) to obtain the height and, when present, the
     format. Links whose text looks like a multi-part archive, a .rar, a
     sample or an .nfo file are skipped.

     Returns:
         list of hoster dicts; empty when the video has no usable URL or the
         page has no 'entry-content' div.
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     post = dom_parser.parse_dom(html, 'div', {'class': 'entry-content'})
     if not post:
         return hosters

     for paragraph in dom_parser.parse_dom(post[0], 'p'):
         for found in re.finditer('href="([^"]+)[^>]+>([^<]+)', paragraph):
             link, release = found.groups()
             unwanted = (re.search('\.part\.?\d+', release, re.I)
                         or '.rar' in release
                         or 'sample' in release
                         or release.endswith('.nfo'))
             if unwanted:
                 continue

             hostname = urlparse.urlparse(link).hostname
             if video.video_type == VIDEO_TYPES.MOVIE:
                 parse_link = scraper_utils.parse_movie_link
             else:
                 parse_link = scraper_utils.parse_episode_link
             meta = parse_link(release)
             hoster = {
                 'multi-part': False,
                 'host': hostname,
                 'class': self,
                 'views': None,
                 'url': link,
                 'rating': None,
                 'quality': scraper_utils.height_get_quality(meta['height']),
                 'direct': False,
             }
             if 'format' in meta:
                 hoster['format'] = meta['format']
             hosters.append(hoster)
     return hosters

示例#29

0

显示文件

    def get_sources(self, video):
        """Build the list of (non-direct) hosters for ``video``.

        The page is requested with ``require_debrid=True``. Within the first
        'post-cont' div, the text between the first ``<p><strong>`` and the
        following ``<script`` is scanned for links. Each link that
        ``excluded_link`` does not reject is parsed as a movie or episode
        release (depending on ``video.video_type``) and its height-based
        quality is passed through ``scraper_utils.get_quality`` together
        with the host.

        Returns:
            list of hoster dicts; empty when any lookup step finds nothing.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
        containers = dom_parser2.parse_dom(html, 'div', {'class': 'post-cont'})
        if not containers:
            return hosters

        block = re.search('<p>\s*<strong>(.*?)<script', containers[0].content,
                          re.DOTALL)
        if not block:
            return hosters

        anchors = dom_parser2.parse_dom(block.group(1), 'a', req='href')
        for attrs, _content in anchors:
            link = attrs['href']
            if scraper_utils.excluded_link(link):
                continue

            if video.video_type == VIDEO_TYPES.MOVIE:
                parse_link = scraper_utils.parse_movie_link
            else:
                parse_link = scraper_utils.parse_episode_link
            meta = parse_link(link)

            hostname = urlparse.urlparse(link).hostname
            base_quality = scraper_utils.height_get_quality(meta['height'])
            quality = scraper_utils.get_quality(video, hostname, base_quality)
            hosters.append({
                'multi-part': False,
                'host': hostname,
                'class': self,
                'views': None,
                'url': link,
                'rating': None,
                'quality': quality,
                'direct': False,
            })

        return hosters

示例#30

0

显示文件

文件： rainierland_scraper.py 项目： Lhse44/repository.deallen

    def get_sources(self, video):
        """Build the list of direct hosters for ``video``.

        The first div whose class matches ``screen`` is located on the page.
        If it references an external script, that script is fetched
        (relative to ``self.base_url``) and scanned; otherwise the div
        itself is scanned. Every ``<source ... src="...">`` found becomes a
        hoster. Non-gvideo streams get a ``|User-Agent=`` suffix appended.

        Returns:
            list of hoster dicts; empty when the video has no usable URL or
            the page has no matching div.
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page = self._http_get(source_url, cache_limit=.5)
        fragment = dom_parser.parse_dom(page, 'div',
                                        {'class': '[^"]*screen[^"]*'})
        if not fragment:
            return hosters

        js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src')
        if js_src:
            js_url = urlparse.urljoin(self.base_url, js_src[0])
            markup = self._http_get(js_url, cache_limit=.5)
        else:
            markup = fragment[0]

        for found in re.finditer('<source[^>]+src="([^"]+)', markup):
            stream_url = found.group(1)
            host = self._get_direct_hostname(stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                # NOTE(review): assumes parse_movie_link returns a 4-item
                # sequence with the height in third position - TODO confirm
                # against the scraper_utils version in use.
                parsed = scraper_utils.parse_movie_link(stream_url)
                _, _, height, _ = parsed
                quality = scraper_utils.height_get_quality(height)
                stream_url = stream_url + '|User-Agent=%s' % (
                    scraper_utils.get_ua())

            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True,
            })
        return hosters