Python append_headers示例，salts_lib.scraper_utils.append_headers Python示例

示例#1

0

显示文件

文件： dizimag_scraper.py 项目： enursha101/xbmc-addon

    def __create_source(self,
                        stream_url,
                        height,
                        page_url,
                        subs=False,
                        direct=True):
        if direct:
            stream_url = stream_url.replace('\\/', '/')
            if self.get_name().lower() in stream_url:
                headers = {'Referer': page_url}
                redir_url = self._http_get(stream_url,
                                           headers=headers,
                                           method='HEAD',
                                           allow_redirect=False,
                                           cache_limit=.25)
                if redir_url.startswith('http'):
                    stream_url = redir_url
                    stream_url += scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                else:
                    stream_url += scraper_utils.append_headers({
                        'User-Agent':
                        scraper_utils.get_ua(),
                        'Referer':
                        page_url,
                        'Cookie':
                        self._get_stream_cookies()
                    })
            else:
                stream_url += scraper_utils.append_headers({
                    'User-Agent':
                    scraper_utils.get_ua(),
                    'Referer':
                    page_url
                })

            host = scraper_utils.get_direct_hostname(self, stream_url)
        else:
            host = urlparse.urlparse(stream_url).hostname

        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(height)

        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': direct
        }
        if subs: hoster['subs'] = 'Turkish Subtitles'
        return hoster

示例#2

0

显示文件

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        headers = {
            'User-Agent': scraper_utils.get_ua(),
            'Referer': self.base_url + source_url
        }
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(source_url)
            stream_url = source_url + scraper_utils.append_headers(headers)
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {
                'multi-part': False,
                'host': scraper_utils.get_direct_hostname(self, stream_url),
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            }
            if 'format' in meta: hoster['format'] = meta['format']
            hosters.append(hoster)
        else:
            for episode in self.__match_episode(source_url, video):
                meta = scraper_utils.parse_episode_link(episode['title'])
                stream_url = episode['url'] + scraper_utils.append_headers(
                    headers)
                stream_url = stream_url.replace(self.base_url, '')
                quality = scraper_utils.height_get_quality(meta['height'])
                hoster = {
                    'multi-part': False,
                    'host':
                    scraper_utils.get_direct_hostname(self, stream_url),
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': True
                }
                if 'format' in meta: hoster['format'] = meta['format']
                if 'size' in episode:
                    hoster['size'] = scraper_utils.format_size(
                        int(episode['size']))
                hosters.append(hoster)

        return hosters

示例#3

0

显示文件

文件： pubfilmto_scraper.py 项目： enursha101/xbmc-addon

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        iframe_url = dom_parser2.parse_dom(html,
                                           'iframe', {'id': 'myiframe'},
                                           req='src',
                                           exclude_comments=True)
        if not iframe_url: return hosters
        iframe_url = iframe_url[0].attrs['src']
        html = self._http_get(iframe_url,
                              headers={'Referer': page_url},
                              cache_limit=.5)

        for source in dom_parser2.parse_dom(html,
                                            'source', {'type': 'video/mp4'},
                                            req=['src', 'data-res']):
            stream_url = source.attrs['src']
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
                stream_url += scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
            else:
                quality = scraper_utils.height_get_quality(
                    source.attrs['data-res'])
                stream_url += scraper_utils.append_headers({
                    'User-Agent':
                    scraper_utils.get_ua(),
                    'Referer':
                    page_url
                })

            source = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True
            }
            hosters.append(source)

        return hosters

示例#4

0

显示文件

文件： xmovies8_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=0)
        match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
        if not match: return hosters
        
        video_id = match.group(1)
        headers = {'Referer': page_url}
        headers.update(XHR)
        _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)
        
        vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
        html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0)
        for source, value in scraper_utils.parse_json(html, vid_url).iteritems():
            match = re.search('url=(.*)', value)
            if not match: continue
            stream_url = urllib.unquote(match.group(1))

            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(source)
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            hosters.append(hoster)
        return hosters

示例#5

0

显示文件

文件： seriesonline_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        hosters = []
        sources = {}
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        for match in re.finditer('player-data="([^"]+)[^>]+episode-data="([^"]+)[^>]*>(.*?)</a>', html, re.DOTALL):
            player_url, ep_id, label = match.groups()
            if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(video, ep_id):
                continue
            label = label.strip()
            headers = {'Referer': page_url}
            if re.match('https?://embed', player_url):
                src_html = self._http_get(player_url, headers=headers, cache_limit=.5)
                sources.update(scraper_utils.parse_sources_list(self, src_html))
                sources.update(self.__get_sources(src_html, label))
            else:
                sources[player_url] = {'direct': False, 'quality': Q_MAP.get(label.upper(), QUALITIES.HIGH)}
                    
        for source, value in sources.iteritems():
            direct = value['direct']
            quality = value['quality']
            if direct:
                host = scraper_utils.get_direct_hostname(self, source)
                stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            else:
                host = urlparse.urlparse(source).hostname
                stream_url = source

            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)
            
        return hosters

示例#6

0

显示文件

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
        if fragment: html = fragment[0].content
        links = scraper_utils.parse_sources_list(self, html)
        for link in links:
            stream_url = link
            if self.base_url in link:
                redir_url = self._http_get(link, headers={'Referer': url}, allow_redirect=False, method='HEAD')
                if redir_url.startswith('http'):
                    stream_url = redir_url
            
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = links[link]['quality']
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
                
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            hosters.append(source)

        return hosters

示例#7

0

显示文件

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
            iframe_src = dom_parser2.parse_dom(fragment, 'iframe', req='src')
            if iframe_src:
                iframe_src = iframe_src[0].attrs['src']
                if re.search('o(pen)?load', iframe_src, re.I):
                    meta = scraper_utils.parse_movie_link(iframe_src)
                    quality = scraper_utils.height_get_quality(meta['height'])
                    links = {iframe_src: {'quality': quality, 'direct': False}}
                else:
                    links = self.__get_links(iframe_src, url)

                for link in links:
                    direct = links[link]['direct']
                    quality = links[link]['quality']
                    if direct:
                        host = scraper_utils.get_direct_hostname(self, link)
                        if host == 'gvideo':
                            quality = scraper_utils.gv_get_quality(link)
                        stream_url = link + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
                    else:
                        host = urlparse.urlparse(link).hostname
                        stream_url = link
                        
                    source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                    hosters.append(source)

        return hosters

示例#8

0

显示文件

文件： ol_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
        if fragment: html = fragment[0].content
        links = scraper_utils.parse_sources_list(self, html)
        for link in links:
            stream_url = link
            if self.base_url in link:
                redir_url = self._http_get(link, headers={'Referer': url}, allow_redirect=False, method='HEAD')
                if redir_url.startswith('http'):
                    stream_url = redir_url
            
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = links[link]['quality']
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
                
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            hosters.append(source)

        return hosters

示例#9

0

显示文件

文件： fmovie_scraper.py 项目： CYBERxNUKE/xbmc-addon

 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     match = re.search('var\s*video_id="([^"]+)', html)
     if not match: return hosters
     
     video_id = match.group(1)
     data = {'v': video_id}
     headers = {'Referer': page_url}
     headers.update(XHR)
     html = self._http_get(INFO_URL, data=data, headers=headers, cache_limit=.5)
     sources = scraper_utils.parse_json(html, INFO_URL)
     for source in sources:
         match = re.search('url=(.*)', sources[source])
         if not match: continue
         
         stream_url = urllib.unquote(match.group(1))
         host = scraper_utils.get_direct_hostname(self, stream_url)
         if host == 'gvideo':
             quality = scraper_utils.gv_get_quality(stream_url)
         else:
             quality = scraper_utils.height_get_quality(source)
         stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
         hosters.append(hoster)
     return hosters

示例#10

0

显示文件

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
        if fragment: html = fragment[0].content
        iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
        if not iframe_url: return hosters
        iframe_url = iframe_url[0].attrs['src']
        if iframe_url.startswith('/'):
            iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
        html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
        obj = dom_parser2.parse_dom(html, 'object', req='data')
        if obj:
            streams = dict((stream_url, {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}) for stream_url in
                           scraper_utils.parse_google(self, obj[0].attrs['data']))
        else:
            streams = scraper_utils.parse_sources_list(self, html)
            
        for stream_url, values in streams.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = values['quality']
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                 
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            hosters.append(source)

        return hosters

示例#11

0

显示文件

文件： afdah_scraper.py 项目： normico21/repository.xvbmc

 def __get_links(self, html):
     hosters = []
     r = re.search('tlas\("([^"]+)', html)
     if r:
         plaintext = self.__caesar(
             self.__get_f(self.__caesar(r.group(1), 13)), 13)
         sources = scraper_utils.parse_sources_list(self, plaintext)
         for source in sources:
             stream_url = source + scraper_utils.append_headers(
                 {
                     'User-Agent': scraper_utils.get_ua(),
                     'Cookie': self._get_stream_cookies()
                 })
             host = scraper_utils.get_direct_hostname(self, stream_url)
             hoster = {
                 'multi-part': False,
                 'url': stream_url,
                 'host': host,
                 'class': self,
                 'quality': sources[source]['quality'],
                 'rating': None,
                 'views': None,
                 'direct': True
             }
             hosters.append(hoster)
     return hosters

示例#12

0

显示文件

文件： snagfilms_scraper.py 项目： CYBERxNUKE/xbmc-addon

 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
     if fragment:
         iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
         if iframe_url:
             iframe_url = scraper_utils.urljoin(self.base_url, iframe_url[0].attrs['src'])
             headers = {'Referer': page_url}
             html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
             sources = scraper_utils.parse_sources_list(self, html)
             for source in sources:
                 quality = sources[source]['quality']
                 host = scraper_utils.get_direct_hostname(self, source)
                 stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
                 hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                 match = re.search('(\d+[a-z]bps)', source)
                 if match:
                     hoster['extra'] = match.group(1)
                 hosters.append(hoster)
                     
     hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
     return hosters

示例#13

0

显示文件

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
            stream_url = attrs['href']
            if MOVIE_URL in stream_url:
                meta = scraper_utils.parse_movie_link(stream_url)
                stream_url = scraper_utils.pathify_url(
                    stream_url) + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                quality = scraper_utils.height_get_quality(meta['height'])
                hoster = {
                    'multi-part': False,
                    'host':
                    scraper_utils.get_direct_hostname(self, stream_url),
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': True
                }
                if 'format' in meta: hoster['format'] = meta['format']
                hosters.append(hoster)

        return hosters

示例#14

0

显示文件

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=0)
        match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
        if not match: return hosters
        
        video_id = match.group(1)
        headers = {'Referer': page_url}
        headers.update(XHR)
        _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)
        
        vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
        html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0)
        for source, value in scraper_utils.parse_json(html, vid_url).iteritems():
            match = re.search('url=(.*)', value)
            if not match: continue
            stream_url = urllib.unquote(match.group(1))

            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(source)
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            hosters.append(hoster)
        return hosters

示例#15

0

显示文件

文件： quikr_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        page_quality = QUALITIES.HD720 if video.video_type == VIDEO_TYPES.MOVIE else QUALITIES.HIGH
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'embed-responsive'}):
            iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='data-src')
            if iframe_url:
                iframe_url = iframe_url[0].attrs['data-src']
                iframe_host = urlparse.urlparse(iframe_url).hostname
                if iframe_host in DIRECT_HOSTS:
                    sources = self.__parse_streams(iframe_url, url)
                else:
                    sources = {iframe_url: {'quality': scraper_utils.get_quality(video, iframe_host, page_quality), 'direct': False}}
            
            for source in sources:
                quality = sources[source]['quality']
                direct = sources[source]['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                else:
                    host = urlparse.urlparse(source).hostname
                    stream_url = source
                
                hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(hoster)

        return hosters

示例#16

0

显示文件

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        sources = self.__get_posts(html)
        sources.update(self.__get_ajax(html, url))
        sources.update(self.__get_embedded(html, url))
        for source in sources:
            stream_url = source + scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            host = scraper_utils.get_direct_hostname(self, source)
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': sources[source],
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True,
                'subs': 'Turkish subtitles'
            }
            hosters.append(hoster)

        return hosters

示例#17

0

显示文件

    def get_sources(self, video):
        hosters = []
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        match = re.search('var\s+view_id\s*=\s*"([^"]+)', html)
        if not match: return hosters
        view_id = match.group(1)
        
        for lang in ['or', 'tr']:
            subs = True if lang == 'tr' else False
            view_data = {'id': view_id, 'tip': 'view', 'dil': lang}
            html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
            html = html.strip()
            html = re.sub(r'\\n|\\t', '', html)
            match = re.search('var\s+sources\s*=\s*(\[.*?\])', html)
            if match:
                raw_data = match.group(1)
                raw_data = raw_data.replace('\\', '')
            else:
                raw_data = html
             
            js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
            if 'data' not in js_data: continue
            
            src = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src')
            if not src: continue
            
            html = self._http_get(src[0].attrs['src'], cache_limit=.25)
            for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'):
                src = attrs['src']
                if not src.startswith('http'): continue
                sources.append({'label': '720p', 'file': src, 'direct': False, 'subs': subs})
            
            sources += [{'file': url, 'subs': subs} for url in scraper_utils.parse_sources_list(self, html).iterkeys()]
            
            if sources: break

        for source in sources:
            direct = source.get('direct', True)
            stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            if direct:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in source:
                    quality = scraper_utils.height_get_quality(source['label'])
                else:
                    continue
            else:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.height_get_quality(source['label'])
        
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            if source.get('subs'): hoster['subs'] = 'Turkish Subtitles'
            hosters.append(hoster)
    
        return hosters

示例#18

0

显示文件

文件： piratejunkies_scraper.py 项目： CYBERxNUKE/xbmc-addon

 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
     html = self._http_get(js_url, cache_limit=48)
     if source_url.startswith('/'):
         source_url = source_url[1:]
     pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (source_url)
     match = re.search(pattern, html, re.I)
     if match:
         stream_url = match.group(1)
         if 'drive.google' in stream_url or 'docs.google' in stream_url:
             sources = scraper_utils.parse_google(self, stream_url)
         else:
             sources = [stream_url]
         
         for source in sources:
             stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
             host = scraper_utils.get_direct_hostname(self, source)
             if host == 'gvideo':
                 quality = scraper_utils.gv_get_quality(source)
                 direct = True
             elif 'youtube' in stream_url:
                 quality = QUALITIES.HD720
                 direct = False
                 host = 'youtube.com'
             else:
                 quality = QUALITIES.HIGH
                 direct = True
             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
             hosters.append(hoster)
     return hosters

示例#19

0

显示文件

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.EPISODE:
            gk_html = ''.join(match.group(0) for match in re.finditer('<a[^>]*>(%s|Server \d+)</a>' % (video.episode), html, re.I))
        else:
            gk_html = html
        link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        sources = scraper_utils.get_gk_links(self, gk_html, page_url, QUALITIES.HIGH, link_url, player_url)
        sources.update(self.__get_ht_links(html, page_url))
        
        for stream_url, quality in sources.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                direct = True
            else:
                host = urlparse.urlparse(stream_url).hostname
                direct = False
            
            if host is None: continue
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)

        return hosters

示例#20

0

显示文件

文件： dizibox_scraper.py 项目： CYBERxNUKE/xbmc-addon

 def __get_king_links(self, iframe_url):
     hosters = []
     match = re.search('v=(.*)', iframe_url)
     if match:
         data = {'ID': match.group(1)}
         headers = {'Referer': iframe_url}
         headers.update(XHR)
         xhr_url = iframe_url.split('?')[0]
         html = self._http_get(xhr_url, params={'p': 'GetVideoSources'}, data=data, headers=headers, cache_limit=.5)
         js_data = scraper_utils.parse_json(html, xhr_url)
         try:
             for source in js_data['VideoSources']:
                 stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                 host = scraper_utils.get_direct_hostname(self, source['file'])
                 label = source.get('label', '')
                 if host == 'gvideo':
                     quality = scraper_utils.gv_get_quality(source['file'])
                 elif re.search('\d+p?', label):
                     quality = scraper_utils.height_get_quality(label)
                 else:
                     quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
                 hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
                 hosters.append(hoster)
         except:
             pass
         
     return hosters

示例#21

0

显示文件

文件： diziay_scraper.py 项目： enursha101/xbmc-addon

 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     sources = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=1)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'player'})
     if not fragment: return hosters
     
     iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
     if not iframe_url: return hosters
     
     html = self._http_get(iframe_url[0].attrs['src'], cache_limit=.25)
     sources.append(self.__get_embedded_sources(html))
     sources.append(self.__get_linked_sources(html))
     for source in sources:
         for stream_url in source['sources']:
             host = scraper_utils.get_direct_hostname(self, stream_url)
             if host == 'gvideo':
                 stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                 quality = scraper_utils.gv_get_quality(stream_url)
                 hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                 hoster['subs'] = source.get('subs', True)
                 hosters.append(hoster)
 
     return hosters

示例#22

0

显示文件

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        headers = {'Referer': page_url}
        html = self._http_get(page_url, headers=headers, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.MOVIE:
            fragment = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
            if fragment:
                movie_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                if movie_url:
                    page_url = scraper_utils.urljoin(self.base_url, movie_url[0].attrs['href'])
                    html = self._http_get(page_url, cache_limit=.5)
                    episodes = self.__get_episodes(html)
                    page_url = self.__get_best_page(episodes)
                    if not page_url:
                        return hosters
                    else:
                        page_url = scraper_utils.urljoin(self.base_url, page_url)
                        html = self._http_get(page_url, cache_limit=.5)
        
        streams = dom_parser2.parse_dom(html, 'iframe', req='src')
        if streams:
            streams = [(attrs['src'], 480) for attrs, _content in streams]
            direct = False
        else:
            streams = [(attrs['src'], attrs.get('data-res', 480)) for attrs, _content in dom_parser2.parse_dom(html, 'source', req=['src'])]
            direct = True
            
        headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}
        for stream_url, height in streams:
            if 'video.php' in stream_url or 'moviexk.php' in stream_url:
                if 'title=' in stream_url:
                    title = stream_url.split('title=')[-1]
                    stream_url = stream_url.replace(title, urllib.quote(title))
                redir_url = self._http_get(stream_url, headers=headers, allow_redirect=False, method='HEAD', cache_limit=0)
                if redir_url.startswith('http'):
                    redir_url = redir_url.replace(' ', '').split(';codec')[0]
                    stream_url = redir_url
                else:
                    continue
            
            if direct:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = scraper_utils.height_get_quality(height)
                stream_url += scraper_utils.append_headers(headers)
            else:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.height_get_quality(height)
            
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
            hosters.append(source)

        return hosters

示例#23

0

显示文件

文件： dizibox_scraper.py 项目： CYBERxNUKE/xbmc-addon

 def __get_embed_links(self, html):
     hosters = []
     sources = scraper_utils.parse_sources_list(self, html)
     for source in sources:
         quality = source['quality']
         stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
         hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, source), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
         hosters.append(hoster)
     return hosters

示例#24

0

显示文件

文件： watch5s_scraper.py 项目： enursha101/xbmc-addon

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        sources = {}
        headers = {'Accept-Language': 'en-US,en;q=0.5'}
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, headers=headers, cache_limit=2)
        if video.video_type == VIDEO_TYPES.MOVIE:
            sources.update(self.__scrape_sources(html, page_url))
            pages = set([
                r.attrs['href'] for r in dom_parser2.parse_dom(
                    html, 'a', {'class': 'btn-eps'}, req='href')
            ])
            active = set([
                r.attrs['href'] for r in dom_parser2.parse_dom(
                    html, 'a', {'class': 'active'}, req='href')
            ])
            for page in list(pages - active):
                page_url = scraper_utils.urljoin(self.base_url, page)
                html = self._http_get(page_url, headers=headers, cache_limit=2)
                sources.update(self.__scrape_sources(html, page_url))
        else:
            for page in self.__match_episode(video, html):
                page_url = scraper_utils.urljoin(self.base_url, page)
                html = self._http_get(page_url, headers=headers, cache_limit=2)
                sources.update(self.__scrape_sources(html, page_url))

        for source, values in sources.iteritems():
            if not source.lower().startswith('http'): continue
            if values['direct']:
                host = scraper_utils.get_direct_hostname(self, source)
                if host != 'gvideo':
                    stream_url = source + scraper_utils.append_headers(
                        {
                            'User-Agent': scraper_utils.get_ua(),
                            'Referer': page_url
                        })
                else:
                    stream_url = source
            else:
                host = urlparse.urlparse(source).hostname
                stream_url = source
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': values['quality'],
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': values['direct']
            }
            hosters.append(hoster)

        return hosters

示例#25

0

显示文件

文件： pubfilm_scraper.py 项目： enursha101/xbmc-addon

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        views = None
        fragment = dom_parser2.parse_dom(html, 'span', {'class': 'post-views'})
        if fragment:
            views = re.sub('[^\d]', '', fragment[0].content)

        iframe_urls = []
        if video.video_type == VIDEO_TYPES.MOVIE:
            iframe_urls = [
                r.attrs['href'] for r in dom_parser2.parse_dom(
                    html, 'a', {'class': ['orange', 'abutton']}, req='href')
            ]
        else:
            for label, link in self.__get_episode_links(html):
                if int(label) == int(video.episode):
                    iframe_urls.append(link)

        for iframe_url in iframe_urls:
            headers = {'Referer': url}
            html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
            match = re.search('{link\s*:\s*"([^"]+)', html)
            if match:
                sources = self.__get_gk_links(match.group(1), iframe_url)
            else:
                sources = scraper_utils.parse_sources_list(self, html)

            for source in sources:
                stream_url = source + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
                direct = sources[source]['direct']
                quality = sources[source]['quality']
                if sources[source]['direct']:
                    host = scraper_utils.get_direct_hostname(self, source)
                else:
                    host = urlparse.urlparse(source).hostname
                hoster = {
                    'multi-part': False,
                    'url': stream_url,
                    'class': self,
                    'quality': quality,
                    'host': host,
                    'rating': None,
                    'views': views,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters

示例#26

0

显示文件

文件： dizilab_scraper.py 项目： enursha101/xbmc-addon

 def __get_cloud_links(self, html, page_url, sub):
     hosters = []
     html = html.replace('\\"', '"').replace('\\/', '/')
     match = re.search("dizi_kapak_getir\('([^']+)", html)
     if match:
         ep_id = match.group(1)
         for attrs, _content in dom_parser2.parse_dom(
                 html, 'script', {'data-cfasync': 'false'}, req='src'):
             script_url = attrs['src']
             html = self._http_get(script_url, cache_limit=24)
             match1 = re.search("var\s+kapak_url\s*=\s*'([^']+)", html)
             match2 = re.search("var\s+aCtkp\s*=\s*'([^']+)", html)
             if match1 and match2:
                 link_url = '%s?fileid=%s&access_token=%s' % (
                     match1.group(1), ep_id, match2.group(1))
                 headers = {'Referer': page_url}
                 html = self._http_get(link_url,
                                       headers=headers,
                                       cache_limit=.5)
                 js_data = scraper_utils.parse_json(html, link_url)
                 for variant in js_data.get('variants', {}):
                     stream_host = random.choice(variant.get('hosts', []))
                     if stream_host:
                         stream_url = stream_host + variant[
                             'path'] + scraper_utils.append_headers(
                                 {
                                     'User-Agent': scraper_utils.get_ua(),
                                     'Referer': page_url
                                 })
                         if not stream_url.startswith('http'):
                             stream_url = 'http://' + stream_url
                         host = scraper_utils.get_direct_hostname(
                             self, stream_url)
                         if 'width' in variant:
                             quality = scraper_utils.width_get_quality(
                                 variant['width'])
                         elif 'height' in variant:
                             quality = scraper_utils.height_get_quality(
                                 variant['height'])
                         else:
                             quality = QUALITIES.HIGH
                         hoster = {
                             'multi-part': False,
                             'host': host,
                             'class': self,
                             'quality': quality,
                             'views': None,
                             'rating': None,
                             'url': stream_url,
                             'direct': True
                         }
                         hoster['subs'] = sub
                         hosters.append(hoster)
     return hosters

示例#27

0

显示文件

文件： afdah_scraper.py 项目： CYBERxNUKE/xbmc-addon

 def __get_links(self, html):
     hosters = []
     r = re.search('tlas\("([^"]+)', html)
     if r:
         plaintext = self.__caesar(self.__get_f(self.__caesar(r.group(1), 13)), 13)
         sources = scraper_utils.parse_sources_list(self, plaintext)
         for source in sources:
             stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Cookie': self._get_stream_cookies()})
             host = scraper_utils.get_direct_hostname(self, stream_url)
             hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': sources[source]['quality'], 'rating': None, 'views': None, 'direct': True}
             hosters.append(hoster)
     return hosters

示例#28

0

显示文件

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        hosts = [
            r.content for r in dom_parser2.parse_dom(
                html, 'p', {'class': 'server_servername'})
        ]
        links = [
            r.content
            for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_play'})
        ]
        for host, link_frag in zip(hosts, links):
            stream_url = dom_parser2.parse_dom(link_frag, 'a', req='href')
            if not stream_url: continue

            stream_url = stream_url[0].attrs['href']
            host = re.sub('^Server\s*', '', host, re.I)
            host = re.sub('\s*Link\s+\d+', '', host)
            if host.lower() == 'google':
                sources = self.__get_gvideo_links(stream_url)
            else:
                sources = [{'host': host, 'link': stream_url}]

            for source in sources:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                    stream_url = source['link'] + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                    direct = True
                else:
                    stream_url = scraper_utils.pathify_url(source['link'])
                    host = HOST_SUB.get(source['host'].lower(), source['host'])
                    quality = scraper_utils.get_quality(
                        video, host, QUALITIES.HIGH)
                    direct = False

                hoster = {
                    'multi-part': False,
                    'url': stream_url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters

示例#29

0

显示文件

文件： seriesonline_scraper.py 项目： normico21/repository.xvbmc

    def get_sources(self, video):
        hosters = []
        sources = {}
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        for match in re.finditer(
                'player-data="([^"]+)[^>]+episode-data="([^"]+)[^>]*>(.*?)</a>',
                html, re.DOTALL):
            player_url, ep_id, label = match.groups()
            if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(
                    video, ep_id):
                continue
            label = label.strip()
            headers = {'Referer': page_url}
            if re.match('https?://embed', player_url):
                src_html = self._http_get(player_url,
                                          headers=headers,
                                          cache_limit=.5)
                sources.update(scraper_utils.parse_sources_list(
                    self, src_html))
                sources.update(self.__get_sources(src_html, label))
            else:
                sources[player_url] = {
                    'direct': False,
                    'quality': Q_MAP.get(label.upper(), QUALITIES.HIGH)
                }

        for source, value in sources.iteritems():
            direct = value['direct']
            quality = value['quality']
            if direct:
                host = scraper_utils.get_direct_hostname(self, source)
                stream_url = source + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
            else:
                host = urlparse.urlparse(source).hostname
                stream_url = source

            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': direct
            }
            hosters.append(hoster)

        return hosters

示例#30

0

显示文件

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        page_quality = QUALITIES.HD720 if video.video_type == VIDEO_TYPES.MOVIE else QUALITIES.HIGH
        for _attrs, fragment in dom_parser2.parse_dom(
                html, 'div', {'class': 'embed-responsive'}):
            iframe_url = dom_parser2.parse_dom(fragment,
                                               'iframe',
                                               req='data-src')
            if iframe_url:
                iframe_url = iframe_url[0].attrs['data-src']
                iframe_host = urlparse.urlparse(iframe_url).hostname
                if iframe_host in DIRECT_HOSTS:
                    sources = self.__parse_streams(iframe_url, url)
                else:
                    sources = {
                        iframe_url: {
                            'quality':
                            scraper_utils.get_quality(video, iframe_host,
                                                      page_quality),
                            'direct':
                            False
                        }
                    }

            for source in sources:
                quality = sources[source]['quality']
                direct = sources[source]['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    stream_url = source + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                else:
                    host = urlparse.urlparse(source).hostname
                    stream_url = source

                hoster = {
                    'multi-part': False,
                    'url': stream_url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters

示例#31

0

显示文件

文件： nitertv_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        match = re.search('((?:pic|emb|vb|dir|emb2)=[^<]+)', html)
        if match:
            embeds = match.group(1)
            for stream_url in embeds.split('&'):
                if stream_url.startswith('dir='):
                    headers = {'Referer': url}
                    html = self._http_get(DIR_URL, params={'v': stream_url[3:]}, headers=headers, auth=False, allow_redirect=False, cache_limit=.5)
                    if html.startswith('http'):
                        stream_url = html + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
                        host = scraper_utils.get_direct_hostname(self, stream_url)
                        direct = True
                        quality = QUALITIES.HD720
                    else:
                        continue
                elif stream_url.startswith('vb='):
                    stream_url = 'http://www.vidbux.com/%s' % (stream_url[3:])
                    host = 'vidbux.com'
                    direct = False
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HD1080)
                elif stream_url.startswith('pic='):
                    data = {'url': stream_url[4:]}
                    html = self._http_get(PHP_URL, data=data, auth=False, cache_limit=1)
                    js_data = scraper_utils.parse_json(html, PHP_URL)
                    host = scraper_utils.get_direct_hostname(self, stream_url)
                    direct = True
                    for item in js_data:
                        if item.get('medium') == 'video':
                            stream_url = item['url']
                            quality = scraper_utils.width_get_quality(item['width'])
                            break
                    else:
                        continue
                elif stream_url.startswith(('emb=', 'emb2=')):
                    stream_url = re.sub('emb\d*=', '', stream_url)
                    host = urlparse.urlparse(stream_url).hostname
                    direct = False
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HD720)
                else:
                    continue

                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
                hosters.append(hoster)

        return hosters

示例#32

0

显示文件

    def get_sources(self, video):
        source_url = self.get_url(video)
        sources = []
        if not source_url or source_url == FORCE_NO_MATCH: return sources
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'img', req=['data-id', 'data-name']):
            film_id, data_name = attrs['data-id'], attrs['data-name']
            data = {'id': film_id, 'n': data_name}
            server_url = scraper_utils.urljoin(self.base_url, SERVER_URL)
            server_url = server_url % (film_id)
            headers = {'Referer': page_url}
            headers.update(XHR)
            html = self._http_get(server_url, data=data, headers=headers, cache_limit=.5)
            for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-id'):
                data = {'epid': attrs['data-id']}
                ep_url = scraper_utils.urljoin(self.base_url, EP_URL)
                ep_url = ep_url % (attrs['data-id'])
                headers = {'Referer': page_url}
                headers.update(XHR)
                html = self._http_get(ep_url, data=data, headers=headers, cache_limit=.5)
                js_data = scraper_utils.parse_json(html, ep_url)
                try:
                    links = [r.attrs['src'] for r in dom_parser2.parse_dom(js_data['link']['embed'], 'iframe', req='src')]
                except:
                    try: links = js_data['link']['l']
                    except: links = []
                try: heights = js_data['link']['q']
                except: heights = []
                for stream_url, height in map(None, links, heights):
                    match = re.search('movie_url=(.*)', stream_url)
                    if match:
                        stream_url = match.group(1)
                        
                    host = scraper_utils.get_direct_hostname(self, stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                        direct = True
                    else:
                        host = urlparse.urlparse(stream_url).hostname
                        if height:
                            quality = scraper_utils.height_get_quality(height)
                        else:
                            quality = QUALITIES.HD720
                        direct = False
                    source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                    sources.append(source)

        return sources

示例#33

0

显示文件

文件： vivoto_scraper.py 项目： enursha101/xbmc-addon

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        page_quality = dom_parser2.parse_dom(html, 'dd', {'class': 'status'})
        if page_quality:
            page_quality = QUALITY_MAP.get(page_quality[0].content,
                                           QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH

        if video.video_type == VIDEO_TYPES.EPISODE:
            fragment = dom_parser2.parse_dom(html, 'div',
                                             {'id': 'servers-list'})
            gk_html = fragment[0].content if fragment else ''
        else:
            gk_html = html

        link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        for stream_url, quality in scraper_utils.get_gk_links(
                self, gk_html, page_url, page_quality, link_url,
                player_url).iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                direct = True
                quality = quality
            else:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, quality)
                direct = False

            if host is not None:
                stream_url += scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters

示例#34

0

显示文件

文件： serieswatch_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': self.base_url + source_url}
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(source_url)
            stream_url = source_url + scraper_utils.append_headers(headers)
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            if 'format' in meta: hoster['format'] = meta['format']
            hosters.append(hoster)
        else:
            for episode in self.__match_episode(source_url, video):
                meta = scraper_utils.parse_episode_link(episode['title'])
                stream_url = episode['url'] + scraper_utils.append_headers(headers)
                stream_url = stream_url.replace(self.base_url, '')
                quality = scraper_utils.height_get_quality(meta['height'])
                hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                if 'format' in meta: hoster['format'] = meta['format']
                if 'size' in episode: hoster['size'] = scraper_utils.format_size(int(episode['size']))
                hosters.append(hoster)

        return hosters

示例#35

0

显示文件

    def get_sources(self, video):
        source_url = self.get_url(video)
        sources = {}
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=0)
        for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
            match = re.search('src="([^"]+)', match.group(2))
            if match:
                iframe_url = match.group(1)
                if 'play-en.php' in iframe_url:
                    match = re.search('id=([^"&]+)', iframe_url)
                    if match:
                        proxy_link = match.group(1)
                        proxy_link = proxy_link.split('*', 1)[-1]
                        picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                        for stream_url in scraper_utils.parse_google(self, picasa_url):
                            sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
                else:
                    html = self._http_get(iframe_url, cache_limit=0)
                    temp_sources = scraper_utils.parse_sources_list(self, html)
                    for source in temp_sources:
                        if 'download.php' in source:
                            redir_html = self._http_get(source, allow_redirect=False, method='HEAD', cache_limit=0)
                            if redir_html.startswith('http'):
                                temp_sources[redir_html] = temp_sources[source]
                                del temp_sources[source]
                    sources.update(temp_sources)
                    for source in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src'):
                        sources[source.attrs['src']] = {'quality': QUALITIES.HD720, 'direct': True, 'referer': iframe_url}
                                
        for source, values in sources.iteritems():
            host = scraper_utils.get_direct_hostname(self, source)
            headers = {'User-Agent': scraper_utils.get_ua()}
            if 'referer' in values: headers['Referer'] = values['referer']
            stream_url = source + scraper_utils.append_headers(headers)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
            else:
                quality = values['quality']
                if quality not in Q_ORDER:
                    quality = QUALITY_MAP.get(values['quality'], QUALITIES.HIGH)
                    
            hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            hosters.append(hoster)

        return hosters

示例#36

0

显示文件

文件： heydl_scraper.py 项目： CYBERxNUKE/xbmc-addon

 def get_sources(self, video):
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=8)
     for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
         stream_url = attrs['href']
         if MOVIE_URL in stream_url:
             meta = scraper_utils.parse_movie_link(stream_url)
             stream_url = scraper_utils.pathify_url(stream_url) + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
             quality = scraper_utils.height_get_quality(meta['height'])
             hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
             if 'format' in meta: hoster['format'] = meta['format']
             hosters.append(hoster)
             
     return hosters

示例#37

0

显示文件

文件： moviego_scraper.py 项目： enursha101/xbmc-addon

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        q_str = dom_parser2.parse_dom(html, 'div', {'class': 'poster-qulabel'})
        if q_str:
            q_str = q_str[0].content.replace(' ', '').upper()
            page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH
            
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'tab_box'}):
            iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='src')
            if iframe_url:
                iframe_url = iframe_url[0].attrs['src']
                if 'youtube' in iframe_url: continue
                
                html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
                for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
                    js_data = jsunpack.unpack(match.group(1))
                    js_data = js_data.replace('\\', '')
                    html += js_data
                
                sources = scraper_utils.parse_sources_list(self, html)
                if not sources:
                    sources = {iframe_url: {'quality': page_quality, 'direct': False}}
                
                for source, values in sources.iteritems():
                    direct = values['direct']
                    if direct:
                        host = scraper_utils.get_direct_hostname(self, source)
                        if host == 'gvideo':
                            quality = scraper_utils.gv_get_quality(source)
                        else:
                            quality = values['quality']
                        source += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                    else:
                        host = urlparse.urlparse(source).hostname
                        quality = scraper_utils.get_quality(video, host, values['quality'])
                    
                    hoster = {'multi-part': False, 'url': source, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                    hosters.append(hoster)

        return hosters

示例#38

0

显示文件

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        fragment = dom_parser2.parse_dom(html, 'div',
                                         {'class': 'film-container'})
        if fragment:
            iframe_url = dom_parser2.parse_dom(fragment[0].content,
                                               'iframe',
                                               req='src')
            if iframe_url:
                iframe_url = scraper_utils.urljoin(self.base_url,
                                                   iframe_url[0].attrs['src'])
                headers = {'Referer': page_url}
                html = self._http_get(iframe_url,
                                      headers=headers,
                                      cache_limit=.5)
                sources = scraper_utils.parse_sources_list(self, html)
                for source in sources:
                    quality = sources[source]['quality']
                    host = scraper_utils.get_direct_hostname(self, source)
                    stream_url = source + scraper_utils.append_headers(
                        {
                            'User-Agent': scraper_utils.get_ua(),
                            'Referer': iframe_url
                        })
                    hoster = {
                        'multi-part': False,
                        'host': host,
                        'class': self,
                        'quality': quality,
                        'views': None,
                        'rating': None,
                        'url': stream_url,
                        'direct': True
                    }
                    match = re.search('(\d+[a-z]bps)', source)
                    if match:
                        hoster['extra'] = match.group(1)
                    hosters.append(hoster)

        hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
        return hosters

示例#39

0

显示文件

 def __get_pk_links(self, html):
     hosters = []
     match = re.search('var\s+parametros\s*=\s*"([^"]+)', html)
     if match:
         params = scraper_utils.parse_query(match.group(1))
         if 'pic' in params:
             data = {'sou': 'pic', 'fv': '25', 'url': params['pic']}
             html = self._http_get(PK_URL,
                                   headers=XHR,
                                   data=data,
                                   cache_limit=0)
             js_data = scraper_utils.parse_json(html, PK_URL)
             for item in js_data:
                 if 'url' in item and item['url']:
                     if 'width' in item and item['width']:
                         quality = scraper_utils.width_get_quality(
                             item['width'])
                     elif 'height' in item and item['height']:
                         quality = scraper_utils.height_get_quality(
                             item['height'])
                     else:
                         quality = QUALITIES.HD720
                     stream_url = item['url'] + scraper_utils.append_headers(
                         {'User-Agent': scraper_utils.get_ua()})
                     hoster = {
                         'multi-part':
                         False,
                         'url':
                         stream_url,
                         'class':
                         self,
                         'quality':
                         quality,
                         'host':
                         scraper_utils.get_direct_hostname(
                             self, item['url']),
                         'rating':
                         None,
                         'views':
                         None,
                         'direct':
                         True
                     }
                     hosters.append(hoster)
     return hosters

示例#40

0

显示文件

文件： piratejunkies_scraper.py 项目： enursha101/xbmc-addon

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
        html = self._http_get(js_url, cache_limit=48)
        if source_url.startswith('/'):
            source_url = source_url[1:]
        pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (
            source_url)
        match = re.search(pattern, html, re.I)
        if match:
            stream_url = match.group(1)
            if 'drive.google' in stream_url or 'docs.google' in stream_url:
                sources = scraper_utils.parse_google(self, stream_url)
            else:
                sources = [stream_url]

            for source in sources:
                stream_url = source + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
                host = scraper_utils.get_direct_hostname(self, source)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(source)
                    direct = True
                elif 'youtube' in stream_url:
                    quality = QUALITIES.HD720
                    direct = False
                    host = 'youtube.com'
                else:
                    quality = QUALITIES.HIGH
                    direct = True
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': direct
                }
                hosters.append(hoster)
        return hosters

示例#41

0

显示文件

文件： iomovies_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        sources = []
        streams = {}
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return sources
        
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        match = dom_parser2.parse_dom(html, 'a', {'title': re.compile('click to play', re.I)}, req='href')
        if not match: return sources
        
        page_url = scraper_utils.urljoin(self.base_url, match[0].attrs['href'])
        headers = {'Referer': page_url}
        html = self._http_get(page_url, headers=headers, cache_limit=.02)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', {'class': 'mw-episode-btn'}, req=['data-target-i', 'data-target-e', 'title']):
            try:
                if "wasn't alive" in attrs['title']: continue
                vid_url = scraper_utils.urljoin(self.base_url, VID_URL)
                vid_url = vid_url.format(data_i=attrs['data-target-i'], data_e=attrs['data-target-e'])
                headers = {'Referer': page_url}
                headers.update(XHR)
                cookies = self.__get_cookies(html, attrs)
                vid_html = self._http_get(vid_url, headers=headers, cookies=cookies, cache_limit=.02)
                streams.update(self.__get_js_sources(vid_html, vid_url, cookies, page_url))
            except scraper.ScrapeError as e:
                logger.log('IOMovies Error (%s): %s in %s' % (e, vid_url, page_url))
                    
        for stream_url, values in streams.iteritems():
            if values['direct']:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = scraper_utils.height_get_quality(values['label'])
                    stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            else:
                host = urlparse.urlparse(stream_url).hostname
                quality = QUALITIES.HIGH
                 
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': values['direct']}
            sources.append(source)

        return sources

示例#42

0

显示文件

    def __get_king_links(self, iframe_url):
        hosters = []
        match = re.search('v=(.*)', iframe_url)
        if match:
            data = {'ID': match.group(1)}
            headers = {'Referer': iframe_url}
            headers.update(XHR)
            xhr_url = iframe_url.split('?')[0]
            html = self._http_get(xhr_url,
                                  params={'p': 'GetVideoSources'},
                                  data=data,
                                  headers=headers,
                                  cache_limit=.5)
            js_data = scraper_utils.parse_json(html, xhr_url)
            try:
                for source in js_data['VideoSources']:
                    stream_url = source['file'] + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                    host = scraper_utils.get_direct_hostname(
                        self, source['file'])
                    label = source.get('label', '')
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(source['file'])
                    elif re.search('\d+p?', label):
                        quality = scraper_utils.height_get_quality(label)
                    else:
                        quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
                    hoster = {
                        'multi-part': False,
                        'host': host,
                        'class': self,
                        'quality': quality,
                        'views': None,
                        'rating': None,
                        'url': stream_url,
                        'direct': True,
                        'subs': 'Turkish Subtitles'
                    }
                    hosters.append(hoster)
            except:
                pass

        return hosters

示例#43

0

显示文件

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        match = re.search('var\s*video_id="([^"]+)', html)
        if not match: return hosters

        video_id = match.group(1)
        data = {'v': video_id}
        headers = {'Referer': page_url}
        headers.update(XHR)
        html = self._http_get(INFO_URL,
                              data=data,
                              headers=headers,
                              cache_limit=.5)
        sources = scraper_utils.parse_json(html, INFO_URL)
        for source in sources:
            match = re.search('url=(.*)', sources[source])
            if not match: continue

            stream_url = urllib.unquote(match.group(1))
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(source)
            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            }
            hosters.append(hoster)
        return hosters

示例#44

0

显示文件

文件： yesmovies_scraper.py 项目： CYBERxNUKE/xbmc-addon

 def get_sources(self, video):
     hosters = []
     sources = {}
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     movie_id, watching_url, html = self.__get_source_page(video.video_type, page_url)
     
     links = []
     for match in dom_parser2.parse_dom(html, 'li', {'class': 'ep-item'}, req=['data-id', 'data-server']):
         label = dom_parser2.parse_dom(match.content, 'a', req='title')
         if not label: continue
         if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(video, label[0].attrs['title']):
             continue
         links.append((match.attrs['data-server'], match.attrs['data-id']))
         
     for link_type, link_id in links:
         if link_type in ['12', '13', '14', '15']:
             url = scraper_utils.urljoin(self.base_url, PLAYLIST_URL1.format(ep_id=link_id))
             sources.update(self.__get_link_from_json(url))
         elif kodi.get_setting('scraper_url'):
             url = scraper_utils.urljoin(self.base_url, PLAYLIST_URL2.format(ep_id=link_id))
             params = self.__get_params(movie_id, link_id, watching_url)
             if params is not None:
                 url += '?' + urllib.urlencode(params)
             sources.update(self.__get_links_from_json2(url, page_url, video.video_type))
         
     for source in sources:
         if not source.lower().startswith('http'): continue
         if sources[source]['direct']:
             host = scraper_utils.get_direct_hostname(self, source)
             if host != 'gvideo':
                 stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
             else:
                 stream_url = source
         else:
             host = urlparse.urlparse(source).hostname
             stream_url = source
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': sources[source]['quality'], 'views': None, 'rating': None, 'url': stream_url, 'direct': sources[source]['direct']}
         hosters.append(hoster)
             
     return hosters

示例#45

0

显示文件

文件： streamlord_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=1)
        match = re.search('''["']sources['"]\s*:\s*\[(.*?)\]''', html, re.DOTALL)
        if match:
            for match in re.finditer('''['"]*file['"]*\s*:\s*([^\(]+)''', match.group(1), re.DOTALL):
                stream_url = self.__decode(match.group(1), html)
                if stream_url:
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        quality = QUALITIES.HD720
                    else:
                        quality = QUALITIES.HIGH
                    stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url, 'Cookie': self._get_stream_cookies()})
                    hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self, 'url': stream_url, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
                    hosters.append(hoster)

        return hosters

示例#46

0

显示文件

 def __get_embed_links(self, html):
     hosters = []
     sources = scraper_utils.parse_sources_list(self, html)
     for source in sources:
         quality = source['quality']
         stream_url = source + scraper_utils.append_headers(
             {'User-Agent': scraper_utils.get_ua()})
         hoster = {
             'multi-part': False,
             'host': scraper_utils.get_direct_hostname(self, source),
             'class': self,
             'quality': quality,
             'views': None,
             'rating': None,
             'url': stream_url,
             'direct': True,
             'subs': 'Turkish Subtitles'
         }
         hosters.append(hoster)
     return hosters

示例#47

0

显示文件

文件： downloadtube_scraper.py 项目： CYBERxNUKE/xbmc-addon

 def __get_direct(self, html, page_url):
     sources = []
     best_quality = QUALITIES.HIGH
     match = re.search('''['"]?sources["']?\s*:\s*\[(.*?)\}\s*,?\s*\]''', html, re.DOTALL)
     if match:
         files = re.findall('''['"]?file['"]?\s*:\s*(.*?)['"]([^'"]+)''', match.group(1), re.DOTALL)
         labels = re.findall('''['"]?label['"]?\s*:\s*['"]([^'"]*)''', match.group(1), re.DOTALL)
         for stream, label in map(None, files, labels):
             func, stream_url = stream
             if 'atob' in func:
                 stream_url = base64.b64decode(stream_url)
             stream_url = stream_url.replace(' ', '%20')
             host = scraper_utils.get_direct_hostname(self, stream_url)
             label = re.sub(re.compile('\s*HD', re.I), '', label)
             quality = scraper_utils.height_get_quality(label)
             if Q_ORDER[quality] > Q_ORDER[best_quality]: best_quality = quality
             stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
             source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
             sources.append(source)
     return best_quality, sources

示例#48

0

显示文件

文件： pubfilm_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        
        views = None
        fragment = dom_parser2.parse_dom(html, 'span', {'class': 'post-views'})
        if fragment:
            views = re.sub('[^\d]', '', fragment[0].content)
        
        iframe_urls = []
        if video.video_type == VIDEO_TYPES.MOVIE:
            iframe_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(html, 'a', {'class': ['orange', 'abutton']}, req='href')]
        else:
            for label, link in self.__get_episode_links(html):
                if int(label) == int(video.episode):
                    iframe_urls.append(link)
            
        for iframe_url in iframe_urls:
            headers = {'Referer': url}
            html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
            match = re.search('{link\s*:\s*"([^"]+)', html)
            if match:
                sources = self.__get_gk_links(match.group(1), iframe_url)
            else:
                sources = scraper_utils.parse_sources_list(self, html)
                
            for source in sources:
                stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                direct = sources[source]['direct']
                quality = sources[source]['quality']
                if sources[source]['direct']:
                    host = scraper_utils.get_direct_hostname(self, source)
                else:
                    host = urlparse.urlparse(source).hostname
                hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': views, 'direct': direct}
                hosters.append(hoster)

        return hosters

示例#49

0

显示文件

文件： pelispedia_scraper.py 项目： CYBERxNUKE/xbmc-addon

 def __get_pk_links(self, html):
     hosters = []
     match = re.search('var\s+parametros\s*=\s*"([^"]+)', html)
     if match:
         params = scraper_utils.parse_query(match.group(1))
         if 'pic' in params:
             data = {'sou': 'pic', 'fv': '25', 'url': params['pic']}
             html = self._http_get(PK_URL, headers=XHR, data=data, cache_limit=0)
             js_data = scraper_utils.parse_json(html, PK_URL)
             for item in js_data:
                 if 'url' in item and item['url']:
                     if 'width' in item and item['width']:
                         quality = scraper_utils.width_get_quality(item['width'])
                     elif 'height' in item and item['height']:
                         quality = scraper_utils.height_get_quality(item['height'])
                     else:
                         quality = QUALITIES.HD720
                     stream_url = item['url'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                     hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': scraper_utils.get_direct_hostname(self, item['url']), 'rating': None, 'views': None, 'direct': True}
                     hosters.append(hoster)
     return hosters

示例#50

0

显示文件

文件： m4ufree_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        
        views = None
        fragment = dom_parser2.parse_dom(html, 'img', {'src': re.compile('[^"]*view_icon.png')})
        if fragment:
            match = re.search('(\d+)', fragment[0].content)
            if match:
                views = match.group(1)
            
        match = re.search('href="([^"]+-full-movie-[^"]+)', html)
        if match:
            url = match.group(1)
            html = self._http_get(url, cache_limit=.5)
        
        sources = self.__get_embedded(html)
        for link in dom_parser2.parse_dom(html, 'span', {'class': 'btn-eps'}, req='link'):
            link = link.attrs['link']
            ajax_url = scraper_utils.urljoin(self.base_url, AJAX_URL)
            headers = {'Referer': url}
            headers.update(XHR)
            html = self._http_get(ajax_url, params={'v': link}, headers=headers, cache_limit=.5)
            sources.update(self.__get_sources(html))
        
        for source in sources:
            if sources[source]['direct']:
                host = scraper_utils.get_direct_hostname(self, source)
            else:
                host = urlparse.urlparse(source).hostname
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            direct = sources[source]['direct']
            quality = sources[source]['quality']
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)

        return hosters

示例#51

0

显示文件

文件： seehd_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        sources = {}
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        for _attrs, div in dom_parser2.parse_dom(html, 'div', {'class': 'tabcontent'}):
            for attrs, _content in dom_parser2.parse_dom(div, 'source', req='src'):
                source = attrs['src'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                sources[source] = {'quality': None, 'direct': True}
            
            iframe_url = dom_parser2.parse_dom(div, 'iframe', req='src')
            if iframe_url:
                iframe_url = iframe_url[0].attrs['src']
                if 'songs2dl' in iframe_url:
                    headers = {'Referer': page_url}
                    iframe_html = self._http_get(iframe_url, headers=headers, cache_limit=1)
                    sources.update(scraper_utils.parse_sources_list(self, iframe_html))
                else:
                    sources[iframe_url] = {'quality': None, 'direct': False}
                
        sources.update(self.__get_mirror_links(html, video))
        page_quality = self.__get_best_quality(sources)
        for source, values in sources.iteritems():
            direct = values['direct']
            if direct:
                host = scraper_utils.get_direct_hostname(self, source)
            else:
                host = urlparse.urlparse(source).hostname
            
            if values['quality'] is None:
                values['quality'] = page_quality
                
            hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': values['quality'], 'direct': direct}
            hosters.append(hoster)

        return hosters

示例#52

0

显示文件

文件： watch5s_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        sources = {}
        headers = {'Accept-Language': 'en-US,en;q=0.5'}
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, headers=headers, cache_limit=2)
        if video.video_type == VIDEO_TYPES.MOVIE:
            sources.update(self.__scrape_sources(html, page_url))
            pages = set([r.attrs['href'] for r in dom_parser2.parse_dom(html, 'a', {'class': 'btn-eps'}, req='href')])
            active = set([r.attrs['href'] for r in dom_parser2.parse_dom(html, 'a', {'class': 'active'}, req='href')])
            for page in list(pages - active):
                page_url = scraper_utils.urljoin(self.base_url, page)
                html = self._http_get(page_url, headers=headers, cache_limit=2)
                sources.update(self.__scrape_sources(html, page_url))
        else:
            for page in self.__match_episode(video, html):
                page_url = scraper_utils.urljoin(self.base_url, page)
                html = self._http_get(page_url, headers=headers, cache_limit=2)
                sources.update(self.__scrape_sources(html, page_url))
        
        for source, values in sources.iteritems():
            if not source.lower().startswith('http'): continue
            if values['direct']:
                host = scraper_utils.get_direct_hostname(self, source)
                if host != 'gvideo':
                    stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                else:
                    stream_url = source
            else:
                host = urlparse.urlparse(source).hostname
                stream_url = source
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': values['quality'], 'views': None, 'rating': None, 'url': stream_url, 'direct': values['direct']}
            hosters.append(hoster)

        return hosters

示例#53

0

显示文件

文件： moviehubs_scraper.py 项目： CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        hosts = [r.content for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_servername'})]
        links = [r.content for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_play'})]
        for host, link_frag in zip(hosts, links):
            stream_url = dom_parser2.parse_dom(link_frag, 'a', req='href')
            if not stream_url: continue
            
            stream_url = stream_url[0].attrs['href']
            host = re.sub('^Server\s*', '', host, re.I)
            host = re.sub('\s*Link\s+\d+', '', host)
            if host.lower() == 'google':
                sources = self.__get_gvideo_links(stream_url)
            else:
                sources = [{'host': host, 'link': stream_url}]
            
            for source in sources:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                    stream_url = source['link'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                    direct = True
                else:
                    stream_url = scraper_utils.pathify_url(source['link'])
                    host = HOST_SUB.get(source['host'].lower(), source['host'])
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                    direct = False
                    
                hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(hoster)

        return hosters