Exemplos de get_direct_hostname em Python, exemplos de dsrd_lib.scraper_utils.get_direct_hostname em Python

Exemplo n.º 1

0

Exibir arquivo

 def __get_links_from_json2(self, url, page_url, video_type):
     """Collect direct stream links from a JSON playlist response.

     Requests ``url`` as an XHR call with ``page_url`` as the referer and
     walks the ``sources`` of the first ``playlist`` entry.

     Returns a dict mapping each stream URL to
     ``{'quality': <quality>, 'direct': True}``. Any exception raised while
     walking the playlist is logged at debug level and the links gathered
     up to that point are returned.
     """
     request_headers = {'Referer': page_url}
     request_headers.update(XHR)
     response = self._http_get(url, headers=request_headers, cache_limit=0)
     js_data = scraper_utils.parse_json(response, url)
     links = {}
     try:
         first_entry = js_data.get('playlist', [])[0]
         for entry in first_entry.get('sources', []):
             stream_url = entry['file']
             hostname = scraper_utils.get_direct_hostname(self, stream_url)
             if hostname == 'gvideo':
                 quality = scraper_utils.gv_get_quality(stream_url)
             elif 'label' in entry:
                 quality = scraper_utils.height_get_quality(entry['label'])
             else:
                 # No label available: take the height parsed from the link.
                 if video_type == VIDEO_TYPES.MOVIE:
                     meta = scraper_utils.parse_movie_link(stream_url)
                 else:
                     meta = scraper_utils.parse_episode_link(stream_url)
                 quality = scraper_utils.height_get_quality(meta['height'])
             links[stream_url] = {'direct': True, 'quality': quality}
             message = 'Adding stream: %s Quality: %s' % (stream_url, quality)
             logger.log(message, log_utils.LOGDEBUG)
     except Exception as e:
         message = 'Exception during yesmovies extract: %s' % (e)
         logger.log(message, log_utils.LOGDEBUG)
     return links

Exemplo n.º 2

0

Exibir arquivo

Arquivo: snagfilms_scraper.py Projeto: Lhse44/repository.deallen

 def get_sources(self, video):
     """Return hoster dicts for the streams embedded in the video's page.

     The page's ``film-container`` div is searched for an iframe; the
     iframe document is then parsed for a sources list. Each hoster URL
     carries User-Agent and Referer headers. Hosters are sorted in
     descending order of their ``extra`` bitrate tag when one is present.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = scraper_utils.urljoin(self.base_url, source_url)
     page_html = self._http_get(page_url, cache_limit=.5)
     containers = dom_parser2.parse_dom(page_html, 'div', {'class': 'film-container'})
     if not containers:
         return hosters

     iframes = dom_parser2.parse_dom(containers[0].content, 'iframe', req='src')
     if not iframes:
         return hosters

     iframe_url = scraper_utils.urljoin(self.base_url, iframes[0].attrs['src'])
     iframe_html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
     sources = scraper_utils.parse_sources_list(self, iframe_html)
     for source in sources:
         quality = sources[source]['quality']
         host = scraper_utils.get_direct_hostname(self, source)
         header_suffix = scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
         hoster = {
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': quality,
             'views': None,
             'rating': None,
             'url': source + header_suffix,
             'direct': True,
         }
         bitrate = re.search('(\d+[a-z]bps)', source)
         if bitrate:
             hoster['extra'] = bitrate.group(1)
         hosters.append(hoster)

     def extra_key(item):
         # Hosters without a bitrate tag sort as the empty string.
         return item.get('extra', '')

     hosters.sort(key=extra_key, reverse=True)
     return hosters

Exemplo n.º 3

0

Exibir arquivo

Arquivo: heydl_scraper.py Projeto: Lhse44/repository.deallen

    def get_sources(self, video):
        """Return a hoster dict for every anchor whose href contains MOVIE_URL.

        Quality comes from the height parsed out of the link; the hoster URL
        is the pathified link with a User-Agent header appended. When the
        parsed metadata has a ``format`` entry it is copied to the hoster.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
            href = attrs['href']
            if MOVIE_URL not in href:
                continue

            meta = scraper_utils.parse_movie_link(href)
            path = scraper_utils.pathify_url(href)
            header_suffix = scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            stream_url = path + header_suffix
            quality = scraper_utils.height_get_quality(meta['height'])
            host = scraper_utils.get_direct_hostname(self, stream_url)
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True,
            }
            if 'format' in meta:
                hoster['format'] = meta['format']
            hosters.append(hoster)

        return hosters

Exemplo n.º 4

0

Exibir arquivo

 def __get_cloud_links(self, html, page_url, sub):
     """Build hoster dicts from the "cloud" player referenced in ``html``.

     The episode id is read from the ``dizi_kapak_getir('...')`` call. Each
     ``data-cfasync="false"`` script is then fetched and searched for the
     ``kapak_url`` and ``aCtkp`` variables, which together with the episode
     id form the JSON link URL. Every entry of the JSON ``variants``
     contributes one hoster built from a randomly chosen host.

     ``sub`` is stored unchanged under the ``subs`` key of every hoster.
     Returns a (possibly empty) list of hoster dicts.
     """
     hosters = []
     html = html.replace('\\"', '"').replace('\\/', '/')
     match = re.search(r"dizi_kapak_getir\('([^']+)", html)
     if not match:
         return hosters

     ep_id = match.group(1)
     for attrs, _content in dom_parser2.parse_dom(html, 'script', {'data-cfasync': 'false'}, req='src'):
         script_html = self._http_get(attrs['src'], cache_limit=24)
         url_match = re.search(r"var\s+kapak_url\s*=\s*'([^']+)", script_html)
         token_match = re.search(r"var\s+aCtkp\s*=\s*'([^']+)", script_html)
         if not (url_match and token_match):
             continue

         link_url = '%s?fileid=%s&access_token=%s' % (url_match.group(1), ep_id, token_match.group(1))
         link_html = self._http_get(link_url, headers={'Referer': page_url}, cache_limit=.5)
         js_data = scraper_utils.parse_json(link_html, link_url)
         for variant in js_data.get('variants', {}):
             # random.choice() raises IndexError on an empty sequence, so a
             # variant without hosts has to be skipped before choosing.
             candidates = variant.get('hosts', [])
             if not candidates:
                 continue
             stream_host = random.choice(candidates)
             if not stream_host:
                 continue

             stream_url = stream_host + variant['path'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
             if not stream_url.startswith('http'):
                 stream_url = 'http://' + stream_url
             host = scraper_utils.get_direct_hostname(self, stream_url)
             if 'width' in variant:
                 quality = scraper_utils.width_get_quality(variant['width'])
             elif 'height' in variant:
                 quality = scraper_utils.height_get_quality(variant['height'])
             else:
                 quality = QUALITIES.HIGH
             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
             hoster['subs'] = sub
             hosters.append(hoster)
     return hosters

Exemplo n.º 5

0

Exibir arquivo

    def get_sources(self, video):
        """Return hoster dicts for every stream found on the video's page.

        Streams are gathered from three helpers (posts, ajax and embedded),
        each yielding a mapping of stream URL to quality. Every hoster is
        marked direct, tagged with 'Turkish subtitles', and its URL gets a
        User-Agent header appended.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        # Later helpers override earlier ones on duplicate stream URLs.
        sources = self.__get_posts(html)
        sources.update(self.__get_ajax(html, url))
        sources.update(self.__get_embedded(html, url))

        for source in sources:
            header_suffix = scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            host = scraper_utils.get_direct_hostname(self, source)
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': sources[source],
                'views': None,
                'rating': None,
                'url': source + header_suffix,
                'direct': True,
                'subs': 'Turkish subtitles',
            })

        return hosters

Exemplo n.º 6

0

Exibir arquivo

    def __get_episode_sources(self, source_url, video):
        """Return hoster dicts for the episode links whose hash is finished.

        Each link is indexed as ``link[0]`` (the hash id) and ``link[1]``
        (text used for quality detection and stored as ``extra``). Links
        whose hash status is missing or not 'finished' are skipped.
        """
        hosters = []
        links = self.__find_episode(source_url, video)
        if not links:
            return hosters

        hash_ids = [entry[0] for entry in links]
        hash_data = self.__get_hash_data(hash_ids)
        for entry in links:
            hash_id, label = entry[0], entry[1]
            try:
                status = hash_data['hashes'][hash_id]['status']
            except KeyError:
                status = ''
            if status.lower() != 'finished':
                continue

            stream_url = 'hash_id=%s' % (hash_id)
            host = scraper_utils.get_direct_hostname(self, stream_url)
            quality = scraper_utils.blog_get_quality(video, label, '')
            hosters.append({
                'multi-part': False,
                'class': self,
                'views': None,
                'url': stream_url,
                'rating': None,
                'host': host,
                'quality': quality,
                'direct': True,
                'extra': label,
            })

        return hosters

Exemplo n.º 7

0

Exibir arquivo

Arquivo: dizibox_scraper.py Projeto: Lhse44/repository.deallen

 def __get_king_links(self, iframe_url):
     """Return hoster dicts for the video sources behind ``iframe_url``.

     The value after ``v=`` in the iframe URL is posted as ``ID`` to the
     iframe URL's base (query string removed) with ``p=GetVideoSources``.
     Each entry of the JSON ``VideoSources`` list becomes one direct hoster
     tagged with Turkish subtitles.

     Extraction is best-effort: an error while walking the response ends
     the walk and the hosters gathered so far are returned.
     """
     hosters = []
     match = re.search('v=(.*)', iframe_url)
     if not match:
         return hosters

     data = {'ID': match.group(1)}
     headers = {'Referer': iframe_url}
     headers.update(XHR)
     xhr_url = iframe_url.split('?')[0]
     html = self._http_get(xhr_url, params={'p': 'GetVideoSources'}, data=data, headers=headers, cache_limit=.5)
     js_data = scraper_utils.parse_json(html, xhr_url)
     try:
         for source in js_data['VideoSources']:
             stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
             host = scraper_utils.get_direct_hostname(self, source['file'])
             label = source.get('label', '')
             if host == 'gvideo':
                 quality = scraper_utils.gv_get_quality(source['file'])
             elif re.search(r'\d+p?', label):
                 quality = scraper_utils.height_get_quality(label)
             else:
                 quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
             hosters.append(hoster)
     except Exception:
         # Deliberately best-effort, but no longer a bare ``except`` so that
         # KeyboardInterrupt and SystemExit are not swallowed.
         pass

     return hosters

Exemplo n.º 8

0

Exibir arquivo

    def get_sources(self, video):
        """Return hoster dicts for the video, trying 'or' then 'tr' language.

        The page's ``view_id`` is posted to ``self.ajax_url`` once per
        language; the first language that yields any sources wins. Nested
        iframes with absolute URLs become non-direct 720p sources, and
        entries from the parsed sources list become direct sources. Direct
        sources that are neither gvideo nor labelled are dropped.
        """
        hosters = []
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        page_html = self._http_get(page_url, cache_limit=.25)
        id_match = re.search('var\s+view_id\s*=\s*"([^"]+)', page_html)
        if not id_match:
            return hosters
        view_id = id_match.group(1)

        for lang in ('or', 'tr'):
            subs = (lang == 'tr')
            view_data = {'id': view_id, 'tip': 'view', 'dil': lang}
            ajax_html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
            ajax_html = re.sub(r'\\n|\\t', '', ajax_html.strip())
            list_match = re.search('var\s+sources\s*=\s*(\[.*?\])', ajax_html)
            if list_match:
                raw_data = list_match.group(1).replace('\\', '')
            else:
                raw_data = ajax_html

            js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
            if 'data' not in js_data:
                continue

            outer_frames = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src')
            if not outer_frames:
                continue

            embed_html = self._http_get(outer_frames[0].attrs['src'], cache_limit=.25)
            for attrs, _content in dom_parser2.parse_dom(embed_html, 'iframe', req='src'):
                frame_src = attrs['src']
                if frame_src.startswith('http'):
                    sources.append({'label': '720p', 'file': frame_src, 'direct': False, 'subs': subs})

            parsed = scraper_utils.parse_sources_list(self, embed_html)
            sources.extend({'file': stream, 'subs': subs} for stream in parsed.iterkeys())

            # Stop at the first language that produced anything.
            if sources:
                break

        for source in sources:
            direct = source.get('direct', True)
            stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            if not direct:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in source:
                    quality = scraper_utils.height_get_quality(source['label'])
                else:
                    continue

            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': direct,
            }
            if source.get('subs'):
                hoster['subs'] = 'Turkish Subtitles'
            hosters.append(hoster)

        return hosters

Exemplo n.º 9

0

Exibir arquivo

Arquivo: seehd_scraper.py Projeto: Lhse44/repository.deallen

    def get_sources(self, video):
        """Return hoster dicts for every source found in the page's tabs.

        For each ``tabcontent`` div, ``<source>`` tags become direct sources
        (with User-Agent and Referer headers appended). The first iframe is
        either parsed for a sources list (when its URL contains 'songs2dl')
        or recorded as a non-direct source. Mirror links are merged in last.
        Sources without a quality receive the best quality found on the page.
        """
        hosters = []
        sources = {}
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        for _attrs, tab in dom_parser2.parse_dom(html, 'div', {'class': 'tabcontent'}):
            for attrs, _content in dom_parser2.parse_dom(tab, 'source', req='src'):
                src = attrs['src']
                header_suffix = scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                sources[src + header_suffix] = {'quality': None, 'direct': True}

            iframes = dom_parser2.parse_dom(tab, 'iframe', req='src')
            if not iframes:
                continue

            iframe_url = iframes[0].attrs['src']
            if 'songs2dl' not in iframe_url:
                sources[iframe_url] = {'quality': None, 'direct': False}
                continue

            iframe_html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=1)
            sources.update(scraper_utils.parse_sources_list(self, iframe_html))

        sources.update(self.__get_mirror_links(html, video))
        page_quality = self.__get_best_quality(sources)
        for source, values in sources.iteritems():
            direct = values['direct']
            if direct:
                host = scraper_utils.get_direct_hostname(self, source)
            else:
                host = urlparse.urlparse(source).hostname

            # Fall back to the page-wide quality when the source has none.
            if values['quality'] is None:
                values['quality'] = page_quality

            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'views': None,
                'url': source,
                'rating': None,
                'quality': values['quality'],
                'direct': direct,
            })

        return hosters

Exemplo n.º 10

0

Exibir arquivo

Arquivo: dizibox_scraper.py Projeto: Lhse44/repository.deallen

 def __get_embed_links(self, html):
     """Return direct hoster dicts for the sources list embedded in ``html``.

     ``scraper_utils.parse_sources_list`` is iterated for stream URLs and
     indexed by each URL to read its ``quality``. Every hoster URL gets a
     User-Agent header appended and is tagged with Turkish subtitles.
     """
     hosters = []
     sources = scraper_utils.parse_sources_list(self, html)
     for source in sources:
         # ``source`` is the stream URL (the key); its details live in the
         # mapped value, so the quality is looked up through ``sources``.
         quality = sources[source]['quality']
         stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
         hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, source), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
         hosters.append(hoster)
     return hosters

Exemplo n.º 11

0

Exibir arquivo

 def __get_embedded_links(self, html, sub):
     """Return hoster dicts for the sources list embedded in ``html``.

     Escaped quotes and slashes are unescaped before parsing. Quality and
     directness come from the parsed source entry; ``sub`` is stored
     unchanged under each hoster's ``subs`` key.
     """
     unescaped = html.replace('\\"', '"').replace('\\/', '/')
     sources = scraper_utils.parse_sources_list(self, unescaped)
     hosters = []
     for source in sources:
         host = scraper_utils.get_direct_hostname(self, source)
         details = sources[source]
         hosters.append({
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': details['quality'],
             'views': None,
             'rating': None,
             'url': source,
             'direct': details['direct'],
             'subs': sub,
         })
     return hosters

Exemplo n.º 12

0

Exibir arquivo

Arquivo: moviehubs_scraper.py Projeto: Lhse44/repository.deallen

    def get_sources(self, video):
        """Return hoster dicts for the servers listed on the video's page.

        Server names (``p.server_servername``) are paired with their play
        links (``p.server_play``). A server named 'google' is expanded via
        ``__get_gvideo_links``; any other server yields a single source.
        Sources whose link resolves to gvideo are direct (with a User-Agent
        header appended); all others are non-direct with a pathified URL.
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        hosts = [
            r.content for r in dom_parser2.parse_dom(
                html, 'p', {'class': 'server_servername'})
        ]
        links = [
            r.content
            for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_play'})
        ]
        for host, link_frag in zip(hosts, links):
            stream_url = dom_parser2.parse_dom(link_frag, 'a', req='href')
            if not stream_url: continue

            stream_url = stream_url[0].attrs['href']
            # The 4th positional argument of re.sub is ``count``; the flag
            # has to be passed by keyword to make the match case-insensitive.
            host = re.sub(r'^Server\s*', '', host, flags=re.I)
            host = re.sub(r'\s*Link\s+\d+', '', host)
            if host.lower() == 'google':
                sources = self.__get_gvideo_links(stream_url)
            else:
                sources = [{'host': host, 'link': stream_url}]

            for source in sources:
                # Inspect each source's own link rather than the page link
                # (which the previous iteration may already have rewritten).
                link = source['link']
                source_host = scraper_utils.get_direct_hostname(self, link)
                if source_host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(link)
                    hoster_url = link + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                    direct = True
                else:
                    hoster_url = scraper_utils.pathify_url(link)
                    source_host = HOST_SUB.get(source['host'].lower(),
                                               source['host'])
                    quality = scraper_utils.get_quality(
                        video, source_host, QUALITIES.HIGH)
                    direct = False

                hoster = {
                    'multi-part': False,
                    'url': hoster_url,
                    'host': source_host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters

Exemplo n.º 13

0

Exibir arquivo

Arquivo: quikr_scraper.py Projeto: Lhse44/repository.deallen

    def get_sources(self, video):
        """Return hoster dicts for the iframes embedded in the video's page.

        Every ``embed-responsive`` div is searched for an iframe with a
        ``data-src`` attribute. Iframes hosted on one of DIRECT_HOSTS are
        expanded through ``__parse_streams``; any other iframe becomes a
        single non-direct source. The fallback page quality is HD720 for
        movies and HIGH otherwise.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        page_quality = QUALITIES.HD720 if video.video_type == VIDEO_TYPES.MOVIE else QUALITIES.HIGH
        for _attrs, fragment in dom_parser2.parse_dom(
                html, 'div', {'class': 'embed-responsive'}):
            iframe_url = dom_parser2.parse_dom(fragment,
                                               'iframe',
                                               req='data-src')
            # Without an iframe there is nothing to add for this fragment.
            # Skipping here keeps ``sources`` from being unbound on the first
            # fragment or re-used (duplicated) from a previous one.
            if not iframe_url: continue

            iframe_url = iframe_url[0].attrs['data-src']
            iframe_host = urlparse.urlparse(iframe_url).hostname
            if iframe_host in DIRECT_HOSTS:
                sources = self.__parse_streams(iframe_url, url)
            else:
                sources = {
                    iframe_url: {
                        'quality':
                        scraper_utils.get_quality(video, iframe_host,
                                                  page_quality),
                        'direct':
                        False
                    }
                }

            for source in sources:
                quality = sources[source]['quality']
                direct = sources[source]['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    stream_url = source + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                else:
                    host = urlparse.urlparse(source).hostname
                    stream_url = source

                hoster = {
                    'multi-part': False,
                    'url': stream_url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters

Exemplo n.º 14

0

Exibir arquivo

Arquivo: vivoto_scraper.py Projeto: Lhse44/repository.deallen

    def get_sources(self, video):
        """Return hoster dicts for the GK links found on the video's page.

        The page quality is looked up in QUALITY_MAP from the ``dd.status``
        element, defaulting to HIGH. For episodes only the ``servers-list``
        div is scanned for links; for other video types the whole page is.
        gvideo links are direct and keep the quality reported for them;
        other links are non-direct and re-rated by host. Links whose host
        cannot be determined are dropped.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        status = dom_parser2.parse_dom(html, 'dd', {'class': 'status'})
        if status:
            page_quality = QUALITY_MAP.get(status[0].content, QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH

        gk_html = html
        if video.video_type == VIDEO_TYPES.EPISODE:
            servers = dom_parser2.parse_dom(html, 'div', {'id': 'servers-list'})
            gk_html = servers[0].content if servers else ''

        link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        gk_links = scraper_utils.get_gk_links(self, gk_html, page_url, page_quality, link_url, player_url)
        for stream_url, quality in gk_links.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            direct = (host == 'gvideo')
            if not direct:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, quality)

            if host is None:
                continue

            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': direct,
            })

        return hosters

Exemplo n.º 15

0

Exibir arquivo

Arquivo: premiumizev2_scraper.py Projeto: Lhse44/repository.deallen

    def get_sources(self, video):
        """Return a direct hoster dict for each stream of the video.

        For episodes, streams whose name fails ``release_check`` are
        skipped. A stream's ``size`` (formatted) and ``name`` are copied to
        the hoster as ``size`` and ``extra`` when present.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        for stream in self.__get_videos(source_url, video):
            if video.video_type == VIDEO_TYPES.EPISODE:
                if not scraper_utils.release_check(video, stream['name']):
                    continue

            stream_url = stream['url']
            host = scraper_utils.get_direct_hostname(self, stream_url)
            hoster = {
                'multi-part': False,
                'class': self,
                'views': None,
                'url': stream_url,
                'rating': None,
                'host': host,
                'quality': stream['quality'],
                'direct': True,
            }
            if 'size' in stream:
                hoster['size'] = scraper_utils.format_size(stream['size'])
            if 'name' in stream:
                hoster['extra'] = stream['name']
            hosters.append(hoster)

        return hosters

Exemplo n.º 16

0

Exibir arquivo

    def __get_movie_sources(self, source_url):
        """Return hoster dicts for the movie's torrents whose hash is finished.

        The movie id is taken from the ``movie_id`` query parameter of
        ``source_url`` or, failing that, from ``__get_movie_id``. Torrents
        come from ``data.movie.torrents`` of the movie details response.
        Torrents without a hash, or whose hash status is missing or not
        'finished', are skipped. Quality is mapped through QUALITY_MAP with
        HD720 as the default; a '3D' quality also sets the ``3D`` flag.
        """
        hosters = []
        query = kodi.parse_query(urlparse.urlparse(source_url).query)
        movie_id = query.get('movie_id') or self.__get_movie_id(source_url)
        if not movie_id: return hosters

        details_url = scraper_utils.urljoin(self.movie_base_url,
                                            MOVIE_DETAILS_URL)
        detail_data = self._json_get(details_url,
                                     params={'movie_id': movie_id},
                                     cache_limit=24)
        try:
            torrents = detail_data['data']['movie']['torrents']
        except KeyError:
            torrents = []

        # Pair each torrent with its normalized hash once. A torrent without
        # a hash is dropped here, so it neither empties the status lookup for
        # the others nor raises KeyError in the loop below.
        hashed = [(torrent['hash'].lower(), torrent)
                  for torrent in torrents if torrent.get('hash')]
        hash_data = self.__get_hash_data(
            [hash_id for hash_id, _torrent in hashed])
        for hash_id, torrent in hashed:
            try:
                status = hash_data['hashes'][hash_id]['status']
            except KeyError:
                status = ''
            if status.lower() != 'finished': continue
            stream_url = 'hash_id=%s' % (hash_id)
            host = scraper_utils.get_direct_hostname(self, stream_url)
            quality_label = torrent.get('quality')
            quality = QUALITY_MAP.get(quality_label, QUALITIES.HD720)
            hoster = {
                'multi-part': False,
                'class': self,
                'views': None,
                'url': stream_url,
                'rating': None,
                'host': host,
                'quality': quality,
                'direct': True
            }
            if 'size_bytes' in torrent:
                hoster['size'] = scraper_utils.format_size(
                    torrent['size_bytes'], 'B')
            if quality_label == '3D': hoster['3D'] = True
            hosters.append(hoster)
        return hosters

Exemplo n.º 17

0

Exibir arquivo

    def get_sources(self, video):
        """Return hoster dicts for the GK and HT links on the video's page.

        For episodes, only anchors whose text is the episode number or
        'Server <n>' are handed to the GK link extractor; for other video
        types the whole page is. gvideo links are direct; other links are
        non-direct and use the URL's hostname. Links without a host are
        skipped. Every hoster URL gets a User-Agent header appended.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        gk_html = html
        if video.video_type == VIDEO_TYPES.EPISODE:
            anchor_pattern = '<a[^>]*>(%s|Server \d+)</a>' % (video.episode)
            anchors = re.finditer(anchor_pattern, html, re.I)
            gk_html = ''.join(anchor.group(0) for anchor in anchors)

        link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        sources = scraper_utils.get_gk_links(self, gk_html, page_url, QUALITIES.HIGH, link_url, player_url)
        sources.update(self.__get_ht_links(html, page_url))

        for stream_url, quality in sources.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            direct = (host == 'gvideo')
            if not direct:
                host = urlparse.urlparse(stream_url).hostname

            if host is None:
                continue

            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': direct,
            })

        return hosters

Exemplo n.º 18

0

Exibir arquivo

Arquivo: xmovies8_scraper.py Projeto: Lhse44/repository.deallen

    def get_sources(self, video):
        """Return direct hoster dicts for the video's streams.

        The page's ``video_id`` is posted to VIDEO_URL as an XHR request.
        Each value of the JSON response that contains ``url=...`` yields one
        stream (URL-unquoted). gvideo streams are rated from the URL; other
        streams are rated from the response key.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        page_html = self._http_get(page_url, cache_limit=0)
        id_match = re.search('var\s*video_id\s*=\s*"([^"]+)', page_html)
        if not id_match:
            return hosters

        xhr_headers = {'Referer': page_url}
        xhr_headers.update(XHR)
        # _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)

        vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
        vid_html = self._http_get(vid_url, data={'v': id_match.group(1)}, headers=xhr_headers, cache_limit=0)
        streams = scraper_utils.parse_json(vid_html, vid_url)
        for label, value in streams.iteritems():
            url_match = re.search('url=(.*)', value)
            if not url_match:
                continue

            stream_url = urllib.unquote(url_match.group(1))
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(label)

            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True,
            })
        return hosters

Exemplo n.º 19

0

Exibir arquivo

Arquivo: ol_scraper.py Projeto: Lhse44/repository.deallen

    def get_sources(self, video):
        """Return direct hoster dicts for the sources list on the video's page.

        When a ``playex`` div exists, only its content is parsed. Links that
        contain the site's base URL are resolved with a HEAD request (no
        redirect following); the result replaces the link when it starts
        with 'http'. gvideo streams are rated from the URL and left without
        extra headers; other streams keep the parsed quality and get
        User-Agent and Referer headers appended.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        player = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
        if player:
            html = player[0].content

        links = scraper_utils.parse_sources_list(self, html)
        for link in links:
            stream_url = link
            if self.base_url in link:
                redir_url = self._http_get(link, headers={'Referer': url}, allow_redirect=False, method='HEAD')
                if redir_url.startswith('http'):
                    stream_url = redir_url

            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = links[link]['quality']
                extra_headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': url}
                stream_url += scraper_utils.append_headers(extra_headers)

            hosters.append({
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True,
            })

        return hosters

Exemplo n.º 20

0

Exibir arquivo

    def __get_ajax(self, html, page_url):
        """Resolve the streams advertised through the page's ``$.ajax`` call.

        Looks for the first ``$.ajax("<url>"`` call in ``html``, requests that
        url with ``page_url`` as the Referer and parses the response as JSON.
        Each key/value pair of the parsed object is treated as
        ``key -> stream url``.

        :param html: page markup to search.
        :param page_url: url of the page, sent as the Referer header.
        :return: dict mapping stream url -> quality; empty when no ``$.ajax``
            call is present.
        """
        sources = {}
        # Raw string: \$, \( and \s are regex escapes, not string escapes; a
        # plain literal triggers invalid-escape warnings on newer Pythons.
        match = re.search(r'\$\.ajax\(\s*"([^"]+)', html)
        if not match:
            return sources

        post_url = match.group(1)
        html = self._http_get(post_url,
                              headers={'Referer': page_url},
                              cache_limit=.5)
        js_result = scraper_utils.parse_json(html, post_url)
        for key in js_result:
            stream_url = js_result[key]
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                # For other hosts the JSON key itself is fed to the height
                # parser (presumably a resolution label -- confirm).
                quality = scraper_utils.height_get_quality(key)
            sources[stream_url] = quality
        return sources

Exemplo n.º 21

0

Exibir arquivo

    def __get_posts(self, html):
        """Resolve the streams advertised through the page's ``$.post`` call.

        Looks for the first ``$.post("<url>", {<data>}`` call in ``html``,
        converts the captured ``<data>`` text with ``self.__get_data`` and
        sends it to the captured url. The JSON response is treated as
        ``key -> stream url`` pairs.

        :param html: page markup to search.
        :return: dict mapping stream url -> quality; empty when no ``$.post``
            call is present.
        """
        sources = {}
        # Raw string: \$, \{ and \} are regex escapes, not string escapes; a
        # plain literal triggers invalid-escape warnings on newer Pythons.
        match = re.search(r'\$\.post\("([^"]+)"\s*,\s*\{(.*?)\}', html)
        if not match:
            return sources

        post_url, post_data = match.groups()
        data = self.__get_data(post_data)
        html = self._http_get(post_url, data=data, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, post_url)
        for key in js_result:
            stream_url = js_result[key]
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                # For other hosts the JSON key itself is fed to the height
                # parser (presumably a resolution label -- confirm).
                quality = scraper_utils.height_get_quality(key)
            sources[stream_url] = quality
        return sources

Exemplo n.º 22

0

Exibir arquivo

 def __get_json_links(self, html, sub):
     """Turn the ``sources`` entries of a JSON payload into hoster dicts.

     :param html: raw JSON text.
     :param sub: value stored under the ``'subs'`` key of every hoster.
     :return: list of hoster dicts; empty when the payload has no
         ``sources`` key. Entries without a ``file`` value are skipped.
     """
     hosters = []
     js_data = scraper_utils.parse_json(html)
     if 'sources' not in js_data:
         return hosters

     for entry in js_data.get('sources', []):
         stream_url = entry.get('file')
         if stream_url is None:
             continue

         host = scraper_utils.get_direct_hostname(self, stream_url)
         if host == 'gvideo':
             quality = scraper_utils.gv_get_quality(stream_url)
         elif 'label' in entry:
             quality = scraper_utils.height_get_quality(entry['label'])
         else:
             # Neither a gvideo url nor a label to go on.
             quality = QUALITIES.HIGH

         hosters.append({
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': quality,
             'views': None,
             'rating': None,
             'url': stream_url,
             'direct': True,
             'subs': sub,
         })
     return hosters

Exemplo n.º 23

0

Exibir arquivo

Arquivo: downloadtube_scraper.py Projeto: Lhse44/repository.deallen

 def __get_direct(self, html, page_url):
     """Extract direct streams from an inline ``sources: [...]`` player block.

     ``file`` and ``label`` entries are collected separately from the block
     and paired by position. A file whose value is wrapped in a call
     containing ``atob`` is base64-decoded first.

     The original pairing used ``map(None, files, labels)``, which pads the
     shorter list with ``None``; a count mismatch then raised ``TypeError``.
     Here a file without a matching label gets ``QUALITIES.HIGH`` (the same
     value ``best_quality`` starts from) and surplus labels are ignored.

     :param html: page markup to search.
     :param page_url: url of the page, appended as the Referer header.
     :return: tuple ``(best_quality, sources)`` where ``sources`` is a list
         of hoster dicts and ``best_quality`` is the highest quality seen
         according to ``Q_ORDER`` (``QUALITIES.HIGH`` when nothing better
         was found).
     """
     sources = []
     best_quality = QUALITIES.HIGH
     match = re.search(r'''['"]?sources["']?\s*:\s*\[(.*?)\}\s*,?\s*\]''', html, re.DOTALL)
     if not match:
         return best_quality, sources

     fragment = match.group(1)
     files = re.findall(r'''['"]?file['"]?\s*:\s*(.*?)['"]([^'"]+)''', fragment, re.DOTALL)
     labels = re.findall(r'''['"]?label['"]?\s*:\s*['"]([^'"]*)''', fragment, re.DOTALL)
     # Compiled once instead of on every iteration.
     hd_suffix = re.compile(r'\s*HD', re.I)
     for index, (func, stream_url) in enumerate(files):
         if 'atob' in func:
             stream_url = base64.b64decode(stream_url)
         stream_url = stream_url.replace(' ', '%20')
         host = scraper_utils.get_direct_hostname(self, stream_url)
         if index < len(labels):
             label = hd_suffix.sub('', labels[index])
             quality = scraper_utils.height_get_quality(label)
         else:
             # No label for this file: fall back instead of crashing.
             quality = QUALITIES.HIGH
         if Q_ORDER[quality] > Q_ORDER[best_quality]:
             best_quality = quality
         stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
         sources.append({
             'multi-part': False,
             'url': stream_url,
             'host': host,
             'class': self,
             'quality': quality,
             'views': None,
             'rating': None,
             'direct': True,
         })
     return best_quality, sources

Exemplo n.º 24

0

Exibir arquivo

    def __get_linked(self, html):
        """Look up the streams behind the page's ``dizi=`` identifier.

        The identifier captured from ``html`` is sent to ``AJAX_URL`` as the
        ``dizi`` parameter and the ``success`` list of the JSON reply is
        walked.

        :param html: page markup to search.
        :return: dict mapping stream url -> quality; empty when no
            ``dizi=`` identifier is present. Entries without a ``src`` value
            are skipped.
        """
        sources = {}
        match = re.search('dizi=([^"]+)', html)
        if match is None:
            return sources

        query = {'dizi': match.group(1)}
        html = self._http_get(AJAX_URL, params=query, headers=XHR, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, AJAX_URL)
        for entry in js_result.get('success', []):
            stream_url = entry.get('src')
            if stream_url is None:
                continue

            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in entry:
                quality = scraper_utils.height_get_quality(entry['label'])
            else:
                quality = QUALITIES.HIGH
            sources[stream_url] = quality
        return sources

Exemplo n.º 25

0

Exibir arquivo

 def __get_ht_links(self, html, page_url):
     """Resolve the links behind a ``Htplugins_Make_Player("...")`` call.

     The captured argument is posted as ``data`` to ``LINK_URL2`` (joined
     onto ``self.base_url``) with ``page_url`` as the Referer. The ``l``
     list of the JSON reply holds the links.

     :param html: page markup to search.
     :param page_url: url of the page, sent as the Referer header.
     :return: dict mapping link -> quality; empty when the player call or
         the ``l`` key is missing.
     """
     sources = {}
     # Raw string: \( is a regex escape, not a string escape; a plain literal
     # triggers invalid-escape warnings on newer Pythons.
     match = re.search(r'Htplugins_Make_Player\("([^"]+)', html)
     if not match:
         return sources

     data = {'data': match.group(1)}
     url = scraper_utils.urljoin(self.base_url, LINK_URL2)
     headers = {'Referer': page_url}
     html = self._http_get(url,
                           data=data,
                           headers=headers,
                           cache_limit=.25)
     js_data = scraper_utils.parse_json(html, url)
     if 'l' in js_data:
         for link in js_data['l']:
             if scraper_utils.get_direct_hostname(self, link) == 'gvideo':
                 quality = scraper_utils.gv_get_quality(link)
             else:
                 # Nothing in the reply to derive a quality from.
                 quality = QUALITIES.HIGH
             sources[link] = quality
     return sources

Exemplo n.º 26

0

Exibir arquivo

Arquivo: sit2play_scraper.py Projeto: Lhse44/repository.deallen

    def get_sources(self, video):
        """Return one direct source per (quality key, mirror) combination.

        The object id extracted from the video's url selects a JSON document
        (``TITLE_URL``). Movies read its ``links`` entry directly; anything
        else goes through ``self.__episode_match``. Stream urls are built
        from ``TEMPLATE`` using the document's domain prefix/suffix.

        :param video: video object; ``video.video_type`` is read here.
        :return: list of hoster dicts, each carrying an extra ``'version'``
            entry naming the mirror; empty when no url or object id is
            available.
        """
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return sources

        object_id = self.__extract_id(source_url)
        if object_id is None:
            return sources

        page_url = scraper_utils.urljoin(self.base_url,
                                         TITLE_URL.format(id=object_id))
        html = self._authed_http_get(page_url, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, page_url)
        if video.video_type != VIDEO_TYPES.MOVIE:
            links = self.__episode_match(js_data, video)
        else:
            links = js_data.get('links', {})

        prefix = js_data.get('domain', {}).get('prefix')
        suffix = js_data.get('domain', {}).get('suffix')
        for key, path in links.get('links', {}).iteritems():
            # Mirrors are de-duplicated and visited in ascending order.
            for mirror in sorted(set(links.get('mirrors', []))):
                stream_url = TEMPLATE.format(prefix=prefix,
                                             mirror=mirror,
                                             suffix=suffix,
                                             path=path)
                host = scraper_utils.get_direct_hostname(self, stream_url)
                quality = Q_MAP.get(key, QUALITIES.HIGH)
                sources.append({
                    'multi-part': False,
                    'url': stream_url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': True,
                    'version': '(Mirror %d)' % (mirror),
                })

        return sources

Exemplo n.º 27

0

Exibir arquivo

    def __get_links_from_xml(self, url, video, page_url, cookies):
        """Collect stream urls from an XML playlist.

        The document at ``url`` is fetched with ``page_url`` as the Referer
        and the supplied ``cookies``, parsed with ElementTree, and every
        ``item`` element is scanned for source children.

        :param url: address of the XML document.
        :param video: video object, passed to ``blog_get_quality`` when a
            quality has to be guessed from text.
        :param page_url: url of the page, sent as the Referer header.
        :param cookies: cookies forwarded to ``self._http_get``.
        :return: dict mapping stream url -> ``{'quality': ..., 'direct': True}``.
            Any exception is logged at LOGWARNING and whatever was collected
            up to that point is returned.
        """
        sources = {}
        try:
            headers = {'Referer': page_url}
            xml = self._http_get(url,
                                 cookies=cookies,
                                 headers=headers,
                                 cache_limit=.5)
            root = ET.fromstring(xml)
            for item in root.findall('.//item'):
                # An item without a <title> child makes .find() return None;
                # the resulting AttributeError is caught by the except below
                # and ends the whole scan.
                title = item.find('title').text
                # Items titled "OOPS!" (any case) are skipped.
                if title and title.upper() == 'OOPS!': continue
                # NOTE(review): this tag opens a '{' namespace that is never
                # closed and names no element. It looks garbled; confirm the
                # intended '{namespace}tag' value.
                for source in item.findall(
                        '{https://yesmovies.to/ajax/movie_sources/'):
                    stream_url = source.get('file')
                    label = source.get('label')
                    # Quality preference: gvideo url, then the label
                    # attribute, then the item title, then the url itself.
                    if scraper_utils.get_direct_hostname(
                            self, stream_url) == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    elif label:
                        quality = scraper_utils.height_get_quality(label)
                    elif title:
                        quality = scraper_utils.blog_get_quality(
                            video, title, '')
                    else:
                        quality = scraper_utils.blog_get_quality(
                            video, stream_url, '')
                    sources[stream_url] = {'quality': quality, 'direct': True}
                    logger.log(
                        'Adding stream: %s Quality: %s' %
                        (stream_url, quality), log_utils.LOGDEBUG)
        except Exception as e:
            # Best effort: report the failure and fall through to the return.
            logger.log('Exception during YesMovies XML Parse: %s' % (e),
                       log_utils.LOGWARNING)

        return sources

Exemplo n.º 28

0

Exibir arquivo

Arquivo: dayt_scraper.py Projeto: Lhse44/repository.deallen

 def get_sources(self, video):
     """Follow nested iframes until a Google Docs frame yields the streams.

     The page's iframes are walked in discovery order. The first frame
     whose ``src`` contains ``docs.google.com`` is handed to
     ``scraper_utils.parse_google`` and ends the walk; every other frame is
     fetched and its own iframes are queued behind the current ones.

     Each frame url is fetched at most once. Without that guard, frames
     that reference each other (or the page itself) would keep the walk
     running forever.

     :param video: video object passed through to ``self.get_url``.
     :return: list of hoster dicts, one per url returned by
         ``parse_google``; empty when no url is known for the video or no
         Google Docs frame is reached.
     """
     source_url = self.get_url(video)
     hosters = []
     sources = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=1)
     iframes = dom_parser2.parse_dom(html, 'iframe', req='src')
     visited = set([page_url])
     # The list is deliberately extended while it is being iterated: newly
     # discovered frames are visited after the ones already queued.
     for attrs, _content in iframes:
         iframe_url = attrs['src']
         if 'docs.google.com' in iframe_url:
             sources = scraper_utils.parse_google(self, iframe_url)
             break

         iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
         if iframe_url in visited:
             # Already expanded; fetching again could only re-queue the
             # same frames.
             continue
         visited.add(iframe_url)
         html = self._http_get(iframe_url, cache_limit=1)
         iframes += dom_parser2.parse_dom(html, 'iframe', req='src')

     for source in sources:
         host = scraper_utils.get_direct_hostname(self, source)
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': scraper_utils.gv_get_quality(source), 'views': None, 'rating': None, 'url': source, 'direct': True}
         hosters.append(hoster)

     return hosters

Exemplo n.º 29

0

Exibir arquivo

Arquivo: furk_scraper.py Projeto: Lhse44/repository.deallen

    def __get_links(self, url, video):
        """Search Furk and turn the acceptable result files into hosters.

        ``url`` is translated into a search query by
        ``self.__translate_search`` and sent to ``SEARCH_URL``. Each entry of
        the reply's ``files`` list is run through six exclusion checks; an
        entry failing any of them is logged and dropped:

        0. ``type`` is not ``VIDEO`` (case-insensitive)
        1. ``is_ready`` is not ``'1'``
        2. ``av_result`` is ``warning`` or ``infected``
        3. no ``video_info`` key
        4. ``video_info`` is non-empty but has no matching audio stream line
        5. ``scraper_utils.release_check`` rejects the name

        A ``video_info`` key holding ``None`` passes checks 3 and 4; it is
        treated as empty text for the resolution lookup instead of raising
        ``TypeError``.

        :param url: source url to translate into a search.
        :param video: video object; ``video.video_type`` is read here.
        :return: list of hoster dicts with extra ``'size'`` and ``'extra'``
            entries. Files without ``url_pls`` or larger than
            ``self.max_bytes`` (when set) are skipped.
        """
        hosters = []
        search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
        query = self.__translate_search(url)
        result = self._http_get(search_url,
                                data=query,
                                allow_redirect=False,
                                cache_limit=.5)
        for item in result.get('files', []):
            video_info = item.get('video_info')
            # All six checks are always evaluated because the full list is
            # written to the log when an item is excluded.
            checks = [
                item.get('type', '').upper() != 'VIDEO',
                item.get('is_ready') != '1',
                item.get('av_result') in ['warning', 'infected'],
                'video_info' not in item,
                bool(video_info) and not re.search(
                    r'#0:(0|1)(\((eng|und)\))?:\s*Audio:', video_info, re.I),
                not scraper_utils.release_check(video, item['name']),
            ]
            if any(checks):
                logger.log(
                    'Furk.net result excluded: %s - |%s|' %
                    (checks, item['name']), log_utils.LOGDEBUG)
                continue

            # Prefer the "<width> x <height>" found in video_info; otherwise
            # fall back to what the release name says.
            match = re.search(r'(\d{3,})\s*x\s*(\d{3,})', video_info or '')
            if match:
                width, _height = match.groups()
                quality = scraper_utils.width_get_quality(width)
            else:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(item['name'])
                else:
                    meta = scraper_utils.parse_episode_link(item['name'])
                quality = scraper_utils.height_get_quality(meta['height'])

            if 'url_pls' not in item:
                logger.log(
                    'Furk.net result skipped - no playlist: |%s|' %
                    (json.dumps(item)), log_utils.LOGDEBUG)
                continue

            size_bytes = int(item['size'])
            size_gb = scraper_utils.format_size(size_bytes, 'B')
            if self.max_bytes and size_bytes > self.max_bytes:
                logger.log(
                    'Result skipped, Too big: |%s| - %s (%s) > %s (%sGB)' %
                    (item['name'], item['size'], size_gb, self.max_bytes,
                     self.max_gb))
                continue

            stream_url = item['url_pls']
            host = scraper_utils.get_direct_hostname(self, stream_url)
            hosters.append({
                'multi-part': False,
                'class': self,
                'views': None,
                'url': stream_url,
                'rating': None,
                'host': host,
                'quality': quality,
                'direct': True,
                'size': size_gb,
                'extra': item['name'],
            })

        return hosters

Exemplo n.º 30

0

Exibir arquivo

    def get_sources(self, video):
        """Gather the playable sources listed on the page for ``video``.

        Every ``li.ep-item`` on the source page contributes a
        (server, id) pair. For episodes only items whose link title passes
        ``self.__episode_match`` are kept. Servers 12-15 are resolved through
        ``PLAYLIST_URL1``; the rest go through ``PLAYLIST_URL2``, and only
        when the ``scraper_url`` setting is set.

        :param video: video object; ``video.video_type`` is read here.
        :return: list of hoster dicts; empty when no url is known for the
            video. Sources whose url does not start with ``http`` are
            dropped.
        """
        hosters = []
        sources = {}
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        movie_id, watching_url, html = self.__get_source_page(
            video.video_type, page_url)

        links = []
        items = dom_parser2.parse_dom(html,
                                      'li', {'class': 'ep-item'},
                                      req=['data-id', 'data-server'])
        for item in items:
            label = dom_parser2.parse_dom(item.content, 'a', req='title')
            if not label:
                continue
            if (video.video_type == VIDEO_TYPES.EPISODE
                    and not self.__episode_match(video,
                                                 label[0].attrs['title'])):
                continue
            links.append((item.attrs['data-server'], item.attrs['data-id']))

        for link_type, link_id in links:
            if link_type in ['12', '13', '14', '15']:
                playlist = PLAYLIST_URL1.format(ep_id=link_id)
                url = scraper_utils.urljoin(self.base_url, playlist)
                sources.update(self.__get_link_from_json(url))
            elif kodi.get_setting('scraper_url'):
                playlist = PLAYLIST_URL2.format(ep_id=link_id)
                url = scraper_utils.urljoin(self.base_url, playlist)
                params = self.__get_params(movie_id, link_id, watching_url)
                if params is not None:
                    url += '?' + urllib.urlencode(params)
                sources.update(
                    self.__get_links_from_json2(url, page_url,
                                                video.video_type))

        for source, info in sources.items():
            if not source.lower().startswith('http'):
                continue

            stream_url = source
            if info['direct']:
                host = scraper_utils.get_direct_hostname(self, source)
                if host != 'gvideo':
                    # Only non-gvideo direct streams get request headers
                    # appended to the url.
                    stream_url += scraper_utils.append_headers(
                        {
                            'User-Agent': scraper_utils.get_ua(),
                            'Referer': page_url
                        })
            else:
                host = urlparse.urlparse(source).hostname

            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': info['quality'],
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': info['direct'],
            })

        return hosters