Python parse_google примеры, salts_lib.scraper_utils.parse_google Python примеры использования

Пример #1

0

Показать файл

Файл: piratejunkies_scraper.py Проект: CYBERxNUKE/xbmc-addon

 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
     html = self._http_get(js_url, cache_limit=48)
     if source_url.startswith('/'):
         source_url = source_url[1:]
     pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (source_url)
     match = re.search(pattern, html, re.I)
     if match:
         stream_url = match.group(1)
         if 'drive.google' in stream_url or 'docs.google' in stream_url:
             sources = scraper_utils.parse_google(self, stream_url)
         else:
             sources = [stream_url]
         
         for source in sources:
             stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
             host = scraper_utils.get_direct_hostname(self, source)
             if host == 'gvideo':
                 quality = scraper_utils.gv_get_quality(source)
                 direct = True
             elif 'youtube' in stream_url:
                 quality = QUALITIES.HD720
                 direct = False
                 host = 'youtube.com'
             else:
                 quality = QUALITIES.HIGH
                 direct = True
             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
             hosters.append(hoster)
     return hosters

Пример #2

0

Показать файл

Файл: dayt_scraper.py Проект: enursha101/xbmc-addon

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        sources = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=1)
        iframes = dom_parser2.parse_dom(html, 'iframe', req='src')
        for attrs, _content in iframes:
            iframe_url = attrs['src']
            if 'docs.google.com' in iframe_url:
                sources = scraper_utils.parse_google(self, iframe_url)
                break
            else:
                iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
                html = self._http_get(iframe_url, cache_limit=1)
                iframes += dom_parser2.parse_dom(html, 'iframe', req='src')

        for source in sources:
            host = scraper_utils.get_direct_hostname(self, source)
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.gv_get_quality(source),
                'views': None,
                'rating': None,
                'url': source,
                'direct': True
            }
            hosters.append(hoster)

        return hosters

Пример #3

0

Показать файл

Файл: pubfilm_scraper.py Проект: CYBERxNUKE/xbmc-addon

 def __get_gk_links(self, link, iframe_url):
     sources = {}
     data = {'link': link}
     headers = XHR
     headers.update({'Referer': iframe_url, 'User-Agent': USER_AGENT})
     html = self._http_get(GK_URL, data=data, headers=headers, cache_limit=.25)
     js_data = scraper_utils.parse_json(html, GK_URL)
     if 'link' in js_data:
         if isinstance(js_data['link'], basestring):
             stream_url = js_data['link']
             if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                 for source in scraper_utils.parse_google(self, stream_url):
                     sources[source] = {'quality': scraper_utils.gv_get_quality(source), 'direct': True}
             else:
                 sources[stream_url] = {'quality': QUALITIES.HIGH, 'direct': False}
         else:
             for link in js_data['link']:
                 stream_url = link['link']
                 if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                     quality = scraper_utils.gv_get_quality(stream_url)
                 elif 'label' in link:
                     quality = scraper_utils.height_get_quality(link['label'])
                 else:
                     quality = QUALITIES.HIGH
                 sources[stream_url] = {'quality': quality, 'direct': True}
     return sources

Пример #4

0

Показать файл

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
        if fragment: html = fragment[0].content
        iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
        if not iframe_url: return hosters
        iframe_url = iframe_url[0].attrs['src']
        if iframe_url.startswith('/'):
            iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
        html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
        obj = dom_parser2.parse_dom(html, 'object', req='data')
        if obj:
            streams = dict((stream_url, {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}) for stream_url in
                           scraper_utils.parse_google(self, obj[0].attrs['data']))
        else:
            streams = scraper_utils.parse_sources_list(self, html)
            
        for stream_url, values in streams.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = values['quality']
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                 
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            hosters.append(source)

        return hosters

Пример #5

0

Показать файл

    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
        fragment = dom_parser2.parse_dom(html, 'table',
                                         {'class': 'links-table'})
        if not fragment: return hosters
        for _attrs, row in dom_parser2.parse_dom(fragment[0].content, 'tr'):
            match = re.search(
                "playVideo\.bind\(.*?'([^']+)(?:[^>]*>){2}(.*?)</td>", row,
                re.DOTALL)
            if not match: continue

            stream_url, release = match.groups()
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                sources = scraper_utils.parse_google(self, stream_url)
            else:
                sources = [stream_url]

            for source in sources:
                host = scraper_utils.get_direct_hostname(self, source)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(source)
                    direct = True
                else:
                    host = urlparse.urlparse(source).hostname
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        meta = scraper_utils.parse_movie_link(release)
                    else:
                        meta = scraper_utils.parse_episode_link(release)
                    base_quality = scraper_utils.height_get_quality(
                        meta['height'])
                    quality = scraper_utils.get_quality(
                        video, host, base_quality)
                    direct = False
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': source,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters

Пример #6

0

Показать файл

    def get_sources(self, video):
        source_url = self.get_url(video)
        sources = {}
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=0)
        for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
            match = re.search('src="([^"]+)', match.group(2))
            if match:
                iframe_url = match.group(1)
                if 'play-en.php' in iframe_url:
                    match = re.search('id=([^"&]+)', iframe_url)
                    if match:
                        proxy_link = match.group(1)
                        proxy_link = proxy_link.split('*', 1)[-1]
                        picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                        for stream_url in scraper_utils.parse_google(self, picasa_url):
                            sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
                else:
                    html = self._http_get(iframe_url, cache_limit=0)
                    temp_sources = scraper_utils.parse_sources_list(self, html)
                    for source in temp_sources:
                        if 'download.php' in source:
                            redir_html = self._http_get(source, allow_redirect=False, method='HEAD', cache_limit=0)
                            if redir_html.startswith('http'):
                                temp_sources[redir_html] = temp_sources[source]
                                del temp_sources[source]
                    sources.update(temp_sources)
                    for source in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src'):
                        sources[source.attrs['src']] = {'quality': QUALITIES.HD720, 'direct': True, 'referer': iframe_url}
                                
        for source, values in sources.iteritems():
            host = scraper_utils.get_direct_hostname(self, source)
            headers = {'User-Agent': scraper_utils.get_ua()}
            if 'referer' in values: headers['Referer'] = values['referer']
            stream_url = source + scraper_utils.append_headers(headers)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
            else:
                quality = values['quality']
                if quality not in Q_ORDER:
                    quality = QUALITY_MAP.get(values['quality'], QUALITIES.HIGH)
                    
            hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            hosters.append(hoster)

        return hosters

Пример #7

0

Показать файл

Файл: piratejunkies_scraper.py Проект: enursha101/xbmc-addon

    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
        html = self._http_get(js_url, cache_limit=48)
        if source_url.startswith('/'):
            source_url = source_url[1:]
        pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (
            source_url)
        match = re.search(pattern, html, re.I)
        if match:
            stream_url = match.group(1)
            if 'drive.google' in stream_url or 'docs.google' in stream_url:
                sources = scraper_utils.parse_google(self, stream_url)
            else:
                sources = [stream_url]

            for source in sources:
                stream_url = source + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
                host = scraper_utils.get_direct_hostname(self, source)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(source)
                    direct = True
                elif 'youtube' in stream_url:
                    quality = QUALITIES.HD720
                    direct = False
                    host = 'youtube.com'
                else:
                    quality = QUALITIES.HIGH
                    direct = True
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': direct
                }
                hosters.append(hoster)
        return hosters

Пример #8

0

Показать файл

Файл: pubfilm_scraper.py Проект: enursha101/xbmc-addon

 def __get_gk_links(self, link, iframe_url):
     sources = {}
     data = {'link': link}
     headers = XHR
     headers.update({'Referer': iframe_url, 'User-Agent': USER_AGENT})
     html = self._http_get(GK_URL,
                           data=data,
                           headers=headers,
                           cache_limit=.25)
     js_data = scraper_utils.parse_json(html, GK_URL)
     if 'link' in js_data:
         if isinstance(js_data['link'], basestring):
             stream_url = js_data['link']
             if scraper_utils.get_direct_hostname(self,
                                                  stream_url) == 'gvideo':
                 for source in scraper_utils.parse_google(self, stream_url):
                     sources[source] = {
                         'quality': scraper_utils.gv_get_quality(source),
                         'direct': True
                     }
             else:
                 sources[stream_url] = {
                     'quality': QUALITIES.HIGH,
                     'direct': False
                 }
         else:
             for link in js_data['link']:
                 stream_url = link['link']
                 if scraper_utils.get_direct_hostname(
                         self, stream_url) == 'gvideo':
                     quality = scraper_utils.gv_get_quality(stream_url)
                 elif 'label' in link:
                     quality = scraper_utils.height_get_quality(
                         link['label'])
                 else:
                     quality = QUALITIES.HIGH
                 sources[stream_url] = {'quality': quality, 'direct': True}
     return sources

Пример #9

0

Показать файл

Файл: dayt_scraper.py Проект: CYBERxNUKE/xbmc-addon

 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     sources = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=1)
     iframes = dom_parser2.parse_dom(html, 'iframe', req='src')
     for attrs, _content in iframes:
         iframe_url = attrs['src']
         if 'docs.google.com' in iframe_url:
             sources = scraper_utils.parse_google(self, iframe_url)
             break
         else:
             iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
             html = self._http_get(iframe_url, cache_limit=1)
             iframes += dom_parser2.parse_dom(html, 'iframe', req='src')
     
     for source in sources:
         host = scraper_utils.get_direct_hostname(self, source)
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': scraper_utils.gv_get_quality(source), 'views': None, 'rating': None, 'url': source, 'direct': True}
         hosters.append(hoster)
 
     return hosters

Python parse_google примеры использования