def refresh(self, url, channel_url, max_episodes):
    """Fetch a channel or playlist contents.

    Doesn't yet fetch video entry information, so we only get the
    video id and title.

    :param url: feed url passed to youtube-dl for extraction
    :param channel_url: channel url used to fetch cover and description
    :param max_episodes: maximum number of episodes exposed by the feed
    :returns: feedcore.Result wrapping a YoutubeFeed
    """
    # Duplicate a bit of the YoutubeDL machinery here because we only
    # want to parse the channel/playlist first, not to fetch video entries.
    # We call YoutubeDL.extract_info(process=False), so we have to call
    # extract_info again ourselves when we get a result of type 'url'.
    def extract_type(ie_result):
        """Return (result_type, has_playlist) for an unprocessed ie_result."""
        result_type = ie_result.get('_type', 'video')
        if result_type not in ('url', 'playlist', 'multi_video'):
            # Fixed typo in the error message ("Unsuported" -> "Unsupported").
            raise Exception(
                'Unsupported result_type: {}'.format(result_type))
        has_playlist = result_type in ('playlist', 'multi_video')
        return result_type, has_playlist

    opts = {
        'youtube_include_dash_manifest': False,  # only interested in video title and id
    }
    opts.update(self._ydl_opts)
    with youtube_dl.YoutubeDL(opts) as ydl:
        ie_result = ydl.extract_info(url, download=False, process=False)
        result_type, has_playlist = extract_type(ie_result)
        while not has_playlist:
            # NOTE(review): the 'url_transparent' case below is currently
            # unreachable, because extract_type() raises for that type —
            # confirm whether 'url_transparent' results need a re-extract
            # path before accepting that type.
            if result_type in ('url', 'url_transparent'):
                ie_result['url'] = sanitize_url(ie_result['url'])
            if result_type == 'url':
                logger.debug("extract_info(%s) to get the video list",
                             ie_result['url'])
                # We have to add extra_info to the results because it may be
                # contained in a playlist
                ie_result = ydl.extract_info(
                    ie_result['url'], download=False, process=False,
                    ie_key=ie_result.get('ie_key'))
            result_type, has_playlist = extract_type(ie_result)
    cover_url = youtube.get_cover(
        channel_url)  # youtube-dl doesn't provide the cover url!
    description = youtube.get_channel_desc(
        channel_url)  # youtube-dl doesn't provide the description!
    return feedcore.Result(
        feedcore.UPDATED_FEED,
        YoutubeFeed(url, cover_url, description, max_episodes, ie_result, self))
def extractChannelSubscription(self, url, user_name=None):
    """Resolve *url* with youtube-dl and return the channel/playlist entries.

    :param url: url of the channel/playlist page
    :param user_name: unused, kept for interface compatibility
    :returns: list of entry dicts, or None when no extractor matches,
        extraction yields nothing, or an ignored error occurs
    :raises MaxDownloadsReached: always re-raised
    :raises Exception: re-raised unless 'ignoreerrors' is set in ydl params
    """
    entries = None
    ydl_opts = {}
    youtube_dl.utils.std_headers['User-Agent'] = self._user_agent
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ie = self._selectExtractor(ydl._ies, url)
        if ie is None:
            return None
        ie = ydl.get_info_extractor(ie.ie_key())
        if not ie.working():
            ydl.report_warning(
                'The program functionality for this site has been marked as broken, '
                'and will probably not work.')
        try:
            ie_result = ie.extract(url)
            if ie_result is None:
                return None
            ydl.add_default_extra_info(ie_result, ie, url)
            # The first extraction returns a redirect ('url' type), so we
            # have to select an extractor again for the resolved url.
            # Use the sanitized url for both selection and extraction
            # (the original selected on the raw url but extracted the
            # sanitized one).
            url = sanitize_url(ie_result['url'])
            ie = self._selectExtractor(ydl._ies, url)
            if ie is None:
                # Bug fix: previously crashed with AttributeError when no
                # extractor matched the redirected url.
                return None
            ie = ydl.get_info_extractor(ie.ie_key())
            ie_result = ie.extract(url)
            # islice(it, 0, None) is just an eager copy; list() is clearer.
            entries = list(ie_result['entries'])
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            ydl.report_error(msg)
        except ExtractorError as e:
            # An error we somewhat expected
            ydl.report_error(compat_str(e), e.format_traceback())
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if ydl.params.get('ignoreerrors', False):
                ydl.report_error(error_to_compat_str(e),
                                 tb=encode_compat_str(traceback.format_exc()))
                return None
            else:
                raise
    return entries
def add(self, url):
    """Sanitize *url* and append it to the queue.

    :param url: raw url string; normalized via sanitize_url before queuing
    """
    # sanitize_url is already imported at module level (the feed-refresh
    # code uses it bare), so avoid re-importing youtube_dl.utils per call.
    safe_url = sanitize_url(url)
    self.queue.put(safe_url)
    print("QueueManager added url {} to queue".format(safe_url))
def test_sanitize_url_slash_expansion(self):
    """A scheme-relative url ('//host') gains an explicit http scheme."""
    result = sanitize_url('//google.com')
    self.assertEqual(result, 'http://google.com')
def test_full_prefix_sanitize_url(self):
    """A bare host with no scheme at all gets 'http://' prepended."""
    expected = 'http://www.google.com'
    self.assertEqual(sanitize_url('www.google.com'), expected)
def test_no_colon_prefix_sanitize_url(self):
    """A scheme missing its colon ('http//') is repaired to 'http://'."""
    result = sanitize_url('http//google.com')
    self.assertEqual(result, 'http://google.com')
def test_sanitize_url_base_case(self):
    """An already well-formed http url passes through unchanged."""
    url = 'http://google.com'
    self.assertEqual(sanitize_url(url), url)