def js_url(self):
    """Return the player JavaScript URL, resolving it on first use.

    When ``self._js_url`` is empty, the URL is extracted from the embed
    page for age-restricted videos and from the watch page otherwise, and
    the result is stored in ``self._js_url`` for later calls.

    :returns: The stored value of ``self._js_url``.
    """
    if not self._js_url:
        # Only the page that applies to this video is read.
        page_html = (
            self.embed_html if self.age_restricted else self.watch_html
        )
        self._js_url = extract.js_url(page_html)
    return self._js_url
def init(self):
    """Descramble the stream data and build Stream and Caption instances.

    Parses the raw ``vid_info`` query string into a dict, selects the
    player config arguments (``vid_info`` itself for age-restricted
    videos, otherwise the ``'args'`` entry of the watch-page player
    config), back-fills a missing ``'title'`` from the page's ``<title>``
    tag, then descrambles and signs each stream map in place before
    building the stream and caption objects.

    :rtype: None
    """
    logger.info('init started')

    self.vid_info = dict(parse_qsl(self.vid_info))
    if self.age_restricted:
        self.player_config_args = self.vid_info
    else:
        self.player_config_args = extract.get_ytplayer_config(
            self.watch_html,
        )['args']

        if 'title' not in self.player_config_args:
            # Prefer a real HTML parser when bs4 is installed.
            try:
                from bs4 import BeautifulSoup
                soup = BeautifulSoup(self.watch_html, 'lxml')
                title = soup.title.get_text().strip()
            except ModuleNotFoundError:
                # Without bs4 the tag is simple enough to locate by index.
                # Entities are decoded so the title is not left as raw
                # markup such as ``&amp;``.
                from html import unescape
                lowered_html = self.watch_html.lower()
                i_start = lowered_html.index('<title>') + len('<title>')
                i_end = lowered_html.index('</title>')
                title = unescape(self.watch_html[i_start:i_end]).strip()

            # Remove the ' - youtube' suffix added to the browser tab title.
            index = title.lower().rfind(' - youtube')
            title = title[:index] if index > 0 else title
            self.player_config_args['title'] = title

    self.vid_descr = extract.get_vid_descr(self.watch_html)

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ['url_encoded_fmt_stream_map']
    if 'adaptive_fmts' in self.player_config_args:
        stream_maps.append('adaptive_fmts')

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        if not self.age_restricted and fmt in self.vid_info:
            mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)

        try:
            mixins.apply_signature(self.player_config_args, fmt, self.js)
        except TypeError:
            # Retry once with the JavaScript referenced by the embed page.
            self.js_url = extract.js_url(
                self.embed_html, self.age_restricted,
            )
            self.js = request.get(self.js_url)
            mixins.apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    apply_mixin(self.player_config_args, 'player_response', json.loads)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def test_js_url(cipher_signature):
    """Check that ``extract.js_url`` yields a player ``base.js`` URL.

    The player id segment of the URL is not fixed, so the result is
    matched against a pattern instead of an exact string.
    """
    # Dots are escaped so they match literally, not "any character".
    expected = (
        r"https://youtube\.com/s/player/(\w+)/player_ias\.vflset"
        r"/en_US/base\.js"
    )
    result = extract.js_url(cipher_signature.watch_html)
    assert re.search(expected, result) is not None
def prefetch(self) -> None:
    """Eagerly download all necessary data.

    Executes every network request up front so that later operations do
    not need to make blocking calls outside of the interpreter.

    :raises VideoUnavailable: If the watch page could not be fetched, or
        a non-age-restricted page contains "This video is private".
    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)
    if self.watch_html is None:
        raise VideoUnavailable(video_id=self.video_id)

    self.age_restricted = extract.is_age_restricted(self.watch_html)

    if self.age_restricted:
        # Age-restricted videos need the embed page and a dedicated
        # info URL; no player JavaScript is fetched for them here.
        if not self.embed_html:
            self.embed_html = request.get(url=self.embed_url)
        self.vid_info_url = extract.video_info_url_age_restricted(
            self.video_id, self.watch_url
        )
        self.vid_info_raw = request.get(self.vid_info_url)
        return

    if "This video is private" in self.watch_html:
        raise VideoUnavailable(video_id=self.video_id)

    self.vid_info_url = extract.video_info_url(
        video_id=self.video_id, watch_url=self.watch_url
    )
    self.vid_info_raw = request.get(self.vid_info_url)
    self.js_url = extract.js_url(self.watch_html)
    self.js = request.get(self.js_url)
def prefetch(self) -> None:
    """Eagerly download all necessary data.

    Executes every network request up front so that later operations do
    not need to make blocking calls outside of the interpreter.

    :raises VideoUnavailable: If the watch page could not be fetched or
        does not contain the expected ``icon meh`` image markup.
    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)
    if self.watch_html is None:
        raise VideoUnavailable(video_id=self.video_id)
    if '<img class="icon meh" src="/yts/img' not in self.watch_html:
        raise VideoUnavailable(video_id=self.video_id)

    self.embed_html = request.get(url=self.embed_url)
    self.age_restricted = extract.is_age_restricted(self.watch_html)

    info_url_args = {
        'video_id': self.video_id,
        'watch_url': self.watch_url,
        'embed_html': self.embed_html,
        'age_restricted': self.age_restricted,
    }
    self.vid_info_url = extract.video_info_url(**info_url_args)
    self.vid_info_raw = request.get(self.vid_info_url)

    # The player JavaScript is only fetched for unrestricted videos.
    if self.age_restricted:
        return
    self.js_url = extract.js_url(self.watch_html, self.age_restricted)
    self.js = request.get(self.js_url)
def prefetch(self):
    """Eagerly download all necessary data.

    Executes every network request up front so that later operations do
    not need to make blocking calls outside of the interpreter.

    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)

    # 30.07.2020: the '<img class="icon meh" src="/yts/img' availability
    # check (which raised VideoUnavailable) is disabled here; see
    # github.com/nficano/pytube/issues/499 and
    # github.com/nficano/pytube/issues/337.

    self.embed_html = request.get(url=self.embed_url)
    self.age_restricted = extract.is_age_restricted(self.watch_html)

    info_url_args = {
        'video_id': self.video_id,
        'watch_url': self.watch_url,
        'watch_html': self.watch_html,
        'embed_html': self.embed_html,
        'age_restricted': self.age_restricted,
    }
    self.vid_info_url = extract.video_info_url(**info_url_args)
    self.vid_info = request.get(self.vid_info_url)

    # The player JavaScript is only fetched for unrestricted videos.
    if self.age_restricted:
        return
    self.js_url = extract.js_url(self.watch_html, self.age_restricted)
    self.js = request.get(self.js_url)
def init(self):
    """Descramble the stream data and build Stream instances.

    Parses the raw ``vid_info`` query string into a dict, selects the
    player config arguments (``vid_info`` itself for age-restricted
    videos, otherwise the ``'args'`` entry of the watch-page player
    config), then descrambles and signs each stream map. The dictionaries
    are transformed in place rather than copied at each step.

    :rtype: None
    """
    logger.info('init started')

    self.vid_info = dict(parse_qsl(self.vid_info))
    if self.age_restricted:
        self.player_config_args = self.vid_info
    else:
        self.player_config_args = extract.get_ytplayer_config(
            self.watch_html,
        )['args']

        # Fix for KeyError: 'title' issue #434
        if 'title' not in self.player_config_args:
            from html import unescape

            lowered_html = self.watch_html.lower()
            i_start = lowered_html.index('<title>') + len('<title>')
            i_end = lowered_html.index('</title>')
            title = self.watch_html[i_start:i_end].strip()
            # Drop the ' - youtube' suffix of the browser tab title.
            index = title.lower().rfind(' - youtube')
            title = title[:index] if index > 0 else title
            # Decode entities so the title is not left as raw markup
            # such as ``&amp;``.
            self.player_config_args['title'] = unescape(title)

    self.vid_descr = extract.get_vid_descr(self.watch_html)

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ['url_encoded_fmt_stream_map']
    if 'adaptive_fmts' in self.player_config_args:
        stream_maps.append('adaptive_fmts')

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        if not self.age_restricted and fmt in self.vid_info:
            mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)

        try:
            mixins.apply_signature(self.player_config_args, fmt, self.js)
        except TypeError:
            # Retry once with the JavaScript referenced by the embed page.
            self.js_url = extract.js_url(
                self.embed_html, self.age_restricted,
            )
            self.js = request.get(self.js_url)
            mixins.apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    apply_mixin(self.player_config_args, 'player_response', json.loads)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def prefetch(self):
    """Eagerly download all necessary data.

    Executes every network request up front so that later operations do
    not need to make blocking calls outside of the interpreter.

    :raises VideoUnavailable: If the watch page contains
        ``id="player-unavailable"``.
    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)
    unavailable = 'id="player-unavailable"' in self.watch_html
    if unavailable:
        raise VideoUnavailable('This video is not available.')

    self.embed_html = request.get(url=self.embed_url)
    self.age_restricted = extract.is_age_restricted(self.watch_html)

    info_url_args = {
        'video_id': self.video_id,
        'watch_url': self.watch_url,
        'watch_html': self.watch_html,
        'embed_html': self.embed_html,
        'age_restricted': self.age_restricted,
    }
    self.vid_info_url = extract.video_info_url(**info_url_args)
    self.vid_info = request.get(self.vid_info_url)

    # The player JavaScript is only fetched for unrestricted videos.
    if self.age_restricted:
        return
    self.js_url = extract.js_url(self.watch_html, self.age_restricted)
    self.js = request.get(self.js_url)
def getJS(self) -> None:
    """Load the player JavaScript, reusing the module-level cache.

    Fetches the generic watch page (and the embed page when the watch
    page is reported as age-restricted) to find the player JavaScript
    URL. The script is downloaded only when that URL differs from
    ``pytube.__js_url__``; otherwise ``pytube.__js__`` is reused.

    Every HTTP response is closed as soon as its body has been read.
    NOTE(review): assumes ``urlopen`` is ``urllib.request.urlopen``, whose
    return value supports the context-manager protocol - confirm.

    :rtype: None
    """
    with urlopen("https://youtube.com/watch", timeout=None) as response:
        watch_html = response.read().decode('utf_8')

    if extract.is_age_restricted(watch_html):
        with urlopen(
            "https://www.youtube.com/embed", timeout=None
        ) as response:
            embed_html = response.read().decode('utf_8')
        self.js_url = extract.js_url(embed_html)
    else:
        self.js_url = extract.js_url(watch_html)

    if pytube.__js_url__ != self.js_url:
        # Cache miss: download the script and refresh the shared cache.
        with urlopen(self.js_url, timeout=None) as response:
            self.js = response.read().decode('utf_8')
        pytube.__js__ = self.js
        pytube.__js_url__ = self.js_url
    else:
        self.js = pytube.__js__
def descramble(self) -> None:
    """Descramble the stream data and build Stream instances.

    Parses ``vid_info_raw`` into ``vid_info``, selects the player config
    arguments (``vid_info`` itself for age-restricted videos, otherwise
    the ``"args"`` entry of the watch-page player config), then
    descrambles and signs every stream map before building the stream
    objects. The dictionaries are transformed in place rather than copied
    at each step.

    :rtype: None
    """
    logger.info("init started")

    self.vid_info = dict(parse_qsl(self.vid_info_raw))
    if self.age_restricted:
        self.player_config_args = self.vid_info
    else:
        assert self.watch_html is not None
        self.player_config_args = get_ytplayer_config(self.watch_html)["args"]

        # Fix for KeyError: 'title' issue #434
        # When the config has no title, take it from the page's <title>
        # tag, drop a trailing " - youtube" and decode HTML entities.
        if "title" not in self.player_config_args:  # type: ignore
            i_start = self.watch_html.lower().index("<title>") + len("<title>")
            i_end = self.watch_html.lower().index("</title>")
            title = self.watch_html[i_start:i_end].strip()

            index = title.lower().rfind(" - youtube")
            title = title[:index] if index > 0 else title
            self.player_config_args["title"] = unescape(title)

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ["url_encoded_fmt_stream_map"]
    if "adaptive_fmts" in self.player_config_args:
        stream_maps.append("adaptive_fmts")

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        if not self.age_restricted and fmt in self.vid_info:
            apply_descrambler(self.vid_info, fmt)
        apply_descrambler(self.player_config_args, fmt)

        # Fetch the player JavaScript on demand (via the embed page) when
        # it has not been loaded yet.
        if not self.js:
            if not self.embed_html:
                self.embed_html = request.get(url=self.embed_url)
            self.js_url = extract.js_url(self.embed_html)
            self.js = request.get(self.js_url)

        apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    self.player_response = json.loads(self.player_config_args["player_response"])
    # The raw JSON string is removed once it has been parsed.
    del self.player_config_args["player_response"]
    self.stream_monostate.title = self.title
    self.stream_monostate.duration = self.length
    logger.info("init finished successfully")
def descramble(self) -> None:
    """Descramble the stream data and build Stream instances.

    Parses ``vid_info_raw`` into ``vid_info`` and uses that same dict as
    the player config arguments, substitutes the watch-page player config
    when the ``player_response`` entry lacks ``streamingData``, then
    descrambles and signs every stream map before building the stream
    objects. The dictionaries are transformed in place rather than copied
    at each step.

    :rtype: None
    """
    self.vid_info = dict(parse_qsl(self.vid_info_raw))
    # player_config_args is the same dict object as vid_info from here on.
    self.player_config_args = self.vid_info
    self.player_response = json.loads(self.vid_info['player_response'])

    # On pre-signed videos, we need to use get_ytplayer_config to fix
    # the player_response item
    # (the membership test below runs against the stored, unparsed value)
    if 'streamingData' not in self.player_config_args['player_response']:
        config_response = get_ytplayer_config(self.watch_html)
        if 'args' in config_response:
            self.player_config_args['player_response'] = config_response['args']['player_response']  # noqa: E501
        else:
            self.player_config_args['player_response'] = config_response

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ["url_encoded_fmt_stream_map"]
    if "adaptive_fmts" in self.player_config_args:
        stream_maps.append("adaptive_fmts")

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        # NOTE(review): vid_info and player_config_args are one dict, so
        # for non-age-restricted videos the descrambler is applied twice
        # to the same object - confirm apply_descrambler tolerates that.
        if not self.age_restricted and fmt in self.vid_info:
            apply_descrambler(self.vid_info, fmt)
        apply_descrambler(self.player_config_args, fmt)

        # Fetch the player JavaScript on demand (via the embed page) when
        # it has not been loaded yet.
        if not self.js:
            if not self.embed_html:
                self.embed_html = request.get(url=self.embed_url)
            self.js_url = extract.js_url(self.embed_html)
            self.js = request.get(self.js_url)

        apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    # It is only parsed when still a string; the branch above may have
    # stored a non-string config object instead.
    if isinstance(self.player_config_args["player_response"], str):
        self.player_response = json.loads(
            self.player_config_args["player_response"]
        )
    else:
        self.player_response = self.player_config_args["player_response"]
    del self.player_config_args["player_response"]
    self.stream_monostate.title = self.title
    self.stream_monostate.duration = self.length
async def getJavaScript(self) -> None:
    '''Gets player JavaScript from YouTube, avoid calling more than once.

    Resolves ``self.js_url`` from the generic watch page (or from the
    embed page when the watch page is reported as age-restricted) and
    downloads the script into ``self.js`` only when that URL differs from
    the module-level ``js_url``.
    '''
    global js_url

    async def fetch_text(url):
        # Each request uses its own short-lived client, with no timeout.
        async with httpx.AsyncClient() as client:
            return (await client.get(url, timeout=None)).text

    watch_html = await fetch_text('https://youtube.com/watch')
    if extract.is_age_restricted(watch_html):
        source_html = await fetch_text('https://www.youtube.com/embed')
    else:
        source_html = watch_html
    self.js_url = extract.js_url(source_html)

    if js_url != self.js_url:
        self.js = await fetch_text(self.js_url)
def prefetch(self, multithread=True):
    """Eagerly download all necessary data.

    Executes every network request up front so that later operations do
    not need to make blocking calls outside of the interpreter.

    :param bool multithread:
        When true, the watch/embed pages are fetched concurrently through
        ``self.do_get``, and so are the video info and player JavaScript.
        When false, everything is fetched sequentially with
        ``request.get``; for age-restricted videos ``self.js`` is then
        left untouched.
    :raises VideoUnavailable: If the watch page does not contain the
        expected ``icon meh`` image markup.
    :rtype: None
    """
    if multithread:
        results = [None] * 2
        threads = [
            Thread(target=self.do_get, args=(url, results, i))
            for i, url in enumerate([self.watch_url, self.embed_url])
        ]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        self.watch_html, self.embed_html = results
    else:
        self.watch_html = request.get(url=self.watch_url)
        self.embed_html = request.get(url=self.embed_url)

    if '<img class="icon meh" src="/yts/img' not in self.watch_html:
        raise VideoUnavailable('This video is unavailable.')

    self.age_restricted = extract.is_age_restricted(self.watch_html)
    self.vid_info_url = extract.video_info_url(
        video_id=self.video_id,
        watch_url=self.watch_url,
        watch_html=self.watch_html,
        embed_html=self.embed_html,
        age_restricted=self.age_restricted,
    )

    if multithread:
        # Slot 0 receives the video info, slot 1 the player JavaScript.
        results = [None] * 2
        threads = [
            Thread(target=self.do_get, args=(self.vid_info_url, results, 0))
        ]
        threads[0].start()
    else:
        self.vid_info = request.get(self.vid_info_url)

    if not self.age_restricted:
        self.js_url = extract.js_url(self.watch_html, self.age_restricted)
        if multithread:
            threads.append(
                Thread(target=self.do_get, args=(self.js_url, results, 1))
            )
            threads[1].start()
        else:
            self.js = request.get(self.js_url)

    # Threads are joined only when they were created; the sequential path
    # has none, which previously raised NameError for age-restricted
    # videos.
    if multithread:
        for thread in threads:
            thread.join()
        self.vid_info, self.js = results
def prefetch(self) -> None:
    """Eagerly download all necessary data.

    Executes every network request up front so that later operations do
    not need to make blocking calls outside of the interpreter. The
    player JavaScript is reused from ``pytube.__js__`` when its URL
    matches ``pytube.__js_url__``.

    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)
    self.check_availability()
    self.age_restricted = extract.is_age_restricted(self.watch_html)

    if not self.age_restricted:
        self.vid_info_url = extract.video_info_url(
            video_id=self.video_id, watch_url=self.watch_url)
        js_source_html = self.watch_html
    else:
        # Age-restricted videos resolve the JavaScript via the embed page.
        if not self.embed_html:
            self.embed_html = request.get(url=self.embed_url)
        self.vid_info_url = extract.video_info_url_age_restricted(
            self.video_id, self.watch_url)
        js_source_html = self.embed_html
    self.js_url = extract.js_url(js_source_html)

    self.initial_data = extract.initial_data(self.watch_html)
    self.vid_info_raw = request.get(self.vid_info_url)

    # Cache hit: the stored script belongs to the same URL, so reuse it.
    if pytube.__js_url__ == self.js_url:
        self.js = pytube.__js__
        return

    # Cache miss: fetch the new script and update the cache.
    self.js = request.get(self.js_url)
    pytube.__js__ = self.js
    pytube.__js_url__ = self.js_url
def _getJS(self) -> None:
    """Load the player JavaScript, reusing the module-level cache.

    Resolves the JavaScript URL from the generic watch page and downloads
    the script only when that URL differs from ``pytube.__js_url__``;
    otherwise ``pytube.__js__`` is reused. Every HTTP response is closed
    as soon as its body has been read.
    NOTE(review): assumes ``urlopen`` is ``urllib.request.urlopen``, whose
    return value supports the context-manager protocol - confirm.

    :raises Exception: With the message 'ERROR: Could not make request.'
        when any step fails; the original error is attached as the cause.
    :rtype: None
    """
    try:
        with urlopen('https://youtube.com/watch', timeout=None) as response:
            watch_html = response.read().decode('utf_8')
        self._js_url = extract.js_url(watch_html)

        if pytube.__js_url__ != self._js_url:
            with urlopen(self._js_url, timeout=None) as response:
                self._js = response.read().decode('utf_8')
            pytube.__js__ = self._js
            pytube.__js_url__ = self._js_url
        else:
            self._js = pytube.__js__
    except Exception as err:
        # ``except Exception`` lets KeyboardInterrupt and SystemExit
        # propagate instead of being converted into a request error.
        raise Exception('ERROR: Could not make request.') from err
async def getJS(self) -> None:
    """Load the player JavaScript, reusing the module-level cache.

    Resolves ``self.js_url`` from the YouTube home page and downloads the
    script only when that URL differs from ``pytube.__js_url__``;
    otherwise ``pytube.__js__`` is reused.

    :rtype: None
    """
    # Removed v parameter from the query.
    # (No idea about why PyTube bothered with that)
    async with httpx.AsyncClient() as client:
        page = await client.get("https://www.youtube.com/", timeout=None)
        watch_html = page.text
    self.js_url = extract.js_url(watch_html)

    # Cache hit: the stored script belongs to the same URL.
    if pytube.__js_url__ == self.js_url:
        self.js = pytube.__js__
        return

    async with httpx.AsyncClient() as client:
        script = await client.get(self.js_url, timeout=None)
        self.js = script.text
    pytube.__js__ = self.js
    pytube.__js_url__ = self.js_url
def prefetch(self):
    """Eagerly download all necessary data.

    Executes every network request up front so that later operations do
    not need to make blocking calls outside of the interpreter.

    :raises AgeRestrictionError: If the watch page is reported as
        age-restricted.
    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)
    restricted = extract.is_age_restricted(self.watch_html)
    if restricted:
        raise AgeRestrictionError('Content is age restricted')

    info_url_args = {
        'video_id': self.video_id,
        'watch_url': self.watch_url,
        'watch_html': self.watch_html,
    }
    self.vid_info_url = extract.video_info_url(**info_url_args)
    self.js_url = extract.js_url(self.watch_html)

    # The player JavaScript is downloaded before the video info.
    self.js = request.get(self.js_url)
    self.vid_info = request.get(self.vid_info_url)
async def prefetch(self) -> None:
    """Eagerly download all necessary data.

    Awaits every network request up front so that later operations do
    not need to make blocking calls outside of the interpreter.

    :raises VideoUnavailable: If the watch page could not be fetched, or
        a non-age-restricted page contains one of the known
        "unavailable" messages.
    :rtype: None
    """
    self.watch_html = await request.get(url=self.watch_url)
    if self.watch_html is None:
        raise VideoUnavailable(video_id=self.video_id)

    self.age_restricted = extract.is_age_restricted(self.watch_html)

    if self.age_restricted:
        # Age-restricted videos need the embed page and a dedicated
        # info URL; no player JavaScript is fetched for them here.
        if not self.embed_html:
            self.embed_html = await request.get(url=self.embed_url)
        self.vid_info_url = extract.video_info_url_age_restricted(
            self.video_id, self.watch_url)
        self.vid_info_raw = await request.get(self.vid_info_url)
        return

    unavailable_markers = (
        "This video is private",
        "This video is no longer available because the YouTube account "
        "associated with this video has been terminated.",
        "This video is only available to Music Premium members",
        "This video is no longer available due to a copyright claim by",
    )
    if any(marker in self.watch_html for marker in unavailable_markers):
        raise VideoUnavailable(video_id=self.video_id)

    self.vid_info_url = extract.video_info_url(
        video_id=self.video_id, watch_url=self.watch_url)
    self.vid_info_raw = await request.get(self.vid_info_url)
    self.js_url = extract.js_url(self.watch_html)
    self.js = await request.get(self.js_url)
def prefetch(self):
    """Eagerly download all necessary data.

    Executes every network request up front so that later operations do
    not need to make blocking calls outside of the interpreter.

    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)
    self.embed_html = request.get(url=self.embed_url)
    self.age_restricted = extract.is_age_restricted(self.watch_html)

    info_url_args = {
        'video_id': self.video_id,
        'watch_url': self.watch_url,
        'watch_html': self.watch_html,
        'embed_html': self.embed_html,
        'age_restricted': self.age_restricted,
    }
    self.vid_info_url = extract.video_info_url(**info_url_args)
    self.vid_info = request.get(self.vid_info_url)

    # The player JavaScript is only fetched for unrestricted videos.
    if self.age_restricted:
        return
    self.js_url = extract.js_url(self.watch_html)
    self.js = request.get(self.js_url)
def test_js_url(cipher_signature):
    """Check the JavaScript URL extracted from the fixture's watch page."""
    actual = extract.js_url(cipher_signature.watch_html)
    assert actual == (
        'https://youtube.com/yts/jsbin/player-vflOdyxa4/en_US/base.js'
    )
def test_js_url(cipher_signature):
    """Check the JavaScript URL extracted from the fixture's watch page."""
    actual = extract.js_url(cipher_signature.watch_html)
    assert actual == (
        "https://youtube.com/s/player/9b65e980/player_ias.vflset/en_US/base.js"
    )
def _get_cipher(self, videoId):
    """Download the player JavaScript for a video and build its cipher.

    Fetches the embed page for ``videoId``, resolves the player
    JavaScript URL from it, and stores the downloaded script in
    ``self._js`` and a ``Cipher`` built from it in ``self._cipher``.

    :param videoId: Identifier appended to the embed URL.
    """
    embed_html = request.get(
        url="https://www.youtube.com/embed/" + videoId
    )
    self._js = request.get(extract.js_url(embed_html))
    self._cipher = Cipher(js=self._js)
def test_js_url(cipher_signature):
    """Check the JavaScript URL extracted from the fixture's watch page."""
    actual = extract.js_url(cipher_signature.watch_html)
    assert actual == (
        "https://youtube.com/yts/jsbin/player_ias-vflWQEEag/en_US/base.js"
    )