def parse_all_links(Playlist):
    """Collect every /watch?v= link in the playlist.

    Scrapes the playlist page for tile-link anchors, then keeps
    following the "load more" continuation requests (the same AJAX
    calls a browser would issue) until none remain.
    """
    page = request.get(Playlist.construct_playlist_url())
    # only lines carrying the session/tile link class hold video anchors
    anchors = [
        line for line in page.split('\n')
        if 'yt-uix-sessionlink yt-uix-tile-link' in line
    ]
    link_list = [a.split('href="', 1)[1].split('&', 1)[0] for a in anchors]
    # The first page caps out at ~100 links; walk the continuation URLs.
    load_more_url = Playlist._load_more_url(page)
    while len(load_more_url):  # there is an url found
        payload = json.loads(request.get(load_more_url))
        found = re.findall(
            r'href=\"(/watch\?v=[\w-]*)',
            payload['content_html'],
        )
        # OrderedDict.fromkeys de-duplicates while preserving order
        link_list.extend(list(OrderedDict.fromkeys(found)))
        load_more_url = Playlist._load_more_url(
            payload['load_more_widget_html'],
        )
    return link_list
def prefetch(self) -> None:
    """Eagerly download all necessary data.

    Eagerly executes all necessary network requests so all other
    operations don't need to make calls outside of the interpreter
    which blocks for long periods of time.

    :raises VideoUnavailable: if the watch page is missing or lacks the
        availability marker.
    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)
    # Presumably the "icon meh" image only appears on playable watch
    # pages; its absence is treated as "video unavailable".
    if (self.watch_html is None
        or '<img class="icon meh" src="/yts/img'  # noqa: W503
        not in self.watch_html  # noqa: W503
    ):
        raise VideoUnavailable(video_id=self.video_id)
    self.embed_html = request.get(url=self.embed_url)
    self.age_restricted = extract.is_age_restricted(self.watch_html)
    self.vid_info_url = extract.video_info_url(
        video_id=self.video_id,
        watch_url=self.watch_url,
        embed_html=self.embed_html,
        age_restricted=self.age_restricted,
    )
    self.vid_info_raw = request.get(self.vid_info_url)
    # Age-restricted watch pages carry no player JS; skip fetching it.
    if not self.age_restricted:
        self.js_url = extract.js_url(self.watch_html, self.age_restricted)
        self.js = request.get(self.js_url)
def parse_links(self):
    """Parse /watch?v= hrefs out of the playlist page source.

    BeautifulSoup-free alternative: filters the raw HTML line by line
    and slices the anchors by hand, following "load more" continuation
    pages until they run out.
    """
    page = request.get(self.construct_playlist_url())
    rows = [ln for ln in page.split('\n') if 'pl-video-title-link' in ln]
    link_list = [r.split('href="', 1)[1].split('&', 1)[0] for r in rows]
    # The first page yields at most ~100 links; simulate the browser's
    # "load more" requests for the rest.
    load_more_url = self._load_more_url(page)
    while len(load_more_url):  # there is an url found
        logger.debug('load more url: %s' % load_more_url)
        payload = json.loads(request.get(load_more_url))
        found = re.findall(
            r'href=\"(/watch\?v=[\w-]*)',
            payload['content_html'],
        )
        # de-duplicate while preserving insertion order
        link_list.extend(list(OrderedDict.fromkeys(found)))
        load_more_url = self._load_more_url(
            payload['load_more_widget_html'],
        )
    return link_list
def prefetch(self) -> None:
    """Eagerly download all necessary data.

    Eagerly executes all necessary network requests so all other
    operations don't need to make calls outside of the interpreter
    which blocks for long periods of time.

    :raises VideoUnavailable: if the watch page is missing or the video
        is private.
    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)
    if self.watch_html is None:
        raise VideoUnavailable(video_id=self.video_id)
    self.age_restricted = extract.is_age_restricted(self.watch_html)
    # Private videos still serve a watch page; detected via page text.
    if not self.age_restricted and "This video is private" in self.watch_html:
        raise VideoUnavailable(video_id=self.video_id)
    if self.age_restricted:
        # the embed page is required to build the age-gated info URL
        if not self.embed_html:
            self.embed_html = request.get(url=self.embed_url)
        self.vid_info_url = extract.video_info_url_age_restricted(
            self.video_id, self.watch_url
        )
    else:
        self.vid_info_url = extract.video_info_url(
            video_id=self.video_id, watch_url=self.watch_url
        )
    self.vid_info_raw = request.get(self.vid_info_url)
    # Age-restricted pages lack the player JS; only fetch it otherwise.
    if not self.age_restricted:
        self.js_url = extract.js_url(self.watch_html)
        self.js = request.get(self.js_url)
def prefetch(self):
    """Eagerly download all necessary data.

    Eagerly executes all necessary network requests so all other
    operations don't need to make calls outside of the interpreter
    which blocks for long periods of time.

    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)
    # Debug aid (disabled): dump the fetched watch page to disk.
    # with open("/tmp/watch_html", 'w') as f:
    #     f.write(self.watch_html)
    # 2020-07-30: see github.com/nficano/pytube/issues/499 and
    # github.com/nficano/pytube/issues/337 — the "icon meh"
    # availability check below was disabled because it rejected
    # videos that were in fact available:
    # if '<img class="icon meh" src="/yts/img' not in self.watch_html:
    #     raise VideoUnavailable('This video is unavailable.')
    self.embed_html = request.get(url=self.embed_url)
    self.age_restricted = extract.is_age_restricted(self.watch_html)
    self.vid_info_url = extract.video_info_url(
        video_id=self.video_id,
        watch_url=self.watch_url,
        watch_html=self.watch_html,
        embed_html=self.embed_html,
        age_restricted=self.age_restricted,
    )
    self.vid_info = request.get(self.vid_info_url)
    # Age-restricted pages lack the player JS; only fetch it otherwise.
    if not self.age_restricted:
        self.js_url = extract.js_url(self.watch_html, self.age_restricted)
        self.js = request.get(self.js_url)
def prefetch(self):
    """Eagerly download all necessary data.

    Eagerly executes all necessary network requests so all other
    operations don't need to make calls outside of the interpreter
    which blocks for long periods of time.

    :raises VideoUnavailable: if the watch page contains the
        player-unavailable marker.
    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)
    # NOTE(review): this marker may also appear in a hidden div on
    # playable pages — confirm it does not false-positive.
    if 'id="player-unavailable"' in self.watch_html:
        raise VideoUnavailable('This video is not available.')
    self.embed_html = request.get(url=self.embed_url)
    self.age_restricted = extract.is_age_restricted(self.watch_html)
    self.vid_info_url = extract.video_info_url(
        video_id=self.video_id,
        watch_url=self.watch_url,
        watch_html=self.watch_html,
        embed_html=self.embed_html,
        age_restricted=self.age_restricted,
    )
    self.vid_info = request.get(self.vid_info_url)
    # Age-restricted pages lack the player JS; only fetch it otherwise.
    if not self.age_restricted:
        self.js_url = extract.js_url(self.watch_html, self.age_restricted)
        self.js = request.get(self.js_url)
def descramble(self) -> None:
    """Descramble the stream data and build Stream instances.

    The initialization process takes advantage of Python's
    "call-by-reference evaluation," which allows dictionary transforms
    to be applied in-place, instead of holding references to mutations
    at each interstitial step.

    :rtype: None
    """
    logger.info("init started")

    self.vid_info = dict(parse_qsl(self.vid_info_raw))
    if self.age_restricted:
        # age-gated videos: the info query string IS the player config
        self.player_config_args = self.vid_info
    else:
        assert self.watch_html is not None
        self.player_config_args = get_ytplayer_config(self.watch_html)["args"]

        # Fix for KeyError: 'title' issue #434 — fall back to scraping
        # the <title> tag when the player config omits the title.
        if "title" not in self.player_config_args:  # type: ignore
            i_start = self.watch_html.lower().index("<title>") + len("<title>")
            i_end = self.watch_html.lower().index("</title>")
            title = self.watch_html[i_start:i_end].strip()
            # strip the " - YouTube" suffix the browser tab title carries
            index = title.lower().rfind(" - youtube")
            title = title[:index] if index > 0 else title
            self.player_config_args["title"] = unescape(title)

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ["url_encoded_fmt_stream_map"]
    if "adaptive_fmts" in self.player_config_args:
        stream_maps.append("adaptive_fmts")

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        if not self.age_restricted and fmt in self.vid_info:
            apply_descrambler(self.vid_info, fmt)
        apply_descrambler(self.player_config_args, fmt)
        # lazily fetch the player JS (needed to decode signatures)
        if not self.js:
            if not self.embed_html:
                self.embed_html = request.get(url=self.embed_url)
            self.js_url = extract.js_url(self.embed_html)
            self.js = request.get(self.js_url)
        apply_signature(self.player_config_args, fmt, self.js)
        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    self.player_response = json.loads(self.player_config_args["player_response"])
    del self.player_config_args["player_response"]
    self.stream_monostate.title = self.title
    self.stream_monostate.duration = self.length
    logger.info("init finished successfully")
def descramble(self) -> None:
    """Descramble the stream data and build Stream instances.

    The initialization process takes advantage of Python's
    "call-by-reference evaluation," which allows dictionary transforms
    to be applied in-place, instead of holding references to mutations
    at each interstitial step.

    :rtype: None
    """
    self.vid_info = dict(parse_qsl(self.vid_info_raw))
    self.player_config_args = self.vid_info
    self.player_response = json.loads(self.vid_info['player_response'])

    # On pre-signed videos, we need to use get_ytplayer_config to fix
    # the player_response item
    if 'streamingData' not in self.player_config_args['player_response']:
        config_response = get_ytplayer_config(self.watch_html)
        if 'args' in config_response:
            self.player_config_args['player_response'] = config_response['args']['player_response']  # noqa: E501
        else:
            self.player_config_args['player_response'] = config_response

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ["url_encoded_fmt_stream_map"]
    if "adaptive_fmts" in self.player_config_args:
        stream_maps.append("adaptive_fmts")

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        if not self.age_restricted and fmt in self.vid_info:
            apply_descrambler(self.vid_info, fmt)
        apply_descrambler(self.player_config_args, fmt)
        # lazily fetch the player JS (needed to decode signatures)
        if not self.js:
            if not self.embed_html:
                self.embed_html = request.get(url=self.embed_url)
            self.js_url = extract.js_url(self.embed_html)
            self.js = request.get(self.js_url)
        apply_signature(self.player_config_args, fmt, self.js)
        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information);
    # it may already be a parsed dict from the fix-up above.
    if isinstance(self.player_config_args["player_response"], str):
        self.player_response = json.loads(
            self.player_config_args["player_response"]
        )
    else:
        self.player_response = self.player_config_args["player_response"]
    del self.player_config_args["player_response"]
    self.stream_monostate.title = self.title
    self.stream_monostate.duration = self.length
def prefetch(self, multithread=True):
    """Eagerly download all necessary data.

    Eagerly executes all necessary network requests so all other
    operations don't need to make calls outside of the interpreter
    which blocks for long periods of time.

    :param multithread: fetch independent resources on worker threads
        (watch/embed pages first, then vid_info and player JS).
    :type multithread: bool
    :raises VideoUnavailable: if the watch page lacks the availability
        marker image.
    :rtype: None
    """
    if multithread:
        # watch page and embed page are independent — fetch in parallel
        threads, results = [None] * 2, [None] * 2
        for i, url in enumerate([self.watch_url, self.embed_url]):
            threads[i] = Thread(target=self.do_get, args=(url, results, i))
            threads[i].start()
        for i in range(len(threads)):
            threads[i].join()
        self.watch_html, self.embed_html = results
    else:
        self.watch_html = request.get(url=self.watch_url)
        self.embed_html = request.get(url=self.embed_url)
    if '<img class="icon meh" src="/yts/img' not in self.watch_html:
        raise VideoUnavailable('This video is unavailable.')
    self.age_restricted = extract.is_age_restricted(self.watch_html)
    self.vid_info_url = extract.video_info_url(
        video_id=self.video_id,
        watch_url=self.watch_url,
        watch_html=self.watch_html,
        embed_html=self.embed_html,
        age_restricted=self.age_restricted,
    )
    if multithread:
        # kick off the vid_info fetch now; it is joined below once the
        # (optional) player-JS fetch has also been started
        threads, results = [None] * 2, [None] * 2
        threads[0] = Thread(
            target=self.do_get, args=(self.vid_info_url, results, 0))
        threads[0].start()
    else:
        self.vid_info = request.get(self.vid_info_url)
    if not self.age_restricted:
        self.js_url = extract.js_url(self.watch_html, self.age_restricted)
        if multithread:
            threads[1] = Thread(
                target=self.do_get, args=(self.js_url, results, 1))
            threads[1].start()
            threads[0].join()
            threads[1].join()
        else:
            self.js = request.get(self.js_url)
    elif multithread:
        # Bug fix: the original executed threads[0].join() here even in
        # the single-threaded path, where `threads` was never defined
        # (NameError for age-restricted videos with multithread=False).
        threads[0].join()
    if multithread:
        self.vid_info, self.js = results
def download(self, output_path=None, filename=None):
    """Write the media stream to disk, retrying after read timeouts.

    When a streamed chunk comes back empty the download is assumed to
    have stalled: the partial file is flushed, we sleep, and the
    request is reissued with a Range continuation from the partial
    file until the full filesize has been written.

    :param output_path:
        (optional) Output path for writing media file. If one is not
        specified, defaults to the current working directory.
    :type output_path: str or None
    :param filename:
        (optional) Output filename (stem only) for writing media file.
        If one is not specified, the default filename is used.
    :type filename: str or None
    :rtype: None
    """
    output_path = output_path or os.getcwd()
    if filename:
        safe = safe_filename(filename)
        # Bug fix: the old template '(unknown).{s.subtype}' discarded
        # its filename argument; build "<stem>.<subtype>" as intended.
        filename = '{filename}.{s.subtype}'.format(filename=safe, s=self)
    filename = filename or self.default_filename

    # file path
    fp = os.path.join(output_path, filename)
    bytes_remaining = self.filesize
    logger.debug(
        'downloading (%s total bytes) file to %s',
        self.filesize, fp,
    )

    tmpRangefp = None
    isTimeOut = False
    with open(fp, 'wb') as fh:
        while True:
            for chunk in request.get(self.url, streaming=True, conRangefp=tmpRangefp):
                if not chunk:
                    # Bug fix: Python 2 print statements were syntax
                    # errors here; converted to print() calls.
                    print('streams time out sleep 10s')
                    fh.flush()
                    time.sleep(10)
                    nfize = os.path.getsize(fp)
                    print(fp, nfize)
                    if nfize < self.filesize:
                        # restart streaming with a Range request that
                        # resumes from the partial file on disk
                        tmpRangefp = fp
                        isTimeOut = True
                        break
                    else:
                        tmpRangefp = None
                # reduce the (bytes) remainder by the length of the chunk.
                bytes_remaining -= len(chunk)
                # send to the on_progress callback.
                self.on_progress(chunk, fh, bytes_remaining)
            if isTimeOut:
                isTimeOut = False
                print(tmpRangefp)
            else:
                self.on_complete(fh)
                break
def download(self, output_path=None):
    """Write the media stream to disk.

    :param output_path:
        (optional) Output path for writing media file. If one is not
        specified, defaults to the current working directory.
    :type output_path: str or None
    :rtype: None
    """
    # TODO(nficano): allow a filename to specified.
    target_dir = output_path if output_path else os.getcwd()
    fp = os.path.join(target_dir, self.default_filename)

    remaining = self.filesize
    logger.debug(
        'downloading (%s total bytes) file to %s',
        self.filesize, fp,
    )
    with open(fp, 'wb') as fh:
        for chunk in request.get(self.url, streaming=True):
            # shrink the outstanding byte count and report progress
            remaining -= len(chunk)
            self.on_progress(chunk, fh, remaining)
        self.on_complete(fh)
def download(self, output_path=None, filename=None, filename_prefix=None, only_url=None):
    """Write the media stream to disk (or just return its URL).

    :param output_path:
        (optional) Output path for writing media file. If one is not
        specified, defaults to the current working directory.
    :type output_path: str or None
    :param filename:
        (optional) Output filename (stem only) for writing media file.
        If one is not specified, the default filename is used.
    :type filename: str or None
    :param filename_prefix:
        (optional) A string that will be prepended to the filename.
        For example a number in a playlist or the name of a series.
        If one is not specified, nothing will be prepended. This is
        separate from filename so you can use the default filename
        but still add a prefix.
    :type filename_prefix: str or None
    :param only_url: if truthy, skip the download and return the
        stream URL instead of a file path.
    :rtype: str
    """
    if only_url:
        return self.url
    output_path = output_path or os.getcwd()
    if filename:
        safe = safe_filename(filename)
        # Bug fix: the old template '(unknown).{s.subtype}' ignored its
        # filename argument and produced the literal "(unknown).<ext>".
        filename = '{filename}.{s.subtype}'.format(filename=safe, s=self)
    filename = filename or self.default_filename
    if filename_prefix:
        # Bug fix: likewise '{prefix}(unknown)' dropped the filename;
        # prepend the sanitized prefix to the real filename.
        filename = '{prefix}{filename}'.format(
            prefix=safe_filename(filename_prefix),
            filename=filename,
        )

    # file path
    fp = os.path.join(output_path, filename)
    bytes_remaining = self.filesize
    PLog(
        'streams: downloading (%s total bytes) file to %s',
        self.filesize, fp,
    )
    with open(fp, 'wb') as fh:
        for chunk in request.get(self.url, streaming=True):
            # reduce the (bytes) remainder by the length of the chunk.
            bytes_remaining -= len(chunk)
            # send to the on_progress callback.
            self.on_progress(chunk, fh, bytes_remaining)
        self.on_complete(fh)
    return fp
def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
    """Build a Playlist from a full URL or a bare playlist id.

    :param url: playlist URL (with ?list=...) or the raw playlist id.
    :param proxies: (optional) proxy mapping installed globally.
    """
    if proxies:
        install_proxy(proxies)
    try:
        self.playlist_id: str = parse_qs(url.split("?")[1])["list"][0]
    except (IndexError, KeyError):
        # Bug fix: a URL that has a query string but no "list" param
        # raised an uncaught KeyError; treat both failure modes as
        # "url is already the bare playlist id".
        self.playlist_id = url
    self.playlist_url = (
        f"https://www.youtube.com/playlist?list={self.playlist_id}")
    self.html = request.get(self.playlist_url)

    # Needs testing with non-English
    self.last_update: Optional[date] = None
    date_match = re.search(
        r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})</li>", self.html)
    if date_match:
        month, day, year = date_match.groups()
        self.last_update = datetime.strptime(
            f"{month} {day:0>2} {year}", "%b %d %Y").date()

    # pre-compiled patterns used by the pagination helpers
    self._js_regex = re.compile(r"window\[\"ytInitialData\"] = ([^\n]+)")
    self._video_regex = re.compile(r"href=\"(/watch\?v=[\w-]*)")
def init(self):
    """Descramble the stream data and build Stream instances.

    The initialization process takes advantage of Python's
    "call-by-reference evaluation," which allows dictionary transforms
    to be applied in-place, instead of holding references to mutations
    at each interstitial step.

    :rtype: None
    """
    logger.info('init started')

    self.vid_info = {k: v for k, v in parse_qsl(self.vid_info)}
    if self.age_restricted:
        # age-gated videos: the info query string IS the player config
        self.player_config_args = self.vid_info
    else:
        self.player_config_args = extract.get_ytplayer_config(
            self.watch_html,
        )['args']

    # Fix for KeyError: 'title' issue #434 — fall back to scraping the
    # page's <title> tag when the player config omits the title.
    if 'title' not in self.player_config_args:
        i_start = (
            self.watch_html.lower().index('<title>') + len('<title>'))
        i_end = self.watch_html.lower().index('</title>')
        title = self.watch_html[i_start:i_end].strip()
        # strip the " - YouTube" suffix the browser tab title carries
        index = title.lower().rfind(' - youtube')
        title = title[:index] if index > 0 else title
        self.player_config_args['title'] = title

    self.vid_descr = extract.get_vid_descr(self.watch_html)
    # https://github.com/nficano/pytube/issues/165
    stream_maps = ['url_encoded_fmt_stream_map']
    if 'adaptive_fmts' in self.player_config_args:
        stream_maps.append('adaptive_fmts')

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        if not self.age_restricted and fmt in self.vid_info:
            mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)
        try:
            mixins.apply_signature(self.player_config_args, fmt, self.js)
        except TypeError:
            # self.js was not yet fetched; get the player JS and retry
            self.js_url = extract.js_url(
                self.embed_html, self.age_restricted,
            )
            self.js = request.get(self.js_url)
            mixins.apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    apply_mixin(self.player_config_args, 'player_response', json.loads)
    self.initialize_caption_objects()
    logger.info('init finished successfully')
def init(self):
    """Descramble the stream data and build Stream/Caption instances.

    Variant that prefers BeautifulSoup (when installed) for the title
    fallback, with a plain index() scrape as the backup path.

    :rtype: None
    """
    logger.info('init started')

    self.vid_info = {k: v for k, v in parse_qsl(self.vid_info)}
    if self.age_restricted:
        # age-gated videos: the info query string IS the player config
        self.player_config_args = self.vid_info
    else:
        self.player_config_args = extract.get_ytplayer_config(
            self.watch_html,
        )['args']

    # Title fallback (issue #434): scrape <title> when missing.
    if 'title' not in self.player_config_args:
        # for more reliability when parsing, we may use a trained parser
        try:
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(self.watch_html, 'lxml')
            title = soup.title.get_text().strip()
        except ModuleNotFoundError:
            # since this parsing is actually pretty simple, we may just
            # parse it using index()
            i_start = self.watch_html.lower().index('<title>') + len(
                '<title>')
            i_end = self.watch_html.lower().index('</title>')
            title = self.watch_html[i_start:i_end].strip()
        # remove the ' - youtube' part that is added to the browser
        # tab's title
        index = title.lower().rfind(' - youtube')
        title = title[:index] if index > 0 else title
        self.player_config_args['title'] = title

    self.vid_descr = extract.get_vid_descr(self.watch_html)
    # https://github.com/nficano/pytube/issues/165
    stream_maps = ['url_encoded_fmt_stream_map']
    if 'adaptive_fmts' in self.player_config_args:
        stream_maps.append('adaptive_fmts')

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        if not self.age_restricted and fmt in self.vid_info:
            mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)
        try:
            mixins.apply_signature(self.player_config_args, fmt, self.js)
        except TypeError:
            # self.js was not yet fetched; get the player JS and retry
            self.js_url = extract.js_url(
                self.embed_html, self.age_restricted,
            )
            self.js = request.get(self.js_url)
            mixins.apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    apply_mixin(self.player_config_args, 'player_response', json.loads)
    self.initialize_caption_objects()
    logger.info('init finished successfully')
def featured_channels_html(self):
    """Fetch (once) and return the /channels page html.

    :rtype: str
    """
    if not self._featured_channels_html:
        # cache miss — download and memoize
        self._featured_channels_html = request.get(
            self.featured_channels_url)
    return self._featured_channels_html
def _paginate(
    self, until_watch_id: Optional[str] = None
) -> Iterable[List[str]]:
    """Parse the video links from the page source, yields the /watch?v=
    part from video link

    :param until_watch_id Optional[str]: YouTube Video watch id until
        which the playlist should be read.

    :rtype: Iterable[List[str]]
    :returns: Iterable of lists of YouTube watch ids
    """
    req = self.html
    videos_urls, continuation = self._extract_videos(
        # extract the json located inside the window["ytInitialData"] js
        # variable of the playlist html page
        self._extract_json(req)
    )
    if until_watch_id:
        # stop early once the requested watch id appears in this page
        try:
            trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
            yield videos_urls[:trim_index]
            return
        except ValueError:
            pass
    yield videos_urls

    # Extraction from a playlist only returns 100 videos at a time
    # if self._extract_videos returns a continuation there are more
    # than 100 songs inside a playlist, so we need to add further requests
    # to gather all of them
    if continuation:
        load_more_url, headers = self._build_continuation_url(continuation)
    else:
        load_more_url, headers = None, None

    while load_more_url and headers:  # there is an url found
        logger.debug("load more url: %s", load_more_url)
        # requesting the next page of videos with the url generated from the
        # previous page
        req = request.get(load_more_url, extra_headers=headers)
        # extract up to 100 songs from the page loaded
        # returns another continuation if more videos are available
        videos_urls, continuation = self._extract_videos(req)
        if until_watch_id:
            try:
                trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
                yield videos_urls[:trim_index]
                return
            except ValueError:
                pass
        yield videos_urls

        if continuation:
            load_more_url, headers = self._build_continuation_url(
                continuation
            )
        else:
            load_more_url, headers = None, None
def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
    """Resolve the playlist id, build its canonical URL, and download
    the playlist page html.

    :param url: playlist URL or raw playlist id.
    :param proxies: (optional) proxy mapping installed globally.
    """
    if proxies:
        install_proxy(proxies)
    playlist_id = extract.playlist_id(url)
    self.playlist_id = playlist_id
    self.playlist_url = (
        f"https://www.youtube.com/playlist?list={playlist_id}")
    self.html = request.get(self.playlist_url)
def filesize(self):
    """File size of the media stream in bytes.

    :rtype: int
    :returns: Filesize (in bytes) of the stream.
    """
    # issue a headers-only request and read the declared length
    response_headers = request.get(self.url, headers=True)
    return int(response_headers['content-length'])
def html(self):
    """Fetch (once) and return the /videos page html.

    :rtype: str
    """
    if not self._html:
        # cache miss — download and memoize
        self._html = request.get(self.videos_url)
    return self._html
def parse_links(self):
    """Return the /watch?v= hrefs found on the playlist page.

    Scrapes the raw HTML without an HTML parser: keeps only the lines
    containing a video-title anchor and slices out each href value.
    """
    page = request.get(self.construct_playlist_url())
    rows = [ln for ln in page.split('\n') if 'pl-video-title-link' in ln]
    return [r.split('href="', 1)[1].split('&', 1)[0] for r in rows]
def html(self):
    """Fetch (once) and return the playlist page html.

    :rtype: str
    """
    if not self._html:
        # cache miss — download and memoize
        self._html = request.get(self.playlist_url)
    return self._html
def filesize(self) -> int:
    """File size of the media stream in bytes.

    :rtype: int
    :returns: Filesize (in bytes) of the stream.
    """
    if self._filesize is None:
        # cache miss — issue a headers-only request once and memoize
        response_headers = request.get(self.url, headers=True)
        self._filesize = int(response_headers["content-length"])
    return self._filesize
def about_html(self):
    """Fetch (once) and return the /about page html.

    Currently unused for any functionality.

    :rtype: str
    """
    if not self._about_html:
        # cache miss — download and memoize
        self._about_html = request.get(self.about_url)
    return self._about_html
def getPlaylistTitle(url):
    """Fetch *url* and return its <title> text with the trailing
    "- YouTube" suffix removed.

    :param url: page URL to scrape.
    :rtype: str
    """
    page = request.get(url)
    open_tag, end_tag = "<title>", "</title>"
    pattern = re.compile(open_tag + "(.+?)" + end_tag)
    # .group() keeps the surrounding tags; strip them and the suffix
    title = pattern.search(page).group()
    for junk in (open_tag, end_tag, "- YouTube"):
        title = title.replace(junk, "")
    return title.strip()
def community_html(self):
    """Fetch (once) and return the /community page html.

    Currently unused for any functionality.

    :rtype: str
    """
    if not self._community_html:
        # cache miss — download and memoize
        self._community_html = request.get(self.community_url)
    return self._community_html
def prefetch(self):
    """Eagerly download all necessary data.

    Eagerly executes all necessary network requests so all other
    operations don't need to make calls outside of the interpreter
    which blocks for long periods of time.

    :raises AgeRestrictionError: if the video is age restricted — this
        variant refuses age-gated content outright instead of falling
        back to the embed page.
    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)
    if extract.is_age_restricted(self.watch_html):
        raise AgeRestrictionError('Content is age restricted')
    self.vid_info_url = extract.video_info_url(
        video_id=self.video_id,
        watch_url=self.watch_url,
        watch_html=self.watch_html,
    )
    self.js_url = extract.js_url(self.watch_html)
    self.js = request.get(self.js_url)
    self.vid_info = request.get(self.vid_info_url)
def playlists_html(self):
    """Fetch (once) and return the /playlists page html.

    Currently unused for any functionality.

    :rtype: str
    """
    if not self._playlists_html:
        # cache miss — download and memoize
        self._playlists_html = request.get(self.playlists_url)
    return self._playlists_html
def featured_channels_html(self):
    """Fetch (once) and return the /channels page html.

    Currently unused for any functionality.

    :rtype: str
    """
    if not self._featured_channels_html:
        # cache miss — download and memoize
        self._featured_channels_html = request.get(
            self.featured_channels_url)
    return self._featured_channels_html
def js(self):
    """Player JavaScript, served from the module-level cache when the
    cached js_url still matches this stream's js_url."""
    if self._js:
        return self._js
    if pytube.__js_url__ == self.js_url:
        # cache hit: reuse the module-wide copy
        self._js = pytube.__js__
    else:
        # cache miss/stale: fetch the js and refresh the module cache
        self._js = request.get(self.js_url)
        pytube.__js__ = self._js
        pytube.__js_url__ = self.js_url
    return self._js
def prefetch(self):
    """Eagerly download all necessary data.

    Eagerly executes all necessary network requests so all other
    operations don't need to make calls outside of the interpreter
    which blocks for long periods of time.

    :rtype: None
    """
    self.watch_html = request.get(url=self.watch_url)
    self.embed_html = request.get(url=self.embed_url)
    self.age_restricted = extract.is_age_restricted(self.watch_html)
    self.vid_info_url = extract.video_info_url(
        video_id=self.video_id,
        watch_url=self.watch_url,
        watch_html=self.watch_html,
        embed_html=self.embed_html,
        age_restricted=self.age_restricted,
    )
    self.vid_info = request.get(self.vid_info_url)
    # Age-restricted pages lack the player JS; only fetch it otherwise.
    if not self.age_restricted:
        self.js_url = extract.js_url(self.watch_html)
        self.js = request.get(self.js_url)
def parse_links(self):
    """Parse the video links from the page source, extracting the
    /watch?v= part of each video anchor's href.

    It's an alternative for BeautifulSoup.

    :return: list
    """
    page = request.get(self.construct_playlist_url())
    # keep only the lines carrying a video-title anchor, then slice
    # the href value out of each one
    rows = [ln for ln in page.split('\n') if 'pl-video-title-link' in ln]
    return [r.split('href="', 1)[1].split('&', 1)[0] for r in rows]
def download(self, output_path=None, filename=None):
    """Write the media stream to disk.

    :param output_path:
        (optional) Output path for writing media file. If one is not
        specified, defaults to the current working directory.
    :type output_path: str or None
    :param filename:
        (optional) Output filename (stem only) for writing media file.
        If one is not specified, the default filename is used.
    :type filename: str or None
    :rtype: None
    """
    output_path = output_path or os.getcwd()
    if filename:
        safe = safe_filename(filename)
        # Bug fix: the old template '(unknown).{s.subtype}' ignored its
        # filename argument and always produced the literal name
        # "(unknown).<ext>"; build "<stem>.<subtype>" as intended.
        filename = '{filename}.{s.subtype}'.format(filename=safe, s=self)
    filename = filename or self.default_filename

    # file path
    fp = os.path.join(output_path, filename)
    bytes_remaining = self.filesize
    logger.debug(
        'downloading (%s total bytes) file to %s',
        self.filesize, fp,
    )
    with open(fp, 'wb') as fh:
        for chunk in request.get(self.url, streaming=True):
            # reduce the (bytes) remainder by the length of the chunk.
            bytes_remaining -= len(chunk)
            # send to the on_progress callback.
            self.on_progress(chunk, fh, bytes_remaining)
        self.on_complete(fh)
def xml_captions(self):
    """Download and return the raw xml caption track from this
    caption's url.

    :rtype: str
    """
    return request.get(self.url)