def _extract_dash(dashurl):
    """ Download a DASH manifest url and extract stream data.

    Returns a list of dicts, one per <Representation> element in the
    manifest, each holding url / itag / size / bitrate / dimensions.
    """
    # pylint: disable = R0914
    dbg("Fetching dash page")
    manifest = fetch_decode(dashurl)
    dbg("DASH list fetched")
    # ElementTree requires fully-qualified (namespaced) tag names.
    mpd_ns = "{urn:mpeg:DASH:schema:MPD:2011}"
    yt_ns = "{http://youtube.com/yt/2012/10/10}"
    root = ElementTree.fromstring(manifest)
    streams = []
    for rep in root.findall(".//%sRepresentation" % mpd_ns):
        base = rep.find("%sBaseURL" % mpd_ns)
        streams.append(dict(
            bitrate=rep.get("bandwidth"),
            dash=True,
            itag=uni(rep.get("id")),
            width=uni(rep.get("width")),
            height=uni(rep.get("height")),
            url=base.text,
            # content length lives in a YouTube-specific attribute
            size=base.get("%scontentLength" % yt_ns),
        ))
    return streams
def fetch_cached(url, callback, encoding=None, dbg_ref="", file_prefix=""):
    """ Fetch url - from tmpdir if already retrieved.

    url         -- the url to fetch
    callback    -- optional progress callback, called with a status string
    encoding    -- text encoding for the on-disk cache file (default utf8)
    dbg_ref     -- short label used in debug / callback messages
    file_prefix -- prefix for the cache filename (also passed to prune_files)

    Returns the page content as unicode text.
    """
    tmpdir = os.path.join(tempfile.gettempdir(), "pafy")
    if not os.path.exists(tmpdir):
        os.makedirs(tmpdir)
    # Cache filename is the md5 of the url, so each url maps to one file.
    url_md5 = hashlib.md5(url.encode("utf8")).hexdigest()
    cached_filename = os.path.join(tmpdir, file_prefix + url_md5)
    # BUG FIX: the cache file was previously opened with the locale-default
    # encoding while the data is utf8-decoded text, which garbles or fails
    # on non-utf8 systems; the `encoding` parameter was also silently
    # ignored.  Honor it, defaulting to utf8 to match fetch_decode below.
    enc = encoding or "utf8"
    if os.path.exists(cached_filename):
        dbg("fetched %s from cache", dbg_ref)
        with open(cached_filename, encoding=enc) as f:
            return f.read()
    data = fetch_decode(url, "utf8")  # unicode
    dbg("Fetched %s", dbg_ref)
    if callback:
        callback("Fetched %s" % dbg_ref)
    with open(cached_filename, "w", encoding=enc) as f:
        f.write(data)
    # prune files after write
    prune_files(tmpdir, file_prefix)
    return data
def get_video_info(video_id, callback, newurl=None):
    """ Return info for video_id.  Returns dict. """
    # TODO: see if there is a way to avoid retrieving the embed page
    # just for this, or to use it for more. This was copied from
    # youtube-dl.
    embed_webpage = fetch_decode(g.urls['embed'])
    # The "sts" token from the embed page is required by the vidinfo url.
    sts = re.search(r'sts"\s*:\s*(\d+)', embed_webpage).group(1)
    if newurl:
        # Caller supplied an alternate url (age verification retry).
        url = newurl
    else:
        url = g.urls['vidinfo'] % (video_id, video_id, sts)
    raw = fetch_decode(url)  # bytes
    info = parseqs(raw)  # unicode dict
    dbg("Fetched video info%s", " (age ver)" if newurl else "")
    if info['status'][0] == "fail":
        reason = info['reason'][0] or "Bad video argument"
        raise IOError("Youtube says: %s [%s]" % (reason, video_id))
    return info
def get_playlist(playlist_url, basic=False, gdata=False, size=False,
                 callback=None):
    """ Return a dict containing Pafy objects from a YouTube Playlist.

    The returned Pafy objects are initialised using the arguments to
    get_playlist() in the manner documented for pafy.new()

    Returns None if no playlist id can be extracted from playlist_url.
    """
    playlist_id = extract_playlist_id(playlist_url)
    if not playlist_id:
        return None
    url = g.urls["playlist"] % playlist_id
    allinfo = fetch_decode(url)  # unicode
    allinfo = json.loads(allinfo)
    # playlist specific metadata
    playlist = dict(playlist_id=playlist_id,
                    likes=allinfo.get('likes'),
                    title=allinfo.get('title'),
                    author=allinfo.get('author'),
                    dislikes=allinfo.get('dislikes'),
                    description=allinfo.get('description'),
                    items=[])
    # playlist items specific metadata
    for v in allinfo['video']:
        vid_data = dict(added=v.get('added'),
                        is_cc=v.get('is_cc'),
                        is_hd=v.get('is_hd'),
                        likes=v.get('likes'),
                        title=v.get('title'),
                        views=v.get('views'),
                        rating=v.get('rating'),
                        author=v.get('author'),
                        user_id=v.get('user_id'),
                        privacy=v.get('privacy'),
                        start=v.get('start', 0.0),
                        dislikes=v.get('dislikes'),
                        duration=v.get('duration'),
                        comments=v.get('comments'),
                        keywords=v.get('keywords'),
                        thumbnail=v.get('thumbnail'),
                        cc_license=v.get('cc_license'),
                        category_id=v.get('category_id'),
                        description=v.get('description'),
                        encrypted_id=v.get('encrypted_id'),
                        time_created=v.get('time_created'),
                        time_updated=v.get('time_updated'),
                        length_seconds=v.get('length_seconds'),
                        end=v.get('end', v.get('length_seconds')))
        try:
            pafy_obj = new(vid_data['encrypted_id'],
                           basic=basic,
                           gdata=gdata,
                           size=size,
                           callback=callback)
        except IOError as e:
            if callback:
                # BUG FIX: was `e.message`, which does not exist on
                # Python 3 exceptions and raised AttributeError inside
                # the error path; str(e) is portable.
                callback("%s: %s" % (v['title'], str(e)))
            # Skip unavailable videos rather than failing the playlist.
            continue
        pafy_obj.populate_from_playlist(vid_data)
        playlist['items'].append(dict(pafy=pafy_obj,
                                      playlist_meta=vid_data))
        if callback:
            callback("Added video: %s" % v['title'])
    return playlist
def _fetch_basic(self):
    """ Fetch basic data and streams.

    Populates title/author/rating/etc. from the video_info response,
    extracts the stream maps, handles encrypted (ciphered) signatures,
    and finally processes the streams.  No-op if already fetched.
    """
    if self._have_basic:
        return
    allinfo = get_video_info(self.videoid, self.callback)
    if self.callback:
        self.callback("Fetched video info")

    def _get_lst(key, default="unknown", dic=allinfo):
        """ Dict get function, returns first index. """
        retval = dic.get(key, default)
        return retval[0] if retval != default else default

    self._title = _get_lst('title')
    self._dashurl = _get_lst('dashmpd')
    self._author = _get_lst('author')
    self._rating = float(_get_lst('avg_rating', 0.0))
    self._length = int(_get_lst('length_seconds', 0))
    # BUG FIX: was `int(_get_lst('view_count'), 0)` - the intended
    # default 0 was passed to int() as its *base* argument, so a missing
    # 'view_count' key produced int("unknown", 0) -> ValueError.
    self._viewcount = int(_get_lst('view_count', 0))
    self._thumb = unquote_plus(_get_lst('thumbnail_url', ""))
    self._formats = [x.split("/") for x in _get_lst('fmt_list').split(",")]
    self._keywords = _get_lst('keywords', "").split(',')
    self._bigthumb = _get_lst('iurlsd', "")
    self._bigthumbhd = _get_lst('iurlsdmaxres', "")
    self.ciphertag = _get_lst("use_cipher_signature") == "True"
    self.sm = _extract_smap(g.UEFSM, allinfo, True)
    self.asm = _extract_smap(g.AF, allinfo, True)
    dbg("extracted stream maps")
    # The use_cipher_signature flag sometimes disagrees with the actual
    # stream map contents; trust the stream map.
    sm_ciphertag = "s" in self.sm[0]
    if self.ciphertag != sm_ciphertag:
        dbg("ciphertag mismatch")
        self.ciphertag = not self.ciphertag
    watch_url = g.urls['watchv'] % self.videoid
    if self.callback:
        self.callback("Fetching watch page")
    watchinfo = fetch_decode(watch_url)  # unicode
    dbg("Fetched watch page")
    if self.callback:
        self.callback("Fetched watch page")
    self.age_ver = re.search(r'player-age-gate-content">',
                             watchinfo) is not None
    if self.ciphertag:
        dbg("Encrypted signature detected.")
        if not self.age_ver:
            # Decrypt signatures using the js player function.
            smaps, js_url, mainfunc = get_js_sm(watchinfo, self.callback)
            funcmap[js_url] = mainfunc
            self.sm, self.asm = smaps
            self.js_url = js_url
            dashsig = re.search(r"/s/([\w\.]+)", self._dashurl).group(1)
            dbg("decrypting dash sig")
            goodsig = _decodesig(dashsig, js_url, self.callback)
            self._dashurl = re.sub(r"/s/[\w\.]+",
                                   "/signature/%s" % goodsig, self._dashurl)
        else:
            # Age-gated: apply the fixed signature transform
            # (presumably matches the embed player's descrambler —
            # NOTE(review): verify against current player code).
            s = re.search(r"/s/([\w\.]+)", self._dashurl).group(1)
            s = s[2:63] + s[82] + s[64:82] + s[63]
            self._dashurl = re.sub(r"/s/[\w\.]+",
                                   "/signature/%s" % s, self._dashurl)
    if self._dashurl != 'unknown':
        self.dash = _extract_dash(self._dashurl)
    self._have_basic = 1
    self._process_streams()
    self.expiry = time.time() + g.lifespan