def query_info(self, url):
    """Resolve a rapidvideo embed page to its title and media URL.

    Two page layouts are handled:

    * a ``video#videojs`` element whose first ``source`` child carries the
      media URL in its ``src`` attribute;
    * a ``jwplayer("home_video").setup(...)`` block, obtained by requesting
      the page again with ``block=1`` as post data, that lists several
      sources.  The source whose label has the highest index in
      ``self.labels`` is chosen.

    Returns ``(title, ext, [media_url], None)``.  ``ext`` is ``'mp4'`` when
    the page title ends in ``.mp4`` (the suffix is stripped from the title)
    and ``None`` otherwise.
    """
    hutf = self.get_hutf(url)
    debug(hutf)
    title = SelStr('title', hutf)[0].text
    k = None
    if title.endswith('.mp4'):
        title, k = title[:-4], 'mp4'

    # Layout 1, e.g. https://www.rapidvideo.com/embed/FUZ35WDLM7
    # -> https://www3731.playercdn.net/187/0/G4i-UJ6bQxIZI6FWc_F5dg/1536365722/180905/692FUZ37O792IXDCUZDFX.mp4
    v = SelStr("video#videojs source", hutf)
    if v:
        return title, k, [v[0]["src"]], None

    # Layout 2, e.g. https://www.rapidvideo.com/embed/ZsNSciBj
    # -> https://admkis.playercdn.net/85/1/sQ52oTwwZ6vCo3Vk7-RS2g/1482741547/161202/063k10VmKldzoX8.mp4
    hutf = self.get_hutf(url, postdata='block=1')
    data = match1(hutf, r'jwplayer\("home_video"\)\.setup\(([^\(\)]+)\);')
    debug(data)
    data = match1(data, r'"sources":\s*(\[[^\[\]]+\])')

    # Start below every valid index so that a source labelled
    # self.labels[0] can still be selected.
    # NOTE(review): assumes self.labels is ordered worst -> best; confirm.
    ml, u = -1, ''
    for src in json.loads(data):
        l = src['label']
        if l not in self.labels:
            # An unknown label cannot be ranked; report it and move on
            # instead of letting list.index() raise ValueError.
            echo("new label", l)
            continue
        i = self.labels.index(l)
        if i > ml:
            ml, u = i, src['file']
    debug(title, u)
    return title, k, [u], None
def query_info(self, url):
    """Resolve an iqiyi page to ``(title, ext, urls, totalsize)``.

    The title comes from the first of the ``irTitle`` / ``og:title`` meta
    tags that is present, falling back to ``self.title``.  ``tvid`` and
    ``vid`` are scraped from the page, handed to ``I2().getVMS`` and the
    result to ``self.get_vd_url``, which yields the URL whose content is
    then passed to ``self._get_m3u8_urls``.

    ``ext`` is ``"ts"`` when the first resulting URL contains ``.ts?`` and
    ``None`` otherwise; ``totalsize`` is always ``None``.
    """
    # Sample pages:
    #   http://www.iqiyi.com/v_19rr26qr38.html
    #   https://www.iqiyi.com/v_19rr04z9is.html?list=19rrm106om
    #   https://www.iqiyi.com/v_19rr04z9is.html
    page = self.get_hutf(url)

    for selector in ('meta[name=irTitle]', 'meta[property=og:title]'):
        try:
            title = SelStr(selector, page)[0]["content"]
        except IndexError:
            # Tag absent on this page; try the next selector.
            continue
        break
    else:
        title = self.title

    tvid = match1(page, """param\['tvid'\] = "(\d+)";""")
    vid = match1(page, """param\['vid'\] = "([^"]+)";""")
    echo("tvid=", tvid, ", vid=", vid)

    vms = I2().getVMS(tvid, vid)
    _vd, url = self.get_vd_url(vms)
    echo(title)

    playlist_text = self.get_hutf(url)
    segment_urls = self._get_m3u8_urls(url, playlist_text)
    ext = "ts" if '.ts?' in segment_urls[0] else None
    return title, ext, segment_urls, None
def test(self, args):
    """Ad-hoc probe: load an openload embed page and echo stream-id candidates.

    ``args`` is not used.  The page is fetched with ``self.chrome_hutf`` and
    searched twice: once with a list of generic ``id~timestamp~ip~token``
    patterns, once for a text node that starts with the embed id taken from
    the URL.  Both results are echoed; nothing is returned.
    """
    # Embed pages seen during manual testing and what they resolved to:
    #   https://openload.co/embed/GN4oyoh2bQY/
    #   https://openload.co/stream/GN4oyoh2bQY~1497806882~64.180.0.0~AUcZ8f9j?mime=true
    #   https://1fiag6g.oloadcdn.net/dl/l/zxbDTu3BltrypxdY/GN4oyoh2bQY/PoliceUnit38_06.mp4?mime=true
    #   https://openload.co/embed/qCpGFs8AOa4/
    #   https://openload.co/embed/9zS9QNUWxZ8/
    #   https://openload.co/stream/9zS9QNUWxZ8~1497803146~64.180.0.0~eBodZDZa?mime=true
    #   https://oqt1pl.oloadcdn.net/dl/l/vfG56RBHDh7gErUv/9zS9QNUWxZ8/PoliceUnit38_05.mp4?mime=true
    #   https://openload.co/embed/QM5ommgqrG8
    #   https://openload.co/embed/Wx_SaRAFgO4/
    url = 'https://openload.co/embed/TkRITZPJ0-8'

    # The trailing '/' is optional in embed URLs (the one above has none),
    # so it must not be part of the pattern or uid comes back empty.
    uid = match1(url, '''openload.co/embed/([^/]+)''')
    echo("uid =", uid)

    hutf = self.chrome_hutf(url)
    ret = match1(
        hutf,
        r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
        r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
        r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
        r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
        r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)',
    )
    echo(ret)

    # Look for the id of the page that was actually fetched rather than a
    # hard-coded id belonging to a different sample URL.
    ret = match1(hutf, r'>(%s[^<]+)<' % uid)
    echo(ret)
def query_info(self, url):
    """Build the openload stream URL for an embed page.

    The embed id is taken from ``url``; the page rendered by
    ``self.chrome_hutf`` is searched for a text node beginning with that id,
    and the match is inserted into the ``/stream/...?mime=true`` URL.

    Returns ``(self.title, None, [stream_url], None)``.
    """
    embed_id = match1(url, '''openload.co/embed/([^/]+)''')
    page = self.chrome_hutf(url)
    stream_id = match1(page, r'>(%s[^<]+)<' % embed_id)
    stream_url = "https://openload.co/stream/%s?mime=true" % stream_id
    echo(stream_url)
    return self.title, None, [stream_url], None
def test(self, args):
    """Ad-hoc probe for openload: echo what the stream-id patterns match.

    ``args`` is not used.  A fixed sample embed page is rendered through
    ``self.chrome_hutf``; the first search tries several generic
    ``id~timestamp~ip~token`` patterns, the second looks for a text node
    starting with the embed id parsed from the URL.  Results are echoed
    and nothing is returned.
    """
    # Sample embed pages and the stream / CDN URLs they led to:
    #   https://openload.co/embed/GN4oyoh2bQY/
    #   https://openload.co/stream/GN4oyoh2bQY~1497806882~64.180.0.0~AUcZ8f9j?mime=true
    #   https://1fiag6g.oloadcdn.net/dl/l/zxbDTu3BltrypxdY/GN4oyoh2bQY/PoliceUnit38_06.mp4?mime=true
    #   https://openload.co/embed/qCpGFs8AOa4/
    #   https://openload.co/embed/9zS9QNUWxZ8/
    #   https://openload.co/stream/9zS9QNUWxZ8~1497803146~64.180.0.0~eBodZDZa?mime=true
    #   https://oqt1pl.oloadcdn.net/dl/l/vfG56RBHDh7gErUv/9zS9QNUWxZ8/PoliceUnit38_05.mp4?mime=true
    #   https://openload.co/embed/QM5ommgqrG8
    #   https://openload.co/embed/Wx_SaRAFgO4/
    url = 'https://openload.co/embed/TkRITZPJ0-8'

    # Do not require a trailing '/': the sample URL above has none, and
    # demanding one would leave uid empty.
    uid = match1(url, '''openload.co/embed/([^/]+)''')
    echo("uid =", uid)

    hutf = self.chrome_hutf(url)
    generic_patterns = (
        r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
        r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
        r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
        r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
        r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)',
    )
    echo(match1(hutf, *generic_patterns))

    # Search for the id of the page actually loaded, not a hard-coded id
    # that belongs to another sample.
    echo(match1(hutf, r'>(%s[^<]+)<' % uid))
def query_info(self, url):
    """Resolve a le.com / letv.com page to its title and m3u8 entry URLs.

    Steps, in order:

    1. Take the video id from the URL (``/ptv/vplay/<id>.html``) or, failing
       that, from a ``vid="..."`` attribute in the page.
    2. Query the ``playJson`` API with a time key from ``calcTimeKey``.
    3. Choose a stream id: 720p when ``self.is_playlist`` else 1080p; if the
       preferred one is not offered, list what is offered and fall back to
       1080p, then 720p, then the id that sorts highest by ``int(id[1:])``.
    4. Follow the dispatch URL, download the playlist named by its
       ``location`` field, decode it with ``decode_m3u8`` and collect every
       non-comment line.

    Returns ``(title, None, urls, None)``.
    """
    # e.g. http://www.le.com/ptv/vplay/1877906.html?ch=sougou_mfdy&fromvsogou=1
    html = self.get_html(url)
    hutf = html.decode('utf8')

    for pattern in (r'http://www.le.com/ptv/vplay/(\d+).html',
                    r'http://www.letv.com/ptv/vplay/(\d+).html'):
        if re.match(pattern, url):
            vid = match1(url, pattern)
            break
    else:
        vid = match1(hutf, r'vid="(\d+)"')
    title = match1(hutf, r'name="irTitle" content="(.*?)"')
    echo("vid =", vid)
    echo("title =", title)

    tkey = calcTimeKey(int(time.time()))
    u = 'http://api.letv.com/mms/out/video/playJson?'
    u = u + ("id=%s&platid=1&splatid=101&format=1" % vid)
    u = u + ("&tkey=%d&domain=www.letv.com" % tkey)
    data = self.get_html(u)
    info = json.loads(data.decode("utf-8"))

    support_stream_id = info["playurl"]["dispatch"].keys()
    preferred = "720p" if self.is_playlist else "1080p"
    if preferred in support_stream_id:
        stream_id = preferred
    else:
        echo("Current Video Supports:")
        for i in support_stream_id:
            echo("\t--format", i, "<URL>")
        if "1080p" in support_stream_id:
            stream_id = '1080p'
        elif "720p" in support_stream_id:
            stream_id = '720p'
        else:
            sids = sorted(support_stream_id, key=lambda i: int(i[1:]))
            stream_id = sids[-1]
    echo("stream_id =", stream_id)

    u2 = info["playurl"]["domain"][0]
    u2 = u2 + info["playurl"]["dispatch"][stream_id][0]
    u2 = u2 + "&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux"
    # %s, not %d: random.random() is a float in [0, 1), which %d would
    # truncate to 0 on every call.
    u2 = u2 + ("&tag=letv&sign=letv&expect=3&tn=%s" % random.random())
    u2 = u2 + ("&pay=0&iscpn=f9051&rateid=%s" % stream_id)
    r2 = self.get_html(u2)
    info2 = json.loads(r2.decode("utf-8"))

    m3u8 = self.get_html(info2["location"])
    m3u8_list = decode_m3u8(bytearray(m3u8))
    # Every line that does not start with '#', up to (not including) any '\r'.
    us = re.findall(r'^[^#][^\r]*', m3u8_list, re.MULTILINE)
    return title, None, us, None
def query_info(self, url):
    """Resolve a ttwanda page to a 4-tuple describing its media.

    When ``url`` has no query string, the href of the first
    ``section.p5 div a`` link is appended and the page re-fetched.  The
    title is the first ``<strong>`` of the article; on ``/films/`` pages only
    the part between 《 and 》 is kept when present.

    The player iframe is then followed:

    * ``/player/v.php?url=...`` sources are delegated to ``LETV().query_info``;
    * otherwise ``var play_url`` is read from the iframe page (the process
      exits with status 1 if it is missing) and returned either expanded
      via ``self.try_m3u8``, or with kind/size from ``get_kind_size`` for
      ``ftn_handler`` links, or as a single plain URL.
    """
    # e.g. http://www.ttwanda.com/films/us/1693.html?xf
    page = self.get_hutf(url)
    if '?' not in url:
        first_link = SelStr('section.p5 div a', page)[0]['href']
        url = url + first_link
        page = self.get_hutf(url)

    title = SelStr("div.video-content article p strong", page)[0].text
    bracket_re = "《(.+)》"
    if not py3:
        bracket_re = bracket_re.decode('utf8')
    inner = match1(title, bracket_re)
    if inner and '/films/' in url:
        title = inner

    src = SelStr('iframe.player', page)[0]['src']
    if '/player/v.php?url=' in src:
        # e.g. http://www.ttwanda.com/tv/ustv/945.html, whose iframe src is
        # ../../player/v.php?url=www.le.com/ptv/vplay/20723618.html
        src = 'http://' + src.split('?url=', 1)[1]
        from letv import LETV
        return LETV().query_info(src)

    if not src.startswith(("http://", "https://")):
        src = 'http://www.ttwanda.com/' + src
    echo(src)

    self.extra_headers['Referer'] = url  # this is important
    player_page = self.get_hutf(src)
    dst = match1(player_page, 'var play_url \= "([^"]+)"')
    echo(dst)
    if not dst:
        echo("Can not find var play_url")
        sys.exit(1)

    youku_m3u8 = 'youku.com/' in dst and '/m3u8' in dst
    if youku_m3u8 or 'lecloud.com/' in dst or '/letv-uts/' in dst:
        return title, None, self.try_m3u8(dst), None

    if 'ttwanda.com/ftn_handler/' in dst:
        pairs = []
        for c in self.cookie.cookiejar:
            if c.name != 'PHPSESSID':
                pairs.append("%s=%s" % (c.name, c.value))
        echo(pairs)
        self.wget_cookie = "; ".join(pairs)
        kind, size = get_kind_size(dst, self.wget_cookie)
        return title, kind, [dst], size

    # Any other source (mgtv.com, 189.cn/v5/downloadFile, ...) is returned
    # as-is, e.g. http://www.ttwanda.com/films/us/907.html?style=cq
    return title, None, [dst], None
def query_info(self, url):
    """Resolve a ttwanda page to a 4-tuple describing its media.

    URLs without a query string are first extended with the href of the
    first ``section.p5 div a`` link.  The title comes from the article's
    first ``<strong>``; for ``/films/`` pages the text inside 《...》 replaces
    it when found.

    The ``iframe.player`` source decides the rest: ``/player/v.php?url=``
    links go to ``LETV().query_info``; anything else is fetched with the
    page URL as Referer and its ``var play_url`` value is returned through
    ``self.try_m3u8``, with ``get_kind_size`` data for ``ftn_handler`` links,
    or unchanged.  A missing ``play_url`` ends the process via
    ``sys.exit(1)``.
    """
    # e.g. http://www.ttwanda.com/films/us/1693.html?xf
    html_text = self.get_hutf(url)
    if '?' not in url:
        url = url + SelStr('section.p5 div a', html_text)[0]['href']
        html_text = self.get_hutf(url)

    title = SelStr("div.video-content article p strong", html_text)[0].text
    r = "《(.+)》"
    if not py3:
        r = r.decode('utf8')
    bracketed = match1(title, r)
    if bracketed and '/films/' in url:
        title = bracketed

    src = SelStr('iframe.player', html_text)[0]['src']
    if '/player/v.php?url=' in src:
        # Sample: http://www.ttwanda.com/tv/ustv/945.html embeds
        # ../../player/v.php?url=www.le.com/ptv/vplay/20723618.html
        from letv import LETV
        src = 'http://' + src.split('?url=', 1)[1]
        return LETV().query_info(src)

    is_absolute = src.startswith("http://") or src.startswith("https://")
    if not is_absolute:
        src = 'http://www.ttwanda.com/' + src
    echo(src)

    self.extra_headers['Referer'] = url  # this is important
    html_text = self.get_hutf(src)
    dst = match1(html_text, 'var play_url \= "([^"]+)"')
    echo(dst)
    if not dst:
        echo("Can not find var play_url")
        sys.exit(1)

    wants_m3u8 = (
        ('youku.com/' in dst and '/m3u8' in dst)
        or 'lecloud.com/' in dst
        or '/letv-uts/' in dst
    )
    if wants_m3u8:
        return title, None, self.try_m3u8(dst), None

    if 'ttwanda.com/ftn_handler/' not in dst:
        # Other hosts (mgtv.com, 189.cn/v5/downloadFile, ...), e.g.
        # http://www.ttwanda.com/films/us/907.html?style=cq
        return title, None, [dst], None

    cs = ["%s=%s" % (c.name, c.value)
          for c in self.cookie.cookiejar
          if c.name != 'PHPSESSID']
    echo(cs)
    self.wget_cookie = "; ".join(cs)
    k, s = get_kind_size(dst, self.wget_cookie)
    return title, k, [dst], s
def get_one(self, url, t=UTITLE, n=False):
    """Download ``url`` with the bundled you-get and optionally post the file.

    When ``t`` is left at ``UTITLE`` the title is read from the page's
    ``<meta name="title">`` tag.  you-get (``../you-get/you-get`` relative
    to this file) is run with ``-o self.out_dir --no-caption`` and waited
    for.

    If ``self.parsed_args.post_uri`` is set, progressively longer prefixes
    of the title followed by ``*`` are globbed until exactly one file
    matches; that file is handed to ``post_file``.  The search stops as
    soon as a prefix matches nothing.

    ``n`` is accepted for interface compatibility and is not read.

    Raises:
        Exception: no single matching file could be found for the title.
    """
    if t == UTITLE:
        page = self.get_hutf(url)
        t = match1(page, '\<meta name="title" content="([^"]+)"\>')
    echo("download", t)
    echo("")

    here = os.path.dirname(os.path.abspath(__file__))
    you_get = os.path.abspath(os.path.join(here, "../you-get/you-get"))
    proc = Popen([you_get, "-o", self.out_dir, "--no-caption", url])
    proc.wait()

    if not self.parsed_args.post_uri:
        return

    prefix_len = 1
    title_len = len(t)
    while prefix_len < title_len:
        pattern = t[:prefix_len] + "*"
        debug("ls " + pattern)
        hits = glob(pattern)
        if len(hits) == 0:
            break
        if len(hits) == 1:
            post_file(hits[0], self.parsed_args.post_uri)
            return
        prefix_len += 1
    raise Exception("can not find " + t)
def query_info(self, url):
    """Map a tudou album page to youku and delegate to ``YOUKU().query_info``.

    The page is searched for the ``vcode`` that is immediately followed by
    ``lan: '粤语'`` (Cantonese); that code is appended to the youku
    ``v_show/id_`` URL prefix.
    """
    # e.g. http://www.tudou.com/albumplay/zgdaPAjRz1s/8cUPFUj8sl4.html
    page = self.get_hutf(url)
    pattern = U("vcode:\s*'([^']+)',\s*lan\:\s*'粤语'")
    vcode = match1(page, pattern)
    echo("vcode", vcode)
    youku_url = "http://youku.com/v_show/id_" + vcode
    return YOUKU().query_info(youku_url)
def query_info(self, url):
    """Read ``player_data`` from the page and return its m3u8 information.

    The JSON object assigned to ``var player_data`` is parsed, its ``url``
    field is passed through ``self.last_m3u8``, and the title is the
    ``h2.title`` text with runs of whitespace replaced by ``_``.

    Returns ``(title, "m3u8", <result of last_m3u8>, None)``.
    """
    page = self.get_hutf(url)
    raw_json = match1(page, r"var\s+player_data\s*\=\s*({[^}]+})")
    debug(raw_json)
    player_data = json.loads(raw_json)
    mu = self.last_m3u8(player_data['url'])

    heading = SelStr("h2.title", page)[0]
    words = heading.text.split()
    return '_'.join(words), "m3u8", mu, None
def query_info(self, url):
    """Return title and media URL for a ``/play?id=...`` page.

    ``self.title_murl`` supplies the title, the media URL and a sequence of
    pairs; when the id from ``url`` is one of the pairs' first items, the
    matching second item is passed to ``self.name`` and appended to the
    title after an underscore.

    Returns ``(title, "m3u8", murl, None)`` when the media URL contains
    ``chunklist.m3u8``, else ``(title, None, [murl], None)``.
    """
    key = match1(url, "/play\?id=(.+)")
    title, murl, pl = self.title_murl(url)

    by_key = dict(pl)
    if key in by_key:
        title = "%s_%s" % (title, self.name(by_key[key]))

    if 'chunklist.m3u8' not in murl:
        return title, None, [murl], None
    return title, "m3u8", murl, None
def query_info1(self, url):
    """Find the openload embed on a logged-in page and delegate to OpenLoad.

    The page from ``self.login_hutf`` is searched for an openload embed URL
    in an ``<iframe src=...>`` or an ``og:url`` meta tag, e.g.

        <meta name="og:url" content="https://openload.co/embed/isCWWnlsZLE/">
        <iframe src="https://openload.co/embed/isCWWnlsZLE/"

    When nothing matches, the text of the first ``div.clearfix > p > strong``
    element is echoed and ``(None, None, urls, None)`` is returned.
    Otherwise an ``OpenLoad`` instance gets the page's ``og:title`` as its
    title and its ``query_info`` result for the first match is returned.
    """
    page = self.login_hutf(url)
    embed_patterns = (
        '\<iframe src="(https://openload.(c|i)o/embed/\S+)" ',
        '\<meta name="og:url" content="(https://openload.(c|i)o/embed/\S+)"\>',
    )
    urls = match1(page, *embed_patterns)
    echo(urls)
    title = match1(page, '<meta property="og:title" content="([^<>]+)"')
    echo("vmus query_info title=", title)

    if urls:
        loader = OpenLoad()
        loader.title = title
        return loader.query_info(urls[0])

    echo(SelStr("div.clearfix > p > strong", page)[0].text)
    return None, None, urls, None
def test(self, args):
    """Ad-hoc probe: echo the ``repr`` of the title meta tag of ``args.url``."""
    # Sample: https://www.youtube.com/watch?v=dF2X2Bl9fps (also with &pbj=1)
    page = self.get_hutf(args.url)
    # The tag looks like: <meta name="title" content="...">
    title = match1(page, '\<meta name="title" content="([^"]+)"\>')
    echo(repr(title))
def get_vid_from_url(self, url):
    """Extract the video id from a youku watch-page or embed-player URL.

    Recognised shapes are ``youku.com/v_show/id_<id>`` and
    ``player.youku.com/embed/<id>`` (e.g.
    http://player.youku.com/embed/XMTY3NzI5NTU3Ng==).  The first element of
    what ``match1`` returns for the two patterns is handed back.
    """
    patterns = (
        r'youku\.com/v_show/id_([a-zA-Z0-9=]+)',
        r'player\.youku\.com/embed/([a-zA-Z0-9=]+)',
    )
    found = match1(url, *patterns)
    return found[0]
def test(self, args):
    """Ad-hoc probe: resolve one fixed tudou page via youku and echo the result.

    ``args`` is not used.
    """
    # A matching page fragment looks like: ,vcode: 'XNzYxNzM0MDk2'
    # which maps to http://youku.com/v_show/id_XNzYxNzM0MDk2
    sample = "http://www.tudou.com/albumplay/zgdaPAjRz1s/8cUPFUj8sl4.html"
    page = self.get_hutf(sample)
    # Only the vcode followed by lan: '粤语' (Cantonese) is wanted.
    vcode = match1(page, U("vcode:\s*'([^']+)',\s*lan\:\s*'粤语'"))
    echo("vcode", vcode)
    youku_url = "http://youku.com/v_show/id_" + vcode
    info = YOUKU().query_info(youku_url)
    echo(info)
def query_info1(self, url):
    """Resolve a dnvod play page to its title and resource response.

    The title is the stripped ``data`` of the first node selected by
    ``span#bfy_title >``.  The first ``<script>`` containing
    ``PlayerConfig`` supplies ``id`` and ``key``; the id goes into the
    ``GetResource.ashx`` URL and the key is posted to it with the page URL
    as Referer.

    Returns ``(title, None, [response], None)`` where ``response`` is
    whatever ``self.get_html`` returned for that request.

    Raises:
        Exception: no script with a usable PlayerConfig id and key exists.
    """
    # e.g. https://www.dnvod.eu/Movie/Readyplay.aspx?id=deYM01Pf0bo%3d
    hutf = self.get_hutf(url)
    title = SelStr('span#bfy_title >', hutf)[0].data.strip()
    debug('title =', title)

    # Pre-bind so a page without PlayerConfig is reported clearly instead
    # of failing later with an unbound local.
    vid = key = None
    for script in SelStr('script', hutf):
        txt = script.text
        debug('txt =', txt)
        if 'PlayerConfig' not in txt:
            continue
        debug('got PlayerConfig')
        vid = match1(txt, r"id:\s*'([^']+)',")
        key = match1(txt, r"key:\s*'([^']+)',")
        debug('vid =', vid, ', key =', key)
        break
    if not vid or not key:
        raise Exception("can not find PlayerConfig id/key in " + url)

    u = "https://www.dnvod.eu/Movie/GetResource.ashx?id=%s&type=htm" % vid
    self.extra_headers['Referer'] = url
    durl = self.get_html(u, postdata="key=" + key)
    debug(durl)
    return title, None, [durl], None
def query_info(self, url):
    """Return title and media URL for a play page or a detail page.

    The key is the ``id`` query value of a ``/play?id=...`` URL; for any
    other URL the first key reported by ``self.detail_key`` is used.  The
    key is turned into a URL by ``self.key_url`` and resolved by
    ``self.title_murl``.

    Returns ``(title, "m3u8", murl, None)`` when the media URL contains
    ``chunklist.m3u8``, else ``(title, None, [murl], None)``.
    """
    key = match1(url, "/play\?id=(.+)")
    if not key:
        _c, _t, keys = self.detail_key(url)
        key = keys[0]

    play_url = self.key_url(key)
    title, murl = self.title_murl(play_url)

    if 'chunklist.m3u8' not in murl:
        return title, None, [murl], None
    return title, "m3u8", murl, None
def query_info(self, url):
    """Extract the ``video:`` URL and a title from a post page.

    The media URL is the token after `` video:`` in the first
    ``div.post-entry p`` paragraph, with surrounding double quotes removed
    and passed through ``self.last_m3u8``.  The title joins the first
    ``<h3>`` text and the paragraph text with ``_``; the paragraph's
    children are first reduced to ``DataNode`` instances.

    Returns ``(title, "m3u8", <result of last_m3u8>, None)``.
    """
    page = self.get_hutf(url)
    heading = SelStr("h3", page)[0]
    para = SelStr("div.post-entry p", page)[0]

    # Read the URL before the paragraph's children are filtered below.
    quoted = match1(para.text, ' video:\s*(\S+)')
    mu = self.last_m3u8(quoted.strip('"'))

    kept = []
    for child in para.children:
        if isinstance(child, DataNode):
            kept.append(child)
    para.children = kept

    title = "_".join((heading.text.strip(), para.text.strip()))
    return title, "m3u8", mu, None
def get_playlist(self, url):
    """List ``(link text, episode URL)`` pairs for a ``/tv/`` page.

    URLs without ``/tv/`` yield an empty list.  Otherwise the query string
    is dropped, the page fetched, and every ``div.article-paging a`` link
    converted: links whose href has a numeric ``vid`` become
    ``<base>?vid=<vid>``; any other href is appended to the base unchanged.
    """
    if '/tv/' not in url:
        return []

    base = url.split('?')[0]
    page = self.get_hutf(base)

    # hrefs look like "?vid=20723618&title=<episode title>"
    entries = []
    for link in SelStr('div.article-paging a', page):
        href = link['href']
        vid = match1(href, 'vid=(\d+)')
        target = base + '?vid=' + vid if vid else base + href
        entries.append((link.text, target))
    return entries
def test(self, argv):
    """Ad-hoc probe: parse a saved page (``d.html``) and echo what was found.

    ``argv`` is not used.  The file in the current directory is read as
    UTF-8, the JSON assigned to ``var player_data`` is parsed and echoed
    together with its ``url`` field, and the ``h2.title`` text is echoed
    with whitespace runs collapsed to single spaces.
    """
    # Local import: io.open decodes on read under both Python 2 and 3.
    import io

    # d.html is a saved copy of a play page such as
    #   https://www.duboku.co/vodplay/1433-1-1.html
    #   https://u.zdubo.com/vodplay/1697-1-1.html
    # The context manager closes the file; decoding here avoids calling
    # .decode() on a str, which does not exist on Python 3.
    with io.open("d.html", encoding="utf8") as f:
        hutf = f.read()

    dat = match1(hutf, r"var\s+player_data\s*\=\s*({[^}]+})")
    dat = json.loads(dat)
    echo(dat)
    echo(dat['url'])

    t = SelStr("h2.title", hutf)[0]
    echo(' '.join(t.text.split()))
def test(self, args):
    """Ad-hoc probe: echo the ``vd`` of every stream VMS lists for one page.

    ``args`` is not used.  ``tvid`` and ``vid`` are scraped from a fixed
    sample page, passed to ``I2().getVMS``, and the ``vd`` field of each
    entry under ``data.vidl`` is echoed.  Nothing is returned.

    Statements that followed the unconditional ``return`` in earlier
    revisions could never run and have been dropped; the inputs they used
    are kept below for reference.
    """
    # Other inputs used during manual testing:
    #   http://www.iqiyi.com/playlist521743802.html
    #       (links: div.site-piclist_pic > a.site-piclist_pic_link)
    #   https://www.iqiyi.com/v_19rr04z9is.html
    #   http://www.iqiyi.com/v_19rrkxmiss.html
    #   http://www.iqiyi.com/v_19rqzugacg.html?list=19rrm106om
    #   http://www.iqiyi.com/v_19rqztf338.html?list=19rrm106om
    #       (title: meta[name=irTitle] or meta[property=og:title])
    #   getVMS("453406400", "778e9e5286f2ca6a94d8b5da0062f978")
    url = "http://www.iqiyi.com/v_19rqzugacg.html"
    hutf = self.get_hutf(url)
    tvid = match1(hutf, r"""param\['tvid'\] = "(\d+)";""")
    vid = match1(hutf, r"""param\['vid'\] = "([^"]+)";""")
    echo("tvid=", tvid, ", vid=", vid)

    dat = I2().getVMS(tvid, vid)
    for stream in dat['data']['vidl']:
        echo("vd =", stream["vd"])
def query_info1(self, url):
    """Extract title and ``video:`` URL from a dayi.ca post.

    Inside ``div#content-outer div#content`` the title is the first
    ``<h3>`` text joined by ``_`` to the first whitespace-separated word of
    the first ``<p>``.  The URL is the token after ``video:`` in that
    paragraph; if it starts with a quote character, only the text up to the
    next occurrence of that same quote is kept.

    Returns ``(title, "m3u8", url, None)``.
    """
    # e.g. http://www.dayi.ca/ys/?p=2386&page=52
    page = self.get_hutf(url)
    content = SelStr("div#content-outer div#content", page)[0]
    heading = content.select('h3')[0].text
    para = content.select('p')[0]

    title = '_'.join((heading, para.text.split()[0]))
    echo(title)

    media = match1(para.text, 'video:(\S+)')
    quote = media[0]
    if quote in ("'", '"'):
        # Keep what sits between the opening quote and its next occurrence.
        media = media.split(quote)[1]
    echo(media)
    return title, "m3u8", media, None
def test(self, args):
    """Ad-hoc probe: echo every page of one fixed book from bookdown.com.cn.

    ``args`` is not used.  The book id is taken from a hard-coded
    ``/bookinfo/<id>.html`` URL; reading starts at ``/read/<id>_1.html`` and
    follows the ``a#nextPage`` link until a page has none.  For each
    ``div#view_content_txt`` the text is echoed after removing the
    "continue on the next page" notice matched by the pattern below.
    """
    # A /read/30258_1.html style URL is also understood by the id patterns.
    start = "http://www.bookdown.com.cn/bookinfo/30258.html"
    found = match1(start, "/bookinfo/(\d+)\.html", "/read/(\d+).*\.html")
    bid = int(found[0])
    echo("bid =", bid)

    url = "http://www.bookdown.com.cn/read/%d_1.html" % bid
    while True:
        page = self.get_hutf(url)
        for div in SelStr('div#view_content_txt', page):
            text = re.sub(" ", " ", div.text)
            text = re.sub(u"分节阅读.+,请点击下一页继续阅读。", "", text)
            echo(text)
        next_links = SelStr("a#nextPage", page)
        if not next_links:
            break
        url = next_links[0]['href']
def test(self, args):
    """Ad-hoc probe: walk a fixed bookdown.com.cn book and echo its text.

    ``args`` is not used.  Starting from ``/read/<id>_1.html`` for the id
    found in a hard-coded info-page URL, each page's
    ``div#view_content_txt`` blocks are echoed with the "continue on the
    next page" notice removed, and the ``a#nextPage`` href is followed
    until no such link exists.
    """
    # The id patterns accept both /bookinfo/<id>.html and /read/<id>_N.html.
    info_url = "http://www.bookdown.com.cn/bookinfo/30258.html"
    ret = match1(info_url, "/bookinfo/(\d+)\.html", "/read/(\d+).*\.html")
    bid = int(ret[0])
    echo("bid =", bid)

    url = "http://www.bookdown.com.cn/read/%d_1.html" % bid
    more_pages = True
    while more_pages:
        hutf = self.get_hutf(url)
        for block in SelStr('div#view_content_txt', hutf):
            spaced = re.sub(" ", " ", block.text)
            echo(re.sub(u"分节阅读.+,请点击下一页继续阅读。", "", spaced))
        al = SelStr("a#nextPage", hutf)
        if al:
            url = al[0]['href']
        else:
            more_pages = False
def query_info(self, url):
    """Resolve a haiuken theatre page to its title and media URL.

    The title is ``self.title`` unless that still equals ``UTITLE``, in
    which case the page's ``<title>`` is used with a leading
    ``"Theatre - "`` removed.  The host names of every
    ``.bg2 .tmpl img`` source are posted as JSON to the site's
    ``/ajax/theatre/<vid>/`` endpoint, and the media URL is the
    base64-decoded ``Data.Error`` field of the JSON reply.

    Returns ``(title, None, [media_url], None)``.
    """
    # urllib.urlencode exists only on Python 2; on Python 3 the function
    # lives in urllib.parse.  Import locally so either interpreter works.
    try:
        from urllib.parse import urlencode
    except ImportError:
        from urllib import urlencode

    # e.g. http://haiuken.com/theatre/2muu/
    vid = match1(url, r'haiuken.com/theatre/([^/]+)/')
    echo("vid=", vid)

    hutf = self.get_hutf(url)
    m = MyHtmlParser(tidy=False)
    m.feed(hutf)

    if self.title == UTITLE:
        title = m.select("head title")[0].text
        if title.startswith("Theatre - "):
            title = title[10:]
    else:
        title = self.title

    # Host part of each image URL: text between "://" and the next "/".
    hosts = [img['src'].split("://")[1].split('/')[0]
             for img in m.select(".bg2 .tmpl img")]
    d = {"xEvent": "UIMovieComments.Error", "xJson": json.dumps(hosts)}
    reply = self.get_html("http://haiuken.com/ajax/theatre/%s/" % vid,
                          postdata=urlencode(d).encode("utf8"))
    ret = json.loads(reply)
    url = b64decode(ret['Data']['Error'].encode('utf8')).decode('utf8')
    return title, None, [url], None