def test(self):
    """Fetch one fixed v.qq.com player page via self.get_hutf and echo it.

    Manual smoke test: takes no input and returns None.
    """
    # Samples that used to be assigned and immediately overwritten:
    #   https://y.qq.com/portal/mv/v/s0017amxyfd.html  (music, don't care)
    #   https://v.qq.com/x/cover/tu0kfx77pkwk3t6.html?vid=k00205g5xhk
    # don't add proxy='auto'
    url = 'http://v.qq.com/iframe/player.html?vid=i00167ai266&tiny=0&auto=0'
    echo(self.get_hutf(url))
def query_info(self, url):
    """Pick the best known-quality direct video URL from a player page.

    The text returned by self.get_hutf(url) must contain a
    ``var config = {...};`` line; its JSON is read under
    ``metadata.qualities`` and ``metadata.title``.

    Returns (title, ext, [media_url], None), where ext is the last three
    characters of the chosen entry's ``type`` value.
    """
    # sample: http://www.dailymotion.com/embed/video/k7alsxAgBgcMGaachYS
    page = self.get_hutf(url)
    # each quality maps to a list of {"type": ..., "url": ...} entries, e.g.
    # one "application/x-mpegURL" manifest and one "video/mp4" file
    found = re.search("var config = ([^\n]+);", page)
    config = json.loads(found.group(1))
    # a later position in this list beats an earlier one
    known = ['auto', '144', '240', '380', '480', '720']
    best_rank, best_quality, media_url, ext = -2, '', '', ''
    for quality, variants in config['metadata']['qualities'].items():
        if quality not in known:
            echo("New qua", quality)
            continue
        rank = known.index(quality)
        if rank <= best_rank:
            continue
        for variant in variants:
            mime = variant.get('type', '')
            if not mime.startswith('video/'):
                continue
            best_rank, best_quality = rank, quality
            media_url = variant['url']
            ext = mime[-3:]
    echo("ext=%s, mq=%s, url=%s" % (ext, best_quality, media_url))
    title = config['metadata']['title'].strip('.')
    return title, ext, [media_url], None
def test1(self, args):
    """Echo the text of book 31314 on m.bookdown.com.cn, page by page.

    Starts from the fixed first page and keeps following the page's
    "next chapter" button link until none is found.  Every visited URL is
    written to stderr.  ``args`` is not used.
    """
    url = 'http://m.bookdown.com.cn/read/31314.html'
    while True:
        # sys.stderr.write works on Python 2 and 3; the former
        # ``print >> sys.stderr`` form raises TypeError on Python 3.
        sys.stderr.write(url + "\n")
        hutf = self.get_hutf(url)
        for div in SelStr('div.articlecon', hutf):
            # NOTE(review): pattern and replacement of the inner re.sub are
            # the same character here; the pattern may once have been a
            # special space character -- confirm against the original file.
            echo(re.sub(u"分节阅读.+,请点击下一页继续阅读。", "",
                        re.sub(" ", " ", div.text)))
        m = re.findall(
            u'''<a class="btn" href="(http://m\.bookdown\.com\.cn/read/31314_.+\.html)">下一章</a>''',
            hutf)
        if not m:
            break
        url = m[0]
def get_playlist(self, url):
    """List the chapters linked from a book index page.

    Returns a list of (chapter_text, absolute_url) tuples built from the
    ``dd`` children of the first ``div.listmain dl`` node.  ``dd`` entries
    that follow a ``dt`` heading containing u"最新章节" are skipped until
    the next ``dt`` heading.  Returns [] when the title node or the list
    node is missing.
    """
    # urljoin resolves relative, root-relative and absolute hrefs correctly;
    # os.path.join only handled the plain relative case and is OS-dependent.
    try:
        from urllib.parse import urljoin  # Python 3
    except ImportError:
        from urlparse import urljoin  # Python 2

    # sample: https://www.shuquge.com/txt/12236/index.html
    hutf = self.get_hutf(url)
    heading = SelStr("div.book div.info h2", hutf)
    if not heading:
        return []
    echo(heading[0].text)
    listing = SelStr("div.listmain dl", hutf)
    if not listing:
        return []
    wanted = True
    chapters = []
    for node in listing[0].children:
        if node.tag == 'dt':
            wanted = u"最新章节" not in node.text
        elif wanted and node.tag == 'dd':
            link = urljoin(url, node.select("a")[0]['href'])
            echo(link, node.text)
            chapters.append((node.text, link))
    return chapters
def detail_key(self, url):
    """Load url in a browser session and read its video-detail API reply.

    A "Network.responseReceived" handler is registered, the page is
    navigated to url, and the call waits (timeout ci.get_to()) for a
    response whose URL contains '/api/video/detail'.

    Returns (channel, title, keys) taken from data.info[0] of that reply,
    or (None, None, []) if anything inside the try block raises.  The
    session is closed in all cases.
    """
    session = get_ci(debug())
    pending = Queue()

    def on_response(ci, msg):
        # pass the id of the matching request to the waiting caller
        params = msg['params']
        if '/api/video/detail' in params['response']['url']:
            pending.put(params['requestId'])

    session.reg("Network.responseReceived", on_response)
    try:
        session.Page.navigate(url=url)
        request_id = pending.get(timeout=session.get_to())
        reply = session.Network.getResponseBody(requestId=request_id)
        info = json.loads(reply['result']['body'])['data']['info'][0]
        title, channel = info['title'], info['channel']
        keys = [series['key'] for series in info['guestSeriesList']]
        debug("keys = ", keys)
        return channel, title, keys
    except Exception as e:
        echo("detail_key out:", repr(e))
        return None, None, []
    finally:
        session.close()
def get_one(self, url, t=UTITLE, n=False):
    """Download url with the bundled you-get script, then optionally post it.

    When t is left at UTITLE, the title is read from the page's
    <meta name="title"> tag.  After you-get exits, and only when
    self.parsed_args.post_uri is set, ever longer prefixes of the title are
    globbed until exactly one file matches; that file is handed to
    post_file.

    Raises Exception("can not find <title>") when no prefix isolates
    exactly one file.
    """
    if t == UTITLE:
        t = match1(self.get_hutf(url),
                   '\<meta name="title" content="([^"]+)"\>')
    echo("download", t)
    echo("")
    here = os.path.dirname(os.path.abspath(__file__))
    you_get = os.path.abspath(os.path.join(here, "../you-get/you-get"))
    Popen([you_get, "-o", self.out_dir, "--no-caption", url]).wait()
    if not self.parsed_args.post_uri:
        return
    for end in range(1, len(t)):
        pattern = t[:end] + "*"
        debug("ls " + pattern)
        found = glob(pattern)
        if not found:
            break
        if len(found) == 1:
            post_file(found[0], self.parsed_args.post_uri)
            return
    raise Exception("can not find " + t)
def query_info(self, url):
    """Build the openload stream URL for an embed page.

    The id after ``openload.co/embed/`` in url is used to locate, in the
    text returned by self.chrome_hutf(url), an element text that starts
    with that id; that text becomes the stream id.

    Returns (self.title, None, [stream_url], None).
    """
    embed_id = match1(url, '''openload.co/embed/([^/]+)''')
    page = self.chrome_hutf(url)
    stream_id = match1(page, r'>(%s[^<]+)<' % embed_id)
    stream_url = "https://openload.co/stream/%s?mime=true" % stream_id
    echo(stream_url)
    return self.title, None, [stream_url], None
def test1(self, args):
    """Echo the text of book 31314 on m.bookdown.com.cn, page by page.

    Starts from the fixed first page and keeps following the page's
    "next chapter" button link until none is found.  Every visited URL is
    written to stderr.  ``args`` is not used.
    """
    url = 'http://m.bookdown.com.cn/read/31314.html'
    while True:
        # sys.stderr.write works on Python 2 and 3; the former
        # ``print >> sys.stderr`` form raises TypeError on Python 3.
        sys.stderr.write(url + "\n")
        hutf = self.get_hutf(url)
        for div in SelStr('div.articlecon', hutf):
            # NOTE(review): pattern and replacement of the inner re.sub are
            # the same character here; the pattern may once have been a
            # special space character -- confirm against the original file.
            echo(re.sub(u"分节阅读.+,请点击下一页继续阅读。", "",
                        re.sub(" ", " ", div.text)))
        m = re.findall(
            u'''<a class="btn" href="(http://m\.bookdown\.com\.cn/read/31314_.+\.html)">下一章</a>''',
            hutf)
        if not m:
            break
        url = m[0]
def query_info(self, url):
    """Echo the page at url and return the URLs found by _get_m3u8_urls.

    Returns ("", "book", urls, None).
    """
    # sample: http://m.bookdown.com.cn/read/31314_97_2.html
    page = self.get_hutf(url)
    echo(page)
    return "", "book", self._get_m3u8_urls(url, page), None
def query_info(self, url):
    """Echo the page at url and return the URLs found by _get_m3u8_urls.

    Returns ("", None, urls, None).
    """
    # sample: http://m.bookdown.com.cn/read/31314_97_2.html
    page = self.get_hutf(url)
    echo(page)
    return "", None, self._get_m3u8_urls(url, page), None
def query_info(self, url):
    """Resolve an iqiyi play page to its playlist segment URLs.

    The title comes from the first of the irTitle / og:title meta tags that
    exists, else self.title.  tvid and vid are read from the page script,
    passed to I2().getVMS, and the stream chosen by self.get_vd_url is
    fetched and expanded with self._get_m3u8_urls.

    Returns (title, ext, urls, None) -- i.e. title, ext, urls, totalsize --
    where ext is "ts" if the first URL contains '.ts?', otherwise None.
    """
    # samples: http://www.iqiyi.com/v_19rr26qr38.html
    #          https://www.iqiyi.com/v_19rr04z9is.html?list=19rrm106om
    page = self.get_hutf(url)
    for selector in ('meta[name=irTitle]', 'meta[property=og:title]'):
        try:
            title = SelStr(selector, page)[0]["content"]
        except IndexError:
            continue
        break
    else:
        # neither meta tag is present
        title = self.title
    tvid = match1(page, """param\['tvid'\] = "(\d+)";""")
    vid = match1(page, """param\['vid'\] = "([^"]+)";""")
    echo("tvid=", tvid, ", vid=", vid)
    vd, stream_url = self.get_vd_url(I2().getVMS(tvid, vid))
    echo(title)
    playlist = self.get_hutf(stream_url)
    segments = self._get_m3u8_urls(stream_url, playlist)
    ext = "ts" if '.ts?' in segments[0] else None
    return title, ext, segments, None
def query_info(self, url):
    """Resolve a tv.sohu.com play page to its clip download URLs.

    The numeric vid is read from the page, the per-vid data is loaded with
    self.get_data_by_vid, and the first of oriVid / superVid / highVid /
    norVid / relativeId that is non-zero and differs from vid is loaded in
    its place.  One URL is built per clip with self.real_url.

    Returns (title, "mp4", urls, size), where size is the sum of
    ``clipsBytes``.  NOTE(review): the original fell off the end and
    returned None; the tuple follows the (title, ext, urls, size) shape of
    the other query_info methods -- confirm the caller's expectation.
    """
    # Fixes: the url argument used to be overwritten by hard-coded samples
    #   http://tv.sohu.com/20110220/n279432193.shtml
    #   http://tv.sohu.com/20150705/n416207533.shtml
    # and the clip numbering used len() of a list that was never filled.
    html = self.get_html(url)
    vid = mg1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
    echo('vid =', vid)
    hutf = self.get_data_by_vid(vid)
    debug(hutf)
    data = json.loads(hutf)
    for qtyp in ["oriVid", "superVid", "highVid", "norVid", "relativeId"]:
        hqvid = data['data'][qtyp]
        # NOTE(review): vid is a regex-captured string; if hqvid is an int
        # the second test is always true -- confirm the intended comparison.
        if hqvid != 0 and hqvid != vid:
            break
    debug(qtyp)
    data = json.loads(self.get_data_by_vid(hqvid))
    debug(data)
    host = data['allot']
    tvid = data['tvid']
    data = data['data']
    title = data['tvName']
    size = sum(data['clipsBytes'])
    named_urls = []
    for new, cu, ck in zip(data['su'], data['clipsURL'], data['ck']):
        # clips are numbered from 01 in playback order
        name = "%s%02d.mp4" % (title, len(named_urls) + 1)
        named_urls.append(
            (name, self.real_url(host, vid, tvid, new, cu, ck)))
    debug("title=%s, size=%d" % (title, size))
    debug(named_urls)
    return title, "mp4", [clip_url for _name, clip_url in named_urls], size
def query_info(self, url):
    """Echo the playlist at url and return the URLs found by _get_m3u8_urls.

    Returns ("", None, urls, None).
    """
    # sample: https://vip.pp63.org/20180615/20jqyayZ/hls/index.m3u8
    playlist = self.get_hutf(url)
    echo(playlist)
    return "", None, self._get_m3u8_urls(url, playlist), None
def test(self, args):
    """Queue every 'daily' play link on args.url with a local service.

    Links under ``div#playsource a`` are numbered from 1 in page order.
    Each link whose href contains 'daily' is POSTed (form-encoded) to
    http://127.0.0.1:8080/ with a "<title>_<nn>" name, followed by a
    two-second pause.
    """
    import time
    from urllib2 import urlopen
    from urllib import urlencode
    # sample: http://qdrama.org/k2/
    page = self.get_hutf(args.url)
    title = SelStr("div.title.sizing h1", page)[0].text
    for cnt, node in enumerate(SelStr("div#playsource a", page), 1):
        name = "%s_%02d" % (title.encode('utf8'), cnt)
        href = node['href']
        if 'daily' not in href:
            continue
        echo(name, href)
        # manual resume knob: raise the bound to skip leading episodes
        if cnt < 0:
            continue
        form = urlencode({"aviurl": href,
                          "avitil": name,
                          "destdn": "../dwm/xman/",
                          "sub": "Start"})
        urlopen("http://127.0.0.1:8080/", form).read()
        time.sleep(2)
def query_info(self, m3u8url):
    """Return the URLs self.try_m3u8 yields for a page or playlist URL.

    If the fetched text defines ``var redirecturl = "..."``, the playlist
    URL becomes that value followed by the ``var main = "..."`` value
    (which must contain 'index.m3u8'); otherwise m3u8url is used as given.

    Returns ("", None, urls, None).
    """
    # sample page: https://www.y3600.com/hanju/2017/1017.html
    page = self.get_hutf(m3u8url)
    redirect = re.findall('''var redirecturl = "(.+)";''', page)
    if redirect:
        echo(redirect)
        main = re.findall('''var main = "(.+index.m3u8.+)";''', page)
        echo(main)
        m3u8url = redirect[0] + main[0]
    return "", None, self.try_m3u8(m3u8url), None
def get_stream(self, data):
    """Return the first stream matching self.stream_types, in that order.

    Each entry of self.stream_types is indexed as (stream_type, second
    item, ...).  Returns (entry[0], entry[1], stream) for the first entry
    whose type equals a stream's 'stream_type'.  If nothing matches, data
    is echoed and None is returned implicitly.
    """
    for kind in self.stream_types:
        for stream in data['stream']:
            if kind[0] != stream['stream_type']:
                continue
            echo(kind)
            return kind[0], kind[1], stream
    echo(data)
def query_info(self, url):
    """Resolve an embed page to a single direct video URL.

    If the page has a ``video#videojs source`` node, its ``src`` is used.
    Otherwise the page is requested again with postdata 'block=1' and the
    jwplayer "sources" list is scanned for the entry whose label sits
    latest in self.labels.

    Returns (title, ext, [url], None); ext is 'mp4' when the page title
    ends in '.mp4' (the suffix is stripped from the title), else None.
    """
    hutf = self.get_hutf(url)
    debug(hutf)
    title = SelStr('title', hutf)[0].text
    k = None
    if title.endswith('.mp4'):
        title, k = title[:-4], 'mp4'
    # sample: https://www.rapidvideo.com/embed/FUZ35WDLM7
    v = SelStr("video#videojs source", hutf)
    if v:
        return title, k, [v[0]["src"]], None
    # sample: https://www.rapidvideo.com/embed/ZsNSciBj
    hutf = self.get_hutf(url, postdata='block=1')
    data = match1(hutf, 'jwplayer\("home_video"\)\.setup\(([^\(\)]+)\);')
    debug(data)
    data = match1(data, '"sources":\s*(\[[^\[\]]+\])')
    # Start below index 0 so the first known label can be selected too.
    ml, u = -1, ''
    for src in json.loads(data):
        l = src['label']
        if l not in self.labels:
            echo("new label", l)
            # an unknown label has no rank; .index() would raise ValueError
            continue
        i = self.labels.index(l)
        if i > ml:
            ml, u = i, src['file']
    debug(title, u)
    return title, k, [u], None
def handle_sp_list(self, url):
    """Download every episode of a bilibili "sp" (serial play list) page.

    The spid is read from the page; for each season in
    ``ul#season_selector`` the matching sppage listing is fetched and its
    episode links are collected.  Each episode is then run through a fresh
    BILIBILI instance, honouring the caller's playlist_skip and
    playlist_top values.  The process always ends with sys.exit(1).
    """
    # e.g. http://www.bilibili.com/sp/维京传奇
    # base.special.js line 25, loadBgmPage
    # http://www.bilibili.com/sppage/bangumi-21542-913-1.html
    page = self.get_hutf(url)
    spid = search_first(page, 'var spid = "(\d+)";').group(1)
    echo("spid=", spid)
    episodes = []
    for season in SelStr('ul#season_selector li', page):
        listing = self.get_hutf(
            "http://www.bilibili.com/sppage/bangumi-%s-%s-1.html" % (
                spid, season['season_id']))
        for link in SelStr('div.season_list li a.t', listing):
            episodes.append((link['title'].strip(),
                             'http://www.bilibili.com' + link['href']))

    # the per-episode runs get a copy whose playlist options are overridden;
    # skip/top are applied here instead
    args = copy(self.parsed_args)
    skip, top = args.playlist_skip, args.playlist_top
    args.playlist_skip = -1315
    args.playlist_top = 0
    for cnt, (title, link) in enumerate(episodes, 1):
        if cnt > top > 0:
            break
        if cnt < skip:
            continue
        echo(title, link)
        worker = BILIBILI()
        worker.title = title
        args.url = link
        run(worker, args)
    sys.exit(1)
def query_info(self, url):
    """Echo the playlist at url and return what self.try_m3u8(url) yields.

    Returns ("", None, urls, None).
    """
    # sample: https://vip.pp63.org/20180615/20jqyayZ/hls/index.m3u8
    echo(self.get_hutf(url))
    return "", None, self.try_m3u8(url), None
def test(self, args):
    """Echo the text of the first ``div.content`` node of ./s1.html.

    Offline check: the page is read from the local snapshot ``s1.html``
    (decoded as UTF-8) instead of being fetched.  ``args`` is not used.
    """
    # snapshot of https://www.shuquge.com/txt/12236/46252712.html
    # Binary read + decode works on Python 2 and 3, and the with-block
    # closes the file (it used to be left open).
    with open("s1.html", "rb") as f:
        hutf = f.read().decode('utf8')
    # This assignment was commented out, so echo(ct.text) raised NameError.
    ct = SelStr("div.content", hutf)[0]
    echo(ct.text)
def query_info(self, url):
    """Resolve a le.com / letv.com play page to its playlist segment URLs.

    The vid is taken from the URL when it has the /ptv/vplay/<id>.html
    form, else from ``vid="..."`` in the page.  The playJson API is queried
    for the available streams, one stream id is chosen (720p for playlists,
    1080p otherwise, with fallbacks), and the resulting playlist is decoded
    with decode_m3u8.

    Returns (title, None, urls, None), urls being every non-comment line of
    the decoded playlist.
    """
    # sample: http://www.le.com/ptv/vplay/1877906.html?ch=sougou_mfdy&fromvsogou=1
    html = self.get_html(url)
    hutf = html.decode('utf8')
    vid = None
    for pattern in (r'http://www.le.com/ptv/vplay/(\d+).html',
                    r'http://www.letv.com/ptv/vplay/(\d+).html'):
        found = re.match(pattern, url)
        if found:
            vid = found.group(1)
            break
    else:
        vid = match1(hutf, r'vid="(\d+)"')
    title = match1(hutf, r'name="irTitle" content="(.*?)"')
    echo("vid =", vid)
    echo("title =", title)

    tkey = calcTimeKey(int(time.time()))
    u = 'http://api.letv.com/mms/out/video/playJson?'
    u = u + ("id=%s&platid=1&splatid=101&format=1" % vid)
    u = u + ("&tkey=%d&domain=www.letv.com" % tkey)
    data = self.get_html(u)
    info = json.loads(data.decode("utf-8"))

    support_stream_id = info["playurl"]["dispatch"].keys()
    # preferred quality; the former always-empty kwargs lookup reduced to this
    si = "720p" if self.is_playlist else "1080p"
    if si.lower() in support_stream_id:
        stream_id = si
    else:
        echo("Current Video Supports:")
        for i in support_stream_id:
            echo("\t--format", i, "<URL>")
        if "1080p" in support_stream_id:
            stream_id = '1080p'
        elif "720p" in support_stream_id:
            stream_id = '720p'
        else:
            # rank the remaining ids by the number after their first char
            sids = sorted(support_stream_id, key=lambda i: int(i[1:]))
            stream_id = sids[-1]
    echo("stream_id =", stream_id)

    u2 = info["playurl"]["domain"][0]
    u2 = u2 + info["playurl"]["dispatch"][stream_id][0]
    # only needed by the disabled ``return title, ext, us, None`` variant
    ext = info["playurl"]["dispatch"][stream_id][1].split('.')[-1]
    u2 = u2 + "&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux"
    # %s keeps the random fraction; "%d" truncated it to a constant 0
    u2 = u2 + ("&tag=letv&sign=letv&expect=3&tn=%s" % random.random())
    u2 = u2 + ("&pay=0&iscpn=f9051&rateid=%s" % stream_id)
    r2 = self.get_html(u2)
    info2 = json.loads(r2.decode("utf-8"))
    m3u8 = self.get_html(info2["location"])
    m3u8_list = decode_m3u8(bytearray(m3u8))
    us = re.findall(r'^[^#][^\r]*', m3u8_list, re.MULTILINE)
    return title, None, us, None
def query_info(self, url):
    """Delegate a tudou album page to YOUKU using its '粤语' vcode.

    The vcode paired with ``lan: '粤语'`` in the page is appended to the
    youku v_show URL, and YOUKU().query_info's result is returned as is.
    """
    # sample: http://www.tudou.com/albumplay/zgdaPAjRz1s/8cUPFUj8sl4.html
    page = self.get_hutf(url)
    vcode = match1(page, U("vcode:\s*'([^']+)',\s*lan\:\s*'粤语'"))
    echo("vcode", vcode)
    youku_url = "http://youku.com/v_show/id_" + vcode
    return YOUKU().query_info(youku_url)
def test1(self, argv):
    """Echo (0, href) for every ``a.fasc-button`` link on a fixed page.

    ``argv`` is not used.
    """
    url = 'http://vmus.online/the-outpost-s01.html'
    page = self.get_hutf(url)
    links = [(0, anchor['href']) for anchor in SelStr("a.fasc-button", page)]
    echo(links)
def test1(self, argv):
    """Echo the m3u8 for argv.url and debug-dump the 'part' JSON.

    The tvid from self.get_stp(argv.url) is used to query the kantv6
    'video/part' endpoint; the parsed reply is pretty-printed via debug.
    """
    echo(self.get_m3u8(argv.url))
    sect, tvid, ptid = self.get_stp(argv.url)
    part_url = '%s?tvid=%s' % (
        'https://www.kantv6.com/index.php/video/part', tvid)
    reply = json.loads(self.get_hutf(part_url))
    debug(json.dumps(reply, indent=2))
def test(self, argv):
    """Echo the text self.chrome_hutf returns for one fixed dnvod play page.

    Manual smoke test; ``argv`` is not used.
    """
    # Samples that used to be assigned and then overwritten or left unused:
    #   https://www.dnvod.tv/Movie/detail.aspx?id=TEee8%2fITNg4%3d
    #   https://www.dnvod.tv/Movie/Readyplay.aspx?id=OIfaQTVHEiA%3d
    #   http://server3.dnvod.tv/hvod/lxj-tscgwlb-50-022061041.mp4 (signed)
    url = "https://www.dnvod.tv/play?id=TW29RCmFL4o%3D"
    echo(self.chrome_hutf(url))
def test(self, args):
    """Echo the repr of the <meta name="title"> content of args.url."""
    # sample: https://www.youtube.com/watch?v=dF2X2Bl9fps
    # looks for a tag of the form <meta name="title" content="...">
    page = self.get_hutf(args.url)
    title = match1(page, '\<meta name="title" content="([^"]+)"\>')
    echo(repr(title))
def test(self, args):
    """Echo the text self.get_hutf returns for one fixed chunklist.m3u8 URL.

    Manual smoke test; ``args`` is not used.  The URL is a signed
    dnvodcdn.me link that carries a ``dnvodendtime`` parameter.  It was
    originally captured, together with its request headers, as a curl
    command for a play page such as https://www.ifsp.tv/play?id=h0zrX16TdlV
    """
    # Two earlier url assignments (https://www.ifsp.tv/play?id=m0IXOmqu894
    # and another signed chunklist link) were dead stores and were removed.
    url = 'https://s8-e1.dnvodcdn.me/cdn/_definst_/mp4:s8/jvod/xj-csywm-480p-011A7CB68.mp4/chunklist.m3u8?dnvodendtime=1630365740&dnvodhash=otFp1V-_aeahcOjnMSbBLDfxVITK4IK95JzEJvtIaeg=&dnvodCustomParameter=0_64.180.112.212.CA_1&lb=4e0618fc4b0ea2e45d57e6ea11efb267&us=1&vv=93e0e71677f8618bd245507435fe10d8&pub=CJOpC34vCZapE2utDJ4uD5yXeAzDZGkCJWmBZ4nCYuoCJ9VC6KoD6OpDc8mE6KnD65bE3bYOc9cOJHbEJ9XCZTZDMHVDZWpD3GmDZPbOJatPZDZCJOrPM4vPcCtE65ZDZCoDJ7'
    echo(self.get_hutf(url))
def test(self, args):
    """Echo YOUKU().query_info for the '粤语' vcode of a fixed tudou page.

    ``args`` is not used.
    """
    page_url = "http://www.tudou.com/albumplay/zgdaPAjRz1s/8cUPFUj8sl4.html"
    # expected form: http://youku.com/v_show/id_XNzYxNzM0MDk2
    page = self.get_hutf(page_url)
    vcode = match1(page, U("vcode:\s*'([^']+)',\s*lan\:\s*'粤语'"))
    echo("vcode", vcode)
    echo(YOUKU().query_info("http://youku.com/v_show/id_" + vcode))
def query_info(self, url):
    """Render url with ./phantomjs (dwm.js, argument "300") and scrape it.

    Returns (title, None, [src], None): the text of the first
    ``h1.entry-title`` and the ``src`` of the first ``video source`` node
    in the rendered output.
    """
    # sample: http://8drama.com/122804/
    echo('phantomjs wait ...')
    renderer = Popen(["./phantomjs", "dwm.js", "300", url], stdout=PIPE)
    rendered = renderer.stdout.read().decode('utf8')
    renderer.wait()
    video_src = SelStr('video source', rendered)[0]['src']
    title = SelStr('h1.entry-title', rendered)[0].text
    return title, None, [video_src], None
def post_one(fn, conn, dst):
    """POST the file fn to path dst over conn as multipart/form-data.

    fn is wrapped in an UpFile with the field name 'attachment'; the
    response status and reason are echoed.  Returns None.
    """
    echo(fn)
    boundary = "-----------------12123135---61b3e9bf8df4ee45---------------"
    payload = UpFile(fn, 'attachment', boundary)
    conn.request("POST", dst, payload, {
        "Content-Type": "multipart/form-data; boundary=%s" % boundary,
        "Content-Length": str(payload.size()),
    })
    response = conn.getresponse()
    echo(response.status, response.reason)
def test(self, args):
    """Fetch the media file behind a fixed video66.org embed page.

    The ``file:`` URL passed to player.load({...}) is extracted from the
    page, echoed, and requested with the embed page set as Referer.
    Nothing happens if the pattern is not found.  ``args`` is not used.

    Equivalent manual download:
      wget -U 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML,
      like Gecko) Chrome/48.0.2564.109 Safari/537.36' -O 10_4.mp4
      --header="Referer: <embed url>" "<file url>"
    """
    embed_url = "http://video66.org/embed.php?w=798&h=449&vid=vids4/w_2016_-_10_clip4.mp4"
    page = self.get_hutf(embed_url)
    found = re.search(
        "player\.load\(\{\s+file: \"([^\"]+)\",\s+image\:", page)
    if not found:
        return
    media_url = found.group(1)
    echo(media_url)
    self.extra_headers['Referer'] = embed_url
    self.get_html(media_url)
def login_hutf(self, url):
    """Log in with a random shared account and return the decoded reply.

    One of the accounts vm01..vm30 (user name == password) is picked at
    random and POSTed to self.login_url with redirect_to set to the quoted
    url.  Returns the response body decoded as UTF-8.
    """
    account = "vm%02d" % random.randint(1, 30)
    echo(account)
    post_data = "".join([
        "log=%s&pwd=%s&wp-submit=" % (account, account),
        # pre-encoded label of the site's login submit button
        "%E5%85%8D%E8%A8%BB%E5%86%8A%E7%99%BB%E5%85%A5%28%E6%96%B9%E6%B3%95%E8%AB%8B%E8%A6%8B%E4%B8%8A%E6%96%B9%E8%AA%AA%E6%98%8E%29+%C2%BB&redirect_to=",
        urllib.quote(url),
    ])
    reply = self.get_html(self.login_url, postdata=post_data)
    return reply.decode('utf8')
def query_info(self, url):
    """Collect the URLs DM().query_info returns for each play link.

    Every ``div#playsource a`` href on the page is resolved with one shared
    DM instance and the resulting URL lists are concatenated in page order.

    Returns (title, None, urls, None), title being the text of the first
    ``div.title.sizing h1`` node.
    """
    page = self.get_hutf(url)
    title = SelStr("div.title.sizing h1", page)[0].text
    resolver = DM()
    collected = []
    for anchor in SelStr("div#playsource a", page):
        _title, _ext, part, _size = resolver.query_info(anchor['href'])
        echo(part)
        collected.extend(part)
    return title, None, collected, None
def qi2(self, hutf):
    """Build the mp4 API URL for the first YKYun id found in a page.

    Returns (title, 'mp4', urls, None).  title is the text of the first
    ``article.article-content > p > strong`` node; urls holds at most one
    entry, built from the first ``videoarr.push('YKYun.php?id=...')`` id.
    """
    # sample: http://www.dianyingbar.com/11184.html
    ids = re.findall("videoarr.push\('YKYun\.php\?id\=([^\(\)]+)'\)", hutf)
    echo(ids)
    title = SelStr('article.article-content > p > strong', hutf)[0].text
    # only the first id is used
    urls = [
        'https://vipwobuka.dianyingbar.com:998/api/yUrl.php?id=%s&type=mp4' % vid
        for vid in ids[:1]
    ]
    return title, 'mp4', urls, None
def get_vd_url(self, dat):
    """Choose a stream from dat['data']['vidl'] by preferred ``vd`` code.

    Codes are tried in the order 4, 17, 2, 5; the first stream carrying the
    first code that is present wins.  If none is present, the first listed
    stream is used.

    Returns (vd, m3u_url).
    """
    streams = dat['data']['vidl']
    preferred = (
        4,   # TD: '720p'
        17,  # TD_H265': 720p H265
        2,   # HD: '540p'
        5,   # BD: 1080p
    )
    for code in preferred:
        for stream in streams:
            if stream["vd"] != code:
                continue
            echo("4, 17, 2, 5, vd =", code)
            return code, stream['m3u']
    # no preferred quality offered: fall back to the first listed stream
    first = streams[0]
    return first['vd'], first['m3u']
def get_playlist(self, url):
    """List the guest-visible episode links of a dnvod page.

    The page text comes from self.chrome_hutf.  Returns a list of
    (self.title, absolute_url) tuples, one per matching anchor, and echoes
    that list.
    """
    # samples: https://www.dnvod.eu/Movie/detail.aspx?id=NU%2bOQHwQObI%3d
    #          https://www.dnvod.eu/Movie/Readyplay.aspx?id=deYM01Pf0bo%3d
    page = self.chrome_hutf(url)
    anchors = SelStr(
        'ul[data-identity=guest] > li > div.bfan-n > div.bfan-n > a', page)
    urls = [(self.title, 'https://www.dnvod.tv/Movie/' + anchor['href'])
            for anchor in anchors]
    echo(urls)
    return urls
def read(self, bufsize):
    """Return the next piece of the upload body.

    self.s tracks the phase: 0 yields self.pre once, 1 streams self.f in
    chunks of at most bufsize (updating self.cnt and printing the
    percentage of self.tal to stdout) and yields self.end once the file is
    exhausted, and any later call yields "".
    """
    #FIXME, if bufsize < len(self.pre) or bufsize < len(self.end)
    state = self.s
    if state == 0:
        self.s = 1
        return self.pre
    if state != 1:
        return ""
    chunk = self.f.read(bufsize)
    if not chunk:
        # file exhausted: finish the progress line and emit the trailer
        self.s = 2
        echo("")
        return self.end
    self.cnt += len(chunk)
    sys.stdout.write("\r%0.1f" % (self.cnt * 100.0 / self.tal))
    return chunk
def query_info(self, url):
    """Resolve a page to its video URL, via the DOM or a browser session.

    If the text from self.chrome_hutf contains a ``video#video_player``
    node, returns (title, None, [src], None).  Otherwise the result of
    self.query_info_chrome(ci, url) on a fresh get_ci(DEBUG) session is
    returned, and that session is stopped afterwards.
    """
    page = self.chrome_hutf(url)
    title = SelStr("html head title", page)[0].text
    echo("title =", title)
    players = SelStr("video#video_player", page)
    echo(players)
    if players:
        return title, None, [players[0]["src"]], None
    session = get_ci(DEBUG)
    try:
        return self.query_info_chrome(session, url)
    finally:
        print("ci.stop()")
        session.stop()
def getvinfo(self, url, fmt='shd'):
    """Read title and vid from a page's VIDEO_INFO script block.

    The first ``script[r-notemplate=true]`` node whose text mentions
    VIDEO_INFO is parsed with to_dict; its title and vid are then passed to
    self.get_vinfo_do(url, vid, fmt).

    Returns (title, vid, mp), mp being the get_vinfo_do result.
    Raises ValueError when the page has no usable VIDEO_INFO assignment
    (this used to surface as UnboundLocalError / AttributeError).
    """
    hutf = self.get_hutf(url)
    data = None
    for s in SelStr('script[r-notemplate=true]', hutf):
        if 'VIDEO_INFO' in s.text:
            data = s.text
            break
    if data is None:
        raise ValueError("no VIDEO_INFO script found in " + url)
    # COVER_INFO was parsed here but never used, and a page lacking it made
    # the call fail for no reason; only VIDEO_INFO is needed.
    match = re.search('var\s+VIDEO_INFO\s?=\s?({[^;]+);', data)
    if match is None:
        raise ValueError("can not parse VIDEO_INFO in " + url)
    video_info = to_dict(match.group(1))
    title = video_info['title']
    vid = video_info['vid']
    echo('title =', title, 'vid =', vid)
    mp = self.get_vinfo_do(url, vid, fmt)
    return title, vid, mp
def test(self, args):
    """Echo the text of book 30258 on www.bookdown.com.cn, page by page.

    The book id is parsed from the fixed bookinfo URL; reading starts at
    ``<id>_1.html`` and follows each page's ``a#nextPage`` href until no
    such link exists.  ``args`` is not used.
    """
    info_url = "http://www.bookdown.com.cn/bookinfo/30258.html"
    # a /read/30258_1.html style URL is matched by the second pattern
    found = match1(info_url, "/bookinfo/(\d+)\.html", "/read/(\d+).*\.html")
    bid = int(found[0])
    echo("bid =", bid)
    page_url = "http://www.bookdown.com.cn/read/%d_1.html" % bid
    while True:
        page = self.get_hutf(page_url)
        for div in SelStr('div#view_content_txt', page):
            cleaned = re.sub(" ", " ", div.text)
            echo(re.sub(u"分节阅读.+,请点击下一页继续阅读。", "", cleaned))
        next_links = SelStr("a#nextPage", page)
        if not next_links:
            break
        page_url = next_links[0]['href']
def query_info(self, url):
    """Resolve a youku page to the download URLs of all its segments.

    The play API is queried for the vid taken from url, a stream is chosen
    with self.get_stream, and one getFlvPath request is made per segment;
    every 'server' value in the replies is collected.

    Returns (title, ext, urls, size), size coming from
    self.get_total_size(urls).
    Raises RuntimeError if a segment key is -1 (paywalled content).
    """
    vid = self.get_vid_from_url(url)
    echo("vid =", vid)
    api_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % vid
    hutf = self.get_hutf(api_url)
    meta = json.loads(hutf)
    data12 = meta['data']
    data = data12
    debug(data)
    title = data['video']['title']
    sec_ep = data12['security']['encrypt_string']
    sec_ip = data12['security']['ip']
    echo(title, sec_ep, sec_ip)
    stype, ext, stm = self.get_stream(data)

    # get url list
    e_code = self.trans_e(self.f_code_1,
                          base64.b64decode(sec_ep.encode('ascii')))
    sid, token = e_code.split('_')
    echo(sid, token)
    urls = []
    streamfileid = stm['stream_fileid']
    for no, seg in enumerate(stm['segs'], 0):
        k = seg['key']
        if k == -1:
            # we hit the paywall; stop here.  A bare ``raise`` has no
            # active exception to re-raise, so state the reason explicitly.
            raise RuntimeError(
                "segment %d of %s has no key (paywall)" % (no, vid))
        # characters 8-9 of the stream file id carry the segment number
        fileid = "%s%02X%s" % (streamfileid[0:8], no, streamfileid[10:])
        ep = self.generate_ep(fileid, sid, token)
        q = parse.urlencode(dict(
            ctype=self.ctype,
            ev=1,
            K=k,
            ep=parse.unquote(ep),
            oip=str(sec_ip),
            token=token,
            yxon=1,
        ))
        u = 'http://k.youku.com/player/getFlvPath/sid/{sid}_00' \
            '/st/{container}/fileid/{fileid}?{q}'.format(
                sid=sid,
                container=ext,
                fileid=fileid,
                q=q,
            )
        html = self.get_html(u, True)
        for i in json.loads(html.decode("utf8")):
            echo(i['server'])
            urls.append(i['server'])
    k, size = self.get_total_size(urls)
    return title, ext, urls, size
def get_playlist(self, page_url):
    """Return (title, href) pairs for an iqiyi playlist or album page.

    URLs containing '/playlist' are fetched directly and read from the
    ``site-piclist_pic_link`` anchors.  Any other page is rendered with
    ./phantomjs (dwm.js, argument "200") and read from the
    ``div.smalList`` anchors; in that case self.align_num is set to the
    number of digits in the entry count.
    """
    # e.g. http://www.iqiyi.com/playlist521743802.html
    if '/playlist' in page_url:
        page = self.get_hutf(page_url)
        anchors = SelStr(
            "div.site-piclist_pic > a.site-piclist_pic_link", page)
        return [(anchor['title'], anchor['href']) for anchor in anchors]

    # e.g. http://www.iqiyi.com/a_19rrhb9eet.html
    echo("get_list phantomjs wait 200 ...")
    renderer = Popen(["./phantomjs", "dwm.js", "200", page_url], stdout=PIPE)
    raw = renderer.stdout.read()
    renderer.wait()
    rendered = raw.decode("utf8")
    entries = []
    for anchor in SelStr('div.smalList > ul > li > a', rendered):
        entries.append((anchor.text, anchor['href']))
    self.align_num = len(str(len(entries)))
    return entries
def query_info(self, url):
    """Resolve a ttwanda.com film/TV page to its media URL(s).

    Steps, in order:
      * a url without '?' is extended with the first ``section.p5 div a``
        href and fetched again;
      * the title is the first ``strong`` of the article; for '/films/'
        urls the part inside the 《》 brackets is used when present;
      * an iframe pointing at '/player/v.php?url=' is delegated to
        LETV().query_info;
      * otherwise the iframe page is fetched (Referer set to url) and its
        ``var play_url`` value is returned -- expanded with self.try_m3u8
        for youku m3u8 / lecloud / letv-uts links, or probed with
        get_kind_size (cookies stored in self.wget_cookie) for
        ftn_handler links.

    Returns (title, ext, urls, size).  Calls sys.exit(1) when play_url
    cannot be found.
    """
    # sample: http://www.ttwanda.com/films/us/1693.html?xf
    page = self.get_hutf(url)
    if '?' not in url:
        url = url + SelStr('section.p5 div a', page)[0]['href']
        page = self.get_hutf(url)

    title = SelStr("div.video-content article p strong", page)[0].text
    bracketed = "《(.+)》"
    if not py3:
        bracketed = bracketed.decode('utf8')
    inner = match1(title, bracketed)
    if inner and '/films/' in url:
        title = inner

    src = SelStr('iframe.player', page)[0]['src']
    if '/player/v.php?url=' in src:
        # http://www.ttwanda.com/tv/ustv/945.html
        # ../../player/v.php?url=www.le.com/ptv/vplay/20723618.html
        from letv import LETV
        return LETV().query_info('http://' + src.split('?url=', 1)[1])

    if not src.startswith(("http://", "https://")):
        src = 'http://www.ttwanda.com/' + src
    echo(src)
    self.extra_headers['Referer'] = url  # this is important
    player_page = self.get_hutf(src)
    dst = match1(player_page, 'var play_url \= "([^"]+)"')
    echo(dst)
    if not dst:
        echo("Can not find var play_url")
        sys.exit(1)

    is_youku_m3u8 = 'youku.com/' in dst and '/m3u8' in dst
    if is_youku_m3u8 or 'lecloud.com/' in dst or '/letv-uts/' in dst:
        return title, None, self.try_m3u8(dst), None

    if 'ttwanda.com/ftn_handler/' in dst:
        # forward every cookie except the PHP session id
        pairs = []
        for cookie in self.cookie.cookiejar:
            if cookie.name != 'PHPSESSID':
                pairs.append("%s=%s" % (cookie.name, cookie.value))
        echo(pairs)
        self.wget_cookie = "; ".join(pairs)
        kind, size = get_kind_size(dst, self.wget_cookie)
        return title, kind, [dst], size

    # any other source (e.g. http://www.ttwanda.com/films/us/907.html?style=cq)
    # is returned as a single direct URL
    return title, None, [dst], None