def GetWlRealUrl(playurl, times=0):
    """Resolve a play page into real media URLs via the vxml.56.com JSON API.

    The play page is fetched and the JSON object assigned to
    ``var _oFlv_o`` is extracted from it. Its ``id`` is then used to query
    ``http://vxml.56.com/json/<id>/?src=site``, whose ``info.rfiles`` list
    supplies the real URLs.

    Args:
        playurl: URL of the play page to resolve.
        times: Attempt counter used by the retry logic. Attempts are made
            while it does not exceed ``MAX_TRY``; callers normally leave it
            at its default.

    Returns:
        A list of ``['', realurl]`` pairs, one per ``rfiles`` entry. The
        list is empty when the page has no ``_oFlv_o`` object, when its
        ``id`` is falsy, or when every attempt failed.
    """
    while times <= MAX_TRY:
        res = []
        try:
            _, _, _, response = fetch(playurl)
            oflvo = re.findall('(?<=var _oFlv_o \= )\{.*.\}(?=;)', response)
            if not oflvo:
                return res
            pid = json.loads(oflvo[0])['id']
            if pid:
                url = 'http://vxml.56.com/json/%d/?src=site' % (int(pid))
                _, _, _, response = fetch(url)
                rfiles = json.loads(response)['info']['rfiles']
                # Entries may also carry a 'type' field (possibly the quality
                # level - unconfirmed); it is not part of the result, so it
                # is deliberately not read here.
                res.extend([['', rf['url']] for rf in rfiles])
            return res
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the process can actually be stopped.
            t, v, tb = sys.exc_info()
            log.error("GetWlRealUrl playurl: %s, %s,%s,%s" %
                      (playurl, t, v, traceback.format_tb(tb)))
            times += 1
    return []
def GetTuDouRealUrl(playurl, times=0):
    """Resolve a play page into real media URLs via the v2.tudou.com XML API.

    The play page is fetched and its ``iid`` value extracted. The XML
    document at ``http://v2.tudou.com/v?it=<iid>&st=1,2,3,4,99`` is then
    parsed, and the text of every ``<f>`` element is taken as a media URL.

    Args:
        playurl: URL of the play page to resolve.
        times: Attempt counter used by the retry logic. Attempts are made
            while it does not exceed ``MAX_TRY``; callers normally leave it
            at its default.

    Returns:
        A list of ``['', url]`` pairs, one per ``<f>`` element, where
        ``url`` is always the URL string (also when there is exactly one
        element). The list is empty when every attempt failed.
    """
    while times <= MAX_TRY:
        try:
            _, _, _, response = fetch(playurl)
            iid = re.search(r'iid\s*[:=]\s*(\d+)', response).group(1)
            _, _, _, response = fetch(
                'http://v2.tudou.com/v?it=' + iid + '&st=1,2,3,4,99')
            doc = parseString(response)
            urls = [node.firstChild.nodeValue.strip()
                    for node in doc.getElementsByTagName('f')]
            if not urls:
                # A document without any <f> element counts as a failed
                # attempt and is retried.
                raise ValueError('no <f> element for iid %s' % iid)
            return [['', url] for url in urls]
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the process can actually be stopped.
            t, v, tb = sys.exc_info()
            log.error("GetTuDouRealUrl playurl: %s, %s,%s,%s" %
                      (playurl, t, v, traceback.format_tb(tb)))
            times += 1
    return []
def GetSoHuRealUrl(playurl, times=0):
    """Resolve a play page into real clip URLs via hot.vrs.sohu.com.

    The play page is fetched and its ``vid`` extracted. The JSON document
    at ``http://hot.vrs.sohu.com/vrs_flash.action?vid=<vid>`` then supplies
    ``allot``, ``prot`` and, under ``data``, the parallel ``clipsURL`` and
    ``su`` lists. Each ``(clipsURL, su)`` pair is resolved with
    ``GetSoHuInfo``.

    Args:
        playurl: URL of the play page to resolve.
        times: Attempt counter used by the retry logic. Attempts are made
            while it does not exceed ``MAX_TRY``; callers normally leave it
            at its default.

    Returns:
        A list of ``['', url]`` pairs, one per clip, in clip order. ``url``
        is whatever ``GetSoHuInfo`` returned for that clip, which is
        ``None`` if that lookup gave up. The list is empty when every
        attempt failed.
    """
    while times <= MAX_TRY:
        try:
            _, _, _, response = fetch(playurl)
            vid = re.search('vid="(\d+)', response).group(1)
            newurl = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid
            _, _, _, response = fetch(newurl)
            jdata = json.loads(response)
            host = jdata['allot']
            prot = jdata['prot']
            data = jdata['data']
            # zip() stops at the shorter of the two lists.
            return [['', GetSoHuInfo(host, prot, tfile, new)]
                    for tfile, new in zip(data['clipsURL'], data['su'])]
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the process can actually be stopped.
            t, v, tb = sys.exc_info()
            log.error("GetSoHuRealUrl playurl: %s, %s,%s,%s" %
                      (playurl, t, v, traceback.format_tb(tb)))
            times += 1
    return []
def GetItemData(itemurl, parsetype, times=0):
    """Fetch an item page and extract its list of episodes/play links.

    Two response formats are handled:

    * ``parsetype == PARSE_TYPE``: the response is JSON. Each entry of its
      ``video`` list has a ``playlink`` string that is itself JSON; the
      ``url`` and ``anchor`` of ``links['0']`` are collected.
    * otherwise: the response is decoded as gb18030, any ``tag: [...]``
      span is removed, and the ``url``, ``ti`` and ``duration_hour``
      values are scraped with regular expressions and zipped together.

    Args:
        itemurl: URL of the item page.
        parsetype: Selects the response format (compared to ``PARSE_TYPE``).
        times: Attempt counter used by the retry logic. Attempts are made
            while it does not exceed ``MAX_TRY``; callers normally leave it
            at its default.

    Returns:
        For the JSON format, a list of ``[playurl, anchor, '']`` lists.
        For the text format, a list of ``(url, ti, duration)`` tuples,
        truncated to the shortest of the three scraped sequences. The list
        is empty when every attempt failed.
    """
    while times <= MAX_TRY:
        res = []
        try:
            _, _, _, response = fetch(itemurl)
            if parsetype == PARSE_TYPE:
                for video in json.loads(response)['video']:
                    # 'playlink' is a JSON document embedded as a string.
                    link = json.loads(video['playlink'])['links']['0']
                    res.append([link['url'], link['anchor'], ''])
            else:
                response = response.decode('gb18030')
                # Strip the tag list before scraping so its contents are
                # not picked up by the patterns below.
                response = re.sub('tag: \[.*.\]', '', response)
                playtimes = re.findall('(?<=duration_hour\: \")\d+\:\d+(?=\"\,)', response)
                playurls = re.findall('(?<=url\: \").*.(?=\"\,)', response)
                ti = re.findall('(?<=ti\:\").*.(?=\"\,)', response)
                res.extend(zip(playurls, ti, playtimes))
            return res
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the process can actually be stopped.
            t, v, tb = sys.exc_info()
            log.error("GetItemData itemurl: %s, %s,%s,%s" %
                      (itemurl, t, v, traceback.format_tb(tb)))
            times += 1
    return []
def GetListData(listurl, times=0):
    """Fetch a JSON listing and flatten each video into a row of fields.

    The response is parsed as JSON and every entry of
    ``videoshow.videos`` is converted to a 14-element list. The ``area``,
    ``actor`` and ``type`` fields are lists of objects; the ``name`` of
    each object is joined with single spaces.

    Args:
        listurl: URL of the listing.
        times: Attempt counter used by the retry logic. Attempts are made
            while it does not exceed ``MAX_TRY``; callers normally leave it
            at its default.

    Returns:
        A list of rows, each in the order ``[rating, title, url, source,
        area, actor, id, duration, intro, s_intro, date, type, imgh_url,
        imgv_url]``. The list is empty when every attempt failed.
    """
    def join_names(items):
        # Collapse a list of {'name': ...} objects into one string.
        return ' '.join([item['name'] for item in items])

    while times <= MAX_TRY:
        res = []
        try:
            _, _, _, response = fetch(listurl)
            for video in json.loads(response)['videoshow']['videos']:
                res.append([
                    video['rating'],
                    video['title'],
                    video['url'],
                    video['source'],
                    join_names(video['area']),
                    join_names(video['actor']),
                    video['id'],
                    video['duration'],
                    video['intro'],
                    video['s_intro'],
                    video['date'],
                    join_names(video['type']),
                    video['imgh_url'],
                    video['imgv_url'],
                ])
            return res
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the process can actually be stopped.
            t, v, tb = sys.exc_info()
            log.error("GetListData listurl: %s %s,%s,%s" %
                      (listurl, t, v, traceback.format_tb(tb)))
            times += 1
    return []
def GetUKInfo(videoId2, times=0):
    """Fetch and decode the play-list JSON for a video id from v.youku.com.

    Args:
        videoId2: Video identifier substituted into
            ``http://v.youku.com/player/getPlayList/VideoIDS/<id>``.
        times: Attempt counter used by the retry logic. Attempts are made
            while it does not exceed ``MAX_TRY``; callers normally leave it
            at its default.

    Returns:
        The decoded JSON document, or ``None`` when every attempt failed.
    """
    while times <= MAX_TRY:
        try:
            url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % (videoId2)
            _, _, _, response = fetch(url)
            return json.loads(response)
        except Exception:
            # Record the failure in the same way as the other resolvers in
            # this module instead of discarding it silently. Only ordinary
            # errors are retried; KeyboardInterrupt and SystemExit propagate.
            t, v, tb = sys.exc_info()
            log.error("GetUKInfo videoId2: %s, %s,%s,%s" %
                      (videoId2, t, v, traceback.format_tb(tb)))
            times += 1
    return None
def GetSoHuInfo(host, prot, tfile, new, times=0):
    """Resolve one clip into its final URL by querying the allocation host.

    ``http://<host>/?prot=<prot>&file=<tfile>&new=<new>`` is fetched. The
    reply must consist of exactly eight ``|``-separated fields; the first
    field (minus its last character) and the fourth field (the key) are
    combined with ``new`` into the result.

    Args:
        host: Host name to query.
        prot: Value for the ``prot`` query parameter.
        tfile: Value for the ``file`` query parameter.
        new: Value for the ``new`` query parameter; also inserted into the
            returned URL.
        times: Attempt counter used by the retry logic. Attempts are made
            while it does not exceed ``MAX_TRY``; callers normally leave it
            at its default.

    Returns:
        The string ``<start><new>?key=<key>``, or ``None`` when every
        attempt failed.
    """
    while times <= MAX_TRY:
        try:
            url = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, tfile, new)
            _, _, _, response = fetch(url)
            # Unpacking enforces the eight-field layout; a reply with any
            # other field count raises ValueError and is retried. Unused
            # fields are discarded rather than bound to the 'host'
            # parameter, so retries always query the caller's host.
            start, _, _, key, _, _, _, _ = response.split('|')
            return '%s%s?key=%s' % (start[:-1], new, key)
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the process can actually be stopped.
            t, v, tb = sys.exc_info()
            log.error("GetSoHuInfo %s,%s,%s" % (t, v, traceback.format_tb(tb)))
            times += 1
    return None
def GetBaiyRealUrl(playurl, times=0):
    """Resolve a play page into its play-list entries via a linked script.

    The play page is parsed with ``bs`` (presumably BeautifulSoup - confirm
    against the file's imports). The ``src`` of the ``<script>`` inside the
    first ``<ul id="playlist">`` is fetched from ``BAIY_HOST``; the
    argument of its ``unescape('...')`` call is URL-unquoted and split
    into entries on ``$$$`` and into fields on ``$``.

    Args:
        playurl: URL of the play page to resolve.
        times: Attempt counter used by the retry logic. Attempts are made
            while it does not exceed ``MAX_TRY``; callers normally leave it
            at its default.

    Returns:
        A list with one list of ``$``-separated fields per play-list entry.
        The list is empty when the page has no ``playlist`` element, when
        the script ``src`` is empty, or when every attempt failed.
    """
    while times <= MAX_TRY:
        res = []
        try:
            _, _, _, response = fetch(playurl)
            soup = bs(response)
            playlist = soup.findAll('ul', id="playlist")
            if playlist:
                newplayurl = playlist[0].script['src']
                if newplayurl:
                    url = 'http://' + BAIY_HOST + newplayurl
                    _, _, _, response = fetch(url)
                    # Indexing [0] makes a script without an unescape(...)
                    # call count as a failed attempt.
                    uri = re.findall("(?<=unescape\(').*.(?='\);)", response)[0]
                    info = urllib.unquote(uri)
                    res.extend([entry.split('$') for entry in info.split('$$$')])
            return res
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the process can actually be stopped.
            t, v, tb = sys.exc_info()
            log.error("GetBaiyRealUrl playurl: %s, %s,%s,%s" %
                      (playurl, t, v, traceback.format_tb(tb)))
            times += 1
    return []
def GetUKouRealUrl(playurl, times=0):
    """Resolve a play page into real media locations for each video part.

    The play page is fetched and its ``videoId2`` extracted. ``GetUKInfo``
    supplies the play-list information, ``FindUKVideo`` turns it into
    pairs whose first element is a URL, and each of those URLs is fetched
    in turn. The third element of each ``fetch`` result (presumably the
    final location after redirects - confirm against ``fetch``) is
    collected.

    Args:
        playurl: URL of the play page to resolve.
        times: Attempt counter used by the retry logic. Attempts are made
            while it does not exceed ``MAX_TRY``; callers normally leave it
            at its default.

    Returns:
        A list of ``['', location]`` pairs, one per URL, in order. The list
        is empty when every attempt failed.
    """
    while times <= MAX_TRY:
        res = []
        try:
            _, _, _, response = fetch(playurl)
            id2 = re.search(r"var\s+videoId2\s*=\s*'(\S+)'", response).group(1)
            info = GetUKInfo(id2)
            # Unpacking fails (and the attempt is retried) when FindUKVideo
            # yields nothing.
            urls, _ = zip(*FindUKVideo(info, stream_type=None))
            for index, url in enumerate(urls):
                if index:
                    # Pause between consecutive requests only; there is
                    # nothing to wait for before the first one or after the
                    # last one.
                    time.sleep(2)
                _, _, location, response = fetch(url)
                res.append(['', location])
            return res
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the process can actually be stopped.
            t, v, tb = sys.exc_info()
            log.error("GetUKouRealUrl playurl: %s, %s,%s,%s" %
                      (playurl, t, v, traceback.format_tb(tb)))
            times += 1
    return []
def GetAiPaiRealUrl(playurl, times=0):
    """Resolve a play page into its real media URL from ``asset_pUrl``.

    The play page is fetched and the value assigned to ``asset_pUrl`` is
    extracted. The substring ``iphone.aipai.com/`` is removed from it and
    ``card.m3u8`` is replaced by ``card.flv``.

    Args:
        playurl: URL of the play page to resolve.
        times: Attempt counter used by the retry logic. Attempts are made
            while it does not exceed ``MAX_TRY``; callers normally leave it
            at its default.

    Returns:
        ``[['', realurl]]`` when the page contains an ``asset_pUrl``
        assignment; an empty list when it does not or when every attempt
        failed.
    """
    while times <= MAX_TRY:
        res = []
        try:
            _, _, _, response = fetch(playurl)
            assetpurl = re.findall("(?<=asset_pUrl \= \').*.(?=\'\;)", response)
            if assetpurl:
                realurl = assetpurl[0].replace('iphone.aipai.com/', '')
                realurl = realurl.replace('card.m3u8', 'card.flv')
                res.append(['', realurl])
            return res
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the process can actually be stopped.
            t, v, tb = sys.exc_info()
            log.error("GetAiPaiRealUrl playurl: %s, %s,%s,%s" %
                      (playurl, t, v, traceback.format_tb(tb)))
            times += 1
    return []
def GetKuSixRealUrl(playurl, times=0):
    """Resolve a play page into its real media URL from an embedded object.

    The play page is fetched and the first ``data: {...} },`` span is
    extracted. The object inside it is parsed as JSON and its ``data.f``
    value is returned as the media URL.

    Args:
        playurl: URL of the play page to resolve.
        times: Attempt counter used by the retry logic. Attempts are made
            while it does not exceed ``MAX_TRY``; callers normally leave it
            at its default.

    Returns:
        ``[['', f]]`` when the page contains the embedded object; an empty
        list when it does not or when every attempt failed.
    """
    while times <= MAX_TRY:
        res = []
        try:
            _, _, _, response = fetch(playurl)
            found = re.findall('data: {.*.} }\,', response)
            if found:
                # Drop the leading 'data:' (5 characters) and the final two
                # characters of the match ('},'), leaving the JSON object.
                payload = found[0][5:-2]
                res.append(['', json.loads(payload)['data']['f']])
            return res
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the process can actually be stopped.
            t, v, tb = sys.exc_info()
            log.error("GetKuSixRealUrl playurl: %s, %s,%s,%s" %
                      (playurl, t, v, traceback.format_tb(tb)))
            times += 1
    return []