def monitor_download_status():
    """Evict stalled entries from the download-status hash, forever.

    Each cycle records the on-disk size of every file listed in the
    DOWNLOAD_STATUS_QUEUE_NAME hash, sleeps 10 seconds, re-reads the hash
    and measures the files again. An entry whose size did not change
    between the two measurements is deleted from the hash.

    Entries that were added to the hash while the monitor was sleeping have
    no baseline size yet; they are skipped and judged on the next cycle
    (previously they raised ``KeyError`` and stopped the monitor).

    This function never returns.
    """
    redis_queue = RedisMsgQueue()
    queue_name = consts.constant_manager.DOWNLOAD_STATUS_QUEUE_NAME
    while True:
        # First snapshot: baseline size for every file currently tracked.
        all_files_old_json = redis_queue.hash_get_all(queue_name)
        old_sizes = {}
        for hash_sign in all_files_old_json.keys():
            file_info = from_string_to_json(all_files_old_json[hash_sign])
            old_sizes[hash_sign] = get_file_size(file_info['absolute_path'])

        time.sleep(10)

        # Original intent (translated): if a downloading file's size has not
        # changed for 5 minutes, assume the download service has died and
        # remove the entry from the download queue.
        # NOTE(review): the code waits 10 seconds, not 5 minutes - confirm
        # which of the two is intended.
        all_files_new_json = redis_queue.hash_get_all(queue_name)
        for hash_sign in all_files_new_json.keys():
            if hash_sign not in old_sizes:
                # Queued during the sleep: no baseline to compare against.
                continue
            file_info = from_string_to_json(all_files_new_json[hash_sign])
            now_size = get_file_size(file_info['absolute_path'])
            if int(now_size) == int(old_sizes[hash_sign]):
                redis_queue.hash_del(queue_name, hash_sign)
        logging.debug('monitor download queue')
def process(self, content):
    """Resolve *content* into stream info and schedule its downloads.

    The stream description is obtained from
    ``get_and_download_stream_obj(content)`` and decoded with
    ``from_string_to_json``. When its ``type`` is the DOWNLOAD constant,
    every entry of ``download_file_list`` is pushed to the DB-save queue
    and to the download queue (with ``priority=True`` when the entry's
    priority is above 50).

    Returns the decoded stream description in every case.
    """
    stream = from_string_to_json(get_and_download_stream_obj(content))
    if stream['type'] != consts.constant_manager.DOWNLOAD:
        return stream

    for item in stream['download_file_list']:
        target_name = get_file_name_by_download_url(item['download_url'])
        if item['media_type'] == consts.constant_manager.SUBTITLE:
            # Subtitles are named <site>_<name from original url>_<language>.
            target_name = '_'.join([
                stream['site'],
                get_file_name_by_download_url(stream['original_url']),
                item['language'],
            ])

        download_file = DownloadFile(
            download_url=item['download_url'],
            file_name=target_name,
            site=stream['site'],
            original_url=stream['original_url'])

        media_record = {
            'video_url': stream['video_url'],
            'original_url': stream['original_url'],
            'download_url': item['download_url'],
            'media_quality': item['media_quality'],
            'episode': stream['episode'],
            'download_path': ConfigInit().get_config_by_option('download_path'),
            'media_name': stream['media_name'],
            'hash_sign': get_hash_sign(target_name),
            'media_type': item['media_type'],
            'site': stream['site'],
            'language': item['language'],
            'merged_sign': item['merged_sign'],
            'merged_order': item['merged_order'],
        }
        scheduler_db_save_queue(media_record)

        # TODO: fine-grained management of download priority
        extra_kwargs = {'priority': True} if int(item['priority']) > 50 else {}
        scheduler_download_queue(download_file.from_obj_to_json(),
                                 **extra_kwargs)

    return stream
def process(self, content):
    """Merge a group of media records and persist the outcome.

    *content* is decoded with ``from_string_to_json`` into a list of media
    records, which is handed to ``self.merge_media``. When that returns a
    truthy path:

    * a deep copy of the first record, stripped of its row-specific
      columns and updated to describe the merged file, is sent to the
      DB-save queue;
    * each source record has its file deleted (if it exists), is marked
      ``merged_status = '1'``, has every column whose value is the string
      ``'None'`` removed, and is sent to the DB-save queue.

    Returns ``None``.
    """
    to_merged_medias_lists = from_string_to_json(content)
    merged_absolue_path = self.merge_media(to_merged_medias_lists)
    # TODO: make this batch of database operations atomic
    if not merged_absolue_path:
        return

    merged_record = copy.deepcopy(to_merged_medias_lists[0])
    row_specific_columns = (
        'id', 'cloud_path', 'create_time', 'merged_status', 'update_time',
        'upload_status',
    )
    for column in row_specific_columns:
        # pop() instead of del: a record lacking one of these columns must
        # not abort the whole merge bookkeeping with KeyError.
        merged_record.pop(column, None)
    merged_record['absolute_path'] = merged_absolue_path
    merged_record['media_type'] = consts.constant_manager.MERGED
    merged_record['total_size'] = get_file_size(merged_absolue_path)
    merged_record['hash_sign'] = get_hash_sign(merged_record['merged_sign'])
    merged_record['merged_order'] = -1
    scheduler_db_save_queue(merged_record)

    for source_record in to_merged_medias_lists:
        if exist_file(source_record['absolute_path']):
            del_file(source_record['absolute_path'])
        source_record['merged_status'] = '1'
        # Iterate over a snapshot of the keys: entries are deleted inside
        # the loop, which is an error on a live key view.
        for column in list(source_record.keys()):
            if source_record[column] == 'None':
                del source_record[column]
        scheduler_db_save_queue(source_record)
def being_download(hash_sign):
    """Return True if *hash_sign* matches an entry of the download-status queue.

    Every member returned by ``set_get_all`` for DOWNLOAD_STATUS_QUEUE_NAME
    is decoded with ``from_string_to_json`` and its ``'hash_sign'`` field is
    compared with *hash_sign*; the scan stops at the first match.
    """
    # TODO: support other ways of verifying whether a file is being downloaded
    status_queue = RedisMsgQueue()
    queued_entries = status_queue.set_get_all(
        consts.constant_manager.DOWNLOAD_STATUS_QUEUE_NAME)
    return any(
        hash_sign == from_string_to_json(entry)['hash_sign']
        for entry in queued_entries)
def chrome_test(url):
    """Manual end-to-end check: scrape one Viki page, download and merge it.

    Loads *url* in headless Chrome, collects the page source and the
    browser's performance entries, extracts the first subtitle, derives the
    480p video/audio DASH URLs, downloads and merges them, and writes the
    subtitle text to a hard-coded local path.

    The browser is always shut down, including when scraping raises
    (previously any error other than a page-load timeout left the Chrome
    process running).
    """
    opts = ChromeOptions()
    opts.add_argument("--headless")
    driver = webdriver.Chrome(
        executable_path=DEPLOY_HOME + '/src/config/chromedriver_mac243',
        chrome_options=opts)
    try:
        driver.set_page_load_timeout(15)
        try:
            driver.get(url)
        except TimeoutException:
            # A timed-out load is tolerated; whatever was loaded is scraped.
            traceback.print_exc()
        # scriptToExecute = "var performance = window.performance || window.mozPerformance || window.msPerformance || window.webkitPerformance || {}; var network = performance.getEntries() || {}; return network;"
        # net_work_info = driver.execute_script(scriptToExecute)
        net_work_info = driver.execute_script(
            "return window.performance.getEntries();")
        page_source = driver.page_source.encode('utf-8')
    finally:
        driver.quit()

    tmp_subtitles_list = parser_subtitles_page_source(page_source)
    subtitles_list = []
    # Only the first subtitle entry is fetched.
    for subtitle in tmp_subtitles_list[:1]:
        subtitle_dict = from_string_to_json(subtitle)
        tmp_subtitle_url = subtitle_dict['src']
        subtitle_dict['source'] = get_subtitle(tmp_subtitle_url)
        subtitles_list.append(subtitle_dict)

    # Text between 'videos/' and the first '-' of the page URL.
    sv_id = url[url.find('videos/') + len('videos/'):url.find('-')]

    # Token between 'dash_high_480p_' and '_track' in the stringified
    # performance entries.
    net_work_info_str = str(net_work_info)
    begin = net_work_info_str.find('dash_high_480p_') + len('dash_high_480p_')
    sv_play_str = net_work_info_str[begin:net_work_info_str.find('_track',
                                                                 begin)]

    url_video = 'https://content.viki.io/%s/dash/%s_dash_high_480p_%s_track1_dashinit.mp4' % (
        sv_id, sv_id, sv_play_str)
    url_audio = 'https://content.viki.io/%s/dash/%s_dash_high_480p_%s_track2_dashinit.mp4' % (
        sv_id, sv_id, sv_play_str)
    down_480p_list = [url_video, url_audio]
    download_video(down_480p_list)
    merge_sv_demo()
    write_file('/Users/tv365/test_de', subtitles_list[0]['source'])
    # subtitles_dict = {'de': '/Users/tv365/test_de'}
    # sv = SingleVideo('vid0', 'svid0', subtitles_dict['de'], '')
def process(self, content):
    """Merge a group of media records and queue the resulting DB updates.

    *content* is decoded with ``from_string_to_json`` and passed to
    ``self.merge_media``. If that yields a truthy path, a record describing
    the merged file (a deep copy of the first source record without its
    ``id``) is sent to the DB-save queue, followed by every source record
    flagged with ``merged_status = '1'``.

    Returns ``None``.
    """
    source_records = from_string_to_json(content)
    merged_path = self.merge_media(source_records)
    # TODO: make this batch of database operations atomic
    if not merged_path:
        return

    merged_record = copy.deepcopy(source_records[0])
    del merged_record['id']
    merged_record['absolute_path'] = merged_path
    merged_record['media_type'] = consts.constant_manager.MERGED
    merged_record['total_size'] = get_file_size(merged_path)
    merged_record['hash_sign'] = get_hash_sign(merged_record['merged_sign'])
    merged_record['download_status'] = ''
    merged_record['merged_order'] = ''
    scheduler_db_save_queue(merged_record)

    for record in source_records:
        record['merged_status'] = '1'
        scheduler_db_save_queue(record)
def get_paly_urls():
    """Collect the episode play URLs of a hard-coded list of Viki shows.

    For each show page URL the container id (the text between ``tv/`` and
    the first ``-``) is inserted into the Viki episodes API URL. Each API
    URL is fetched with a randomly chosen User-Agent, and the ``url.fb``
    value of every item in the JSON ``response`` list is collected.

    A running request counter is printed before each request. The HTTP
    response is now closed after it has been read. Request or decoding
    errors propagate to the caller.

    Returns:
        list: the collected play URLs, in request order.
    """
    urls = [
        'https://www.viki.com/tv/31037c-woman-with-a-suitcase',
        'https://www.viki.com/tv/3548c-dream-high-2',
        'https://www.viki.com/tv/35535c-stars-lover',
        'https://www.viki.com/tv/655c-winter-bird',
        'https://www.viki.com/tv/29619c-que-sera-sera',
        'https://www.viki.com/tv/8037c-goodbye-dear-wife',
        'https://www.viki.com/tv/29550c-secret',
        'https://www.viki.com/tv/29465c-my-love-patzzi',
        'https://www.viki.com/tv/12697c-three-days',
        'https://www.viki.com/tv/35530c-bad-couple',
        'https://www.viki.com/tv/29161c-sweet-savage-family',
        'https://www.viki.com/tv/29486c-snowman',
        'https://www.viki.com/tv/29483c-my-lifes-golden-age',
        'https://www.viki.com/tv/31100c-romance-blue',
        'https://www.viki.com/tv/35623c-borg-mom',
        'https://www.viki.com/tv/29473c-who-are-you',
        'https://www.viki.com/tv/28380c-the-virtual-bride',
        'https://www.viki.com/tv/35519c-you-are-too-much',
        'https://www.viki.com/tv/29477c-the-lawyers-of-the-great-republic-of-korea',
        'https://www.viki.com/tv/29546c-air-city',
        'https://www.viki.com/tv/11669c-ad-genius-lee-taebaek',
        'https://www.viki.com/tv/25807c-be-arrogant',
        'https://www.viki.com/tv/29463c-90-days-time-to-love',
        'https://www.viki.com/tv/35571c-daljas-spring',
        'https://www.viki.com/tv/35533c-matchmakers-lover',
        'https://www.viki.com/tv/35539c-women-in-the-sun',
        'https://www.viki.com/tv/35538c-tazza',
        'https://www.viki.com/tv/35532c-surgeon-bong-dal-hee',
        'https://www.viki.com/tv/29545c-general-hospital-2',
        'https://www.viki.com/tv/28243c-28-faces-of-the-moon',
        'https://www.viki.com/tv/25771c-punch',
        'https://www.viki.com/tv/29535c-super-rookie',
        'https://www.viki.com/tv/12068c-the-queen-of-office',
        'https://www.viki.com/tv/26913c-the-man-in-the-mask',
        'https://www.viki.com/tv/29544c-spotlight',
        'https://www.viki.com/tv/3339c-miss-ripley',
        'https://www.viki.com/tv/24873c-boarding-house-24',
        'https://www.viki.com/tv/35542c-sign',
        'https://www.viki.com/tv/29471c-dr-gang',
        'https://www.viki.com/tv/29478c-behind-the-white-tower',
        'https://www.viki.com/tv/35807c-swan',
        'https://www.viki.com/tv/27882c-assembly',
        'https://www.viki.com/tv/35529c-get-karl-oh-soo-jung',
        'https://www.viki.com/tv/27211c-jumping-girl',
        'https://www.viki.com/tv/34053c-bing-goo',
        'https://www.viki.com/tv/28972c-cheers-to-me',
        'https://www.viki.com/tv/29482c-merry-mary',
        'https://www.viki.com/tv/29492c-before-and-after-plastic-surgery-clinic',
        'https://www.viki.com/tv/29666c-puck',
        'https://www.viki.com/tv/29476c-auction-house',
    ]
    api_template = ('https://api.viki.io/v4/containers/%s/episodes.json?sort=number&'
                    'direction=asc&per_page=20&with_paging=true&blocked=true&with_kcp=true&'
                    'app=100000a&page=1')
    urls_api = [
        api_template % show_url[show_url.find('tv/') + len('tv/'):show_url.find('-')]
        for show_url in urls
    ]

    play_urls = []
    for count, api_url in enumerate(urls_api, 1):
        # Single-argument print(...) outputs the same text on Python 2 and 3.
        print(count)
        req = urllib2.Request(
            url=api_url,
            headers={'User-Agent': random.choice(consts.constant_manager.USER_AGENTS)})
        response = urllib2.urlopen(req)
        try:
            page_source = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()
        page_source_json = from_string_to_json(page_source)
        for play_url_dict in page_source_json['response']:
            play_urls.append(play_url_dict['url']['fb'])
    return play_urls
def parse(self, url):
    """Scrape a Viki episode page and describe its downloadable streams.

    The page is loaded in the shared Chrome driver; its source and the
    browser's performance entries are captured, then the driver is quit.
    From the page source this extracts:

    * subtitles listed in the ``var parsedSubtitles = [...]`` script
      variable whose ``percentage`` is at least 95;
    * 480p video/audio track URLs found among the performance entries;
    * the media name (``<title>``), the series URL
      (``video:series`` meta tag) and the episode number.

    Fixes relative to the previous revision:

    * the last subtitle entry is no longer dropped (it was filtered out
      because it still carried its closing ``}`` after the split);
    * the captured performance entries are no longer overwritten with an
      empty list before being searched, so 480p media is actually parsed;
    * the driver is quit even if reading the page source raises;
    * a page without the ``parsedSubtitles`` marker yields no subtitles
      instead of slicing unrelated page text.

    Args:
        url: episode page URL.

    Returns:
        The value of ``StreamInfo(...).from_obj_to_string()``.
    """
    driver = SeleniumDirverFactory().get_driver('chrome')
    try:
        # TODO: stability handling
        driver.set_page_load_timeout(60)
        try:
            driver.get(url)
        except TimeoutException:
            # A timed-out load is tolerated; whatever was loaded is scraped.
            traceback.print_exc()
        page_source = driver.page_source.encode('utf-8')
        net_work_info_list = driver.execute_script(
            "return window.performance.getEntries();")
    finally:
        SeleniumDirverFactory().quit_driver('chrome')

    # --- subtitles -------------------------------------------------------
    tmp_subtitles_list = []
    subtitles_marker = 'var parsedSubtitles ='
    marker_pos = page_source.find(subtitles_marker)
    if marker_pos != -1:
        begin = marker_pos + len(subtitles_marker)
        end = page_source.find('];', begin)
        fragments = page_source[begin:end].replace('[', '').replace(
            ' ', '').replace('amp;', '').split('},')
        # split('},') strips the closing brace from every fragment except
        # the last one; restore it only where it is missing.
        tmp_subtitles_list = [
            fragment if '}' in fragment else fragment + '}'
            for fragment in fragments
        ]

    download_file_list = []
    for subtitle in tmp_subtitles_list:
        subtitle_dict = from_string_to_json(subtitle)
        if not subtitle_dict:
            continue
        if int(subtitle_dict['percentage']) < 95:
            continue
        download_file_list.append(
            self.build_singel_stream_json(
                download_url=subtitle_dict['src'],
                language=subtitle_dict['srclang'],
                media_type=constant_manager.SUBTITLE,
                priority=99))

    # --- video / audio ---------------------------------------------------
    # TODO: per-quality configuration; support other audio/video download
    # types; support stitching multi-segment audio/video
    for net_work_dict in net_work_info_list:
        if 'name' in net_work_dict and '480p' in net_work_dict['name']:
            # The matched entry may be either track; derive both URLs.
            url_video_480p = str(net_work_dict['name']).replace(
                'track2', 'track1')
            url_audio_480p = str(net_work_dict['name']).replace(
                'track1', 'track2')
            download_file_list.append(
                self.build_singel_stream_json(
                    download_url=url_video_480p,
                    media_quality=consts.constant_manager.MEDIA_480P,
                    media_type=consts.constant_manager.VIDEO,
                    merged_sign='_'.join([url, '480p']),
                    merged_order=1))
            download_file_list.append(
                self.build_singel_stream_json(
                    download_url=url_audio_480p,
                    media_quality=consts.constant_manager.MEDIA_480P,
                    media_type=consts.constant_manager.AUDIO,
                    merged_sign='_'.join([url, '480p']),
                    merged_order=1))
            break
    else:
        logging.error('can not find 480p in url %s' % url)

    if len(download_file_list) == 0:
        logging.error('no download_url in url %s' % url)

    # --- page metadata ---------------------------------------------------
    begin_media_name = page_source.find('<title>') + len('<title>')
    end_media_name = page_source.find('</title>', begin_media_name)
    media_name = page_source[begin_media_name:end_media_name].replace(' ', '')

    # e.g. ' <meta property="video:series" content="http://www.viki.com/tv/35884c-all-out-of-love" />'
    series_meta = '<meta property="video:series" content="'
    begin_video_url = page_source.find(series_meta) + len(series_meta)
    end_video_url = page_source.find('"', begin_video_url)
    video_url = page_source[begin_video_url:end_video_url]

    episode = ''
    try:
        # First run of digits after the word EPISODE in the title.
        episode_begin = media_name.upper().find('EPISODE') + len('EPISODE')
        media_name_with_episode = media_name[episode_begin:]
        episode = re.search('[0-9]+', media_name_with_episode).group(0)
    except Exception:
        # No digits found: keep the empty episode and carry on.
        traceback.print_exc()
        logging.error('episode error')

    response_obj = StreamInfo(video_url=video_url,
                              media_name=media_name,
                              episode=episode,
                              original_url=url,
                              download_file_list=download_file_list,
                              site=consts.constant_manager.VIKI,
                              type=consts.constant_manager.DOWNLOAD)
    return response_obj.from_obj_to_string()


@staticmethod
def get_subtitle_content(tmp_subtitle_url):
    """Fetch *tmp_subtitle_url* with a random User-Agent and return the body.

    The HTTP response is closed once it has been read.
    """
    # TODO: handle on-the-fly URL access
    req = urllib2.Request(tmp_subtitle_url)
    req.add_header('User-Agent',
                   random.choice(consts.constant_manager.USER_AGENTS))
    response = urllib2.urlopen(req)
    try:
        page_source = response.read()
    finally:
        response.close()
    return str(page_source)
def get_paly_urls():
    """Collect the episode play URLs of the currently enabled Viki shows.

    For each enabled show page URL the container id (the text between
    ``tv/`` and the first ``-``) is inserted into the Viki episodes API
    URL. Each API URL is fetched with a randomly chosen User-Agent, and
    the ``url.fb`` value of every item in the JSON ``response`` list is
    collected.

    A failure on one show is printed via ``traceback`` and the loop moves
    on to the next show. Only ``Exception`` subclasses are handled this
    way, so Ctrl-C / interpreter shutdown are no longer swallowed, and the
    HTTP response is closed after it has been read.

    Returns:
        list: the collected play URLs, in request order.
    """
    urls_support = [
        # Korean dramas
        # 'https://www.viki.com/tv/31037c-woman-with-a-suitcase',
        # 'https://www.viki.com/tv/3548c-dream-high-2',
        # 'https://www.viki.com/tv/8037c-goodbye-dear-wife',
        # 'https://www.viki.com/tv/29550c-secret',
        # 'https://www.viki.com/tv/29465c-my-love-patzzi',
        # 'https://www.viki.com/tv/29161c-sweet-savage-family',
        # 'https://www.viki.com/tv/29486c-snowman',
        # 'https://www.viki.com/tv/29483c-my-lifes-golden-age',
        # 'https://www.viki.com/tv/31100c-romance-blue',
        # 'https://www.viki.com/tv/35623c-borg-mom',
        # 'https://www.viki.com/tv/29473c-who-are-you',
        # 'https://www.viki.com/tv/29546c-air-city',
        # 'https://www.viki.com/tv/25807c-be-arrogant',
        # 'https://www.viki.com/tv/29463c-90-days-time-to-love',
        # 'https://www.viki.com/tv/29545c-general-hospital-2',
        # 'https://www.viki.com/tv/28243c-28-faces-of-the-moon',
        # 'https://www.viki.com/tv/29535c-super-rookie',
        # 'https://www.viki.com/tv/26913c-the-man-in-the-mask',
        # 'https://www.viki.com/tv/29544c-spotlight',
        # 'https://www.viki.com/tv/3339c-miss-ripley',
        # 'https://www.viki.com/tv/24873c-boarding-house-24',
        # 'https://www.viki.com/tv/29471c-dr-gang',
        # 'https://www.viki.com/tv/29478c-behind-the-white-tower',
        # 'https://www.viki.com/tv/35807c-swan',
        # 'https://www.viki.com/tv/27211c-jumping-girl',
        # 'https://www.viki.com/tv/28972c-cheers-to-me',
        # 'https://www.viki.com/tv/29482c-merry-mary',
        # 'https://www.viki.com/tv/29492c-before-and-after-plastic-surgery-clinic',
        # 'https://www.viki.com/tv/29666c-puck',
        #
        # Mainland China
        # 'https://www.viki.com/tv/29015c-my-sunshine-directors-cut',
        # 'https://www.viki.com/tv/21925c-singing-all-along',
        # 'https://www.viki.com/tv/28160c-the-interpreter',
        # 'https://www.viki.com/tv/35697c-an-oriental-odyssey',
        # 'https://www.viki.com/tv/29908c-fifteen-years-of-waiting-for-migratory-birds',
        # 'https://www.viki.com/tv/36049c-sweet-dreams',
        # 'https://www.viki.com/tv/31805c-because-of-meeting-you',
        # 'https://www.viki.com/tv/31618c-princess-agents',
        # 'https://www.viki.com/tv/35699c-sweet-combat',
        # 'https://www.viki.com/tv/36178c-ever-night',
        # 'https://www.viki.com/tv/35601c-dear-prince',
        # 'https://www.viki.com/tv/35857c-secret-of-the-three-kingdoms',
        # 'https://www.viki.com/tv/32658c-the-foxs-summer',
        # 'https://www.viki.com/tv/35576c-the-foxs-summer-season-2',
        # 'https://www.viki.com/tv/29266c-love-me-if-you-dare',
        # 'https://www.viki.com/tv/33387c-pretty-li-hui-zhen',
        # 'https://www.viki.com/tv/33973c-my-mr-mermaid',
        # 'https://www.viki.com/tv/31583c-my-amazing-boyfriend',
        # 'https://www.viki.com/tv/35710c-i-cannot-hug-you',
        # 'https://www.viki.com/tv/35684c-face-off',
        # 'https://www.viki.com/tv/23841c-the-imperial-doctress',
        # 'https://www.viki.com/tv/34371c-the-kings-woman',
        # 'https://www.viki.com/tv/30283c-song-of-phoenix',
        # 'https://www.viki.com/tv/36044c-the-love-knot-his-excellencys-first-love',
        # 'https://www.viki.com/tv/21864c-chinese-paladin-5-clouds-of-the-world',
        # 'https://www.viki.com/tv/22943c-nirvana-in-fire',
        # 'https://www.viki.com/tv/35605c-the-flames-daughter',
        # 'https://www.viki.com/tv/25705c-legend-of-lu-zhen',
        # 'https://www.viki.com/tv/35664c-fighter-of-the-destiny',
        # 'https://www.viki.com/tv/29384c-whirlwind-girl',
        # 'https://www.viki.com/tv/30332c-the-legend-of-chusen',
        # 'https://www.viki.com/tv/35843c-siege-in-fog',
        # 'https://www.viki.com/tv/20346c-perfect-couple',
        # 'https://www.viki.com/tv/22353c-daughter-back',
        # 'https://www.viki.com/tv/30705c-addicted',
        # 'https://www.viki.com/tv/35607c-delicious-destiny',
        # 'https://www.viki.com/tv/28842c-ice-fantasy',
        # 'https://www.viki.com/tv/23849c-thinking-of-you-lu-xiang-bei',
        # 'https://www.viki.com/tv/21228c-legend-of-the-ancient-sword',
        # 'https://www.viki.com/tv/33665c-across-the-ocean-to-see-you',
        # 'https://www.viki.com/tv/32827c-nirvana-in-fire-2',
        # 'https://www.viki.com/tv/30299c-hot-girl',
        # 'https://www.viki.com/tv/34161c-love-just-come',
        # 'https://www.viki.com/tv/2978c-scarlet-heart',
        # 'https://www.viki.com/tv/31190c-when-a-snail-falls-in-love',
        # 'https://www.viki.com/tv/12472c-the-four',
        # 'https://www.viki.com/tv/35704c-only-side-by-side-with-you',
        # 'https://www.viki.com/tv/28818c-my-best-ex-boyfriend',
        # 'https://www.viki.com/tv/12747c-scarlet-heart-2',
        # 'https://www.viki.com/tv/34436c-the-lovers-lies',
        # Japanese dramas
        # 'https://www.viki.com/tv/35651c-youre-my-pet-kimi-wa-petto',
        # 'https://www.viki.com/tv/31884c-sunshine',
        # 'https://www.viki.com/tv/29377c-my-little-lover-minami-kun-no-koibito',
        # 'https://www.viki.com/tv/35654c-hakuouki-ssl-sweet-school-life',
        # 'https://www.viki.com/tv/31882c-rainbow-rose',
        # 'https://www.viki.com/tv/31813c-vampire-heaven',
        # 'https://www.viki.com/tv/29394c-lady-girls',
        # 'https://www.viki.com/tv/35653c-i-am-reiko-shiratori-shiratori-reiko-de-gozaimasu',
        # 'https://www.viki.com/tv/34349c-delicious-niigata-in-japan',
        # 'https://www.viki.com/tv/36303c-iniesta-tv',
        # 'https://www.viki.com/tv/23069c-love-stories-from-fukuoka',
        # 'https://www.viki.com/tv/28765c-visiting-sacred-places-of-the-tohoku-region',
        # 'https://www.viki.com/tv/31798c-blue-fire',
        # 'https://www.viki.com/tv/7468c-leiji-matsumotos-ozma',
        # 'https://www.viki.com/tv/34350c-railway-story',
        # 'https://www.viki.com/tv/36253c-iniesta-tv-discover-japan',
        # 'https://www.viki.com/tv/36302c-iniesta-tv-interviews',
        # 'https://www.viki.com/tv/29122c-tabiaruki-from-iwate',
        # 'https://www.viki.com/tv/33804c-a-heartfelt-trip-to-fukushima',
        # 'https://www.viki.com/tv/33807c-lets-explore-fukushima',
        # 'https://www.viki.com/tv/33806c-murakami-grand-festival-2016-tradition-passed-down',
        # 'https://www.viki.com/tv/33805c-festival-pride-for-hometown',
        # 'https://www.viki.com/tv/30268c-sendai-iroha-zoukangou',
        # 'https://www.viki.com/tv/34348c-the-sanjo-great-kite-battle',
        # 'https://www.viki.com/tv/36085c-vissel-kobe-welcome-event-bienvenido-andrs-iniesta',
        # 'https://www.viki.com/tv/34345c-tales-of-tohoku',
        # Taiwanese dramas
        'https://www.viki.com/tv/29014c-crime-scene-investigation-center',
        'https://www.viki.com/tv/36219c-campus-heroes',
        'https://www.viki.com/tv/36106c-love-and',
        'https://www.viki.com/tv/35897c-my-little-boys',
        'https://www.viki.com/tv/35859c-between',
        'https://www.viki.com/tv/35830c-single-ladies-senior',
        'https://www.viki.com/tv/35712c-iron-ladies',
        'https://www.viki.com/tv/35708c-see-you-in-time',
        'https://www.viki.com/tv/35627c-lulus-diary',
        'https://www.viki.com/tv/35561c-home-sweet-home',
        'https://www.viki.com/tv/35560c-memory-love',
        'https://www.viki.com/tv/35552c-when-a-woman-chases-a-man',
        'https://www.viki.com/tv/35551c-attention-love',
        'https://www.viki.com/tv/35549c-lion-pride',
        'https://www.viki.com/tv/35492c-the-man-from-the-future',
        'https://www.viki.com/tv/34532c-the-masked-lover',
        'https://www.viki.com/tv/34531c-the-perfect-match',
        'https://www.viki.com/tv/34530c-just-for-you',
        'https://www.viki.com/tv/33719c-all-in-700',
        'https://www.viki.com/tv/33590c-behind-your-smile',
        'https://www.viki.com/tv/33551c-the-king-of-romance',
        'https://www.viki.com/tv/33362c-v-focus',
        'https://www.viki.com/tv/32673c-love-by-design',
        'https://www.viki.com/tv/32353c-27th-golden-melody-awards',
        'https://www.viki.com/tv/32310c-swimming-battle',
        'https://www.viki.com/tv/32029c-better-man',
        'https://www.viki.com/tv/31402c-metro-of-love',
        'https://www.viki.com/tv/30921c-love-at-seventeen',
        'https://www.viki.com/tv/30868c-spop-weekly-report',
        'https://www.viki.com/tv/30410c-back-to-1989',
        'https://www.viki.com/tv/30017c-be-with-me',
        'https://www.viki.com/tv/29959c-thirty-something',
        'https://www.viki.com/tv/29705c-love-or-spend',
        'https://www.viki.com/tv/29259c-bromance',
        'https://www.viki.com/tv/23561c-love-myself-or-you',
        'https://www.viki.com/tv/28856c-school-beautys-personal-bodyguard',
        'https://www.viki.com/tv/28009c-when-i-see-you-again',
        'https://www.viki.com/tv/27213c-murphys-law-of-love',
        'https://www.viki.com/tv/20292c-just-you',
        'https://www.viki.com/tv/20331c-love-around',
        'https://www.viki.com/tv/635c-autumns-concerto',
        'https://www.viki.com/tv/228c-fated-to-love-you',
        'https://www.viki.com/tv/26885c-shia-wa-se',
        'https://www.viki.com/tv/23266c-fall-in-love-with-me',
    ]
    api_template = ('https://api.viki.io/v4/containers/%s/episodes.json?sort=number&'
                    'direction=asc&per_page=200&with_paging=true&blocked=true&with_kcp=true&'
                    'app=100000a&page=1')
    urls_api = [
        api_template % show_url[show_url.find('tv/') + len('tv/'):show_url.find('-')]
        for show_url in urls_support
    ]

    play_urls = []
    count = 0
    for api_url in urls_api:
        try:
            count += 1
            logging.debug(count)
            req = urllib2.Request(
                url=api_url,
                headers={
                    'User-Agent': random.choice(
                        consts.constant_manager.USER_AGENTS)
                })
            response = urllib2.urlopen(req)
            try:
                page_source = response.read()
            finally:
                # Release the connection even if the read fails.
                response.close()
            page_source_json = from_string_to_json(page_source)
            for play_url_dict in page_source_json['response']:
                play_urls.append(play_url_dict['url']['fb'])
        except Exception:
            # Best effort: report the failing show and continue with the rest.
            traceback.print_exc()
    return play_urls
def parse(self, url):
    """Scrape a Viki episode page and describe its downloadable streams.

    The page is loaded in the shared Chrome driver; its source and the
    browser's performance entries are captured on a best-effort basis
    (failures are logged and leave the defaults ``''`` / ``[]``), then the
    driver is quit. From the captured data this extracts:

    * subtitles listed in the ``var parsedSubtitles = [...]`` script
      variable whose ``percentage`` is at least 95;
    * either the 480p video/audio track pair, or a single merged stream
      taken from the ``stream_name=`` query parameter of a 240p/360p/480p
      performance entry - whichever matching entry comes first;
    * the media name (``<title>``), the series URL
      (``video:series`` meta tag) and the episode number.

    Fix relative to the previous revision: when ``stream_name=`` is the
    last query parameter (no following ``&``) the merged URL is taken to
    the end of the entry name instead of losing its final character.
    Bare ``except:`` clauses were narrowed to ``except Exception`` so that
    Ctrl-C / interpreter shutdown are not swallowed.

    Args:
        url: episode page URL.

    Returns:
        The value of ``StreamInfo(...).from_obj_to_string()``, or, when no
        downloadable stream was found, the result of ``from_json_to_string``
        for an ``{'info', 'url', 'type': 'error'}`` dict.
    """
    subtitle_parse_success = media_parse_success = False
    page_source = ''
    net_work_info_list = []
    tmp_subtitles_list = []
    driver_factory = SeleniumDirverFactory()
    driver = driver_factory.get_driver('chrome')
    # page_source, net_work_info_list = get_driver_test(url)
    # TODO: manage browser objects with processes, handling each distinct
    # browser separately
    driver.set_page_load_timeout(60 * 3)
    try:
        driver.get(url)
    except TimeoutException:
        logging.debug('time_out load page in %s' % url)
        traceback.print_exc()
    except Exception:
        logging.debug('unknow error in %s' % url)
        traceback.print_exc()
    finally:
        # Scrape whatever was loaded, then always release the driver.
        try:
            page_source = driver.page_source.encode('utf-8')
            net_work_info_list = driver.execute_script(
                "return window.performance.getEntries();")
        except Exception:
            logging.debug('unknow error to get page_source in %s' % url)
            traceback.print_exc()
        finally:
            driver_factory.quit_driver('chrome')
            # driver_factory.close_driver()
            logging.debug('quit success')

    # --- subtitles -------------------------------------------------------
    # TODO: support parsing paid content
    subtitles_marker = 'var parsedSubtitles ='
    marker_pos = page_source.find(subtitles_marker)
    if marker_pos != -1:
        begin = marker_pos + len(subtitles_marker)
        end = page_source.find('];', begin)
        tmp_subtitles_str = page_source[begin:end].replace(
            '[', '').replace(' ', '').replace('amp;', '').split('},')
        # split('},') strips the closing brace from every fragment except
        # the last one; restore it only where it is missing.
        tmp_subtitles_list = [
            dict_str + ('}' if '}' not in dict_str else '')
            for dict_str in tmp_subtitles_str
        ]

    download_file_list = []
    for subtitle in tmp_subtitles_list:
        subtitle_dict = from_string_to_json(subtitle)
        if len(subtitle_dict) == 0:
            continue
        if int(subtitle_dict['percentage']) < 95:
            continue
        download_file_list.append(
            self.build_singel_stream_json(
                download_url=subtitle_dict['src'],
                language=subtitle_dict['srclang'],
                media_type=constant_manager.SUBTITLE,
                priority=99))
        subtitle_parse_success = True

    # --- video / audio ---------------------------------------------------
    # TODO: per-quality configuration; support other audio/video download
    # types; support stitching multi-segment audio/video
    for net_work_dict in net_work_info_list:
        if 'name' not in net_work_dict:
            continue
        entry_name = net_work_dict['name']

        if '480p' in entry_name and 'track' in entry_name:
            logging.debug('video and audio parse success')
            media_parse_success = True
            # The matched entry may be either track; derive both URLs.
            url_video_480p = str(entry_name).replace('track2', 'track1')
            url_audio_480p = str(entry_name).replace('track1', 'track2')
            download_file_list.append(
                self.build_singel_stream_json(
                    download_url=url_video_480p,
                    media_quality=consts.constant_manager.MEDIA_480P,
                    media_type=consts.constant_manager.VIDEO,
                    merged_sign='_'.join([url, '480p']),
                    merged_order=1))
            download_file_list.append(
                self.build_singel_stream_json(
                    download_url=url_audio_480p,
                    media_quality=consts.constant_manager.MEDIA_480P,
                    media_type=consts.constant_manager.AUDIO,
                    merged_sign='_'.join([url, '480p']),
                    merged_order=1))
            break

        has_quality = ('240p' in entry_name or '360p' in entry_name
                       or '480p' in entry_name)
        if has_quality and 'stream_name=' in entry_name:
            logging.debug('merged parse success')
            media_parse_success = True
            begin = entry_name.find('stream_name=') + len('stream_name=')
            end = entry_name.find('&', begin)
            if end == -1:
                # stream_name is the last parameter: take the rest of the
                # string rather than slicing with -1 (drops the last char).
                end = len(entry_name)
            url_merged = url_decode(entry_name[begin:end])
            if not url_merged.startswith('http'):
                logging.error('media url error %s %s' %
                              (url_merged, entry_name))
            download_file_list.append(
                self.build_singel_stream_json(
                    download_url=url_merged,
                    media_quality=get_media_quality(url_merged),
                    media_type=consts.constant_manager.MERGED,
                    priority=88))
            break
    else:
        logging.error('can not find 480p in url %s' % url)

    self.log_result(subtitle_parse_success, media_parse_success, url)
    if len(download_file_list) == 0:
        return from_json_to_string({
            'info': 'no stream url',
            'url': url,
            'type': 'error'
        })

    # --- page metadata ---------------------------------------------------
    begin_media_name = page_source.find('<title>') + len('<title>')
    end_media_name = page_source.find('</title>', begin_media_name)
    media_name = page_source[begin_media_name:end_media_name].replace(
        ' ', '')

    series_meta = '<meta property="video:series" content="'
    begin_video_url = page_source.find(series_meta) + len(series_meta)
    end_video_url = page_source.find('"', begin_video_url)
    video_url = page_source[begin_video_url:end_video_url]

    episode = ''
    try:
        # First run of digits after the word EPISODE in the title.
        episode_begin = media_name.upper().find('EPISODE') + len('EPISODE')
        media_name_with_episode = media_name[episode_begin:]
        episode = re.search('[0-9]+', media_name_with_episode).group(0)
    except Exception:
        # No digits found: keep the empty episode and carry on.
        traceback.print_exc()
        logging.error('episode error')

    stream_obj = StreamInfo(video_url=video_url,
                            media_name=media_name,
                            episode=episode,
                            original_url=url,
                            download_file_list=download_file_list,
                            site=consts.constant_manager.VIKI,
                            type=consts.constant_manager.DOWNLOAD)
    return stream_obj.from_obj_to_string()


@staticmethod
def get_subtitle_content(tmp_subtitle_url):
    """Fetch *tmp_subtitle_url* with a random User-Agent and return the body.

    The HTTP response is closed once it has been read.
    """
    # TODO: handle on-the-fly URL access
    req = urllib2.Request(tmp_subtitle_url)
    req.add_header('User-Agent',
                   random.choice(consts.constant_manager.USER_AGENTS))
    response = urllib2.urlopen(req)
    try:
        page_source = response.read()
    finally:
        response.close()
    return str(page_source)