Example #1
def monitor_download_status():
    redis_queue = RedisMsgQueue()
    while True:
        all_files_old_json = redis_queue.hash_get_all(
            consts.constant_manager.DOWNLOAD_STATUS_QUEUE_NAME)
        for hash_sign in all_files_old_json.keys():
            all_files_old_json[hash_sign] = from_string_to_json(
                all_files_old_json[hash_sign])
            all_files_old_json[hash_sign]['now_size'] = get_file_size(
                all_files_old_json[hash_sign]['absolute_path'])
        # If a downloading file's size does not change for 5 minutes, assume the
        # download service has crashed and remove it from the download queue.
        time.sleep(10)
        all_files_new_json = redis_queue.hash_get_all(
            consts.constant_manager.DOWNLOAD_STATUS_QUEUE_NAME)
        for hash_sign in all_files_new_json:
            if hash_sign not in all_files_old_json:
                # Skip entries that appeared during the sleep window.
                continue
            all_files_new_json[hash_sign] = from_string_to_json(
                all_files_new_json[hash_sign])
            all_files_new_json[hash_sign]['now_size'] = get_file_size(
                all_files_new_json[hash_sign]['absolute_path'])
            if int(all_files_new_json[hash_sign]['now_size']) - int(
                    all_files_old_json[hash_sign]['now_size']) == 0:
                redis_queue.hash_del(
                    consts.constant_manager.DOWNLOAD_STATUS_QUEUE_NAME,
                    hash_sign)
        logging.debug('monitor download queue')
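These snippets lean on small utility helpers that are not part of the listing. A minimal sketch of what from_string_to_json and get_file_size might look like, assuming they are thin wrappers around json.loads and os.path.getsize (hypothetical reconstructions for readability, not the project's actual code):

# Hypothetical helper sketches; the real project helpers are not shown above
# and may differ in detail.
import json
import os


def from_string_to_json(raw_str):
    # Parse a JSON string into a dict; fall back to an empty dict on bad input.
    try:
        return json.loads(raw_str)
    except (ValueError, TypeError):
        return {}


def get_file_size(absolute_path):
    # Return the current file size in bytes, or 0 if the file does not exist yet.
    if os.path.exists(absolute_path):
        return os.path.getsize(absolute_path)
    return 0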
Example #2
    def process(self, content):
        response_stream = from_string_to_json(get_and_download_stream_obj(content))

        if response_stream['type'] == consts.constant_manager.DOWNLOAD:
            for download_info in response_stream['download_file_list']:
                file_name = get_file_name_by_download_url(download_info['download_url'])
                if download_info['media_type'] == consts.constant_manager.SUBTITLE:
                    file_name = response_stream['site'] + '_' + get_file_name_by_download_url(response_stream['original_url']) + \
                                '_' + download_info['language']
                file_obj = DownloadFile(download_url=download_info['download_url'], file_name=file_name,
                                        site=response_stream['site'], original_url=response_stream['original_url'])

                download_media_json = {
                    'video_url': response_stream['video_url'],
                    'original_url': response_stream['original_url'],
                    'download_url': download_info['download_url'],
                    'media_quality': download_info['media_quality'],
                    'episode': response_stream['episode'],
                    'download_path': ConfigInit().get_config_by_option('download_path'),
                    'media_name': response_stream['media_name'],
                    'hash_sign': get_hash_sign(file_name),
                    'media_type': download_info['media_type'],
                    'site': response_stream['site'],
                    'language': download_info['language'],
                    'merged_sign': download_info['merged_sign'],
                    'merged_order': download_info['merged_order'],
                }
                scheduler_db_save_queue(download_media_json)
                # todo: fine-grained management of download priorities
                if int(download_info['priority']) > 50:
                    scheduler_download_queue(file_obj.from_obj_to_json(), priority=True)
                else:
                    scheduler_download_queue(file_obj.from_obj_to_json())
        return response_stream
Example #3
    def process(self, content):
        to_merged_medias_lists = from_string_to_json(content)
        merged_absolue_path = self.merge_media(to_merged_medias_lists)
        # todo: make this batch of database operations atomic
        if merged_absolue_path:
            download_media_merged_json = copy.deepcopy(
                to_merged_medias_lists[0])
            del_list = [
                'id', 'cloud_path', 'create_time', 'merged_status',
                'update_time', 'upload_status'
            ]
            for del_column in del_list:
                del download_media_merged_json[del_column]
            download_media_merged_json['absolute_path'] = merged_absolue_path
            download_media_merged_json[
                'media_type'] = consts.constant_manager.MERGED
            download_media_merged_json['total_size'] = get_file_size(
                merged_absolue_path)
            download_media_merged_json['hash_sign'] = get_hash_sign(
                download_media_merged_json['merged_sign'])
            download_media_merged_json['merged_order'] = -1
            scheduler_db_save_queue(download_media_merged_json)
            for download_media_json in to_merged_medias_lists:
                if exist_file(download_media_json['absolute_path']):
                    del_file(download_media_json['absolute_path'])
                download_media_json['merged_status'] = '1'
                # Iterate over a copy of the keys so deleting entries is safe.
                for column in list(download_media_json.keys()):
                    if download_media_json[column] == 'None':
                        del download_media_json[column]
                scheduler_db_save_queue(download_media_json)
Example #4
def being_download(hash_sign):
    # todo: support other ways of checking whether a file is being downloaded
    redis_queue = RedisMsgQueue()
    all_files_json = redis_queue.set_get_all(
        consts.constant_manager.DOWNLOAD_STATUS_QUEUE_NAME)
    for file_str in all_files_json:
        file_json = from_string_to_json(file_str)
        if hash_sign == file_json['hash_sign']:
            return True
    return False
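A hypothetical usage sketch, assuming the same DownloadFile and scheduler conventions as Example #2; the surrounding names here are illustrative, not taken from the project:

# Illustrative only: skip re-scheduling a file that is already being downloaded.
# 'file_name' and 'file_obj' are assumed to be built exactly as in Example #2.
hash_sign = get_hash_sign(file_name)
if not being_download(hash_sign):
    scheduler_download_queue(file_obj.from_obj_to_json())
else:
    logging.debug('skip %s, already in download status queue' % file_name)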
Example #5
def chrome_test(url):
    opts = ChromeOptions()
    opts.add_argument("--headless")
    driver = webdriver.Chrome(executable_path=DEPLOY_HOME +
                              '/src/config/chromedriver_mac243',
                              chrome_options=opts)
    driver.set_page_load_timeout(15)
    try:
        driver.get(url)
    except TimeoutException:
        traceback.print_exc()
    # scriptToExecute = "var performance = window.performance || window.mozPerformance || window.msPerformance || window.webkitPerformance || {}; var network = performance.getEntries() || {}; return network;"
    # net_work_info = driver.execute_script(scriptToExecute)
    net_work_info = driver.execute_script(
        "return window.performance.getEntries();")
    page_source = driver.page_source.encode('utf-8')
    driver.quit()

    tmp_subtitles_list = parser_subtitles_page_source(page_source)
    subtitles_list = []
    for subtitle in tmp_subtitles_list[:1]:
        subtitle_dict = from_string_to_json(subtitle)
        tmp_subtitle_url = subtitle_dict['src']
        subtitle_dict['source'] = get_subtitle(tmp_subtitle_url)
        subtitles_list.append(subtitle_dict)

    sv_id = url[url.find('videos/') + len('videos/'):url.find('-')]
    net_work_info_str = str(net_work_info)
    begin = net_work_info_str.find('dash_high_480p_') + len('dash_high_480p_')
    sv_play_str = net_work_info_str[begin:net_work_info_str.
                                    find('_track', begin)]
    url_video = 'https://content.viki.io/%s/dash/%s_dash_high_480p_%s_track1_dashinit.mp4' % (
        sv_id, sv_id, sv_play_str)
    url_audio = 'https://content.viki.io/%s/dash/%s_dash_high_480p_%s_track2_dashinit.mp4' % (
        sv_id, sv_id, sv_play_str)
    down_480p_list = [url_video, url_audio]
    download_video(down_480p_list)
    merge_sv_demo()

    write_file('/Users/tv365/test_de', subtitles_list[0]['source'])
    # subtitles_dict = {'de': '/Users/tv365/test_de'}
    # sv = SingleVideo('vid0', 'svid0', subtitles_dict['de'], '')
Example #6
    def process(self, content):
        to_merged_medias_lists = from_string_to_json(content)
        merged_absolue_path = self.merge_media(to_merged_medias_lists)
        # todo: make this batch of database operations atomic
        if merged_absolue_path:
            download_media_json = copy.deepcopy(to_merged_medias_lists[0])
            del download_media_json['id']
            download_media_json['absolute_path'] = merged_absolue_path
            download_media_json['media_type'] = consts.constant_manager.MERGED
            download_media_json['total_size'] = get_file_size(
                merged_absolue_path)
            download_media_json['hash_sign'] = get_hash_sign(
                download_media_json['merged_sign'])
            download_media_json['download_status'] = ''
            download_media_json['merged_order'] = ''
            scheduler_db_save_queue(download_media_json)
            for download_media_json in to_merged_medias_lists:
                download_media_json['merged_status'] = '1'
                scheduler_db_save_queue(download_media_json)
Example #7
def get_paly_urls():
    urls = [
        'https://www.viki.com/tv/31037c-woman-with-a-suitcase',
        'https://www.viki.com/tv/3548c-dream-high-2',
        'https://www.viki.com/tv/35535c-stars-lover',
        'https://www.viki.com/tv/655c-winter-bird',
        'https://www.viki.com/tv/29619c-que-sera-sera',
        'https://www.viki.com/tv/8037c-goodbye-dear-wife',
        'https://www.viki.com/tv/29550c-secret',
        'https://www.viki.com/tv/29465c-my-love-patzzi',
        'https://www.viki.com/tv/12697c-three-days',
        'https://www.viki.com/tv/35530c-bad-couple',
        'https://www.viki.com/tv/29161c-sweet-savage-family',
        'https://www.viki.com/tv/29486c-snowman',
        'https://www.viki.com/tv/29483c-my-lifes-golden-age',
        'https://www.viki.com/tv/31100c-romance-blue',
        'https://www.viki.com/tv/35623c-borg-mom',
        'https://www.viki.com/tv/29473c-who-are-you',
        'https://www.viki.com/tv/28380c-the-virtual-bride',
        'https://www.viki.com/tv/35519c-you-are-too-much',
        'https://www.viki.com/tv/29477c-the-lawyers-of-the-great-republic-of-korea',
        'https://www.viki.com/tv/29546c-air-city',
        'https://www.viki.com/tv/11669c-ad-genius-lee-taebaek',
        'https://www.viki.com/tv/25807c-be-arrogant',
        'https://www.viki.com/tv/29463c-90-days-time-to-love',
        'https://www.viki.com/tv/35571c-daljas-spring',
        'https://www.viki.com/tv/35533c-matchmakers-lover',
        'https://www.viki.com/tv/35539c-women-in-the-sun',
        'https://www.viki.com/tv/35538c-tazza',
        'https://www.viki.com/tv/35532c-surgeon-bong-dal-hee',
        'https://www.viki.com/tv/29545c-general-hospital-2',
        'https://www.viki.com/tv/28243c-28-faces-of-the-moon',
        'https://www.viki.com/tv/25771c-punch',
        'https://www.viki.com/tv/29535c-super-rookie',
        'https://www.viki.com/tv/12068c-the-queen-of-office',
        'https://www.viki.com/tv/26913c-the-man-in-the-mask',
        'https://www.viki.com/tv/29544c-spotlight',
        'https://www.viki.com/tv/3339c-miss-ripley',
        'https://www.viki.com/tv/24873c-boarding-house-24',
        'https://www.viki.com/tv/35542c-sign',
        'https://www.viki.com/tv/29471c-dr-gang',
        'https://www.viki.com/tv/29478c-behind-the-white-tower',
        'https://www.viki.com/tv/35807c-swan',
        'https://www.viki.com/tv/27882c-assembly',
        'https://www.viki.com/tv/35529c-get-karl-oh-soo-jung',
        'https://www.viki.com/tv/27211c-jumping-girl',
        'https://www.viki.com/tv/34053c-bing-goo',
        'https://www.viki.com/tv/28972c-cheers-to-me',
        'https://www.viki.com/tv/29482c-merry-mary',
        'https://www.viki.com/tv/29492c-before-and-after-plastic-surgery-clinic',
        'https://www.viki.com/tv/29666c-puck',
        'https://www.viki.com/tv/29476c-auction-house',
    ]
    play_urls = []
    urls_api = ['https://api.viki.io/v4/containers/%s/episodes.json?sort=number&' \
                'direction=asc&per_page=20&with_paging=true&blocked=true&with_kcp=true&' \
                'app=100000a&page=1' % url[url.find('tv/') + len('tv/'):url.find('-')] for url in urls]
    count = 0
    for url in urls_api:
        count += 1
        print count
        req = urllib2.Request(url=url, headers={'User-Agent': random.choice(consts.constant_manager.USER_AGENTS)})
        response = urllib2.urlopen(req)
        page_source = response.read()
        page_source_json = from_string_to_json(page_source)
        for play_url_dict in page_source_json['response']:
            play_urls.append(play_url_dict['url']['fb'])
    return play_urls
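The container id passed to the episodes API is just the path segment between 'tv/' and the first '-'. A quick worked illustration of the slicing used to build urls_api above (the variable name container_id is only for illustration):

# Worked example of the slice used in the urls_api comprehension above.
url = 'https://www.viki.com/tv/31037c-woman-with-a-suitcase'
container_id = url[url.find('tv/') + len('tv/'):url.find('-')]
# container_id == '31037c'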
Example #8
    def parse(self, url):
        driver = SeleniumDirverFactory().get_driver('chrome')
        # todo: stability handling
        driver.set_page_load_timeout(60)
        try:
            driver.get(url)
        except TimeoutException:
            traceback.print_exc()
        finally:
            page_source = driver.page_source.encode('utf-8')
            net_work_info_list = driver.execute_script("return window.performance.getEntries();")
            SeleniumDirverFactory().quit_driver('chrome')

        begin = page_source.find('var parsedSubtitles =') + len('var parsedSubtitles =')
        end = page_source.find('];', begin)
        tmp_subtitles_list = page_source[begin:end].replace('[', '').replace(' ', '').replace('amp;', '').split('},')
        # Re-append the closing brace stripped by splitting on '},'.
        tmp_subtitles_list = [dict_str + ('}' if '}' not in dict_str else '')
                              for dict_str in tmp_subtitles_list]

        download_file_list = []
        # tmp_subtitles_list = []
        for subtitle in tmp_subtitles_list:
            subtitle_dict = from_string_to_json(subtitle)
            if int(subtitle_dict['percentage']) < 95:
                continue
            download_file_list.append(
                self.build_singel_stream_json(download_url=subtitle_dict['src'], language=subtitle_dict['srclang'],
                                              media_type=constant_manager.SUBTITLE, priority=99))
        # todo: per-quality configuration; support other audio/video types;
        # support concatenating multi-segment audio/video
        for net_work_dict in net_work_info_list:
            if 'name' in net_work_dict and '480p' in net_work_dict['name']:
                url_video_480p = str(net_work_dict['name']).replace('track2', 'track1')
                url_audio_480p = str(net_work_dict['name']).replace('track1', 'track2')
                download_file_list.append(self.build_singel_stream_json(download_url=url_video_480p,
                                                                        media_quality=consts.constant_manager.MEDIA_480P,
                                                                        media_type=consts.constant_manager.VIDEO,
                                                                        merged_sign='_'.join([url, '480p']),
                                                                        merged_order=1))
                download_file_list.append(
                    self.build_singel_stream_json(download_url=url_audio_480p,
                                                  media_quality=consts.constant_manager.MEDIA_480P,
                                                  media_type=consts.constant_manager.AUDIO,
                                                  merged_sign='_'.join([url, '480p']),
                                                  merged_order=1))
                break
        else:
            logging.error('can not find 480p in url %s' % url)
        if len(download_file_list) == 0:
            logging.error('no download_url in url %s' % url)
        begin_media_name = page_source.find('<title>') + len('<title>')
        end_media_name = page_source.find('</title>', begin_media_name)
        media_name = page_source[begin_media_name:end_media_name].replace(' ', '')
        # '  <meta property="video:series" content="http://www.viki.com/tv/35884c-all-out-of-love" />'
        begin_video_url = page_source.find('<meta property="video:series" content="') + len(
            '<meta property="video:series" content="')
        end_video_url = page_source.find('"', begin_video_url)
        video_url = page_source[begin_video_url:end_video_url]

        try:
            episode = ''
            episode_begin = media_name.upper().find('EPISODE') + len('EPISODE')
            media_name_with_episode = media_name[episode_begin:]
            episode = re.search('[0-9]+', media_name_with_episode).group(0)
        except:
            traceback.print_exc()
            logging.error('episode error')
        response_obj = StreamInfo(video_url=video_url, media_name=media_name, episode=episode, original_url=url,
                                  download_file_list=download_file_list,
                                  site=consts.constant_manager.VIKI, type=consts.constant_manager.DOWNLOAD)
        return response_obj.from_obj_to_string()

    @staticmethod
    def get_subtitle_content(tmp_subtitle_url):
        # todo: handle on-the-fly url access
        req = urllib2.Request(tmp_subtitle_url)
        req.add_header('User-Agent', random.choice(consts.constant_manager.USER_AGENTS))
        page_source = urllib2.urlopen(req).read()
        return str(page_source)
Example #9
def get_paly_urls():
    urls_support = [
        # Korean dramas
        # 'https://www.viki.com/tv/31037c-woman-with-a-suitcase',
        # 'https://www.viki.com/tv/3548c-dream-high-2',
        # 'https://www.viki.com/tv/8037c-goodbye-dear-wife',
        # 'https://www.viki.com/tv/29550c-secret',
        # 'https://www.viki.com/tv/29465c-my-love-patzzi',
        # 'https://www.viki.com/tv/29161c-sweet-savage-family',
        # 'https://www.viki.com/tv/29486c-snowman',
        # 'https://www.viki.com/tv/29483c-my-lifes-golden-age',
        # 'https://www.viki.com/tv/31100c-romance-blue',
        # 'https://www.viki.com/tv/35623c-borg-mom',
        # 'https://www.viki.com/tv/29473c-who-are-you',
        # 'https://www.viki.com/tv/29546c-air-city',
        # 'https://www.viki.com/tv/25807c-be-arrogant',
        # 'https://www.viki.com/tv/29463c-90-days-time-to-love',
        # 'https://www.viki.com/tv/29545c-general-hospital-2',
        # 'https://www.viki.com/tv/28243c-28-faces-of-the-moon',
        # 'https://www.viki.com/tv/29535c-super-rookie',
        # 'https://www.viki.com/tv/26913c-the-man-in-the-mask',
        # 'https://www.viki.com/tv/29544c-spotlight',
        # 'https://www.viki.com/tv/3339c-miss-ripley',
        # 'https://www.viki.com/tv/24873c-boarding-house-24',
        # 'https://www.viki.com/tv/29471c-dr-gang',
        # 'https://www.viki.com/tv/29478c-behind-the-white-tower',
        # 'https://www.viki.com/tv/35807c-swan',
        # 'https://www.viki.com/tv/27211c-jumping-girl',
        # 'https://www.viki.com/tv/28972c-cheers-to-me',
        # 'https://www.viki.com/tv/29482c-merry-mary',
        # 'https://www.viki.com/tv/29492c-before-and-after-plastic-surgery-clinic',
        # 'https://www.viki.com/tv/29666c-puck',
        # # Mainland Chinese dramas
        # 'https://www.viki.com/tv/29015c-my-sunshine-directors-cut',
        # 'https://www.viki.com/tv/21925c-singing-all-along',
        # 'https://www.viki.com/tv/28160c-the-interpreter',
        # 'https://www.viki.com/tv/35697c-an-oriental-odyssey',
        # 'https://www.viki.com/tv/29908c-fifteen-years-of-waiting-for-migratory-birds',
        # 'https://www.viki.com/tv/36049c-sweet-dreams',
        # 'https://www.viki.com/tv/31805c-because-of-meeting-you',
        # 'https://www.viki.com/tv/31618c-princess-agents',
        # 'https://www.viki.com/tv/35699c-sweet-combat',
        # 'https://www.viki.com/tv/36178c-ever-night',
        # 'https://www.viki.com/tv/35601c-dear-prince',
        # 'https://www.viki.com/tv/35857c-secret-of-the-three-kingdoms',
        # 'https://www.viki.com/tv/32658c-the-foxs-summer',
        # 'https://www.viki.com/tv/35576c-the-foxs-summer-season-2',
        # 'https://www.viki.com/tv/29266c-love-me-if-you-dare',
        # 'https://www.viki.com/tv/33387c-pretty-li-hui-zhen',
        # 'https://www.viki.com/tv/33973c-my-mr-mermaid',
        # 'https://www.viki.com/tv/31583c-my-amazing-boyfriend',
        # 'https://www.viki.com/tv/35710c-i-cannot-hug-you',
        # 'https://www.viki.com/tv/35684c-face-off',
        # 'https://www.viki.com/tv/23841c-the-imperial-doctress',
        # 'https://www.viki.com/tv/34371c-the-kings-woman',
        # 'https://www.viki.com/tv/30283c-song-of-phoenix',
        # 'https://www.viki.com/tv/36044c-the-love-knot-his-excellencys-first-love',
        # 'https://www.viki.com/tv/21864c-chinese-paladin-5-clouds-of-the-world',
        # 'https://www.viki.com/tv/22943c-nirvana-in-fire',
        # 'https://www.viki.com/tv/35605c-the-flames-daughter',
        # 'https://www.viki.com/tv/25705c-legend-of-lu-zhen',
        # 'https://www.viki.com/tv/35664c-fighter-of-the-destiny',
        # 'https://www.viki.com/tv/29384c-whirlwind-girl',
        # 'https://www.viki.com/tv/30332c-the-legend-of-chusen',
        # 'https://www.viki.com/tv/35843c-siege-in-fog',
        # 'https://www.viki.com/tv/20346c-perfect-couple',
        # 'https://www.viki.com/tv/22353c-daughter-back',
        # 'https://www.viki.com/tv/30705c-addicted',
        # 'https://www.viki.com/tv/35607c-delicious-destiny',
        # 'https://www.viki.com/tv/28842c-ice-fantasy',
        # 'https://www.viki.com/tv/23849c-thinking-of-you-lu-xiang-bei',
        # 'https://www.viki.com/tv/21228c-legend-of-the-ancient-sword',
        # 'https://www.viki.com/tv/33665c-across-the-ocean-to-see-you',
        # 'https://www.viki.com/tv/32827c-nirvana-in-fire-2',
        # 'https://www.viki.com/tv/30299c-hot-girl',
        # 'https://www.viki.com/tv/34161c-love-just-come',
        # 'https://www.viki.com/tv/2978c-scarlet-heart',
        # 'https://www.viki.com/tv/31190c-when-a-snail-falls-in-love',
        # 'https://www.viki.com/tv/12472c-the-four',
        # 'https://www.viki.com/tv/35704c-only-side-by-side-with-you',
        # 'https://www.viki.com/tv/28818c-my-best-ex-boyfriend',
        # 'https://www.viki.com/tv/12747c-scarlet-heart-2',
        # 'https://www.viki.com/tv/34436c-the-lovers-lies',
        # Japanese dramas
        # 'https://www.viki.com/tv/35651c-youre-my-pet-kimi-wa-petto',
        # 'https://www.viki.com/tv/31884c-sunshine',
        # 'https://www.viki.com/tv/29377c-my-little-lover-minami-kun-no-koibito',
        # 'https://www.viki.com/tv/35654c-hakuouki-ssl-sweet-school-life',
        # 'https://www.viki.com/tv/31882c-rainbow-rose',
        # 'https://www.viki.com/tv/31813c-vampire-heaven',
        # 'https://www.viki.com/tv/29394c-lady-girls',
        # 'https://www.viki.com/tv/35653c-i-am-reiko-shiratori-shiratori-reiko-de-gozaimasu',
        # 'https://www.viki.com/tv/34349c-delicious-niigata-in-japan',
        # 'https://www.viki.com/tv/36303c-iniesta-tv',
        # 'https://www.viki.com/tv/23069c-love-stories-from-fukuoka',
        # 'https://www.viki.com/tv/28765c-visiting-sacred-places-of-the-tohoku-region',
        # 'https://www.viki.com/tv/31798c-blue-fire',
        # 'https://www.viki.com/tv/7468c-leiji-matsumotos-ozma',
        # 'https://www.viki.com/tv/34350c-railway-story',
        # 'https://www.viki.com/tv/36253c-iniesta-tv-discover-japan',
        # 'https://www.viki.com/tv/36302c-iniesta-tv-interviews',
        # 'https://www.viki.com/tv/29122c-tabiaruki-from-iwate',
        # 'https://www.viki.com/tv/33804c-a-heartfelt-trip-to-fukushima',
        # 'https://www.viki.com/tv/33807c-lets-explore-fukushima',
        # 'https://www.viki.com/tv/33806c-murakami-grand-festival-2016-tradition-passed-down',
        # 'https://www.viki.com/tv/33805c-festival-pride-for-hometown',
        # 'https://www.viki.com/tv/30268c-sendai-iroha-zoukangou',
        # 'https://www.viki.com/tv/34348c-the-sanjo-great-kite-battle',
        # 'https://www.viki.com/tv/36085c-vissel-kobe-welcome-event-bienvenido-andrs-iniesta',
        # 'https://www.viki.com/tv/34345c-tales-of-tohoku',
        # Taiwanese dramas
        'https://www.viki.com/tv/29014c-crime-scene-investigation-center',
        'https://www.viki.com/tv/36219c-campus-heroes',
        'https://www.viki.com/tv/36106c-love-and',
        'https://www.viki.com/tv/35897c-my-little-boys',
        'https://www.viki.com/tv/35859c-between',
        'https://www.viki.com/tv/35830c-single-ladies-senior',
        'https://www.viki.com/tv/35712c-iron-ladies',
        'https://www.viki.com/tv/35708c-see-you-in-time',
        'https://www.viki.com/tv/35627c-lulus-diary',
        'https://www.viki.com/tv/35561c-home-sweet-home',
        'https://www.viki.com/tv/35560c-memory-love',
        'https://www.viki.com/tv/35552c-when-a-woman-chases-a-man',
        'https://www.viki.com/tv/35551c-attention-love',
        'https://www.viki.com/tv/35549c-lion-pride',
        'https://www.viki.com/tv/35492c-the-man-from-the-future',
        'https://www.viki.com/tv/34532c-the-masked-lover',
        'https://www.viki.com/tv/34531c-the-perfect-match',
        'https://www.viki.com/tv/34530c-just-for-you',
        'https://www.viki.com/tv/33719c-all-in-700',
        'https://www.viki.com/tv/33590c-behind-your-smile',
        'https://www.viki.com/tv/33551c-the-king-of-romance',
        'https://www.viki.com/tv/33362c-v-focus',
        'https://www.viki.com/tv/32673c-love-by-design',
        'https://www.viki.com/tv/32353c-27th-golden-melody-awards',
        'https://www.viki.com/tv/32310c-swimming-battle',
        'https://www.viki.com/tv/32029c-better-man',
        'https://www.viki.com/tv/31402c-metro-of-love',
        'https://www.viki.com/tv/30921c-love-at-seventeen',
        'https://www.viki.com/tv/30868c-spop-weekly-report',
        'https://www.viki.com/tv/30410c-back-to-1989',
        'https://www.viki.com/tv/30017c-be-with-me',
        'https://www.viki.com/tv/29959c-thirty-something',
        'https://www.viki.com/tv/29705c-love-or-spend',
        'https://www.viki.com/tv/29259c-bromance',
        'https://www.viki.com/tv/23561c-love-myself-or-you',
        'https://www.viki.com/tv/28856c-school-beautys-personal-bodyguard',
        'https://www.viki.com/tv/28009c-when-i-see-you-again',
        'https://www.viki.com/tv/27213c-murphys-law-of-love',
        'https://www.viki.com/tv/20292c-just-you',
        'https://www.viki.com/tv/20331c-love-around',
        'https://www.viki.com/tv/635c-autumns-concerto',
        'https://www.viki.com/tv/228c-fated-to-love-you',
        'https://www.viki.com/tv/26885c-shia-wa-se',
        'https://www.viki.com/tv/23266c-fall-in-love-with-me',
    ]
    play_urls = []
    urls_api = ['https://api.viki.io/v4/containers/%s/episodes.json?sort=number&' \
                'direction=asc&per_page=200&with_paging=true&blocked=true&with_kcp=true&' \
                'app=100000a&page=1' % url[url.find('tv/') + len('tv/'):url.find('-')] for url in urls_support]
    count = 0
    for url in urls_api:
        try:
            count += 1
            logging.debug(count)
            req = urllib2.Request(url=url,
                                  headers={
                                      'User-Agent':
                                      random.choice(
                                          consts.constant_manager.USER_AGENTS)
                                  })
            response = urllib2.urlopen(req)
            page_source = response.read()
            page_source_json = from_string_to_json(page_source)
            for play_url_dict in page_source_json['response']:
                play_urls.append(play_url_dict['url']['fb'])
        except:
            traceback.print_exc()
    return play_urls
Example #10
    def parse(self, url):
        subtitle_parse_success = media_parse_success = False
        page_source = ''
        net_work_info_list = []
        tmp_subtitles_list = []
        driver_factory = SeleniumDirverFactory()
        driver = driver_factory.get_driver('chrome')
        # page_source, net_work_info_list = get_driver_test(url)
        # todo: manage browser objects in a separate process and handle different browsers independently
        driver.set_page_load_timeout(60 * 3)
        try:
            driver.get(url)
        except TimeoutException:
            logging.debug('timeout loading page in %s' % url)
            traceback.print_exc()
        except:
            logging.debug('unknown error in %s' % url)
            traceback.print_exc()
        finally:
            try:
                page_source = driver.page_source.encode('utf-8')
                net_work_info_list = driver.execute_script(
                    "return window.performance.getEntries();")
            except:
                logging.debug('unknown error getting page_source in %s' % url)
                traceback.print_exc()
            finally:
                driver_factory.quit_driver('chrome')
                # driver_factory.close_driver()
                logging.debug('quit success')
        # todo: support parsing paid content
        if page_source.find('var parsedSubtitles =') != -1:
            begin = page_source.find('var parsedSubtitles =') + len(
                'var parsedSubtitles =')
            end = page_source.find('];', begin)
            tmp_subtitles_str = page_source[begin:end].replace(
                '[', '').replace(' ', '').replace('amp;', '').split('},')
            tmp_subtitles_list = [
                dict_str + ('}' if '}' not in dict_str else '')
                for dict_str in tmp_subtitles_str
            ]
        download_file_list = []
        for subtitle in tmp_subtitles_list:
            subtitle_dict = from_string_to_json(subtitle)
            if len(subtitle_dict) == 0:
                continue
            if int(subtitle_dict['percentage']) < 95:
                continue
            download_file_list.append(
                self.build_singel_stream_json(
                    download_url=subtitle_dict['src'],
                    language=subtitle_dict['srclang'],
                    media_type=constant_manager.SUBTITLE,
                    priority=99))
            subtitle_parse_success = True
        # todo: per-quality configuration; support other audio/video types;
        # support concatenating multi-segment audio/video
        for net_work_dict in net_work_info_list:
            if 'name' in net_work_dict and '480p' in net_work_dict[
                    'name'] and 'track' in net_work_dict['name']:
                logging.debug('video and audio parse success')
                media_parse_success = True
                url_video_480p = str(net_work_dict['name']).replace(
                    'track2', 'track1')
                url_audio_480p = str(net_work_dict['name']).replace(
                    'track1', 'track2')
                download_file_list.append(
                    self.build_singel_stream_json(
                        download_url=url_video_480p,
                        media_quality=consts.constant_manager.MEDIA_480P,
                        media_type=consts.constant_manager.VIDEO,
                        merged_sign='_'.join([url, '480p']),
                        merged_order=1))
                download_file_list.append(
                    self.build_singel_stream_json(
                        download_url=url_audio_480p,
                        media_quality=consts.constant_manager.MEDIA_480P,
                        media_type=consts.constant_manager.AUDIO,
                        merged_sign='_'.join([url, '480p']),
                        merged_order=1))
                break
            elif 'name' in net_work_dict and \
                    ('240p' in net_work_dict['name'] or '360p' in net_work_dict['name'] or '480p' in net_work_dict[
                        'name']) \
                    and 'stream_name=' in net_work_dict['name']:
                logging.debug('merged parse success')
                media_parse_success = True
                begin = net_work_dict['name'].find('stream_name=') + len(
                    'stream_name=')
                end = net_work_dict['name'].find('&', begin)
                url_merged = url_decode(net_work_dict['name'][begin:end])
                if not url_merged.startswith('http'):
                    logging.error('media url error %s %s' %
                                  (url_merged, net_work_dict['name']))
                download_file_list.append(
                    self.build_singel_stream_json(
                        download_url=url_merged,
                        media_quality=get_media_quality(url_merged),
                        media_type=consts.constant_manager.MERGED,
                        priority=88))
                break
        else:
            logging.error('can not find 480p in url %s' % url)
        self.log_result(subtitle_parse_success, media_parse_success, url)
        if len(download_file_list) == 0:
            return from_json_to_string({
                'info': 'no stream url',
                'url': url,
                'type': 'error'
            })
        begin_media_name = page_source.find('<title>') + len('<title>')
        end_media_name = page_source.find('</title>', begin_media_name)
        media_name = page_source[begin_media_name:end_media_name].replace(
            ' ', '')
        begin_video_url = page_source.find(
            '<meta property="video:series" content="') + len(
                '<meta property="video:series" content="')
        end_video_url = page_source.find('"', begin_video_url)
        video_url = page_source[begin_video_url:end_video_url]

        try:
            episode = ''
            episode_begin = media_name.upper().find('EPISODE') + len('EPISODE')
            media_name_with_episode = media_name[episode_begin:]
            episode = re.search('[0-9]+', media_name_with_episode).group(0)
        except:
            traceback.print_exc()
            logging.error('episode error')
        stream_obj = StreamInfo(video_url=video_url,
                                media_name=media_name,
                                episode=episode,
                                original_url=url,
                                download_file_list=download_file_list,
                                site=consts.constant_manager.VIKI,
                                type=consts.constant_manager.DOWNLOAD)
        return stream_obj.from_obj_to_string()

    @staticmethod
    def get_subtitle_content(tmp_subtitle_url):
        # todo: handle on-the-fly url access
        req = urllib2.Request(tmp_subtitle_url)
        req.add_header('User-Agent',
                       random.choice(consts.constant_manager.USER_AGENTS))
        page_source = urllib2.urlopen(req).read()
        return str(page_source)
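The parsers above assemble their per-file entries through self.build_singel_stream_json, which is not shown in the listing. Judging by the fields consumed in Example #2 (download_url, media_type, media_quality, language, priority, merged_sign, merged_order), a plausible sketch might look like the following; the defaults are assumptions, not the project's actual values:

    # Hypothetical reconstruction of the parser-class helper used above; the
    # field set is inferred from how Example #2 reads download_file_list, and
    # the default values are assumptions.
    def build_singel_stream_json(self, download_url, media_type,
                                 media_quality='', language='',
                                 priority=0, merged_sign='', merged_order=-1):
        return {
            'download_url': download_url,
            'media_type': media_type,
            'media_quality': media_quality,
            'language': language,
            'priority': priority,
            'merged_sign': merged_sign,
            'merged_order': merged_order,
        }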