Пример #1
0
def tudou_download(url, **kwargs):
    """Resolve a Tudou URL to a video id and pass it to the Youku downloader.

    When ``url`` contains ``video.tudou.com`` the id is taken from the
    ``/v/<id>`` part of the URL itself. Otherwise the page is fetched and
    the id is read from the JSON assigned to ``window.__INITIAL_STATE__``
    (``videoDesc -> detail -> videoid``).

    All keyword arguments are forwarded unchanged to
    ``youku_download_by_vid``.
    """
    if 'video.tudou.com' not in url:
        # The id is not in the URL: dig it out of the page's embedded state.
        html = get_content(url)
        state_json = match1(
            html, r'window.__INITIAL_STATE__=\s*(.+?);</script>')
        state = json.loads(state_json)
        vid = state['videoDesc']['detail']['videoid']
    else:
        vid = match1(url, r'.*?video.tudou.com/v/([\w=]+)')
    youku_download_by_vid(vid, **kwargs)
Пример #2
0
    def entry(self, **kwargs):
        """Pick the right downloader for the page held in ``self.page``.

        The page is probed in this order:

        1. A Tencent player marker (``bili-cid=...&bili-aid=...&vid=...``):
           sets ``self.out`` to True and delegates to
           ``qq_download_by_vid``.
        2. A cid, written either as ``cid=<digits>`` or ``"cid":<digits>``:
           delegates to ``self.download_by_vid``; its second argument is
           True when ``self.url`` contains ``bangumi``.
        3. A ``flashvars="..."`` attribute whose first parameter is named
           ``vid`` (``sina_download_by_vid``) or ``ykid``
           (``youku_download_by_vid``).

        A playlist warning is logged when the page contains an
        ``<option`` tag and the URL has no ``index_<n>.html`` part.

        Args:
            **kwargs: Forwarded as-is to ``self.download_by_vid``. The
                Tencent, Sina and Youku paths read ``output_dir``,
                ``merge`` and ``info_only`` from it.

        Raises:
            Exception: Neither a cid nor flashvars were found in the page.
            NotImplementedError: The first flashvars parameter is neither
                ``vid`` nor ``ykid``.
        """
        # tencent player
        tc_match = re.search(
            r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"', self.page
        )
        if tc_match is not None:
            self.out = True
            qq_download_by_vid(
                tc_match.group(1), self.title,
                output_dir=kwargs['output_dir'], merge=kwargs['merge'],
                info_only=kwargs['info_only']
            )
            return

        has_plist = re.search(r'<option', self.page)
        if has_plist and r1(r'index_(\d+).html', self.url) is None:
            log.w(
                'This page contains a playlist. (use --playlist to download '
                'all videos.)'
            )

        # Try both spellings of the cid. Testing the match objects (instead
        # of calling .group() on a possible None) keeps the flashvars
        # fallback below reachable when the page has no cid at all.
        cid_match = (
            re.search(r'cid=(\d+)', self.page)
            or re.search(r'"cid":(\d+)', self.page)
        )
        if cid_match is not None:
            self.download_by_vid(
                cid_match.group(1),
                re.search('bangumi', self.url) is not None, **kwargs
            )
            return

        # flashvars?
        flashvars_match = re.search(r'flashvars="([^"]+)"', self.page)
        if flashvars_match is None:
            raise Exception('Unsupported page {}'.format(self.url))
        flashvars = flashvars_match.group(1)

        # Only the first "name=value" pair decides the source. partition()
        # tolerates a value that itself contains '=' and a pair without '='.
        first_param = flashvars.split('&')[0]
        kind, _, source_vid = first_param.partition('=')
        kind = kind.strip()
        source_vid = source_vid.strip()
        if kind == 'vid':
            sina_download_by_vid(
                source_vid, self.title, output_dir=kwargs['output_dir'],
                merge=kwargs['merge'], info_only=kwargs['info_only']
            )
        elif kind == 'ykid':
            youku_download_by_vid(
                source_vid, self.title, output_dir=kwargs['output_dir'],
                merge=kwargs['merge'], info_only=kwargs['info_only']
            )
        else:
            raise NotImplementedError(
                'Unknown flashvars {}'.format(flashvars)
            )
        return
Пример #3
0
def acfun_download_by_vid(vid,
                          title,
                          output_dir='.',
                          merge=True,
                          info_only=False,
                          **kwargs):
    """str, str, str, bool, bool -> None

    Download Acfun video by vid.

    Call Acfun API, decide which site to use, and pass the job to its
    extractor. Afterwards, unless ``info_only`` or the module-level
    ``dry_run`` flag is set, the danmaku JSON returned by ``get_srt_json``
    is written to ``<title>.cmt.json`` in ``output_dir`` when ``caption``
    is truthy in ``kwargs``.

    Args:
        vid: Acfun video id, used for the API call and the danmaku lookup.
        title: Title passed to the extractor and used for the danmaku
            file name.
        output_dir: Directory that receives the downloaded files.
        merge: Forwarded to the extractor / ``download_urls``.
        info_only: When true, only print information; download nothing.
        **kwargs: ``caption`` enables the danmaku download (a missing key
            is treated as false). The whole mapping is forwarded to the
            Youku extractor only.

    Raises:
        NotImplementedError: ``sourceType`` is not a handled source.
        KeyError: The API response lacks a field this function reads
            (``sourceType``, ``sourceId``, or ``encode`` for ``zhuzhan``).
        ValueError: A ``zhuzhan`` video offers none of the known stream
            qualities.
    """
    # first call the main parsing API
    info = json.loads(
        get_content(
            'http://www.acfun.cn/video/getVideo.aspx?id={}'.format(vid)))

    source_type = info['sourceType']
    if source_type not in ('sina', 'youku', 'qq', 'letv', 'zhuzhan'):
        raise NotImplementedError(source_type)

    # Every handled source needs the id; a missing key now fails here with
    # a KeyError instead of an UnboundLocalError further down.
    source_id = info['sourceId']

    # call extractor decided by sourceType
    if source_type == 'sina':
        sina_download_by_vid(source_id,
                             title,
                             output_dir=output_dir,
                             merge=merge,
                             info_only=info_only)
    elif source_type == 'youku':
        youku_download_by_vid(source_id,
                              title=title,
                              output_dir=output_dir,
                              merge=merge,
                              info_only=info_only,
                              **kwargs)
    elif source_type == 'qq':
        qq_download_by_vid(source_id,
                           title,
                           output_dir=output_dir,
                           merge=merge,
                           info_only=info_only)
    elif source_type == 'letv':
        letvcloud_download_by_vu(source_id,
                                 '2d8c027396',
                                 title,
                                 output_dir=output_dir,
                                 merge=merge,
                                 info_only=info_only)
    else:  # 'zhuzhan'
        # As in Jul.28.2016, Acfun is using embsig to anti hotlink so we need
        # to pass this In Mar. 2017 there is a dedicated ``acfun_proxy'' in
        # youku cloud player old code removed
        page_url = 'http://www.acfun.cn/v/ac{}'.format(vid)
        yk_streams = youku_acfun_proxy(source_id, info['encode'], page_url)

        # Take the first quality, in order of preference, that has data.
        for quality in ('mp4hd3', 'mp4hd2', 'mp4hd', 'flvhd'):
            if yk_streams.get(quality):
                preferred = yk_streams[quality]
                break
        else:
            raise ValueError(
                'No supported stream found for ac{}'.format(vid))

        segment_urls = preferred[0]
        # total_size in the json could be incorrect(F.I. 0), so add up the
        # size reported for each segment instead.
        size = 0
        for segment_url in segment_urls:
            _, _, seg_size = url_info(segment_url)
            size += seg_size
        # fallback to flvhd is not quite possible
        print_info(site_info, title, 'mp4', size)
        if not info_only:
            download_urls(segment_urls,
                          title,
                          'mp4',
                          size,
                          output_dir=output_dir,
                          merge=merge)

    if info_only or dry_run:
        return
    if not kwargs.get('caption'):
        print('Skipping danmaku.')
        return
    try:
        title = get_filename(title)
        print('Downloading %s ...\n' % (title + '.cmt.json'))
        cmt = get_srt_json(vid)
        with open(os.path.join(output_dir, title + '.cmt.json'),
                  'w',
                  encoding='utf-8') as x:
            x.write(cmt)
    except Exception as exc:
        # Danmaku is a best-effort extra: the video itself is already
        # handled, so report the problem instead of failing the download.
        print('Failed to download danmaku: {}'.format(exc))
Пример #4
0
def embed_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download every known embedded video found in the page at ``url``.

    The page is fetched once and scanned with each site's embed patterns
    (Youku, Yinyuetai, iQiyi, Netease, Vimeo, Dailymotion, Bilibili,
    bokecc); every match is handed to that site's downloader. If nothing
    matches, the ``src`` of each ``<iframe>`` is followed recursively.

    Args:
        url: Address of the page to scan; also the base for relative
            iframe sources.
        output_dir: Forwarded to every downloader.
        merge: Forwarded to every downloader.
        info_only: Forwarded to every downloader.
        **kwargs: ``recur_lv`` holds the current recursion depth and is
            absent on the top-level call; recursion continues while it is
            below the module-level ``recur_limit``. The whole mapping is
            forwarded to ``iqiyi.download``,
            ``bokecc.bokecc_download_by_id`` and the recursive calls.

    Returns:
        True when at least one embed was found on this page or in one of
        its iframes; a falsy value when a nested call found nothing.

    Raises:
        NotImplementedError: The top-level call (no ``recur_lv``) found
            nothing to download.
    """
    content = get_content(url)
    found = False
    title = match1(content, '<title>([^<>]+)</title>')

    # Youku ids are de-duplicated; the other sites are processed as matched.
    for vid in set(matchall(content, youku_embed_patterns)):
        found = True
        youku_download_by_vid(
            vid, title=title, output_dir=output_dir, merge=merge,
            info_only=info_only
        )

    for vid in matchall(content, yinyuetai_embed_patterns):
        found = True
        yinyuetai_download_by_id(
            vid, title=title, output_dir=output_dir, merge=merge,
            info_only=info_only
        )

    for vid in matchall(content, iqiyi_embed_patterns):
        found = True
        # The downloader takes the two captured groups in swapped order.
        iqiyi_download_by_vid(
            (vid[1], vid[0]), title=title, output_dir=output_dir, merge=merge,
            info_only=info_only
        )

    for netease_url in matchall(content, netease_embed_patterns):
        found = True
        netease_download(
            netease_url, output_dir=output_dir, merge=merge,
            info_only=info_only
        )

    for vimeo_match in matchall(content, vimeo_embed_patters):
        found = True
        # NOTE(review): the original passed the match itself as referer
        # (its loop variable shadowed ``url``). Kept as-is here; confirm
        # whether the embedding page URL was intended instead.
        vimeo_download_by_id(
            vimeo_match, title=title, output_dir=output_dir, merge=merge,
            info_only=info_only, referer=vimeo_match
        )

    for dailymotion_url in matchall(content, dailymotion_embed_patterns):
        found = True
        dailymotion_download(
            dailymotion_url, output_dir=output_dir, merge=merge,
            info_only=info_only
        )

    for aid in matchall(content, bilibili_embed_patterns):
        found = True
        video_url = 'http://www.bilibili.com/video/av{}/'.format(aid)
        bilibili_download(
            video_url, output_dir=output_dir, merge=merge,
            info_only=info_only
        )

    for iqiyi_url in matchall(content, iqiyi_patterns):
        found = True
        iqiyi.download(
            iqiyi_url, output_dir=output_dir, merge=merge,
            info_only=info_only, **kwargs
        )

    for meta in matchall(content, bokecc_patterns):
        found = True
        bokecc.bokecc_download_by_id(
            meta[1], output_dir=output_dir, merge=merge, info_only=info_only,
            **kwargs
        )

    if found:
        return True

    # Try harder, check all iframes
    if 'recur_lv' not in kwargs or kwargs['recur_lv'] < recur_limit:
        depth = kwargs.get('recur_lv')
        next_depth = 1 if depth is None else depth + 1
        # recur_lv is passed explicitly below; forwarding it inside
        # **kwargs as well would raise "multiple values for keyword
        # argument" on every nested level.
        forwarded = {
            key: value for key, value in kwargs.items() if key != 'recur_lv'
        }
        iframes = matchall(content, [r'<iframe.+?src=(?:\"|\')(.+?)(?:\"|\')'])
        for iframe in iframes:
            if iframe.startswith('http'):
                src = iframe
            else:
                # Relative source: resolve it against the current page.
                src = urllib.parse.urljoin(url, iframe)
            found = embed_download(
                src, output_dir=output_dir, merge=merge, info_only=info_only,
                recur_lv=next_depth, **forwarded
            )
            if found:
                return True

    # Only the outermost call reports failure by raising; nested calls
    # return the falsy result so the caller can try the next iframe.
    if not found and 'recur_lv' not in kwargs:
        raise NotImplementedError(url)
    return found