Exemplo n.º 1
0
def ckplayer_download_by_xml(
    ckinfo, output_dir='.', merge=False, info_only=False, **kwargs
):
    """Download media described by a ckplayer info XML string.

    ckinfo: XML text understood by ckplayer_get_info_by_xml().
    The first link determines the content type and extension; total size
    comes from the XML when present, otherwise it is summed over links.
    """
    video_info = ckplayer_get_info_by_xml(ckinfo)

    title = kwargs.get('title', '')  # EAFP via .get instead of try/except
    type_ = ''
    _ext = ''  # fix: was unbound when video_info had no links
    size = 0

    if video_info['links']:
        # Use the 1st link to determine type and extension.
        type_, _ext, size = url_info(video_info['links'][0])

    if 'size' in video_info:
        size = int(video_info['size'])
    else:
        # `size` already counts the 1st link; add the remaining ones.
        for link in video_info['links'][1:]:
            size += url_info(link)[2]

    print_info(site_info, title, type_, size)
    if not info_only:
        download_urls(
            video_info['links'], title, _ext, size, output_dir=output_dir,
            merge=merge
        )
Exemplo n.º 2
0
def suntv_download(url, info_only=False, **kwargs):
    """Download a video from isuntv.com given its page URL."""
    if re.match(r'http://www.isuntv.com/\w+', url):
        API_URL = ('http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,'
                   'App_Web_playcatemp4.ascx.9f08f04f.ashx')
        itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html')
        values = {'itemid': itemid, 'vodid': ''}

        # The API wants a JSON-ish body, so swap single for double quotes.
        data = str(values).replace("'", '"')
        data = data.encode('utf-8')
        req = urllib.request.Request(API_URL, data)
        req.add_header('AjaxPro-Method', 'ToPlay')  # important!
        resp = urllib.request.urlopen(req)
        respData = resp.read()
        respData = respData.decode('ascii').strip('"')  # Ahhhhhhh!

        video_url = 'http://www.isuntv.com' + str(respData)

        html = get_content(url, decoded=False)
        html = html.decode('gbk')
        title = match1(html, '<title>([^<]+)').strip()  # get rid of \r\n s

        _type, ext, size = url_info(video_url)

        print_info(site_info, title, _type, size)
        if not info_only:
            # fix: download the resolved media URL, not the page URL
            download_urls([video_url], title, 'mp4', size, **kwargs)
Exemplo n.º 3
0
def fc2video_download_by_upid(upid,
                              output_dir='.',
                              merge=True,
                              info_only=False,
                              **kwargs):
    """Download an FC2 video identified by its upid."""
    headers = FAKE_HEADERS.copy()
    headers['DNT'] = '1'
    headers['Accept-Encoding'] = 'gzip, deflate, sdch'
    headers['Accept-Language'] = (
        'en-CA,en;q=0.8,en-US;q=0.6,zh-CN;q=0.4,zh;q=0.2')
    headers['X-Requested-With'] = 'ShockwaveFlash/19.0.0.245'
    headers['Connection'] = 'keep-alive'

    # The ginfo API requires the mimi token derived from the upid.
    api_url = ('https://video.fc2.com/ginfo.php?upid={upid}&mimi='
               '{mimi}'.format(upid=upid, mimi=makeMimi(upid)))
    page = get_content(api_url, headers=headers)

    video_url = match1(page, r'filepath=(.+)&sec').replace('&mid', '?mid')
    title = match1(page, r'&title=([^&]+)')

    _type, ext, size = url_info(video_url, headers=headers)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([video_url], title, ext, size, output_dir,
                      merge=merge, headers=headers, **kwargs)
Exemplo n.º 4
0
def nicovideo_download(url, info_only=False, **kwargs):
    """Download a video from nicovideo.jp (requires a login).

    Credentials come from ~/.netrc (machine 'nicovideo') when present,
    otherwise the user is prompted interactively.
    """
    import ssl
    ssl_context = request.HTTPSHandler(
        context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
    cookie_handler = request.HTTPCookieProcessor()
    opener = request.build_opener(ssl_context, cookie_handler)
    request.install_opener(opener)

    import netrc
    import getpass
    try:
        info = netrc.netrc().authenticators('nicovideo')
    except Exception:
        info = None
    # fix: the interactive credential prompt was mangled; restore it
    if info is None:
        user = input('User: ')
        password = getpass.getpass('Password: ')
    else:
        user, password = info[0], info[2]
    print('Logging in...')
    nicovideo_login(user, password)

    html = get_content(url)  # necessary!
    title = match1(html, r'<title>(.+?)</title>')

    vid = url.split('/')[-1].split('?')[0]
    api_html = get_content(
        'http://flapi.nicovideo.jp/api/getflv?v={}'.format(vid))
    real_url = parse.unquote(match1(api_html, r'url=([^&]+)&'))

    _type, ext, size = url_info(real_url)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([real_url], title, ext, size, **kwargs)
Exemplo n.º 5
0
def dailymotion_download(url,
                         output_dir='.',
                         merge=True,
                         info_only=False,
                         **kwargs):
    """Downloads Dailymotion videos by URL."""
    page = get_content(rebuilt_url(url))
    qualities = json.loads(match1(page, r'qualities":({.+?}),"'))
    title = unicodize(
        match1(page, r'"video_title"\s*:\s*"([^"]+)"')
        or match1(page, r'"title"\s*:\s*"([^"]+)"')
    )

    # Probe from best to worst quality; keep the first non-empty URL.
    for quality in ('1080', '720', '480', '380', '240', '144', 'auto'):
        try:
            real_url = qualities[quality][1]['url']
            if real_url:
                break
        except KeyError:
            pass

    mime, ext, size = url_info(real_url)

    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([real_url], title, ext, size,
                      output_dir=output_dir, merge=merge, **kwargs)
Exemplo n.º 6
0
def _download(item, **kwargs):
    """Download one item dict carrying 'fullLinkUrl' and 'title' keys."""
    link = item['fullLinkUrl']
    name = item['title'].strip()
    _, ext, size = url_info(link)
    print_info(site_info=site_info, title=name, type=ext, size=size)
    if kwargs.get('info_only'):
        return
    download_urls([link], name, ext, size, **kwargs)
Exemplo n.º 7
0
def facebook_download(
    url, output_dir='.', merge=True, info_only=False, **kwargs
):
    """Download a Facebook video by page URL, preferring HD sources."""
    html = get_content(url)

    title = match1(html, r'<title id="pageTitle">(.+)</title>')
    if title is None:
        title = url  # fall back to the URL when no page title is found

    def _sources(pattern):
        # Collect, unescape and de-duplicate the JS-encoded source URLs.
        return list({
            unicodize(src.replace('\\/', '/'))
            for src in re.findall(pattern, html)
        })

    sd_urls = _sources(r'sd_src_no_ratelimit:"([^"]*)"')
    hd_urls = _sources(r'hd_src_no_ratelimit:"([^"]*)"')
    urls = hd_urls if hd_urls else sd_urls

    _type, ext, size = url_info(urls[0], True)
    size = urls_size(urls)

    print_info(site_info, title, _type, size)
    if not info_only:
        # NOTE(review): merge is forced off here regardless of the
        # `merge` parameter — confirm this is intentional.
        download_urls(urls, title, ext, size, output_dir, merge=False)
Exemplo n.º 8
0
def baidu_download_album(aid, output_dir='.', merge=True, info_only=False):
    """Download every track (and lyric, when present) of a Baidu album."""
    html = get_content('http://music.baidu.com/album/{}'.format(aid))
    parser = get_parser(html)
    album_name = parser.find('h2', class_='album-name').text
    artist = parser.find('span', class_='author_list')['title']
    output_dir = '{}/{} - {}'.format(output_dir, artist, album_name)

    # The id list is embedded as HTML-escaped JSON in a data attribute.
    raw = match1(html, r'<span class="album-add" data-adddata=\'(.+?)\'>')
    ids = json.loads(raw.replace('&quot', '').replace(';', '"'))['ids']

    for track_nr, _id in enumerate(ids, start=1):
        song_data = baidu_get_song_data(_id)
        song_url = song_data['songLink']
        song_title = song_data['songName']
        song_lrc = song_data['lrcLink']
        file_name = '{:0>2d}.{}'.format(track_nr, song_title)

        _type, ext, size = url_info(song_url)
        print_info(site_info, song_title, _type, size)
        if not info_only:
            download_urls([song_url], file_name, ext, size, output_dir,
                          merge=merge)

        if song_lrc:
            _type, ext, size = url_info(song_lrc)
            print_info(site_info, song_title, _type, size)
            if not info_only:
                download_urls([song_lrc], file_name, ext, size, output_dir)
Exemplo n.º 9
0
def vidto_download(url, info_only=False, **kwargs):
    """Download from vidto.me: re-submit the page form after the site's
    mandatory delay, then scrape the generated download link."""
    html = get_content(url)
    params = {}
    r = re.findall(
        r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">',
        html
    )
    for name, value in r:
        params[name] = value
    data = parse.urlencode(params).encode('utf-8')
    req = request.Request(url, headers=FAKE_HEADERS)
    print('Please wait for 6 seconds...')
    time.sleep(6)  # the site rejects the POST if it arrives too early
    print('Starting')
    new_html = request.urlopen(req, data).read().decode('utf-8', 'replace')
    new_stff = re.search(r'lnk_download" href="(.*?)">', new_html)
    if new_stff:
        url = new_stff.group(1)
        # fname presumably already carries the extension, so ext stays
        # empty — TODO confirm against actual site output.
        title = params['fname']
        _type = ''
        ext = ''
        _, _, size = url_info(url)  # fix: drop unused type/ext locals
        print_info(site_info, title, _type, size)
        if not info_only:
            download_urls([url], title, ext, size, **kwargs)
    else:
        log.wtf("Can't find link, please review")  # fix: typo in message
Exemplo n.º 10
0
def videomega_download(url, info_only=False, **kwargs):
    """Download a videomega.tv video.

    Builds a view.php player URL when given an embedding page, then
    decodes the packed JavaScript in the player page to recover the
    actual media URL.
    """
    # Hot-plug cookie handler
    ssl_context = request.HTTPSHandler(
        context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
    cookie_handler = request.HTTPCookieProcessor()
    opener = request.build_opener(ssl_context, cookie_handler)
    # Referer + this cookie are sent with every request via the opener.
    opener.addheaders = [('Referer', url), ('Cookie', 'noadvtday=0')]
    request.install_opener(opener)

    if re.search(r'view\.php', url):
        php_url = url
    else:
        content = get_content(url)
        # Pull ref/width/height out of the embedding page's script.
        m = re.search(r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"',
                      content)
        ref = m.group(1)
        width, height = m.group(2), m.group(3)
        php_url = (
            'http://videomega.tv/view.php?ref={}&width={}&height={}'.format(
                ref, width, height))
    content = get_content(php_url)

    title = match1(content, r'<title>(.*)</title>')
    # The media URL is hidden inside an eval()'d packed script: a short
    # template whose word characters are placeholders, plus a
    # '|'-separated word list to substitute back in.
    js = match1(content, r'(eval.*)')
    t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)')
    t = re.sub(r'(\w)', r'{\1}', t)  # wrap every word char as a {slot}
    # Map letters a..z (ord 97..122) to indices '10'..'35' — the packer's
    # base-36 digit encoding.
    t = t.translate({87 + i: str(i) for i in range(10, 36)})
    s = match1(js, r"'([^']+)'\.split").split('|')
    src = t.format(*s)  # substitute the word list into the template

    _type, ext, size = url_info(src)

    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([src], title, ext, size, **kwargs)
Exemplo n.º 11
0
def fantasy_download_by_id_channelId(
    id=0, channelId=0, output_dir='.', merge=True, info_only=False, **kwargs
):
    """Download from fantasy.tv given a video id and a channel id."""
    api_url = (
        'http://www.fantasy.tv/tv/playDetails.action?'
        'myChannelId=1&id={id}&channelId={channelId}&t={t}'.format(
            id=id,
            channelId=channelId,
            t=str(random.random())  # random token appended to the query
        )
    )
    html = get_content(api_url)
    html = json.loads(html)

    # 100000 is the API's success status.
    if int(html['status']) != 100000:
        raise Exception('API error!')

    title = html['data']['tv']['title']

    video_url = html['data']['tv']['videoPath']
    headers = FAKE_HEADERS.copy()
    headers['Referer'] = api_url  # send the API URL as the referer
    # fix: don't shadow the builtin `type`
    _type, ext, size = url_info(video_url, headers=headers)

    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls(
            [video_url], title, ext, size, output_dir, merge=merge,
            headers=headers
        )
Exemplo n.º 12
0
def zhanqi_live(room_id,
                merge=True,
                output_dir='.',
                info_only=False,
                **kwargs):
    """Download a zhanqi.tv live stream identified by room id."""
    api_url = ('https://www.zhanqi.tv/api/static/v2.2/room/domain/{}.json'
               .format(room_id))
    room = json.loads(get_content(api_url))['data']
    if room['status'] != '4':  # anything but '4' means not live
        raise Exception('The live stream is not online!')

    title = '{}:{}'.format(room['nickname'], room['title'])
    # The stream descriptor is base64-encoded JSON inside flashvars.
    levels = base64.b64decode(
        room['flashvars']['VideoLevels']).decode('utf8')
    m3u8_url = json.loads(levels)['streamUrl']

    print_info(site_info, title, 'm3u8', 0,
               m3u8_url=m3u8_url, m3u8_type='master')
    if not info_only:
        download_url_ffmpeg(m3u8_url, title, 'mp4',
                            output_dir=output_dir, merge=merge)
Exemplo n.º 13
0
def xiami_download_song(sid, output_dir='.', info_only=False):
    """Download a single Xiami song (and its lyric when available)."""
    xml = get_content(
        'http://www.xiami.com/song/playlist/id/{}/object_name/default/'
        'object_id/0'.format(sid))
    doc = parseString(xml)
    i = doc.getElementsByTagName('track')[0]
    artist = i.getElementsByTagName('artist')[0].firstChild.nodeValue
    album_name = i.getElementsByTagName('album_name')[0].firstChild.nodeValue
    song_title = i.getElementsByTagName('name')[0].firstChild.nodeValue
    url = location_dec(
        i.getElementsByTagName('location')[0].firstChild.nodeValue)
    # fix: lrc_url was left unbound when the <lyric> node is missing
    lrc_url = None
    try:
        lrc_url = i.getElementsByTagName('lyric')[0].firstChild.nodeValue
    except Exception:
        pass
    type_, ext, size = url_info(url)
    if not ext:
        ext = 'mp3'  # default when the server gives no extension

    print_info(site_info, song_title, ext, size)
    if not info_only:
        file_name = '{} - {} - {}'.format(song_title, artist, album_name)
        download_urls([url], file_name, ext, size, output_dir)
        if lrc_url:
            try:
                xiami_download_lyric(lrc_url, file_name, output_dir)
            except Exception:
                pass  # lyric download is best-effort
Exemplo n.º 14
0
def xiami_download_mv(url, output_dir='.', merge=True, info_only=False):
    """Download a Xiami MV by fetching it in ~20 MB byte-range segments."""
    # FIXME: broken merge
    page = get_content(url)
    title = re.findall('<title>([^<]+)', page)[0]
    vid, uid = re.findall(r'vid:"(\d+)",uid:"(\d+)"', page)[0]
    api_url = (
        'http://cloud.video.taobao.com/videoapi/info.php?vid={}&uid={}'.format(
            vid, uid))
    result = get_content(api_url)
    doc = parseString(result)
    video_url = doc.getElementsByTagName('video_url')[-1].firstChild.nodeValue
    length = int(doc.getElementsByTagName('length')[-1].firstChild.nodeValue)

    # Build start_/end_ range URLs of at most 20,000,000 bytes each.
    # There is no normal loop exit: the loop ends when url_info() raises
    # on a range probe past the end of the stream.
    v_urls = []
    k_start = 0
    total_size = 0
    while True:
        k_end = k_start + 20000000
        if k_end >= length:
            k_end = length - 1
        v_url = video_url + '/start_{}/end_{}/1.flv'.format(k_start, k_end)
        try:
            _, ext, size = url_info(v_url)
        except Exception:
            # First failing probe marks the end of the stream.
            break
        v_urls.append(v_url)
        total_size += size
        k_start = k_end + 1

    print_info(site_info, title, ext, total_size)
    if not info_only:
        download_urls(v_urls, title, ext, total_size, output_dir, merge=merge)
Exemplo n.º 15
0
def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download a Vine video, trying several page formats in turn."""
    html = get_content(url)
    video_id = match1(url, r'vine.co/v/([^/]+)')
    title = match1(html, r'<title>([^<]*)</title>')

    # 1) Normal pages expose the stream via a twitter:player meta tag.
    stream = match1(
        html,
        r'<meta property="twitter:player:stream" content="([^"]*)">'
    )
    if not stream:  # https://vine.co/v/.../card
        # 2) Card pages embed a JS-escaped videoUrl instead.
        stream = match1(html, r'"videoUrl":"([^"]+)"')
        if stream:
            stream = stream.replace('\\/', '/')
        else:
            # 3) Last resort: the archive API.
            archive = json.loads(get_content(
                'https://archive.vine.co/posts/{}.json'.format(video_id)))
            stream = archive['videoDashUrl']
            title = archive['description']
            if title == '':
                title = '{}_{}'.format(
                    archive['username'].replace(' ', '_'), video_id)

    mime, ext, size = url_info(stream)

    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([stream], title, ext, size, output_dir, merge=merge)
Exemplo n.º 16
0
    def __call__(self, url, **kwargs):
        """Extract *url* and download it unless info_only is requested.

        self.extract() is expected to return a dict like::

            {
                'urls': [],
                'title': '',
                'file_format': '',
                'size': '',
            }
        """
        data = self.extract(url, **kwargs)

        if not self.need_download:
            return

        file_format = data.get('file_format', 'mp4')
        size = data.get('size')
        urls = data['urls']
        if not size:
            # Probe sizes over the network only when the extractor
            # did not already provide one.
            if len(urls) == 1:
                size = url_size(urls[0])
            else:
                size = urls_size(urls)
        print_info(site_info=self.site_info,
                   title=data['title'],
                   type=file_format,
                   size=size)
        # fix: don't KeyError when the caller omits info_only
        if not kwargs.get('info_only'):
            download_urls(urls=urls,
                          title=data['title'],
                          ext=file_format,
                          total_size=size,
                          **kwargs)
Exemplo n.º 17
0
def sina_zxt(url, info_only=False, **kwargs):
    """Download a Sina zxt video addressed by the URL fragment."""
    ep = 'http://s.video.sina.com.cn/video/play?video_id='
    frag = urllib.parse.urlparse(url).fragment
    if not frag:
        log.wtf('No video specified with fragment')
    meta = json.loads(get_content(ep + frag))
    if meta['code'] != 1:
        # Yes they use 1 for success.
        log.wtf(meta['message'])
    title = meta['data']['title']
    videos = sorted(meta['data']['videos'], key=lambda v: int(v['size']))
    if not videos:
        log.wtf('No video file returned by API server')

    best = videos[-1]  # largest file after the sort above
    vid = best['file_id']
    container = best['type']
    size = int(best['size'])
    if container == 'hlv':
        container = 'flv'

    urls, _, _ = video_info(api_req(vid))
    print_info(site_info, title, container, size)
    if not info_only:
        download_urls(urls, title, container, size, **kwargs)
Exemplo n.º 18
0
def panda_download(url, info_only=False, **kwargs):
    """Download a panda.tv live stream given its room URL."""
    roomid = re.search(r'/(\d+)', url)  # fix: raw string for the regex
    if roomid is None:
        log.wtf('Cannot found room id for this url')
    roomid = roomid.group(1)
    json_request_url = (
        'http://www.panda.tv/api_room_v2?roomid={}&__plat=pc_web&_={}'.format(
            roomid, int(time.time())))
    content = get_content(json_request_url)
    api_json = json.loads(content)

    errno = api_json['errno']
    errmsg = api_json['errmsg']
    if errno:
        raise ValueError('Errno : {}, Errmsg : {}'.format(errno, errmsg))
    data = api_json['data']
    title = data['roominfo']['name']
    room_key = data['videoinfo']['room_key']
    plflag = data['videoinfo']['plflag'].split('_')
    status = data['videoinfo']['status']
    # fix: compare strings with !=, not identity (`is not`)
    if status != '2':  # '2' means the stream is live
        raise ValueError(
            'The live stream is not online! (status:{})'.format(status))

    data2 = json.loads(data['videoinfo']['plflag_list'])
    rid = data2['auth']['rid']
    sign = data2['auth']['sign']
    ts = data2['auth']['time']
    real_url = ('http://pl{}.live.panda.tv/live_panda/{}.flv?sign={}&ts={}&'
                'rid={}'.format(plflag[1], room_key, sign, ts, rid))
    # A live stream has no determinate size.
    print_info(site_info, title, 'flv', float('inf'))
    if not info_only:
        download_urls([real_url], title, 'flv', None, **kwargs)
Exemplo n.º 19
0
def douyutv_video_download(url,
                           output_dir='.',
                           merge=True,
                           info_only=False,
                           **kwargs):
    """Download a recorded Douyu video via the vmobile getInfo API."""
    api_endpoint = 'http://vmobile.douyu.com/video/getInfo?vid='

    vid_match = re.search(r'show/([0-9A-Za-z]+)', url)
    if vid_match is None:
        log.wtf('Unknown url pattern')
    vid = vid_match.group(1)

    page = get_content(url)
    title_match = re.search(r'<h1>(.+?)</h1>', page)
    # Fall back to the vid when the page carries no <h1> title.
    title = vid if title_match is None else title_match.group(1)

    meta = json.loads(get_content(api_endpoint + vid))
    if meta['error'] != 0:
        log.wtf('Error from API server')
    m3u8_url = meta['data']['video_url']
    print_info('Douyu Video', title, 'm3u8', 0, m3u8_url=m3u8_url)
    if not info_only:
        segment_urls = general_m3u8_extractor(m3u8_url)
        download_urls(segment_urls, title, 'ts', 0,
                      output_dir=output_dir, merge=merge, **kwargs)
Exemplo n.º 20
0
def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download an mtv81.com video via its mediaGen XML (RTMP source)."""
    html = get_content(url)
    title = unescape_html('|'.join(
        match1(html, r'<title>(.*?)</title>').split('|')[:-2]))

    # e.g. mgid%3Auma%3Avideo%3Amtv81.com%3A897974
    vid = match1(html, r'getTheVideo\("(.*?)"')
    xml = parseString(get_content(
        'http://intl.esperanto.mtvi.com/www/xml/media/mediaGen.jhtml?uri={}&'
        'flashPlayer=LNX%2013,0,0,206&geo=CN&sid=123456'.format(vid)))

    # Take the <src> with the largest numeric token between underscores
    # (presumably the bitrate — same ordering as the original sort).
    sources = [node.firstChild.nodeValue
               for node in xml.getElementsByTagName("src")]
    sources.sort(key=lambda s: int(match1(s, r'_(\d+?)_')))
    url = sources[-1]

    mediatype, ext, size = 'mp4', 'mp4', 0
    print_info(site_info, title, mediatype, size)
    # Example rtmpdump invocation:
    # rtmpdump -r 'rtmpe://.../xxx.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf  # noqa

    # because rtmpdump is unstable, may need several tries
    if not info_only:
        download_rtmp_url(
            url=url,
            title=title,
            ext=ext,
            params={
                '--swfVfy':
                ('http://media.mtvnservices.com/player/prime/mediaplayer'
                 'prime.1.10.8.swf')
            },
            output_dir=output_dir)
Exemplo n.º 21
0
def baomihua_download_by_id(_id,
                            title=None,
                            output_dir='.',
                            merge=True,
                            info_only=False,
                            **kwargs):
    """Download a baomihua.com video given its flv id."""
    html = get_content(
        'http://play.baomihua.com/getvideourl.aspx?flvid={}&devicetype='
        'phone_app'.format(_id))

    def _field(pattern):
        # Extract a mandatory query-string field from the API response.
        value = match1(html, pattern)
        assert value
        return value

    host = _field(r'host=([^&]*)')
    _type = _field(r'videofiletype=([^&]*)')
    vid = _field(r'&stream_name=([^&]*)')
    dir_str = match1(html, r'&dir=([^&]*)').strip()

    url = 'http://{}/{}/{}.{}'.format(host, dir_str, vid, _type)
    _, ext, size = url_info(url)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir,
                      merge=merge, **kwargs)
Exemplo n.º 22
0
def naver_download_by_url(url, info_only=False, **kwargs):
    """Download a Naver TV video, picking the widest encoding."""
    ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}'
    page = get_content(url)
    og_video_url = re.search(
        r"<meta\s+property=\"og:video:url\"\s+content='(.+?)'>", page
    ).group(1)
    qs = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query)
    vid = qs['vid'][0]
    key = qs['outKey'][0]

    meta_json = json.loads(get_content(ep.format(vid, key)))
    if 'errorCode' in meta_json:
        log.wtf(meta_json['errorCode'])
    title = meta_json['meta']['subject']

    # Sort ascending by encoding width; the last entry is the widest.
    by_width = sorted(
        meta_json['videos']['list'],
        key=lambda video: video['encodingOption']['width']
    )
    video_url = by_width[-1]['source']

    size = url_size(video_url)
    print_info(site_info, title, 'mp4', size)
    if not info_only:
        download_urls([video_url], title, 'mp4', size, **kwargs)
Exemplo n.º 23
0
def miaopai_download(url,
                     output_dir='.',
                     merge=False,
                     info_only=False,
                     **kwargs):
    """Download a Miaopai video by fid, or via the mobile Weibo API."""
    fid = match1(url, r'\?fid=(\d{4}:\w{32})')
    if fid:
        miaopai_download_by_fid(fid, output_dir, merge, info_only)
        return
    if '/p/230444' in url:
        fid = match1(url, r'/p/230444(\w+)')
        miaopai_download_by_fid('1034:' + fid, output_dir, merge, info_only)
        return

    # Plain status URL: ask the mobile Weibo status API instead.
    status_id = url.split('?')[0].split('/')[-1]
    api_url = 'https://m.weibo.cn/statuses/show?id={}'.format(status_id)
    status = json.loads(
        get_content(api_url, headers=config.FAKE_HEADERS_MOBILE))
    page_info = status['data']['page_info']
    video_url = page_info['media_info']['stream_url']
    title = page_info['content2']
    video_format = 'mp4'
    size = url_size(video_url)
    print_info(site_info=site_info,
               title=title,
               type=video_format,
               size=size)
    if not info_only:
        download_urls(urls=[video_url],
                      title=title,
                      ext=video_format,
                      total_size=size,
                      **kwargs)
Exemplo n.º 24
0
def theplatform_download_by_pid(
    pid, title, output_dir='.', merge=True, info_only=False, **kwargs
):
    """Download a thePlatform video by pid via its SMIL manifest (RTMP)."""
    smil_url = (
        'http://link.theplatform.com/s/dJ5BDC/{}/meta.smil?format=smil'
        '&mbr=true'.format(pid)
    )
    smil = get_content(smil_url)
    smil_base = unescape_html(match1(smil, r'<meta base="([^"]+)"'))
    # Invert the (src, height) pairs into a height -> src map; the dict()
    # pass keeps the last src seen for each height.
    smil_videos = {
        y: x for x, y in dict(
            re.findall(r'<video src="([^"]+)".+height="([^"]+)"', smil)
        ).items()
    }
    # fix: smil_video was unbound when no known height matched,
    # turning the assert below into a NameError
    smil_video = None
    for height in ['1080', '720', '480', '360', '240', '216']:
        if height in smil_videos:
            smil_video = smil_videos[height]
            break
    assert smil_video

    _type, ext, size = 'mp4', 'mp4', 0

    print_info(site_info, title, _type, size)
    if not info_only:
        download_rtmp_url(
            url=smil_base, title=title, ext=ext,
            params={"-y": '{}:{}'.format(ext, smil_video)},
            output_dir=output_dir
        )
Exemplo n.º 25
0
def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
    """Download a Baidu music song (and its lyric when available)."""
    data = baidu_get_song_data(sid)
    lrc = None  # fix: was unbound on the fallback branch below
    if data is not None:
        url = data['songLink']
        title = data['songName']
        artist = data['artistName']
        # album = data['albumName']
        lrc = data['lrcLink']
        file_name = '{} - {}'.format(title, artist)
    else:
        # Song API gave nothing; scrape the song page instead.
        html = get_content('http://music.baidu.com/song/{}'.format(sid))
        url = match1(html, r'data_url="([^"]+)"')
        title = match1(html, r'data_name="([^"]+)"')
        file_name = title

    _type, ext, size = url_info(url)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([url], file_name, ext, size, output_dir, merge=merge)

    if lrc:
        # Lyric download is best-effort; ignore any failure.
        try:
            _type, ext, size = url_info(lrc)
            print_info(site_info, title, _type, size)
            if not info_only:
                download_urls([lrc], file_name, ext, size, output_dir)
        except Exception:
            pass
Exemplo n.º 26
0
Arquivo: qq.py Projeto: zzlettle/Lulu
def qq_download_by_vid(vid,
                       title,
                       output_dir='.',
                       merge=True,
                       info_only=False):
    """Download a QQ video by vid.

    Queries the getinfo API for stream/segment metadata, then getkey for
    each segment's vkey, and downloads all segments for merging.
    """
    info_api = ('http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333'
                '&platform=11&defnpayver=1&vid={}'.format(vid))
    info = get_content(info_api)
    # Response is 'QZOutputJson=...;' — [:-1] drops the trailing char.
    video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1])
    fn_pre = video_json['vl']['vi'][0]['lnk']
    title = video_json['vl']['vi'][0]['ti']
    host = video_json['vl']['vi'][0]['ul']['ui'][0]['url']
    streams = video_json['fl']['fi']
    seg_cnt = video_json['vl']['vi'][0]['cl']['fc']
    if seg_cnt == 0:
        # Unsegmented video: still one downloadable part.
        seg_cnt = 1

    # best_quality = streams[-1]['name']
    # Pick the format id of the last stream entry.
    part_format_id = streams[-1]['id']

    part_urls = []
    total_size = 0
    for part in range(1, seg_cnt + 1):
        filename = '{}.p{}.{}.mp4'.format(fn_pre, str(part_format_id % 10000),
                                          str(part))
        key_api = ('http://vv.video.qq.com/getkey?otype=json&platform=11&'
                   'format={}&vid={}&filename={}&appver=3.2.19.333'.format(
                       part_format_id, vid, filename))
        part_info = get_content(key_api)
        key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1])
        if key_json.get('key') is None:
            # No per-part key: fall back to the whole-file fvkey and URL.
            vkey = video_json['vl']['vi'][0]['fvkey']
            url = '{}{}?vkey={}'.format(
                video_json['vl']['vi'][0]['ul']['ui'][0]['url'],
                fn_pre + '.mp4', vkey)
        else:
            vkey = key_json['key']
            url = '{}{}?vkey={}'.format(host, filename, vkey)
        if not vkey:
            # Missing key on the first part is fatal; on later parts just
            # warn and stop — earlier parts are still downloaded.
            if part == 1:
                log.wtf(key_json['msg'])
            else:
                log.w(key_json['msg'])
            break

        part_urls.append(url)
        _, ext, size = url_info(url)
        total_size += size

    print_info(site_info, title, ext, total_size)
    if not info_only:
        download_urls(part_urls,
                      title,
                      ext,
                      total_size,
                      output_dir=output_dir,
                      merge=merge)
Exemplo n.º 27
0
def ku6_download(url, info_only=False, **kwargs):
    """Download a ku6.com video given its page URL."""
    page = get_content(url)
    raw_src = match1(page, r'type: "video/mp4", src: "(.+)"')
    # Escape spaces first, then percent-quote anything non-printable.
    video = parse.quote(raw_src.replace(' ', '%20'), safe=string.printable)
    title = match1(page, r'document.title = "(.+)"')
    _type, ext, size = url_info(video)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([video], title, ext, size, **kwargs)
Exemplo n.º 28
0
def joy_download(url, info_only=False, **kwargs):
    """Download a video from a Joy page (<source> tag + <h1> title)."""
    parser = get_parser(get_content(url))
    src = parser.source['src']
    title = parser.h1.text.strip()
    _, ext, size = url_info(src)
    print_info(site_info, title, ext, size)
    if info_only:
        return
    download_urls([src], title, ext, size, **kwargs)
Exemplo n.º 29
0
def sohu_download(url, info_only=False, **kwargs):
    """Download a Sohu TV video.

    Resolves the vid from the URL (or the page body), queries the
    vrs_flash API — trying alternative vid variants for tv.sohu.com —
    and downloads the clip list resolved through real_url().
    """
    if re.match(r'http://share.vrs.sohu.com', url):
        vid = match1(url, 'id=(\d+)')
    else:
        html = get_content(url)
        vid = match1(html, r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?')
    assert vid

    if re.match(r'http[s]://tv.sohu.com/', url):
        info = json.loads(
            get_content(
                'http://hot.vrs.sohu.com/vrs_flash.action?vid={}'.format(vid)))
        # Try alternative vid fields (presumably higher-quality variants
        # of the same video — TODO confirm ordering).
        for qtyp in ['oriVid', 'superVid', 'highVid', 'norVid', 'relativeId']:
            if 'data' in info:
                hqvid = info['data'][qtyp]
            else:
                hqvid = info[qtyp]
            if hqvid != 0 and hqvid != vid:
                info = json.loads(
                    get_content(
                        'http://hot.vrs.sohu.com/vrs_flash.action?vid={}'.
                        format(hqvid)))
                # Skip variants whose response lacks 'allot' (used as the
                # host below).
                if 'allot' not in info:
                    continue
                break
        host = info['allot']
        tvid = info['tvid']
        urls = []
        data = info['data']
        title = data['tvName']
        size = sum(data['clipsBytes'])
        # The three clip lists must line up element-for-element.
        assert len(data['clipsURL']) == len(data['clipsBytes']) \
            == len(data['su'])
        for new, clip, ck in zip(data['su'], data['clipsURL'], data['ck']):
            clipURL = urlparse(clip).path
            urls.append(real_url(host, hqvid, tvid, new, clipURL, ck))

    else:
        # Other hosts go through the my.tv.sohu.com videonew.do API.
        info = json.loads(
            get_content(
                'http://my.tv.sohu.com/play/videonew.do?vid={}&referer='
                'http://my.tv.sohu.com'.format(vid)))
        host = info['allot']
        tvid = info['tvid']
        urls = []
        data = info['data']
        title = data['tvName']
        # Here clipsBytes arrive as strings, hence the int conversion.
        size = sum(map(int, data['clipsBytes']))
        assert len(data['clipsURL']) == len(data['clipsBytes']) \
            == len(data['su'])
        for new, clip, ck, in zip(data['su'], data['clipsURL'], data['ck']):
            clipURL = urlparse(clip).path
            urls.append(real_url(host, vid, tvid, new, clipURL, ck))

    print_info(site_info, title, 'mp4', size)
    if not info_only:
        download_urls(urls, title, 'mp4', size, refer=url, **kwargs)
Exemplo n.º 30
0
def toutiao_download(url, info_only=False, **kwargs):
    """Download a Toutiao video given its page URL."""
    html = get_content(url)
    video_id = match1(html, r"videoid\s*:\s*'([^']+)',\n")
    title = match1(html, r"title: '([^']+)'.replace")
    # Ask the API for the list of video source files.
    files = get_file_by_vid(video_id)
    first_url = files[0].url
    _type, ext, size = url_info(first_url)
    print_info(site_info=site_info, title=title, type=_type, size=size)
    if not info_only:
        download_urls([first_url], title, ext, size, **kwargs)