示例#1
0
文件: xuite.py 项目: JiasHuang/vod
def getSource(url):
    """Build a local two-entry playlist for a xuite vlog page.

    For ``http://vlog.xuite.net/play/`` URLs the page is loaded first and, when
    it contains a ``http://m.xuite.net/vlog/...`` link, that link replaces
    ``url``.  The resulting page is then fetched -- with a POST carrying
    ``pwInput`` when ``getKey(url)`` returns a key, with a plain load
    otherwise -- and the first ``data-original`` attribute is used as the
    stream URL.

    Side effects: changes the working directory to ``xdef.workdir`` and writes
    ``xuite.m3u`` there (the ``q=720`` variant first, then the original URL).

    Returns ``xdef.workdir`` concatenated with ``xuite.m3u``, or ``''`` when no
    ``data-original`` attribute is found.
    """
    if re.search(r'^http://vlog.xuite.net/play/', url):
        txt = xurl.load(url)
        m = re.search(r'http://m.xuite.net/vlog/([^"]*)', txt)
        if m:
            url = m.group(0)

    os.chdir(xdef.workdir)
    key = getKey(url)
    if key:
        txt = xurl.post(url, {'pwInput': key})
    else:
        txt = xurl.load(url)

    m = re.search(r'data-original="([^"]*)"', txt)
    if not m:
        return ''

    src_sd = m.group(1)
    print('\n[xuite][src]\n\n\t%s' %(src_sd))
    src_hd = re.sub('q=360', 'q=720', src_sd)
    m3u = 'xuite.m3u'
    # The context manager closes the file even if one of the writes fails.
    with open(m3u, 'w') as fd:
        fd.write(src_hd+'\n')
        fd.write(src_sd+'\n')
    return '%s%s' %(xdef.workdir, m3u)
示例#2
0
def loadM3U8(url):
    """Fetch ``url`` and turn its response into a local ``.m3u8`` file.

    If the response carries an ``"m3u8":"..."`` field, its content is saved
    verbatim.  Otherwise every ``"l":"..."`` entry containing ``f4v?`` is
    collected: absolute ``http`` entries are used directly, relative ones are
    resolved through ``https://data.video.iqiyi.com/videos`` and the ``"l"``
    entries of that second response are collected instead.

    Returns the local playlist path, or ``None`` when nothing usable is found.
    """
    txt = xurl.load(url)
    # Undo the escaping in the response: "\/" -> "/" and the two-character
    # sequence "\n" -> a real newline.
    txt = txt.replace('\\/', '/')
    txt = txt.replace('\\n', '\n')

    m = re.search(r'"m3u8":"([^"]*)"', txt)
    if m:
        local = xurl.genLocal(url, prefix='vod_list_', suffix='.m3u8')
        xurl.saveLocal(local, m.group(1))
        return local

    results = []
    for l in re.finditer(r'"l":"([^"]*)"', txt):
        part = l.group(1)
        if not re.search(r'f4v\?', part):
            continue
        if part.startswith('http'):
            results.append(part)
        else:
            data_url = 'https://data.video.iqiyi.com/videos' + part
            results.extend(
                v.group(1)
                for v in re.finditer(r'"l":"([^"]*)"', xurl.load(data_url)))

    if results:
        local = xurl.genLocal(url, prefix='vod_list_', suffix='.m3u8')
        xurl.saveM3U8(local, results)
        return local
    return None
示例#3
0
文件: pianku.py 项目: JiasHuang/vod
def extract(url):
    """Return the entries found on a pianku page.

    When the last path component of ``url`` is exactly 15 characters long, the
    download list is requested from the ``ajax/downurl`` endpoint and each
    ``<li><a href=...>`` row becomes an ``entryObj``.  For any other URL the
    page itself is scanned for thumbnail links, each becoming a ``pageObj``.
    """
    basename = url.split('/')[-1]

    if len(basename) != 15:
        return [
            pageObj(urljoin(url, hit.group(1)), hit.group(2),
                    urljoin(url, hit.group(3)))
            for hit in re.finditer(
                r'<a href="(.*?)" title="(.*?)" target="_blank"><img src=".*?"\s+data-funlazy="(.*?)"',
                load(url))
        ]

    ajax_url = 'https://www.pianku.tv/ajax/downurl/%s_tv/' % (basename[0:10])
    cookie_file = xurl.genLocal(url, suffix='.cookie')

    # First request: result is discarded, it only runs with '-c <cookie_file>'
    # (presumably a curl-style "save cookies" option -- verify against xurl).
    xurl.load(url, opts=['-c %s' % (cookie_file)])

    # Second request: reuse that cookie file and send the AJAX/referer headers.
    ajax_opts = [
        '-b %s' % (cookie_file),
        '-H \'x-requested-with: XMLHttpRequest\'',
        '-H \'referer: %s\'' % (url),
    ]
    listing = xurl.load(ajax_url, opts=ajax_opts)

    return [
        entryObj(urljoin(url, hit.group(1)), hit.group(2))
        for hit in re.finditer(r'<li><a href="([^"]*)">(.*?)</a></li>', listing)
    ]
示例#4
0
文件: youtube.py 项目: JiasHuang/vod
def extract_youtube_channels(url):
    """Collect the channels listed on a YouTube page as ``entryObj`` items.

    The page's initial data (``parseYoutubeInitialDataJSON``) is combined with
    the JSON of every ``"continuation"`` token found in the page, fetched from
    ``browse_ajax``.  Each ``gridChannelRenderer`` item found in that data is
    turned into a 'Channel' entry; items that cannot be converted are logged
    and skipped.
    """
    objs = []
    datas = [parseYoutubeInitialDataJSON(url)]
    txt = xurl.load(url)
    m1 = re.search(r'"INNERTUBE_CONTEXT_CLIENT_VERSION":"([^"]*)"', txt)
    m2 = re.search(r'"INNERTUBE_CONTEXT_CLIENT_NAME":(\w+)', txt)

    # The continuation request headers are built from both client fields; if
    # either is missing they cannot be built, so continuation pages are
    # skipped instead of failing on a None match.
    if m1 and m2:
        headers = [
            '-H \'x-youtube-client-version: %s\'' % (m1.group(1)),
            '-H \'x-youtube-client-name: %s\'' % (m2.group(1)),
        ]
        for m in re.finditer(r'"continuation":"([^"]*)"', txt):
            cont_url = ('https://www.youtube.com/browse_ajax?continuation='
                        + m.group(1))
            # A fresh list per request, as the original passed.
            cont_txt = xurl.load(cont_url, opts=list(headers), ref=url)
            datas.append(json.loads(cont_txt))

    for data in datas:
        for x in findItem(data, ['gridChannelRenderer']):
            try:
                channelId = x['channelId'].encode('utf8')
                link = 'https://www.youtube.com/channel/' + channelId
                title = x['title']['simpleText'].encode('utf8')
                image = x['thumbnail']['thumbnails'][0]['url'].encode('utf8')
                objs.append(entryObj(link, title, image, 'Channel', False))
            except Exception:
                # Best effort: a malformed item is logged and skipped.
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                log('Exception:\n' + str(x))

    return objs
示例#5
0
def getSource(url, fmt, ref):
    """Download the manifest referenced by youtube-dl's JSON output.

    ``youtubedl.extractURL`` yields a local JSON path; the first
    ``"manifest_url"`` found in that file is downloaded to a sibling path with
    the ``.json`` extension replaced by ``.m3u8``.

    ``fmt`` and ``ref`` are accepted but not used here.

    Returns the local ``.m3u8`` path, or ``None`` when the JSON has no
    ``manifest_url``.
    """
    local_json = youtubedl.extractURL(url, dontParseJson=True)
    # Escape the dot and anchor at the end so only the file extension is
    # rewritten, not an arbitrary "<any char>json" somewhere in the path.
    local_m3u8 = re.sub(r'\.json$', '.m3u8', local_json)
    m = re.search(r'"manifest_url": "([^"]*)"', xurl.readLocal(local_json))
    if not m:
        return None
    xurl.load(m.group(1), local=local_m3u8)
    return local_m3u8
示例#6
0
def getSource(url, fmt, ref):
    """Resolve a ``vod-play-id`` page to an m3u8 URL.

    The first ``"url":"..."`` value on the page (backslashes removed) is
    loaded; if that response references a nested ``*.m3u8``, the nested
    reference is joined to it and returned, otherwise the first URL itself is
    returned.

    ``fmt`` and ``ref`` are accepted but not used here.

    Returns ``None`` for URLs without ``vod-play-id`` or when anything fails
    (in which case ``Exception`` is printed).
    """
    if not re.search(r'vod-play-id', url):
        return None
    try:
        txt = xurl.load(url)
        m = re.search(r'"url":"([^"]*)"', txt)
        # A missing match raises AttributeError here and is handled below.
        url_m3u8 = m.group(1).replace('\\', '')
        txt_m3u8 = xurl.load(url_m3u8)
        m = re.search(r'(.*?\.m3u8)\s*', txt_m3u8)
        if m:
            return xurl.urljoin(url_m3u8, m.group(1))
        return url_m3u8
    except Exception:
        # Best effort; unlike a bare except this does not swallow
        # KeyboardInterrupt/SystemExit.
        print('Exception')
    return None
示例#7
0
文件: load.py 项目: JiasHuang/vod
def index(req):
    """Dispatch a request to the matching JSON producer.

    The form fields are checked in the priority order ``j`` (json), ``p``
    (page), ``q`` (query, together with ``s`` search and ``x`` extra) and ``d``
    (dir); the first one that is set decides what is written to ``req``.
    For ``j`` and ``p`` the value from ``getUnparsedURL(req)`` takes precedence
    over the form value when it is truthy.  Nothing is written when no field
    is set.  Always returns ``None``.
    """
    req.content_type = 'text/html; charset=utf-8'
    form = req.form or util.FieldStorage(req)

    page_arg, query_arg, dir_arg, search_arg, extra_arg, json_arg = [
        form.get(field, None) for field in ('p', 'q', 'd', 's', 'x', 'j')
    ]

    if json_arg:
        xurl.init(logfile='vod-load-json.log')
        target = getUnparsedURL(req) or json_arg
        req.write(xurl.load(target))
        return

    if page_arg:
        xurl.init(logfile='vod-page.log')
        target = getUnparsedURL(req) or page_arg
        req.write(page.getPageJSON(target))
        return

    if query_arg:
        xurl.init(logfile='vod-page-search.log')
        req.write(page.getSearchJSON(query_arg, search_arg, extra_arg))
        return

    if dir_arg:
        req.write(page.getDIRJSON(dir_arg))

    return
示例#8
0
文件: jwplayer.py 项目: JiasHuang/vod
def getSource(dataLink):
    """Return the first labelled video file URL for ``dataLink``.

    Loads the ``ts-ajax`` endpoint for ``dataLink`` and returns the ``file``
    value of the first ``file: '...' ... label: '<N>p'`` pair in the response,
    or ``None`` when there is none.
    """
    url = 'http://play.wtutor.net/wp-admin/admin-ajax.php?action=ts-ajax&p=%s&n=1' %dataLink
    txt = xurl.load(url)
    # Raw string: the original non-raw literal relied on invalid escape
    # sequences (\s, \d, \}).  Only the first match is needed, so search()
    # replaces findall()[0].
    m = re.search(r'''file\s*:\s*["']([^"']+).+?label\s*:\s*["'](\d+)p[^}]''', txt)
    return m.group(1) if m else None
示例#9
0
def get_tracks(no, bno, args):
    """Load the broker-trace table for stock ``no`` and broker ``bno``.

    The page is fetched through ``xurl.load`` with the cache/verbosity options
    taken from ``args`` (``cookies``, ``cache``, ``cacheOnly``, ``verbose``).
    Every six-column table row becomes a ``track``.

    Returns the tracks in reverse page order (last row of the page first).
    When no row is found and the page contains ``alert``, the local copy is
    deleted (presumably so an error page is not reused from cache -- confirm).
    """
    url = 'https://histock.tw/stock/brokertrace.aspx?bno={b}&no={n}'.format(
        b=bno, n=no)
    url_opts = []
    if args.cookies:
        url_opts.append('-H \'cookie: ' + args.cookies + '\'')

    local = xurl.genLocal(url, prefix='twstock_load_broker_')
    txt = xurl.load(url,
                    local=local,
                    opts=url_opts,
                    cache=args.cache,
                    cacheOnly=args.cacheOnly,
                    verbose=args.verbose)

    # Build in page order, then reverse once: same result as inserting every
    # row at index 0, without the quadratic shifting.
    vec = [
        track(*m.groups())
        for m in re.finditer(
            r'<td>(.*?)</td><td>([\d|,]+)</td><td>(\d+[.]\d*)</td><td>([\d|,]+)</td><td>(\d+[.]\d*)</td><td>(\d+[.]\d*)</td>',
            txt)
    ]
    vec.reverse()

    if not vec and re.search('alert', txt):
        os.remove(local)

    return vec
示例#10
0
def get_exchange_rate_infos(data):
    """Build exchange-rate infos for the currencies listed in ``data``.

    For each entry of ``data['ExchangeRates']`` the downloaded CSV is searched
    for that currency's bank-buying and bank-selling columns; on a match an
    ``exchange_rate_info`` is created from the four captured values and the
    entry's ``flts`` filter expressions are attached.  Each filter is then
    evaluated into ``flts_ret`` (which stays ``0`` for filters that fail).

    Returns the list of infos, or ``[]`` when ``data`` has no
    ``ExchangeRates`` key.
    """
    if 'ExchangeRates' not in data:
        return []

    url = 'https://rate.bot.com.tw/xrt/flcsv/0/day'
    txt = xurl.load(url, cache=False)
    infos = []

    for exr in data['ExchangeRates']:
        c = exr['currency']
        m = re.search(re.escape(c) + r',本行買入,([^,]*),([^,]*),.*?本行賣出,([^,]*),([^,]*),', txt)
        if m:
            info = exchange_rate_info(c, m.group(1), m.group(2), m.group(3), m.group(4))
            info.flts = exr['flts']
            info.flts_ret = [0] * len(info.flts)
            infos.append(info)

    # Check the returned value of each filter: the first word of the filter
    # names an attribute of ``info``; it is substituted by that attribute's
    # value and the resulting expression is evaluated.
    for info in infos:
        for i, f in enumerate(info.flts):
            try:
                m = re.search(r'(\w+)', f)
                vname = m.group(1)
                val = getattr(info, vname)
                cmd = f.replace(vname, val)
                # SECURITY: eval() executes arbitrary code.  This is only
                # acceptable while ``flts`` comes from trusted configuration;
                # never feed it untrusted input.
                info.flts_ret[i] = eval(cmd)
            except Exception:
                # Best effort: a filter that cannot be evaluated keeps its
                # default result of 0.
                continue

    return infos
示例#11
0
def update_stock_report_overall(obj):
    """Fill the overview fields of ``obj`` from its ``zca`` report page.

    Each field is only assigned when its label is found on the page:
    ``pz_close`` (closing price), ``per`` (P/E ratio, skipped for ``N/A``),
    ``nav`` (net asset value per share), ``per_year`` (years row),
    ``per_max`` / ``per_min`` (highest / lowest P/E rows, ``N/A`` cells become
    ``0``) and ``capital_stock``.  Thousands separators are stripped before
    conversion.  Returns ``None``.
    """
    page = xurl.load('https://fubon-ebrokerdj.fbs.com.tw/z/zc/zca/zca_%s.djhtm' %(obj.code), encoding='big5')
    row_flags = re.MULTILINE | re.DOTALL

    def as_float(text):
        return float(text.replace(',',''))

    def as_ratio(text):
        return as_float(text) if text != 'N/A' else 0

    def row_cells(row_match):
        return re.findall(r'>([^<]+)</td>', row_match.group(1))

    found = re.search(r'>收盤價</td>\s*<td class="t3n1">(.*)</td>', page)
    if found:
        obj.pz_close = as_float(found.group(1))

    found = re.search(r'>本益比</td>\s*<td class="t3n1">(.*)</td>', page)
    if found and found.group(1) != 'N/A':
        obj.per = as_float(found.group(1))

    found = re.search(r'>每股淨值\(元\)</td>\s*<td class="t3n1"><span class="t3n1">(.*?)</span></td>', page)
    if found:
        obj.nav = as_float(found.group(1))

    found = re.search(r'>年度</td>(.*?)</tr>', page, row_flags)
    if found:
        obj.per_year = [int(cell.replace(',','')) for cell in row_cells(found)]

    found = re.search(r'>最高本益比</td>(.*?)</tr>', page, row_flags)
    if found:
        obj.per_max = [as_ratio(cell) for cell in row_cells(found)]

    found = re.search(r'>最低本益比</td>(.*?)</tr>', page, row_flags)
    if found:
        obj.per_min = [as_ratio(cell) for cell in row_cells(found)]

    found = re.search(r'>股本\(億, 台幣\)</td>\s*<td class="t3n1">(.*)</td>', page)
    if found:
        obj.capital_stock = as_float(found.group(1))

    return
示例#12
0
def autotest():
    """Run every bookmarked link through the extractors and print a report.

    The bookmark JSON is downloaded and each link is classified as passing
    (at least one entry extracted) or failing.  ``view.py?...`` links are
    decoded into their ``q``/``s``/``x``/``p`` parameters and dispatched to
    ``extractors.search`` (when ``q`` is set) and/or ``extractors.extract``
    (when ``p`` is set, whose result wins); any other link goes straight to
    ``extractors.extract``.
    """
    def query_arg(name, query):
        # Value of ``name=`` inside a query string, or None when absent.
        found = re.search(name + r'=([^&]*)', query)
        return found.group(1) if found else None

    list_pass = []
    list_fail = []
    bookmarkJSONURL = 'https://gist.githubusercontent.com/JiasHuang/30f6cc0f78ee246c1e28bd537764d6c4/raw/bookmark.json'
    data = json.loads(xurl.load(bookmarkJSONURL))
    for d in data['channels']:
        channel = d['channel'].encode('utf8')
        for entry in d['links']:
            title = entry['title'].encode('utf8')
            link = entry['link'].encode('utf8')
            test = '[%s][%s] %s' % (channel, title, link)
            # Reset per link: a view.py link with neither q nor p used to hit
            # an unbound name (first link) or reuse the previous link's count.
            entryCnt = 0
            m = re.search(r'view.py\?(.*?)$', link)
            if m:
                query = m.group(1)
                q = query_arg('q', query)
                s = query_arg('s', query)
                x = query_arg('x', query)
                p = query_arg('p', query)
                if q:
                    entryCnt = len(extractors.search(q, s, x))
                if p:
                    entryCnt = len(extractors.extract(p))
            else:
                entryCnt = len(extractors.extract(link))
            if entryCnt <= 0:
                list_fail.append(test)
            else:
                list_pass.append(test)
    print('\n--- pass ---\n')
    print('\n'.join(list_pass))
    print('\n--- fail ---\n')
    print('\n'.join(list_fail))
示例#13
0
def getSource(url, fmt, ref):
    """Return the first ``source src`` on the page that is not ``.m4a.m3u8``.

    The chosen source is printed before it is returned.  Falls back to ``url``
    itself when the page has no suitable source.  ``fmt`` and ``ref`` are
    accepted but not used here.
    """
    page = xurl.load(url)
    candidates = (hit.group(1)
                  for hit in re.finditer(r'source src="([^"]*)"', page))
    for candidate in candidates:
        if candidate.endswith('.m4a.m3u8'):
            continue
        print('[src] %s' % (candidate))
        return candidate
    return url
示例#14
0
def extract(url):
    """Extract playable or browsable objects from a LINE TODAY URL.

    * ``api.today.line.me`` URLs: returns a one-element list holding an
      ``entryObj`` for the ``"720"`` link, or ``None`` when it is missing.
    * Other URLs with a ``data-programId`` attribute: returns a one-element
      list holding a ``pageObj`` that points at the linelive web API.
    * Anything else: returns the image links found via ``url(...)`` patterns,
      each converted with ``to_page()``.

    Pages are always loaded with ``cache=False``.
    """
    if re.search(r'api.today.line.me', url):
        stream = re.search(r'"720":"([^"]*)"', xurl.load(url, cache=False))
        return [entryObj(stream.group(1))] if stream else None

    program = re.search(r'data-programId="([^"]*)"',
                        xurl.load(url, cache=False))
    if not program:
        images = findImageLink(url, ImageExt=None, ImagePattern=r'url\((.*?)\)')
        return [image.to_page() for image in images]

    api_link = 'https://api.today.line.me/webapi/linelive/' + program.group(1)
    return [pageObj(api_link)]
示例#15
0
def update_stock_report_dividend(obj):
    """Append dividend records from the ``zcc`` report page to ``obj.dividend``.

    Only table rows with exactly nine cells are used.  Scanning stops as soon
    as ``obj.dividend`` holds five or more records.  Returns ``None``.
    """
    page = xurl.load('https://jdata.yuanta.com.tw/z/zc/zcc/zcc_%s.djhtm' %(obj.code), encoding='big5')
    # Leading columns: dividend year, cash dividend (earnings), cash dividend
    # (capital reserve), cash dividend (subtotal), stock dividend (earnings),
    # stock dividend (capital reserve), stock dividend (subtotal)
    rows = re.finditer(r'<td class="t3n0">(.*?)</tr>', page, re.MULTILINE | re.DOTALL)
    for row in rows:
        cells = re.findall(r'>([^<]+)</td>', row.group(0))
        if len(cells) == 9:
            # Index 3 (the cash subtotal) is intentionally not passed on.
            record = dividend_info(Y=cells[0], cash_a=cells[1], cash_b=cells[2], stock_a=cells[4], stock_b=cells[5])
            obj.dividend.append(record)
        if len(obj.dividend) >= 5:
            break
    return
示例#16
0
def update_stock_report_news(obj):
    """Append earnings-related headlines to ``obj.news``.

    Pages 1 and 2 of the ``ZCV`` news listing are scanned; a row is kept when
    its title mentions after-tax per-share figures, earnings per share, ``EPS``
    or an investor conference (the keywords in the pattern below).
    Returns ``None``.
    """
    row_pattern = r'<tr><td class="t3t1">([^<]*)</td>\s*<td class="t3t1"><a href="([^"]*)">([^<]*)</a>'
    for page_no in (1, 2):
        page = xurl.load('https://jdata.yuanta.com.tw/Z/ZC/ZCV/ZCV_%s_E_%d.djhtm' %(obj.code, page_no), encoding='big5')
        for row in re.finditer(row_pattern, page):
            date, href, title = row.groups()
            if not re.search(r'(每股稅後|每股盈餘|EPS|法說)', title):
                continue
            obj.news.append(news_info(date, title, 'https://jdata.yuanta.com.tw' + href))
    return
示例#17
0
def get_stat_vol(code, cacheOnly):
    """Return volume statistics for stock ``code``.

    The comma-separated series after the first space inside the page's
    ``GetBcdData('...')`` call is summed and divided by 30, and stored under
    ``'30d_vol'``.  The result is an empty dict when the call is not found.
    The page is loaded with an ``expiration`` of 432000.
    """
    stats = {}
    page = xurl.load('https://jdata.yuanta.com.tw/z/zc/zcw/zcwg_%s.djhtm' %(code), cacheOnly=cacheOnly, expiration=432000, encoding='big5')
    found = re.search(r'GetBcdData\(\'([^ ]*) ([^\']*)\'', page)
    if found:
        volume_sum = sum(int(cell) for cell in found.group(2).split(','))
        stats['30d_vol'] = volume_sum / 30
    return stats
示例#18
0
def update_stock_report_revenue(obj):
    """Prepend revenue records from the ``zch`` report page to ``obj.revenue``.

    Rows are read in page order and each one is inserted at the front of
    ``obj.revenue``; reading stops at the first row whose year is below the
    cut-off ``current year - 1911 - defs.from_year_offset`` (the 1911 offset
    presumably maps to the ROC calendar year used on the page -- confirm).
    Returns ``None``.
    """
    today = datetime.datetime.now()
    first_year = int(today.year) - 1911 - defs.from_year_offset
    page = xurl.load('https://jdata.yuanta.com.tw/z/zc/zch/zch_%s.djhtm' %(obj.code), encoding='big5')
    for row in re.finditer(r'<td class="t3n0">(\d+)/(\d+)</td>(.*?)</tr>', page, re.MULTILINE | re.DOTALL):
        year, month, remainder = row.groups()
        if int(year) < first_year:
            break
        cells = re.findall(r'>([^<]+)</td>', remainder)
        if cells:
            # Only the first cell after the date is used, with separators removed.
            obj.revenue.insert(0, revenue_info(year, month, cells[0].replace(',','')))
    return
示例#19
0
def update_stock_report_eps(obj):
    """Prepend quarterly EPS records from the ``zce`` report page to ``obj.eps``.

    Rows are read in page order and inserted at the front of ``obj.eps``;
    reading stops at the first row whose year is below the cut-off
    ``current year - 1911 - defs.from_year_offset``.  Only rows with exactly
    ten cells are used.  Returns ``None``.
    """
    today = datetime.datetime.now()
    first_year = int(today.year) - 1911 - defs.from_year_offset
    page = xurl.load('https://fubon-ebrokerdj.fbs.com.tw/z/zc/zce/zce_%s.djhtm' %(obj.code), encoding='big5')
    # Columns after the quarter: 0 operating revenue, 1 operating cost,
    # 2 gross profit, 3 gross margin, 4 operating income, 5 operating margin,
    # 6 non-operating income/expense, 7 pre-tax net income,
    # 8 after-tax net income, 9 EPS
    for row in re.finditer(r'<td class="t3n0">(\d+)\.(\d)Q(.*?)</tr>', page, re.MULTILINE | re.DOTALL):
        year, quarter, remainder = row.groups()
        if int(year) < first_year:
            break
        cells = re.findall(r'>([^\n<]*)<', remainder)
        if len(cells) != 10:
            continue
        record = eps_info(year, quarter, rev=cells[0], profit=cells[4], nor=cells[6], ni=cells[8], eps=cells[9])
        obj.eps.insert(0, record)
    return
示例#20
0
def update_stock_report_wap_otc(obj):
    """Append monthly weighted-average-price records (OTC) to ``obj.wap``.

    For every year from ``now.year - defs.from_year_offset`` through the
    current year the monthly statistics are requested and each row is turned
    into a ``wap_info``.  The average price is computed as trade value divided
    by trade volume; both figures get ``'000'`` appended before being stored.
    Rows whose value/volume cannot be divided (empty, non-numeric or zero
    volume) are skipped instead of aborting the whole update.
    Returns ``None``.
    """
    now = datetime.datetime.now()
    # The endpoint is the same for every year; only the posted data differs.
    url = 'https://www.tpex.org.tw/web/stock/statistics/monthly/download_st44.php?l=zh-tw'
    for year in range(now.year - defs.from_year_offset, now.year + 1):
        txt = xurl.load(url, opts=['--data-raw \'yy=%s&stk_no=%s\'' %(year, obj.code)])
        # Columns: year, month, highest close, lowest close, average close,
        # number of trades, trade value in thousand NT$ (A),
        # trade volume in thousand shares (B), turnover rate (%)
        for m in re.finditer(r'"(\d+)","(\d+)","(.*?)","(.*?)","(.*?)",".*?","(.*?)","(.*?)",', txt):
            Y, M = m.group(1), m.group(2)
            h, l = m.group(3), m.group(4)
            A = m.group(6).replace(',','')
            B = m.group(7).replace(',','')
            try:
                a = '%.2f' %(float(A) / float(B))
            except (ValueError, ZeroDivisionError):
                # No usable value/volume for this month: no average to record.
                continue
            obj.wap.append(wap_info(Y, M, h, l, a, A + '000', B + '000'))
    return
示例#21
0
def get_stock_infos(data):
    """Fetch live quotes for ``data['stocks']`` and wrap them as ``stock_info``.

    All exchange channels are requested in a single uncached call.  Every
    message of the reply's ``msgArray`` is paired with each configured stock
    whose ``code`` equals the message's ``c`` field; the pair becomes a
    ``stock_info`` that is passed through ``parse_info``.

    Returns the infos in message order, or ``[]`` when the reply has no
    ``msgArray``.
    """
    stocks = data['stocks']
    ex_ch = '|'.join([get_ex_ch_by_code(stock['code']) for stock in stocks])
    reply_text = xurl.load('https://mis.twse.com.tw/stock/api/getStockInfo.jsp?ex_ch=%s&json=1&delay=0' %(ex_ch), cache=False)
    reply = json.loads(reply_text)
    if 'msgArray' not in reply:
        return []

    matched = []
    for quote in reply['msgArray']:
        for stock in stocks:
            if stock['code'] != quote['c']:
                continue
            info = stock_info(stock['code'], stock.get('flts'), stock.get('tags'), stock.get('notes'))
            info.msg = quote
            parse_info(info)
            matched.append(info)
    return matched
示例#22
0
文件: youtube.py 项目: JiasHuang/vod
def findYouTubeNextPage(url, q):
    """Return the pager controls of a YouTube search page as ``navObj`` items.

    The page is loaded with a fixed ``PREF`` cookie and the markup between
    ``search-pager`` and the next ``</div>`` is scanned for ``<a>`` and
    ``<button>`` elements.  Each yields a ``navObj`` with the text of its first
    ``<span>`` (or ``None``) and, for anchors with an ``href``, the absolute
    link (``None`` otherwise).  ``q`` is accepted but not used here.

    Returns an empty list when the page has no pager.
    """
    local = xurl.genLocal(url, suffix='.old')
    page = xurl.load(url,
                     local,
                     opts=['--cookie \"PREF=f1=50000000;f6=1408;f5=30;hl=en\"'])
    pager = re.search(r'search-pager(.*?)</div>', page,
                      re.DOTALL | re.MULTILINE)
    if not pager:
        return []

    controls = []
    for item in re.finditer(r'<(a|button) .*?</(a|button)>', pager.group(1)):
        markup = item.group()
        caption = re.search(r'<span.*?">(.*?)</span>', markup)
        target = None
        if item.group(1) == 'a':
            href = re.search(r'href="([^"]*)"', markup)
            if href:
                target = urljoin(url, href.group(1))
        controls.append(navObj(caption.group(1) if caption else None, target))
    return controls
示例#23
0
def update_stock_report_wap(obj):
    """Append monthly weighted-average-price records (TWSE) to ``obj.wap``.

    For every year from ``now.year - defs.from_year_offset`` through the
    current year the ``FMSRFK`` JSON report is requested.  Years whose
    response is not valid JSON, or has no ``data`` key, are skipped.
    Returns ``None``.
    """
    now = datetime.datetime.now()
    for year in range(now.year - defs.from_year_offset, now.year + 1):
        url = 'https://www.twse.com.tw/exchangeReport/FMSRFK?response=json&stockNo=%s&date=%4d0101' %(obj.code, year)
        txt = xurl.load(url)
        try:
            data = json.loads(txt)
        except (TypeError, ValueError):
            # Not JSON (or nothing was loaded): skip this year.  Narrowed from
            # a bare except so interrupts and unrelated bugs still surface.
            continue
        if 'data' not in data:
            continue
        # Columns: year, month, highest price, lowest price, weighted (A/B)
        # average price, number of trades, trade value (A), trade volume (B),
        # turnover rate (%)
        for d in data['data']:
            Y, M = d[0], d[1]
            h, l, a = d[2], d[3], d[4]
            A = d[6].replace(',','')
            B = d[7].replace(',','')
            obj.wap.append(wap_info(Y, M, h, l, a, A, B))
    return
示例#24
0
def getSource(url, fmt, ref):
    """Resolve a ``vod-play-id`` page to the URL of the requested episode.

    The episode number is taken from ``num-<N>`` in ``url``.  The page's
    ``base64decode('...')`` payload is decoded, unquoted and has its ``%uXXXX``
    sequences unescaped; every ``http...`` run (up to ``#``, ``$`` or newline)
    in the result is a candidate URL.  The N-th candidate is returned when
    there are at least N, the first one otherwise.

    ``fmt`` and ``ref`` are accepted but not used here.

    Returns ``None`` for URLs without ``vod-play-id`` or when anything fails
    (in which case ``Exception`` is printed).
    """
    if not re.search(r'vod-play-id', url):
        return None
    try:
        m = re.search(r'num-(\d+)', url)
        ep_num = int(m.group(1))
        txt = xurl.load(url)
        m = re.search(r'base64decode\(\'([^\']*)', txt)
        code = m.group(1)
        decoded = xurl.unquote(base64.b64decode(code))
        print('\n[pangzitv][DBG][decoded]\n\n\t%s' % (decoded))
        # process unicode special character
        decoded = decoded.replace('%u', '\\u').decode('unicode_escape')
        urls = [m.group() for m in re.finditer(r'http[^#$\n]*', decoded)]
        if len(urls) >= ep_num:
            return urls[ep_num - 1]
        return urls[0]
    except Exception:
        # Best effort (missing matches, bad payload, empty URL list); unlike a
        # bare except this does not swallow KeyboardInterrupt/SystemExit.
        print('Exception')
    return None
示例#25
0
文件: bilibili.py 项目: JiasHuang/vod
def getSource(url, fmt, ref):
    """Build local HLS playlists for a bilibili page and return the master one.

    The page's ``"id":N,"baseUrl":"..."`` pairs are scanned in page order.
    The first id that belongs to the accepted video ids selects the video
    stream, and likewise for audio; only URLs carrying the selected id are
    kept.  Three files are written: an audio playlist, a video playlist, and a
    master playlist that references both.

    ``fmt`` and ``ref`` are accepted but not used here.  Returns the master
    playlist path.
    """
    page = xurl.load(url)
    video_urls, audio_urls = [], []
    picked_video = None
    picked_audio = None

    for hit in re.finditer(r'"id":(\d+),"baseUrl":"([^"]*)"', page):
        stream_id, stream_url = hit.group(1), hit.group(2)
        if stream_id in ['64', '32', '16']:
            # The first accepted id seen wins; later ids are ignored.
            picked_video = picked_video or stream_id
            if stream_id == picked_video:
                video_urls.append(stream_url)
        if stream_id in ['30280', '30216']:
            picked_audio = picked_audio or stream_id
            if stream_id == picked_audio:
                audio_urls.append(stream_url)

    audio_list = xurl.genLocal(url, prefix='vod_list_', suffix='.audio.m3u8')
    video_list = xurl.genLocal(url, prefix='vod_list_', suffix='.video.m3u8')
    master_list = xurl.genLocal(url, prefix='vod_list_', suffix='.m3u8')

    xurl.saveM3U8(audio_list, audio_urls)
    xurl.saveM3U8(video_list, video_urls)

    master = '\n'.join([
        '#EXTM3U',
        '#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",URI="%s"' %
        (audio_list),
        '#EXT-X-STREAM-INF:AUDIO="audio"',
        video_list,
    ])
    xurl.saveLocal(master_list, master)

    return master_list
示例#26
0
def getSource(url, fmt, ref):
    """Return the argument of the page's first ``geturl('...')`` call.

    Returns ``None`` when the page has no such call.  ``fmt`` and ``ref`` are
    accepted but not used here.
    """
    found = re.search(r'geturl\(\'(.*?)\'\)', xurl.load(url))
    if not found:
        return None
    return found.group(1)
示例#27
0
def loadLocal(url):
    """Load ``url`` relative to the local ``http://127.0.0.1/vod/`` root."""
    local_url = 'http://127.0.0.1/vod/' + url
    return xurl.load(local_url)
示例#28
0
文件: goodtv.py 项目: JiasHuang/vod
def getSource(url):
    """Return the page's first ``source src`` value, or ``url`` if there is none."""
    found = re.search(r'source src="([^"]*)"', xurl.load(url))
    return found.group(1) if found else url
示例#29
0
文件: lizhi.py 项目: JiasHuang/vod
def dl(url, local, ref=None, read=True):
    """Download ``url`` to ``local`` unless that file already exists.

    Returns the content of ``local`` when ``read`` is true, ``None`` otherwise.
    """
    already_there = os.path.exists(local)
    if not already_there:
        xurl.load(url, local, ref=ref)
    return xurl.readLocal(local) if read else None
示例#30
0
def getSource(url, fmt, ref):
    """Return the ``src`` of the page's first ``<source>`` tag, or ``None``.

    ``fmt`` and ``ref`` are accepted but not used here.
    """
    found = re.search('<source.*? src="([^"]*)"', xurl.load(url))
    if found:
        return found.group(1)
    return None
示例#31
0
def get_date_from_bshtm():
    """Return the date shown on the bshtm welcome page as one integer.

    The three ``/``-separated numbers inside the ``Label_Date`` span are
    concatenated in page order and converted with ``int``.  Raises
    ``AttributeError`` when the span is not found on the page.
    """
    page = xurl.load('https://bsr.twse.com.tw/bshtm/bsWelcome.aspx')
    found = re.search(r'<span id="Label_Date">(\d+)/(\d+)/(\d+)</span>', page)
    return int(''.join(found.group(1, 2, 3)))
示例#32
0
def gen_tse():
    """Save the codes found on the ISIN listing page to ``tse-code-list.txt``.

    A code is the word that directly follows a ``#FAFAD2``-coloured table
    cell; the codes are written one per line.  Returns ``None``.
    """
    page = xurl.load('https://isin.twse.com.tw/isin/C_public.jsp?strMode=2')
    codes = re.findall(r'<tr><td bgcolor=#FAFAD2>(\w+)', page)
    listing = '\n'.join(codes)
    xurl.saveLocal('tse-code-list.txt', listing)
    return
示例#33
0
def load(url):
    """Load ``url`` through ``xurl.load`` with default options."""
    content = xurl.load(url)
    return content