Example No. 1
    def video_from_vid(self, vid, **kwargs):
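        # Resolve an mgtv video id to its m3u8 stream URLs through the
        # pcweb.api.mgtv.com pch5 endpoints (player/video, then getSource).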
        handlers = [HTTPCookieProcessor()]
        install_opener(build_opener(*handlers))
        did, tk2 = self.generate_did_tk2()
        api_info_url = 'https://pcweb.api.mgtv.com/player/video?tk2={}&video_id={}&type=pch5'.format(tk2, vid)
        html = get_html(api_info_url)
        content = loads(html)

        title = content['data']['info']['title']
        pm2 = content['data']['atc']['pm2']

        api_source_url = 'https://pcweb.api.mgtv.com/player/getSource?video_id={}&pm2={}&tk2={}&type=pch5'.format(vid, pm2, tk2)
        html = get_html(api_source_url)
        content = loads(html)
        streams = content['data']['stream']
        domains = content['data']['stream_domain']
        index = randrange(len(domains))
        domain = domains[index]

        level = kwargs.get('level', 0)
    
        if level >= 0:
            level = min(level, len(streams)-1)
        url = streams[level]['url']

        url = domain + url
        content = loads(get_html(url))
        url = content['info']

        return re.compile("(.*m3u8)").findall(url)
Example No. 2
def main(html_inspire, default_institution):
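    # Extract the INSPIRE record number from the given link, download its XML
    # export and print all authors, the default_institution authors, the title,
    # the abstract and the keywords.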
    m = re.search(r"/([0-9]+)", html_inspire)
    if m is None:
        raise ValueError("not valid html")
    inspire_number = m.group(1)
    url = "http://inspirehep.net/record/{0}/export/xn".format(inspire_number)
    xml = get_html(url)
    doc = xmltodict.parse(xml)

    authors = get_authors(doc)

    print "\n" + "=" * 10 + " ALL AUTHORS " + "=" * 10
    authors_list = ", ".join(map(format_name, authors))
    print authors_list

    print "\n found %d authors" % len(authors)
    milan_authors = [author for author in authors if (default_institution in " ".join(author[2]))]

    print "\n" + "=" * 10 + (" %s AUTHORS " % default_institution) + "=" * 10
    milan_list = "\n".join(map(format_name_italian, milan_authors))
    print milan_list

    print "\n" + "=" * 10 + " TITLE " + "=" * 10
    title = get_title(doc)
    print title

    print "\n" + "=" * 10 + " ABSTRACT " + "=" * 10
    abstract = get_abstract(doc)
    print abstract

    print "\n===== KEYWORDS ======\n"
    keys = dump_keyword.get_keys_from_html(get_html(html_inspire))
    print keys

    return authors_list, milan_list, title, abstract, keys
Example No. 3
def episodelist2(url):
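    # Build a Kodi episode list for a Sohu playlist: scrape playlistId from the
    # page, then read the episodes from the getvideolist.do JSON API.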
    plugin.set_content('TVShows')
    link = get_html(url)
    tree = BeautifulSoup(link, 'html.parser')

    listapi = 'http://my.tv.sohu.com/play/getvideolist.do?playlistid=%s&pagesize=30&order=1'

    match0 = re.compile('playlistId\s*=\s*["|\'](.+?)["|\'];', re.DOTALL).findall(link)

    link = get_html(listapi % match0[0])
    videos = loads(link)['videos']
    items = []
    for item in videos:
        length = item['playLength']
        p_date = item['publishTime'].encode('utf-8')
        p_order = int(item['order'])
        vid = item['vid']
        title = item['subName'].encode('utf-8')
        items.append({
            'label': title,
            'path': url_for('playvideo', name=title,
                            url=item['pageUrl'],
                            image=item['largePicUrl']),
            'thumbnail': item['largePicUrl'],
            'is_playable': True,
            'info': {'title': title},
        })

    return items
Example No. 4
    def fetch_magic(self, url):
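        # Download the page's mtool/mcore scripts and rebuild the symbol table
        # of their packed (p,a,c,k,e,d-style) JavaScript to collect the 'magic'
        # values pushed onto the mozEcName array.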
        magic_list = []
        page = get_html(url)
        src = re.findall(r'src="(.+?)"', page)
        js = [path for path in src if path.endswith('.js')]

        host = 'http://' + urlparse(url).netloc
        js_path = [urljoin(host, rel_path) for rel_path in js]

        for p in js_path:
            if 'mtool' in p or 'mcore' in p:
                js_text = get_html(p)
                hit = re.search(
                    r'\(\'(.+?)\',(\d+),(\d+),\'(.+?)\'\.split\(\'\|\'\),\d+,\{\}\)',
                    js_text)

                code = hit.group(1)
                base = hit.group(2)
                size = hit.group(3)
                names = hit.group(4).split('|')

                sym_to_name = {}
                for no in range(int(size), 0, -1):
                    no_in_base = mapping(no, int(base))
                    val = names[no] if no < len(
                        names) and names[no] else no_in_base
                    sym_to_name[no_in_base] = val

                moz_ec_name = self.search_dict(sym_to_name, 'mozEcName')
                push = self.search_dict(sym_to_name, 'push')
                patt = '{}\.{}\("(.+?)"\)'.format(moz_ec_name, push)
                ec_list = re.findall(patt, code)
                for ec in ec_list:
                    magic_list.append(sym_to_name[ec])

        return magic_list
Example No. 5
    def tudou_download_by_iid(self, iid, title, **kwargs):
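        # Look up the segment list for a Tudou item id and return the download
        # URLs of the variant with the largest total size.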
        data = loads(get_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
        temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:sum([part['size'] for part in x]))
        vids, size = [t["k"] for t in temp], sum([t["size"] for t in temp])

        urls = []
        for vid in vids:
            for i in parseString(get_html('http://ct.v2.tudou.com/f?id=%s' % vid)).getElementsByTagName('f'):
                urls.append(i.firstChild.nodeValue.strip())

        return urls
Example No. 6
def get_post_pages_by_id(item_id: int) -> Pages:
    """Get Post Pages based on an Item ID."""
    pages = []
    url = HN_ITEMS_URL + '?id={}'.format(item_id)
    pg = extract_page(get_html(url))
    pages.append(pg)
    while pg.has_next:
        newurl = url + '&p={}'.format(pg.pg_number + 1)
        pg = extract_page(get_html(newurl))
        pages.append(pg)

    return Pages(pages)
Example No. 7
    def video_from_vid(self, vid, **kwargs):
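        # Resolve a Le.com vid: query the playJson API, pick a stream level and
        # a random CDN domain, then fetch and decode the m3u8 playlist and
        # return its media segment URLs.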
        vparamap = {0: '1300', 1: '720p', 2: '1080p'}

        url = 'http://player-pc.le.com/mms/out/video/playJson'
        req = {
            'id': vid,
            'platid': 1,
            'splatid': 105,
            'format': 1,
            'tkey': self.calcTimeKey(int(time.time())),
            'domain': 'www.le.com',
            'region': 'cn',
            'source': 1000,
            'accessyx': 1
        }
        r = get_html(url + '?' + urlencode(req))
        info = loads(r)
        playurl = info['msgs']['playurl']

        stream_level = kwargs.get('level', 0)
        support_stream_id = list(playurl["dispatch"].keys())
        stype = len(support_stream_id)
        stream_level = min(stream_level, stype - 1)
        stream_id = support_stream_id[stream_level]

        # pick a random domain
        index = randrange(len(playurl['domain']))

        url = playurl["domain"][index] + playurl["dispatch"][stream_id][0]
        uuid = hashlib.sha1(url.encode('utf8')).hexdigest() + '_0'
        url = url.replace('tss=0', 'tss=ios')
        url += '&m3v=1&termid=1&format=1&hwtype=un&ostype=MacOS10.12.4&p1=1&p2=10&p3=-&expect=3&tn={}&vid={}&uuid={}&sign=letv'.format(
            random(), vid, uuid)

        r2 = get_html(url.encode('utf-8'))
        info2 = loads(r2)

        # hold on ! more things to do
        # to decode m3u8 (encoded)
        suffix = '&r=%d&appid=500' % (int(time.time() * 1000))

        m3u8 = get_html(info2['location'] + suffix, decoded=False)
        if m3u8 is None:
            return None

        m3u8_list = self.m3u8decode(m3u8)
        m3u8_file = kwargs.get('m3u8')
        with open(m3u8_file, "wb") as m3u8File:
            m3u8File.write(m3u8_list)
        urls = re.findall(r'^[^#][^\r]*', m3u8_list, re.MULTILINE)
        return urls
Example No. 8
    def get_category_from_web_page(self):
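        # Build bilibili's category tree by scraping the home page, patch in
        # the TV-series and movie channels, then fill each category's
        # sub-sections.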
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(get_html(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        for item in result:
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)

        #Fix video and movie
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {
            'title': u'电视剧',
            'url': 'http://bangumi.bilibili.com/tv/',
            'subs': []
        }
        category_dict['23'] = {
            'title': u'电影',
            'url': 'http://bangumi.bilibili.com/movie/',
            'subs': []
        }

        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(get_html(url),
                                   "html.parser").select('ul.n_num li')
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url}
                node['subs'].append(tid)
        return category_dict
Example No. 9
    def get_json(self, api, data=None, pretty=False):
        headers = self.header
        s = loads(get_html(SERVER + api, data=data, headers=headers))
        if pretty:
            print headers
            print dumps(s, sort_keys=True, indent=4, separators=(',', ': '))
        return s
Example No. 10
def changeList(url):
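    # Let the user pick a value for every tag group on the page and splice the
    # selections into the '-'-separated last segment of the URL, then reload
    # the filtered list.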
    html = get_html(url)
    tree = BeautifulSoup(html, 'html.parser')
    soup = tree.find_all('div', {'class': 'm-tag-type'})

    surl = url.split('/')
    purl = surl[-1].split('-')

    dialog = xbmcgui.Dialog()

    filter = ''
    for iclass in soup:
        title = iclass.find('h5', {'class': 'u-title'}).text
        si = iclass.find_all('a')
        list = []
        for subitem in si:
            list.append(subitem.text)
        sel = dialog.select(title, list)

        if sel < 0:
            continue

        filter += u'|' + title + u'(' + si[sel].text + u')'
        seurl = si[sel]['onclick'].split('/')[-1]
        seurl = seurl.split('-')

        for i in range(0, len(purl)):
            if seurl[i] != '':
                purl[i] = seurl[i]

    surl[-1] = '-'.join(purl)
    url = '/'.join(surl)
    mainlist(url, filter)
Example No. 11
def get_dy2018():
    url = 'https://item.mi.com/product/6334.html'
    print('开始抓取AI音箱...')
    soup = get_soup(get_html(url, 'utf-8'))
    # print(soup.prettify())
    a = soup.select('#J_buyBtnBox a')
    print(a)
Example No. 12
def episodelist(url):
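    # Build a Kodi episode list from the 'webcfg' JSON embedded in the page,
    # one playable entry per item in playList.data.list.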
    plugin.set_content('TVShows')
    html = get_html(url)
    playcfg = re.compile('var webcfg\s?=\s?({.+?);\n').findall(html)
    if playcfg:
        jsplay = loads(playcfg[0])
    else:
        return []

    items = []
    content = jsplay['share_content']
    for item in jsplay['playList']['data']['list']:
        vip = '' if int(item['vip']) == 0 else VIP
        new = NEW if item.get('isNew') else ''
        items.append({
            'label': item['title'] + vip + new,
            'path': url_for('playvideo', vid=item['id'],
                           name=item['title'].encode('utf-8'),
                           image=item['capture'].encode('utf-8')),
            'thumbnail': item['capture'],
            'is_playable': True,
            'info': {'title': item['title']},
        })

    return items
Example No. 13
def playList(url):
    html = get_html(url)
    tree = BeautifulSoup(html, 'html.parser')

    lists = tree.find_all('a', {'class': 'vd-list-item'})

    if lists is None:
        return []

    items = []
    for item in lists:
        p_thumb = item.img.get('src')
        if p_thumb is None:
            p_thumb = item.img.get('_lazysrc', '')
        d = item.find('i', {'class': 'vtime'})
        duration = 0
        for t in d.text.split(':'):
            duration = duration * 60 + int(t)
        items.append({
            'label': item['title'],
            'path': url_for('playvideo', url=httphead(item['href'])),
            'thumbnail': p_thumb,
            'is_playable': True,
            'info': {'title': item['title'], 'duration': duration}
        })
    return items
Example No. 14
    def video_from_vid(self, vid, **kwargs):
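        # Pick a quality level from the playlist metadata, decrypt the matching
        # playinfo entry and return the CDN URL built from its hashid, token
        # and vf parameters.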
        if self.coeff is None:
            magic_list = self.fetch_magic(self.a_mobile_url)
            self.coeff = self.get_coeff(magic_list)

        ep_url = self.video_ep if 'single_video' in kwargs else self.media_ep
        url = ep_url.format(vid)
        meta = loads(get_html(url))

        streams = meta['playlist']
        maxlevel = len(streams)
        level = kwargs.get('level', 0)
        if level >= maxlevel: level = maxlevel - 1
        stream = streams[level]
        definition = stream['code']
        for s in stream['playinfo']:
            codec = 'h' + s['codec'][2:]  # h.264 -> h264
            for st in self.stream_types:
                s_id = definition if codec == 'h264' else '{}_{}'.format(
                    definition, codec)
                if s_id == st:
                    clear_info = self.dec_playinfo(s, self.coeff)
                    cdn_list = self.get_cdninfo(clear_info['hashid'])
                    base_url = cdn_list[0]
                    token = base64.b64encode(
                        clear_info['token'].encode('utf8'))
                    video_url = '{}?token={}&vf={}'.format(
                        base_url, token, s['vf'])
                    return [video_url]
Example No. 15
def gettudoulist(url):
    html = get_html(httphead(url))
    tree = BeautifulSoup(html, 'html.parser')

    items = []

    soup = tree.find_all('div', {'class': 'td-listbox__list__item--show'})
    for item in soup:
        soup2str = str(item)
        title = re.compile('title=\"(.+?)\"').findall(soup2str)
        if not title:
            title = re.compile('title=\'(.+?)\'').findall(soup2str)
        thumb = re.compile('src="(.+?)"').findall(soup2str)
        purl = re.compile(' href="(.+?)"').findall(soup2str)
        if not (title and thumb and purl): continue

        items.append({
            'label': title[0],
            'path': url_for('playvideo', url=purl[0]),
            'is_playable': True,
            'thumbnail': thumb[0],
            'info': {
                'title': title[0]
            }
        })
    return items
Example No. 16
def filter(url):
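    # Show a select dialog for each filter group in the page's 'list_nav' block
    # and splice the chosen type codes into the last URL segment before
    # reloading the video list.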
    html = get_html(url)
    tree = BeautifulSoup(html, 'html.parser')
    soup = tree.find_all('div', {'class': 'list_nav'})
    dialog = xbmcgui.Dialog()

    urlsplit = url.split('/')
    urltype = re.compile('\w{1}.+?_').findall(urlsplit[-1])
    marktype = []
    for item in soup:
        typelist = item.span.text
        title = re.sub('\r|\n|\t| ', '', typelist)
        li = item.findAll('li')
        sel = dialog.select(title, [x.text for x in li])

        if sel >= 0:
            if not li[sel].has_attr('href'):
                li[sel]['href'] = url
            selurl = li[sel]['href'].split('/')
            seltype = re.compile('\w{1}.+?_').findall(selurl[-1])
            for i in seltype:
                if i not in urltype:
                    marktype.append(i)

    u1 = urlsplit[-1]
    for type in marktype:
        u1 = re.sub(type[0] + '.+?_', type, u1)
    urlsplit[-1] = u1
    url = '/'.join(urlsplit)
    return videolist(url=url, page=1)
Example No. 17
def getProgramList(channelId):
    '''
    timeUrl = 'http://live-api.xwei.tv/api/getUnixTimestamp'
    html = get_html(timeUrl)
    timestamp = json.loads(html)['time']
    t = float(timestamp)
    timestamp = int(t/1000)
    '''
    epgAPI = 'http://live-api.xwei.tv/api/getEPGByChannelTime/%s/0/%d'
    info = ''
    try:
        html = get_html(epgAPI % (channelId, int(time.time())))
        results = json.loads(html)['result'][0]

        for prog in results:
            start = time.localtime(prog['start_time'])
            end = time.localtime(prog['end_time'])
            name = prog['name']
            name = name.replace(' ', '')
            info += '%02d:%02d--' % (start[3], start[4])
            info += '%02d:%02d    ' % (end[3], end[4])
            info += name + '\n'
    except:
        pass

    return info
Example No. 18
    def get_video_urls(self, url, need_subtitle=True):
        self._print_info('Getting video address')
        page_full_url = self.BASE_URL + url
        self._print_info('Page url: ' + page_full_url)
        page_content = get_html(page_full_url)
        self._print_info('Origin page length: ' + str(len(page_content)))
        return self._parse_urls(page_content, need_subtitle)
Example No. 19
    def video_from_url(self, url, **kwargs):
        # Embedded player
        id = r1(r'.tudou.com/v/([^/]+)/', url)
        if id:
            return self.tudou_download_by_id(id, title='')

        html = get_html(url)
        try:
            title = r1(r'\Wkw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
            assert title
            title = unescape_html(title)
        except AttributeError:
            title = match1(html, r'id=\"subtitle\"\s*title\s*=\s*\"([^\"]+)\"')
            if title is None:
                title = ''

        vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html)
        if vcode is None:
            vcode = match1(html, r'viden\s*[:=]\s*\"([\w+/=]+)\"')
        if vcode:
            print "vcode", vcode
            from youku import Youku
            return Youku().video_from_vid(vcode, **kwargs)

        iid = r1(r'iid\s*[:=]\s*(\d+)', html)
        if not iid:
            return self.tudou_download_playlist(url, **kwargs)
        else:
            return self.tudou_download_by_iid(iid, title, **kwargs)
Example No. 20
def genlist(start, end):
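    # Collect chapter URLs for chapters start..end from the index page,
    # special-casing chapters whose link text deviates from the usual
    # 'Chapter N – ...' pattern.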
    global origin
    list_page = common.get_html(origin)
    chapterlist = []
    for i in range(start, end + 1):
        # print(i)
        if i in list(range(1, 51)) + list(range(52, 505)) + \
                list(range(506, 627)):
            text = '^Chapter %s – .*' % str(i)
        elif i in [51]:
            text = '^Chapter %s - .*' % str(i)
        elif i in [505, 981]:
            text = '^Chapter %s( |,).*' % str(i)
            link = list_page.find('a', text=re.compile(text))
            url = origin + link['href'].split("/")[-1]
            chapterlist.append(url)
            text = '^Chapter %s.5( |,).*' % str(i)
        elif i == 968:
            text = '^Chapter %s. .*' % str(i)
        elif i in [1615, 2048, 2288]:
            continue
        else:
            text = '^Chapter %s, .*' % str(i)
        link = list_page.find('a', text=re.compile(text))
        url = origin + link['href'].split("/")[-1]
        chapterlist.append(url)
    return chapterlist
Example No. 21
    def login(self, userid, pwd, captcha):
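        # Post the encrypted password and captcha to the bilibili passport
        # login endpoint; a DedeUserID cookie in the jar signals success.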
        #utils.get_html('http://www.bilibili.com')
        if self.is_login:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(
            captcha, userid, pwd)
        result = get_html(
            LOGIN_URL, data, {
                'Origin': 'https://passport.bilibili.com',
                'Referer': 'https://passport.bilibili.com/login'
            })

        key = None
        for ck in self.cj:
            if ck.name == 'DedeUserID':
                key = ck.value
                break

        if key is None:
            return False, LOGIN_ERROR_MAP[loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(key)
        return True, ''
Example No. 22
def playfound(url, title, pic):
    items = []
    if not url.startswith('http'):
        return []

    link = get_html(url)
    tvId = r1(r'param\[\'tvid\'\]\s*=\s*"(.+)"', link)
    vid = r1(r'param\[\'vid\'\]\s*=\s*"(.+)"', link)
    if tvId is not None and vid is not None:
        items = [{
            'label': title,
            'path': url_for('playvideo', tvId=tvId, vid=vid, title=title, pic=pic),
            'is_playable': True,
            'info': {'title': title}
        }]
    else:
        albumId = r1('albumid="(.+?)"', link)
        if albumId is not None:
            items = episodelist(albumId, 1)
    return items
Example No. 23
def list_sections(section):
    if section == "#":
        return
    html = get_html(_meijumao + section, headers={'Host': 'www.meijumao.net'})
    soup = BeautifulSoup(html, "html.parser")

    listing = []
    is_folder = True
    for section in soup.find_all("article"):
        p_title = section.img.get("alt")
        p_thumb = section.img.get("src")
        list_item = ListItem(label=p_title, thumbnailImage=p_thumb)
        list_item.setProperty('fanart_image', p_thumb)
        url = '{0}?action=list_series&series={1}&seriesname={2}&fanart_image={3}'.format(
            _url, section.a.get("href"), p_title.encode("utf-8"), p_thumb)
        listing.append((url, list_item, is_folder))

    #pagination
    will_page = soup.find("ul", attrs={"id": "will_page"}).find_all("li")
    if len(will_page) > 0:
        # print will_page[0].get("class"),will_page[0].find("a").get("href")
        list_item = ListItem(label="上一页")
        url = '{0}?action=list_sections&section={1}'.format(
            _url, will_page[0].find("a").get("href"))
        listing.append((url, list_item, is_folder))
        list_item = ListItem(label="下一页")
        url = '{0}?action=list_sections&section={1}'.format(
            _url, will_page[-1].find("a").get("href"))
        listing.append((url, list_item, is_folder))
    xbmcplugin.addDirectoryItems(_handle, listing, len(listing))
    xbmcplugin.endOfDirectory(_handle)
Example No. 24
def listType1(albumType, albumId):
    url = 'http://cache.video.qiyi.com/jp/sdvlst/%d/%d/' % (albumType, albumId)
    link = get_html(url)
    data = link[link.find('=') + 1:]
    json_response = loads(data)
    items = []
    if 'data' not in json_response:
        return []

    for item in json_response['data']:
        items.append({
            'label': item['videoName'] + item['tvYear'],
            'path': url_for('playvideo',
                            tvId=item['tvId'],
                            vid=item['vid'],
                            title=item['videoName'].encode('utf-8'),
                            pic=item['aPicUrl']),
            'thumbnail': item['aPicUrl'],
            'is_playable': True,
            'info': {'title': item['videoName']}
        })

    return items
Example No. 25
def root():
    plugin.set_content('TVShows')
    # show search entry
    #yield {
    #    'label': '[COLOR FF00FFFF]<搜索...>[/COLOR]',
    #    'path': url_for('search')
    #}
    #yield {
    #    'label': u'全国电视台',
    #    'path': url_for('tvstudio', url=PPTV_TV_LIST, page=1)
    #}

    data = get_html(PPTV_LIST)
    soup = BeautifulSoup(data, 'html.parser')
    menu = soup.find_all('div', {'class': 'detail_menu'})
    tree = menu[0].find_all('li')
    for item in tree:
        url = item.a['href']
        t = re.compile('type_(\d+)').findall(url)
        if len(t) < 1:
            continue
        yield {
            'label': item.a.text,
            'path': url_for('videolist', url=url, page=1)
        }
Example No. 26
def genlist(start, end):
    global origin
    list_page = common.get_html(origin)
    chapterlist = []
    for i in range(start, end+1):
        # print(i)
        text = '^Chapter %s .*' % str(i)
        if i in [30, 236, 237, 406, 408, 548, 749, 828]:
            text = '^Chapter %s' % str(i)
        elif i == 336:
            text = '^Chapter 336 – You Lump of Meat!'
        elif i == 337:
            text = '^Chapter 336 – The Tyrannical Ye Lai'
        elif i == 490:
            text = '^Chapter 490 – The Endless Chase'
        elif i == 590:
            text = '^Chapter 490 – The Red Dragon Queen'
        elif i == 830:
            text = '^Chapter 830 Rotten and Rusty Army Part 1 ?'
            link = list_page.find('a', text=re.compile(text))
            url = origin + link['href'].split("/")[-1]
            chapterlist.append(url)
            text = '^Chapter 830 – Rotten and Rusty Army Part 2 ?'
        elif i in [857, 861, 862, 863, 864, 865] or i >= 867:
            text = '^Chapter %s.*' % str(i)
        elif i == 51 or i >= 841:
            text = '^Chapter %s-.*' % str(i)
        elif i == 839:
            text = '^hapter 839 – The Flying Slash Part 1'
        link = list_page.find('a', text=re.compile(text))
        url = origin + link['href'].split("/")[-1]
        chapterlist.append(url)
    return chapterlist
Example No. 27
def select(url, filter):
    html = get_html(httphead(url))
    tree = BeautifulSoup(html, 'html.parser')
    soup = tree.find_all('div', {'class': 'td__category__filter__panel__item'})

    dialog = xbmcgui.Dialog()
    color = '[COLOR FF00FF00]%s[/COLOR]'
    for item in soup:
        if filter != item.label.text.encode('utf-8'):
            continue
        si = item.find_all('li')
        list = []
        i = 0
        for subitem in si:
            title = subitem.text
            if 'current' in subitem.get('class', ''):
                title = '[B]{}[/B]'.format(title.encode('utf-8'))
                mark = i
            list.append(title)
            i += 1
        sel = dialog.select(item.label.text, list)

        if sel >= 0:
            url = si[sel].a['href']
    return videolist(url.encode('utf-8'))
Example No. 28
def search():
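    # Read a keyword from the Kodi keyboard, query the mgtv search page and
    # return the matching shows as episode-list entries.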
    plugin.set_content('TVShows')
    keyboard = xbmc.Keyboard('', '请输入搜索内容')
    xbmc.sleep(1500)
    keyboard.doModal()
    if not keyboard.isConfirmed():
        return

    keyword = keyboard.getText()
    p_url = 'https://so.mgtv.com/so/k-'
    url = p_url + quote_plus(keyword)
    html = get_html(url)
    tree = BeautifulSoup(html, 'html.parser')
    soup = tree.find_all('div', {'class': 'result-content'})
    items = []
    for x in soup:
        try:
            vid = x.a['video-id']
        except:
            vid = 0
        items.append({
            'label': x.img['alt'],
            'path': url_for('episodelist', url=x.a['href'], id=vid, page=1),
            'thumbnail': httphead(x.img['src']),
        })

    return items
Example No. 29
def albumlist(url):
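    # Yield the album entries found on the page, followed by pagination entries
    # taken from the paging bar so the listing can be continued.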
    plugin.set_content('music')
    html = get_html(url)
    tree = BeautifulSoup(html, 'html.parser')
    soup = tree.find_all('div', {'class': 'discoverAlbum_wrapper'})

    albums = soup[0].find_all('div', {'class': 'discoverAlbum_item'})
    for album in albums:
        yield {
            'label': album.img['alt'],
            'thumbnail': album.img['src'],
            'path': url_for('playList', url=album.a['href'], page=1, order='asc')
        }

    soup = tree.find_all('div', {'class': 'pagingBar_wrapper'})
    try:
        pages = soup[0].find_all('a')
    except:
        return

    for page in pages:
        url = page['href']
        if url == 'javascript:;':
            continue
        yield {
            'label': page.text,
            'path': url_for('albumlist', url=httphead(url.encode('utf-8')))
        }
Example No. 30
def genlist(start, end):
    global origin
    list_page = common.get_html(origin)
    chapterlist = []
    for i in range(start, end + 1):
        # print(i)
        text = '^Chapter %s$' % str(i)
        # if i in range(127, 137):
        #     text = '^Chapter %s' % str(i)
        # elif i in [149, 861, 1044, 1212]:
        #     text = '^Chapter %s-.*' % str(i)
        # elif i == 283:
        #     text = '^Chapter 284 – Special Requests'
        # elif i == 284:
        #     text = '^Chapter 284 – Seeing West Wonder King'
        # elif i == 311:
        #     text = '^Chapter 312 – Playing the Role of A Silkpants'
        # elif i == 312:
        #     text = '^Chapter 312 – Keeping Up Appearances'
        # elif i == 1350:
        #     continue
        link = list_page.find('a', text=re.compile(text))
        url = origin + link['href'].split("/")[-1]
        chapterlist.append(url)
    # print(chapterlist)
    chapterlist = list(dict.fromkeys(chapterlist))
    return chapterlist
Example No. 31
def serieslist(name, url):
    html = get_html(url)
    html = re.sub('\t|\n|\r| ', '', html)
    tree = BeautifulSoup(html, 'html.parser')
    soup = tree.find_all('span', {'class': 'item'})

    info = tree.find('meta', {'name': 'description'})['content']
    img = tree.find('meta', {'itemprop': 'image'})['content']

    for item in soup:
        try:
            p_title = item.a['title']
        except:
            continue
        try:
            href = httphead(item.a['href'])
        except:
            continue
        tn = item.a.text
        title = p_title + '--' + tn
        yield {
            'label': title,
            'path': url_for('playvideo', vid=0),
            'thumbnail': img,
            'info': {'title': title, 'plot': info}
        }
Example No. 32
def main(url):
    logging.info("getting html from %s", url)
    html = get_html(url)
    logging.info("searching from keys")
    keys = get_keywords(html)
    return format_keys(keys)