示例#1
0
def get_movie_streams(movie_url):
    """Resolve a movie page URL to the parsed contents of its player page.

    Three pages are fetched in sequence: the movie page, the player page it
    links to, and the page referenced by that player's iframe. The HTML of
    the last page is handed to ``parsers.parse_player`` and that result is
    returned unchanged.
    """
    # Hop 1: movie page -> link to the player page.
    player_url = parsers.get_player_link(utils.fetch_html(movie_url))
    # Hop 2: player page -> iframe source URL.
    iframe_url = parsers.get_player_iframe_src(utils.fetch_html(player_url))
    # Hop 3: iframe target -> parsed result.
    return parsers.parse_player(utils.fetch_html(iframe_url))
示例#2
0
    def __prepare_comments(self, url):
        """Fetch a page and extract a token from its associated comment page.

        The ``data`` attribute of the ``#ifcomment`` element on the first page
        is appended to the comment endpoint URL. From that second page, the
        text of the script inside ``#commentlist`` is sliced to ``[-18:-2]``
        and that slice is returned.
        """
        article = BeautifulSoup(fetch_html(url), 'lxml')
        page_id = article.select_one('#ifcomment')['data']

        comment_page = BeautifulSoup(
            fetch_html('https://cmt.ithome.com/comment/' + page_id), 'lxml'
        )
        script_text = comment_page.select_one('#commentlist script').string
        # NOTE(review): presumably this 16-character slice is the comment
        # hash embedded at the end of the script -- confirm against the markup.
        return script_text[-18:-2]
示例#3
0
 def get_down_url(self, item):
     """Populate ``item['down_url']`` from the download page of ``item['id']``.

     The page at ``<base_url>/p/d.php?aid=<id>`` is fetched, the ``href`` of
     every ``li/a`` under the first ``ul.down`` element is collected, and each
     one is passed through ``self.yp_url``. ``item`` is modified in place;
     nothing is returned.
     """
     aid = item['id']
     page = lxml.html.fromstring(
         fetch_html(self.base_url + '/p/d.php?aid=' + aid)
     )
     # Only the first matching list is used; an IndexError propagates if
     # the page has no "ul.down" element.
     hrefs = page.cssselect("ul.down")[0].xpath("li/a/@href")
     item['down_url'] = [self.yp_url(href) for href in hrefs]
示例#4
0
    def get_ppt_list(self, cat="", page=1):
        """Return the entries listed on one page of the ``/moban/`` section.

        Args:
            cat: Path fragment appended directly after ``/moban/``.
            page: Page number; values greater than 1 select
                ``/list-<page>.html``, anything else selects ``/``.

        Returns:
            A list of dicts with keys ``name`` (anchor text), ``url``
            (the anchor's href passed through ``self.yp_url``) and ``id``.
        """
        suffix = f"/list-{page}.html" if page > 1 else "/"
        tree = lxml.html.fromstring(
            fetch_html(self.base_url + '/moban/' + cat + suffix)
        )
        listing = tree.cssselect("ul.posts")[0]

        entries = []
        # The second <a> of each <li> carries the title and the link.
        for anchor in listing.xpath("li/a[2]"):
            href = anchor.get("href")
            entries.append({
                'name': anchor.text,
                'url': self.yp_url(href),
                # Last path segment minus its final five characters
                # (presumably a ".html" extension -- confirm).
                'id': href[href.rfind("/") + 1:-5],
            })
        return entries
示例#5
0
def list_movies(page):
    """Return the parsed movie listing for ``page``.

    ``page`` is interpolated into ``MOVIES_URL``; the HTML fetched from that
    address is handed to ``parsers.parse_movie_page``.
    """
    listing_url = MOVIES_URL % page
    listing_html = utils.fetch_html(listing_url)
    return parsers.parse_movie_page(listing_html)
示例#6
0
def search(q):
    """Run a site search for ``q`` and return the parsed results.

    The query is URL-encoded with ``quote_plus`` (spaces become ``+``),
    interpolated into ``SEARCH_URL``, and the fetched HTML is passed to
    ``parsers.parse_search``.
    """
    # ``urllib.quote_plus`` only exists on Python 2; on Python 3 the function
    # lives in ``urllib.parse``. Resolve it locally so the call works on both.
    try:
        from urllib.parse import quote_plus
    except ImportError:  # Python 2
        from urllib import quote_plus

    search_url = SEARCH_URL % quote_plus(q)
    html_result = utils.fetch_html(search_url)
    return parsers.parse_search(html_result)
示例#7
0
def get_movie_stream_from_player(player_url):
    """Return the parsed player data reachable from a player page URL.

    The player page is fetched to find its iframe source; that source is
    fetched in turn and its HTML is given to ``parsers.parse_player``.
    """
    iframe_url = parsers.get_player_iframe_src(utils.fetch_html(player_url))
    return parsers.parse_player(utils.fetch_html(iframe_url))
示例#8
0
def list_episodes(url):
    """Return the episodes parsed from the player page linked by ``url``.

    The page at ``url`` is fetched to obtain the player link; the player
    page's HTML is then passed to ``parsers.parse_episodes``.
    """
    player_url = parsers.get_player_link(utils.fetch_html(url))
    return parsers.parse_episodes(utils.fetch_html(player_url))
示例#9
0
def list_genre(genre, page):
    """Return the parsed movie listing for one page of a genre.

    The base address is looked up in ``GENRES`` by ``genre`` (a missing key
    raises ``KeyError``) and the page number is appended as ``?page-<page>``.
    """
    page_url = GENRES[genre] + '?page-%s' % page
    return parsers.parse_movie_page(utils.fetch_html(page_url))
示例#10
0
def list_series(page, order='view'):
    """Return the parsed series listing for ``page`` in the given ``order``.

    ``SERIES_URL`` is filled with ``(order, page)``, in that sequence, and
    the fetched HTML is passed to ``parsers.parse_movie_page``.
    """
    listing_url = SERIES_URL % (order, page)
    listing_html = utils.fetch_html(listing_url)
    return parsers.parse_movie_page(listing_html)
示例#11
0
def list_genre(genre, page):
    """Fetch and parse one page of the listing for ``genre``.

    ``GENRES[genre]`` supplies the base address (``KeyError`` if the genre
    is unknown); ``?page-<page>`` is appended before fetching. The HTML is
    parsed by ``parsers.parse_movie_page``.
    """
    base_url = GENRES[genre]
    page_suffix = "?page-%s" % page
    genre_html = utils.fetch_html(base_url + page_suffix)
    return parsers.parse_movie_page(genre_html)
示例#12
0
def list_series(page, order="view"):
    """Fetch and parse one page of the series listing.

    Args:
        page: Second value interpolated into ``SERIES_URL``.
        order: First value interpolated into ``SERIES_URL``.

    Returns:
        Whatever ``parsers.parse_movie_page`` yields for the fetched HTML.
    """
    url_args = (order, page)
    return parsers.parse_movie_page(utils.fetch_html(SERIES_URL % url_args))
示例#13
0
 def get_news_content(self, news_item):
     """Store the article text of ``news_item['url']`` in ``news_item['content']``.

     The page is fetched and parsed with the ``lxml`` backend; the text of
     its ``#paragraph`` element becomes the ``content`` entry. ``news_item``
     is modified in place and nothing is returned.
     """
     document = BeautifulSoup(fetch_html(news_item['url']), 'lxml')
     body_node = document.select_one("#paragraph")
     news_item['content'] = body_node.text