def get_movie_streams(movie_url):
    """Follow a movie page through to its player page and parse the result.

    Three documents are retrieved in sequence with ``utils.fetch_html``:
    the page at ``movie_url``, the page behind the link that
    ``parsers.get_player_link`` extracts from it, and the page behind the
    iframe source that ``parsers.get_player_iframe_src`` extracts from that.

    Args:
        movie_url: Value passed to ``utils.fetch_html`` for the first request.

    Returns:
        Whatever ``parsers.parse_player`` returns for the last document.
    """
    # Hop 1: movie page -> link to the player page.
    document = utils.fetch_html(movie_url)
    player_url = parsers.get_player_link(document)

    # Hop 2: player page -> source of the embedded iframe.
    document = utils.fetch_html(player_url)
    iframe_src = parsers.get_player_iframe_src(document)

    # Hop 3: iframe document -> parsed player data.
    document = utils.fetch_html(iframe_src)
    return parsers.parse_player(document)
def __prepare_comments(self, url):
    """Return the token sliced from the comment-list script of an article.

    The page at ``url`` is fetched and parsed; the ``data`` attribute of the
    element matching ``#ifcomment`` is appended to the
    ``https://cmt.ithome.com/comment/`` prefix, and that second page is
    fetched and parsed in turn.

    Args:
        url: Value passed to ``fetch_html`` for the first request.

    Returns:
        The slice ``[-18:-2]`` of the string content of the element matching
        ``#commentlist script`` (presumably a hash token -- the slice offsets
        depend on the exact script layout; confirm against the live page).
    """
    article_html = fetch_html(url)
    article_soup = BeautifulSoup(article_html, 'lxml')
    post_id = article_soup.select_one('#ifcomment')['data']

    comment_url = 'https://cmt.ithome.com/comment/' + post_id
    comment_html = fetch_html(comment_url)
    comment_soup = BeautifulSoup(comment_html, 'lxml')

    script_text = comment_soup.select_one('#commentlist script').string
    # Drop the final two characters and keep the sixteen before them.
    return script_text[-18:-2]
def get_down_url(self, item):
    """Fill ``item['down_url']`` with the download links for ``item['id']``.

    Fetches ``self.base_url + '/p/d.php?aid=' + item['id']``, takes the first
    element matching ``ul.down`` and passes every ``li/a/@href`` value inside
    it through ``self.yp_url``.

    Args:
        item: Mapping with an ``'id'`` entry (concatenated to a string, so it
            must be a ``str``). It is modified in place.

    Returns:
        None; the result is stored under ``item['down_url']`` as a list.
    """
    aid = item['id']
    page_url = self.base_url + '/p/d.php?aid=' + aid
    tree = lxml.html.fromstring(fetch_html(page_url))

    # Only the first "ul.down" list is consulted; IndexError if none exists.
    hrefs = tree.cssselect("ul.down")[0].xpath("li/a/@href")

    links = []
    for href in hrefs:
        links.append(self.yp_url(href))
    item['down_url'] = links
def get_ppt_list(self, cat="", page=1):
    """Return the entries found on one listing page.

    The URL is ``self.base_url + '/moban/' + cat`` followed by ``"/"`` for
    the first page or ``"/list-<page>.html"`` for any page greater than 1.

    Args:
        cat: String appended after ``/moban/``; defaults to ``""``.
        page: Page number; values of 1 or less select the ``"/"`` URL.

    Returns:
        A list with one dict per ``li/a[2]`` anchor inside the first
        ``ul.posts`` element, with keys:

        * ``'name'`` -- the anchor's text.
        * ``'url'``  -- ``self.yp_url`` applied to the anchor's ``href``.
        * ``'id'``   -- the part of ``href`` after its last ``/`` with the
          final five characters removed (presumably a ``.html`` suffix --
          confirm).
    """
    suffix = f"/list-{page}.html" if page > 1 else "/"
    listing_url = self.base_url + '/moban/' + cat + suffix

    markup = fetch_html(listing_url)
    tree = lxml.html.fromstring(markup)
    post_list = tree.cssselect("ul.posts")[0]

    entries = []
    for anchor in post_list.xpath("li/a[2]"):
        name = anchor.text
        href = anchor.get("href")
        entries.append({
            'name': name,
            'url': self.yp_url(href),
            'id': href[href.rfind("/") + 1:-5],
        })
    return entries
def list_movies(page):
    """Fetch one movie listing page and return its parsed form.

    Args:
        page: Value interpolated into ``MOVIES_URL`` with the ``%`` operator.

    Returns:
        The result of ``parsers.parse_movie_page`` for the fetched page.
    """
    listing_url = MOVIES_URL % page
    listing_html = utils.fetch_html(listing_url)
    return parsers.parse_movie_page(listing_html)
def search(q):
    """Run a search for ``q`` and return the parsed results.

    Args:
        q: Query text. It is URL-quoted with ``quote_plus`` before being
            interpolated into ``SEARCH_URL``.

    Returns:
        The result of ``parsers.parse_search`` for the fetched results page.
    """
    # ``urllib.quote_plus`` exists only on Python 2; on Python 3 the function
    # lives in ``urllib.parse`` and the old spelling raises AttributeError.
    # Importing locally keeps the block working on either interpreter.
    try:
        from urllib.parse import quote_plus
    except ImportError:  # Python 2 fallback
        from urllib import quote_plus

    search_url = SEARCH_URL % quote_plus(q)
    html_result = utils.fetch_html(search_url)
    return parsers.parse_search(html_result)
def get_movie_stream_from_player(player_url):
    """Parse the player data reachable from a player page URL.

    The page at ``player_url`` is fetched, the iframe source is extracted
    from it with ``parsers.get_player_iframe_src``, and the document behind
    that source is fetched and handed to ``parsers.parse_player``.

    Args:
        player_url: Value passed to ``utils.fetch_html`` for the first request.

    Returns:
        Whatever ``parsers.parse_player`` returns for the iframe document.
    """
    outer_html = utils.fetch_html(player_url)
    iframe_src = parsers.get_player_iframe_src(outer_html)

    inner_html = utils.fetch_html(iframe_src)
    streams = parsers.parse_player(inner_html)
    return streams
def list_episodes(url):
    """Return the parsed episode data reachable from a title page.

    The page at ``url`` is fetched, ``parsers.get_player_link`` extracts a
    link from it, and the page behind that link is fetched and passed to
    ``parsers.parse_episodes``.

    Args:
        url: Value passed to ``utils.fetch_html`` for the first request.

    Returns:
        Whatever ``parsers.parse_episodes`` returns for the second page.
    """
    title_page = utils.fetch_html(url)
    player_url = parsers.get_player_link(title_page)

    player_page = utils.fetch_html(player_url)
    episodes = parsers.parse_episodes(player_page)
    return episodes
def list_genre(genre, page):
    """Fetch one page of a genre listing and return its parsed form.

    Args:
        genre: Key looked up in ``GENRES`` to obtain the base URL
            (``KeyError`` if absent).
        page: Value formatted into the ``?page-<page>`` suffix.

    Returns:
        The result of ``parsers.parse_movie_page`` for the fetched page.
    """
    # NOTE(review): the suffix uses "-" rather than "=" after "page"; kept
    # exactly as written -- confirm it matches the site's URL scheme.
    genre_url = GENRES[genre] + '?page-%s' % page
    listing_html = utils.fetch_html(genre_url)
    return parsers.parse_movie_page(listing_html)
def list_series(page, order='view'):
    """Fetch one page of the series listing and return its parsed form.

    Args:
        page: Second value interpolated into ``SERIES_URL``.
        order: First value interpolated into ``SERIES_URL``; defaults to
            ``'view'``.

    Returns:
        The result of ``parsers.parse_movie_page`` for the fetched page.
    """
    # SERIES_URL takes its placeholders in (order, page) sequence.
    series_url = SERIES_URL % (order, page)
    listing_html = utils.fetch_html(series_url)
    return parsers.parse_movie_page(listing_html)
def list_genre(genre, page):
    """Return the parsed movie listing for one page of a genre.

    Args:
        genre: Key into ``GENRES`` selecting the base URL (``KeyError`` if
            the key is missing).
        page: Value formatted into the ``?page-<page>`` suffix appended to
            the base URL.

    Returns:
        Whatever ``parsers.parse_movie_page`` returns for the fetched page.
    """
    # NOTE(review): "?page-%s" uses a dash, not "="; reproduced unchanged --
    # verify against the target site's pagination format.
    page_suffix = "?page-%s" % page
    target_url = GENRES[genre] + page_suffix
    page_html = utils.fetch_html(target_url)
    return parsers.parse_movie_page(page_html)
def list_series(page, order="view"):
    """Return the parsed listing for one page of series.

    Args:
        page: Value filling the second placeholder of ``SERIES_URL``.
        order: Value filling the first placeholder of ``SERIES_URL``;
            defaults to ``"view"``.

    Returns:
        Whatever ``parsers.parse_movie_page`` returns for the fetched page.
    """
    url_args = (order, page)  # placeholder order is (order, page)
    page_html = utils.fetch_html(SERIES_URL % url_args)
    parsed = parsers.parse_movie_page(page_html)
    return parsed
def get_news_content(self, news_item):
    """Store the text of the ``#paragraph`` element on ``news_item``.

    The page at ``news_item['url']`` is fetched and parsed with the ``lxml``
    parser; the text of the first element matching ``#paragraph`` is written
    to ``news_item['content']``.

    Args:
        news_item: Mapping with a ``'url'`` entry. It is modified in place.

    Returns:
        None.
    """
    page_html = fetch_html(news_item['url'])
    parsed = BeautifulSoup(page_html, 'lxml')
    paragraph = parsed.select_one("#paragraph")
    # If no element matches, ``paragraph`` is None and ``.text`` raises
    # AttributeError -- same as the one-line form this replaces.
    news_item['content'] = paragraph.text