def list_videos(plugin, item_id, category_url, page, **kwargs):
    """List the videos of one page, ending with a "next page" folder."""
    html_fragment = urlquick.get(category_url % page).json()
    builder = htmlement.HTMLement()
    builder.feed(html_fragment)
    page_root = builder.close()

    for anchor in page_root.iterfind(".//a"):
        thumbnail_node = anchor.find('.//img')
        item = Listitem()
        item.label = thumbnail_node.get('title')
        item.art['thumb'] = thumbnail_node.get('data-echo')
        item.set_callback(
            get_video_url,
            item_id=item_id,
            video_label=LABELS[item_id] + ' - ' + item.label,
            video_url=URL_ROOT_SITE + anchor.get('href'))
        item_post_treatment(item, is_playable=True, is_downloadable=True)
        yield item

    # More videos...
    yield Listitem.next_page(
        item_id=item_id,
        category_url=category_url,
        page=str(int(page) + 1))
def list_shows(plugin, item_id, category_mode, page, **kwargs):
    """Build categories listing"""
    response = urlquick.get(URL_PROGRAMS % (page, category_mode))
    payload = json.loads(response.text)
    doc = htmlement.HTMLement()
    doc.feed(payload["html"])
    tree = doc.close()

    for media_node in tree.iterfind(".//div[@class='media']"):
        poster = media_node.find('.//img')
        item = Listitem()
        item.label = poster.get('alt')
        item.art['thumb'] = URL_ROOT + poster.get('src')
        item.set_callback(
            list_videos,
            item_id=item_id,
            program_url=URL_ROOT + media_node.find('.//a').get('href'),
            nb_videos=0)
        item_post_treatment(item)
        yield item

    # More programs...
    yield Listitem.next_page(
        item_id=item_id, category_mode=category_mode, page=page + 1)
def list_videos(plugin, item_id, category_title, page, **kwargs):
    """List replay episodes for a category; notify when the page is empty."""
    episodes_payload = json.loads(
        urlquick.get(URL_VIDEOS % (category_title, page)).text)
    found_any = False
    for html_chunk in episodes_payload["items_html"]:
        chunk_parser = htmlement.HTMLement()
        chunk_parser.feed(html_chunk)
        chunk_root = chunk_parser.close()
        for entry in chunk_root.iterfind(".//li"):
            found_any = True
            thumb = entry.find('.//img')
            item = Listitem()
            item.label = thumb.get('alt')
            item.art['thumb'] = thumb.get('src')
            item.set_callback(
                get_video_url,
                item_id=item_id,
                video_label=LABELS[item_id] + ' - ' + item.label,
                video_url=URL_ROOT + entry.find('.//a').get('href'),
            )
            item_post_treatment(item, is_playable=True, is_downloadable=True)
            yield item

    if found_any:
        # More videos...
        yield Listitem.next_page(
            item_id=item_id, category_title=category_title, page=page + 1)
    else:
        plugin.notify(plugin.localize(LABELS['No videos found']), '')
        yield False
def list_emissions(plugin, item_id, category_url, page, **kwargs):
    """List the shows ("emissions") of one page (uncached fetch)."""
    fragment = urlquick.get(category_url % page, max_age=-1).json()
    tree_builder = htmlement.HTMLement()
    tree_builder.feed(fragment)
    tree = tree_builder.close()

    for emission in tree.iterfind(".//a[@class='emission-item-wrapper']"):
        item = Listitem()
        item.label = emission.find(".//h2[@class='emission-title']").text
        item.info['plot'] = emission.find(
            ".//span[@class='emission-description']").text
        artwork = emission.find('.//img').get('data-echo')
        item.art['thumb'] = item.art['landscape'] = artwork
        item.set_callback(
            list_videos_emission,
            item_id=item_id,
            video_url=URL_ROOT_SITE + emission.get('href'))
        item_post_treatment(item)
        yield item

    # More videos...
    yield Listitem.next_page(
        item_id=item_id,
        category_url=category_url,
        page=str(int(page) + 1))
def list_videos(plugin, item_id, category_title, page, **kwargs):
    """Build videos listing"""
    listing_json = json.loads(
        urlquick.get(URL_VIDEOS % (category_title, page)).text)
    has_results = False
    for markup in listing_json["items_html"]:
        fragment_parser = htmlement.HTMLement()
        fragment_parser.feed(markup)
        fragment_root = fragment_parser.close()
        for node in fragment_root.iterfind(".//li"):
            has_results = True
            image = node.find(".//img")
            item = Listitem()
            item.label = image.get("alt")
            item.art["thumb"] = item.art["landscape"] = image.get("src")
            item.set_callback(
                get_video_url,
                item_id=item_id,
                video_url=URL_ROOT + node.find(".//a").get("href"))
            item_post_treatment(item, is_playable=True, is_downloadable=True)
            yield item

    if has_results:
        # More videos...
        yield Listitem.next_page(
            item_id=item_id, category_title=category_title, page=page + 1)
    else:
        plugin.notify(plugin.localize(30718), "")
        yield False
def list_videos(plugin, item_id, category_url, page):
    """List videos of one page, adding a download context-menu entry."""
    markup = urlquick.get(category_url % page).json()
    html_parser = htmlement.HTMLement()
    html_parser.feed(markup)
    document = html_parser.close()

    for link in document.iterfind(".//a"):
        img = link.find('.//img')
        target_url = URL_ROOT_SITE + link.get('href')
        item = Listitem()
        item.label = img.get('title')
        item.art['thumb'] = img.get('data-src')
        item.context.script(
            get_video_url,
            plugin.localize(LABELS['Download']),
            item_id=item_id,
            video_url=target_url,
            video_label=LABELS[item_id] + ' - ' + item.label,
            download_mode=True)
        item.set_callback(
            get_video_url, item_id=item_id, video_url=target_url)
        yield item

    # More videos...
    yield Listitem.next_page(
        item_id=item_id,
        category_url=category_url,
        page=str(int(page) + 1))
def list_programs(plugin, item_id, category_title):
    """
    Build programs listing
    - Les feux de l'amour
    - ...
    """
    # Fetch the full shows page and slice out the fragment that follows the
    # given category title (raw string slicing instead of DOM traversal).
    resp = urlquick.get(URL_SHOWS)
    # NOTE(review): this replace looks like a no-op (apostrophe mapped to
    # apostrophe); it was presumably meant to substitute an HTML entity or a
    # typographic quote — confirm against the markup served by URL_SHOWS.
    start = '%s</span>' % category_title.replace("'", "'")
    end = '<span class="'
    # Everything between the title's closing </span> and the next <span
    sub_category_datas = (resp.text.split(start))[1].split(end)[0]
    parser = htmlement.HTMLement()
    parser.feed(sub_category_datas)
    root = parser.close()
    for program_datas in root.iterfind(".//a"):
        # Only links pointing at an "emissions" (show) page are programs
        if 'emissions' in program_datas.get('href'):
            program_title = program_datas.text
            program_url = URL_ROOT + program_datas.get('href')
            item = Listitem()
            item.label = program_title
            item.set_callback(
                list_videos,
                item_id=item_id,
                program_url=program_url,
                page='1')
            yield item
def list_videos_sports(plugin, item_id, category_url, start, end, **kwargs):
    """List sport videos in the [start, end) window; keep paginating while a
    full page of results comes back, notify when the window is empty."""
    response = urlquick.get(category_url.format(start=start, end=end))
    tile_parser = htmlement.HTMLement()
    tile_parser.feed(response.json())  # json unescaped string needed
    document = tile_parser.close()

    found = 0
    for tile in document.iterfind(".//div[@class='polaris-tile__inner']"):
        found += 1
        headline_link = tile.find('.//h2').find('.//a')
        item = Listitem()
        item.label = headline_link.text.strip()
        item.art['thumb'] = item.art['landscape'] = tile.find(
            './/img').get('data-src')
        item.set_callback(
            get_video_url,
            item_id=item_id,
            video_url=URL_ROOT_SKYSPORTS + headline_link.get('href'))
        item_post_treatment(item, is_playable=True, is_downloadable=True)
        yield item

    if found == VIDEO_PER_PAGE:
        # More videos...
        yield Listitem.next_page(
            item_id=item_id,
            category_url=category_url,
            start=end,
            end=end + VIDEO_PER_PAGE)
    elif found == 0:
        plugin.notify(plugin.localize(30718), '')
        yield False
def list_videos(plugin, item_id, page, **kwargs):
    """Add modes in the listing"""
    page_html = requests.get(URL_REPLAYS % page).text
    doc_parser = htmlement.HTMLement()
    doc_parser.feed(page_html)
    document = doc_parser.close()

    # Hoisted loop-invariant patterns: thumbnail URL inside a CSS url(...)
    # declaration, and the concert id inside the link path.
    style_url_re = re.compile(r'url\((.*?)\)')
    concert_id_re = re.compile(r'concert\/(.*?)\/')
    for li_node in document.iterfind(".//li"):
        if 'concert' not in li_node.get('class'):
            continue
        link = li_node.find('.//a')
        background = li_node.find(".//div[@class='imgContainer']").get('style')
        item = Listitem()
        item.label = link.get('title')
        item.art['thumb'] = item.art['landscape'] = (
            URL_ROOT + style_url_re.findall(background)[0])
        item.set_callback(
            get_video_url,
            item_id=item_id,
            video_id=concert_id_re.findall(link.get('href'))[0])
        item_post_treatment(item, is_playable=True, is_downloadable=True)
        yield item

    yield Listitem.next_page(item_id=item_id, page=str(int(page) + 1))
def get_live_url(plugin, item_id, video_id, item_dict, **kwargs):
    """Resolve the live-stream URL for a channel after logging in with the
    user's NRJ account.

    Returns the stream URL string on success, False when credentials are
    missing or the login fails.
    """
    # Live TV Not working / find a way to dump html received

    # Create session
    # KO - session_urlquick = urlquick.Session()
    session_requests = requests.session()

    # Get Token
    # KO - resp = session_urlquick.get(URL_COMPTE_LOGIN)
    resp = session_requests.get(URL_COMPTE_LOGIN_MODAL)
    # CSRF token embedded in the login form markup
    token_form_login = re.compile(
        r'name=\"login_form\[_token\]\" value=\"(.*?)\"').findall(resp.text)[0]

    # Bail out with an info dialog when either credential is unset
    if plugin.setting.get_string('nrj.login') == '' or\
            plugin.setting.get_string('nrj.password') == '':
        xbmcgui.Dialog().ok(
            'Info',
            plugin.localize(30604) % ('NRJ', 'http://www.nrj-play.fr'))
        return False

    # Build PAYLOAD
    payload = {
        "login_form[email]": plugin.setting.get_string('nrj.login'),
        "login_form[password]": plugin.setting.get_string('nrj.password'),
        "login_form[_token]": token_form_login
    }

    headers = {
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'referer': 'https://www.nrj-play.fr/%s' % item_id
    }

    # LOGIN
    # KO - resp2 = session_urlquick.post(
    #     URL_COMPTE_LOGIN, data=payload,
    #     headers={'User-Agent': web_utils.get_ua, 'referer': URL_COMPTE_LOGIN})
    resp2 = session_requests.post(URL_COMPTE_LOGIN,
                                  data=payload,
                                  headers=headers)
    # The error banner class only appears on a failed login page
    if 'error alert alert-danger' in repr(resp2.text):
        plugin.notify('ERROR', 'NRJ : ' + plugin.localize(30711))
        return False

    # GET page with url_live with the session logged
    # KO - resp3 = session_urlquick.get(
    #     URL_LIVE_WITH_TOKEN % item_id,
    #     headers={'User-Agent': web_utils.get_ua, 'referer': URL_LIVE_WITH_TOKEN % item_id})
    resp3 = session_requests.get(URL_LIVE_WITH_TOKEN % (item_id),
                                 headers=dict(referer=URL_LIVE_WITH_TOKEN %
                                              (item_id)))

    parser = htmlement.HTMLement()
    parser.feed(resp3.text)
    root = parser.close()
    live_data = root.find(".//div[@class='player']")
    # Player options are a JSON blob in the data-options attribute;
    # its "file" key holds the stream URL.
    url_live_json = live_data.get('data-options')
    url_live_json_jsonparser = json.loads(url_live_json)
    return url_live_json_jsonparser["file"]
def list_videos(plugin, item_id, program_url, nb_videos, **kwargs):
    """Build videos listing"""
    replay_episodes_html = urlquick.get(program_url).text
    # The program page embeds the search keyword used by the videos API
    # inside a "&q=...&auto" query fragment.
    program_title = re.compile(r'&q=(.*?)&auto').findall(
        replay_episodes_html)[0]
    replay_episodes_json = urlquick.get(URL_VIDEOS %
                                        (program_title, nb_videos)).text
    list_episodes_jsonparser = json.loads(replay_episodes_json)
    parser = htmlement.HTMLement()
    parser.feed(list_episodes_jsonparser["content"])
    root = parser.close()
    at_least_one_item = False
    for episode in root.iterfind(
            ".//div[@class='media zoomarticle afficheNotices']"):
        at_least_one_item = True
        item = Listitem()
        item.label = 'No title'
        # Premium episodes are flagged by a dedicated overlay div
        if episode.find(".//div[@class='media-inapremium-slide']") is not None:
            item.label = '[Ina Premium] ' + episode.find('.//img').get('alt')
        else:
            item.label = episode.find('.//img').get('alt')
        video_id = episode.find('.//a').get('href').split('/')[2]
        item.art['thumb'] = URL_ROOT + episode.find('.//img').get('src')
        # Duration rendered like "1h 2m 30s": convert each token to seconds
        video_duration_text_datas = episode.find(
            ".//span[@class='duration']").text.split(' ')
        video_duration = 0
        for video_duration_datas in video_duration_text_datas:
            if 's' in video_duration_datas:
                video_duration_datas = video_duration_datas.replace('s', '')
                video_duration = video_duration + int(video_duration_datas)
            elif 'm' in video_duration_datas:
                video_duration_datas = video_duration_datas.replace('m', '')
                video_duration = video_duration + (
                    int(video_duration_datas) * 60)
            elif 'h' in video_duration_datas:
                video_duration_datas = video_duration_datas.replace('h', '')
                video_duration = video_duration + (
                    int(video_duration_datas) * 3600)
        item.info['duration'] = video_duration
        # Broadcast date is optional
        if episode.find(".//span[@class='broadcast']") is not None:
            video_date = episode.find(".//span[@class='broadcast']").text
            item.info.date(video_date, '%d/%m/%Y')
        item.set_callback(get_video_url,
                          item_id=item_id,
                          video_label=LABELS[item_id] + ' - ' + item.label,
                          video_id=video_id)
        item_post_treatment(item, is_playable=True, is_downloadable=True)
        yield item

    if at_least_one_item:
        # More videos...
        yield Listitem.next_page(item_id=item_id,
                                 program_url=program_url,
                                 nb_videos=nb_videos + 48)
    else:
        plugin.notify(plugin.localize(LABELS['No videos found']), '')
        yield False
def test_partial_filter():
    # Check that the root-tag filter still applies when the document is
    # fed to the parser in two partial chunks
    html = "<html><body><div test='attribute'><p>text</p></div></body></html>"
    obj = htmlement.HTMLement("div")
    obj.feed(html[:51])
    obj.feed(html[51:])
    root = obj.close()
    assert root.tag == "div"
    assert root[0].tag == "p"
def list_sub_categories(plugin, item_id, category_url, **kwargs):
    """List the sub-categories of a category page.

    Yields one folder per section found directly in the page markup, then
    one per section found in the lazily-loaded JSON "blocks" referenced by
    data-uuid attributes.
    """
    resp = urlquick.get(category_url)
    root = resp.parse()
    for sub_category_datas in root.iterfind(
            ".//section[@class='js-item-container']"):
        # The title sits either directly in the h2 or in a nested link
        if sub_category_datas.find('.//h2').text is not None:
            sub_category_title = sub_category_datas.find('.//h2').text.strip()
        else:
            sub_category_title = sub_category_datas.find(
                './/h2/a').text.strip()
        sub_category_id = sub_category_datas.get('id')
        item = Listitem()
        item.label = sub_category_title
        item.set_callback(list_videos_sub_category,
                          item_id=item_id,
                          category_url=category_url,
                          sub_category_id=sub_category_id)
        item_post_treatment(item)
        yield item

    # Second pass: sections delivered asynchronously as JSON "blocks"
    list_data_uuid = re.compile(r'data-uuid\=\"(.*?)\"').findall(resp.text)
    for sub_category_data_uuid in list_data_uuid:
        resp2 = urlquick.get(
            URL_SUB_CATEGORIES % (sub_category_data_uuid,
                                  sub_category_data_uuid.split('-')[1]))
        json_parser = json.loads(resp2.text)
        if sub_category_data_uuid in json_parser["blocks"]:
            parser = htmlement.HTMLement()
            parser.feed(json_parser["blocks"][sub_category_data_uuid])
            root_2 = parser.close()
            for sub_category_dl_data in root_2.iterfind(
                    ".//section[@class='js-item-container']"):
                # Same direct-vs-nested title fallback as above
                if sub_category_dl_data.find('.//h2').text is not None:
                    sub_category_dl_title = sub_category_dl_data.find(
                        './/h2').text.strip()
                else:
                    sub_category_dl_title = sub_category_dl_data.find(
                        './/h2/a').text.strip()
                sub_category_dl_id = sub_category_dl_data.get('id')
                item = Listitem()
                item.label = sub_category_dl_title
                item.set_callback(
                    list_videos_sub_category_dl,
                    item_id=item_id,
                    sub_category_data_uuid=sub_category_data_uuid,
                    sub_category_id=sub_category_dl_id)
                item_post_treatment(item)
                yield item
def test_basic_partial():
    # Feeding a simple document in two pieces must still yield a full tree
    source = "<html><body></body></html>"
    parser = htmlement.HTMLement()
    first_part, second_part = source[:9], source[9:]
    parser.feed(first_part)
    parser.feed(second_part)
    root = parser.close()
    assert Etree.iselement(root)
    assert root.tag == "html"
    assert root[0].tag == "body"
def list_videos_search(plugin, item_id, nb_videos, search_query):
    """List search results; paginates by bumping nb_videos by 48."""
    replay_episodes_json = urlquick.get(URL_VIDEOS_SEARCH %
                                        (search_query, nb_videos)).text
    list_episodes_jsonparser = json.loads(replay_episodes_json)
    parser = htmlement.HTMLement()
    parser.feed(list_episodes_jsonparser["content"])
    root = parser.close()
    for episode in root.iterfind(".//div[@class='media zoomarticle']"):
        item = Listitem()
        item.label = 'No title'
        # Premium results are flagged by a dedicated overlay div
        if episode.find(
                ".//div[@class='media-inapremium-search']") is not None:
            item.label = '[Ina Premium] ' + episode.find('.//img').get('alt')
        else:
            item.label = episode.find('.//img').get('alt')
        video_id = episode.find('.//a').get('href').split('/')[2]
        item.art['thumb'] = URL_ROOT + episode.find('.//img').get('src')
        # Duration rendered like "1h 2m 30s": convert each token to seconds
        video_duration_text_datas = episode.find(
            ".//span[@class='duration']").text.split(' ')
        video_duration = 0
        for video_duration_datas in video_duration_text_datas:
            if 's' in video_duration_datas:
                video_duration_datas = video_duration_datas.replace('s', '')
                video_duration = video_duration + int(video_duration_datas)
            elif 'm' in video_duration_datas:
                video_duration_datas = video_duration_datas.replace('m', '')
                video_duration = video_duration + (
                    int(video_duration_datas) * 60)
            elif 'h' in video_duration_datas:
                video_duration_datas = video_duration_datas.replace('h', '')
                video_duration = video_duration + (
                    int(video_duration_datas) * 3600)
        item.info['duration'] = video_duration
        # Broadcast date is optional
        if episode.find(".//span[@class='broadcast']") is not None:
            video_date = episode.find(".//span[@class='broadcast']").text
            item.info.date(video_date, '%d/%m/%Y')
        item.context.script(get_video_url,
                            plugin.localize(LABELS['Download']),
                            item_id=item_id,
                            video_id=video_id,
                            video_label=LABELS[item_id] + ' - ' + item.label,
                            download_mode=True)
        item.set_callback(get_video_url, item_id=item_id, video_id=video_id)
        yield item

    # More videos...
    yield Listitem.next_page(item_id=item_id,
                             nb_videos=nb_videos + 48,
                             search_query=search_query)
def list_videos_sports(plugin, item_id, category_url, start, end, **kwargs):
    # NOTE(review): this duplicate of list_videos_sports appears truncated —
    # the loop computes a title and an image but never builds or yields a
    # Listitem, so the function currently produces no items. Confirm against
    # the complete same-named implementation before relying on it.
    parser = htmlement.HTMLement()
    resp = urlquick.get(category_url.format(start=start, end=end))
    parser.feed(resp.json())  # json unescaped string needed
    root = parser.close()
    at_least_one_item = 0
    for video_datas in root.iterfind(".//div[@class='polaris-tile__inner']"):
        video_title = video_datas.find('.//h2').find('.//a').text.strip()
        video_image = video_datas.find('.//img').get('data-src')
def list_videos_sub_category_dl(plugin, item_id, sub_category_data_uuid,
                                sub_category_id):
    """List the videos of one lazily-loaded sub-category block, scraping
    title/image/id out of the article markup."""
    resp = urlquick.get(
        URL_SUB_CATEGORIES % (sub_category_data_uuid,
                              sub_category_data_uuid.split('-')[1]))
    json_parser = json.loads(resp.text)
    parser = htmlement.HTMLement()
    parser.feed(json_parser["blocks"][sub_category_data_uuid])
    root = parser.close()
    for sub_category_dl_datas in root.iterfind(
            ".//section[@class='js-item-container']"):
        # Only the section matching the requested id is listed
        if sub_category_dl_datas.get('id') == sub_category_id:
            list_videos_datas = sub_category_dl_datas.findall('.//article')
            for video_datas in list_videos_datas:
                if video_datas.get('data-type') == 'media':
                    # Title is "h3 link title - h4 text" when an h4 exists
                    if video_datas.find('.//h4') is not None:
                        video_title = video_datas.find('.//h3').find(
                            './/a').get('title') + ' - ' + \
                            video_datas.find('.//h4').text
                    else:
                        video_title = video_datas.find('.//h3').find(
                            './/a').get('title')
                    # Keeps the last URL of the srcset candidate list
                    video_image = ''
                    image_datas = video_datas.find('.//img').get(
                        'data-srcset').split(',')
                    for image_data in image_datas:
                        video_image = image_data.split(' ')[0]
                    video_id = video_datas.get('data-id')
                    item = Listitem()
                    item.label = video_title
                    item.art['thumb'] = video_image
                    item.context.script(
                        get_video_url,
                        plugin.localize(LABELS['Download']),
                        item_id=item_id,
                        video_id=video_id,
                        video_label=LABELS[item_id] + ' - ' + item.label,
                        download_mode=True)
                    item.set_callback(get_video_url,
                                      item_id=item_id,
                                      video_id=video_id)
                    yield item
def list_videos_sub_category_dl(plugin, item_id, sub_category_data_uuid,
                                sub_category_id, **kwargs):
    """List the playable videos of one lazily-loaded sub-category block,
    reading the metadata from each article's JSON data-card attribute."""
    resp = urlquick.get(
        URL_SUB_CATEGORIES % (sub_category_data_uuid,
                              sub_category_data_uuid.split('-')[1]))
    json_parser = json.loads(resp.text)
    parser = htmlement.HTMLement()
    parser.feed(json_parser["blocks"][sub_category_data_uuid])
    root = parser.close()
    for section in root.iterfind(".//section[@class='js-item-container']"):
        # Only the section matching the requested id is listed
        if section.get('id') != sub_category_id:
            continue
        for video_datas in section.findall('.//article'):
            # Fixed: the attribute was fetched twice with two separate
            # emptiness checks; one falsy test covers both None and "".
            data_card = video_datas.get('data-card')
            if not data_card:
                continue
            card = json.loads(data_card)
            if not card["isVideo"]:
                continue
            if "mediaId" not in card:
                continue
            item = Listitem()
            item.label = card["title"] + ' - ' + card["subtitle"]
            item.art['thumb'] = item.art['landscape'] = (
                card["illustration"]["format1248"])
            item.set_callback(get_video_url2,
                              item_id=item_id,
                              video_id=card["mediaId"])
            item_post_treatment(item, is_playable=True, is_downloadable=True)
            yield item
def get_live_url(plugin, item_id, video_id, **kwargs):
    """Resolve the live-stream URL for a channel after logging in with the
    user's NRJ account; returns False when the login fails."""
    # Live TV Not working / find a way to dump html received

    # Create session
    # KO - session_urlquick = urlquick.Session()
    session_requests = requests.session()

    # Build PAYLOAD
    payload = {
        "email": plugin.setting.get_string('nrj.login'),
        "password": plugin.setting.get_string('nrj.password')
    }

    headers = {
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'origin': 'https://www.nrj-play.fr',
        'referer': 'https://www.nrj-play.fr/'
    }

    # LOGIN
    # KO - resp2 = session_urlquick.post(
    #     URL_COMPTE_LOGIN, data=payload,
    #     headers={'User-Agent': web_utils.get_ua, 'referer': URL_COMPTE_LOGIN})
    resp2 = session_requests.post(URL_COMPTE_LOGIN,
                                  data=payload,
                                  headers=headers)
    # The error banner class only appears on a failed login page
    if 'error alert alert-danger' in repr(resp2.text):
        plugin.notify('ERROR', 'NRJ : ' + plugin.localize(30711))
        return False

    # GET page with url_live with the session logged
    # KO - resp3 = session_urlquick.get(
    #     URL_LIVE_WITH_TOKEN % item_id,
    #     headers={'User-Agent': web_utils.get_ua, 'referer': URL_LIVE_WITH_TOKEN % item_id})
    resp3 = session_requests.get(URL_LIVE_WITH_TOKEN % (item_id),
                                 headers=dict(referer=URL_LIVE_WITH_TOKEN %
                                              (item_id)))

    parser = htmlement.HTMLement()
    parser.feed(resp3.text)
    root = parser.close()
    live_data = root.find(".//div[@class='player']")
    # Player options are a JSON blob in the data-options attribute;
    # its "file" key holds the stream URL.
    url_live_json = live_data.get('data-options')
    url_live_json_jsonparser = json.loads(url_live_json)
    return url_live_json_jsonparser["file"]
def quick_parsehtml(html, encoding=""):
    """Parse *html* with HTMLement, assert a valid element came back,
    and return the document root."""
    parser = htmlement.HTMLement(encoding=encoding)
    parser.feed(html)
    tree_root = parser.close()
    assert Etree.iselement(tree_root)
    return tree_root
def quick_parse_filter(html, tag, attrs=None, encoding=""):
    """Parse *html* filtered to *tag*/*attrs* and return the matched root."""
    filtered = htmlement.HTMLement(tag, attrs, encoding=encoding)
    filtered.feed(html)
    return filtered.close()
def test_initialization():
    # The parser must be constructible with its default arguments
    parser_obj = htmlement.HTMLement()
    assert isinstance(parser_obj, htmlement.HTMLement)