def test_clean_html_body(self):
    """Smoke test: http.clean_html_body() runs on a small HTML document without raising."""
    from yarss2 import load_libs
    load_libs()
    # Minimal document with a head/title and two styled paragraphs.
    html_doc = """<html> <head> <title> Page title </title> </head> <body> <p id="firstpara" align="center"> This is paragraph <b> one </b> </p> <p id="secondpara" align="blah"> This is paragraph <b> two </b> </p> </body> </html>"""
    http.clean_html_body(html_doc)
def test_clean_html_body(self):
    """http.clean_html_body() should process a small HTML document and return a result.

    NOTE(review): previously the return value was captured into ``cleaned`` but
    never checked, so a cleaner that silently returned nothing could not fail
    this test. Assert the result is not None as a minimal sanity check.
    """
    from yarss2 import load_libs
    load_libs()
    web_page = """<html> <head> <title> Page title </title> </head> <body> <p id="firstpara" align="center"> This is paragraph <b> one </b> </p> <p id="secondpara" align="blah"> This is paragraph <b> two </b> </p> </body> </html>"""
    cleaned = http.clean_html_body(web_page)
    self.assertIsNotNone(cleaned)
def add_torrent_callback(torrent_download):
    """Handle the result of a torrent download attempt.

    On success, does nothing. On failure, shows the (HTML-cleaned) response
    body in the messages text view and brings the lower notebook page into
    view so the user can read the error.
    """
    torrent_download = TorrentDownload(torrent_download)
    if torrent_download.success:
        return
    # Fix: a failed download may carry no body at all — without this guard
    # http.clean_html_body() would be called with None and blow up.
    if torrent_download.filedump is None:
        return
    textview = self.glade.get_widget("textview_messages")
    textbuffer = textview.get_buffer()
    readable_body = http.clean_html_body(torrent_download.filedump)
    textbuffer.set_text(readable_body)
    notebook = self.glade.get_widget("notebook_lower")
    notebook.set_current_page(1)
    # Quick hack to make sure the message is visible to the user.
    hpaned = self.glade.get_widget("hpaned_matching")
    max_pos = hpaned.get_property("max-position")
    hpaned.set_position(int(max_pos * 0.3))
def add_torrent_callback(torrent_download):
    """Report the outcome of a torrent download attempt in the UI.

    Returns True when the download succeeded, False after displaying the
    failure body to the user, and None when there is nothing to display.
    """
    download = TorrentDownload(torrent_download)
    if download.success:
        return True
    if download.filedump is None:
        return
    # Show the server response (made human-readable) in the messages pane.
    readable = http.clean_html_body(download.filedump)
    self.get_object("textview_messages").get_buffer().set_text(readable)
    self.get_object("notebook_lower").set_current_page(1)
    # Quick hack to make sure the message is visible to the user.
    matching_pane = self.get_object("hpaned_matching")
    limit = matching_pane.get_property("max-position")
    matching_pane.set_position(int(limit * 0.3))
    return False
def fetch_and_parse_rssfeed_atom(url_file_stream_or_string, site_cookies_dict=None, user_agent=None, request_headers=None, timeout=10):
    """Download a feed and parse it with the atoma library.

    Returns a feedparser-style dict. On a parse failure the dict carries
    "bozo"/"bozo_exception" fields plus the cleaned raw body under
    "raw_result"; in every case "parser" is set to "atoma".
    """
    result = http.download_file(url_file_stream_or_string,
                                site_cookies_dict=site_cookies_dict,
                                user_agent=user_agent,
                                request_headers=request_headers,
                                timeout=timeout)
    import atoma
    # Clear atoma's RSS-version whitelist — presumably so feeds with
    # nonstandard version attributes are not rejected outright.
    atoma.rss.supported_rss_versions = []
    feed_dict = {}
    try:
        parsed = atoma.parse_rss_bytes(result['content'])
        feed_dict = atoma_result_to_dict(parsed)
    except atoma.FeedXMLError as err:
        # Parsing failed: expose a readable copy of the raw body and the
        # feedparser-compatible error markers.
        feed_dict["raw_result"] = http.clean_html_body(result['content'])
        feed_dict["bozo"] = 1
        feed_dict["feed"] = {}
        feed_dict["items"] = []
        feed_dict["bozo_exception"] = err
    feed_dict['parser'] = "atoma"
    return feed_dict