def test_server_start_run(self):
    """
    Starts a documentation server with ``run_doc_server`` on port 8094,
    fetches the main page and the search page of the ``pyquickhelper``
    mapping and checks their content, then shuts the server down.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    path = os.path.abspath(os.path.split(__file__)[0])
    data = os.path.join(path, "data")
    server = 'localhost'
    thread = run_doc_server(server, {"pyquickhelper": data}, True, port=8094)

    try:
        url = "http://localhost:8094/pyquickhelper/"
        cont = get_url_content(url)
        self.assertNotEmpty(cont)
        self.assertIn("GitHub/pyquickhelper</a>", cont)
        fLOG("-------")

        url = "http://localhost:8094/pyquickhelper/search.html?q=flog&check_keywords=yes&area=default"
        cont = get_url_content(url)
        self.assertNotEmpty(cont)
        self.assertIn("Please activate JavaScript to enable the search", cont)
        self.assertIn("http://sphinx.pocoo.org/", cont)

        # same request with the second positional argument set to True
        cont = get_url_content(url, True)
        self.assertNotEmpty(cont)
        self.assertIn("Please activate JavaScript to enable the search", cont)
        self.assertIn("http://sphinx.pocoo.org/", cont)
    finally:
        # always stop the server, even when an assertion above failed,
        # otherwise the thread keeps running and holds the port
        thread.shutdown()

    if thread.is_alive():
        fLOG("thread is still alive?", thread.is_alive())
        # self.fail is not stripped by ``python -O``, unlike ``assert False``
        self.fail("the documentation server thread is still alive after shutdown")
def get_elysee_speech_from_elysees(
        title, url="http://www.elysee.fr/chronologie/article/"):
    """
    Retrieves the text of a speech from the Elysée website.

    @param      title       title of the document, or a full url
                            (anything starting with ``http``)
    @param      url         base url of the website, it must end with ``/``
    @return                 result of ``xmlParsingLongestDiv`` applied to the
                            downloaded page, or None if the download failed

    If *title* is not a url, the function builds one like::

        url + title.replace(" ","-") + "/"

    after lowering the title, removing accents, replacing quotes
    by ``-`` and removing double quotes.
    """
    import warnings

    if title.startswith("http"):
        full = title
    else:
        if not url.endswith("/"):
            raise Exception("url should end with /: " + url)
        link = remove_accent(title.lower()).replace(
            " ", "-").replace("'", "-").replace('"', "")
        # url already ends with "/", adding another one produced "//"
        full = url + link + "/"

    try:
        text = get_url_content(full)
    except Exception as e:
        # best effort: a page which cannot be fetched is skipped by callers,
        # the failure is reported as a warning instead of being silent
        warnings.warn("Unable to retrieve '{0}' - {1}".format(full, e))
        return None
    return xmlParsingLongestDiv(text)
def get_elysee_speech_from_elysees(
        title, url="http://www.elysee.fr/chronologie/article/"):
    """
    Retrieves the text of a speech from the Elysée website.

    @param      title       title of the document, or a full url
                            (anything starting with ``http``)
    @param      url         base url of the website, it must end with ``/``
    @return                 result of ``xmlParsingLongestDiv`` applied to the
                            downloaded page, or None if the download failed

    If *title* is not a url, the function builds one like::

        url + title.replace(" ","-") + "/"

    after lowering the title, removing accents, replacing quotes
    by ``-`` and removing double quotes.
    """
    import warnings

    if title.startswith("http"):
        full = title
    else:
        if not url.endswith("/"):
            raise Exception("url should end with /: " + url)
        slug = remove_accent(title.lower())
        slug = slug.replace(" ", "-").replace("'", "-").replace('"', "")
        # url already ends with "/", adding another one produced "//"
        full = url + slug + "/"

    try:
        text = get_url_content(full)
    except Exception as e:
        # best effort: a page which cannot be fetched is skipped by callers,
        # the failure is reported as a warning instead of being silent
        warnings.warn("Unable to retrieve '{0}' - {1}".format(full, e))
        return None
    return xmlParsingLongestDiv(text)
def get_elysee_speech_from_elysees(title, url="https://www.elysee.fr/"):
    """
    Retrieves the text from the :epkg:`Elysees`.

    @param      title       title of the document, or a full url
                            (anything starting with ``http``)
    @param      url         website, it must end with ``/``
    @return                 result of ``xmlParsingLongestDiv`` applied to the
                            downloaded page, or None if the download failed
                            (a warning is emitted in that case)

    If *title* is not a url, the function tries something like::

        url + title.replace(" ","-") + "/"
    """
    if title.startswith("http"):
        full = title
    else:
        if not url.endswith("/"):
            # ValueError is a subclass of Exception, existing handlers still work
            raise ValueError("url should end with /: " + url)
        link = remove_accent(title.lower()).replace(" ", "-").replace(
            "'", "-").replace('"', "")
        # url already ends with "/", adding another one produced "//"
        full = url + link + "/"

    try:
        text = get_url_content(full)
    except Exception as e:
        # best effort: callers skip the pages which cannot be fetched
        warnings.warn("Unable to retrieve '{0}' - {1}".format(full, e))
        return None
    return xmlParsingLongestDiv(text)
def get_elysee_speech_from_elysees(title, url="https://www.elysee.fr/"):
    """
    Retrieves the text from the :epkg:`Elysees`.

    @param      title       title of the document, or a full url
                            (anything starting with ``http``)
    @param      url         website, it must end with ``/``
    @return                 result of ``xmlParsingLongestDiv`` applied to the
                            downloaded page, or None if the download failed
                            (a warning is emitted in that case)

    If *title* is not a url, the function tries something like::

        url + title.replace(" ","-") + "/"
    """
    if title.startswith("http"):
        full = title
    else:
        if not url.endswith("/"):
            # ValueError is a subclass of Exception, existing handlers still work
            raise ValueError("url should end with /: " + url)
        slug = remove_accent(title.lower())
        slug = slug.replace(" ", "-").replace("'", "-").replace('"', "")
        # url already ends with "/", adding another one produced "//"
        full = url + slug + "/"

    try:
        text = get_url_content(full)
    except Exception as e:
        # best effort: callers skip the pages which cannot be fetched
        warnings.warn("Unable to retrieve '{0}' - {1}".format(full, e))
        return None
    return xmlParsingLongestDiv(text)
def test_flask_thread(self):
    """
    Runs a Flask application in a thread on port 8025, queries a few
    pages and asks the server to shut down.

    On Linux, this test fails unless the firewall
    is told to allow port 8025:

    ::

        sudo ufw allow 8025
        sudo ufw enable
    """
    app = create_application()
    th = FlaskInThread(app, host="localhost", port=8025)
    th.start()
    site = "http://localhost:8025/"

    # main page
    c = get_url_content(site)
    self.assertIn("Simple Flask Site", c)

    # exception
    c = get_url_content(site + "help/exception")
    self.assertIn("STACK:", c)

    # help for
    c = get_url_content(site + "help/ask/for/help")
    fLOG(c)
    self.assertIn("help for command: ask/for/help", c)

    # shutdown
    c = requests.post(site + "shutdown/")
    fLOG(c.text)
    self.assertIn("Server shutting down...", c.text)

    # gives the thread up to five seconds to stop
    nb = 0
    while th.is_alive() and nb < 5:
        fLOG("waiting...", nb)
        time.sleep(1)
        nb += 1

    if th.is_alive():
        fLOG("thread is still alive (1)?", th.is_alive())
        # self.fail is not stripped by ``python -O``, unlike ``assert False``
        self.fail("the Flask thread is still alive after the shutdown request")
def test_flask(self):
    """
    Runs the Flask application in a thread on port 8025, queries a few
    pages and asks the server to shut down. The test does nothing
    when the interpreter path contains ``travis``.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    if "travis" in sys.executable:
        # skip travis and Flask
        return

    th = FlaskInThread(app, host="localhost", port=8025)
    th.start()
    site = "http://localhost:8025/"

    # main page
    c = get_url_content(site)
    # the original ``assert "Simple Flask Site"`` was always true,
    # the page content was never checked
    self.assertIn("Simple Flask Site", c)

    # exception
    c = get_url_content(site + "help/exception")
    self.assertIn("STACK:", c)

    # help for
    c = get_url_content(site + "help/ask/for/help")
    fLOG(c)
    self.assertIn("help for command: ask/for/help", c)

    # shutdown
    c = requests.post(site + "shutdown/")
    fLOG(c.text)
    self.assertIn("Server shutting down...", c.text)

    # gives the thread up to five seconds to stop
    nb = 0
    while th.is_alive() and nb < 5:
        fLOG("waiting...", nb)
        time.sleep(1)
        nb += 1

    if th.is_alive():
        fLOG("thread is still alive (1)?", th.is_alive())
        # self.fail is not stripped by ``python -O``, unlike ``assert False``
        self.fail("the Flask thread is still alive after the shutdown request")
def test_flask(self):
    """
    Runs the Flask application in a thread on port 8025, queries a few
    pages and asks the server to shut down. The test does nothing
    when the interpreter path contains ``travis``.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    if "travis" in sys.executable:
        # skip travis and Flask
        return

    th = FlaskInThread(app, host="localhost", port=8025)
    th.start()
    site = "http://localhost:8025/"

    # main page
    c = get_url_content(site)
    # the original ``assert "Simple Flask Site"`` was always true,
    # the page content was never checked
    self.assertIn("Simple Flask Site", c)

    # exception
    c = get_url_content(site + "help/exception")
    self.assertIn("STACK:", c)

    # help for
    c = get_url_content(site + "help/ask/for/help")
    fLOG(c)
    self.assertIn("help for command: ask/for/help", c)

    # shutdown
    c = requests.post(site + "shutdown/")
    fLOG(c.text)
    self.assertIn("Server shutting down...", c.text)

    # gives the thread up to five seconds to stop
    nb = 0
    while th.is_alive() and nb < 5:
        fLOG("waiting...", nb)
        time.sleep(1)
        nb += 1

    if th.is_alive():
        fLOG("thread is still alive (1)?", th.is_alive())
        # self.fail is not stripped by ``python -O``, unlike ``assert False``
        self.fail("the Flask thread is still alive after the shutdown request")
def test_flask(self):
    """
    Runs the Flask application in a thread on port 8025, queries a few
    pages and asks the server to shut down. The test does nothing
    when ``is_travis_or_appveyor`` returns ``'travis'`` or ``'circleci'``.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    if is_travis_or_appveyor() in ('travis', 'circleci'):
        # Get an error: urllib.error.URLError:
        # <urlopen error [Errno 99] Cannot assign requested address>.
        return

    th = FlaskInThread(app, host="localhost", port=8025)
    th.start()
    site = "http://localhost:8025/"

    # main page
    c = get_url_content(site)
    self.assertIn("Simple Flask Site", c)

    # exception
    c = get_url_content(site + "help/exception")
    self.assertIn("STACK:", c)

    # help for
    c = get_url_content(site + "help/ask/for/help")
    fLOG(c)
    self.assertIn("help for command: ask/for/help", c)

    # shutdown
    c = requests.post(site + "shutdown/")
    fLOG(c.text)
    self.assertIn("Server shutting down...", c.text)

    # gives the thread up to five seconds to stop
    nb = 0
    while th.is_alive() and nb < 5:
        fLOG("waiting...", nb)
        time.sleep(1)
        nb += 1

    if th.is_alive():
        fLOG("thread is still alive (1)?", th.is_alive())
        # self.fail is not stripped by ``python -O``, unlike ``assert False``
        self.fail("the Flask thread is still alive after the shutdown request")
def enumerate_speeches_from_elysees(skip=0, use_json=False):
    """
    Enumerates speeches published on the Elysée website.

    @param      skip        number of leading entries to skip
    @param      use_json    if True, reads the JSON stream, otherwise the XML one
                            (the JSON format is incomplete)
    @return                 enumerate dictionaries, with keys
                            *text*, *title*, *date*, *description* for the JSON
                            stream, *link*, *text* for the XML stream

    Entries whose content cannot be retrieved
    (``get_elysee_speech_from_elysees`` returns None) are not yielded.

    .. exref::
        :title: Récupérer des discours du président de la république
        :tag: Exercice

        ::

            for i,disc in enumerate(enumerate_speeches_from_elysees()):
                print(disc)
    """
    if use_json:
        url = "http://www.elysee.fr/chronologie/download/json"
        js = retrieve_speeches_json(url)
        for i, event in enumerate(js):
            if i < skip:
                continue
            items = event.get("items", None)
            title = event.get("title", None)
            if items is None or not title:
                continue

            # an event is loaded only if one of its items is a dictionary
            # whose title mentions "discours" (speech)
            is_speech = any(
                isinstance(it, dict)
                and it.get("title") is not None
                and "discours" in it["title"]
                for it in items)
            if not is_speech:
                continue

            content = get_elysee_speech_from_elysees(title)
            if content is not None:
                yield dict(text=content,
                           title=title,
                           date=event.get("date", None),
                           description=event.get("description", None))
    else:
        url = "http://www.elysee.fr/chronologie/download/xml"
        xml = get_url_content(url)
        reg = re.compile("(http://.*?/article/.*?/)")
        for i, link in enumerate(reg.findall(xml)):
            # skip was only honoured by the JSON branch
            if i < skip:
                continue
            content = get_elysee_speech_from_elysees(link)
            if content is not None:
                yield dict(link=link, text=content)
def retrieve_speeches_json(
        url="http://www.elysee.fr/chronologie/download/json"):
    """
    Downloads the content of *url* and parses it as JSON.

    @param      url     url of the JSON stream
    @return             parsed JSON content (the original documentation
                        describes it as a list of documents)
    """
    # json.loads parses the text directly, no need to wrap it into a stream
    return json.loads(get_url_content(url))
def test_server_start_run(self):
    """
    Starts a documentation server with ``run_doc_server`` on port 8094,
    fetches the main page and the search page of the ``pyquickhelper``
    mapping and checks their content, then shuts the server down.
    The test does nothing on Python 2 or when ``is_travis_or_appveyor``
    returns ``"appveyor"``.
    """
    if sys.version_info[0] == 2:
        return
    if is_travis_or_appveyor() == "appveyor":
        return

    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    path = os.path.abspath(os.path.split(__file__)[0])
    data = os.path.join(path, "data")
    server = 'localhost'
    thread = run_doc_server(
        server, {"pyquickhelper": data}, True, port=8094)

    try:
        url = "http://localhost:8094/pyquickhelper/"
        cont = get_url_content(url)
        self.assertGreater(len(cont), 0)
        self.assertIn("GitHub/pyquickhelper</a>", cont)
        fLOG("-------")

        url = "http://localhost:8094/pyquickhelper/search.html?q=flog&check_keywords=yes&area=default"
        cont = get_url_content(url)
        self.assertGreater(len(cont), 0)
        self.assertIn("Please activate JavaScript to enable the search", cont)
        self.assertIn("http://sphinx.pocoo.org/", cont)

        # same request with the second positional argument set to True
        cont = get_url_content(url, True)
        self.assertGreater(len(cont), 0)
        self.assertIn("Please activate JavaScript to enable the search", cont)
        self.assertIn("http://sphinx.pocoo.org/", cont)
    finally:
        # always stop the server, even when an assertion above failed,
        # otherwise the thread keeps running and holds the port
        thread.shutdown()

    if thread.is_alive():
        fLOG("thread is still alive?", thread.is_alive())
        # self.fail is not stripped by ``python -O``, unlike ``assert False``
        self.fail("the documentation server thread is still alive after shutdown")
def enumerate_speeches_from_elysees(url="agenda", skip=0):
    """
    Enumerates speeches from the :epkg:`Elysees`.

    @param      url     subaddress, url source will be ``'https://www.elysee.fr/' + url``,
                        a full url (starting with ``http``) is used as is
    @param      skip    skip the first *skip* one in the list
    @return             enumerate dictionaries with keys *link* and *text*

    The function raises *ValueError* if no link can be extracted
    from the page. Links whose content cannot be retrieved
    (``get_elysee_speech_from_elysees`` returns None) are not yielded.

    .. exref::
        :title: Récupérer des discours du président de la république
        :tag: Exercice

        ::

            for i, disc in enumerate(enumerate_speeches_from_elysees()):
                print(disc)

    Others links can be used such as
    ``https://www.elysee.fr/recherche?query=discours``.
    The website changed in 2018 and no longer
    support xml or json streams.
    """
    base = "https://www.elysee.fr/"
    if not url.startswith("http"):
        # base already ends with "/", a leading one would produce "//"
        url = base + url.lstrip("/")
    xml = get_url_content(url)

    # links look like href=".../YYYY/MM/DD/..." class=
    reg = re.compile(
        "href=\\\"(.+?/[0-9]{4}/[0-9]{2}/[0-9]{2}/.+?)\\\" class=")
    links = reg.findall(xml)
    if not links:
        raise ValueError(
            "Unable to extract links from url='{0}'\npattern='{1}'\n-----\n{2}"
            .format(url, reg.pattern, xml))

    for i, link in enumerate(links):
        if i < skip:
            continue
        if link.startswith("/"):
            # relative link: base ends with "/" and link starts with one
            link = base.rstrip("/") + link
        content = get_elysee_speech_from_elysees(link)
        if content is not None:
            yield dict(link=link, text=content)
def enumerate_speeches_from_elysees(url="agenda", skip=0):
    """
    Enumerates speeches from the :epkg:`Elysees`.

    @param      url     subaddress, url source will be ``'https://www.elysee.fr/' + url``,
                        a full url (starting with ``http``) is used as is
    @param      skip    skip the first *skip* one in the list
    @return             enumerate dictionaries with keys *link* and *text*

    The function raises *ValueError* if no link can be extracted
    from the page. Links whose content cannot be retrieved
    (``get_elysee_speech_from_elysees`` returns None) are not yielded.

    .. exref::
        :title: Récupérer des discours du président de la république
        :tag: Exercice

        ::

            for i, disc in enumerate(enumerate_speeches_from_elysees()):
                print(disc)

    Others links can be used such as
    ``https://www.elysee.fr/recherche?query=discours``.
    The website changed in 2018 and no longer
    support xml or json streams.
    """
    base = "https://www.elysee.fr/"
    if not url.startswith("http"):
        # base already ends with "/", a leading one would produce "//"
        url = base + url.lstrip("/")
    xml = get_url_content(url)

    # links look like href=".../YYYY/MM/DD/..." class=
    reg = re.compile(
        "href=\\\"(.+?/[0-9]{4}/[0-9]{2}/[0-9]{2}/.+?)\\\" class=")
    links = reg.findall(xml)
    if not links:
        raise ValueError("Unable to extract links from url='{0}'\npattern='{1}'\n-----\n{2}".format(
            url, reg.pattern, xml))

    for i, link in enumerate(links):
        if i < skip:
            continue
        if link.startswith("/"):
            # relative link: base ends with "/" and link starts with one
            link = base.rstrip("/") + link
        content = get_elysee_speech_from_elysees(link)
        if content is not None:
            yield dict(link=link, text=content)
def serve_content(self, cpath, method="GET"):
    """
    Tells what to do based on the path.

    The first segment of the path is looked up in
    ``DocumentationHandler.mappings``:

    * an empty path or ``/`` displays the main page,
    * an unknown project returns a 404,
    * ``shut://`` shuts the server down,
    * ``http://`` fetches the remaining part of the path
      with ``get_url_content`` and returns its content,
    * any other value is used as a local folder: if you type
      ``http://localhost:8080/root/file``, assuming ``root`` is mapped
      to a local folder, it will display this file.

    @param      cpath       ParseResult
    @param      method      GET or POST (not used by this method)
    """
    import ast

    params = parse_qs(cpath.query)
    if cpath.path == "" or cpath.path == "/":
        self.serve_main_page()
        return

    params["__path__"] = cpath
    fullfile = cpath.path
    params["__url__"] = cpath
    spl = fullfile.strip("/").split("/")
    project = spl[0]
    link = "/".join(spl[1:])
    value = DocumentationHandler.mappings.get(project, None)

    if value is None:
        self.LOG("can't serve", cpath)
        self.LOG("with params", params)
        # send_error terminates the headers, send_response(404) alone
        # left the response unfinished
        self.send_error(404)
    elif value == "shut://":
        self.LOG("call shutdown")
        self.shutdown()
    elif value == "http://":
        self.send_response(200)
        self.send_headers("debug.html")
        url = cpath.path.replace("/%s/" % project, "")
        try:
            content = get_url_content(url)
        except Exception as e:  # pragma: no cover
            content = "<html><body>ERROR (2): %s</body></html>" % e
        self.feed(content, False, params={})
    elif ".." in link:
        # we avoid that case to prevent users from digging others paths
        # than the mapped ones, just in that the browser does not
        # remove them
        self.send_error(404)
        self.feed("Requested resource %s unavailable" % link)
    else:
        # we do not expect the documentation to point to the root
        # it must be relative paths
        localpath = link.lstrip("/")
        if localpath in [None, "/", ""]:
            localpath = "index.html"
        fullpath = os.path.join(value, localpath)
        self.LOG("localpath ", fullpath, os.path.isfile(fullpath))
        _, ftype = self.get_ftype(localpath)

        # NOTE(security): this value comes from the query string, the
        # original code called eval() on it which lets any client run
        # arbitrary code; literal_eval only accepts Python literals
        execute = ast.literal_eval(params.get("execute", ["True"])[0])
        spath = params.get("path", [None])[0]

        # status 200 is only sent once the content is known to be
        # available, otherwise it was followed by a 404
        if ftype != 'execute' or not execute:
            content = self.get_file_content(fullpath, ftype, spath)
            if content is None:
                self.LOG("** w,unable to get file for key:", spath)
                self.send_error(404)
                self.feed("Requested resource %s unavailable" % localpath)
            else:
                ext = os.path.splitext(localpath)[-1].lower()
                if ext in (".py", ".c", ".cpp", ".hpp", ".h",
                           ".r", ".sql", ".java"):
                    # source files are rendered as html
                    self.send_response(200)
                    self.send_headers(".html")
                    self.feed(
                        DocumentationHandler.html_code_renderer(
                            localpath, content))
                elif ext == ".html":
                    content = DocumentationHandler.process_html_path(
                        project, content)
                    self.send_response(200)
                    self.send_headers(localpath)
                    self.feed(content)
                else:
                    self.send_response(200)
                    self.send_headers(localpath)
                    self.feed(content)
        else:
            self.LOG("execute file ", localpath)
            out, err = DocumentationHandler.execute(localpath)
            if len(err) > 0:
                self.send_error(404)
                self.feed("Requested resource %s unavailable" % localpath)
            else:
                self.send_response(200)
                self.send_headers(localpath)
                self.feed(out)