def test_install_azure(self):
    """
    Checks ``compare_version`` on two versions, then, on Windows with
    Python 3+, downloads the *azure* wheel and checks its version.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.split(__file__)[0])
    temp = os.path.join(here, "temp_download_azure")
    if not os.path.exists(temp):
        os.mkdir(temp)
    # removes the files left by a previous run (subfolders are kept)
    for entry in os.listdir(temp):
        full = os.path.join(temp, entry)
        if os.path.isfile(full):
            os.remove(full)

    newer = compare_version("2.0.0rc5", "1.0.3")
    older = compare_version("1.0.3", "2.0.0rc5")
    assert newer * older < 0
    assert newer > 0

    if not (sys.platform.startswith("win") and sys.version_info[0] >= 3):
        return

    module = find_module_install("azure")
    if module.pip_options is None:
        raise Exception("no pip_options, issue '{0}'".format(module))
    module.fLOG = fLOG
    wheel = module.download(temp_folder=temp)
    version = get_wheel_version(wheel)
    if compare_version(version, "1.9.9") <= 0:
        raise Exception(
            "unexception version for '{0}',\nshould be >= 1.9.9 not '{1}'".format(wheel, version))
    fLOG(module.version, version, wheel)
    assert os.path.exists(wheel)
    assert "azure" in wheel
def test_import_sql(self):
    """
    Imports a flat file into a fresh database through ``InterfaceSQL``
    and checks the table is exposed as an attribute.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.split(__file__)[0])
    flat_file = os.path.join(here, "data", "ACA.PA.txt")
    dbf = os.path.join(here, "temp_database_inti.db3")

    # the database must not exist before the import
    if os.path.exists(dbf):
        os.remove(dbf)
    assert not os.path.exists(dbf)

    face = InterfaceSQL.create(dbf)
    face.connect()
    face.import_flat_file(flat_file, "ACAPA2")
    assert face.CC.ACAPA2._ == "ACAPA2"
    face.close()
def test_dependencies_ggplot_pip(self):
    """
    Runs the shared dependency check for *ggplot* with pip,
    unless the interpreter path contains ``travis``.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    if "travis" in sys.executable:
        return
    self.common_function("ggplot", use_pip=True)
def test_save_stock_google(self):
    """
    Loads a stock series, saves it as text, reloads it and
    exports the reloaded dataframe to Excel.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    def remove_if_exists(path):
        # makes sure the file is produced by this run
        if os.path.exists(path):
            os.remove(path)

    here = os.path.abspath(os.path.split(__file__)[0])
    cache = os.path.join(here, "temp_cache_file_google")
    remove_if_exists(
        os.path.join(cache, "NASDAQ_GOOG.2000-01-03.2014-01-15.txt"))

    last_day = datetime.datetime(2014, 1, 15)
    stock = StockPrices("NASDAQ:GOOG", url="google",
                        folder=cache, end=last_day)

    saved = os.path.join(cache, "save.txt")
    remove_if_exists(saved)
    stock.to_csv(saved)
    assert os.path.exists(saved)

    reloaded = StockPrices(saved, sep="\t")
    assert stock.dataframe.shape == reloaded.dataframe.shape

    excel = os.path.join(cache, "out_excel.xlsx")
    remove_if_exists(excel)
    reloaded.dataframe.to_excel(excel)
    assert os.path.exists(excel)
def test_notebook_js(self):
    """
    Converts the notebooks of folder ``notebooks_js`` into several formats
    and checks the expected outputs exist.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.split(__file__)[0])
    folder = os.path.normpath(os.path.join(here, "notebooks_js"))
    notebooks = [os.path.join(folder, name)
                 for name in os.listdir(folder) if ".ipynb" in name]

    formats = ["slides", "present", "ipynb", "html", "python", "rst", "pdf"]
    if sys.platform.startswith("win"):
        formats.append("docx")

    temp = get_temp_folder(__file__, "temp_nb_bug_js")
    res = process_notebooks(notebooks, temp, temp, formats=formats)
    fLOG("*****", len(res))
    for item in res:
        if not os.path.exists(item[0]):
            raise Exception(item[0])

    # the latex output must contain at least one section
    latex = os.path.join(temp, "using_qgrid_with_jsdf.tex")
    with open(latex, "r", encoding="utf8") as f:
        content = f.read()
    if "\\section{" not in content:
        raise Exception(content)

    for script in ("reveal.js", "require.js"):
        expected = os.path.join(temp, script)
        if not os.path.exists(expected):
            raise Exception(expected)
def test_install(self):
    """
    Downloads and unzips module *jsdifflib* from GitHub
    and checks every returned file exists.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.split(__file__)[0])
    temp = os.path.join(here, "temp_download")
    if not os.path.exists(temp):
        os.mkdir(temp)

    # removes the files of a previous run
    for entry in os.listdir(temp):
        full = os.path.join(temp, entry)
        if os.path.isfile(full):
            os.remove(full)

    unzipped = os.path.join(temp, "jsdifflib-master")
    if os.path.exists(unzipped):
        for entry in os.listdir(unzipped):
            os.remove(os.path.join(unzipped, entry))

    module = ModuleInstall("jsdifflib", "github",
                           gitrepo="cemerick", fLOG=fLOG)
    files = module.download(temp_folder=temp, unzipFile=True, source="2")
    assert len(files) > 0
    for name in files:
        assert os.path.exists(name)
def nb_open(filename, profile='default', open_browser=True, fLOG=fLOG):
    """
    Opens a notebook with an existing server; if ``find_best_server``
    finds none, a new server is launched.

    @param      filename        notebook
    @param      profile         profile given to ``find_best_server``
    @param      open_browser    forwarded to the new server when one is launched
    @param      fLOG            logging function
    @return                     the information about the existing server,
                                otherwise the result of ``launch_new_instance``
    """
    filename = os.path.abspath(filename)
    server_inf = find_best_server(filename, profile)

    if server_inf is None:
        fLOG("Starting new server")
        from notebook import notebookapp
        return notebookapp.launch_new_instance(
            file_to_run=filename,
            notebook_dir=os.path.dirname(filename),
            open_browser=open_browser,
            # Avoid it seeing our own argv
            argv=[],
        )

    from notebook.utils import url_path_join
    root = server_inf['notebook_dir']
    fLOG("Using existing server at", root)
    relative = os.path.relpath(filename, start=root)
    webbrowser.open(
        url_path_join(server_inf['url'], 'notebooks', relative), new=2)
    return server_inf
def _pipy_version(self, mods, nbmax=15):
    """
    Calls ``get_pypi_version`` on every module and collects the failures.

    @param      mods    modules exposing ``name`` and ``get_pypi_version()``
    @param      nbmax   number of failures tolerated before raising
    @raise      MissingPackageOnPyPiException   if more than *nbmax* modules failed

    Modules raising ``AnnoyingPackageException`` are only reported
    through a warning.
    """
    error = []
    annoying = []
    for mod in mods:
        try:
            v = mod.get_pypi_version()
            fLOG(mod.name, " --> ", v)
            if v is None:
                error.append((mod.name, "None", None))
        except MissingPackageOnPyPiException as e:
            error.append((mod.name, "pipy", e))
        except MissingVersionOnPyPiException as ee:
            error.append((mod.name, "version", ee))
        except AnnoyingPackageException as eee:
            annoying.append((mod.name, "?", eee))

    if len(error) > nbmax:
        # we accept some errors
        # joblib seems to give errors from time to time
        # multipledispatch
        # ipython --> jupyter (transitionning)
        # sorts on (name, kind) only: the third item is an exception or None
        # and cannot be compared
        rows = sorted(error, key=lambda row: row[:2])
        raise MissingPackageOnPyPiException(
            "Two many errors\n" +
            "\n".join("{0}:{1}\n    {2}".format(a, b, c) for a, b, c in rows))

    if annoying:
        # items are tuples, they must be converted before being joined
        listing = "\n".join(str(_) for _ in annoying)
        fLOG("Annoying\n", listing)
        warnings.warn("ANNOYING PACKAGES\n" + listing)
def test_ensae_2016_answers(self):
    """
    Retrieves the blind set with the stored password and with a dummy one,
    computes the AUC between both and writes a submission in the temporary
    folder. Skipped on CI, where no password is stored.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    if is_travis_or_appveyor():
        # no stored password
        return

    password = keyring.get_password(
        "cpt", os.environ["COMPUTERNAME"] + "pwd")

    truth = data_cpt_ENSAE_2016_11_blind_set(password)
    self.assertEqual(len(truth), 7500)

    r = data_cpt_ENSAE_2016_11_blind_set("dummy")
    self.assertEqual(len(r), 7500)
    s = sum(r)
    assert 0 <= s <= len(r)

    auc = AUC(truth, r)
    fLOG(auc)
    assert 0 <= auc <= 1

    temp = get_temp_folder(__file__, "temp_cpt_ensae_2016_answers")
    out = os.path.join(temp, "answer.txt")
    with open(out, "w") as f:
        f.write("\n".join(str(_) for _ in r))
    # the archive goes next to the answer file, not in the current directory
    zip_files(os.path.join(temp, "submission.zip"), [out])
def test_enumerate_feedback(self):
    """
    Builds the feedback mails from an Excel sheet, dumps each of them
    as HTML and compares the first ones with the expected content.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    expected = ["<p>ok</p>"]
    temp = get_temp_folder(__file__, "temp_enumerate_feedback")
    here = os.path.abspath(os.path.dirname(__file__))
    xls = os.path.join(here, "data", "groupes_eleves_pitch.xlsx")
    df = pandas.read_excel(xls, sheetname=0, index=False)

    mails = list(enumerate_feedback(
        df, exc=False, fLOG=fLOG, begin="BEGIN", end="END",
        subject="SUBJECT", col_name="Nom", cols=["Pitch", "Code"]))

    for i, mail in enumerate(mails):
        fLOG("------------", i)
        body = mail[1]
        with open(os.path.join(temp, "m%d.html" % i), "w",
                  encoding="utf-8") as f:
            f.write(body)
        # only the first mails have an expected content
        if i < len(expected) and expected[i] not in body:
            raise Exception(
                "EXP\n{0}\nRES\n{1}".format(expected[i], body))
def test_profiling(self):
    """
    Profiles ``gain_dynamique_moyen_par_mot`` twice on a sample of lines
    and writes each report in the temporary folder.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    temp = get_temp_folder(__file__, "temp_profiling")
    sample = os.path.join(temp, "..", "data", "sample1000.txt")
    with open(sample, "r", encoding="utf-8") as f:
        lines = [line.strip(" \n\r\t") for line in f]

    def profile_exe():
        return self.gain_dynamique_moyen_par_mot(lines, [1.0] * len(lines))

    def prof(n, show):
        profiler = cProfile.Profile()
        profiler.enable()
        profile_exe()
        profiler.disable()

        stream = io.StringIO()
        stats = pstats.Stats(profiler, stream=stream)
        stats.sort_stats('cumulative').print_stats()

        # shortens the paths in the report
        root = os.path.normpath(os.path.join(temp, "..", "..", ".."))
        report = stream.getvalue().replace(root, "")
        if show:
            fLOG(report)
        with open(os.path.join(temp, "profiling%d.txt" % n), "w") as f:
            f.write(report)

    prof(1, show=False)
    prof(2, show=True)
def test_fonction(self):
    """
    Checks a series of small helpers (search, text/matrix conversions,
    counting, integration) against known results.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    # linear search and minimum
    self.assertEqual(recherche([2, 3, 45], 3), 1)
    self.assertEqual(recherche([2, 3, 45], 4), -1)
    self.assertEqual(minindex([2, 3, 45, -1, 5]), (-1, 3))

    # dichotomic search
    even = range(0, 100, 2)
    self.assertEqual(recherche_dichotomique(even, 48), 24)
    self.assertEqual(recherche_dichotomique(even, 49), -1)

    # text <-> matrix
    text = "case11;case12;case13|case21;case22;case23"
    back = mat2text(text2mat(text, "|", ";"), "|", ";")
    self.assertEqual(back, text)

    # sorting with positions
    self.assertEqual(triindex(["zero", "un", "deux"]),
                     [('deux', 2), ('un', 1), ('zero', 0)])

    # counting
    self.assertEqual(compte(["un", "deux", "un", "trois"]),
                     {'trois': 1, 'deux': 1, 'un': 2})

    # matrix <-> vector
    matrix = [[0, 1, 2], [3, 4, 5]]
    vector = mat2vect(matrix)
    self.assertEqual(vector, [0, 1, 2, 3, 4, 5])
    self.assertEqual(vect2mat(vector, 3), matrix)

    # integral
    self.assertEqual(integrale(lambda x: x, 0, 2, 1000), 2)
def test_notebook_ml_text_features(self):
    """
    Runs notebook *ml_text_features* of folder *td2a*
    through the shared notebook runner.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    name, folder = "ml_text_features", "td2a"
    self.a_test_notebook_runner(name, folder)
def test_matrix_array2(self):
    """
    Checks ``center_reduce`` keeps the size of a random table
    and modifies its first value.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    rows = [[random.random(), random.random()] for _ in range(10)]
    table = TableFormula(["x", "y"], rows)
    reduced = table.center_reduce()
    assert reduced.size == table.size
    assert reduced[0, 0] != table[0, 0]
def test_union_columns(self):
    """
    Checks ``union_columns`` stacks two columns of 100 rows
    into a single column of 200 rows.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    xs = [random.random() for _ in range(100)]
    rows = [[x, x + random.random() / 2] for x in xs]
    table = TableFormula(["x", "y"], rows)
    stacked = table.union_columns(["x", "y"])
    assert stacked.size == (200, 1)
def test_split_files(self):
    """
    Randomly splits a text file into two files and checks the number
    of lines of both parts against the original.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    def count_lines(path):
        with open(path, "r") as fh:
            return len(fh.readlines())

    here = os.path.split(__file__)[0]
    source = os.path.join(here, "data", "BNP.PA.txt")
    temp = os.path.join(here, "temp_split")
    assert os.path.exists(source)
    if not os.path.exists(temp):
        os.mkdir(temp)

    prefix = os.path.join(temp, "temp_split")
    parts = [prefix + ".0000.txt", prefix + ".0001.txt"]
    for part in parts:
        if os.path.exists(part):
            os.remove(part)

    split = TableFormula.random_split_file(source, prefix, 2,
                                           logFunction=fLOG)
    assert split

    for part in parts:
        fLOG(part)
        assert os.path.exists(part)

    total = count_lines(source)
    first = count_lines(parts[0])
    second = count_lines(parts[1])
    assert total == first + second - 1
def test_code_style_src(self):
    """
    Runs ``check_pep8`` on the ``src`` folder; skipped on Python 2.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    if sys.version_info[0] == 2:
        warnings.warn(
            "skipping test_code_style because of Python 2 or " + sys.executable)
        return

    here = os.path.abspath(os.path.dirname(__file__))
    src_ = os.path.normpath(os.path.join(here, "..", "..", "src"))

    ignored = ('C0103', 'C1801', 'R0201', 'R1705', 'W0108', 'W0613',
               'W0231', 'W0212', 'C0111', 'W0107')
    skipped = [
        "Redefining built-in 'iter'",
        "iter_rows.py:340",
        "translation_class.py",
        "translation_to_python.py:118",
        "translation_to_python.py:185",
        "translation_to_python.py:244",
        "node_visitor_translator.py:74: E1111",
        "R1720",
    ]
    check_pep8(src_, fLOG=fLOG,
               extended=[("fLOG", _extended_refactoring)],
               pylint_ignore=ignored,
               skip=skipped)
def test_interactive2_RadioWidget(self):
    """
    Builds a ``StaticInteract`` mixing a range, a radio and a drop-down
    widget, then checks it produces non-empty HTML.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    fix_tkinter_issues_virtualenv(fLOG=fLOG)
    import matplotlib.pyplot as plt

    def plot(amplitude, color, sele):
        # parameter names must match the widget names given below
        fig, ax = plt.subplots(
            figsize=(4, 3), subplot_kw={'axisbelow': True})
        ax.grid(color='w', linewidth=2, linestyle='solid')
        xs = np.linspace(0, 10, 1000)
        ys = amplitude * np.sin(xs)
        ax.plot(xs, ys, color=color, lw=5, alpha=0.4)
        ax.set_xlim(0, 10)
        ax.set_ylim(-1.1, 1.1)
        return fig

    widgets = dict(
        amplitude=RangeWidget(0.1, 0.3, 0.1, default=0.2),
        color=RadioWidget(['blue', 'green'], default='blue'),
        sele=DropDownWidget(['a', 'b']))
    res = StaticInteract(plot, **widgets)
    self.assertNotEmpty(res)
    self.assertNotEmpty(res.html())
    plt.close('all')
    fLOG("end")
def test_example_pydy(self):
    """
    Draws the *pydy* example on a matplotlib axis and saves the figure.
    The test only warns when *pydy* fails with a known list/tuple
    concatenation error.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_example_pydy")
    fix_tkinter_issues_virtualenv()
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
    known_issue = 'can only concatenate list (not "tuple") to list'
    try:
        example_pydy(ax=ax)
    except Exception as e:
        if known_issue not in str(e):
            raise e
        warnings.warn("Pydy needs to be updated for Python 3.7")
        return

    self.assertNotEmpty(ax)
    img = os.path.join(temp, "img.png")
    fig.savefig(img)
    self.assertExists(img)
    if __name__ == "__main__":
        fig.show()
    plt.close('all')
    fLOG("end")
def test_image_video_epidemic(self):
    """
    Runs ten iterations of the pygame simulation, then assembles
    the produced images into a video. Skipped on travis.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_image_video_epidemic")

    if is_travis_or_appveyor() == "travis":
        # pygame.error: No available video device
        return

    import pygame
    # os.environ["SDL_VIDEODRIVER"] = "x11"
    flags = pygame.NOFRAME if is_travis_or_appveyor() == "circleci" else 0

    pygame_simulation(pygame, fLOG=fLOG, iter=10,
                      folder=temp, flags=flags)

    produced = os.listdir(temp)
    self.assertTrue(len(produced) > 9)
    images = [os.path.join(temp, name) for name in produced
              if os.path.splitext(name)[-1] == ".png"]
    self.assertTrue(len(images) > 0)

    video = make_video(images, os.path.join(temp, "epidemic.avi"),
                       size=(300, 300), format="XVID")
    self.assertTrue(video is not None)
def test_jconvert_sequence_into_batch_file_split2(self):
    """
    Runs the shared batch-file conversion check for platform ``win``.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    platform = "win"
    self.zz_st_jconvert_sequence_into_batch_file_split2(platform)
def test_pandas_groupbynan_tuple(self):
    """
    Compares a regular ``groupby`` on three columns holding missing values
    with ``pandas_groupby_nan``, which is expected to keep four groups.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    keys = ["a", "b", "c"]
    rows = [dict(a="a", b="b", c="c", n=1),
            dict(b="b", n=2),
            dict(a="a", n=3),
            dict(c="c", n=4)]
    df = pandas.DataFrame(rows)

    regular = df.groupby(["a", "b", "c"]).sum()
    self.assertEqual(regular.shape, (1, 1))

    for nanback in (True, False):
        try:
            grouped = pandas_groupby_nan(
                df, keys, nanback=nanback, suffix="NAN")
        except NotImplementedError:
            continue
        summed = grouped.sum().sort_values("n")
        self.assertEqual(summed.shape, (4, 4))
        records = summed.to_dict("records")
        fLOG(summed)
        first = records[0]
        self.assertEqual(first["a"], "a")
        self.assertEqual(first["b"], "b")
        self.assertEqual(first["c"], "c")
        self.assertEqual(first["n"], 1)
        self.assertEqual(records[1]["a"], "NAN")
def test_benchmark(self):
    """
    Runs a grid benchmark on two parameter sets and two datasets,
    then checks the dataframe and the three reports it produces.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_grid_benchmark")

    params = [dict(value=random.randint(10, 20), name="name%d" % i,
                   shortname="m%d" % i)
              for i in range(2)]
    datasets = [
        dict(X=pandas.DataFrame([[0, 1], [0, 1]]),
             name="set1", shortname="s1"),
        dict(X=pandas.DataFrame([[1, 1], [1, 1]]),
             name="set2", shortname="s2"),
    ]

    bench = ATestOverGridBenchMark(
        "TestName", datasets, fLOG=fLOG, clog=temp,
        cache_file=os.path.join(temp, "cache.pickle"))
    bench.run(params)

    df = bench.to_df()
    html = df.to_html(float_format="%1.3f", index=False)
    self.assertTrue(len(df) > 0)
    self.assertTrue(html is not None)
    self.assertEqual(df.shape[0], 4)

    outputs = {ext: os.path.join(temp, "report." + ext)
               for ext in ("html", "csv", "rst")}
    bench.report(filehtml=outputs["html"], filecsv=outputs["csv"],
                 filerst=outputs["rst"], title="A Title",
                 description="description")
    for ext in ("html", "csv", "rst"):
        self.assertTrue(os.path.exists(outputs[ext]))
def test_encrypt_decrypt(self):
    """
    Encrypts the folder holding this test, decrypts the result into another
    folder and checks this file is restored byte for byte.

    ``encrypt`` and ``decrypt`` are driven through ``sys.argv``;
    the original value is restored at the end so that the following tests
    are not affected.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    password = "******" * 2
    temp = get_temp_folder(__file__, "temp_encrypt")
    temp2 = get_temp_folder(__file__, "temp_encrypt2")
    tempmm = get_temp_folder(__file__, "temp_encrypt_status")
    cstatus = os.path.join(tempmm, "crypt_status.txt")
    cmap = os.path.join(tempmm, "crypt_map.txt")
    srcf = os.path.abspath(os.path.join(temp, ".."))

    saved_argv = sys.argv
    try:
        sys.argv = ["", srcf, temp, password,
                    "--status", cstatus,
                    "--map", cmap]
        encrypt(fLOG=fLOG)

        sys.argv = ["", temp, temp2, password]
        decrypt(fLOG=fLOG)
    finally:
        # never leaks the fake command line to the other tests
        sys.argv = saved_argv

    this = __file__
    with open(this, "rb") as f:
        c1 = f.read()
    with open(os.path.join(temp2, os.path.split(this)[-1]), "rb") as f:
        c2 = f.read()

    self.assertEqual(c1, c2)
    fLOG("end")
def test_join_multiple2(self):
    """
    Joins two tables on a pair of columns with a filter on ``bucket``
    and checks the single row returned by the generated query.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    archive = os.path.join(os.path.split(__file__)[0],
                           "data", "database_linked.zip")
    temp = get_temp_folder(__file__, "temp_join_multiple2")
    filename = unzip(archive, temp)
    assert os.path.exists(filename)

    db = Database(filename, LOG=fLOG)
    db.connect()

    where = {"bucket": ("==", "bu###1")}
    root = db.JoinTreeNode("profile_QSSH", where=where,
                           parent_key="query", key="query")
    child = db.JoinTreeNode("url_QSSH", where=where,
                            parent_key=('url', 'pos'), key=('url', 'pos'))
    root.append(child)

    sql, fields = db.inner_joins(root, execute=False, create_index=False)
    view = db.execute_view(sql)

    url = 'digg.com/security/Hackers_Put_Social_Networks_In_Crosshairs'
    expected = [('facebbooklogin', 1, 0, 'bu###1', 86, 0,
                 url, url,
                 1, 0, 1, 1, 0, 0, 0, 0)]
    assert view == expected
    assert "WHERE" in sql
    db.close()
def test_notebook_rst_svg(self):
    """
    Converts a notebook holding an SVG image into RST and checks
    every image referenced by the output exists.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_nb_rst_svg")
    notebook = os.path.normpath(os.path.join(
        temp, '..', "data", "rst_notebooks", "notebook_with_svg.ipynb"))

    res = process_notebooks([notebook], temp, temp,
                            formats=["rst"], fLOG=fLOG)
    with open(res[0][0], 'r', encoding='utf-8') as f:
        content = f.read()

    directive = '.. image::'
    self.assertIn('SVG in a notebook.', content)
    self.assertIn(directive, content)

    found = 0
    for line in content.split('\n'):
        if directive not in line:
            continue
        image = line.replace(directive, '').strip(' \r\t')
        self.assertExists(os.path.join(temp, image))
        found += 1
    self.assertGreater(found, 0)
def test_notebook_runner_2a_ml(self):
    """
    Executes the notebooks of *td2a_ml* whose name contains
    ``overfitting``, after copying the sample database next to them.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    from ensae_teaching_cs.automation.notebook_test_helper import ls_notebooks, execute_notebooks, clean_function_1a
    from ensae_teaching_cs.data import simple_database

    temp = get_temp_folder(__file__, "temp_notebook2a_ml4")
    keepnote = [nb for nb in ls_notebooks("td2a_ml") if "overfitting" in nb]
    shutil.copy(simple_database(), temp)

    # "h2o" is not working from a virtual environment
    excluded = ("SNCF", "Scraping", "deep_python", "h2o", "libraries")

    def keep_notebook(i, n):
        if any(pattern in n for pattern in excluded):
            return False
        # notebooks whose file name contains td2a are
        # already tested by others tests
        return "td2a" not in os.path.split(n)[-1]

    execute_notebooks(temp, keepnote, keep_notebook, fLOG=fLOG,
                      clean_function=clean_function_1a,
                      dump=ensae_teaching_cs)
def test_covariance(self):
    """
    Computes the covariance and the correlation of three stocks on their
    common dates and checks the diagonal of the correlation matrix is 1.

    The diagonal is read with ``.loc`` (labels) and ``.iloc`` (positions)
    because ``.ix`` does not exist anymore in recent versions of pandas.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    cache = os.path.abspath(os.path.split(__file__)[0])
    cache = os.path.join(cache, "temp_cache_cov")
    stocks = [StockPrices("BNP.PA", folder=cache),
              StockPrices("CA.PA", folder=cache),
              StockPrices("SAF.PA", folder=cache),
              ]

    # only keeps the dates available for every stock
    dates = StockPrices.available_dates(stocks)
    ok = dates[dates["missing"] == 0]
    stocks = [v.keep_dates(ok) for v in stocks]

    cov = StockPrices.covariance(stocks)
    assert len(cov) == 3

    cor = StockPrices.covariance(stocks, cov=False)
    self.assertEqual(len(cor), 3)
    assert abs(cor.loc["BNP.PA", "BNP.PA"] - 1) < 1e-5
    assert abs(cor.iloc[2, 2] - 1) < 1e-5

    ret, _ = StockPrices.covariance(stocks, cov=False, ret=True)
    self.assertEqual(len(ret), 3)
def test_notebook_runner_2a(self):
    """
    Executes the notebooks listed by ``ls_notebooks("2a")`` which pass
    the filter below. Nothing is executed on appveyor (too long)
    nor on travis (the execution does not stop).
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    if is_travis_or_appveyor() == "appveyor":
        # too long for appveyor
        return

    from src.ensae_teaching_cs.automation.notebook_test_helper import ls_notebooks, execute_notebooks, clean_function_1a, unittest_raise_exception_notebook
    temp = get_temp_folder(__file__, "temp_notebook2a_")
    keepnote = ls_notebooks("2a")
    assert len(keepnote) > 0

    def filter(i, n):
        if not sys.platform.startswith("win") and "_convert" in n:
            return False
        # checked before accepting the notebook, otherwise
        # the exclusion never applies
        if is_travis_or_appveyor() and "notebook_convert.ipynb" in n:
            # this one requires pandoc
            return False
        return "git_" not in n and "python_r" not in n and "csharp" not in n

    if is_travis_or_appveyor() == "travis":
        warnings.warn("execution does not stop")
        return

    res = execute_notebooks(temp, keepnote, filter, fLOG=fLOG,
                            clean_function=clean_function_1a)
    unittest_raise_exception_notebook(res, fLOG)
def test_image_video_puzzle_girafe(self):
    """
    Runs the pygame simulation of the puzzle, then assembles
    the produced images into a video. Skipped on travis.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_image_video_girafe")

    if is_travis_or_appveyor() == "travis":
        # pygame.error: No available video device
        return

    import pygame
    # os.environ["SDL_VIDEODRIVER"] = "x11"
    flags = pygame.NOFRAME if is_travis_or_appveyor() == "circleci" else 0

    # shorter delay when the test is not run as a script
    delay = 200 if __name__ == "__main__" else 2
    pygame_simulation(pygame, fLOG=fLOG, folder=temp,
                      delay=delay, flags=flags)

    produced = os.listdir(temp)
    assert len(produced) > 9
    images = [os.path.join(temp, name) for name in produced
              if os.path.splitext(name)[-1] == ".png"]
    assert len(images) > 0

    video = make_video(images, os.path.join(temp, "puzzle_girafe.avi"),
                       size=(500, 500), format="XVID", fps=4)
    assert video is not None
def test_notebook_python(self):
    """
    Converts the notebooks of folder ``notebooks_python`` into python
    scripts and checks one of them declares its encoding.
    Nothing is converted on travis and appveyor.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.split(__file__)[0])
    folder = os.path.normpath(os.path.join(here, "notebooks_python"))
    notebooks = [os.path.join(folder, name)
                 for name in os.listdir(folder) if ".ipynb" in name]

    temp = os.path.join(here, "temp_nb_bug_python")
    if not os.path.exists(temp):
        os.mkdir(temp)
    for name in os.listdir(temp):
        os.remove(os.path.join(temp, name))

    if is_travis_or_appveyor() in ('travis', 'appveyor'):
        return

    setup_environment_for_help()

    res = process_notebooks(notebooks, temp, temp, formats=["python"])
    fLOG("*****", len(res))
    for item in res:
        fLOG(item)
        self.assertExists(item[0])

    script = os.path.join(
        temp, "seance5_approche_fonctionnelle_correction.py")
    with open(script, "r", encoding="utf8") as f:
        content = f.read()
    if "# -*- coding: utf-8 -*-" not in content:
        raise Exception(content)
def test_linkedin_connection(self):
    """
    Connects to LinkedIn and retrieves the connections of ``my_id``.
    The test only warns when there is no credential
    or when the access token has expired.
    """
    self.start()
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    token = self.get_access_token()
    if token is None:
        warnings.warn("no credential, unable to test linkedin")
        return

    linkedin = LinkedInAccess(*token)
    fLOG("***", linkedin.connect())

    try:
        profile = linkedin.get_connections(member_id=TestLinkedIn.my_id)
    except Exception as e:
        message = str(e)
        if "Expired access token." not in message:
            raise e
        warnings.warn(message)
        return

    connections = profile["values"]
    assert len(connections) > 0
    for connection in connections:
        fLOG(connection)
def test_get_data(self):
    """
    Downloads the data with ``get_data`` into folder ``temp_rues`` and
    checks it is not empty. The test silently stops when the data
    cannot be retrieved.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.dirname(__file__))
    folder = os.path.join(here, "temp_rues")
    if not os.path.exists(folder):
        os.mkdir(folder)

    # forces a new download
    for ext in (".txt", ".zip"):
        previous = os.path.join(folder, "paris_54000" + ext)
        if os.path.exists(previous):
            os.remove(previous)

    try:
        data = get_data(whereTo=folder, fLOG=fLOG, timeout=60)
    except Exception as e:
        if "unable to retrieve data" in str(e):
            return
        raise Exception("*****" + str(e) + "*****") from e

    fLOG(len(data))
    assert len(data) > 0
    total = sum(row[-1] for row in data)
    fLOG("total length", total)
def test_setup(self):
    """
    Disabled test: it warns and returns immediately.
    The checks on ``setup_ipython`` below are kept but never executed.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    warnings.warn("not implemented for jupyter 4.0")
    return

    # unreachable from here
    if not sys.platform.startswith("win"):
        return
        fold = os.path.join(os.environ["HOME"], "temp")
        if not os.path.exists(fold):
            os.mkdir(fold)
        r = setup_ipython(fold, [], apply_modification=False)
        assert len(r) > 0
        fLOG(r)
        for name in r:
            assert os.path.exists(name)
    else:
        r = setup_ipython(r"C:\temp", [], apply_modification=False)
        assert len(r) > 0
        fLOG(r)
        for name in r:
            assert os.path.exists(name)
def test_notebook_comment(self):
    """
    Converts the notebooks of folder ``notebooks_comment`` into RST,
    checks every output exists and counts the index directives
    of ``example_with_comments.rst``.
    Nothing is converted on travis and appveyor.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    path = os.path.abspath(os.path.split(__file__)[0])
    fold = os.path.normpath(os.path.join(path, "notebooks_comment"))
    nbs = [os.path.join(fold, _)
           for _ in os.listdir(fold) if ".ipynb" in _]
    formats = ["rst", ]

    temp = os.path.join(path, "temp_nb_comment")
    if not os.path.exists(temp):
        os.mkdir(temp)
    for file in os.listdir(temp):
        os.remove(os.path.join(temp, file))

    if is_travis_or_appveyor() in ('travis', 'appveyor'):
        return

    res = process_notebooks(nbs, temp, temp, formats=formats)
    fLOG("*****", len(res))
    for _ in res:
        fLOG(_)
        # the path itself must be given, not the result of os.path.exists
        self.assertExists(_[0])

    with open(os.path.join(temp, "example_with_comments.rst"), "r", encoding="utf8") as f:
        lines = f.readlines()

    nb = 0
    for line in lines:
        if line.startswith(".. index:: comment, notebook, rst"):
            nb += 1
def print_function(*s):
    """
    Logs the received values with ``fLOG``, prefixed by ``"i,"``.

    @param      s       values to log
    @return             the received values, as a tuple
    """
    prefixed = ("i,",) + s
    fLOG(*prefixed)
    return s
def test_style_src(self):
    """
    Runs ``check_pep8`` on the ``src`` folder, ignoring a list
    of known messages and pylint codes.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.dirname(__file__))
    src_ = os.path.normpath(os.path.join(here, "..", "..", "src"))

    # messages which are known and accepted
    skip = [
        "do not assign a lambda expression, use a def",
        "too many leading '#' for block comment",
        "line too long (480 > 143",
        "Redefining name 'fLOG' from outer scope",
        "Unable to import 'System",
        "parallel_thread.py:39: R1710",
        "dice.py:51: W0612",
        "dice.py:47: W0612",
        "Redefining built-in 'format'",
        "Unused variable 'variance_a_eviter'",
        "Attribute 'objets' defined outside __init__",
        "Attribute 'sources' defined outside __init__",
        "Redefining built-in 'round'",
        "Redefining built-in 'iter'",
        "Unable to import 'ENSAE.",
        "Instance of '_TableFormulaStat' has no ",
        "Line too long (480/143)",
        "Unable to import 'MagicJupyter' (pylint)",
        "Unused variable 'n'",
        "Unused variable 'i'",
        "bad operand type for unary -: matrix",
        "No name 'AddReference' in module 'clr'",
        "Instance of 'TableFormula' has no 'table' member",
        "Unexpected keyword argument 'sheet' in function call",
        "Redefining built-in 'filter'",
        "Unused variable 'a'",
        "No name 'resize' in module 'cv2'",
        "No name 'imread' in module 'cv2'",
        "No name 'VideoWriter_fourcc' in module 'cv2'",
        "No name 'VideoWriter' in module 'cv2'",
        "'pygame' has no 'error' member",
        "'pygame' has no 'init' member",
        "Module 'matplotlib.cm' has no 'rainbow' member",
        "Redefining built-in 'input'",
        "Unused variable 'vt'",
        "Value 'lastrow' is unsubscriptable",
        "tsp_bresenham.py:9: R1710",
        "Redefining built-in 'next'",
        "Attribute '__dict__' defined outside __init__",
        "Instance of 'Rule' has no 'clauses' member",
        "Instance of 'LatexCode' has no 'replace' member",
        "filename_helper.py:68: E1136",
        "send_feedback.py:287: W0640",
        "send_feedback.py:286: W0640",
        "send_feedback.py:135: W0640",
        "send_feedback.py:135: W0631",
        "send_feedback.py:26",
        "projects_helper.py:16: W0102",
        "mail_helper.py:79: W0102",
        "jenkins_helper.py:142: W0102",
        "pandas_helper.py:8: W0611",
        "pandas_helper.py:7: W0611",
        "table_formula.py:2702: W0612",
        "ftp_publish_helper.py:246: E0401",
        "ftp_publish_helper.py:138: E0401",
        "Unable to import 'pycuda",
        "Unable to import 'pyopencl'",
        "Unable to import 'selenium",
        "Unable to import 'ensae_teaching_cs.td_1a.flask_helper'",
        "Unused import clr",
        "Module 'clr' has no 'AddReference' member",
        "Unable to import 'clr'",
        "send_feedback.py:292: E0602",
        "send_feedback.py:137: W0631",
        "send_feedback.py:137: W0640",
        "R1720",
    ]
    if is_travis_or_appveyor() == "appveyor":
        skip.append("Unable to import 'fairtest'")

    ignored = ('C0103', 'C1801', 'R0201', 'R1705', 'W0108', 'W0613',
               'C0111', 'R1702', 'C0200', 'W0703', 'W0223', 'W020',
               'W0212', 'C0123', 'C0302', 'W0221', 'R0912', 'E0203',
               'W0201', 'R1710', 'W0603', 'R1711', 'R1714', 'W0107',
               'W0640')
    check_pep8(src_, fLOG=fLOG, skip=skip, verbose=False,
               run_cmd_filter=_run_cmd_filter,
               pylint_ignore=ignored)
def test_join_bis(self):
    """
    Checks the SQL query and the fields produced by ``inner_join`` for two
    joins (one column, then two columns with a filter) and the number
    of rows each query returns.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    def compact(query):
        # queries are compared without any space
        return query.replace(" ", "")

    archive = os.path.join(
        os.path.split(__file__)[0], "data", "database_linked.zip")
    temp = get_temp_folder(__file__, "temp_join_bis")
    filename = unzip(archive, temp)
    assert os.path.exists(filename)

    db = Database(filename, LOG=fLOG)
    db.connect()

    count = db.execute_view("SELECT COUNT(*) FROM profile_QSSH")
    assert count[0][0] == 16

    # join on one column
    sql, fields = db.inner_join("profile_QSSH", "url_QSSH", "url", None,
                                execute=False, create_index=False,
                                unique=False)
    sql = sql.strip(" \n\r\t")
    expected = TestDatabaseJoin._memo_SQL1.strip(" \n\r\t")
    if compact(sql) != compact(expected):
        print(sql)
        raise Exception("sql queries should be identifical")

    assert fields == [
        ('query', 'query'),
        ('profile_QSSH.pos', 'profile_QSSH_pos'),
        ('type', 'type'),
        ('bucket', 'bucket'),
        ('max_nb', 'max_nb'),
        ('sum_difftime', 'sum_difftime'),
        ('profile_QSSH.url', 'url'),
        ('url_QSSH.pos', 'url_QSSH_pos'),
        ('co', 'co'),
        ('nb_view', 'nb_view'),
        ('sum_nb_view', 'sum_nb_view'),
        ('sum_difftime_view', 'sum_difftime_view'),
        ('nb_click', 'nb_click'),
        ('sum_nb_click', 'sum_nb_click'),
        ('sum_difftime_click', 'sum_difftime_click'),
    ]

    view = db.execute_view(sql)
    assert len(view) == 2

    # join on two columns with a filter
    sql, fields = db.inner_join("profile_QSSH", "url_QSSH",
                                ("url", "pos"), None,
                                execute=False, create_index=False,
                                where="bucket == 'bu###1'")
    sql = sql.strip(" \n\r\t")
    expected = TestDatabaseJoin._memo_SQL2.strip(" \n\r\t")
    if compact(sql) != compact(expected):
        # displays both queries line by line before failing
        for got, exp in zip(sql.split("\n"), expected.split("\n")):
            print("res", got)
            print("exp", exp)
            print(got == exp)
    assert compact(sql) == compact(expected)

    assert fields == [
        ('query', 'query'),
        ('profile_QSSH.pos', 'pos'),
        ('type', 'type'),
        ('bucket', 'bucket'),
        ('max_nb', 'max_nb'),
        ('sum_difftime', 'sum_difftime'),
        ('profile_QSSH.url', 'url'),
        ('co', 'co'),
        ('nb_view', 'nb_view'),
        ('sum_nb_view', 'sum_nb_view'),
        ('sum_difftime_view', 'sum_difftime_view'),
        ('nb_click', 'nb_click'),
        ('sum_nb_click', 'sum_nb_click'),
        ('sum_difftime_click', 'sum_difftime_click'),
    ]

    view = db.execute_view(sql)
    assert len(view) == 1

    db.close()
def test_algo_euler4(self):
    """
    Takes the first three edges of the data, links the connected components,
    extends the graph with ``eulerien_extension`` and computes
    an eulerian path. Two drawings of the graph are saved.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.dirname(__file__))
    folder = os.path.join(here, "temp_algo_euler4")
    if not os.path.exists(folder):
        os.mkdir(folder)

    edges = get_data(whereTo=folder, fLOG=fLOG)[:3]

    # maps each vertex to the value stored three positions further
    vertices = {}
    for edge in edges:
        for i in (0, 1):
            vertices[edge[i]] = edge[i + 3]

    def component_bounds():
        components = list(connected_components(edges).values())
        return min(components), max(components)

    # adds edges until a single connected component remains
    mi, ma = component_bounds()
    while mi != ma:
        length = distance_haversine(*(vertices[mi] + vertices[ma]))
        edges.append((mi, ma, 2, vertices[mi], vertices[ma], length))
        mi, ma = component_bounds()

    fix_tkinter_issues_virtualenv()
    import matplotlib.pyplot as plt
    import networkx as nx

    plt.figure()
    G = nx.Graph()
    for edge in edges:
        a, b = edge[:2]
        G.add_edge(a, b)
    pos = nx.spring_layout(G)
    nx.draw(G, pos, node_color='#A0CBE2')
    plt.savefig(os.path.join(folder, "graph1.png"))
    plt.close('all')

    added = eulerien_extension(edges, fLOG=lambda *l: None,
                               distance=distance_paris)

    for edge in added:
        a, b = edge[:2]
        G.add_edge(a, b)
    plt.figure()
    pos = nx.spring_layout(G)
    graph_degree(edges + added)
    # labels={ v:"{0}".format(deg[v]) for v in G.nodes() }
    nx.draw(G, pos, node_color='#A0CBE2'
            # ,labels=labels
            )
    plt.savefig(os.path.join(folder, "graph2.png"))
    plt.close('all')

    path = euler_path(edges, added)
    alls = edges + added
    fLOG(len(alls), len(path))
def test_style_test(self):
    """
    Runs ``check_pep8`` on the parent folder of this file, ignoring
    folders matching ``temp_.*`` and a list of known messages.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.dirname(__file__))
    test = os.path.normpath(os.path.join(here, ".."))

    # messages which are known and accepted
    skip = [
        "src' imported but unused",
        "skip_' imported but unused",
        "skip__' imported but unused",
        "skip___' imported but unused",
        "Unused variable 'skip_'",
        "imported as skip_",
        "Module 'pygame' has no 'init' member",
        "Module 'pygame' has no 'NOFRAME' member",
        "Class 'mem_flags' has no ",
        "Module 'torch' has no ",
        "Module 'numpy.random' has no 'RandomState' ",
        "Unable to import 'onemod' ",
        "Redefining built-in 'filter' ",
        "Redefining built-in 'input'",
        "Unused variable 'n'",
        "Redefining name 'path' from outer scope",
        "Unused variable 'i'",
        "Unable to import 'System",
        "Module 'ensae_teaching_cs.pythonnet.",
        "Unused variable 'skip___'",
        "Module 'pygame' has no 'quit'",
        "Unable to import 'primes'",
        "Parameters differ from overridden 'forward' method",
        "test_data_competition.py:3",
        "test_data_competition.py:1",
        "Redefining name 'src' from outer scope",
        "Unable to import 'pycuda.",
        "Unable to import 'pyopencl'",
        "Unused import clr",
        "Unused variable 'clr'",
        "Unable to import 'clr'",
        "Module 'clr' has no 'AddReference' member",
        "test_SKIP_torch.py:67: E1123",
        "test_SKIP_torch.py:102: E1101",
        "test_SKIP_torch.py:104: E1101",
        "R1720",
    ]
    if is_travis_or_appveyor() == "appveyor":
        skip.append("Unable to import 'fairtest'")

    ignored = ('C0103', 'C1801', 'R0201', 'R1705', 'W0108', 'W0613',
               'C0111', 'C0200', 'C0122', "W0123", 'W0703', 'W0212',
               'W0201', 'R1711', 'R1714', 'W0107')
    check_pep8(test, fLOG=fLOG, neg_pattern="temp_.*", skip=skip,
               verbose=False, run_cmd_filter=_run_cmd_filter,
               pylint_ignore=ignored)
def test_search_predictions_keras(self):
    """Fit ``SearchEnginePredictionImages`` on a MobileNet model and query it.

    Returns early with a warning when importing keras raises
    ``SyntaxError`` or ``ModuleNotFoundError``.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    from mlinsights.search_rank import SearchEnginePredictionImages

    # The keras imports are delayed because the backend is not
    # necessarily available.
    with redirect_stderr(StringIO()):
        try:
            from keras.applications.mobilenet import MobileNet  # pylint: disable=E0401
        except (SyntaxError, ModuleNotFoundError) as e:
            message = "tensorflow is probably not available yet on python 3.7: {0}"
            warnings.warn(message.format(e))
            return
        from keras.preprocessing.image import ImageDataGenerator  # pylint: disable=E0401
        from keras.preprocessing.image import img_to_array, load_img  # pylint: disable=E0401

    # Deep learning model.
    model = MobileNet(input_shape=None, alpha=1.0, depth_multiplier=1,
                      dropout=1e-3, include_top=True, weights='imagenet',
                      input_tensor=None, pooling=None, classes=1000)
    self.assertEqual(model.name, 'mobilenet_1.00_224')

    # Images are unzipped into a sub-folder of the temporary folder.
    temp = get_temp_folder(__file__, "temp_search_predictions_keras")
    dest = os.path.join(temp, "simages")
    os.mkdir(dest)
    zipname = os.path.join(temp, "..", "..", "..", "_doc", "notebooks",
                           "explore", "data", "dog-cat-pixabay.zip")
    files = unzip_files(zipname, where_to=dest)
    self.assertTrue(len(files) > 0)

    # Iterator over the images.
    gen = ImageDataGenerator(rescale=1. / 255)
    with redirect_stdout(StringIO()):
        iterim = gen.flow_from_directory(
            temp, batch_size=1, target_size=(224, 224),
            classes=['simages'], shuffle=False)

    # Search engine.
    engine = SearchEnginePredictionImages(
        model, fct_params=dict(layer=len(model.layers) - 4), n_neighbors=5)
    self.assertIn("SearchEnginePredictionImages", repr(engine))

    # Fit, then look for the neighbours of the same iterator.
    engine.fit(iterim, fLOG=fLOG)
    score, ind, meta = engine.kneighbors(iterim)

    self.assertIsInstance(ind, (list, numpy.ndarray))
    self.assertEqual(len(ind), 5)
    self.assertEqual(ind[0], 0)

    self.assertIsInstance(score, numpy.ndarray)
    self.assertEqual(score.shape, (5, ))
    self.assertEqual(score[0], 0)

    self.assertIsInstance(meta, (numpy.ndarray, pandas.DataFrame))
    self.assertEqual(meta.shape, (5, 2))
    first_name = meta.loc[0, 'name'].replace('\\', '/')
    self.assertEqual(first_name, 'simages/cat-1151519__480.jpg')

    # Second query: a single image loaded from disk.
    image_path = os.path.join(temp, 'simages', 'cat-2603300__480.jpg')
    img = load_img(image_path, target_size=(224, 224))
    x = img_to_array(img)
    gen = ImageDataGenerator(rescale=1. / 255)
    iterim = gen.flow(x[numpy.newaxis, :, :, :], batch_size=1)
    score, ind, meta = engine.kneighbors(iterim)

    self.assertIsInstance(ind, (list, numpy.ndarray))
    self.assertIsInstance(score, numpy.ndarray)
    self.assertIsInstance(meta, (numpy.ndarray, pandas.DataFrame))
def test_roc(self):
    """Exercise ``ROC`` on random data: curves, confusion tables, intervals.

    Every curve type is plotted with and without bootstrap and saved as a
    PNG file in the temporary folder.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    fix_tkinter_issues_virtualenv(fLOG=fLOG)
    import matplotlib.pyplot as plt  # pylint: disable=C0415

    temp = get_temp_folder(__file__, "temp_roc")

    # Two separate passes so the random draws happen in the same order.
    scores = [random.random() for a in range(0, 1000)]
    data = [(x, 1 if x + random.random() / 3 > 0.7 else 0) for x in scores]

    test = ROC(y_true=[_[1] for _ in data], y_score=[_[0] for _ in data])
    self.assertEqual(len(test), len(data))

    test = ROC(df=data)
    fLOG(test.__str__())
    roc = test.compute_roc_curve()
    t = test.roc_intersect(roc, 0.2)
    self.assertTrue(1 >= t >= 0)

    conf = test.confusion()
    fLOG(str(conf))
    self.assertEqual(conf.shape, (12, 5))
    conf = test.confusion(score=0.5)
    fLOG(conf)
    self.assertEqual(conf.shape, (1, 5))

    def draw(nb, png, check_returned, **options):
        # Plot on a new figure, check either the object returned by
        # ``plot`` or the axis given to it, then save the figure.
        fig, ax = plt.subplots()
        returned = test.plot(nb, ax=ax, **options)
        self.assertNotEmpty(returned if check_returned else ax)
        fig.savefig(os.path.join(temp, png))

    kinds = ROC.CurveType

    fLOG("graph.............. PROBSCORE")
    draw(0, "roc_PROBSCORE_10.png", True,
         curve=kinds.PROBSCORE, thresholds=True)
    draw(0, "roc_PROBSCORE_100_b10.png", False,
         bootstrap=10, curve=kinds.PROBSCORE, thresholds=True)

    fLOG("graph.............. SKROC")
    draw(0, "roc_SKROC_10.png", True, curve=kinds.SKROC)
    draw(0, "roc_SKROC_100_b10.png", False, bootstrap=10, curve=kinds.SKROC)

    fLOG("graph.............. RECPREC")
    draw(100, "roc_RECPREC_100.png", True, curve=kinds.RECPREC)
    draw(100, "roc_RECPREC_100_b10.png", True,
         bootstrap=10, curve=kinds.RECPREC)

    fLOG("graph.............. SKROC True")
    draw(0, "roc_SKROC_T_10.png", True, curve=kinds.SKROC, thresholds=True)
    draw(0, "roc_SKROC_T_100_b10.png", False,
         bootstrap=10, curve=kinds.SKROC, thresholds=True)

    fLOG("graph.............. RECPREC True")
    draw(100, "roc_RECPREC_T_100.png", True,
         curve=kinds.RECPREC, thresholds=True)
    draw(100, "roc_RECPREC_T_100_b10.png", True,
         bootstrap=10, curve=kinds.RECPREC, thresholds=True)

    fLOG("graph.............. ERRREC")
    draw(100, "roc_ERRREC_100.png", True, curve=kinds.ERRREC)
    draw(100, "roc_ERRREC_100_b10.png", True,
         bootstrap=10, curve=kinds.ERRREC)

    fLOG("graph.............. ROC")
    fig, ax = plt.subplots()
    # The same call without ``thresholds=True`` is expected to fail.
    self.assertRaise(
        lambda: test.plot(10, ax=ax, label=["r10", "p10"],
                          curve=ROC.CurveType.ROC),
        ValueError)
    ax = test.plot(10, ax=ax, thresholds=True, label=["r10", "p10"],
                   curve=ROC.CurveType.ROC)
    self.assertNotEmpty(ax)
    fig.savefig(os.path.join(temp, "roc_ROC_10.png"))

    draw(100, "roc_ROC_100.png", False,
         label=["r100", "p100"], curve=kinds.ROC, thresholds=True)
    draw(100, "roc_ROC_100_b10.png", False, bootstrap=10, curve=kinds.ROC)

    fLOG("computing rate..............................")
    values = test.auc_interval(alpha=0.1, bootstrap=20)
    for k, v in sorted(values.items()):
        fLOG("{0}={1}".format(k, v))
    self.assertEqual(
        list(sorted(values.keys())),
        ['auc', 'interval', 'max', 'mean', 'mediane', 'min', 'var'])
    self.assertTrue(values["min"] <= values["auc"] <= values["max"])

    fLOG("computing rate..............................")
    values = test.roc_intersect_interval(0.1, 100, bootstrap=50)
    for k, v in sorted(values.items()):
        fLOG("{0}={1}".format(k, v))
    self.assertEqual(
        list(sorted(values.keys())),
        ['interval', 'max', 'mean', 'mediane', 'min', 'var', 'y'])
    self.assertTrue(values["min"] <= values["y"] <= values["max"])

    plt.close('all')
    fLOG("end")
def test_notebook_2048(self):
    """Call ``self.a_test_notebook_runner`` with ``"2048"`` and ``"td1a"``."""
    verbose = __name__ == "__main__"
    fLOG(__file__, self._testMethodName, OutputPrint=verbose)
    self.a_test_notebook_runner("2048", "td1a")
def _linux_jenkins_ext_setup_server_yaml2(self, local_file, disp):
    """Check the job configurations generated from a YAML file for linux.

    Builds a mocked ``JenkinsExt`` server for the linux platform, calls
    ``setup_jenkins_server_yml`` on *local_file* and inspects the last
    item of every result, which is the job configuration as a string.

    :param local_file: YAML file given to ``setup_jenkins_server_yml``
    :param disp: if True, every configuration is logged with ``fLOG``
    :raises Exception: if a configuration contains a Windows marker,
        lacks an expected element, or if the counts of scheduled,
        publishing, wiping and artifact jobs are inconsistent
    """
    srv = JenkinsExt(
        "http://localhost:8080/", "user", "password", mock=True,
        engines=default_engines(platform="linux"), fLOG=fLOG,
        platform="linux")

    fLOG("---------------------")
    modules = [('yml', local_file, None)]
    fLOG("[modules]", modules)
    res = setup_jenkins_server_yml(srv, github="sdpython", modules=modules,
                                   overwrite=True, add_environ=False,
                                   location="anything")

    reg = re.compile("<description>(.*)</description>")
    separator = "\n\n\n----------------------------\n\n\n"
    sch = 0
    wipe = 0
    pub = 0
    arti = 0
    confs = []
    for r in res:
        conf = r[-1]

        # Nothing specific to Windows may appear in a linux job.
        if "set current=" in conf.lower():
            raise Exception("The job is for linux\n{0}".format(conf))
        if "SET " in conf:
            raise Exception("The job is for linux\n{0}".format(conf))
        if "c:" in conf:
            raise Exception("The job is for linux\n{0}".format(conf))

        if disp:
            fLOG(conf)

        if not conf.startswith("<?xml version='1.0' encoding='UTF-8'?>"):
            raise Exception(conf)

        search = reg.search(conf)
        if not search:
            raise Exception(conf)

        job = r[0]
        fLOG(search.groups()[0], "--", job, "--", r[1])

        if "PYQUICKHELPER27" in conf:
            raise Exception(conf)
        if "export VERSION=" not in conf:
            raise Exception(conf)
        if "export NAME=" not in conf:
            raise Exception(conf)
        if "export DIST=" not in conf:
            raise Exception(conf)
        if "<runOnChoice>ON_BOTH</runOnChoice>" in conf:
            sch += 1
        if "PUBLISHER" in conf:
            pub += 1
        if "artifacts" in conf:
            arti += 1
        if 'if [ "PYPI"' in conf:
            raise Exception(conf)
        if "<hudson.plugins.git.extensions.impl.WipeWorkspace />" in conf:
            wipe += 1
        confs.append(conf)

    all_confs = separator.join(confs)
    if sch + pub != 1:
        raise Exception("{0} != {1}\n{2}".format(sch + pub, 1, all_confs))
    if pub == 0 and wipe != len(confs):
        raise Exception("{0} != {1}\n{2}".format(wipe, len(confs), all_confs))
    if pub != 0 and wipe != 0:
        # The expected number of wiping jobs is 0 here, not len(confs).
        raise Exception("{0} != {1}\n{2}".format(wipe, 0, all_confs))
    if arti == 0:
        # Report the actual problem instead of the wipe counters.
        raise Exception(
            "no configuration contains 'artifacts'\n{0}".format(all_confs))
def test_files_compress(self):
    """Encrypt this file with ``MagicCrypt``, decrypt it and compare bytes."""
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    def read_bytes(path):
        # Whole content of a file in binary mode.
        with open(path, "rb") as f:
            return f.read()

    password = "******" * 2
    source = os.path.abspath(__file__)
    temp = get_temp_folder(__file__, "temp_crypt")
    encrypted = os.path.join(temp, "out_crypt.enc")
    decrypted = os.path.join(temp, "__file__.py")

    magic = MagicCrypt()

    # The command line refers to names registered through add_context.
    cmd = "this dest %s" % password
    fLOG("**", cmd)
    if os.path.exists(encrypted):
        raise Exception(encrypted)
    magic.add_context({"this": source, "dest": encrypted})
    fLOG(magic.encrypt_file(cmd))
    assert os.path.exists(encrypted)

    cmd = "dest dest2 %s" % password
    fLOG("**", cmd)
    assert not os.path.exists(decrypted)
    magic.add_context({"dest": encrypted, "dest2": decrypted})
    fLOG(magic.decrypt_file(cmd))
    assert os.path.exists(decrypted)

    original = read_bytes(__file__)
    restored = read_bytes(decrypted)
    self.assertEqual(original, restored)
    fLOG("end", len(original), len(restored))
    assert len(original) > 0
def test_notebook_ml_text_features(self):
    """Call ``self.a_test_notebook_runner`` with ``"ml_text_features"`` and ``"td2a"``."""
    verbose = __name__ == "__main__"
    fLOG(__file__, self._testMethodName, OutputPrint=verbose)
    self.a_test_notebook_runner("ml_text_features", "td2a")
def test_linkedin_basic(self):
    """Fetch the current profile and search profiles through ``LinkedInAccess``.

    Returns early with a warning when no token is available or when the
    profile request fails with an "Expired access token." message.
    """
    self.start()
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    token = self.get_access_token()
    if token is None:
        warnings.warn("no credential, unable to test linkedin")
        return

    linkedin = LinkedInAccess(*token)
    fLOG("***", linkedin.connect(False))

    try:
        prof = linkedin.get_profile()
    except Exception as e:
        if "Expired access token." not in str(e):
            raise e
        warnings.warn(str(e))
        return

    fLOG("prof", prof)
    assert prof["lastName"] == "Dupre"

    fLOG("------")
    prof = []
    se = linkedin.search_profile(
        params={"last-name": "dupre", "first-name": "xavier"})
    people = se["people"]["values"]
    for person in people:
        fLOG(person)
        assert 'id' in person
        try:
            prof.append(linkedin.get_profile(id=person['id']))
        except Exception as e:
            # A profile which cannot be retrieved is only logged.
            fLOG("error", e)
        fLOG("----")

    assert len(se["people"]["values"]) > 1
    assert len(prof) > 0
    for p in prof:
        fLOG(p)
def test_notebook_timeseries(self):
    """Call ``self.a_test_notebook_runner`` with ``"ml_timeseries_base"`` and ``"2a"``."""
    verbose = __name__ == "__main__"
    fLOG(__file__, self._testMethodName, OutputPrint=verbose)
    self.a_test_notebook_runner("ml_timeseries_base", "2a")
def test_TableFormulaCore(self):
    """Call the main ``TableFormula`` operations on small tables.

    Every call is followed by an assertion that the table or the returned
    value is not empty.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    def build(text):
        # Spaces become column separators, '#' becomes a row separator.
        return TableFormula(text.replace(" ", "\t").replace("#", "\n"))

    def name_of(v):
        return v["name"]

    def d_a_of(v):
        return v["d_a"]

    def d_b_of(v):
        return v["d_b"]

    small = "name d_a d_b d_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5"

    fold = os.path.dirname(__file__)
    assert TableFormula.delta is not None

    file = os.path.join(fold, "data", "BNP.PA.txt")
    table = TableFormula(file, sep=",")
    table.sort(lambda v: v["Date"])
    assert len(table) > 0

    table = build(small)
    assert "d_a\td_b\td_c" in str(table)

    dist = table.get_distinct_values("name")
    assert len(dist) > 0

    table.add_column("has_A", lambda v: 1. if "A" in v["name"] else 0.)
    assert len(table) > 0

    x = 1. / 3
    table.add_column_smooth("has_A_smooth", lambda v: v["has_A"],
                            [-1, 0, 1], [x, x, x])
    assert len(table) > 0

    fil = table.filter(lambda v: v["d_b"] == 2)
    assert len(table) > 0

    rnd = table.random(5)
    assert len(rnd) > 0

    rnd = table.random(1, True)
    assert len(rnd) > 0

    fil = table.filter_quantile(d_b_of, 0, 0.4)
    assert len(fil) > 0

    total = table.aggregate(lambda v: v["d_c"])
    assert total > 0

    table.sort(lambda v: v["d_b"] + v["d_c"])
    assert len(table) > 0

    union = table.union(table)
    assert len(union) > len(table)

    group = table.groupby(name_of, [d_a_of, d_b_of],
                          ["name", "sum_d_a", "sum_d_b"])
    assert len(group) > 0

    groupmax = table.groupby(name_of, [d_a_of, d_b_of],
                             ["name", "max_d_a", "max_d_b"], [max, max])
    assert len(groupmax) > 0

    group = table.groupby(name_of, [d_a_of],
                          ["name", "weight", "sum_d_a"],
                          [lambda vec, w: sum(vec) / w],
                          d_b_of)
    innerjoin = table.innerjoin(group, name_of, name_of, "group")
    assert len(innerjoin) > 0

    ext = table.extract_columns(["name", "d_a"])
    assert len(ext) > 0

    ext = table.remove_columns(["d_a"])
    assert len(ext) > 0

    d = table.todict(name_of, d_b_of, True)
    assert len(d) > 0

    d = table.select(lambda v: (v["name"], v["d_b"]))
    assert len(list(d)) > 0

    table.create_index(lambda v: (v["name"], v["d_a"]))
    row = table.get(('A', 1.1))
    assert row
    value = table.get(('A', 1.1), 2)
    assert value

    table = build(small)
    table.add_column("key_add", lambda v: "unique")
    mul = table.multiply_column_by_row_instance(
        lambda v: v["key_add"], name_of)
    assert len(mul) > 0

    table = build("key_name sum_a len_b avg_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5")
    gr = table.groupby_implicit(lambda v: v["key_name"])
    assert len(gr) > 0
def test_diff(self):
    """Draw the difference between two word sequences with pygame.

    Checks the number of opcodes first, returns on travis before pygame
    is imported, then saves 21 frames as PNG files in a temporary folder.
    When the file runs as a script, the frames are also assembled into a
    video and the method waits for an event.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    seq1 = "ab ab2 abc3 abcd abc4".split()
    seq2 = "ab ab2 abc3 abc4 abc adb".split()
    diff = SequenceMatcher(a=seq1, b=seq2)

    nb = 0
    for nb, opcode in enumerate(diff.get_opcodes(), 1):
        fLOG(opcode)
    self.assertEqual(nb, 4)

    h = 20
    size = 500, 500
    white = 255, 255, 255

    if is_travis_or_appveyor() in ("travis",):
        # pygame.error: No available video device
        return

    import pygame
    flags = pygame.NOFRAME if is_travis_or_appveyor() == "circleci" else 0

    pygame, screen, fonts = get_pygame_screen_font(h, size, flags=flags)

    from src.ensae_teaching_cs.helpers.pygame_helper import wait_event

    bars = [random.randint(10, 500) / 500.0 for s in seq2]
    screen.fill(white)
    build_diff_image(pygame, screen, h=h, maxw=size[1], seq1=seq1, seq2=seq2,
                     diff=diff, fonts=fonts, bars=bars)
    pygame.display.flip()

    temp = get_temp_folder(__file__, "temp_video_diff")

    for step in range(0, 21):
        screen.fill(white)
        build_diff_image(pygame, screen, h=h, maxw=size[0], seq1=seq1,
                         seq2=seq2, diff=diff, fonts=fonts, bars=bars,
                         progress=step / 20.0, prev_bars=None)
        pygame.time.wait(60)
        pygame.display.flip()
        pygame.image.save(screen, os.path.join(temp, "diff%d.png" % step))

    if __name__ == "__main__":
        from src.ensae_teaching_cs.helpers.video_helper import make_video
        png = [os.path.join(temp, _) for _ in os.listdir(temp) if ".png" in _]
        out = os.path.join(temp, "diff.avi")
        make_video(png, out, size=(350, 250), format="XVID", fps=5)

        wait_event(pygame)

    for font in fonts.values():
        del font
    pygame.quit()
def test_empty_table(self):
    """A ``TableFormula`` built from a header row only has size ``(0, 2)``."""
    verbose = __name__ == "__main__"
    fLOG(__file__, self._testMethodName, OutputPrint=verbose)
    header_only = [["x", "y"]]
    tbl = TableFormula(header_only)
    self.assertEqual(tbl.size, (0, 2))
def test_pandas_groupbynan(self):
    """Check ``pandas_groupby_nan`` for several value types.

    For each type, a three-row frame is built whose last row has no
    ``"value"``:

    * grouping by ``"value"`` must give a NaN as the last summed value,
    * grouping by the tuple ``("value", "this")`` must raise ``TypeError``,
    * grouping by the list ``["value", "this"]`` may raise ``TypeError`` or
      ``NotImplementedError``; otherwise the summed column has two values.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    types = [(str, "e"), (int, -10), (float, -20.2),
             (bytes, bytes("a", "ascii"))]
    skip = (numpy.bool_, numpy.complex64, numpy.complex128)
    types += [(_, _(5)) for _ in numpy_types() if _ not in skip]

    def make_frame(ty):
        # Two rows holding the value, one row without any value.
        data = [{"this": "cst", "type": "tt1=" + str(ty[0]), "value": ty[1]},
                {"this": "cst", "type": "tt2=" + str(ty[0]), "value": ty[1]},
                {"this": "cst", "type": "row_for_nan"}]
        return pandas.DataFrame(data)

    for ty in types:
        df = make_frame(ty)
        gr = pandas_groupby_nan(df, "value")
        co = gr.sum()
        li = list(co["value"])
        assert numpy.isnan(li[-1])

    for ty in types:
        df = make_frame(ty)

        # A tuple of columns is not supported: TypeError is mandatory.
        try:
            pandas_groupby_nan(df, ("value", "this"))
        except TypeError:
            pass
        else:
            raise Exception("---")

        # A list of columns is allowed to be unsupported.
        try:
            gr = pandas_groupby_nan(df, ["value", "this"])
        except (TypeError, NotImplementedError):
            continue
        co = gr.sum()
        li = list(co["value"])
        self.assertEqual(len(li), 2)
def test_notebook_ml_crypted_data(self):
    """Call ``self.a_test_notebook_runner`` with ``"crypted_data"`` and ``"td2a"``."""
    verbose = __name__ == "__main__"
    fLOG(__file__, self._testMethodName, OutputPrint=verbose)
    self.a_test_notebook_runner("crypted_data", "td2a")
def test_optimisation(self):
    """Compare the solutions of the two ``exercice_particulier`` functions.

    The second solution is first checked against hard-coded values; the
    comparison with the first one is skipped on travis.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    second = exercice_particulier2()
    point = second['x']
    gap = abs(point[0] - 0.428571428055853) + \
        abs(point[1] - 0.2857142848749249)
    assert gap < 1e-5

    if is_travis_or_appveyor() == "travis":
        # skip mkl
        return

    first = exercice_particulier1()
    second = exercice_particulier2()

    fLOG("cvxopt")
    fLOG(first)
    fLOG("solution:", first['x'].T)
    fLOG("Arrow_Hurwicz")
    fLOG(second)
    fLOG("solution:", second['x'])

    x1, x2 = first['x'], second['x']
    d1 = x1[0] - x2[0]
    d2 = x1[1] - x2[1]
    fLOG(d1, d2)
    assert abs(d1) < 1e-5
    assert abs(d2) < 1e-5
jointes par des étudiants. Ces programmes sont récupérés par le script :ref:`fetch_student_projects_from_gmail.py <script-fetch-students-projets-py>`. .. _script-execute-script: """ ######################################### # import import sys import os import pandas ######################################### # logging from pyquickhelper.loghelper import fLOG # execute_student_projects fLOG(OutputPrint=True) ######################################### # import des fonctions dont on a besoin from ensae_teaching_cs.automation_students.interro_motif import execute_python_scripts, _get_code ########################### # paramètre du programme neworder = "nom_prenom key pattern_id cmp motif_dans_sortie sortie_dans_motif dist time size program err out content url".split( ) dest_folder = os.path.normpath( os.path.abspath( os.path.join( *([os.path.dirname(__file__)] + ([".."] * 5) + ["_data", "ecole", "ENSAE", "2016-2017", "1A_november"])))) # expected outputs
def setUp(self):
    """Call ``add_missing_development_version`` for the listed packages."""
    # The logged message used to read "dependencing".
    fLOG("add missing dependencies")
    add_missing_development_version(
        ["pymyinstall", "pyensae", "pymmails", "jyquickhelper"],
        __file__, hide=True)
def setUp(self):
    """Call ``add_missing_development_version`` for the listed packages."""
    verbose = __name__ == "__main__"
    fLOG("add missing dependencies", OutputPrint=verbose)
    required = ["pymyinstall", "pyensae", "pymmails", "jyquickhelper"]
    add_missing_development_version(required, __file__, hide=True)
def test_search_predictions_torch(self):
    """Fit ``SearchEnginePredictionImages`` on a SqueezeNet model and query it.

    Returns early with a warning when importing torchvision raises
    ``SyntaxError`` or ``ModuleNotFoundError``.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    from mlinsights.search_rank import SearchEnginePredictionImages

    # The torch imports are delayed because torch is not necessarily
    # available.
    with redirect_stderr(StringIO()):
        try:
            import torchvision.models as tmodels  # pylint: disable=E0401
        except (SyntaxError, ModuleNotFoundError) as e:
            warnings.warn("torch is not available: {0}".format(e))
            return
        from torchvision import datasets, transforms  # pylint: disable=E0401
        from torch.utils.data import DataLoader  # pylint: disable=E0401

    # Deep learning model.
    model = tmodels.squeezenet1_1(pretrained=True)

    # Images are unzipped into a sub-folder of the temporary folder.
    temp = get_temp_folder(__file__, "temp_search_predictions_torch")
    dest = os.path.join(temp, "simages")
    os.mkdir(dest)
    zipname = os.path.join(temp, "..", "..", "..", "_doc", "notebooks",
                           "explore", "data", "dog-cat-pixabay.zip")
    files = unzip_files(zipname, where_to=dest)
    self.assertTrue(len(files) > 0)

    # Sequence of images.
    steps = [transforms.Resize((224, 224)),
             transforms.CenterCrop(224),
             transforms.ToTensor()]
    trans = transforms.Compose(steps)
    imgs_ = datasets.ImageFolder(temp, trans)
    dataloader = DataLoader(imgs_, batch_size=1, shuffle=False,
                            num_workers=1)
    imgs = [batch[0] for batch in dataloader]

    # Search engine.
    engine = SearchEnginePredictionImages(model, n_neighbors=5)
    self.assertIn("SearchEnginePredictionImages", repr(engine))

    fLOG('[fit]')
    engine.fit(imgs_, fLOG=fLOG)

    # Neighbours of the first image.
    fLOG('[test]', type(imgs[0]), imgs[0].shape)
    score, ind, meta = engine.kneighbors(imgs[0])

    self.assertIsInstance(ind, (list, numpy.ndarray))
    self.assertEqual(len(ind), 5)
    self.assertEqual(ind[0], 0)

    self.assertIsInstance(score, numpy.ndarray)
    self.assertEqual(score.shape, (5, ))
    self.assertLess(score[0], 50)

    self.assertIsInstance(meta, (numpy.ndarray, pandas.DataFrame))
    self.assertEqual(meta.shape, (5, 2))
    first_path = meta.iloc[0, 1].replace('\\', '/')
    self.assertEndsWith('simages/cat-1151519__480.jpg', first_path)

    # Neighbours of the whole list of images.
    score, ind, meta = engine.kneighbors(imgs)
    self.assertIsInstance(ind, (list, numpy.ndarray))
    self.assertIsInstance(score, numpy.ndarray)
    self.assertIsInstance(meta, (numpy.ndarray, pandas.DataFrame))
def test_linkedin_search_key(self):
    """Search LinkedIn profiles by keyword through ``LinkedInAccess``.

    Returns early with a warning when ``self.get_access_token()`` gives
    ``None``. When the file runs as a script, keyword searches are saved
    to ``temp_ensae_*.txt`` files next to this file; otherwise the
    current profile and one ``"ensae"`` search are checked.
    """
    self.start()
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    token = self.get_access_token()
    if token is None:
        warnings.warn("no credential, unable to test linkedin")
        return
    linkedin = LinkedInAccess(*token)
    res = linkedin.connect()
    if __name__ == "__main__":
        # Disabled branch: one search per year, then a global search.
        if False:
            for year in range(2010, 2014):
                fLOG("**** year ", year)
                se = linkedin.search_profile(
                    params={"keywords": "ensae %d" % year},
                    count=-1,
                    as_table=True)
                if se is not None:
                    temp_file = os.path.abspath(
                        os.path.join(
                            os.path.split(__file__)[0],
                            "temp_ensae_%d.txt" % year))
                    fLOG("writing ", len(se))
                    se.save(temp_file, encoding="utf8")

            se = linkedin.search_profile(
                params={"keywords": "ensae"},
                count=-1,
                as_table=True)
            temp_file = os.path.abspath(
                os.path.join(
                    os.path.split(__file__)[0],
                    "temp_ensae2.txt"))
            fLOG("writing ", len(se))
            se.save(temp_file, encoding="utf8")

        # Enabled branch: one search per keyword of the list below.
        if True:
            for key in "new-york paris londres singapour montreal pekin shangai tokyo kyoto san francisco boston bank research economy statistics insurance".split():
                fLOG("**** key ", key)
                se = linkedin.search_profile(
                    params={"keywords": "ensae %s" % key},
                    count=-1,
                    as_df=True)
                if se is not None:
                    temp_file = os.path.abspath(
                        os.path.join(
                            os.path.split(__file__)[0],
                            "temp_ensae_%s.txt" % key))
                    fLOG("writing ", len(se))
                    se.save(temp_file, encoding="utf8")
    else:
        fLOG("***", res)
        try:
            prof = linkedin.get_profile()
        except Exception as e:
            # An expired token ends the test silently; anything else fails.
            if "Expired access token." in str(e):
                return
            else:
                raise e
        fLOG("prof", prof)
        assert prof["lastName"] == "Dupre"

        fLOG("------")
        prof = []
        se = linkedin.search_profile(
            params={
                "keywords": "ensae"},
            as_table=True)
        fLOG(se)
        assert "headline" in se.header
        assert len(se) > 0
def test_algo(self):
    """Chain ``bellman`` and ``kruskal`` on the first 1000 edges.

    After each ``kruskal`` call, the vertices whose degree is odd in the
    graph made of the original and added edges are logged.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    here = os.path.abspath(os.path.dirname(__file__))
    folder = os.path.join(here, "temp_algo")
    if not os.path.exists(folder):
        os.mkdir(folder)

    edges = get_data(whereTo=folder, fLOG=fLOG)[:1000]
    max_segment = max(e[-1] for e in edges)
    possibles = possible_edges(edges, max_segment / 8, fLOG=fLOG)

    def odd_vertices(extra):
        # Sorted vertices with an odd degree once `extra` edges are added.
        degrees = graph_degree(edges + extra)
        return sorted([k for k, v in degrees.items() if v % 2 == 1])

    def is_possible(e):
        return e in possibles

    init = bellman(edges, fLOG=fLOG, allow=is_possible)
    fLOG("---")
    init = bellman(edges, fLOG=fLOG, allow=is_possible, init=init)
    fLOG("---")
    added = kruskal(edges, init, fLOG=fLOG)
    odd = odd_vertices(added)
    fLOG("degrees", odd)
    odd_set = set(odd)

    fLOG("---")
    init = bellman(
        edges, fLOG=fLOG,
        allow=lambda e: e in possibles or e[0] in odd_set or e[1] in odd_set,
        init=init)
    fLOG("---")
    added = kruskal(edges, init, fLOG=fLOG)
    fLOG("degrees", odd_vertices(added))