def test_main(self):
    """Run ``main`` without arguments and check the captured text.

    The output collected through ``BufferedPrint`` must contain the
    generic usage line and the description of one known command.
    """
    printer = BufferedPrint()
    main(args=[], fLOG=printer.fprint)
    output = str(printer)
    for expected in ("python -m pyquickhelper <command>",
                     "Synchronizes a folder"):
        self.assertIn(expected, output)
def test_cli_convert_validater_switch(self):
    """Check ``convert_validate`` through the API and the command line.

    A logistic regression is fitted on iris, pickled next to a CSV file
    holding the test features, then validated once by a direct call and
    once through ``main`` with ``--use_double switch``.
    """
    dataset = load_iris()
    features, target = dataset.data, dataset.target
    X_train, X_test, y_train, _ = train_test_split(
        features, target, random_state=11)
    model = LogisticRegression()
    model.fit(X_train, y_train)

    folder = get_temp_folder(__file__, "temp_cli_convert_validate_switch")
    csv_path = os.path.join(folder, "data.csv")
    pandas.DataFrame(X_test).to_csv(csv_path, index=False)
    pkl_path = os.path.join(folder, "model.pkl")
    with open(pkl_path, "wb") as stream:
        pickle.dump(model, stream)

    methods = "predict,predict_proba"
    out_names = "output_label,output_probability"

    # Direct call: only expected to run, its result is not inspected.
    convert_validate(pkl=pkl_path, data=csv_path, verbose=0,
                     method=methods, name=out_names)

    printer = BufferedPrint()
    cli_args = [
        "convert_validate",
        "--pkl", pkl_path,
        '--data', csv_path,
        '--method', methods,
        '--name', out_names,
        '--verbose', '1',
        '--use_double', 'switch',
    ]
    main(cli_args, fLOG=printer.fprint)
    self.assertIn(
        "[convert_validate] compute predictions with method 'predict_proba'",
        str(printer))
def test_cli_validate_kmeans(self):
    """Run ``validate_runtime`` on KMeans, then merge and summarize.

    The command must write the raw CSV, the summary CSV and the graph.
    The raw results are then merged with a copy of themselves and
    summarized; the summary is finally saved as an Excel file.
    """
    folder = get_temp_folder(__file__, "temp_validate_runtime_kmeans")
    raw_csv, summary_csv, graph_png = (
        os.path.join(folder, leaf)
        for leaf in ("raw.csv", "sum.csv", 'gr.png'))

    printer = BufferedPrint()
    cli_args = [
        "validate_runtime",
        "--n_features", "4,50",
        "-nu", "3",
        "-re", "3",
        "-o", "11",
        "-op", "11",
        "-v", "2",
        "--out_raw", raw_csv,
        "--out_summary", summary_csv,
        "-b", "1",
        "--runtime", "python_compiled,onnxruntime1",
        "--models", "KMeans",
        "--out_graph", graph_png,
        "--dtype", "32",
    ]
    main(args=cli_args, fLOG=printer.fprint)
    self.assertIn('KMeans', str(printer))
    for produced in (raw_csv, summary_csv, graph_png):
        self.assertExists(produced)

    raw = pandas.read_csv(raw_csv)
    merged = merge_benchmark(
        {'r1-': raw, 'r2-': raw.copy()}, baseline='r1-onnxruntime1')
    base_columns = sorted(
        [col for col in merged.columns if col.endswith('-base')])
    report = summary_report(merged, add_cols=base_columns)
    self.assertEqual(merged.shape[0], report.shape[0])
    self.assertIn('N=10-base', report.columns)
    report.to_excel(os.path.join(folder, "merged.xlsx"), index=False)
def test_cli_validate_model_lightgbm(self):
    """Run ``validate_runtime`` on LGBMClassifier with dumps enabled.

    Besides the two CSV outputs, at least one of two expected dump files
    must exist in the temporary folder, otherwise ``FileNotFoundError``
    is raised with the folder listing.
    """
    folder = get_temp_folder(__file__, "temp_validate_model_lgbm_csv")
    raw_csv = os.path.join(folder, "raw.csv")
    summary_csv = os.path.join(folder, "sum.csv")

    printer = BufferedPrint()
    cli_args = [
        "validate_runtime",
        "--out_raw", raw_csv,
        "--out_summary", summary_csv,
        "--models", "LGBMClassifier",
        '-o', '10',
        '-op', '10',
        '-v', '2',
        '-b', '1',
        '-dum', '1',
        '-du', folder,
    ]
    main(args=cli_args, fLOG=printer.fprint)
    self.assertIn('LGBMClassifier', str(printer))
    self.assertExists(raw_csv)
    self.assertExists(summary_csv)

    dump_error = os.path.join(
        folder, "dump-ERROR-python-LGBMClassifier-default-b-cl--op10-nf4.pkl")
    dump_ok = os.path.join(
        folder, "dump-i-python-LGBMClassifier-default-b-cl-lightgbm.sklearn"
        ".LGBMClassifierzipmapFalse-op10-nf4.pkl")
    if not (os.path.exists(dump_error) or os.path.exists(dump_ok)):
        listing = os.listdir(folder)
        raise FileNotFoundError(
            "Unable to find '{}' or '{}' in\n{}.".format(
                dump_error, dump_ok, '\n'.join(listing)))
def test_onnx_stats(self):
    """Convert a logistic regression to ONNX and run ``onnx_stats`` on it.

    ``convert_validate`` writes the ONNX file; the command output must
    then contain ``ninits: 0``.
    """
    dataset = load_iris()
    features, target = dataset.data, dataset.target
    X_train, X_test, y_train, _ = train_test_split(
        features, target, random_state=11)
    model = LogisticRegression()
    model.fit(X_train, y_train)

    folder = get_temp_folder(__file__, "temp_onnx_stats")
    csv_path = os.path.join(folder, "data.csv")
    pandas.DataFrame(X_test).to_csv(csv_path, index=False)
    pkl_path = os.path.join(folder, "model.pkl")
    with open(pkl_path, "wb") as stream:
        pickle.dump(model, stream)

    onnx_path = os.path.join(folder, 'outolr.onnx')
    convert_validate(
        pkl=pkl_path, data=csv_path, verbose=0,
        method="predict,predict_proba", outonnx=onnx_path,
        name="output_label,output_probability")

    printer = BufferedPrint()
    main(args=["onnx_stats", "--name", onnx_path], fLOG=printer.fprint)
    self.assertIn("ninits: 0", str(printer))
def test_cli_validate_model_process_csv(self):
    """Run ``validate_runtime`` on two linear models with ``-se 1``.

    The captured output must mention ``Linear`` and both CSV files must
    exist afterwards.
    """
    folder = get_temp_folder(__file__, "temp_validate_model_process_csv")
    raw_csv = os.path.join(folder, "raw.csv")
    summary_csv = os.path.join(folder, "sum.csv")

    cli_args = [
        "validate_runtime",
        "--out_raw", raw_csv,
        "--out_summary", summary_csv,
        "--models", "LogisticRegression,LinearRegression",
        '-o', '10',
        '-op', '11',
        '-v', '3',
        '-b', '1',
        '-se', '1',
        # '-d', '1',
    ]
    printer = BufferedPrint()
    main(args=cli_args, fLOG=printer.fprint)

    self.assertIn('Linear', str(printer))
    for produced in (raw_csv, summary_csv):
        self.assertExists(produced)
def test_onnx_inference_verbose_intermediate(self):
    """Check the verbose log of ``OnnxInference.run`` with intermediates.

    A KMeans model fitted on iris is converted with ``to_onnx`` and run
    with ``verbose=15`` and ``intermediate=True``; the captured log must
    contain a set of markers, and the input/output description
    properties must be lists.
    """
    dataset = load_iris()
    features, target = dataset.data, dataset.target
    X_train, X_test, __, _ = train_test_split(
        features, target, random_state=11)
    model = KMeans()
    model.fit(X_train)
    model_def = to_onnx(model, X_train.astype(numpy.float32))

    log_markers = ('+kr', '+ki', 'Onnx-Gemm', 'min=', 'max=', 'dtype=')
    # NOTE(review): ``runtime`` only labels the sub-test, it is not
    # forwarded to OnnxInference - confirm whether that is intended.
    for runtime in ['python', 'python_compiled']:
        with self.subTest(runtime=runtime):
            oinf = OnnxInference(model_def, inplace=False)
            printer = BufferedPrint()
            feeds = {'X': X_test.astype(numpy.float32)}
            got = oinf.run(feeds, verbose=15, fLOG=printer.fprint,
                           intermediate=True)
            self.assertIsInstance(got, dict)
            log = str(printer)
            for marker in log_markers:
                self.assertIn(marker, log)
            self.assertIsInstance(oinf.input_names_shapes, list)
            self.assertIsInstance(oinf.input_names_shapes_types, list)
            self.assertIsInstance(oinf.output_names_shapes, list)
            self.assertIsInstance(oinf.output_names_shapes_types, list)
def test_kmeans_constraint_weights(self):
    """Fit ``ConstraintKMeans`` with the ``'weights'`` strategy.

    The model is fitted twice on four points (verbose 10, then 5) with
    the same log buffer; centers, labels, predictions and the shapes of
    ``transform`` and ``score`` are checked on the second model.
    """
    points = numpy.array([[0, 0], [0.2, 0.2], [-0.1, -0.1], [1, 1]])
    settings = dict(n_clusters=2, kmeans0=False, random_state=1,
                    strategy='weights')
    printer = BufferedPrint()

    km = ConstraintKMeans(verbose=10, **settings)
    km.fit(points, fLOG=printer.fprint)

    km = ConstraintKMeans(verbose=5, **settings)
    km.fit(points, fLOG=printer.fprint)

    self.assertEqual(km.cluster_centers_.shape, (2, 2))
    self.assertLesser(km.inertia_, 4.55)
    expected_centers = numpy.array([[0.6, 0.6], [-0.05, -0.05]])
    self.assertEqual(km.cluster_centers_, expected_centers)
    self.assertEqual(km.labels_, numpy.array([1, 0, 1, 0]))

    self.assertEqual(km.predict(points), numpy.array([1, 1, 1, 0]))
    self.assertEqual(km.transform(points).shape, (4, 2))
    self.assertEqual(km.score(points).shape, (4, ))
    self.assertIn("CKMeans", str(printer))
def test_convert_notebook(self):
    """Exercise ``convert_notebook`` and ``run_notebook`` from the CLI.

    The templates are first copied into a ``static`` folder and into the
    current directory when missing. The notebook is converted, then run,
    and the executed notebook is converted again into a second folder.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    temp = get_temp_folder(__file__, "temp_convert_notebook")
    data_dir = os.path.join(temp, "..", "data")
    source = os.path.join(temp, "..", "data", "td1a_unit_test_ci.ipynb")

    static_dir = 'static'
    if not os.path.exists(static_dir):
        os.mkdir(static_dir)
    for tpl in ['rst', 'display_priority', 'null']:
        tpl_name = '%s.tpl' % tpl
        origin = os.path.join(data_dir, tpl_name)
        if not os.path.exists(origin):
            raise FileNotFoundError(origin)
        if not os.path.exists(os.path.join(static_dir, tpl_name)):
            shutil.copy(origin, static_dir)
        if not os.path.exists(tpl_name):
            shutil.copy(origin, '.')

    with self.subTest(cmd="convert_notebook"):
        printer = BufferedPrint()
        main(args=["convert_notebook", "-f", source, "-o", temp,
                   "-b", temp, '-fo', 'rst,html'],
             fLOG=printer.fprint)
        self.assertIn("convert into 'rst'", str(printer))

    with self.subTest(cmd="run_notebook"):
        outname = os.path.join(temp, "out_nb.ipynb")
        printer = BufferedPrint()
        main(args=['run_notebook', '-f', source, '-o', outname],
             fLOG=printer.fprint)
        captured = str(printer)
        self.assertExists(outname)

    # Second conversion, this time of the executed notebook.
    temp2 = get_temp_folder(__file__, "temp_convert_notebook_next")
    printer = BufferedPrint()
    main(args=["convert_notebook", "-f", outname, "-o", temp2,
               "-b", temp2, '-fo', 'rst,html'],
         fLOG=printer.fprint)
    self.assertIn("convert into 'rst'", str(printer))
    self.assertExists(os.path.join(temp2, "out_nb2html.html"))
def test_sphinx_rst(self):
    """Run ``sphinx_rst`` on ``glossary.rst`` and expect ``out.html``."""
    printer = BufferedPrint()
    folder = get_temp_folder(__file__, "temp_sphinx_rst")
    rst_file = os.path.join(folder, "..", "data", "glossary.rst")
    out_root = os.path.join(folder, "out")
    main(args=['sphinx_rst', '-i', rst_file, '-o', out_root],
         fLOG=printer.fprint)
    self.assertExists(out_root + ".html")
def test_buffered_log(self):
    """Check that ``BufferedPrint.fprint`` records one logged line."""

    def emit(fLOG=None):
        # Logs a single message when a logging function is supplied.
        if not fLOG:
            return 3
        fLOG("Did something.")
        return 3

    printer = BufferedPrint()
    emit(fLOG=printer.fprint)
    self.assertEqual(str(printer), "Did something.\n")
def test_cli_csv_perm(self):
    """Run ``einsum_test`` with ``--perm 1`` and check the CSV output.

    The result file must exist and the captured output must contain
    ``wrote``.
    """
    folder = get_temp_folder(__file__, "temp_cli_csv_perm")
    csv_path = os.path.join(folder, "res.csv")
    cli_args = [
        "einsum_test",
        "--equation", "abc,cd->ad",
        "--output", csv_path,
        "--shape", "(5,5,5);(5,5)",
        "--verbose", "0",
        "--perm", "1",
    ]
    printer = BufferedPrint()
    main(args=cli_args, fLOG=printer.fprint)
    self.assertExists(csv_path)
    self.assertIn("wrote", str(printer))
def test_cli(self):
    """Check the help and a real run of the ``read_folder`` command.

    The help output must start the usage line with ``read_folder``; the
    real run must create ``example.xlsx`` in the temporary folder.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    help_printer = BufferedPrint()
    main(args=["read_folder", "--help"], fLOG=help_printer.fprint)
    self.assertIn("usage: read_folder", str(help_printer))

    run_printer = BufferedPrint()
    data_folder = os.path.dirname(dummy_ct1())
    temp = get_temp_folder(__file__, "temp_cli")
    workbook = os.path.join(temp, "example.xlsx")
    main(args=["read_folder", "-f", data_folder, '-r', 'ct1',
               '--out', workbook],
         fLOG=run_printer.fprint)
    captured = str(run_printer)
    self.assertExists(workbook)
def test_zoom_img_do(self):
    """Run ``zoom_img`` with factor 0.5 on the PNG files of ``data``.

    The captured output must report written files and must not fall
    back to the usage message.
    """
    folder = get_temp_folder(__file__, 'temp_img_zoom')
    out_pattern = os.path.join(folder, '{}')
    img_pattern = os.path.join(folder, '..', 'data', '*.png')
    printer = BufferedPrint()
    main(args=['zoom_img', '-f', '0.5', '--img', img_pattern,
               '-o', out_pattern],
         fLOG=printer.fprint)
    output = str(printer)
    self.assertNotIn("usage: zoom_img", output)
    self.assertIn("Writing '", output)
def test_cli_benchmark_replay(self):
    """Dump a KMeans benchmark, then replay it with ``benchmark_replay``.

    ``validate_runtime`` is run first with ``--dump_all 1`` so that the
    dumps land in the temporary folder; ``benchmark_replay`` then reads
    that folder and must write ``res.xlsx``. Both commands log into the
    same buffer, which must mention ``'folder'``.
    """
    temp = get_temp_folder(__file__, "temp_benchmark_replay")
    out1 = os.path.join(temp, "raw.csv")
    st = BufferedPrint()
    main(args=["validate_runtime", "--n_features", "4", "-nu", "3",
               "-re", "3", "-o", "11", "-op", "11", "-v", "2",
               "--out_raw", out1, "-b", "0",
               "--runtime", "python_compiled",
               "--models", "KMeans", "--dtype", "32",
               "--dump_all", '1', '--dump_folder', temp],
         fLOG=st.fprint)

    out = os.path.join(temp, "res.xlsx")
    main(args=["benchmark_replay", "--folder", temp, "--out", out,
               '--verbose', '2'],
         fLOG=st.fprint)
    res = str(st)
    self.assertExists(out)
    self.assertIn("'folder'", res)
def test_cli_ftp_comma(self):
    """Call ``ftp_upload`` with a comma separated file list.

    A ``socket.gaierror`` raised by the call is the expected outcome and
    is swallowed; any other exception propagates.
    """
    printer = BufferedPrint()
    cli_args = [
        'ftp_upload',
        '-f', 'a,b',
        '-d', 'www/',
        '-ho', 'ftp.xavierdupre.fr',
        '-u', 'user',
        '--pwd', '***',
        '-ft', '1',
    ]
    try:
        main(args=cli_args, fLOG=printer.fprint)
    except socket.gaierror:
        # expected
        pass
def test_cli_validate_model_dump(self):
    """Run ``validate_runtime`` with dumps and unpack every dump file.

    Each ``dump-i-*`` file found in the temporary folder is unpickled
    and its ONNX bytes, test data, predictions and scikit-learn model
    are written to separate files next to it.
    """
    fLOG(OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_validate_model_dump")
    raw_csv = os.path.join(temp, "raw.csv")
    summary_csv = os.path.join(temp, "sum.csv")
    graph = os.path.join(temp, 'benchmark.png')
    printer = BufferedPrint()

    # "DecisionTreeClassifier" is left out of the list on purpose.
    # A wider selection was tried at some point: KMeans, LGBMClassifier,
    # LGBMRegressor, LinearSVC, LogisticRegression, MLPClassifier,
    # MLPRegressor, RandomForestClassifier, Perceptron, Ridge,
    # SGDRegressor, RandomForestRegressor.
    models = ','.join([
        "LinearRegression",
        "LogisticRegression",
        "DecisionTreeRegressor",
    ])

    args = [
        "validate_runtime",
        "--out_raw", raw_csv,
        "--out_summary", summary_csv,
        "--models", models,
        '-r', "python,onnxruntime1",
        '-o', '10',
        '-op', '10',
        '-v', '1',
        '-b', '1',
        '-dum', '1',
        '-du', temp,
        '-n', '20,100,500',
        '--out_graph', graph,
        '--dtype', '32',
    ]
    fLOG("python -m mlprodict " + " ".join(args))
    logger = fLOG if __name__ == "__main__" else printer.fprint
    main(args=args, fLOG=logger)

    dumps = [entry for entry in os.listdir(temp) if "dump-i-" in entry]
    self.assertNotEmpty(dumps)

    pickled_parts = (
        ('.data.pkl', 'Xort_test'),
        ('.ypred.pkl', 'ypred'),
        ('.skl.pkl', 'skl_model'),
    )
    total = len(dumps)
    for position, name in enumerate(dumps, start=1):
        fLOG("{}/{}: {}".format(position, total, name))
        fullname = os.path.join(temp, name)
        with open(fullname, 'rb') as stream:
            content = pickle.load(stream)
        root = os.path.splitext(fullname)[0]
        with open(root + '.onnx', 'wb') as stream:
            stream.write(content['onnx_bytes'])
        for suffix, key in pickled_parts:
            with open(root + suffix, 'wb') as stream:
                pickle.dump(content[key], stream)
def test_sphinx_rst_notoctree(self):
    """Run ``sphinx_rst`` on ``piecewise_notoc.rst`` and read the HTML.

    The generated ``out.html`` must exist and contain a known sentence.
    """
    printer = BufferedPrint()
    folder = get_temp_folder(__file__, "temp_sphinx_rst_notoctree")
    rst_file = os.path.join(folder, "..", "data", "piecewise_notoc.rst")
    out_root = os.path.join(folder, "out")
    main(args=['sphinx_rst', '-i', rst_file, '-o', out_root],
         fLOG=printer.fprint)

    html_file = out_root + ".html"
    self.assertExists(html_file)
    with open(html_file, "r", encoding="utf-8") as stream:
        html = stream.read()
    self.assertIn("pour finalement illustrer", html)
def test_main(self):
    """Start ``main`` with ``--GUITEST`` and check a window is returned.

    When tkinter raises ``TclError`` the test only emits a warning and
    stops.
    """
    from tkinter import TclError

    printer = BufferedPrint()
    try:
        window = main(args=['--GUITEST'], fLOG=printer.fprint)
    except TclError as exc:
        # probably run from a remote machine
        warnings.warn(str(exc))
    else:
        output = str(printer)
        self.assertNotIn(
            "python -m pyquickhelper <command> --help", output)
        self.assertNotEmpty(window)
def test_convert_notebook2(self):
    """Convert, run, then convert again a second notebook via the CLI.

    The notebook ``onnx_tree_ensemble_parallel.ipynb`` is converted to
    rst and html, executed with ``run_notebook``, and the executed copy
    is converted into a second temporary folder.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    temp = get_temp_folder(__file__, "temp_convert_notebook2")
    source = os.path.join(
        temp, "..", "data", "onnx_tree_ensemble_parallel.ipynb")

    with self.subTest(cmd="convert_notebook"):
        printer = BufferedPrint()
        convert_args = ["convert_notebook", "-f", source, "-o", temp,
                        "-b", temp, '-fo', 'rst,html']
        main(args=convert_args, fLOG=printer.fprint)
        self.assertIn("convert into 'rst'", str(printer))

    with self.subTest(cmd="run_notebook"):
        outname = os.path.join(temp, "out_nb.ipynb")
        printer = BufferedPrint()
        main(args=['run_notebook', '-f', source, '-o', outname],
             fLOG=printer.fprint)
        captured = str(printer)
        self.assertExists(outname)

    # Second conversion, this time of the executed notebook.
    temp2 = get_temp_folder(__file__, "temp_convert_notebook2_next")
    printer = BufferedPrint()
    convert_args = ["convert_notebook", "-f", outname, "-o", temp2,
                    "-b", temp2, '-fo', 'rst,html']
    main(args=convert_args, fLOG=printer.fprint)
    self.assertIn("convert into 'rst'", str(printer))
    self.assertExists(os.path.join(temp2, "out_nb2html.html"))
def test_cli_validate_bench_doc(self):
    """Run ``benchmark_doc`` on LinearRegression with ``python_compiled``.

    The output must mention ``Linear`` and both result files must exist.
    """
    folder = get_temp_folder(__file__, "temp_bench_doc")
    raw_xlsx = os.path.join(folder, "raw.xlsx")
    summary_csv = os.path.join(folder, "sum.csv")
    cli_args = [
        "benchmark_doc",
        "-o", raw_xlsx,
        "-ou", summary_csv,
        "-w", "LinearRegression",
        '-d', folder,
        '-r', 'python_compiled',
    ]
    printer = BufferedPrint()
    main(args=cli_args, fLOG=printer.fprint)
    self.assertIn('Linear', str(printer))
    for produced in (raw_xlsx, summary_csv):
        self.assertExists(produced)
def test_cli_asv2csv(self):
    """Convert unzipped asv results into CSV through ``asv2csv``.

    The first run must produce a CSV of shape (168, 66). A second run
    uses an output name containing ``<date>`` and is only expected to
    complete.
    """
    folder = get_temp_folder(__file__, "temp_asv2csv")
    archive = os.path.join(TestCliAsvBench.data, 'results.zip')
    unzip_files(archive, folder)
    results = os.path.join(folder, 'results')
    printer = BufferedPrint()

    csv_path = os.path.join(folder, "data.csv")
    main(args=["asv2csv", "-f", results, "-o", csv_path],
         fLOG=printer.fprint)
    self.assertExists(csv_path)
    table = pandas.read_csv(csv_path)
    self.assertEqual(table.shape, (168, 66))

    dated_path = os.path.join(folder, "data<date>.csv")
    main(args=["asv2csv", "-f", results, "-o", dated_path],
         fLOG=printer.fprint)
def test_cli_validate_model_lightgbm(self):
    """Run ``validate_runtime`` on LGBMClassifier and check the outputs.

    The captured output must mention the model name and both CSV files
    must exist.
    """
    folder = get_temp_folder(__file__, "temp_validate_model_lgbm_csv")
    raw_csv = os.path.join(folder, "raw.csv")
    summary_csv = os.path.join(folder, "sum.csv")
    cli_args = [
        "validate_runtime",
        "--out_raw", raw_csv,
        "--out_summary", summary_csv,
        "--models", "LGBMClassifier",
        '-o', '10',
        '-op', '10',
        '-v', '1',
        '-b', '1',
    ]
    printer = BufferedPrint()
    main(args=cli_args, fLOG=printer.fprint)
    self.assertIn('LGBMClassifier', str(printer))
    for produced in (raw_csv, summary_csv):
        self.assertExists(produced)
def test_cli_validate_model_csv_bug(self):
    """Run ``validate_runtime`` on AgglomerativeClustering.

    The captured output must mention the model name and both CSV files
    must exist.
    """
    folder = get_temp_folder(__file__, "temp_validate_model_csv_bug")
    raw_csv = os.path.join(folder, "raw.csv")
    summary_csv = os.path.join(folder, "sum.csv")
    cli_args = [
        "validate_runtime",
        "--out_raw", raw_csv,
        "--out_summary", summary_csv,
        "--models", "AgglomerativeClustering",
        '-o', '10',
        '-op', '10',
        '-v', '1',
        '-b', '1',
    ]
    printer = BufferedPrint()
    main(args=cli_args, fLOG=printer.fprint)
    self.assertIn('AgglomerativeClustering', str(printer))
    for produced in (raw_csv, summary_csv):
        self.assertExists(produced)
def test_cli_plot_onnx(self):
    """Run ``plot_onnx`` on ``fft2d_any.onnx`` for every tested format.

    Each format runs in its own sub-test and must create its output
    file.
    """
    folder = get_temp_folder(__file__, "temp_cli_plot_onnx")
    onnx_file = os.path.join(
        folder, "..", "..", "ut_tools", "data", "fft2d_any.onnx")
    self.assertExists(onnx_file)

    for fmt in ['simple', 'dot', 'io', 'raw']:
        with self.subTest(fmt=fmt):
            target = os.path.join(folder, "code_%s.py" % fmt)
            printer = BufferedPrint()
            cli_args = [
                "plot_onnx",
                "--filename", onnx_file,
                '--format', fmt,
                "--output", target,
                "--verbose", "1",
            ]
            main(args=cli_args, fLOG=printer.fprint)
            self.assertExists(target)
def test_cli_validate_model(self):
    """Run ``validate_runtime`` with a JSON ``-t`` option and xlsx outputs.

    The output must mention ``Linear`` and both Excel files must exist.
    """
    folder = get_temp_folder(__file__, "temp_validate_model")
    raw_xlsx = os.path.join(folder, "raw.xlsx")
    summary_xlsx = os.path.join(folder, "sum.xlsx")
    time_spec = '{"1":{"number":10,"repeat":10},"10":{"number":5,"repeat":5}}'
    cli_args = [
        "validate_runtime",
        "--out_raw", raw_xlsx,
        "--out_summary", summary_xlsx,
        "--models", "LogisticRegression,LinearRegression",
        '-o', '10',
        '-op', '10',
        '-v', '1',
        '-b', '1',
        '-t', time_spec,
    ]
    printer = BufferedPrint()
    main(args=cli_args, fLOG=printer.fprint)
    self.assertIn('Linear', str(printer))
    for produced in (raw_xlsx, summary_xlsx):
        self.assertExists(produced)
def test_cli_onnx_code_numpy(self):
    """Export ``fft2d_any.onnx`` as numpy code through ``onnx_code``.

    The generated file must exist and contain ``def numpy_``.
    """
    folder = get_temp_folder(__file__, "temp_cli_onnx_code_numpy")
    onnx_file = os.path.join(
        folder, "..", "..", "ut_tools", "data", "fft2d_any.onnx")
    self.assertExists(onnx_file)

    target = os.path.join(folder, "code_numpy.py")
    cli_args = [
        "onnx_code",
        "--filename", onnx_file,
        '--format', 'numpy',
        "--output", target,
        "--verbose", "1",
    ]
    printer = BufferedPrint()
    main(args=cli_args, fLOG=printer.fprint)
    self.assertExists(target)

    with open(target, "r", encoding='utf-8') as stream:
        generated = stream.read()
    self.assertIn("def numpy_", generated)
def test_cli_validate_model_csv_bug(self):
    """Expect ``validate_runtime`` to fail on AgglomerativeClustering.

    With verbosity 0 the command must raise ``RuntimeError``, log
    nothing, write the raw CSV and leave the summary CSV absent.
    """
    folder = get_temp_folder(__file__, "temp_validate_model_csv_bug")
    raw_csv = os.path.join(folder, "raw.csv")
    summary_csv = os.path.join(folder, "sum.csv")
    printer = BufferedPrint()

    def run_command():
        # Deferred so that assertRaise can trigger the call itself.
        return main(args=[
            "validate_runtime",
            "--out_raw", raw_csv,
            "--out_summary", summary_csv,
            "--models", "AgglomerativeClustering",
            '-o', '10',
            '-op', '10',
            '-v', '0',
            '-b', '1',
        ], fLOG=printer.fprint)

    self.assertRaise(
        run_command, RuntimeError, "No result produced by the benchmark.")
    self.assertEmpty(str(printer))
    self.assertExists(raw_csv)
    self.assertNotExists(summary_csv)
def test_cli_convert_validater_pkl_nodata(self):
    """Run ``convert_validate`` on a pickled booster without any data.

    The test needs ``booster.pickle`` to be present in the temporary
    folder. When the file is missing the test is reported as skipped
    instead of silently passing without checking anything.
    """
    temp = get_temp_folder(
        __file__, "temp_cli_convert_validate_pkl_nodata")
    monx = os.path.join(temp, "gpr.onnx")
    pkl = os.path.join(temp, "booster.pickle")
    if not os.path.exists(pkl):
        # Make the missing prerequisite visible in the test report.
        self.skipTest("missing pickle file '{}'".format(pkl))

    st = BufferedPrint()
    convert_validate(pkl=pkl, data=None, verbose=0,
                     method="predict,predict_proba",
                     name="output_label,output_probability",
                     outonnx=monx, fLOG=st.fprint)
    res = str(st)
    self.assertNotIn("[convert_validate] compute predictions", res)
def test_cli_convert_validater_float64_gpr(self):
    """Validate a GaussianProcessRegressor in float64 with ``cdist``.

    The model is first validated by a direct ``convert_validate`` call.
    A ``RuntimeError`` mentioning an opset/schema version mismatch ends
    the test early; any other error is re-raised unchanged. The same
    conversion is then run through the command line and the saved ONNX
    graph must contain ``CDist``.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
    clr = GaussianProcessRegressor()
    clr.fit(X_train, y_train)

    temp = get_temp_folder(
        __file__, "temp_cli_convert_validate_float64_gpr")
    monx = os.path.join(temp, "gpr.onnx")
    data = os.path.join(temp, "data.csv")
    pandas.DataFrame(X_test).to_csv(data, index=False)
    pkl = os.path.join(temp, "model.pkl")
    with open(pkl, "wb") as f:
        pickle.dump(clr, f)

    try:
        res = convert_validate(
            pkl=pkl, data=data, verbose=0, method="predict",
            name="GPmean", use_double='float64',
            options="{GaussianProcessRegressor:{'optim':'cdist'}}")
    except RuntimeError as e:
        if "requested version 10 < 11 schema version" in str(e):
            # Known incompatibility with the installed runtime.
            return
        # Bare raise re-raises the active exception as is.
        raise
    self.assertNotEmpty(res)

    st = BufferedPrint()
    args = [
        "convert_validate",
        "--pkl", pkl,
        '--data', data,
        '--method', "predict",
        '--name', "GPmean",
        '--verbose', '1',
        '--use_double', 'float64',
        '--options', "{GaussianProcessRegressor:{'optim':'cdist'}}",
        '--outonnx', monx,
    ]
    main(args, fLOG=st.fprint)
    self.assertExists(monx)
    with open(monx, 'rb') as f:
        model = onnx.load(f)
    self.assertIn('CDist', str(model))