def test_columns_info(self):
    """Check `columns_info` on a plain CSV and on an HXL-tagged CSV.

    Also verifies that the info structure survives an
    encode_state_data / decode_state_data round trip.
    """
    # Importing the module registers the pandas commands and state type.
    import liquer.ext.lq_pandas
    from liquer.state_types import encode_state_data, decode_state_data

    fixtures = os.path.dirname(inspect.getfile(self.__class__))

    def columns_info(token):
        # Evaluated afresh on every call, exactly one query per assertion.
        return evaluate(f"df_from-{token}/columns_info").get()

    # Plain CSV: the assertions expect no tags.
    filename = encode_token(fixtures + "/test.csv")
    assert evaluate(f"df_from-{filename}/df_columns").get() == ["a", "b"]
    assert columns_info(filename)["columns"] == ["a", "b"]
    assert columns_info(filename)["has_tags"] == False
    assert columns_info(filename)["types"]["a"].startswith("int")

    # HXL CSV: the assertions expect one tag string per column.
    filename = encode_token(fixtures + "/test_hxl.csv")
    assert evaluate(f"df_from-{filename}/df_columns").get() == ["a", "b"]
    assert columns_info(filename)["columns"] == ["a", "b"]
    assert columns_info(filename)["has_tags"] == True
    assert columns_info(filename)["tags"]["a"] == "#indicator +num +aaa"
    assert columns_info(filename)["tags"]["b"] == "#indicator +num +bbb"

    # Serialization round trip of the info structure.
    info = columns_info(filename)
    b, mime, tid = encode_state_data(info)
    assert info == decode_state_data(b, tid)
def test_from_with_cache(self, httpserver):
    """Evaluate `hxl_from-<url>` twice with a FileCache installed.

    The HXL fixture is served by the `httpserver` fixture. The same query
    is evaluated two times while a FileCache in a temporary directory is
    active, and both results must expose the same headers and tags.

    The fixture file is read through a context manager so the handle is
    closed, and the cache is reset in a `finally` so a failing assertion
    cannot leave a cache pointing at a removed directory.
    """
    import liquer.ext.lq_hxl  # register HXL commands and state type

    fixture = os.path.dirname(inspect.getfile(self.__class__)) + "/test_hxl.csv"
    with open(fixture) as f:
        test_hxl = f.read()
    httpserver.expect_request("/test_hxl.csv").respond_with_data(test_hxl)
    url = encode_token(httpserver.url_for("/test_hxl.csv"))
    query = f"hxl_from-{url}"

    with tempfile.TemporaryDirectory() as cachepath:
        set_cache(FileCache(cachepath))
        try:
            # Two identical passes over the same query with the cache active.
            for _ in range(2):
                data = evaluate(query).get()
                assert data.columns[0].header == "a"
                assert data.columns[0].display_tag == "#indicator+num+aaa"
                assert data.columns[1].header == "b"
                assert data.columns[1].display_tag == "#indicator+num+bbb"
        finally:
            # Always drop the cache before the temporary directory goes away.
            set_cache(None)
def test_ridge(self):
    """Run the `ridge` command (namespace `sklearn`) on three small frames.

    The expected coefficients and intercepts are compared approximately.
    """
    import importlib
    import liquer.ext.basic
    import liquer.ext.lq_pandas
    import liquer.ext.lq_sklearn_regression
    from liquer.commands import reset_command_registry

    # Empty the registry first so the reloads below do not register twice.
    reset_command_registry()
    # Hack: reloading the modules enforces registration of their commands.
    for module in (
        liquer.ext.basic,
        liquer.ext.lq_pandas,
        liquer.ext.lq_sklearn_regression,
    ):
        importlib.reload(module)

    @first_command
    def test1():
        return pd.DataFrame({"x": [1, 2, 3], "Y": [10, 20, 30]})

    @first_command
    def test2():
        return pd.DataFrame({"x": [1, 2, 3], "Y": [30, 40, 50]})

    @first_command
    def test3():
        return pd.DataFrame({"x1": [1, 2, 3], "x2": [0, 0, 1], "Y": [30, 40, 55]})

    fit = evaluate("ns-sklearn/test1/ridge").get()
    assert list(fit["x"])[0] == pytest.approx(9.52381)

    fit = evaluate("ns-sklearn/test2/ridge").get()
    assert list(fit["x"])[0] == pytest.approx(9.52381)
    assert list(fit["intercept"])[0] == pytest.approx(20.952381)

    fit = evaluate("ns-sklearn/test3/ridge").get()
    assert list(fit["x1"])[0] == pytest.approx(9.562842)
    assert list(fit["x2"])[0] == pytest.approx(4.918033)
    assert list(fit["intercept"])[0] == pytest.approx(20.901639)
def test_vars(self):
    """Check `state_variable` against `let` (per query) and `set_var`."""
    import liquer.ext.basic

    def result_of(query):
        return evaluate(query).get()

    # Unset variable yields None; `let` only affects its own query.
    assert result_of("state_variable-abc") is None
    assert result_of("let-abc-1/state_variable-abc") == "1"
    assert result_of("state_variable-abc") is None

    # After set_var the plain query sees the value, while `let` still wins.
    set_var("abc", "123")
    assert result_of("state_variable-abc") == "123"
    assert result_of("let-abc-1/state_variable-abc") == "1"
def test_image(self):
    """Create an image, save a resized copy to the store and read it back."""
    set_store(MemoryStore())

    @first_command
    def image():
        return Image.new(mode="RGB", size=(200, 300))

    original = evaluate("image").get()
    assert original.size == (200, 300)

    # Resize through the `pil` namespace and save under resource directory "x".
    evaluate_and_save(
        "image/ns-pil/resize-400-600-bilinear/test.png",
        target_resource_directory="x",
    )

    resized = evaluate("x/test.png/-/dr").get()
    assert resized.size == (400, 600)
def test_set_tags(self):
    """Check `set_tags` both via `df2hxl` and on the resulting dataframe."""
    import liquer.ext.lq_hxl  # register HXL commands and state type
    import liquer.ext.lq_pandas  # register pandas commands and state type

    fixtures = os.path.dirname(inspect.getfile(self.__class__))
    filename = encode_token(fixtures + "/test.csv")

    # Tag both columns, then convert to an HXL dataset.
    hxl_data = evaluate(
        f"df_from-{filename}/set_tags-b-indicator+b-a-indicator+a/df2hxl"
    ).get()
    first, second = hxl_data.columns[0], hxl_data.columns[1]
    assert first.header == "a"
    assert first.display_tag == "#indicator+a"
    assert second.header == "b"
    assert second.display_tag == "#indicator+b"

    # Tagging only "b": the expected frame has an empty tag cell for "a".
    tagged = evaluate(f"df_from-{filename}/set_tags-b-indicator+b").get()
    assert list(tagged.a) == ["", 1, 3]
    assert list(tagged.b) == ["#indicator+b", 2, 4]
def test_teq(self):
    """Check the `teq` filter on the HXL fixture.

    In every expected result the first row is the tag row, followed by
    the rows matching the filter.
    """
    import liquer.ext.lq_pandas  # register pandas commands and state type

    fixtures = os.path.dirname(inspect.getfile(self.__class__))
    filename = encode_token(fixtures + "/test_hxl.csv")
    tag_a = "#indicator +num +aaa"
    tag_b = "#indicator +num +bbb"

    # Single condition.
    result = evaluate(f"df_from-{filename}/teq-a-1")
    frame = result.get()
    assert "a" in frame.columns
    assert "b" in frame.columns
    assert list(frame.a) == [tag_a, "1"]
    assert list(frame.b) == [tag_b, "2"]

    # Two conditions matching one row.
    frame = evaluate(f"df_from-{filename}/teq-a-3-b-4").get()
    assert list(frame.a) == [tag_a, "3"]
    assert list(frame.b) == [tag_b, "4"]

    # Two conditions matching nothing: only the tag row is expected.
    frame = evaluate(f"df_from-{filename}/teq-a-1-b-4").get()
    assert list(frame.a) == [tag_a]
    assert list(frame.b) == [tag_b]
def test_head(self):
    """Check that `head_df-1` yields only the first row of the CSV fixture."""
    import liquer.ext.lq_pandas  # register pandas commands and state type

    fixtures = os.path.dirname(inspect.getfile(self.__class__))
    filename = encode_token(fixtures + "/test.csv")

    first_row = evaluate(f"df_from-{filename}/head_df-1").get()
    assert list(first_row.a) == [1]
    assert list(first_row.b) == [2]
def split_df(state, *columns):
    """Split a dataframe by columns and attach a link for each part.

    Delegates to qsplit_df to obtain a dataframe holding one query per
    (combination of) value(s) of the supplied columns. In addition to
    what qsplit_df provides:

    - each generated query is extended with a "link" action and evaluated,
    - the result of that evaluation is stored in the link column.

    State variables read (with the default used when the variable is None):
    query_column ("query"), link_column ("link") and split_link_type ("url").
    """
    from liquer.parser import parse

    state = qsplit_df(state, *columns)
    df = state.get().copy()

    def state_var(name, fallback):
        # A missing variable and an explicit None both fall back.
        value = state.vars.get(name)
        return fallback if value is None else value

    query_column = state_var("query_column", "query")
    link_column = state_var("link_column", "link")
    split_link_type = state_var("split_link_type", "url")

    links = []
    for q in df[query_column]:
        link_query = parse(q).with_action("link", split_link_type).encode()
        links.append(evaluate(link_query).get())
    df.loc[:, link_column] = links

    return state.with_data(df)
def tsplit_df(state, *columns):
    """Split a dataframe by columns (variant of split_df for tagged frames).

    Delegates to qtsplit_df. The first row is treated as the tag row: it
    receives an empty string in the link column, and only the queries of
    the remaining rows are extended with a "link" action and evaluated.

    State variables read (with the default used when the variable is None):
    query_column ("query"), link_column ("link") and split_link_type ("url").
    """
    from liquer.parser import parse

    state = qtsplit_df(state, *columns)
    df = state.get().copy()

    def state_var(name, fallback):
        # A missing variable and an explicit None both fall back.
        value = state.vars.get(name)
        return fallback if value is None else value

    query_column = state_var("query_column", "query")
    link_column = state_var("link_column", "link")
    split_link_type = state_var("split_link_type", "url")

    # Skip the first (tag) row; it gets an empty link.
    links = [""]
    for q in list(df[query_column])[1:]:
        link_query = parse(q).with_action("link", split_link_type).encode()
        links.append(evaluate(link_query).get())
    df.loc[:, link_column] = links

    return state.with_data(df)
def serve(query):
    """Main service for evaluating queries.

    Evaluates `query` and converts the resulting state with `response`.
    If evaluation or response creation raises, the traceback is printed
    and the request is aborted with status 500.

    Only `Exception` is caught, so interpreter-level signals such as
    KeyboardInterrupt and SystemExit are no longer swallowed.
    """
    try:
        return response(evaluate(query))
    except Exception:
        traceback.print_exc()
        abort(500)
def test_tsplit(self):
    """Check `tsplit_df` on the HXL fixture: values, queries and links."""
    import importlib
    import liquer.ext.lq_pandas  # register pandas commands and state type
    import liquer.ext.basic
    from liquer.commands import reset_command_registry

    # Empty the registry first so the reloads below do not register twice.
    reset_command_registry()
    # Hack: reloading the modules enforces registration of their commands.
    for module in (liquer.ext.lq_pandas, liquer.ext.basic):
        importlib.reload(module)

    set_var("server", "http://localhost")
    set_var("api_path", "/q/")

    fixtures = os.path.dirname(inspect.getfile(self.__class__))
    filename = encode_token(fixtures + "/test_hxl.csv")

    parts = evaluate(f"df_from-{filename}/tsplit_df-a").get()
    assert "a" in parts.columns
    assert "b" not in parts.columns
    assert list(parts.a) == ["#indicator +num +aaa", "1", "3"]

    # The first row is the tag row, so it is left out of both comparisons.
    expected_queries = [
        f"df_from-{filename}/teq-a-1",
        f"df_from-{filename}/teq-a-3",
    ]
    assert list(parts["query"])[1:] == expected_queries

    expected_links = [
        f"http://localhost/q/df_from-{filename}/teq-a-1",
        f"http://localhost/q/df_from-{filename}/teq-a-3",
    ]
    assert list(parts["link"])[1:] == expected_links
def test_work_with_parquet(self):
    """Save the CSV fixture as parquet, query it with SQL and check the result.

    The parquet file is registered as table "a" in an execution context,
    a SELECT adds column c = a + b, and the result is converted to pandas.
    """
    fixtures = os.path.dirname(inspect.getfile(self.__class__))
    filename = encode_token(fixtures + "/test.csv")

    with TemporaryDirectory() as tmpdir:
        path = Path(tmpdir) / "test.parquet"
        evaluate_and_save(
            f"df_from-{filename}/test.parquet", target_directory=tmpdir
        )

        @first_command(volatile=True, cache=False)
        def execution_context():
            context = daf.ExecutionContext()
            context.register_parquet("a", str(path))
            return context

        @command
        def process(ctx):
            return ctx.sql("SELECT a, b, a+b AS c FROM a")

        evaluate_and_save(
            f"execution_context/process/result.parquet",
            target_directory=tmpdir,
        )

        result = evaluate(
            f"execution_context/process/datafusion_to_pandas"
        ).get()
        assert "a" in result.columns
        assert "b" in result.columns
        assert "c" in result.columns
        assert list(result.a) == [1, 3]
        assert list(result.b) == [2, 4]
        assert list(result.c) == [3, 7]
def test_eq(self):
    """Check the `eq` filter with one condition, two conditions and no match."""
    import liquer.ext.lq_pandas  # register pandas commands and state type

    fixtures = os.path.dirname(inspect.getfile(self.__class__))
    filename = encode_token(fixtures + "/test.csv")

    # Single condition.
    result = evaluate(f"df_from-{filename}/eq-a-1")
    frame = result.get()
    assert "a" in frame.columns
    assert "b" in frame.columns
    assert list(frame.a) == [1]
    assert list(frame.b) == [2]

    # Two conditions matching one row.
    frame = evaluate(f"df_from-{filename}/eq-a-3-b-4").get()
    assert list(frame.a) == [3]
    assert list(frame.b) == [4]

    # Two conditions matching nothing.
    frame = evaluate(f"df_from-{filename}/eq-a-1-b-4").get()
    assert list(frame.a) == []
    assert list(frame.b) == []
def test_append_with_cache(self):
    """Evaluate `append_df` twice with a FileCache installed.

    The same query is evaluated two times while a FileCache in a
    temporary directory is active; both results must be identical.
    The cache is reset in a `finally`, so a failing assertion cannot
    leave a cache pointing at a removed directory.
    """
    import liquer.ext.lq_pandas  # register pandas commands and state type

    with tempfile.TemporaryDirectory() as cachepath:
        set_cache(FileCache(cachepath))
        try:
            filename = encode_token(
                os.path.dirname(inspect.getfile(self.__class__)) + "/test.csv"
            )
            query = f"df_from-{filename}/append_df-{filename}"
            # Two identical passes over the same query with the cache active.
            for _ in range(2):
                df = evaluate(query).get()
                assert "a" in df.columns
                assert "b" in df.columns
                assert list(df.a) == [1, 3, 1, 3]
                assert list(df.b) == [2, 4, 2, 4]
        finally:
            # Always drop the cache before the temporary directory goes away.
            set_cache(None)
def test_qsplit2(self):
    """Check `qsplit_df` over two columns: values and generated queries."""
    import liquer.ext.lq_pandas  # register pandas commands and state type

    fixtures = os.path.dirname(inspect.getfile(self.__class__))
    filename = encode_token(fixtures + "/test.csv")

    parts = evaluate(f"df_from-{filename}/qsplit_df-a-b").get()
    assert list(parts.a) == [1, 3]
    assert list(parts.b) == [2, 4]

    expected_queries = [
        f"df_from-{filename}/eq-a-1-b-2",
        f"df_from-{filename}/eq-a-3-b-4",
    ]
    assert list(parts["query"]) == expected_queries
def test_df2hxl(self):
    """Check that `df2hxl` exposes headers and tags of the HXL fixture."""
    import liquer.ext.lq_hxl  # register HXL commands and state type
    import liquer.ext.lq_pandas  # register pandas commands and state type

    fixtures = os.path.dirname(inspect.getfile(self.__class__))
    path = encode_token(fixtures + "/test_hxl.csv")

    hxl_data = evaluate(f"df_from-{path}/df2hxl").get()
    first, second = hxl_data.columns[0], hxl_data.columns[1]
    assert first.header == "a"
    assert first.display_tag == "#indicator+num+aaa"
    assert second.header == "b"
    assert second.display_tag == "#indicator+num+bbb"
def test_qsplit1(self):
    """Check `qsplit_df` over one column: values and generated queries."""
    import liquer.ext.lq_pandas  # register pandas commands and state type

    fixtures = os.path.dirname(inspect.getfile(self.__class__))
    filename = encode_token(fixtures + "/test.csv")

    parts = evaluate(f"df_from-{filename}/qsplit_df-a").get()
    # Only the split column is expected in the result.
    assert "a" in parts.columns
    assert "b" not in parts.columns
    assert list(parts.a) == [1, 3]

    expected_queries = [
        f"df_from-{filename}/eq-a-1",
        f"df_from-{filename}/eq-a-3",
    ]
    assert list(parts["query"]) == expected_queries
def test_append(self):
    """Check that `append_df` with the same file doubles the rows."""
    import liquer.ext.lq_pandas  # register pandas commands and state type

    fixtures = os.path.dirname(inspect.getfile(self.__class__))
    filename = encode_token(fixtures + "/test.csv")

    result = evaluate(f"df_from-{filename}/append_df-{filename}")
    combined = result.get()
    assert "a" in combined.columns
    assert "b" in combined.columns
    assert list(combined.a) == [1, 3, 1, 3]
    assert list(combined.b) == [2, 4, 2, 4]
def test_hxl2df(self, httpserver):
    """Fetch the HXL fixture over HTTP and convert it with `hxl2df`.

    The fixture is served by the `httpserver` fixture. The first data row
    of the resulting frame is skipped in the value checks (`[1:]`).
    The fixture file is read through a context manager so the handle
    is closed deterministically.
    """
    import liquer.ext.lq_hxl  # register HXL commands and state type

    fixture = os.path.dirname(inspect.getfile(self.__class__)) + "/test_hxl.csv"
    with open(fixture) as f:
        test_hxl = f.read()
    httpserver.expect_request("/test_hxl.csv").respond_with_data(test_hxl)
    url = encode_token(httpserver.url_for("/test_hxl.csv"))

    df = evaluate(f"hxl_from-{url}/hxl2df").get()
    assert list(df.columns) == ["a", "b"]
    assert list(df.a[1:]) == ["1", "3"]
    assert list(df.b[1:]) == ["2", "4"]
def test_concat_recipe(self):
    """Check a `pandas_concat` recipe served through RecipeSpecStore.

    A recipe file declares `hello.parquet` as the concatenation of `hello`
    and `hello-10`, the latter with an extra column `test` = "extra".
    The test reads the product through the store and checks both the data
    and the metadata recorded for it.

    `importlib` and `liquer.ext.lq_pandas` are imported here explicitly so
    the reload does not depend on another module having imported them, and
    the global store / command registry are reset in a `finally` so a
    failing assertion cannot leak state into other tests.
    """
    import importlib
    import pandas as pd
    import liquer.ext.basic
    import liquer.ext.lq_pandas
    from liquer.commands import reset_command_registry

    reset_command_registry()  # prevent double-registration
    importlib.reload(liquer.ext.basic)
    importlib.reload(liquer.ext.lq_pandas)
    import liquer.store as st

    try:
        @first_command
        def hello(offset=0):
            return pd.DataFrame(
                dict(a=[1 + offset, 2 + offset], b=[3 + offset, 4 + offset])
            )

        substore = st.MemoryStore()
        substore.store(
            "recipes.yaml",
            """
RECIPES:
    - filename: hello.parquet
      type: pandas_concat
      concat:
      - hello
      - query: hello-10
        column: test
        value: extra
""",
            {},
        )
        store = RecipeSpecStore(substore)
        set_store(store)
        assert "hello.parquet" in store.keys()

        df = evaluate("hello.parquet/-/dr").get()
        assert sorted(df.columns) == ["a", "b", "test"]
        assert list(df.a) == [1, 2, 11, 12]
        assert list(df.b) == [3, 4, 13, 14]
        # Rows coming from plain `hello` have no value in the extra column.
        assert list(df.test) == [None, None, "extra", "extra"]

        metadata = store.get_metadata("hello.parquet")
        assert metadata["status"] == Status.READY.value
        assert metadata["recipes_key"] == "recipes.yaml"
        assert metadata["has_recipe"] == True
        assert metadata["recipes_directory"] == ""
        assert (
            metadata["recipe_name"]
            == "recipes.yaml/-Ryaml/RECIPES/0#hello.parquet"
        )
        assert (
            metadata["data_characteristics"]["description"]
            == "Dataframe with 3 columns and 4 rows."
        )
    finally:
        # Undo the global changes regardless of the test outcome.
        set_store(None)
        reset_command_registry()
def test_workbook(self):
    """Save the CSV fixture as xlsx into the store and read a sheet back."""
    set_store(MemoryStore())

    fixtures = os.path.dirname(inspect.getfile(self.__class__))
    filename = encode_token(fixtures + "/test.csv")
    evaluate_and_save(
        f"df_from-{filename}/test.xlsx", target_resource_directory="testdir"
    )

    # Load the stored workbook and convert its sheet to a dataframe.
    sheet = evaluate("testdir/test.xlsx/-/workbook/workbook_sheet_df").get()
    assert "a" in sheet.columns
    assert "b" in sheet.columns
    assert list(sheet.a) == [1, 3]
    assert list(sheet.b) == [2, 4]
def get(self, query):
    """Main service for evaluating queries.

    Evaluates `query`, converts the state with `response` into
    (bytes, mimetype, filename) and writes the bytes with the matching
    Content-Type header.

    On failure the traceback is printed, status 500 is set and the request
    is finished with an error message. The handler then returns at once:
    without that return, execution would continue with `mimetype` and `b`
    unbound and would try to write to an already finished request.
    """
    try:
        b, mimetype, filename = response(evaluate(query))
    except Exception:
        traceback.print_exc()
        self.set_status(500)
        self.finish(f"500 - Failed to create a respone to {query}")
        # Nothing more to send; the success path below must not run.
        return
    self.set_header("Content-Type", mimetype)
    self.write(b)
def test_pptx(self):
    """Build a presentation, save it to the store and extend the stored copy.

    The saved file has a title slide plus one added slide; reading it back
    and adding another slide must give three slides.
    """
    store = MemoryStore()
    set_store(store)

    @first_command
    def make_presentation():
        presentation = Presentation()
        layout = presentation.slide_layouts[0]
        first_slide = presentation.slides.add_slide(layout)
        heading = first_slide.shapes.title
        subheading = first_slide.placeholders[1]
        heading.text = "Hello, World!"
        subheading.text = "python-pptx was here!"
        return presentation

    @command
    def add_slide(prs, title="Title"):
        layout = prs.slide_layouts[1]
        new_slide = prs.slides.add_slide(layout)
        slide_shapes = new_slide.shapes
        heading = slide_shapes.title
        body = slide_shapes.placeholders[1]
        heading.text = title
        frame = body.text_frame
        frame.text = 'Find the bullet slide layout'
        paragraph = frame.add_paragraph()
        paragraph.text = 'Use _TextFrame.text for first bullet'
        paragraph.level = 1
        return prs

    evaluate_and_save(
        f"make_presentation/add_slide/test.pptx",
        target_resource_directory="testdir",
    )
    stored_metadata = store.get_metadata("testdir/test.pptx")
    assert stored_metadata["type_identifier"] == "pptx_presentation"

    extended = evaluate("testdir/test.pptx/-/dr/add_slide-Slide2").get()
    assert len(extended.slides) == 3
def test_dr(self):
    """Check that `dr` turns a CSV resource in the store into a DataFrame.

    `importlib` and `liquer.ext.lq_pandas` are imported here explicitly so
    the reload below does not depend on another module having imported
    them beforehand.
    """
    import importlib
    import pandas as pd
    import liquer.ext.basic
    import liquer.ext.lq_pandas
    from liquer.commands import reset_command_registry

    reset_command_registry()  # prevent double-registration
    importlib.reload(liquer.ext.basic)
    importlib.reload(liquer.ext.lq_pandas)

    store = MemoryStore()
    set_store(store)
    store.store("data.csv", b"a,b\n1,2\n3,4", {})

    df = evaluate("data.csv/-/dr").get()
    assert isinstance(df, pd.DataFrame)
    assert len(df) == 2
    assert len(df.columns) == 2
    assert "a" in df.columns
    assert "b" in df.columns
    assert list(df.a) == [1, 3]
    assert list(df.b) == [2, 4]
def test_link(self):
    """Check the `link` command for every link type.

    First the links are checked with the current `server` / `api_path`
    variables, then with changed ones. The variables are put back to
    "http://localhost" and "/q/" in a `finally`, so a failing assertion
    cannot leave the changed values behind for other tests.
    """
    import liquer.ext.basic

    base = "let-hello-world/state_variable-hello"

    assert evaluate(f"{base}/link").get() == base
    assert (
        evaluate(f"{base}/link-dataurl").get()
        == "data:text/plain;base64,d29ybGQ="
    )
    assert evaluate(f"{base}/link-path").get() == f"/q/{base}"
    assert evaluate(f"{base}/link-url").get() == f"http://localhost/q/{base}"

    try:
        set_var("server", "http://localhost:1234")
        set_var("api_path", "/liquer/q/")
        assert evaluate(f"{base}/link-path").get() == f"/liquer/q/{base}"
        assert (
            evaluate(f"{base}/link-url").get()
            == f"http://localhost:1234/liquer/q/{base}"
        )
    finally:
        # Restore the values the first half of this test relied on.
        set_var("server", "http://localhost")
        set_var("api_path", "/q/")
def get(self, query):
    """Evaluate `query` and write the state's dictionary form as JSON."""
    evaluated = evaluate(query)
    payload = json.dumps(evaluated.as_dict())
    self.write(payload)
def serve(query):
    """Evaluate `query` and hand the resulting state to `response`."""
    state = evaluate(query)
    return response(state)
def debug_json(query):
    """Debug a query: evaluate it and return the state's dictionary form via jsonify."""
    evaluated = evaluate(query)
    return jsonify(evaluated.as_dict())
def test_state(self):
    """Check that the `state` command reports the query that preceded it."""
    from liquer.commands import command_registry

    reported = evaluate("ns-meta/commands/state").get()
    expected_query = "ns-meta/commands"
    assert reported["query"] == expected_query