def set_central_cache(cache, manager=None, use_cache_proxy_locally=True):
    """Set the pool cache to use a single central cache object.

    The cache object lives in a single process, which is then accessed from
    workers using IPC. This is suitable for all types of cache objects;
    in particular it is important for a MemoryCache.
    In general this is a more secure solution, but it requires all data to
    support pickle. Note that the cache lives in a single process and relies
    on IPC to work, thus this solution may possibly be outperformed by
    set_local_cache_constructor.

    A PoolManager instance can be provided via *manager*. If None, a
    PoolManager will be created and started.  (The original docstring said
    "PollManager" — fixed typo.)  The manager instance is returned.

    The *use_cache_proxy_locally* flag controls whether the cache is proxied
    even when used locally (which should be the safe choice).
    """
    global _worker_config
    if manager is None:
        manager = PoolManager()
        manager.start()
    if _worker_config is None:
        _worker_config = {}
    # Workers receive this proxy via the worker config and talk to the
    # central cache process through it.
    cache_proxy = manager.CacheProxy(cache)
    _worker_config["cache"] = cache_proxy
    # Locally either use the same proxy (safe choice) or the raw cache.
    set_cache(cache_proxy if use_cache_proxy_locally else cache)
    return manager
def test_from_with_cache(self, httpserver):
    """hxl_from must yield identical results on a cache miss and a cache hit."""
    import liquer.ext.lq_hxl  # register HXL commands and state type

    # Read the fixture with a context manager; the original
    # open(...).read() leaked the file handle.
    path = os.path.dirname(inspect.getfile(self.__class__)) + "/test_hxl.csv"
    with open(path) as f:
        test_hxl = f.read()
    httpserver.expect_request("/test_hxl.csv").respond_with_data(test_hxl)
    url = encode_token(httpserver.url_for("/test_hxl.csv"))
    query = f"hxl_from-{url}"
    with tempfile.TemporaryDirectory() as cachepath:
        set_cache(FileCache(cachepath))
        # First evaluation populates the cache, the second reads from it;
        # both must expose the same headers and HXL display tags.
        for _ in range(2):
            data = evaluate(query).get()
            assert data.columns[0].header == "a"
            assert data.columns[0].display_tag == "#indicator+num+aaa"
            assert data.columns[1].header == "b"
            assert data.columns[1].display_tag == "#indicator+num+bbb"
        set_cache(None)
def test_recipe_spec_store(self):
    """RecipeSpecStore should materialize recipes declared in recipes.yaml."""
    import liquer.store as st
    import liquer.recipes as r
    from liquer.cache import set_cache  # MemoryCache and get_cache were unused

    reset_command_registry()
    set_cache(None)

    @first_command
    def hello():
        return "Hello"

    store = r.RecipeSpecStore(st.MemoryStore())
    store.store(
        "results/recipes.yaml",
        b"""
subdir:
  - hello/hello.txt
""",
        {},
    )
    # The recipe spec creates a virtual entry under results/subdir.
    assert "results/subdir/hello.txt" in store.recipes()
    assert store.contains("results")
    assert store.contains("results/subdir")
    assert store.contains("results/subdir/hello.txt")
    assert store.is_dir("results")
    assert store.is_dir("results/subdir")
    assert not store.is_dir("results/subdir/hello.txt")
    # Reading the entry evaluates the hello recipe.
    assert store.get_bytes("results/subdir/hello.txt") == b"Hello"
def _evaluate_and_save_worker(query, target_directory, target_file, worker_config):
    """Pool worker: set up the worker-local cache, then run evaluate_and_save."""
    print(f"Evaluate and save worker started for {query}")
    cache = get_cache(worker_config)
    set_cache(cache)
    evaluate_and_save(
        query,
        target_directory=target_directory,
        target_file=target_file,
    )
    return f"Done evaluate and save {query}"
def test_vars(self):
    """State variables: unset defaults to None, let overrides, set_var sets a default."""
    set_cache(NoCache())
    import liquer.ext.basic

    def result(query):
        # Small helper: evaluate a query and extract the value.
        return evaluate(query).get()

    # Before set_var: no default value; let provides one within its query only.
    assert result("state_variable-abc") is None
    assert result("let-abc-1/state_variable-abc") == "1"
    assert result("state_variable-abc") is None
    # After set_var: the global default is visible, but let still wins.
    set_var("abc", "123")
    assert result("state_variable-abc") == "123"
    assert result("let-abc-1/state_variable-abc") == "1"
def test_recipe_store(self):
    """RecipeStore should evaluate a mounted recipe on demand."""
    import liquer.store as st
    import liquer.recipes as r
    from liquer.cache import set_cache  # MemoryCache and get_cache were unused

    reset_command_registry()
    set_cache(None)

    @first_command
    def hello():
        return "Hello"

    store = r.RecipeStore(st.MemoryStore())
    store.mount_recipe("my/hello.txt", "hello")
    assert store.contains("my/hello.txt")
    # Reading the mounted path evaluates the hello command.
    assert store.get_bytes("my/hello.txt") == b"Hello"
def test_get_stored_metadata(self):
    """get_stored_metadata resolves resources from the store and queries from the cache."""
    from liquer.store import set_store, MemoryStore
    from liquer.cache import set_cache, MemoryCache, NoCache

    store = MemoryStore()
    store.store_metadata("a/b", {"test": "stored value 1"})
    set_store(store)

    cache = MemoryCache()
    cache.store_metadata({"query": "c/d", "test": "stored value 2"})
    set_cache(cache)

    # Resource path (-R prefix) is looked up in the store.
    assert get_stored_metadata("-R/a/b")["test"] == "stored value 1"
    # A bare path is interpreted as a query: "a/b" is not cached...
    assert get_stored_metadata("a/b") is None  # this represents a query
    # ...but "c/d" is.
    assert get_stored_metadata("c/d")["test"] == "stored value 2"

    set_store(None)
    set_cache(NoCache())
def test_append_with_cache(self):
    """append_df should yield the same doubled frame on a cache miss and a cache hit."""
    import liquer.ext.lq_pandas  # register pandas commands and state type

    with tempfile.TemporaryDirectory() as cachepath:
        set_cache(FileCache(cachepath))
        filename = encode_token(
            os.path.dirname(inspect.getfile(self.__class__)) + "/test.csv"
        )
        # BUG FIX: the query must interpolate the encoded filename; the
        # original f-string contained no placeholders, which also left
        # the `filename` local unused.
        query = f"df_from-{filename}/append_df-{filename}"
        # First evaluation computes, second is served from the file cache.
        for _ in range(2):
            df = evaluate(query).get()
            assert "a" in df.columns
            assert "b" in df.columns
            assert list(df.a) == [1, 3, 1, 3]
            assert list(df.b) == [2, 4, 2, 4]
        set_cache(None)
def set_local_cache_constructor(constructor, arg=None, kwarg=None):
    """Set the pool cache to a locally constructed cache.

    Each pool worker constructs its cache locally by applying positional
    arguments *arg* and keyword arguments *kwarg* to *constructor*.
    This is suitable for FileCache (more or less) or a server-based cache
    (e.g. SQLCache with a database server), but it is NOT suitable for
    MemoryCache, since each worker would get its own private memory cache
    and nothing would be shared.

    In general this is a less secure solution. Even for FileCache there
    might be collisions if multiple workers access the same file.
    set_central_cache should be a safe alternative.
    """
    global _worker_config
    # BUG FIX: with the None defaults, constructor(*arg, **kwarg) raised
    # TypeError whenever arg/kwarg were not supplied; normalize them first.
    if arg is None:
        arg = ()
    if kwarg is None:
        kwarg = {}
    if _worker_config is None:
        _worker_config = {}
    _worker_config.update(
        dict(cache_constructor=constructor, cache_arg=arg, cache_kwarg=kwarg)
    )
    # Also construct and install the cache in the current process.
    set_cache(constructor(*arg, **kwarg))
def test_cache_control(self):
    """with_caching(True/False) decides whether a state is stored in the cache."""
    from liquer.cache import MemoryCache, set_cache, get_cache

    @command
    def cached(state):
        return state.with_caching(True).with_data(123)

    @command
    def cache_off(state):
        return state.with_caching(False).with_data(234)

    set_cache(MemoryCache())
    assert evaluate("cached").get() == 123
    assert evaluate("cache_off").get() == 234
    # Only the opted-in query may appear in the cache.
    cache = get_cache()
    assert cache.contains("cached")
    assert not cache.contains("cache_off")
    set_cache(None)
    reset_command_registry()
def test_recipe_error_in_query_metadata(self):
    """Errors raised inside a recipe must surface in metadata and child logs."""
    import liquer.store as st
    import liquer.recipes as r
    from liquer.cache import set_cache  # MemoryCache and get_cache were unused

    reset_command_registry()
    set_cache(None)

    @first_command
    def hello():
        raise Exception("Hello error")

    @command
    def world(x):
        return str(x) + "world"

    store = r.RecipeSpecStore(st.MemoryStore())
    set_store(store)
    store.store(
        "results/recipes.yaml",
        b"""
subdir:
  - hello/hello.txt
""",
        {},
    )
    assert "results/subdir/hello.txt" in store.recipes()
    # Reading a failed recipe may raise KeyNotFoundStoreException.
    # NOTE(review): the original `assert store.get_bytes(...)` also passes
    # silently when bytes are returned — behavior kept as-is; confirm intent.
    try:
        assert store.get_bytes("results/subdir/hello.txt")
    except KeyNotFoundStoreException:
        pass
    metadata = store.get_metadata("results/subdir/hello.txt")
    assert metadata["is_error"]
    assert metadata["log"][-1]["message"] == "Hello error"
    # The error must propagate into the child log of a dependent query.
    child_messages = [
        x["message"]
        for x in evaluate("results/subdir/hello.txt/-/world/hello.txt").metadata[
            "child_log"
        ]
    ]
    print(child_messages)
    assert "Hello error" in child_messages
    set_store(None)
    reset_command_registry()
def test_cache_control(self):
    """context.enable_cache()/disable_cache() control caching per command."""
    from liquer.cache import MemoryCache, set_cache, get_cache

    @first_command
    def cached(context):
        context.enable_cache()
        return 123

    @command
    def cache_off(x, context):
        context.disable_cache()
        return 234

    set_cache(MemoryCache())
    assert evaluate("cached").get() == 123
    assert evaluate("cache_off").get() == 234
    # Only the query that enabled caching may be present in the cache.
    cache = get_cache()
    assert cache.contains("cached")
    assert not cache.contains("cache_off")
    set_cache(None)
    reset_command_registry()
import liquer.ext.lq_pandas
import pandas as pd
from liquer.cache import MemoryCache, FileCache, SQLCache, set_cache

# Setting cache


@first_command(my_cache="a")
def square(count=10):
    """Return a DataFrame with columns x and y = x**2 for x in range(count)."""
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # collect row dicts and build the frame once instead.
    rows = []
    for i in range(count):
        print("square", i)
        rows.append(dict(x=i, y=i * i))
    return pd.DataFrame(rows)


@first_command(my_cache="b")
def cube(count=10):
    """Return a DataFrame with columns x and y = x**3 for x in range(count)."""
    rows = []
    for i in range(count):
        print("cube", i)
        rows.append(dict(x=i, y=i * i * i))
    return pd.DataFrame(rows)


if __name__ == "__main__":
    # Route cached results by the my_cache attribute; MemoryCache catches the rest.
    set_cache(
        FileCache("cache_a").if_attribute_equal("my_cache", "a")
        + SQLCache.from_sqlite("cache_b.db").if_attribute_equal("my_cache", "b")
        + MemoryCache()  # Everything else
    )
    evaluate_and_save("square/square.csv")
    evaluate_and_save("cube/cube.csv")
    evaluate_and_save("square/square.csv")  # from cache a
    evaluate_and_save("cube/cube.csv")  # from cache b
def cvrdf(cvr, keep_multiline_values=False, day="TODAY", size=2999):
    """Get the document for a specified CVR number as a DataFrame.

    The document has to be found in an index (obtained by the 'index'
    function taking day and size parameters).
    If keep_multiline_values is True, arguments with multiline strings are
    kept, otherwise they are removed (default).
    """
    register_df = register(day=day, size=size)
    register_df = register_df.loc[register_df.cvrNummer.map(str) == str(cvr), :]
    # DataFrame.append was removed in pandas 2.0; collect per-row frames
    # and concatenate them once at the end.
    frames = []
    for index, row in register_df.iterrows():
        cvr = str(row.cvrNummer)
        doc = evaluate(f"ns-{NS}/cvr-{cvr}-xml-{day}-{size}/tojson").get()
        frames.append(
            json2df(doc, keep_multiline_values=keep_multiline_values, init=dict(row))
        )
    if not frames:  # no matching CVR number -> empty frame (as before)
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


if __name__ == "__main__":
    import liquer.ext.basic
    import liquer.ext.lq_pandas
    from liquer.cache import FileCache, set_cache

    set_cache(FileCache("cache"))
    register_df = register()
    cvr = register_df.cvrNummer[0]
    evaluate_and_save(f"ns-virk/cvrdf-{cvr}/data_{cvr}.csv")
from liquer import * import pandas as pd import numpy as np import liquer.ext.basic import liquer.ext.meta import liquer.ext.lq_pandas # Add pandas support to liquer so that the dataframe conversions work from liquer.store import web_mount_folder from liquer.cache import set_cache, MemoryCache web_mount_folder("gui", "dist/liquer/web/gui") ### Create Flask app and register LiQuer blueprint from flask import Flask import liquer.server.blueprint as bp app = Flask(__name__) set_cache(MemoryCache()) url_prefix = '/liquer' app.register_blueprint(bp.app, url_prefix=url_prefix) @first_command(volatile=True) def hello(): return "Hello" @app.route('/') @app.route('/index.html') def index(): return """<h1>Hello-world app</h1> <ul>
# Modules
import liquer.ext.basic
import liquer.ext.lq_pandas
import liquer.ext.lq_matplotlib
import liquer.ext.lq_plotly

app = Flask(__name__)

# Registering the liquer blueprint under a given url prefix and letting
# LiQuer know where it is...
url_prefix = '/liquer'
app.register_blueprint(bp.app, url_prefix=url_prefix)
set_var("api_path", url_prefix + "/q/")
set_var("server", "http://localhost:5000")

# Setting the cache
set_cache(FileCache("../cache"))
#set_cache(MemoryCache())

wfp_url = "https://data.humdata.org/dataset/4fdcd4dc-5c2f-43af-a1e4-93c9b6539a27/resource/12d7c8e3-eff9-4db0-93b7-726825c4fe9a/download/wfpvam_foodprices.csv"
#wfp_url = "https://raw.githubusercontent.com/orest-d/liquer/master/tests/test.csv"
wfp_query = "df_from-" + encode_token(wfp_url)


@command
def datemy(df, y="mp_year", m="mp_month", target="date"):
    """Add a YYYY-MM-01 date column built from the year and month columns."""
    dates = []
    for year, month in zip(df[y], df[m]):
        dates.append("%04d-%02d-01" % (int(year), int(month)))
    df.loc[:, target] = dates
    return df
def _evaluate_worker(query, worker_config):
    """Pool worker: configure the worker-local cache, then evaluate *query*."""
    print(f"Evaluate worker started for {query}")
    cache = get_cache(worker_config)
    set_cache(cache)
    evaluate(query)
    return f"Done evaluating {query}"