def get(self, key):
    """Return the cached state stored under ``key``, or None on a cache miss.

    None is returned when no row matches ``key``, when the stored metadata
    cannot be read or parsed as JSON, when its ``"status"`` is not
    ``"ready"``, or when the stored data cannot be turned back into a state
    (that last failure is logged).
    """
    c = self.connection.cursor()
    c.execute(
        f"SELECT metadata, state_data FROM {self.table} WHERE query=?",
        [key],
    )
    try:
        row = c.fetchone()
        if row is None:
            # No entry for this key: a plain cache miss.
            return None
        metadata_json, data = row
        metadata = json.loads(metadata_json)
        if metadata.get("status") != "ready":
            return None
    except Exception:
        # Unreadable metadata counts as a miss. Unlike a bare ``except``,
        # this lets KeyboardInterrupt and SystemExit propagate.
        return None

    try:
        state = State().from_dict(metadata)
        t = state_types_registry().get(state.type_identifier)
        state.data = t.from_bytes(self.decode(data))
        return state
    except Exception:
        logging.exception(f"Cache failed to recover {key}")
        return None
def evaluate_and_save(query, target_directory=None, target_file=None, cache=None):
    """Evaluate query and save result.

    Output is saved either to
    - a target directory (current working directory by default) to a file
      deduced from the query, or
    - to target_file (if specified)

    When no target_file is given, the file name is ``state.filename`` if both
    ``state.extension`` and ``state.filename`` are set, otherwise the default
    file name of the state type registered for the data. If target_directory
    is given, the path is joined onto it (this also applies to target_file).

    The data is encoded with ``state.extension`` when it is set, regardless
    of the extension of target_file.

    Returns a state.
    """
    state = evaluate(query, cache=cache)
    data = state.get()
    reg = state_types_registry()
    t = reg.get(type(data))

    # Encode on every path. Previously the bytes were only produced when
    # target_file was None, so passing target_file failed with an unbound
    # local at the write below.
    if state.extension is None:
        b, mime, typeid = encode_state_data(data)
    else:
        b, mime, typeid = encode_state_data(data, extension=state.extension)

    path = target_file
    if path is None:
        if state.extension is None or state.filename is None:
            path = t.default_filename()
        else:
            path = state.filename
    if target_directory is not None:
        path = os.path.join(target_directory, path)

    with open(path, "wb") as f:
        f.write(b)

    return state
def response(state):
    """Encode a State and return ``(bytes, mimetype, filename)``.

    The file name is taken from the ``"filename"`` entry of the state
    metadata; when that entry is None, the default file name of the state
    type reported by the encoder is used instead.
    """
    name = state.metadata.get("filename")
    payload, mimetype, type_identifier = encode_state_data(
        state.get(), extension=state.extension
    )
    if name is not None:
        return payload, mimetype, name
    state_type = state_types_registry().get(type_identifier)
    return payload, mimetype, state_type.default_filename()
def contains(self, key):
    """Return True when both the state file and the data file for ``key`` exist.

    The state file is read to find the type identifier, which determines the
    extension of the data file that is then looked up.
    """
    state_path = self.to_path(key)
    if not os.path.exists(state_path):
        return False

    # A context manager closes the handle deterministically instead of
    # leaving it to the garbage collector.
    with open(state_path) as f:
        state = State().from_dict(json.load(f))

    t = state_types_registry().get(state.type_identifier)
    path = self.to_path(key, prefix="data_", extension=t.default_extension())
    return os.path.exists(path)
def remove(self, key):
    """Delete the files cached for ``key`` and return True.

    When no metadata is available for the key there is nothing to delete.
    The data file is only looked up when the metadata carries a
    ``"type_identifier"``, since that determines the data file extension.
    """
    metadata = self.get_metadata(key)
    if metadata is None:
        return True

    def discard(file_path):
        # Remove the file only if it is actually there.
        if os.path.exists(file_path):
            os.remove(file_path)

    if "type_identifier" in metadata:
        state_type = state_types_registry().get(metadata["type_identifier"])
        discard(
            self.to_path(
                key, prefix="data_", extension=state_type.default_extension()
            )
        )
    discard(self.to_path(key))
    return True
def store(self, state):
    """Store a state in the underlying storage.

    Returns None for error states (they are not cached), True when the state
    was written, and False when the storage does not support the path or
    when serializing/writing fails.

    The state's metadata is marked with status ``"ready"``; the stored copy
    of the metadata additionally gets the ``"mimetype"`` of the serialized
    data.
    """
    import logging  # local import keeps this block self-contained

    if state.is_error:
        return None
    state.metadata["status"] = "ready"
    t = state_types_registry().get(state.type_identifier)
    path = self.to_path(state.query)
    if not self.storage.is_supported(path):
        return False
    try:
        b, mime = t.as_bytes(state.data)
        # Copy so that the mimetype is only added to the stored metadata.
        metadata = dict(**state.metadata)
        metadata["mimetype"] = mime
        # NOTE(review): the bytes are stored without self.encode(); confirm
        # this matches how the corresponding reader decodes them.
        self.storage.store(path, b, metadata)
        return True
    except Exception:
        # Storing is best-effort, but the failure is logged rather than
        # silently dropped, and KeyboardInterrupt/SystemExit now propagate.
        logging.exception(f"Cache writing error: {state.query}")
        return False
def get(self, key):
    """Return the cached state for ``key``, or None on a cache miss.

    None is returned when the state file or the data file does not exist, or
    when the data file cannot be converted back into data (that failure is
    logged).
    """
    state_path = self.to_path(key)
    if not os.path.exists(state_path):
        return None

    # Context managers close both files deterministically.
    with open(state_path) as f:
        state = State().from_dict(json.load(f))

    t = state_types_registry().get(state.type_identifier)
    path = self.to_path(key, prefix="data_", extension=t.default_extension())
    if not os.path.exists(path):
        return None
    try:
        with open(path, "rb") as f:
            state.data = t.from_bytes(f.read())
        return state
    except Exception:
        # Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        logging.exception(f"Cache failed to recover {key}")
        return None
def store(self, state):
    """Store a state: metadata first, then the encoded data file.

    Returns None for error states (they are not cached), False when the
    metadata could not be stored or the data cannot be serialized
    (NotImplementedError), and True when the data file was written.
    """
    if state.is_error:
        return None
    state.metadata["status"] = "ready"
    if not self.store_metadata(state.metadata):
        return False

    t = state_types_registry().get(state.type_identifier)
    path = self.to_path(
        state.query, prefix="data_", extension=t.default_extension()
    )
    # Serialize before opening the file: opening with "wb" truncates it, so a
    # serialization failure used to leave an empty data file behind.
    try:
        b, mime = t.as_bytes(state.data)
        payload = self.encode(b)
    except NotImplementedError:
        return False

    with open(path, "wb") as f:
        f.write(payload)
    return True
def store(self, state):
    """Store a state as a JSON state file plus a binary data file.

    Returns False when the state file cannot be written (the error is
    logged) or when the data cannot be serialized (NotImplementedError);
    returns True when both files were written.
    """
    try:
        # Serialize before opening so that a failure does not leave a
        # truncated state file behind.
        serialized_state = json.dumps(state.as_dict())
        with open(self.to_path(state.query), "w") as f:
            f.write(serialized_state)
    except Exception:
        # Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        logging.exception(f"Cache writing error: {state.query}")
        return False

    t = state_types_registry().get(state.type_identifier)
    path = self.to_path(state.query, prefix="data_", extension=t.default_extension())
    # Same reasoning for the data file: get the bytes first, then open.
    try:
        b, mime = t.as_bytes(state.data)
    except NotImplementedError:
        return False

    with open(path, "wb") as f:
        f.write(b)
    return True
def __init__(
    self,
    key,
    item_keys=None,
    extension="parquet",
    number_format="%04d",
    batch_number=0,
    store=None,
):
    """Record the configuration on the instance.

    ``item_keys`` falls back to a new empty list when it is None or empty.
    ``state_type`` is the state type registered under ``"dataframe"``.
    ``store`` falls back to the result of ``get_store()`` when it is None.
    """
    self.key = key
    self.item_keys = item_keys if item_keys else []
    self.extension = extension
    self.number_format = number_format
    self.batch_number = batch_number
    registry = state_types_registry()
    self.state_type = registry.get("dataframe")
    self.store = get_store() if store is None else store
def get(self, query):
    """Main service for evaluating queries

    Evaluates the query, encodes the resulting data and writes it to the
    response with a matching Content-Type header. The encoding extension is
    the part of the state's file name after the last dot, if there is one.
    """
    state = evaluate(query)
    filename = state.filename
    extension = None
    if filename is not None and "." in filename:
        extension = filename.split(".")[-1]

    # The type identifier was only used to derive a default file name that
    # was never sent anywhere; that dead lookup has been removed.
    b, mimetype, _type_identifier = encode_state_data(
        state.get(), extension=extension
    )

    self.set_header("Content-Type", mimetype)
    self.write(b)
def get(self, key):
    """Return the cached state for ``key``, or None on a cache miss.

    None is returned when the metadata is missing, when its ``"status"`` is
    not ``"ready"``, when the data file does not exist, or when the data
    file cannot be decoded (that failure is printed and logged).
    """
    metadata = self.get_metadata(key)
    if metadata is None:
        print(f"(FileCache) Metadata missing: {key}")
        return None
    if metadata.get("status") != "ready":
        print(f"(FileCache) Not ready {key}; ", metadata.get("status"))
        return None

    state = State()
    state.metadata = metadata
    t = state_types_registry().get(metadata["type_identifier"])
    path = self.to_path(key, prefix="data_", extension=t.default_extension())
    if not os.path.exists(path):
        return None
    try:
        # The context manager closes the data file deterministically.
        with open(path, "rb") as f:
            state.data = t.from_bytes(self.decode(f.read()))
        return state
    except Exception:
        # Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        traceback.print_exc()
        logging.exception(f"Cache failed to recover {key}")
        return None
def response(state):
    """Build a flask response carrying the encoded data of a State.

    The encoding extension is the part of ``state.filename`` after the last
    dot, if any. Responses whose mimetype is not in the inline list below
    get a ``Content-Disposition: attachment`` header with the file name
    (the state type's default file name when the state has none).
    """
    inline_mimetypes = [
        "application/json",
        "text/plain",
        "text/html",
        "text/csv",
        "image/png",
        "image/svg+xml",
    ]

    filename = state.filename
    has_suffix = filename is not None and "." in filename
    extension = filename.split(".")[-1] if has_suffix else None

    payload, mimetype, type_identifier = encode_state_data(
        state.get(), extension=extension
    )
    if filename is None:
        state_type = state_types_registry().get(type_identifier)
        filename = state_type.default_filename()

    r = make_response(payload)
    r.headers.set("Content-Type", mimetype)
    if mimetype in inline_mimetypes:
        return r
    r.headers.set("Content-Disposition", "attachment", filename=filename)
    return r
def response(state):
    """Build a flask response carrying the encoded data of a State.

    The data is encoded using ``state.extension``. Responses whose mimetype
    is not in the inline list below get a ``Content-Disposition: attachment``
    header; the file name comes from the ``"filename"`` metadata entry, or
    from the state type's default file name when that entry is None.
    """
    inline_mimetypes = [
        "application/json",
        "text/plain",
        "text/html",
        "text/csv",
        "image/png",
        "image/svg+xml",
    ]

    payload, mimetype, type_identifier = encode_state_data(
        state.get(), extension=state.extension
    )
    filename = state.metadata.get("filename")
    if filename is None:
        state_type = state_types_registry().get(type_identifier)
        filename = state_type.default_filename()

    r = make_response(payload)
    r.headers.set("Content-Type", mimetype)
    if mimetype in inline_mimetypes:
        return r
    r.headers.set("Content-Disposition", "attachment", filename=filename)
    return r
def store(self, state):
    """Insert a state into the cache table.

    Returns None for error states (they are not cached), False when the data
    cannot be serialized (NotImplementedError), and True after the row has
    been inserted and committed. When ``delete_before_insert`` is set, any
    existing row for the query is deleted first.
    """
    if state.is_error:
        return None

    state.metadata["status"] = "ready"
    query_key = state.query
    metadata_json = json.dumps(state.as_dict())
    state_type = state_types_registry().get(state.type_identifier)
    try:
        payload, _mime = state_type.as_bytes(state.data)
    except NotImplementedError:
        return False

    # Reset the cached key listing, since the table content changes below.
    self._available_keys = None

    if self.delete_before_insert:
        delete_sql = f"DELETE FROM {self.table} WHERE query=?"
        self.connection.execute(delete_sql, [query_key])

    insert_sql = (
        f"INSERT INTO {self.table} (query, metadata, state_data) VALUES (?, ?, ?)"
    )
    row = [query_key, metadata_json, self.encode(payload)]
    self.connection.execute(insert_sql, row)
    self.connection.commit()
    return True
def get(self, key):
    """Return the cached state for ``key``, or None on a cache miss.

    None is returned when the metadata is missing, when its ``"status"`` is
    not ``"ready"``, when the storage does not contain the path for the key,
    or when the stored bytes cannot be decoded (that failure is printed and
    logged).
    """
    print(f"GET {key}")
    metadata = self.get_metadata(key)
    print(f"  METADATA {metadata}")
    if metadata is None:
        print(f"(StoreCache) Metadata missing: {key}")
        return None
    if metadata.get("status") != "ready":
        print(f"(StoreCache) Not ready {key}; ", metadata.get("status"))
        return None

    state = State()
    state.metadata = metadata
    t = state_types_registry().get(metadata["type_identifier"])
    path = self.to_path(key)
    if not self.storage.contains(path):
        return None
    try:
        state.data = t.from_bytes(self.decode(self.storage.get_bytes(path)))
        return state
    except Exception:
        # Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        traceback.print_exc()
        logging.exception(f"Cache failed to recover {key}")
        return None
def state_types_registry(self):
    """Return the result of the module-level ``state_types_registry()``."""
    registry = state_types_registry()
    return registry
def dr(state, type_identifier=None, extension=None, context=None):
    """Decode resource

    Decodes the bytes into a data structure. This is meant to be used in
    connection to a resource query. Resource part of the query will typically
    fetch the data from a store and thus return bytes (together with
    metadata). Command dr will convert the bytes (assuming proper metadata are
    provided) into a data structure. The metadata must contain
    type_identifier in metadata or metadata['resource_metadata'], a filename
    with extension or extension with known decoding.

    Resolution order:
    - type_identifier: the argument, then metadata["type_identifier"], then
      metadata["resource_metadata"]["type_identifier"]; if it is still None
      or "bytes", it is derived from the extension.
    - extension: the argument, then metadata["extension"], then the extension
      of the file name parsed from metadata["query"]; when there is no query
      in the metadata, the extension of the key name of
      metadata["resource_metadata"]["key"].

    Returns the state with the decoded data. If state.data is None, an error
    is reported to the context and None is returned. Raises an Exception when
    no type identifier can be resolved.
    """
    from liquer.state_types import state_types_registry
    from liquer.parser import parse

    if state.data is None:
        context.error(
            f"Bytes expected, None received in dr from {state.query}")
        return

    if type_identifier is None:
        type_identifier = state.metadata.get(
            "type_identifier",
            state.metadata.get("resource_metadata", {}).get("type_identifier"),
        )
    if type_identifier in (None, "bytes"):
        # "bytes" only says the data is raw; prefer the resource's own type.
        type_identifier = state.metadata.get("resource_metadata", {}).get(
            "type_identifier")

    if extension is None:
        extension = state.metadata.get("extension")
        if extension is None:
            query = state.metadata.get("query")
            if query is not None:
                filename = parse(query).filename()
                if filename is not None:
                    v = filename.split(".")
                    if len(v) > 1:
                        extension = v[-1]
                        context.info(
                            f"Extension: {extension} - from query '{query}'")
            else:
                key = state.metadata.get("resource_metadata", {}).get("key")
                if key is not None:
                    filename = context.store().key_name(key)
                    # Guard against a missing name, as the query branch does.
                    if filename is not None:
                        v = filename.split(".")
                        if len(v) > 1:
                            extension = v[-1]
                            context.info(
                                f"Extension: {extension} - from key '{key}'")

    if type_identifier in (None, "bytes"):
        type_identifier = type_identifier_from_extension(extension)
        context.info(
            f"Type identifier: {type_identifier} - from extension '{extension}'"
        )

    if type_identifier is None:
        context.error("Decode resource (dr) command failed")
        raise Exception(
            f"Failed to resolve type for query {state.metadata.get('query')}")

    if extension in ("parquet", "xlsx", "csv", "tsv") and type_identifier in (
            "generic", "dictionary", "pickle"):
        # A tabular extension combined with a generic type identifier is
        # treated as inconsistent metadata; the extension wins.
        context.warning(
            f"Type identifier '{type_identifier}' seems to be inconsistent with the extension '{extension}'"
        )
        context.warning(
            f"This might indicate a problem with executing the partent query '{context.parent_query}'"
        )
        type_identifier = type_identifier_by_extension.get(extension)
        context.warning(
            f"To fix the inconsistency, type identifier: {type_identifier} is used from extension '{extension}'"
        )
    context.info(
        f"Type identifier: {type_identifier}, Extension: {extension}")
    t = state_types_registry().get(type_identifier)
    data = t.from_bytes(state.data, extension=extension)
    return state.with_data(data)