def parse_arguments(self, args, resource_str: str):
    """Populate query state (resources, paging, sort, AST) from raw request args.

    Raises errors.IncompleteQuery when *resource_str* is None.
    """
    if resource_str is None:
        raise errors.IncompleteQuery("No resources are defined.")
    self.resources = resource_str.split(",")

    # Paging / formatting options with their defaults.
    self.from_ = arg_get(args, "from", int, 0)
    self.size = arg_get(args, "size", int, 25)
    self.lexicon_stats = arg_get(args, "lexicon_stats", util_convert.str2bool, True)
    self.include_fields = arg_get(args, "include_fields", util_convert.str2list(","))
    self.exclude_fields = arg_get(args, "exclude_fields", util_convert.str2list(","))
    self.fields = []
    self.format = arg_get(args, "format")
    self.format_query = arg_get(args, "format_query")
    self.q = arg_get(args, "q") or ""

    self.sort: List[str] = arg_get(args, "sort", util_convert.str2list(",")) or []
    self.sort_dict: Dict[str, List[str]] = {}
    if not self.sort:
        # No explicit sort requested: fall back to the resources' defaults.
        # A single resource gets a flat sort list; multiple resources get a
        # per-resource mapping.
        if len(self.resources) == 1:
            only_resource = resourcemgr.get_resource(self.resources[0])
            self.sort = only_resource.default_sort()
        else:
            for resource_id in self.resources:
                resource = resourcemgr.get_resource(resource_id)
                self.sort_dict[resource_id] = resource.default_sort()

    self.ast = query_dsl.parse(self.q)
    self._update_ast()
def get_diff(resource_id, entry_id):
    """Return the diff of an entry between two versions or two dates.

    Version/date bounds come from the query string; an optional JSON body
    supplies an entry to diff against. Raises errors.KarpError (code=50)
    on a malformed date.
    """
    params = request.args
    from_version = params.get("from_version")
    to_version = params.get("to_version")

    def _as_timestamp(raw):
        # Dates arrive as unix-timestamp strings; missing/empty stays None.
        if not raw:
            return None
        return float(raw)

    try:
        from_date = _as_timestamp(params.get("from_date"))
        to_date = _as_timestamp(params.get("to_date"))
    except ValueError:
        raise errors.KarpError("Wrong date format", code=50)

    diff, from_version, to_version = entryread.diff(
        resourcemgr.get_resource(resource_id),
        entry_id,
        from_date=from_date,
        to_date=to_date,
        from_version=from_version,
        to_version=to_version,
        entry=request.get_json(),
    )

    result = {"diff": diff, "from_version": from_version}
    if to_version:
        result["to_version"] = to_version
    return jsonify(result)
def _evaluate_function(function_conf: Dict, src_entry: Dict,
                       src_resource: resourcemgr.Resource):
    """Evaluate a "function" field from the resource config for one entry.

    Supports two kinds of functions:
      - "multi_ref": collect entries from a (possibly other) resource whose
        *field* matches a value taken from *src_entry*, and transform each
        hit with the configured "result" mapping.
      - "plugin": delegate to a registered plugin.

    Returns an index-backend list/object (via ``indexer.impl``).
    Raises NotImplementedError for unsupported configurations.
    """
    if "multi_ref" in function_conf:
        function_conf = function_conf["multi_ref"]
        target_field = function_conf["field"]
        if "resource_id" in function_conf:
            target_resource = resourcemgr.get_resource(
                function_conf["resource_id"], function_conf["resource_version"])
        else:
            target_resource = src_resource

        if "test" in function_conf:
            operator, args = list(function_conf["test"].items())[0]
            # "equals" and "contains" were two byte-identical branches;
            # both build the same column filter, so handle them together.
            if operator in ("equals", "contains"):
                filters = {"deleted": False}
                for arg in args:
                    if "self" in arg:
                        # Filter value comes from a field of the source entry.
                        filters[target_field] = src_entry[arg["self"]]
                    else:
                        raise NotImplementedError()
                target_entries = entryread.get_entries_by_column(
                    target_resource, filters)
            else:
                raise NotImplementedError()
        else:
            raise NotImplementedError()

        res = indexer.impl.create_empty_list()
        for entry in target_entries:
            index_entry = indexer.impl.create_empty_object()
            # Wrap under a temporary key so the generic transformer can be
            # reused for the "result" sub-config.
            list_of_sub_fields = (("tmp", function_conf["result"]), )
            _transform_to_index_entry(
                target_resource,
                {"tmp": entry["entry"]},
                index_entry,
                list_of_sub_fields,
            )
            indexer.impl.add_to_list_field(res, index_entry["tmp"])
    elif "plugin" in function_conf:
        plugin_id = function_conf["plugin"]
        import karp.pluginmanager as plugins
        res = plugins.plugins[plugin_id].apply_plugin_function(
            src_resource.id, src_resource.version, src_entry)
    else:
        raise NotImplementedError()
    return res
def publish_resource(resource_id, version):
    """Index and publish a specific version of a resource.

    Prints a notice and does nothing if that version is already active.
    """
    resource = resourcemgr.get_resource(resource_id, version=version)
    if resource.active:
        click.echo("Resource already published")
        return
    indexmgr.publish_index(resource_id, version=version)
    message = "Successfully indexed and published all data in {resource_id}, version {version}".format(
        resource_id=resource_id, version=version
    )
    click.echo(message)
def reindex_resource(resource_id):
    """Reindex all data for the currently active version of *resource_id*.

    Prints a notice when the resource has no active version.
    """
    try:
        resource = resourcemgr.get_resource(resource_id)
        indexmgr.publish_index(resource_id)
        message = "Successfully reindexed all data in {resource_id}, version {version}".format(
            resource_id=resource_id, version=resource.version
        )
        click.echo(message)
    except ResourceNotFoundError:
        click.echo(
            "No active version of {resource_id}".format(resource_id=resource_id)
        )
def get_entry_history(resource_id, entry_id, version):
    """Return one stored historical version of an entry as a plain dict."""
    resource = get_resource(resource_id)
    # The history table keys rows by the numeric DB id, not the public
    # entry_id, so look the DB id up first.
    db_id = resource.model.query.filter_by(entry_id=entry_id).first().id
    history_row = resource.history_model.query.filter_by(
        entry_id=db_id, version=version).first()
    response = {
        "id": entry_id,
        "resource": resource_id,
        "version": version,
        "entry": json.loads(history_row.body),
        "last_modified_by": history_row.user_id,
        "last_modified": history_row.timestamp,
    }
    return response
def test_transform_to_index_entry(
    es,
    client_with_entries_scope_session,
    resource_id: str,
    fields_config: Dict,
    src_entry: Dict,
    expected: Dict,
):
    """Transforming *src_entry* under *fields_config* yields *expected*."""
    with client_with_entries_scope_session.application.app_context():
        resource = resourcemgr.get_resource(resource_id)
        actual = transform_to_index_entry(
            resource, src_entry, fields_config.items())
    assert actual == expected
def _update_references(resource_id: str, entry_ids: List[str]) -> None:
    """Re-index every entry that references any of *entry_ids*.

    Collects the referencing entries per resource, rebuilds their index
    bodies, and pushes each resource's batch to the index backend.
    """
    pending = collections.defaultdict(list)
    for src_entry_id in entry_ids:
        for field_ref in network.get_referenced_entries(
                resource_id, None, src_entry_id):
            ref_resource_id = field_ref["resource_id"]
            ref_resource = resourcemgr.get_resource(
                ref_resource_id, version=field_ref["resource_version"])
            body = transform_to_index_entry(
                ref_resource,
                field_ref["entry"],
                ref_resource.config["fields"].items())
            metadata = resourcemgr.get_metadata(ref_resource, field_ref["id"])
            pending[ref_resource_id].append(
                (field_ref["entry_id"], metadata, body))
    for ref_resource_id, ref_entries in pending.items():
        indexer.impl.add_entries(ref_resource_id, ref_entries)
def _resolve_ref(resource: resourcemgr.Resource, src_entry: Dict,
                 ref_conf: Dict, field_name: str) -> Optional[Any]:
    """Resolve a reference field of *src_entry* into its index representation.

    ``ref_conf`` may point at another resource ("resource_id"/"resource_version");
    otherwise the reference is within *resource* itself. For collection fields,
    each id in ``src_entry[field_name]`` is looked up and (for "object"-typed
    fields) projected onto the configured sub-fields. For scalar fields, the
    single referenced entry is transformed via ``_transform_to_index_entry``.

    Returns the resolved index value, or None when nothing could be resolved
    (e.g. the referenced entry does not exist).
    """
    assert field_name in src_entry
    res = None
    # Pick the resource the reference points into.
    if "resource_id" in ref_conf:
        ref_resource = resourcemgr.get_resource(
            ref_conf["resource_id"], version=ref_conf.get("resource_version"))
    else:
        ref_resource = resource
    if ref_conf["field"].get("collection"):
        res = indexer.impl.create_empty_list()
        for ref_id in src_entry[field_name]:
            ref_entry_body = entryread.get_entry_by_entry_id(
                ref_resource, str(ref_id))
            if ref_entry_body:
                ref_entry = json.loads(ref_entry_body.body)
                # NOTE(review): only "object"-typed collection members are
                # handled here; other types are silently skipped — confirm
                # whether that is intentional.
                if ref_conf["field"]["type"] == "object":
                    ref_index_entry = indexer.impl.create_empty_object()
                    # Copy just the configured sub-fields from the entry.
                    for ref_field_name, _ref_field_conf in ref_conf["field"][
                            "fields"].items():
                        indexer.impl.assign_field(
                            ref_index_entry,
                            ref_field_name,
                            ref_entry[ref_field_name],
                        )
                    indexer.impl.add_to_list_field(res, ref_index_entry)
    else:
        ref = entryread.get_entry_by_entry_id(ref_resource,
                                              str(src_entry[field_name]))
        if ref:
            # Wrap under field_name so the generic transformer produces the
            # value at the expected key.
            ref_entry = {field_name: json.loads(ref.body)}
            ref_index_entry = {}
            list_of_sub_fields = ((field_name, ref_conf["field"]), )
            _transform_to_index_entry(resource, ref_entry, ref_index_entry,
                                      list_of_sub_fields)
            res = ref_index_entry[field_name]
    return res
def get_referenced_entries(resource_id: str, version: Optional[int],
                           entry_id: str) -> Iterator[Dict[str, Any]]:
    """Yield every entry that references, or is referenced by, the given entry.

    First yields back-references (entries in other resources whose ref field
    points at *entry_id*), then forward references (entries this entry's ref
    fields point at).

    Raises EntryNotFoundError when the source entry does not exist.
    """
    resource_refs, resource_backrefs = get_refs(resource_id, version=version)

    src_entry = entryread.get_entry(resource_id, entry_id, version=version)
    if not src_entry:
        raise EntryNotFoundError(resource_id, entry_id, resource_version=version)

    # Back-references: entries elsewhere pointing at this entry.
    for (ref_resource_id, ref_resource_version, field_name,
         _field) in resource_backrefs:
        resource = get_resource(ref_resource_id, version=version)
        for entry in entryread.get_entries_by_column(resource,
                                                     {field_name: entry_id}):
            yield _create_ref(
                ref_resource_id,
                ref_resource_version,
                entry["id"],
                entry["entry_id"],
                entry["entry"],
            )

    src_body = json.loads(src_entry.body)
    # Forward references: entries this entry points at.
    for (ref_resource_id, ref_resource_version, field_name,
         field) in resource_refs:
        ids = src_body.get(field_name)
        if ids is None:
            # Field absent from the entry: nothing to resolve. Without this
            # guard, an absent collection field crashed with
            # "TypeError: 'NoneType' object is not iterable".
            continue
        if not field.get("collection", False):
            ids = [ids]
        for ref_entry_id in ids:
            entry = entryread.get_entry(ref_resource_id, ref_entry_id,
                                        version=ref_resource_version)
            if entry:
                yield _create_ref(
                    ref_resource_id,
                    ref_resource_version,
                    entry.id,
                    entry.entry_id,
                    json.loads(entry.body),
                )
def reindex(
    resource_id: str,
    version: Optional[int] = None,
    search_entries: Optional[List[Tuple[str, EntryMetadata, Dict]]] = None,
) -> None:
    """
    Recreate the search index for a resource and publish it.

    If `search_entries` is not given, they will be fetched from DB
    and processed using `transform_to_index_entry`.
    If `search_entries` is given, they must have the same format as the
    output from `pre_process_resource`.

    Exits the process (``sys.exit``) when no index backend is configured.
    """
    resource_obj = resourcemgr.get_resource(resource_id, version=version)
    try:
        index_name = indexer.impl.create_index(resource_id, resource_obj.config)
    except NotImplementedError:
        # create_index raising NotImplementedError means no index backend
        # module was loaded at all.
        _logger.error(
            "No Index module is loaded. Check your configurations...")
        sys.exit(errors.NoIndexModuleConfigured)
    if not search_entries:
        search_entries = pre_process_resource(resource_obj)
    # update_refs=False: presumably reference updates are unnecessary during a
    # full reindex — TODO confirm against add_entries.
    add_entries(index_name, search_entries, update_refs=False)
    indexer.impl.publish_index(resource_id, index_name)
def pre_process_resource(resource_id, version, filename):
    """Pre-process a resource version and pickle the result to *filename*."""
    resource = resourcemgr.get_resource(resource_id, version=version)
    # Open before processing (as before), so the output file is created even
    # if pre-processing fails partway.
    with open(filename, "wb") as out:
        payload = indexmgr.pre_process_resource(resource)
        pickle.dump(payload, out)
def get_entry(resource_id: str, entry_id: str, version: Optional[int] = None):
    """Fetch an entry by its public entry_id, optionally from a specific version."""
    return get_entry_by_entry_id(
        get_resource(resource_id, version=version), entry_id)
def get_history(
    resource_id: str,
    user_id: Optional[str] = None,
    entry_id: Optional[str] = None,
    from_date: Optional[int] = None,
    to_date: Optional[int] = None,
    from_version: Optional[int] = None,
    to_version: Optional[int] = None,
    current_page: Optional[int] = 0,
    page_size: Optional[int] = 100,
):
    """Return a page of history records for a resource, newest-page semantics
    left to the caller via ``current_page``/``page_size``.

    Filters: by ``user_id``, by ``entry_id``, and — only when ``entry_id`` is
    given — by version range (``from_version`` inclusive, ``to_version``
    exclusive); otherwise by timestamp range. Each record includes a JSON
    diff against the previous version of the same entry.

    Returns a tuple ``(result, total)`` where *total* counts all matching
    rows (not just this page).
    """
    resource_obj = get_resource(resource_id)
    timestamp_field = resource_obj.history_model.timestamp
    query = resource_obj.history_model.query
    if user_id:
        query = query.filter_by(user_id=user_id)
    if entry_id:
        # History rows reference the numeric DB id, so translate the public
        # entry_id first.
        current_entry = resource_obj.model.query.filter_by(
            entry_id=entry_id).first()
        query = query.filter_by(entry_id=current_entry.id)
    version_field = resource_obj.history_model.version
    # Version bounds only apply when a specific entry is requested;
    # otherwise fall back to date bounds.
    if entry_id and from_version:
        query = query.filter(version_field >= from_version)
    elif from_date is not None:
        query = query.filter(timestamp_field >= from_date)
    if entry_id and to_version:
        query = query.filter(version_field < to_version)
    elif to_date is not None:
        query = query.filter(timestamp_field <= to_date)
    paged_query = query.limit(page_size).offset(current_page * page_size)
    total = query.count()
    result = []
    for history_entry in paged_query:
        # TODO fix this, entry_id in history refers to the "normal" id in non-history table
        entry_id = (resource_obj.model.query.filter_by(
            id=history_entry.entry_id).first().entry_id)
        # TODO fix this, we should get the diff in another way, probably store the diffs directly in the database
        entry_version = history_entry.version
        if entry_version > 1:
            # Diff against the immediately preceding version of this entry.
            previous_body = json.loads(
                resource_obj.history_model.query.filter_by(
                    entry_id=history_entry.entry_id,
                    version=entry_version - 1).first().body)
        else:
            # First version: diff against an empty entry.
            previous_body = {}
        history_diff = jsondiff.compare(previous_body,
                                        json.loads(history_entry.body))
        result.append({
            "timestamp": history_entry.timestamp,
            "message": history_entry.message if history_entry.message else "",
            "entry_id": entry_id,
            "version": entry_version,
            "op": history_entry.op,
            "user_id": history_entry.user_id,
            "diff": history_diff,
        })
    return result, total