Exemplo n.º 1
0
 def parse_arguments(self, args, resource_str: str):
     """Populate this query object from request arguments.

     Every option is read from ``args`` through ``arg_get``; the query
     string ``q`` is then parsed with ``query_dsl.parse`` and
     ``self._update_ast()`` is invoked.

     Args:
         args: Argument container handed to ``arg_get`` for each lookup.
         resource_str: Comma-separated resource ids.

     Raises:
         errors.IncompleteQuery: If ``resource_str`` is ``None``.
     """
     if resource_str is None:
         raise errors.IncompleteQuery("No resources are defined.")

     self.resources = resource_str.split(",")

     # Paging and statistics options.
     self.from_ = arg_get(args, "from", int, 0)
     self.size = arg_get(args, "size", int, 25)
     self.lexicon_stats = arg_get(
         args, "lexicon_stats", util_convert.str2bool, True
     )

     # Field selection.
     self.include_fields = arg_get(
         args, "include_fields", util_convert.str2list(",")
     )
     self.exclude_fields = arg_get(
         args, "exclude_fields", util_convert.str2list(",")
     )
     self.fields = []

     # Output format and the raw query.
     self.format = arg_get(args, "format")
     self.format_query = arg_get(args, "format_query")
     self.q = arg_get(args, "q") or ""

     # Sorting: an explicit "sort" argument wins; otherwise fall back to
     # the default sort of the resource(s).
     requested_sort = arg_get(args, "sort", util_convert.str2list(","))
     self.sort: List[str] = requested_sort or []
     self.sort_dict: Dict[str, List[str]] = {}
     if not self.sort:
         if len(self.resources) == 1:
             sole_resource = resourcemgr.get_resource(self.resources[0])
             self.sort = sole_resource.default_sort()
         else:
             # Several resources: keep one default sort per resource id.
             for resource_id in self.resources:
                 resource = resourcemgr.get_resource(resource_id)
                 self.sort_dict[resource_id] = resource.default_sort()

     self.ast = query_dsl.parse(self.q)
     self._update_ast()
Exemplo n.º 2
0
def get_diff(resource_id, entry_id):
    """Return a JSON response holding the diff for one entry.

    The bounds are taken from the query-string arguments ``from_version``,
    ``to_version``, ``from_date`` and ``to_date``; the two dates are
    converted to ``float``. The request's JSON body is forwarded to
    ``entryread.diff`` as ``entry``.

    Raises:
        errors.KarpError: (code 50) if a supplied date is not a valid float.
    """
    query_args = request.args
    from_version = query_args.get("from_version")
    to_version = query_args.get("to_version")
    raw_from_date = query_args.get("from_date")
    raw_to_date = query_args.get("to_date")

    try:
        # Empty / missing dates stay None; anything else must parse.
        from_date = float(raw_from_date) if raw_from_date else None
        to_date = float(raw_to_date) if raw_to_date else None
    except ValueError:
        raise errors.KarpError("Wrong date format", code=50)

    entry_body = request.get_json()
    resource = resourcemgr.get_resource(resource_id)

    diff, from_version, to_version = entryread.diff(
        resource,
        entry_id,
        from_date=from_date,
        to_date=to_date,
        from_version=from_version,
        to_version=to_version,
        entry=entry_body,
    )

    payload = {"diff": diff, "from_version": from_version}
    # "to_version" is only reported when the diff call returned a truthy one.
    if to_version:
        payload["to_version"] = to_version
    return jsonify(payload)
Exemplo n.º 3
0
def _evaluate_function(function_conf: Dict, src_entry: Dict,
                       src_resource: resourcemgr.Resource):
    """Evaluate a configured field function for ``src_entry``.

    Two kinds of configuration are handled:

    * ``"multi_ref"``: look up entries in a target resource whose
      ``field`` column matches a value taken from ``src_entry`` and
      return a list with each hit transformed according to ``result``.
    * ``"plugin"``: delegate to the named plugin's
      ``apply_plugin_function``.

    Raises:
        NotImplementedError: For any other configuration, a missing
            ``test``, an operator other than ``equals``/``contains``, or a
            test argument without a ``"self"`` key.
    """
    if "multi_ref" in function_conf:
        multi_ref_conf = function_conf["multi_ref"]
        target_field = multi_ref_conf["field"]

        # Without an explicit target the lookup stays in the source resource.
        if "resource_id" in multi_ref_conf:
            target_resource = resourcemgr.get_resource(
                multi_ref_conf["resource_id"],
                multi_ref_conf["resource_version"])
        else:
            target_resource = src_resource

        if "test" not in multi_ref_conf:
            raise NotImplementedError()

        # Only the first (operator, args) pair of the test is considered.
        operator, args = list(multi_ref_conf["test"].items())[0]
        filters = {"deleted": False}
        if operator not in ("equals", "contains"):
            raise NotImplementedError()

        # Both supported operators build the same column filter.
        for arg in args:
            if "self" not in arg:
                raise NotImplementedError()
            filters[target_field] = src_entry[arg["self"]]
        target_entries = entryread.get_entries_by_column(
            target_resource, filters)

        collected = indexer.impl.create_empty_list()
        for target in target_entries:
            index_entry = indexer.impl.create_empty_object()
            # Wrap the entry under a throw-away "tmp" field so the generic
            # transform can be reused for the configured result shape.
            sub_fields = (("tmp", multi_ref_conf["result"]), )
            _transform_to_index_entry(
                target_resource,
                {"tmp": target["entry"]},
                index_entry,
                sub_fields,
            )
            indexer.impl.add_to_list_field(collected, index_entry["tmp"])
        return collected

    if "plugin" in function_conf:
        plugin_id = function_conf["plugin"]
        import karp.pluginmanager as plugins

        return plugins.plugins[plugin_id].apply_plugin_function(
            src_resource.id, src_resource.version, src_entry)

    raise NotImplementedError()
Exemplo n.º 4
0
 def publish_resource(resource_id, version):
     """Publish the index for one resource version unless it is already active.

     Prints a short status message through ``click.echo`` in both cases.
     """
     resource = resourcemgr.get_resource(resource_id, version=version)
     if resource.active:
         click.echo("Resource already published")
         return

     indexmgr.publish_index(resource_id, version=version)
     message = "Successfully indexed and published all data in {resource_id}, version {version}".format(
         resource_id=resource_id, version=version
     )
     click.echo(message)
Exemplo n.º 5
0
 def reindex_resource(resource_id):
     """Re-publish the index of a resource and report the outcome.

     A ``ResourceNotFoundError`` raised anywhere in the lookup / publish
     sequence is reported as "No active version" instead of propagating.
     """
     try:
         resource = resourcemgr.get_resource(resource_id)
         indexmgr.publish_index(resource_id)
         success = "Successfully reindexed all data in {resource_id}, version {version}".format(
             resource_id=resource_id, version=resource.version
         )
         click.echo(success)
     except ResourceNotFoundError:
         failure = "No active version of {resource_id}".format(
             resource_id=resource_id
         )
         click.echo(failure)
Exemplo n.º 6
0
def get_entry_history(resource_id, entry_id, version):
    """Return one historical version of an entry as a plain dict.

    The entry is first located in the resource's main table by its
    ``entry_id``; that row's ``id`` is then used to find the matching
    ``version`` row in the history table.

    Returns:
        A dict with the keys ``id``, ``resource``, ``version``, ``entry``
        (the decoded JSON body), ``last_modified_by`` and ``last_modified``.
    """
    resource_obj = get_resource(resource_id)

    # The history table refers to entries by the main table's row id.
    current_row = resource_obj.model.query.filter_by(entry_id=entry_id).first()
    db_id = current_row.id

    history_query = resource_obj.history_model.query.filter_by(
        entry_id=db_id, version=version)
    history_row = history_query.first()

    return {
        "id": entry_id,
        "resource": resource_id,
        "version": version,
        "entry": json.loads(history_row.body),
        "last_modified_by": history_row.user_id,
        "last_modified": history_row.timestamp,
    }
Exemplo n.º 7
0
def test_transform_to_index_entry(
    es,
    client_with_entries_scope_session,
    resource_id: str,
    fields_config: Dict,
    src_entry: Dict,
    expected: Dict,
):
    """Check that ``transform_to_index_entry`` yields ``expected``.

    The transform runs inside the application context of the supplied test
    client, using the resource identified by ``resource_id`` and the field
    definitions in ``fields_config``.
    """
    application = client_with_entries_scope_session.application
    with application.app_context():
        resource = resourcemgr.get_resource(resource_id)
        field_items = fields_config.items()
        actual = transform_to_index_entry(resource, src_entry, field_items)

        assert actual == expected
Exemplo n.º 8
0
def _update_references(resource_id: str, entry_ids: List[str]) -> None:
    """Re-index every entry that is linked to one of ``entry_ids``.

    For each source entry the referenced entries are collected via
    ``network.get_referenced_entries``, transformed to index entries and
    grouped per resource id; each group is then handed to
    ``indexer.impl.add_entries`` in a single call.
    """
    entries_by_resource = collections.defaultdict(list)

    for src_entry_id in entry_ids:
        linked = network.get_referenced_entries(resource_id, None,
                                                src_entry_id)
        for ref in linked:
            target_id = ref["resource_id"]
            target_resource = resourcemgr.get_resource(
                target_id, version=ref["resource_version"])
            index_body = transform_to_index_entry(
                target_resource, ref["entry"],
                target_resource.config["fields"].items())
            metadata = resourcemgr.get_metadata(target_resource, ref["id"])
            entries_by_resource[target_id].append(
                (ref["entry_id"], metadata, index_body))

    # One bulk call per affected resource.
    for target_id, index_entries in entries_by_resource.items():
        indexer.impl.add_entries(target_id, index_entries)
Exemplo n.º 9
0
def _resolve_ref(resource: resourcemgr.Resource, src_entry: Dict,
                 ref_conf: Dict, field_name: str) -> Optional[Any]:
    """Resolve the reference(s) stored in ``src_entry[field_name]``.

    The referenced entries are looked up in the resource named by
    ``ref_conf["resource_id"]`` (with optional ``resource_version``), or in
    ``resource`` itself when no resource id is configured.

    Returns:
        * For a collection field: a list built by the indexer with one
          object per referenced entry that exists, populated only when the
          field type is ``"object"``.
        * For a single reference: the transformed referenced entry, or
          ``None`` if the referenced entry does not exist.
    """
    assert field_name in src_entry

    if "resource_id" in ref_conf:
        ref_resource = resourcemgr.get_resource(
            ref_conf["resource_id"], version=ref_conf.get("resource_version"))
    else:
        ref_resource = resource

    field_conf = ref_conf["field"]

    if field_conf.get("collection"):
        resolved = indexer.impl.create_empty_list()
        for ref_id in src_entry[field_name]:
            stored = entryread.get_entry_by_entry_id(ref_resource,
                                                     str(ref_id))
            if not stored:
                # Dangling reference: nothing to add.
                continue
            ref_entry = json.loads(stored.body)
            if field_conf["type"] != "object":
                # Non-object collection items are skipped without error.
                continue

            ref_index_entry = indexer.impl.create_empty_object()
            for sub_name, _sub_conf in field_conf["fields"].items():
                indexer.impl.assign_field(
                    ref_index_entry,
                    sub_name,
                    ref_entry[sub_name],
                )
            indexer.impl.add_to_list_field(resolved, ref_index_entry)
        return resolved

    stored = entryread.get_entry_by_entry_id(ref_resource,
                                             str(src_entry[field_name]))
    if not stored:
        return None

    # Wrap the referenced body under the field name so the generic
    # transform can process it like a regular sub-field.
    wrapped_entry = {field_name: json.loads(stored.body)}
    ref_index_entry = {}
    sub_fields = ((field_name, field_conf), )
    # NOTE(review): the transform receives `resource`, not `ref_resource`
    # -- confirm this is intended.
    _transform_to_index_entry(resource, wrapped_entry, ref_index_entry,
                              sub_fields)
    return ref_index_entry[field_name]
Exemplo n.º 10
0
def get_referenced_entries(resource_id: str, version: Optional[int],
                           entry_id: str) -> Iterator[Dict[str, Any]]:
    """Yield every entry linked to the given entry, in either direction.

    First the back-references are yielded (entries in other resources
    whose reference field holds ``entry_id``), then the forward references
    (entries that the source entry itself points to).

    Args:
        resource_id: Id of the resource that owns the source entry.
        version: Version of that resource, or ``None``.
        entry_id: Id of the source entry.

    Yields:
        The value produced by ``_create_ref`` for each linked entry.

    Raises:
        EntryNotFoundError: If the source entry does not exist. As this is
            a generator, the error surfaces on first iteration.
    """
    resource_refs, resource_backrefs = get_refs(resource_id, version=version)

    src_entry = entryread.get_entry(resource_id, entry_id, version=version)
    if not src_entry:
        raise EntryNotFoundError(resource_id,
                                 entry_id,
                                 resource_version=version)

    for (ref_resource_id, ref_resource_version, field_name,
         _field) in resource_backrefs:
        # Look the referring resource up by ITS OWN version; the source
        # resource's version is unrelated and could select the wrong (or a
        # non-existent) version of the referring resource.
        resource = get_resource(ref_resource_id,
                                version=ref_resource_version)
        for entry in entryread.get_entries_by_column(resource,
                                                     {field_name: entry_id}):
            yield _create_ref(
                ref_resource_id,
                ref_resource_version,
                entry["id"],
                entry["entry_id"],
                entry["entry"],
            )

    src_body = json.loads(src_entry.body)
    for (ref_resource_id, ref_resource_version, field_name,
         field) in resource_refs:
        ids = src_body.get(field_name)
        if ids is None:
            # The reference field is absent from this entry: nothing to
            # resolve (and iterating None would raise TypeError).
            continue
        if not field.get("collection", False):
            ids = [ids]
        for ref_entry_id in ids:
            entry = entryread.get_entry(ref_resource_id,
                                        ref_entry_id,
                                        version=ref_resource_version)
            if entry:
                yield _create_ref(
                    ref_resource_id,
                    ref_resource_version,
                    entry.id,
                    entry.entry_id,
                    json.loads(entry.body),
                )
Exemplo n.º 11
0
def reindex(
    resource_id: str,
    version: Optional[int] = None,
    search_entries: Optional[List[Tuple[str, EntryMetadata, Dict]]] = None,
) -> None:
    """
    Create a new index for a resource, fill it and publish it.

    If `search_entries` is not given (or is empty), the entries are produced by `pre_process_resource`.
    If `search_entries` is given, they must have the same format as the output from `pre_process_resource`.

    The process exits via `sys.exit` when the indexer raises `NotImplementedError` on index creation.
    """
    resource_obj = resourcemgr.get_resource(resource_id, version=version)

    try:
        index_name = indexer.impl.create_index(resource_id,
                                               resource_obj.config)
    except NotImplementedError:
        _logger.error(
            "No Index module is loaded. Check your configurations...")
        sys.exit(errors.NoIndexModuleConfigured)

    # Fall back to processing the resource when no entries were supplied.
    entries = search_entries or pre_process_resource(resource_obj)

    add_entries(index_name, entries, update_refs=False)
    indexer.impl.publish_index(resource_id, index_name)
Exemplo n.º 12
0
 def pre_process_resource(resource_id, version, filename):
     """Pickle the pre-processed entries of a resource version to ``filename``.

     The output file is opened (and thereby truncated) before the
     processing starts.
     """
     resource = resourcemgr.get_resource(resource_id, version=version)
     with open(filename, "wb") as out_file:
         index_entries = indexmgr.pre_process_resource(resource)
         pickle.dump(index_entries, out_file)
Exemplo n.º 13
0
def get_entry(resource_id: str, entry_id: str, version: Optional[int] = None):
    """Look up ``entry_id`` in the given resource (optionally a specific version).

    Returns whatever ``get_entry_by_entry_id`` returns for the resolved
    resource.
    """
    return get_entry_by_entry_id(
        get_resource(resource_id, version=version),
        entry_id,
    )
Exemplo n.º 14
0
def get_history(
    resource_id: str,
    user_id: Optional[str] = None,
    entry_id: Optional[str] = None,
    from_date: Optional[int] = None,
    to_date: Optional[int] = None,
    from_version: Optional[int] = None,
    to_version: Optional[int] = None,
    current_page: Optional[int] = 0,
    page_size: Optional[int] = 100,
):
    """Return one page of history records for a resource plus the total count.

    Optional filters: ``user_id``, ``entry_id``, and a lower / upper bound.
    A version bound is only applied together with ``entry_id``; otherwise
    the corresponding date bound (if given) is used. ``from_version`` and
    both dates are inclusive, ``to_version`` is exclusive.

    Returns:
        A tuple ``(records, total)`` where ``records`` is a list of dicts
        with the keys ``timestamp``, ``message``, ``entry_id``, ``version``,
        ``op``, ``user_id`` and ``diff`` (against the previous version), and
        ``total`` is the number of matching rows before paging.
    """
    resource_obj = get_resource(resource_id)
    history_model = resource_obj.history_model
    timestamp_field = history_model.timestamp
    query = history_model.query

    if user_id:
        query = query.filter_by(user_id=user_id)
    if entry_id:
        # History rows refer to the main table's row id, not the entry_id.
        current_entry = resource_obj.model.query.filter_by(
            entry_id=entry_id).first()
        query = query.filter_by(entry_id=current_entry.id)

    version_field = history_model.version

    # Lower bound.
    if entry_id and from_version:
        query = query.filter(version_field >= from_version)
    elif from_date is not None:
        query = query.filter(timestamp_field >= from_date)

    # Upper bound.
    if entry_id and to_version:
        query = query.filter(version_field < to_version)
    elif to_date is not None:
        query = query.filter(timestamp_field <= to_date)

    page_offset = current_page * page_size
    paged_query = query.limit(page_size).offset(page_offset)
    total = query.count()

    records = []
    for history_entry in paged_query:
        # TODO fix this, entry_id in history refers to the "normal" id in non-history table
        owner_row = resource_obj.model.query.filter_by(
            id=history_entry.entry_id).first()
        public_entry_id = owner_row.entry_id

        # TODO fix this, we should get the diff in another way, probably store the diffs directly in the database
        entry_version = history_entry.version
        if entry_version > 1:
            previous_row = history_model.query.filter_by(
                entry_id=history_entry.entry_id,
                version=entry_version - 1).first()
            previous_body = json.loads(previous_row.body)
        else:
            # The first version is diffed against an empty document.
            previous_body = {}

        current_body = json.loads(history_entry.body)
        history_diff = jsondiff.compare(previous_body, current_body)

        records.append({
            "timestamp": history_entry.timestamp,
            "message": history_entry.message or "",
            "entry_id": public_entry_id,
            "version": entry_version,
            "op": history_entry.op,
            "user_id": history_entry.user_id,
            "diff": history_diff,
        })

    return records, total