def _resolve(self, text: Union[str, List[str]], target: Optional[str], type: Optional[str],
             strategy: ResolvingStrategy, resolving_context: Any, limit: Optional[str],
             threshold: Optional[float] = None) -> Optional[List[Dict]]:
    """Look up `text` among labels and notations with a SPARQL CONSTRUCT query.

    Parameters:
        text: the string to search for. A list is handed to not_supported().
        target: forwarded unchanged to self.service.perform_query().
        type: when truthy, added to the graph pattern as `a {type}`.
        strategy: EXACT_MATCH uses equality filters and forces the limit to 1;
            BEST_MATCH uses case-insensitive regex filters and forces the limit to 1;
            any other value uses the regex filters with the caller's limit.
        resolving_context: not used by this implementation.
        limit: maximum number of matches, rendered into the LIMIT clause.
        threshold: not used by this implementation. It used to be declared as
            `threshold=Optional[float]`, which made the typing object the default
            value; it is now a real annotation with a None default so callers that
            omit it keep working.

    Returns:
        Whatever self.service.perform_query() returns for the built query.
    """
    if isinstance(text, list):
        not_supported(("text", list))

    # Only non-deprecated entries are considered.
    first_filters = f"?id <{self.service.deprecated_property}> \"false\"^^xsd:boolean"
    if type:
        first_filters = f"{first_filters} ; a {type}"

    # NOTE(review): `text` is interpolated as-is into the SPARQL string literals below;
    # a value containing a double quote or a backslash will change the query text.
    if strategy == strategy.EXACT_MATCH:
        label_filter = f" FILTER (?label = \"{text}\")"
        notation_filter = f" FILTER (?notation = \"{text}\")"
        limit = 1
    else:
        label_filter = f" FILTER regex(?label, \"{text}\", \"i\")"
        notation_filter = f" FILTER regex(?notation, \"{text}\", \"i\")"
        if strategy == strategy.BEST_MATCH:
            limit = 1

    # NOTE(review): when `limit` is None the clause below is rendered as "LIMIT None".
    # {0} = shared graph pattern, {1}/{2} = label/notation filters, {3} = limit.
    query = (
        " CONSTRUCT {{ ?id a ?type ; label ?label ; prefLabel ?prefLabel ;"
        " subClassOf ?subClassOf ; isDefinedBy ?isDefinedBy ; notation ?notation }}"
        " WHERE {{ ?id a ?type ; label ?label ;"
        " OPTIONAL {{ ?id subClassOf ?subClassOf ; }}"
        " OPTIONAL {{ ?id prefLabel ?prefLabel . }}"
        " OPTIONAL {{ ?id isDefinedBy ?isDefinedBy . }}"
        " OPTIONAL {{ ?id notation ?notation . }}"
        " {{ SELECT * WHERE {{"
        " {{ {0} ; label ?label {1} }}"
        " UNION"
        " {{ {0} ; notation ?notation {2} }}"
        " }} LIMIT {3} }}"
        " }} "
    ).format(first_filters, label_filter, notation_filter, limit)

    expected_fields = [
        "type", "label", "prefLabel", "subClassOf", "isDefinedBy", "notation"
    ]
    return self.service.perform_query(query, target, expected_fields, limit)
def search(self, resolvers: Optional[List["Resolver"]], *filters, **params) -> List[Resource]:
    """Build a SPARQL select query from `filters`, run it, and fetch the matching resources.

    Parameters:
        resolvers: not used by this implementation.
        *filters: filter objects handed to build_query_statements(); when the first
            positional value is a dict it is first converted with
            create_filters_from_dict().
        **params: optional keywords read here, with their defaults:
            debug (False), limit (100), offset (None), deprecated (False),
            cross_bucket (False), bucket (None), search_in_graph (True),
            distinct (False).

    Returns:
        The list of resources produced by self.service.to_resource() for each
        fetched result.

    Raises:
        ValueError: when self.model_context is None, or when converting a fetched
            result fails (the original exception is chained as the cause).
    """
    if self.model_context is None:
        raise ValueError("context model missing")

    debug = params.get("debug", False)
    limit = params.get("limit", 100)
    offset = params.get("offset", None)
    deprecated = params.get("deprecated", False)
    cross_bucket = params.get("cross_bucket", False)
    bucket = params.get("bucket", None)
    search_in_graph = params.get("search_in_graph", True)
    distinct = params.get("distinct", False)

    # Restrict the search to one project unless a cross-bucket search is requested.
    project_statements = ''
    if bucket and not cross_bucket:
        not_supported(("bucket", True))
    elif bucket:
        project_iri = '/'.join([self.endpoint, 'projects', bucket])
        project_statements = f"Filter (?project = <{project_iri}>)"
    elif not cross_bucket:
        project_iri = '/'.join([self.endpoint, 'projects', self.organisation, self.project])
        project_statements = f"Filter (?project = <{project_iri}>)"

    if filters and isinstance(filters[0], dict):
        filters = create_filters_from_dict(filters[0])
    query_statements, query_filters = build_query_statements(self.model_context, filters)

    # The project and deprecation triples always come first in the pattern.
    query_statements.insert(0, f"<{self.service.project_property}> ?project")
    query_statements.insert(
        1,
        f"<{self.service.deprecated_property}> {format_type[CategoryDataType.BOOLEAN](deprecated)}"
    )
    statements = "\n".join(
        (";\n ".join(query_statements), ".\n ".join(query_filters)))
    query = _create_select_query(f"?id {statements} {project_statements}", distinct,
                                 search_in_graph)

    found = self.sparql(query, debug=debug, limit=limit, offset=offset)
    results = self.service.batch_request(found, BatchAction.FETCH, None, QueryingError)

    resources = []
    for result in results:
        resource = result.resource
        try:
            resource = self.service.to_resource(result.response)
        except Exception as e:
            self.service.synchronize_resource(resource, result.response,
                                              self.search.__name__, False, False)
            raise ValueError(e) from e
        else:
            # This used to sit in a `finally` clause, so it also ran after the failure
            # call above and overwrote it with the (True, True) flags before the error
            # propagated. It now runs only when the conversion succeeded.
            self.service.synchronize_resource(resource, result.response,
                                              self.search.__name__, True, True)
            resources.append(resource)
    return resources
def retrieve(self, id: str, version: Optional[Union[int, str]], cross_bucket: bool) -> Resource:
    """Read a single record from the service and convert it with _to_resource().

    Parameters:
        id: identifier passed to self.service.read().
        version: version passed to self.service.read().
        cross_bucket: a truthy value is handed to not_supported().

    Raises:
        RetrievalError: when the service reports StoreLibrary.RecordMissing.
    """
    if cross_bucket:
        not_supported(("cross_bucket", True))

    try:
        found = self.service.read(id, version)
    except StoreLibrary.RecordMissing:
        raise RetrievalError("resource not found")

    # Conversion happens outside the try block so its own errors are not remapped.
    return _to_resource(found)
def _add_prov_property(self, resource, prov_type, reference_property, reference_type, keep,
                       versioned, **kwargs):
    """Create a Resource of type `prov_type` that references `resource`.

    Parameters:
        resource: either an identifier (str) or a Resource.
        prov_type: value given as `type` to the returned Resource.
        reference_property: name of the attribute on the returned Resource that
            receives the reference.
        reference_type: value given as `type` to the reference when `resource` is a str.
        keep: passed to self._forge.reshape() when `resource` is a Resource.
        versioned: passed to self._forge.reshape() when `resource` is a Resource;
            combined with a str `resource` it is handed to not_supported().
        **kwargs: extra keyword arguments forwarded to the returned Resource.

    Returns:
        The new Resource with `reference_property` set to the reference.

    Raises:
        TypeError: when `resource` is neither a str nor a Resource. This case used to
            fall through both branches and fail with UnboundLocalError.
    """
    if versioned and isinstance(resource, str):
        not_supported(("versioned with resource:str", True))

    if isinstance(resource, str):
        reference = Resource(type=reference_type, id=resource)
    elif isinstance(resource, Resource):
        reference = self._forge.reshape(resource, keep, versioned)
    else:
        raise TypeError(
            f"resource should be a str or a Resource, not {resource.__class__.__name__}")

    result = Resource(type=prov_type, **kwargs)
    setattr(result, reference_property, reference)
    return result
def _resolve(self, text: Union[str, List[str]], target: Optional[str], type: Optional[str],
             strategy: ResolvingStrategy, resolving_context: Any, limit: Optional[str],
             threshold: Optional[float]) -> Optional[List[Dict]]:
    """Look up `text` among name, givenName and familyName with a SPARQL CONSTRUCT query.

    Parameters:
        text: the string to search for. A list is handed to not_supported().
        target: forwarded unchanged to self.service.perform_query().
        type: when truthy, added to the graph pattern as `a <{type}>`.
        strategy: EXACT_MATCH uses equality filters, anything else uses
            case-insensitive regex filters; EXACT_MATCH and BEST_MATCH force the
            LIMIT clause to 1.
        resolving_context, threshold: not used by this implementation.
        limit: rendered into the LIMIT clause of the query.

    Returns:
        Whatever self.service.perform_query() returns for the built query.
    """
    if isinstance(text, list):
        not_supported(("text", list))

    # Only non-deprecated entries are considered.
    base_pattern = f'?id <{self.service.deprecated_property}> "false"^^xsd:boolean'
    if type:
        base_pattern = f"{base_pattern} ; a <{type}>"

    is_exact = strategy == strategy.EXACT_MATCH
    if is_exact:
        filter_template = ' FILTER (?{0} = "{1}")'
    else:
        filter_template = ' FILTER regex(?{0}, "{1}", "i")'
    by_name, by_given_name, by_family_name = (
        filter_template.format(variable, text)
        for variable in ("name", "givenName", "familyName")
    )

    if is_exact or strategy == strategy.BEST_MATCH:
        limit = 1

    # {0} = shared graph pattern, {1}..{3} = per-property filters, {4} = limit.
    query = (
        " CONSTRUCT {{ ?id a ?type ; name ?name ; givenName ?givenName ; familyName ?familyName }}"
        " WHERE {{ ?id a ?type ."
        " OPTIONAL {{ ?id name ?name . ?id givenName ?givenName . ?id familyName ?familyName . }}"
        " {{ SELECT * WHERE {{"
        " {{ {0} ; name ?name {1} }}"
        " UNION"
        " {{ {0} ; familyName ?familyName; givenName ?givenName {2} }}"
        " UNION"
        " {{ {0} ; familyName ?familyName; givenName ?givenName {3} }}"
        " }} LIMIT {4} }}"
        " }} "
    ).format(base_pattern, by_name, by_given_name, by_family_name, limit)

    # The limit is only applied inside the query; perform_query() receives None.
    return self.service.perform_query(
        query, target, ["type", "name", "familyName", "givenName"], None)
def _resolve(self, text: Union[str, List[str]], target: Optional[str], type: Optional[str],
             strategy: ResolvingStrategy, resolving_context: Any, limit: Optional[str],
             threshold: Optional[float] = None) -> Optional[List[Dict[str, str]]]:
    """Match `text` against the in-memory records held by self.service.

    Parameters:
        text: the string to search for. A list is handed to not_supported();
            a falsy value never matches anything.
        target: key into self.service; when None the records of every entry in
            self.targets are searched and the properties default to
            ["label", "acronym"].
        type: when not None, only records whose "type" equals it are considered.
        strategy: EXACT_MATCH returns the first record with a property equal to
            `text`; BEST_MATCH returns the closest record according to _dist();
            otherwise all matching records are returned, closest first.
        resolving_context, limit: not used by this implementation.
        threshold: not used by this implementation. It used to be declared as
            `threshold=Optional[float]`, which made the typing object the default
            value; it is now a real annotation with a None default.

    Returns:
        A single record (EXACT_MATCH, BEST_MATCH), a list of records (other
        strategies), or None when nothing matches.
    """
    if isinstance(text, list):
        not_supported(("text", list))

    resolve_with_properties = None
    if target is not None:
        data = self.service[target]["data"]
        resolve_with_properties = self.service[target]["resolve_with_properties"]
    else:
        data = chain.from_iterable(
            [self.service[target]["data"] for target in self.targets])
    if resolve_with_properties is None:
        resolve_with_properties = ["label", "acronym"]

    # Every match condition below requires a truthy text, so decide this once.
    if not text:
        return None

    if type is not None:
        data = (x for x in data if x.get("type", None) == type)

    if strategy == ResolvingStrategy.EXACT_MATCH:
        exact_matches = (
            x for x in data
            if any(p in x and text == x[p] for p in resolve_with_properties)
        )
        return next(exact_matches, None)

    needle = str(text).lower()
    scored = []
    for x in data:
        if any(p in x and needle in str(x[p]).lower() for p in resolve_with_properties):
            # NOTE(review): the distance is computed on the first property present in
            # the record, which is not necessarily the property that matched.
            first_value = [str(x[prop]) for prop in resolve_with_properties if prop in x][0]
            scored.append((_dist(first_value, text), x))
    if not scored:
        return None

    ordered = sorted(scored, key=lambda pair: pair[0])
    if strategy == ResolvingStrategy.BEST_MATCH:
        return ordered[0][1]
    # Case: ResolvingStrategy.ALL_MATCHES.
    return [pair[1] for pair in ordered]
def search(self, resolvers: Optional[List["Resolver"]], *filters, **params) -> List[Resource]:
    """Search entry point; not implemented at this level, it only calls not_supported()."""
    # Positional arguments in 'filters' are instances of type Filter from wrappings/paths.py.
    # Keyword arguments in 'params' could be:
    #   - debug: bool,
    #   - limit: int,
    #   - offset: int,
    #   - deprecated: bool,
    #   - resolving: str, with values in ('exact', 'fuzzy'),
    #   - lookup: str, with values in ('current', 'children').
    # POLICY Should use sparql() when SPARQL is chosen here as the querying language.
    # POLICY Should notify of failures with exception QueryingError including a message.
    # POLICY Resource _store_metadata should be set using wrappers.dict.wrap_dict().
    # POLICY Resource _synchronized should be set to True.
    # TODO These two operations might be abstracted here when other stores will be implemented.
    not_supported()
def _sparql(self, query: str, limit: int, offset: int) -> List[Resource]:
    """SPARQL querying hook; not implemented at this level, it only calls not_supported()."""
    # POLICY Should notify of failures with exception QueryingError including a message.
    # POLICY Resource _store_metadata should not be set (default is None).
    # POLICY Resource _synchronized should not be set (default is False).
    not_supported()
def _freeze_many(self, resources: List[Resource]) -> None:
    """Bulk freezing hook; not implemented at this level, it only calls not_supported()."""
    # Bulk freezing could be optimized by overriding this method in the specialization.
    # POLICY Should reproduce self._freeze_one() and execution._run_one() behaviours.
    not_supported()
def _tag_one(self, resource: Resource, value: str) -> None:
    """Single-resource tagging hook; not implemented at this level, it only calls not_supported()."""
    # POLICY Should notify of failures with exception TaggingError including a message.
    # POLICY If tagging modifies the resource, _store_metadata should be updated.
    not_supported()
def _deprecate_one(self, resource: Resource) -> None:
    """Single-resource deprecation hook; not implemented at this level, it only calls not_supported()."""
    # POLICY Should notify of failures with exception DeprecationError including a message.
    # POLICY Resource _store_metadata should be set using wrappers.dict.wrap_dict().
    # TODO This operation might be abstracted here when other stores will be implemented.
    not_supported()
def _download_one(self, url: str, path: str) -> None:
    """Single-file download hook; not implemented at this level, it only calls not_supported()."""
    # path: FilePath.
    # POLICY Should notify of failures with exception DownloadingError including a message.
    not_supported()
def _tag_many(self, resources: List[Resource], value: str) -> None:
    """Bulk tagging hook; not implemented at this level, it only calls not_supported()."""
    # Bulk tagging could be optimized by overriding this method in the specialization.
    # POLICY Should reproduce self._tag_one() and execution._run_one() behaviours.
    # POLICY If tagging modifies the resource, it should be done with status='_synchronized'.
    not_supported()
def _mappings(self, source: str) -> Dict[str, List[str]]:
    """Mapping discovery hook for `source`; not implemented at this level, it only calls not_supported()."""
    # POLICY Should raise ValueError if 'source' is not managed by the Model.
    # POLICY Keys should be managed resource types with mappings for the given data source.
    # POLICY Values should be available mapping types for the resource type.
    # The discovery strategy cannot be abstracted as it depends on the Model data organization.
    not_supported()
def _sources(self) -> List[str]:
    """Data source discovery hook; not implemented at this level, it only calls not_supported()."""
    # The discovery strategy cannot be abstracted as it depends on the Model data organization.
    not_supported()
def _service_from_directory(dirpath: Path, targets: Dict[str, str]) -> Any:
    """Directory-based service factory; not implemented at this level, it only calls not_supported()."""
    # NOTE(review): there is no 'self' parameter; presumably declared as a static method
    # by a decorator that is not visible here - confirm.
    not_supported()
def _service_from_web_service(endpoint: str, targets: Dict[str, str]) -> Any:
    """Web-service-based service factory; not implemented at this level, it only calls not_supported()."""
    # NOTE(review): there is no 'self' parameter; presumably declared as a static method
    # by a decorator that is not visible here - confirm.
    not_supported()
def _generate_context(self) -> Dict:
    """Context generation hook; not implemented at this level, it only calls not_supported()."""
    # POLICY Should generate the Context from the Model data.
    not_supported()
def resolve_context(self, iri: str) -> Dict:
    """Context resolution hook for `iri`; not implemented at this level, it only calls not_supported()."""
    # POLICY Should retrieve the resolved context as a dictionary.
    not_supported()
def _prefixes(self) -> Dict[str, str]:
    """Prefix listing hook; not implemented at this level, it only calls not_supported()."""
    not_supported()
def schema_id(self, type: str) -> str:
    """Schema id lookup hook for `type`; not implemented at this level, it only calls not_supported()."""
    # POLICY Should retrieve the schema id of the given type.
    not_supported()
def mapping(self, entity: str, source: str, type: Callable) -> Mapping:
    """Mapping selection hook; not implemented at this level, it only calls not_supported()."""
    # POLICY Should raise ValueError if 'entity' or 'source' is not managed by the Model.
    # The selection strategy cannot be abstracted as it depends on the Model data organization.
    not_supported()
def _download_one(self, url: str, path: str, store_metadata: Optional[DictWrapper],
                  cross_bucket: bool) -> None:
    """Single-file download hook; not implemented at this level, it only calls not_supported()."""
    # path: FilePath.
    # POLICY Should notify of failures with exception DownloadingError including a message.
    not_supported()
def _register_many(self, resources: List[Resource], schema_id: str) -> None:
    """Bulk registration hook; not implemented at this level, it only calls not_supported()."""
    # Bulk registration could be optimized by overriding this method in the specialization.
    # POLICY Should reproduce self._register_one() and execution._run_one() behaviours.
    not_supported()
def _service_from_store(store: Callable, targets: Dict[str, str], **store_config) -> Any:
    """Store-based service factory; not implemented at this level, it only calls not_supported()."""
    # NOTE(review): there is no 'self' parameter; presumably declared as a static method
    # by a decorator that is not visible here - confirm.
    not_supported()
def _retrieve_filename(self, id: str) -> str:
    """File name lookup hook for `id`; not implemented at this level, it only calls not_supported()."""
    # TODO This operation might be adapted if other file metadata are needed.
    not_supported()
def _upload_one(self, path: Path, content_type: str) -> Any:
    """Single-file upload hook; not implemented at this level, it only calls not_supported()."""
    # path: FilePath.
    # POLICY Should notify of failures with exception UploadingError including a message.
    not_supported()
def _update_many(self, resources: List[Resource], schema_id: Optional[str]) -> None:
    """Bulk update hook; not implemented at this level, it only calls not_supported()."""
    # Bulk update could be optimized by overriding this method in the specialization.
    # POLICY Should reproduce self._update_one() and execution._run_one() behaviours.
    not_supported()