def get_ext_ident_object(self, triple_store, subject, field):
    """Collect identifier strings for ``subject`` from ``triple_store``.

    Each object returned by ``triple_store.objects`` for the pair
    ``(subject, field.predicate)`` contributes one entry: the text after
    its last ``/`` when it passes ``_is_valid_uri``, otherwise the object
    itself, unchanged.

    When no object is found and ``subject`` passes ``_is_valid_uri``, the
    text after the last ``/`` of ``subject`` is used as the only entry.

    Returns
    -------
    list
        The collected identifiers; empty when nothing applies.
    """
    identifiers = []
    for obj in triple_store.objects(subject=subject, predicate=field.predicate):
        if _is_valid_uri(obj):
            identifiers.append(obj.split('/')[-1])
        else:
            identifiers.append(obj)

    if identifiers:
        return identifiers

    # Nothing attached to the subject: fall back to the subject itself.
    if _is_valid_uri(subject):
        return [subject.split('/')[-1]]
    return identifiers
def compute_qname(self, uri, generate=True):
    """Return the ``(prefix, namespace, name)`` triple for ``uri``.

    Results are memoised in ``self.__cache``. On a cache miss the URI is
    split with ``split_uri`` and the prefix is looked up in ``self.store``.
    If the store knows no prefix, a new ``ns<N>`` prefix is bound, using
    the first N (counting from 1) for which ``self.store.namespace``
    returns a falsy value.

    Parameters
    ----------
    uri
        The URI to split.
    generate
        When false, no new prefix is generated for an unknown namespace.

    Raises
    ------
    ValueError
        ``uri`` does not pass ``_is_valid_uri``.
    KeyError
        The namespace has no prefix and ``generate`` is false.
    """
    if not _is_valid_uri(uri):
        raise ValueError(
            '"{}" does not look like a valid URI, cannot serialize this. Did you want to urlencode it?'.format(uri)
        )

    if uri in self.__cache:
        return self.__cache[uri]

    namespace, name = split_uri(uri)
    namespace = URIRef(namespace)
    prefix = self.store.prefix(namespace)

    if prefix is None:
        if not generate:
            raise KeyError(
                "No known prefix for {} and generate=False".format(namespace)
            )
        # Probe ns1, ns2, ... until an unused prefix is found.
        counter = 1
        prefix = "ns%s" % counter
        while self.store.namespace(prefix):
            counter += 1
            prefix = "ns%s" % counter
        self.bind(prefix, namespace)

    self.__cache[uri] = (prefix, namespace, name)
    return self.__cache[uri]
def __get_uri_from_graphfile_blob(self, oid):
    """Read a graph file blob and return the graph URI it contains.

    Args
    ----
        oid: String oid of a graph file

    Returns
    -------
        graphuri: String with the graph URI, or None when
                  ``self.repository.get`` raises ValueError for ``oid``

    Raises
    ------
        InvalidConfigurationError: the decoded, stripped blob content is
                                   empty or does not pass ``_is_valid_uri``
    """
    try:
        graph_blob = self.repository.get(oid)
    except ValueError:
        logger.debug("Object with OID {} not found in repository.".format(oid))
        return None

    graph_uri = graph_blob.read_raw().decode().strip()
    if not graph_uri or not _is_valid_uri(graph_uri):
        raise InvalidConfigurationError(
            "No graph URI found in blob with OID {}.".format(oid))
    return graph_uri
def _prepare_object(self, prop: str, prop_type: str, value: Any) -> rdflib.term.Identifier:
    """
    Build the object term of a triple from a property value.

    For the ``uriorcurie`` and ``xsd:anyURI`` types, a string value that
    ``PrefixManager.is_curie`` accepts is converted with ``self.uriref``;
    a string value that ``PrefixManager.is_iri`` accepts and that also
    passes ``_is_valid_uri`` becomes a ``URIRef``; anything else becomes
    an untyped ``Literal``. Other ``xsd``-prefixed types yield a
    ``Literal`` typed with the expanded ``prop_type``; all remaining types
    yield a ``Literal`` typed with the expansion of ``xsd:string``.

    Parameters
    ----------
    prop: str
        property name (not used by this method)
    prop_type: str
        property type
    value: Any
        property value

    Returns
    -------
    rdflib.term.Identifier
        An instance of rdflib.term.Identifier

    """
    if prop_type in ('uriorcurie', 'xsd:anyURI'):
        if isinstance(value, str):
            if PrefixManager.is_curie(value):
                return self.uriref(value)
            if PrefixManager.is_iri(value) and _is_valid_uri(value):
                return URIRef(value)
        # Non-strings and strings that are neither CURIE nor valid IRI.
        return Literal(value)

    if prop_type.startswith('xsd'):
        datatype = self.prefix_manager.expand(prop_type)
    else:
        datatype = self.prefix_manager.expand("xsd:string")
    return Literal(value, datatype=datatype)
def compute_qname(self, uri, generate=True):
    """Return the ``(prefix, namespace, name)`` triple for ``uri``.

    Results are memoised in ``self.__cache``. On a cache miss:

    1. ``split_uri`` splits the URI. If it raises ``ValueError`` the whole
       URI is tried as the namespace, which is accepted only when the
       store already has a prefix for it; the local name is then empty.
    2. The namespace is registered in ``self.__strie`` (via
       ``insert_strie``) if missing, and ``get_longest_namespace`` may
       replace it with a longer candidate -- presumably the longest known
       namespace that prefixes ``uri``; the name is re-derived from it.
    3. The prefix is looked up in the store; if there is none, a fresh
       ``ns<N>`` prefix is bound (first N from 1 whose
       ``self.store.namespace`` lookup is falsy).

    Parameters
    ----------
    uri
        The URI to split.
    generate
        When false, no new prefix is generated for an unknown namespace.

    Raises
    ------
    ValueError
        ``uri`` does not pass ``_is_valid_uri``, or it cannot be split and
        has no prefix bound to it as a whole.
    KeyError
        The namespace has no prefix and ``generate`` is false.
    """
    if not _is_valid_uri(uri):
        raise ValueError(
            '"{}" does not look like a valid URI, cannot serialize this. Did you want to urlencode it?'
            .format(uri))

    if uri in self.__cache:
        return self.__cache[uri]

    try:
        namespace, name = split_uri(uri)
    except ValueError:
        namespace = URIRef(uri)
        if not self.store.prefix(namespace):
            raise
        # The whole URI is the namespace, so the local name is empty.
        # Without this, `name` stayed unbound whenever the trie lookup
        # below did not override it, and the cache write failed with
        # UnboundLocalError.
        name = ""

    if namespace not in self.__strie:
        insert_strie(self.__strie, self.__trie, namespace)

    if self.__strie[namespace]:
        pl_namespace = get_longest_namespace(self.__strie[namespace], uri)
        if pl_namespace is not None:
            namespace = pl_namespace
            name = uri[len(namespace):]

    namespace = URIRef(namespace)
    prefix = self.store.prefix(namespace)  # warning multiple prefixes problem

    if prefix is None:
        if not generate:
            raise KeyError(
                "No known prefix for {} and generate=False".format(
                    namespace))
        # Probe ns1, ns2, ... until an unused prefix is found.
        num = 1
        while True:
            prefix = "ns%s" % num
            if not self.store.namespace(prefix):
                break
            num += 1
        self.bind(prefix, namespace)

    self.__cache[uri] = (prefix, namespace, name)
    return self.__cache[uri]
def compute_qname(self, uri, generate=True):
    """Return the ``(prefix, namespace, name)`` triple for ``uri``.

    Results are memoised in ``self.__cache``. On a cache miss the URI is
    split with ``split_uri`` and the prefix is looked up in ``self.store``.
    If the store knows no prefix, a new ``ns<N>`` prefix is bound, using
    the first N (counting from 1) for which ``self.store.namespace``
    returns a falsy value.

    Parameters
    ----------
    uri
        The URI to split.
    generate
        When false, no new prefix is generated for an unknown namespace.

    Raises
    ------
    Exception
        ``uri`` does not pass ``_is_valid_uri``, or the namespace has no
        prefix and ``generate`` is false.
    """
    if not _is_valid_uri(uri):
        raise Exception('"%s" does not look like a valid URI, I cannot serialize this. Perhaps you wanted to urlencode it?'%uri)

    if uri not in self.__cache:
        namespace, name = split_uri(uri)
        namespace = URIRef(namespace)
        prefix = self.store.prefix(namespace)
        if prefix is None:
            if not generate:
                # The namespace is now interpolated; the message used to
                # be raised with a literal, unformatted "%s".
                raise Exception(
                    "No known prefix for %s and generate=False" % namespace)
            # Probe ns1, ns2, ... until an unused prefix is found.
            num = 1
            while True:
                prefix = "ns%s" % num
                if not self.store.namespace(prefix):
                    break
                num += 1
            self.bind(prefix, namespace)
        self.__cache[uri] = (prefix, namespace, name)
    return self.__cache[uri]
def __get_uri_from_graphfile_blob(self, oid):
    """Return the graph URI stored as the content of a graph file blob.

    Args
    ----
        oid: String oid of a graph file

    Returns
    -------
        graphuri: String with the graph URI; None if looking up ``oid``
                  in ``self.repository`` raises ValueError

    Raises
    ------
        InvalidConfigurationError: the blob content (decoded, whitespace
                                   stripped) is empty or fails
                                   ``_is_valid_uri``
    """
    try:
        raw_object = self.repository.get(oid)
    except ValueError:
        logger.debug("Object with OID {} not found in repository.".format(oid))
        return None
    else:
        candidate = raw_object.read_raw().decode().strip()

    looks_like_uri = bool(candidate) and _is_valid_uri(candidate)
    if looks_like_uri:
        return candidate
    raise InvalidConfigurationError("No graph URI found in blob with OID {}.".format(oid))
def isValidURI(self, str_uri):
    """Check ``str_uri`` with ``term._is_valid_uri`` and ``self.isascii``.

    Returns the falsy result of ``term._is_valid_uri`` when that check
    fails; otherwise returns whatever ``self.isascii(str_uri)`` returns.
    """
    # Original author's note: use term._is_valid_unicode(str_uri)
    uri_check = term._is_valid_uri(str_uri)
    if not uri_check:
        return uri_check
    return self.isascii(str_uri)
def _validate(self, value):
    """Run the parent class validation, then require a valid URI.

    Raises
    ------
    InvalidURI
        ``value`` does not pass ``_is_valid_uri``.
    """
    super(URIRefField, self)._validate(value)
    if _is_valid_uri(value):
        return
    raise InvalidURI(value)
def __cli_parse__(args):
    """Convert the MRS results of a test suite directory to RDF and serialize them.

    Reads the following attributes of ``args``:

    - ``profile``: path of the test suite directory.
    - ``prefix``: URI prefix for the generated resources; leading and
      trailing ``/`` are stripped.
    - ``semrep``: name of the semantic representation (lower-cased); it
      selects the converters through ``_get_converters`` and the RDF
      class/relation through ``_get_RDF_semrep``.
    - ``verbosity``: 1 sets the logger to INFO, 2 or more to DEBUG.
    - ``output`` and ``format``: passed to the graph serializer.

    For every row selected from the test suite, item, result and
    semantic-representation nodes are created under ``prefix``, linked to
    the profile node, and handed to the selected ``to_rdf`` converter.
    Results that are not well formed are reported with a warning but are
    still converted.

    Any exception raised during validation, conversion or serialization is
    caught and logged with ``logger.error``; nothing is re-raised.
    """
    # Local import: only the named level constants are needed here.
    import logging

    # TODO: remove the not well formed sentences? add option?
    # TODO: print MRS or parse to DMRS format?
    path = args.profile
    prefix = args.prefix.strip("/")
    semrep = args.semrep.lower()

    # Setting verbosity; need to figure a better solution.
    if args.verbosity == 1:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 2:
        logger.setLevel(logging.DEBUG)

    try:
        # validates path
        if not isdir(path):
            raise NotADirectoryError(f"Path is not a directory: {path}")
        # validates profile
        if not is_database_directory(path):
            raise TSDBError(f'Invalid test suite directory: {path}')
        # validates URI prefix
        if not _is_valid_uri(prefix):
            raise Exception(f'Invalid URI: {prefix}')
        # validate format and get converter
        to_rdf, from_mrs = _get_converters(semrep)

        # open Test Suite and start conversion
        ts = itsdb.TestSuite(path)
        logger.info(
            f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}"
        )

        # Creating the store and the default graph
        store = plugin.get("IOMemory", Store)()
        defaultGraph = Graph(store, identifier=BNode())
        PROFILE = URIRef(prefix)  # review later
        defaultGraph.add((PROFILE, RDF.type, DELPH.Profile))
        semrepURI, prof_semrep_relation = _get_RDF_semrep(semrep, store)
        store.bind("erg", ERG)
        store.bind("delph", DELPH)
        store.bind("pos", POS)
        # NOTE: binding the user prefix too (e.g. as "upref") may be useful.

        # The tsql takes some time to be processed:
        logger.info("Loading the profile")
        profile_data = tsql.select('parse-id result-id i-input mrs', ts)

        logger.info("Converting the profile")
        # Iterating over the results:
        for (parse_id, result_id, text, mrs_string) in profile_data:
            logger.debug(
                f"Converting the result {result_id} of sentence {parse_id}")
            m = simplemrs.decode(mrs_string)

            # making sure of the well formedness of "m"
            if not is_well_formed(m):
                logger.warning(
                    f"Result {result_id} of sentence {parse_id} is not well formed"
                )
                # NOTE: deliberately no `continue` here -- ill-formed
                # results are only reported, not skipped.

            # converting the MRS object to the representation intended to be converted
            obj = from_mrs(m)

            # Creating URIs for relevant resources.
            # The item part may be redundant, maybe iterate before the items.
            ITEM = URIRef(f"{prefix}/{parse_id}")
            RESULT = URIRef(f"{prefix}/{parse_id}/{result_id}")
            SEMREPI = URIRef(f"{prefix}/{parse_id}/{result_id}/{semrep}")

            # adding types:
            defaultGraph.add((ITEM, RDF.type, DELPH.Item))
            defaultGraph.add((RESULT, RDF.type, DELPH.Result))
            defaultGraph.add((SEMREPI, RDF.type, semrepURI))

            # Associating text to item:
            defaultGraph.add((ITEM, DELPH.hasText, Literal(text)))

            # Linking those nodes:
            defaultGraph.add((PROFILE, DELPH.hasItem, ITEM))
            defaultGraph.add((ITEM, DELPH.hasResult, RESULT))
            defaultGraph.add((RESULT, prof_semrep_relation, SEMREPI))

            to_rdf(obj, SEMREPI, store, defaultGraph)

        # serializes results
        logger.info(f"Serializing results to {args.output}")
        ConjunctiveGraph(store).serialize(destination=args.output,
                                          format=args.format)
        logger.info("DONE")
    except Exception as e:
        # CLI boundary: report the failure instead of propagating it.
        logger.error(e)