def _build_url_query(self):
    """Query the object directly through the URL(s) that identify it.

    When ``self.has_url`` holds a single URL/URI, or a list-like
    collection of them, the query is unambiguous: every URL becomes the
    subject of an RDF triple whose predicate and object are taken from
    ``self.args``.
    """

    def triple_about(url):
        # One `<URL> ?pred ?obj` triple having the given URL as subject.
        return RDFTriple(
            subject=url,
            predicate=self.args['predicate'],
            object=self.args['object'],
            language=self.query_language)

    if not is_listlike(self.has_url):
        # A simple URL: the RDF triple will be:
        # `<URL> ?pred ?obj .`
        self.query_builder.add_query_triple(triple_about(self.has_url))
        return

    # A list/set/tuple of URLs: the RDF triple will be:
    # `{ <URL1> ?pred ?obj } UNION { <URL2> ?pred ?obj } .`
    alternatives = {triple_about(url) for url in self.has_url}
    self.query_builder.add_query_alternative_triples(alternatives)
def create_triples_for_single_element(self, entity_name, entity_value, strict_mode):
    """Add the query triple(s) for a 'has_…' attribute holding a single value.

    The value is formatted with ``format_value_for_RDF_compliance`` and
    used as the object of the triple(s).

    - In strict mode, and only if ``entity_name`` is a key of
      ``self.voc_attributes``, one triple per predicate listed there is
      built and the whole set is added as alternative triples.
    - Otherwise a single triple is added, whose predicate is the
      variable ``?<entity_name>``.

    :param entity_name: name of the 'has_…' attribute
    :param entity_value: value of the 'has_…' attribute
    :param strict_mode: whether strict mode (predicates restricted to
        ``self.voc_attributes``) is enabled
    :return: None
    """
    # The instantiated value of the 'has_…' variable is the object of
    # every triple built below.
    rdf_object = self.format_value_for_RDF_compliance(
        entity_value, u"%s_obj" % entity_name)

    def triple_with(predicate):
        return RDFTriple(
            subject=self.args['subject'],
            predicate=predicate,
            object=rdf_object,
            language=self.query_language)

    if not strict_mode or entity_name not in self.voc_attributes:
        # Predicate left undetermined: it is a SPARQL variable.
        self.query_builder.add_query_triple(
            triple_with(u'?%s' % entity_name))
        return

    alternatives = {
        triple_with(attribute_name)
        for attribute_name in self.voc_attributes[entity_name]
    }
    self.query_builder.add_query_alternative_triples(alternatives)
def test_get_variables():
    """RDFTriple - Variables of a triple built without arguments: should pass"""
    triple = RDFTriple()
    expected = {u'?%s_%i' % (role, triple.class_counter) for role in "sop"}
    # Only variables falling outside the expected set make this fail;
    # a missing variable is not detected by this difference.
    unexpected = set(triple.get_variables()) - expected
    assert not unexpected
def test_genericsparqlquery_base_case():
    """GenericSPARQLQuery - Base case, no issues: Should pass"""
    query = GenericSPARQLQuery()

    # Three triples sharing the ?person subject, created in this order.
    simone = RDFTriple(
        subject=u'?person',
        predicate=u'rdfs:label',
        object=u'"Simone de Beauvoir"@fr')
    birth = RDFTriple(
        subject=u'?person',
        predicate=u'dbpedia_owl:birthDate',
        object=u'?birthdate',  # 1908-01-09
        prefixes=[NameSpace.dbpedia_owl])
    gender = RDFTriple(
        subject=u'?person',
        predicate=u'<http://xmlns.com/foaf/0.1/gender>',
        object=u'?gender')
    triples = [simone, birth, gender]

    query.add_query_triples(triples)
    query.set_limit(10)
    query.commit()

    # Prefixes, endpoints and triples must not hold anything unexpected.
    expected_prefixes = {NameSpace.foaf, NameSpace.dbpedia_owl, NameSpace.rdfs}
    assert set(query.prefixes) <= expected_prefixes
    assert not query.endpoints.difference({Endpoint.DEFAULT})
    assert set(query.triples) <= set(triples)
    assert query.limit == u'LIMIT 10'

    # The generated query is compared token-wise, so the order of the
    # tokens does not matter.
    expected_query = (
        u'PREFIX foaf: <http://xmlns.com/foaf/0.1/> '
        u'PREFIX dbo: <http://dbpedia.org/ontology/> '
        u'PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> '
        u'SELECT DISTINCT * WHERE {'
        u' ?person rdfs:label "Simone de Beauvoir"@fr .'
        u' ?person dbo:birthDate ?birthdate .'
        u' ?person foaf:gender ?gender . '
        u'} '
        u'LIMIT 10')
    similarity = fuzz.token_sort_ratio(
        query.queries[Endpoint.DEFAULT], expected_query)
    assert similarity == 100

    # Two result rows, differing only by the spelling of the birth date.
    expected_results = [
        [
            ('person', 'http://dbpedia.org/resource/Simone_de_Beauvoir'),
            ('birthdate', birthdate),
            ('gender', 'female _(@en)'),
        ]
        for birthdate in ('1908-01-09', '1908-1-9')
    ]
    assert query.results == expected_results
def create_triples_for_multiple_element(self, entity_name, entity_values):
    """Add alternative triples for a 'has_…' attribute holding several values.

    One triple is built per value and the whole set is handed to the
    query builder as alternatives (a union).

    N.B.: the predicate is always the variable ``?<entity_name>`` here,
    i.e. strict mode is not applied to this union of triples even if it
    is enabled for the query, because that would increase the size of
    the query too much.

    :Example:

    With ``has_author = ["http://www.example.org/X", "http://www.example.org/Y"]``
    the query triple will be:
    `{ ?Book has_author <http://www.example.org/X> } UNION
    { ?Book has_author <http://www.example.org/Y> }.`

    :param entity_name: name of the 'has_…' attribute
    :param entity_values: values of the 'has_…' attribute
    :return: None
    """
    logging.debug(
        "Strict mode is disabled for %s because it contains multiple values."
        % entity_name)

    # Both strings only depend on the attribute name, not on the value.
    predicate_variable = u'?%s' % entity_name
    object_label = u"%s_obj" % entity_name

    alternatives = {
        RDFTriple(
            subject=self.args['subject'],
            predicate=predicate_variable,
            object=self.format_value_for_RDF_compliance(value, object_label),
            language=self.query_language)
        for value in entity_values
    }
    self.query_builder.add_query_alternative_triples(alternatives)
def test_rdftripletbuilder_base_case():
    """RDFTriple - Base case, no issues: Should pass """
    triple = RDFTriple(
        subject=u"Test",
        predicate=u"<http://purl.org/dc/elements/1.1/title>")

    # The subject is expected unchanged.
    assert triple.subject == u'Test'

    # No object was given: a variable numbered with the counter is expected.
    expected_object = u'?o_%i' % triple.class_counter
    assert triple.object == expected_object

    # The full predicate URI is expected in its prefixed form, with the
    # matching namespace as the only prefix.
    assert triple.predicate == u'dc:title'
    assert triple.prefixes == [NameSpace.dc]
def test_rdftripletbuilder_check_prefix():
    """RDFTriple - Prefix checking: Should pass """
    triple = RDFTriple(
        subject=u"foaf:name",
        predicate=u"<http://purl.org/dc/elements/1.1/title>")

    assert triple.subject == u'foaf:name'
    assert triple.object == u'?o_%i' % triple.class_counter
    assert triple.predicate == u'dc:title'

    # No prefix other than dc and foaf may be attached to the triple.
    allowed = {NameSpace.dc, NameSpace.foaf}
    assert set(triple.prefixes) <= allowed
def test_rdftripletbuilder_add_prefixes_after_creation():
    """RDFTriple - RDF prefix declaration after creation: should pass"""
    triple = RDFTriple(object=u'"foo"')
    triple.add_prefixes(
        [NameSpace.dawgt, "barbarbar: <http://bar.org/bar/bar/>"])
    triple.subject = u'barbarbar:foofoofoo'

    rendered = triple.__str__()
    assert rendered == u'barbarbar:foofoofoo ?p_%i "foo" .' % triple.class_counter

    # NameSpace.barbarbar is only looked up after add_prefixes() has run.
    unexpected = set(triple.prefixes) - {NameSpace.dawgt, NameSpace.barbarbar}
    assert not unexpected
def test_rdftripletbuilder_prefix_normalization():
    """RDFTriple - Prefix normalisation: Should pass """
    # The first declaration carries extra spaces and a trailing dot.
    raw_prefixes = [
        " t3st_1234 : <http://foo.org/bar/1.1/buz.owl#> .",
        "xsd: <http://www.w3.org/2001/XMLSchema#>",
    ]
    triple = RDFTriple(prefixes=raw_prefixes)

    # Nothing but prefixes was given: the three members are variables.
    assert triple.subject == u'?s_%i' % triple.class_counter
    assert triple.object == u'?o_%i' % triple.class_counter
    assert triple.predicate == u'?p_%i' % triple.class_counter

    # The namespace is registered here, before it is looked up by URI.
    add_namespace("t3st_1234", "http://foo.org/bar/1.1/buz.owl#")
    allowed = {NameSpace('http://foo.org/bar/1.1/buz.owl#'), NameSpace.xsd}
    assert set(triple.prefixes) <= allowed
def _build_standard_query(self, entities_names, check_type=True, strict_mode=False):
    """Fill the query builder of the object with the standard query triples.

    The query options rely on the dictionaries contained in
    pyneql/utils/vocabulary.py.

    :param entities_names: the instance variables beginning with 'has_'
        which have a value instantiated.
    :param check_type: Boolean. Check the type of the object
        (e.g: Book, Person, Location,…) directly in the SPARQL query.
        If True, the object to find is constrained by a union of RDF
        triples built from ``self.rdf_types``. For instance, for a Book:
        ``[…] { ?Book a fabio:Book } UNION […] UNION { ?Book a schemaorg:Book } .``
    :param strict_mode: Boolean. Check the type of the object's
        attributes (e.g: label, first name,…) directly in the SPARQL
        query. If True, the predicates of the triples whose values are
        instantiated are restricted to those listed in
        ``self.voc_attributes``.

        Example: we are looking for a Thing whose *label* is "አዲስ አበባ".

        - In non strict mode, the query is restrained to elements
          satisfying ``?Thing ?has_label "አዲስ አበባ".``; the predicate is
          left undetermined (``?has_label`` is a variable).
        - In strict mode, the allowed predicates are those listed in
          ``self.voc_attributes['has_label']``. If that list is
          ``[u'rdfs:label', u'wdt:P1813']``, the query is constrained to:
          ``[…]{ ?Thing rdfs:label "አዲስ አበባ" } UNION { ?Thing wdt:P1813 "አዲስ አበባ" }.[…]``

        Strict mode only applies to attributes holding a single value.
    """
    if check_type:
        # Restrict the query to elements of the current type with a union
        # of triples checking the type (e.g. Book):
        # [...] { ?Book a fabio:Book } UNION [...] UNION { ?Book a schemaorg:Book } .
        type_alternatives = {
            RDFTriple(
                subject=self.args['subject'],
                predicate=u'a',
                object=rdf_type,
                language=self.query_language)
            for rdf_type in self.rdf_types
        }
        self.query_builder.add_query_alternative_triples(type_alternatives)

    # Add the query delimiters, i.e. the parameters given for the query
    # (stored in the instance variables beginning with "has_").
    for entity_name in entities_names:
        raw_value = self.__dict__.get(entity_name)
        if is_listlike(raw_value):
            # TODO: alternate triples should be created here
            self.create_triples_for_multiple_element(entity_name, raw_value)
            continue
        normalized_value = normalize_str(raw_value)
        self.create_triples_for_single_element(
            entity_name, normalized_value, strict_mode)

    # Fetch everything about that Thing.
    self.query_builder.add_query_triple(
        RDFTriple(
            subject=self.args['subject'],
            predicate=self.args['predicate'],
            object=self.args['object'],
            language=self.query_language))
def test_rdftripletbuilder_add_prefixes_after_creation_inconsistencies():
    """RDFTriple - bad RDF prefix declaration: should fail"""
    malformed_declaration = "prefix: bar, abbr: <http://bar.org/bar/bar/>"
    triple = RDFTriple(object=u'"foo"')
    # NOTE(review): nothing is asserted here; presumably the expected
    # failure is caught by a wrapper not visible in this function - confirm.
    triple.add_prefix(malformed_declaration)
def test_rdftripletbuilder_prefix_inconsistencies2():
    """RDFTriple - Inconsistency between the vocabulary and a given namespace
    (the other way around): Should fail """
    # Presumably "xsd" is a known abbreviation while the URI declared
    # with it is not the one the vocabulary holds for it - confirm.
    conflicting_prefixes = ["xsd: <http://foo.org/bar/1.1/buz.owl#>"]
    RDFTriple(prefixes=conflicting_prefixes)
def test_rdftripletbuilder_prefix_inconsistencies1():
    """RDFTriple - Inconsistency between the vocabulary and a given namespace:
    Should fail """
    # "t3st_1234" is not in the vocabulary but
    # "http://www.w3.org/2001/XMLSchema#" is.
    conflicting_prefixes = ["t3st_1234: <http://www.w3.org/2001/XMLSchema#>"]
    RDFTriple(prefixes=conflicting_prefixes)
def test_rdftripletbuilder_literal_language():
    """RDFTriple - Literal with language: should pass"""
    # A quoted literal: the language tag only shows up on request.
    literal = RDFTriple(object=u'"Clinton"', language=Lang.Albanian)
    tagged = literal.__str__(with_language=True)
    assert tagged == u'?s_%i ?p_%i "Clinton"@sq .' % (
        literal.class_counter, literal.class_counter)
    untagged = literal.__str__()
    assert untagged == u'?s_%i ?p_%i "Clinton" .' % (
        literal.class_counter, literal.class_counter)

    # A prefixed name, a variable and a numeric-looking literal: asking
    # for the language must not change the rendering.
    for language_free_object in (u'foaf:crap', "?var", u'"1924"'):
        triple = RDFTriple(object=language_free_object, language=Lang.Albanian)
        assert triple.__str__(with_language=True) == triple.__str__()