def makeSparqlQuery(self, query, endpoint, customHttpHeaders=None):
    """Run a SPARQL query via POST and return the bindings as plain dicts.

    Args:
        query: SPARQL query text.
        endpoint: URL of the SPARQL endpoint.
        customHttpHeaders: optional mapping of extra HTTP header names to
            values that are sent with the request.

    Returns:
        A list with one dict per result binding, mapping each variable name
        to its value. ``xsd:decimal`` literals are converted to ``float``,
        ``xsd:integer`` literals to ``int``; every other value is returned
        as ``str``.

    Raises:
        Whatever SPARQLWrapper raises while sending the query or converting
        the response; the exception is propagated unchanged.
    """
    xsd = 'http://www.w3.org/2001/XMLSchema#'
    # xsd:date is deliberately left as a string: converting it to a
    # ``datetime.date`` does not export correctly to cytoscape format.
    converters = {
        xsd + 'decimal': float,
        xsd + 'integer': int,
    }

    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    sparql.setMethod(POST)
    sparql.setReturnFormat(JSON)
    for header_name, header_value in (customHttpHeaders or {}).items():
        sparql.addCustomHttpHeader(header_name, header_value)

    results = sparql.query().convert()

    return [
        {
            var: converters.get(cell.get('datatype'), str)(cell['value'])
            for var, cell in binding.items()
        }
        for binding in results["results"]["bindings"]
    ]
def _get_wiki_actor_movie_direct(year_begin, year_end, array_out):
    """
    @brief Fetch movie/actor pairs from Wikidata and append them to a list.

    Only results whose ``?date`` year lies between ``year_begin`` and
    ``year_end`` (both inclusive) are requested.

    Args:
        year_begin: first year of the range
        year_end: last year of the range
        array_out: list that receives one MovieActorWikiData object per
            result row; it is modified in place and nothing is returned
    """
    entity_prefix = "http://www.wikidata.org/entity/"

    def strip_prefix(uri):
        # Reduce a full entity URI to its bare identifier.
        return str(uri).replace(entity_prefix, "", 1)

    query_text = """
        SELECT ?movie ?movieLabel ?actor ?actorLabel WHERE \
        {\
        ?movie wdt:P31 wd:Q11424.\
        ?movie wdt:P161 ?actor.\
        ?movie wdt:P364 wd:Q1860.\
        ?movie wdt:P577 ?date.\
        FILTER(YEAR(?date) >= """ + str(year_begin) + """ && YEAR(?date) <= """ + str(year_end) + """).\
        SERVICE wikibase:label { bd:serviceParam wikibase:language \"en\". } \
        }
    """

    client = SPARQLWrapper("https://query.wikidata.org/sparql")
    client.setQuery(query_text)
    client.addCustomHttpHeader("User-Agent", 'bridges-python')
    client.setReturnFormat(JSON)
    response = client.query().convert()

    for row in response["results"]["bindings"]:
        record = movie_actor_wiki_data.MovieActorWikiData()
        record.actor_uri = strip_prefix(row['actor']['value'])
        record.movie_uri = strip_prefix(row['movie']['value'])
        record.movie_name = str(row['movieLabel']['value'])
        record.actor_name = str(row['actorLabel']['value'])
        array_out.append(record)
def fetch_SPARQL(self, server, query):
    """Send a SPARQL query to ``self._server`` and return the JSON result.

    When a user is logged in (both ``self.authentication_key`` and
    ``self.user`` are set) the key is sent in the ``X-authorization``
    header and, if the query contains ``WHERE``, a ``FROM`` clause for the
    user's graph is inserted in front of the first ``WHERE``.

    Args:
        server: unused; the request always goes to ``self._server``. Kept
            for interface compatibility.
        query: SPARQL query text.

    Returns:
        The converted query result.

    Raises:
        SPARQLExceptions.Unauthorized: if the response is an HTML page
            instead of query results.
    """
    sparql = SPARQLWrapper(self._server)
    if self.authentication_key and self.user:
        sparql.addCustomHttpHeader('X-authorization',
                                   self.authentication_key)
        # The user graph is only meaningful for a logged-in user; without
        # this guard the clause would read ".../user/None".
        if 'WHERE' in query:
            resource = self.spoofed_url if self.spoofed_url else self._server
            # Drop everything from the first '/sparql' onwards, if present.
            resource = resource.partition('/sparql')[0]
            from_clause = " FROM <{resource}/user/{user}> ".format(
                resource=resource, user=self.user)
            head, where, tail = query.partition('WHERE')
            query = head + from_clause + where + tail

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    if isinstance(results, bytes) and results.startswith(b'<!DOCTYPE html>'):
        # The query failed. We assume the problem was a lack of
        # authentication.
        # Without authentication, SynBioHub redirects to the home
        # page so raw HTML is returned.
        raise SPARQLExceptions.Unauthorized()
    return results
def queryWikidata(query):
    """Run a SPARQL query against Wikidata and return a DataFrame.

    Args:
        query: SPARQL query text.

    Returns:
        A ``pandas.DataFrame`` with one column per variable listed in the
        result head and one row per binding. A variable that is unbound in
        a row is stored as ``None``.
    """

    def extractJsonResults(results):
        """Convert a SPARQL JSON result document into a DataFrame."""
        cols = results['head']['vars']
        rows = [
            {col: row.get(col, {}).get('value') for col in cols}
            for row in results['results']['bindings']
        ]
        # Build the frame in one step: DataFrame.append() was removed in
        # pandas 2.0. ``columns=cols`` keeps the header for empty results.
        return pd.DataFrame(rows, columns=cols)

    sparql = SPARQLWrapper('https://query.wikidata.org/sparql')
    # Set a different user agent to fix 403 errors.
    sparql.addCustomHttpHeader(
        'User-Agent',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    )
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)
    results = sparql.query().convert()
    return extractJsonResults(results)
def getTimeSignatures(self, opts, bins=5, max_rank=20, max_results=20, formatter="{:.3f}"):
    """Build chart series of social signatures for an ego network.

    Args:
        opts: options object; ``opts.endpoint`` and ``opts.id`` are read
            here and the whole object is passed to ``self.ego_query``.
        bins: number of linear bins passed to ``social_signature``.
        max_rank: maximum rank passed to ``social_signature``.
        max_results: maximum number of data points per time-bin series.
        formatter: format string applied to every signature value.

    Returns:
        A list of dicts with keys ``name`` and ``data``. The first entry is
        the average signature; it is followed by one entry per time bin,
        named ``"<year0>–<year1>"``. ``data`` is a list of
        ``(rank, formatted_value)`` tuples with ranks starting at 1.
    """
    q = self.ego_query(opts)

    sparql = SPARQLWrapper(opts.endpoint)
    sparql.setQuery(q)
    sparql.setReturnFormat(JSON)
    # NOTE(review): hard-coded Basic credentials in source control; these
    # should come from configuration instead.
    sparql.addCustomHttpHeader("Authorization", "Basic c2Vjbzpsb2dvczAz")
    results = sparql.query().convert()

    ea = ego.EgoAnalytics(opts.id, query=results)
    # Honour the caller's ``bins`` value (previously fixed to 5).
    ea.social_signature(bin_type='linear', bin_n=bins, max_rank=max_rank)

    # dict containing the signatures and the nodes which appear in each
    ts = ea.ss.t_signatures
    year_edges = ea.ss.get_year_edges()

    average = [
        (rank, formatter.format(value))
        for rank, value in enumerate(ea.ss.average_signature, start=1)
    ]
    series = [dict(name='average', data=average)]

    # Consecutive year edges delimit the time bin of each signature.
    for signature, year0, year1 in zip(ts.values(), year_edges[:-1], year_edges[1:]):
        points = [
            (rank, formatter.format(value))
            for rank, value in enumerate(signature.values(), start=1)
        ][:max_results]
        series.append(dict(name="{}–{}".format(year0, year1), data=points))

    return series
def execute(self):
    """Run ``self.query`` against ``self.endpoint`` and return a DataFrame.

    The query is posted directly as ``application/sparql-query`` and the
    CSV response is parsed with pandas.

    Returns:
        The ``pandas.DataFrame`` read from the CSV response.
    """
    client = SPARQLWrapper(self.endpoint)
    client.setQuery(self.query)
    client.setTimeout(1800)  # 30 minutes
    client.setOnlyConneg(True)
    for header, value in (
        ("Content-type", "application/sparql-query"),
        ("Accept", "text/csv"),
    ):
        client.addCustomHttpHeader(header, value)
    client.setMethod(POST)
    client.setRequestMethod(POSTDIRECTLY)
    client.setReturnFormat(CSV)

    raw_csv = client.queryAndConvert()
    return pd.read_csv(io.StringIO(raw_csv.decode("utf-8")), sep=",")
def get_sparql_results(endpoint_url: str, query: str, add_prefixes=True) -> dict:
    """
    Makes a SPARQL query to endpoint_url.

    On HTTP 429 the call sleeps for the number of seconds given in the
    ``Retry-After`` header (10 seconds if the header is missing or is not
    a plain integer) and then retries.

    Args:
        endpoint_url (str): query endpoint
        query (str): SPARQL query
        add_prefixes (bool, optional): whether to add PREFIX lines using the
            namespaces in heritageconnector.namespace. Defaults to True.

    Returns:
        query_result (dict): the JSON result of the query as a dict. On
        HTTP 403 the decoded response body (a ``str``) is returned instead.

    Raises:
        urllib.error.HTTPError: for HTTP errors other than 429 and 403.
        json.decoder.JSONDecodeError: if the response is not valid JSON.
    """
    user_agent = generate_user_agent()

    if add_prefixes:
        prefix_text = generate_sparql_prefixes_header()
        query = prefix_text + " \n" + query

    sparql = SPARQLWrapper(endpoint_url)
    sparql.setQuery(query)
    sparql.setMethod("POST")
    sparql.setReturnFormat(JSON)
    sparql.addCustomHttpHeader(
        "User-Agent",
        user_agent,
    )

    try:
        return sparql.query().convert()
    except urllib.error.HTTPError as e:
        if e.code == 429:
            logger.debug("429")
            retry_after = e.headers.get("retry-after", None)
            delay = 10
            if retry_after:
                logger.debug(f"Retrying after {retry_after} seconds")
                try:
                    delay = int(retry_after)
                except ValueError:
                    # Retry-After may also be an HTTP date; fall back to
                    # the default delay rather than crashing.
                    delay = 10
            time.sleep(delay)
            # ``query`` already carries the prefix header at this point,
            # so it must not be prepended a second time.
            return get_sparql_results(endpoint_url, query, add_prefixes=False)
        if e.code == 403:
            logger.debug("403")
            return e.read().decode("utf8", "ignore")
        raise
    except json.decoder.JSONDecodeError:
        logger.error("JSONDecodeError. Query:")
        logger.error(query)
        raise
def get_sparql_dataframe(endpoint, query, post=False):
    """Run a SPARQL SELECT query and return the CSV result as a DataFrame.

    Args:
        endpoint: URL of the SPARQL endpoint.
        query: SPARQL query text; must be a SELECT query.
        post: when true, post the query directly as
            ``application/sparql-query`` using content negotiation only.

    Returns:
        The ``pandas.DataFrame`` parsed from the CSV response.

    Raises:
        QueryException: if the query is not a SELECT query.
    """
    client = SPARQLWrapper(endpoint)
    client.setQuery(query)
    if client.queryType != SELECT:
        raise QueryException("Only SPARQL SELECT queries are supported.")

    if post:
        client.setOnlyConneg(True)
        for header, value in (
            ("Content-type", "application/sparql-query"),
            ("Accept", "text/csv"),
        ):
            client.addCustomHttpHeader(header, value)
        client.setMethod(POST)
        client.setRequestMethod(POSTDIRECTLY)

    client.setReturnFormat(CSV)
    payload = client.query().convert()
    text = payload.decode('utf-8')
    return pd.read_csv(io.StringIO(text), sep=",")
def login(self, user, password):
    """Log in and store the returned authentication key.

    ``/sparql`` is appended to ``self._server`` if it is not already part
    of it. The login request is posted to ``<resource>/login``, where
    ``<resource>`` is ``self._server`` cut at the first ``/sparql``.

    Args:
        user: value sent as the ``email`` form parameter.
        password: value sent as the ``password`` form parameter.

    Side effects:
        Sets ``self.user`` and ``self.authentication_key`` (the response
        body decoded as UTF-8), and may extend ``self._server``.
    """
    if '/sparql' not in self._server:
        self._server += '/sparql'
    resource = self._server.partition('/sparql')[0]

    login_endpoint = SPARQLWrapper(resource + '/login')
    login_endpoint.setMethod(POST)
    login_endpoint.addCustomHttpHeader(
        'Content-Type', 'application/x-www-form-urlencoded')
    login_endpoint.addCustomHttpHeader('Accept', 'text/plain')
    # The value previously carried a stray trailing double quote.
    login_endpoint.addCustomHttpHeader('charset', 'utf-8')
    login_endpoint.addParameter('email', user)
    login_endpoint.addParameter('password', password)

    self.user = user
    self.authentication_key = login_endpoint.query().response.read(
    ).decode("utf-8")
class SPARQLWrapper_Test(TestCase):
    """Unit tests for the ``SPARQLWrapper`` class.

    The tests rely on a mocked URL opener (``_victim.urlopener``) so that
    the request object built by the wrapper can be inspected without any
    network access.
    """

    @staticmethod
    def _get_request(wrapper):
        """Execute the wrapper's query and return the request it built."""
        return wrapper.query().response.request  # possible due to mock above

    @staticmethod
    def _get_parameters_from_request(request):
        """Return the request parameters (URL query or body) as a dict."""
        if request.get_method() == 'GET':
            pieces_str = urlparse(request.get_full_url()).query
        else:
            # Compare the numeric major version; comparing version strings
            # is lexicographic and therefore fragile.
            if sys.version_info[0] < 3:
                pieces_str = request.data
            else:
                pieces_str = request.data.decode('ascii')

        return parse_qs(pieces_str)

    @staticmethod
    def _get_request_parameters(wrapper):
        """Execute the query and return the parameters that were sent."""
        request = SPARQLWrapper_Test._get_request(wrapper)
        parameters = SPARQLWrapper_Test._get_parameters_from_request(request)

        return parameters

    @staticmethod
    def _get_request_parameters_as_bytes(wrapper):
        """Like ``_get_request_parameters`` but with UTF-8 encoded values."""
        request = SPARQLWrapper_Test._get_request(wrapper)
        parameters = SPARQLWrapper_Test._get_parameters_from_request(request)

        if sys.version_info[0] < 3:
            return parameters
        # dict.iteritems() does not exist on Python 3, which is the only
        # version that reaches this point.
        return {
            k: [v.encode('utf-8') for v in vs]
            for k, vs in parameters.items()
        }

    @classmethod
    def setUpClass(cls):
        # Clear value. Due to the order of test execution, the value of
        # urllib2._opener contains, for instance,
        # keepalive.keepalive.HTTPHandler
        urllib2._opener = None

    def setUp(self):
        self.wrapper = SPARQLWrapper(endpoint='http://example.org/sparql')
        _victim.urlopener = urlopener

    def testConstructor(self):
        # The constructor must fail without arguments.
        self.assertRaises(TypeError, SPARQLWrapper)

        wrapper = SPARQLWrapper(endpoint='http://example.org/sparql/')

        self.assertEqual(XML, wrapper.returnFormat, 'default return format is XML')
        self.assertTrue(
            wrapper.agent.startswith('sparqlwrapper'),
            'default user-agent should start with "sparqlwrapper"'
        )

        wrapper = SPARQLWrapper(endpoint='http://example.org/sparql/', returnFormat='wrongformat')
        self.assertEqual(XML, wrapper.returnFormat, 'default return format is XML')

        wrapper = SPARQLWrapper(endpoint='http://example.org/sparql/', defaultGraph='http://example.org/default')
        parameters = self._get_request_parameters(wrapper)
        self.assertEqual(
            ['http://example.org/default'],
            parameters.get('default-graph-uri'),
            'default graph is set'
        )

    def testReset(self):
        self.wrapper.setMethod(POST)
        self.wrapper.setQuery('CONSTRUCT WHERE {?a ?b ?c}')
        self.wrapper.setReturnFormat(N3)
        self.wrapper.addParameter('a', 'b')
        self.wrapper.setOnlyConneg(True)

        request = self._get_request(self.wrapper)
        parameters = self._get_parameters_from_request(request)
        onlyConneg = self.wrapper.onlyConneg

        self.assertEqual('POST', request.get_method())
        self.assertTrue(parameters['query'][0].startswith('CONSTRUCT'))
        self.assertTrue('rdf+n3' in request.get_header('Accept'))
        self.assertTrue('a' in parameters)
        self.assertTrue(onlyConneg)

        self.wrapper.resetQuery()

        request = self._get_request(self.wrapper)
        parameters = self._get_parameters_from_request(request)
        onlyConneg = self.wrapper.onlyConneg

        self.assertEqual('GET', request.get_method())
        self.assertTrue(parameters['query'][0].startswith('SELECT'))
        self.assertFalse('rdf+n3' in request.get_header('Accept'))
        self.assertTrue('sparql-results+xml' in request.get_header('Accept'))
        self.assertFalse('a' in parameters)
        self.assertTrue(onlyConneg)

    def testSetReturnFormat(self):
        with warnings.catch_warnings(record=True) as w:
            self.wrapper.setReturnFormat('nonexistent format')
            self.assertEqual(1, len(w), "Warning due to non expected format")

        self.assertEqual(XML, self.wrapper.query().requestedFormat)

        self.wrapper.setReturnFormat(JSON)
        self.assertEqual(JSON, self.wrapper.query().requestedFormat)

        try:
            # Imported only to find out whether JSON-LD support is installed.
            import rdflib_jsonld
            self.wrapper.setReturnFormat(JSONLD)
            self.assertEqual(JSONLD, self.wrapper.query().requestedFormat)
        except ImportError:
            self.assertRaises(ValueError, self.wrapper.setReturnFormat, JSONLD)

    def testsSupportsReturnFormat(self):
        self.assertTrue(self.wrapper.supportsReturnFormat(XML))
        self.assertTrue(self.wrapper.supportsReturnFormat(JSON))
        self.assertTrue(self.wrapper.supportsReturnFormat(TURTLE))
        self.assertTrue(self.wrapper.supportsReturnFormat(N3))
        self.assertTrue(self.wrapper.supportsReturnFormat(RDF))
        self.assertTrue(self.wrapper.supportsReturnFormat(RDFXML))
        self.assertTrue(self.wrapper.supportsReturnFormat(CSV))
        self.assertTrue(self.wrapper.supportsReturnFormat(TSV))
        self.assertFalse(self.wrapper.supportsReturnFormat('nonexistent format'))

        try:
            # Imported only to find out whether JSON-LD support is installed.
            import rdflib_jsonld
            self.assertTrue(self.wrapper.supportsReturnFormat(JSONLD))
        except ImportError:
            self.assertFalse(self.wrapper.supportsReturnFormat(JSONLD))

    def testAddParameter(self):
        self.assertFalse(self.wrapper.addParameter('query', 'dummy'))
        self.assertTrue(self.wrapper.addParameter('param1', 'value1'))
        self.assertTrue(self.wrapper.addParameter('param1', 'value2'))
        self.assertTrue(self.wrapper.addParameter('param2', 'value2'))

        pieces = self._get_request_parameters(self.wrapper)

        self.assertTrue('param1' in pieces)
        self.assertEqual(['value1', 'value2'], pieces['param1'])
        self.assertTrue('param2' in pieces)
        self.assertEqual(['value2'], pieces['param2'])
        # The rejected 'query' parameter must not have replaced the query.
        # (The previous assertion compared a list with the literal string
        # 'query' and could never fail.)
        self.assertNotEqual(['dummy'], pieces.get('query'))

    def testSetCredentials(self):
        request = self._get_request(self.wrapper)
        self.assertFalse(request.has_header('Authorization'))

        self.wrapper.setCredentials('login', 'password')
        request = self._get_request(self.wrapper)
        self.assertTrue(request.has_header('Authorization'))

        # expected header for login:password
        # should succeed for python 3 since pull request #72
        self.assertEqual("Basic bG9naW46cGFzc3dvcmQ=", request.get_header('Authorization'))

    def testAddCustomHttpHeader(self):
        request = self._get_request(self.wrapper)
        self.assertFalse(request.has_header('Foo'))

        # Add new header field name
        self.wrapper.addCustomHttpHeader('Foo', 'bar')
        request = self._get_request(self.wrapper)
        self.assertTrue(request.has_header('Foo'))
        self.assertEqual("bar", request.get_header('Foo'))

        # Override a new field name
        self.wrapper.addCustomHttpHeader('Foo', 'bar')
        request = self._get_request(self.wrapper)
        self.assertTrue(request.has_header('Foo'))
        self.assertEqual("bar", request.get_header('Foo'))
        self.wrapper.addCustomHttpHeader('Foo', 'bar_2')
        request = self._get_request(self.wrapper)
        self.assertTrue(request.has_header('Foo'))
        self.assertEqual("bar_2", request.get_header('Foo'))

        # Override header field name
        self.wrapper.addCustomHttpHeader('User-agent', 'Another UA')
        request = self._get_request(self.wrapper)
        self.assertEqual("Another UA", request.get_header('User-agent'))

    def testClearCustomHttpHeader(self):
        request = self._get_request(self.wrapper)
        self.assertFalse(request.has_header('Foo'))

        # Add new header field name
        self.wrapper.addCustomHttpHeader('Foo_1', 'bar_1')
        self.wrapper.addCustomHttpHeader('Foo_2', 'bar_2')
        self.wrapper.addCustomHttpHeader('Foo_3', 'bar_3')

        self.assertFalse(self.wrapper.clearCustomHttpHeader('Foo_4'))
        self.assertTrue(self.wrapper.clearCustomHttpHeader('Foo_3'))

        customHttpHeaders = self.wrapper.customHttpHeaders

        self.assertTrue('Foo_1' in customHttpHeaders)
        self.assertTrue('Foo_2' in customHttpHeaders)
        self.assertEqual('bar_1', customHttpHeaders['Foo_1'])
        self.assertEqual('bar_2', customHttpHeaders['Foo_2'])
        self.assertFalse(self.wrapper.clearCustomHttpHeader('Foo_3'), 'already cleaned')

    def testSetHTTPAuth(self):
        self.assertRaises(TypeError, self.wrapper.setHTTPAuth, 123)

        self.wrapper.setCredentials('login', 'password')
        request = self._get_request(self.wrapper)
        self.assertTrue(request.has_header('Authorization'))
        self.assertIsNone(urllib2._opener)

        self.wrapper.setHTTPAuth(DIGEST)
        self.assertIsNone(urllib2._opener)
        request = self._get_request(self.wrapper)
        self.assertFalse(request.has_header('Authorization'))
        self.assertEqual(self.wrapper.http_auth, DIGEST)
        self.assertIsInstance(urllib2._opener, urllib2.OpenerDirector)

        self.assertRaises(ValueError, self.wrapper.setHTTPAuth, 'OAuth')

        self.wrapper.http_auth = "OAuth"
        self.assertRaises(NotImplementedError, self._get_request, self.wrapper)

    def testSetQuery(self):
        self.wrapper.setQuery('PREFIX example: <http://example.org/INSERT/> SELECT * WHERE {?s ?p ?v}')
        self.assertEqual(SELECT, self.wrapper.queryType)

        self.wrapper.setQuery('PREFIX e: <http://example.org/> INSERT {e:a e:b e:c}')
        self.assertEqual(INSERT, self.wrapper.queryType)

        self.wrapper.setQuery("""#CONSTRUCT {?s ?p ?o}
            SELECT ?s ?p ?o
            WHERE {?s ?p ?o}""")
        self.assertEqual(SELECT, self.wrapper.queryType)

        # record=True keeps the expected warning out of the test output.
        with warnings.catch_warnings(record=True):
            self.wrapper.setQuery('UNKNOWN {e:a e:b e:c}')
            self.assertEqual(SELECT, self.wrapper.queryType, 'unknown queries result in SELECT')

    def testSetQueryEncodingIssues(self):
        # further details from issue #35
        query = u'INSERT DATA { <urn:michel> <urn:says> "これはテストです" }'
        query_bytes = query.encode('utf-8')

        self.wrapper.setMethod(POST)
        self.wrapper.setRequestMethod(POSTDIRECTLY)

        self.wrapper.setQuery(query)
        request = self._get_request(self.wrapper)
        self.assertEqual(query_bytes, request.data)

        self.wrapper.setQuery(query_bytes)
        request = self._get_request(self.wrapper)
        self.assertEqual(query_bytes, request.data)

        self.wrapper.setRequestMethod(URLENCODED)

        self.wrapper.setQuery(query)
        parameters = self._get_request_parameters_as_bytes(self.wrapper)
        self.assertEqual(query_bytes, parameters['update'][0])

        self.wrapper.setQuery(query_bytes)
        parameters = self._get_request_parameters_as_bytes(self.wrapper)
        self.assertEqual(query_bytes, parameters['update'][0])

        # Bytes that are not valid UTF-8 and non-string queries are rejected.
        self.assertRaises(UnicodeDecodeError, self.wrapper.setQuery, query.encode('sjis'))
        self.assertRaises(TypeError, self.wrapper.setQuery, {'foo': 'bar'})

    def testSetTimeout(self):
        self.wrapper.setTimeout(10)
        self.assertEqual(10, self.wrapper.timeout)

        self.wrapper.resetQuery()
        self.assertEqual(None, self.wrapper.timeout)

    def testClearParameter(self):
        self.wrapper.addParameter('param1', 'value1')
        self.wrapper.addParameter('param1', 'value2')
        self.wrapper.addParameter('param2', 'value2')

        self.assertFalse(self.wrapper.clearParameter('query'))
        self.assertTrue(self.wrapper.clearParameter('param1'))

        pieces = self._get_request_parameters(self.wrapper)

        self.assertFalse('param1' in pieces)
        self.assertTrue('param2' in pieces)
        self.assertEqual(['value2'], pieces['param2'])

        self.assertFalse(self.wrapper.clearParameter('param1'), 'already cleaned')

    def testSetMethod(self):
        self.wrapper.setMethod(POST)
        request = self._get_request(self.wrapper)

        self.assertEqual("POST", request.get_method())

        self.wrapper.setMethod(GET)
        request = self._get_request(self.wrapper)

        self.assertEqual("GET", request.get_method())

    def testSetRequestMethod(self):
        self.assertEqual(URLENCODED, self.wrapper.requestMethod)

        self.wrapper.setRequestMethod(POSTDIRECTLY)
        self.assertEqual(POSTDIRECTLY, self.wrapper.requestMethod)

    def testIsSparqlUpdateRequest(self):
        self.wrapper.setQuery('DELETE WHERE {?s ?p ?o}')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('DELETE DATA { <urn:john> <urn:likes> <urn:surfing> }')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery("""
        PREFIX example: <http://example.org/SELECT/>
        BASE <http://example.org/SELECT>
        DELETE WHERE {?s ?p ?o}
        """)
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('WITH <urn:graph> DELETE DATA { <urn:john> <urn:likes> <urn:surfing> }')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('INSERT DATA { <urn:john> <urn:likes> <urn:surfing> }')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('WITH <urn:graph> INSERT DATA { <urn:john> <urn:likes> <urn:surfing> }')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('CREATE GRAPH <urn:graph>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('CLEAR GRAPH <urn:graph>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('DROP GRAPH <urn:graph>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('MOVE GRAPH <urn:graph1> TO GRAPH <urn:graph2>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('LOAD <http://localhost/file.rdf> INTO GRAPH <urn:graph>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('COPY <urn:graph1> TO GRAPH <urn:graph2>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('ADD <urn:graph1> TO GRAPH <urn:graph2>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

    def testIsSparqlQueryRequest(self):
        self.wrapper.setQuery('SELECT * WHERE {?s ?p ?o}')
        self.assertTrue(self.wrapper.isSparqlQueryRequest())

        self.wrapper.setQuery("""
        PREFIX example: <http://example.org/DELETE/>
        BASE <http://example.org/MODIFY>
        ASK WHERE {?s ?p ?o}
        """)
        self.assertTrue(self.wrapper.isSparqlQueryRequest())
        self.assertFalse(self.wrapper.isSparqlUpdateRequest())

    def testQuery(self):
        qr = self.wrapper.query()
        self.assertTrue(isinstance(qr, QueryResult))

        request = qr.response.request  # possible due to mock above
        self.assertTrue(isinstance(request, Request))

        parameters = self._get_parameters_from_request(request)
        self.assertTrue('query' in parameters)
        self.assertTrue('update' not in parameters)

        self.wrapper.setMethod(POST)
        self.wrapper.setQuery('PREFIX e: <http://example.org/> INSERT {e:a e:b e:c}')
        parameters = self._get_request_parameters(self.wrapper)
        self.assertTrue('update' in parameters)
        self.assertTrue('query' not in parameters)
        # _returnFormatSetting = ["format", "output", "results"]
        self.assertTrue('format' not in parameters)
        self.assertTrue('output' not in parameters)
        self.assertTrue('results' not in parameters)

        # Each HTTP status must surface as its dedicated exception type.
        # assertRaises reports a missing or a different exception directly,
        # instead of hiding it behind a bare ``except``.
        # TODO: check exception-format
        expected_errors = (
            (400, QueryBadFormed),
            (404, EndPointNotFound),
            (500, EndPointInternalError),
            (999, HTTPError),
        )
        for status, expected in expected_errors:
            _victim.urlopener = urlopener_error_generator(status)
            with self.assertRaises(expected):
                self.wrapper.query()

    def testQueryEncoding(self):
        query = 'INSERT DATA { <urn:michel> <urn:says> "é" }'

        wrapper = SPARQLWrapper('http://example.com:3030/example')
        wrapper.setMethod(POST)
        wrapper.setRequestMethod(URLENCODED)
        wrapper.setQuery(query)

        _victim.urlopener = urlopener_check_data_encoding
        wrapper.query()

    def testQueryAndConvert(self):
        _oldQueryResult = _victim.QueryResult

        class FakeQueryResult(object):
            def __init__(self, result):
                pass

            def convert(self):
                return True

        try:
            _victim.QueryResult = FakeQueryResult
            result = self.wrapper.queryAndConvert()
            self.assertEqual(True, result)
        finally:
            _victim.QueryResult = _oldQueryResult

    def testComments(self):
        # see issue #32
        self.wrapper.setQuery("""
# this is a comment
select * where { ?s ?p ?o }
""")
        self.assertTrue(self.wrapper.isSparqlQueryRequest())

    def testHashInPrefixes(self):
        # see issue #77
        self.wrapper.setQuery("""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
select * where { ?s ?p ?o }
""")
        self.assertTrue(self.wrapper.isSparqlQueryRequest())

    def testHashInPrefixComplex(self):
        # see issue #77
        self.wrapper.setQuery("""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX weather: <http://hal.zamia.org/weather/>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?location ?cityid ?timezone ?label
WHERE {
  ?location weather:cityid ?cityid .
  ?location weather:timezone ?timezone .
  ?location rdfs:label ?label .
}
""")
        self.assertTrue(self.wrapper.isSparqlQueryRequest())

    def testHashWithNoComments(self):
        # see issue #77
        query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT *
WHERE {
  ?s ?p ?o .
}
"""
        parsed_query = self.wrapper._cleanComments(query)
        self.assertEqual(query, parsed_query)

    def testCommentBeginningLine(self):
        # see issue #77
        query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
# a comment
SELECT *
WHERE {
  ?s ?p ?o .
}
"""
        expected_parsed_query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT *
WHERE {
  ?s ?p ?o .
}
"""
        parsed_query = self.wrapper._cleanComments(query)
        self.assertEqual(expected_parsed_query, parsed_query)

    def testCommentEmtpyLine(self):
        # see issue #77
        query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

# a comment
SELECT *
WHERE {
  ?s ?p ?o .
}
"""
        expected_parsed_query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT *
WHERE {
  ?s ?p ?o .
}
"""
        parsed_query = self.wrapper._cleanComments(query)
        self.assertEqual(expected_parsed_query, parsed_query)

    def testCommentsFirstLine(self):
        # see issue #77
        query = """#CONSTRUCT {?s ?p ?o}
            SELECT ?s ?p ?o
            WHERE {?s ?p ?o}"""
        expected_parsed_query = """

            SELECT ?s ?p ?o
            WHERE {?s ?p ?o}"""

        parsed_query = self.wrapper._cleanComments(query)
        self.assertEqual(expected_parsed_query, parsed_query)

    @unittest.skip("issue #80")
    def testCommentAfterStatements(self):
        # see issue #77
        query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT *
WHERE { # this is the where condition
  ?s ?p ?o .
}
"""
        expected_parsed_query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT *
WHERE {
  ?s ?p ?o .
}
"""
        parsed_query = self.wrapper._cleanComments(query)
        self.assertEqual(expected_parsed_query, parsed_query)

    def testSingleLineQueryLine(self):
        # see issue #74
        query = "prefix whatever: <http://example.org/blah#> ASK { ?s ?p ?o }"
        parsed_query = self.wrapper._cleanComments(query)
        self.assertEqual(query, parsed_query)

        self.wrapper.setQuery(query)
        self.assertTrue(self.wrapper.isSparqlQueryRequest())

    def testOnlyConneg(self):
        # see issue #82
        query = "prefix whatever: <http://example.org/blah#> ASK { ?s ?p ?o }"
        self.wrapper.setOnlyConneg(False)
        self.wrapper.setQuery(query)
        request = self._get_request(self.wrapper)
        request_params = dict(parse_qsl(urlparse(request.get_full_url()).query))
        # Obviously _returnFormatSetting is not accessible from
        # SPARQLWrapper, so we copy&paste the possible values
        for returnFormatSetting in ["format", "output", "results"]:
            self.assertTrue(returnFormatSetting in request_params, "URL parameter '%s' was not sent, and it was expected" %returnFormatSetting)

        # ONLY Content Negotiation
        self.wrapper.resetQuery()
        self.wrapper.setOnlyConneg(True)
        self.wrapper.setQuery(query)
        request = self._get_request(self.wrapper)
        request_params = dict(parse_qsl(urlparse(request.get_full_url()).query))
        # Obviously _returnFormatSetting is not accessible from
        # SPARQLWrapper, so we copy&paste the possible values
        for returnFormatSetting in ["format", "output", "results"]:
            self.assertFalse(returnFormatSetting in request_params, "URL parameter '%s' was sent, and it was not expected (only Content Negotiation)" %returnFormatSetting)
class SPARQLEndpoint: sparql_endpoint: SPARQLWrapper # # # def __init__(self, args=None): """ Create a SPARQL Endpoint (an instance of the class SPARQLWrapper) given the "standard" command line params that we're using for most command line utilities :param args: the CLI args, see set_cli_params """ self.args = args self.verbose = args.verbose self.data_source_code = args.data_source_code self.endpoint_base = args.sparql_endpoint self.database = args.sparql_endpoint_database self.sparql_endpoint = SPARQLWrapper( endpoint=f"{self.endpoint_url()}/query", updateEndpoint=f"{self.endpoint_url()}/update", returnFormat=JSON, ) self.sparql_endpoint.setCredentials(args.sparql_endpoint_userid, passwd=args.sparql_endpoint_passwd) # self.s3_endpoint.addDefaultGraph(graph_iri_for_dataset(self.data_source_code)) # self.s3_endpoint.setUseKeepAlive() def endpoint_url(self) -> str: return f"{self.endpoint_base}/{self.database}" def endpoint_url_for_queries(self) -> str: return f"{self.endpoint_base}/{self.database}/query" def user_id(self): return self.sparql_endpoint.user def password(self): return self.sparql_endpoint.passwd def execute_sparql_select_query(self, sparql_statement, mime_type=MIME_CSV): check_sparql_mime_type(mime_type) if self.verbose: log_item("Executing", sparql_statement) # noinspection PyProtectedMember log_item("Statement Type", self.sparql_endpoint._parseQueryType(sparql_statement)) self.sparql_endpoint.clearCustomHttpHeader("Accept") self.sparql_endpoint.setRequestMethod(URLENCODED) self.sparql_endpoint.setMethod(GET) self.sparql_endpoint.addCustomHttpHeader("Accept", mime_type) log_item("Query", sparql_statement.lstrip()) self.sparql_endpoint.setQuery(sparql_statement.lstrip()) # noinspection PyProtectedMember request = self.sparql_endpoint._createRequest() for (header_name, header_value) in request.header_items(): log_item(header_name, header_value) log_item("Is Update", self.sparql_endpoint.isSparqlUpdateRequest()) log_item("Full URL", request.full_url) # 
with urllib.request.urlopen(request) as f: # print(f.read().decode('utf-8')) return self._execute_query() def execute_csv_query(self, sparql_statement: str): return self.execute_sparql_query2(sparql_statement) def execute_sparql_query2( self, sparql_statement, graph_iri: str = None, mime: str = MIME_CSV) -> Optional[SPARQLResponse]: if self.verbose: log_item("Executing", sparql_statement) # noinspection PyProtectedMember log_item("Statement Type", self.sparql_endpoint._parseQueryType(sparql_statement)) endpoint_url = self.endpoint_url_for_queries() log_item("SPARQL Endpoint", endpoint_url) log_item("Accept Mime Type", mime) params = { "timeout": 10000, # ms "limit": 10000, "charset": "utf-8" } if graph_iri: params["graph"] = graph_iri params["reasoner"] = "true" log_item("Params", params) # # Using this method: https://www.w3.org/TR/sparql11-protocol/#query-via-post-direct # r = requests.post(endpoint_url, data=sparql_statement, auth=(self.user_id(), self.password()), params=params, headers={ 'Accept': mime, 'Accept-Encoding': '*;q=0, identity;q=1', 'Accept-Charset': '*;q=0, utf-8;q=1', 'Content-type': 'application/sparql_endpoint-query' }, stream=True) if r.status_code == 200: return SPARQLResponse(self, r, mime=mime) log_item('HTTP Status', r.status_code) return None def execute_sparql_statement(self, sparql_statement): if self.verbose: log_item("Executing", sparql_statement) statement_type = self.sparql_endpoint._parseQueryType(sparql_statement) log_item("Statement Type", statement_type) self.sparql_endpoint.clearCustomHttpHeader("Accept") self.sparql_endpoint.setMethod(POST) self.sparql_endpoint.setRequestMethod(URLENCODED) self.sparql_endpoint.addCustomHttpHeader("Accept", "text/boolean") self.sparql_endpoint.addParameter("reasoner", "true") log_item("Query", sparql_statement) self.sparql_endpoint.setQuery(sparql_statement) request = self.sparql_endpoint._createRequest() for (header_name, header_value) in request.header_items(): log_item(header_name, 
header_value) log_item("Is Update", self.sparql_endpoint.isSparqlUpdateRequest()) log_item("Full URL", request.full_url) # with urllib.request.urlopen(request) as f: # print(f.read().decode('utf-8')) return self._execute_query() def execute_construct(self, sparql_construct_statement: str) -> Optional[Graph]: self.sparql_endpoint.clearCustomHttpHeader("Accept") self.sparql_endpoint.setMethod(GET) self.sparql_endpoint.setReturnFormat( RDFXML) # the call to convert() below depends on this being RDFXML self.sparql_endpoint.setRequestMethod(URLENCODED) self.sparql_endpoint.addParameter("reasoner", "true") # # timeout higher than triple store time out # self.sparql_endpoint.setTimeout(10) # # millisecs. let triple store fail first so timeout earlier than HTTP # self.sparql_endpoint.addParameter("timeout", "2000") log_item("Query", sparql_construct_statement) self.sparql_endpoint.setQuery(sparql_construct_statement) # noinspection PyProtectedMember request = self.sparql_endpoint._createRequest() for (header_name, header_value) in request.header_items(): log_item(header_name, header_value) return self._execute_query() def _execute_query(self): try: result = self.sparql_endpoint.query() response = result.response log_item("Response Code", result.response.code) for (key, value) in result.response.info().items(): log_item(key, value) for (key, value) in result.info().items(): log_item(key, value) if result.response.code in (200, 201): return result except urllib.error.HTTPError as err: error("{} code={}".format(err, err.code)) except urllib.error.URLError as err: error("{} reason={}".format(err, err.reason)) except EndPointNotFound as err: error("{}".format(err)) dump(err) except QueryBadFormed: error( f"Bad formed SPARQL statement: {self.sparql_endpoint.queryString}" ) except Unauthorized: error("Unauthorized to access {}".format( self.sparql_endpoint.endpoint)) except ConnectionRefusedError: error("Could not connect to {}".format( self.sparql_endpoint.endpoint)) return None 
def handle_error(self, r: requests.Response) -> bool:
    """
    Log an HTTP response and report whether it counts as a success.

    The request URL and every response header are logged first. The
    vendor-specific checks then run in order (Stardog, then Ontotext);
    the first check that rejects the response ends the evaluation.
    Otherwise the HTTP status is logged and only 200 and 201 are accepted.

    :param r: the response to inspect
    :return: True when the response is acceptable, False otherwise
    """
    log_item("URL", r.url)
    for header_name, header_value in r.headers.items():
        log_item(header_name, header_value)

    # Vendor checks run lazily, one after the other, so a rejection by the
    # first one prevents the second one from being called at all.
    if not self._handle_stardog_error(r):
        return False
    if not self._handle_ontotext_error(r):
        return False

    log_item("HTTP Status", r.status_code)
    return r.status_code in (200, 201)


def _handle_stardog_error(self, r: requests.Response) -> bool:
    """
    In case we detect the response header SD-Error-Code we know it's a
    Stardog server. Then handle the various errors Stardog can give.

    :param r: the response to inspect
    :return: False only for the "UnknownDatabase" error code; True when the
        header is absent/empty or carries any other (unrecognised) code
    """
    sd_error_code = r.headers.get('SD-Error-Code')

    if sd_error_code == "UnknownDatabase":
        log_error(f"The database {self.database} does not exist")
        return False

    if sd_error_code:
        # Unrecognised codes are reported but do not fail the request.
        warning(f"Encountered unknown Stardog error {sd_error_code}")
    return True


# noinspection PyMethodMayBeStatic
def _handle_ontotext_error(self, r: requests.Response) -> bool:  # noqa
    """Placeholder for Ontotext-specific error handling; accepts every response."""
    return True
(List of city-specific search terms loaded above) """ sparql = SPARQLWrapper("http://ldf.fi/semparl/sparql") timespans = [['1986', '1995'], ['1969', '1978'], ['2004', '2013']] final_results = pd.DataFrame() k = 0 # For showing query progress for i in timespans[0]: start_year = i[0] end_year = i[1] for City in cities: k += 1 # For showing query progress city = City.label search_term = City.label search_not = City.exclude # show and execute query: print("Query ", k, " run: ", city, " ", search_term, " ", search_not, " ", start_year, "-", end_year) sparql.setQuery( make_query(city, search_term, search_not, start_year, end_year)) sparql.setReturnFormat(JSON) sparql.addCustomHttpHeader("Authorization", authorization) results = sparql.query().convert() link_data = JSON2Pandas(results) final_results = final_results.append(link_data) # write a csv file: #final_results.to_csv('city_mentions.csv', index=False)
def get_results(query):
    """
    Run a SPARQL query and return the rows of its JSON result.

    A fresh ``SPARQLWrapper`` is created for the module-level
    ``endpoint_url`` on every call and sends the module-level
    ``user_agent`` as its ``User-Agent`` header.

    :param query: the SPARQL query string to submit
    :return: the ``results.bindings`` list of the converted JSON response
    """
    client = SPARQLWrapper(endpoint_url)
    client.addCustomHttpHeader('User-Agent', user_agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    response = client.query()
    payload = response.convert()
    return payload['results']['bindings']
class Sparql:
    '''
    Wrapper class to simplify submitting and fetching SPARQL queries

    :param endpoint: full URL to the SPARQL endpoint
    '''

    def __init__(self, endpoint: str):
        '''
        :param endpoint: full URI (with port and path) to SPARQL endpoint
        '''
        self._p_endpoint = endpoint
        self._y_store = SPARQLWrapper(self._p_endpoint)

    @staticmethod
    def load(template: str, variables: Hash={}, injections: Hash={}) -> str:
        '''
        Static method to apply mixins, variable substitions, and injections to a
        query template string.

        Processing happens in three passes over the template:

        1. every ``@def`` ... ``@end`` section is removed from the template and
           its body is substituted for each matching ``@mixin`` directive;
        2. every ``@inject $label`` directive whose label is present in
           ``injections`` is replaced by the injected text (undeclared labels
           are left untouched);
        3. every ``<$name>`` placeholder is replaced by the N3 form of the IRI
           stored under ``name`` in ``variables``.

        :param template: the SPARQL query template string
        :param variables: dict of variables and their values
        :param injections: dict of injections to apply across query template
        :return: the output query string
        :raises Exception: if the template uses a ``<$name>`` placeholder that
            has no entry in ``variables``
        '''
        sx_template = template
        # the default dicts are only read, never mutated
        h_vars = variables or {}
        h_injections = injections or {}

        # each def
        sr_def = r'(?s)'+_directive('@def', True)+r'(.*?)'+_directive('@end')
        di_defs = re.finditer(sr_def, sx_template)
        for m_def in di_defs:
            # ref var name
            si_var = m_def.group(1)

            # extract mixin bgp
            sx_mixin = re.sub(r'(?is)^\s*ask\s*\{\s*(.*)\s*\}\s*$', r'\1', m_def.group(2))

            # normalize indentation; a body without an indented continuation
            # line has nothing to normalize (previously this case crashed with
            # AttributeError on the missing match)
            m_init = re.match(r'^[^\n]*\r?\n([ \t]+)', sx_mixin)
            if m_init:
                sx_mixin = re.sub(r'(?m)^'+m_init.group(1), '', sx_mixin)

            # remove from template
            sx_template = sx_template.replace(m_def.group(0), '')

            # replace all invocations
            di_invocations = re.finditer(_directive('@mixin', r'[ \t]+'+si_var, True), sx_template)
            for m_invocation in di_invocations:
                # re-indent the mixin body to the column of the invocation
                s_indent = m_invocation.group(1)
                sx_aligned = re.sub(r'(?m)^', s_indent, sx_mixin)
                sx_template = sx_template.replace(m_invocation.group(0), sx_aligned)

        # injections
        di_inject = re.finditer(_directive('@inject', r'[ \t]+\$([\w]+)', True), sx_template)
        for m_inject in di_inject:
            # ref injection label
            si_inject = m_inject.group(2)

            # injection not declared
            if si_inject not in h_injections:
                continue

            # apply injection(s)
            sx_template = sx_template.replace(m_inject.group(0), m_inject.group(1)+h_injections[si_inject])

        # IRI variables
        di_vars = re.finditer(r'<\$(\w+)>', sx_template)
        for m_vars in di_vars:
            si_var = m_vars.group(1)

            # variable not defined
            if si_var not in h_vars:
                raise Exception(f'query template requires a value for the variable "{si_var}"')

            # replace
            sx_template = sx_template.replace(m_vars.group(0), rdflib.URIRef(h_vars[si_var]).n3())

        # return output query string
        return sx_template.strip()

    def _set_query(self, s_query):
        '''
        Install ``s_query`` (with the shared SPARQL prefixes prepended) on the
        underlying wrapper and force the ``infer`` and ``sameAs`` request
        parameters to ``false``.

        :param s_query: the SPARQL query string, without prefixes
        '''
        self._y_store.setQuery(S_PREFIXES_SPARQL+'\n'+s_query)

        # clear first so repeated calls do not accumulate duplicate values
        self._y_store.clearParameter('infer')
        self._y_store.addParameter('infer', 'false')

        self._y_store.clearParameter('sameAs')
        self._y_store.addParameter('sameAs', 'false')

    def _submit(self):
        '''
        Execute the query currently installed on the underlying wrapper.

        :return: the result object returned by the wrapper's ``query()``
        :raises Exception: wrapping (and chained to) any error raised while
            querying; the message contains the full query text
        '''
        try:
            return self._y_store.query()
        except Exception as e_query:
            # The wrapper keeps the full text (prefixes + query) it was given
            # in ``queryString``. The query is not a local name in this
            # method, so referring to it directly raised NameError and hid
            # the real failure.
            raise Exception(f'while querying """\n{self._y_store.queryString}"""') from e_query

    def construct(self, query: str) -> str:
        '''
        Submit a SPARQL CONSTRUCT query and return the resulting graph as a Turtle document string

        :param query: the SPARQL CONSTRUCT query string to submit. Prefixes are prepended automatically
        :return: the converted response with the shared Turtle prefixes prepended
        '''
        self._set_query(query)
        self._y_store.setReturnFormat(TURTLE)
        self._y_store.addCustomHttpHeader('Accept', 'text/turtle')
        self._y_store.setMethod(POST)
        y_results = self._submit()
        return SB_PREFIXES_TURTLE+y_results.convert()

    def fetch(self, query: str) -> List[Dict[str, Any]]:
        '''
        Submit a SPARQL SELECT query and return the query result rows as a list of dicts

        :param query: the SPARQL SELECT query string to submit. Prefixes are prepended automatically
        :return: the ``results.bindings`` list of the JSON response
        '''
        self._set_query(query)
        self._y_store.setReturnFormat(JSON)
        # construct() installs a custom "Accept: text/turtle" header that stays
        # on the wrapper and overrides the Accept header derived from the
        # return format; drop it so the endpoint is asked for JSON again.
        self._y_store.clearCustomHttpHeader('Accept')
        self._y_store.setMethod(POST)
        y_results = self._submit()
        return y_results.convert()['results']['bindings']
class SPARQLWrapper_Test(TestCase):
    """
    Unit tests for how ``SPARQLWrapper`` builds its HTTP requests.

    ``setUp`` installs the module-level ``urlopener`` test double into
    ``_victim`` so that ``wrapper.query()`` does not hit the network and the
    generated request can be inspected through ``response.request``.
    """

    @staticmethod
    def _get_request(wrapper):
        """Run the wrapper's query and return the request object it produced."""
        return wrapper.query().response.request  # possible due to mock above

    @staticmethod
    def _get_parameters_from_request(request):
        """Return the request parameters (URL query for GET, body otherwise) as parsed by ``parse_qs``."""
        if request.get_method() == 'GET':
            pieces_str = urlparse(request.get_full_url()).query
        else:
            if sys.version_info[0] < 3:
                pieces_str = request.data
            else:
                pieces_str = request.data.decode('ascii')

        return parse_qs(pieces_str)

    @staticmethod
    def _get_request_parameters(wrapper):
        """Run the wrapper's query and return the parameters of the resulting request."""
        request = SPARQLWrapper_Test._get_request(wrapper)
        parameters = SPARQLWrapper_Test._get_parameters_from_request(request)

        return parameters

    @staticmethod
    def _get_request_parameters_as_bytes(wrapper):
        """Like ``_get_request_parameters`` but with every value UTF-8 encoded on Python 3."""
        request = SPARQLWrapper_Test._get_request(wrapper)
        parameters = SPARQLWrapper_Test._get_parameters_from_request(request)

        if sys.version_info[0] < 3:
            return parameters

        # dict.iteritems() does not exist on Python 3; items() is the
        # equivalent and is the only branch reached there.
        return {k: [v.encode('utf-8') for v in vs] for k, vs in parameters.items()}

    @classmethod
    def setUpClass(cls):
        # clear value. Due to the order of test execution, the value of
        # urllib2._opener contains, for instance, keepalive.keepalive.HTTPHandler
        urllib2._opener = None

    def setUp(self):
        self.wrapper = SPARQLWrapper(endpoint='http://example.org/sparql')
        _victim.urlopener = urlopener

    def testConstructor(self):
        with self.assertRaises(TypeError, msg="SPARQLWrapper constructor should fail without arguments"):
            SPARQLWrapper()

        wrapper = SPARQLWrapper(endpoint='http://example.org/sparql/')

        self.assertEqual(XML, wrapper.returnFormat, 'default return format is XML')
        self.assertTrue(
            wrapper.agent.startswith('sparqlwrapper'),
            'default user-agent should start with "sparqlwrapper"'
        )

        wrapper = SPARQLWrapper(endpoint='http://example.org/sparql/', returnFormat='wrongformat')
        self.assertEqual(XML, wrapper.returnFormat, 'default return format is XML')

        wrapper = SPARQLWrapper(endpoint='http://example.org/sparql/', defaultGraph='http://example.org/default')
        parameters = self._get_request_parameters(wrapper)
        self.assertEqual(
            ['http://example.org/default'],
            parameters.get('default-graph-uri'),
            'default graph is set'
        )

    def testReset(self):
        self.wrapper.setMethod(POST)
        self.wrapper.setQuery('CONSTRUCT WHERE {?a ?b ?c}')
        self.wrapper.setReturnFormat(N3)
        self.wrapper.addParameter('a', 'b')
        self.wrapper.setOnlyConneg(True)

        request = self._get_request(self.wrapper)
        parameters = self._get_parameters_from_request(request)
        onlyConneg = self.wrapper.onlyConneg

        self.assertEqual('POST', request.get_method())
        self.assertTrue(parameters['query'][0].startswith('CONSTRUCT'))
        self.assertTrue('rdf+n3' in request.get_header('Accept'))
        self.assertTrue('a' in parameters)
        self.assertTrue(onlyConneg)

        self.wrapper.resetQuery()

        request = self._get_request(self.wrapper)
        parameters = self._get_parameters_from_request(request)
        onlyConneg = self.wrapper.onlyConneg

        self.assertEqual('GET', request.get_method())
        self.assertTrue(parameters['query'][0].startswith('SELECT'))
        self.assertFalse('rdf+n3' in request.get_header('Accept'))
        self.assertTrue('sparql-results+xml' in request.get_header('Accept'))
        self.assertFalse('a' in parameters)
        self.assertTrue(onlyConneg)

    def testSetReturnFormat(self):
        with warnings.catch_warnings(record=True) as w:
            self.wrapper.setReturnFormat('nonexistent format')
            self.assertEqual(1, len(w), "Warning due to non expected format")

        self.assertEqual(XML, self.wrapper.query().requestedFormat)

        self.wrapper.setReturnFormat(JSON)
        self.assertEqual(JSON, self.wrapper.query().requestedFormat)

        try:
            import rdflib_jsonld
            self.wrapper.setReturnFormat(JSONLD)
            self.assertEqual(JSONLD, self.wrapper.query().requestedFormat)
        except ImportError:
            self.assertRaises(ValueError, self.wrapper.setReturnFormat, JSONLD)

    def testsSupportsReturnFormat(self):
        self.assertTrue(self.wrapper.supportsReturnFormat(XML))
        self.assertTrue(self.wrapper.supportsReturnFormat(JSON))
        self.assertTrue(self.wrapper.supportsReturnFormat(TURTLE))
        self.assertTrue(self.wrapper.supportsReturnFormat(N3))
        self.assertTrue(self.wrapper.supportsReturnFormat(RDF))
        self.assertTrue(self.wrapper.supportsReturnFormat(RDFXML))
        self.assertTrue(self.wrapper.supportsReturnFormat(CSV))
        self.assertTrue(self.wrapper.supportsReturnFormat(TSV))
        self.assertFalse(self.wrapper.supportsReturnFormat('nonexistent format'))

        try:
            import rdflib_jsonld
            self.assertTrue(self.wrapper.supportsReturnFormat(JSONLD))
        except ImportError:
            self.assertFalse(self.wrapper.supportsReturnFormat(JSONLD))

    def testAddParameter(self):
        self.assertFalse(self.wrapper.addParameter('query', 'dummy'))
        self.assertTrue(self.wrapper.addParameter('param1', 'value1'))
        self.assertTrue(self.wrapper.addParameter('param1', 'value2'))
        self.assertTrue(self.wrapper.addParameter('param2', 'value2'))

        pieces = self._get_request_parameters(self.wrapper)

        self.assertTrue('param1' in pieces)
        self.assertEqual(['value1', 'value2'], pieces['param1'])
        self.assertTrue('param2' in pieces)
        self.assertEqual(['value2'], pieces['param2'])
        # The rejected 'dummy' value must not have replaced the real query.
        # (Comparing against the literal string 'query' could never fail.)
        self.assertNotEqual(['dummy'], pieces.get('query'))

    def testSetCredentials(self):
        request = self._get_request(self.wrapper)
        self.assertFalse(request.has_header('Authorization'))

        self.wrapper.setCredentials('login', 'password')
        request = self._get_request(self.wrapper)
        self.assertTrue(request.has_header('Authorization'))

        # expected header for login:password
        # should succeed for python 3 since pull request #72
        self.assertEqual("Basic bG9naW46cGFzc3dvcmQ=", request.get_header('Authorization'))

    def testAddCustomHttpHeader(self):
        request = self._get_request(self.wrapper)
        self.assertFalse(request.has_header('Foo'))

        # Add new header field name
        self.wrapper.addCustomHttpHeader('Foo', 'bar')
        request = self._get_request(self.wrapper)
        self.assertTrue(request.has_header('Foo'))
        self.assertEqual("bar", request.get_header('Foo'))

        # Override a new field name
        self.wrapper.addCustomHttpHeader('Foo', 'bar')
        request = self._get_request(self.wrapper)
        self.assertTrue(request.has_header('Foo'))
        self.assertEqual("bar", request.get_header('Foo'))
        self.wrapper.addCustomHttpHeader('Foo', 'bar_2')
        request = self._get_request(self.wrapper)
        self.assertTrue(request.has_header('Foo'))
        self.assertEqual("bar_2", request.get_header('Foo'))

        # Override header field name
        self.wrapper.addCustomHttpHeader('User-agent', 'Another UA')
        request = self._get_request(self.wrapper)
        self.assertEqual("Another UA", request.get_header('User-agent'))

    def testClearCustomHttpHeader(self):
        request = self._get_request(self.wrapper)
        self.assertFalse(request.has_header('Foo'))

        # Add new header field name
        self.wrapper.addCustomHttpHeader('Foo_1', 'bar_1')
        self.wrapper.addCustomHttpHeader('Foo_2', 'bar_2')
        self.wrapper.addCustomHttpHeader('Foo_3', 'bar_3')

        self.assertFalse(self.wrapper.clearCustomHttpHeader('Foo_4'))
        self.assertTrue(self.wrapper.clearCustomHttpHeader('Foo_3'))

        customHttpHeaders = self.wrapper.customHttpHeaders

        self.assertTrue('Foo_1' in customHttpHeaders)
        self.assertTrue('Foo_2' in customHttpHeaders)
        self.assertEqual('bar_1', customHttpHeaders['Foo_1'])
        self.assertEqual('bar_2', customHttpHeaders['Foo_2'])
        self.assertFalse(self.wrapper.clearCustomHttpHeader('Foo_3'), 'already cleaned')

    def testSetHTTPAuth(self):
        self.assertRaises(TypeError, self.wrapper.setHTTPAuth, 123)

        self.wrapper.setCredentials('login', 'password')
        request = self._get_request(self.wrapper)
        self.assertTrue(request.has_header('Authorization'))
        self.assertIsNone(urllib2._opener)

        self.wrapper.setHTTPAuth(DIGEST)
        self.assertIsNone(urllib2._opener)
        request = self._get_request(self.wrapper)
        self.assertFalse(request.has_header('Authorization'))
        self.assertEqual(self.wrapper.http_auth, DIGEST)
        self.assertIsInstance(urllib2._opener, urllib2.OpenerDirector)

        self.wrapper.setHTTPAuth(DIGEST)
        self.wrapper.setCredentials('login', 'password')
        request = self._get_request(self.wrapper)
        self.assertEqual(self.wrapper.http_auth, DIGEST)
        self.assertEqual(self.wrapper.user, "login")
        self.assertEqual(self.wrapper.passwd, "password")
        self.assertEqual(self.wrapper.realm, "SPARQL")
        self.assertNotEqual(self.wrapper.realm, "SPARQL Endpoint")

        self.wrapper.setHTTPAuth(DIGEST)
        self.wrapper.setCredentials('login', 'password', realm="SPARQL Endpoint")
        request = self._get_request(self.wrapper)
        self.assertEqual(self.wrapper.http_auth, DIGEST)
        self.assertEqual(self.wrapper.user, "login")
        self.assertEqual(self.wrapper.passwd, "password")
        self.assertEqual(self.wrapper.realm, "SPARQL Endpoint")
        self.assertNotEqual(self.wrapper.realm, "SPARQL")

        self.assertRaises(ValueError, self.wrapper.setHTTPAuth, 'OAuth')

        self.wrapper.http_auth = "OAuth"
        self.assertRaises(NotImplementedError, self._get_request, self.wrapper)

    def testSetQuery(self):
        self.wrapper.setQuery('PREFIX example: <http://example.org/INSERT/> SELECT * WHERE {?s ?p ?v}')
        self.assertEqual(SELECT, self.wrapper.queryType)

        self.wrapper.setQuery('PREFIX e: <http://example.org/> INSERT {e:a e:b e:c}')
        self.assertEqual(INSERT, self.wrapper.queryType)

        self.wrapper.setQuery("""#CONSTRUCT {?s ?p ?o}
                                   SELECT ?s ?p ?o
                                   WHERE {?s ?p ?o}""")
        self.assertEqual(SELECT, self.wrapper.queryType)

        with warnings.catch_warnings(record=True) as w:
            self.wrapper.setQuery('UNKNOWN {e:a e:b e:c}')
            self.assertEqual(SELECT, self.wrapper.queryType, 'unknown queries result in SELECT')

    def testSetQueryEncodingIssues(self):
        # further details from issue #35
        query = u'INSERT DATA { <urn:michel> <urn:says> "これはテストです" }'
        query_bytes = query.encode('utf-8')

        self.wrapper.setMethod(POST)
        self.wrapper.setRequestMethod(POSTDIRECTLY)

        self.wrapper.setQuery(query)
        request = self._get_request(self.wrapper)
        self.assertEqual(query_bytes, request.data)

        self.wrapper.setQuery(query_bytes)
        request = self._get_request(self.wrapper)
        self.assertEqual(query_bytes, request.data)

        self.wrapper.setRequestMethod(URLENCODED)

        self.wrapper.setQuery(query)
        parameters = self._get_request_parameters_as_bytes(self.wrapper)
        self.assertEqual(query_bytes, parameters['update'][0])

        self.wrapper.setQuery(query_bytes)
        parameters = self._get_request_parameters_as_bytes(self.wrapper)
        self.assertEqual(query_bytes, parameters['update'][0])

        # bytes that are not valid UTF-8 must be rejected
        with self.assertRaises(UnicodeDecodeError):
            self.wrapper.setQuery(query.encode('sjis'))

        # anything that is neither text nor bytes must be rejected
        with self.assertRaises(TypeError):
            self.wrapper.setQuery({'foo': 'bar'})

    def testSetTimeout(self):
        self.wrapper.setTimeout(10)
        self.assertEqual(10, self.wrapper.timeout)

        self.wrapper.resetQuery()
        self.assertEqual(None, self.wrapper.timeout)

    def testClearParameter(self):
        self.wrapper.addParameter('param1', 'value1')
        self.wrapper.addParameter('param1', 'value2')
        self.wrapper.addParameter('param2', 'value2')

        self.assertFalse(self.wrapper.clearParameter('query'))
        self.assertTrue(self.wrapper.clearParameter('param1'))

        pieces = self._get_request_parameters(self.wrapper)

        self.assertFalse('param1' in pieces)
        self.assertTrue('param2' in pieces)
        self.assertEqual(['value2'], pieces['param2'])

        self.assertFalse(self.wrapper.clearParameter('param1'), 'already cleaned')

    def testSetMethod(self):
        self.wrapper.setMethod(POST)
        request = self._get_request(self.wrapper)

        self.assertEqual("POST", request.get_method())

        self.wrapper.setMethod(GET)
        request = self._get_request(self.wrapper)

        self.assertEqual("GET", request.get_method())

    def testSetRequestMethod(self):
        self.assertEqual(URLENCODED, self.wrapper.requestMethod)

        self.wrapper.setRequestMethod(POSTDIRECTLY)
        self.assertEqual(POSTDIRECTLY, self.wrapper.requestMethod)

    def testIsSparqlUpdateRequest(self):
        self.wrapper.setQuery('DELETE WHERE {?s ?p ?o}')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('DELETE DATA { <urn:john> <urn:likes> <urn:surfing> }')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery("""
        PREFIX example: <http://example.org/SELECT/>
        BASE <http://example.org/SELECT>
        DELETE WHERE {?s ?p ?o}
        """)
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('WITH <urn:graph> DELETE DATA { <urn:john> <urn:likes> <urn:surfing> }')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('INSERT DATA { <urn:john> <urn:likes> <urn:surfing> }')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('WITH <urn:graph> INSERT DATA { <urn:john> <urn:likes> <urn:surfing> }')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('CREATE GRAPH <urn:graph>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('CLEAR GRAPH <urn:graph>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('DROP GRAPH <urn:graph>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('MOVE GRAPH <urn:graph1> TO GRAPH <urn:graph2>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('LOAD <http://localhost/file.rdf> INTO GRAPH <urn:graph>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('COPY <urn:graph1> TO GRAPH <urn:graph2>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

        self.wrapper.setQuery('ADD <urn:graph1> TO GRAPH <urn:graph2>')
        self.assertTrue(self.wrapper.isSparqlUpdateRequest())

    def testIsSparqlQueryRequest(self):
        self.wrapper.setQuery('SELECT * WHERE {?s ?p ?o}')
        self.assertTrue(self.wrapper.isSparqlQueryRequest())

        self.wrapper.setQuery("""
        PREFIX example: <http://example.org/DELETE/>
        BASE <http://example.org/MODIFY>
        ASK WHERE {?s ?p ?o}
        """)
        self.assertTrue(self.wrapper.isSparqlQueryRequest())
        self.assertFalse(self.wrapper.isSparqlUpdateRequest())

    def testQuery(self):
        qr = self.wrapper.query()
        self.assertTrue(isinstance(qr, QueryResult))

        request = qr.response.request  # possible due to mock above
        self.assertTrue(isinstance(request, Request))

        parameters = self._get_parameters_from_request(request)
        self.assertTrue('query' in parameters)
        self.assertTrue('update' not in parameters)

        self.wrapper.setMethod(POST)
        self.wrapper.setQuery('PREFIX e: <http://example.org/> INSERT {e:a e:b e:c}')
        parameters = self._get_request_parameters(self.wrapper)
        self.assertTrue('update' in parameters)
        self.assertTrue('query' not in parameters)
        # _returnFormatSetting = ["format", "output", "results"]
        self.assertTrue('format' not in parameters)
        self.assertTrue('output' not in parameters)
        self.assertTrue('results' not in parameters)

        # Each HTTP status produced by the error-generating opener must be
        # surfaced as its dedicated exception type. assertRaises reports a
        # missing exception directly, instead of having the failure swallowed
        # and re-labelled by a bare ``except``.
        expected_errors = (
            (400, QueryBadFormed),
            (401, Unauthorized),
            (404, EndPointNotFound),
            (414, URITooLong),
            (500, EndPointInternalError),
            (999, HTTPError),
        )
        for status, exception_class in expected_errors:
            _victim.urlopener = urlopener_error_generator(status)
            # TODO: check exception-format
            with self.assertRaises(exception_class):
                self.wrapper.query()

    def testQueryEncoding(self):
        query = 'INSERT DATA { <urn:michel> <urn:says> "é" }'

        wrapper = SPARQLWrapper('http://example.com:3030/example')
        wrapper.setMethod(POST)
        wrapper.setRequestMethod(URLENCODED)
        wrapper.setQuery(query)

        _victim.urlopener = urlopener_check_data_encoding
        wrapper.query()

    def testQueryAndConvert(self):
        _oldQueryResult = _victim.QueryResult

        class FakeQueryResult(object):
            def __init__(self, result):
                pass

            def convert(self):
                return True

        try:
            _victim.QueryResult = FakeQueryResult
            result = self.wrapper.queryAndConvert()
            self.assertEqual(True, result)
        finally:
            # always restore the real class for the remaining tests
            _victim.QueryResult = _oldQueryResult

    def testComments(self):
        # see issue #32
        self.wrapper.setQuery("""
# this is a comment
select * where { ?s ?p ?o }
""")
        self.assertTrue(self.wrapper.isSparqlQueryRequest())

    def testHashInPrefixes(self):
        # see issue #77
        self.wrapper.setQuery("""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
select * where { ?s ?p ?o }
""")
        self.assertTrue(self.wrapper.isSparqlQueryRequest())

    def testHashInPrefixComplex(self):
        # see issue #77
        self.wrapper.setQuery("""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX weather: <http://hal.zamia.org/weather/>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?location ?cityid ?timezone ?label
WHERE {
  ?location weather:cityid ?cityid .
  ?location weather:timezone ?timezone .
  ?location rdfs:label ?label .
}
""")
        self.assertTrue(self.wrapper.isSparqlQueryRequest())

    def testHashWithNoComments(self):
        # see issue #77
        query = """
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

                SELECT *
                WHERE {
                  ?s ?p ?o .
                }
        """
        parsed_query = self.wrapper._cleanComments(query)
        self.assertEqual(query, parsed_query)

    def testCommentBeginningLine(self):
        # see issue #77
        query = """
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                # a comment
                SELECT *
                WHERE {
                  ?s ?p ?o .
                }
        """
        expected_parsed_query = """
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

                SELECT *
                WHERE {
                  ?s ?p ?o .
                }
        """
        parsed_query = self.wrapper._cleanComments(query)
        self.assertEqual(expected_parsed_query, parsed_query)

    def testCommentEmtpyLine(self):
        # see issue #77
        query = """
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

                # a comment

                SELECT *
                WHERE {
                  ?s ?p ?o .
                }
        """
        expected_parsed_query = """
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>


                SELECT *
                WHERE {
                  ?s ?p ?o .
                }
        """
        parsed_query = self.wrapper._cleanComments(query)
        self.assertEqual(expected_parsed_query, parsed_query)

    def testCommentsFirstLine(self):
        # see issue #77
        query = """#CONSTRUCT {?s ?p ?o}
                   SELECT ?s ?p ?o
                   WHERE {?s ?p ?o}"""
        expected_parsed_query = """

                   SELECT ?s ?p ?o
                   WHERE {?s ?p ?o}"""

        parsed_query = self.wrapper._cleanComments(query)
        self.assertEqual(expected_parsed_query, parsed_query)

    @unittest.skip("issue #80")
    def testCommentAfterStatements(self):
        # see issue #77
        query = """
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

                SELECT *
                WHERE { # this is the where condition
                  ?s ?p ?o .
                }
        """
        expected_parsed_query = """
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

                SELECT *
                WHERE {
                  ?s ?p ?o .
                }
        """
        parsed_query = self.wrapper._cleanComments(query)
        self.assertEqual(expected_parsed_query, parsed_query)

    def testSingleLineQueryLine(self):
        # see issue #74
        query = "prefix whatever: <http://example.org/blah#> ASK { ?s ?p ?o }"
        parsed_query = self.wrapper._cleanComments(query)
        self.assertEqual(query, parsed_query)

        self.wrapper.setQuery(query)
        self.assertTrue(self.wrapper.isSparqlQueryRequest())

    def testOnlyConneg(self):
        # see issue #82
        query = "prefix whatever: <http://example.org/blah#> ASK { ?s ?p ?o }"
        self.wrapper.setOnlyConneg(False)
        self.wrapper.setQuery(query)
        request = self._get_request(self.wrapper)
        request_params = dict(parse_qsl(urlparse(request.get_full_url()).query))
        # Obviously _returnFormatSetting is not accessible from SPARQLWrapper,
        # so we copy&paste the possible values
        for returnFormatSetting in ["format", "output", "results"]:
            self.assertTrue(returnFormatSetting in request_params, "URL parameter '%s' was not sent, and it was expected" %returnFormatSetting)

        # ONLY Content Negotiation
        self.wrapper.resetQuery()
        self.wrapper.setOnlyConneg(True)
        self.wrapper.setQuery(query)
        request = self._get_request(self.wrapper)
        request_params = dict(parse_qsl(urlparse(request.get_full_url()).query))
        # Obviously _returnFormatSetting is not accessible from SPARQLWrapper,
        # so we copy&paste the possible values
        for returnFormatSetting in ["format", "output", "results"]:
            self.assertFalse(returnFormatSetting in request_params, "URL parameter '%s' was sent, and it was not expected (only Content Negotiation)" %returnFormatSetting)
class SparqlUpdateHook(HttpHook):
    """
    Interact with SPARQL endpoints.

    :param method: the API method to be called
    :type method: str
    :param http_conn_id: :ref:`http connection<howto/connection:http>` that has the SPARQL endpoint url
        i.e https://dbpedia.org/sparql and optional authentication credentials. Default
        headers can also be specified in the Extra field in json format.
    :type http_conn_id: str
    :param auth_type: The auth type for the service
    :type auth_type: AuthBase of python requests lib
    """

    default_conn_name = "sparql_endpoint"
    hook_name = "SPARQLUpdate"

    def __init__(
        self,
        method: str = "POST",
        http_conn_id: str = default_conn_name,
        auth_type: Any = HTTPBasicAuth,
    ) -> None:
        """
        Resolve the endpoint URL from the connection and configure the
        underlying ``SPARQLWrapper``.

        :raises ValueError: if the connection has no host
        """
        super().__init__(method, http_conn_id, auth_type)
        conn = self.get_connection(self.http_conn_id)

        if conn.host and "://" in conn.host:
            # the host already carries its scheme; use it verbatim
            self.endpoint = conn.host
        else:
            if not conn.host:
                # Logger.error() requires a message; calling it without one
                # raised TypeError and hid the ValueError below.
                self.log.error("Host cannot be empty")
                raise ValueError("Host cannot be empty")
            # schema defaults to HTTP
            schema = conn.schema if conn.schema else "http"
            self.endpoint = schema + "://" + conn.host

        self.sparql = SPARQLWrapper(self.endpoint)
        if self.auth_type in AUTH_TYPES:
            self.sparql.setHTTPAuth(AUTH_TYPES[self.auth_type])
        self.sparql.setCredentials(conn.login, conn.password)
        self.sparql.setMethod(METHODS[self.method])

    def sparql_update(self, query: str, headers: Optional[Dict[str, Any]] = None):
        """Execute a sparql query on a sparql endpoint.

        ``query`` is first interpreted as a path relative to the directory of
        this module; if such a file exists its content is executed, otherwise
        ``query`` itself is executed as query text. The response body is
        logged at INFO level.

        :param query: SPARQL Update query to be uploaded or request parameters
        :type query: str
        :param headers: additional headers to be passed through as a dictionary
        :type headers: dict
        """
        query_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), query)
        if os.path.isfile(query_path):
            with open(query_path) as f:
                query = f.read()
        else:
            self.log.warning("Query does not point to a file; executing as query text.")

        self.sparql.setQuery(query)
        if not self.sparql.isSparqlUpdateRequest():
            self.log.warning("Query is not an update query.")

        try:
            if headers is not None:
                for header_name, header_value in headers.items():
                    self.sparql.addCustomHttpHeader(header_name, header_value)
            self.log.info("Sending query to %s", self.endpoint)
            results = self.sparql.query()
            self.log.info(results.response.read())
        finally:
            # Leave the shared wrapper clean for the next call even when the
            # request failed.
            self.sparql.resetQuery()
            # resetQuery() also resets the HTTP method to its default, so
            # re-apply the method this hook was configured with.
            self.sparql.setMethod(METHODS[self.method])
            # Custom headers are not touched by resetQuery(); drop the ones
            # added for this call so they do not leak into later calls.
            for header_name in headers or ():
                self.sparql.clearCustomHttpHeader(header_name)

    def insert(self, triples, graph=None):
        """
        Send the given triples to the endpoint as one ``INSERT DATA`` update.

        :param triples: iterable of 3-item sequences whose items provide ``n3()``
        :param graph: optional IRI of the named graph to insert into
        """
        parts = ["INSERT DATA {\n"]
        if graph is not None:
            parts.append("GRAPH <{}> {{\n".format(graph))
        parts.extend(self.to_ntriples(t) for t in triples)
        parts.append("}\n")
        if graph is not None:
            # closes the outer INSERT DATA block opened before GRAPH
            parts.append("}")
        self.sparql_update("".join(parts))

    def insert_file(self, filename, graph=None):
        """Opens a file and inserts the data.

        Nothing is sent when the parsed graph is empty.

        :param filename: the source handed to ``Graph.parse``
        :param graph: optional IRI of the named graph to insert into
        """
        g = Graph()
        g.parse(filename)
        if len(g) == 0:
            return
        self.insert(g, graph)

    @staticmethod
    def to_ntriples(t, namespace_manager=None):
        """
        Render one triple as a statement line terminated by `` . `` and a newline.

        :param t: 3-item sequence (subject, predicate, object); each item must provide ``n3()``
        :param namespace_manager: passed through to each term's ``n3()``
        :return: the rendered statement
        """
        return "{} {} {} . \n".format(
            t[0].n3(namespace_manager),
            t[1].n3(namespace_manager),
            t[2].n3(namespace_manager),
        )
import os
from SPARQLWrapper import SPARQLWrapper, JSON
from helpers import log

# Shared client for read queries; the endpoint comes from the environment.
sparqlQuery = SPARQLWrapper(os.environ.get('MU_SPARQL_ENDPOINT'), returnFormat=JSON)
sparqlQuery.addCustomHttpHeader('mu-auth-sudo', 'true')

# Shared client for updates; these are always sent with POST.
sparqlUpdate = SPARQLWrapper(os.environ.get('MU_SPARQL_UPDATEPOINT'), returnFormat=JSON)
sparqlUpdate.method = 'POST'
sparqlUpdate.addCustomHttpHeader('mu-auth-sudo', 'true')


def query(the_query):
    """Execute the given SPARQL query (select/ask/construct) on the triple
    store and return the converted result in the configured return format
    (JSON by default)."""
    message = "execute query: \n" + the_query
    log(message)
    sparqlQuery.setQuery(the_query)
    response = sparqlQuery.query()
    return response.convert()


def update(the_query):
    """Execute the given SPARQL update query on the triple store.

    If the given query is not an update query, nothing is logged or sent.
    """
    sparqlUpdate.setQuery(the_query)
    if not sparqlUpdate.isSparqlUpdateRequest():
        return
    message = "execute query: \n" + the_query
    log(message)
    sparqlUpdate.query()