def str_query(uri, relation, value):
    """Build one SPARQL triple line for ``uri relation value``.

    Args:
        uri: Subject term, inserted into the line verbatim.
        relation: Predicate term, inserted into the line verbatim.
        value: Object value. Lists are stored as their JSON string dump and
            numpy values are converted to plain Python values before being
            escaped.

    Returns:
        A tab-indented, newline-terminated triple line, or an empty string
        when ``value`` is ``None``.
    """
    if value is None:
        return ""

    if isinstance(value, list):
        # Store lists as json string dumps
        value = json.dumps(value)
    if type(value).__module__ == np.__name__:
        # Convert the numpy value (e.g. int64) to a python value
        value = value.item()

    uri_relations = get_relation("data_type")
    if isinstance(uri_relations, str):
        # A bare string would turn the `in` test below into a substring
        # match (e.g. "" or any fragment of the relation name would count
        # as a URI relation), so wrap it in a tuple for exact matching.
        # NOTE(review): get_relation is defined elsewhere; a collection
        # return value is passed through unchanged - confirm its contract.
        uri_relations = (uri_relations,)

    if relation in uri_relations:
        escaped_value = escape_helpers.sparql_escape_uri(value)
    else:
        escaped_value = escape_helpers.sparql_escape(value)

    if isinstance(value, bool):
        # Fix for weird problem with booleans: lower-case the literal and
        # spell out the typed-literal datatype IRI.
        escaped_value = escaped_value.replace("False", "false")
        escaped_value = escaped_value.replace("True", "true")
        escaped_value = escaped_value.replace(
            "^^xsd:boolean",
            "^^<http://mu.semte.ch/vocabularies/typed-literals/boolean>")

    return "\t\t{uri} {relation} {value} . \n".format(
        uri=uri, relation=relation, value=escaped_value)
def update_modified(subject, modified=None):
    """Update the modification date of the given subject URI (string).

    Issues two SPARQL updates through ``update``: the first removes every
    ``DC.Modified`` value currently stored for the subject in ``graph``, the
    second inserts the new value into that same graph.

    Args:
        subject: URI of the resource as a plain string, without the
            surrounding angle brackets.
        modified: The new modification date. Defaults to the current time,
            resolved on every call.
    """
    if modified is None:
        # Resolve "now" per call: a `datetime.datetime.now()` default in the
        # signature is evaluated once, when the function is defined.
        modified = datetime.datetime.now()
    # Escape up front so an unsupported value fails before anything is deleted.
    escaped_modified = sparql_escape(modified)

    # Remove the previous date(s). Matching on a variable (rather than on the
    # new value) is what actually clears the old triple, and the IRIs must
    # not contain spaces inside the angle brackets.
    query = " WITH <%s> " % graph
    query += " DELETE {"
    query += " <%s> <%s> ?modified ." % (subject, DC.Modified)
    query += " }"
    query += " WHERE {"
    query += " <%s> <%s> ?modified ." % (subject, DC.Modified)
    query += " }"
    update(query)

    query = " INSERT DATA {"
    query += " GRAPH <%s> {" % graph
    query += " <%s> <%s> %s ." % (subject, DC.Modified, escaped_modified)
    query += " }"
    query += " }"
    update(query)
def insert_finalized(uuid):
    """Stamp the resource identified by ``uuid`` with an ``ext:finalized`` time.

    Builds a SPARQL INSERT/WHERE update against the graph
    ``<http://mu.semte.ch/application>`` that adds an ``ext:finalized``
    triple to every subject whose ``mu:uuid`` equals ``uuid``, and sends it
    with ``helpers.update``. The stored value is the ISO-format string of the
    current time obtained with ``timezone('Europe/Brussels')``.

    Args:
        uuid: The ``mu:uuid`` value of the resource to mark as finalized.
    """
    # Adjacent literals keep the query text identical while showing its shape.
    template = (
        " PREFIX ext: <http://mu.semte.ch/vocabularies/ext/>"
        " PREFIX mu: <http://mu.semte.ch/vocabularies/core/>"
        " PREFIX dct: <http://purl.org/dc/terms/>"
        " INSERT {{"
        " GRAPH <http://mu.semte.ch/application> {{"
        " ?s ext:finalized {datetime}."
        " }}"
        " }}"
        " WHERE {{"
        " GRAPH <http://mu.semte.ch/application> {{"
        " ?s mu:uuid {uuid} ."
        " }}"
        " }} "
    )

    escaped_uuid = escape_helpers.sparql_escape(uuid)
    brussels_now = datetime.now(timezone('Europe/Brussels'))
    # The timestamp is escaped as an ISO-format string, not as a datetime.
    escaped_timestamp = escape_helpers.sparql_escape(brussels_now.isoformat())

    helpers.update(
        template.format(uuid=escaped_uuid, datetime=escaped_timestamp))
def run():
    """Scrape every URL returned by the configured query and store its text.

    Runs the SPARQL SELECT query held in the ``URL_QUERY`` environment
    variable; each result binding must provide ``url``. Every URL is passed
    through ``scrape``, ``get_lang`` and ``cleanup``, and the cleaned text is
    inserted into the graph named by ``MU_APPLICATION_GRAPH`` as the object
    of ``<url> <CONTENT_PREDICATE>``, tagged with ``@<lang>`` when a language
    was detected.

    Failures are logged through ``helpers.log`` and never raised: a failed
    SELECT ends the run, any other failure only skips the affected URL.
    """
    insert_query_form = """ INSERT DATA {{ GRAPH <{0}> {{ <{1}> <{2}> {3}{4}. }} }} """

    select_query = os.getenv('URL_QUERY')
    try:
        results = helpers.query(select_query)["results"]["bindings"]
    except Exception as e:
        helpers.log("Querying SPARQL-endpoint failed:\n{}".format(e))
        # Nothing to iterate over; falling through would raise NameError
        # on the unbound `results`.
        return

    # Loop-invariant configuration, looked up once.
    graph = os.getenv('MU_APPLICATION_GRAPH')
    content_predicate = os.getenv('CONTENT_PREDICATE')

    for result in results:
        try:
            url = result["url"]["value"]
        except KeyError:
            helpers.log('SPARQL query must contain "?url"')
            # Skip this binding; otherwise `url` would be unbound or still
            # hold the previous iteration's value.
            continue

        # TODO: skip URLs that already have scraped text in the store.
        try:
            helpers.log("Getting URL \"{}\"".format(url))
            doc_before = scrape(url)
            if not doc_before:
                continue
            doc_lang = get_lang(doc_before)
            doc_after = cleanup(doc_before)
            insert_query = insert_query_form.format(
                graph,
                url,
                content_predicate,
                escape_helpers.sparql_escape(doc_after),
                '@' + doc_lang if doc_lang else '')
        except Exception as e:
            helpers.log("Something went wrong ...\n{}".format(str(e)))
            continue

        try:
            helpers.update(insert_query)
        except Exception as e:
            helpers.log("Querying SPARQL-endpoint failed:\n{}".format(e))
def str_query(uri, relation, value):
    """Build one SPARQL triple line for ``uri relation value``.

    Args:
        uri: Subject term, inserted into the line verbatim.
        relation: Predicate term, inserted into the line verbatim. The value
            is escaped as a URI when the relation is ``ext:unitUri`` and as
            a literal otherwise.
        value: Object value to escape.

    Returns:
        A tab-indented, newline-terminated triple line, or an empty string
        when ``value`` is ``None``.
    """
    if value is None:
        return ""

    # The trailing comma matters: ("ext:unitUri") is just a string, which
    # would make the `in` test a substring match (e.g. "ext:unit" or ""
    # would be treated as a URI relation).
    uri_relations = ("ext:unitUri",)

    if relation in uri_relations:
        escaped_value = escape_helpers.sparql_escape_uri(value)
    else:
        escaped_value = escape_helpers.sparql_escape(value)

    if isinstance(value, bool):
        # Fix for weird problem with booleans: lower-case the literal and
        # spell out the typed-literal datatype IRI.
        escaped_value = escaped_value.replace("False", "false")
        escaped_value = escaped_value.replace("True", "true")
        escaped_value = escaped_value.replace(
            "^^xsd:boolean",
            "^^<http://mu.semte.ch/vocabularies/typed-literals/boolean>")

    return "\t\t{uri} {relation} {value} . \n".format(
        uri=uri, relation=relation, value=escaped_value)