def linkset_stats(linkset):
    """Display coverage statistics for the datasets aligned by a linkset.

    The SPARQL query reports, for every (dataset, datatype) pair declared as
    the subject or object target of ``linkset``, the number of distinct
    resources of that side found in the linkset graph (``?subtotal``), the
    number of distinct resources of that datatype in the dataset graph
    (``?total``) and the resulting percentage.

    :param linkset: URI of the linkset graph, WITHOUT the surrounding angle
        brackets (they are added by the query template).
    :return: None. The result is rendered by ``Qry.display_result``.
    """

    # DOUBLED BRACES ARE LITERAL SPARQL BRACES; {0} IS THE LINKSET URI
    template = """
    PREFIX void: <http://rdfs.org/ns/void#>
    PREFIX bdb: <http://vocabularies.bridgedb.org/ops#>
    PREFIX ll: <http://risis.eu/alignment/predicate/>
    SELECT DISTINCT ?dataset ?datatype ?alignsMechanism ?total (COUNT (DISTINCT ?RESOURCE) as ?subtotal)
    (ROUND((COUNT(DISTINCT ?RESOURCE) / ?total)*10000) /100 as ?percentage)
    {{
        <{0}> ll:alignsMechanism ?alignsMechanism .
        {{
            <{0}> bdb:subjectsDatatype ?datatype ;
                void:subjectsTarget ?dataset .
            graph <{0}>
            {{
                ?RESOURCE ?p ?o .
            }}
        }}
        UNION
        {{
            <{0}> bdb:objectsDatatype ?datatype ;
                void:objectsTarget ?dataset .
            graph <{0}>
            {{
                ?o ?p ?RESOURCE .
            }}
        }}
        {{
            SELECT (COUNT(DISTINCT ?RESOURCE) as ?total) ?dataset ?datatype
            {{
                graph ?dataset
                {{
                    ?RESOURCE a ?datatype .
                }}
            }} GROUP BY ?dataset ?datatype
        }}
    }} GROUP BY ?dataset ?datatype ?total ?alignsMechanism
    """

    stats_query = template.format(linkset)
    Qry.display_result(query=stats_query, spacing=60, is_activated=True)
def properties(graph, datatype=None):
    """Print and display the distinct predicates used in a graph.

    :param graph: the graph to inspect. Wrapped in angle brackets unless
        ``Ut.is_nt_format`` reports it as already formatted.
    :param datatype: optional predicate used to restrict the subjects (the
        query matches ``?subj <datatype> ?type``). When None, that triple
        pattern is emitted as a SPARQL comment so all subjects are considered.
    :return: None. The query is printed, then rendered by
        ``Qr.display_result``.
    """

    # WITHOUT A DATATYPE THE RESTRICTING TRIPLE IS TURNED INTO A SPARQL COMMENT
    if datatype is None:
        restriction_prefix = "# "
    else:
        restriction_prefix = ""

    # ENCLOSE THE TERMS IN <> UNLESS THEY ARE ALREADY IN N-TRIPLES FORMAT
    if Ut.is_nt_format(datatype) is not True:
        datatype = "<{}>".format(datatype)
    if Ut.is_nt_format(graph) is not True:
        graph = "<{}>".format(graph)

    predicates_query = """
    # <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
    SELECT DISTINCT ?predicate
    WHERE
    {{
        GRAPH {}
        {{
            {}?subj {} ?type .
            ?subj ?predicate ?obj .
        }}
    }}
    """.format(graph, restriction_prefix, datatype)

    print(predicates_query)
    Qr.display_result(query=predicates_query, spacing=50, limit=0, is_activated=True)
def ds_stats(dataset, datatype, display=True, optional_label=True, graph_list=None):
    """Build (and optionally display) statistics over the alignments of a dataset.

    For every linkset or lens involving ``dataset``/``datatype`` the query
    reports the number of distinct resources of the dataset found in the
    alignment (``?subTotal``), the number of distinct resources of type
    ``datatype`` in the dataset (``?total``) and the percentage covered.
    Six sub-selects are combined with UNION: linksets (cases 1-3) and lenses
    (cases 4-6), each for "same dataset on both sides", "dataset is the
    subject" and "dataset is the object".

    :param dataset: URI of the dataset graph, without angle brackets.
    :param datatype: URI of the resource type, without angle brackets.
    :param display: when it is exactly True, the query is also run and
        rendered through ``Qry.display_result``.
    :param optional_label: when truthy, the OPTIONAL clause fetching a
        ``skos:prefLabel`` for the alignment graph is active; otherwise it is
        commented out and ``?graph`` falls back to the graph URI.
    :param graph_list: optional iterable of alignment graph URIs (without
        angle brackets) used to restrict ``?graph_uri`` through a VALUES
        clause. When None, the VALUES clause is commented out.
    :return: the SPARQL query string.
    """

    # THE VALUES CLAUSE IS COMMENTED OUT (SPARQL '#') WHEN NO GRAPH IS PROVIDED
    append = "#" if graph_list is None else ""

    # LIST OF GRAPHS
    values = ""
    if graph_list is not None:
        values = "".join(" <{}>".format(alignment) for alignment in graph_list)

    # THE LABEL LOOKUP IS COMMENTED OUT (SPARQL '#') WHEN NOT REQUESTED
    comment_opt_lbl = '' if optional_label else '#'

    # NOTE(review): CASE 4 FOLLOWS void:target ONCE WHILE CASES 5 AND 6 USE
    # void:target+ ; LEFT UNTOUCHED AS THE INTENT CANNOT BE CONFIRMED FROM HERE.
    #
    # CASE 5 MATCHES ?RESOURCE IN SUBJECT POSITION (AS CASE 2 DOES): THE SELECTED
    # DATASET IS THE SUBJECT TARGET, SO ITS RESOURCES ARE THE SUBJECTS OF THE LINKS.
    query = """
    PREFIX void: <http://rdfs.org/ns/void#>
    PREFIX bdb: <http://vocabularies.bridgedb.org/ops#>
    PREFIX dataset: <http://risis.eu/dataset/>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX ll: <http://risis.eu/alignment/predicate/>
    PREFIX skos: <{2}>

    SELECT DISTINCT ?dataset ?alignsMechanism (COUNT(DISTINCT ?RESOURCE) as ?total) ?subTotal
    (ROUND((?subTotal / COUNT(DISTINCT ?RESOURCE) ) *10000)/100 as ?percentage) ?graph ?graph_uri
    {{
        {4}VALUES ?graph_uri {{ {5} }}

        graph <{0}>
        {{
            ?RESOURCE a <{1}> .
        }}

        # TO CHECK WHETHER THE SELECTED GRAPH IS RELATED TO THE SELECTED DATASET
        #?graph_uri
        # void:target*/(void:subjectsTarget|void:objectsTarget) <{0}> ;
        # void:target*/(bdb:subjectsDatatype|bdb:objectsDatatype) <{1}> .

        # DISPLYING THE USER UPDATED LABEL
        {3}OPTIONAL {{ graph ?rq {{ ?graph_uri skos:prefLabel ?label }} }}
        BIND (IF(bound(?label), ?label , ?graph_uri) AS ?graph)

        # 1. LINKSET WHERE SUBJECTS AND OBJECTS RESOURCES ARE IN THE SAME DATASET
        {{
            SELECT (count(DISTINCT ?RESOURCE) as ?subTotal) ?graph_uri ?dataset ?alignsMechanism
            {{
                ?graph_uri
                    bdb:subjectsDatatype <{1}> ;
                    void:subjectsTarget <{0}> ;
                    void:objectsTarget ?dataset ;
                    ll:alignsMechanism ?alignsMechanism .

                graph ?graph_uri
                {{
                    {{ ?RESOURCE ?SING ?oResource. }}
                    UNION
                    {{ ?oResource ?SING ?RESOURCE . }}
                }}
                FILTER (<{0}> = ?dataset)
            }} GROUP BY ?graph_uri ?dataset ?alignsMechanism
        }}

        UNION

        # 2. WHEN THE DATSET SELECTED IS THE SUBJECT
        {{
            SELECT (count(DISTINCT ?RESOURCE) as ?subTotal) ?graph_uri ?dataset ?alignsMechanism
            {{
                ?graph_uri
                    bdb:subjectsDatatype <{1}> ;
                    void:subjectsTarget <{0}> ;
                    void:objectsTarget ?dataset ;
                    ll:alignsMechanism ?alignsMechanism .

                graph ?graph_uri
                {{
                    ?RESOURCE ?SING ?oResource.
                }}
                FILTER (<{0}> != ?dataset)
            }} GROUP BY ?graph_uri ?dataset ?alignsMechanism
        }}

        UNION

        # 3. WHEN THE DATASET SELECTED IS THE OBJECT
        {{
            SELECT (count(DISTINCT ?RESOURCE) as ?subTotal) ?graph_uri ?dataset ?alignsMechanism
            {{
                ?graph_uri
                    bdb:objectsDatatype <{1}> ;
                    void:objectsTarget <{0}> ;
                    void:subjectsTarget ?dataset ;
                    ll:alignsMechanism ?alignsMechanism .

                graph ?graph_uri
                {{
                    ?oResource ?SING ?RESOURCE .
                }}
                FILTER (<{0}> != ?dataset)
            }} GROUP BY ?graph_uri ?dataset ?alignsMechanism
        }}

        UNION

        # 4. RUN THIS WHEN THE GRAPH IS A LENS AND WHEN SUBJECTS AND OBJECTS RESOURCES ARE IN THE SAME DATASET
        {{
            SELECT (count(DISTINCT ?RESOURCE) as ?subTotal) ?graph_uri ?dataset ?alignsMechanism
            {{
                ?graph_uri
                    void:target ?linkset ;
                    ll:operator ?alignsMechanism .

                ?linkset
                    bdb:subjectsDatatype <{1}> ;
                    void:subjectsTarget <{0}> ;
                    void:objectsTarget ?dataset .

                graph ?graph_uri
                {{
                    {{ ?RESOURCE ?SING ?oResource. }}
                    UNION
                    {{ ?oResource ?SING ?RESOURCE . }}
                }}
                FILTER (<{0}> = ?dataset)
            }} GROUP BY ?graph_uri ?dataset ?alignsMechanism
        }}

        UNION

        # 5. RUN THIS WHEN THE GRAPH IS A LENS AND WHEN THE SELECTED DATASET IS THE SUBJECT
        {{
            SELECT (count(DISTINCT ?RESOURCE) as ?subTotal) ?graph_uri ?dataset ?alignsMechanism
            {{
                ?graph_uri
                    void:target+ ?linkset ;
                    ll:operator ?alignsMechanism .

                ?linkset
                    bdb:subjectsDatatype <{1}> ;
                    void:subjectsTarget <{0}> ;
                    void:objectsTarget ?dataset .

                graph ?graph_uri
                {{
                    ?RESOURCE ?SING ?oResource .
                }}
                FILTER (<{0}> != ?dataset)
            }} GROUP BY ?graph_uri ?dataset ?alignsMechanism
        }}

        UNION

        #6. RUN THIS WHEN THE GRAPH IS A LENS AND WHEN THE SELECTED DATASET IS THE OBJECT
        {{
            SELECT (count(DISTINCT ?RESOURCE) as ?subTotal) ?graph_uri ?dataset ?alignsMechanism
            {{
                ?graph_uri
                    void:target+ ?linkset ;
                    ll:operator ?alignsMechanism .

                ?linkset
                    bdb:objectsDatatype <{1}> ;
                    void:objectsTarget <{0}> ;
                    void:subjectsTarget ?dataset .

                graph ?graph_uri
                {{
                    ?oResource ?SING ?RESOURCE .
                }}
                FILTER (<{0}> != ?dataset)
            }} GROUP BY ?graph_uri ?dataset ?alignsMechanism
        }}
    }}
    GROUP BY ?graph ?subTotal ?dataset ?alignsMechanism ?graph_uri having (?subTotal > 0)
    ORDER BY ?dataset ?alignsMechanism
    """.format(dataset, datatype, Ns.skos, comment_opt_lbl, append, values)

    if display is True:
        Qry.display_result(query=query, spacing=100, is_activated=True)

    return query
def visit_data():
    """Print sample queries over the ETER, GRID and OrgReg datasets.

    Each query selects an entity with its type, name and geographic
    coordinates (plus optional staff/student figures for ETER). Only the ETER
    query is executed, through ``Qr.display_result``; the GRID and OrgReg
    queries are printed but not run.

    :return: None.
    """

    # THE TEMPLATES TAKE NO ARGUMENT: .format() ONLY TURNS THE DOUBLED BRACES
    # INTO SINGLE SPARQL BRACES.

    # ETER
    eter_query = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX pred: <http://risis.eu/eter_2014/ontology/predicate/>
    SELECT *
    {{
        GRAPH <http://risis.eu/dataset/eter_2014>
        {{
            ?entity
                rdf:type ?type ;
                pred:ETER_ID_Year ?year ;
                pred:Institution_Name ?name ;
                pred:Geographic_coordinates__latitude ?lat ;
                pred:Geographic_coordinates__longitude ?long .
            OPTIONAL {{ ?entity pred:Total_number_of_full_professors ?professors .}}
            OPTIONAL {{ ?entity pred:Total_academic_staff_FTE ?academic_staff_FTE . }}
            OPTIONAL {{ ?entity pred:Total_academic_staff_HC ?academic_staff_HC }}
            OPTIONAL {{ ?entity pred:Total_students_enrolled_at_ISCED_5 ?ISCED_5. }}
            OPTIONAL {{ ?entity pred:Total_students_enrolled_at_ISCED_6 ?ISCED_6. }}
            OPTIONAL {{ ?entity pred:Total_students_enrolled_at_ISCED_7 ?ISCED_7. }}
            OPTIONAL {{ ?entity pred:Total_students_enrolled_at_ISCED_8 ?ISCED_8. }}
        }}
    }} LIMIT 50
    """.format()
    print(eter_query)
    Qr.display_result(query=eter_query, spacing=50, limit=0, is_activated=True)

    # GRID (PRINTED ONLY)
    grid_query = """
    PREFIX coord: <http://www.w3.org/2003/01/geo/wgs84_pos#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX pred: <http://www.grid.ac/ontology/>
    SELECT *
    {{
        GRAPH <http://risis.eu/dataset/grid_20180208>
        {{
            ?entity
                rdf:type ?type ;
                rdfs:label ?name .
            ?entity pred:hasAddress/coord:long ?long .
            ?entity pred:hasAddress/coord:lat ?lat .
        }}
    }} LIMIT 50
    """.format()
    print(grid_query)
    # Qr.display_result(query=grid_query, spacing=50, limit=0, is_activated=True)

    # ORGREG (PRINTED ONLY)
    # THE LATITUDE PREDICATE IS BOUND TO ?lat AND THE LONGITUDE PREDICATE TO
    # ?long SO THE VARIABLES MEAN THE SAME AS IN THE ETER AND GRID QUERIES.
    orgreg_query = """
    PREFIX pred: <http://risis.eu/orgreg_20170718/ontology/predicate/>
    SELECT *
    {{
        GRAPH <http://risis.eu/dataset/orgreg_20170718>
        {{
            ?entity
                pred:characteristicsOf/pred:Type_of_entity ?type ;
                pred:Entity_current_name_English ?name .
            ?entity pred:locationOf/pred:Geographical_coordinates__latitude ?lat .
            ?entity pred:locationOf/pred:Geographical_coordinates__longitude ?long .
        }}
    }} LIMIT 500
    """.format()
    print(orgreg_query)
def coverage_query(merged_lens, dataset, distribution=True, minus=False, datasets_involve=False, activated=False):
    """Display, per country, how many OrgReg universities a lens covers.

    The same query template serves three purposes, selected by commenting
    parts of it out with SPARQL '#':

    * ``distribution`` True: the whole lens block is commented out, giving
      the original per-country distribution of universities in ``dataset``.
    * ``distribution`` False, ``minus`` False: only the MINUS keyword is
      commented out, so the universities are joined with the lens (coverage).
    * ``distribution`` False, ``minus`` True: the MINUS block is active,
      giving the universities NOT found in the lens.

    :param merged_lens: URI of the lens graph. Used between angle brackets in
        the main query and passed through ``Ut.to_nt_format`` for the
        datasets query.
    :param dataset: URI of the dataset graph, without angle brackets.
    :param distribution: see above (compared with ``is True``/``is False``).
    :param minus: see above (compared with ``is True``).
    :param datasets_involve: when exactly True, first display the datasets
        targeted by the lens.
    :param activated: the function does nothing but print a notice and return
        when this is exactly False.
    :return: None. Results are rendered by ``Qr.display_result``.
    """

    if activated is False:
        print("THE FUNCTION [coverage_query] IS NOT ACTIVATED.")
        return None

    # FETCH ALL DATASETS INVOLVED IN THE LENS
    if datasets_involve is True:
        # THE void PREFIX MUST BE DECLARED FOR THE PREFIXED NAMES USED BELOW
        datasets_q = """
        PREFIX void: <http://rdfs.org/ns/void#>
        SELECT DISTINCT ?datasets
        {{
            {} void:target*/(void:subjectsTarget|void:objectsTarget) ?datasets.
        }}
        """.format(Ut.to_nt_format(merged_lens))
        Qr.display_result(query=datasets_q, spacing=50, limit=50, is_activated=True)

    # "# " DISABLES EVERY LINE OF THE LENS BLOCK IN THE TEMPLATE BELOW
    if distribution is False:
        distribution_str = ""
    else:
        print("\n>>> COMPUTING THE ORIGINAL DISTRIBUTION OF : {}".format(dataset))
        distribution_str = "# "

    # "# " DISABLES ONLY THE MINUS KEYWORD, TURNING THE EXCLUSION INTO A JOIN
    if minus is True:
        if distribution is True:
            print(">>> WARNING: DISTRIBUTION MUST BE SET TO [FALSE] IF MINUS IS SET TO [TRUE]")
        minus_str = ""
    else:
        if distribution is True:
            print(">>> WARNING: FOR COVERAGE DISTRIBUTION, DISTRIBUTION MUST BE SET TO FALSE")
        else:
            print("\n>>> COVERAGE OF {}".format(dataset))
        minus_str = "# "

    # {0}: LENS  {1}: DATASET  {2}: DISTRIBUTION TOGGLE  {3}: MINUS TOGGLE
    query = """
    PREFIX lens:<http://risis.eu/lens/>
    PREFIX dataset:<http://risis.eu/dataset/>
    PREFIX property:<http://risis.eu/orgreg_20170718/ontology/predicate/>
    # SELECT DISTINCT ?entity ?country
    SELECT ?country (count(DISTINCT ?entity) as ?total)
    # SELECT ?country (count( ?country) as ?total)
    {{
        {{
            SELECT DISTINCT ?entity
            {{
                {{
                    # ALL UNIVERSITIES IN ORGREG
                    GRAPH <{1}>
                    {{
                        ?entity a <http://risis.eu/orgreg_20170718/ontology/class/University> .
                    }}
                }}
                {2}{3}MINUS # ==> FETCH ALL UNIVERSITIES IN THE ORGREG DATASET THAT ARE NOT FOUND
                {2}{{
                {2}    # LINKS (ENTITIES) FOUND FOR THE SIX DATASETS
                {2}    BIND(<{0}> AS ?lens)
                {2}    {{ GRAPH ?lens {{ ?entity ?pred ?obj . }}}}
                {2}    UNION
                {2}    {{ GRAPH ?lens {{ ?subj ?pred ?entity . }}}}
                {2}}}
            }}
        }}

        # FETCHING THE COUNTRY FOR EACH ORGANIZATION
        GRAPH <{1}>
        {{
            ?entity a <http://risis.eu/orgreg_20170718/ontology/class/University> .
            OPTIONAL {{ ?entity property:locationOf/property:Country_of_location ?_country. }}
            BIND (IF(bound(?_country), ?_country , "NONE") AS ?country)
        }}
    }} GROUP BY ?country ORDER BY ?country
    """.format(merged_lens, dataset, distribution_str, minus_str)

    Qr.display_result(query=query, spacing=50, limit=50, is_activated=True)