def longTerm(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of subject URIs that embed a year in their path.

    Each distinct URIRef subject of ``g_map`` is split on ``/``. A URI is
    considered "dated" when at least one segment parses as an integer
    strictly between 1990 and 2050. Each URI is counted at most once so the
    score stays within [0, 1].

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: graph exposing ``triples((s, p, o))``; its subjects are checked.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'`` and ``'score'``
        set and one ``'feedbacks'`` entry per URI without a year segment.
        The score is 1 when the mapping has no URIRef subject.
    """
    # NOTE(review): the original author flagged the return value for review.

    def is_year(segment):
        # Non-numeric path segments are simply not years.
        try:
            return 1990 < int(segment) < 2050
        except ValueError:
            return False

    result = metric()
    result['name'] = "Long term subject"

    subject_uris = {
        s for s, _, _ in g_map.triples((None, None, None))
        if isinstance(s, rdflib.term.URIRef)
    }

    points = 0
    for uri in subject_uris:
        if any(is_year(segment) for segment in uri.split('/')):
            points += 1
        else:
            result['feedbacks'].append(f"{str(uri)} should contain a date")

    nbPossible = len(subject_uris)
    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = points / nbPossible
    return result
def disjointWith(g_onto, liste_map, g_map, raw_data, g_link):
    """Penalise mapping triples whose two ends involve disjoint classes.

    For every triple ``(s, p, o)`` of ``g_map`` the ontology ``g_onto`` is
    searched in both directions: for each class ``c`` that one end is
    declared ``owl:disjointWith``, a misuse is recorded when the other end is
    typed ``c``, or otherwise once for each of its direct subclasses typed
    ``c``.

    Fixes compared with the previous version:
      * ``g_onto.triples(...) is not None`` was always true (``triples``
        returns a generator), so the subclass fallback was unreachable; the
        graph is now actually queried.
      * the OWL namespace was written with ``https://`` and therefore never
        matched; it is ``http://www.w3.org/2002/07/owl#``.
      * feedback messages were missing the spaces around "is disjoint with".

    Args:
        g_onto: ontology graph queried for disjointness, typing and subclasses.
        liste_map: unused here.
        g_map: mapping graph whose triples are checked.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'``, ``'score'`` and
        ``'feedbacks'`` filled. The score is 1 for an empty mapping.
    """
    owl_disjoint_with = rdflib.term.URIRef(
        'http://www.w3.org/2002/07/owl#disjointWith')
    rdfs_sub_class_of = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#subClassOf')
    # The file treats the relative IRI 'a' as a synonym of rdf:type.
    type_predicates = (
        rdflib.term.URIRef('a'),
        rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
    )

    def is_typed(node, cls):
        return any((node, pred, cls) in g_onto for pred in type_predicates)

    def count_conflicts(declaring, other):
        # Number of misuses where `declaring` is disjoint with a class that
        # `other` (or, failing that, one of its direct subclasses) is typed as.
        found = 0
        for _, _, cls in g_onto.triples((declaring, owl_disjoint_with, None)):
            if is_typed(other, cls):
                found += 1
            else:
                for sub, _, _ in g_onto.triples(
                        (None, rdfs_sub_class_of, other)):
                    if is_typed(sub, cls):
                        found += 1
        return found

    result = metric()
    result['name'] = "Misuse of disjointWith"
    points = 0
    nbPossible = 0
    for s, _, o in g_map.triples((None, None, None)):
        nbPossible += 1
        conflicts = count_conflicts(s, o) + count_conflicts(o, s)
        points += conflicts
        for _ in range(conflicts):
            result['feedbacks'].append(f"{str(o)} is disjoint with {str(s)}")

    # NOTE(review): several misuses can be found for a single triple, so
    # `points` may exceed `nbPossible` and the score may become negative.
    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = 1 - points / nbPossible
    return result
def existingVocab(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of constant URIs of the mapping that are known to LOV.

    Every URIRef subject, predicate and object of ``g_map`` that contains no
    ``$`` (i.e. is not a template) is looked up through the LOV term-search
    API with ``type=class``. A term earns a point when the response reports a
    non-zero ``total_results``. The predicate ``a`` is replaced by the full
    rdf:type IRI before the lookup.

    The term is now sent through ``params=`` so it is percent-encoded.
    Previously it was concatenated into the URL, so any ``#`` in the URI
    turned the remainder (including ``&type=class``) into a URL fragment that
    was never sent to the server.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: mapping graph whose terms are looked up.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'``, ``'score'`` and
        one ``'feedbacks'`` entry per unknown term. The score is 1 when no
        term is eligible.
    """
    lov_search_url = 'https://lov.linkeddata.es/dataset/lov/api/v2/term/search'
    type_shorthand = rdflib.term.URIRef('a')
    rdf_type = rdflib.term.URIRef(
        'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')

    result = metric()
    result['name'] = "Use of existing vocabulary"

    set_URIs = set()
    for s, p, o in g_map.triples((None, None, None)):
        if p == type_shorthand:
            p = rdf_type
        for term in (s, p, o):
            # A '$' marks a template, which is not a vocabulary term.
            if isinstance(term, rdflib.term.URIRef) and '$' not in term:
                set_URIs.add(term)

    points = 0
    for elt in set_URIs:
        request = requests.get(
            lov_search_url, params={'q': str(elt), 'type': 'class'})
        json_data = json.loads(request.text)
        if json_data["total_results"] != 0:
            points += 1
        else:
            result['feedbacks'].append(str(elt) + " is not referenced in LOV.")

    nbPossible = len(set_URIs)
    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = points / nbPossible
    return result
def localLink(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of subject links, built from the data, that resolve.

    Each subject of ``g_map`` containing a ``$(field)`` reference is expanded
    once per record of ``raw_data``: the text before the first ``$`` is
    concatenated with ``record['fields'][field]``. Every distinct resulting
    link is fetched with ``requests.get``; a link is broken when the request
    fails or the response status is an HTTP error.

    Fixes compared with the previous version:
      * subjects without a ``$(`` reference raised ``IndexError`` before the
        template guard was evaluated; they are now skipped.
      * the bare ``except`` is narrowed to ``requests`` exceptions, and a
        connection failure now counts as a broken link instead of aborting.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: mapping graph whose subjects are expanded.
        raw_data: iterable of records, each indexed as ``record['fields'][name]``.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'`` and ``'score'``
        set (1 minus the share of broken links, or 1 when there is no link).
    """
    result = metric()
    result['name'] = "Use of local links"

    subjects = {s for s, _, _ in g_map.triples((None, None, None))}

    links = set()
    for subject in subjects:
        pieces = subject.split('$(')
        if len(pieces) < 2:
            # No template reference: nothing to expand for this subject.
            continue
        prefix = subject.split('$')[0]
        field = pieces[1].split(')')[0]
        for record in raw_data:
            links.add(prefix + str(record['fields'][field]))

    points = 0
    for link in links:
        try:
            requests.get(link).raise_for_status()
        except requests.exceptions.RequestException:
            points += 1

    nbPossible = len(links)
    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = 1 - points / nbPossible
    return result
def localLinks(g_onto, liste_map, g_map, raw_data, g_link):
    """Score how many URI subjects of the mapping are connected to another.

    A URIRef subject of ``g_map`` is "linked" when one of its triples has a
    URIRef object that is itself a subject, or when it is the object of a
    triple whose subject is a URIRef subject. The score is the share of linked
    subjects; it stays at 1 when there are fewer than two subjects.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: mapping graph to inspect.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'``, ``'score'`` and
        one ``'feedbacks'`` entry per isolated subject.
    """
    result = metric()
    result['name'] = "Use of one global mapping"
    result['score'] = 1

    subjects = {
        s for s, _, _ in g_map.triples((None, None, None))
        if isinstance(s, URIRef)
    }

    def reaches_subject(terms):
        # True as soon as one term is a URI that is also a mapping subject.
        return any(isinstance(t, URIRef) and t in subjects for t in terms)

    isolated = 0
    for node in subjects:
        outgoing = (obj for _, _, obj in g_map.triples((node, None, None)))
        incoming = (subj for subj, _, _ in g_map.triples((None, None, node)))
        linked_out = reaches_subject(outgoing)
        linked_in = reaches_subject(incoming)
        if linked_out or linked_in:
            continue
        result['feedbacks'].append(
            f"resource {str(node)} is not linked to another resource")
        isolated += 1

    total = len(subjects)
    if total > 1:
        result['score'] = (total - isolated) / total
    return result
def humanDesc(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of subject URIs that carry an rdfs:label or rdfs:comment.

    Each distinct URIRef subject of ``g_map`` is looked up in ``g_link``; it
    earns a point when ``g_link`` holds at least one rdfs:label or
    rdfs:comment triple for it.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: mapping graph providing the subjects.
        raw_data: unused here.
        g_link: graph searched for labels and comments.

    Returns:
        The object returned by ``metric()`` with ``'score'``, ``'name'`` and
        one ``'feedbacks'`` entry per undescribed subject. The score is 0
        when the mapping has no URIRef subject.
    """
    # NOTE(review): the original author flagged the return value for review;
    # the code itself was reported as working.
    description_predicates = (
        rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label'),
        rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#comment'),
    )

    result = metric()
    result['score'] = 0
    result['name'] = "Usage of description or label"

    subjects = {
        s for s, _, _ in g_map.triples((None, None, None))
        if isinstance(s, rdflib.term.URIRef)
    }

    described = 0
    for uri in subjects:
        has_description = any(
            True
            for predicate in description_predicates
            for _ in g_link.triples((uri, predicate, None))
        )
        if has_description:
            described += 1
        else:
            result['feedbacks'].append(
                f"Human readable description of the ressource {str(uri)} is missing. Please add an rdfs:comment or rdfs:label"
            )

    total = len(subjects)
    if total:
        result['score'] = described / (total)
    return result
def Error(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of URIs used by the mapping that can be fetched.

    Every URIRef subject, predicate (except the shorthand ``a``) and object
    of ``g_map`` is passed through ``_process_URI`` and the distinct results
    are fetched with ``requests.get``. A URI is unavailable when the request
    itself fails or when the response carries an HTTP error status.

    Fixes compared with the previous version:
      * the bare ``except`` is narrowed to ``requests.exceptions.HTTPError``.
      * a failing request (DNS error, refused connection, invalid URL, ...)
        used to propagate and abort the metric; it is now reported as an
        unavailable URI.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: mapping graph providing the URIs.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'``, ``'score'``
        (1 minus the share of unavailable URIs, or 1 when there is none) and
        one ``'feedbacks'`` entry per unavailable URI.
    """
    result = metric()
    result['name'] = "Availability error"

    type_shorthand = rdflib.term.URIRef('a')
    set_URIs = set()
    for s, p, o in g_map.triples((None, None, None)):
        if isinstance(s, rdflib.term.URIRef):
            set_URIs.add(_process_URI(s))
        if isinstance(p, rdflib.term.URIRef) and p != type_shorthand:
            set_URIs.add(_process_URI(p))
        if isinstance(o, rdflib.term.URIRef):
            set_URIs.add(_process_URI(o))

    points = 0
    nbPossible = len(set_URIs)
    for elt in set_URIs:
        try:
            response = requests.get(elt)
        except requests.exceptions.RequestException as error:
            # No response at all, hence no status code to report.
            result['feedbacks'].append(
                f"It seems that {str(elt)} is not available: {error}")
            points += 1
            continue
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            result['feedbacks'].append(
                f"It seems that {str(elt)} is not available: returns HTTP code {response.status_code}"
            )
            points += 1

    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = 1 - points / nbPossible
    return result
def externalLink(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of predicate/object links, built from the data, that resolve.

    Each URIRef predicate or object of ``g_map`` containing a ``$(field)``
    reference is expanded once per record of ``raw_data``: the text before
    the first ``$`` is concatenated with ``record['fields'][field]``. Every
    expansion counts as one possible link; it is broken when the request
    fails or the response status is an HTTP error.

    Fixes compared with the previous version:
      * both bare ``except`` clauses are removed or narrowed.
      * a URI containing ``$`` but no ``$(`` used to look up the empty field
        name in every record; such URIs are now skipped.
      * a connection failure now counts as a broken link instead of aborting.
      * each distinct link is fetched once; repeated expansions reuse the
        first outcome while still being counted individually.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: mapping graph whose predicates and objects are expanded.
        raw_data: iterable of records, each indexed as ``record['fields'][name]``.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'`` and ``'score'``
        set (1 minus the share of broken links, or 1 when there is no link).
    """
    result = metric()
    result['name'] = "Use of external link"

    set_URIs = set()
    for _, p, o in g_map.triples((None, None, None)):
        if isinstance(p, rdflib.term.URIRef):
            set_URIs.add(p)
        if isinstance(o, rdflib.term.URIRef):
            set_URIs.add(o)

    def resolves(link):
        try:
            requests.get(link).raise_for_status()
        except requests.exceptions.RequestException:
            return False
        return True

    link_status = {}  # link -> True when it resolved
    points = 0
    nbPossible = 0
    for elt in set_URIs:
        pieces = elt.split('$(')
        if len(pieces) < 2:
            # Not a template with a field reference: nothing to expand.
            continue
        prefix = elt.split('$')[0]
        field = pieces[1].split(')')[0]
        for record in raw_data:
            link = prefix + str(record['fields'][field])
            nbPossible += 1
            if link not in link_status:
                link_status[link] = resolves(link)
            if not link_status[link]:
                points += 1

    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = 1 - points / nbPossible
    return result
def duplicatedRules(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the mapping by comparing its rule count with its graph size.

    When ``liste_map`` holds more entries than ``g_map``, the score becomes
    ``len(g_map) / len(liste_map)`` and the difference is reported as the
    number of duplicated rules. Otherwise the score stays at 1.

    Args:
        g_onto: unused here.
        liste_map: sized collection of mapping rules.
        g_map: sized mapping graph.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'``, ``'score'`` and,
        when duplicates are found, one ``'feedbacks'`` entry.
    """
    result = metric()
    result['name'] = "Duplicated rules"
    result['score'] = 1

    declared = len(liste_map)
    distinct = len(g_map)
    surplus = declared - distinct
    if surplus > 0:
        # Original comment: "property of an RDF graph" -- presumably the graph
        # keeps a single copy of identical triples; confirm with the caller.
        result['score'] = distinct / declared
        result['feedbacks'].append(f"{str(surplus)} rules are duplicated")
    return result
def equivalentClassesProperties(g_onto, liste_map, g_map, raw_data, g_link):
    """Score how many equivalent classes/properties of the mapping terms are used.

    For every subject or object of ``g_map`` the ontology ``g_onto`` is
    searched for ``owl:equivalentClass`` values, and for every predicate for
    ``owl:equivalentProperty`` values. An equivalent term is considered
    correctly used as soon as it appears in the mapping (original author's
    stated assumption).

    Fixes compared with the previous version:
      * ``owl:equivalentProperty`` was written with an ``https://``
        namespace and never matched; it is ``http://www.w3.org/2002/07/owl#``.
      * equivalent properties were only searched in subject/object position;
        they are now also searched in predicate position, as
        ``subClassesProperties`` does for super properties.
      * blank nodes are skipped in both loops; previously a blank-node
        equivalent property was counted as possible but could never score.

    Args:
        g_onto: ontology graph holding the equivalence statements.
        liste_map: unused here.
        g_map: mapping graph to inspect.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'``, ``'score'`` and
        one ``'feedbacks'`` entry per equivalent term absent from the mapping.
        The score is 1 when no equivalence applies.
    """
    owl_equivalent_class = rdflib.term.URIRef(
        'http://www.w3.org/2002/07/owl#equivalentClass')
    owl_equivalent_property = rdflib.term.URIRef(
        'http://www.w3.org/2002/07/owl#equivalentProperty')

    result = metric()
    result['name'] = "Usage of equivalent classes and properties"

    set_SO = set()
    set_P = set()
    for s, p, o in g_map.triples((None, None, None)):
        set_SO.update((s, o))
        set_P.add(p)

    points = 0
    nbPossible = 0

    for subobj in set_SO:
        for _, _, o2 in g_onto.triples((subobj, owl_equivalent_class, None)):
            if isinstance(o2, rdflib.term.BNode):
                continue
            nbPossible += 1
            if (None, None, o2) in g_map or (o2, None, None) in g_map:
                points += 1
            else:
                result['feedbacks'].append(
                    f"{str(o2)} equivalent class of {str(subobj)} is missing.")

    for pred in set_P:
        for _, _, o3 in g_onto.triples((pred, owl_equivalent_property, None)):
            if isinstance(o3, rdflib.term.BNode):
                continue
            nbPossible += 1
            used = ((None, o3, None) in g_map
                    or (None, None, o3) in g_map
                    or (o3, None, None) in g_map)
            if used:
                points += 1
            else:
                result['feedbacks'].append(
                    f"{str(o3)} equivalent property of {str(pred)} is missing")

    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = points / nbPossible
    return result
def subClassesProperties(g_onto, liste_map, g_map, raw_data, g_link):
    """Score how many super classes/properties of the mapping terms are used.

    For every subject or object of ``g_map`` the ontology ``g_onto`` is
    searched for ``rdfs:subClassOf`` values; a super class scores when it
    appears as a subject or an object of the mapping. For every predicate,
    ``rdfs:subPropertyOf`` values are searched; a super property scores when
    it appears as a predicate of the mapping. Blank nodes are ignored.

    Args:
        g_onto: ontology graph holding the hierarchy statements.
        liste_map: unused here.
        g_map: mapping graph to inspect.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'``, ``'score'`` and
        one ``'feedbacks'`` entry per missing super term. The score is 1 when
        no hierarchy statement applies.
    """
    sub_class_of = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#subClassOf')
    sub_property_of = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#subPropertyOf')

    result = metric()
    result['name'] = "Correct use of subclasses and properties"

    nodes = set()
    predicates = set()
    for s, p, o in g_map.triples((None, None, None)):
        nodes.add(s)
        nodes.add(o)
        predicates.add(p)

    found = 0
    expected = 0

    for node in nodes:
        for _, _, parent in g_onto.triples((node, sub_class_of, None)):
            if isinstance(parent, rdflib.term.BNode):
                continue
            expected += 1
            as_object = (None, None, parent) in g_map
            if as_object or (parent, None, None) in g_map:
                found += 1
            else:
                result['feedbacks'].append(
                    f"Super class {str(parent)} of {node} is missing.")

    for pred in predicates:
        for _, _, parent in g_onto.triples((pred, sub_property_of, None)):
            if isinstance(parent, rdflib.term.BNode):
                continue
            expected += 1
            if (None, parent, None) in g_map:
                found += 1
            else:
                result['feedbacks'].append(
                    f"Super property {str(parent)} of {pred} is missing.")

    result['score'] = 1 if expected == 0 else found / expected
    return result
def verticalCoverage(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of dataset fields referenced by the mapping.

    Field names are collected from every ``$(name`` reference found in the
    subjects and objects of ``g_map``. The score is the number of distinct
    names divided by the number of fields of the first record of
    ``raw_data``; it stays at 1 when there is no record or no field.

    Fixes compared with the previous version:
      * the pattern is a raw string (``'\\('`` in a plain literal is an
        invalid escape sequence) and is compiled and searched once.
      * every reference of a term is collected, not only the first one, so
        templates such as ``$(a)/$(b)`` count both fields.
      * the pattern requires the ``$`` prefix, so parentheses in ordinary
        text are no longer mistaken for field references.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: mapping graph whose subjects and objects are scanned.
        raw_data: sequence of records; only ``raw_data[0]['fields']`` is read.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'``, ``'score'`` and,
        when the score is below 1, one ``'feedbacks'`` entry.
    """
    # NOTE(review): names referenced by the mapping are not checked against
    # the actual field names, so the score can exceed 1.
    result = metric()
    result['name'] = "Data coverage"
    result['score'] = 1

    field_reference = re.compile(r'\$\(([^)]+)')
    set_dollarVal = set()
    for s, _, o in g_map.triples((None, None, None)):
        for term in (s, o):
            set_dollarVal.update(field_reference.findall(str(term)))

    if raw_data and len(raw_data[0]['fields']) > 0:
        nb_fields = len(raw_data[0]['fields'])
        result['score'] = len(set_dollarVal) / nb_fields
        if result['score'] < 1:
            result['feedbacks'].append(
                f"only {len(set_dollarVal)}/{nb_fields} fields of the dataset are mapped"
            )
    return result
def externalURIs(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of URI-to-URI links of the mapping absent from the ontology.

    A mapping triple is considered when both its subject and its object are
    URIRefs. It earns a point when ``g_onto`` holds no triple with the same
    subject and object (whatever the predicate), i.e. the mapping created a
    new link.

    Args:
        g_onto: ontology graph searched for existing links.
        liste_map: unused here.
        g_map: mapping graph to inspect.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'`` and ``'score'``
        set. The score is 1 when the mapping has no URI-to-URI triple.
    """
    # NOTE(review): the original author flagged the return value for review.
    result = metric()
    result['name'] = "Use of external URIs"

    uri_pairs = [
        (s, o)
        for s, _, o in g_map.triples((None, None, None))
        if isinstance(s, rdflib.term.URIRef)
        and isinstance(o, rdflib.term.URIRef)
    ]
    new_links = sum(1 for s, o in uri_pairs if (s, None, o) not in g_onto)

    if not uri_pairs:
        result['score'] = 1
    else:
        result['score'] = new_links / len(uri_pairs)
    return result
def longURI(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of subject URIs shorter than 80 characters.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: mapping graph providing the subjects.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'``, ``'score'``
        (1 minus the share of long URIs, or 1 when there is no URIRef
        subject) and one ``'feedbacks'`` entry per long URI.
    """
    result = metric()
    result['name'] = "Long URIs"

    subjects = {
        s for s, _, _ in g_map.triples((None, None, None))
        if isinstance(s, rdflib.term.URIRef)
    }

    too_long = 0
    for uri in subjects:
        if len(uri) < 80:
            continue
        too_long += 1
        result['feedbacks'].append(f"{str(uri)} is more than 79 characters")

    total = len(subjects)
    result['score'] = 1 if total == 0 else 1 - too_long / total
    return result
def sameAs(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of subject URIs that declare an owl:sameAs link.

    Each distinct URIRef subject of ``g_map`` earns one point when the
    mapping holds at least one ``owl:sameAs`` triple for it. Previously one
    point was granted per sameAs triple, so a subject with several links
    could push the score above 1.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: mapping graph to inspect.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'`` and ``'score'``
        set. When no subject has a sameAs link the score is 0 and a
        ``'feedbacks'`` entry says so.
    """
    owl_same_as = rdflib.term.URIRef('http://www.w3.org/2002/07/owl#sameAs')

    result = metric()
    result['name'] = "Use of sameAs properties"

    subjects = {
        s for s, _, _ in g_map.triples((None, None, None))
        if isinstance(s, rdflib.term.URIRef)
    }
    points = sum(1 for uri in subjects if (uri, owl_same_as, None) in g_map)

    if points < 1:
        result['score'] = 0
        result['feedbacks'].append("No sameAs defined")
    else:
        # points >= 1 implies at least one subject, so no division by zero.
        result['score'] = points / len(subjects)
    return result
def humanReadableURIs(g_onto, liste_map, g_map, raw_data, g_link):
    """Score the share of subject URIs accepted by ``test_HumanReadable``.

    For each distinct URIRef subject of ``g_map``, the text before the first
    ``$`` is submitted to ``test_HumanReadable``.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: mapping graph providing the subjects.
        raw_data: unused here.
        g_link: unused here.

    Returns:
        The object returned by ``metric()`` with ``'name'``, ``'score'`` and
        one ``'feedbacks'`` entry per rejected URI. The score is 1 when the
        mapping has no URIRef subject.
    """
    result = metric()
    result['name'] = "Human readable URIs"

    subjects = {
        s for s, _, _ in g_map.triples((None, None, None))
        if isinstance(s, rdflib.term.URIRef)
    }

    readable = 0
    for subject in subjects:
        uri = str(subject).split('$')[0]
        if test_HumanReadable(uri):
            readable += 1
        else:
            result['feedbacks'].append(
                f"It seems that {uri} is not a Human Readable URI")

    total = len(subjects)
    if total == 0:
        result['score'] = 1
    else:
        result['score'] = readable / total
    return result
def domainRange(g_onto, liste_map, g_map, raw_data, g_link):
    """Score whether mapping triples respect the domain and range of their predicate.

    Each triple of ``g_map`` is worth two points: one for the domain check
    and one for the range check. Domains, ranges, types and class hierarchy
    are read from ``g_link``.

    Fixes compared with the previous version:
      * the lookup of the object's rdf:type had a misplaced parenthesis
        (``triples((o, rdf_type)), None)``), passing a 2-tuple plus an extra
        argument instead of a triple pattern.
      * ``owl:equivalentClass`` was written with an ``https://`` namespace and
        never matched; it is ``http://www.w3.org/2002/07/owl#``.

    The block structure was reconstructed from a source whose indentation
    was lost; see the review notes below before relying on edge cases.

    Args:
        g_onto: unused here.
        liste_map: unused here.
        g_map: mapping graph whose triples are checked.
        raw_data: unused here.
        g_link: graph queried for rdfs:domain, rdfs:range, types,
            equivalent classes and rdfs:subClassOf.

    Returns:
        The object returned by ``metric()`` with ``'name'``, ``'score'`` and
        one ``'feedbacks'`` entry per failed check. The score is 1 for an
        empty mapping.
    """
    type_shorthand = rdflib.term.URIRef('a')
    rdf_type = rdflib.term.URIRef(
        'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
    # Property path matching either spelling of the typing predicate.
    type_path = type_shorthand | rdf_type
    # NOTE(review): this IRI is in the rdf-syntax namespace; rdfs:label lives
    # in rdf-schema#. Kept as in the original -- confirm the intent.
    rdf_label = rdflib.term.URIRef(
        'http://www.w3.org/1999/02/22-rdf-syntax-ns#label')
    rdfs_domain = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#domain')
    rdfs_range = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#range')
    rdfs_resource = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#Resource')
    rdfs_sub_class_of = rdflib.term.URIRef(
        'http://www.w3.org/2000/01/rdf-schema#subClassOf')
    owl_equivalent_class = rdflib.term.URIRef(
        'http://www.w3.org/2002/07/owl#equivalentClass')

    nbPossible = 0
    points = 0
    liste_O = []
    result = metric()
    result['name'] = "Domain and range of properties"

    for s, p, o in g_map.triples((None, None, None)):
        expected = ""
        nbPossible = nbPossible + 2

        # ---- domain check -------------------------------------------------
        boolean = True
        if p == type_shorthand or p == rdf_type:
            boolean = isinstance(o, rdflib.term.URIRef)
            expected = "URI"
        elif p == rdf_label:
            boolean = isinstance(o, rdflib.term.URIRef)
            expected = "URI"
        else:
            # NOTE(review): `boolean` starts True and is only ever set to True
            # in this branch, so a domain mismatch is never reported here.
            # For every declared domain of p
            for _, _, o2 in g_link.triples((p, rdfs_domain, None)):
                expected = str(o2)
                # Fetch the types of the subject
                for _, _, o3 in g_link.triples((s, type_path, None)):
                    # The domain differs from the subject type and is not the
                    # catch-all rdfs:Resource
                    if o2 != o3 and o2 != rdfs_resource:
                        liste_O.append(o3)  # remember the subject type
                        # For every class equivalent to the subject
                        for _, _, o4 in g_link.triples(
                                (s, owl_equivalent_class, None)):
                            # once per type of the subject
                            for _, _, o6 in g_link.triples(
                                    (s, type_path, None)):
                                liste_O.append(o4)  # remember it as well
                        # For every collected type, look at its super classes
                        for O in liste_O:
                            for _, _, o5 in g_link.triples(
                                    (O, rdfs_sub_class_of, None)):
                                if o2 == o5:  # a super class is the domain
                                    boolean = True
                    else:
                        boolean = True
        if boolean:
            points = points + 1
        else:
            result['feedbacks'].append(
                f"{str(p)} has the wrong domain. Expected a {expected}")

        # ---- range check --------------------------------------------------
        liste_O = []
        boolean = True
        for _, _, o2 in g_link.triples((p, rdfs_range, None)):
            boolean = False
            expected = str(o2)
            o3 = None
            if isinstance(o, rdflib.term.Literal):
                if o.datatype is not None:
                    o3 = o.datatype
                    if o2 == o3:
                        boolean = True
            if o3 is None:
                # NOTE(review): a literal without datatype or a blank node
                # leaves `boolean` False and is reported as a wrong range.
                if isinstance(o, rdflib.term.URIRef):
                    # Fetch the types of the object
                    for _, _, o3 in g_link.triples((o, rdf_type, None)):
                        if o2 != o3:
                            liste_O.append(o3)  # remember the object type
                            # NOTE(review): the equivalent classes and types
                            # below are looked up on the subject `s`, as in
                            # the domain check -- possibly meant to be `o`.
                            for _, _, o4 in g_link.triples(
                                    (s, owl_equivalent_class, None)):
                                for _, _, o6 in g_link.triples(
                                        (s, type_path, None)):
                                    liste_O.append(o4)
                            # For every collected type, look at its super
                            # classes
                            for O in liste_O:
                                for _, _, o5 in g_link.triples(
                                        (O, rdfs_sub_class_of, None)):
                                    if o2 == o5:  # a super class is the range
                                        boolean = True
                        else:
                            boolean = True
        if boolean:
            points = points + 1
        else:
            result['feedbacks'].append(
                f"{str(p)} has the wrong range. Expected a {expected}")

    if nbPossible == 0:
        result['score'] = 1
    else:
        result['score'] = 1 - (nbPossible - points) / nbPossible
    return result