def get_dcat_metadata(self, graph):
    """Collect metadata for the first ``dcat:Dataset`` found in ``graph``.

    The first subject typed ``dcat:Dataset`` is handed to
    ``self.get_metadata``; the result is then completed with publisher,
    creator and distribution details read directly from the graph.

    Args:
        graph: Graph supporting slice lookup, ``value()`` and ``objects()``
            (presumably an ``rdflib.Graph`` -- confirm against the caller).

    Returns:
        dict: The collected metadata. When no ``dcat:Dataset`` is present
        the dict only carries ``object_type`` set to ``'Dataset'``.
    """
    dcat_metadata = dict()
    DCAT = Namespace("http://www.w3.org/ns/dcat#")
    datasets = list(graph[:RDF.type:DCAT.Dataset])

    if not datasets:
        self.logger.info(
            'FsF-F2-01M : Found DCAT content but could not correctly parse metadata'
        )
        # In order to keep DCAT in the found metadata list, we need to pass
        # at least one metadata value.
        dcat_metadata['object_type'] = 'Dataset'
        return dcat_metadata

    dataset = datasets[0]
    dcat_metadata = self.get_metadata(graph, dataset, type='Dataset')

    # publisher: resolve a name when only a URL (or nothing) is known.
    # The None test comes first so is_url() is never handed None.
    publisher = dcat_metadata.get('publisher')
    if publisher is None or idutils.is_url(publisher):
        publisher_node = graph.value(dataset, DCTERMS.publisher)
        # FOAF preferred, DCAT compliant
        publisher_name = graph.value(publisher_node, FOAF.name)
        if publisher_name is None:
            # in some cases a dc title is used (not exactly DCAT compliant)
            publisher_name = graph.value(publisher_node, DCTERMS.title)
        dcat_metadata['publisher'] = publisher_name

    # creator: same approach, but keep the existing value when no name
    # could be resolved at all.
    creator = dcat_metadata.get('creator')
    if creator is None or idutils.is_url(creator):
        resolved_names = (
            graph.value(creator_node, FOAF.name)
            for creator_node in graph.objects(dataset, DCTERMS.creator))
        # Creators without a FOAF name yield None; drop those instead of
        # storing a list such as [None].
        creator_names = [name for name in resolved_names if name is not None]
        if creator_names:
            dcat_metadata['creator'] = creator_names

    # distribution
    dcat_metadata['object_content_identifier'] = []
    for dist in graph.objects(dataset, DCAT.distribution):
        durl = graph.value(dist, DCAT.accessURL)
        # Taking only one just to check if a licence is available. A
        # distribution lacking the statement must not wipe out a value that
        # was already found, hence the fallback to the current entry.
        dcat_metadata['license'] = (graph.value(dist, DCTERMS.license) or
                                    dcat_metadata.get('license'))
        # TODO: check if this really works..
        dcat_metadata['access_rights'] = (
            graph.value(dist, DCTERMS.accessRights) or
            graph.value(dist, DCTERMS.rights) or
            dcat_metadata.get('access_rights'))
        dtype = graph.value(dist, DCAT.mediaType)
        # The DCAT property is dcat:byteSize ("bytesSize" is not a DCAT term).
        dsize = graph.value(dist, DCAT.byteSize)
        dcat_metadata['object_content_identifier'].append({
            # str(None) would store the literal text 'None'.
            'url': str(durl) if durl is not None else None,
            'type': str(dtype) if dtype is not None else None,
            'size': dsize
        })
        # TODO: add provenance metadata retrieval

    return dcat_metadata
def evaluate(self):
    """Evaluate the licence metric against the merged metadata.

    Reads ``license`` from ``self.fuji.metadata_merged`` and tries to map
    every string entry to an SPDX record, by URL when the value is a URL
    and by name otherwise. Having any licence value passes test
    ``FsF-R1.1-01M-1`` (score 1, maturity 1); at least one SPDX match
    additionally passes ``FsF-R1.1-01M-2`` (score 2, maturity 3).

    The outcome is stored on ``self.result`` (``output``, ``metric_tests``,
    ``score``, ``maturity`` and, on success, ``test_status``); nothing is
    returned.
    """
    self.result = License(id=self.metric_number,
                          metric_identifier=self.metric_identifier,
                          metric_name=self.metric_name)
    licenses_list = []
    specified_licenses = self.fuji.metadata_merged.get('license')
    self.score.earned = 0
    spdx_found = False

    if specified_licenses is not None and specified_licenses != []:
        self.logger.log(
            self.fuji.LOG_SUCCESS,
            '{0} : Found licence information in metadata'.format(
                self.metric_identifier))
        # licenses may be a string or a list depending on metadata schemas
        if isinstance(specified_licenses, str):
            specified_licenses = [specified_licenses]

        for license_value in specified_licenses:
            license_output = LicenseOutputInner()
            license_output.license = license_value

            # Reset the lookup result for every entry: a non-string entry
            # (e.g. a dict) is never looked up and must neither raise a
            # NameError nor inherit the previous entry's SPDX data.
            spdx_html, spdx_osi = None, None
            if isinstance(license_value, str):
                if idutils.is_url(license_value):
                    spdx_html, spdx_osi = self.lookup_license_by_url(
                        license_value, self.metric_identifier)
                else:  # maybe licence name
                    spdx_html, spdx_osi = self.lookup_license_by_name(
                        license_value, self.metric_identifier)

            if not spdx_html:
                self.logger.warning(
                    '{0} : NO SPDX license representation (spdx url, osi_approved) found'
                    .format(self.metric_identifier))
            else:
                self.logger.log(
                    self.fuji.LOG_SUCCESS,
                    '{0} : Found SPDX license representation (spdx url, osi_approved)'
                    .format(self.metric_identifier))
                spdx_found = True

            license_output.details_url = spdx_html
            license_output.osi_approved = spdx_osi
            licenses_list.append(license_output)

        # Any licence statement at all satisfies the first test.
        self.result.test_status = "pass"
        self.setEvaluationCriteriumScore('FsF-R1.1-01M-1', 1, 'pass')
        self.score.earned = 1
        self.maturity = 1
        if spdx_found:
            self.setEvaluationCriteriumScore('FsF-R1.1-01M-2', 1, 'pass')
            self.score.earned = 2
            self.maturity = 3
    else:
        self.logger.warning(
            '{0} : License information unavailable in metadata'.format(
                self.metric_identifier))

    self.result.output = licenses_list
    self.result.metric_tests = self.metric_tests
    self.result.score = self.score
    self.result.maturity = self.maturity
def isLicense(self, value, metric_id):
    """Tell whether ``value`` can be matched to a known licence.

    URL values are resolved with ``self.lookup_license_by_url``, all other
    values with ``self.lookup_license_by_name``.

    Args:
        value: Candidate licence URL or licence name.
        metric_id: Metric identifier handed through to the lookup method.

    Returns:
        bool: ``True`` when the lookup yields a truthy first or second
        result element, ``False`` otherwise.
    """
    if idutils.is_url(value):
        lookup = self.lookup_license_by_url
    else:
        lookup = self.lookup_license_by_name
    details_url, osi_flag = lookup(value, metric_id)
    return bool(details_url or osi_flag)
def get_related_identifiers_url(record: Record, doi_prefix: str) -> List[Dict]:
    """Create related identifiers URL.

    Args:
        record (Record): Record API Object from where the related
            identifiers will be extracted.

        doi_prefix (str): GEO Knowledge Hub DOI Prefix.

    Returns:
        List[Dict]: List of record related identifiers (with URL resolved).
        Every entry is a deep copy of the original one plus a ``url`` key;
        ``url`` is an empty string when the entry has no identifier.

    Note:
        The `doi_prefix` is used to check if the items are managed by the
        GEO Knowledge Hub.
    """
    # extracting related identifiers (tolerate an explicit null value)
    related_identifiers = py_.get(record, "metadata.related_identifiers",
                                  []) or []

    new_related_identifiers = []
    for related_identifier in related_identifiers:
        related_identifier_obj = py_.set_(py_.clone_deep(related_identifier),
                                          "url", "")

        identifier = related_identifier.get("identifier")
        if not identifier:
            # Nothing to resolve: keep the entry, with an empty URL, instead
            # of failing with a KeyError on the missing key.
            new_related_identifiers.append(related_identifier_obj)
            continue

        # A missing scheme is passed on as None rather than raising.
        scheme = related_identifier.get("scheme")

        try:
            if idutils.is_url(identifier):
                related_identifier_obj["url"] = identifier
            else:
                # checking if the doi is internal
                if idutils.is_doi(identifier):
                    identifier_split = identifier.split("/")

                    if doi_prefix and identifier_split[0] == doi_prefix:
                        related_identifier_obj["url"] = posixpath.join(
                            "/records", identifier_split[1])

                if not related_identifier_obj["url"]:
                    related_identifier_obj["url"] = idutils.to_url(
                        identifier, scheme, "https")
        except Exception:
            # Best effort: fall back to the raw identifier. Only regular
            # errors are caught so KeyboardInterrupt / SystemExit propagate.
            related_identifier_obj["url"] = identifier

        new_related_identifiers.append(related_identifier_obj)
    return new_related_identifiers
def evaluate(self):
    """Evaluate the licence metric against the merged metadata.

    Reads ``license`` from ``self.fuji.metadata_merged`` and tries to map
    every string entry to an SPDX record, by URL when the value is a URL
    and by name otherwise. Any licence value earns ``self.total_score``
    and sets the test status to ``"pass"``; without one the earned score
    is 0.

    The outcome is stored on ``self.result`` (``output``, ``score`` and, on
    success, ``test_status``); nothing is returned.
    """
    self.result = License(id=self.fuji.count,
                          metric_identifier=self.metric_identifier,
                          metric_name=self.metric_name)
    licenses_list = []
    specified_licenses = self.fuji.metadata_merged.get('license')

    if specified_licenses is not None and specified_licenses != []:
        # licenses may be a string or a list depending on metadata schemas
        if isinstance(specified_licenses, str):
            specified_licenses = [specified_licenses]

        for license_value in specified_licenses:
            license_output = LicenseOutputInner()
            license_output.license = license_value

            # Reset the lookup result for every entry: a non-string entry
            # (e.g. a dict) is never looked up and must neither raise a
            # NameError nor inherit the previous entry's SPDX data.
            spdx_html, spdx_osi = None, None
            if isinstance(license_value, str):
                if idutils.is_url(license_value):
                    spdx_html, spdx_osi = self.lookup_license_by_url(
                        license_value, self.metric_identifier)
                else:  # maybe licence name
                    spdx_html, spdx_osi = self.lookup_license_by_name(
                        license_value, self.metric_identifier)

            if not spdx_html:
                self.logger.warning(
                    'FsF-R1.1-01M : NO SPDX license representation (spdx url, osi_approved) found'
                )

            license_output.details_url = spdx_html
            license_output.osi_approved = spdx_osi
            licenses_list.append(license_output)

        self.result.test_status = "pass"
        self.score.earned = self.total_score
    else:
        self.score.earned = 0
        self.logger.warning('FsF-R1.1-01M : License unavailable')

    self.result.output = licenses_list
    self.result.score = self.score
def get_dcat_metadata(self, graph):
    """Collect metadata for the first ``dcat:Dataset`` found in ``graph``.

    The first subject typed ``dcat:Dataset`` is handed to
    ``self.get_metadata``; the result is then completed with publisher,
    creator and distribution details. A distribution that carries neither
    ``dcat:accessURL`` nor ``dcat:downloadURL`` in ``graph`` is requested
    over HTTP as RDF/XML, using the distribution node itself as the URL.

    Args:
        graph: Graph supporting slice lookup, ``value()`` and ``objects()``
            (presumably an ``rdflib.Graph`` -- confirm against the caller).

    Returns:
        dict: The collected metadata, or an empty dict when ``graph`` holds
        no ``dcat:Dataset``.
    """
    dcat_metadata = dict()
    DCAT = Namespace("http://www.w3.org/ns/dcat#")
    datasets = list(graph[:RDF.type:DCAT.Dataset])

    if not datasets:
        self.logger.info(
            'FsF-F2-01M : Found DCAT content but could not correctly parse metadata'
        )
        # The original code carried a disabled assignment here
        # (dcat_metadata['object_type'] = 'Dataset'), meant to keep DCAT in
        # the found metadata list; it stays disabled, so the dict is empty.
        return dcat_metadata

    dataset = datasets[0]
    dcat_metadata = self.get_metadata(graph, dataset, type='Dataset')

    # publisher: resolve a name when only a URL (or nothing) is known.
    # The None test comes first so is_url() is never handed None.
    publisher = dcat_metadata.get('publisher')
    if publisher is None or idutils.is_url(publisher):
        publisher_node = graph.value(dataset, DCTERMS.publisher)
        # FOAF preferred, DCAT compliant
        publisher_name = graph.value(publisher_node, FOAF.name)
        if publisher_name is None:
            # in some cases a dc title is used (not exactly DCAT compliant)
            publisher_name = graph.value(publisher_node, DCTERMS.title)
        dcat_metadata['publisher'] = publisher_name

    # creator
    creator = dcat_metadata.get('creator')
    if creator is None or idutils.is_url(creator):
        creator_names = [
            graph.value(creator_node, FOAF.name)
            for creator_node in graph.objects(dataset, DCTERMS.creator)
        ]
        if creator_names:
            dcat_metadata['creator'] = creator_names

    # distribution
    dcat_metadata['object_content_identifier'] = []
    for dist in graph.objects(dataset, DCAT.distribution):
        dtype, durl, dsize = None, None, None
        local_url = (graph.value(dist, DCAT.accessURL) or
                     graph.value(dist, DCAT.downloadURL))

        if not local_url:
            self.logger.info(
                'FsF-F2-01M : Trying to retrieve DCAT distributions from remote location -:'
                + str(dist))
            try:
                distgraph = rdflib.Graph()
                disturl = str(dist)
                # Without a timeout an unresponsive server would block the
                # whole assessment indefinitely.
                distresponse = requests.get(
                    disturl,
                    headers={'Accept': 'application/rdf+xml'},
                    timeout=10)
                if distresponse.text:
                    distgraph.parse(data=distresponse.text,
                                    format="application/rdf+xml")
                    extdist = list(distgraph[:RDF.type:DCAT.Distribution])
                    # An empty extdist raises IndexError, which the handler
                    # below treats like any other retrieval failure.
                    durl = (distgraph.value(extdist[0], DCAT.accessURL) or
                            distgraph.value(extdist[0], DCAT.downloadURL))
                    dsize = distgraph.value(extdist[0], DCAT.byteSize)
                    dtype = distgraph.value(extdist[0], DCAT.mediaType)
                    self.logger.info(
                        'FsF-F2-01M : Found DCAT distribution URL info from remote location -:'
                        + str(durl))
            except Exception:
                # Best effort: network and parsing problems must not abort
                # the harvest; fall back to the distribution node as URL.
                self.logger.info(
                    'FsF-F2-01M : Failed to retrieve DCAT distributions from remote location -:'
                    + str(dist))
                durl = str(dist)
        else:
            durl = local_url
            # Taking only one just to check if a licence is available. A
            # distribution lacking the statement must not wipe out a value
            # that was already found, hence the fallback.
            dcat_metadata['license'] = (graph.value(dist, DCTERMS.license) or
                                        dcat_metadata.get('license'))
            # TODO: check if this really works..
            dcat_metadata['access_rights'] = (
                graph.value(dist, DCTERMS.accessRights) or
                graph.value(dist, DCTERMS.rights) or
                dcat_metadata.get('access_rights'))
            dtype = graph.value(dist, DCAT.mediaType)
            # The DCAT property is dcat:byteSize, as already used for
            # remotely retrieved distributions above.
            dsize = graph.value(dist, DCAT.byteSize)

        if durl or dtype or dsize:
            if dtype is not None and idutils.is_url(str(durl)):
                # Keep only the last two path segments of the media type
                # value (e.g. '.../text/csv' becomes 'text/csv').
                dtype = '/'.join(str(dtype).split('/')[-2:])
            dcat_metadata['object_content_identifier'].append({
                # str(None) would store the literal text 'None'.
                'url': str(durl) if durl is not None else None,
                'type': dtype,
                'size': str(dsize) if dsize is not None else None
            })

    if dcat_metadata['object_content_identifier']:
        self.logger.info(
            'FsF-F3-01M : Found data links in DCAT.org metadata -: ' +
            str(dcat_metadata['object_content_identifier']))
    # TODO: add provenance metadata retrieval

    return dcat_metadata