def evaluate(self):
    """Evaluate whether the assessed object links to related resources.

    Populates ``self.result`` (a ``RelatedResource``) with test status,
    score, maturity and output. Relations that point back at the object's
    own PID URL are removed from ``self.fuji.related_resources`` first.
    Any remaining relation passes test ``FsF-I3-01M-1``; if at least one
    of them is identified by a persistent identifier or a URL, test
    ``FsF-I3-01M-2`` passes as well.
    """
    total_template = '{0} : Total number of related resources extracted -: {1}'
    after_qc_template = '{0} : Number of related resources after QC step -: {1}'

    self.result = RelatedResource(
        id=self.metric_number,
        metric_identifier=self.metric_identifier,
        metric_name=self.metric_name,
    )
    self.output = RelatedResourceOutput()

    self.logger.info(
        total_template.format(self.metric_identifier,
                              len(self.fuji.related_resources)))

    if self.fuji.related_resources:
        # QC check: a relation pointing at the object itself is most
        # likely incorrect, so it is dropped.
        kept_relations = []
        for candidate in self.fuji.related_resources:
            if candidate.get('related_resource') != self.fuji.pid_url:
                kept_relations.append(candidate)
        self.fuji.related_resources = kept_relations
        self.logger.log(
            self.fuji.LOG_SUCCESS,
            after_qc_template.format(self.metric_identifier,
                                     len(self.fuji.related_resources)))

    relations = self.fuji.related_resources
    if relations:
        # TODO include source of relation
        identifier_based = False
        for entry in relations:
            target = IdentifierHelper(entry.get('related_resource'))
            if target.is_persistent or 'url' in target.identifier_schemes:
                identifier_based = True

        self.output = relations
        self.result.test_status = 'pass'
        self.setEvaluationCriteriumScore('FsF-I3-01M-1', 1, 'pass')
        self.score.earned = self.total_score
        self.maturity = 2
        if identifier_based:
            self.setEvaluationCriteriumScore('FsF-I3-01M-2', 1, 'pass')
            self.maturity = 3

    self.result.metric_tests = self.metric_tests
    self.result.maturity = self.maturity
    self.result.score = self.score
    self.result.output = self.output
def check_registry_support(self):
    """Return the catalogues in which the assessed record is listed.

    Catalogues are probed in a fixed order and probing stops at the first
    hit: DataCite (only when the PID scheme contains ``'doi'``), then
    Google Dataset Search, then Mendeley Data.

    :return: a list holding the ``source`` of the first catalogue that
        lists the record, or an empty list when none does.
    """
    pid_helper = IdentifierHelper(self.fuji.pid_url)

    # DataCite is only queried for DOIs.
    pid_scheme = self.fuji.pid_scheme
    if pid_scheme and 'doi' in pid_scheme:
        datacite = MetaDataCatalogueDataCite(self.fuji.logger)
        datacite.query(pid_helper.normalized_id)
        if datacite.islisted:
            return [datacite.source]

    # The remaining catalogues are searched by both the normalized PID
    # and the landing page URL.
    google = MetaDataCatalogueGoogleDataSearch(self.fuji.logger)
    google.query([pid_helper.normalized_id, self.fuji.landing_url])
    if google.islisted:
        return [google.source]

    mendeley = MetaDataCatalogueMendeleyData(self.fuji.logger)
    mendeley.query([pid_helper.normalized_id, self.fuji.landing_url])
    if mendeley.islisted:
        return [mendeley.source]

    return []
def evaluate(self):
    """Evaluate whether the object identifier follows a unique scheme.

    Populates ``self.result`` (a ``Uniqueness``) and, when a scheme is
    recognised, records it on the shared ``self.fuji`` state:

    * Scheme recognised by ``IdentifierHelper``: full score, maturity 3,
      ``self.fuji.id_scheme`` set; when the identifier is persistent,
      ``self.fuji.pid_scheme`` and ``self.fuji.pid_url`` are set too.
    * Otherwise a UUID or a hash (checked in that order): score 0.5,
      maturity 1.
    * Otherwise the test fails with score 0.
    """
    # ======= CHECK IDENTIFIER UNIQUENESS =======
    self.result = Uniqueness(id=self.metric_number,
                             metric_identifier=self.metric_identifier,
                             metric_name=self.metric_name)
    self.output = UniquenessOutput()
    self.logger.info('FsF-F1-01D : Using idutils schemes')

    identifier = self.fuji.id
    idhelper = IdentifierHelper(identifier)
    found_ids = idhelper.identifier_schemes

    # Truthiness instead of len(): an empty list and a missing (None)
    # scheme list both fall through to the UUID/hash checks.
    if found_ids:
        self.logger.log(
            self.fuji.LOG_SUCCESS,
            'FsF-F1-01D : Unique identifier schemes found {}'.format(found_ids))
        self.setEvaluationCriteriumScore('FsF-F1-01D-1', self.total_score, 'pass')
        self.maturity = 3
        self.output.guid = identifier
        self.score.earned = self.total_score
        # identify main scheme
        found_id = idhelper.preferred_schema
        self.fuji.id_scheme = found_ids[0]
        if idhelper.is_persistent:
            self.fuji.pid_scheme = found_id
            self.fuji.pid_url = idhelper.identifier_url
        self.logger.info(
            'FsF-F1-01D : Finalized unique identifier scheme - {}'.format(found_id))
        self.output.guid_scheme = found_id
        self.result.test_status = 'pass'
    else:
        # UUIDs and hashes are scored identically; only the reported
        # scheme name and the log message differ.
        if self.verify_uuid(identifier):
            fallback = ('uuid', 'FsF-F1-01D : Unique identifier (UUID) scheme found')
        elif self.verify_hash(identifier):
            fallback = ('hash', 'FsF-F1-01D : Unique identifier (SHA,MD5) scheme found')
        else:
            fallback = None

        if fallback is not None:
            guid_scheme, success_message = fallback
            self.logger.log(self.fuji.LOG_SUCCESS, success_message)
            self.setEvaluationCriteriumScore('FsF-F1-01D-2', 0.5, 'pass')
            self.result.test_status = 'pass'
            self.output.guid_scheme = guid_scheme
            self.output.guid = identifier
            self.maturity = 1
            self.score.earned = 0.5
        else:
            self.result.test_status = 'fail'
            self.score.earned = 0
            self.logger.warning('FsF-F1-01D : Failed to check the identifier scheme!.')

    self.result.score = self.score
    self.result.metric_tests = self.metric_tests
    self.result.output = self.output
    self.result.maturity = self.maturity_levels.get(self.maturity)
def evaluate(self):
    """Evaluate whether the object identifier is persistent and resolvable.

    Resolves the identifier (the PID URL, or the identifier itself when
    its scheme is ``'url'``), records landing page details on
    ``self.fuji``, collects signposting links from the HTTP ``Link``
    response header and, when no PID scheme is known yet, tries to obtain
    one from a ``cite-as`` signposting link.

    Scoring: a known PID scheme passes ``FsF-F1-02D-1`` (0.5, maturity 1);
    if the metadata was additionally accessible, ``FsF-F1-02D-2`` passes
    and the full score and maturity 3 are awarded. Without a PID scheme
    the score is 0. The outcome is stored in ``self.result``.
    """
    self.result = Persistence(id=self.metric_number,
                              metric_identifier=self.metric_identifier,
                              metric_name=self.metric_name)
    self.output = PersistenceOutput()
    # ======= CHECK IDENTIFIER PERSISTENCE =======
    self.logger.info(
        'FsF-F1-02D : PID schemes-based assessment supported by the assessment service - {}'
        .format(Mapper.VALID_PIDS.value))
    inaccessible_message = (
        "FsF-F1-02D : Resource inaccessible, identifier returned http status code -: {code}")

    check_url = None
    signposting_pid = None
    if self.fuji.id_scheme is not None:
        check_url = self.fuji.pid_url
    if self.fuji.id_scheme == 'url':
        self.fuji.origin_url = self.fuji.id
        check_url = self.fuji.id

    if check_url:
        # ======= RETRIEVE METADATA FROM LANDING PAGE =======
        requestHelper = RequestHelper(check_url, self.logger)
        requestHelper.setAcceptType(AcceptTypes.html_xml)
        _neg_source, self.fuji.extruct_result = requestHelper.content_negotiate(
            'FsF-F1-02D', ignore_html=False)
        if 'html' not in str(requestHelper.content_type):
            self.logger.info(
                'FsF-F2-01M :Content type is ' + str(requestHelper.content_type) +
                ', therefore skipping embedded metadata (microdata, RDFa) tests')
            self.fuji.extruct_result = {}
        if not isinstance(self.fuji.extruct_result, dict):
            self.fuji.extruct_result = {}
        r = requestHelper.getHTTPResponse()
        response_status = requestHelper.response_status

        if r:
            self.fuji.landing_url = requestHelper.redirect_url
            # in case the test has been repeated because a PID has been found in metadata
            if self.fuji.repeat_pid_check and self.fuji.landing_url != self.fuji.input_id:
                self.logger.warning(
                    'FsF-F1-02D : Landing page URL resolved from PID found in metadata does not match with input URL'
                )
                self.logger.warning(
                    'FsF-F2-01M : Seems to be a catalogue entry or alternative representation of the data set, landing page URL resolved from PID found in metadata does not match with input URL'
                )

            if self.fuji.landing_url not in ['https://datacite.org/invalid.html']:
                if response_status == 200:
                    # identify signposting links in header
                    header_link_string = r.getheader('Link')
                    if header_link_string is not None:
                        self.logger.info(
                            'FsF-F1-02D : Found signposting links in response header of landingpage'
                        )
                        for preparsed_link in header_link_string.split(','):
                            found_type, type_match = None, None
                            found_rel, rel_match = None, None
                            found_formats, formats_match = None, None
                            parsed_link = preparsed_link.strip().split(';')
                            found_link = parsed_link[0].strip()
                            for link_prop in parsed_link[1:]:
                                # Parameters usually follow '; ' with a space,
                                # so strip before testing the prefix; without
                                # this, 'rel', 'type' and 'formats' would
                                # never be recognised.
                                link_prop = str(link_prop).strip()
                                if link_prop.startswith('rel="'):
                                    rel_match = re.search(r'rel="(.*?)"', link_prop)
                                elif link_prop.startswith('type="'):
                                    type_match = re.search(r'type="(.*?)"', link_prop)
                                elif link_prop.startswith('formats="'):
                                    formats_match = re.search(r'formats="(.*?)"', link_prop)
                            if type_match:
                                found_type = type_match[1]
                            if rel_match:
                                found_rel = rel_match[1]
                            if formats_match:
                                found_formats = formats_match[1]
                            if found_link:
                                self.fuji.signposting_header_links.append({
                                    # drop the enclosing '<' and '>'
                                    'url': found_link[1:-1],
                                    'type': found_type,
                                    'rel': found_rel,
                                    'profile': found_formats,
                                })

                    # check if there is a cite-as signposting link
                    if self.fuji.pid_scheme is None:
                        signposting_pid_link = self.fuji.get_signposting_links('cite-as')
                        if signposting_pid_link:
                            signposting_pid = signposting_pid_link[0].get('url')
                        if signposting_pid:
                            # The helper must be instantiated with the
                            # cite-as URL; inspecting the bare class never
                            # looks at the actual identifier.
                            signidhelper = IdentifierHelper(signposting_pid)
                            found_id = signidhelper.preferred_schema
                            if signidhelper.is_persistent:
                                self.logger.info(
                                    'FsF-F1-02D : Found object identifier in signposting header links'
                                )
                                self.fuji.pid_scheme = found_id

                    up = urlparse(self.fuji.landing_url)
                    self.fuji.landing_origin = '{uri.scheme}://{uri.netloc}'.format(uri=up)
                    self.fuji.landing_html = requestHelper.getResponseContent()
                    self.fuji.landing_content_type = requestHelper.content_type
                    # url is active, although the identifier is not based on a pid scheme
                    self.output.resolved_url = self.fuji.landing_url
                    self.output.resolvable_status = True
                    self.logger.info(
                        'FsF-F1-02D : Object identifier active (status code = 200)')
                    self.fuji.isMetadataAccessible = True
                else:
                    # 401/402/403 and every other non-200 status are
                    # handled identically.
                    self.fuji.isMetadataAccessible = False
                    self.logger.warning(inaccessible_message.format(code=response_status))
            else:
                self.logger.warning(
                    "FsF-F1-02D : Invalid DOI, identifier resolved to -: {code}"
                    .format(code=self.fuji.landing_url))
        else:
            self.fuji.isMetadataAccessible = False
            self.logger.warning(
                "FsF-F1-02D :Resource inaccessible, no response received from -: {}"
                .format(check_url))
            if response_status in [401, 402, 403]:
                self.logger.warning(inaccessible_message.format(code=response_status))
    else:
        self.logger.warning(
            "FsF-F1-02D :Resource inaccessible, could not identify an actionable representation for the given identfier -: {}"
            .format(self.fuji.id))

    if self.fuji.pid_scheme is not None:
        if signposting_pid is None:
            idhelper = IdentifierHelper(self.fuji.id)
            self.fuji.pid_url = idhelper.identifier_url
        else:
            # signposting_pid is the URL string taken from the cite-as
            # link; indexing it with [0] would keep only its first
            # character.
            self.fuji.pid_url = signposting_pid
        self.output.pid_scheme = self.fuji.pid_scheme
        self.output.pid = self.fuji.pid_url
        self.setEvaluationCriteriumScore('FsF-F1-02D-1', 0.5, 'pass')
        self.score.earned = 0.5
        self.maturity = 1
        if self.fuji.isMetadataAccessible:
            # identifier should be based on a persistence scheme and resolvable
            self.setEvaluationCriteriumScore('FsF-F1-02D-2', 0.5, 'pass')
            self.maturity = 3
            self.result.test_status = 'pass'
            self.score.earned = self.total_score
        self.logger.log(
            self.fuji.LOG_SUCCESS,
            'FsF-F1-02D : Persistence identifier scheme -: {}'.format(
                self.fuji.pid_scheme))
    else:
        self.score.earned = 0
        self.logger.warning(
            'FsF-F1-02D : Not a persistent identifier scheme -: {}'.format(
                self.fuji.id_scheme))

    self.result.score = self.score
    self.result.maturity = self.maturity
    self.result.metric_tests = self.metric_tests
    self.result.output = self.output
def assess_by_id(body):  # noqa: E501
    """assess_by_id

    Evaluate FAIRness of a data object based on its identifier # noqa: E501

    Runs all FAIR checks against the requested object, attaches the
    per-metric debug messages to each result and wraps everything in a
    ``FAIRResults`` response. Nothing is evaluated (and ``None`` is
    returned) when the incoming request is not JSON.

    :param body:
    :type body: dict | bytes

    :rtype: FAIRResults
    """
    if not connexion.request.is_json:
        return None

    body = Body.from_dict(connexion.request.get_json())
    identifier = body.object_identifier
    debug = body.test_debug
    metadata_service_endpoint = body.metadata_service_endpoint
    oaipmh_endpoint = body.oaipmh_endpoint
    metadata_service_type = body.metadata_service_type
    usedatacite = body.use_datacite

    logger = Preprocessor.logger
    logger.info('Assessment target: ' + identifier)
    print('Assessment target: ', identifier, flush=True)
    ft = FAIRCheck(uid=identifier,
                   test_debug=debug,
                   metadata_service_url=metadata_service_endpoint,
                   metadata_service_type=metadata_service_type,
                   use_datacite=usedatacite,
                   oaipmh_endpoint=oaipmh_endpoint)

    # set target for remote logging
    remote_log_host = Preprocessor.remote_log_host
    remote_log_path = Preprocessor.remote_log_path
    if remote_log_host and remote_log_path:
        ft.set_remote_logging_target(remote_log_host, remote_log_path)

    # The uniqueness/persistence check is repeated whenever metadata
    # retrieval has flagged that it should be re-run.
    uid_result, pid_result = ft.check_unique_persistent()
    ft.retrieve_metadata_embedded(ft.extruct_result)
    if ft.repeat_pid_check:
        uid_result, pid_result = ft.check_unique_persistent()
    ft.retrieve_metadata_external()
    if ft.repeat_pid_check:
        uid_result, pid_result = ft.check_unique_persistent()

    core_metadata_result = ft.check_minimal_metatadata()
    content_identifier_included_result = ft.check_content_identifier_included()
    access_level_result = ft.check_data_access_level()
    license_result = ft.check_license()
    related_resources_result = ft.check_relatedresources()
    check_searchable_result = ft.check_searchable()
    data_content_result = ft.check_data_content_metadata()
    data_file_format_result = ft.check_data_file_format()
    community_standards_result = ft.check_community_metadatastandards()
    data_provenance_result = ft.check_data_provenance()
    formal_metadata_result = ft.check_formal_metadata()
    semantic_vocab_result = ft.check_semantic_vocabulary()
    metadata_preserved_result = ft.check_metadata_preservation()
    standard_protocol_data_result = ft.check_standardised_protocol_data()
    standard_protocol_metadata_result = ft.check_standardised_protocol_metadata()

    # The order of the response differs from the execution order above.
    results = [
        uid_result,
        pid_result,
        core_metadata_result,
        content_identifier_included_result,
        check_searchable_result,
        access_level_result,
        formal_metadata_result,
        semantic_vocab_result,
        related_resources_result,
        data_content_result,
        license_result,
        data_provenance_result,
        community_standards_result,
        data_file_format_result,
        standard_protocol_data_result,
        standard_protocol_metadata_result,
    ]

    debug_messages = ft.get_log_messages_dict()
    ft.logger_message_stream.flush()
    summary = ft.get_assessment_summary(results)
    for result in results:
        if ft.isDebug:
            debug_list = debug_messages.get(result['metric_identifier'])
            if debug_list is not None:
                result['test_debug'] = debug_list
            else:
                result['test_debug'] = ['INFO: No debug messages received']
        else:
            result['test_debug'] = ['INFO: Debugging disabled']
    # keep only the most recently added handler
    ft.logger.handlers = [ft.logger.handlers[-1]]

    # Timestamp format from RFC 3339 as specified in openapi3. The 'Z'
    # suffix denotes UTC, so the time must be taken in UTC rather than in
    # the server's local time zone.
    timestmp = datetime.datetime.now(
        datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    metric_spec = Preprocessor.metric_specification
    metric_version = os.path.basename(Preprocessor.METRIC_YML_PATH)
    totalmetrics = len(results)
    request = body.to_dict()
    if ft.pid_url:
        idhelper = IdentifierHelper(ft.pid_url)
        request['normalized_object_identifier'] = idhelper.get_normalized_id()
    final_response = FAIRResults(request=request,
                                 timestamp=timestmp,
                                 software_version=ft.FUJI_VERSION,
                                 test_id=ft.test_id,
                                 metric_version=metric_version,
                                 metric_specification=metric_spec,
                                 total_metrics=totalmetrics,
                                 results=results,
                                 summary=summary)
    return final_response