def metric_138_institution(doc):
    ''' Report organizational creators of the dataset, falling back to the
    affiliations of person creators when no organization is listed. '''
    # Organizations listed directly as creators.
    institutions = set()
    for node in jsonld_frame(doc, {
            '@type': 'Dataset',
            'creators': {
                '@type': 'Organization',
                'name': {},
            }
    })['@graph']:
        if not node['creators']:
            continue
        for creator in force_list(node['creators']):
            if creator['name']:
                institutions.add(creator['name'])
    # Person creators paired with their affiliated organizations,
    # de-duplicated via a JSON round-trip (dicts are not hashable).
    unique_pairs = set()
    for node in jsonld_frame(doc, {
            '@type': 'Dataset',
            'creators': {
                '@type': 'Person',
                'fullName': {},
                'affiliations': {
                    '@type': 'Organization',
                    'name': {}
                }
            }
    })['@graph']:
        if not node['creators']:
            continue
        for creator in force_list(node['creators']):
            if not (creator['fullName'] and creator['affiliations']):
                continue
            for affiliation in force_list(creator['affiliations']):
                if affiliation['name']:
                    unique_pairs.add(json.dumps({
                        'person': creator['fullName'],
                        'organization': affiliation['name']
                    }))
    affiliated_institutions = list(map(json.loads, unique_pairs))
    if institutions:
        yield {
            'value': 1,
            'comment': 'Found institution(s): {}'.format(
                ', '.join(institutions),
            ),
        }
    elif affiliated_institutions:
        yield {
            'value': 0.75,
            'comment': 'Found affiliated institution(s): {}'.format(
                ', '.join(
                    '{} <{}>'.format(person['person'], person['organization'])
                    for person in affiliated_institutions),
            ),
        }
    else:
        yield {
            'value': 0,
            'comment': 'No institution was identified',
        }
def metric_145_landing_page(doc):
    ''' Find landing pages on the dataset's distributions and probe each one
    with an HTTP GET; yields 1 when reachable, 0.75 when it reports a
    problem, 0 when none are declared. '''
    landingPages = set(
        node['access']['landingPage']
        for node in jsonld_frame(doc, {
            '@type': 'DatasetDistribution',
            'access': {
                'landingPage': {},
            }
        })['@graph']
        # NOTE(review): sibling metrics pass node['access'] through
        # force_list; this assumes a single access object — confirm.
        if node['access'] and node['access']['landingPage']
    )
    if landingPages:
        for landingPage in landingPages:
            try:
                # A connection/DNS/timeout error previously propagated and
                # aborted the whole metric; treat it as a problem report.
                accessible = requests.get(landingPage).status_code < 400
            except requests.RequestException:
                accessible = False
            if accessible:
                yield {
                    'value': 1,
                    'comment': 'Landing page found {} and seems to be accessible'.format(landingPage)
                }
            else:
                yield {
                    'value': 0.75,
                    'comment': 'Landing page found {} but seems to report a problem'.format(landingPage)
                }
    else:
        yield {
            'value': 0,
            'comment': 'Could not identify any landing pages'
        }
def metric_136_program(doc):
    ''' Report the named program(s) a dataset belongs to; 1 when at least
    one program name is found, 0 otherwise. '''
    programs = set(
        # BUG FIX: previously read node['program']['name'], which raises
        # TypeError when node['program'] is a list (the case force_list
        # exists for) and otherwise ignores the framed loop variable;
        # use program['name'] like the sibling metrics do.
        program['name']
        for node in jsonld_frame(doc, {
            '@type': 'Dataset',
            'program': {
                'name': {}
            }
        })['@graph']
        if node['program']
        for program in force_list(node['program'])
        if program['name']
    )
    if programs:
        yield {
            'value': 1,
            'comment': 'Identified program(s): {}'.format(
                ', '.join(programs)
            )
        }
    else:
        yield {
            'value': 0,
            'comment': 'Could not identify any programs',
        }
def metric_104_doi(doc):
    ''' Collect DOI identifiers from the document and verify that each one
    resolves; yields 1 per verified DOI, 0.25 per unverifiable DOI,
    0 when none are found. '''
    dois = set(
        node['identifier']
        for node in jsonld_frame(
            doc, {
                '@type': ['Identifier', 'relatedIdentifier', 'alternateIdentifiers'],
                'description': ['doi', 'DOI'],
                'identifier': {}
            })['@graph']
        if node['identifier'] and node['description'])
    if dois:
        for doi in dois:
            # Bare DOIs are resolved through the doi.org proxy.
            url = doi if doi.startswith('http') else 'https://doi.org/{}'.format(doi)
            try:
                # Use a context manager so the response handle is closed
                # (it was previously leaked), and catch URLError — the
                # parent of HTTPError — so DNS/connection failures are
                # reported instead of aborting the metric.
                with urllib.request.urlopen(url):
                    pass
                yield {
                    'value': 1,
                    'comment': 'DOI {} was identified and verified'.format(doi)
                }
            except urllib.error.URLError:
                yield {
                    'value': 0.25,
                    'comment': 'DOI {} was identified but could not be verified'.format(
                        doi)
                }
    else:
        yield {
            'value': 0,
            'comment': 'No DOIs could be identified',
        }
def metric_27_contact_pi(doc):
    ''' Look for a Principal Investigator among the dataset's person
    creators; 1 when a PI role is found, 0.5 when people exist but none is
    a PI, 0 when no people are found. '''
    # De-duplicate person records via a JSON round-trip (dicts are unhashable).
    people = list(map(json.loads, set(
        json.dumps({
            'fullName': creators['fullName'],
            'roles': creators.get('roles', []),
        })
        for node in jsonld_frame(doc, {
            '@type': 'Dataset',
            'creators': {
                '@type': 'Person',
                'fullName': {},
                'roles': {
                    '@default': []
                },
            }
        })['@graph']
        if node['creators']
        for creators in force_list(node['creators'])
        if creators['fullName']
    )))
    PIs = [
        person for person in people
        if 'Principal Investigator' in person['roles']
    ]
    if PIs:
        yield {
            'value': 1,
            'comment': 'Found PI(s): {}'.format(
                ', '.join([person['fullName'] for person in PIs])
            ),
        }
    elif people:
        yield {
            'value': 0.5,
            'comment': 'Found {}, but cannot determine a PI'.format(
                [
                    # BUG FIX: the conditional previously bound to the whole
                    # concatenation ((name + roles) if roles else ''), so
                    # people without roles were rendered as '' instead of
                    # their name; only the role suffix is conditional.
                    person['fullName'] + (
                        ('(' + ', '.join(person['roles']) + ')')
                        if person['roles'] else ''
                    )
                    for person in people
                ]
            ),
        }
    else:
        yield {
            'value': 0,
            'comment': 'No PI or people could be identified'
        }
def metric_137_project(doc):
    ''' Report the named repository/project the dataset is stored in;
    1 when at least one name is found, 0 otherwise. '''
    frame = {
        '@type': 'Dataset',
        'storedIn': {
            '@type': 'DataRepository',
            'name': {}
        }
    }
    projects = set()
    for node in jsonld_frame(doc, frame)['@graph']:
        if not node['storedIn']:
            continue
        for repository in force_list(node['storedIn']):
            if repository['name']:
                projects.add(repository['name'])
    if projects:
        yield {
            'value': 1,
            'comment': 'Identified project(s): {}'.format(', '.join(projects)),
        }
    else:
        yield {
            'value': 0,
            'comment': 'Could not identify any projects',
        }
def metric_110_access_protocol(doc):
    ''' Report access URLs declared on the dataset's distributions;
    1 when at least one is found, 0 otherwise. '''
    frame = {
        '@type': 'DatasetDistribution',
        'access': {
            'accessURL': {},
        }
    }
    access_protocols = set()
    for node in jsonld_frame(doc, frame)['@graph']:
        if not node['access']:
            continue
        for access in force_list(node['access']):
            if access['accessURL']:
                access_protocols.add(access['accessURL'])
    if access_protocols:
        yield {
            'value': 1,
            'comment': 'Access protocol(s) found: {}'.format(', '.join(access_protocols))
        }
    else:
        yield {
            'value': 0,
            'comment': 'Could not identify any access protocols'
        }
def metric_108_resource_identifier(doc):
    ''' Collect dataset identifiers (identifierSource prefix + identifier)
    and, when one looks like a URL, verify that it resolves; yields 1 per
    verified identifier, 0.75 per unverifiable one, 0 when none exist. '''
    identifiers = set(
        node['identifier'].get('identifierSource', '') +
        node['identifier']['identifier']
        for node in jsonld_frame(
            doc, {
                '@type': 'Dataset',
                'identifier': {
                    'identifier': {},
                    'identifierSource': {
                        '@default': ''
                    },
                }
            })['@graph']
        if node['identifier'] and node['identifier']['identifier'])
    if identifiers:
        for identifier in identifiers:
            verified = False
            # Only URL-shaped identifiers can be probed.
            if '://' in identifier:
                try:
                    # A network failure previously propagated and aborted
                    # the metric; treat it as "could not be verified".
                    verified = requests.get(identifier).status_code < 400
                except requests.RequestException:
                    verified = False
            if verified:
                yield {
                    'value': 1,
                    'comment':
                    'Resource identifier {} was identified and verified'.
                    format(identifier)
                }
            else:
                yield {
                    'value': 0.75,
                    'comment':
                    'Resource identifier {} was identified but could not be verified'
                    .format(identifier)
                }
    else:
        yield {
            'value': 0,
            'comment': 'No resource identifier was found',
        }
def metric_143_ncbitaxon(doc):
    # Score taxonomy annotations (Dataset.isAbout, @type TaxonomicInformation)
    # against the NCBITaxon ontology:
    #   1.0  IRI namespace and term both match,
    #   0.75 IRI alone, or term alone, matches,
    #   0.5  term matches a synonym,
    #   0.25 a taxonomy is present but unrecognized,
    #   0.0  no taxonomy found.
    # De-duplication is done via a JSON round-trip (dicts are unhashable).
    taxonomies = list(
        map(
            json.loads,
            set(
                json.dumps({
                    'value': isAbout['name'],
                    # identifierSource prefix + identifier; each part
                    # defaults to '' so a missing identifier yields ''.
                    'valueIRI': isAbout.get('identifier', {}).get(
                        'identifierSource', '') + isAbout.get(
                            'identifier', {}).get('identifier', '')
                }) for node in jsonld_frame(
                    doc, {
                        '@type': 'Dataset',
                        'isAbout': {
                            '@type': 'TaxonomicInformation',
                            'name': {},
                            'identifier': {
                                'identifier': {},
                                'identifierSource': {
                                    '@default': ''
                                },
                            }
                        }
                    })['@graph'] if node['isAbout']
                for isAbout in force_list(node['isAbout'])
                if isAbout['name'])))
    if taxonomies:
        for taxonomy in taxonomies:
            # Namespace-normalized form of the IRI ('' stays falsy).
            value_ns = IRI_to_NS(taxonomy.get('valueIRI'))
            if taxonomy.get(
                    'value') and taxonomy.get('valueIRI') and pronto.Term(
                        value_ns, taxonomy['value']) in NCBITaxon:
                # Both the IRI and the human-readable term agree.
                yield {
                    'value': 1,
                    'comment':
                    'Ontological IRI for taxonomy {} and term match what is found in NCBITaxon.'
                    .format(value_ns),
                }
            elif taxonomy.get('valueIRI') and value_ns in NCBITaxon:
                yield {
                    'value': 0.75,
                    'comment':
                    'Ontological IRI for taxonomy {} found in NCBITaxon.'.
                    format(value_ns),
                }
            elif taxonomy.get(
                    'value') and taxonomy['value'] in NCBITaxon_reversed:
                # Name found in the reverse (name -> IRI) lookup table.
                yield {
                    'value': 0.75,
                    'comment': 'Taxonomy `{}` found in NCBITaxon.'.format(
                        taxonomy['value']),
                }
            elif taxonomy.get('value') and taxonomy[
                    'value'] in NCBITaxon_reversed_synonyms:
                yield {
                    'value': 0.5,
                    'comment': 'Taxonomy `{}` found in NCBITaxon synonyms.'.format(
                        taxonomy['value']),
                }
            else:
                yield {
                    'value': 0.25,
                    'comment': 'Taxonomy `{}` found but not in NCBITaxon.'.format(
                        taxonomy.get('value', '') +
                        (('<' + value_ns + '>') if value_ns else '')),
                }
    else:
        yield {
            'value': 0.0,
            'comment': 'Taxonomy could not be identified',
        }
def metric_142_edam(doc):
    # Score declared file types (Dataset.types.information) against the EDAM
    # ontology: 1.0 IRI + term match, 0.75 IRI or term alone, 0.5 synonym,
    # 0.25 present but unrecognized, 0.0 none found.
    # De-duplication is done via a JSON round-trip (dicts are unhashable).
    filetypes = list(map(json.loads, set(
        json.dumps({
            'value': information['value'],
            'valueIRI': information['valueIRI'],
        })
        for node in jsonld_frame(doc, {
            '@type': 'Dataset',
            'types': {
                'information': {
                    'value': {
                        '@default': ''
                    },
                    'valueIRI': {
                        '@default': ''
                    }
                }
            }
        })['@graph']
        if node['types']
        for types in force_list(node['types'])
        if types['information']
        for information in force_list(types['information'])
        # NOTE(review): this requires BOTH value and valueIRI, which makes
        # the value-only / IRI-only branches below unreachable; sibling
        # metric_139_bao accepts either — confirm whether `and` is intended.
        if information['value'] and information['valueIRI']
    )))
    if filetypes:
        for filetype in filetypes:
            # Namespace-normalized form of the IRI ('' stays falsy).
            value_ns = IRI_to_NS(filetype.get('valueIRI'))
            if filetype.get('value') and filetype.get('valueIRI') and pronto.Term(value_ns, filetype['value']) in EDAM:
                # Both the IRI and the human-readable term agree.
                yield {
                    'value': 1,
                    'comment': 'Ontological IRI for file type {} and term match what is found in EDAM.'.format(
                        value_ns
                    ),
                }
            elif filetype.get('valueIRI') and value_ns in EDAM:
                yield {
                    'value': 0.75,
                    'comment': 'Ontological IRI for filetype {} found in EDAM.'.format(
                        value_ns
                    ),
                }
            elif filetype.get('value') and filetype['value'] in EDAM_reversed:
                # Name found in the reverse (name -> IRI) lookup table.
                yield {
                    'value': 0.75,
                    'comment': 'Filetype `{}` found in EDAM.'.format(
                        filetype['value']
                    ),
                }
            elif filetype.get('value') and filetype['value'] in EDAM_reversed_synonyms:
                yield {
                    'value': 0.5,
                    'comment': 'Filetype `{}` found in EDAM synonyms.'.format(
                        filetype['value']
                    ),
                }
            else:
                yield {
                    'value': 0.25,
                    'comment': 'Filetype `{}` found but not in EDAM.'.format(
                        filetype.get('value', '') + (('<' + value_ns + '>') if value_ns else '')
                    ),
                }
    else:
        yield {
            'value': 0.0,
            'comment': 'filetype could not be identified',
        }
def metric_144_cellosaurus(doc):
    ''' Score cell-line annotations (Dataset.isAbout, @type BiologicalEntity)
    against Cellosaurus: 1 = IRI and name both match, 0.75 = IRI or name
    alone matches, 0.5 = name matches a synonym, 0.25 = present but
    unrecognized, 0 = no cell line found. '''
    # De-duplicate via a JSON round-trip (dicts are unhashable).
    cell_lines = list(
        map(
            json.loads,
            set(
                json.dumps({
                    'value': isAbout['name'],
                    # identifierSource prefix + identifier, each
                    # defaulting to ''.
                    'valueIRI': isAbout.get('identifier', {}).get(
                        'identifierSource', '') + isAbout.get(
                            'identifier', {}).get('identifier', '')
                }) for node in jsonld_frame(
                    doc, {
                        '@type': 'Dataset',
                        'isAbout': {
                            '@type': 'BiologicalEntity',
                            'name': {},
                            'identifier': {
                                'identifier': {},
                                'identifierSource': {
                                    '@default': ''
                                },
                            }
                        }
                    })['@graph'] if node['isAbout']
                for isAbout in force_list(node['isAbout'])
                if isAbout['name'])))
    if cell_lines:
        for cell_line in cell_lines:
            # Namespace-normalized form of the IRI ('' stays falsy).
            value_ns = IRI_to_NS(cell_line.get('valueIRI'))
            if cell_line.get('value') and cell_line.get(
                    'valueIRI') and Cellosaurus.get(
                        value_ns, {}).get('name') == cell_line['value']:
                yield {
                    'value': 1,
                    'comment':
                    'Ontological IRI for cell line {} and term match what is found in Cellosaurus.'
                    .format(value_ns),
                }
            elif cell_line.get('valueIRI') and value_ns in Cellosaurus:
                yield {
                    'value': 0.75,
                    'comment':
                    'Ontological IRI for cell line {} found in Cellosaurus.'.
                    format(value_ns),
                }
            # BUG FIX: previously tested `value_ns in Cellosaurus_reversed`,
            # looking the IRI up in the name->IRI reverse table; the message
            # and every sibling metric look up the name here.
            elif cell_line.get(
                    'value') and cell_line['value'] in Cellosaurus_reversed:
                yield {
                    'value': 0.75,
                    'comment': 'Cell line `{}` found in Cellosaurus.'.format(
                        cell_line['value']),
                }
            elif cell_line.get('value') and cell_line[
                    'value'] in Cellosaurus_reversed_synonyms:
                yield {
                    'value': 0.5,
                    'comment': 'Cell line `{}` found in Cellosaurus synonyms.'.format(
                        cell_line['value']),
                }
            else:
                yield {
                    'value': 0.25,
                    'comment': 'Cell line `{}` found but not in Cellosaurus.'.format(
                        cell_line.get('value', '') +
                        (('<' + value_ns + '>') if value_ns else '')),
                }
    else:
        yield {
            'value': 0.0,
            'comment': 'Cell line could not be identified',
        }
def metric_140_uberon(doc):
    # Score anatomical-part annotations (Dataset.isAbout, @type
    # AnatomicalPart) against the UBERON ontology:
    #   1.0 IRI + term match, 0.75 IRI or term alone, 0.5 synonym,
    #   0.25 present but unrecognized, 0.0 none found.
    # De-duplication is done via a JSON round-trip (dicts are unhashable).
    anatomical_parts = list(
        map(
            json.loads,
            set(
                json.dumps({
                    'value': isAbout['name'],
                    # identifierSource prefix + identifier; each part
                    # defaults to '' so a missing identifier yields ''.
                    'valueIRI': isAbout.get('identifier', {}).get(
                        'identifierSource', '') + isAbout.get(
                            'identifier', {}).get('identifier', '')
                }) for node in jsonld_frame(
                    doc, {
                        '@type': 'Dataset',
                        'isAbout': {
                            '@type': 'AnatomicalPart',
                            'name': {},
                            'identifier': {
                                'identifier': {},
                                'identifierSource': {
                                    '@default': ''
                                },
                            }
                        }
                    })['@graph'] if node['isAbout']
                for isAbout in force_list(node['isAbout'])
                if isAbout['name'])))
    if anatomical_parts:
        for anatomical_part in anatomical_parts:
            # Namespace-normalized form of the IRI ('' stays falsy).
            value_ns = IRI_to_NS(anatomical_part.get('valueIRI'))
            if anatomical_part.get('value') and anatomical_part.get(
                    'valueIRI') and pronto.Term(
                        value_ns, anatomical_part['value']) in UBERON:
                # Both the IRI and the human-readable term agree.
                yield {
                    'value': 1,
                    'comment':
                    'Ontological IRI for anatomical part {} and term match what is found in UBERON.'
                    .format(value_ns),
                }
            elif anatomical_part.get('valueIRI') and value_ns in UBERON:
                yield {
                    'value': 0.75,
                    'comment':
                    'Ontological IRI for anatomical part {} found in UBERON.'.
                    format(value_ns),
                }
            elif anatomical_part.get(
                    'value') and anatomical_part['value'] in UBERON_reversed:
                # Name found in the reverse (name -> IRI) lookup table.
                yield {
                    'value': 0.75,
                    'comment': 'Anatomical part `{}` found in UBERON.'.format(
                        anatomical_part['value']),
                }
            elif anatomical_part.get('value') and anatomical_part[
                    'value'] in UBERON_reversed_synonyms:
                yield {
                    'value': 0.5,
                    'comment': 'Anatomical part `{}` found in UBERON synonyms.'.format(
                        anatomical_part['value']),
                }
            else:
                yield {
                    'value': 0.25,
                    'comment': 'Anatomical part `{}` found but not in UBERON.'.format(
                        anatomical_part.get('value', '') +
                        (('<' + value_ns + '>') if value_ns else '')),
                }
    else:
        yield {
            'value': 0.0,
            'comment': 'Anatomical part could not be identified',
        }
def metric_141_mondo(doc):
    ''' Score disease annotations (Dataset.isAbout, @type Disease) against
    the MONDO ontology: 1 = IRI and term both match, 0.75 = IRI or term
    alone matches, 0.5 = term matches a synonym, 0.25 = present but
    unrecognized, 0 = no disease found. '''
    # De-duplicate via a JSON round-trip (dicts are unhashable).
    diseases = list(
        map(
            json.loads,
            set(
                json.dumps({
                    'value': isAbout['name'],
                    # identifierSource prefix + identifier when an
                    # identifier node exists, otherwise ''.
                    'valueIRI': (
                        isAbout['identifier'].get('identifierSource', '') +
                        isAbout['identifier'].get('identifier', '')
                    ) if isAbout['identifier'] else ''
                }) for node in jsonld_frame(
                    doc, {
                        '@type': 'Dataset',
                        'isAbout': {
                            '@type': 'Disease',
                            'name': {},
                            'identifier': {
                                'identifier': {},
                                'identifierSource': {
                                    '@default': ''
                                },
                            }
                        }
                    })['@graph'] if node['isAbout']
                for isAbout in force_list(node['isAbout'])
                if isAbout['name'])))
    if diseases:
        for disease in diseases:
            # Namespace-normalized form of the IRI ('' stays falsy).
            value_ns = IRI_to_NS(disease.get('valueIRI'))
            if disease.get(
                    'value') and disease.get('valueIRI') and pronto.Term(
                        value_ns, disease['value']) in MONDO:
                yield {
                    'value': 1,
                    'comment':
                    'Ontological IRI for disease {} and term match what is found in MONDO.'
                    .format(value_ns),
                }
            # CONSISTENCY FIX: previously tested the raw disease['valueIRI']
            # while the message prints value_ns; every sibling metric tests
            # the normalized value_ns.
            elif disease.get('valueIRI') and value_ns in MONDO:
                yield {
                    'value': 0.75,
                    'comment': 'Ontological IRI for disease {} found in MONDO.'.format(
                        value_ns),
                }
            elif disease.get('value') and disease['value'] in MONDO_reversed:
                yield {
                    'value': 0.75,
                    'comment': 'Disease `{}` found in MONDO.'.format(disease['value']),
                }
            elif disease.get(
                    'value') and disease['value'] in MONDO_reversed_synonyms:
                yield {
                    'value': 0.5,
                    'comment': 'Disease `{}` found in MONDO synonyms.'.format(
                        disease['value']),
                }
            else:
                # CONSISTENCY FIX: scored 0.5 before, but every sibling
                # ontology metric scores the "found but unrecognized"
                # case 0.25 (the synonym branch above already uses 0.5).
                yield {
                    'value': 0.25,
                    'comment': 'Disease `{}` found but not in MONDO.'.format(
                        disease.get('value', '') +
                        (('<' + value_ns + '>') if value_ns else '')),
                }
    else:
        yield {
            'value': 0.0,
            'comment': 'Disease could not be identified',
        }
def metric_139_bao(doc):
    # Score assay/method annotations (Dataset.types.method) against the BAO
    # ontology: 1.0 IRI + term match, 0.75 IRI or term alone, 0.5 synonym,
    # 0.25 present but unrecognized, 0.0 none found.
    # De-duplication is done via a JSON round-trip (dicts are unhashable).
    assays = list(
        map(
            json.loads,
            set(
                json.dumps({
                    # A method entry may be a bare string or a
                    # {value, valueIRI} dict.
                    'value': method if type(method) == str else method['value'],
                    'valueIRI': '' if type(method) == str else method['valueIRI'],
                }) for node in jsonld_frame(
                    doc, {
                        '@type': 'Dataset',
                        'types': {
                            'method': {
                                'value': {
                                    '@default': ''
                                },
                                'valueIRI': {
                                    '@default': ''
                                }
                            }
                        }
                    })['@graph'] if node['types']
                for types in force_list(node['types']) if types['method']
                for method in force_list(types['method'])
                # NOTE(review): precedence makes this
                # `str or ((dict and value) or valueIRI)`; equivalent to the
                # likely-intended `dict and (value or valueIRI)` for str/dict
                # inputs, but would subscript any other type — confirm.
                if type(method) == str or (type(method) == dict and
                                           method['value'] or
                                           method['valueIRI']))))
    if assays:
        for assay in assays:
            # Namespace-normalized form of the IRI ('' stays falsy).
            value_ns = IRI_to_NS(assay.get('valueIRI'))
            if assay.get('value') and assay.get('valueIRI') and pronto.Term(
                    value_ns, assay.get('value')) in BAO:
                yield {
                    'value': 1,
                    'comment':
                    'Ontological IRI for Assay {} and term match what is found in BAO.'
                    .format(assay['valueIRI']),
                }
            # NOTE(review): membership is tested on the raw valueIRI here,
            # whereas sibling metrics test the normalized value_ns — confirm
            # how BAO is keyed.
            elif value_ns and assay['valueIRI'] in BAO:
                yield {
                    'value': 0.75,
                    'comment': 'Ontological IRI for Assay {} found in BAO.'.format(
                        assay['valueIRI']),
                }
            elif assay.get('value') and assay['value'] in BAO_reversed:
                # Name found in the reverse (name -> IRI) lookup table.
                yield {
                    'value': 0.75,
                    'comment': 'Assay {} found in BAO.'.format(assay['value']),
                }
            elif assay.get(
                    'value') and assay['value'] in BAO_reversed_synonyms:
                yield {
                    'value': 0.5,
                    'comment': 'Assay `{}` found in BAO synonyms.'.format(assay['value']),
                }
            else:
                yield {
                    'value': 0.25,
                    'comment': 'Assay {} found but not in BAO.'.format(
                        assay.get('value', '') +
                        (('<' + value_ns + '>') if value_ns else '')),
                }
    else:
        yield {
            'value': 0.0,
            'comment': 'Assay could not be identified',
        }