def crosswalk_bio(orcid_profile, person_uri, graph, skip_person=False, person_class=FOAF.Person):
    """Add biographical triples for an ORCID profile to ``graph``.

    Unless ``skip_person`` is set, the person is typed as ``person_class`` and
    labelled with the joined given/family names, and a main vcard
    (``person_uri + "-vcard"``) with a name vcard
    (``person_uri + "-vcard-name"``) is created. Whatever ``skip_person`` is,
    a ``VIVO.scopusId`` is added for every external identifier whose common
    name is "Scopus Author ID".

    :param orcid_profile: decoded ORCID profile; everything is read from
        ``orcid_profile["orcid-profile"]["orcid-bio"]``.
    :param person_uri: URI of the person; also the stem of the vcard URIs.
    :param graph: graph whose ``add`` method receives the (s, p, o) triples.
    :param skip_person: if true, don't create the person, names or vcards.
    :param person_class: class assigned to the person.
    """
    orcid_bio = orcid_profile["orcid-profile"]["orcid-bio"]

    #If skip_person, then don't create person and add names
    if not skip_person:
        #An element may be missing or present-but-null; both mean "no value".
        person_details = orcid_bio.get("personal-details") or {}
        given_names = (person_details.get("given-names") or {}).get("value")
        family_name = (person_details.get("family-name") or {}).get("value")
        full_name = join_if_not_empty((given_names, family_name))

        ##Person
        graph.add((person_uri, RDF.type, person_class))
        graph.add((person_uri, RDFS.label, Literal(full_name)))

        ##vcard
        #Main vcard
        vcard_uri = person_uri + "-vcard"
        graph.add((vcard_uri, RDF.type, VCARD.Individual))
        #Contact info for
        graph.add((vcard_uri, OBO.ARG_2000029, person_uri))

        #Name vcard
        vcard_name_uri = person_uri + "-vcard-name"
        graph.add((vcard_name_uri, RDF.type, VCARD.Name))
        graph.add((vcard_uri, VCARD.hasName, vcard_name_uri))
        if given_names:
            graph.add((vcard_name_uri, VCARD.givenName, Literal(given_names)))
        if family_name:
            graph.add((vcard_name_uri, VCARD.familyName, Literal(family_name)))

    #Other identifiers
    external_identifiers = (orcid_bio.get("external-identifiers") or {}).get("external-identifier") or []
    for external_identifier in external_identifiers:
        common_name = (external_identifier.get("external-id-common-name") or {}).get("value")
        if common_name == "Scopus Author ID":
            graph.add((person_uri, VIVO.scopusId,
                       Literal(external_identifier["external-id-reference"]["value"])))
def gen_triples(cr_result, matchlist, publisher_list, journal_list, doi=None):
    """Add the triples describing one Crossref work to the module-level graph ``g``.

    Besides ``g``, this reads the module-level ``D`` namespace and the
    module-level ``subjectlist`` (a pair of parallel lists: subject names and
    the URIs minted for them), which it extends when it mints a new subject.

    :param cr_result: Crossref work record (dict). "type" is required; "title",
        "subject", "container-title", "publisher", "issue", "volume", "page"
        and "author" are used when present.
    :param matchlist: pair of parallel lists; ``matchlist[0]`` holds full names
        and ``matchlist[1]`` the matching value handed to ``assign_authorship``.
    :param publisher_list: dict of publisher name -> URI string; publishers
        created here are added to it.
    :param journal_list: dict of venue title -> URI string; venues created
        here are added to it.
    :param doi: DOI stored on the publication and shown in the prompt that
        asks for an unknown publication type.
    """
    pub_uri = uri_gen('pub', g)

    # Article info
    subjects = cr_result.get("subject")

    # The title is normally a list of strings, but tolerate a bare string and
    # empty values instead of indexing blindly (a bare string used to be cut
    # down to its first character, and [] raised IndexError).
    raw_title = cr_result.get("title")
    if isinstance(raw_title, (list, tuple)):
        title = raw_title[0] if raw_title and raw_title[0] else None
    elif raw_title:
        title = raw_title.strip() or None
    else:
        title = None

    # Publication type
    known_types = {
        'journal-article': BIBO.AcademicArticle,
        'book-chapter': BIBO.Chapter,
        'dataset': VIVO.Dataset,
        'proceedings-article': VIVO.ConferencePaper,
        'abstract': VIVO.Abstract,
    }
    pubtype = known_types.get(cr_result["type"])
    if pubtype is None:
        # Unknown type: ask the operator for a type URI.
        pubtype = URIRef(raw_input('Unknown publication type for {}.'
                                   ' Enter a valid URI for the type'
                                   .format(doi)))

    # Choose the longer (hopefully non-abbreviated) title
    container_titles = cr_result.get("container-title")
    journal = max(container_titles, key=len) if container_titles else None

    if journal:
        if journal in journal_list:
            journal_uri = journal_list[journal]
            print('found existing ' + journal)
            g.add((D[pub_uri], VIVO.hasPublicationVenue, URIRef(journal_uri)))
        else:
            journal_uri = D[uri_gen('n', g)]
            journal_list[journal] = str(journal_uri)
            g.add((D[pub_uri], VIVO.hasPublicationVenue, URIRef(journal_uri)))

            # The venue's class follows from the kind of publication.
            if pubtype == VIVO.ConferencePaper:
                venue_class = BIBO.Proceedings
            elif pubtype == BIBO.Chapter:
                venue_class = BIBO.Book
            else:
                venue_class = BIBO.Journal
            g.add((URIRef(journal_uri), RDF.type, venue_class))
            g.add((URIRef(journal_uri), RDFS.label, Literal(journal)))

            if "publisher" in cr_result:
                publisher = cr_result["publisher"]
                if publisher in publisher_list:
                    publisher_uri = publisher_list[publisher]
                else:
                    publisher_uri = D[uri_gen('n', g)]
                    g.add((URIRef(publisher_uri), RDF.type, VIVO.Publisher))
                    g.add((URIRef(publisher_uri), RDFS.label,
                           Literal(publisher)))
                    publisher_list[publisher] = str(publisher_uri)
                    print('Created new publisher "' + publisher + '"')
                g.add((URIRef(journal_uri), VIVO.publisher,
                       URIRef(publisher_uri)))
            print('Made new ' + journal)

    issue = cr_result.get("issue")
    volume = cr_result.get("volume")
    pages = cr_result.get("page")
    if pages is not None and 'n/a' in pages:
        pages = None

    # Authors
    authors = parse_authors(cr_result) if "author" in cr_result else None

    # Publication date
    date = parse_publication_date(cr_result)
    if date:
        (publication_year, publication_month, publication_day) = date
    else:
        (publication_year, publication_month, publication_day) = (None, None,
                                                                  None)
    date_uri = uri_gen('n', g)
    g.add((D[pub_uri], VIVO.dateTimeValue, D[date_uri]))
    add_date(D[date_uri], publication_year, g, publication_month,
             publication_day)

    # Add things to the graph
    if pubtype:
        g.add((D[pub_uri], RDF.type, pubtype))
    if doi:
        g.add((D[pub_uri], BIBO.doi, Literal(doi)))
    if issue:
        g.add((D[pub_uri], BIBO.issue, Literal(issue)))
    if volume:
        g.add((D[pub_uri], BIBO.volume, Literal(volume)))
    if title:
        g.add((D[pub_uri], RDFS.label, Literal(title)))

    # Loop through the list of authors, trying to check for existing
    # authors in the database
    if authors:
        for rank, (first_name, surname) in enumerate(authors, 1):
            full_name = join_if_not_empty((first_name, surname))
            if full_name in matchlist[0]:
                pos = matchlist[0].index(full_name)
                assign_authorship(matchlist[1][pos], g, pub_uri, full_name,
                                  matchlist, rank)
            else:
                roll = name_lookup(surname)
                # NOTE(review): matchlist is rebound here but never returned,
                # so the caller only sees what name_selecter changes in place
                # - confirm that is intended.
                matchlist = name_selecter(roll, full_name, g, first_name,
                                          surname, pub_uri, matchlist, rank)

    # subjects
    if subjects:
        for subject in subjects:
            # Prefer a concept found by get_subject, then one minted earlier
            # (tracked in subjectlist), and only then mint a new one.
            concept_uri = get_subject(subject, g)
            if concept_uri:
                g.add((D[pub_uri], VIVO.hasSubjectArea, URIRef(concept_uri)))
            elif subject in subjectlist[0]:
                match = subjectlist[0].index(subject)
                subject_uri = subjectlist[1][match]
                g.add((D[pub_uri], VIVO.hasSubjectArea, D[subject_uri]))
            else:
                subject_uri = uri_gen('sub', g)
                subjectlist[0].append(subject)
                subjectlist[1].append(subject_uri)
                g.add((D[pub_uri], VIVO.hasSubjectArea, D[subject_uri]))
                g.add((D[subject_uri], RDF.type, SKOS.Concept))
                g.add((D[subject_uri], RDFS.label, Literal(subject)))

    if pages:
        page_parts = pages.split("-")
        g.add((D[pub_uri], BIBO.pageStart, Literal(page_parts[0])))
        if len(page_parts) > 1:
            g.add((D[pub_uri], BIBO.pageEnd, Literal(page_parts[1])))
def crosswalk_bio(orcid_profile, person_uri, graph, skip_person=False, person_class=FOAF.Person,
                  existing_vcard_uri=None, skip_name_vcard=False):
    """Add biographical triples for an ORCID profile to ``graph``.

    Non-vcard information: the person itself (unless ``skip_person``), the
    biography, Scopus / ResearcherID identifiers and free-text keywords.
    Vcard information: a name vcard (unless ``skip_name_vcard``) and one URL
    vcard per researcher URL, all hanging off the main vcard. The main vcard
    is typed and linked to the person only when it has at least one child
    vcard and no ``existing_vcard_uri`` was supplied.

    :param orcid_profile: decoded ORCID profile; everything is read from
        ``orcid_profile["orcid-profile"]["orcid-bio"]``.
    :param person_uri: URI of the person; also the stem of generated vcard URIs.
    :param graph: graph whose ``add`` method receives the (s, p, o) triples.
    :param skip_person: if true, don't add the person's type and label.
    :param person_class: class assigned to the person.
    :param existing_vcard_uri: main vcard to attach child vcards to instead of
        ``person_uri + "-vcard"``.
    :param skip_name_vcard: if true, don't create the name vcard.
    """
    orcid_bio = orcid_profile["orcid-profile"]["orcid-bio"]

    #Get names (for person and name vcard)
    #An element may be missing or present-but-null; both mean "no value".
    person_details = orcid_bio.get("personal-details") or {}
    given_names = (person_details.get("given-names") or {}).get("value")
    family_name = (person_details.get("family-name") or {}).get("value")
    full_name = join_if_not_empty((given_names, family_name))

    #Following is non-vcard bio information

    #If skip_person, then don't create person and add names
    if not skip_person:
        #Add person
        graph.add((person_uri, RDF.type, person_class))
        graph.add((person_uri, RDFS.label, Literal(full_name)))

    #Biography
    biography = (orcid_bio.get("biography") or {}).get("value")
    if biography:
        graph.add((person_uri, VIVO.overview, Literal(biography)))

    #Other identifiers
    #Default VIVO-ISF only supports a limited number of identifier types.
    external_identifiers = (orcid_bio.get("external-identifiers") or {}).get("external-identifier") or []
    for external_identifier in external_identifiers:
        common_name = (external_identifier.get("external-id-common-name") or {}).get("value")
        #Scopus ID
        if common_name == "Scopus Author ID":
            graph.add((person_uri, VIVO.scopusId,
                       Literal(external_identifier["external-id-reference"]["value"])))
        #ISI Research ID
        elif common_name == "ResearcherID":
            graph.add((person_uri, VIVO.researcherId,
                       Literal(external_identifier["external-id-reference"]["value"])))

    #Keywords
    keywords = (orcid_bio.get("keywords") or {}).get("keyword") or []
    for keyword in keywords:
        graph.add((person_uri, VIVO.freetextKeyword, Literal(keyword["value"])))

    #Following is vcard bio information

    #Add main vcard
    vcard_uri = existing_vcard_uri or person_uri + "-vcard"
    #Will only add vcard if there is a child vcard
    add_main_vcard = False

    if not skip_name_vcard and (given_names or family_name):
        #Name vcard
        vcard_name_uri = person_uri + "-vcard-name"
        graph.add((vcard_name_uri, RDF.type, VCARD.Name))
        graph.add((vcard_uri, VCARD.hasName, vcard_name_uri))
        if given_names:
            graph.add((vcard_name_uri, VCARD.givenName, Literal(given_names)))
        if family_name:
            graph.add((vcard_name_uri, VCARD.familyName, Literal(family_name)))
        add_main_vcard = True

    #Websites
    researcher_urls = (orcid_bio.get("researcher-urls") or {}).get("researcher-url") or []
    for index, researcher_url in enumerate(researcher_urls):
        url = researcher_url["url"]["value"]
        url_name = (researcher_url.get("url-name") or {}).get("value")
        vcard_website_uri = person_uri + "-vcard-website" + str(index)
        graph.add((vcard_website_uri, RDF.type, VCARD.URL))
        graph.add((vcard_uri, VCARD.hasURL, vcard_website_uri))
        graph.add((vcard_website_uri, VCARD.url, Literal(url, datatype=XSD.anyURI)))
        if url_name:
            graph.add((vcard_website_uri, RDFS.label, Literal(url_name)))
        #A website is a child vcard too, so the main vcard must exist for it;
        #otherwise the URL vcards hang off an untyped, unlinked node.
        add_main_vcard = True

    if add_main_vcard and not existing_vcard_uri:
        graph.add((vcard_uri, RDF.type, VCARD.Individual))
        #Contact info for
        graph.add((vcard_uri, OBO.ARG_2000029, person_uri))
def crosswalk(self, orcid_profile, person_uri, graph, person_class=FOAF.Person):
    """Add the biographical triples for ``orcid_profile["person"]`` to ``graph``.

    Adds the person (when it has a name and ``self.create_strategy`` agrees),
    the biography, Scopus / ResearcherID identifiers, free-text keywords, a
    name vcard and one URL vcard per researcher URL. The main vcard is typed
    and linked to the person only if at least one child vcard was added and
    ``self.create_strategy`` agrees.

    Sections that are missing or present-but-null are skipped.

    :param orcid_profile: decoded ORCID record; only ``["person"]`` is read.
    :param person_uri: URI of the person.
    :param graph: graph whose ``add`` method receives the (s, p, o) triples.
    :param person_class: class assigned to the person.
    """
    person = orcid_profile["person"]

    # Get names (for person and name vcard)
    # An element may be missing or present-but-null; both mean "no value".
    person_details = person.get("name") or {}
    given_names = (person_details.get("given-names") or {}).get("value")
    family_name = (person_details.get("family-name") or {}).get("value")
    full_name = join_if_not_empty((given_names, family_name))

    # Following is non-vcard bio information

    # Only create the person when there is a name and the create strategy
    # allows it.
    if full_name and self.create_strategy.should_create(person_class, person_uri):
        # Add person
        graph.add((person_uri, RDF.type, person_class))
        graph.add((person_uri, RDFS.label, Literal(full_name)))

    # Biography
    biography = (person.get("biography") or {}).get("content")
    if biography:
        graph.add((person_uri, VIVO.overview, Literal(biography)))

    # Other identifiers
    # Default VIVO-ISF only supports a limited number of identifier types.
    external_identifiers = (person.get("external-identifiers") or {}).get("external-identifier") or []
    for external_identifier in external_identifiers:
        identifier_type = external_identifier.get("external-id-type")
        # Scopus ID
        if identifier_type == "Scopus Author ID":
            graph.add((person_uri, VIVO.scopusId, Literal(external_identifier["external-id-value"])))
        # ISI Research ID
        elif identifier_type == "ResearcherID":
            graph.add((person_uri, VIVO.researcherId, Literal(external_identifier["external-id-value"])))

    # Keywords
    keywords = (person.get("keywords") or {}).get("keyword") or []
    for keyword in keywords:
        keywords_content = keyword.get("content")
        if keywords_content:
            # One entry may carry several comma-separated keywords.
            for keyword_content in keywords_content.split(", "):
                graph.add((person_uri, VIVO.freetextKeyword, Literal(keyword_content)))

    # Following is vcard bio information

    # Add main vcard
    vcard_uri = self.identifier_strategy.to_uri(VCARD.Individual, {"person_uri": person_uri})
    # Will only add vcard if there is a child vcard
    add_main_vcard = False

    # Name vcard
    vcard_name_uri = self.identifier_strategy.to_uri(VCARD.Name, {"person_uri": person_uri})
    if (given_names or family_name) and self.create_strategy.should_create(VCARD.Name, vcard_name_uri):
        graph.add((vcard_name_uri, RDF.type, VCARD.Name))
        graph.add((vcard_uri, VCARD.hasName, vcard_name_uri))
        if given_names:
            graph.add((vcard_name_uri, VCARD.givenName, Literal(given_names)))
        if family_name:
            graph.add((vcard_name_uri, VCARD.familyName, Literal(family_name)))
        add_main_vcard = True

    # Websites
    researcher_urls = (person.get("researcher-urls") or {}).get("researcher-url") or []
    for researcher_url in researcher_urls:
        url = researcher_url["url"]["value"]
        url_name = researcher_url.get("url-name")
        vcard_website_uri = self.identifier_strategy.to_uri(VCARD.URL, {"url": url})
        graph.add((vcard_website_uri, RDF.type, VCARD.URL))
        graph.add((vcard_uri, VCARD.hasURL, vcard_website_uri))
        graph.add((vcard_website_uri, VCARD.url, Literal(url, datatype=XSD.anyURI)))
        if url_name:
            graph.add((vcard_website_uri, RDFS.label, Literal(url_name)))
        add_main_vcard = True

    if add_main_vcard and self.create_strategy.should_create(VCARD.Individual, vcard_uri):
        graph.add((vcard_uri, RDF.type, VCARD.Individual))
        # Contact info for
        graph.add((vcard_uri, OBO.ARG_2000029, person_uri))
def crosswalk(self, orcid_profile, person_uri, graph, person_class=FOAF.Person):
    """Crosswalk the "person" section of an ORCID record into ``graph``.

    Produces, in order: the person (type and label), the biography, Scopus
    and ResearcherID identifiers, free-text keywords, a name vcard and a URL
    vcard per researcher URL. Creation of the person, the name vcard and the
    main vcard is subject to ``self.create_strategy.should_create``; the main
    vcard is additionally only created when it has a child vcard.

    Any section that is absent or null in the record is treated as empty.

    :param orcid_profile: decoded ORCID record; only ``["person"]`` is read.
    :param person_uri: URI of the person.
    :param graph: graph whose ``add`` method receives the (s, p, o) triples.
    :param person_class: class assigned to the person.
    """

    def _section(container, key):
        # Return container[key] as a dict, mapping "missing" and null to {}.
        return container.get(key) or {}

    person = orcid_profile["person"]

    # Get names (for person and name vcard)
    person_details = _section(person, "name")
    given_names = _section(person_details, "given-names").get("value")
    family_name = _section(person_details, "family-name").get("value")
    full_name = join_if_not_empty((given_names, family_name))

    # Following is non-vcard bio information

    # The person is only created when it has a name and the create strategy
    # says so.
    if full_name and self.create_strategy.should_create(
            person_class, person_uri):
        # Add person
        graph.add((person_uri, RDF.type, person_class))
        graph.add((person_uri, RDFS.label, Literal(full_name)))

    # Biography
    biography = _section(person, "biography").get("content")
    if biography:
        graph.add((person_uri, VIVO.overview, Literal(biography)))

    # Other identifiers
    # Default VIVO-ISF only supports a limited number of identifier types.
    identifier_predicates = (
        ("Scopus Author ID", VIVO.scopusId),  # Scopus ID
        ("ResearcherID", VIVO.researcherId),  # ISI Research ID
    )
    for external_identifier in (_section(
            person, "external-identifiers").get("external-identifier") or []):
        identifier_type = external_identifier.get("external-id-type")
        for type_name, predicate in identifier_predicates:
            if identifier_type == type_name:
                graph.add(
                    (person_uri, predicate,
                     Literal(external_identifier["external-id-value"])))

    # Keywords
    for keyword in _section(person, "keywords").get("keyword") or []:
        keywords_content = keyword.get("content")
        if not keywords_content:
            continue
        # One entry may carry several comma-separated keywords.
        for keyword_content in keywords_content.split(", "):
            graph.add((person_uri, VIVO.freetextKeyword,
                       Literal(keyword_content)))

    # Following is vcard bio information

    # Add main vcard
    vcard_uri = self.identifier_strategy.to_uri(VCARD.Individual,
                                                {"person_uri": person_uri})
    # Will only add vcard if there is a child vcard
    add_main_vcard = False

    # Name vcard
    vcard_name_uri = self.identifier_strategy.to_uri(
        VCARD.Name, {"person_uri": person_uri})
    if (given_names or family_name) and self.create_strategy.should_create(
            VCARD.Name, vcard_name_uri):
        graph.add((vcard_name_uri, RDF.type, VCARD.Name))
        graph.add((vcard_uri, VCARD.hasName, vcard_name_uri))
        if given_names:
            graph.add(
                (vcard_name_uri, VCARD.givenName, Literal(given_names)))
        if family_name:
            graph.add(
                (vcard_name_uri, VCARD.familyName, Literal(family_name)))
        add_main_vcard = True

    # Websites
    for researcher_url in (_section(
            person, "researcher-urls").get("researcher-url") or []):
        url = researcher_url["url"]["value"]
        url_name = researcher_url.get("url-name")
        vcard_website_uri = self.identifier_strategy.to_uri(
            VCARD.URL, {"url": url})
        graph.add((vcard_website_uri, RDF.type, VCARD.URL))
        graph.add((vcard_uri, VCARD.hasURL, vcard_website_uri))
        graph.add((vcard_website_uri, VCARD.url,
                   Literal(url, datatype=XSD.anyURI)))
        if url_name:
            graph.add(
                (vcard_website_uri, RDFS.label, Literal(url_name)))
        add_main_vcard = True

    if add_main_vcard and self.create_strategy.should_create(
            VCARD.Individual, vcard_uri):
        graph.add((vcard_uri, RDF.type, VCARD.Individual))
        # Contact info for
        graph.add((vcard_uri, OBO.ARG_2000029, person_uri))
def _get_orcid_title(work):
    """Build the display title of an ORCID work from its title and subtitle.

    Reads ``work["title"]["title"]["value"]`` (required) and the optional,
    possibly null, ``work["title"]["subtitle"]`` element, and hands both to
    ``join_if_not_empty`` with ": " as its second argument (presumably the
    separator).
    """
    title_element = work["title"]
    main_title = title_element["title"]["value"]
    # A missing or null subtitle element yields None.
    subtitle = (title_element.get("subtitle") or {}).get("value")
    return join_if_not_empty((main_title, subtitle), ": ")
def crosswalk_work(self, work, person_uri, person_surname, graph):
    """Add the triples describing a single ORCID work to ``graph``.

    Work metadata may be available from the orcid profile, bibtex contained
    in the orcid profile, and/or crossref record. The preferred order (in
    general) for getting metadata is crossref, bibtex, orcid. Datacite
    records were considered, but not found to have additional/better
    metadata.

    Works whose type is not in ``work_type_map`` are ignored.

    :param work: ORCID work record (dict); "type" is required.
    :param person_uri: URI of the person whose profile contains the work.
    :param person_surname: that person's surname; a contributor with the same
        surname (case-insensitive) is taken to be this person.
    :param graph: graph whose ``add`` method receives the (s, p, o) triples.
    """
    # Work Type
    work_type = work["type"]
    if work_type not in work_type_map:
        return

    # Extract
    # Get external identifiers so that can get DOI
    external_identifiers = WorksCrosswalk._get_work_identifiers(work)
    doi = external_identifiers.get("DOI")
    # Fall back to {} when there is no DOI or nothing was fetched, so the
    # .get() lookups below are always safe.
    crossref_record = (WorksCrosswalk._fetch_crossref_doi(doi)
                       if doi else None) or {}

    # Bibtex
    bibtex = WorksCrosswalk._parse_bibtex(work)

    # Get title so that can construct work uri
    title = WorksCrosswalk._get_crossref_title(crossref_record) \
        or bibtex.get("title") \
        or WorksCrosswalk._get_orcid_title(work)

    # Work-type
    work_class = work_type_map[work_type]
    if work_type == "TRANSLATION" and bibtex \
            and bibtex["ENTRYTYPE"] in bibtex_type_map:
        work_class = bibtex_type_map[bibtex["ENTRYTYPE"]]

    # Construct work uri
    work_uri = self.identifier_strategy.to_uri(work_class, {"name": title})

    graph.add((work_uri, RDF.type, work_class))

    # Title
    graph.add((work_uri, RDFS.label, Literal(title)))

    # Publication date
    (publication_year, publication_month, publication_day) = \
        WorksCrosswalk._get_crossref_publication_date(crossref_record) \
        or WorksCrosswalk._get_orcid_publication_date(work) \
        or WorksCrosswalk._get_bibtext_publication_date(bibtex) \
        or (None, None, None)
    date_uri = add_date(publication_year, graph, self.identifier_strategy,
                        publication_month, publication_day)
    if date_uri:
        graph.add((work_uri, VIVO.dateTimeValue, date_uri))

    # Subjects
    for subject in crossref_record.get("subject") or []:
        subject_uri = self.identifier_strategy.to_uri(
            SKOS.Concept, {"name": subject})
        graph.add((work_uri, VIVO.hasSubjectArea, subject_uri))
        if self.create_strategy.should_create(SKOS.Concept, subject_uri):
            graph.add((subject_uri, RDF.type, SKOS.Concept))
            graph.add((subject_uri, RDFS.label, Literal(subject)))

    # Contributors (an array of (first_name, surname, VIVO type, e.g., VIVO.Authorship))
    bibtex_contributors = []
    bibtex_contributors.extend(WorksCrosswalk._get_bibtex_authors(bibtex))
    bibtex_contributors.extend(WorksCrosswalk._get_bibtex_editors(bibtex))
    # Orcid is better for translations because has translator role
    if work_type == "TRANSLATION":
        contributors = WorksCrosswalk._get_orcid_contributors(work)
    else:
        contributors = WorksCrosswalk._get_crossref_authors(crossref_record) \
            or bibtex_contributors \
            or WorksCrosswalk._get_orcid_contributors(work)

    if not contributors:
        # Add person as author or editor.
        # None, None means this person.
        if work_type in ("EDITED_BOOK", ):
            contributors.append((None, None, VIVO.Editorship))
        elif work_type == "TRANSLATION":
            # Translator is a predicate, not a -ship class.
            contributors.append((None, None, "TRANSLATOR"))
        else:
            contributors.append((None, None, VIVO.Authorship))

    for (first_name, surname, vivo_type) in contributors:
        if not surname or person_surname.lower() == surname.lower():
            contributor_uri = person_uri
        else:
            contributor_uri = self.identifier_strategy.to_uri(
                FOAF.Person, {"first_name": first_name, "surname": surname})
            if self.create_strategy.should_create(FOAF.Person,
                                                  contributor_uri):
                graph.add((contributor_uri, RDF.type, FOAF.Person))
                full_name = join_if_not_empty((first_name, surname))
                graph.add((contributor_uri, RDFS.label, Literal(full_name)))

        # Translation is a special case
        if vivo_type == "TRANSLATOR":
            graph.add((contributor_uri, BIBO.translator, work_uri))
        # So is patent assignee
        elif work_type == "PATENT":
            graph.add((contributor_uri, VIVO.assigneeFor, work_uri))
        else:
            contributorship_uri = self.identifier_strategy.to_uri(
                vivo_type,
                {"contributor_uri": contributor_uri, "work_uri": work_uri})
            graph.add((contributorship_uri, RDF.type, vivo_type))
            graph.add((contributorship_uri, VIVO.relates, work_uri))
            graph.add((contributorship_uri, VIVO.relates, contributor_uri))

    # Publisher
    publisher = crossref_record.get("publisher") or bibtex.get("publisher")
    if publisher:
        publisher_uri = self.identifier_strategy.to_uri(
            FOAF.Organization, {"name": publisher})
        graph.add((work_uri, VIVO.publisher, publisher_uri))
        if self.create_strategy.should_create(FOAF.Organization,
                                              publisher_uri):
            graph.add((publisher_uri, RDF.type, FOAF.Organization))
            graph.add((publisher_uri, RDFS.label, Literal(publisher)))

    # Volume
    volume = crossref_record.get("volume") or bibtex.get("volume")
    if volume:
        graph.add((work_uri, BIBO.volume, Literal(volume)))

    # Issue
    issue = crossref_record.get("issue") or bibtex.get("number")
    if issue:
        graph.add((work_uri, BIBO.issue, Literal(issue)))

    # Pages
    pages = crossref_record.get("page") or bibtex.get("pages")
    start_page = None
    end_page = None
    if pages and "-" in pages:
        # maxsplit=1 always yields exactly two pieces here; a larger value
        # made the unpacking fail on strings with more than one dash group.
        (start_page, end_page) = re.split(" *-+ *", pages, maxsplit=1)
    if start_page:
        graph.add((work_uri, BIBO.pageStart, Literal(start_page)))
    if end_page:
        graph.add((work_uri, BIBO.pageEnd, Literal(end_page)))

    # Identifiers
    # Add doi in bibtex, but not orcid profile
    if bibtex and "doi" in bibtex and "DOI" not in external_identifiers:
        external_identifiers["DOI"] = bibtex["doi"]
    # Add isbn in bibtex, but not orcid profile
    if bibtex and "isbn" in bibtex and "ISBN" not in external_identifiers:
        external_identifiers["ISBN"] = bibtex["isbn"]

    for identifier_type, identifier in external_identifiers.items():
        identifier_url = None
        if identifier_type in ("PAT", "OTHER-ID") and work_type == "PATENT":
            identifier_predicate = VIVO.patentNumber
        elif identifier_type == "ISBN":
            # Tell ISBN-10 from ISBN-13 by length once hyphens are removed.
            clean_isbn = identifier.replace("-", "")
            if len(clean_isbn) <= 10:
                identifier_predicate = BIBO.isbn10
            else:
                identifier_predicate = BIBO.isbn13
        else:
            (identifier_predicate, url_template) = identifier_map.get(
                identifier_type, (None, None))
            if url_template:
                identifier_url = url_template % identifier

        if identifier_predicate:
            graph.add((work_uri, identifier_predicate, Literal(identifier)))
        if identifier_url:
            self._add_work_url(identifier_url, work_uri, graph)

    orcid_url = (work.get("url", {}) or {}).get("value")
    if orcid_url and WorksCrosswalk._use_url(orcid_url):
        self._add_work_url(orcid_url, work_uri, graph)
    bibtex_url = bibtex.get("link")
    if bibtex_url and WorksCrosswalk._use_url(bibtex_url) \
            and orcid_url != bibtex_url:
        self._add_work_url(bibtex_url, work_uri, graph)

    # Series
    # TODO: Figure out how to model series (bibtex "series") in VIVO-ISF.

    # Journal
    # If Crossref has a journal use it
    journal = WorksCrosswalk._get_crossref_journal(crossref_record)
    issns = []
    if journal:
        issns = crossref_record.get("ISSN", [])
    # Otherwise, only use for some work types.
    elif work_type in journal_map:
        journal = bibtex.get("journal")
        if journal:
            if "issn" in bibtex:
                issns = [bibtex["issn"]]
        else:
            journal = (work.get("journal-title", {}) or {}).get("value")

    if journal:
        journal_class = journal_map.get(work_type, BIBO.Journal)
        journal_uri = self.identifier_strategy.to_uri(
            journal_class, {"name": journal})
        graph.add((work_uri, VIVO.hasPublicationVenue, journal_uri))
        if self.create_strategy.should_create(journal_class, journal_uri):
            graph.add((journal_uri, RDF.type, journal_class))
            graph.add((journal_uri, RDFS.label, Literal(journal)))
            for issn in issns:
                graph.add((journal_uri, BIBO.issn, Literal(issn)))

    if work_type in ("BOOK_CHAPTER", ):
        book_title = bibtex.get("booktitle")
        if book_title:
            book_uri = self.identifier_strategy.to_uri(
                BIBO.Book, {"name": book_title})
            graph.add((work_uri, VIVO.hasPublicationVenue, book_uri))
            if self.create_strategy.should_create(BIBO.Book, book_uri):
                graph.add((book_uri, RDF.type, BIBO.Book))
                graph.add((book_uri, RDFS.label, Literal(book_title)))

    if work_type in ("CONFERENCE_PAPER", ):
        proceeding = bibtex.get("journal") \
            or (work.get("journal-title", {}) or {}).get("value")
        if proceeding:
            proceeding_uri = self.identifier_strategy.to_uri(
                BIBO.Proceedings, {"name": proceeding})
            graph.add((work_uri, VIVO.hasPublicationVenue, proceeding_uri))
            if self.create_strategy.should_create(BIBO.Proceedings,
                                                  proceeding_uri):
                graph.add((proceeding_uri, RDF.type, BIBO.Proceedings))
                graph.add((proceeding_uri, RDFS.label, Literal(proceeding)))
def crosswalk_works(orcid_profile, person_uri, graph, subjectlist, journlist, orglist):
    """Add triples for every work in an ORCID profile to ``graph``.

    For each work this adds its label, an Authorship linking it to
    ``person_uri``, co-authors taken from the DOI record (when there is one),
    a date, subjects, the DOI (also as a URL vcard) and the publisher. Journal
    articles additionally get journal, volume, issue and pages from the
    work's bibtex; books and data sets only get their type.

    ``subjectlist``, ``journlist`` and ``orglist`` are each a pair of parallel
    lists (``[names, uris]``) used to reuse URIs; they are extended in place
    when something new is created.

    :param orcid_profile: decoded ORCID profile; the family name under
        "orcid-bio"/"personal-details" is required.
    :param person_uri: URI of the person who owns the profile.
    :param graph: graph whose ``add`` method receives the (s, p, o) triples.
    :param subjectlist: known subjects and their URIs.
    :param journlist: known journals and their URIs.
    :param orglist: known organizations (publishers) and their URIs.
    """
    profile = orcid_profile["orcid-profile"]
    person_surname = profile["orcid-bio"]["personal-details"]["family-name"]["value"]

    #Publications
    #Any missing or null level simply means "no works".
    activities = profile.get("orcid-activities") or {}
    works = (activities.get("orcid-works") or {}).get("orcid-work") or []

    for work in works:
        ##Extract
        #Get external identifiers so that can get DOI
        external_identifiers = _get_work_identifiers(work)
        doi = external_identifiers.get("DOI")
        doi_record = fetch_crossref_doi(doi) if doi else None

        #Bibtex
        bibtex = _parse_bibtex(work)
        #Work Type
        work_type = work["work-type"]
        #Title
        title = work["work-title"]["title"]["value"]

        work_uri = ns.D[to_hash_identifier(PREFIX_DOCUMENT, (title, work_type))]

        #Publication date
        (publication_year, publication_month, publication_day) = _get_doi_publication_date(doi_record) \
            if doi_record else _get_publication_date(work)

        #Subjects
        subjects = doi_record["subject"] if doi_record and "subject" in doi_record else None

        #Authors
        authors = _get_doi_authors(doi_record) if doi_record else None
        #TODO: Get from ORCID profile if no doi record

        #Publisher
        publisher = bibtex.get("publisher")

        ##Add triples
        #Title
        graph.add((work_uri, RDFS.label, Literal(title)))
        #Person (via Authorship)
        authorship_uri = work_uri + "-auth"
        graph.add((authorship_uri, RDF.type, VIVO.Authorship))
        graph.add((authorship_uri, VIVO.relates, work_uri))
        graph.add((authorship_uri, VIVO.relates, person_uri))

        #Other authors
        for (first_name, surname) in authors or []:
            #An author with the profile owner's surname is taken to be the
            #owner, who was already added above.
            if person_surname.lower() == surname.lower():
                continue
            author_uri = ns.D[to_hash_identifier(PREFIX_PERSON, (first_name, surname))]
            graph.add((author_uri, RDF.type, FOAF.Person))
            full_name = join_if_not_empty((first_name, surname))
            graph.add((author_uri, RDFS.label, Literal(full_name)))

            #NOTE(review): this URI depends only on the author, so the same
            #co-author on several works shares one Authorship node - confirm
            #that is intended.
            authorship_uri = author_uri + "-auth"
            graph.add((authorship_uri, RDF.type, VIVO.Authorship))
            graph.add((authorship_uri, VIVO.relates, work_uri))
            graph.add((authorship_uri, VIVO.relates, author_uri))

        #Date
        date_uri = work_uri + "-date"
        graph.add((work_uri, VIVO.dateTimeValue, date_uri))
        add_date(date_uri, publication_year, graph, publication_month, publication_day)

        #Subjects
        for subject in subjects or []:
            if subject in subjectlist[0]:
                subject_uri = subjectlist[1][subjectlist[0].index(subject)]
                graph.add((work_uri, VIVO.hasSubjectArea, subject_uri))
            else:
                subject_uri = ns.D[to_hash_identifier("sub", (subject,))]
                subjectlist[0].append(subject)
                subjectlist[1].append(subject_uri)
                graph.add((work_uri, VIVO.hasSubjectArea, subject_uri))
                graph.add((subject_uri, RDF.type, SKOS.Concept))
                graph.add((subject_uri, RDFS.label, Literal(subject)))

        #Identifier
        if doi:
            graph.add((work_uri, BIBO.doi, Literal(doi)))
            #Also add as a website
            identifier_url = "http://dx.doi.org/%s" % doi
            vcard_uri = ns.D[to_hash_identifier("vcard", (identifier_url,))]
            graph.add((vcard_uri, RDF.type, VCARD.Kind))
            #Has contact info
            graph.add((work_uri, OBO.ARG_2000028, vcard_uri))
            #Url vcard
            vcard_url_uri = vcard_uri + "-url"
            graph.add((vcard_url_uri, RDF.type, VCARD.URL))
            graph.add((vcard_uri, VCARD.hasURL, vcard_url_uri))
            graph.add((vcard_url_uri, VCARD.url, Literal(identifier_url, datatype=XSD.anyURI)))

        #Publisher
        if publisher:
            if publisher in orglist[0]:
                publisher_uri = orglist[1][orglist[0].index(publisher)]
                graph.add((work_uri, VIVO.publisher, publisher_uri))
            else:
                publisher_uri = ns.D[to_hash_identifier(PREFIX_ORGANIZATION, (publisher,))]
                orglist[0].append(publisher)
                orglist[1].append(publisher_uri)
                graph.add((publisher_uri, RDF.type, FOAF.Organization))
                graph.add((publisher_uri, RDFS.label, Literal(publisher)))
                graph.add((work_uri, VIVO.publisher, publisher_uri))

        if work_type == "JOURNAL_ARTICLE":
            ##Extract
            #Journal
            journal = bibtex.get("journal")
            #Volume
            volume = bibtex.get("volume")
            #Number
            number = bibtex.get("number")
            #Pages
            pages = bibtex.get("pages")
            start_page = None
            end_page = None
            if pages and "-" in pages:
                #maxsplit=1 always yields exactly two pieces here; a larger
                #value made the unpacking fail on strings with more than one
                #dash group.
                (start_page, end_page) = re.split(" *-+ *", pages, maxsplit=1)

            ##Add triples
            #Type
            graph.add((work_uri, RDF.type, BIBO.AcademicArticle))
            #Journal
            if journal:
                if journal in journlist[0]:
                    journal_uri = journlist[1][journlist[0].index(journal)]
                    graph.add((work_uri, VIVO.hasPublicationVenue, journal_uri))
                else:
                    journal_uri = ns.D[to_hash_identifier(PREFIX_JOURNAL, (BIBO.Journal, journal))]
                    journlist[0].append(journal)
                    journlist[1].append(journal_uri)
                    graph.add((journal_uri, RDF.type, BIBO.Journal))
                    graph.add((journal_uri, RDFS.label, Literal(journal)))
                    graph.add((work_uri, VIVO.hasPublicationVenue, journal_uri))

            #Volume
            if volume:
                graph.add((work_uri, BIBO.volume, Literal(volume)))
            #Number
            if number:
                graph.add((work_uri, BIBO.issue, Literal(number)))
            #Pages
            if start_page:
                graph.add((work_uri, BIBO.pageStart, Literal(start_page)))
            if end_page:
                graph.add((work_uri, BIBO.pageEnd, Literal(end_page)))

        elif work_type == "BOOK":
            ##Add triples
            #Type
            graph.add((work_uri, RDF.type, BIBO.Book))
        elif work_type == "DATA_SET":
            ##Add triples
            #Type
            graph.add((work_uri, RDF.type, VIVO.Dataset))
def crosswalk(self, orcid_profile, person_uri, graph):
    """
    Add triples describing each work in an ORCID profile to graph.

    For every work whose "work-type" is a key of work_type_map, metadata is
    gathered from the Crossref record (fetched with
    WorksCrosswalk._fetch_crossref_doi when the work has a DOI), the bibtex
    embedded in the work, and the ORCID work record itself. Triples are added
    for the work and its title, publication date, subjects, contributors,
    publisher, volume, issue, pages, identifiers, urls and publication venue.
    Works of any other type are skipped.

    :param orcid_profile: parsed ORCID profile; read with dict-style access
        starting at the "orcid-profile" key.
    :param person_uri: uri of the profile's owner. Used as the contributor uri
        when a contributor has no surname or has the owner's surname.
    :param graph: graph that triples are added to with graph.add().
        Presumably an rdflib Graph -- confirm against the caller.
    :return: None. graph is modified in place.
    """
    # Work metadata may be available from the orcid profile, bibtex contained in the orcid profile, and/or crossref
    # record. The preferred order (in general) for getting metadata is crossref, bibtex, orcid.
    # Note that datacite records were considered, but not found to have additional/better metadata.

    #Publications
    # Every level of the activities structure may be missing or None.
    activities = orcid_profile["orcid-profile"].get("orcid-activities") or {}
    works = (activities.get("orcid-works") or {}).get("orcid-work", []) or []
    for work in works:
        #Work Type
        work_type = work["work-type"]
        if work_type not in work_type_map:
            # Unmapped work types are not crosswalked.
            continue

        ##Extract
        #Get external identifiers so that can get DOI
        external_identifiers = WorksCrosswalk._get_work_identifiers(work)
        doi = external_identifiers.get("DOI")
        crossref_record = WorksCrosswalk._fetch_crossref_doi(doi) if doi else {}

        #Bibtex
        bibtex = WorksCrosswalk._parse_bibtex(work)

        #Get title so that can construct work uri
        title = WorksCrosswalk._get_crossref_title(crossref_record) \
            or bibtex.get("title") \
            or WorksCrosswalk._get_orcid_title(work)

        #Work-type
        work_class = work_type_map[work_type]
        if work_type == "TRANSLATION" and bibtex and bibtex["ENTRYTYPE"] in bibtex_type_map:
            work_class = bibtex_type_map[bibtex["ENTRYTYPE"]]

        #Construct work uri
        work_uri = self.identifier_strategy.to_uri(work_class, {"name": title})
        graph.add((work_uri, RDF.type, work_class))

        #Title
        graph.add((work_uri, RDFS.label, Literal(title)))

        #Publication date
        (publication_year, publication_month, publication_day) = \
            WorksCrosswalk._get_crossref_publication_date(crossref_record) \
            or WorksCrosswalk._get_orcid_publication_date(work) \
            or WorksCrosswalk._get_bibtext_publication_date(bibtex) \
            or (None, None, None)
        date_uri = add_date(publication_year, graph, self.identifier_strategy,
                            publication_month, publication_day)
        if date_uri:
            graph.add((work_uri, VIVO.dateTimeValue, date_uri))

        #Subjects
        subjects = crossref_record.get("subject") if crossref_record else None
        for subject in subjects or []:
            subject_uri = self.identifier_strategy.to_uri(SKOS.Concept, {"name": subject})
            graph.add((work_uri, VIVO.hasSubjectArea, subject_uri))
            if self.create_strategy.should_create(SKOS.Concept, subject_uri):
                graph.add((subject_uri, RDF.type, SKOS.Concept))
                graph.add((subject_uri, RDFS.label, Literal(subject)))

        #Contributors (an array of (first_name, surname, VIVO type, e.g., VIVO.Authorship))
        bibtex_contributors = []
        bibtex_contributors.extend(WorksCrosswalk._get_bibtex_authors(bibtex))
        bibtex_contributors.extend(WorksCrosswalk._get_bibtex_editors(bibtex))
        #Orcid is better for translations because has translator role
        if work_type == "TRANSLATION":
            contributors = WorksCrosswalk._get_orcid_contributors(work)
        else:
            contributors = WorksCrosswalk._get_crossref_authors(crossref_record) or bibtex_contributors \
                or WorksCrosswalk._get_orcid_contributors(work)

        # The family name may be absent or None; in that case no contributor
        # can be matched to the profile owner by surname.
        person_details = orcid_profile["orcid-profile"]["orcid-bio"]["personal-details"]
        person_surname = (person_details.get("family-name") or {}).get("value")

        if not contributors:
            #Add person as author or editor.
            #None, None means this person.
            if work_type == "EDITED_BOOK":
                contributors.append((None, None, VIVO.Editorship))
            elif work_type == "TRANSLATION":
                #Translator is a predicate, not a -ship class.
                contributors.append((None, None, "TRANSLATOR"))
            else:
                contributors.append((None, None, VIVO.Authorship))

        for (first_name, surname, vivo_type) in contributors:
            is_profile_owner = not surname or (
                person_surname and person_surname.lower() == surname.lower())
            if is_profile_owner:
                contributor_uri = person_uri
            else:
                contributor_uri = self.identifier_strategy.to_uri(FOAF.Person, {"first_name": first_name,
                                                                                "surname": surname})
                if self.create_strategy.should_create(FOAF.Person, contributor_uri):
                    graph.add((contributor_uri, RDF.type, FOAF.Person))
                    full_name = join_if_not_empty((first_name, surname))
                    graph.add((contributor_uri, RDFS.label, Literal(full_name)))

            #Translation is a special case
            if vivo_type == "TRANSLATOR":
                graph.add((contributor_uri, BIBO.translator, work_uri))
            #So is patent assignee
            elif work_type == "PATENT":
                graph.add((contributor_uri, VIVO.assigneeFor, work_uri))
            else:
                contributorship_uri = self.identifier_strategy.to_uri(vivo_type,
                                                                      {"contributor_uri": contributor_uri,
                                                                       "work_uri": work_uri})
                graph.add((contributorship_uri, RDF.type, vivo_type))
                graph.add((contributorship_uri, VIVO.relates, work_uri))
                graph.add((contributorship_uri, VIVO.relates, contributor_uri))

        #Publisher
        publisher = crossref_record.get("publisher") or bibtex.get("publisher")
        if publisher:
            publisher_uri = self.identifier_strategy.to_uri(FOAF.Organization, {"name": publisher})
            graph.add((work_uri, VIVO.publisher, publisher_uri))
            if self.create_strategy.should_create(FOAF.Organization, publisher_uri):
                graph.add((publisher_uri, RDF.type, FOAF.Organization))
                graph.add((publisher_uri, RDFS.label, Literal(publisher)))

        #Volume
        volume = crossref_record.get("volume") or bibtex.get("volume")
        if volume:
            graph.add((work_uri, BIBO.volume, Literal(volume)))

        #Issue
        issue = crossref_record.get("issue") or bibtex.get("number")
        if issue:
            graph.add((work_uri, BIBO.issue, Literal(issue)))

        #Pages
        pages = crossref_record.get("page") or bibtex.get("pages")
        start_page = None
        end_page = None
        if pages and "-" in pages:
            # maxsplit=1 always yields exactly two parts, so the unpacking
            # cannot fail when pages holds more than one separator.
            (start_page, end_page) = re.split(" *-+ *", pages, maxsplit=1)
        if start_page:
            graph.add((work_uri, BIBO.pageStart, Literal(start_page)))
        if end_page:
            graph.add((work_uri, BIBO.pageEnd, Literal(end_page)))

        #Identifiers
        #Add doi in bibtex, but not orcid profile
        if bibtex and "doi" in bibtex and "DOI" not in external_identifiers:
            external_identifiers["DOI"] = bibtex["doi"]
        #Add isbn in bibtex, but not orcid profile
        if bibtex and "isbn" in bibtex and "ISBN" not in external_identifiers:
            external_identifiers["ISBN"] = bibtex["isbn"]

        for identifier_type, identifier in external_identifiers.iteritems():
            identifier_url = None
            if identifier_type in ("PAT", "OTHER_ID") and work_type == "PATENT":
                identifier_predicate = VIVO.patentNumber
            elif identifier_type == "ISBN":
                # Choose the predicate from the ISBN's length without hyphens.
                clean_isbn = identifier.replace("-", "")
                if len(clean_isbn) <= 10:
                    identifier_predicate = BIBO.isbn10
                else:
                    identifier_predicate = BIBO.isbn13
            else:
                (identifier_predicate, url_template) = identifier_map.get(identifier_type, (None, None))
                if url_template:
                    identifier_url = url_template % identifier

            if identifier_predicate:
                graph.add((work_uri, identifier_predicate, Literal(identifier)))
            if identifier_url:
                self._add_work_url(identifier_url, work_uri, graph)

        orcid_url = (work.get("url", {}) or {}).get("value")
        if orcid_url and WorksCrosswalk._use_url(orcid_url):
            self._add_work_url(orcid_url, work_uri, graph)
        bibtex_url = bibtex.get("link")
        # Skip the bibtex url when it repeats the orcid url.
        if bibtex_url and WorksCrosswalk._use_url(bibtex_url) and orcid_url != bibtex_url:
            self._add_work_url(bibtex_url, work_uri, graph)

        #Series
        #TODO: Figure out how to model series (bibtex "series" field) in VIVO-ISF.

        #Journal
        #If Crossref has a journal use it
        journal = WorksCrosswalk._get_crossref_journal(crossref_record)
        if journal:
            issns = crossref_record.get("ISSN", [])
        #Otherwise, only use for some work types.
        elif work_type in journal_map:
            issns = []
            journal = bibtex.get("journal")
            if journal:
                if "issn" in bibtex:
                    issns = [bibtex["issn"]]
            else:
                journal = (work.get("journal-title", {}) or {}).get("value")

        if journal:
            journal_class = journal_map.get(work_type, BIBO.Journal)
            journal_uri = self.identifier_strategy.to_uri(journal_class, {"name": journal})
            graph.add((work_uri, VIVO.hasPublicationVenue, journal_uri))
            if self.create_strategy.should_create(journal_class, journal_uri):
                graph.add((journal_uri, RDF.type, journal_class))
                graph.add((journal_uri, RDFS.label, Literal(journal)))
                for issn in issns:
                    graph.add((journal_uri, BIBO.issn, Literal(issn)))

        if work_type == "BOOK_CHAPTER":
            book_title = bibtex.get("booktitle")
            if book_title:
                book_uri = self.identifier_strategy.to_uri(BIBO.Book, {"name": book_title})
                graph.add((work_uri, VIVO.hasPublicationVenue, book_uri))
                if self.create_strategy.should_create(BIBO.Book, book_uri):
                    graph.add((book_uri, RDF.type, BIBO.Book))
                    graph.add((book_uri, RDFS.label, Literal(book_title)))

        if work_type == "CONFERENCE_PAPER":
            proceeding = bibtex.get("journal") or (work.get("journal-title", {}) or {}).get("value")
            if proceeding:
                proceeding_uri = self.identifier_strategy.to_uri(BIBO.Proceedings, {"name": proceeding})
                graph.add((work_uri, VIVO.hasPublicationVenue, proceeding_uri))
                if self.create_strategy.should_create(BIBO.Proceedings, proceeding_uri):
                    graph.add((proceeding_uri, RDF.type, BIBO.Proceedings))
                    graph.add((proceeding_uri, RDFS.label, Literal(proceeding)))
def _get_orcid_title(work):
    """
    Build the title of an ORCID work from its "work-title" entry.

    The title value and the subtitle value (None when the work has no
    subtitle, or the subtitle is empty) are handed to join_if_not_empty
    together with ": " (presumably the separator -- confirm against
    join_if_not_empty).

    :param work: an ORCID work, read with dict-style access.
    :return: whatever join_if_not_empty returns for the title parts.
    """
    work_title = work["work-title"]
    main_title = work_title["title"]["value"]
    # "subtitle" may be missing or None, so fall back to an empty dict.
    subtitle_entry = work_title.get("subtitle") or {}
    subtitle = subtitle_entry.get("value")
    return join_if_not_empty((main_title, subtitle), ": ")