Пример #1
0
def crosswalk_bio(orcid_profile, person_uri, graph, skip_person=False, person_class=FOAF.Person):
    """Add bio triples for an ORCID profile to ``graph``.

    Unless ``skip_person`` is true, the person is typed as ``person_class``,
    labelled with the joined given/family names, and linked to a main vcard
    and a name vcard. Regardless of ``skip_person``, a ``VIVO.scopusId``
    triple is added for each external identifier whose common name is
    "Scopus Author ID".

    :param orcid_profile: parsed ORCID profile; everything is read from
        ``orcid_profile["orcid-profile"]["orcid-bio"]``.
    :param person_uri: subject of the person triples; the vcard URIs are
        built by appending "-vcard" and "-vcard-name" to it.
    :param graph: graph the triples are added to (via ``graph.add``).
    :param skip_person: if true, add neither the person nor the vcards.
    :param person_class: RDF class assigned to the person.
    """
    orcid_bio = orcid_profile["orcid-profile"]["orcid-bio"]

    #If skip_person, then don't create person and add names
    if not skip_person:
        #Sections and name parts may be missing or present-but-null, so
        #fall back to an empty dict at every level.
        person_details = orcid_bio.get("personal-details") or {}
        given_names = (person_details.get("given-names") or {}).get("value")
        family_name = (person_details.get("family-name") or {}).get("value")
        full_name = join_if_not_empty((given_names, family_name))

        ##Person
        graph.add((person_uri, RDF.type, person_class))
        graph.add((person_uri, RDFS.label, Literal(full_name)))

        ##vcard
        #Main vcard
        vcard_uri = person_uri + "-vcard"
        graph.add((vcard_uri, RDF.type, VCARD.Individual))
        #Contact info for
        graph.add((vcard_uri, OBO.ARG_2000029, person_uri))
        #Name vcard
        vcard_name_uri = person_uri + "-vcard-name"
        graph.add((vcard_name_uri, RDF.type, VCARD.Name))
        graph.add((vcard_uri, VCARD.hasName, vcard_name_uri))
        if given_names:
            graph.add((vcard_name_uri, VCARD.givenName, Literal(given_names)))
        if family_name:
            graph.add((vcard_name_uri, VCARD.familyName, Literal(family_name)))

    #Other identifiers
    external_identifiers = \
        (orcid_bio.get("external-identifiers") or {}).get("external-identifier") or []
    for external_identifier in external_identifiers:
        #The common name may be null; treat that as "no match" instead of crashing.
        common_name = (external_identifier.get("external-id-common-name") or {}).get("value")
        if common_name == "Scopus Author ID":
            graph.add((person_uri, VIVO.scopusId, Literal(external_identifier["external-id-reference"]["value"])))
def gen_triples(cr_result, matchlist, publisher_list, journal_list, doi=None):
    """Add the triples describing one Crossref work to the module-level graph.

    The function writes to the module-level graph ``g`` (URIs are built
    through ``D`` and ``uri_gen``) and uses the module-level ``subjectlist``
    to remember subjects created during this run.

    :param cr_result: Crossref work record (dict). ``"type"`` is required;
        ``"title"``, ``"subject"``, ``"container-title"``, ``"publisher"``,
        ``"issue"``, ``"volume"``, ``"page"`` and ``"author"`` are optional.
    :param matchlist: pair of parallel lists; ``matchlist[0]`` is searched for
        an author's full name and the entry at the same position in
        ``matchlist[1]`` is passed to ``assign_authorship``.
    :param publisher_list: dict mapping publisher name to URI string; newly
        created publishers are inserted into it.
    :param journal_list: dict mapping venue title to URI string; newly created
        venues are inserted into it.
    :param doi: DOI of the work; stored as ``BIBO.doi`` when given and shown
        in the prompt for unknown publication types.

    Note: an unrecognised ``cr_result["type"]`` triggers an interactive
    ``raw_input`` prompt for the type URI.
    """
    pub_uri = uri_gen('pub', g)

    # Article info
    subjects = cr_result.get("subject")

    # The title may be a list (first element is used) or a plain string.
    # Indexing a string with [0] would keep only its first character, and an
    # empty list would raise IndexError, so the two shapes are told apart
    # explicitly.
    raw_title = cr_result.get("title")
    if isinstance(raw_title, (list, tuple)):
        title = raw_title[0] if raw_title and raw_title[0] else None
    elif raw_title:
        title = raw_title.strip() or None
    else:
        title = None

    # Publication type
    work_type = cr_result["type"]
    if work_type == 'journal-article':
        pubtype = BIBO.AcademicArticle
    elif work_type == 'book-chapter':
        pubtype = BIBO.Chapter
    elif work_type == 'dataset':
        pubtype = VIVO.Dataset
    elif work_type == 'proceedings-article':
        pubtype = VIVO.ConferencePaper
    elif work_type == 'abstract':
        pubtype = VIVO.Abstract
    else:
        pubtype = URIRef(raw_input('Unknown publication type for {}.'
                                   ' Enter a valid URI for the type'
                                   .format(doi)))

    # Choose the longer (hopefully non-abbreviated) title
    container_titles = cr_result.get("container-title")
    journal = max(container_titles, key=len) if container_titles else None
    if journal:
        if journal in journal_list:
            journal_uri = journal_list[journal]

            print('found existing ' + journal)
            g.add((D[pub_uri], VIVO.hasPublicationVenue,
                  URIRef(journal_uri)))

        else:
            journal_uri = D[uri_gen('n', g)]
            journal_list[journal] = str(journal_uri)

            g.add((D[pub_uri], VIVO.hasPublicationVenue,
                  URIRef(journal_uri)))
            # The venue class follows the publication type.
            if pubtype == VIVO.ConferencePaper:
                g.add((URIRef(journal_uri), RDF.type, BIBO.Proceedings))
            elif pubtype == BIBO.Chapter:
                g.add((URIRef(journal_uri), RDF.type, BIBO.Book))
            else:
                g.add((URIRef(journal_uri), RDF.type, BIBO.Journal))
            g.add((URIRef(journal_uri), RDFS.label, Literal(journal)))

            # A publisher is only attached to venues created here.
            if "publisher" in cr_result:
                publisher = cr_result["publisher"]
                if publisher in publisher_list:
                    publisher_uri = publisher_list[publisher]
                else:
                    publisher_uri = D[uri_gen('n', g)]
                    g.add(((URIRef(publisher_uri)), RDF.type, VIVO.Publisher))
                    g.add(((URIRef(publisher_uri)), RDFS.label,
                          Literal(publisher)))
                    publisher_list[publisher] = str(publisher_uri)
                    print('Created new publisher "' + publisher + '"')
                g.add(((URIRef(journal_uri)), VIVO.publisher,
                      URIRef(publisher_uri)))

            print('Made new ' + journal)

    issue = cr_result.get("issue")
    volume = cr_result.get("volume")
    # Page strings containing 'n/a' are discarded.
    pages = cr_result.get("page")
    if pages and 'n/a' in pages:
        pages = None

    # Authors
    authors = (parse_authors(cr_result) if "author" in cr_result
               else None)

    date = parse_publication_date(cr_result)

    # Publication date
    if date:
        (publication_year, publication_month, publication_day) = date
    else:
        (publication_year, publication_month, publication_day) = (None,
                                                                  None,
                                                                  None)

    # NOTE(review): the date node is linked even when no date was parsed;
    # kept as-is because add_date's handling of a None year is not visible
    # here.
    date_uri = uri_gen('n', g)
    g.add((D[pub_uri], VIVO.dateTimeValue, D[date_uri]))
    add_date(D[date_uri], publication_year, g, publication_month,
             publication_day)

    # Add things to the graph
    if pubtype:
        g.add((D[pub_uri], RDF.type, pubtype))
    if doi:
        g.add((D[pub_uri], BIBO.doi, Literal(doi)))
    if issue:
        g.add((D[pub_uri], BIBO.issue, Literal(issue)))
    if volume:
        g.add((D[pub_uri], BIBO.volume, Literal(volume)))
    if title:
        g.add((D[pub_uri], RDFS.label, Literal(title)))

    # Loop through the list of authors, trying to check for existing
    # authors in the database
    if authors:
        for rank, (first_name, surname) in enumerate(authors, 1):
            full_name = join_if_not_empty((first_name, surname))
            if full_name in matchlist[0]:
                pos = matchlist[0].index(full_name)
                assign_authorship(matchlist[1][pos], g, pub_uri,
                                  full_name, matchlist, rank)
            else:
                roll = name_lookup(surname)
                # The returned matchlist is used for the remaining authors.
                matchlist = name_selecter(roll, full_name, g,
                                          first_name, surname, pub_uri,
                                          matchlist, rank)

    # subjects
    if subjects:
        for subject in subjects:
            # NEED TO FIND SUBJECT IN VIVO
            concept_uri = get_subject(subject, g)

            if concept_uri:
                # Subject already known to get_subject.
                g.add((D[pub_uri], VIVO.hasSubjectArea,
                      URIRef(concept_uri)))
            elif subject in subjectlist[0]:
                # Subject was created earlier in this run.
                match = subjectlist[0].index(subject)
                subject_uri = subjectlist[1][match]
                g.add((D[pub_uri], VIVO.hasSubjectArea,
                      D[subject_uri]))
            else:
                # New subject: create it and remember it in subjectlist.
                subject_uri = uri_gen('sub', g)
                subjectlist[0].append(subject)
                subjectlist[1].append(subject_uri)
                g.add((D[pub_uri], VIVO.hasSubjectArea,
                      D[subject_uri]))
                g.add((D[subject_uri], RDF.type, SKOS.Concept))
                g.add((D[subject_uri], RDFS.label, Literal(subject)))

    if pages:
        page_parts = pages.split("-")
        g.add((D[pub_uri], BIBO.pageStart, Literal(page_parts[0])))
        if len(page_parts) > 1:
            g.add((D[pub_uri], BIBO.pageEnd, Literal(page_parts[1])))
Пример #3
0
def crosswalk_bio(orcid_profile, person_uri, graph, skip_person=False, person_class=FOAF.Person,
                  existing_vcard_uri=None, skip_name_vcard=False):
    """Add bio triples (person, biography, identifiers, keywords, vcards) to ``graph``.

    Everything is read from ``orcid_profile["orcid-profile"]["orcid-bio"]``.
    Sections and values that are missing or null are skipped.

    :param orcid_profile: parsed ORCID profile.
    :param person_uri: subject of the person triples; child vcard URIs are
        built by appending suffixes ("-vcard", "-vcard-name",
        "-vcard-website<index>") to it.
    :param graph: graph the triples are added to (via ``graph.add``).
    :param skip_person: if true, do not add the person's type and label.
    :param person_class: RDF class assigned to the person.
    :param existing_vcard_uri: main vcard to attach child vcards to; when
        given, the main vcard itself is not created.
    :param skip_name_vcard: if true, do not add the name vcard.
    """
    orcid_bio = orcid_profile["orcid-profile"]["orcid-bio"]

    #Get names (for person and name vcard)
    #A present-but-null entry would defeat a plain .get(key, {}) default, so
    #"or {}" is used at every level.
    person_details = orcid_bio.get("personal-details") or {}
    given_names = (person_details.get("given-names") or {}).get("value")
    family_name = (person_details.get("family-name") or {}).get("value")
    full_name = join_if_not_empty((given_names, family_name))

    #Following is non-vcard bio information

    #If skip_person, then don't create person and add names
    if not skip_person:
        #Add person
        graph.add((person_uri, RDF.type, person_class))
        graph.add((person_uri, RDFS.label, Literal(full_name)))

    #Biography
    biography = (orcid_bio.get("biography") or {}).get("value")
    if biography:
        graph.add((person_uri, VIVO.overview, Literal(biography)))

    #Other identifiers
    #Default VIVO-ISF only supports a limited number of identifier types.
    external_identifiers = \
        (orcid_bio.get("external-identifiers") or {}).get("external-identifier") or []
    for external_identifier in external_identifiers:
        common_name = (external_identifier.get("external-id-common-name") or {}).get("value")

        #Scopus ID
        if common_name == "Scopus Author ID":
            graph.add((person_uri, VIVO.scopusId, Literal(external_identifier["external-id-reference"]["value"])))

        #ISI Research ID
        if common_name == "ResearcherID":
            graph.add((person_uri, VIVO.researcherId, Literal(external_identifier["external-id-reference"]["value"])))

    #Keywords
    keywords = (orcid_bio.get("keywords") or {}).get("keyword") or []
    for keyword in keywords:
        graph.add((person_uri, VIVO.freetextKeyword, Literal(keyword["value"])))

    #Following is vcard bio information

    #Add main vcard
    vcard_uri = existing_vcard_uri or person_uri + "-vcard"
    #Will only add vcard if there is a child vcard
    add_main_vcard = False

    if not skip_name_vcard and (given_names or family_name):
        #Name vcard
        vcard_name_uri = person_uri + "-vcard-name"
        graph.add((vcard_name_uri, RDF.type, VCARD.Name))
        graph.add((vcard_uri, VCARD.hasName, vcard_name_uri))
        if given_names:
            graph.add((vcard_name_uri, VCARD.givenName, Literal(given_names)))
        if family_name:
            graph.add((vcard_name_uri, VCARD.familyName, Literal(family_name)))
        add_main_vcard = True

    #Websites
    researcher_urls = \
        (orcid_bio.get("researcher-urls") or {}).get("researcher-url") or []
    for index, researcher_url in enumerate(researcher_urls):
        url = researcher_url["url"]["value"]
        url_name = (researcher_url.get("url-name") or {}).get("value")
        vcard_website_uri = person_uri + "-vcard-website" + str(index)
        graph.add((vcard_website_uri, RDF.type, VCARD.URL))
        graph.add((vcard_uri, VCARD.hasURL, vcard_website_uri))
        graph.add((vcard_website_uri, VCARD.url, Literal(url, datatype=XSD.anyURI)))
        if url_name:
            graph.add((vcard_website_uri, RDFS.label, Literal(url_name)))
        #A website is a child vcard too; without this the main vcard that the
        #hasURL triple hangs off would never be typed or linked to the person.
        add_main_vcard = True

    if add_main_vcard and not existing_vcard_uri:
        graph.add((vcard_uri, RDF.type, VCARD.Individual))
        #Contact info for
        graph.add((vcard_uri, OBO.ARG_2000029, person_uri))
Пример #4
0
    def crosswalk(self, orcid_profile, person_uri, graph, person_class=FOAF.Person):
        """Add bio triples for ``orcid_profile["person"]`` to ``graph``.

        Covers the person (type and label), biography, Scopus/ResearcherID
        identifiers, free-text keywords, and the vcards for name and websites.
        URIs come from ``self.identifier_strategy.to_uri`` and entity creation
        is gated by ``self.create_strategy.should_create``. Sections that are
        missing or null are skipped.

        :param orcid_profile: parsed ORCID record with a ``"person"`` section.
        :param person_uri: subject of the person triples.
        :param graph: graph the triples are added to (via ``graph.add``).
        :param person_class: RDF class assigned to the person.
        """
        person = orcid_profile["person"]

        # Get names (for person and name vcard)
        given_names = None
        family_name = None
        if "name" in person:
            # The section and each name part can be present but null.
            person_details = person["name"] or {}
            given_names = (person_details.get("given-names") or {}).get("value")
            family_name = (person_details.get("family-name") or {}).get("value")
            full_name = join_if_not_empty((given_names, family_name))

            # Following is non-vcard bio information

            # Only create the person when there is a name and the create
            # strategy allows it.
            if full_name and self.create_strategy.should_create(person_class, person_uri):
                # Add person
                graph.add((person_uri, RDF.type, person_class))
                graph.add((person_uri, RDFS.label, Literal(full_name)))

        # Biography
        biography = (person.get("biography") or {}).get("content")
        if biography:
            graph.add((person_uri, VIVO.overview, Literal(biography)))

        # Other identifiers
        # Default VIVO-ISF only supports a limited number of identifier types.
        external_identifiers = (person.get("external-identifiers") or {}).get("external-identifier") or []
        for external_identifier in external_identifiers:
            # Scopus ID
            if external_identifier["external-id-type"] == "Scopus Author ID":
                graph.add((person_uri, VIVO.scopusId, Literal(external_identifier["external-id-value"])))

            # ISI Research ID
            if external_identifier["external-id-type"] == "ResearcherID":
                graph.add((person_uri, VIVO.researcherId, Literal(external_identifier["external-id-value"])))

        # Keywords
        keywords = (person.get("keywords") or {}).get("keyword") or []
        for keyword in keywords:
            keywords_content = keyword["content"]
            if keywords_content:
                # One entry may hold several comma-separated keywords.
                for keyword_content in keywords_content.split(", "):
                    graph.add((person_uri, VIVO.freetextKeyword, Literal(keyword_content)))

        # Following is vcard bio information

        # Add main vcard
        vcard_uri = self.identifier_strategy.to_uri(VCARD.Individual, {"person_uri": person_uri})
        # Will only add vcard if there is a child vcard
        add_main_vcard = False

        # Name vcard
        vcard_name_uri = self.identifier_strategy.to_uri(VCARD.Name, {"person_uri": person_uri})
        if (given_names or family_name) and self.create_strategy.should_create(VCARD.Name, vcard_name_uri):
            graph.add((vcard_name_uri, RDF.type, VCARD.Name))
            graph.add((vcard_uri, VCARD.hasName, vcard_name_uri))
            if given_names:
                graph.add((vcard_name_uri, VCARD.givenName, Literal(given_names)))
            if family_name:
                graph.add((vcard_name_uri, VCARD.familyName, Literal(family_name)))
            add_main_vcard = True

        # Websites
        researcher_urls = (person.get("researcher-urls") or {}).get("researcher-url") or []
        for researcher_url in researcher_urls:
            url = researcher_url["url"]["value"]
            url_name = researcher_url.get("url-name")
            vcard_website_uri = self.identifier_strategy.to_uri(VCARD.URL, {"url": url})
            graph.add((vcard_website_uri, RDF.type, VCARD.URL))
            graph.add((vcard_uri, VCARD.hasURL, vcard_website_uri))
            graph.add((vcard_website_uri, VCARD.url, Literal(url, datatype=XSD.anyURI)))
            if url_name:
                graph.add((vcard_website_uri, RDFS.label, Literal(url_name)))
            add_main_vcard = True

        if add_main_vcard and self.create_strategy.should_create(VCARD.Individual, vcard_uri):
            graph.add((vcard_uri, RDF.type, VCARD.Individual))
            # Contact info for
            graph.add((vcard_uri, OBO.ARG_2000029, person_uri))
Пример #5
0
    def crosswalk(self,
                  orcid_profile,
                  person_uri,
                  graph,
                  person_class=FOAF.Person):
        """Translate the ``"person"`` section of an ORCID record into triples.

        Adds, where the data is available: the person's type and label, the
        biography (``VIVO.overview``), Scopus and ResearcherID identifiers,
        free-text keywords, a name vcard and one URL vcard per researcher
        URL. The main vcard is only added when at least one child vcard was
        added. ``self.identifier_strategy`` supplies the URIs and
        ``self.create_strategy`` decides whether an entity is created.
        Sections that are absent or null contribute nothing.

        :param orcid_profile: parsed ORCID record with a ``"person"`` section.
        :param person_uri: subject of the person triples.
        :param graph: graph the triples are added to (via ``graph.add``).
        :param person_class: RDF class assigned to the person.
        """
        person = orcid_profile["person"]

        def section_items(section_key, item_key):
            # List stored under person[section_key][item_key]; empty when the
            # section or the list is missing or null.
            return (person.get(section_key) or {}).get(item_key) or []

        # Get names (for person and name vcard)
        given_names = None
        family_name = None
        if "name" in person:
            # "name" and the individual name parts may be null.
            person_details = person["name"] or {}
            given_names = (person_details.get("given-names")
                           or {}).get("value")
            family_name = (person_details.get("family-name")
                           or {}).get("value")
            full_name = join_if_not_empty((given_names, family_name))

            # Following is non-vcard bio information

            # The person is created only if a name is available and the
            # create strategy agrees.
            if full_name and self.create_strategy.should_create(
                    person_class, person_uri):
                # Add person
                graph.add((person_uri, RDF.type, person_class))
                graph.add((person_uri, RDFS.label, Literal(full_name)))

        # Biography
        biography = (person.get("biography") or {}).get("content")
        if biography:
            graph.add((person_uri, VIVO.overview, Literal(biography)))

        # Other identifiers
        # Default VIVO-ISF only supports a limited number of identifier types.
        for external_identifier in section_items("external-identifiers",
                                                 "external-identifier"):
            identifier_type = external_identifier["external-id-type"]
            # Scopus ID
            if identifier_type == "Scopus Author ID":
                graph.add(
                    (person_uri, VIVO.scopusId,
                     Literal(external_identifier["external-id-value"])))

            # ISI Research ID
            if identifier_type == "ResearcherID":
                graph.add(
                    (person_uri, VIVO.researcherId,
                     Literal(external_identifier["external-id-value"])))

        # Keywords
        for keyword in section_items("keywords", "keyword"):
            keywords_content = keyword["content"]
            if keywords_content:
                # A single entry may list several keywords separated by ", ".
                for keyword_content in keywords_content.split(", "):
                    graph.add((person_uri, VIVO.freetextKeyword,
                               Literal(keyword_content)))

        # Following is vcard bio information

        # Add main vcard
        vcard_uri = self.identifier_strategy.to_uri(VCARD.Individual,
                                                    {"person_uri": person_uri})
        # Will only add vcard if there is a child vcard
        add_main_vcard = False

        # Name vcard
        vcard_name_uri = self.identifier_strategy.to_uri(
            VCARD.Name, {"person_uri": person_uri})
        if (given_names or family_name) and self.create_strategy.should_create(
                VCARD.Name, vcard_name_uri):
            graph.add((vcard_name_uri, RDF.type, VCARD.Name))
            graph.add((vcard_uri, VCARD.hasName, vcard_name_uri))
            if given_names:
                graph.add(
                    (vcard_name_uri, VCARD.givenName, Literal(given_names)))
            if family_name:
                graph.add(
                    (vcard_name_uri, VCARD.familyName, Literal(family_name)))
            add_main_vcard = True

        # Websites
        for researcher_url in section_items("researcher-urls",
                                            "researcher-url"):
            url = researcher_url["url"]["value"]
            url_name = researcher_url.get("url-name")
            vcard_website_uri = self.identifier_strategy.to_uri(
                VCARD.URL, {"url": url})
            graph.add((vcard_website_uri, RDF.type, VCARD.URL))
            graph.add((vcard_uri, VCARD.hasURL, vcard_website_uri))
            graph.add((vcard_website_uri, VCARD.url,
                       Literal(url, datatype=XSD.anyURI)))
            if url_name:
                graph.add(
                    (vcard_website_uri, RDFS.label, Literal(url_name)))
            add_main_vcard = True

        if add_main_vcard and self.create_strategy.should_create(
                VCARD.Individual, vcard_uri):
            graph.add((vcard_uri, RDF.type, VCARD.Individual))
            # Contact info for
            graph.add((vcard_uri, OBO.ARG_2000029, person_uri))
Пример #6
0
 def _get_orcid_title(work):
     """Return the work's title, followed by ": " and the subtitle when one is present.

     The title value is required; a missing or null subtitle entry yields
     ``None`` for the second part, which is handed to ``join_if_not_empty``.
     """
     title_section = work["title"]
     main_title = title_section["title"]["value"]
     subtitle = (title_section.get("subtitle") or {}).get("value")
     return join_if_not_empty((main_title, subtitle), ": ")
Пример #7
0
    def crosswalk_work(self, work, person_uri, person_surname, graph):
        """Add the triples describing one ORCID work to ``graph``.

        Work metadata may be available from the orcid profile, bibtex
        contained in the orcid profile, and/or the crossref record fetched for
        the work's DOI. The preferred order (in general) for getting metadata
        is crossref, bibtex, orcid. Works whose ``"type"`` is not a key of
        ``work_type_map`` are ignored.

        Note that datacite records were considered, but not found to have
        additional/better metadata.

        :param work: ORCID work (dict); ``"type"`` is required.
        :param person_uri: URI of the profile owner; used for contributors
            that have no surname or whose surname equals ``person_surname``
            (case-insensitive).
        :param person_surname: surname of the profile owner; may be None.
        :param graph: graph the triples are added to (via ``graph.add``).
        """
        # Work Type
        work_type = work["type"]
        if work_type not in work_type_map:
            return

        # Extract
        # Get external identifiers so that can get DOI
        external_identifiers = WorksCrosswalk._get_work_identifiers(work)
        doi = external_identifiers.get("DOI")
        # Normalise to a dict: the record is read with .get() below, so a
        # falsy fetch result must not reach those calls.
        crossref_record = (WorksCrosswalk._fetch_crossref_doi(doi)
                           if doi else None) or {}

        # Bibtex
        bibtex = WorksCrosswalk._parse_bibtex(work)
        # Get title so that can construct work uri
        title = WorksCrosswalk._get_crossref_title(crossref_record) \
            or bibtex.get("title") or WorksCrosswalk._get_orcid_title(work)

        # Work-type
        work_class = work_type_map[work_type]
        # For translations the bibtex entry type, when mapped, is more
        # specific than the ORCID type.
        if work_type == "TRANSLATION" and bibtex \
                and bibtex["ENTRYTYPE"] in bibtex_type_map:
            work_class = bibtex_type_map[bibtex["ENTRYTYPE"]]

        # Construct work uri
        work_uri = self.identifier_strategy.to_uri(work_class, {"name": title})

        graph.add((work_uri, RDF.type, work_class))

        # Title
        graph.add((work_uri, RDFS.label, Literal(title)))

        # Publication date
        (publication_year, publication_month, publication_day) = \
            WorksCrosswalk._get_crossref_publication_date(crossref_record) \
            or WorksCrosswalk._get_orcid_publication_date(work) \
            or WorksCrosswalk._get_bibtext_publication_date(bibtex) \
            or (None, None, None)
        date_uri = add_date(publication_year, graph, self.identifier_strategy,
                            publication_month, publication_day)
        if date_uri:
            graph.add((work_uri, VIVO.dateTimeValue, date_uri))

        # Subjects
        subjects = crossref_record.get("subject")
        if subjects:
            for subject in subjects:
                subject_uri = self.identifier_strategy.to_uri(
                    SKOS.Concept, {"name": subject})
                graph.add((work_uri, VIVO.hasSubjectArea, subject_uri))
                if self.create_strategy.should_create(SKOS.Concept,
                                                      subject_uri):
                    graph.add((subject_uri, RDF.type, SKOS.Concept))
                    graph.add((subject_uri, RDFS.label, Literal(subject)))

        # Contributors (an array of (first_name, surname, VIVO type, e.g., VIVO.Authorship))
        bibtex_contributors = []
        bibtex_contributors.extend(WorksCrosswalk._get_bibtex_authors(bibtex))
        bibtex_contributors.extend(WorksCrosswalk._get_bibtex_editors(bibtex))
        # Orcid is better for translations because has translator role
        if work_type == "TRANSLATION":
            contributors = WorksCrosswalk._get_orcid_contributors(work)
        else:
            contributors = WorksCrosswalk._get_crossref_authors(crossref_record) \
                or bibtex_contributors \
                or WorksCrosswalk._get_orcid_contributors(work)
        # Always work on a local list so the fallback below can append even
        # when every source came back empty or None.
        contributors = list(contributors or [])
        if not contributors:
            # Add person as author or editor.
            # None, None means this person.
            if work_type == "EDITED_BOOK":
                contributors.append((None, None, VIVO.Editorship))
            elif work_type == "TRANSLATION":
                # Translator is a predicate, not a -ship class.
                contributors.append((None, None, "TRANSLATOR"))
            else:
                contributors.append((None, None, VIVO.Authorship))

        # The profile owner's surname may be unknown; then no named
        # contributor is matched to the owner.
        own_surname = (person_surname or "").lower()
        for (first_name, surname, vivo_type) in contributors:
            if not surname or own_surname == surname.lower():
                contributor_uri = person_uri
            else:
                contributor_uri = self.identifier_strategy.to_uri(
                    FOAF.Person, {
                        "first_name": first_name,
                        "surname": surname
                    })
                if self.create_strategy.should_create(FOAF.Person,
                                                      contributor_uri):
                    graph.add((contributor_uri, RDF.type, FOAF.Person))
                    full_name = join_if_not_empty((first_name, surname))
                    graph.add(
                        (contributor_uri, RDFS.label, Literal(full_name)))

            # Translation is a special case
            if vivo_type == "TRANSLATOR":
                graph.add((contributor_uri, BIBO.translator, work_uri))
            # So is patent assignee
            elif work_type == "PATENT":
                graph.add((contributor_uri, VIVO.assigneeFor, work_uri))
            else:
                contributorship_uri = self.identifier_strategy.to_uri(
                    vivo_type, {
                        "contributor_uri": contributor_uri,
                        "work_uri": work_uri
                    })
                graph.add((contributorship_uri, RDF.type, vivo_type))
                graph.add((contributorship_uri, VIVO.relates, work_uri))
                graph.add((contributorship_uri, VIVO.relates, contributor_uri))

        # Publisher
        publisher = crossref_record.get("publisher") or bibtex.get("publisher")
        if publisher:
            publisher_uri = self.identifier_strategy.to_uri(
                FOAF.Organization, {"name": publisher})
            graph.add((work_uri, VIVO.publisher, publisher_uri))
            if self.create_strategy.should_create(FOAF.Organization,
                                                  publisher_uri):
                graph.add((publisher_uri, RDF.type, FOAF.Organization))
                graph.add((publisher_uri, RDFS.label, Literal(publisher)))

        # Volume
        volume = crossref_record.get("volume") or bibtex.get("volume")
        if volume:
            graph.add((work_uri, BIBO.volume, Literal(volume)))

        # Issue
        issue = crossref_record.get("issue") or bibtex.get("number")
        if issue:
            graph.add((work_uri, BIBO.issue, Literal(issue)))

        # Pages
        pages = crossref_record.get("page") or bibtex.get("pages")
        start_page = None
        end_page = None
        if pages and "-" in pages:
            # maxsplit=1 guarantees exactly two parts; a larger value lets a
            # string such as "1-2-3" produce three and break the unpacking.
            (start_page, end_page) = re.split(" *-+ *", pages, maxsplit=1)
        if start_page:
            graph.add((work_uri, BIBO.pageStart, Literal(start_page)))
        if end_page:
            graph.add((work_uri, BIBO.pageEnd, Literal(end_page)))

        # Identifiers
        # Add doi in bibtex, but not orcid profile
        if bibtex and "doi" in bibtex and "DOI" not in external_identifiers:
            external_identifiers["DOI"] = bibtex["doi"]
        # Add isbn in bibtex, but not orcid profile
        if bibtex and "isbn" in bibtex and "ISBN" not in external_identifiers:
            external_identifiers["ISBN"] = bibtex["isbn"]

        # items() rather than iteritems(): works on Python 2 and 3 alike.
        for identifier_type, identifier in external_identifiers.items():
            identifier_url = None
            if identifier_type in ("PAT", "OTHER-ID") \
                    and work_type == "PATENT":
                identifier_predicate = VIVO.patentNumber
            elif identifier_type == "ISBN":
                # Hyphens are ignored when deciding between ISBN-10 and -13.
                clean_isbn = identifier.replace("-", "")
                if len(clean_isbn) <= 10:
                    identifier_predicate = BIBO.isbn10
                else:
                    identifier_predicate = BIBO.isbn13
            else:
                (identifier_predicate, url_template) = identifier_map.get(
                    identifier_type, (None, None))
                if url_template:
                    identifier_url = url_template % identifier

            if identifier_predicate:
                graph.add(
                    (work_uri, identifier_predicate, Literal(identifier)))
            if identifier_url:
                self._add_work_url(identifier_url, work_uri, graph)

        orcid_url = (work.get("url", {}) or {}).get("value")
        if orcid_url and WorksCrosswalk._use_url(orcid_url):
            self._add_work_url(orcid_url, work_uri, graph)
        bibtex_url = bibtex.get("link")
        # Skip the bibtex link when it merely repeats the ORCID url.
        if bibtex_url and WorksCrosswalk._use_url(bibtex_url) \
                and orcid_url != bibtex_url:
            self._add_work_url(bibtex_url, work_uri, graph)

        # Series
        # TODO: Figure out how to model series (bibtex "series") in VIVO-ISF.

        # Journal
        # If Crossref has a journal use it
        journal = WorksCrosswalk._get_crossref_journal(crossref_record)
        issns = []
        if journal:
            issns = crossref_record.get("ISSN", [])
        # Otherwise, only use for some work types.
        elif work_type in journal_map:
            journal = bibtex.get("journal")
            if journal:
                if "issn" in bibtex:
                    issns = [bibtex["issn"]]
            else:
                journal = (work.get("journal-title", {}) or {}).get("value")

        if journal:
            journal_class = journal_map.get(work_type, BIBO.Journal)
            journal_uri = self.identifier_strategy.to_uri(
                journal_class, {"name": journal})
            graph.add((work_uri, VIVO.hasPublicationVenue, journal_uri))
            if self.create_strategy.should_create(journal_class, journal_uri):
                graph.add((journal_uri, RDF.type, journal_class))
                graph.add((journal_uri, RDFS.label, Literal(journal)))
                for issn in issns:
                    graph.add((journal_uri, BIBO.issn, Literal(issn)))

        if work_type == "BOOK_CHAPTER":
            book_title = bibtex.get("booktitle")
            if book_title:
                book_uri = self.identifier_strategy.to_uri(
                    BIBO.Book, {"name": book_title})
                graph.add((work_uri, VIVO.hasPublicationVenue, book_uri))
                if self.create_strategy.should_create(BIBO.Book, book_uri):
                    graph.add((book_uri, RDF.type, BIBO.Book))
                    graph.add((book_uri, RDFS.label, Literal(book_title)))

        if work_type == "CONFERENCE_PAPER":
            proceeding = bibtex.get("journal") or (work.get(
                "journal-title", {}) or {}).get("value")
            if proceeding:
                proceeding_uri = self.identifier_strategy.to_uri(
                    BIBO.Proceedings, {"name": proceeding})
                graph.add((work_uri, VIVO.hasPublicationVenue, proceeding_uri))
                if self.create_strategy.should_create(BIBO.Proceedings,
                                                      proceeding_uri):
                    graph.add((proceeding_uri, RDF.type, BIBO.Proceedings))
                    graph.add(
                        (proceeding_uri, RDFS.label, Literal(proceeding)))
Пример #8
0
def crosswalk_works(orcid_profile, person_uri, graph, subjectlist, journlist, orglist):
    """Add triples describing the works listed in an ORCID profile to ``graph``.

    For each work, the DOI (if any) is looked up via ``fetch_crossref_doi`` and
    the resulting record is preferred for publication date, subjects and
    authors; publisher and journal details come from the work's bibtex.

    :param orcid_profile: ORCID profile dict. Works are read from
        ``["orcid-profile"]["orcid-activities"]["orcid-works"]["orcid-work"]``
        and the owner's surname from
        ``["orcid-profile"]["orcid-bio"]["personal-details"]["family-name"]["value"]``.
    :param person_uri: URI of the profile owner; related to every work through
        an Authorship node.
    :param graph: object exposing ``add((subject, predicate, object))``;
        mutated in place.
    :param subjectlist: pair of parallel lists ``[names, uris]`` of subjects
        already minted; new subjects are appended to both lists.
    :param journlist: same structure as ``subjectlist``, for journals.
    :param orglist: same structure as ``subjectlist``, for publishers.
    :return: None.
    """

    person_surname = orcid_profile["orcid-profile"]["orcid-bio"]["personal-details"]["family-name"]["value"]

    #Publications
    if "orcid-works" in orcid_profile["orcid-profile"]["orcid-activities"] \
            and orcid_profile["orcid-profile"]["orcid-activities"]["orcid-works"] \
            and "orcid-work" in orcid_profile["orcid-profile"]["orcid-activities"]["orcid-works"]:
        works = orcid_profile["orcid-profile"]["orcid-activities"]["orcid-works"]["orcid-work"]
        for work in works:
            ##Extract
            #Get external identifiers so that can get DOI
            external_identifiers = _get_work_identifiers(work)
            doi = external_identifiers.get("DOI")
            doi_record = fetch_crossref_doi(doi) if doi else None

            #Bibtex
            bibtex = _parse_bibtex(work)

            #Work Type
            work_type = work["work-type"]

            #Title
            title = work["work-title"]["title"]["value"]

            #The work uri is derived from the title and the work type.
            work_uri = ns.D[to_hash_identifier(PREFIX_DOCUMENT, (title, work_type))]

            #Publication date (prefer the DOI record over the ORCID work)
            (publication_year, publication_month, publication_day) = _get_doi_publication_date(doi_record) \
                if doi_record else _get_publication_date(work)

            #Subjects
            subjects = doi_record["subject"] if doi_record and "subject" in doi_record else None

            #Authors
            authors = _get_doi_authors(doi_record) if doi_record else None
            #TODO: Get from ORCID profile if no doi record

            #Publisher
            publisher = bibtex.get("publisher")

            ##Add triples
            #Title
            graph.add((work_uri, RDFS.label, Literal(title)))
            #Person (via Authorship)
            authorship_uri = work_uri + "-auth"
            graph.add((authorship_uri, RDF.type, VIVO.Authorship))
            graph.add((authorship_uri, VIVO.relates, work_uri))
            graph.add((authorship_uri, VIVO.relates, person_uri))
            #Other authors
            if authors:
                for (first_name, surname) in authors:
                    #An author sharing the profile owner's surname (ignoring case) is
                    #taken to be the owner, who was already linked above.
                    if person_surname.lower() != surname.lower():
                        author_uri = ns.D[to_hash_identifier(PREFIX_PERSON, (first_name, surname))]
                        graph.add((author_uri, RDF.type, FOAF.Person))
                        full_name = join_if_not_empty((first_name, surname))
                        graph.add((author_uri, RDFS.label, Literal(full_name)))

                        #NOTE(review): this uri is derived from the author alone, so the same
                        #Authorship node is reused for every work by this co-author -- confirm
                        #whether it should also be keyed on work_uri.
                        coauthorship_uri = author_uri + "-auth"
                        graph.add((coauthorship_uri, RDF.type, VIVO.Authorship))
                        graph.add((coauthorship_uri, VIVO.relates, work_uri))
                        graph.add((coauthorship_uri, VIVO.relates, author_uri))

            #Date
            date_uri = work_uri + "-date"
            graph.add((work_uri, VIVO.dateTimeValue, date_uri))
            add_date(date_uri, publication_year, graph, publication_month, publication_day)
            #Subjects
            if subjects:
                for subject in subjects:
                    if subject in subjectlist[0]:
                        #Already minted: reuse the uri stored at the same position.
                        match = subjectlist[0].index(subject)
                        subject_uri = subjectlist[1][match]
                        graph.add((work_uri, VIVO.hasSubjectArea, subject_uri))
                    else:
                        subject_uri = ns.D[to_hash_identifier("sub", (subject,))]
                        subjectlist[0].append(subject)
                        subjectlist[1].append(subject_uri)
                        graph.add((work_uri, VIVO.hasSubjectArea, subject_uri))
                        graph.add((subject_uri, RDF.type, SKOS.Concept))
                        graph.add((subject_uri, RDFS.label, Literal(subject)))
            #Identifier
            if doi:
                graph.add((work_uri, BIBO.doi, Literal(doi)))
                #Also add as a website
                identifier_url = "http://dx.doi.org/%s" % doi
                vcard_uri = ns.D[to_hash_identifier("vcard", (identifier_url,))]
                graph.add((vcard_uri, RDF.type, VCARD.Kind))
                #Has contact info
                graph.add((work_uri, OBO.ARG_2000028, vcard_uri))
                #Url vcard
                vcard_url_uri = vcard_uri + "-url"
                graph.add((vcard_url_uri, RDF.type, VCARD.URL))
                graph.add((vcard_uri, VCARD.hasURL, vcard_url_uri))
                graph.add((vcard_url_uri, VCARD.url, Literal(identifier_url, datatype=XSD.anyURI)))

            #Publisher
            if publisher:
                if publisher in orglist[0]:
                    match = orglist[0].index(publisher)
                    publisher_uri = orglist[1][match]
                    graph.add((work_uri, VIVO.publisher, publisher_uri))

                else:
                    publisher_uri = ns.D[to_hash_identifier(PREFIX_ORGANIZATION, (publisher,))]
                    orglist[0].append(publisher)
                    orglist[1].append(publisher_uri)
                    graph.add((publisher_uri, RDF.type, FOAF.Organization))
                    graph.add((publisher_uri, RDFS.label, Literal(publisher)))
                    graph.add((work_uri, VIVO.publisher, publisher_uri))

            if work_type == "JOURNAL_ARTICLE":
                ##Extract
                #Journal
                journal = bibtex.get("journal")
                #Volume
                volume = bibtex.get("volume")
                #Number
                number = bibtex.get("number")
                #Pages
                pages = bibtex.get("pages")
                start_page = None
                end_page = None
                if pages and "-" in pages:
                    #maxsplit=1 yields exactly two parts. The previous maxsplit=2 could
                    #yield three (e.g. "12-34-56") and make the unpacking raise ValueError.
                    (start_page, end_page) = re.split(" *-+ *", pages, maxsplit=1)

                ##Add triples
                #Type
                graph.add((work_uri, RDF.type, BIBO.AcademicArticle))
                #Journal
                if journal:
                    if journal in journlist[0]:
                        match = journlist[0].index(journal)
                        journal_uri = journlist[1][match]
                        graph.add((work_uri, VIVO.hasPublicationVenue, journal_uri))
                    else:
                        journal_uri = ns.D[to_hash_identifier(PREFIX_JOURNAL, (BIBO.Journal, journal))]
                        journlist[0].append(journal)
                        journlist[1].append(journal_uri)
                        graph.add((journal_uri, RDF.type, BIBO.Journal))
                        graph.add((journal_uri, RDFS.label, Literal(journal)))
                        graph.add((work_uri, VIVO.hasPublicationVenue, journal_uri))

                #Volume
                if volume:
                    graph.add((work_uri, BIBO.volume, Literal(volume)))
                #Number
                if number:
                    graph.add((work_uri, BIBO.issue, Literal(number)))
                #Pages
                if start_page:
                    graph.add((work_uri, BIBO.pageStart, Literal(start_page)))
                if end_page:
                    graph.add((work_uri, BIBO.pageEnd, Literal(end_page)))

            elif work_type == "BOOK":
                ##Add triples
                #Type
                graph.add((work_uri, RDF.type, BIBO.Book))
            elif work_type == "DATA_SET":
                ##Add triples
                #Type
                graph.add((work_uri, RDF.type, VIVO.Dataset))
Пример #9
0
    def crosswalk(self, orcid_profile, person_uri, graph):
        """Add triples for every supported work in an ORCID profile to ``graph``.

        Work metadata may be available from the orcid profile, bibtex contained in the orcid profile, and/or
        crossref record. The preferred order (in general) for getting metadata is crossref, bibtex, orcid.

        Note that datacite records were considered, but not found to have additional/better metadata.

        Works whose ``work-type`` is not a key of ``work_type_map`` are skipped.

        :param orcid_profile: ORCID profile dict. Works are read from
            ``["orcid-profile"]["orcid-activities"]["orcid-works"]["orcid-work"]``; any missing or
            empty level yields no works.
        :param person_uri: URI of the profile owner. Used for contributors that have no surname or
            whose surname equals the owner's family name (case-insensitive).
        :param graph: object exposing ``add((subject, predicate, object))``; mutated in place.
        :return: None.
        """

        #Publications
        for work in ((orcid_profile["orcid-profile"].get("orcid-activities") or {}).get("orcid-works") or {})\
                .get("orcid-work", []):
            #Work Type
            work_type = work["work-type"]
            if work_type in work_type_map:
                ##Extract
                #Get external identifiers so that can get DOI
                external_identifiers = WorksCrosswalk._get_work_identifiers(work)
                doi = external_identifiers.get("DOI")
                #Normalise a falsy lookup result to {} because .get() is called on the record below.
                crossref_record = (WorksCrosswalk._fetch_crossref_doi(doi) if doi else None) or {}

                #Bibtex
                bibtex = WorksCrosswalk._parse_bibtex(work)
                #Get title so that can construct work uri
                title = WorksCrosswalk._get_crossref_title(crossref_record) \
                    or bibtex.get("title") \
                    or WorksCrosswalk._get_orcid_title(work)

                #Work-type
                work_class = work_type_map[work_type]
                #For translations, the bibtex entry type (when mapped) decides the class.
                if work_type == "TRANSLATION" and bibtex and bibtex["ENTRYTYPE"] in bibtex_type_map:
                    work_class = bibtex_type_map[bibtex["ENTRYTYPE"]]

                #Construct work uri
                work_uri = self.identifier_strategy.to_uri(work_class, {"name": title})

                graph.add((work_uri, RDF.type, work_class))

                #Title
                graph.add((work_uri, RDFS.label, Literal(title)))

                #Publication date
                (publication_year, publication_month, publication_day) = \
                    WorksCrosswalk._get_crossref_publication_date(crossref_record) \
                    or WorksCrosswalk._get_orcid_publication_date(work) \
                    or WorksCrosswalk._get_bibtext_publication_date(bibtex) or (None, None, None)
                date_uri = add_date(publication_year, graph, self.identifier_strategy,
                                    publication_month, publication_day)
                if date_uri:
                    graph.add((work_uri, VIVO.dateTimeValue, date_uri))

                #Subjects
                subjects = crossref_record.get("subject")
                if subjects:
                    for subject in subjects:
                        subject_uri = self.identifier_strategy.to_uri(SKOS.Concept, {"name": subject})
                        graph.add((work_uri, VIVO.hasSubjectArea, subject_uri))
                        if self.create_strategy.should_create(SKOS.Concept, subject_uri):
                            graph.add((subject_uri, RDF.type, SKOS.Concept))
                            graph.add((subject_uri, RDFS.label, Literal(subject)))

                #Contributors (an array of (first_name, surname, VIVO type, e.g., VIVO.Authorship))
                bibtex_contributors = []
                bibtex_contributors.extend(WorksCrosswalk._get_bibtex_authors(bibtex))
                bibtex_contributors.extend(WorksCrosswalk._get_bibtex_editors(bibtex))
                #Orcid is better for translations because has translator role
                if work_type == "TRANSLATION":
                    contributors = WorksCrosswalk._get_orcid_contributors(work)
                else:
                    contributors = WorksCrosswalk._get_crossref_authors(crossref_record) or bibtex_contributors \
                        or WorksCrosswalk._get_orcid_contributors(work)
                #Guarantee a list so that the fallback below can append even if a helper returned None.
                contributors = list(contributors or [])
                person_surname = orcid_profile["orcid-profile"]["orcid-bio"]["personal-details"]["family-name"]["value"]
                if not contributors:
                    #Add person as author or editor.
                    #None, None means this person.
                    if work_type in ("EDITED_BOOK",):
                        contributors.append((None, None, VIVO.Editorship))
                    elif work_type == "TRANSLATION":
                        #Translator is a predicate, not a -ship class.
                        contributors.append((None, None, "TRANSLATOR"))
                    else:
                        contributors.append((None, None, VIVO.Authorship))

                for (first_name, surname, vivo_type) in contributors:
                    #No surname, or the owner's surname (ignoring case), means the profile owner.
                    if not surname or person_surname.lower() == surname.lower():
                        contributor_uri = person_uri
                    else:
                        contributor_uri = self.identifier_strategy.to_uri(FOAF.Person, {"first_name": first_name,
                                                                                        "surname": surname})
                        if self.create_strategy.should_create(FOAF.Person, contributor_uri):
                            graph.add((contributor_uri, RDF.type, FOAF.Person))
                            full_name = join_if_not_empty((first_name, surname))
                            graph.add((contributor_uri, RDFS.label, Literal(full_name)))

                    #Translation is a special case
                    if vivo_type == "TRANSLATOR":
                        graph.add((contributor_uri, BIBO.translator, work_uri))
                    #So is patent assignee
                    elif work_type == "PATENT":
                        graph.add((contributor_uri, VIVO.assigneeFor, work_uri))
                    else:
                        contributorship_uri = self.identifier_strategy.to_uri(vivo_type,
                                                                              {"contributor_uri": contributor_uri,
                                                                               "work_uri": work_uri})
                        graph.add((contributorship_uri, RDF.type, vivo_type))
                        graph.add((contributorship_uri, VIVO.relates, work_uri))
                        graph.add((contributorship_uri, VIVO.relates, contributor_uri))

                #Publisher
                publisher = crossref_record.get("publisher") or bibtex.get("publisher")
                if publisher:
                    publisher_uri = self.identifier_strategy.to_uri(FOAF.Organization, {"name": publisher})
                    graph.add((work_uri, VIVO.publisher, publisher_uri))
                    if self.create_strategy.should_create(FOAF.Organization, publisher_uri):
                        graph.add((publisher_uri, RDF.type, FOAF.Organization))
                        graph.add((publisher_uri, RDFS.label, Literal(publisher)))

                #Volume
                volume = crossref_record.get("volume") or bibtex.get("volume")
                if volume:
                    graph.add((work_uri, BIBO.volume, Literal(volume)))

                #Issue
                issue = crossref_record.get("issue") or bibtex.get("number")
                if issue:
                    graph.add((work_uri, BIBO.issue, Literal(issue)))

                #Pages
                pages = crossref_record.get("page") or bibtex.get("pages")
                start_page = None
                end_page = None
                if pages and "-" in pages:
                    #maxsplit=1 yields exactly two parts. The previous maxsplit=2 could
                    #yield three (e.g. "12-34-56") and make the unpacking raise ValueError.
                    (start_page, end_page) = re.split(" *-+ *", pages, maxsplit=1)
                if start_page:
                    graph.add((work_uri, BIBO.pageStart, Literal(start_page)))
                if end_page:
                    graph.add((work_uri, BIBO.pageEnd, Literal(end_page)))

                #Identifiers
                #Add doi in bibtex, but not orcid profile
                if bibtex and "doi" in bibtex and "DOI" not in external_identifiers:
                    external_identifiers["DOI"] = bibtex["doi"]
                #Add isbn in bibtex, but not orcid profile
                if bibtex and "isbn" in bibtex and "ISBN" not in external_identifiers:
                    external_identifiers["ISBN"] = bibtex["isbn"]

                #items() rather than iteritems() so that this runs on both Python 2 and 3.
                for identifier_type, identifier in external_identifiers.items():
                    identifier_url = None

                    if identifier_type in ("PAT", "OTHER_ID") and work_type == "PATENT":
                        identifier_predicate = VIVO.patentNumber
                    elif identifier_type == "ISBN":
                        #Choose the predicate from the length once hyphens are removed.
                        clean_isbn = identifier.replace("-", "")
                        if len(clean_isbn) <= 10:
                            identifier_predicate = BIBO.isbn10
                        else:
                            identifier_predicate = BIBO.isbn13
                    else:
                        (identifier_predicate, url_template) = identifier_map.get(identifier_type, (None, None))
                        if url_template:
                            identifier_url = url_template % identifier

                    if identifier_predicate:
                        graph.add((work_uri, identifier_predicate, Literal(identifier)))
                    if identifier_url:
                        self._add_work_url(identifier_url, work_uri, graph)

                orcid_url = (work.get("url", {}) or {}).get("value")
                if orcid_url and WorksCrosswalk._use_url(orcid_url):
                    self._add_work_url(orcid_url, work_uri, graph)
                bibtex_url = bibtex.get("link")
                #Skip the bibtex link when it merely repeats the orcid url.
                if bibtex_url and WorksCrosswalk._use_url(bibtex_url) and orcid_url != bibtex_url:
                    self._add_work_url(bibtex_url, work_uri, graph)

                #Series
                #TODO: Figure out how to model series in VIVO-ISF (available as bibtex.get("series")).

                #Journal
                #If Crossref has a journal use it
                journal = WorksCrosswalk._get_crossref_journal(crossref_record)
                if journal:
                    issns = crossref_record.get("ISSN", [])
                #Otherwise, only use for some work types.
                elif work_type in journal_map:
                    issns = []
                    journal = bibtex.get("journal")
                    if journal:
                        if "issn" in bibtex:
                            issns = [bibtex["issn"]]
                    else:
                        journal = (work.get("journal-title", {}) or {}).get("value")

                if journal:
                    journal_class = journal_map.get(work_type, BIBO.Journal)
                    journal_uri = self.identifier_strategy.to_uri(journal_class, {"name": journal})
                    graph.add((work_uri, VIVO.hasPublicationVenue, journal_uri))
                    if self.create_strategy.should_create(journal_class, journal_uri):
                        graph.add((journal_uri, RDF.type, journal_class))
                        graph.add((journal_uri, RDFS.label, Literal(journal)))
                        for issn in issns:
                            graph.add((journal_uri, BIBO.issn, Literal(issn)))

                if work_type in ("BOOK_CHAPTER",):
                    book_title = bibtex.get("booktitle")
                    if book_title:
                        book_uri = self.identifier_strategy.to_uri(BIBO.Book, {"name": book_title})
                        graph.add((work_uri, VIVO.hasPublicationVenue, book_uri))
                        if self.create_strategy.should_create(BIBO.Book, book_uri):
                            graph.add((book_uri, RDF.type, BIBO.Book))
                            graph.add((book_uri, RDFS.label, Literal(book_title)))

                if work_type in ("CONFERENCE_PAPER",):
                    proceeding = bibtex.get("journal") or (work.get("journal-title", {}) or {}).get("value")
                    if proceeding:
                        proceeding_uri = self.identifier_strategy.to_uri(BIBO.Proceedings, {"name": proceeding})
                        graph.add((work_uri, VIVO.hasPublicationVenue, proceeding_uri))
                        if self.create_strategy.should_create(BIBO.Proceedings, proceeding_uri):
                            graph.add((proceeding_uri, RDF.type, BIBO.Proceedings))
                            graph.add((proceeding_uri, RDFS.label, Literal(proceeding)))
Пример #10
0
 def _get_orcid_title(work):
     """Build a work's title from its ORCID ``work-title`` entry.

     Reads the required ``title`` value and the optional ``subtitle`` value
     (``None`` when the subtitle is missing or empty) and passes both to
     ``join_if_not_empty`` with ``": "`` as the separator.
     """
     work_title = work["work-title"]
     main_title = work_title["title"]["value"]
     # The subtitle entry may be absent or None, so fall back to an empty dict.
     subtitle_entry = work_title.get("subtitle") or {}
     subtitle = subtitle_entry.get("value")
     return join_if_not_empty((main_title, subtitle), ": ")