示例#1
0
    def orcid_author_works_get_parser(orcid):
        """
        Parse the works of an ORCID author into a dictionary.

        The XML returned by ``ORCID.orcid_author_works_get(orcid, kind="xml")``
        is written to ``data/orcid_author_works_get.xml`` (overwriting any
        previous content) and then parsed from that file.

        Args:
            orcid: ORCID identifier of the author, passed through unchanged to
                ``ORCID.orcid_author_works_get``.

        Returns:
            A dict with:
              * ``'orcid'`` - text of ``orcid-identifier/path`` (only set when
                that element is present in the response);
              * ``'works'`` - a list with one dict per ``orcid-work`` element.

            Every work dict has ``'identifiers'``, ``'authorIDs'``,
            ``'title'``, ``'doi'`` and ``'authors'``; ``'journalTitle'``,
            ``'year'`` and ``'work-citation-type'`` are present only when the
            matching XML element exists.
        """

        out_file = "data/orcid_author_works_get.xml"
        # presumably the ORCID client returns text (it is .encode()d here, as
        # in the original) -- verify against ORCID.orcid_author_works_get.
        xml_bytes = ORCID.orcid_author_works_get(orcid,
                                                 kind="xml").encode('utf-8')
        # Write the encoded bytes in binary mode: unlike ``print >> fout`` this
        # works on both Python 2 and Python 3, and ``with`` guarantees the
        # handle is closed even if the write fails.  The trailing newline
        # mirrors what ``print`` used to append.
        with open(out_file, "wb") as fout:
            fout.write(xml_bytes + b"\n")

        tree = ET.parse(out_file)
        root_element = tree.getroot()
        ns = '{http://www.orcid.org/ns/orcid}'

        author = {'works': []}

        for child1 in root_element:
            if child1.tag != ns + 'orcid-profile':
                continue
            for child2 in child1:
                if child2.tag == ns + 'orcid-identifier':
                    for child3 in child2:
                        if child3.tag == ns + 'path':
                            author['orcid'] = child3.text
                elif child2.tag == ns + 'orcid-activities':
                    for child3 in child2:
                        if child3.tag != ns + 'orcid-works':
                            continue
                        for child4 in child3:
                            if child4.tag != ns + 'orcid-work':
                                continue

                            work = {'identifiers': [], 'authorIDs': []}
                            for child5 in child4:
                                if child5.tag == ns + 'work-title':
                                    for child6 in child5:
                                        if child6.tag == ns + 'title':
                                            work['title'] = child6.text
                                elif child5.tag == ns + 'journal-title':
                                    work['journalTitle'] = child5.text
                                elif child5.tag == ns + 'work-citation':
                                    # Reset per element so a citation from a
                                    # previously parsed work can never be
                                    # attributed to this one.
                                    citation_node = None
                                    for child6 in child5:
                                        if child6.tag == ns + 'work-citation-type':
                                            work['work-citation-type'] = child6.text
                                        elif child6.tag == ns + 'citation':
                                            citation_node = child6

                                    # .get(): the type child may be missing.
                                    citation_type = work.get('work-citation-type')
                                    if citation_node is not None:
                                        if citation_type == 'bibtex':
                                            work['authors'] = ORCID_Parser.get_authors_list_from_bibtex(
                                                citation_node.text)
                                        elif citation_type == 'formatted-unspecified':
                                            work['authors'] = ORCID_Parser.get_authors_list_from_unformattedtext(
                                                citation_node.text)
                                elif child5.tag == ns + 'publication-date':
                                    for child6 in child5:
                                        if child6.tag == ns + 'year':
                                            work['year'] = child6.text
                                elif child5.tag == ns + 'work-external-identifiers':
                                    for child6 in child5:
                                        if child6.tag != ns + 'work-external-identifier':
                                            continue
                                        identifier = {}
                                        # Reset per identifier: an element
                                        # lacking its type or id child must not
                                        # inherit values from an earlier one.
                                        key = None
                                        value = None
                                        for child7 in child6:
                                            if child7.tag == ns + 'work-external-identifier-type':
                                                # Only DOIs are recorded.
                                                key = 'doi' if child7.text == 'doi' else None
                                            elif child7.tag == ns + 'work-external-identifier-id':
                                                value = child7.text

                                        if key is not None:
                                            identifier[key] = value
                                            work[key] = value
                                        # Appended even when empty (non-DOI
                                        # identifiers), as before.
                                        work['identifiers'].append(identifier)

                            if 'title' not in work:
                                work['title'] = ''
                            if 'doi' not in work:
                                # No DOI in the ORCID record: look the title up
                                # via IEEE and accept the DOI only when the
                                # search returns exactly one publication.
                                publications = IEEE_Parser.ieee_publication_search_parser(
                                    work['title'])
                                if len(publications) == 1:
                                    for publication in publications:
                                        work['doi'] = publication['doi']
                                else:
                                    work['doi'] = ''
                            if 'authors' not in work:
                                work['authors'] = []
                            author['works'].append(work)

        return author
示例#2
0
 def orcid_author_works_get_parser(orcid):
     """
     Parse the works of an ORCID author into a dictionary.

     The XML returned by ``ORCID.orcid_author_works_get(orcid, kind="xml")``
     is written to ``data/orcid_author_works_get.xml`` (overwriting any
     previous content) and then parsed from that file.

     Args:
         orcid: ORCID identifier of the author, passed through unchanged to
             ``ORCID.orcid_author_works_get``.

     Returns:
         A dict with:
           * ``'orcid'`` - text of ``orcid-identifier/path`` (only set when
             that element is present in the response);
           * ``'works'`` - a list with one dict per ``orcid-work`` element.

         Every work dict has ``'identifiers'``, ``'authorIDs'``,
         ``'title'``, ``'doi'`` and ``'authors'``; ``'journalTitle'``,
         ``'year'`` and ``'work-citation-type'`` are present only when the
         matching XML element exists.
     """

     out_file = "data/orcid_author_works_get.xml"
     # presumably the ORCID client returns text (it is .encode()d here, as
     # in the original) -- verify against ORCID.orcid_author_works_get.
     xml_bytes = ORCID.orcid_author_works_get(orcid, kind="xml").encode('utf-8')
     # Write the encoded bytes in binary mode: unlike ``print >> fout`` this
     # works on both Python 2 and Python 3, and ``with`` guarantees the
     # handle is closed even if the write fails.  The trailing newline
     # mirrors what ``print`` used to append.
     with open(out_file, "wb") as fout:
         fout.write(xml_bytes + b"\n")

     tree = ET.parse(out_file)
     root_element = tree.getroot()
     ns = '{http://www.orcid.org/ns/orcid}'

     author = {'works': []}

     for child1 in root_element:
         if child1.tag != ns + 'orcid-profile':
             continue
         for child2 in child1:
             if child2.tag == ns + 'orcid-identifier':
                 for child3 in child2:
                     if child3.tag == ns + 'path':
                         author['orcid'] = child3.text
             elif child2.tag == ns + 'orcid-activities':
                 for child3 in child2:
                     if child3.tag != ns + 'orcid-works':
                         continue
                     for child4 in child3:
                         if child4.tag != ns + 'orcid-work':
                             continue

                         work = {'identifiers': [], 'authorIDs': []}
                         for child5 in child4:
                             if child5.tag == ns + 'work-title':
                                 for child6 in child5:
                                     if child6.tag == ns + 'title':
                                         work['title'] = child6.text
                             elif child5.tag == ns + 'journal-title':
                                 work['journalTitle'] = child5.text
                             elif child5.tag == ns + 'work-citation':
                                 # Reset per element so a citation from a
                                 # previously parsed work can never be
                                 # attributed to this one.
                                 citation_node = None
                                 for child6 in child5:
                                     if child6.tag == ns + 'work-citation-type':
                                         work['work-citation-type'] = child6.text
                                     elif child6.tag == ns + 'citation':
                                         citation_node = child6

                                 # .get(): the type child may be missing.
                                 citation_type = work.get('work-citation-type')
                                 if citation_node is not None:
                                     if citation_type == 'bibtex':
                                         work['authors'] = ORCID_Parser.get_authors_list_from_bibtex(citation_node.text)
                                     elif citation_type == 'formatted-unspecified':
                                         work['authors'] = ORCID_Parser.get_authors_list_from_unformattedtext(citation_node.text)
                             elif child5.tag == ns + 'publication-date':
                                 for child6 in child5:
                                     if child6.tag == ns + 'year':
                                         work['year'] = child6.text
                             elif child5.tag == ns + 'work-external-identifiers':
                                 for child6 in child5:
                                     if child6.tag != ns + 'work-external-identifier':
                                         continue
                                     identifier = {}
                                     # Reset per identifier: an element
                                     # lacking its type or id child must not
                                     # inherit values from an earlier one.
                                     key = None
                                     value = None
                                     for child7 in child6:
                                         if child7.tag == ns + 'work-external-identifier-type':
                                             # Only DOIs are recorded.
                                             key = 'doi' if child7.text == 'doi' else None
                                         elif child7.tag == ns + 'work-external-identifier-id':
                                             value = child7.text

                                     if key is not None:
                                         identifier[key] = value
                                         work[key] = value
                                     # Appended even when empty (non-DOI
                                     # identifiers), as before.
                                     work['identifiers'].append(identifier)

                         if 'title' not in work:
                             work['title'] = ''
                         if 'doi' not in work:
                             # No DOI in the ORCID record: look the title up
                             # via IEEE and accept the DOI only when the
                             # search returns exactly one publication.
                             publications = IEEE_Parser.ieee_publication_search_parser(work['title'])
                             if len(publications) == 1:
                                 for publication in publications:
                                     work['doi'] = publication['doi']
                             else:
                                 work['doi'] = ''
                         if 'authors' not in work:
                             work['authors'] = []
                         author['works'].append(work)

     return author