Exemplo n.º 1
0
    def save_publications_list(publications_list, orcid):
        """
        Store new publications and credit ``orcid`` on the ones already stored.

        For each entry of ``publications_list`` the collection named by
        ``mongo_constants['publication_list']`` is searched for an existing
        document, first by DOI and then by title:

        * when a document is found, ``orcid`` is added to its ``authorIDs``
          and nothing else on the stored document is touched;
        * otherwise ``orcid`` is added to the entry's own ``authorIDs``, the
          entry is completed with the keys returned by
          ``IEEE_Parser.ieee_doi_get_parser`` for its DOI (keys already
          present are never overwritten; the IEEE author list is stored
          under ``authorsIeee``), ``authorsSearched`` is set to 0 and the
          entry is inserted.

        An empty or missing DOI is never used as a lookup key: a stored
        document with an empty DOI would otherwise match every other
        DOI-less publication, which would then be credited to the wrong
        record instead of being inserted.

        :param publications_list: iterable of publication dicts; each must
            contain ``title``. Entries that get inserted are mutated in place.
        :param orcid: author identifier to record in ``authorIDs``.
        """

        client = MongoClient(mongo_constants['server_name'],
                             mongo_constants['port_number'])
        try:
            db = client[mongo_constants['database']]
            coll = db[mongo_constants['publication_list']]

            def credit_author(stored):
                # $addToSet appends in one atomic server-side step, so a
                # concurrent writer's additions are not overwritten by a
                # list that was read earlier.
                if orcid not in (stored.get('authorIDs') or []):
                    coll.update({'_id': stored['_id']},
                                {'$addToSet': {
                                    'authorIDs': orcid
                                }},
                                upsert=False,
                                multi=False)

            for publication in publications_list:
                doi = publication.get('doi')

                # One find_one per key replaces the count()+find_one() pair.
                stored = coll.find_one({'doi': doi}) if doi else None
                if stored is None:
                    stored = coll.find_one({'title': publication['title']})

                if stored is not None:
                    credit_author(stored)
                    continue

                author_ids = publication.setdefault('authorIDs', [])
                if orcid not in author_ids:
                    author_ids.append(orcid)

                # Only query IEEE when there is an actual DOI to look up.
                if doi:
                    ieee_doi_get_result = IEEE_Parser.ieee_doi_get_parser(doi)
                    if ieee_doi_get_result is not None:
                        for key in ieee_doi_get_result:
                            if key not in publication:
                                publication[key] = ieee_doi_get_result[key]
                        if 'authors' in ieee_doi_get_result:
                            publication['authorsIeee'] = ieee_doi_get_result[
                                'authors']

                publication['authorsSearched'] = 0
                coll.insert(publication)
                print('Inserted Publication: ' + (doi or ''))
        finally:
            # Release the connection even when a query or the IEEE lookup fails.
            client.close()
Exemplo n.º 2
0
    def save_publications_list(publications_list, orcid):
        """
        Store new publications and credit ``orcid`` on the ones already stored.

        Each entry of ``publications_list`` is looked up in the collection
        named by ``mongo_constants['publication_list']``, by DOI first and by
        title second. If a stored document is found, ``orcid`` is added to its
        ``authorIDs``. If none is found, ``orcid`` is added to the entry's own
        ``authorIDs``, missing keys are filled in from
        ``IEEE_Parser.ieee_doi_get_parser`` (the IEEE author list goes under
        ``authorsIeee``), ``authorsSearched`` is set to 0 and the entry is
        inserted.

        An empty or missing DOI is skipped as a lookup key, because a stored
        document with an empty DOI would otherwise match every DOI-less
        publication and absorb it instead of letting it be inserted.

        :param publications_list: iterable of publication dicts; each must
            contain ``title``. Entries that get inserted are mutated in place.
        :param orcid: author identifier to record in ``authorIDs``.
        """
        
        client = MongoClient(mongo_constants['server_name'], mongo_constants['port_number'])
        try:
            db = client[mongo_constants['database']]
            coll = db[mongo_constants['publication_list']]

            for publication in publications_list:
                doi = publication.get('doi')

                # A single find_one per key; no separate count() round trip.
                pub = coll.find_one({'doi': doi}) if doi else None
                if pub is None:
                    pub = coll.find_one({'title': publication['title']})

                if pub is not None:
                    if orcid not in (pub.get('authorIDs') or []):
                        # Atomic append: avoids the window between separate
                        # $unset and $set calls and does not write back a
                        # list that may be stale by the time it is sent.
                        coll.update({'_id': pub['_id']}, {'$addToSet': {'authorIDs': orcid}}, upsert=False, multi=False)
                    continue

                author_ids = publication.setdefault('authorIDs', [])
                if orcid not in author_ids:
                    author_ids.append(orcid)

                # Skip the IEEE lookup when there is no DOI to resolve.
                if doi:
                    ieee_doi_get_result = IEEE_Parser.ieee_doi_get_parser(doi)
                    if ieee_doi_get_result is not None:
                        for key in ieee_doi_get_result:
                            if key not in publication:
                                publication[key] = ieee_doi_get_result[key]
                        if 'authors' in ieee_doi_get_result:
                            publication['authorsIeee'] = ieee_doi_get_result['authors']

                publication['authorsSearched'] = 0
                coll.insert(publication)
                print('Inserted Publication: ' + (doi or ''))
        finally:
            # Always release the connection, including on errors.
            client.close()
Exemplo n.º 3
0
    def orcid_author_works_get_parser(orcid):
        """
        Parse an author's ORCID works XML into a dictionary.

        The XML returned by ``ORCID.orcid_author_works_get`` is written to
        ``data/orcid_author_works_get.xml`` and parsed from that file.

        :param orcid: ORCID identifier of the author to fetch.
        :return: dict with a ``works`` list and, when the profile carries an
            identifier path, an ``orcid`` entry. Every work dict has
            ``identifiers``, ``authorIDs`` (always empty here), ``title``,
            ``doi`` and ``authors``; ``journalTitle``, ``year`` and
            ``work-citation-type`` are present only when found in the XML.
            When the XML has no DOI for a work, the DOI is taken from
            ``IEEE_Parser.ieee_publication_search_parser`` if that search
            yields exactly one publication, and is ``''`` otherwise.
        """

        ns = '{http://www.orcid.org/ns/orcid}'

        def parse_citation(citation_node, work):
            # Extract the citation type and derive the author list from the
            # citation text. ``citation`` starts as None for every element so
            # text from a previously parsed work can never leak into this one.
            citation = None
            for part in citation_node:
                if part.tag == ns + 'work-citation-type':
                    work['work-citation-type'] = part.text
                elif part.tag == ns + 'citation':
                    citation = part.text

            if citation is None:
                return
            # .get() tolerates a citation that has no type element.
            citation_type = work.get('work-citation-type')
            if citation_type == 'bibtex':
                work['authors'] = ORCID_Parser.get_authors_list_from_bibtex(
                    citation)
            elif citation_type == 'formatted-unspecified':
                work[
                    'authors'] = ORCID_Parser.get_authors_list_from_unformattedtext(
                        citation)

        def parse_identifier(identifier_node, work):
            # Record one external identifier. ``key`` and ``value`` are reset
            # per identifier so an incomplete element cannot reuse the
            # previous identifier's type or id.
            key = None
            value = None
            for part in identifier_node:
                if part.tag == ns + 'work-external-identifier-type':
                    key = 'doi' if part.text == 'doi' else None
                elif part.tag == ns + 'work-external-identifier-id':
                    value = part.text

            identifier = {}
            if key is not None and value is not None:
                identifier[key] = value
                work[key] = value
            # Non-DOI identifiers contribute an empty dict, so the list has
            # one entry per identifier element in the XML.
            work['identifiers'].append(identifier)

        def parse_work(work_node):
            # Build the dict for a single orcid-work element; children are
            # handled in document order.
            work = {'identifiers': [], 'authorIDs': []}
            for field in work_node:
                if field.tag == ns + 'work-title':
                    for title_node in field.findall(ns + 'title'):
                        work['title'] = title_node.text
                elif field.tag == ns + 'journal-title':
                    work['journalTitle'] = field.text
                elif field.tag == ns + 'work-citation':
                    parse_citation(field, work)
                elif field.tag == ns + 'publication-date':
                    for year_node in field.findall(ns + 'year'):
                        work['year'] = year_node.text
                elif field.tag == ns + 'work-external-identifiers':
                    for identifier_node in field.findall(
                            ns + 'work-external-identifier'):
                        parse_identifier(identifier_node, work)

            work.setdefault('title', '')
            if 'doi' not in work:
                # Fall back to an IEEE title search; only an unambiguous
                # (single) hit is trusted.
                publications = IEEE_Parser.ieee_publication_search_parser(
                    work['title'])
                if len(publications) == 1:
                    for publication in publications:
                        work['doi'] = publication['doi']
                else:
                    work['doi'] = ''
            work.setdefault('authors', [])
            return work

        out_file = "data/orcid_author_works_get.xml"
        # Fetch before opening so a failed request does not truncate the file;
        # the payload is already encoded, hence binary mode.
        xml_bytes = ORCID.orcid_author_works_get(orcid,
                                                 kind="xml").encode('utf-8')
        with open(out_file, "wb") as fout:
            fout.write(xml_bytes)
            fout.write(b"\n")

        root_element = ET.parse(out_file).getroot()

        author = {'works': []}

        for profile in root_element.findall(ns + 'orcid-profile'):
            for section in profile:
                if section.tag == ns + 'orcid-identifier':
                    for path_node in section.findall(ns + 'path'):
                        author['orcid'] = path_node.text
                elif section.tag == ns + 'orcid-activities':
                    for works_node in section.findall(ns + 'orcid-works'):
                        for work_node in works_node.findall(ns +
                                                            'orcid-work'):
                            author['works'].append(parse_work(work_node))

        return author
Exemplo n.º 4
0
 def orcid_author_works_get_parser(orcid):
     """
     Parse an author's ORCID works XML into a dictionary.

     The XML returned by ``ORCID.orcid_author_works_get`` is written to
     ``data/orcid_author_works_get.xml`` and parsed from that file.

     :param orcid: ORCID identifier of the author to fetch.
     :return: dict with a ``works`` list and, when the profile carries an
         identifier path, an ``orcid`` entry. Every work dict has
         ``identifiers``, ``authorIDs`` (always empty here), ``title``,
         ``doi`` and ``authors``; ``journalTitle``, ``year`` and
         ``work-citation-type`` are present only when found in the XML.
         When the XML has no DOI for a work, the DOI is taken from
         ``IEEE_Parser.ieee_publication_search_parser`` if that search
         yields exactly one publication, and is ``''`` otherwise.
     """
     
     ns = '{http://www.orcid.org/ns/orcid}'
     
     def parse_citation(citation_node, work):
         # Extract the citation type and derive the author list from the
         # citation text. ``citation`` starts as None for every element so
         # text from a previously parsed work can never leak into this one.
         citation = None
         for part in citation_node:
             if part.tag == ns + 'work-citation-type':
                 work['work-citation-type'] = part.text
             elif part.tag == ns + 'citation':
                 citation = part.text
         
         if citation is None:
             return
         # .get() tolerates a citation that has no type element.
         citation_type = work.get('work-citation-type')
         if citation_type == 'bibtex':
             work['authors'] = ORCID_Parser.get_authors_list_from_bibtex(citation)
         elif citation_type == 'formatted-unspecified':
             work['authors'] = ORCID_Parser.get_authors_list_from_unformattedtext(citation)
     
     def parse_identifier(identifier_node, work):
         # Record one external identifier. ``key`` and ``value`` are reset
         # per identifier so an incomplete element cannot reuse the previous
         # identifier's type or id.
         key = None
         value = None
         for part in identifier_node:
             if part.tag == ns + 'work-external-identifier-type':
                 key = 'doi' if part.text == 'doi' else None
             elif part.tag == ns + 'work-external-identifier-id':
                 value = part.text
         
         identifier = {}
         if key is not None and value is not None:
             identifier[key] = value
             work[key] = value
         # Non-DOI identifiers contribute an empty dict, so the list has one
         # entry per identifier element in the XML.
         work['identifiers'].append(identifier)
     
     def parse_work(work_node):
         # Build the dict for a single orcid-work element; children are
         # handled in document order.
         work = {'identifiers': [], 'authorIDs': []}
         for field in work_node:
             if field.tag == ns + 'work-title':
                 for title_node in field.findall(ns + 'title'):
                     work['title'] = title_node.text
             elif field.tag == ns + 'journal-title':
                 work['journalTitle'] = field.text
             elif field.tag == ns + 'work-citation':
                 parse_citation(field, work)
             elif field.tag == ns + 'publication-date':
                 for year_node in field.findall(ns + 'year'):
                     work['year'] = year_node.text
             elif field.tag == ns + 'work-external-identifiers':
                 for identifier_node in field.findall(ns + 'work-external-identifier'):
                     parse_identifier(identifier_node, work)
         
         work.setdefault('title', '')
         if 'doi' not in work:
             # Fall back to an IEEE title search; only an unambiguous
             # (single) hit is trusted.
             publications = IEEE_Parser.ieee_publication_search_parser(work['title'])
             if len(publications) == 1:
                 for publication in publications:
                     work['doi'] = publication['doi']
             else:
                 work['doi'] = ''
         work.setdefault('authors', [])
         return work
     
     out_file = "data/orcid_author_works_get.xml"
     # Fetch before opening so a failed request does not truncate the file;
     # the payload is already encoded, hence binary mode.
     xml_bytes = ORCID.orcid_author_works_get(orcid, kind="xml").encode('utf-8')
     with open(out_file, "wb") as fout:
         fout.write(xml_bytes)
         fout.write(b"\n")
     
     root_element = ET.parse(out_file).getroot()
     
     author = {'works': []}
     
     for profile in root_element.findall(ns + 'orcid-profile'):
         for section in profile:
             if section.tag == ns + 'orcid-identifier':
                 for path_node in section.findall(ns + 'path'):
                     author['orcid'] = path_node.text
             elif section.tag == ns + 'orcid-activities':
                 for works_node in section.findall(ns + 'orcid-works'):
                     for work_node in works_node.findall(ns + 'orcid-work'):
                         author['works'].append(parse_work(work_node))
     
     return author