def save_publications_list(publications_list, orcid):
    """
    Save each publication of ``publications_list`` unless it is already in
    the database.

    A publication counts as already stored when a document with the same
    non-empty ``doi`` exists or, failing that, one with the same non-empty
    ``title``. In that case ``orcid`` is added to the stored document's
    ``authorIDs``. Otherwise ``orcid`` is added to the publication's own
    ``authorIDs``, the publication is completed with the fields returned by
    ``IEEE_Parser.ieee_doi_get_parser`` (only when it has a DOI) and it is
    inserted.

    :param publications_list: iterable of publication dicts; new
        publications are modified in place before being inserted.
    :param orcid: ORCID identifier to record in ``authorIDs``.
    """
    client = MongoClient(mongo_constants['server_name'],
                         mongo_constants['port_number'])
    try:
        db = client[mongo_constants['database']]
        coll = db[mongo_constants['publication_list']]

        for publication in publications_list:
            doi = publication.get('doi')
            title = publication.get('title')

            # An empty DOI or title identifies nothing: matching on it would
            # merge unrelated publications into the same stored document.
            existing = None
            if doi:
                existing = coll.find_one({'doi': doi})
            if existing is None and title:
                existing = coll.find_one({'title': title})

            if existing is not None:
                # $addToSet appends only when the value is absent, so the
                # read-modify-write ($unset followed by $set) is not needed.
                coll.update({'_id': existing['_id']},
                            {'$addToSet': {'authorIDs': orcid}},
                            upsert=False, multi=False)
                continue

            author_ids = publication.setdefault('authorIDs', [])
            if orcid not in author_ids:
                author_ids.append(orcid)

            if doi:
                ieee_doi_get_result = IEEE_Parser.ieee_doi_get_parser(doi)
                if ieee_doi_get_result is not None:
                    # Only fill in fields the publication does not have yet.
                    for key in ieee_doi_get_result:
                        if key not in publication:
                            publication[key] = ieee_doi_get_result[key]
                    if 'authors' in ieee_doi_get_result:
                        publication['authorsIeee'] = \
                            ieee_doi_get_result['authors']

            # NOTE(review): the original indentation was lost; this flag is
            # assumed to be set on every inserted publication - confirm.
            publication['authorsSearched'] = 0
            coll.insert(publication)
            # Single parenthesised argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print('Inserted Publication: ' + (doi or ''))
    finally:
        # Release the connection even when a query or the IEEE lookup fails.
        client.close()
def save_publications_list(publications_list, orcid):
    """
    Save each publication of ``publications_list`` unless it is already in
    the database.

    A publication counts as already stored when a document with the same
    non-empty ``doi`` exists or, failing that, one with the same non-empty
    ``title``. In that case ``orcid`` is added to the stored document's
    ``authorIDs``. Otherwise ``orcid`` is added to the publication's own
    ``authorIDs``, the publication is completed with the fields returned by
    ``IEEE_Parser.ieee_doi_get_parser`` (only when it has a DOI) and it is
    inserted.

    :param publications_list: iterable of publication dicts; new
        publications are modified in place before being inserted.
    :param orcid: ORCID identifier to record in ``authorIDs``.
    """
    client = MongoClient(mongo_constants['server_name'],
                         mongo_constants['port_number'])
    try:
        db = client[mongo_constants['database']]
        coll = db[mongo_constants['publication_list']]

        for publication in publications_list:
            doi = publication.get('doi')
            title = publication.get('title')

            # An empty DOI or title identifies nothing: matching on it would
            # merge unrelated publications into the same stored document.
            existing = None
            if doi:
                existing = coll.find_one({'doi': doi})
            if existing is None and title:
                existing = coll.find_one({'title': title})

            if existing is not None:
                # $addToSet appends only when the value is absent, so the
                # read-modify-write ($unset followed by $set) is not needed.
                coll.update({'_id': existing['_id']},
                            {'$addToSet': {'authorIDs': orcid}},
                            upsert=False, multi=False)
                continue

            author_ids = publication.setdefault('authorIDs', [])
            if orcid not in author_ids:
                author_ids.append(orcid)

            if doi:
                ieee_doi_get_result = IEEE_Parser.ieee_doi_get_parser(doi)
                if ieee_doi_get_result is not None:
                    # Only fill in fields the publication does not have yet.
                    for key in ieee_doi_get_result:
                        if key not in publication:
                            publication[key] = ieee_doi_get_result[key]
                    if 'authors' in ieee_doi_get_result:
                        publication['authorsIeee'] = \
                            ieee_doi_get_result['authors']

            # NOTE(review): the original indentation was lost; this flag is
            # assumed to be set on every inserted publication - confirm.
            publication['authorsSearched'] = 0
            coll.insert(publication)
            # Single parenthesised argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print('Inserted Publication: ' + (doi or ''))
    finally:
        # Release the connection even when a query or the IEEE lookup fails.
        client.close()
def orcid_author_works_get_parser(orcid):
    """
    Method to parse the author works from ORCID website into a dictionary
    object, given the orcid of the author.

    The XML returned by ``ORCID.orcid_author_works_get`` is written to
    ``data/orcid_author_works_get.xml`` and parsed from that file.

    :param orcid: ORCID identifier of the author.
    :return: dict with a ``works`` list and, when the response contains an
        ``orcid-identifier/path`` element, an ``orcid`` entry. Every work
        has the keys ``identifiers``, ``authorIDs``, ``title``, ``doi`` and
        ``authors``, plus ``journalTitle``, ``work-citation-type`` and
        ``year`` when the matching elements are present.
    """
    ns = '{http://www.orcid.org/ns/orcid}'
    out_file = "data/orcid_author_works_get.xml"

    payload = ORCID.orcid_author_works_get(orcid, kind="xml").encode('utf-8')
    # The payload is already encoded, so write it as bytes; the context
    # manager closes the file even if the write fails.
    with open(out_file, "wb") as fout:
        fout.write(payload)
        fout.write(b"\n")
    root_element = ET.parse(out_file).getroot()

    def parse_citation(citation_node, work):
        """Record the citation type and derive ``authors`` from the text."""
        citation = None
        for child in citation_node:
            if child.tag == ns + 'work-citation-type':
                work['work-citation-type'] = child.text
            elif child.tag == ns + 'citation':
                citation = child
        if citation is None:
            return
        # Dispatch only after both children were read, so a citation that
        # precedes (or lacks) its type element no longer raises KeyError.
        citation_type = work.get('work-citation-type')
        if citation_type == 'bibtex':
            work['authors'] = ORCID_Parser.get_authors_list_from_bibtex(
                citation.text)
        elif citation_type == 'formatted-unspecified':
            work['authors'] = \
                ORCID_Parser.get_authors_list_from_unformattedtext(
                    citation.text)

    def parse_identifier(identifier_node, work):
        """Append one external identifier; a DOI is also copied to the work."""
        # Type and value are local to this identifier, so nothing leaks in
        # from a previously parsed identifier and element order is free.
        id_type = None
        id_value = None
        for child in identifier_node:
            if child.tag == ns + 'work-external-identifier-type':
                id_type = child.text
            elif child.tag == ns + 'work-external-identifier-id':
                id_value = child.text
        identifier = {}
        # An empty DOI is skipped so the IEEE title lookup can still run.
        if id_type == 'doi' and id_value:
            identifier['doi'] = id_value
            work['doi'] = id_value
        # NOTE(review): the original indentation was lost; the identifier is
        # assumed to be appended once per element, even when it is not a
        # DOI (leaving an empty dict) - confirm.
        work['identifiers'].append(identifier)

    def parse_work(work_node):
        """Build the dictionary describing one ``orcid-work`` element."""
        work = {'identifiers': [], 'authorIDs': []}
        for child in work_node:
            if child.tag == ns + 'work-title':
                for title_node in child.findall(ns + 'title'):
                    work['title'] = title_node.text
            elif child.tag == ns + 'journal-title':
                work['journalTitle'] = child.text
            elif child.tag == ns + 'work-citation':
                parse_citation(child, work)
            elif child.tag == ns + 'publication-date':
                for year_node in child.findall(ns + 'year'):
                    work['year'] = year_node.text
            elif child.tag == ns + 'work-external-identifiers':
                for identifier_node in child.findall(
                        ns + 'work-external-identifier'):
                    parse_identifier(identifier_node, work)

        # A missing or empty <title> element both end up as ''.
        if work.get('title') is None:
            work['title'] = ''
        if 'doi' not in work:
            work['doi'] = ''
            # Searching with an empty title cannot identify the work.
            if work['title']:
                publications = IEEE_Parser.ieee_publication_search_parser(
                    work['title']) or []
                # Only an unambiguous single hit is trusted.
                if len(publications) == 1:
                    work['doi'] = next(iter(publications))['doi']
        if 'authors' not in work:
            work['authors'] = []
        return work

    author = {'works': []}
    for profile in root_element.findall(ns + 'orcid-profile'):
        for child in profile:
            if child.tag == ns + 'orcid-identifier':
                for path_node in child.findall(ns + 'path'):
                    author['orcid'] = path_node.text
            elif child.tag == ns + 'orcid-activities':
                for works_node in child.findall(ns + 'orcid-works'):
                    for work_node in works_node.findall(ns + 'orcid-work'):
                        author['works'].append(parse_work(work_node))
    return author
def orcid_author_works_get_parser(orcid):
    """
    Method to parse the author works from ORCID website into a dictionary
    object, given the orcid of the author.

    The XML returned by ``ORCID.orcid_author_works_get`` is written to
    ``data/orcid_author_works_get.xml`` and parsed from that file.

    :param orcid: ORCID identifier of the author.
    :return: dict with a ``works`` list and, when the response contains an
        ``orcid-identifier/path`` element, an ``orcid`` entry. Every work
        has the keys ``identifiers``, ``authorIDs``, ``title``, ``doi`` and
        ``authors``, plus ``journalTitle``, ``work-citation-type`` and
        ``year`` when the matching elements are present.
    """
    ns = '{http://www.orcid.org/ns/orcid}'
    out_file = "data/orcid_author_works_get.xml"

    payload = ORCID.orcid_author_works_get(orcid, kind="xml").encode('utf-8')
    # The payload is already encoded, so write it as bytes; the context
    # manager closes the file even if the write fails.
    with open(out_file, "wb") as fout:
        fout.write(payload)
        fout.write(b"\n")
    root_element = ET.parse(out_file).getroot()

    def parse_citation(citation_node, work):
        """Record the citation type and derive ``authors`` from the text."""
        citation = None
        for child in citation_node:
            if child.tag == ns + 'work-citation-type':
                work['work-citation-type'] = child.text
            elif child.tag == ns + 'citation':
                citation = child
        if citation is None:
            return
        # Dispatch only after both children were read, so a citation that
        # precedes (or lacks) its type element no longer raises KeyError.
        citation_type = work.get('work-citation-type')
        if citation_type == 'bibtex':
            work['authors'] = ORCID_Parser.get_authors_list_from_bibtex(
                citation.text)
        elif citation_type == 'formatted-unspecified':
            work['authors'] = \
                ORCID_Parser.get_authors_list_from_unformattedtext(
                    citation.text)

    def parse_identifier(identifier_node, work):
        """Append one external identifier; a DOI is also copied to the work."""
        # Type and value are local to this identifier, so nothing leaks in
        # from a previously parsed identifier and element order is free.
        id_type = None
        id_value = None
        for child in identifier_node:
            if child.tag == ns + 'work-external-identifier-type':
                id_type = child.text
            elif child.tag == ns + 'work-external-identifier-id':
                id_value = child.text
        identifier = {}
        # An empty DOI is skipped so the IEEE title lookup can still run.
        if id_type == 'doi' and id_value:
            identifier['doi'] = id_value
            work['doi'] = id_value
        # NOTE(review): the original indentation was lost; the identifier is
        # assumed to be appended once per element, even when it is not a
        # DOI (leaving an empty dict) - confirm.
        work['identifiers'].append(identifier)

    def parse_work(work_node):
        """Build the dictionary describing one ``orcid-work`` element."""
        work = {'identifiers': [], 'authorIDs': []}
        for child in work_node:
            if child.tag == ns + 'work-title':
                for title_node in child.findall(ns + 'title'):
                    work['title'] = title_node.text
            elif child.tag == ns + 'journal-title':
                work['journalTitle'] = child.text
            elif child.tag == ns + 'work-citation':
                parse_citation(child, work)
            elif child.tag == ns + 'publication-date':
                for year_node in child.findall(ns + 'year'):
                    work['year'] = year_node.text
            elif child.tag == ns + 'work-external-identifiers':
                for identifier_node in child.findall(
                        ns + 'work-external-identifier'):
                    parse_identifier(identifier_node, work)

        # A missing or empty <title> element both end up as ''.
        if work.get('title') is None:
            work['title'] = ''
        if 'doi' not in work:
            work['doi'] = ''
            # Searching with an empty title cannot identify the work.
            if work['title']:
                publications = IEEE_Parser.ieee_publication_search_parser(
                    work['title']) or []
                # Only an unambiguous single hit is trusted.
                if len(publications) == 1:
                    work['doi'] = next(iter(publications))['doi']
        if 'authors' not in work:
            work['authors'] = []
        return work

    author = {'works': []}
    for profile in root_element.findall(ns + 'orcid-profile'):
        for child in profile:
            if child.tag == ns + 'orcid-identifier':
                for path_node in child.findall(ns + 'path'):
                    author['orcid'] = path_node.text
            elif child.tag == ns + 'orcid-activities':
                for works_node in child.findall(ns + 'orcid-works'):
                    for work_node in works_node.findall(ns + 'orcid-work'):
                        author['works'].append(parse_work(work_node))
    return author