class MaintenanceTest(PrefilledTest):
    """
    Tests for the maintenance routines exercised below: cleanup_names()
    and update_paper_statuses(), plus a name check on a paper created
    by DOI.
    """

    @classmethod
    def setUpClass(cls):
        super(MaintenanceTest, cls).setUpClass()
        # A single Crossref client, stored on the class for all tests.
        cls.cr_api = CrossRefAPI()

    def test_cleanup_names(self):
        # Save a name, run the cleanup, and check that the name is gone.
        n = Name.lookup_name(('Anaruic', 'Leclescuantebrste'))
        n.save()
        cleanup_names()
        try:
            Name.objects.get(first='Anaruic', last='Leclescuantebrste')
        except ObjectDoesNotExist:
            # Expected: the lookup fails once the name has been removed.
            pass
        else:
            # self.fail() reports the message; the former
            # assertTrue(False and '...') always dropped it.
            self.fail('The name has not been cleaned up')

    def test_name_initial(self):
        # The first author of this paper must carry the same first and
        # last name as the `r2` fixture (presumably set up by
        # PrefilledTest -- confirm).
        n = self.r2.name
        p = Paper.create_by_doi("10.1002/ange.19941062339")
        n1 = p.authors[0].name
        self.assertEqual((n1.first, n1.last), (n.first, n.last))

    def test_update_paper_statuses(self):
        p = self.cr_api.create_paper_by_doi("10.1016/j.bmc.2005.06.035")
        p = Paper.from_bare(p)
        # No PDF is known before any OaiRecord with a pdf_url is attached.
        self.assertIsNone(p.pdf_url)
        pdf_url = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
        OaiRecord.new(source=self.arxiv,
                      identifier='oai:arXiv.org:aunrisste',
                      about=p,
                      splash_url='http://www.perdu.com/',
                      pdf_url=pdf_url)
        update_paper_statuses()
        # Reload the paper from the database to observe the update.
        self.assertEqual(Paper.objects.get(pk=p.pk).pdf_url, pdf_url)
class CrossRefTest(TestCase):
    """
    Tests of CrossRefAPI: papers created from DOIs and from raw metadata.
    """

    def setUp(self):
        self.api = CrossRefAPI()

    def test_empty_pubdate(self):
        # This DOI has an empty 'issued' date
        paper = self.api.create_paper_by_doi('10.1007/978-1-4020-7884-2_13')
        year = paper.pubdate.year
        self.assertEqual(year, 2006)

    def test_affiliations(self):
        paper = self.api.create_paper_by_doi('10.4204/eptcs.172.16')
        first_author = paper.authors[0]
        self.assertEqual(first_author.affiliation,
                         'École Normale Supérieure, Paris')

    def test_dirty_metadata(self):
        # saving a paper with enough metadata to create a paper, but not
        # an OaiRecord.
        authors = [
            {"affiliation": [], "given": "Haowen", "family": "Chan"},
            {"affiliation": [], "given": "Adrian", "family": "Perrig"},
            {"affiliation": [], "given": "Dawn", "family": "Song"},
        ]
        created = {
            "timestamp": 1166129219000,
            "date-time": "2006-12-14T20:46:59Z",
            "date-parts": [[2006, 12, 14]],
        }
        metadata = {
            "DOI": "10.1007/978-1-4020-7884-2_13",
            "subtitle": [],
            "author": authors,
            "created": created,
            "title": ["Key Distribution Techniques for Sensor Networks"],
            "type": "book-chapter",
        }
        paper = self.api.save_doi_metadata(metadata)
        self.assertTrue(paper.is_orphan())
        self.assertFalse(paper.visible)

    def test_doctype_book(self):
        # Books are ignored
        # (technically, that's because we currently require a
        # 'container-title' in the metadata)
        book = self.api.create_paper_by_doi('10.1385/1592597998')
        self.assertTrue(book.is_orphan())

    def test_doi_open(self):
        # The first DOI must come with a PDF URL, the second must not.
        with_pdf = self.api.create_paper_by_doi('10.15200/winn.145838.88372')
        self.assertTrue(with_pdf.pdf_url)
        without_pdf = self.api.create_paper_by_doi('10.5061/dryad.b167g')
        self.assertFalse(without_pdf.pdf_url)
class CiteprocTranslator(object):
    """
    Translator for the JSON-based Citeproc format served by Crossref.

    Translation is delegated to the ``save_doi_metadata`` method of a
    CrossRefAPI instance created in the constructor.
    """

    def __init__(self):
        self.cr_api = CrossRefAPI()

    def format(self):
        """
        Returns the identifier of the format handled: 'citeproc'.
        """
        return 'citeproc'

    def translate(self, header, metadata):
        """
        Passes the metadata to ``save_doi_metadata`` and returns its result.

        :param header: not used by this translator
        :param metadata: the metadata handed over to ``save_doi_metadata``
        :returns: the value returned by ``save_doi_metadata``, or None
            if it raised a ValueError
        """
        try:
            result = self.cr_api.save_doi_metadata(metadata)
        except ValueError:
            result = None
        return result
def update_crossref():
    """
    Updates paper metadata from Crossref, by calling
    ``fetch_and_save_new_records`` on a new CrossRefAPI instance.
    """
    CrossRefAPI().fetch_and_save_new_records()
def fetch_orcid_records(self, orcid_identifier, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    :param orcid_identifier: the ORCiD to query; it is checked with
        ``validate_orcid`` first
    :param profile: The ORCID profile if it has already been fetched before
        (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one
        (recommended, but slow)
    :returns: a generator, where all the papers found are yielded.
        (some of them could be in free form, hence not imported)
    :raises MetadataSourceException: if ``validate_orcid`` rejects the
        identifier. As this function is a generator, the exception is
        raised when the iteration starts, not at call time.
    """
    cr_api = CrossRefAPI()

    # Cleanup iD:
    orcid_id = validate_orcid(orcid_identifier)
    if orcid_id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(orcid_id=orcid_id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        # Call form of print, like every other print in this function:
        # same output on Python 2, and valid syntax on Python 3.
        print(e)
        return

    # As we have fetched the profile, let's update the Researcher
    self.researcher = Researcher.get_or_create_by_orcid(
        orcid_identifier, profile.json, update=True)
    if not self.researcher:
        return

    # Reference name
    ref_name = profile.name
    # list of ignored papers due to incomplete metadata
    ignored_papers = []

    # Get summary publications and separate them in two classes:
    # - the ones with DOIs, that we will fetch with CrossRef
    dois_and_putcodes = []  # list of (DOIs,putcode) to fetch
    # - the ones without: we will fetch ORCID's metadata about them
    #   and try to create a paper with what they provide
    put_codes = []
    for summary in profile.work_summaries:
        if summary.doi and use_doi:
            dois_and_putcodes.append((summary.doi, summary.put_code))
        else:
            put_codes.append(summary.put_code)

    # 1st attempt with DOIs and CrossRef
    if use_doi:
        # Let's grab papers with DOIs found in our ORCiD profile.
        dois = [doi for doi, put_code in dois_and_putcodes]
        # NOTE(review): the index lookup below assumes that
        # fetch_metadata_from_dois yields exactly one result per DOI,
        # in the order of `dois` -- confirm against its implementation.
        for idx, (success, paper_or_metadata) in enumerate(
                self.fetch_metadata_from_dois(cr_api, ref_name, orcid_id,
                                              dois)):
            if success:
                yield paper_or_metadata
            else:
                # Fall back to ORCID's own metadata for this work.
                put_codes.append(dois_and_putcodes[idx][1])

    # 2nd attempt with ORCID's own crappy metadata
    works = profile.fetch_works(put_codes)
    for work in works:
        if not work:
            continue

        # If the paper is skipped due to invalid metadata.
        # We first try to reconcile it with local researcher author name.
        # Then, we consider it missed.
        if work.skipped:
            print(work.json)
            print(work.skip_reason)
            print('work skipped due to incorrect metadata (%s)' %
                  (work.skip_reason))

            ignored_papers.append(work.as_dict())
            continue

        yield self.create_paper(work)

    # Called even when the list is empty.
    self.warn_user_of_ignored_papers(ignored_papers)
    if ignored_papers:
        print('Warning: Total ignored papers: %d' % (len(ignored_papers)))
def setUpClass(self):
    """
    Runs the parent ``setUpClass``, then stores a new CrossRefAPI
    instance in the ``cr_api`` attribute.
    """
    parent = super(MaintenanceTest, self)
    parent.setUpClass()
    self.cr_api = CrossRefAPI()
class MaintenanceTest(TestCase):
    """
    Tests for the maintenance routines exercised below:
    update_paper_statuses(), unmerge_paper_by_dois() and
    unmerge_orcid_nones(), plus a name check on a paper created by DOI.

    NOTE(review): the tests read ``self.r2`` and ``self.arxiv``, which are
    not set in this class -- presumably provided by the base class or by
    fixtures; confirm.
    """

    @classmethod
    def setUpClass(cls):
        super(MaintenanceTest, cls).setUpClass()
        # A single Crossref client, stored on the class for all tests.
        cls.cr_api = CrossRefAPI()

    def test_name_initial(self):
        # The first author of this paper must carry the same first and
        # last name as the `r2` researcher.
        n = self.r2.name
        p = Paper.create_by_doi("10.1002/ange.19941062339")
        n1 = p.authors[0].name
        self.assertEqual((n1.first, n1.last), (n.first, n.last))

    def test_update_paper_statuses(self):
        p = self.cr_api.create_paper_by_doi("10.1016/j.bmc.2005.06.035")
        p = Paper.from_bare(p)
        # No PDF is known before any OaiRecord with a pdf_url is attached.
        self.assertIsNone(p.pdf_url)
        pdf_url = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
        OaiRecord.new(source=self.arxiv,
                      identifier='oai:arXiv.org:aunrisste',
                      about=p,
                      splash_url='http://www.perdu.com/',
                      pdf_url=pdf_url)
        update_paper_statuses()
        # Reload the paper from the database to observe the update.
        self.assertEqual(Paper.objects.get(pk=p.pk).pdf_url, pdf_url)

    def test_unmerge_paper(self):
        # First we merge two unrelated papers
        p1 = Paper.create_by_doi("10.1016/j.bmc.2005.06.035")
        title1 = p1.title
        p2 = Paper.create_by_doi("10.1016/j.ijar.2017.06.011")
        title2 = p2.title
        p1.merge(p2)
        # Then we unmerge them
        unmerge_paper_by_dois(p1)
        # We have two new papers!
        # (assertNotEqual reports both values when it fails, unlike
        # assertTrue(a != b))
        p3 = Paper.get_by_doi("10.1016/j.bmc.2005.06.035")
        self.assertNotEqual(p3.id, p1.id)
        self.assertEqual(p3.title, title1)
        p4 = Paper.get_by_doi("10.1016/j.ijar.2017.06.011")
        self.assertNotEqual(p4.id, p1.id)
        self.assertNotEqual(p4.id, p3.id)
        self.assertEqual(p4.title, title2)

    def test_unmerge_orcid_nones(self):
        # First, fetch a few DOIs
        dois = [
            "10.1075/aicr.90.09ngo",
            "10.1075/aicr.90.04wad",
        ]
        for doi in dois:
            Paper.create_by_doi(doi)

        # Then, fetch an ORCID profile with a buggy version of the ORCID
        # interface, which incorrectly merges papers together
        with patch.object(OrcidPaperSource,
                          '_oai_id_for_doi') as mock_identifier:
            # Every DOI gets the same identifier, ending in "None".
            mock_identifier.return_value = "https://pub.orcid.org/v2.1/0000-0002-1909-134X/work/None"
            profile = OrcidProfileStub('0000-0002-1909-134X',
                                       instance='orcid.org')
            trung = Researcher.get_or_create_by_orcid('0000-0002-1909-134X',
                                                      profile=profile)
            OrcidPaperSource().fetch_and_save(trung, profile=profile)

        # The two papers are incorrectly merged!
        papers = [Paper.get_by_doi(doi) for doi in dois]
        self.assertEqual(papers[0], papers[1])

        # We unmerge them
        unmerge_orcid_nones()

        # The two papers are now distinct
        papers = [Paper.get_by_doi(doi) for doi in dois]
        self.assertNotEqual(papers[0], papers[1])
def setUp(self):
    """
    Stores a newly created CrossRefAPI instance in ``self.api``.
    """
    api = CrossRefAPI()
    self.api = api
def fetch_orcid_records(self, orcid_identifier, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    :param orcid_identifier: the ORCiD to query; it is checked with
        ``validate_orcid`` first
    :param profile: The ORCID profile if it has already been fetched before
        (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one
        (recommended, but slow)
    :returns: a generator, where all the papers found are yielded.
        (some of them could be in free form, hence not imported)
    :raises MetadataSourceException: if ``validate_orcid`` rejects the
        identifier. As this function is a generator, the exception is
        raised when the iteration starts, not at call time.
    """
    cr_api = CrossRefAPI()

    # Cleanup iD:
    orcid_id = validate_orcid(orcid_identifier)
    if orcid_id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(orcid_id=orcid_id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        # Call form of print, like every other print in this function:
        # same output on Python 2, and valid syntax on Python 3.
        print(e)
        return

    # As we have fetched the profile, let's update the Researcher
    self.researcher = Researcher.get_or_create_by_orcid(
        orcid_identifier, profile.json, update=True)
    if not self.researcher:
        return

    # Reference name
    ref_name = profile.name
    # curl -H "Accept: application/orcid+json"
    # 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
    dois = []  # list of DOIs to fetch
    # list of ignored papers due to incomplete metadata
    ignored_papers = []

    # Fetch publications (1st attempt with ORCiD data)
    pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work',
                 profile, [])
    for pub in pubs:
        data_paper = ORCIDDataPaper.from_orcid_metadata(
            ref_name, orcid_id, pub, stop_if_dois_exists=use_doi)
        if not data_paper:
            continue

        if data_paper.dois and use_doi:
            # We want to batch it rather than manually do it.
            dois.extend(data_paper.dois)
            continue

        # If the paper is skipped due to invalid metadata.
        # We first try to reconcile it with local researcher author name.
        # Then, we consider it missed.
        if data_paper.skipped:
            data_paper = self.reconcile_paper(
                ref_name,
                orcid_id,
                pub,
                overrides={
                    'authors': [(self.researcher.name.first,
                                 self.researcher.name.last)]
                })
            # Still skipped after the reconciliation attempt: give up.
            if data_paper.skipped:
                print('%s is skipped due to incorrect metadata (%s)' %
                      (data_paper, data_paper.skip_reason))

                ignored_papers.append(data_paper.as_dict())
                continue

        yield self.create_paper(data_paper)

    # 2nd attempt with DOIs and CrossRef
    if use_doi:
        # Let's grab papers from CrossRef (currently disabled)
        #for success, paper_or_metadata in self.fetch_crossref_incrementally(cr_api, orcid_id):
        #    if success:
        #        yield paper_or_metadata
        #    else:
        #        ignored_papers.append(paper_or_metadata)
        #        print('This metadata (%s) yields no paper.' %
        #              (unicode(paper_or_metadata)))

        # Let's grab papers with DOIs found in our ORCiD profile.
        # FIXME(RaitoBezarius): if we fail here, we should get back the pub
        # and yield it.
        for success, paper_or_metadata in self.fetch_metadata_from_dois(
                cr_api, ref_name, orcid_id, dois):
            if success:
                yield paper_or_metadata
            else:
                ignored_papers.append(paper_or_metadata)
                print('This metadata (%s) yields no paper.' %
                      (paper_or_metadata))

    # Called even when the list is empty.
    self.warn_user_of_ignored_papers(ignored_papers)
    if ignored_papers:
        print('Warning: Total ignored papers: %d' % (len(ignored_papers)))
def __init__(self):
    """
    Stores a newly created CrossRefAPI instance in ``self.cr_api``.
    """
    api = CrossRefAPI()
    self.cr_api = api
class CrossRefTest(TestCase):
    """
    Tests of CrossRefAPI: papers created from DOIs and from raw metadata,
    record fetching, and dump ingestion.
    """

    @classmethod
    def setUpClass(cls):
        super(CrossRefTest, cls).setUpClass()
        # Directory of this test file, used to locate the sample dump.
        cls.testdir = os.path.dirname(os.path.abspath(__file__))

    def setUp(self):
        self.api = CrossRefAPI()

    def test_empty_pubdate(self):
        # This DOI has an empty 'issued' date
        p = self.api.create_paper_by_doi('10.1007/978-1-4020-7884-2_13')
        self.assertEqual(p.pubdate.year, 2006)

    def test_invalid_metadata(self):
        # authors with no family name
        paper = self.api.create_paper_by_doi('10.4156/aiss.vol3.issue9.31')
        self.assertIsNone(paper)

    def test_affiliations(self):
        p = self.api.create_paper_by_doi('10.4204/eptcs.172.16')
        self.assertEqual(p.authors[0].affiliation,
                         'École Normale Supérieure, Paris')

    def test_dirty_metadata(self):
        # saving a paper with enough metadata to create a paper, but not
        # an OaiRecord.
        p = self.api.save_doi_metadata({
            "DOI": "10.1007/978-1-4020-7884-2_13",
            "subtitle": [],
            "author": [
                {
                    "affiliation": [],
                    "given": "Haowen",
                    "family": "Chan"
                },
                {
                    "affiliation": [],
                    "given": "Adrian",
                    "family": "Perrig"
                },
                {
                    "affiliation": [],
                    "given": "Dawn",
                    "family": "Song"
                }
            ],
            "created": {
                "timestamp": 1166129219000,
                "date-time": "2006-12-14T20:46:59Z",
                "date-parts": [
                    [
                        2006,
                        12,
                        14
                    ]
                ]
            },
            "title": [
                "Key Distribution Techniques for Sensor Networks"
            ],
            "type": "book-chapter"})
        self.assertTrue(p.is_orphan())
        self.assertFalse(p.visible)

    def test_doctype_book(self):
        # Books are ignored
        # (technically, that's because we currently require a
        # 'container-title' in the metadata)
        p = self.api.create_paper_by_doi('10.1385/1592597998')
        self.assertTrue(p.is_orphan())

    def test_doi_open(self):
        # The first DOI must come with a PDF URL, the second must not.
        self.assertTrue(self.api.create_paper_by_doi('10.15200/winn.145838.88372').pdf_url)
        self.assertFalse(self.api.create_paper_by_doi('10.5061/dryad.b167g').pdf_url)

    def test_fetch_papers(self):
        generator = self.api.fetch_all_records(filters={'issn':'0302-9743'})
        # Check the DOI prefix of the first 30 records only.
        for _ in range(30):
            metadata = next(generator)
            self.assertTrue(metadata['DOI'].startswith('10.1007/'))

    def test_import_dump(self):
        self.api.ingest_dump(os.path.join(self.testdir, 'data/sample_crossref_dump.json.bz2'))
        # A paper from the sample dump must be retrievable by DOI afterwards.
        p = Paper.get_by_doi('10.1016/j.jadohealth.2015.10.045')
        self.assertEqual("Sources, Type and Use of Social Support during Early Sexual Development of Black Gay and Bisexual Adolescent Males", p.title)
class CrossRefTest(TestCase):
    """
    Tests of CrossRefAPI: papers created from DOIs and from raw metadata,
    record fetching, and dump ingestion.
    """

    @classmethod
    def setUpClass(cls):
        super(CrossRefTest, cls).setUpClass()
        # Directory of this test file, used to locate the sample dump.
        cls.testdir = os.path.dirname(os.path.abspath(__file__))

    def setUp(self):
        self.api = CrossRefAPI()

    def test_empty_pubdate(self):
        # This DOI has an empty 'issued' date
        p = self.api.create_paper_by_doi('10.1007/978-1-4020-7884-2_13')
        self.assertEqual(p.pubdate.year, 2006)

    def test_invalid_metadata(self):
        # authors with no family name
        paper = self.api.create_paper_by_doi('10.4156/aiss.vol3.issue9.31')
        self.assertIsNone(paper)

    def test_affiliations(self):
        p = self.api.create_paper_by_doi('10.4204/eptcs.172.16')
        self.assertEqual(p.authors[0].affiliation,
                         'École Normale Supérieure, Paris')

    def test_dirty_metadata(self):
        # saving a paper with enough metadata to create a paper, but not
        # an OaiRecord.
        p = self.api.save_doi_metadata({
            "DOI": "10.1007/978-1-4020-7884-2_13",
            "subtitle": [],
            "author": [{
                "affiliation": [],
                "given": "Haowen",
                "family": "Chan"
            }, {
                "affiliation": [],
                "given": "Adrian",
                "family": "Perrig"
            }, {
                "affiliation": [],
                "given": "Dawn",
                "family": "Song"
            }],
            "created": {
                "timestamp": 1166129219000,
                "date-time": "2006-12-14T20:46:59Z",
                "date-parts": [[2006, 12, 14]]
            },
            "title": ["Key Distribution Techniques for Sensor Networks"],
            "type": "book-chapter"
        })
        self.assertTrue(p.is_orphan())
        self.assertFalse(p.visible)

    def test_doctype_book(self):
        # Books are ignored
        # (technically, that's because we currently require a
        # 'container-title' in the metadata)
        p = self.api.create_paper_by_doi('10.1385/1592597998')
        self.assertTrue(p.is_orphan())

    def test_doi_open(self):
        # The first DOI must come with a PDF URL, the second must not.
        self.assertTrue(
            self.api.create_paper_by_doi('10.15200/winn.145838.88372').pdf_url)
        self.assertFalse(
            self.api.create_paper_by_doi('10.5061/dryad.b167g').pdf_url)

    def test_fetch_papers(self):
        generator = self.api.fetch_all_records(filters={'issn': '0302-9743'})
        # Check the DOI prefix of the first 30 records only.
        for _ in range(30):
            metadata = next(generator)
            self.assertTrue(metadata['DOI'].startswith('10.1007/'))

    def test_import_dump(self):
        self.api.ingest_dump(
            os.path.join(self.testdir, 'data/sample_crossref_dump.json.bz2'))
        # A paper from the sample dump must be retrievable by DOI afterwards.
        p = Paper.get_by_doi('10.1016/j.jadohealth.2015.10.045')
        self.assertEqual(
            "Sources, Type and Use of Social Support during Early Sexual Development of Black Gay and Bisexual Adolescent Males",
            p.title)