def setUpClass(self):
    """
    Class-level fixture: run the parent setup, clear ``self.ccf``, then
    let a CrossRef source and an OAI source (both built from ``self.ccf``)
    fetch and save for ``self.r2`` in incremental mode.
    """
    # NOTE(review): presumably decorated with @classmethod outside this
    # excerpt, in which case ``self`` is really the class -- confirm.
    super(StatisticsTest, self).setUpClass()
    self.ccf.clear()
    # Both sources are built before any fetching starts; CrossRef is
    # fetched first, OAI second.
    paper_sources = [
        CrossRefPaperSource(self.ccf),
        OaiPaperSource(self.ccf),
    ]
    for paper_source in paper_sources:
        paper_source.fetch_and_save(self.r2, incremental=True)
def setUp(self):
    """
    Prepare two OAI paper sources and the directory of this test module.

    ``self.oai`` wraps the ``OaiSource`` whose identifier is 'hal';
    ``self.base_oai`` wraps the one whose identifier is 'base', after its
    endpoint has been replaced by a placeholder URL.
    """
    hal_source = OaiSource.objects.get(identifier='hal')
    self.oai = OaiPaperSource(hal_source)

    this_file = os.path.abspath(__file__)
    self.testdir = os.path.dirname(this_file)

    base_source = OaiSource.objects.get(identifier='base')
    # The endpoint is only changed on the fetched object: nothing in this
    # method saves it back.
    base_source.endpoint = 'https://some_endpoint'
    self.base_oai = OaiPaperSource(base_source)
def update_oai_sources():
    """
    Fetches new and updated records from all configured OAI sources
    since their last update.

    Every ``OaiSource`` with a non-null endpoint is harvested with the
    'base_dc' metadata prefix, starting from its stored ``last_update``
    (passed with its tzinfo stripped). The source's ``last_update`` is
    then moved forward and saved.
    """
    for source in OaiSource.objects.filter(endpoint__isnull=False):
        # Take the checkpoint *before* harvesting. Stamping the time after
        # ingest() returns would skip any record modified on the endpoint
        # while the (possibly long) harvest was running: it would be older
        # than the next run's start date without having been fetched.
        # Starting slightly too early only re-fetches a few records.
        harvest_started = datetime.now()

        oai = OaiPaperSource(source)
        oai.ingest(source.last_update.replace(tzinfo=None),
                   metadataPrefix='base_dc')

        source.last_update = harvest_started
        source.save()
def test_preprint(self):
    """
    Submit a preprint: build the paper from an OAI record in 'base_dc'
    format, keep only its first author and check that a dry deposit
    reports the 'faked' status.
    """
    paper_source = OaiPaperSource(self.oaisource)
    paper_source.add_translator(BASEDCTranslator(self.oaisource))
    paper = paper_source.create_paper_by_identifier(
        'ftarxivpreprints:oai:arXiv.org:1207.2079',
        'base_dc',
    )

    # Reduce the author list to its first entry only.
    first_author = paper.authors_list[0]
    paper.authors_list = [first_author]

    deposit_fields = {
        'abstract': 'here is my great result',
        'topic': 'SDV',
        'depositing_author': 0,
        'affiliation': 128940,
    }
    result = self.dry_deposit(paper, **deposit_fields)
    self.assertEqualOrLog(result.status, 'faked')
def test_preprint(self):
    """
    Submit a preprint: build the paper from an OAI record in 'base_dc'
    format fetched through the doai.io endpoint, keep only its first
    author and check that a dry deposit reports the 'faked' status.
    """
    paper_source = OaiPaperSource(endpoint='http://doai.io/oai')
    paper_source.add_translator(BASEDCTranslator())
    paper = paper_source.create_paper_by_identifier(
        'ftarxivpreprints:oai:arXiv.org:1207.2079',
        'base_dc',
    )

    # Reduce the author list to its first entry only.
    first_author = paper.authors_list[0]
    paper.authors_list = [first_author]

    deposit_fields = {
        'abstract': 'here is my great result',
        'topic': 'SDV',
        'depositing_author': 0,
        'affiliation': 128940,
    }
    result = self.dry_deposit(paper, **deposit_fields)
    self.assertEqualOrLog(result.status, 'faked')
def test_bad_journal_article(self):
    """
    Submit something that claims to be a journal article although no
    publication metadata can be found for it. The interface is expected
    to fall back on something lighter, and the dry deposit should still
    report the 'faked' status.
    """
    paper_source = OaiPaperSource(self.oaisource)
    paper_source.add_translator(BASEDCTranslator(self.oaisource))
    paper = paper_source.create_paper_by_identifier(
        'ftalborguniv:oai:pure.atira.dk:openaire/30feea10-9c2f-11db-8ed6-000ea68e967b',
        'base_dc',
    )

    # Keep a single author, force the document type and persist both
    # changes before depositing.
    first_author = paper.authors_list[0]
    paper.authors_list = [first_author]
    paper.doctype = 'journal-article'
    paper.save()

    deposit_fields = {
        'abstract': 'hey you, yes you',
        'topic': 'SDV',
        'depositing_author': 0,
        'affiliation': 128940,
    }
    result = self.dry_deposit(paper, **deposit_fields)
    self.assertEqualOrLog(result.status, 'faked')
def setUp(self):
    """
    Create the OAI paper source used by the tests, pointing at the
    doai.io endpoint, and register the BASE DC, OAI DC and Citeproc
    translators on it (in that order).
    """
    self.oai = OaiPaperSource(endpoint='http://doai.io/oai')
    translator_classes = (
        BASEDCTranslator,
        OAIDCTranslator,
        CiteprocTranslator,
    )
    # Each translator is instantiated right before being registered.
    for translator_class in translator_classes:
        self.oai.add_translator(translator_class())
class OaiTest(TestCase):
    """
    Tests creating papers from OAI records through an ``OaiPaperSource``
    pointed at the doai.io endpoint.
    """

    def setUp(self):
        """
        Build the paper source and register the BASE DC, OAI DC and
        Citeproc translators on it (in that order).
        """
        self.oai = OaiPaperSource(endpoint='http://doai.io/oai')
        translator_classes = (
            BASEDCTranslator,
            OAIDCTranslator,
            CiteprocTranslator,
        )
        for translator_class in translator_classes:
            self.oai.add_translator(translator_class())

    def create(self, *args, **kwargs):
        """
        Test shortcut: forward everything to
        ``self.oai.create_paper_by_identifier`` and return its result.
        """
        return self.oai.create_paper_by_identifier(*args, **kwargs)

    def delete(self, identifier):
        """
        Delete the OAI record with this identifier, and the paper it is
        about if ``is_orphan()`` says so afterwards. Does nothing when no
        such record exists.
        """
        try:
            record = OaiRecord.objects.get(identifier=identifier)
            paper = record.about
            record.delete()
            if paper.is_orphan():
                paper.delete()
        except OaiRecord.DoesNotExist:
            # Nothing to clean up.
            pass

    def _assert_second_record_joins_first(self, first_id, first_format,
                                          second_id, second_format):
        """
        Shared scenario: create a paper from a first record, then create
        from a second record and check that the very same paper comes
        back, carrying the previous records plus the new one.
        """
        # Start from a state where the first record is absent.
        self.delete(first_id)

        existing_paper = self.create(first_id, first_format)
        # Snapshot the records before the second import.
        expected_records = set(existing_paper.oairecords)

        # Second import: same paper, different source.
        merged_paper = self.create(second_id, second_format)

        # Equality here only means "same database row" (same id), not
        # that every attribute matches.
        self.assertEqual(merged_paper, existing_paper)

        # The records are now the old ones plus the newly imported one.
        expected_records.add(OaiRecord.objects.get(identifier=second_id))
        self.assertSetEqual(set(merged_paper.oairecords), expected_records)

    def test_create_no_match(self):
        """
        A paper is created from an OAI record when no such paper exists
        yet.
        """
        record_id = 'ftunivsavoie:oai:HAL:hal-01063697v1'

        # Ensure a clean slate for this identifier.
        self.delete(record_id)

        # Import from BASE.
        paper = self.create(record_id, 'base_dc')

        self.assertEqual(len(paper.oairecords), 1)
        self.assertNotEqual(paper.pdf_url, None)
        self.assertEqual(paper.fingerprint, paper.new_fingerprint())

    def test_create_already_existing(self):
        """
        Importing the exact same OAI record twice yields the same paper.
        """
        # TODO we could repeat this for various papers
        record_id = 'ftccsdartic:oai:hal.archives-ouvertes.fr:hal-00830421'

        # Ensure a clean slate for this identifier.
        self.delete(record_id)

        # Import from BASE, twice in a row.
        original = self.create(record_id, 'base_dc')
        duplicate = self.create(record_id, 'base_dc')

        # Nothing should differ between the two results.
        self.assertEqual(duplicate, original)
        self.assertSetEqual(set(duplicate.oairecords),
                            set(original.oairecords))
        self.assertListEqual(duplicate.bare_author_names(),
                             original.bare_author_names())
        self.assertEqual(duplicate.title, original.title)

    def test_create_match_fp(self):
        """
        A new OAI record is attached to an existing paper when both are
        matched by fingerprint.
        """
        self._assert_second_record_joins_first(
            'oai:crossref.org:10.1016/j.crma.2012.10.021', 'citeproc',
            'ftarxivpreprints:oai:arXiv.org:1112.6130', 'base_dc',
        )

    @unittest.expectedFailure
    def test_create_incomplete_metadata(self):
        """
        Importing an incomplete OAI record (here the publication date is
        missing). Ideally it would still be matched with the first paper
        via fingerprint so that the relevant URL gets added; this is
        currently an expected failure.
        """
        # NOTE(review): the first identifier looks like a BASE one yet is
        # requested in 'citeproc' format -- kept as is, confirm intent.
        self._assert_second_record_joins_first(
            'ftccsdartic:oai:hal.archives-ouvertes.fr:hal-00939473',
            'citeproc',
            'ftciteseerx:oai:CiteSeerX.psu:10.1.1.487.869',
            'base_dc',
        )

    def test_create_match_doi(self):
        """
        A new OAI record is attached to an existing paper when both are
        matched by DOI.
        """
        base_id = 'ftunivmacedonia:oai:dspace.lib.uom.gr:2159/6240'
        crossref_id = 'oai:crossref.org:10.1111/j.1574-0862.2005.00325.x'

        # Ensure a clean slate for the first identifier.
        self.delete(base_id)

        # Import from BASE first.
        oldest_paper = self.create(base_id, 'base_dc')
        self.assertEqual(oldest_paper.oairecords[0].doi,
                         '10.1111/j.1574-0862.2005.00325.x')
        expected_records = set(oldest_paper.oairecords)

        merged_paper = self.create(crossref_id, 'citeproc')

        # If a merge happens, the oldest paper must be the survivor,
        # otherwise links to it would break.
        self.assertEqual(oldest_paper, merged_paper)
        expected_records.add(OaiRecord.objects.get(identifier=crossref_id))
        self.assertEqual(set(merged_paper.oairecords), expected_records)

    def test_update_pdf_url(self):
        """
        Two OAI records share a splash URL but only the second one has a
        PDF URL: the PDF URL should be added to the existing record (the
        two records get merged) instead of creating a new one.
        """
        crossref_id = 'oai:crossref.org:10.1007/s10858-015-9994-8'

        # Ensure a clean slate for this identifier.
        self.delete(crossref_id)

        # Import from Crossref: no PDF URL at this point.
        crossref_paper = self.create(crossref_id, 'citeproc')
        self.assertEqual(crossref_paper.oairecords[0].pdf_url, None)

        # Import the second identifier.
        merged_paper = self.create('ftspringeroc:10.1007/s10858-015-9994-8',
                                   'base_dc')
        self.assertEqual(crossref_paper, merged_paper)

        # Still a single record, now carrying a PDF URL.
        self.assertEqual(len(merged_paper.oairecords), 1)
        self.assertNotEqual(merged_paper.oairecords[0].pdf_url, None)

    def test_create_match_identifier(self):
        """
        A record with the same identifier already exists, but its paper
        has since acquired a different fingerprint: re-importing must
        still return that paper with the same records.
        """
        record_id = 'ftccsdartic:oai:hal.archives-ouvertes.fr:hal-00939473'

        # Import from BASE.
        paper = self.create(record_id, 'base_dc')
        records_before = set(paper.oairecords)

        # Alter the title so that the fingerprint changes.
        paper.title += ' babebibobu'
        paper.fingerprint = paper.new_fingerprint()
        paper.save()

        # Import the very same record again.
        reimported = self.create(record_id, 'base_dc')
        self.assertEqual(reimported, paper)
        self.assertSetEqual(records_before, set(reimported.oairecords))

    def test_create_invalid_metadata(self):
        """
        Records whose metadata is not accepted produce no paper.
        """
        rejected_records = [
            # Contributors too long to fit in the db
            ('ftbnfgallica:oai:bnf.fr:gallica/ark:/12148/btv1b8621766k',
             'base_dc'),
            # No authors
            ('ftcarmelhelios:oai:archive.library.cmu.edu:heinz:box00200/fld00021/bdl0031/doc0001/',
             'base_dc'),
            # Bad publication date
            ('ftsehiruniv:oai:earsiv.sehir.edu.tr:11498/28266',
             'base_dc'),
        ]
        for record_id, metadata_format in rejected_records:
            created = self.create(record_id, metadata_format)
            self.assertEqual(created, None)

    def test_create_invalid_identifier(self):
        """
        Fetching an identifier that is not valid raises BadArgumentError.
        """
        with self.assertRaises(BadArgumentError):
            self.create('aiunrsecauiebleuiest', 'oai_dc')

    def test_create_invalid_format(self):
        """
        Fetching with a metadata format the interface does not offer
        raises BadArgumentError.
        """
        with self.assertRaises(BadArgumentError):
            self.create('aiunrsecauiebleuiest', 'unknown_format')

    # Tests of particular translators.
    # TODO: turn these into tests of the translators themselves rather
    # than of the whole backend?

    def test_base_doctype(self):
        """
        Each listed BASE record gets the expected document type.
        """
        expected_doctypes = {
            'ftunivsavoie:oai:HAL:hal-01062241v1': 'proceedings-article',
            'ftunivsavoie:oai:HAL:hal-01062339v1': 'book-chapter',
            'ftunivmacedonia:oai:dspace.lib.uom.gr:2159/6227': 'other',
            'ftartxiker:oai:HAL:hal-00845819v1': 'journal-article',
            'ftdatacite:oai:oai.datacite.org:402223': 'dataset',
        }
        for record_id, doctype in expected_doctypes.items():
            paper = self.create(record_id, 'base_dc')
            self.assertEqual(paper.doctype, doctype)

    def test_crossref_invalid_metadata(self):
        """
        A Crossref record whose authors have no family name produces no
        paper.
        """
        created = self.create('oai:crossref.org:10.4156/aiss.vol3.issue9.31',
                              'citeproc')
        self.assertEqual(created, None)

    def test_datacite(self):
        """
        A DataCite record yields a paper with a truthy PDF URL.
        """
        datacite_paper = self.create(
            'ftdatacite:oai:oai.datacite.org:8558707', 'base_dc')
        self.assertTrue(datacite_paper.pdf_url)

    def test_pmc(self):
        """
        Two PubMed Central records get the expected PDF URLs.
        """
        pmc_paper = self.create(
            'ftpubmed:oai:pubmedcentral.nih.gov:1968744', 'base_dc')
        self.assertEqual(
            pmc_paper.pdf_url,
            'http://www.ncbi.nlm.nih.gov/pmc/articles/PMC1968744')

        pubmed_paper = self.create(
            'ftpubmed:oai:pubmedcentral.nih.gov:4131942', 'base_dc')
        self.assertEqual(
            pubmed_paper.pdf_url,
            'http://www.ncbi.nlm.nih.gov/pubmed/24806729')

    def test_doi_prefix(self):
        """
        A DataCite record yields a paper whose PDF URL is not None.
        """
        datacite_paper = self.create(
            'ftdatacite:oai:oai.datacite.org:3505359', 'base_dc')
        has_pdf_url = datacite_paper.pdf_url is not None
        self.assertTrue(has_pdf_url)