def get_object(self):
    """Resolve the Paper for this view from a 'pk' or 'doi' URL kwarg.

    Raises Http404 when no matching, non-orphan paper can be found, and
    AttributeError when neither identifier was supplied.
    """
    queryset = self.get_queryset()
    pk = self.kwargs.get('pk', None)
    doi = self.kwargs.get('doi', None)
    if doi:
        doi = to_doi(doi)
    paper = None
    try:
        if pk is not None:
            paper = queryset.get(pk=pk)
        elif doi is not None:
            paper = Paper.get_by_doi(doi)
        else:
            raise AttributeError("Paper view expects a DOI or a pk")
    except ObjectDoesNotExist:
        pass
    if not paper and doi is not None:
        # BUG FIX: only attempt on-the-fly creation when a DOI is present;
        # previously a failed pk lookup fell through to create_by_doi(None).
        paper = Paper.create_by_doi(doi)
    if paper is None or paper.is_orphan():
        raise Http404(
            _("No %(verbose_name)s found matching the query") %
            {'verbose_name': Paper._meta.verbose_name})
    return paper
def test_query(self):
    """api-paper-query rejects malformed payloads (400), 404s on unknown
    papers, and finds papers by metadata or DOI once they exist."""
    bad_payloads = [
        'test',
        '{}',
        '{"doi":"anurisecbld"}',
        '{"title":""}',
        '{"title":"this is a test"}',
        '{"title":"this is a test","date":"aunriset"}',
        '{"title":"this is a test","date":"2008"}',
        '{"title":"this is a test","date":"2008","authors":"test"}',
        '{"title":"this is a test","date":"2008-03","authors":[]}',
        '{"title":"this is a test","date":"2008-03","authors":["lsc"]}',
        '{"title":"test","date":"2008-03","authors":[{"error":"test"}]}',
    ]
    for body in bad_payloads:
        self.checkJson(
            self.postPage('api-paper-query', postargs=body,
                          postkwargs={'content_type': 'application/json'}),
            400)
    # Paper not found
    payload = '{"title":"Refining the Conceptualization of a Future-Oriented Self-Regulatory Behavior: Proactive Coping", "date":"2009-07-01","authors":[{"first":"Stephanie Jean","last":"Sohl"},{"first":"Anne","last":"Moyer"}]}'
    self.checkJson(
        self.postPage('api-paper-query', postargs=payload,
                      postkwargs={'content_type': 'application/json'}),
        404)
    Paper.create_by_doi('10.1016/j.paid.2009.02.013')
    # Paper now found
    self.checkJson(
        self.postPage('api-paper-query', postargs=payload,
                      postkwargs={'content_type': 'application/json'}),
        200)
    self.checkJson(
        self.postPage('api-paper-query',
                      postargs='{"doi":"10.1016/j.paid.2009.02.013"}',
                      postkwargs={'content_type': 'application/json'}),
        200)
def get_object(self, queryset=None):
    """Resolve the Paper for this view from a 'pk' or percent-encoded 'doi' kwarg.

    Raises Http404 when neither identifier is given, when no matching
    non-orphan paper exists, or when the paper is marked invisible.
    """
    if queryset is None:
        queryset = self.get_queryset()
    pk = self.kwargs.get('pk', None)
    doi = self.kwargs.get('doi', None)
    if doi:
        # DOIs may arrive percent-encoded in the URL
        doi = unquote(doi)
        doi = to_doi(doi)
    paper = None
    try:
        if pk is not None:
            paper = queryset.get(pk=pk)
        elif doi is not None:
            paper = Paper.get_by_doi(doi)
        else:
            raise Http404(_("Paper view expects a DOI or a pk"))
    except ObjectDoesNotExist:
        pass
    if not paper and doi is not None:
        # BUG FIX: only attempt on-the-fly creation when a DOI is present;
        # previously a failed pk lookup fell through to create_by_doi(None).
        paper = Paper.create_by_doi(doi)
    if paper is None or paper.is_orphan():
        raise Http404(
            _("No %(verbose_name)s found matching the query") %
            {'verbose_name': Paper._meta.verbose_name})
    if not paper.visible:
        raise Http404(_("This paper has been deleted."))
    return paper
def test_publication_pdf_url(self):
    """A gold-OA paper's publication should get its pdf_url set."""
    # This paper is gold OA
    gold = Paper.from_bare(Paper.create_by_doi('10.1007/BF02702259'))
    # so the pdf_url of the publication should be set
    expected = 'https://doi.org/10.1007/BF02702259'.lower()
    self.assertEqual(gold.publications[0].pdf_url.lower(), expected)
def test_merge(self):
    """Two papers whose fingerprints converge after a title fix get merged."""
    # Get a paper with CrossRef metadata
    crossref_paper = Paper.from_bare(
        Paper.create_by_doi('10.1111/j.1744-6570.1953.tb01038.x'))
    # Create a copy with slight variations
    author_names = [
        BareName.create_bare(first, last)
        for first, last in [('M. H.', 'Jones'),
                            ('R. H.', 'Haase'),
                            ('S. F.', 'Hulbert')]
    ]
    variant = Paper.get_or_create(
        'A Survey of the Literature on Technical Positions',
        author_names,
        date(year=2011, month=1, day=1))
    # The two are not merged because of the difference in the title
    self.assertNotEqual(crossref_paper, variant)
    # Fix the title of the second one
    variant.title = 'A Survey of the Literature on Job Analysis of Technical Positions'
    variant.save()
    # The recomputed fingerprint now matches the first paper...
    self.assertEqual(variant.new_fingerprint(), crossref_paper.fingerprint)
    # ...but differs from the stored one
    self.assertNotEqual(variant.new_fingerprint(), variant.fingerprint)
    # and the first paper is found by its own fingerprint
    self.assertEqual(
        Paper.objects.filter(fingerprint=crossref_paper.fingerprint).first(),
        crossref_paper)
    # The two papers should hence be merged together
    merged = variant.recompute_fingerprint_and_merge_if_needed()
    self.assertEqual(merged.pk, crossref_paper.pk)
def test_unmerge_orcid_nones(self):
    """Papers wrongly merged via a 'None' ORCID work id can be unmerged."""
    # First, fetch a few DOIs
    target_dois = [
        "10.1075/aicr.90.09ngo",
        "10.1075/aicr.90.04wad",
    ]
    for target in target_dois:
        Paper.create_by_doi(target)
    # Then, fetch an ORCID profile with a buggy version of the ORCID
    # interface, which incorrectly merges papers together
    with patch.object(OrcidPaperSource, '_oai_id_for_doi') as mock_identifier:
        mock_identifier.return_value = "https://pub.orcid.org/v2.1/0000-0002-1909-134X/work/None"
        profile = OrcidProfileStub('0000-0002-1909-134X', instance='orcid.org')
        trung = Researcher.get_or_create_by_orcid(
            '0000-0002-1909-134X', profile=profile)
        OrcidPaperSource().fetch_and_save(trung, profile=profile)
    # The two papers are incorrectly merged!
    merged = [Paper.get_by_doi(target) for target in target_dois]
    self.assertEqual(merged[0], merged[1])
    # We unmerge them
    unmerge_orcid_nones()
    # The two papers are now distinct
    split = [Paper.get_by_doi(target) for target in target_dois]
    self.assertTrue(split[0] != split[1])
def get_object(self, queryset=None):
    """Resolve the Paper for this view from a 'pk' or percent-encoded 'doi' kwarg.

    Raises Http404 when neither identifier is given, when no matching
    non-orphan paper exists, or when the paper is marked invisible.
    """
    if queryset is None:
        queryset = self.get_queryset()
    pk = self.kwargs.get('pk', None)
    doi = self.kwargs.get('doi', None)
    if doi:
        # DOIs may arrive percent-encoded in the URL
        doi = unquote(doi)
        doi = to_doi(doi)
    paper = None
    try:
        if pk is not None:
            paper = queryset.get(pk=pk)
        elif doi is not None:
            paper = Paper.get_by_doi(doi)
        else:
            raise Http404(_("Paper view expects a DOI or a pk"))
    except ObjectDoesNotExist:
        pass
    if not paper and doi is not None:
        # BUG FIX: only create on the fly when we actually have a DOI;
        # previously a failed pk lookup called create_by_doi(None).
        paper = Paper.create_by_doi(doi)
    if paper is None or paper.is_orphan():
        raise Http404(_("No %(verbose_name)s found matching the query") %
                      {'verbose_name': Paper._meta.verbose_name})
    if not paper.visible:
        raise Http404(_("This paper has been deleted."))
    return paper
def test_create_by_doi(self):
    """DOIs are handled case-insensitively when creating papers."""
    # we recapitalize the DOI to make sure it is treated in a
    # case-insensitive way internally
    created = Paper.from_bare(Paper.create_by_doi('10.1109/sYnAsc.2010.88'))
    self.assertEqual(created.title,
                     'Monitoring and Support of Unreliable Services')
    self.assertEqual(created.publications[0].doi, '10.1109/synasc.2010.88')
def fetch_metadata_from_dois(self, ref_name, orcid_id, dois):
    """Yield enhanced paper metadata for each DOI, batch-fetched from CrossRef.

    Falls back to the DOI resolver for DOIs missing from the batch result.
    """
    crossref_papers = CrossRef.fetch_batch(dois)
    for paper, doi in zip(crossref_papers, dois):
        if paper is None:
            # We try with DOI resolver
            # This functions does check first on our database if we have an entry
            # BUG FIX: keep the created paper — the original discarded the
            # result and yielded an enhanced None.
            paper = Paper.create_by_doi(doi)
        if paper is None:
            # Still unresolved: skip rather than enhance a None paper.
            continue
        yield self._enhance_paper(paper, ref_name, orcid_id)
def unmerge_paper_by_dois(paper):
    """
    Given a paper that was merged by mistake, delete it and re-create it
    with each DOI found in it.
    """
    dois = [record.doi for record in paper.oairecords if record.doi]
    paper.delete()
    for doi in dois:
        try:
            Paper.create_by_doi(doi)
        except ValueError:
            # A single unresolvable DOI must not abort re-creating the
            # remaining papers.
            continue
def test_create_by_identifier(self):
    """create_by_oai_id returns None for a dateless record, a Paper otherwise."""
    # Paper has no date
    undated = Paper.create_by_oai_id(
        'ftciteseerx:oai:CiteSeerX.psu:10.1.1.487.869')
    self.assertEqual(undated, None)
    # Valid paper
    valid = Paper.create_by_oai_id(
        'ftpubmed:oai:pubmedcentral.nih.gov:4131942')
    self.assertEqual(valid.pdf_url,
                     'http://www.ncbi.nlm.nih.gov/pubmed/24806729')
def test_email_has_more_than_five_questions_with_three_questions(self):
    '''
    Method will return true if 5 or more papers exist for a certain e-mail.
    E-Mail is passed as a string for a count query. Creating 3 papers.
    '''
    sender = Sender(email="*****@*****.**", ip="10.10.50.50")
    sender.save()
    for _ in range(3):
        Paper(sender=sender,
              sent_to="*****@*****.**",
              active_until=timezone.now()).save()
    # Three papers is below the threshold of five
    self.assertEqual(
        email_has_more_than_five_questions_open('*****@*****.**'), False)
def test_publication_pdf_url(self):
    """Papers in an open-access journal get a pdf_url from the publisher."""
    # This journal is open access
    romeo = RomeoAPIStub()
    oa_journal = romeo.fetch_journal({'issn':'0250-6335'})
    # Therefore any paper in it is available from the publisher
    published = Paper.from_bare(Paper.create_by_doi('10.1007/BF02702259'))
    self.assertEqual(published.publications[0].journal, oa_journal)
    # so the pdf_url of the publication should be set
    self.assertEqual(published.publications[0].pdf_url.lower(),
                     'https://doi.org/10.1007/BF02702259'.lower())
def unmerge_paper_by_dois(paper):
    """
    Given a paper that was merged by mistake, delete it
    and re-create it with each DOI found in it.
    """
    doi_list = [rec.doi for rec in paper.oairecords if rec.doi]
    paper.delete()
    for doi in doi_list:
        try:
            Paper.create_by_doi(doi)
        except ValueError:
            # Skip DOIs that can no longer be resolved
            continue
def setUp(self):
    """Create a test user (not logged in) and one saved Paper fixture."""
    #Create test user but don't login.
    self.user = User.objects.create_user('test', '*****@*****.**', 'test')
    self.data = {
        'user': self.user,
        'title': 'Test Title',
        'url': 'http://example.com',
        'journal': 'Journal of Test',
        'year': '2011',
        'volume': '1',
        'authors': "Author One\nAuthor Two\nAuthor Three",
        'issue': '2',
        'pages': '3-4',
    }
    # unpack dictionary to arguments
    Paper(**self.data).save()
def test_get(self):
    """Paper.get resolves both a title fragment and a stringified pk."""
    paper, created = Paper.from_dict({"title": "Boring Science stuff"})
    self.assertTrue(created)
    self.assertIsNotNone(paper.id)
    by_title = Paper.get("boring science")
    self.assertIsNotNone(by_title)
    # assertEquals is a deprecated alias (removed in Python 3.12)
    self.assertEqual(paper, by_title)
    by_pk = Paper.get(str(paper.id))
    self.assertIsNotNone(by_pk)
    self.assertEqual(paper, by_pk)
def test_delete(self):
    """Deleting a paper cascades to its links and files, and removes the
    stored file from disk."""
    paper, created = Paper.from_dict({"title": "Paper"})
    self.assertTrue(created)
    self.assertIsNotNone(paper.id)
    deleted = paper.delete()
    self.assertIsNone(paper.id)
    # assertEquals is a deprecated alias (removed in Python 3.12)
    self.assertEqual((1, {"papers.Paper": 1}), deleted)
    paper, created = Paper.from_dict(
        {"title": "Paper", "links": [{"url": "https://example.com"}]}
    )
    self.assertTrue(created)
    self.assertIsNotNone(paper.id)
    deleted = paper.delete()
    self.assertIsNone(paper.id)
    self.assertEqual(
        (3, {"papers.Paper": 1, "papers.Paper_links": 1, "links.Link": 1}), deleted
    )
    with NamedTemporaryFile() as f:
        f.write(b"Lorem ipsum dolorem")
        paper, created = Paper.from_dict(
            {
                "title": "Paper",
                "links": [{"url": "https://example.com"}],
                "files": [{"path": f.name}],
            }
        )
        self.assertTrue(created)
        self.assertIsNotNone(paper.id)
        path = os.path.join(
            settings.MEDIA_ROOT, "papers", str(paper.pk), os.path.basename(f.name)
        )
        self.assertEqual({"path": path}, paper.files.first().to_dict())
        deleted = paper.delete()
        self.assertIsNone(paper.id)
        self.assertEqual(
            (
                4,
                {
                    "papers.Paper": 1,
                    "papers.Paper_links": 1,
                    "links.Link": 1,
                    "files.File": 1,
                },
            ),
            deleted,
        )
        self.assertFalse(os.path.exists(path))
def test_ingest_dump(self):
    """Loading an unpaywall dump attaches pdf_urls to existing papers."""
    doi = '10.1080/21645515.2017.1330236'
    paper = Paper.create_by_doi(doi)
    self.assertEqual(paper.pdf_url, None)
    Paper.create_by_doi(doi)
    # then load an OAdoi dump
    OadoiAPI().load_dump(
        os.path.join(self.testdir, 'data/sample_unpaywall_snapshot.jsonl.gz'))
    # the paper is now OA, yay!
    refreshed = Paper.get_by_doi(doi)
    self.assertEqual(refreshed.pdf_url,
                     'http://europepmc.org/articles/pmc5718814?pdf=render')
def handle(self, *args, **options):
    """Import papers from a BibTeX file, creating one Paper per entry.

    Each entry's BibTeX is re-serialized individually; its link falls back
    to a Google search on the title when no url/doi field exists.
    """
    inBibtexFile = options["bibtex_file"]
    if not os.path.isfile(inBibtexFile):
        self.stdout.write(self.style.ERROR(
            "File '{}' does not exist.".format(inBibtexFile)))
        sys.exit()
    try:
        with open(inBibtexFile, encoding="utf-8") as bibtexFile:
            bibtexData = bibtexparser.load(bibtexFile)
    except Exception:  # narrow from bare 'except:' (no longer traps SystemExit)
        self.stdout.write(self.style.WARNING(
            "Failed reading file with UTF-8 encoding, attempting to read as Latin-1."))
        try:
            with open(inBibtexFile, encoding="ISO-8859-1") as bibtexFile:
                bibtexData = bibtexparser.load(bibtexFile)
        except Exception:
            self.stdout.write(self.style.ERROR(
                "Failed reading file with either UTF-8 or Latin-1 encoding."))
            sys.exit()
    bibWriter = bibtexparser.bwriter.BibTexWriter()
    bibWriter.contents = ["entries"]
    bibWriter.indent = " "
    for entry in bibtexData.entries:
        # BUG FIX: the class itself was assigned instead of instantiated,
        # so every iteration mutated the shared class attribute 'entries'.
        singleEntryBibDatabase = bibtexparser.bibdatabase.BibDatabase()
        singleEntryBibDatabase.entries = [entry]
        bibtex = bibtexparser.dumps(singleEntryBibDatabase, bibWriter)
        link = None
        for linkKey in ["url", "URL", "doi", "DOI"]:
            if linkKey in entry:
                link = entry[linkKey]
                break
        if not link:
            link = 'https://www.google.com/search?q="{}"'.format(
                entry["title"])
        paper = Paper(bibtex=bibtex, link=link)
        paper.save()
        self.stdout.write(
            self.style.SUCCESS(" -- Imported: {}".format(paper)))
    self.stdout.write(
        self.style.SUCCESS("Successfully imported {} papers.".format(
            len(bibtexData.entries))))
def create_oairecord(self, record):
    """ Given one line of the dump (represented as a dict), add it to the corresponding paper (if it exists) """
    # Normalize and validate the DOI; bail out on unparsable ones.
    doi = to_doi(record['doi'])
    if not doi:
        return
    # Skip DOI prefixes whose content is known to be free already.
    prefix = doi.split('/')[0]
    if prefix in free_doi_prefixes:
        return
    paper = Paper.get_by_doi(doi)
    if not paper:
        try:
            paper = Paper.create_by_doi(doi)
        except (MetadataSourceException, ValueError):
            return
    if not paper:
        print('no such paper for doi {doi}'.format(doi=doi))
        return
    url = record['url']
    # just to speed things up a bit...
    if paper.pdf_url == url:
        return
    identifier = 'oadoi:' + url
    source = self.oadoi_source
    # Publisher-hosted copies are recorded under the CrossRef source with
    # the canonical DOI splash URL instead.
    if record['host_type'] == 'publisher':
        url = doi_to_url(doi)
        identifier = doi_to_crossref_identifier(doi)
        source = self.crossref_source
    # NOTE: the input dict 'record' is read one final time (pdf_url) in the
    # very statement that rebinds the name to the new BareOaiRecord.
    record = BareOaiRecord(paper=paper, doi=doi, pubtype=paper.doctype,
                           source=source, identifier=identifier,
                           splash_url=url, pdf_url=record['url'])
    try:
        paper.add_oairecord(record)
        paper.update_availability()
        # TODO re-enable this
        #paper.update_index()
    except (DataError, ValueError):
        print('Record does not fit in the DB')
def api_paper_doi(request, doi):
    """API endpoint: look up (or create) a paper by DOI; JSON 404 if unknown."""
    found = None
    try:
        found = Paper.get_by_doi(doi)
        if not found:
            found = Paper.create_by_doi(doi)
    except MetadataSourceException:
        # A metadata-source failure is treated like a missing paper
        pass
    if found is None:
        return JsonResponse({
            'error': 404,
            'message': 'The paper you requested could not be found.',
        }, status=404)
    return api_paper_common(request, found)
def test_ingest_dump(self):
    """An OAdoi CSV dump turns a closed paper into an OA one."""
    doi = '10.1016/j.reval.2012.02.143'
    paper = Paper.create_by_doi(doi)
    self.assertEqual(paper.pdf_url, None)
    Paper.create_by_doi(doi)
    # then load an OAdoi dump
    OadoiAPI().load_dump('devutils/sample_oadoi_dump.csv.gz')
    # the paper is now OA, yay!
    refreshed = Paper.get_by_doi(doi)
    self.assertEqual(
        refreshed.pdf_url,
        'http://prodinra.inra.fr/ft/CC06E77F-B3EE-4BD2-890D-067243B8ACAF')
def test_multiple_get_or_create(self):
    """Two get_or_create calls with compatible author names hit the same paper."""
    pub_date = datetime.date(year=2003, month=4, day=9)
    first = Paper.get_or_create(
        'Beta-rays in black pudding',
        [Name.lookup_name(pair) for pair in
         [('F.', 'Rodrigo'), ('A.', 'Johnson'), ('Pete', 'Blunsom')]],
        pub_date)
    second = Paper.get_or_create(
        'Beta-rays in black pudding',
        [Name.lookup_name(pair) for pair in
         [('Frank', 'Rodrigo'), ('A. L.', 'Johnson'), ('P.', 'Blunsom')]],
        pub_date)
    self.assertEqual(first.pk, second.pk)
    # Author names are merged into their most complete form
    self.assertEqual(
        Paper.objects.get(pk=first.pk).bare_author_names(),
        [('Frank', 'Rodrigo'), ('A. L.', 'Johnson'), ('Pete', 'Blunsom')])
def test_attributions_preserved_by_merge(self):
    """Researcher attributions survive a merge of two papers."""
    first = Paper.create_by_doi('10.4049/jimmunol.167.12.6786')
    hauschildt = Researcher.create_by_name('Stephan', 'Hauschildt')
    first.set_researcher(4, hauschildt.id)
    second = Paper.create_by_doi('10.1016/j.chemgeo.2015.03.025')
    lesne = Researcher.create_by_name('Priscille', 'Lesne')
    second.set_researcher(0, lesne.id)
    # merge them ! even if they actually don't have anything
    # to do together
    first.merge(second)
    first.check_authors()
    self.assertEqual(first.researchers, [lesne, hauschildt])
def fetch_and_save_new_records(self, starting_cursor='*', batch_time=datetime.timedelta(days=1)):
    """
    Fetches and stores all new Crossref records updated since the last
    update time of the associated OaiSource.

    :param starting_cursor: CrossRef deep-paging cursor to start from.
    :param batch_time: size of each update window.
    """
    source = OaiSource.objects.get(identifier='crossref')
    # We substract one day to 'until-update-date' parameter as it is inclusive
    one_day = datetime.timedelta(days=1)
    while source.last_update + batch_time < timezone.now():
        last_updated = source.last_update
        until_date = (last_updated + batch_time - one_day).date()
        to_update = self.fetch_all_records(filters={
            'from-update-date': last_updated.date().isoformat(),
            'until-update-date': until_date.isoformat()
        }, cursor=starting_cursor)
        for record in to_update:
            try:
                bare_paper = self.save_doi_metadata(record)
                p = Paper.from_bare(bare_paper)
                p.update_index()
            except ValueError as e:
                # FIX: log-message typo ('unkown')
                logger.info(record.get('DOI', 'unknown DOI') + ': %s' % e)
        # FIX: missing space after 'up to' in the log message
        logger.info("Updated up to " + until_date.isoformat())
        source.last_update += batch_time
        source.save()
def test_ingest_dump(self):
    """Ingesting an unpaywall snapshot makes the paper OA."""
    doi = '10.1080/21645515.2017.1330236'
    before = Paper.create_by_doi(doi)
    self.assertEqual(before.pdf_url, None)
    Paper.create_by_doi(doi)
    # then load an OAdoi dump
    dump_path = os.path.join(
        self.testdir, 'data/sample_unpaywall_snapshot.jsonl.gz')
    OadoiAPI().load_dump(dump_path)
    # the paper is now OA, yay!
    after = Paper.get_by_doi(doi)
    self.assertEqual(
        after.pdf_url,
        'http://europepmc.org/articles/pmc5718814?pdf=render')
def test_lncs(self):
    """Dry-deposit an LNCS paper with an abstract and license."""
    lncs_paper = Paper.create_by_doi('10.1007/978-3-662-47666-6_5')
    result = self.dry_deposit(lncs_paper, abstract=lorem_ipsum, license=self.lc)
    self.assertEqualOrLog(result.status, 'faked')
def test_create_by_doi(self):
    """DOIs are treated case-insensitively on creation."""
    # we recapitalize the DOI to make sure it is treated in a
    # case-insensitive way internally
    created = Paper.create_by_doi('10.1109/sYnAsc.2010.88')
    assert created.title == 'Monitoring and Support of Unreliable Services'
    assert created.publications[0].doi == '10.1109/synasc.2010.88'
    print(created.publications[0].last_update)
def test_search(self):
    """Full-text search matches title words case-insensitively."""
    paper, created = Paper.from_dict({"title": "Boring Science stuff"})
    self.assertTrue(created)
    self.assertIsNotNone(paper.id)
    paper, created = Paper.from_dict({"title": "Cool Science stuff"})
    self.assertTrue(created)
    self.assertIsNotNone(paper.id)
    paper, created = Paper.from_dict({"title": "Weird Science"})
    self.assertTrue(created)
    self.assertIsNotNone(paper.id)
    # assertEquals is a deprecated alias (removed in Python 3.12)
    self.assertEqual(3, Paper.objects.all().count())
    self.assertEqual(2, Paper.search("stuff").count())
    self.assertEqual(3, Paper.search("science").count())
def test_refresh_deposit_status(self):
    """A pending HAL deposit becomes 'published' after a status refresh."""
    # This is the identifier of a paper which should
    # currently be published on HAL preprod
    hal_id = 'hal-01211282'
    # First, fake the deposition of a paper
    deposited_paper = Paper.create_by_doi('10.1109/lics.2015.37')
    oai_record = OaiRecord.new(
        source=self.repo.oaisource,
        identifier='deposition:1:' + hal_id,
        splash_url='https://hal-preprod.archives-ouvertes.fr/' + hal_id,
        pdf_url=None,
        about=deposited_paper)
    pdf = UploadedPDF.objects.create(
        user=self.user,
        orig_name='File.pdf',
        file=os.path.join(self.testdir, 'testdata/blank.pdf'),
        thumbnail='my_thumbnail.png')
    deposit = DepositRecord.objects.create(
        paper=deposited_paper,
        oairecord=oai_record,
        repository=self.repo,
        user=self.user,
        status='pending',
        identifier=hal_id,
        upload_type='postprint',
        file=pdf)
    self.proto.refresh_deposit_status(deposit)
    self.assertEqual(deposit.status, 'published')
    self.assertTrue(oai_record.pdf_url)
def test_paper_already_in_hal_but_not_in_dissemin(self):
    """
    In this case, Dissemin missed the paper on HAL (for some reason)
    and so the deposit interface was enabled. But HAL refuses the
    deposit! We have to give a good error message to the user.
    """
    # this paper is currently in HAL-preprod
    hal_paper = Paper.create_by_doi('10.1051/jphys:01975003607-8060700')
    # this is just to make sure that we are depositing with
    # a single author (otherwise, the deposit would fail because
    # we are not providing enough affiliations).
    hal_paper.authors_list = [hal_paper.authors_list[0]]
    result = self.dry_deposit(hal_paper,
                              abstract='this is an abstract',
                              topic='INFO',
                              depositing_author=0,
                              affiliation=59704)  # ENS
    # Deposit fails: a duplicate is found
    self.assertEqualOrLog(result.status, 'failed')
    # The error message should be specific
    self.assertTrue('already in HAL' in result.message)
def fetch_and_save(self, researcher, incremental=False):
    """
    Fetch papers and save them to the database.

    :param incremental: When set to true, papers are clustered
        and commited one after the other. This is useful
        when papers are fetched on the fly for an user.
    """
    if self.ccf is None:
        raise ValueError('Clustering context factory not provided')
    # BUG FIX: 'count' was referenced below without ever being defined,
    # raising NameError as soon as max_results was set.
    count = 0
    for p in self.fetch_bare(researcher):
        # Save the paper as non-bare
        p = Paper.from_bare(p)
        # If clustering happens incrementally, cluster the researcher
        if incremental:
            self.ccf.clusterPendingAuthorsForResearcher(researcher)
            researcher.update_stats()
        # Check whether this paper is associated with an ORCID id
        # for the target researcher
        if researcher.orcid:
            # BUG FIX: 'if filter(...)' is always truthy in Python 3
            # (filter returns a lazy iterator); use any() instead.
            if any(a.orcid == researcher.orcid for a in p.authors):
                self.update_empty_orcid(researcher, False)
        count += 1
        if self.max_results is not None and count >= self.max_results:
            break
def test_merge(self, delete_user, django_user_model):
    """Merging two researchers combines profile data and moves attributions."""
    user_a = django_user_model.objects.create(
        username='******', first_name='Stefan', last_name='Beck')
    researcher_a = Researcher.create_by_name(
        'Stefan', 'Beck', email='*****@*****.**',
        homepage='https://becks.dissem.in', user=user_a)
    user_b = django_user_model.objects.create(
        username='******', first_name='Stefan', last_name='Beck')
    researcher_b = Researcher.create_by_name(
        'Stefan', 'Beck', orcid='0000-0001-8187-9704',
        homepage='https://sbeck.dissem.in', user=user_b)
    attributed = Paper.create_by_doi('10.17192/z2016.0217')
    attributed.set_researcher(0, researcher_b.pk)
    attributed.save()
    researcher_a.merge(researcher_b, delete_user=delete_user)
    attributed.refresh_from_db()
    researcher_a.refresh_from_db()
    assert researcher_a.name == researcher_b.name
    assert researcher_a.email == '*****@*****.**'
    assert researcher_a.homepage == 'https://becks.dissem.in'
    assert researcher_a.orcid == researcher_b.orcid
    assert attributed.authors_list[0]['researcher_id'] == researcher_a.pk
    # The merged-away researcher is gone
    with pytest.raises(Researcher.DoesNotExist):
        researcher_b.refresh_from_db()
    if delete_user:
        with pytest.raises(django_user_model.DoesNotExist):
            user_b.refresh_from_db()
    else:
        user_b.refresh_from_db()
def test_refresh_deposit_status(self):
    """Refreshing a faked pending deposit marks it published with a pdf_url."""
    # This is the identifier of a paper which should
    # currently be published on HAL preprod
    hal_identifier = 'hal-01211282'
    # First, fake the deposition of a paper
    fake_paper = Paper.create_by_doi('10.1109/lics.2015.37')
    record = OaiRecord.new(
        source=self.repo.oaisource,
        identifier='deposition:1:' + hal_identifier,
        splash_url='https://hal-preprod.archives-ouvertes.fr/' + hal_identifier,
        pdf_url=None,
        about=fake_paper)
    uploaded = UploadedPDF.objects.create(
        user=self.user,
        orig_name='File.pdf',
        file=os.path.join(self.testdir, 'testdata/blank.pdf'),
        thumbnail='my_thumbnail.png')
    deposit_record = DepositRecord.objects.create(
        paper=fake_paper,
        oairecord=record,
        repository=self.repo,
        user=self.user,
        status='pending',
        identifier=hal_identifier,
        upload_type='postprint',
        file=uploaded)
    self.proto.refresh_deposit_status(deposit_record)
    self.assertEqual(deposit_record.status, 'published')
    self.assertTrue(record.pdf_url)
def test_lncs(self):
    """ Submit a paper from LNCS """
    submitted = Paper.create_by_doi('10.1007/978-3-662-47666-6_5')
    result = self.dry_deposit(submitted, abstract='this is a test abstract')
    self.assertEqual(result.status, 'faked')
def test_redirect_pdf(self, check_permanent_redirect):
    """paper-redirect-doi sends the visitor to the paper's pdf_url."""
    target = Paper.get_by_doi('10.1145/2767109.2767116')
    target.pdf_url = 'http://my.fantastic.repository/'
    target.save()
    check_permanent_redirect(
        'paper-redirect-doi',
        kwargs={'doi': '10.1145/2767109.2767116'},
        url=target.pdf_url)
def test_fetch(self):
    """Fetching an ORCID profile yields more than one checkable paper."""
    profile = OrcidProfileStub('0000-0002-8612-8827', instance='orcid.org')
    fetched = list(self.source.fetch_papers(self.researcher, profile=profile))
    for bare in fetched:
        # each bare paper must be promotable to a stored one
        Paper.from_bare(bare)
    self.assertTrue(len(fetched) > 1)
    self.check_papers(fetched)
def fetch_and_save_new_records(self, starting_cursor='*', batch_time=datetime.timedelta(days=1)):
    """
    Fetches and stores all new Crossref records updated since
    the last update time of the associated OaiSource.

    :param starting_cursor: CrossRef deep-paging cursor to start from.
    :param batch_time: size of each update window.
    """
    source = OaiSource.objects.get(identifier='crossref')
    # We substract one day to 'until-update-date' parameter as it is inclusive
    one_day = datetime.timedelta(days=1)
    while source.last_update + batch_time < timezone.now():
        last_updated = source.last_update
        until_date = (last_updated + batch_time - one_day).date()
        to_update = self.fetch_all_records(
            filters={'from-update-date':last_updated.date().isoformat(),
                     'until-update-date':until_date.isoformat()},
            cursor=starting_cursor)
        for record in to_update:
            try:
                bare_paper = self.save_doi_metadata(record)
                p = Paper.from_bare(bare_paper)
                p.update_index()
            except ValueError as e:
                # FIX: log-message typo ('unkown')
                logger.info(record.get('DOI', 'unknown DOI') + ': %s' % e)
        # FIX: missing space after 'up to' in the log message
        logger.info("Updated up to " + until_date.isoformat())
        source.last_update += batch_time
        source.save()
def create_paper(self, work):
    """Build and persist a Paper (with its OAI record) from an ORCID work.

    Returns the saved Paper, or None when the bare paper is invalid.
    """
    assert (not work.skipped)
    # Create paper
    authors, orcids = work.authors_and_orcids
    bare = BarePaper.create(
        work.title,
        authors,
        work.pubdate,
        visible=True,
        affiliations=None,
        orcids=orcids,
    )
    bare.add_oairecord(BareOaiRecord(
        source=self.oai_source,
        identifier=work.api_uri,
        splash_url=work.splash_url,
        pubtype=work.pubtype))
    try:
        saved = Paper.from_bare(bare)
        saved = self.associate_researchers(saved)
        saved.save()
        saved.update_index()
    except ValueError:
        # invalid papers are silently dropped
        saved = None
    return saved
def test_find_duplicate_records_invalid_url(self):
    """find_duplicate_records tolerates non-HTTP URLs."""
    target = Paper.get_or_create(
        'this is a title',
        [Name.lookup_name(('Jean', 'Saisrien'))],
        datetime.date(year=2015, month=5, day=4))
    # This used to throw an exception
    OaiRecord.find_duplicate_records(target, 'ftp://dissem.in/paper.pdf', None)
def test_create_match_fp(self):
    """
    Addition of an OAI record when it is matched with an existing
    record by fingerprint.
    """
    doi = '10.1016/j.crma.2012.10.021'
    oai_id = 'ftarxivpreprints:oai:arXiv.org:1112.6130'
    # first, make sure the paper isn't there already
    Paper.objects.filter(oairecord__doi=doi).delete()
    # create a paper from BASE
    crossref_paper = Paper.create_by_doi(doi)
    # Save the existing records
    known_records = set(crossref_paper.oairecords)
    # Create a new paper (refers to the same paper, but coming from
    # another source)
    base_paper = self.create(oai_id, 'base_dc')
    # the resulting paper has to be equal to the first one
    # (this does not check that all their attributes are equal, just
    # that they are the same row in the database, i.e. have same id)
    self.assertEqual(base_paper, crossref_paper)
    # the new set of records is the old one plus the new record
    known_records.add(OaiRecord.objects.get(identifier=oai_id))
    self.assertSetEqual(set(base_paper.oairecords), known_records)
def test_invisible_paper(self):
    """
    If a paper is marked as invisible, then accessing it returns 404
    """
    hidden = Paper.create_by_doi('10.1007/978-3-642-14363-2_7')
    hidden.visible = False
    hidden.save()
    self.check404('paper', kwargs={'pk': hidden.id, 'slug': hidden.slug})
def test_unmerge_paper(self):
    """Unmerging a mistaken merge recreates one paper per DOI."""
    doi_a = "10.1016/j.bmc.2005.06.035"
    doi_b = "10.1016/j.ijar.2017.06.011"
    # First we merge two unrelated papers
    merged = Paper.create_by_doi(doi_a)
    title_a = merged.title
    other = Paper.create_by_doi(doi_b)
    title_b = other.title
    merged.merge(other)
    # Then we unmerge them
    unmerge_paper_by_dois(merged)
    # We have two new papers!
    recreated_a = Paper.get_by_doi(doi_a)
    self.assertTrue(recreated_a.id != merged.id)
    self.assertEqual(recreated_a.title, title_a)
    recreated_b = Paper.get_by_doi(doi_b)
    self.assertTrue(recreated_b.id != merged.id)
    self.assertTrue(recreated_b.id != recreated_a.id)
    self.assertEqual(recreated_b.title, title_b)
def test_500_error(self):
    """A 500 from Zenodo makes the deposit fail cleanly."""
    with requests_mock.Mocker(real_http=True) as mocker:
        mocker.get(re.compile(r'.*\.zenodo\.org/.*'), status_code=500)
        submitted = Paper.create_by_doi('10.1007/978-3-662-47666-6_5')
        result = self.dry_deposit(
            submitted,
            abstract=lorem_ipsum,
            license=ZENODO_DEFAULT_LICENSE_CHOICE)
        self.assertEqual(result.status, 'failed')
def test_bibtex_formatting(self):
    """Check that the BibTeX export for known DOIs matches the expected
    output byte-for-byte (after stripping surrounding whitespace), via both
    the DOI and the pk API endpoints."""
    # NOTE(review): the expected strings below must stay byte-identical to
    # the API output — do not reflow or reformat them.
    dois_bibtex = {
        '10.1007/978-3-662-49214-7_4': '''@incollection{Tang2016, author = {Tang, Ruiming and Amarilli, Antoine and Senellart, Pierre and Bressan, Stéphane}, doi = {10.1007/978-3-662-49214-7_4}, journal = {Transactions on Large-Scale Data- and Knowledge-Centered Systems XXIV}, month = {jan}, pages = {116-138}, title = {A Framework for Sampling-Based XML Data Pricing}, url = {https://oadoi.org/10.1007/978-3-662-49214-7_4}, year = {2016} }''',
        '10.1145/3034786.3056121': '''@misc{Amarilli2017, author = {Amarilli, Antoine and Monet, Mikaël and Senellart, Pierre}, doi = {10.1145/3034786.3056121}, journal = {Proceedings of the 36th ACM SIGMOD-SIGACT-SIGAI Symposium on Principles of Database Systems - PODS '17}, month = {jan}, title = {Conjunctive Queries on Probabilistic Graphs: Combined Complexity}, url = {https://oadoi.org/10.1145/3034786.3056121}, year = {2017} }''',
        '10.1007/978-3-319-45856-4_22': '''@incollection{Amarilli2016, author = {Amarilli, Antoine and Maniu, Silviu and Monet, Mikaël}, doi = {10.1007/978-3-319-45856-4_22}, journal = {Lecture Notes in Computer Science}, month = {jan}, pages = {323-330}, title = {Challenges for Efficient Query Evaluation on Structured Probabilistic Data}, url = {https://oadoi.org/10.1007/978-3-319-45856-4_22}, year = {2016} }''',
        '10.1103/physrevapplied.11.024003': '''@misc{Verney2019, author = {Verney, Lucas and Lescanne, Raphaël and Devoret, Michel H. 
 and Leghtas, Zaki and Mirrahimi, Mazyar}, doi = {10.1103/physrevapplied.11.024003}, journal = {Physical Review Applied}, month = {feb}, title = {Structural Instability of Driven Josephson Circuits Prevented by an Inductive Shunt}, url = {https://oadoi.org/10.1103/physrevapplied.11.024003}, volume = {11}, year = {2019} }''',
    }
    for doi, bibtex in dois_bibtex.items():
        p = Paper.create_by_doi(doi)
        # export via the DOI endpoint
        resp = self.getPage('api-paper-doi', args=[doi],
                            getargs={'format': 'bibtex'})
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp.content.decode('utf-8').strip(), bibtex.strip())
        # export via the pk endpoint must produce the same BibTeX
        resp = self.getPage('api-paper-pk', args=[p.id],
                            getargs={'format': 'bibtex'})
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(resp.content.decode('utf-8').strip(), bibtex.strip())
def test_paper_by_doi_escaped(self):
    """
    Automatically unescape DOIs, for issue
    https://github.com/dissemin/dissemin/issues/517
    """
    escaped_paper = Paper.create_by_doi(
        '10.1175/1520-0426(2003)020<0383%3ARCAACO>2.0.CO%3B2')
    escaped_paper.save()
    self.checkPermanentRedirect(
        'paper-doi',
        kwargs={'doi':'10.1175%2F1520-0426%282003%29020%3C0383%3ARCAACO%3E2.0.CO%3B2'},
        url=escaped_paper.url)
def setUpClass(self):
    """Shared fixture set-up; skipped for the abstract base class and on Travis."""
    super(ProtocolTest, self).setUpClass()
    if self is ProtocolTest:
        raise unittest.SkipTest("Base test")
    if 'TRAVIS' in os.environ:
        raise unittest.SkipTest("Skipping deposit test on Travis to avoid mass submissions to sandboxes")
    # BUG FIX: 'month=02' / leading-zero integer literals are a SyntaxError
    # in Python 3; use plain ints.
    self.p1 = Paper.get_or_create(
        "This is a test paper",
        [self.r1.name, self.r2.name, self.r4.name],
        date(year=2014, month=2, day=15))
def test_consolidate_paper(self):
    """The consolidated abstract field is returned for a logged-in user."""
    target = Paper.create_by_doi('10.1175/jas-d-15-0240.1')
    self.client.login(username='******', password='******')
    result = self.checkJson(self.getPage(
        'ajax-waitForConsolidatedField',
        getargs={'field': 'abstract', 'id': target.id}))
    self.client.logout()
    self.assertTrue(result['success'])
    self.assertTrue(len(result['value']) > 10)
def test_fetch_dois(self):
    """Fetching an ORCID profile stores papers retrievable by DOI."""
    profile = OrcidProfileStub('0000-0001-6723-6833', instance='orcid.org')
    researcher = Researcher.get_or_create_by_orcid(
        '0000-0001-6723-6833', profile=profile)
    self.source.fetch_and_save(researcher, profile=profile)
    doi = '10.3354/meps09890'
    stored = Paper.get_by_doi(doi)
    record_dois = [record.doi for record in stored.oairecords]
    self.assertTrue(doi in record_dois)
def test_get_form_initial_data(self):
    """The deposit form is pre-filled with the record's description."""
    target = Paper.create_by_doi('10.1007/978-3-662-47666-6_5')
    first_record = target.oairecords[0]
    description = "Supercalifragilisticexpialidocious."
    first_record.description = description
    first_record.save()
    self.proto.init_deposit(target, self.user)
    initial = self.proto.get_form_initial_data()
    self.assertIsInstance(initial, dict)
    self.assertEqual(initial.get('abstract'), description)
def test_submit_deposit_nolicense(self):
    """A deposit with this license id is still published."""
    target = Paper.create_by_doi('10.1007/978-3-662-47666-6_5')
    outcome = self.dry_deposit(
        target,
        license='58fd62fcda3e2400012ca5cc',
        abstract='Higitus Figitus Migitus Mum.',
        subjects=['59552884da3e240081ba32de'],
        tags='Sword, King, Wizard')
    self.assertEqualOrLog(outcome.status, 'published')
def test_submit_deposit(self):
    """A complete deposit request ends up published."""
    target = Paper.create_by_doi('10.1007/978-3-662-47666-6_5')
    outcome = self.dry_deposit(
        target,
        license='58fd62fcda3e2400012ca5d3',
        abstract='Salagadoola menchicka boola bibbidi-bobbidi-boo.',
        subjects=['59552884da3e240081ba32de'],
        tags='Pumpkin, Mouse, Godmother')
    self.assertEqualOrLog(outcome.status, 'published')
def test_lncs(self):
    """ Same as test_lncs but with only one author """
    single_author = Paper.create_by_doi('10.1007/978-3-319-63342-8_1')
    single_author.authors_list = [single_author.authors_list[0]]
    outcome = self.dry_deposit(single_author,
                               abstract='this is an abstract',
                               topic='INFO',
                               depositing_author=0,
                               affiliation=59704)  # ENS
    self.assertEqualOrLog(outcome.status, 'faked')
def test_update_paper_statuses(self):
    """update_paper_statuses propagates a new OAI record's pdf_url."""
    bare = self.cr_api.create_paper_by_doi("10.1016/j.bmc.2005.06.035")
    stored = Paper.from_bare(bare)
    self.assertEqual(stored.pdf_url, None)
    pdf_url = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
    OaiRecord.new(source=self.arxiv,
                  identifier='oai:arXiv.org:aunrisste',
                  about=stored,
                  splash_url='http://www.perdu.com/',
                  pdf_url=pdf_url)
    update_paper_statuses()
    self.assertEqual(Paper.objects.get(pk=stored.pk).pdf_url, pdf_url)
def test_keywords(self):
    """ Keywords are mandatory """
    target = Paper.create_by_doi('10.1007/s00268-016-3429-x')
    target.authors_list = [target.authors_list[0]]
    outcome = self.dry_deposit(target,
                               abstract='bla ble bli blo blu',
                               topic='SDV',
                               depositing_author=0,
                               affiliation=128940)
    self.assertEqualOrLog(outcome.status, 'faked')
def test_journal_article(self):
    """ Submit a journal article """
    target = Paper.create_by_doi('10.1016/j.agee.2004.10.001')
    target.authors_list = [target.authors_list[0]]
    outcome = self.dry_deposit(target,
                               abstract='here is my great result',
                               topic='SDV',
                               depositing_author=0,
                               affiliation=128940)
    self.assertEqualOrLog(outcome.status, 'faked')
def test_lics(self):
    """ Submit a paper from LICS (type: conference-proceedings) """
    target = Paper.create_by_doi('10.1109/lics.2015.37')
    target.authors_list = [target.authors_list[0]]
    outcome = self.dry_deposit(target,
                               abstract='here is my great result',
                               topic='NLIN',
                               depositing_author=0,
                               affiliation=128940)
    self.assertEqualOrLog(outcome.status, 'faked')