def test_to_paper(self, container_title, title, citeproc):
    """
    Convert citeproc metadata with ``to_paper`` and compare the resulting
    paper and its OAI record with the input, field by field.
    """
    paper = self.test_class.to_paper(citeproc)

    # Ensure that the paper is in the database (i.e. it was created)
    assert paper.pk >= 1

    # Paper fields; authors are compared pairwise, in order
    for stored, given in zip(paper.authors_list, citeproc['author']):
        stored_name = stored['name']
        assert stored_name['first'] == given['given']
        assert stored_name['last'] == given['family']
        assert stored['affiliation'] == given['affiliation'][0]['name']
        assert stored['orcid'] == given['ORCID']
    expected_pubdate = date(*citeproc['issued']['date-parts'][0])
    assert paper.pubdate == expected_pubdate
    assert paper.title == title

    # Ensure that the OAI record is in the database (i.e. it was created)
    oairecord = OaiRecord.objects.get(about=paper)

    # OAI record fields
    doi = citeproc['DOI']
    assert oairecord.doi == doi
    assert oairecord.identifier == doi_to_crossref_identifier(doi)
    assert oairecord.issue == citeproc['issue']
    assert oairecord.journal_title == container_title
    assert oairecord.pages == citeproc['page']
    assert oairecord.pubdate == expected_pubdate
    assert oairecord.publisher_name == citeproc['publisher']
    assert oairecord.source == OaiSource.objects.get(identifier='crossref')
    assert oairecord.splash_url == doi_to_url(doi)
    assert oairecord.volume == citeproc['volume']
def redirect_by_doi(request, doi):
    """
    Permanently redirect a DOI to the best URL known for it.

    This view is inherited from doai.io, migrated to this code base
    to preserve the existing behaviour. We could instead
    redirect to unpaywall, but that would not include ResearchGate urls.

    :param request: the incoming request (not used by the view)
    :param doi: the DOI as captured from the URL, still URL-quoted
    :returns: a permanent redirect to the paper's ``pdf_url`` when a paper
        with a ``pdf_url`` is found for the DOI, otherwise to
        ``doi_to_url(doi)``
    :raises Http404: when ``to_doi`` yields nothing for the given value
    """
    doi = to_doi(unquote(doi))
    if not doi:
        raise Http404(_("Invalid DOI."))

    paper = Paper.get_by_doi(doi)
    if paper and paper.pdf_url:
        target = paper.pdf_url
    else:
        # No paper, or no PDF known for it: fall back to the DOI URL
        target = doi_to_url(doi)
    return HttpResponsePermanentRedirect(target)
def create_oairecord(self, record):
    """
    Given one line of the dump (represented as a dict), add the location
    it describes to the corresponding paper (if it exists).

    :param record: dict read from the dump; the keys ``doi``, ``url`` and
        ``host_type`` are used
    :returns: always ``None``; the method works by side effect on the paper
    """
    doi = to_doi(record['doi'])
    if not doi:
        return
    if doi.split('/')[0] in free_doi_prefixes:
        return

    paper = Paper.get_by_doi(doi)
    if not paper:
        try:
            paper = Paper.create_by_doi(doi)
        except (MetadataSourceException, ValueError):
            return
        if not paper:
            print('no such paper for doi {doi}'.format(doi=doi))
            return

    pdf_url = record['url']
    # just to speed things up a bit...
    if paper.pdf_url == pdf_url:
        return

    # Default: the location is recorded under the oadoi source, with the
    # URL itself used as landing page.
    splash_url = pdf_url
    identifier = 'oadoi:' + pdf_url
    source = self.oadoi_source
    if record['host_type'] == 'publisher':
        # Publisher-hosted copy: recorded under the crossref source, with
        # the DOI URL as landing page.
        splash_url = doi_to_url(doi)
        identifier = doi_to_crossref_identifier(doi)
        source = self.crossref_source

    oairecord = BareOaiRecord(
        paper=paper,
        doi=doi,
        pubtype=paper.doctype,
        source=source,
        identifier=identifier,
        splash_url=splash_url,
        pdf_url=pdf_url,
    )
    try:
        paper.add_oairecord(oairecord)
        paper.update_availability()
        # TODO re-enable this
        # paper.update_index()
    except (DataError, ValueError):
        print('Record does not fit in the DB')
def citation_dict(self):
    """
    Dictionary representation of the paper, for citation purposes,
    based on the internal model used by Python-bibtexparser.

    The entry ``ID`` is the last name of the first author followed by the
    publication year. Either part is left out when the paper has no
    authors or no publication date, so that such papers still get an
    entry instead of raising.

    :returns: dict with the keys ``ENTRYTYPE``, ``ID``, ``title`` and
        ``author``, plus ``volume``, ``pages``, ``journal``, ``month``,
        ``year``, ``abstract``, ``doi`` and ``url`` when the
        corresponding information is available
    """
    authors = self.authors
    pubdate = self.pubdate

    # The rest of this method treats pubdate as optional, so the ID must
    # not assume it (nor a first author) either.
    first_author_last = authors[0].name.last if authors else ''
    year = pubdate.year if pubdate else ''

    entry = {
        'ENTRYTYPE': PAPER_TYPE_TO_BIBTEX.get(self.doctype, 'misc'),
        'ID': '%s%s' % (first_author_last, year),
        'title': self.title,
        'author': ' and '.join(
            '%s, %s' % (a.name.last, a.name.first) for a in authors
        ),
    }

    # Only the first three publications are considered; a value found in
    # a later one replaces the value taken from an earlier one.
    for publi in self.publications[:3]:
        if publi.volume:
            entry['volume'] = publi.volume
        if publi.pages:
            entry['pages'] = publi.pages
        if publi.journal:
            entry['journal'] = publi.journal.title
        elif publi.journal_title:
            entry['journal'] = publi.journal_title

    if pubdate:
        entry['month'] = pubdate.strftime('%b').lower()
        entry['year'] = pubdate.strftime('%Y')

    if self.abstract:
        entry['abstract'] = self.abstract

    # The PDF URL is preferred as 'url'; the DOI URL is only a fallback.
    doi = self.get_doi()
    if self.pdf_url:
        entry['url'] = self.pdf_url
    if doi:
        entry['doi'] = doi
        if not self.pdf_url:
            entry['url'] = doi_to_url(doi)

    return entry
def test_get_oairecord_data(self, db, monkeypatch, container_title, issn, citeproc, journal):
    """
    Check the dict built by ``_get_oairecord_data`` against the citeproc
    input. The assertions are relatively lax, as the functions it calls
    are tested on their own, too.
    """
    # Journal lookup is replaced so that it always yields the fixture
    monkeypatch.setattr(Journal, 'find', lambda issn, title: journal)
    record_data = self.test_class._get_oairecord_data(citeproc)

    doi = citeproc['DOI']
    expected = {
        'doi': doi,
        'description': citeproc['abstract'],
        'identifier': doi_to_crossref_identifier(doi),
        'issn': issn,
        'issue': citeproc['issue'],
        'journal': journal,
        'journal_title': container_title,
        'pages': citeproc['page'],
        'pdf_url': '',  # Is not OA
        'pubdate': date(*citeproc['issued']['date-parts'][0]),
        'publisher_name': citeproc['publisher'],
        'pubtype': citeproc['type'],
        'source': OaiSource.objects.get(identifier='crossref'),
        'splash_url': doi_to_url(doi),
        'volume': citeproc['volume'],
    }
    for field, value in expected.items():
        assert record_data[field] == value, field
def _get_oairecord_data(cls, data):
    """
    Collect from citeproc metadata the values describing the OAI record
    of a publication.

    :param data: citeproc metadata
    :returns: Returns a dict with the keys ``doi``, ``description``,
        ``identifier``, ``issn``, ``issue``, ``journal``,
        ``journal_title``, ``pages``, ``pdf_url``, ``pubdate``,
        ``publisher``, ``publisher_name``, ``pubtype``, ``source``,
        ``splash_url`` and ``volume``, ready to be passed on when
        creating the bare record
    :raises: CiteprocError (presumably from the ``_get_*`` helpers, which
        are not visible here -- TODO confirm)
    """
    doi = cls._get_doi(data)
    splash_url = doi_to_url(doi)

    # Crossref-style metadata lists licenses under the key 'license';
    # the previously read key 'licenses' is still honoured as a fallback.
    licenses = data.get('license') or data.get('licenses') or []
    pdf_url = cls._get_pdf_url(doi, licenses, splash_url)

    journal_title = cls._get_container(data)
    issn = cls._get_issn(data)
    journal = Journal.find(issn=issn, title=journal_title)

    # Publisher names are cut to 512 characters; an explicit null
    # publisher is treated like a missing one.
    publisher_name = (data.get('publisher') or '')[:512]
    publisher = cls._get_publisher(publisher_name, journal)

    return {
        'doi': doi,
        'description': cls._get_abstract(data),
        'identifier': doi_to_crossref_identifier(doi),
        'issn': issn,
        'issue': data.get('issue', ''),
        'journal': journal,
        'journal_title': journal_title,
        'pages': data.get('page', ''),
        'pdf_url': pdf_url,
        'pubdate': cls._get_pubdate(data),
        'publisher': publisher,
        'publisher_name': publisher_name,
        'pubtype': cls._get_pubtype(data),
        'source': OaiSource.objects.get(identifier='crossref'),
        'splash_url': splash_url,
        'volume': data.get('volume', ''),
    }
def test_fallback(self):
    """
    The DOI redirect view is expected to answer with a permanent
    redirect to ``doi_to_url`` of the requested DOI.
    """
    doi = '10.1385/1592597998'
    self.checkPermanentRedirect(
        'paper-redirect-doi',
        kwargs={'doi': doi},
        url=doi_to_url(doi),
    )
def test_fallback(self, check_permanent_redirect):
    """
    The DOI redirect view is expected to answer with a permanent
    redirect to ``doi_to_url`` of the requested DOI.
    """
    doi = '10.1385/1592597998'
    check_permanent_redirect(
        'paper-redirect-doi',
        kwargs={'doi': doi},
        url=doi_to_url(doi),
    )
def _create_publication(paper, metadata):
    """
    Build an OAI record from Crossref-style metadata and add it to the
    given paper.

    :param paper: the paper the record is added to
    :param metadata: dict of metadata; the keys ``DOI``,
        ``container-title``, ``ISSN``, ``volume``, ``page``, ``issue``,
        ``issued``, ``publisher``, ``type`` and ``license`` are read.
        May be empty or ``None``.
    :returns: the tuple ``(paper, record)``, where ``record`` is what
        ``paper.add_oairecord`` returned, or ``None`` when the metadata
        is empty, has no container title or has no usable DOI
    """
    if not metadata:
        return
    title = metadata.get('container-title')
    if not title:
        return
    doi = to_doi(metadata.get('DOI'))
    if not doi:
        # The DOI prefix, splash URL and record identifier below are all
        # derived from the DOI, so nothing can be built without one.
        # (assumes to_doi yields a falsy value for unusable input --
        # TODO confirm)
        return

    if isinstance(title, list):
        title = title[0]
    title = title[:512]

    issn = metadata.get('ISSN')
    if issn and isinstance(issn, list):
        issn = issn[0]  # TODO pass all the ISSN to the RoMEO interface

    volume = metadata.get('volume')
    pages = metadata.get('page')
    issue = metadata.get('issue')

    pubdate = None
    date_dict = metadata.get('issued', {})
    if 'date-parts' in date_dict:
        # Only the first entry of 'date-parts' is used.
        # for instance it outputs dates like 2014-2-3
        pubdate = date_from_dateparts(date_dict['date-parts'][0])

    publisher_name = metadata.get('publisher')
    if publisher_name:
        publisher_name = publisher_name[:512]

    pubtype = metadata.get('type', 'unknown')
    pubtype = CROSSREF_PUBTYPE_ALIASES.get(pubtype, pubtype)
    splash_url = doi_to_url(doi)

    # PDF availability: the splash URL doubles as PDF URL when the DOI
    # prefix is listed as free or one of the licenses is an OA license.
    license_urls = {
        (entry or {}).get('URL') for entry in metadata.get('license') or []
    }
    pdf_url = None
    is_free_prefix = doi.split('/')[0] in free_doi_prefixes
    if is_free_prefix or any(is_oa_license(url) for url in license_urls):
        pdf_url = splash_url

    # Lookup journal
    search_terms = {'jtitle': title}
    if issn:
        search_terms['issn'] = issn
    journal = fetch_journal(search_terms)

    if journal:
        publisher = journal.publisher
        AliasPublisher.increment(publisher_name, journal.publisher)
    else:
        publisher = fetch_publisher(publisher_name)

    barepub = BareOaiRecord(
        paper=paper,
        journal_title=title,
        issue=issue,
        volume=volume,
        pubdate=pubdate,
        pages=pages,
        doi=doi,
        pubtype=pubtype,
        publisher_name=publisher_name,
        journal=journal,
        publisher=publisher,
        pdf_url=pdf_url,
        splash_url=splash_url,
        source=OaiSource.objects.get(identifier='crossref'),
        identifier=doi_to_crossref_identifier(doi))
    rec = paper.add_oairecord(barepub)
    paper.update_availability()
    return paper, rec
def test_doi_to_url(self):
    """
    ``doi_to_url`` is expected to turn a DOI into its doi.org URL.
    """
    doi = '10.1093/jhmas/xxxi.4.480'
    expected_url = 'https://doi.org/10.1093/jhmas/xxxi.4.480'
    self.assertEqual(doi_to_url(doi), expected_url)
def _create_publication(paper, metadata):
    """
    Build an OAI record from Crossref-style metadata and add it to the
    given paper.

    :param paper: the paper the record is added to
    :param metadata: dict of metadata; the keys ``DOI``,
        ``container-title``, ``ISSN``, ``volume``, ``page``, ``issue``,
        ``issued``, ``publisher``, ``type`` and ``license`` are read.
        May be empty or ``None``.
    :returns: the tuple ``(paper, record)``, where ``record`` is what
        ``paper.add_oairecord`` returned, or ``None`` when the metadata
        is empty, has no container title or has no usable DOI
    """
    if not metadata:
        return
    title = metadata.get('container-title')
    if not title:
        return
    doi = to_doi(metadata.get('DOI'))
    if not doi:
        # The DOI prefix, splash URL and record identifier below are all
        # derived from the DOI, so nothing can be built without one.
        # (assumes to_doi yields a falsy value for unusable input --
        # TODO confirm)
        return

    if isinstance(title, list):
        title = title[0]
    title = title[:512]

    issn = metadata.get('ISSN')
    if issn and isinstance(issn, list):
        issn = issn[0]  # TODO pass all the ISSN to the RoMEO interface

    volume = metadata.get('volume')
    pages = metadata.get('page')
    issue = metadata.get('issue')

    pubdate = None
    date_dict = metadata.get('issued', {})
    if 'date-parts' in date_dict:
        # Only the first entry of 'date-parts' is used.
        # for instance it outputs dates like 2014-2-3
        pubdate = date_from_dateparts(date_dict['date-parts'][0])

    publisher_name = metadata.get('publisher')
    if publisher_name:
        publisher_name = publisher_name[:512]

    pubtype = metadata.get('type', 'unknown')
    pubtype = CROSSREF_PUBTYPE_ALIASES.get(pubtype, pubtype)
    splash_url = doi_to_url(doi)

    # PDF availability: the splash URL doubles as PDF URL when the DOI
    # prefix is listed as free or one of the licenses is an OA license.
    license_urls = {
        (entry or {}).get('URL') for entry in metadata.get('license') or []
    }
    pdf_url = None
    is_free_prefix = doi.split('/')[0] in free_doi_prefixes
    if is_free_prefix or any(is_oa_license(url) for url in license_urls):
        pdf_url = splash_url

    # Lookup journal
    journal = Journal.find(issn=issn, title=title)

    if journal:
        publisher = journal.publisher
        AliasPublisher.increment(publisher_name, journal.publisher)
    else:
        publisher = Publisher.find(publisher_name)

    barepub = BareOaiRecord(
        paper=paper,
        journal_title=title,
        issue=issue,
        volume=volume,
        pubdate=pubdate,
        pages=pages,
        doi=doi,
        pubtype=pubtype,
        publisher_name=publisher_name,
        journal=journal,
        publisher=publisher,
        pdf_url=pdf_url,
        splash_url=splash_url,
        source=OaiSource.objects.get(identifier='crossref'),
        identifier=doi_to_crossref_identifier(doi))
    rec = paper.add_oairecord(barepub)
    paper.update_availability()
    return paper, rec
def _create_publication(paper, metadata):
    """
    Build an OAI record from Crossref-style metadata and add it to the
    given paper.

    :param paper: the paper the record is added to
    :param metadata: dict of metadata; the keys ``DOI``,
        ``container-title``, ``ISSN``, ``volume``, ``page``, ``issue``,
        ``issued``, ``publisher``, ``type`` and ``license`` are read.
        May be empty or ``None``.
    :returns: the tuple ``(paper, record)``, where ``record`` is what
        ``paper.add_oairecord`` returned, or ``None`` when the metadata
        is empty, has no container title or has no usable DOI
    """
    if not metadata:
        return
    title = metadata.get('container-title')
    if not title:
        return
    doi = to_doi(metadata.get('DOI'))
    if not doi:
        # The splash URL and the record identifier below are both
        # derived from the DOI, so nothing can be built without one.
        # (assumes to_doi yields a falsy value for unusable input --
        # TODO confirm)
        return

    if isinstance(title, list):
        title = title[0]
    title = title[:512]

    issn = metadata.get('ISSN')
    if issn and isinstance(issn, list):
        issn = issn[0]  # TODO pass all the ISSN to the RoMEO interface

    volume = metadata.get('volume')
    pages = metadata.get('page')
    issue = metadata.get('issue')

    pubdate = None
    date_dict = metadata.get('issued', {})
    if 'date-parts' in date_dict:
        # Only the first entry of 'date-parts' is used.
        # for instance it outputs dates like 2014-2-3
        pubdate = date_from_dateparts(date_dict['date-parts'][0])

    publisher_name = metadata.get('publisher')
    if publisher_name:
        publisher_name = publisher_name[:512]

    pubtype = metadata.get('type', 'unknown')
    pubtype = CROSSREF_PUBTYPE_ALIASES.get(pubtype, pubtype)
    splash_url = doi_to_url(doi)

    # PDF availability: the splash URL doubles as PDF URL when one of
    # the licenses is an OA license.
    license_urls = {
        (entry or {}).get('URL') for entry in metadata.get('license') or []
    }
    pdf_url = None
    if any(is_oa_license(url) for url in license_urls):
        pdf_url = splash_url

    # Lookup journal
    search_terms = {'jtitle': title}
    if issn:
        search_terms['issn'] = issn
    journal = fetch_journal(search_terms)

    if journal:
        publisher = journal.publisher
        AliasPublisher.increment(publisher_name, journal.publisher)
    else:
        publisher = fetch_publisher(publisher_name)

    barepub = BareOaiRecord(
        paper=paper,
        journal_title=title,
        issue=issue,
        volume=volume,
        pubdate=pubdate,
        pages=pages,
        doi=doi,
        pubtype=pubtype,
        publisher_name=publisher_name,
        journal=journal,
        publisher=publisher,
        pdf_url=pdf_url,
        splash_url=splash_url,
        source=crossref_oai_source,
        identifier=doi_to_crossref_identifier(doi))
    rec = paper.add_oairecord(barepub)
    paper.update_availability()
    return paper, rec
def create_oairecord(self, record, update_index=True, create_missing_dois=True):
    """
    Given one line of the dump (represented as a dict), add its open
    access locations to the corresponding paper (if it exists).

    :param record: dict read from the dump; the key ``doi`` and the list
        ``oa_locations`` (items with ``url`` and ``host_type``) are used
    :param update_index: whether ``paper.update_index()`` is called after
        a change of the paper's ``pdf_url`` has been saved
    :param create_missing_dois: whether a paper is created with
        ``Paper.create_by_doi`` when none is found for the DOI
    :returns: always ``None``; the method works by side effect on the paper
    """
    doi = to_doi(record['doi'])
    if not doi:
        return
    if doi.split('/')[0] in free_doi_prefixes:
        return
    oa_locations = record.get('oa_locations')
    if not oa_locations:
        return

    paper = Paper.get_by_doi(doi)
    if not paper:
        if not create_missing_dois:
            return
        try:
            paper = Paper.create_by_doi(doi)
        except (MetadataSourceException, ValueError):
            return
        if not paper:
            logger.info('no such paper for doi {doi}'.format(doi=doi))
            return
    logger.info(doi)
    paper.cache_oairecords()

    for location in oa_locations:
        pdf_url = location['url']

        # just to speed things up a bit...
        # Note that this leaves the whole method, so the remaining
        # locations of this line are not looked at.
        if paper.pdf_url == pdf_url:
            return

        # Default: the location is recorded under the oadoi source, with
        # the URL itself used as landing page.
        splash_url = pdf_url
        identifier = 'oadoi:'+pdf_url
        source = self.oadoi_source
        if location['host_type'] == 'publisher':
            # Publisher-hosted copy: recorded under the crossref source,
            # with the DOI URL as landing page.
            splash_url = doi_to_url(doi)
            identifier = doi_to_crossref_identifier(doi)
            source = self.crossref_source

        oairecord = BareOaiRecord(
            paper=paper,
            doi=doi,
            pubtype=paper.doctype,
            source=source,
            identifier=identifier,
            splash_url=splash_url,
            pdf_url=pdf_url,
        )
        try:
            # We disable checks by DOI since we know the paper has been
            # looked up by DOI already.
            previous_pdf_url = paper.pdf_url
            paper.add_oairecord(oairecord, check_by_doi=False)
            # Calls the implementation of Paper's parent class, skipping
            # whatever Paper itself defines for update_availability.
            super(Paper, paper).update_availability()
            if previous_pdf_url != paper.pdf_url:
                # Save (and reindex) only when the PDF URL changed
                paper.save()
                if update_index:
                    paper.update_index()
        except (DataError, ValueError):
            logger.warning('Record does not fit in the DB')