Пример #1
0
 def test_to_paper(self, container_title, title, citeproc):
     """
     Turn the citeproc fixture into a paper and compare the stored paper
     and its OAI record with the fixture values.
     """
     paper = self.test_class.to_paper(citeproc)
     # A primary key shows that the paper was written to the database
     assert paper.pk >= 1

     # Paper fields: authors are compared pairwise with the citeproc entries
     for stored_author, source_author in zip(paper.authors_list,
                                             citeproc['author']):
         stored_name = stored_author['name']
         assert stored_name['first'] == source_author['given']
         assert stored_name['last'] == source_author['family']
         first_affiliation = source_author['affiliation'][0]
         assert stored_author['affiliation'] == first_affiliation['name']
         assert stored_author['orcid'] == source_author['ORCID']
     issued = date(*citeproc['issued']['date-parts'][0])
     assert paper.pubdate == issued
     assert paper.title == title

     # Fetching the record checks that it was created for this paper
     oairecord = OaiRecord.objects.get(about=paper)

     # OAI record fields
     doi = citeproc['DOI']
     assert oairecord.doi == doi
     assert oairecord.identifier == doi_to_crossref_identifier(doi)
     assert oairecord.issue == citeproc['issue']
     assert oairecord.journal_title == container_title
     assert oairecord.pages == citeproc['page']
     assert oairecord.pubdate == issued
     assert oairecord.publisher_name == citeproc['publisher']
     assert oairecord.source == OaiSource.objects.get(identifier='crossref')
     assert oairecord.splash_url == doi_to_url(doi)
     assert oairecord.volume == citeproc['volume']
Пример #2
0
def redirect_by_doi(request, doi):
    """
    Permanently redirect a DOI to a URL for the corresponding paper.

    This view is inherited from doai.io and was migrated to this code base
    to preserve the existing behaviour. Redirecting to unpaywall would be
    an alternative, but that would not include ResearchGate urls.

    The DOI is URL-unquoted and passed through ``to_doi``; a falsy result
    raises ``Http404``. The redirect goes to the ``pdf_url`` of the paper
    found for the DOI when there is one, otherwise to ``doi_to_url(doi)``.
    """
    doi = to_doi(unquote(doi))
    if not doi:
        raise Http404(_("Invalid DOI."))

    paper = Paper.get_by_doi(doi)
    if not paper or not paper.pdf_url:
        # No known PDF: fall back to the URL derived from the DOI
        return HttpResponsePermanentRedirect(doi_to_url(doi))
    return HttpResponsePermanentRedirect(paper.pdf_url)
Пример #3
0
    def create_oairecord(self, record):
        """
        Attach the location described by one dump line to its paper.

        :param record: one line of the dump, as a dict; the keys 'doi',
            'url' and 'host_type' are read

        Nothing is done when the DOI is unusable, when its prefix is in
        ``free_doi_prefixes``, when no paper can be found or created for
        it, or when the paper's ``pdf_url`` already equals the record URL.
        """
        doi = to_doi(record['doi'])
        if not doi or doi.split('/')[0] in free_doi_prefixes:
            return

        paper = Paper.get_by_doi(doi)
        if not paper:
            try:
                paper = Paper.create_by_doi(doi)
            except (MetadataSourceException, ValueError):
                return
            if not paper:
                print('no such paper for doi {doi}'.format(doi=doi))
                return

        pdf_url = record['url']

        # just to speed things up a bit...
        if paper.pdf_url == pdf_url:
            return

        # Defaults describe a location reported by the oadoi source
        splash_url = pdf_url
        identifier = 'oadoi:' + pdf_url
        source = self.oadoi_source

        # Publisher-hosted locations are filed under the crossref source
        if record['host_type'] == 'publisher':
            splash_url = doi_to_url(doi)
            identifier = doi_to_crossref_identifier(doi)
            source = self.crossref_source

        oairecord = BareOaiRecord(
            paper=paper,
            doi=doi,
            pubtype=paper.doctype,
            source=source,
            identifier=identifier,
            splash_url=splash_url,
            pdf_url=pdf_url,
        )
        try:
            paper.add_oairecord(oairecord)
            paper.update_availability()
            # TODO re-enable this
            #paper.update_index()
        except (DataError, ValueError):
            print('Record does not fit in the DB')
Пример #4
0
def redirect_by_doi(request, doi):
    """
    Permanently redirect a DOI to a URL for the corresponding paper.

    This view is inherited from doai.io and was migrated to this code base
    to preserve the existing behaviour. Redirecting to unpaywall would be
    an alternative, but that would not include ResearchGate urls.

    Raises ``Http404`` when ``to_doi`` returns a falsy value for the
    unquoted DOI.
    """
    cleaned_doi = to_doi(unquote(doi))
    if not cleaned_doi:
        raise Http404(_("Invalid DOI."))

    paper = Paper.get_by_doi(cleaned_doi)
    # Prefer the paper's PDF; otherwise use the URL derived from the DOI
    target = (paper.pdf_url if paper and paper.pdf_url
              else doi_to_url(cleaned_doi))
    return HttpResponsePermanentRedirect(target)
Пример #5
0
    def citation_dict(self):
        """
        Return the paper as a dict for citation purposes, following the
        internal model used by Python-bibtexparser.

        Always set: 'ENTRYTYPE', 'ID', 'title' and 'author'. Set only when
        the data is present: 'volume', 'pages', 'journal', 'month', 'year',
        'abstract', 'url' and 'doi'.
        """
        # NOTE(review): the ID reads self.authors[0] and self.pubdate.year
        # unconditionally although pubdate is tested for truthiness further
        # down - confirm both are always present.
        citation_key = '{}{}'.format(self.authors[0].name.last,
                                     self.pubdate.year)
        author_field = ' and '.join(
            '%s, %s' % (author.name.last, author.name.first)
            for author in self.authors
        )
        entry = {
            'ENTRYTYPE': PAPER_TYPE_TO_BIBTEX.get(self.doctype, 'misc'),
            'ID': citation_key,
            'title': self.title,
            'author': author_field,
        }

        # Only the first three publications are used; a later one
        # overwrites the values taken from an earlier one.
        for publication in self.publications[:3]:
            if publication.volume:
                entry['volume'] = publication.volume
            if publication.pages:
                entry['pages'] = publication.pages
            if publication.journal:
                entry['journal'] = publication.journal.title
            elif publication.journal_title:
                entry['journal'] = publication.journal_title

        pubdate = self.pubdate
        if pubdate:
            entry['month'] = pubdate.strftime('%b').lower()
            entry['year'] = pubdate.strftime('%Y')

        abstract = self.abstract
        if abstract:
            entry['abstract'] = abstract

        doi = self.get_doi()
        pdf_url = self.pdf_url
        if pdf_url:
            entry['url'] = pdf_url
        if doi:
            entry['doi'] = doi
            if not pdf_url:
                # No PDF known: point the URL at the DOI instead
                entry['url'] = doi_to_url(doi)

        return entry
Пример #6
0
 def test_get_oairecord_data(self, db, monkeypatch, container_title, issn,
                             citeproc, journal):
     """
     Check the dict returned by _get_oairecord_data against the fixture.
     The assertions are relatively lax, as the called functions are
     tested separately.
     """
     def fake_find(issn, title):
         # Stand-in for Journal.find that always yields the fixture journal
         return journal

     monkeypatch.setattr(Journal, 'find', fake_find)
     result = self.test_class._get_oairecord_data(citeproc)
     doi = citeproc['DOI']
     assert result['doi'] == doi
     assert result['description'] == citeproc['abstract']
     assert result['identifier'] == doi_to_crossref_identifier(doi)
     assert result['issn'] == issn
     assert result['issue'] == citeproc['issue']
     assert result['journal'] == journal
     assert result['journal_title'] == container_title
     assert result['pages'] == citeproc['page']
     assert result['pdf_url'] == ''  # Is not OA
     issued = date(*citeproc['issued']['date-parts'][0])
     assert result['pubdate'] == issued
     assert result['publisher_name'] == citeproc['publisher']
     assert result['pubtype'] == citeproc['type']
     assert result['source'] == OaiSource.objects.get(identifier='crossref')
     assert result['splash_url'] == doi_to_url(doi)
     assert result['volume'] == citeproc['volume']
Пример #7
0
    def _get_oairecord_data(cls, data):
        """
        Collect the OAI record fields for a citeproc metadata dict.

        :param data: citeproc metadata
        :returns: Returns a dict of OAI record fields: 'doi', 'description',
            'identifier', 'issn', 'issue', 'journal', 'journal_title',
            'pages', 'pdf_url', 'pubdate', 'publisher', 'publisher_name',
            'pubtype', 'source', 'splash_url' and 'volume'
        :raises: CiteprocError
        """
        doi = cls._get_doi(data)
        splash_url = doi_to_url(doi)
        # Crossref metadata lists licenses under the singular key 'license';
        # the plural key this method used to read is kept as a fallback so
        # that callers passing 'licenses' keep working.
        licenses = data.get('license') or data.get('licenses', [])
        pdf_url = cls._get_pdf_url(doi, licenses, splash_url)

        journal_title = cls._get_container(data)
        issn = cls._get_issn(data)
        journal = Journal.find(issn=issn, title=journal_title)

        # Publisher names are cut to 512 characters
        publisher_name = data.get('publisher', '')[:512]
        publisher = cls._get_publisher(publisher_name, journal)

        bare_oairecord_data = {
            'doi': doi,
            'description': cls._get_abstract(data),
            'identifier': doi_to_crossref_identifier(doi),
            'issn': issn,
            'issue': data.get('issue', ''),
            'journal': journal,
            'journal_title': journal_title,
            'pages': data.get('page', ''),
            'pdf_url': pdf_url,
            'pubdate': cls._get_pubdate(data),
            'publisher': publisher,
            'publisher_name': publisher_name,
            'pubtype': cls._get_pubtype(data),
            'source': OaiSource.objects.get(identifier='crossref'),
            'splash_url': splash_url,
            'volume': data.get('volume', ''),
        }

        return bare_oairecord_data
Пример #8
0
 def test_fallback(self):
     """
     The DOI redirect view answers with a permanent redirect to the
     DOI's own URL for this DOI.
     """
     doi = '10.1385/1592597998'
     self.checkPermanentRedirect(
         'paper-redirect-doi',
         kwargs={'doi': doi},
         url=doi_to_url(doi))
Пример #9
0
 def test_fallback(self, check_permanent_redirect):
     """
     The DOI redirect view answers with a permanent redirect to the
     DOI's own URL for this DOI.
     """
     doi = '10.1385/1592597998'
     expected_kwargs = {'doi': doi}
     check_permanent_redirect('paper-redirect-doi',
                              kwargs=expected_kwargs,
                              url=doi_to_url(doi))
Пример #10
0
def _create_publication(paper, metadata):
    """
    Build an OAI record from publication metadata and attach it to ``paper``.

    :param paper: object the record is added to, through its
        ``add_oairecord`` and ``update_availability`` methods
    :param metadata: dict of publication metadata. Keys read:
        'container-title', 'DOI', 'ISSN', 'volume', 'page', 'issue',
        'issued', 'publisher', 'type' and 'license'.
    :returns: ``(paper, record)``, or ``None`` when the metadata is empty,
        has no container title or has no usable DOI
    """
    if not metadata:
        return
    if not metadata.get('container-title'):
        return
    doi = to_doi(metadata.get('DOI'))
    if not doi:
        # The splash URL, the DOI prefix and the record identifier are all
        # derived from the DOI, so nothing can be built without one.
        return

    # Only the first container title is kept, cut to 512 characters
    title = metadata['container-title']
    if isinstance(title, list):
        title = title[0]
    title = title[:512]

    issn = metadata.get('ISSN')
    if issn and isinstance(issn, list):
        issn = issn[0]  # TODO pass all the ISSN to the RoMEO interface
    volume = metadata.get('volume')
    pages = metadata.get('page')
    issue = metadata.get('issue')
    date_dict = metadata.get('issued', {})
    pubdate = None
    if 'date-parts' in date_dict:
        dateparts = date_dict.get('date-parts')[0]
        pubdate = date_from_dateparts(dateparts)
    # for instance it outputs dates like 2014-2-3
    publisher_name = metadata.get('publisher')
    if publisher_name:
        publisher_name = publisher_name[:512]

    pubtype = metadata.get('type', 'unknown')
    pubtype = CROSSREF_PUBTYPE_ALIASES.get(pubtype, pubtype)
    splash_url = doi_to_url(doi)

    # PDF availability: the splash URL doubles as PDF URL when the DOI
    # prefix is listed as free or one of the licenses is an OA license.
    pdf_url = None
    license_urls = {(entry or {}).get('URL')
                    for entry in metadata.get('license', [])}
    doi_prefix = doi.split('/')[0]
    if (doi_prefix in free_doi_prefixes
            or any(is_oa_license(url) for url in license_urls)):
        pdf_url = splash_url

    # Lookup journal
    search_terms = {'jtitle': title}
    if issn:
        search_terms['issn'] = issn
    journal = fetch_journal(search_terms)

    # The journal's publisher wins over a lookup by publisher name
    if journal:
        publisher = journal.publisher
        AliasPublisher.increment(publisher_name, journal.publisher)
    else:
        publisher = fetch_publisher(publisher_name)

    barepub = BareOaiRecord(
            paper=paper,
            journal_title=title,
            issue=issue,
            volume=volume,
            pubdate=pubdate,
            pages=pages,
            doi=doi,
            pubtype=pubtype,
            publisher_name=publisher_name,
            journal=journal,
            publisher=publisher,
            pdf_url=pdf_url,
            splash_url=splash_url,
            source=OaiSource.objects.get(identifier='crossref'),
            identifier=doi_to_crossref_identifier(doi))
    rec = paper.add_oairecord(barepub)
    paper.update_availability()
    return paper, rec
Пример #11
0
 def test_doi_to_url(self):
      """doi_to_url returns the https://doi.org/ URL of the DOI."""
      doi = '10.1093/jhmas/xxxi.4.480'
      expected_url = 'https://doi.org/10.1093/jhmas/xxxi.4.480'
      self.assertEqual(doi_to_url(doi), expected_url)
Пример #12
0
def _create_publication(paper, metadata):
    """
    Build an OAI record from publication metadata and attach it to ``paper``.

    :param paper: object the record is added to, through its
        ``add_oairecord`` and ``update_availability`` methods
    :param metadata: dict of publication metadata. Keys read:
        'container-title', 'DOI', 'ISSN', 'volume', 'page', 'issue',
        'issued', 'publisher', 'type' and 'license'.
    :returns: ``(paper, record)``, or ``None`` when the metadata is empty,
        has no container title or has no usable DOI
    """
    if not metadata:
        return
    if not metadata.get('container-title'):
        return
    doi = to_doi(metadata.get('DOI'))
    if not doi:
        # The splash URL, the DOI prefix and the record identifier are all
        # derived from the DOI, so nothing can be built without one.
        return

    # Only the first container title is kept, cut to 512 characters
    title = metadata['container-title']
    if isinstance(title, list):
        title = title[0]
    title = title[:512]

    issn = metadata.get('ISSN')
    if issn and isinstance(issn, list):
        issn = issn[0]  # TODO pass all the ISSN to the RoMEO interface
    volume = metadata.get('volume')
    pages = metadata.get('page')
    issue = metadata.get('issue')
    date_dict = metadata.get('issued', {})
    pubdate = None
    if 'date-parts' in date_dict:
        dateparts = date_dict.get('date-parts')[0]
        pubdate = date_from_dateparts(dateparts)
    # for instance it outputs dates like 2014-2-3
    publisher_name = metadata.get('publisher')
    if publisher_name:
        publisher_name = publisher_name[:512]

    pubtype = metadata.get('type', 'unknown')
    pubtype = CROSSREF_PUBTYPE_ALIASES.get(pubtype, pubtype)
    splash_url = doi_to_url(doi)

    # PDF availability: the splash URL doubles as PDF URL when the DOI
    # prefix is listed as free or one of the licenses is an OA license.
    pdf_url = None
    license_urls = {(entry or {}).get('URL')
                    for entry in metadata.get('license', [])}
    doi_prefix = doi.split('/')[0]
    if (doi_prefix in free_doi_prefixes
            or any(is_oa_license(url) for url in license_urls)):
        pdf_url = splash_url

    # Lookup journal
    journal = Journal.find(issn=issn, title=title)

    # The journal's publisher wins over a lookup by publisher name
    if journal:
        publisher = journal.publisher
        AliasPublisher.increment(publisher_name, journal.publisher)
    else:
        publisher = Publisher.find(publisher_name)

    barepub = BareOaiRecord(
            paper=paper,
            journal_title=title,
            issue=issue,
            volume=volume,
            pubdate=pubdate,
            pages=pages,
            doi=doi,
            pubtype=pubtype,
            publisher_name=publisher_name,
            journal=journal,
            publisher=publisher,
            pdf_url=pdf_url,
            splash_url=splash_url,
            source=OaiSource.objects.get(identifier='crossref'),
            identifier=doi_to_crossref_identifier(doi))
    rec = paper.add_oairecord(barepub)
    paper.update_availability()
    return paper, rec
Пример #13
0
def _create_publication(paper, metadata):
    """
    Build an OAI record from publication metadata and attach it to ``paper``.

    :param paper: object the record is added to, through its
        ``add_oairecord`` and ``update_availability`` methods
    :param metadata: dict of publication metadata. Keys read:
        'container-title', 'DOI', 'ISSN', 'volume', 'page', 'issue',
        'issued', 'publisher', 'type' and 'license'.
    :returns: ``(paper, record)``, or ``None`` when the metadata is empty,
        has no container title or has no usable DOI
    """
    if not metadata:
        return
    if not metadata.get('container-title'):
        return
    doi = to_doi(metadata.get('DOI'))
    if not doi:
        # The splash URL and the record identifier are both derived from
        # the DOI, so nothing can be built without one.
        return

    # Only the first container title is kept, cut to 512 characters
    title = metadata['container-title']
    if isinstance(title, list):
        title = title[0]
    title = title[:512]

    issn = metadata.get('ISSN')
    if issn and isinstance(issn, list):
        issn = issn[0]  # TODO pass all the ISSN to the RoMEO interface
    volume = metadata.get('volume')
    pages = metadata.get('page')
    issue = metadata.get('issue')
    date_dict = metadata.get('issued', {})
    pubdate = None
    if 'date-parts' in date_dict:
        dateparts = date_dict.get('date-parts')[0]
        pubdate = date_from_dateparts(dateparts)
    # for instance it outputs dates like 2014-2-3
    publisher_name = metadata.get('publisher')
    if publisher_name:
        publisher_name = publisher_name[:512]

    pubtype = metadata.get('type', 'unknown')
    pubtype = CROSSREF_PUBTYPE_ALIASES.get(pubtype, pubtype)

    splash_url = doi_to_url(doi)

    # PDF availability: the splash URL doubles as PDF URL when one of the
    # licenses is an OA license.
    pdf_url = None
    license_urls = {(entry or {}).get('URL')
                    for entry in metadata.get('license', [])}
    if any(is_oa_license(url) for url in license_urls):
        pdf_url = splash_url

    # Lookup journal
    search_terms = {'jtitle': title}
    if issn:
        search_terms['issn'] = issn
    journal = fetch_journal(search_terms)

    # The journal's publisher wins over a lookup by publisher name
    if journal:
        publisher = journal.publisher
        AliasPublisher.increment(publisher_name, journal.publisher)
    else:
        publisher = fetch_publisher(publisher_name)

    barepub = BareOaiRecord(
            paper=paper,
            journal_title=title,
            issue=issue,
            volume=volume,
            pubdate=pubdate,
            pages=pages,
            doi=doi,
            pubtype=pubtype,
            publisher_name=publisher_name,
            journal=journal,
            publisher=publisher,
            pdf_url=pdf_url,
            splash_url=splash_url,
            source=crossref_oai_source,
            identifier=doi_to_crossref_identifier(doi))
    rec = paper.add_oairecord(barepub)
    paper.update_availability()
    return paper, rec
Пример #14
0
    def create_oairecord(self, record, update_index=True, create_missing_dois=True):
        """
        Attach the OA locations listed in one dump line to its paper.

        :param record: one line of the dump, as a dict; the keys 'doi' and
            'oa_locations' are read, and 'url' and 'host_type' of each
            location
        :param update_index: when true, ``paper.update_index()`` is called
            after a save caused by a changed ``pdf_url``
        :param create_missing_dois: when false, a DOI without an existing
            paper is skipped instead of being passed to
            ``Paper.create_by_doi``
        """
        doi = to_doi(record['doi'])
        if not doi or doi.split('/')[0] in free_doi_prefixes:
            return
        oa_locations = record.get('oa_locations')
        if not oa_locations:
            return

        paper = Paper.get_by_doi(doi)
        if not paper:
            if not create_missing_dois:
                return
            try:
                paper = Paper.create_by_doi(doi)
            except (MetadataSourceException, ValueError):
                return
            if not paper:
                logger.info('no such paper for doi {doi}'.format(doi=doi))
                return
        logger.info(doi)
        paper.cache_oairecords()

        for location in oa_locations:
            location_url = location['url']

            # just to speed things up a bit...
            # NOTE(review): this leaves the whole method, so the locations
            # after a match are never examined - confirm this is intended.
            if paper.pdf_url == location_url:
                return

            # Defaults describe a location reported by the oadoi source
            splash_url = location_url
            identifier = 'oadoi:' + location_url
            source = self.oadoi_source

            # Publisher-hosted locations are filed under the crossref source
            if location['host_type'] == 'publisher':
                splash_url = doi_to_url(doi)
                identifier = doi_to_crossref_identifier(doi)
                source = self.crossref_source

            oairecord = BareOaiRecord(
                paper=paper,
                doi=doi,
                pubtype=paper.doctype,
                source=source,
                identifier=identifier,
                splash_url=splash_url,
                pdf_url=location_url)
            try:
                previous_pdf_url = paper.pdf_url
                # We disable checks by DOI since we know the paper has been looked up by DOI already.
                paper.add_oairecord(oairecord, check_by_doi=False)
                # Calls the update_availability of Paper's parent class;
                # saving and indexing are then done here, and only when the
                # PDF URL actually changed.
                super(Paper, paper).update_availability()
                if previous_pdf_url != paper.pdf_url:
                    paper.save()
                    if update_index:
                        paper.update_index()
            except (DataError, ValueError):
                logger.warning('Record does not fit in the DB')