Exemplo n.º 1
0
    def test_fetch_journal(self):
        """
        Fetching a journal by ISSN must yield a Journal that can afterwards
        be found through Journal.find, without altering the search terms.
        """
        search_terms = {'issn': '0001-3455'}
        snapshot = dict(search_terms)

        fetched = self.api.fetch_journal(search_terms)

        self.assertIsInstance(fetched, Journal)
        self.assertEqual(fetched.issn, search_terms['issn'])
        # Sadly RoMEO does not provide ESSNs via this API endpoint
        self.assertEqual(fetched.essn, None)
        self.assertEqual(fetched.publisher.last_updated,
                         dateutil.parser.parse('2013-03-11T11:27:37Z'))
        # The dictionary handed to the API must come back unchanged
        self.assertEqual(search_terms, snapshot)

        self.assertEqual(Journal.find(issn=search_terms['issn']), fetched)
Exemplo n.º 2
0
    def test_fix_buggy_romeo_ids(self):
        """
        A long time ago, the SHERPA API returned "DOAJ" or "journal" as
        publisher id for some journals, so these ids need to be updated
        appropriately when all journals are fetched.
        """
        issn = '0013-9696'
        dump_url = 'http://www.sherpa.ac.uk/downloads/journal-title-issns.php?format=tsv'

        stale_publisher = Publisher(romeo_id='DOAJ', preprint='can', postprint='can', pdfversion='can')
        stale_publisher.save()
        Journal(issn=issn, title='Greek Review of Social Research', publisher=stale_publisher).save()

        # The journals dump is served by a different endpoint, so it gets its own mock
        with requests_mock.mock() as http_mocker:
            http_mocker.get(dump_url, content=self.journals_dump_response)

            with patch.object(Journal, 'change_publisher') as change_publisher_mock:
                self.api.fetch_all_journals()

                refreshed = Journal.objects.get(issn=issn).publisher

                change_publisher_mock.assert_not_called()

                # Same database row as before, now carrying the corrected RoMEO id
                self.assertEqual(refreshed.pk, stale_publisher.pk)
                self.assertEqual(refreshed.romeo_id, '2201')
Exemplo n.º 3
0
    def _get_oairecord_data(cls, data):
        """
        Collect the OAI record fields from citeproc metadata.

        :param data: citeproc metadata
        :returns: Returns a dict, ready to be passed to a BarePaper instance
        :raises: CiteprocError
        """
        doi = cls._get_doi(data)
        splash_url = doi_to_url(doi)
        # Crossref-style metadata exposes licenses under the singular key
        # 'license'; 'licenses' stays the first choice so existing callers
        # keep working. NOTE(review): confirm which key the callers provide.
        licenses = data.get('licenses') or data.get('license') or []
        pdf_url = cls._get_pdf_url(doi, licenses, splash_url)

        journal_title = cls._get_container(data)
        issn = cls._get_issn(data)
        journal = Journal.find(issn=issn, title=journal_title)

        # Publisher names are cut to 512 characters before any lookup
        publisher_name = data.get('publisher', '')[:512]
        publisher = cls._get_publisher(publisher_name, journal)

        bare_oairecord_data = {
            'doi': doi,
            'description': cls._get_abstract(data),
            'identifier': doi_to_crossref_identifier(doi),
            'issn': issn,
            'issue': data.get('issue', ''),
            'journal': journal,
            'journal_title': journal_title,
            'pages': data.get('page', ''),
            'pdf_url': pdf_url,
            'pubdate': cls._get_pubdate(data),
            'publisher': publisher,
            'publisher_name': publisher_name,
            'pubtype': cls._get_pubtype(data),
            'source': OaiSource.objects.get(identifier='crossref'),
            'splash_url': splash_url,
            'volume': data.get('volume', ''),
        }

        return bare_oairecord_data
Exemplo n.º 4
0
    def test_find(self):
        """
        Journal.find is expected to match on the title regardless of case and
        on an ISSN/ESSN whichever of the two fields it was stored in.
        """
        essn_only = Journal(title='Journal of Synthetic Disillusion',
                            issn=None,
                            essn='1234-0707',
                            publisher=self.publisher)
        essn_only.save()
        issn_only = Journal(title='Slackline Review',
                            issn='4353-2894',
                            essn=None,
                            publisher=self.publisher)
        issn_only.save()

        expectations = [
            ({'title': 'Slackline Review'}, issn_only),
            ({'title': 'slackline review'}, issn_only),
            # We look for ISSN and ESSN in both fields, because they could easily be swapped!
            ({'issn': '1234-0707'}, essn_only),
            ({'essn': '1234-0707'}, essn_only),
            ({'issn': '4353-2894'}, issn_only),
            ({'essn': '4353-2894'}, issn_only),
            ({'title': 'nonsense'}, None),
        ]
        for lookup, expected in expectations:
            self.assertEqual(Journal.find(**lookup), expected)
Exemplo n.º 5
0
def _create_publication(paper, metadata):
    if not metadata:
        return
    if not metadata.get('container-title'):
        return
    doi = to_doi(metadata.get('DOI', None))

    title = metadata['container-title']
    if isinstance(title, list):
        title = title[0]
    title = title[:512]

    issn = metadata.get('ISSN', None)
    if issn and isinstance(issn, list):
        issn = issn[0]  # TODO pass all the ISSN to the RoMEO interface
    volume = metadata.get('volume', None)
    pages = metadata.get('page', None)
    issue = metadata.get('issue', None)
    date_dict = metadata.get('issued', dict())
    pubdate = None
    if 'date-parts' in date_dict:
        dateparts = date_dict.get('date-parts')[0]
        pubdate = date_from_dateparts(dateparts)
    # for instance it outputs dates like 2014-2-3
    publisher_name = metadata.get('publisher', None)
    if publisher_name:
        publisher_name = publisher_name[:512]

    pubtype = metadata.get('type', 'unknown')
    pubtype = CROSSREF_PUBTYPE_ALIASES.get(pubtype, pubtype)
    splash_url = doi_to_url(doi)

    # PDF availability
    pdf_url = None
    licenses = set([(license or {}).get('URL')
                    for license in metadata.get('license', [])])
    doi_prefix = doi.split('/')[0]
    if doi_prefix in free_doi_prefixes or any(map(is_oa_license, licenses)):
        pdf_url = splash_url

    # Lookup journal
    journal = Journal.find(issn=issn, title=title)

    publisher = None
    if journal:
        publisher = journal.publisher
        AliasPublisher.increment(publisher_name, journal.publisher)
    else:
        publisher = Publisher.find(publisher_name)

    barepub = BareOaiRecord(
            paper=paper,
            journal_title=title,
            issue=issue,
            volume=volume,
            pubdate=pubdate,
            pages=pages,
            doi=doi,
            pubtype=pubtype,
            publisher_name=publisher_name,
            journal=journal,
            publisher=publisher,
            pdf_url=pdf_url,
            splash_url=splash_url,
            source=OaiSource.objects.get(identifier='crossref'),
            identifier=doi_to_crossref_identifier(doi))
    rec = paper.add_oairecord(barepub)
    paper.update_availability()
    return paper, rec
Exemplo n.º 6
0
def _create_publication(paper, metadata):
    if not metadata:
        return
    if not metadata.get('container-title'):
        return
    doi = to_doi(metadata.get('DOI', None))

    title = metadata['container-title']
    if isinstance(title, list):
        title = title[0]
    title = title[:512]

    issn = metadata.get('ISSN', None)
    if issn and isinstance(issn, list):
        issn = issn[0]  # TODO pass all the ISSN to the RoMEO interface
    volume = metadata.get('volume', None)
    pages = metadata.get('page', None)
    issue = metadata.get('issue', None)
    date_dict = metadata.get('issued', dict())
    pubdate = None
    if 'date-parts' in date_dict:
        dateparts = date_dict.get('date-parts')[0]
        pubdate = date_from_dateparts(dateparts)
    # for instance it outputs dates like 2014-2-3
    publisher_name = metadata.get('publisher', None)
    if publisher_name:
        publisher_name = publisher_name[:512]

    pubtype = metadata.get('type', 'unknown')
    pubtype = CROSSREF_PUBTYPE_ALIASES.get(pubtype, pubtype)
    splash_url = doi_to_url(doi)

    # PDF availability
    pdf_url = None
    licenses = set([(license or {}).get('URL')
                    for license in metadata.get('license', [])])
    doi_prefix = doi.split('/')[0]
    if doi_prefix in free_doi_prefixes or any(map(is_oa_license, licenses)):
        pdf_url = splash_url

    # Lookup journal
    journal = Journal.find(issn=issn, title=title)

    publisher = None
    if journal:
        publisher = journal.publisher
        AliasPublisher.increment(publisher_name, journal.publisher)
    else:
        publisher = Publisher.find(publisher_name)

    barepub = BareOaiRecord(
        paper=paper,
        journal_title=title,
        issue=issue,
        volume=volume,
        pubdate=pubdate,
        pages=pages,
        doi=doi,
        pubtype=pubtype,
        publisher_name=publisher_name,
        journal=journal,
        publisher=publisher,
        pdf_url=pdf_url,
        splash_url=splash_url,
        source=OaiSource.objects.get(identifier='crossref'),
        identifier=doi_to_crossref_identifier(doi))
    rec = paper.add_oairecord(barepub)
    paper.update_availability()
    return paper, rec
Exemplo n.º 7
0
class TestCiteproc():
    """
    This class groups tests about the Citeproc class
    """

    # Class under test; every test reaches it through self.test_class
    test_class = Citeproc

    @pytest.mark.parametrize('url, expected', is_oai_license_params)
    def test_is_oa_license(self, url, expected):
        """
        Must return the expected value for each parametrized license url
        """
        assert self.test_class.is_oa_license(url) == expected

    @pytest.mark.usefixtures('db')
    def test_to_paper(self, container_title, title, citeproc):
        """
        Must create a paper and its OAI record with the fields taken from the citeproc data
        """
        p = self.test_class.to_paper(citeproc)
        # Ensure that paper is in database (i.e. created)
        assert p.pk >= 1
        # Check paper fields
        for author_p, author_c in zip(p.authors_list, citeproc['author']):
            assert author_p['name']['first'] == author_c['given']
            assert author_p['name']['last'] == author_c['family']
            assert author_p['affiliation'] == author_c['affiliation'][0][
                'name']
            assert author_p['orcid'] == author_c['ORCID']
        assert p.pubdate == date(*citeproc['issued']['date-parts'][0])
        assert p.title == title
        # Ensure that oairecord is in database (i.e. created)
        r = OaiRecord.objects.get(about=p)
        # Check oairecord fields
        assert r.doi == citeproc['DOI']
        assert r.identifier == doi_to_crossref_identifier(citeproc['DOI'])
        assert r.issue == citeproc['issue']
        assert r.journal_title == container_title
        assert r.pages == citeproc['page']
        assert r.pubdate == date(*citeproc['issued']['date-parts'][0])
        assert r.publisher_name == citeproc['publisher']
        assert r.source == OaiSource.objects.get(identifier='crossref')
        assert r.splash_url == doi_to_url(citeproc['DOI'])
        assert r.volume == citeproc['volume']

    @pytest.mark.parametrize('mock_function',
                             ['_get_oairecord_data', '_get_paper_data'])
    def test_to_paper_invalid_data(self, monkeypatch, mock_function, citeproc):
        """
        If data is invalid, i.e. metadata is corrupted, something is missing or so, must raise exception
        """
        def raise_citeproc_error(*args, **kwargs):
            raise CiteprocError

        monkeypatch.setattr(self.test_class, mock_function,
                            raise_citeproc_error)
        with pytest.raises(CiteprocError):
            self.test_class.to_paper(citeproc)

    def test_to_paper_no_data(self):
        """
        If no data, must raise CiteprocError
        """
        with pytest.raises(CiteprocError):
            self.test_class.to_paper(None)

    @pytest.mark.parametrize('name, expected', convert_to_name_pair_list)
    def test_convert_to_name_pair(self, name, expected):
        """
        Test if name pairing works
        """
        assert self.test_class._convert_to_name_pair(name) == expected

    @pytest.mark.parametrize('author_elem, expected',
                             [(dict(), None),
                              ({
                                  'affiliation': [{
                                      'name': 'Porto'
                                  }]
                              }, 'Porto'),
                              ({
                                  'affiliation': [{
                                      'name': 'Porto'
                                  }, {
                                      'name': 'Lissabon'
                                  }]
                              }, 'Porto')])
    def test_get_affiliation(self, author_elem, expected):
        """
        Must return the first affiliation if any
        """
        assert self.test_class._get_affiliation(author_elem) == expected

    def test_get_abstract(self, citeproc):
        """
        Abstract must be set
        """
        assert self.test_class._get_abstract(citeproc) == citeproc['abstract']

    def test_get_abstact_missing(self, citeproc):
        """
        If no abstract, assert blank
        """
        del citeproc['abstract']
        assert self.test_class._get_abstract(citeproc) == ''

    def test_get_abstract_escaping(self, citeproc):
        """
        Must do some escaping, e.g. we sometimes get some jats tags
        """
        # We wrap the current abstract into some jats
        expected = citeproc['abstract']
        citeproc['abstract'] = r'<jats:p>{}<\/jats:p>'.format(expected)
        assert self.test_class._get_abstract(citeproc) == expected

    def test_get_affiliations(self, affiliations, citeproc):
        """
        Must have the same length as citeproc['author'] and identical to list of affiliations
        """
        r = self.test_class._get_affiliations(citeproc)
        assert len(r) == len(citeproc.get('author'))
        assert r == affiliations

    def test_get_affiliations_no_authors(self, citeproc):
        """
        Must raise exception
        """
        del citeproc['author']
        with pytest.raises(CiteprocAuthorError):
            self.test_class._get_affiliations(citeproc)

    def test_get_authors(self, citeproc):
        """
        The list of authors shall be a list of BareNames
        """
        r = self.test_class._get_authors(citeproc)
        assert isinstance(r, list)
        for barename in r:
            assert isinstance(barename, BareName)

    def test_get_authors_empty_list(self, citeproc):
        """
        The list of authors must not be empty
        """
        citeproc['author'] = []
        with pytest.raises(CiteprocAuthorError):
            self.test_class._get_authors(citeproc)

    def test_get_authors_no_list(self, citeproc):
        """
        author in citeproc must be a list
        """
        del citeproc['author']
        with pytest.raises(CiteprocAuthorError):
            self.test_class._get_authors(citeproc)

    def test_get_authors_invalid_author(self, monkeypatch, citeproc):
        """
        If 'None' is an entry, raise exception
        """
        # We mock the function and let it return None, so that name_pairs is a list of None
        monkeypatch.setattr(self.test_class, '_convert_to_name_pair',
                            lambda x: None)
        with pytest.raises(CiteprocAuthorError):
            self.test_class._get_authors(citeproc)

    def test_get_container(self, container_title, citeproc):
        """
        Must return container title
        """
        assert self.test_class._get_container(citeproc) == container_title

    def test_get_container_missing(self):
        """
        Must raise exception
        """
        with pytest.raises(CiteprocContainerTitleError):
            self.test_class._get_container(dict())

    def test_get_doi(self, citeproc):
        """
        Must return the DOI
        """
        assert self.test_class._get_doi(citeproc) == citeproc['DOI']

    def test_get_doi_invalid(self):
        """
        Must raise exception
        """
        with pytest.raises(CiteprocDOIError):
            self.test_class._get_doi({'DOI': 'spanish inquisition'})

    def test_get_doi_missing(self):
        """
        Must raise exception
        """
        with pytest.raises(CiteprocDOIError):
            self.test_class._get_doi(dict())

    @pytest.mark.parametrize('issn, expected', [('1234-5675', '1234-5675'),
                                                ([
                                                    '1234-5675',
                                                ], '1234-5675'), ([], '')])
    def test_get_issn(self, citeproc, issn, expected):
        """
        Must return the issn or ''
        """
        citeproc['ISSN'] = issn
        assert self.test_class._get_issn(citeproc) == expected

    def test_get_issn_missing(self, citeproc):
        """
        Must return ''
        """
        del citeproc['ISSN']
        assert self.test_class._get_issn(citeproc) == ''

    @pytest.mark.usefixtures('mock_alias_publisher_increment',
                             'mock_journal_find', 'mock_publisher_find')
    @pytest.mark.parametrize('journal', [Journal(publisher=Publisher()), None])
    def test_get_oairecord_data(self, db, monkeypatch, container_title, issn,
                                citeproc, journal):
        """
        We do some assertions on the results, but relatively lax, as we test the called functions, too
        """
        # Journal.find is replaced so that it returns the parametrized journal (or None)
        monkeypatch.setattr(Journal, 'find', lambda issn, title: journal)
        r = self.test_class._get_oairecord_data(citeproc)
        assert r['doi'] == citeproc['DOI']
        assert r['description'] == citeproc['abstract']
        assert r['identifier'] == doi_to_crossref_identifier(citeproc['DOI'])
        assert r['issn'] == issn
        assert r['issue'] == citeproc['issue']
        assert r['journal'] == journal
        assert r['journal_title'] == container_title
        assert r['pages'] == citeproc['page']
        assert r['pdf_url'] == ''  # Is not OA
        assert r['pubdate'] == date(*citeproc['issued']['date-parts'][0])
        assert r['publisher_name'] == citeproc['publisher']
        assert r['pubtype'] == citeproc['type']
        assert r['source'] == OaiSource.objects.get(identifier='crossref')
        assert r['splash_url'] == doi_to_url(citeproc['DOI'])
        assert r['volume'] == citeproc['volume']

    @pytest.mark.usefixtures('mock_journal_find', 'mock_publisher_find')
    def test_get_oairecord_data_missing(self, monkeypatch, container_title,
                                        issn, citeproc):
        """
        Some fields may be empty, namely those with a direct get call
        """
        keys = ['abstract', 'issue', 'publisher', 'page', 'volume']
        for k in keys:
            del citeproc[k]
        r = self.test_class._get_oairecord_data(citeproc)
        # Names of the same fields in the returned dict
        keys = ['description', 'issue', 'publisher_name', 'pages', 'volume']
        for k in keys:
            assert r[k] == ''

    @pytest.mark.parametrize('orcid, expected',
                             [({
                                 'ORCID': '0000-0001-8187-9704'
                             }, '0000-0001-8187-9704'),
                              ({
                                  'ORCID': '0000-0001-8187-9705'
                              }, None), ({}, None)])
    def test_get_orcid(self, orcid, expected):
        """
        Must be valid or None
        """
        assert self.test_class._get_orcid(orcid) == expected

    def test_get_orcids(self, orcids, citeproc):
        """
        Must have the same length as citeproc['author'] and identical to list of orcids
        """
        r = self.test_class._get_orcids(citeproc)
        assert len(r) == len(citeproc.get('author'))
        assert r == orcids

    def test_get_orcid_no_authors(self, citeproc):
        """
        Must raise exception
        """
        del citeproc['author']
        with pytest.raises(CiteprocAuthorError):
            self.test_class._get_orcids(citeproc)

    def test_get_paper_data(self, affiliations, orcids, title, citeproc):
        """
        We do some assertions on the results, but relatively lax, as we test the called functions, too
        """
        r = self.test_class._get_paper_data(citeproc)
        assert r['affiliations'] == affiliations
        for a in r['author_names']:
            assert isinstance(a, BareName)
        assert r['orcids'] == orcids
        assert r['pubdate'] == date(*citeproc['issued']['date-parts'][0])
        assert r['title'] == title

    @pytest.mark.parametrize('doi', [True, False])
    @pytest.mark.parametrize('license', [True, False])
    def test_get_pdf_url(self, monkeypatch, doi, license):
        """
        Must return the url if OA by DOI or by license, otherwise ''
        """
        # Both OA checks are replaced by the parametrized booleans
        monkeypatch.setattr(self.test_class, '_is_oa_by_doi', lambda x: doi)
        monkeypatch.setattr(self.test_class, '_is_oa_by_license',
                            lambda x: license)
        url = 'https://repository.dissem.in/entry/3242/document.pdf'
        r = self.test_class._get_pdf_url(doi, license, url)
        if doi or license:
            assert r == url
        else:
            assert r == ''

    def test_get_pubdate_issued(self, citeproc):
        """
        If contains issued, take this
        """
        citeproc['created'] = {'date-parts': [[2019, 10, 11]]}
        citeproc['deposited'] = {'date-parts': [[2019, 10, 12]]}
        assert self.test_class._get_pubdate(citeproc) == date(
            *citeproc['issued']['date-parts'][0])

    def test_get_pubdate_created(self, citeproc):
        """
        If contains no issued, take created
        """
        del citeproc['issued']
        citeproc['created'] = {'date-parts': [[2019, 10, 11]]}
        citeproc['deposited'] = {'date-parts': [[2019, 10, 12]]}
        assert self.test_class._get_pubdate(citeproc) == date(
            *citeproc['created']['date-parts'][0])

    def test_get_pubdate_deposited(self, citeproc):
        """
        If contains no issued and created, take deposited
        """
        del citeproc['issued']
        citeproc['deposited'] = {'date-parts': [[2019, 10, 12]]}
        assert self.test_class._get_pubdate(citeproc) == date(
            *citeproc['deposited']['date-parts'][0])

    def test_get_pubdate_no_date(self, citeproc):
        """
        If contains no date, raise exception
        """
        del citeproc['issued']
        with pytest.raises(CiteprocDateError):
            self.test_class._get_pubdate(citeproc)

    def test_get_pubdate_received_none(self, monkeypatch):
        """
        If no valid date is found, raise exception
        """
        monkeypatch.setattr(self.test_class, '_parse_date', lambda x: None)
        with pytest.raises(CiteprocDateError):
            self.test_class._get_pubdate(dict())

    @pytest.mark.usefixtures('mock_alias_publisher_increment')
    def test_get_publisher_by_journal(self):
        """
        Must return Publisher object
        """
        publisher = Publisher()
        journal = Journal(publisher=publisher)
        assert self.test_class._get_publisher('p_name', journal) == publisher

    def test_get_publisher_by_name(self, monkeypatch):
        """
        Must return publisher object
        """
        publisher = Publisher()
        monkeypatch.setattr(Publisher, 'find', lambda x: publisher)
        assert self.test_class._get_publisher('p_name', None) == publisher

    def test_get_pubtype(self):
        """
        Must return something from PAPER_TYPES
        """
        pubtype = 'book'
        assert self.test_class._get_pubtype({'type': pubtype}) == pubtype

    def test_get_pubtype_strange(self):
        """
        Must return other
        """
        assert self.test_class._get_pubtype({'type':
                                             'spanish inquisition'}) == 'other'

    def test_get_pubtype_missing(self):
        """
        Must raise exception
        """
        with pytest.raises(CiteprocPubtypeError):
            self.test_class._get_pubtype(dict())

    def test_get_title(self, citeproc):
        """
        Must return the title, not longer than 1024 chars
        """
        r = self.test_class._get_title(citeproc)
        assert r == citeproc['title'][:1024]
        assert len(r) <= 1024

    def test_get_title_length(self, citeproc):
        """
        Title must not be longer than 1024 chars
        """
        citeproc['title'] = 'x' * 2000
        r = self.test_class._get_title(citeproc)
        assert r == citeproc['title'][:1024]
        assert len(r) <= 1024

    def test_get_title_length_with_unicode(self, citeproc):
        """
        A title of 1024 en dashes must be cut down to 341 characters
        """
        # NOTE(review): presumably the limit is counted in encoded bytes
        # (341 * 3 = 1023 in UTF-8) - confirm against _get_title
        citeproc['title'] = '–' * 1024
        r = self.test_class._get_title(citeproc)
        assert r == citeproc['title'][:341]
        assert len(r) <= 1024

    def test_get_title_no_title(self, citeproc):
        """
        Title is mandatory
        """
        del citeproc['title']
        with pytest.raises(CiteprocTitleError):
            self.test_class._get_title(citeproc)

    def test_get_title_emtpy_string(self, citeproc):
        """
        If the title is an empty string, expect CiteprocTitleError
        """
        citeproc['title'] = ''
        with pytest.raises(CiteprocTitleError):
            self.test_class._get_title(citeproc)

    @pytest.mark.parametrize('doi, expected', [('10.2195/spam', True),
                                               ('10.15122/spam', False)])
    def test_is_oa_by_doi(self, doi, expected):
        """
        Must be true or false
        """
        assert self.test_class._is_oa_by_doi(doi) == expected

    @pytest.mark.parametrize('licenses, expected',
                             [([{
                                 'URL': 'creativecommons.org/licenses/'
                             }], True),
                              ([{
                                  'URL': 'https://dissem.in/not_free'
                              }], False), ([{}], False), ([], False)])
    def test_is_oa_by_license(self, licenses, expected):
        """
        Must be true or false
        """
        assert self.test_class._is_oa_by_license(licenses) == expected

    @pytest.mark.parametrize('data, expected', [({
        'date-parts': [[2019, 10, 10]]
    }, date(2019, 10, 10)), ({
        'raw': '2019-10-10'
    }, date(2019, 10, 10)), (None, None), ({
        'spam': 'ham'
    }, None)])
    def test_parse_date(self, data, expected):
        """
        Must return a valid date or None
        """
        assert self.test_class._parse_date(data) == expected

    @pytest.mark.parametrize('date_parts, expected', [([
        2019,
    ], date(2019, 1, 1)), ([
        2019,
        10,
    ], date(2019, 10, 1)), ([2019, 10, 10], date(2019, 10, 10))])
    def test_parse_date_parts(self, date_parts, expected):
        """
        Must parse the date list
        """
        assert self.test_class._parse_date_parts(date_parts) == expected
Exemplo n.º 8
0
    def test_find(self):
        """
        Journal.find is expected to locate journals by title (whatever the
        case) and by ISSN or ESSN, irrespective of the field holding the value.
        """
        disillusion = Journal(title='Journal of Synthetic Disillusion',
                              issn=None,
                              essn='1234-0707',
                              publisher=self.publisher)
        disillusion.save()
        slackline = Journal(title='Slackline Review',
                            issn='4353-2894',
                            essn=None,
                            publisher=self.publisher)
        slackline.save()

        cases = (
            (dict(title='Slackline Review'), slackline),
            (dict(title='slackline review'), slackline),
            # We look for ISSN and ESSN in both fields, because they could easily be swapped!
            (dict(issn='1234-0707'), disillusion),
            (dict(essn='1234-0707'), disillusion),
            (dict(issn='4353-2894'), slackline),
            (dict(essn='4353-2894'), slackline),
            (dict(title='nonsense'), None),
        )
        for criteria, wanted in cases:
            self.assertEqual(Journal.find(**criteria), wanted)
Exemplo n.º 9
0
def fetch_journal(search_terms, matching_mode='exact'):
    """
    Fetch the journal data from RoMEO. Returns a Journal object.

    :param search_terms: dictionary whose keys must all be among 'issn' and
        'jtitle'. It is copied, the caller's dictionary is left untouched.
    :param matching_mode: sent to RoMEO as 'qtype' unless it is 'exact'
    :returns: the matching Journal, either found in the model or newly
        created and saved; None when a search term is longer than 256
        characters or when RoMEO returns no journal or no publisher
    :raises ValueError: if a key of ``search_terms`` is not an allowed field
    :raises MetadataSourceException: if RoMEO returns a journal without title
    """
    allowed_fields = ['issn', 'jtitle']
    terms = search_terms.copy()
    # Make the title HTML-safe before searching for it in the database or in
    # the API. The title travels under the 'jtitle' key: 'title' is not an
    # allowed field, so cleaning that key could never take effect.
    if 'jtitle' in terms:
        terms['jtitle'] = kill_html(terms['jtitle'])

    # Check the arguments
    if not all(key in allowed_fields for key in terms):
        raise ValueError('The search terms have to belong to ' +
                         str(allowed_fields) + ' but the dictionary I got is ' +
                         str(terms))

    # Remove diacritics (because it has to be sent in ASCII to ROMEO)
    for key in terms:
        terms[key] = remove_diacritics(terms[key])
        # Overlong terms are not searched for at all
        if len(terms[key]) > 256:
            return None

    # First check we don't have it already
    journal = find_journal_in_model(terms)
    if journal:
        return journal

    # Perform the query
    if matching_mode != 'exact':
        terms['qtype'] = matching_mode
    root = perform_romeo_query(terms)

    # Find the matching journals (if any)
    journals = list(root.findall('./journals/journal'))

    if not journals:
        return None
    elif len(journals) > 1:
        print("Warning, " + str(len(journals)) +
              " journals match the RoMEO request, " +
              "defaulting to the first one")
        # TODO different behaviour: get the ISSN and try again.
    journal = journals[0]

    names = list(journal.findall('./jtitle'))
    if not names:
        # str() rather than unicode(): the latter does not exist on Python 3
        # and would turn this error into a NameError.
        raise MetadataSourceException(
            'RoMEO returned a journal without title.\n' + 'Terms were: ' +
            str(terms))
    if len(names) > 1:
        print("Warning, " + str(len(names)) +
              " names provided for one journal, " +
              "defaulting to the first one")
    name = kill_html(names[0].text)

    # The ISSN is optional in the response: keep None when it is absent
    issn = None
    try:
        issn = nstrip(journal.findall('./issn')[0].text)
    except (KeyError, IndexError):
        pass

    # Now we may have additional info, so it's worth trying again in the model
    model_journal = find_journal_in_model({'issn': issn, 'jtitle': name})
    if model_journal:
        return model_journal

    # Otherwise we need to find the publisher
    publishers = root.findall('./publishers/publisher')
    if not publishers:
        return None
    # TODO here we shouldn't default to the first one but look it up using the
    # <romeopub>
    publisher_desc = publishers[0]

    publisher = get_or_create_publisher(publisher_desc)

    result = Journal(title=name, issn=issn, publisher=publisher)
    result.save()
    return result