Пример #1
0
 def setUpClass(self):
     """
     Class-level fixture: clear ``self.ccf``, then run an incremental
     ``fetch_and_save`` for ``self.r2`` on a CrossRef source and on an
     OAI source, both built from ``self.ccf``.
     """
     super(StatisticsTest, self).setUpClass()
     self.ccf.clear()
     # Both sources are instantiated before any fetching starts;
     # CrossRef is fetched first, OAI second.
     paper_sources = [
         CrossRefPaperSource(self.ccf),
         OaiPaperSource(self.ccf),
     ]
     for paper_source in paper_sources:
         paper_source.fetch_and_save(self.r2, incremental=True)
Пример #2
0
 def setUp(self):
     """
     Prepare two OAI paper sources (for the 'hal' and 'base'
     ``OaiSource`` rows) and remember the directory of this test file.
     """
     # Directory containing this module.
     self.testdir = os.path.dirname(os.path.abspath(__file__))

     hal_source = OaiSource.objects.get(identifier='hal')
     self.oai = OaiPaperSource(hal_source)

     base_source = OaiSource.objects.get(identifier='base')
     # The endpoint is overridden on the in-memory object only: the
     # model instance is not saved here.
     base_source.endpoint = 'https://some_endpoint'
     self.base_oai = OaiPaperSource(base_source)
Пример #3
0
 def setUpClass(self):
     """
     Class-level fixture: empty ``self.ccf`` and then fetch and save
     data for ``self.r2`` incrementally, first through CrossRef and
     then through OAI.
     """
     super(StatisticsTest, self).setUpClass()
     self.ccf.clear()
     # Instantiate the two sources (CrossRef first) before fetching.
     crossref_source, oai_source = (
         CrossRefPaperSource(self.ccf),
         OaiPaperSource(self.ccf),
     )
     crossref_source.fetch_and_save(self.r2, incremental=True)
     oai_source.fetch_and_save(self.r2, incremental=True)
Пример #4
0
def update_oai_sources():
    """
    Fetches new and updated records from all configured OAI sources since
    their last update.

    Only sources with a non-null endpoint are harvested. For each of them,
    records are ingested in the 'base_dc' metadata format starting from the
    source's ``last_update`` (with its timezone information stripped), and
    ``last_update`` is then saved as the time at which the harvest started.
    """
    for source in OaiSource.objects.filter(endpoint__isnull=False):
        oai = OaiPaperSource(source)
        # Take the timestamp *before* harvesting: a record modified
        # upstream while the ingest is running must still fall inside
        # the window of the next run. Stamping the source after the
        # ingest would silently skip such records.
        harvest_start = datetime.now()
        oai.ingest(source.last_update.replace(tzinfo=None),
                   metadataPrefix='base_dc')
        source.last_update = harvest_start
        source.save()
Пример #5
0
def update_oai_sources():
    """
    Fetches new and updated records from all configured OAI sources since
    their last update.

    Sources without an endpoint are ignored. Each remaining source is
    harvested in the 'base_dc' metadata format from its ``last_update``
    (made naive by dropping ``tzinfo``), after which ``last_update`` is
    set to the moment the harvest began and the source is saved.
    """
    for source in OaiSource.objects.filter(endpoint__isnull=False):
        oai = OaiPaperSource(source)
        since = source.last_update.replace(tzinfo=None)
        # Record the start time before ingesting, not after: records
        # changed upstream during a long harvest would otherwise be
        # older than the stored ``last_update`` and never be fetched.
        started_at = datetime.now()
        oai.ingest(since, metadataPrefix='base_dc')
        source.last_update = started_at
        source.save()
Пример #6
0
 def test_preprint(self):
     """
     Submit a preprint: a paper created from a BASE record is
     dry-deposited and the resulting status must be 'faked'.
     """
     source = OaiPaperSource(self.oaisource)
     source.add_translator(BASEDCTranslator(self.oaisource))
     paper = source.create_paper_by_identifier(
         'ftarxivpreprints:oai:arXiv.org:1207.2079', 'base_dc')
     # Keep only the first author of the paper.
     paper.authors_list = [paper.authors_list[0]]

     form_data = {
         'abstract': 'here is my great result',
         'topic': 'SDV',
         'depositing_author': 0,
         'affiliation': 128940,
     }
     response = self.dry_deposit(paper, **form_data)
     self.assertEqualOrLog(response.status, 'faked')
Пример #7
0
 def test_preprint(self):
     """
     Submit a preprint: fetch a record through the OAI endpoint,
     dry-deposit it and expect the status 'faked'.
     """
     source = OaiPaperSource(endpoint='http://doai.io/oai')
     source.add_translator(BASEDCTranslator())
     paper = source.create_paper_by_identifier(
         'ftarxivpreprints:oai:arXiv.org:1207.2079', 'base_dc')
     # Keep only the first author of the paper.
     paper.authors_list = [paper.authors_list[0]]

     form_data = {
         'abstract': 'here is my great result',
         'topic': 'SDV',
         'depositing_author': 0,
         'affiliation': 128940,
     }
     response = self.dry_deposit(paper, **form_data)
     self.assertEqualOrLog(response.status, 'faked')
Пример #8
0
 def test_bad_journal_article(self):
     """
     Submit something that pretends to be a journal article,
     but for which we fail to find publication metadata.
     The interface should fall back on something lighter.
     """
     source = OaiPaperSource(self.oaisource)
     source.add_translator(BASEDCTranslator(self.oaisource))

     record_id = 'ftalborguniv:oai:pure.atira.dk:openaire/30feea10-9c2f-11db-8ed6-000ea68e967b'
     paper = source.create_paper_by_identifier(record_id, 'base_dc')
     # Keep a single author and force the document type before saving.
     paper.authors_list = [paper.authors_list[0]]
     paper.doctype = 'journal-article'
     paper.save()

     form_data = {
         'abstract': 'hey you, yes you',
         'topic': 'SDV',
         'depositing_author': 0,
         'affiliation': 128940,
     }
     response = self.dry_deposit(paper, **form_data)
     self.assertEqualOrLog(response.status, 'faked')
Пример #9
0
 def test_bad_journal_article(self):
     """
     Submit something that pretends to be a journal article,
     but for which we fail to find publication metadata.
     The interface should fall back on something lighter.
     """
     paper_source = OaiPaperSource(self.oaisource)
     translator = BASEDCTranslator(self.oaisource)
     paper_source.add_translator(translator)

     paper = paper_source.create_paper_by_identifier(
         'ftalborguniv:oai:pure.atira.dk:openaire/30feea10-9c2f-11db-8ed6-000ea68e967b',
         'base_dc',
     )
     # Reduce the paper to its first author and mark it as a journal
     # article, then persist it before depositing.
     first_author = paper.authors_list[0]
     paper.authors_list = [first_author]
     paper.doctype = 'journal-article'
     paper.save()

     result = self.dry_deposit(
         paper,
         abstract='hey you, yes you',
         topic='SDV',
         depositing_author=0,
         affiliation=128940,
     )
     self.assertEqualOrLog(result.status, 'faked')
Пример #10
0
 def setUp(self):
     """
     Create the OAI paper source used by the tests and register the
     BASE DC, OAI DC and citeproc translators on it, in that order.
     """
     self.oai = OaiPaperSource(endpoint='http://doai.io/oai')
     translator_classes = (
         BASEDCTranslator,
         OAIDCTranslator,
         CiteprocTranslator,
     )
     for translator_cls in translator_classes:
         self.oai.add_translator(translator_cls())
Пример #11
0
class OaiTest(TestCase):
    """
    Tests of paper creation from OAI records, fetched through an
    ``OaiPaperSource`` on which the BASE DC, OAI DC and citeproc
    translators are registered.
    """

    def setUp(self):
        """
        Build the paper source shared by all tests and register the
        translators on it.
        """
        self.oai = OaiPaperSource(endpoint='http://doai.io/oai')
        self.oai.add_translator(BASEDCTranslator())
        self.oai.add_translator(OAIDCTranslator())
        self.oai.add_translator(CiteprocTranslator())

    def create(self, *args, **kwargs):
        """
        Shortcut for the tests: forwards all arguments to
        ``create_paper_by_identifier`` and returns its result.
        """
        return self.oai.create_paper_by_identifier(*args, **kwargs)

    def delete(self, identifier):
        """
        Delete the OAI record with the given identifier, if there is one,
        and also delete the paper it was about when that paper reports
        itself as orphan afterwards. Does nothing if no such record exists.
        """
        try:
            r = OaiRecord.objects.get(identifier=identifier)
            p = r.about
            r.delete()
            if p.is_orphan():
                p.delete()
        except OaiRecord.DoesNotExist:
            pass

    def test_create_no_match(self):
        """
        Creation of a paper from an OAI record,
        when the paper does not exist yet.
        """
        oai_id = 'ftunivsavoie:oai:HAL:hal-01063697v1'

        # first, make sure the paper isn't there already
        self.delete(oai_id)
        # create a paper from BASE
        hal_paper = self.create(oai_id, 'base_dc')
        self.assertEqual(len(hal_paper.oairecords), 1)
        self.assertIsNotNone(hal_paper.pdf_url)
        self.assertEqual(hal_paper.fingerprint, hal_paper.new_fingerprint())

    def test_create_already_existing(self):
        """
        Creation of a paper from an OAI record,
        when the exact same OAI record already exists.
        """
        # TODO we could repeat this for various papers
        oai_id = 'ftccsdartic:oai:hal.archives-ouvertes.fr:hal-00830421'

        # first, make sure the paper isn't there already
        self.delete(oai_id)
        # create a paper from BASE
        hal_paper = self.create(oai_id, 'base_dc')

        # Create it again!
        new_paper = self.create(oai_id, 'base_dc')

        # It's the same thing!
        self.assertEqual(new_paper, hal_paper)
        self.assertSetEqual(set(new_paper.oairecords),
                            set(hal_paper.oairecords))
        self.assertListEqual(new_paper.bare_author_names(),
                             hal_paper.bare_author_names())
        self.assertEqual(new_paper.title, hal_paper.title)

    def test_create_match_fp(self):
        """
        Addition of an OAI record when it is matched
        with an existing record by fingerprint.
        """
        first_id = 'oai:crossref.org:10.1016/j.crma.2012.10.021'
        second_id = 'ftarxivpreprints:oai:arXiv.org:1112.6130'

        # first, make sure the paper isn't there already
        self.delete(first_id)
        # create a paper from the Crossref record (citeproc format)
        cr_paper = self.create(first_id, 'citeproc')

        # Save the existing records
        records = set(cr_paper.oairecords)
        # Create a new paper (refers to the same paper, but coming from
        # another source)
        new_paper = self.create(second_id, 'base_dc')
        # the resulting paper has to be equal to the first one
        # (this does not check that all their attributes are equal, just
        # that they are the same row in the database, i.e. have same id)
        self.assertEqual(new_paper, cr_paper)
        # the new set of records is the old one plus the new record
        records.add(OaiRecord.objects.get(identifier=second_id))
        self.assertSetEqual(set(new_paper.oairecords), records)

    @unittest.expectedFailure
    def test_create_incomplete_metadata(self):
        """
        When we are trying to create a new paper for an
        incomplete OAI record (in this case, a publication date is
        missing). Ideally we would still like to match it with the
        first paper via fingerprint, to add the relevant url.
        """
        first_id = 'ftccsdartic:oai:hal.archives-ouvertes.fr:hal-00939473'
        second_id = 'ftciteseerx:oai:CiteSeerX.psu:10.1.1.487.869'

        # first, make sure the paper isn't there already
        self.delete(first_id)
        # create the first paper (fetched in the citeproc format)
        cr_paper = self.create(first_id, 'citeproc')

        # Save the existing records
        records = set(cr_paper.oairecords)
        # Create a new paper (refers to the same paper, but coming from
        # another source)
        new_paper = self.create(second_id, 'base_dc')
        # the resulting paper has to be equal to the first one
        # (this does not check that all their attributes are equal, just
        # that they are the same row in the database, i.e. have same id)
        self.assertEqual(new_paper, cr_paper)
        # the new set of records is the old one plus the new record
        records.add(OaiRecord.objects.get(identifier=second_id))
        self.assertSetEqual(set(new_paper.oairecords), records)

    def test_create_match_doi(self):
        """
        Addition of an OAI record when it is matched
        to an existing paper by DOI
        """
        first_id = 'ftunivmacedonia:oai:dspace.lib.uom.gr:2159/6240'
        second_id = 'oai:crossref.org:10.1111/j.1574-0862.2005.00325.x'

        # first, make sure the paper isn't there already
        self.delete(first_id)
        # Create a paper from BASE
        first = self.create(first_id, 'base_dc')

        self.assertEqual(first.oairecords[0].doi,
                         '10.1111/j.1574-0862.2005.00325.x')
        records = set(first.oairecords)
        new_paper = self.create(second_id, 'citeproc')

        # Make sure that, if a merge happens, the oldest
        # paper remains (otherwise we create broken links!)
        self.assertEqual(first, new_paper)

        records.add(OaiRecord.objects.get(identifier=second_id))
        self.assertSetEqual(set(new_paper.oairecords), records)

    def test_update_pdf_url(self):
        """
        Two OAI records share the same splash URL, but
        the second one has a pdf_url. We should add the PDF
        url to the existing OAI record (merge the two records).
        """
        # first, make sure the paper isn't there already
        self.delete('oai:crossref.org:10.1007/s10858-015-9994-8')
        # Create a paper from Crossref
        first = self.create('oai:crossref.org:10.1007/s10858-015-9994-8',
                            'citeproc')
        # initially the PDF url should be empty
        self.assertIsNone(first.oairecords[0].pdf_url)

        # then we import a new identifier
        new_paper = self.create('ftspringeroc:10.1007/s10858-015-9994-8',
                                'base_dc')
        self.assertEqual(first, new_paper)

        # no new record should be created
        self.assertEqual(len(new_paper.oairecords), 1)
        self.assertIsNotNone(new_paper.oairecords[0].pdf_url)

    def test_create_match_identifier(self):
        """
        An OAI record with the same identifier already
        exists but it has been already merged before with
        another paper with a different fingerprint.
        """
        identifier = 'ftccsdartic:oai:hal.archives-ouvertes.fr:hal-00939473'

        # create a paper from BASE
        hal_paper = self.create(identifier, 'base_dc')
        records = set(hal_paper.oairecords)
        # change its fingerprint
        hal_paper.title += ' babebibobu'
        hal_paper.fingerprint = hal_paper.new_fingerprint()
        hal_paper.save()

        # add the OAI record again
        new_paper = self.create(identifier, 'base_dc')
        self.assertEqual(new_paper, hal_paper)
        self.assertSetEqual(records, set(new_paper.oairecords))

    def test_create_invalid_metadata(self):
        """
        Metadata that we don't accept
        """
        identifiers = [
            # Contributors too long to fit in the db
            ('ftbnfgallica:oai:bnf.fr:gallica/ark:/12148/btv1b8621766k',
             'base_dc'),
            # No authors
            ('ftcarmelhelios:oai:archive.library.cmu.edu:heinz:box00200/fld00021/bdl0031/doc0001/',
             'base_dc'),
            # Bad publication date
            ('ftsehiruniv:oai:earsiv.sehir.edu.tr:11498/28266', 'base_dc'),
        ]
        # No paper should be created for any of these records
        for identifier, metadata_format in identifiers:
            self.assertIsNone(self.create(identifier, metadata_format))

    def test_create_invalid_identifier(self):
        """
        Fetching an invalid identifier from OAI
        """
        with self.assertRaises(BadArgumentError):
            self.create('aiunrsecauiebleuiest', 'oai_dc')

    def test_create_invalid_format(self):
        """
        Fetching with an invalid format from OAI
        """
        # Format not available from the interface
        with self.assertRaises(BadArgumentError):
            self.create('aiunrsecauiebleuiest', 'unknown_format')

    # tests of particular translators
    # TODO: translate them as tests of the translators and not the
    # whole backend?

    def test_base_doctype(self):
        """
        Papers created from these BASE records get the expected doctype.
        """
        mappings = {
            'ftunivsavoie:oai:HAL:hal-01062241v1': 'proceedings-article',
            'ftunivsavoie:oai:HAL:hal-01062339v1': 'book-chapter',
            'ftunivmacedonia:oai:dspace.lib.uom.gr:2159/6227': 'other',
            'ftartxiker:oai:HAL:hal-00845819v1': 'journal-article',
            'ftdatacite:oai:oai.datacite.org:402223': 'dataset',
        }

        for ident, typ in mappings.items():
            paper = self.create(ident, 'base_dc')
            self.assertEqual(paper.doctype, typ)

    def test_crossref_invalid_metadata(self):
        """
        A Crossref record whose metadata is rejected yields no paper.
        """
        # authors with no family name
        paper = self.create('oai:crossref.org:10.4156/aiss.vol3.issue9.31',
                            'citeproc')
        self.assertIsNone(paper)

    def test_datacite(self):
        """
        A paper created from this DataCite record has a non-empty pdf_url.
        """
        paper = self.create('ftdatacite:oai:oai.datacite.org:8558707',
                            'base_dc')
        self.assertTrue(paper.pdf_url)

    def test_pmc(self):
        """
        Papers created from these PubMed Central records get the
        expected pdf_url.
        """
        paper = self.create('ftpubmed:oai:pubmedcentral.nih.gov:1968744',
                            'base_dc')
        self.assertEqual(
            paper.pdf_url,
            'http://www.ncbi.nlm.nih.gov/pmc/articles/PMC1968744')
        p2 = self.create('ftpubmed:oai:pubmedcentral.nih.gov:4131942',
                         'base_dc')
        self.assertEqual(p2.pdf_url,
                         'http://www.ncbi.nlm.nih.gov/pubmed/24806729')

    def test_doi_prefix(self):
        """
        A paper created from this DataCite record has a pdf_url set.
        """
        paper = self.create('ftdatacite:oai:oai.datacite.org:3505359',
                            'base_dc')
        self.assertIsNotNone(paper.pdf_url)