Exemplo n.º 1
0
    def test_01_duplicates_report(self):
        """Check duplication reporting across all articles in the index"""

        # Two articles built from the same fixture kwargs form a duplicate pair
        fixture_kwargs = dict(
            eissn='1111-1111',
            pissn='2222-2222',
            with_id=False,
            in_doaj=True,
            with_journal_info=True)

        article1 = models.Article(
            **ArticleFixtureFactory.make_article_source(**fixture_kwargs))
        a1_doi = article1.bibjson().get_identifiers('doi')
        assert a1_doi is not None
        article1.save(blocking=True)

        time.sleep(1)

        article2 = models.Article(
            **ArticleFixtureFactory.make_article_source(**fixture_kwargs))
        assert article2.bibjson().get_identifiers('doi') == a1_doi
        article2.save(blocking=True)

        # Run the duplicate reporting background task as the system user
        system_user = app.config.get("SYSTEM_USERNAME")
        job = article_duplicate_report.ArticleDuplicateReportBackgroundTask.prepare(
            system_user, outdir=TMP_DIR)
        article_duplicate_report.ArticleDuplicateReportBackgroundTask(job).run()

        report_path = TMP_DIR + '/duplicate_articles_global_' + dates.today() + '.csv'

        # The first audit entry records where the reports were written
        audit_1 = job.audit.pop(0)
        assert audit_1.get('message', '').endswith(TMP_DIR)
        assert os.path.exists(report_path)

        # The interim article csv must have been cleaned up by the task
        assert not os.path.exists(
            paths.rel2abs(__file__, 'tmp_article_duplicate_report'))

        # The report's final data row should name the duplicate pair
        with open(report_path) as f:
            last_line = f.readlines().pop()
        # The newest comes first, so article1 is article2's duplicate.
        assert last_line.startswith(article2.id)
        assert article1.id in last_line
        assert 'doi+fulltext' in last_line

        # The summary audit entry carries the processed / duplicate counts
        audit_2 = job.audit.pop(0)
        assert audit_2.get('message', '') == \
            '2 articles processed for duplicates. 1 global duplicate sets found.'
Exemplo n.º 2
0
    def test_09_search(self):
        # Bring it all together: 4 articles, 3 in DOAJ and 1 not in DOAJ.
        # Pre-filters should restrict the query to the 3 in-DOAJ articles,
        # and post-filters should strip non-public data from the admin section.
        qsvc = QueryService()

        articles = []
        for _ in range(3):
            art = models.Article(**ArticleFixtureFactory.make_article_source(
                with_id=False))
            assert art.publisher_record_id() == 'some_identifier'
            art.save(blocking=True)
            articles.append(art)

        hidden = models.Article(**ArticleFixtureFactory.make_article_source(
            with_id=False, in_doaj=False))
        hidden.save(blocking=True)
        articles.append(hidden)

        res = qsvc.search('query', 'article',
                          {"query": {"match_all": {}}},
                          account=None,
                          additional_parameters={})
        assert res['hits']['total'] == 3, res['hits']['total']

        # Non-public data must have been removed from every returned hit
        for hit in res['hits']['hits']:
            returned = models.Article(**hit)
            assert returned.publisher_record_id() is None, returned.publisher_record_id()
Exemplo n.º 3
0
    def test_01_duplicates_report(self):
        """Check duplication reporting across all articles in the index"""

        def _new_duplicate():
            # Shared ISSNs, DOI and fulltext make these articles duplicates
            return models.Article(**ArticleFixtureFactory.make_article_source(
                eissn='1111-1111',
                pissn='2222-2222',
                with_id=False,
                in_doaj=True,
                with_journal_info=True
            ))

        article1 = _new_duplicate()
        a1_doi = article1.bibjson().get_identifiers('doi')
        assert a1_doi is not None
        article1.save(blocking=True)

        time.sleep(1)

        article2 = _new_duplicate()
        assert article2.bibjson().get_identifiers('doi') == a1_doi
        article2.save(blocking=True)

        # Run the reporting task as the system user
        job = article_duplicate_report.ArticleDuplicateReportBackgroundTask.prepare(
            app.config.get("SYSTEM_USERNAME"), outdir=TMP_DIR)
        article_duplicate_report.ArticleDuplicateReportBackgroundTask(job).run()

        report = TMP_DIR + '/duplicate_articles_global_' + dates.today() + '.csv'

        # The audit log should show we saved the reports to the TMP_DIR defined above
        audit_1 = job.audit.pop(0)
        assert audit_1.get('message', '').endswith(TMP_DIR)
        assert os.path.exists(report)

        # It should also clean up its interim article csv
        assert not os.path.exists(paths.rel2abs(__file__, 'tmp_article_duplicate_report'))

        # The final data row names the duplicate pair, newest article first
        with open(report) as f:
            res = f.readlines().pop()
        assert res.startswith(article2.id)
        assert article1.id in res
        assert 'doi+fulltext' in res

        # The audit summary reports the processed and duplicate counts
        audit_2 = job.audit.pop(0)
        assert audit_2.get('message', '') == '2 articles processed for duplicates. 1 global duplicate sets found.'
Exemplo n.º 4
0
    def test_01_incoming_article_do(self):
        # a blank data object is acceptable
        ia = IncomingArticleDO()

        # so is a complete incoming article fixture
        ia = IncomingArticleDO(ArticleFixtureFactory.make_article_source())

        # an author email (recently removed from the allowed fields) is silently pruned
        data = ArticleFixtureFactory.make_article_source()
        data["bibjson"]["author"][0]["email"] = "*****@*****.**"
        ia = IncomingArticleDO(data)
        assert "*****@*****.**" not in ia.json()

        # a missing title is a structural failure
        data = ArticleFixtureFactory.make_article_source()
        del data["bibjson"]["title"]
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # now work through the conditionally required/advanced validation rules
        #
        # no identifiers at all
        data = ArticleFixtureFactory.make_article_source()
        data["bibjson"]["identifier"] = []
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # identifiers present, but none of them are issns
        data["bibjson"]["identifier"] = [{"type": "wibble", "id": "alksdjfas"}]
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # pissn and eissn identical once normalised
        data["bibjson"]["identifier"] = [
            {"type": "pissn", "id": "12345678"},
            {"type": "eissn", "id": "1234-5678"},
        ]
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # more keywords than the schema permits
        data = ArticleFixtureFactory.make_article_source()
        data["bibjson"]["keywords"] = [
            "one", "two", "three", "four", "five", "six", "seven"
        ]
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)
Exemplo n.º 5
0
    def setUp(self):
        super(TestCreateOrUpdateArticle, self).setUp()

        # One publisher account and one admin account
        self.publisher = Account()
        self.publisher.add_role("publisher")
        self.publisher.save(blocking=True)

        self.admin = Account()
        self.admin.add_role("admin")
        self.admin.save(blocking=True)

        sources = JournalFixtureFactory.make_many_journal_sources(2, True)

        # Journal 1 belongs to the publisher
        self.journal1 = Journal(**sources[0])
        self.journal1.set_owner(self.publisher.id)
        jbib1 = self.journal1.bibjson()
        jbib1.add_identifier(jbib1.P_ISSN, "1111-1111")
        jbib1.add_identifier(jbib1.E_ISSN, "2222-2222")
        self.journal1.save(blocking=True)

        self.publisher.add_journal(self.journal1)

        # Journal 2 has no owner set
        self.journal2 = Journal(**sources[1])
        jbib2 = self.journal2.bibjson()
        jbib2.add_identifier(jbib2.P_ISSN, "1234-5678")
        jbib2.add_identifier(jbib2.E_ISSN, "9876-5432")
        self.journal2.save(blocking=True)

        def _make_article(pissn, eissn, number):
            # Build, id and save one article with a numbered DOI/fulltext
            art = Article(**ArticleFixtureFactory.make_article_source(
                pissn=pissn,
                eissn=eissn,
                doi="10.0000/article-" + number,
                fulltext="https://www.article" + number + ".com"))
            art.set_id("articleid" + number)
            art.save(blocking=True)
            return art

        # Two articles in journal 1, one in journal 2
        self.article10 = _make_article("1111-1111", "2222-2222", "10")
        self.article11 = _make_article("1111-1111", "2222-2222", "11")
        self.article2 = _make_article("1234-5678", "9876-5432", "2")
Exemplo n.º 6
0
    def test_09_article(self):
        """test if the OAI-PMH article feed returns records and only displays articles accepted in DOAJ"""
        # An article not in DOAJ, which must be excluded from the feed.
        # NOTE(review): the pissn fixture values previously carried a stray
        # trailing comma ('5678-5678,') - corrected here to valid ISSN forms.
        article_source = ArticleFixtureFactory.make_article_source(eissn='1234-1234', pissn='5678-5678', in_doaj=False)
        a_private = models.Article(**article_source)
        ba = a_private.bibjson()
        ba.title = "Private Article"
        a_private.save(blocking=True)

        # An article in DOAJ, which should be the only record returned
        article_source = ArticleFixtureFactory.make_article_source(eissn='4321-4321', pissn='8765-8765', in_doaj=True)
        a_public = models.Article(**article_source)
        ba = a_public.bibjson()
        ba.title = "Public Article"
        a_public.save(blocking=True)

        time.sleep(1)

        with self.app_test.test_request_context():
            with self.app_test.test_client() as t_client:
                resp = t_client.get(url_for('oaipmh.oaipmh',  specified='article', verb='ListRecords', metadataPrefix='oai_dc'))
                assert resp.status_code == 200

                t = etree.fromstring(resp.data)
                records = t.xpath('/oai:OAI-PMH/oai:ListRecords', namespaces=self.oai_ns)

                # Check we only have one article returned
                r = records[0].xpath('//oai:record', namespaces=self.oai_ns)
                assert len(r) == 1

                # Check we have the correct article, including the orcid_id crosswalk
                assert str(records[0].xpath('//dc:creator/@id', namespaces=self.oai_ns)[0]) == a_public.bibjson().author[0].get("orcid_id")
                assert records[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text == a_public.bibjson().title

                resp = t_client.get(url_for('oaipmh.oaipmh',  specified='article', verb='GetRecord', metadataPrefix='oai_dc') + '&identifier=abcdefghijk_article')
                assert resp.status_code == 200

                t = etree.fromstring(resp.data)
                records = t.xpath('/oai:OAI-PMH/oai:GetRecord', namespaces=self.oai_ns)

                # Check we only have one article returned
                r = records[0].xpath('//oai:record', namespaces=self.oai_ns)
                assert len(r) == 1

                # Check we have the correct article
                assert records[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text == a_public.bibjson().title
Exemplo n.º 7
0
    def test_11_delete_article_fail(self):
        # set up an account owning a journal that is in DOAJ
        account = models.Account()
        account.set_id('test')
        account.set_name("Tester")
        account.set_email("*****@*****.**")

        journal = models.Journal(**JournalFixtureFactory.make_journal_source(
            in_doaj=True))
        journal.set_owner(account.id)
        journal.save()
        time.sleep(1)

        # create an article via the API (this saves it to the index)
        created = ArticlesCrudApi.create(
            ArticleFixtureFactory.make_article_source(), account)

        # let the index catch up
        time.sleep(1)

        # now exercise the delete failure modes

        # deleting anonymously is unauthorised
        with self.assertRaises(Api401Error):
            ArticlesCrudApi.delete(created.id, None)

        # deleting with the wrong account looks like a missing article
        account.set_id("other")
        with self.assertRaises(Api404Error):
            ArticlesCrudApi.delete(created.id, account)

        # deleting a nonexistent id is not found
        account.set_id("test")
        with self.assertRaises(Api404Error):
            ArticlesCrudApi.delete("adfasdfhwefwef", account)
Exemplo n.º 8
0
    def test_01_withdraw_task(self):
        """Withdrawing journals via the background task also withdraws their articles."""
        sources = JournalFixtureFactory.make_many_journal_sources(10,
                                                                  in_doaj=True)
        journal_ids = []
        article_ids = []
        for source in sources:
            j = models.Journal(**source)
            j.save()
            journal_ids.append(j.id)

            # give each journal one article carrying the journal's ISSNs
            pissn = j.bibjson().get_identifiers(j.bibjson().P_ISSN)
            eissn = j.bibjson().get_identifiers(j.bibjson().E_ISSN)
            asource = ArticleFixtureFactory.make_article_source(pissn=pissn[0],
                                                                eissn=eissn[0],
                                                                with_id=False)
            a = models.Article(**asource)
            a.save()
            article_ids.append(a.id)

        time.sleep(2)

        job = SetInDOAJBackgroundTask.prepare("testuser",
                                              journal_ids=journal_ids,
                                              in_doaj=False)
        SetInDOAJBackgroundTask.submit(job)

        time.sleep(2)

        # every journal and every article should now be withdrawn
        # (loop variables renamed from `id`, which shadowed the builtin)
        for jid in journal_ids:
            j = models.Journal.pull(jid)
            assert j.is_in_doaj() is False

        for aid in article_ids:
            a = models.Article.pull(aid)
            assert a.is_in_doaj() is False
Exemplo n.º 9
0
    def setUp(self):
        super(TestTaskJournalBulkDelete, self).setUp()

        # Populate the index with journals, each with a batch of articles
        self.journals = []
        self.articles = []
        for j_src in JournalFixtureFactory.make_many_journal_sources(
                count=TEST_JOURNAL_COUNT):
            journal = models.Journal(**j_src)
            self.journals.append(journal)
            journal.save()
            for _ in range(TEST_ARTICLES_PER_JOURNAL):
                article = models.Article(**ArticleFixtureFactory.make_article_source(
                    with_id=False,
                    eissn=journal.bibjson().first_eissn,
                    pissn=journal.bibjson().first_pissn))
                article.save()
                self.articles.append(article)

        sleep(2)

        # accounts whose roles must not be allowed to run the bulk delete
        self.forbidden_accounts = [
            AccountFixtureFactory.make_editor_source()['id'],
            AccountFixtureFactory.make_assed1_source()['id'],
            AccountFixtureFactory.make_assed2_source()['id'],
            AccountFixtureFactory.make_assed3_source()['id'],
        ]

        # run the tests in a managing editor's context
        self._make_and_push_test_context(acc=models.Account(
            **AccountFixtureFactory.make_managing_editor_source()))
Exemplo n.º 10
0
    def test_02_create_duplicate_articles(self):
        # the dataset: ten copies of the same incoming article
        dataset = [ArticleFixtureFactory.make_incoming_api_article()] * 10

        # the account that will perform the create
        account = models.Account()
        account.set_id("test")
        account.set_name("Tester")
        account.set_email("*****@*****.**")

        # give the account a journal for the articles to belong to
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(
            in_doaj=True))
        journal.set_owner(account.id)
        journal.save()

        time.sleep(2)

        # the bulk create must refuse the duplicate-laden batch
        with self.assertRaises(Api400Error):
            ids = ArticlesBulkApi.create(dataset, account)

        time.sleep(2)

        # nothing was ever written, so the article mapping does not exist
        with self.assertRaises(ESMappingMissingError):
            all_articles = models.Article.all()
Exemplo n.º 11
0
    def test_04_article_structure_exceptions(self):
        # add some data to the index with a Create
        user_data = ArticleFixtureFactory.make_article_source()

        with self.app_test.test_client() as t_client:
            # log into the app as our user
            self.login(t_client, 'test', 'password123')

            def _create(payload):
                # POST the payload to the article create endpoint
                return t_client.post('/api/v1/articles?api_key=' + self.api_key,
                                     data=payload)

            # CREATE with invalid JSON -> 400 "Bad Request"
            response = _create(json.dumps(user_data) + 'blarglrandomblah')
            assert response.status_code == 400
            assert response.mimetype == 'application/json'
            assert 'Supplied data was not valid JSON' in response.json['error']

            # CREATE with too many keywords (exception propagates from DataObj) -> 400
            too_many_kwds = deepcopy(user_data)
            too_many_kwds['bibjson']['keywords'] = ['one', 'two', 'three', 'four', 'five', 'six', 'SEVEN']
            response = _create(json.dumps(too_many_kwds))
            assert response.status_code == 400
            assert response.mimetype == 'application/json'
            assert 'maximum of 6 keywords' in response.json['error']

            # CREATE with a missing required field (exception propagates from DataObj) -> 400
            missing_title = deepcopy(user_data)
            del missing_title['bibjson']['title']
            response = _create(json.dumps(missing_title))
            assert response.status_code == 400
            assert response.mimetype == 'application/json'
            assert "Field 'title' is required but not present" in response.json['error']
Exemplo n.º 12
0
    def test_10_delete_article_success(self):
        # an account owning a journal in DOAJ
        account = models.Account()
        account.set_id('test')
        account.set_name("Tester")
        account.set_email("*****@*****.**")

        journal = models.Journal(**JournalFixtureFactory.make_journal_source(
            in_doaj=True))
        journal.set_owner(account.id)
        journal.save()
        time.sleep(1)

        # create the article through the API (this saves it to the index)
        created = ArticlesCrudApi.create(
            ArticleFixtureFactory.make_article_source(), account)
        time.sleep(1)  # let the index catch up

        # the owner may delete it
        ArticlesCrudApi.delete(created.id, account)
        time.sleep(1)  # let the index catch up

        # and it should be gone
        assert models.Article.pull(created.id) is None
Exemplo n.º 13
0
    def get_duplicate(cls,
                      return_none=False,
                      given_article_id=None,
                      eissn=None,
                      pissn=None,
                      doi=None,
                      fulltext=None,
                      merge_conflict=False):
        # Pre-build the article the mock will hand back, unless the caller
        # asked for a None result or a merge conflict instead.
        canned = None
        if not (return_none or merge_conflict):
            canned = Article(**ArticleFixtureFactory.make_article_source(
                eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext))
            canned.set_id()

        def mock(*args, **kwargs):
            if merge_conflict:
                raise ArticleMergeConflict()

            supplied = args[0]
            # If an expected article id was given, only match on that id;
            # otherwise return the canned duplicate unconditionally.
            if given_article_id is None:
                return canned
            if supplied.id == given_article_id:
                return canned

        return mock
Exemplo n.º 14
0
    def test_03_create_articles_fail(self):
        """Bulk create must reject a dud account and bad data, writing nothing."""
        # if the account is dud
        with self.assertRaises(Api401Error):
            data = ArticleFixtureFactory.make_incoming_api_article()
            dataset = [data] * 10
            ids = ArticlesBulkApi.create(dataset, None)

        # check that the index is empty, as none of them should have been made
        # (renamed from `all`, which shadowed the builtin)
        remaining = list(models.Article.iterall())
        assert len(remaining) == 0

        # if the data is bust
        with self.assertRaises(Api400Error):
            account = models.Account()
            account.set_id("test")
            account.set_name("Tester")
            account.set_email("*****@*****.**")
            # add a journal to the account
            journal = models.Journal(
                **JournalFixtureFactory.make_journal_source(in_doaj=True))
            journal.set_owner(account.id)
            journal.save()
            time.sleep(1)
            dataset = dataset[:5] + [{"some": {"junk": "data"}}] + dataset[5:]
            ids = ArticlesBulkApi.create(dataset, account)

        # still empty: the batch is all-or-nothing
        remaining = list(models.Article.iterall())
        assert len(remaining) == 0
Exemplo n.º 15
0
    def test_02_create_duplicate_articles(self):
        # build the dataset: the same incoming article, ten times over
        data = ArticleFixtureFactory.make_incoming_api_article()
        dataset = [data for _ in range(10)]

        # the creating account
        account = models.Account()
        account.set_id("test")
        account.set_name("Tester")
        account.set_email("*****@*****.**")

        # with a journal in DOAJ attached
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        journal.set_owner(account.id)
        journal.save()

        time.sleep(2)

        # the bulk create should spot the duplicates and refuse the whole batch
        with self.assertRaises(Api400Error):
            ids = ArticlesBulkApi.create(dataset, account)

        time.sleep(2)

        # no articles were ever written, so their mapping is missing entirely
        with self.assertRaises(ESMappingMissingError):
            all_articles = models.Article.all()
Exemplo n.º 16
0
    def test_03_withdraw(self):
        """Withdrawing journals via change_in_doaj also withdraws their articles."""
        acc = models.Account()
        acc.set_name("testuser")
        ctx = self._make_and_push_test_context(acc=acc)

        sources = JournalFixtureFactory.make_many_journal_sources(10, in_doaj=True)
        journal_ids = []
        article_ids = []
        for source in sources:
            j = models.Journal(**source)
            j.save()
            journal_ids.append(j.id)

            # one article per journal, sharing the journal's ISSNs
            pissn = j.bibjson().get_identifiers(j.bibjson().P_ISSN)
            eissn = j.bibjson().get_identifiers(j.bibjson().E_ISSN)
            asource = ArticleFixtureFactory.make_article_source(pissn=pissn[0], eissn=eissn[0], with_id=False)
            a = models.Article(**asource)
            a.save()
            article_ids.append(a.id)

        time.sleep(2)

        change_in_doaj(journal_ids, False)

        time.sleep(2)

        # journals and their articles must all be withdrawn
        # (loop variables renamed from `id`, which shadowed the builtin)
        for jid in journal_ids:
            j = models.Journal.pull(jid)
            assert j.is_in_doaj() is False

        for aid in article_ids:
            a = models.Article.pull(aid)
            assert a.is_in_doaj() is False

        ctx.pop()
Exemplo n.º 17
0
    def test_02_reinstate_task(self):
        """Reinstating journals via the background task also reinstates their articles."""
        sources = JournalFixtureFactory.make_many_journal_sources(10, in_doaj=False)
        journal_ids = []
        article_ids = []
        for source in sources:
            j = models.Journal(**source)
            j.save()
            journal_ids.append(j.id)

            # one out-of-DOAJ article per journal, sharing the journal's ISSNs
            pissn = j.bibjson().get_identifiers(j.bibjson().P_ISSN)
            eissn = j.bibjson().get_identifiers(j.bibjson().E_ISSN)
            asource = ArticleFixtureFactory.make_article_source(pissn=pissn[0], eissn=eissn[0], with_id=False, in_doaj=False)
            a = models.Article(**asource)
            a.save()
            article_ids.append(a.id)

        time.sleep(2)

        job = SetInDOAJBackgroundTask.prepare("testuser", journal_ids=journal_ids, in_doaj=True)
        SetInDOAJBackgroundTask.submit(job)

        time.sleep(2)

        # journals and their articles must all be back in DOAJ
        # (loop variables renamed from `id`, which shadowed the builtin)
        for jid in journal_ids:
            j = models.Journal.pull(jid)
            assert j.is_in_doaj() is True

        for aid in article_ids:
            a = models.Article.pull(aid)
            assert a.is_in_doaj() is True
Exemplo n.º 18
0
    def test_03_create_articles_fail(self):
        """Bulk create rejects a missing account and malformed data; nothing is saved."""

        def _index_is_empty():
            # materialise via list() rather than binding to the builtin name `all`
            return len(list(models.Article.iterall())) == 0

        # no account at all -> 401
        with self.assertRaises(Api401Error):
            data = ArticleFixtureFactory.make_incoming_api_article()
            dataset = [data] * 10
            ids = ArticlesBulkApi.create(dataset, None)

        # the index is empty, as none of them should have been made
        assert _index_is_empty()

        # junk inserted into an otherwise good batch -> 400
        with self.assertRaises(Api400Error):
            account = models.Account()
            account.set_id("test")
            account.set_name("Tester")
            account.set_email("*****@*****.**")
            # add a journal to the account
            journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
            journal.set_owner(account.id)
            journal.save()
            time.sleep(1)
            dataset = dataset[:5] + [{"some" : {"junk" : "data"}}] + dataset[5:]
            ids = ArticlesBulkApi.create(dataset, account)

        # still empty: the batch is all-or-nothing
        assert _index_is_empty()
Exemplo n.º 19
0
    def test_07_retrieve_article_fail(self):
        # an account with a journal in DOAJ
        account = models.Account()
        account.set_id('test')
        account.set_name("Tester")
        account.set_email("*****@*****.**")

        journal = models.Journal(**JournalFixtureFactory.make_journal_source(
            in_doaj=True))
        journal.set_owner(account.id)
        journal.save()
        time.sleep(1)

        # an article flagged as not in DOAJ
        data = ArticleFixtureFactory.make_article_source()
        data['admin']['in_doaj'] = False
        hidden = models.Article(**data)
        hidden.save()
        time.sleep(1)

        # anonymous retrieval of a non-public article is unauthorised
        with self.assertRaises(Api401Error):
            a = ArticlesCrudApi.retrieve(hidden.id, None)

        # a different user cannot see it either
        other = models.Account()
        other.set_id("asdklfjaioefwe")
        with self.assertRaises(Api404Error):
            a = ArticlesCrudApi.retrieve(hidden.id, other)

        # a nonexistent article is simply not found
        impostor = models.Account()
        impostor.set_id(hidden.id)
        with self.assertRaises(Api404Error):
            a = ArticlesCrudApi.retrieve("ijsidfawefwefw", impostor)
Exemplo n.º 20
0
    def test_11_delete_article_fail(self):
        # set up an owner account and its in-DOAJ journal
        account = models.Account()
        account.set_id('test')
        account.set_name("Tester")
        account.set_email("*****@*****.**")
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        journal.set_owner(account.id)
        journal.save()
        time.sleep(1)

        # create an article through the API and let the index catch up
        source = ArticleFixtureFactory.make_article_source()
        a = ArticlesCrudApi.create(source, account)
        time.sleep(1)

        # each of these delete attempts must fail:

        # 1) anonymous delete
        with self.assertRaises(Api401Error):
            ArticlesCrudApi.delete(a.id, None)

        # 2) delete by a non-owner account
        account.set_id("other")
        with self.assertRaises(Api404Error):
            ArticlesCrudApi.delete(a.id, account)

        # 3) delete of an unknown article id
        account.set_id("test")
        with self.assertRaises(Api404Error):
            ArticlesCrudApi.delete("adfasdfhwefwef", account)
    def test_has_permissions(self):
        # a journal owned by one publisher
        journal1 = Journal(**JournalFixtureFactory.make_journal_source())

        publisher_owner = Account(**AccountFixtureFactory.make_publisher_source())
        publisher_stranged = Account(**AccountFixtureFactory.make_publisher_source())
        admin = Account(**AccountFixtureFactory.make_managing_editor_source())

        journal1.set_owner(publisher_owner)
        journal1.save(blocking=True)

        eissn = journal1.bibjson().get_one_identifier("eissn")
        pissn = journal1.bibjson().get_one_identifier("pissn")

        # an article carrying that journal's ISSNs
        article = Article(**ArticleFixtureFactory.make_article_source(
            eissn=eissn, pissn=pissn))

        # without the ownership check anyone passes; with it, only owner/admin
        assert self.svc.has_permissions(publisher_stranged, article, False)
        assert self.svc.has_permissions(publisher_owner, article, True)
        assert self.svc.has_permissions(admin, article, True)

        failed_result = self.svc.has_permissions(publisher_stranged, article, True)
        expected = {'success': 0, 'fail': 1, 'update': 0, 'new': 0,
                    'shared': [],
                    'unowned': [pissn, eissn],
                    'unmatched': []}
        assert failed_result == expected, "received: {}".format(failed_result)
Exemplo n.º 22
0
    def test_10_delete_article_success(self):
        # owner account plus an in-DOAJ journal
        account = models.Account()
        account.set_id('test')
        account.set_name("Tester")
        account.set_email("*****@*****.**")
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        journal.set_owner(account.id)
        journal.save()
        time.sleep(1)

        # create an article via the CRUD API (this saves it to the index)
        a = ArticlesCrudApi.create(ArticleFixtureFactory.make_article_source(), account)

        # give the index a moment, delete, then give it another moment
        time.sleep(1)
        ArticlesCrudApi.delete(a.id, account)
        time.sleep(1)

        # the article must no longer be retrievable
        assert models.Article.pull(a.id) is None
Exemplo n.º 23
0
    def test_07_retrieve_article_fail(self):
        # an account which owns a journal in DOAJ
        acct = models.Account()
        acct.set_id('test')
        acct.set_name("Tester")
        acct.set_email("*****@*****.**")
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        journal.set_owner(acct.id)
        journal.save()
        time.sleep(1)

        # save an article flagged as being out of DOAJ
        data = ArticleFixtureFactory.make_article_source()
        data['admin']['in_doaj'] = False
        ap = models.Article(**data)
        ap.save()
        time.sleep(1)

        # retrieval attempts, all expected to fail:

        # no user + in_doaj False -> 401
        with self.assertRaises(Api401Error):
            a = ArticlesCrudApi.retrieve(ap.id, None)

        # wrong user -> 404
        acct = models.Account()
        acct.set_id("asdklfjaioefwe")
        with self.assertRaises(Api404Error):
            a = ArticlesCrudApi.retrieve(ap.id, acct)

        # nonexistent article -> 404
        acct = models.Account()
        acct.set_id(ap.id)
        with self.assertRaises(Api404Error):
            a = ArticlesCrudApi.retrieve("ijsidfawefwefw", acct)
Exemplo n.º 24
0
    def test_prepare_update_admin(self, value, kwargs):
        """Matrix test for ``svc._prepare_update_admin``.

        ``kwargs`` selects the scenario: which duplicate to build, whether
        duplicates are merged, the expected is_update count, and whether a
        DuplicateArticleException is expected.  (``value`` is supplied by the
        parameterisation harness and unused here.)
        """
        # NOTE(review): these patch the class attributes globally and are not
        # restored in this method — assumed reset elsewhere in the suite; confirm.
        Article.merge = BLLArticleMockFactory.merge_mock
        Article.pull = BLLArticleMockFactory.pull_mock

        duplicate_arg = kwargs.get("duplicate")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        is_update_arg = kwargs.get("is_update")
        raises_arg = kwargs.get("raises")

        pissn1 = "1234-5678"
        eissn1 = "9876-5432"
        pissn2 = "1111-1111"
        eissn2 = "2222-2222"
        doi = "10.1234/article-10"
        ft = "https://example.com"

        update_article_id = "update_id"

        # the article being submitted as the update
        article_src = ArticleFixtureFactory.make_article_source(pissn=pissn1,
                                                                eissn=eissn1,
                                                                doi=doi,
                                                                fulltext=ft)
        article = Article(**article_src)
        article.set_id("article_id")

        # optionally build a duplicate sharing the same doi/fulltext, whose id
        # may or may not match the article being updated
        duplicate = None
        if duplicate_arg != "none":
            duplicate_src = ArticleFixtureFactory.make_article_source(
                pissn=pissn2, eissn=eissn2, doi=doi, fulltext=ft)
            duplicate = Article(**duplicate_src)
            if duplicate_arg == "same_as_update_article_id":
                duplicate.set_id("update_id")
            elif duplicate_arg == "different_then_update_article_id":
                duplicate.set_id("duplicate_id")

        merge_duplicate = True if merge_duplicate_arg == "yes" else False

        if raises_arg == "DuplicateArticle":
            with self.assertRaises(exceptions.DuplicateArticleException):
                self.svc._prepare_update_admin(article, duplicate,
                                               update_article_id,
                                               merge_duplicate)
        else:
            # the call returns the number of updates performed
            assert self.svc._prepare_update_admin(
                article, duplicate, update_article_id,
                merge_duplicate) == int(is_update_arg)
    def test_prepare_update_publisher(self, value, kwargs):
        """Matrix test for ``svc._prepare_update_publisher``.

        ``kwargs`` selects the scenario: which duplicate to build, whether
        duplicates are merged, whether the doi/fulltext is considered changed,
        the expected is_update count, and whether a DuplicateArticleException
        is expected.  (``value`` is supplied by the parameterisation harness.)
        """
        # NOTE(review): patches Article.merge globally without restoring it
        # here — assumed reset elsewhere in the suite; confirm.
        Article.merge = BLLArticleMockFactory.merge_mock

        duplicate_arg = kwargs.get("duplicate")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        doi_or_ft_update_arg = kwargs.get("doi_or_ft_updated")
        is_update_arg = kwargs.get("is_update")
        raises_arg = kwargs.get("raises")

        pissn1 = "1234-5678"
        eissn1 = "9876-5432"
        pissn2 = "1111-1111"
        eissn2 = "2222-2222"
        doi = "10.1234/article-10"
        ft = "https://example.com"

        # stub the doi/fulltext change detector to this scenario's answer
        if doi_or_ft_update_arg == "yes":
            self.svc._doi_or_fulltext_updated = BLLArticleMockFactory.doi_or_fulltext_updated(True,True)
        else:
            self.svc._doi_or_fulltext_updated = BLLArticleMockFactory.doi_or_fulltext_updated(False,False)

        article_src = ArticleFixtureFactory.make_article_source(pissn=pissn1, eissn=eissn1, doi=doi, fulltext=ft)
        article = Article(**article_src)
        article.set_id("article_id")

        # optionally build a duplicate sharing the same doi/fulltext
        duplicate = None
        if duplicate_arg != "none":
            duplicate_src = ArticleFixtureFactory.make_article_source(pissn=pissn2, eissn=eissn2, doi=doi, fulltext=ft)
            duplicate = Article(**duplicate_src)
            if duplicate_arg == "same_as_article_id":
                duplicate.set_id("article_id")
            elif duplicate_arg == "different_than_article_id":
                duplicate.set_id("duplicate_id")

        merge_duplicate = True if merge_duplicate_arg == "yes" else False

        # a duplicate with a different id is treated as unowned in this
        # scenario, so the permissions check is mocked to fail for it
        if duplicate_arg == "different_than_article_id":
            self.svc.has_permissions = BLLArticleMockFactory.has_permissions(False)
        else:
            self.svc.has_permissions = BLLArticleMockFactory.has_permissions(True)

        if raises_arg == "DuplicateArticle":
            with self.assertRaises(exceptions.DuplicateArticleException):
                self.svc._prepare_update_publisher(article,duplicate,merge_duplicate,self.publisher,True)
        else:
            # the call returns the number of updates performed
            assert self.svc._prepare_update_publisher(article,duplicate,merge_duplicate,self.publisher,True) == int(is_update_arg)
Exemplo n.º 26
0
    def test_10_scroll(self):
        """Scroll over articles through the query service.

        Three in-DOAJ articles and one out-of-DOAJ article are indexed; the
        pre-filters should exclude the hidden one, and the post-filters should
        strip the non-public publisher record id from every returned record.
        """
        svc = QueryService()

        # three public (in DOAJ) articles, each carrying a publisher record id
        for _ in range(3):
            art = models.Article(**ArticleFixtureFactory.make_article_source(with_id=False))
            assert art.publisher_record_id() == 'some_identifier'
            art.save(blocking=True)

        # one hidden (not in DOAJ) article the scroll must not return
        hidden = models.Article(**ArticleFixtureFactory.make_article_source(with_id=False, in_doaj=False))
        hidden.save(blocking=True)

        # every scrolled result must have the publisher record id removed
        for record in svc.scroll('api_query', 'article', {"query": {"match_all": {}}}, None, None):
            returned = models.Article(**record)
            assert returned.publisher_record_id() is None, returned.publisher_record_id()
Exemplo n.º 27
0
    def test_14_article_model_index(self):
        """The 'index' section of an article should only exist after prep()."""
        article = models.Article(**ArticleFixtureFactory.make_article_source())
        assert article.data.get('index') is None

        # prep() builds the index section in place
        article.prep()
        assert article.data.get('index') is not None
Exemplo n.º 28
0
    def test_33_article_stats(self):
        """article_stats() should count only the in-DOAJ articles matching the
        journal's ISSN, and report the created date of the newest of them."""
        articles = []

        # articles across two ISSNs, alternating in/out of DOAJ, with
        # sequential created dates 2019-01-01 .. 2019-01-08
        groups = [
            ("1111-1111", True, range(1, 3)),
            ("1111-1111", False, range(3, 5)),
            ("2222-2222", True, range(5, 7)),
            ("2222-2222", False, range(7, 9)),
        ]
        for issn, in_doaj, days in groups:
            for day in days:
                article = models.Article(
                    **ArticleFixtureFactory.make_article_source(
                        eissn=issn, pissn=issn, with_id=False, in_doaj=in_doaj))
                article.set_created("2019-01-0" + str(day) + "T00:00:00Z")
                articles.append(article)

        # save them all, blocking only on the last so the index catches up
        for idx, article in enumerate(articles):
            article.save(blocking=idx == len(articles) - 1)

        # a journal on the first ISSN sees its 2 in-DOAJ articles only
        journal = models.Journal()
        bj = journal.bibjson()
        bj.add_identifier(bj.P_ISSN, "1111-1111")
        stats = journal.article_stats()
        assert stats.get("total") == 2
        assert stats.get("latest") == "2019-01-02T00:00:00Z"
Exemplo n.º 29
0
    def test_14_article_model_index(self):
        """Check article indexes generate"""
        # a freshly constructed article has no 'index' section until prepped
        a = models.Article(**ArticleFixtureFactory.make_article_source())
        assert a.data.get('index', None) is None

        # Generate the index
        a.prep()
        assert a.data.get('index', None) is not None
Exemplo n.º 30
0
    def test_01_create_articles_success(self):
        """Bulk-create 10 articles through the API and verify each was stored
        under a unique id."""
        def find_dict_in_list(lst, key, value):
            # index of the first dict in lst with lst[i][key] == value, else -1
            for i, dic in enumerate(lst):
                if dic[key] == value:
                    return i
            return -1

        # set up all the bits we need - 10 articles
        dataset = []
        for i in range(1, 11):
            data = ArticleFixtureFactory.make_incoming_api_article()
            # change the DOI and fulltext URLs to escape duplicate detection
            # and try with multiple articles
            doi_ix = find_dict_in_list(data['bibjson']['identifier'], 'type',
                                       'doi')
            if doi_ix == -1:
                data['bibjson']['identifier'].append({"type": "doi"})
            # NOTE(review): when doi_ix is -1 this relies on Python's negative
            # indexing to hit the element just appended — fragile but correct
            data['bibjson']['identifier'][doi_ix][
                'id'] = '10.0000/SOME.IDENTIFIER.{0}'.format(i)

            fulltext_url_ix = find_dict_in_list(data['bibjson']['link'],
                                                'type', 'fulltext')
            if fulltext_url_ix == -1:
                data['bibjson']['link'].append({"type": "fulltext"})
            # same negative-indexing reliance as for the DOI above
            data['bibjson']['link'][fulltext_url_ix][
                'url'] = 'http://www.example.com/article_{0}'.format(i)

            dataset.append(deepcopy(data))

        # create an account that we'll do the create as
        account = models.Account()
        account.set_id("test")
        account.set_name("Tester")
        account.set_email("*****@*****.**")

        # add a journal to the account
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(
            in_doaj=True))
        journal.set_owner(account.id)
        journal.save()

        time.sleep(2)

        # call create on the object (which will save it to the index)
        ids = ArticlesBulkApi.create(dataset, account)

        # check that we got the right number of ids back
        assert len(ids) == 10
        assert len(list(set(ids))) == 10, len(list(
            set(ids)))  # are they actually 10 unique IDs?

        # let the index catch up
        time.sleep(2)

        # check that each id was actually created
        for id in ids:
            s = models.Article.pull(id)
            assert s is not None
Exemplo n.º 31
0
    def discover_duplicates(cls,
                            doi_duplicates=0,
                            fulltext_duplicates=0,
                            overlap=0):
        """Build a mock for the duplicate-discovery call.

        Creates ``doi_duplicates`` articles sharing one DOI and
        ``fulltext_duplicates`` articles sharing one fulltext URL, with
        ``overlap`` of them belonging to both groups.  Returns a function with
        the signature of the real discovery method that always returns the
        prepared duplicates, keyed by match type ('doi' / 'fulltext'), with
        empty match types removed.

        :raises Exception: if overlap exceeds either duplicate count.
        """
        if overlap > doi_duplicates or overlap > fulltext_duplicates:
            raise Exception(
                "overlap must be the same as or less than either of doi_duplicates or fulltext_duplicates"
            )

        idents = []
        # first make duplicate records for the total number of desired dois
        for i in range(doi_duplicates):
            idents.append({
                "doi_domain": True,
                "doi": "10.1234/abc/1",
                "fulltext": "http://example.com/unique/" + str(i)
            })

        # the first `overlap` doi-duplicates also share the duplicate fulltext
        for i in range(overlap):
            idents[i]["fulltext"] = "http://example.com/1"
            idents[i]["fulltext_domain"] = True

        # the remaining fulltext duplicates get unique dois
        remaining_fulltexts = fulltext_duplicates - overlap
        for i in range(remaining_fulltexts):
            idents.append({
                "fulltext_domain": True,
                "doi": "10.1234/unique/" + str(i),
                "fulltext": "http://example.com/1"
            })

        possible_duplicates = {"doi": [], "fulltext": []}
        for i, ident in enumerate(idents):
            source = ArticleFixtureFactory.make_article_source(
                eissn="1234-5678",
                pissn="9876-5432",
                doi=ident["doi"],
                # FIX: pass this record's fulltext URL; previously the literal
                # list ["fulltext"] was passed, so the articles never carried
                # the duplicate/unique fulltext URLs prepared above
                fulltext=ident["fulltext"])
            article = Article(**source)
            article.set_id()
            # deterministic, strictly increasing last_updated timestamps
            article.data["last_updated"] = datetime.fromtimestamp(
                i * 100000).strftime("%Y-%m-%dT%H:%M:%SZ")
            if "doi_domain" in ident:
                possible_duplicates["doi"].append(article)
            if "fulltext_domain" in ident:
                possible_duplicates["fulltext"].append(article)

        # drop empty match-type domains so callers only see populated ones
        if len(possible_duplicates["doi"]) == 0:
            del possible_duplicates["doi"]
        if len(possible_duplicates["fulltext"]) == 0:
            del possible_duplicates["fulltext"]

        def mock(article, owner=None, results_per_match_type=10):
            # signature mirrors the real discovery method; always returns the
            # duplicates prepared above
            return possible_duplicates

        return mock
Exemplo n.º 32
0
    def test_01_incoming_article_do(self):
        """IncomingArticleDO should accept valid fixtures, silently prune
        disallowed fields, and raise DataStructureException on invalid data."""
        # make a blank one
        ia = IncomingArticleDO()

        # make one from an incoming article model fixture
        data = ArticleFixtureFactory.make_article_source()
        ia = IncomingArticleDO(data)

        # and one with an author email, which we have removed from the allowed fields recently. It should silently prune
        data = ArticleFixtureFactory.make_article_source()
        data["bibjson"]["author"][0]["email"] = "*****@*****.**"
        ia = IncomingArticleDO(data)
        assert "*****@*****.**" not in ia.json()

        # make another one that's broken
        data = ArticleFixtureFactory.make_article_source()
        del data["bibjson"]["title"]
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # now progressively remove the conditionally required/advanced validation stuff
        #
        # missing identifiers
        data = ArticleFixtureFactory.make_article_source()
        data["bibjson"]["identifier"] = []
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # no issns specified
        data["bibjson"]["identifier"] = [{"type" : "wibble", "id": "alksdjfas"}]
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # issns the same (but not normalised the same)
        data["bibjson"]["identifier"] = [{"type" : "pissn", "id": "12345678"}, {"type" : "eissn", "id": "1234-5678"}]
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # too many keywords
        data = ArticleFixtureFactory.make_article_source()
        data["bibjson"]["keywords"] = ["one", "two", "three", "four", "five", "six", "seven"]
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)
Exemplo n.º 33
0
    def test_02_toc_requirements(self):
        """ Check what we need for ToCs are in the article models """
        a = models.Article(**ArticleFixtureFactory.make_article_source())
        a.prep()

        # To build ToCs we need a volume, an issue, a year and a month.
        assert a.data['bibjson']['journal']['volume'] == '1'
        assert a.data['bibjson']['journal']['number'] == '99'
        assert a.data['index']['date'] == "1991-01-01T00:00:00Z"
        # for this fixture the full-volume month index matches the plain date
        assert a.data['index']['date_toc_fv_month'] == a.data['index']['date'] == "1991-01-01T00:00:00Z"
Exemplo n.º 34
0
    def test_09_update_article_fail(self):
        """Updating an article must 401 without an account and 404 for the
        wrong account or an unknown article id."""
        # set up all the bits we need
        account = models.Account()
        account.set_id('test')
        account.set_name("Tester")
        account.set_email("*****@*****.**")
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(
            in_doaj=True))
        journal.set_owner(account.id)
        journal.save()
        time.sleep(1)

        data = ArticleFixtureFactory.make_article_source()

        # call create on the object (which will save it to the index)
        a = ArticlesCrudApi.create(data, account)

        # let the index catch up
        time.sleep(1)

        # get a copy of the newly created version for use in assertions later
        created = models.Article.pull(a.id)

        # now make an updated version of the object
        data = ArticleFixtureFactory.make_article_source()
        data["bibjson"]["title"] = "An updated title"

        # call update on the object in various context that will fail

        # without an account
        with self.assertRaises(Api401Error):
            ArticlesCrudApi.update(a.id, data, None)

        # with the wrong account
        account.set_id("other")
        with self.assertRaises(Api404Error):
            ArticlesCrudApi.update(a.id, data, account)

        # on the wrong id
        account.set_id("test")
        with self.assertRaises(Api404Error):
            ArticlesCrudApi.update("adfasdfhwefwef", data, account)
Exemplo n.º 35
0
    def test_09_search(self):
        """Search through the query service.

        Three in-DOAJ articles and one out-of-DOAJ article are indexed; the
        pre-filters should keep only the 3 public ones, and the post-filters
        should strip the non-public publisher record id from every hit.
        """
        svc = QueryService()

        # three public (in DOAJ) articles, each carrying a publisher record id
        for _ in range(3):
            art = models.Article(**ArticleFixtureFactory.make_article_source(with_id=False))
            assert art.publisher_record_id() == 'some_identifier'
            art.save(blocking=True)

        # one hidden (not in DOAJ) article the query must not return
        hidden = models.Article(**ArticleFixtureFactory.make_article_source(with_id=False, in_doaj=False))
        hidden.save(blocking=True)

        res = svc.search('query', 'article', {"query": {"match_all": {}}}, account=None, additional_parameters={})
        assert res['hits']['total'] == 3, res['hits']['total']

        # every hit must have the publisher record id removed
        for hit in res['hits']['hits']:
            returned = models.Article(**hit)
            assert returned.publisher_record_id() is None, returned.publisher_record_id()
Exemplo n.º 36
0
    def test_09_update_article_fail(self):
        """Updating an article must 401 without an account and 404 for the
        wrong account or an unknown article id."""
        # set up all the bits we need
        account = models.Account()
        account.set_id('test')
        account.set_name("Tester")
        account.set_email("*****@*****.**")
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        journal.set_owner(account.id)
        journal.save()
        time.sleep(1)

        data = ArticleFixtureFactory.make_article_source()

        # call create on the object (which will save it to the index)
        a = ArticlesCrudApi.create(data, account)

        # let the index catch up
        time.sleep(1)

        # get a copy of the newly created version for use in assertions later
        created = models.Article.pull(a.id)

        # now make an updated version of the object
        data = ArticleFixtureFactory.make_article_source()
        data["bibjson"]["title"] = "An updated title"

        # call update on the object in various context that will fail

        # without an account
        with self.assertRaises(Api401Error):
            ArticlesCrudApi.update(a.id, data, None)

        # with the wrong account
        account.set_id("other")
        with self.assertRaises(Api404Error):
            ArticlesCrudApi.update(a.id, data, account)

        # on the wrong id
        account.set_id("test")
        with self.assertRaises(Api404Error):
            ArticlesCrudApi.update("adfasdfhwefwef", data, account)
Exemplo n.º 37
0
    def test_04_coerce(self):
        """Coercion in IncomingArticleDO: valid values pass, while bad urls,
        link types, booleans and dates raise DataStructureException."""
        data = ArticleFixtureFactory.make_article_source()

        # first some successes
        data["bibjson"]["link"][0][
            "url"] = "http://www.example.com/this_location/here"  # protocol required
        data["bibjson"]["link"][0]["type"] = "fulltext"
        data["admin"]["in_doaj"] = False
        data["created_date"] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
        ia = IncomingArticleDO(data)
        # NOTE(review): `unicode` is Python 2 only — this test cannot run
        # unchanged under Python 3
        assert isinstance(ia.bibjson.title, unicode)

        # now test some failures

        # an invalid urls
        data = ArticleFixtureFactory.make_article_source()
        data["bibjson"]["link"][0]["url"] = "Two streets down on the left"
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)
        data["bibjson"]["link"][0][
            "url"] = "www.example.com/this_location/here"
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # an invalid link type
        data = ArticleFixtureFactory.make_article_source()
        data["bibjson"]["link"][0]["type"] = "cheddar"
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # invalid bool
        data = ArticleFixtureFactory.make_article_source()
        data["admin"]["in_doaj"] = "Yes"
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # invalid date
        data = ArticleFixtureFactory.make_article_source()
        data["created_date"] = "Just yesterday"
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)
Exemplo n.º 38
0
    def test_05_delete_articles_fail(self):
        """Bulk delete must 401 without an account and 400 for the wrong
        account or an id list containing an unknown article."""
        # set up all the bits we need
        dataset = []
        for i in range(10):
            data = ArticleFixtureFactory.make_incoming_api_article(
                doi="10.123/test/" + str(i),
                fulltext="http://example.com/" + str(i))
            dataset.append(data)

        # create the main account we're going to work as
        article_owner = models.Account()
        article_owner.set_id("test")
        article_owner.set_name("Tester")
        article_owner.set_email("*****@*****.**")
        # create another account which will own the articles so the one
        # above will be "another user" trying to delete our precious articles.
        somebody_else = models.Account()
        somebody_else.set_id("somebody_else")
        somebody_else.set_name("Somebody Else")
        somebody_else.set_email("*****@*****.**")
        # add a journal to the article owner account to create that link
        # between account and articles
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(
            in_doaj=True))
        journal.set_owner(article_owner.id)
        journal.save()
        time.sleep(1)

        # call create on the objects (which will save it to the index)
        ids = ArticlesBulkApi.create(dataset, article_owner)

        # let the index catch up
        time.sleep(2)

        # call delete on the object in various context that will fail

        # without an account
        with self.assertRaises(Api401Error):
            ArticlesBulkApi.delete(ids, None)

        # with the wrong account
        article_owner.set_id("other")
        with self.assertRaises(Api400Error):
            ArticlesBulkApi.delete(ids, somebody_else)

        # on the wrong id
        ids.append("adfasdfhwefwef")
        article_owner.set_id("test")
        with self.assertRaises(Api400Error):
            ArticlesBulkApi.delete(ids, article_owner)

        # NOTE(review): this repeats the previous assertion verbatim —
        # possibly intended to prove the failure is repeatable; confirm intent
        with self.assertRaises(Api400Error):
            ArticlesBulkApi.delete(ids, article_owner)
Exemplo n.º 39
0
    def test_10_scroll(self):
        # Just bringing it all together. Make 4 articles: 3 in DOAJ, 1 not in DOAJ
        # We then expect pre-filters to run on the query, ensuring we only get the 3 in DOAJ articles.
        # We also expect the post-filters to run on the results, ensuring non-public data is deleted from the admin section.
        qsvc = QueryService()

        # three public (in DOAJ) articles, each carrying a publisher record id
        articles = []
        for i in range(0, 3):
            articles.append(
                models.Article(**ArticleFixtureFactory.make_article_source(
                    with_id=False)))
            assert articles[-1].publisher_record_id() == 'some_identifier'
            articles[-1].save(blocking=True)
        # plus one hidden (not in DOAJ) article the scroll must not return
        articles.append(
            models.Article(**ArticleFixtureFactory.make_article_source(
                with_id=False, in_doaj=False)))
        articles[-1].save(blocking=True)
        q = {"query": {"match_all": {}}}
        # every scrolled result must have the publisher record id removed
        for res in qsvc.scroll('api_query', 'article', q, None, None):
            am = models.Article(**res)
            assert am.publisher_record_id() is None, am.publisher_record_id()
Exemplo n.º 40
0
 def test_03_toc_uses_both_issns_when_available(self):
     """The ToC page should embed both the print and electronic ISSN."""
     j = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
     pissn = j.bibjson().first_pissn
     eissn = j.bibjson().first_eissn
     j.set_last_manual_update()
     j.save(blocking=True)
     # an article on the same ISSNs so the ToC has content
     a = models.Article(**ArticleFixtureFactory.make_article_source(pissn=pissn, eissn=eissn, in_doaj=True))
     a.save(blocking=True)
     with self.app_test.test_client() as t_client:
         response = t_client.get('/toc/{}'.format(j.bibjson().get_preferred_issn()))
         assert response.status_code == 200
         # both issns should appear in the page's embedded javascript
         assert 'var toc_issns = ["{pissn}","{eissn}"];'.format(pissn=pissn, eissn=eissn) in response.data
Exemplo n.º 41
0
    def test_01_create_articles_success(self):
        """Bulk-create 10 articles through the API and verify each was stored
        under a unique id."""
        def find_dict_in_list(lst, key, value):
            # index of the first dict in lst with lst[i][key] == value, else -1
            for i, dic in enumerate(lst):
                if dic[key] == value:
                    return i
            return -1

        # set up all the bits we need - 10 articles
        dataset = []
        for i in range(1, 11):
            data = ArticleFixtureFactory.make_incoming_api_article()
            # change the DOI and fulltext URLs to escape duplicate detection
            # and try with multiple articles
            doi_ix = find_dict_in_list(data['bibjson']['identifier'], 'type', 'doi')
            if doi_ix == -1:
                data['bibjson']['identifier'].append({"type" : "doi"})
            # NOTE(review): when doi_ix is -1 this relies on Python's negative
            # indexing to hit the element just appended — fragile but correct
            data['bibjson']['identifier'][doi_ix]['id'] = '10.0000/SOME.IDENTIFIER.{0}'.format(i)

            fulltext_url_ix = find_dict_in_list(data['bibjson']['link'], 'type', 'fulltext')
            if fulltext_url_ix == -1:
                data['bibjson']['link'].append({"type" : "fulltext"})
            # same negative-indexing reliance as for the DOI above
            data['bibjson']['link'][fulltext_url_ix]['url'] = 'http://www.example.com/article_{0}'.format(i)

            dataset.append(deepcopy(data))

        # create an account that we'll do the create as
        account = models.Account()
        account.set_id("test")
        account.set_name("Tester")
        account.set_email("*****@*****.**")

        # add a journal to the account
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        journal.set_owner(account.id)
        journal.save()

        time.sleep(2)

        # call create on the object (which will save it to the index)
        ids = ArticlesBulkApi.create(dataset, account)

        # check that we got the right number of ids back
        assert len(ids) == 10
        assert len(list(set(ids))) == 10, len(list(set(ids)))  # are they actually 10 unique IDs?

        # let the index catch up
        time.sleep(2)

        # check that each id was actually created
        for id in ids:
            s = models.Article.pull(id)
            assert s is not None
Exemplo n.º 42
0
    def test_04_coerce(self):
        """Coercion in IncomingArticleDO: valid values pass, while bad urls,
        link types, booleans and dates raise DataStructureException."""
        data = ArticleFixtureFactory.make_article_source()

        # first some successes
        data["bibjson"]["link"][0]["url"] = "http://www.example.com/this_location/here"     # protocol required
        data["bibjson"]["link"][0]["type"] = "fulltext"
        data["admin"]["in_doaj"] = False
        data["created_date"] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
        ia = IncomingArticleDO(data)
        # NOTE(review): `unicode` is Python 2 only — this test cannot run
        # unchanged under Python 3
        assert isinstance(ia.bibjson.title, unicode)

        # now test some failures

        # an invalid urls
        data = ArticleFixtureFactory.make_article_source()
        data["bibjson"]["link"][0]["url"] = "Two streets down on the left"
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)
        data["bibjson"]["link"][0]["url"] = "www.example.com/this_location/here"
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # an invalid link type
        data = ArticleFixtureFactory.make_article_source()
        data["bibjson"]["link"][0]["type"] = "cheddar"
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # invalid bool
        data = ArticleFixtureFactory.make_article_source()
        data["admin"]["in_doaj"] = "Yes"
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)

        # invalid date
        data = ArticleFixtureFactory.make_article_source()
        data["created_date"] = "Just yesterday"
        with self.assertRaises(DataStructureException):
            ia = IncomingArticleDO(data)
Exemplo n.º 43
0
    def test_03_articles_crud(self):
        """Exercise the full article lifecycle over HTTP: create, retrieve,
        update, delete, then confirm the article is gone."""
        # add some data to the index with a Create
        user_data = ArticleFixtureFactory.make_article_source()

        with self.app_test.test_client() as t_client:
            # log into the app as our user
            self.login(t_client, 'test', 'password123')

            # CREATE a new article
            response = t_client.post('/api/v1/articles?api_key=' + self.api_key, data=json.dumps(user_data))
            assert response.status_code == 201          # 201 "Created"
            assert response.mimetype == 'application/json'

            # Check it gives back a newly created article, with an ID
            new_ar_id = json.loads(response.data.decode("utf-8"))['id']
            new_ar_loc = json.loads(response.data.decode("utf-8"))['location']
            assert new_ar_id is not None
            assert new_ar_id in new_ar_loc

            # RETRIEVE the same article using the ID
            response = t_client.get('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
            assert response.status_code == 200          # 200 "OK"
            assert response.mimetype == 'application/json'

            retrieved_article = json.loads(response.data.decode("utf-8"))
            new_ar_title = retrieved_article['bibjson']['title']
            assert new_ar_title == user_data['bibjson']['title']

            # UPDATE the title of the article
            updated_data = deepcopy(user_data)
            updated_data['bibjson']['title'] = 'This is a new title for this article'
            response = t_client.put('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key), data=json.dumps(updated_data))
            assert response.status_code == 204          # 204 "No Content"
            assert response.mimetype == 'application/json'

            response = t_client.get('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
            retrieved_article = json.loads(response.data.decode("utf-8"))
            new_ar_title = retrieved_article['bibjson']['title']
            assert new_ar_title == updated_data['bibjson']['title']
            assert new_ar_title != user_data['bibjson']['title']

            # DELETE the article
            assert models.Article.pull(new_ar_id) is not None
            response = t_client.delete('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
            assert response.status_code == 204          # 204 "No Content"
            assert response.mimetype == 'application/json'

            # Try to RETRIEVE the article again - check it isn't there anymore
            # FIX: this previously queried the /applications/ endpoint, which
            # 404s for any article id and made the assertion vacuous
            response = t_client.get('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
            assert response.status_code == 404
            assert response.mimetype == 'application/json'
Exemplo n.º 44
0
    def test_03_articles_crud(self):
        """Full CRUD lifecycle for an article via the API: create, retrieve, update, delete."""
        # add some data to the index with a Create
        user_data = ArticleFixtureFactory.make_article_source()

        with self.app_test.test_client() as t_client:
            # log into the app as our user
            self.login(t_client, 'test', 'password123')

            # CREATE a new article
            response = t_client.post('/api/v1/articles?api_key=' + self.api_key, data=json.dumps(user_data))
            assert response.status_code == 201          # 201 "Created"
            assert response.mimetype == 'application/json'

            # Check it gives back a newly created article, with an ID
            new_ar_id = json.loads(response.data)['id']
            new_ar_loc = json.loads(response.data)['location']
            assert new_ar_id is not None
            assert new_ar_id in new_ar_loc

            # RETRIEVE the same article using the ID
            response = t_client.get('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
            assert response.status_code == 200          # 200 "OK"
            assert response.mimetype == 'application/json'

            retrieved_article = json.loads(response.data)
            new_ar_title = retrieved_article['bibjson']['title']
            assert new_ar_title == user_data['bibjson']['title']

            # UPDATE the title of the article
            updated_data = deepcopy(user_data)
            updated_data['bibjson']['title'] = 'This is a new title for this article'
            response = t_client.put('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key), data=json.dumps(updated_data))
            assert response.status_code == 204          # 204 "No Content"
            assert response.mimetype == 'application/json'

            response = t_client.get('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
            retrieved_article = json.loads(response.data)
            new_ar_title = retrieved_article['bibjson']['title']
            assert new_ar_title == updated_data['bibjson']['title']
            assert new_ar_title != user_data['bibjson']['title']

            # DELETE the article
            assert models.Article.pull(new_ar_id) is not None
            response = t_client.delete('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
            assert response.status_code == 204          # 204 "No Content"
            assert response.mimetype == 'application/json'

            # Try to RETRIEVE the article again - check it isn't there anymore
            # FIX: this previously queried /api/v1/applications/, which 404s for any
            # article id - so the check would pass even if the DELETE above had failed.
            response = t_client.get('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
            assert response.status_code == 404
            assert response.mimetype == 'application/json'
Exemplo n.º 45
0
    def test_33_article_stats(self):
        """Journal.article_stats counts only in-DOAJ articles matching the journal's ISSN."""
        articles = []

        # make a bunch of articles variably in doaj/not in doaj, for/not for the
        # issn we'll search; each (issn, in_doaj, day-range) row yields two articles
        specs = [
            ("1111-1111", True, range(1, 3)),
            ("1111-1111", False, range(3, 5)),
            ("2222-2222", True, range(5, 7)),
            ("2222-2222", False, range(7, 9)),
        ]
        for issn, in_doaj, days in specs:
            for day in days:
                source = ArticleFixtureFactory.make_article_source(eissn=issn, pissn=issn, with_id=False, in_doaj=in_doaj)
                article = models.Article(**source)
                article.set_created("2019-01-0" + str(day) + "T00:00:00Z")
                articles.append(article)

        # save them all, blocking only on the final save so the index catches up
        last = len(articles) - 1
        for idx, article in enumerate(articles):
            article.save(blocking=idx == last)

        journal = models.Journal()
        bj = journal.bibjson()
        bj.add_identifier(bj.P_ISSN, "1111-1111")
        stats = journal.article_stats()
        assert stats.get("total") == 2
        assert stats.get("latest") == "2019-01-02T00:00:00Z"
Exemplo n.º 46
0
    def test_03_create_article_fail(self):
        """Article creation must be rejected without an account or with junk data."""
        # no account supplied -> unauthorised
        with self.assertRaises(Api401Error):
            source = ArticleFixtureFactory.make_article_source()
            ArticlesCrudApi.create(source, None)

        # garbage payload -> bad request, even with a valid account
        with self.assertRaises(Api400Error):
            account = models.Account()
            account.set_id("test")
            account.set_name("Tester")
            account.set_email("*****@*****.**")
            ArticlesCrudApi.create({"some" : {"junk" : "data"}}, account)
Exemplo n.º 47
0
    def test_03_create_article_fail(self):
        """Creation raises Api401Error for a missing account and Api400Error for bad data."""
        # account is missing -> 401
        with self.assertRaises(Api401Error):
            fixture = ArticleFixtureFactory.make_article_source()
            ArticlesCrudApi.create(fixture, None)

        # payload doesn't validate -> 400
        with self.assertRaises(Api400Error):
            acc = models.Account()
            acc.set_id("test")
            acc.set_name("Tester")
            acc.set_email("*****@*****.**")
            bad_payload = {"some": {"junk": "data"}}
            ArticlesCrudApi.create(bad_payload, acc)
Exemplo n.º 48
0
    def test_05_toc_correctly_uses_eissn(self):
        """The ToC page should key on the E-ISSN when the journal has no P-ISSN."""
        j = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        eissn = j.bibjson().first_eissn
        # remove pissn so the eissn is the journal's only (and therefore preferred) identifier
        j.bibjson().remove_identifiers(idtype=j.bibjson().P_ISSN, id=j.bibjson().first_pissn)

        j.set_last_manual_update()
        j.save(blocking=True)
        a = models.Article(**ArticleFixtureFactory.make_article_source(pissn=eissn, in_doaj=True))
        a.save(blocking=True)
        with self.app_test.test_client() as t_client:
            response = t_client.get('/toc/{}'.format(j.bibjson().get_preferred_issn()))
            assert response.status_code == 200
            # FIX: response.data is bytes under Python 3, so a str-in-bytes test
            # raises TypeError; decode first (matching the decode usage elsewhere
            # in this file).
            assert 'var toc_issns = ["{eissn}"];'.format(eissn=eissn) in response.data.decode("utf-8")
Exemplo n.º 49
0
    def test_05_delete_articles_fail(self):
        """Bulk delete must fail without an account, with the wrong account, or with bad ids."""
        # set up all the bits we need: ten articles with distinct DOIs/fulltexts
        dataset = [
            ArticleFixtureFactory.make_incoming_api_article(
                doi="10.123/test/" + str(n), fulltext="http://example.com/" + str(n))
            for n in range(10)
        ]

        # create the main account we're going to work as
        article_owner = models.Account()
        article_owner.set_id("test")
        article_owner.set_name("Tester")
        article_owner.set_email("*****@*****.**")

        # create another account which will own the articles so the one
        # above will be "another user" trying to delete our precious articles.
        somebody_else = models.Account()
        somebody_else.set_id("somebody_else")
        somebody_else.set_name("Somebody Else")
        somebody_else.set_email("*****@*****.**")

        # add a journal to the article owner account to create that link
        # between account and articles
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        journal.set_owner(article_owner.id)
        journal.save()
        time.sleep(1)

        # call create on the objects (which will save it to the index)
        ids = ArticlesBulkApi.create(dataset, article_owner)

        # let the index catch up
        time.sleep(2)

        # now call delete in various contexts that must fail

        # without an account
        with self.assertRaises(Api401Error):
            ArticlesBulkApi.delete(ids, None)

        # with the wrong account
        article_owner.set_id("other")
        with self.assertRaises(Api400Error):
            ArticlesBulkApi.delete(ids, somebody_else)

        # on the wrong id
        ids.append("adfasdfhwefwef")
        article_owner.set_id("test")
        with self.assertRaises(Api400Error):
            ArticlesBulkApi.delete(ids, article_owner)

        with self.assertRaises(Api400Error):
            ArticlesBulkApi.delete(ids, article_owner)
Exemplo n.º 50
0
    def test_05_outgoing_article_do(self):
        """OutgoingArticleDO built from an Article must not expose index or history data."""
        # start from an empty outgoing object
        oa = OutgoingArticleDO()

        # build an article model from the incoming fixture source
        ap = models.Article(**ArticleFixtureFactory.make_article_source())

        # add some history to the article (content is irrelevant - it just must
        # not appear at the other end)
        ap.add_history(bibjson={'Lorem': {'ipsum': 'dolor', 'sit': 'amet'}, 'consectetur': 'adipiscing elit.'})

        # Create the DataObject from the model
        oa = OutgoingArticleDO.from_model(ap)

        # the internal-only sections must have been stripped
        assert oa.data.get("index") is None
        assert oa.data.get("history") is None
Exemplo n.º 51
0
    def test_01_article_index_date_parsing(self):
        """ The ToC date histogram needs an accurate datestamp in the article's index """
        a = models.Article(**ArticleFixtureFactory.make_article_source())

        # (year, month, expected datestamp) cases exercising the supported formats
        cases = [
            # shortened years are expanded relative to the current century
            ('12', '03', '2012-03-01T00:00:00Z'),
            # beware: this case will give a false negative 70 years from the time
            # of writing; this gives adequate warning (24 years) to fix
            # hard-coding of centuries in get_publication_date().
            ('86', '11', '1986-11-01T00:00:00Z'),
            # numeric months
            ('2001', '03', '2001-03-01T00:00:00Z'),
            # full month names
            ('2001', 'March', '2001-03-01T00:00:00Z'),
            # mixed-case month names
            ('2006', 'nOVeMBer', '2006-11-01T00:00:00Z'),
            # abbreviated months
            ('1993', 'Dec', '1993-12-01T00:00:00Z'),
            # a bogus month must not be read as a real one - stamp defaults to Jan
            ('1999', 'Flibble', '1999-01-01T00:00:00Z'),
        ]

        for year, month, expected in cases:
            a.bibjson().year = year
            a.bibjson().month = month
            assert a.bibjson().get_publication_date() == expected
Exemplo n.º 52
0
    def setUp(self):
        """Populate the index with journals and their articles, then push a managing editor context."""
        super(TestTaskJournalBulkDelete, self).setUp()

        # small batch size so the bulk-delete task exercises its batching logic
        ArticleBulkDeleteBackgroundTask.BATCH_SIZE = 13

        self.journals = []
        self.articles = []
        journal_sources = JournalFixtureFactory.make_many_journal_sources(count=TEST_JOURNAL_COUNT)
        for source in journal_sources:
            journal = models.Journal(**source)
            self.journals.append(journal)
            journal.save()
            # each journal gets its own set of articles, linked by ISSN
            for _ in range(TEST_ARTICLES_PER_JOURNAL):
                article_source = ArticleFixtureFactory.make_article_source(
                    with_id=False, eissn=journal.bibjson().first_eissn, pissn=journal.bibjson().first_pissn)
                article = models.Article(**article_source)
                article.save()
                self.articles.append(article)

        # give the index time to catch up
        sleep(2)

        self._make_and_push_test_context(acc=models.Account(**AccountFixtureFactory.make_managing_editor_source()))
Exemplo n.º 53
0
    def test_30_article_journal_sync(self):
        """add_journal_metadata copies journal info onto the article and reports whether anything changed."""
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        article = models.Article(**ArticleFixtureFactory.make_article_source(in_doaj=False, with_journal_info=False))

        # before the sync the article carries none of the journal's metadata
        assert article.has_seal() is False
        assert article.bibjson().journal_issns != journal.bibjson().issns()

        reg = models.Journal()
        changed = article.add_journal_metadata(journal, reg)

        assert changed is True
        assert article.has_seal() is True
        assert article.is_in_doaj() is True

        # the journal-level fields must now mirror the journal's bibjson
        abj, jbj = article.bibjson(), journal.bibjson()
        assert abj.journal_issns == jbj.issns()
        assert abj.publisher == jbj.publisher
        assert abj.journal_country == jbj.country
        assert abj.journal_language == jbj.language
        assert abj.journal_title == jbj.title

        # a second sync is a no-op
        assert article.add_journal_metadata(journal) is False
Exemplo n.º 54
0
    def test_06_retrieve_article_success(self):
        """Retrieve an article via the CRUD API and verify the outgoing journal metadata."""
        # set up all the bits we need
        # add a journal to the account
        account = models.Account()
        account.set_id('test')
        account.set_name("Tester")
        account.set_email("*****@*****.**")
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        journal.set_owner(account.id)
        journal.save()
        time.sleep(1)

        data = ArticleFixtureFactory.make_article_source()
        ap = models.Article(**data)
        ap.save()
        time.sleep(1)

        # call retrieve on the object with a valid user
        a = ArticlesCrudApi.retrieve(ap.id, account)

        # call retrieve with no user (will return if in_doaj is True)
        a = ArticlesCrudApi.retrieve(ap.id, None)

        # check that we got back the object we expected
        assert isinstance(a, OutgoingArticleDO)
        assert a.id == ap.id
        assert a.bibjson.journal.start_page == '3', a.bibjson.journal.start_page
        assert a.bibjson.journal.end_page == '21'
        assert a.bibjson.journal.volume == '1'
        assert a.bibjson.journal.number == '99'
        # FIX: the failure message previously called a.bibjson() - but bibjson is an
        # attribute on the DataObject (used as one on every other line here), so a
        # failing assert would raise TypeError instead of reporting the value.
        assert a.bibjson.journal.publisher == 'The Publisher', a.bibjson.journal.publisher
        assert a.bibjson.journal.title == 'The Title'
        assert a.bibjson.journal.license[0].title == "CC BY"
        assert a.bibjson.journal.license[0].type == "CC BY"
        assert a.bibjson.journal.license[0].url == "http://license.example.com"
        assert a.bibjson.journal.license[0].version == "1.0"
        assert a.bibjson.journal.license[0].open_access is True
        assert a.bibjson.journal.language == ["EN", "FR"]
        assert a.bibjson.journal.country == "US"
Exemplo n.º 55
0
    def test_04_delete_article_success(self):
        """Bulk delete removes exactly the requested articles and leaves the rest intact."""
        # build ten incoming articles with distinct DOIs and fulltext URLs
        dataset = [
            ArticleFixtureFactory.make_incoming_api_article(
                doi="10.123/test/" + str(n), fulltext="http://example.com/" + str(n))
            for n in range(10)
        ]

        # create the account we're going to work as
        account = models.Account()
        account.set_id("test")
        account.set_name("Tester")
        account.set_email("*****@*****.**")

        # add a journal to the account so the account owns the articles
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        journal.set_owner(account.id)
        journal.save()
        time.sleep(1)

        # call create on the objects (which will save it to the index)
        ids = ArticlesBulkApi.create(dataset, account)

        # let the index catch up
        time.sleep(2)

        # now delete half of them
        dels = ids[:5]
        ArticlesBulkApi.delete(dels, account)

        # let the index catch up
        time.sleep(2)

        # the deleted articles must be gone; the remainder must survive
        for article_id in dels:
            assert models.Article.pull(article_id) is None
        for article_id in ids[5:]:
            assert models.Article.pull(article_id) is not None
Exemplo n.º 56
0
    def test_public_data_dump(self, name, kwargs):
        """Matrix test for PublicDataDumpBackgroundTask.

        ``kwargs`` (supplied by the test parameterisation) drives the scenario:
        whether to clean/prune the store, which record types to dump, journal
        and article counts, batch size, and whether writes to the tmp or main
        store are mocked to fail.  ``status`` is the job status expected once
        the task has run.
        """

        # unpack the raw (string) scenario parameters
        clean_arg = kwargs.get("clean")
        prune_arg = kwargs.get("prune")
        types_arg = kwargs.get("types")
        journals_arg = kwargs.get("journals")
        articles_arg = kwargs.get("articles")
        batch_size_arg = kwargs.get("batch_size")
        tmp_write_arg = kwargs.get("tmp_write")
        store_write_arg = kwargs.get("store_write")

        status_arg = kwargs.get("status")

        ###############################################
        ## set up

        # convert "yes"/"no"/anything-else to True/False/None
        clean = True if clean_arg == "yes" else False if clean_arg == "no" else None
        prune = True if prune_arg == "yes" else False if prune_arg == "no" else None
        types = types_arg if types_arg != "-" else None

        journal_count = int(journals_arg)
        article_count = int(articles_arg)
        batch_size = int(batch_size_arg)
        # expected file counts per tarball and record counts in the first file
        # NOTE(review): "/" here implies Python 2 integer division; under Python 3
        # these counts become floats - confirm the target interpreter.
        journal_file_count = 0 if journal_count == 0 else (journal_count / batch_size) + 1
        article_file_count = 0 if article_count == 0 else (article_count / batch_size) + 1
        first_article_file_records = 0 if article_count == 0 else batch_size if article_count > batch_size else article_count
        first_journal_file_records = 0 if journal_count == 0 else batch_size if journal_count > batch_size else journal_count

        # add the data to the index first, to maximise the time it has to become available for search
        sources = JournalFixtureFactory.make_many_journal_sources(journal_count, in_doaj=True)
        jids = []
        for i in range(len(sources)):
            source = sources[i]
            journal = models.Journal(**source)
            journal.save()
            jids.append((journal.id, journal.last_updated))

        aids = []
        for i in range(article_count):
            source = ArticleFixtureFactory.make_article_source(
                eissn="{x}000-0000".format(x=i),
                pissn="0000-{x}000".format(x=i),
                with_id=False,
                doi="10.123/{x}".format(x=i),
                fulltext="http://example.com/{x}".format(x=i)
            )
            article = models.Article(**source)
            article.save()
            aids.append((article.id, article.last_updated))

        # construct some test data in the local store
        # (pre-existing dump files let the clean/prune behaviour be observed later)
        container_id = app.config["STORE_PUBLIC_DATA_DUMP_CONTAINER"]
        localStore = store.StoreLocal(None)
        localStoreFiles = []
        if clean or prune:
            for i in range(5):
                localStore.store(container_id, "doaj_article_data_2018-01-0" + str(i) + ".tar.gz",
                                 source_stream=StringIO("test"))
                localStore.store(container_id, "doaj_journal_data_2018-01-0" + str(i) + ".tar.gz",
                                 source_stream=StringIO("test"))
            localStoreFiles = localStore.list(container_id)

        app.config["DISCOVERY_RECORDS_PER_FILE"] = batch_size

        # set the mocks for store write failures
        if tmp_write_arg == "fail":
            app.config["STORE_TMP_IMPL"] = StoreMockFactory.no_writes_classpath()

        if store_write_arg == "fail":
            app.config["STORE_IMPL"] = StoreMockFactory.no_writes_classpath()

        # block until all the records are saved
        for jid, lu in jids:
            models.Journal.block(jid, lu, sleep=0.05)
        for aid, lu in aids:
            models.Article.block(aid, lu, sleep=0.05)

        ###########################################################
        # Execution

        job = PublicDataDumpBackgroundTask.prepare("testuser", clean=clean, prune=prune, types=types)
        task = PublicDataDumpBackgroundTask(job)
        BackgroundApi.execute(task)

        # make sure we have a fresh copy of the job
        job = task.background_job
        assert job.status == status_arg

        if job.status != "error":
            # the cache should advertise a URL for each dumped type, and only those types
            article_url = models.cache.Cache.get_public_data_dump().get("article", {}).get("url")
            if types_arg in ["-", "all", "article"]:
                assert article_url is not None
            else:
                assert article_url is None

            journal_url = models.cache.Cache.get_public_data_dump().get("journal", {}).get("url")
            if types_arg in ["-", "all", "journal"]:
                assert journal_url is not None
            else:
                assert journal_url is None

            assert localStore.exists(container_id)
            files = localStore.list(container_id)

            if types_arg in ["-", "all"]:
                assert len(files) == 2
            else:
                assert len(files) == 1

            day_at_start = dates.today()

            # verify the article tarball: member count, first-file record count,
            # and that each record exposes only the public fields
            if types_arg in ["-", "all", "article"]:
                article_file = "doaj_article_data_" + day_at_start + ".tar.gz"
                assert article_file in files

                stream = localStore.get(container_id, article_file)
                tarball = tarfile.open(fileobj=stream, mode="r:gz")
                members = tarball.getmembers()
                assert len(members) == article_file_count

                if len(members) > 0:
                    f = tarball.extractfile(members[0])
                    data = json.loads(f.read())
                    assert len(data) == first_article_file_records

                    record = data[0]
                    for key in record.keys():
                        assert key in ["admin", "bibjson", "id", "last_updated", "created_date"]
                    if "admin" in record:
                        for key in record["admin"].keys():
                            assert key in ["ticked", "seal"]

            # the same checks for the journal tarball
            if types_arg in ["-", "all", "journal"]:
                journal_file = "doaj_journal_data_" + day_at_start + ".tar.gz"
                assert journal_file in files

                stream = localStore.get(container_id, journal_file)
                tarball = tarfile.open(fileobj=stream, mode="r:gz")
                members = tarball.getmembers()
                assert len(members) == journal_file_count

                if len(members) > 0:
                    f = tarball.extractfile(members[0])
                    data = json.loads(f.read())
                    assert len(data) == first_journal_file_records

                    record = data[0]
                    for key in record.keys():
                        assert key in ["admin", "bibjson", "id", "last_updated", "created_date"]
                    if "admin" in record:
                        for key in record["admin"].keys():
                            assert key in ["ticked", "seal"]

        else:
            # in the case of an error, we expect the tmp store to have been cleaned up
            tmpStore = store.TempStore()
            assert not tmpStore.exists(container_id)

            # in the case of an error, we expect the main store not to have been touched
            # (for the errors that we are checking for)
            if prune and not clean:
                # no matter what the error, if we didn't specify clean then we expect everything
                # to survive
                survived = localStore.list(container_id)
                assert localStoreFiles == survived
            elif clean:
                # if we specified clean, then it's possible the main store was cleaned before the
                # error occurred, in which case it depends on the error.  This reminds us that
                # clean shouldn't be used in production
                if tmp_write_arg == "fail":
                    assert not localStore.exists(container_id)
                else:
                    survived = localStore.list(container_id)
                    assert localStoreFiles == survived
            else:
                # otherwise, we expect the main store to have survived
                assert not localStore.exists(container_id)
    def test_01_batch_create_article(self, name, kwargs):
        """Matrix test for the article service's batch_create_articles.

        ``kwargs`` (from the test parameterisation) selects the scenario: the
        batch contents (including in-batch DOI/fulltext duplicates), whether a
        duplicate already exists in the index, the calling account, and the
        flag arguments passed through to the service.  ``raises`` names an
        expected exception (looked up in EXCEPTIONS); ``success``/``fail``/
        ``update`` give the expected report counts.
        """

        # unpack the raw (string) scenario parameters
        articles_arg = kwargs.get("articles")
        duplicate_in_batch_arg = kwargs.get("duplicate_in_batch")
        duplicate_in_index_arg = kwargs.get("duplicate_in_index")
        account_arg = kwargs.get("account")
        duplicate_check_arg = kwargs.get("duplicate_check")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        limit_to_account_arg = kwargs.get("limit_to_account")
        add_journal_info_arg = kwargs.get("add_journal_info")

        raises_arg = kwargs.get("raises")
        success_arg = kwargs.get("success")
        fail_arg = kwargs.get("fail")
        update_arg = kwargs.get("update")

        ###############################################
        ## set up

        success = int(success_arg)
        fail = int(fail_arg)
        update = int(update_arg)

        duplicate_in_batch = duplicate_in_batch_arg == "yes"
        duplicate_in_index = int(duplicate_in_index_arg)

        raises = EXCEPTIONS.get(raises_arg)

        # tri-state flags: "none" leaves them as None (service defaults apply)
        duplicate_check = None
        if duplicate_check_arg != "none":
            duplicate_check = True if duplicate_check_arg == "true" else False

        merge_duplicate = None
        if merge_duplicate_arg != "none":
            merge_duplicate = True if merge_duplicate_arg == "true" else False

        limit_to_account = None
        if limit_to_account_arg != "none":
            limit_to_account = True if limit_to_account_arg == "true" else False

        add_journal_info = None
        if add_journal_info_arg != "none":
            add_journal_info = True if add_journal_info_arg == "true" else False

        account = None
        if account_arg != "none":
            source = AccountFixtureFactory.make_publisher_source()
            account = Account(**source)

        # build the batch; each article gets a distinct ISSN pair, and the
        # journal block is stripped so add_journal_info has work to do
        journal_specs = []
        last_doi = None
        last_ft = None
        last_issn = None
        last_id = None
        articles = None
        if articles_arg != "none":
            articles = []
            if articles_arg == "yes":
                # one with a DOI and no fulltext
                source = ArticleFixtureFactory.make_article_source(
                    eissn="0000-0000",
                    pissn="0000-0000",
                    doi="10.123/abc/0",
                    fulltext=False
                )
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title" : "0", "pissn" : "0000-0000", "eissn" : "0000-0000"})

                # another with a DOI and no fulltext
                source = ArticleFixtureFactory.make_article_source(
                    eissn="1111-1111",
                    pissn="1111-1111",
                    doi="10.123/abc/1",
                    fulltext=False
                )
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title" : "1", "pissn" : "1111-1111", "eissn" : "1111-1111"})

                # one with a fulltext and no DOI
                source = ArticleFixtureFactory.make_article_source(
                    eissn="2222-2222",
                    pissn="2222-2222",
                    fulltext="http://example.com/2",
                    doi=False
                )
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title" : "2", "pissn" : "2222-2222", "eissn" : "2222-2222"})

                # another one with a fulltext and no DOI
                source = ArticleFixtureFactory.make_article_source(
                    eissn="3333-3333",
                    pissn="3333-3333",
                    fulltext="http://example.com/3",
                    doi=False
                )
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title" : "3", "pissn" : "3333-3333", "eissn" : "3333-3333"})

                # remember identifying details of the last "clean" article for the mocks below
                last_issn = "3333-3333"
                last_doi = "10.123/abc/1"
                last_ft = "http://example.com/3"
                last_id = articles[-1].id

                if duplicate_in_batch:
                    # one with a duplicated DOI
                    source = ArticleFixtureFactory.make_article_source(
                        eissn="4444-4444",
                        pissn="4444-4444",
                        doi="10.123/abc/0",
                        fulltext="http://example.com/4"
                    )
                    del source["bibjson"]["journal"]
                    article = Article(**source)
                    article.set_id()
                    articles.append(article)
                    if add_journal_info:
                        journal_specs.append({"title" : "4", "pissn" : "4444-4444", "eissn" : "4444-4444"})

                    # one with a duplicated Fulltext
                    source = ArticleFixtureFactory.make_article_source(
                        eissn="5555-5555",
                        pissn="5555-5555",
                        doi="10.123/abc/5",
                        fulltext="http://example.com/1"
                    )
                    del source["bibjson"]["journal"]
                    article = Article(**source)
                    article.set_id()
                    articles.append(article)
                    if add_journal_info:
                        journal_specs.append({"title" : "5", "pissn" : "5555-5555", "eissn" : "5555-5555"})

        # mock the ownership check according to the account scenario
        ilo_mock = None
        if account_arg == "owner":
            ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=True)
        elif account_arg == "own_1":
            ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit_on_issn=[last_issn])
        else:
            ilo_mock = BLLArticleMockFactory.is_legitimate_owner()
        self.svc.is_legitimate_owner = ilo_mock

        # mock duplicate detection: 0 = none, 1 = duplicate found, 2 = merge conflict
        gd_mock = None
        if duplicate_in_index == 1:
            gd_mock = BLLArticleMockFactory.get_duplicate(given_article_id=last_id, eissn=last_issn, pissn=last_issn, doi=last_doi, fulltext=last_ft)
        elif duplicate_in_index == 2:
            gd_mock = BLLArticleMockFactory.get_duplicate(merge_conflict=True)
        else:
            gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
        self.svc.get_duplicate = gd_mock

        ios_mock = BLLArticleMockFactory.issn_ownership_status([], [], [], [])
        self.svc.issn_ownership_status = ios_mock

        if add_journal_info:
            gj_mock = ModelArticleMockFactory.get_journal(journal_specs)
            Article.get_journal = gj_mock

        ###########################################################
        # Execution

        if raises is not None:
            with self.assertRaises(raises):
                try:
                    self.svc.batch_create_articles(articles, account, duplicate_check, merge_duplicate,
                                                   limit_to_account, add_journal_info)
                except exceptions.IngestException as e:
                    # verify the partial report attached to the exception before
                    # re-raising (not applicable to the merge-conflict scenario)
                    if duplicate_in_index != 2:
                        report = e.result
                        assert report["success"] == success
                        assert report["fail"] == fail
                        assert report["update"] == update
                        assert report["new"] == success - update
                    raise
        else:
            report = self.svc.batch_create_articles(articles, account, duplicate_check, merge_duplicate,
                                                    limit_to_account, add_journal_info)

            # make sure all the articles are saved before running the asserts
            aids = [(a.id, a.last_updated) for a in articles]
            for aid, lu in aids:
                Article.block(aid, lu, sleep=0.05)

            assert report["success"] == success
            assert report["fail"] == fail
            assert report["update"] == update
            assert report["new"] == success - update

            if success > 0:
                all_articles = Article.all()
                # the index may lag slightly behind the block() calls above; retry once
                if len(all_articles) != success:
                    time.sleep(0.5)
                    all_articles = Article.all()
                assert len(all_articles) == success
                for article in all_articles:
                    if add_journal_info:
                        assert article.bibjson().journal_title is not None
                    else:
                        assert article.bibjson().journal_title is None

            else:
                # there's nothing in the article index
                with self.assertRaises(ESMappingMissingError):
                    Article.all()