Exemplo n.º 1
0
def metadata():
    """Serve and process the publisher's article metadata form.

    A GET request returns a blank form (there is no edit feature).  A POST
    request is treated as one of three things, checked in this order: a
    press of the "more authors" button, a press of a "remove author"
    button, or the full submission, which is validated and - if it is
    acceptable - handed to the article service.

    Any other request method falls through and returns None.
    """
    template = 'publisher/metadata.html'

    # GET: hand back an empty form
    if request.method == "GET":
        return render_template(template, form=ArticleForm())

    # neither GET nor POST: nothing to do
    if request.method != "POST":
        return None

    # POST: a form button has been hit
    form = ArticleForm(request.form)

    # work out whether one of the add/remove author buttons was pressed; a
    # remove button carries the author's index after the "-" in its name
    wants_more = request.values.get("more_authors")
    author_to_drop = None
    for field_name in request.values.keys():
        if field_name.startswith("remove_authors"):
            author_to_drop = field_name.split("-")[1]

    # more authors: add one blank entry and show the form again
    if wants_more:
        form.authors.append_entry()
        return render_template(template, form=form)

    # remove an author: pop entries off the end until the unwanted one has
    # been taken off, then put back the ones popped before it, in their
    # original order
    if author_to_drop is not None:
        doomed = "authors-" + author_to_drop
        survivors = []
        while len(form.authors.entries) > 0:
            popped = form.authors.pop_entry()
            if popped.short_name == doomed:
                break
            survivors.append(popped)
        for survivor in reversed(survivors):
            form.authors.append_entry(survivor.data)
        return render_template(template, form=form)

    # full submission: the author check runs before the form's own
    # validation so that both kinds of error can be reported together
    enough_authors = _validate_authors(form)
    if not form.validate():
        return render_template(template,
                               form=form,
                               author_error=not enough_authors)

    # the form itself is valid, but at least one author is still required
    if not enough_authors:
        return render_template(template, form=form, author_error=True)

    # everything checks out: convert the form to an article and store it
    art = ArticleFormXWalk().crosswalk_form(form)
    DOAJ.articleService().create_article(
        art, current_user._get_current_object())
    flash("Article created/updated", "success")

    # finish with a fresh, empty form
    return render_template(template, form=ArticleForm())
 def setUp(self):
     """Run the parent setUp, then keep references to the article service
     and to the callables named below.

     Presumably these are restored in tearDown after a test has replaced
     them - tearDown is not visible here, so confirm.
     """
     super(TestBLLArticleBatchCreateArticle, self).setUp()
     service = DOAJ.articleService()
     self.svc = service

     # current implementations of the service methods
     self._is_legitimate_owner = service.is_legitimate_owner
     self._get_duplicate = service.get_duplicate
     self._issn_ownership_status = service.issn_ownership_status

     # and of the model-level journal lookup
     self._get_journal = Article.get_journal
 def setUp(self):
     """Run the parent setUp, then keep references to the article service
     and to the callables named below.

     Presumably these are restored in tearDown after a test has replaced
     them - tearDown is not visible here, so confirm.
     """
     super(TestBLLArticleBatchCreateArticle, self).setUp()
     service = DOAJ.articleService()
     self.svc = service

     # current implementations of the service methods
     self._is_legitimate_owner = service.is_legitimate_owner
     self._get_duplicate = service.get_duplicate
     self._issn_ownership_status = service.issn_ownership_status

     # and of the model-level journal lookup
     self._get_journal = Article.get_journal
 def setUp(self):
     """Run the parent setUp, keep references to the service callables and
     to Article.merge, and build a publisher account for the tests to use.
     """
     super(TestBLLPrepareUpdatePublisher, self).setUp()
     service = DOAJ.articleService()
     self.svc = service

     # current implementations, kept on the test case (presumably so they
     # can be put back after a test patches them - confirm against tearDown)
     self.is_id_updated = service._doi_or_fulltext_updated
     self.has_permission = service.has_permissions
     self.merge = Article.merge

     # a publisher account built from the standard fixture
     self.publisher = Account(
         **AccountFixtureFactory.make_publisher_source())
Exemplo n.º 5
0
 def setUp(self):
     """Run the parent setUp, then keep references to the article service
     and to the service methods named below.

     Presumably these are restored in tearDown after a test has replaced
     them - tearDown is not visible here, so confirm.
     """
     super(TestBLLArticleCreateArticle, self).setUp()
     service = DOAJ.articleService()
     self.svc = service

     # ownership / duplicate / permission checks
     self.is_legitimate_owner = service.is_legitimate_owner
     self.ownership = service.issn_ownership_status
     self.duplicate = service.get_duplicate
     self.permission = service.has_permissions

     # update preparation for the two kinds of user
     self.prepare_update_admin = service._prepare_update_admin
     self.prepare_update_publisher = service._prepare_update_publisher
Exemplo n.º 6
0
    def create(cls, articles, account):
        """Create a batch of articles on behalf of an account.

        :param articles: iterable of incoming article data; each item is
            passed through ArticlesCrudApi.prep_article
        :param account: the account the articles are created for; None is
            rejected with Api401Error
        :return: list of the ids of the prepared articles
        :raises Api401Error: if no account is supplied
        :raises Api400Error: if the article service raises IngestException
        """
        # authentication happens in the layer above, so all we need here is
        # for the account to exist
        if account is None:
            raise Api401Error()

        # turn each incoming record into an article model
        prepared = []
        for data in articles:
            prepared.append(ArticlesCrudApi.prep_article(data))

        # the article service handles the whole batch in a single call; an
        # ingest failure is reported to the API client as a 400
        service = DOAJ.articleService()
        try:
            service.batch_create_articles(prepared, account)
            return [article.id for article in prepared]
        except exceptions.IngestException as e:
            raise Api400Error(e.message)
Exemplo n.º 7
0
    def create(cls, articles, account):
        """Create a batch of articles on behalf of an account.

        :param articles: iterable of incoming article data; each item is
            passed through ArticlesCrudApi.prep_article
        :param account: the account the articles are created for; None is
            rejected with Api401Error
        :return: list of the ids of the prepared articles
        :raises Api401Error: if no account is supplied
        :raises Api400Error: if the article service raises IngestException
        """
        # authentication happens in the layer above, so all we need here is
        # for the account to exist
        if account is None:
            raise Api401Error()

        # turn each incoming record into an article model
        prepared = []
        for data in articles:
            prepared.append(ArticlesCrudApi.prep_article(data))

        # the article service handles the whole batch in a single call, with
        # add_journal_info switched on; an ingest failure is reported to the
        # API client as a 400
        service = DOAJ.articleService()
        try:
            service.batch_create_articles(prepared,
                                          account,
                                          add_journal_info=True)
            return [article.id for article in prepared]
        except exceptions.IngestException as e:
            raise Api400Error(e.message)
Exemplo n.º 8
0
    def test_01_discover_duplicates(self, name, kwargs):
        """Parameterised test of the article service's discover_duplicates.

        ``kwargs`` carries string switches which control: whether an owner
        (and a journal for that owner) exists, which articles are saved
        beforehand and how their DOI / fulltext URL are decorated, what the
        incoming article looks like, and what result is expected - either
        an exception (``raises``) or matches under the "doi" and/or
        "fulltext" keys of the returned value.

        :param name: label of the scenario; not read by the test body
        :param kwargs: dict of scenario switches, read below
        """

        # unpack the scenario switches
        article_arg = kwargs.get("article")
        owner_arg = kwargs.get("owner")
        article_doi_arg = kwargs.get("article_doi")
        doi_duplicate_arg = kwargs.get("doi_duplicate")
        article_fulltext_arg = kwargs.get("article_fulltext")
        fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
        articles_by_doi_arg = kwargs.get("articles_by_doi")
        articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
        raises_arg = kwargs.get("raises")

        # translate the exception name into an exception class (None when no
        # exception is expected)
        raises = EXCEPTIONS.get(raises_arg)

        ###############################################
        ## set up

        owner = None
        if owner_arg != "none":
            owner = Account(**AccountFixtureFactory.make_publisher_source())

        # NOTE(review): owner_id is assigned here but never read again in
        # this version of the test
        owner_id = None
        if owner is not None:
            owner_id = owner.id

        # create a journal for the owner
        if owner_arg not in ["none"]:
            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            journal = Journal(**source)
            journal.set_owner(owner.id)
            journal.bibjson().remove_identifiers()
            journal.bibjson().add_identifier("eissn", "1234-5678")
            journal.bibjson().add_identifier("pissn", "9876-5432")
            journal.save()

        # determine what we need to load into the index: one saved article
        # per entry in IDENTS, with the DOI and fulltext URL decorated as
        # the scenario requests
        article_ids = []
        aids_block = []
        if owner_arg not in ["none", "no_articles"]:
            for i, ident in enumerate(IDENTS):
                the_doi = ident["doi"]
                if doi_duplicate_arg == "padded":
                    the_doi = "  " + the_doi + "  "
                elif doi_duplicate_arg == "prefixed":
                    the_doi = "https://dx.doi.org/" + the_doi

                # the fulltext in IDENTS gets a scheme put in front of it
                # here, unless the scenario is the "invalid" fulltext one,
                # in which case it is stored as-is
                the_fulltext = ident["fulltext"]
                if article_fulltext_arg != "invalid":
                    if fulltext_duplicate_arg == "padded":
                        the_fulltext = "  http:" + the_fulltext
                    elif fulltext_duplicate_arg == "http":
                        the_fulltext = "http:" + the_fulltext
                    elif fulltext_duplicate_arg == "https":
                        the_fulltext = "https:" + the_fulltext
                    else:
                        the_fulltext = "http:" + the_fulltext

                source = ArticleFixtureFactory.make_article_source(
                    eissn="1234-5678",
                    pissn="9876-5432",
                    doi=the_doi,
                    fulltext=the_fulltext)
                article = Article(**source)
                article.set_id()
                article.save(blocking=True)
                article_ids.append(article.id)
                # remember (id, last_updated) for the blockall call below
                aids_block.append((article.id, article.last_updated))

        # generate our incoming article
        article = None
        doi = None
        fulltext = None
        if article_arg == "yes":
            # NOTE(review): "1234=5678" uses "=" where the journal above uses
            # "1234-5678"; confirm whether that is deliberate
            eissn = "1234=5678"  # one matching
            pissn = "6789-1234"  # the other not - issn matches are not relevant to this test

            if article_doi_arg in ["yes", "padded"]:
                # a DOI that is not taken from IDENTS, unless the scenario
                # asks for a duplicate of the first saved article
                doi = "10.1234/abc/11"
                if doi_duplicate_arg in ["yes", "padded"]:
                    doi = IDENTS[0]["doi"]
                if article_doi_arg == "padded":
                    doi = "  doi:" + doi + "  "
            elif article_doi_arg in ["invalid"]:
                doi = IDENTS[-1]["doi"]

            if article_fulltext_arg in ["yes", "padded", "https"]:
                # same pattern for the fulltext URL
                fulltext = "//example.com/11"
                if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                    fulltext = IDENTS[0]["fulltext"]
                if fulltext_duplicate_arg == "padded":
                    fulltext = "  http:" + fulltext + "  "
                elif fulltext_duplicate_arg == "https":
                    fulltext = "https:" + fulltext
                else:
                    fulltext = "http:" + fulltext
            elif article_fulltext_arg == "invalid":
                fulltext = IDENTS[-1]["fulltext"]

            source = ArticleFixtureFactory.make_article_source(
                eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
            article = Article(**source)

            # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
            # provide them
            if doi is None:
                article.bibjson().remove_identifiers("doi")
            if fulltext is None:
                article.bibjson().remove_urls("fulltext")

            article.set_id()

        # presumably waits until the saved articles are visible in the
        # index - TODO confirm against Article.blockall
        Article.blockall(aids_block)

        ###########################################################
        # Execution

        svc = DOAJ.articleService()
        if raises is not None:
            with self.assertRaises(raises):
                svc.discover_duplicates(article)
        else:
            possible_articles = svc.discover_duplicates(article)

            if articles_by_doi_arg == "yes":
                assert "doi" in possible_articles
                assert len(possible_articles["doi"]) == 1
                # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
                if article_doi_arg == "invalid":
                    assert possible_articles["doi"][0].id == article_ids[-1]
                else:
                    assert possible_articles["doi"][0].id == article_ids[0]
            else:
                # no DOI match expected; the result may be None
                if possible_articles is not None:
                    assert "doi" not in possible_articles

            if articles_by_fulltext_arg == "yes":
                assert "fulltext" in possible_articles
                assert len(possible_articles["fulltext"]) == 1
                # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
                if article_fulltext_arg == "invalid":
                    assert possible_articles["fulltext"][0].id == article_ids[
                        -1]
                else:
                    assert possible_articles["fulltext"][0].id == article_ids[
                        0]
            else:
                # no fulltext match expected; the result may be None
                if possible_articles is not None:
                    assert "fulltext" not in possible_articles
Exemplo n.º 9
0
    def test_01_is_legitimate_owner(self, name, kwargs):
        """Parameterised test of the article service's is_legitimate_owner.

        ``kwargs`` carries string switches which control: whether an owner
        and an article exist, which ISSNs the article has, which of those
        ISSNs the mocked Journal.find_by_issn knows about, who owns the
        mocked journals, and the expected outcome - an exception
        (``raises``) or a boolean (``legit``).

        :param name: label of the scenario; not read by the test body
        :param kwargs: dict of scenario switches, read below
        """
        # scenario switches
        wants_pissn = kwargs.get("article_pissn") == "yes"
        wants_eissn = kwargs.get("article_eissn") == "yes"
        eissn_indexed = kwargs.get("seen_eissn") == "yes"
        pissn_indexed = kwargs.get("seen_pissn") == "yes"
        journal_owner = kwargs.get("journal_owner")
        expect_legit = kwargs.get("legit")
        expect_error = EXCEPTIONS.get(kwargs.get("raises"))

        ###############################################
        ## set up

        # the account whose ownership is under test
        owner = None
        if kwargs.get("owner") != "none":
            owner = Account(**AccountFixtureFactory.make_publisher_source())
        owner_id = None if owner is None else owner.id

        # the incoming article: the fixture's own ISSNs are stripped, and
        # only the ones the scenario asks for are put back
        article = None
        eissn = None
        pissn = None
        if kwargs.get("article") == "exists":
            article = Article(**ArticleFixtureFactory.make_article_source())
            article.set_id()

            article.bibjson().remove_identifiers("pissn")
            if wants_pissn:
                pissn = "1234-5678"
                article.bibjson().add_identifier("pissn", pissn)

            article.bibjson().remove_identifiers("eissn")
            if wants_eissn:
                eissn = "9876-5432"
                article.bibjson().add_identifier("eissn", eissn)

        # the issns that will appear to be in the index: one irrelevant
        # pair as "noise", plus whatever the scenario calls for
        eissn_visible = eissn is not None and eissn_indexed
        pissn_visible = pissn is not None and pissn_indexed
        issns = [("1111-1111", "2222-2222")]
        if eissn_visible and pissn_visible:
            issns.append((eissn, pissn))
        if eissn_visible:
            issns.append((eissn, None))
        if pissn_visible:
            issns.append((None, pissn))

        # the owners handed to the mock factory
        if journal_owner == "none":
            owners = [None]
        elif journal_owner == "incorrect":
            owners = ["randomowner"]
        elif owner_id is not None and journal_owner == "correct":
            owners = [owner_id]
        elif owner_id is not None and journal_owner == "mix":
            owners = [owner_id, "randomowner", None]
        else:
            owners = []

        # swap the real journal lookup for the mock
        Journal.find_by_issn = ModelJournalMockFactory.find_by_issn(
            issns, owners)

        ###########################################################
        # Execution

        svc = DOAJ.articleService()

        if expect_error is not None:
            with self.assertRaises(expect_error):
                svc.is_legitimate_owner(article, owner_id)
            return

        legit = svc.is_legitimate_owner(article, owner_id)
        if expect_legit == "yes":
            assert legit is True
        elif expect_legit == "no":
            assert legit is False
Exemplo n.º 10
0
 def setUp(self):
     """Run the parent setUp, then keep references to the article service
     and to the callables named below.

     Presumably these are restored in tearDown after a test has replaced
     them - tearDown is not visible here, so confirm.
     """
     super(TestBLLPrepareUpdatePublisher, self).setUp()
     service = DOAJ.articleService()
     self.svc = service

     # current implementation of the service's identifier-change check
     self.is_id_updated = service._doi_or_fulltext_updated

     # and of the two Article model callables
     self.merge = Article.merge
     self.pull = Article.pull
Exemplo n.º 11
0
    def _process(self, file_upload):
        """Import the articles in an uploaded file and record the outcome.

        The file is looked up in the configured UPLOAD_DIR.  If it is not
        there, the attempt counter held in the job params is incremented,
        the job is marked failed once the configured retry limit has been
        reached, and RetryException is raised in either case.

        Otherwise the file is converted to articles by the crosswalk
        configured for the upload's schema, and the articles are passed to
        the article service as one batch.  The outcome is written to the
        background job's audit trail and to ``file_upload``.

        :param file_upload: the upload record; this method reads its
            ``local_filename``, ``owner``, ``schema`` and ``id``, and reports
            through its ``failed`` / ``processed`` / ``partial`` /
            ``set_failure_reasons`` methods
        :raises RetryException: whenever the file is not found on disk
        """
        job = self.background_job
        upload_dir = app.config.get("UPLOAD_DIR")
        path = os.path.join(upload_dir, file_upload.local_filename)

        if not os.path.exists(path):
            job.add_audit_message(
                u"File not found at path {} . Retrying job later.".format(
                    path))
            count = self.get_param(job.params, "attempts")
            retry_limit = app.config.get("HUEY_TASKS",
                                         {}).get("ingest_articles",
                                                 {}).get("retries", 0)
            self.set_param(job.params, "attempts", count + 1)

            if retry_limit <= count:
                job.add_audit_message(
                    u"File still not found at path {} . Giving up.".format(
                        path))
                job.fail()

            # raised even after the job has been marked failed above
            raise RetryException()

        job.add_audit_message(u"Importing from {x}".format(x=path))

        articleService = DOAJ.articleService()
        account = models.Account.pull(file_upload.owner)

        # pick the crosswalk class configured for this upload's schema
        xwalk_name = app.config.get("ARTICLE_CROSSWALKS",
                                    {}).get(file_upload.schema)
        xwalk = plugin.load_class(xwalk_name)()

        ingest_exception = False
        result = {}
        try:
            with open(path) as handle:
                articles = xwalk.crosswalk_file(
                    handle, add_journal_info=False
                )  # don't import the journal info, as we haven't validated ownership of the ISSNs in the article yet
                for article in articles:
                    article.set_upload_id(file_upload.id)
                result = articleService.batch_create_articles(
                    articles, account, add_journal_info=True)
        except IngestException as e:
            job.add_audit_message(
                u"IngestException: {msg}. Inner message: {inner}.  Stack: {x}".
                format(msg=e.message, inner=e.inner_message, x=e.trace()))
            file_upload.failed(e.message, e.inner_message)
            # the exception carries the partial result, which feeds the
            # reporting section below
            result = e.result
            try:
                file_failed(path)
                # NOTE(review): the flag is only set when file_failed()
                # succeeds; if cleanup raises, the code below treats this
                # run as if no IngestException had happened - confirm that
                # is intended
                ingest_exception = True
            except Exception:
                # was a bare except, which would also have swallowed
                # KeyboardInterrupt and SystemExit
                job.add_audit_message(
                    u"Error cleaning up file which caused IngestException: {x}"
                    .format(x=traceback.format_exc()))
        except (DuplicateArticleException, ArticleNotAcceptable):
            job.add_audit_message(
                u"One or more articles did not contain either a DOI or a Fulltext URL"
            )
            file_upload.failed(
                u"One or more articles did not contain either a DOI or a Fulltext URL"
            )
            try:
                file_failed(path)
            except Exception:
                job.add_audit_message(
                    u"Error cleaning up file which caused Exception: {x}".
                    format(x=traceback.format_exc()))
                # NOTE(review): this return only runs when the cleanup
                # itself fails; when cleanup succeeds, execution carries on
                # into the reporting section with an empty result - confirm
                # that is intended
                return
        except Exception:
            job.add_audit_message(
                u"Unanticipated error: {x}".format(x=traceback.format_exc()))
            file_upload.failed("Unanticipated error when importing articles")
            try:
                file_failed(path)
            except Exception:
                job.add_audit_message(
                    u"Error cleaning up file which caused Exception: {x}".
                    format(x=traceback.format_exc()))
                # NOTE(review): as above, only reached when cleanup fails
                return

        # read the outcome from the result, defaulting to "nothing happened"
        success = result.get("success", 0)
        fail = result.get("fail", 0)
        update = result.get("update", 0)
        new = result.get("new", 0)
        shared = result.get("shared", [])
        unowned = result.get("unowned", [])
        unmatched = result.get("unmatched", [])

        # set the upload's status from the success/fail counts; the
        # IngestException path has already recorded its own failure above
        if success == 0 and fail > 0 and not ingest_exception:
            file_upload.failed("All articles in file failed to import")
            job.add_audit_message("All articles in file failed to import")
        if success > 0 and fail == 0:
            file_upload.processed(success, update, new)
        if success > 0 and fail > 0:
            file_upload.partial(success, fail, update, new)
            job.add_audit_message(
                "Some articles in file failed to import correctly, so no articles imported"
            )

        file_upload.set_failure_reasons(list(shared), list(unowned),
                                        list(unmatched))
        job.add_audit_message("Shared ISSNs: " + ", ".join(shared))
        job.add_audit_message("Unowned ISSNs: " + ", ".join(unowned))
        job.add_audit_message("Unmatched ISSNs: " + ", ".join(unmatched))

        if not ingest_exception:
            try:
                os.remove(path)  # just remove the file, no need to keep it
            except Exception as e:
                # a generic exception has no .message attribute on Python 3,
                # so asking for it here would raise from inside the handler;
                # format the exception's own text instead
                job.add_audit_message(
                    u"Error while deleting file {x}: {y}".format(x=path,
                                                                 y=str(e)))
    def test_01_issn_ownership_status(self, name, kwargs):
        """Parameterised test of the article service's issn_ownership_status.

        ``kwargs`` carries string switches which control: whether an owner
        and an article exist, which ISSNs the article has, which of those
        ISSNs the mocked Journal.find_by_issn knows about, who owns the
        mocked journals, and whether an exception is expected (``raises``).
        When no exception is expected, the four collections returned by the
        service (owned, shared, unowned, unmatched) are checked for exactly
        the ISSNs the scenario implies.

        :param name: label of the scenario; not read by the test body
        :param kwargs: dict of scenario switches, read below
        """
        # scenario switches
        wants_pissn = kwargs.get("article_pissn") == "yes"
        wants_eissn = kwargs.get("article_eissn") == "yes"
        seen_eissn_arg = kwargs.get("seen_eissn")
        seen_pissn_arg = kwargs.get("seen_pissn")
        journal_owner_arg = kwargs.get("journal_owner")
        expect_error = EXCEPTIONS.get(kwargs.get("raises"))

        ###############################################
        ## set up

        # the account whose ownership is under test
        owner = None
        if kwargs.get("owner") != "none":
            owner = Account(**AccountFixtureFactory.make_publisher_source())
        owner_id = None if owner is None else owner.id

        # the incoming article: the fixture's own ISSNs are stripped, and
        # only the ones the scenario asks for are put back
        article = None
        eissn = None
        pissn = None
        if kwargs.get("article") == "exists":
            article = Article(**ArticleFixtureFactory.make_article_source())
            article.set_id()

            article.bibjson().remove_identifiers("pissn")
            if wants_pissn:
                pissn = "1234-5678"
                article.bibjson().add_identifier("pissn", pissn)

            article.bibjson().remove_identifiers("eissn")
            if wants_eissn:
                eissn = "9876-5432"
                article.bibjson().add_identifier("eissn", eissn)

        # the issn pairs that the mocked lookup will know about
        eissn_visible = eissn is not None and seen_eissn_arg == "yes"
        pissn_visible = pissn is not None and seen_pissn_arg == "yes"
        issns = []
        if eissn_visible and pissn_visible:
            issns.append((eissn, pissn))
        if eissn_visible:
            issns.extend([(eissn, "4321-9876"), (eissn, None)])
        if pissn_visible:
            issns.extend([("6789-4321", pissn), (None, pissn)])

        # the owners handed to the mock factory
        if journal_owner_arg == "none":
            owners = [None]
        elif journal_owner_arg == "incorrect":
            owners = ["randomowner"]
        elif owner_id is not None and journal_owner_arg == "correct":
            owners = [owner_id]
        elif owner_id is not None and journal_owner_arg == "mix":
            owners = [owner_id, "randomowner", None]
        else:
            owners = []

        # swap the real journal lookup for the mock
        Journal.find_by_issn = ModelJournalMockFactory.find_by_issn(
            issns, owners)

        ###########################################################
        # Execution

        svc = DOAJ.articleService()

        if expect_error is not None:
            with self.assertRaises(expect_error):
                svc.issn_ownership_status(article, owner_id)
            return

        owned, shared, unowned, unmatched = svc.issn_ownership_status(
            article, owner_id)

        # each article issn, paired with its "seen" switch
        article_issns = ((eissn, seen_eissn_arg), (pissn, seen_pissn_arg))

        # owned / shared / unowned: a seen issn belongs in the collection
        # only for the listed journal_owner settings, and every collection
        # must hold exactly the expected issns and nothing else
        by_owner_setting = (
            (owned, ["correct"]),
            (shared, ["mix"]),
            (unowned, ["incorrect", "none"]),
        )
        for collection, owner_settings in by_owner_setting:
            expected_size = 0
            for issn, seen_arg in article_issns:
                if issn is None:
                    continue
                if seen_arg == "yes" and journal_owner_arg in owner_settings:
                    assert issn in collection
                    expected_size += 1
                else:
                    assert issn not in collection
            assert len(collection) == expected_size

        # unmatched: the article's issns that were explicitly not seen
        expected_unmatched = 0
        for issn, seen_arg in article_issns:
            if issn is None:
                continue
            if seen_arg == "no":
                assert issn in unmatched
                expected_unmatched += 1
            else:
                assert issn not in unmatched
        assert len(unmatched) == expected_unmatched
 def setUp(self):
     """Run the parent setUp, then keep references to the article service
     and to its current discover_duplicates.

     Presumably discover_duplicates is restored in tearDown after a test
     has replaced it - tearDown is not visible here, so confirm.
     """
     super(TestBLLArticleGetDuplicates, self).setUp()
     service = DOAJ.articleService()
     self.svc = service
     self._old_discover_duplicates = service.discover_duplicates
    def test_01_discover_duplicates(self, name, kwargs):
        """Parameterised test of the article service's discover_duplicates,
        called with both the incoming article and the owner's id.

        ``kwargs`` carries string switches which control: whether an owner
        (and a journal for that owner) exists, which articles are saved
        beforehand and how their DOI / fulltext URL are decorated, what the
        incoming article looks like, and what result is expected - either
        an exception (``raises``) or matches under the "doi" and/or
        "fulltext" keys of the returned value.

        :param name: label of the scenario; not read by the test body
        :param kwargs: dict of scenario switches, read below
        """

        # unpack the scenario switches
        article_arg = kwargs.get("article")
        owner_arg = kwargs.get("owner")
        article_doi_arg = kwargs.get("article_doi")
        doi_duplicate_arg = kwargs.get("doi_duplicate")
        article_fulltext_arg = kwargs.get("article_fulltext")
        fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
        articles_by_doi_arg = kwargs.get("articles_by_doi")
        articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
        raises_arg = kwargs.get("raises")

        # translate the exception name into an exception class (None when no
        # exception is expected)
        raises = EXCEPTIONS.get(raises_arg)

        ###############################################
        ## set up

        owner = None
        if owner_arg != "none":
            owner = Account(**AccountFixtureFactory.make_publisher_source())

        # passed to discover_duplicates below; None when there is no owner
        owner_id = None
        if owner is not None:
            owner_id = owner.id

        # create a journal for the owner
        if owner_arg not in ["none"]:
            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            journal = Journal(**source)
            journal.set_owner(owner.id)
            journal.bibjson().remove_identifiers()
            journal.bibjson().add_identifier("eissn", "1234-5678")
            journal.bibjson().add_identifier("pissn", "9876-5432")
            journal.save(blocking=True)

        # determine what we need to load into the index: one saved article
        # per entry in IDENTS, with the DOI and fulltext URL decorated as
        # the scenario requests
        article_ids = []
        aids_block = []
        if owner_arg not in ["none", "no_articles"]:
            for i, ident in enumerate(IDENTS):
                the_doi = ident["doi"]
                if doi_duplicate_arg == "padded":
                    the_doi = "  " + the_doi + "  "
                elif doi_duplicate_arg == "prefixed":
                    the_doi = "https://dx.doi.org/" + the_doi

                # the fulltext in IDENTS gets a scheme put in front of it
                # here, unless the scenario is the "invalid" fulltext one,
                # in which case it is stored as-is
                the_fulltext = ident["fulltext"]
                if article_fulltext_arg != "invalid":
                    if fulltext_duplicate_arg == "padded":
                        the_fulltext = "  http:" + the_fulltext
                    elif fulltext_duplicate_arg == "http":
                        the_fulltext = "http:" + the_fulltext
                    elif fulltext_duplicate_arg == "https":
                        the_fulltext = "https:" + the_fulltext
                    else:
                        the_fulltext = "http:" + the_fulltext

                source = ArticleFixtureFactory.make_article_source(eissn="1234-5678", pissn="9876-5432", doi=the_doi, fulltext=the_fulltext)
                article = Article(**source)
                article.set_id()
                article.save()
                article_ids.append(article.id)
                # remember (id, last_updated) for the blockall call below
                aids_block.append((article.id, article.last_updated))

        # generate our incoming article
        article = None
        doi = None
        fulltext = None
        if article_arg == "yes":
            # NOTE(review): "1234=5678" uses "=" where the journal above uses
            # "1234-5678"; confirm whether that is deliberate
            eissn = "1234=5678" # one matching
            pissn = "6789-1234" # the other not - issn matches are not relevant to this test

            if article_doi_arg in ["yes", "padded"]:
                # a DOI that is not taken from IDENTS, unless the scenario
                # asks for a duplicate of the first saved article
                doi = "10.1234/abc/11"
                if doi_duplicate_arg in ["yes", "padded"]:
                    doi = IDENTS[0]["doi"]
                if article_doi_arg == "padded":
                    doi = "  doi:" + doi + "  "
            elif article_doi_arg in ["invalid"]:
                doi = IDENTS[-1]["doi"]

            if article_fulltext_arg in ["yes", "padded", "https"]:
                # same pattern for the fulltext URL
                fulltext = "//example.com/11"
                if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                    fulltext = IDENTS[0]["fulltext"]
                if fulltext_duplicate_arg == "padded":
                    fulltext = "  http:" + fulltext + "  "
                elif fulltext_duplicate_arg == "https":
                    fulltext = "https:" + fulltext
                else:
                    fulltext = "http:" + fulltext
            elif article_fulltext_arg == "invalid":
                fulltext = IDENTS[-1]["fulltext"]

            source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
            article = Article(**source)

            # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
            # provide them
            if doi is None:
                article.bibjson().remove_identifiers("doi")
            if fulltext is None:
                article.bibjson().remove_urls("fulltext")

            article.set_id()

        # presumably waits until the saved articles are visible in the
        # index - TODO confirm against Article.blockall
        Article.blockall(aids_block)

        ###########################################################
        # Execution

        svc = DOAJ.articleService()
        if raises is not None:
            with self.assertRaises(raises):
                svc.discover_duplicates(article, owner_id)
        else:
            possible_articles = svc.discover_duplicates(article, owner_id)

            if articles_by_doi_arg == "yes":
                assert "doi" in possible_articles
                assert len(possible_articles["doi"]) == 1
                # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
                if article_doi_arg == "invalid":
                    assert possible_articles["doi"][0].id == article_ids[-1]
                else:
                    assert possible_articles["doi"][0].id == article_ids[0]
            else:
                # no DOI match expected; the result may be None
                if possible_articles is not None:
                    assert "doi" not in possible_articles

            if articles_by_fulltext_arg == "yes":
                assert "fulltext" in possible_articles
                assert len(possible_articles["fulltext"]) == 1
                # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
                if article_fulltext_arg == "invalid":
                    assert possible_articles["fulltext"][0].id == article_ids[-1]
                else:
                    assert possible_articles["fulltext"][0].id == article_ids[0]
            else:
                # no fulltext match expected; the result may be None
                if possible_articles is not None:
                    assert "fulltext" not in possible_articles