def metadata():
    """
    Publisher article-metadata entry view.

    GET renders a blank :class:`ArticleForm` (there is no edit feature).
    POST handles three cases, distinguished by which form button was pressed:
    add-author, remove-author, or full submission with validation.
    """
    # if this is a get request, give the blank form - there is no edit feature
    if request.method == "GET":
        form = ArticleForm()
        return render_template('publisher/metadata.html', form=form)

    # if this is a post request, a form button has been hit and we need to do
    # a bunch of work
    elif request.method == "POST":
        form = ArticleForm(request.form)

        # first we need to do any server-side form modifications which
        # the user might request by pressing the add/remove authors buttons
        more_authors = request.values.get("more_authors")
        remove_author = None
        # remove buttons are named "remove_authors-<index>"; pull out the index
        # NOTE(review): if several remove buttons were somehow submitted at once,
        # only the last one seen wins — presumably only one can be pressed per POST
        for v in request.values.keys():
            if v.startswith("remove_authors"):
                remove_author = v.split("-")[1]

        # if the user wants more authors, add an extra entry
        if more_authors:
            form.authors.append_entry()
            return render_template('publisher/metadata.html', form=form)

        # if the user wants to remove an author, do the various back-flips required
        # (WTForms FieldList has no remove-by-index, so pop every entry, drop the
        # one whose short_name matches, then re-append the keepers in order)
        if remove_author is not None:
            keep = []
            while len(form.authors.entries) > 0:
                entry = form.authors.pop_entry()
                if entry.short_name == "authors-" + remove_author:
                    break
                else:
                    keep.append(entry)
            while len(keep) > 0:
                form.authors.append_entry(keep.pop().data)
            return render_template('publisher/metadata.html', form=form)

        # if we get to here, then this is the full submission, and we need to
        # validate and return
        enough_authors = _validate_authors(form)
        if form.validate():
            # if the form validates, then we have to do our own bit of validation,
            # which is to check that there is at least one author supplied
            if not enough_authors:
                return render_template('publisher/metadata.html', form=form, author_error=True)
            else:
                xwalk = ArticleFormXWalk()
                art = xwalk.crosswalk_form(form)
                articleService = DOAJ.articleService()
                articleService.create_article(art, current_user._get_current_object())
                flash("Article created/updated", "success")
                # give the user a fresh blank form for the next article
                form = ArticleForm()
                return render_template('publisher/metadata.html', form=form)
        else:
            return render_template('publisher/metadata.html', form=form, author_error=not enough_authors)
def setUp(self):
    """Grab the article service and remember the unpatched callables the tests will mock."""
    super(TestBLLArticleBatchCreateArticle, self).setUp()
    svc = DOAJ.articleService()
    self.svc = svc
    # originals are kept so they can be restored after each test patches them
    self._is_legitimate_owner = svc.is_legitimate_owner
    self._get_duplicate = svc.get_duplicate
    self._issn_ownership_status = svc.issn_ownership_status
    self._get_journal = Article.get_journal
def setUp(self):
    """Capture the service under test, the originals of patched methods, and a publisher account."""
    super(TestBLLPrepareUpdatePublisher, self).setUp()
    svc = DOAJ.articleService()
    self.svc = svc
    # keep references to the unpatched callables for restoration in tearDown
    self.is_id_updated = svc._doi_or_fulltext_updated
    self.has_permission = svc.has_permissions
    self.merge = Article.merge
    # a publisher account fixture used as the acting account in the tests
    self.publisher = Account(**AccountFixtureFactory.make_publisher_source())
def setUp(self):
    """Grab the article service and remember every method the tests will patch."""
    super(TestBLLArticleCreateArticle, self).setUp()
    svc = DOAJ.articleService()
    self.svc = svc
    # originals are saved so tearDown can put them back after mocking
    self.is_legitimate_owner = svc.is_legitimate_owner
    self.ownership = svc.issn_ownership_status
    self.duplicate = svc.get_duplicate
    self.permission = svc.has_permissions
    self.prepare_update_admin = svc._prepare_update_admin
    self.prepare_update_publisher = svc._prepare_update_publisher
def create(cls, articles, account):
    """
    Batch-create articles on behalf of *account*.

    We run through the articles once, validating in dry-run mode
    and deduplicating as we go. Then we .save() everything once
    we know all incoming articles are valid.

    :param articles: list of raw article data payloads from the API request
    :param account: the authenticated account; None means unauthenticated
    :return: list of ids of the created/updated articles
    :raises Api401Error: if there is no account
    :raises Api400Error: if the batch cannot be ingested
    """
    # as long as authentication (in the layer above) has been successful, and the account exists, then
    # we are good to proceed
    if account is None:
        raise Api401Error()

    # convert the data into a suitable article models
    articles = [ArticlesCrudApi.prep_article(data) for data in articles]

    articleService = DOAJ.articleService()
    # keep the try body minimal: only the ingest call can raise IngestException.
    # (the service's return value was previously bound to an unused local; it is
    # deliberately ignored — ids are read off the article models themselves)
    try:
        articleService.batch_create_articles(articles, account)
    except exceptions.IngestException as e:
        raise Api400Error(e.message)

    # batch_create_articles assigns ids to the article models in place
    return [a.id for a in articles]
def create(cls, articles, account):
    """
    Batch-create articles on behalf of *account*, enriching each with journal info.

    We run through the articles once, validating in dry-run mode
    and deduplicating as we go. Then we .save() everything once
    we know all incoming articles are valid.

    :param articles: list of raw article data payloads from the API request
    :param account: the authenticated account; None means unauthenticated
    :return: list of ids of the created/updated articles
    :raises Api401Error: if there is no account
    :raises Api400Error: if the batch cannot be ingested
    """
    # as long as authentication (in the layer above) has been successful, and the account exists, then
    # we are good to proceed
    if account is None:
        raise Api401Error()

    # convert the data into a suitable article models
    articles = [ArticlesCrudApi.prep_article(data) for data in articles]

    articleService = DOAJ.articleService()
    # keep the try body minimal: only the ingest call can raise IngestException.
    # (the service's return value was previously bound to an unused local; it is
    # deliberately ignored — ids are read off the article models themselves)
    try:
        articleService.batch_create_articles(articles, account, add_journal_info=True)
    except exceptions.IngestException as e:
        raise Api400Error(e.message)

    # batch_create_articles assigns ids to the article models in place
    return [a.id for a in articles]
def test_01_discover_duplicates(self, name, kwargs):
    """
    Parameterised test for ArticleService.discover_duplicates (no-owner variant).

    *kwargs* is a matrix row describing the scenario: whether an incoming
    article exists, how its DOI/fulltext compare to indexed articles, and
    whether an exception is expected.
    """
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    article_doi_arg = kwargs.get("article_doi")
    doi_duplicate_arg = kwargs.get("doi_duplicate")
    article_fulltext_arg = kwargs.get("article_fulltext")
    fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
    articles_by_doi_arg = kwargs.get("articles_by_doi")
    articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
    raises_arg = kwargs.get("raises")

    # map the parameter name to the concrete exception class (None if no raise expected)
    raises = EXCEPTIONS.get(raises_arg)

    ###############################################
    ## set up

    owner = None
    if owner_arg != "none":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # create a journal for the owner
    if owner_arg not in ["none"]:
        source = JournalFixtureFactory.make_journal_source(in_doaj=True)
        journal = Journal(**source)
        journal.set_owner(owner.id)
        journal.bibjson().remove_identifiers()
        journal.bibjson().add_identifier("eissn", "1234-5678")
        journal.bibjson().add_identifier("pissn", "9876-5432")
        journal.save()

    # determine what we need to load into the index
    article_ids = []
    aids_block = []
    if owner_arg not in ["none", "no_articles"]:
        for i, ident in enumerate(IDENTS):
            # optionally distort the stored DOI so the matcher must normalise it
            the_doi = ident["doi"]
            if doi_duplicate_arg == "padded":
                the_doi = " " + the_doi + " "
            elif doi_duplicate_arg == "prefixed":
                the_doi = "https://dx.doi.org/" + the_doi

            # optionally distort the stored fulltext URL scheme/whitespace
            the_fulltext = ident["fulltext"]
            if article_fulltext_arg != "invalid":
                if fulltext_duplicate_arg == "padded":
                    the_fulltext = " http:" + the_fulltext
                elif fulltext_duplicate_arg == "http":
                    the_fulltext = "http:" + the_fulltext
                elif fulltext_duplicate_arg == "https":
                    the_fulltext = "https:" + the_fulltext
                else:
                    the_fulltext = "http:" + the_fulltext

            source = ArticleFixtureFactory.make_article_source(
                eissn="1234-5678", pissn="9876-5432", doi=the_doi, fulltext=the_fulltext)
            article = Article(**source)
            article.set_id()
            article.save(blocking=True)
            article_ids.append(article.id)
            aids_block.append((article.id, article.last_updated))

    # generate our incoming article
    article = None
    doi = None
    fulltext = None
    if article_arg == "yes":
        eissn = "1234=5678"  # one matching
        pissn = "6789-1234"  # the other not - issn matches are not relevant to this test

        if article_doi_arg in ["yes", "padded"]:
            doi = "10.1234/abc/11"
            if doi_duplicate_arg in ["yes", "padded"]:
                doi = IDENTS[0]["doi"]
            if article_doi_arg == "padded":
                doi = " doi:" + doi + " "
        elif article_doi_arg in ["invalid"]:
            doi = IDENTS[-1]["doi"]

        if article_fulltext_arg in ["yes", "padded", "https"]:
            fulltext = "//example.com/11"
            if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                fulltext = IDENTS[0]["fulltext"]
            if fulltext_duplicate_arg == "padded":
                fulltext = " http:" + fulltext + " "
            elif fulltext_duplicate_arg == "https":
                fulltext = "https:" + fulltext
            else:
                fulltext = "http:" + fulltext
        elif article_fulltext_arg == "invalid":
            fulltext = IDENTS[-1]["fulltext"]

        source = ArticleFixtureFactory.make_article_source(
            eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
        article = Article(**source)

        # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
        # provide them
        if doi is None:
            article.bibjson().remove_identifiers("doi")
        if fulltext is None:
            article.bibjson().remove_urls("fulltext")

        article.set_id()

    # wait until the saved articles are visible in the index before querying
    Article.blockall(aids_block)

    ###########################################################
    # Execution

    svc = DOAJ.articleService()
    if raises is not None:
        with self.assertRaises(raises):
            svc.discover_duplicates(article)
    else:
        possible_articles = svc.discover_duplicates(article)

        if articles_by_doi_arg == "yes":
            assert "doi" in possible_articles
            assert len(possible_articles["doi"]) == 1
            # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
            if article_doi_arg == "invalid":
                assert possible_articles["doi"][0].id == article_ids[-1]
            else:
                assert possible_articles["doi"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "doi" not in possible_articles

        if articles_by_fulltext_arg == "yes":
            assert "fulltext" in possible_articles
            assert len(possible_articles["fulltext"]) == 1
            # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
            if article_fulltext_arg == "invalid":
                assert possible_articles["fulltext"][0].id == article_ids[-1]
            else:
                assert possible_articles["fulltext"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "fulltext" not in possible_articles
def test_01_is_legitimate_owner(self, name, kwargs):
    """
    Parameterised test for ArticleService.is_legitimate_owner.

    *kwargs* describes one matrix row: which ISSNs the incoming article
    carries, which have been "seen" in the index, who owns the matching
    journals, and whether the call should raise or return True/False.
    """
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    article_eissn_arg = kwargs.get("article_eissn")
    article_pissn_arg = kwargs.get("article_pissn")
    seen_eissn_arg = kwargs.get("seen_eissn")
    seen_pissn_arg = kwargs.get("seen_pissn")
    journal_owner_arg = kwargs.get("journal_owner")
    raises_arg = kwargs.get("raises")
    legit_arg = kwargs.get("legit")

    # map the parameter name to the concrete exception class (None if no raise expected)
    raises = EXCEPTIONS.get(raises_arg)

    ###############################################
    ## set up

    owner = None
    if owner_arg != "none":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # generate our incoming article
    article = None
    eissn = None
    pissn = None
    if article_arg == "exists":
        source = ArticleFixtureFactory.make_article_source()
        article = Article(**source)
        article.set_id()

        # strip the fixture's identifiers, then add back only what this row requires
        article.bibjson().remove_identifiers("pissn")
        if article_pissn_arg == "yes":
            pissn = "1234-5678"
            article.bibjson().add_identifier("pissn", pissn)

        article.bibjson().remove_identifiers("eissn")
        if article_eissn_arg == "yes":
            eissn = "9876-5432"
            article.bibjson().add_identifier("eissn", eissn)

    # assemble the issns that will appear to be in the index.  One that is irrelevant, and just
    # serves to be "noise" in the database, and the other that matches the spec required by
    # the test
    issns = [("1111-1111", "2222-2222")]
    if eissn is not None and pissn is not None and seen_eissn_arg == "yes" and seen_pissn_arg == "yes":
        issns.append((eissn, pissn))
    if eissn is not None and seen_eissn_arg == "yes":
        issns.append((eissn, None))
    if pissn is not None and seen_pissn_arg == "yes":
        issns.append((None, pissn))

    # journal owners returned by the mocked find_by_issn, per the scenario
    owners = []
    if journal_owner_arg == "none":
        owners = [None]
    elif journal_owner_arg == "correct" and owner_id is not None:
        owners = [owner_id]
    elif journal_owner_arg == "incorrect":
        owners = ["randomowner"]
    elif journal_owner_arg == "mix" and owner_id is not None:
        owners.append(owner_id)
        owners.append("randomowner")
        owners.append(None)

    # NOTE(review): this monkey-patches the class attribute globally; presumably
    # the suite's tearDown restores Journal.find_by_issn — confirm
    mock = ModelJournalMockFactory.find_by_issn(issns, owners)
    Journal.find_by_issn = mock

    ###########################################################
    # Execution

    svc = DOAJ.articleService()
    if raises is not None:
        with self.assertRaises(raises):
            svc.is_legitimate_owner(article, owner_id)
    else:
        legit = svc.is_legitimate_owner(article, owner_id)

        if legit_arg == "no":
            assert legit is False
        elif legit_arg == "yes":
            assert legit is True
def setUp(self):
    """Grab the article service and remember the unpatched callables the tests will mock."""
    super(TestBLLPrepareUpdatePublisher, self).setUp()
    svc = DOAJ.articleService()
    self.svc = svc
    # originals are saved so they can be restored after each test patches them
    self.is_id_updated = svc._doi_or_fulltext_updated
    self.merge = Article.merge
    self.pull = Article.pull
def _process(self, file_upload):
    """
    Ingest one uploaded article file: locate it on disk, crosswalk it into
    article models, batch-create them for the file's owner, record the
    outcome on *file_upload*, and clean up the file.

    :param file_upload: the FileUpload record describing the uploaded file
    :raises RetryException: if the file is not yet present on disk
    """
    job = self.background_job
    upload_dir = app.config.get("UPLOAD_DIR")
    path = os.path.join(upload_dir, file_upload.local_filename)

    if not os.path.exists(path):
        job.add_audit_message(
            u"File not found at path {} . Retrying job later.".format(path))
        count = self.get_param(job.params, "attempts")
        retry_limit = app.config.get("HUEY_TASKS", {}).get("ingest_articles", {}).get("retries", 0)
        self.set_param(job.params, "attempts", count + 1)

        # once the retry budget is exhausted, mark the job failed before re-raising
        if retry_limit <= count:
            job.add_audit_message(
                u"File still not found at path {} . Giving up.".format(path))
            job.fail()

        raise RetryException()

    job.add_audit_message(u"Importing from {x}".format(x=path))

    articleService = DOAJ.articleService()
    account = models.Account.pull(file_upload.owner)

    # pick the crosswalk class configured for this upload's schema
    xwalk_name = app.config.get("ARTICLE_CROSSWALKS", {}).get(file_upload.schema)
    xwalk = plugin.load_class(xwalk_name)()

    ingest_exception = False
    result = {}
    try:
        with open(path) as handle:
            articles = xwalk.crosswalk_file(
                handle, add_journal_info=False
            )  # don't import the journal info, as we haven't validated ownership of the ISSNs in the article yet
            for article in articles:
                article.set_upload_id(file_upload.id)
            result = articleService.batch_create_articles(
                articles, account, add_journal_info=True)
    except IngestException as e:
        job.add_audit_message(
            u"IngestException: {msg}. Inner message: {inner}. Stack: {x}".format(
                msg=e.message, inner=e.inner_message, x=e.trace()))
        file_upload.failed(e.message, e.inner_message)
        result = e.result
        try:
            file_failed(path)
            ingest_exception = True
        except:
            job.add_audit_message(
                u"Error cleaning up file which caused IngestException: {x}"
                .format(x=traceback.format_exc()))
    except (DuplicateArticleException, ArticleNotAcceptable) as e:
        # NOTE(review): this message is also emitted for DuplicateArticleException,
        # where "did not contain either a DOI or a Fulltext URL" may not be the
        # actual cause — confirm whether the two exceptions should be reported separately
        job.add_audit_message(
            u"One or more articles did not contain either a DOI or a Fulltext URL")
        file_upload.failed(
            u"One or more articles did not contain either a DOI or a Fulltext URL")
        try:
            file_failed(path)
        except:
            job.add_audit_message(
                u"Error cleaning up file which caused Exception: {x}".format(
                    x=traceback.format_exc()))
        return
    except Exception as e:
        job.add_audit_message(
            u"Unanticipated error: {x}".format(x=traceback.format_exc()))
        file_upload.failed("Unanticipated error when importing articles")
        try:
            file_failed(path)
        except:
            job.add_audit_message(
                u"Error cleaning up file which caused Exception: {x}".format(
                    x=traceback.format_exc()))
        return

    # summarise the batch result (result may come from the exception on IngestException)
    success = result.get("success", 0)
    fail = result.get("fail", 0)
    update = result.get("update", 0)
    new = result.get("new", 0)
    shared = result.get("shared", [])
    unowned = result.get("unowned", [])
    unmatched = result.get("unmatched", [])

    if success == 0 and fail > 0 and not ingest_exception:
        file_upload.failed("All articles in file failed to import")
        job.add_audit_message("All articles in file failed to import")
    if success > 0 and fail == 0:
        file_upload.processed(success, update, new)
    if success > 0 and fail > 0:
        file_upload.partial(success, fail, update, new)
        job.add_audit_message(
            "Some articles in file failed to import correctly, so no articles imported")

    file_upload.set_failure_reasons(list(shared), list(unowned), list(unmatched))
    job.add_audit_message("Shared ISSNs: " + ", ".join(list(shared)))
    job.add_audit_message("Unowned ISSNs: " + ", ".join(list(unowned)))
    job.add_audit_message("Unmatched ISSNs: " + ", ".join(list(unmatched)))

    # on IngestException the file was already moved aside by file_failed, so keep it
    if not ingest_exception:
        try:
            os.remove(path)  # just remove the file, no need to keep it
        except Exception as e:
            job.add_audit_message(
                u"Error while deleting file {x}: {y}".format(x=path, y=e.message))
def test_01_issn_ownership_status(self, name, kwargs):
    """
    Parameterised test for ArticleService.issn_ownership_status.

    Builds an article with a configurable combination of e/p-ISSNs, mocks the
    journals seen in the index and their owners, then checks every ISSN lands
    in exactly one of the returned (owned, shared, unowned, unmatched) sets.
    """
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    article_eissn_arg = kwargs.get("article_eissn")
    article_pissn_arg = kwargs.get("article_pissn")
    seen_eissn_arg = kwargs.get("seen_eissn")
    seen_pissn_arg = kwargs.get("seen_pissn")
    journal_owner_arg = kwargs.get("journal_owner")
    raises_arg = kwargs.get("raises")

    # map the parameter name to the concrete exception class (None if no raise expected)
    raises = EXCEPTIONS.get(raises_arg)

    ###############################################
    ## set up

    owner = None
    if owner_arg != "none":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # generate our incoming article
    article = None
    eissn = None
    pissn = None
    if article_arg == "exists":
        source = ArticleFixtureFactory.make_article_source()
        article = Article(**source)
        article.set_id()

        # strip the fixture's identifiers, then add back only what this row requires
        article.bibjson().remove_identifiers("pissn")
        if article_pissn_arg == "yes":
            pissn = "1234-5678"
            article.bibjson().add_identifier("pissn", pissn)

        article.bibjson().remove_identifiers("eissn")
        if article_eissn_arg == "yes":
            eissn = "9876-5432"
            article.bibjson().add_identifier("eissn", eissn)

    # the (eissn, pissn) pairs that appear to exist in the index; partially
    # matching pairs are included so partial matches are exercised too
    issns = []
    if eissn is not None and pissn is not None and seen_eissn_arg == "yes" and seen_pissn_arg == "yes":
        issns.append((eissn, pissn))
    if eissn is not None and seen_eissn_arg == "yes":
        issns.append((eissn, "4321-9876"))
        issns.append((eissn, None))
    if pissn is not None and seen_pissn_arg == "yes":
        issns.append(("6789-4321", pissn))
        issns.append((None, pissn))

    # journal owners returned by the mocked find_by_issn, per the scenario
    owners = []
    if journal_owner_arg == "none":
        owners = [None]
    elif journal_owner_arg == "correct" and owner_id is not None:
        owners = [owner_id]
    elif journal_owner_arg == "incorrect":
        owners = ["randomowner"]
    elif journal_owner_arg == "mix" and owner_id is not None:
        owners.append(owner_id)
        owners.append("randomowner")
        owners.append(None)

    # NOTE(review): this monkey-patches the class attribute globally; presumably
    # the suite's tearDown restores Journal.find_by_issn — confirm
    mock = ModelJournalMockFactory.find_by_issn(issns, owners)
    Journal.find_by_issn = mock

    ###########################################################
    # Execution

    svc = DOAJ.articleService()
    if raises is not None:
        with self.assertRaises(raises):
            svc.issn_ownership_status(article, owner_id)
    else:
        owned, shared, unowned, unmatched = svc.issn_ownership_status(article, owner_id)

        # each ISSN should appear in "owned" exactly when seen and owner is correct
        owned_count = 0
        if seen_eissn_arg == "yes" and eissn is not None and journal_owner_arg in ["correct"]:
            assert eissn in owned
            owned_count += 1
        elif eissn is not None:
            assert eissn not in owned
        if seen_pissn_arg == "yes" and pissn is not None and journal_owner_arg in ["correct"]:
            assert pissn in owned
            owned_count += 1
        elif pissn is not None:
            assert pissn not in owned
        assert len(owned) == owned_count

        # "shared" only when ownership is a mix of this owner and others
        shared_count = 0
        if seen_eissn_arg == "yes" and eissn is not None and journal_owner_arg in ["mix"]:
            assert eissn in shared
            shared_count += 1
        elif eissn is not None:
            assert eissn not in shared
        if seen_pissn_arg == "yes" and pissn is not None and journal_owner_arg in ["mix"]:
            assert pissn in shared
            shared_count += 1
        elif pissn is not None:
            assert pissn not in shared
        assert len(shared) == shared_count

        # "unowned" when seen but owned by someone else (or nobody)
        unowned_count = 0
        if seen_eissn_arg == "yes" and eissn is not None and journal_owner_arg in ["incorrect", "none"]:
            assert eissn in unowned
            unowned_count += 1
        elif eissn is not None:
            assert eissn not in unowned
        if seen_pissn_arg == "yes" and pissn is not None and journal_owner_arg in ["incorrect", "none"]:
            assert pissn in unowned
            unowned_count += 1
        elif pissn is not None:
            assert pissn not in unowned
        assert len(unowned) == unowned_count

        # "unmatched" when the ISSN exists on the article but was never seen in the index
        unmatched_count = 0
        if seen_eissn_arg == "no" and eissn is not None:
            assert eissn in unmatched
            unmatched_count += 1
        elif eissn is not None:
            assert eissn not in unmatched
        if seen_pissn_arg == "no" and pissn is not None:
            assert pissn in unmatched
            unmatched_count += 1
        elif pissn is not None:
            assert pissn not in unmatched
        assert len(unmatched) == unmatched_count
def setUp(self):
    """Grab the article service and remember the original discover_duplicates for restoration."""
    super(TestBLLArticleGetDuplicates, self).setUp()
    svc = DOAJ.articleService()
    self.svc = svc
    # unpatched callable, kept so tearDown can undo the tests' mocking
    self._old_discover_duplicates = svc.discover_duplicates
def test_01_discover_duplicates(self, name, kwargs):
    """
    Parameterised test for ArticleService.discover_duplicates (owner-aware variant).

    *kwargs* is a matrix row describing the scenario: whether an incoming
    article exists, how its DOI/fulltext compare to indexed articles owned by
    the given account, and whether an exception is expected.
    """
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    article_doi_arg = kwargs.get("article_doi")
    doi_duplicate_arg = kwargs.get("doi_duplicate")
    article_fulltext_arg = kwargs.get("article_fulltext")
    fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
    articles_by_doi_arg = kwargs.get("articles_by_doi")
    articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
    raises_arg = kwargs.get("raises")

    # map the parameter name to the concrete exception class (None if no raise expected)
    raises = EXCEPTIONS.get(raises_arg)

    ###############################################
    ## set up

    owner = None
    if owner_arg != "none":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # create a journal for the owner
    if owner_arg not in ["none"]:
        source = JournalFixtureFactory.make_journal_source(in_doaj=True)
        journal = Journal(**source)
        journal.set_owner(owner.id)
        journal.bibjson().remove_identifiers()
        journal.bibjson().add_identifier("eissn", "1234-5678")
        journal.bibjson().add_identifier("pissn", "9876-5432")
        journal.save(blocking=True)

    # determine what we need to load into the index
    article_ids = []
    aids_block = []
    if owner_arg not in ["none", "no_articles"]:
        for i, ident in enumerate(IDENTS):
            # optionally distort the stored DOI so the matcher must normalise it
            the_doi = ident["doi"]
            if doi_duplicate_arg == "padded":
                the_doi = " " + the_doi + " "
            elif doi_duplicate_arg == "prefixed":
                the_doi = "https://dx.doi.org/" + the_doi

            # optionally distort the stored fulltext URL scheme/whitespace
            the_fulltext = ident["fulltext"]
            if article_fulltext_arg != "invalid":
                if fulltext_duplicate_arg == "padded":
                    the_fulltext = " http:" + the_fulltext
                elif fulltext_duplicate_arg == "http":
                    the_fulltext = "http:" + the_fulltext
                elif fulltext_duplicate_arg == "https":
                    the_fulltext = "https:" + the_fulltext
                else:
                    the_fulltext = "http:" + the_fulltext

            source = ArticleFixtureFactory.make_article_source(eissn="1234-5678", pissn="9876-5432", doi=the_doi, fulltext=the_fulltext)
            article = Article(**source)
            article.set_id()
            article.save()
            article_ids.append(article.id)
            aids_block.append((article.id, article.last_updated))

    # generate our incoming article
    article = None
    doi = None
    fulltext = None
    if article_arg == "yes":
        eissn = "1234=5678"  # one matching
        pissn = "6789-1234"  # the other not - issn matches are not relevant to this test

        if article_doi_arg in ["yes", "padded"]:
            doi = "10.1234/abc/11"
            if doi_duplicate_arg in ["yes", "padded"]:
                doi = IDENTS[0]["doi"]
            if article_doi_arg == "padded":
                doi = " doi:" + doi + " "
        elif article_doi_arg in ["invalid"]:
            doi = IDENTS[-1]["doi"]

        if article_fulltext_arg in ["yes", "padded", "https"]:
            fulltext = "//example.com/11"
            if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                fulltext = IDENTS[0]["fulltext"]
            if fulltext_duplicate_arg == "padded":
                fulltext = " http:" + fulltext + " "
            elif fulltext_duplicate_arg == "https":
                fulltext = "https:" + fulltext
            else:
                fulltext = "http:" + fulltext
        elif article_fulltext_arg == "invalid":
            fulltext = IDENTS[-1]["fulltext"]

        source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
        article = Article(**source)

        # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
        # provide them
        if doi is None:
            article.bibjson().remove_identifiers("doi")
        if fulltext is None:
            article.bibjson().remove_urls("fulltext")

        article.set_id()

    # wait until the saved articles are visible in the index before querying
    Article.blockall(aids_block)

    ###########################################################
    # Execution

    svc = DOAJ.articleService()
    if raises is not None:
        with self.assertRaises(raises):
            svc.discover_duplicates(article, owner_id)
    else:
        possible_articles = svc.discover_duplicates(article, owner_id)

        if articles_by_doi_arg == "yes":
            assert "doi" in possible_articles
            assert len(possible_articles["doi"]) == 1
            # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
            if article_doi_arg == "invalid":
                assert possible_articles["doi"][0].id == article_ids[-1]
            else:
                assert possible_articles["doi"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "doi" not in possible_articles

        if articles_by_fulltext_arg == "yes":
            assert "fulltext" in possible_articles
            assert len(possible_articles["fulltext"]) == 1
            # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
            if article_fulltext_arg == "invalid":
                assert possible_articles["fulltext"][0].id == article_ids[-1]
            else:
                assert possible_articles["fulltext"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "fulltext" not in possible_articles