Example #1
 def test_01_sitemap(self):
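     # prepare and execute the sitemap background task as the system user, then check
     # that exactly one sitemap file was written to the temporary directory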
     user = app.config.get("SYSTEM_USERNAME")
     job = sitemap.SitemapBackgroundTask.prepare(user)
     task = sitemap.SitemapBackgroundTask(job)
     BackgroundApi.execute(task)
     time.sleep(1)
     assert len(os.listdir(os.path.join(self.tmp_dir, "sitemap"))) == 1
Example #2
File: test_sitemap.py Project: DOAJ/doaj
 def test_01_sitemap(self):
     user = app.config.get("SYSTEM_USERNAME")
     job = sitemap.SitemapBackgroundTask.prepare(user)
     task = sitemap.SitemapBackgroundTask(job)
     BackgroundApi.execute(task)
     time.sleep(1)
     assert len(os.listdir(os.path.join(self.tmp_dir, "sitemap"))) == 1
Example #3
 def test_0x_check_latest_es_backup(self):
     user = app.config.get("SYSTEM_USERNAME")
     job = check_latest_es_backup.CheckLatestESBackupBackgroundTask.prepare(user)
     task = check_latest_es_backup.CheckLatestESBackupBackgroundTask(job)
     BackgroundApi.execute(task)
Example #4
def check_latest_es_backup(job_id):
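    # pull the persisted BackgroundJob by its id, wrap it in the task class and
    # run it synchronously via BackgroundApi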
    job = models.BackgroundJob.pull(job_id)
    task = CheckLatestESBackupBackgroundTask(job)
    BackgroundApi.execute(task)
Example #5
def journal_bulk_delete(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = JournalBulkDeleteBackgroundTask(job)
    BackgroundApi.execute(task)
Example #6
def async_workflow_notifications(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = AsyncWorkflowBackgroundTask(job)
    BackgroundApi.execute(task)
Example #7
def public_data_dump(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = PublicDataDumpBackgroundTask(job)
    BackgroundApi.execute(task)
Example #8
def public_data_dump(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = PublicDataDumpBackgroundTask(job)
    BackgroundApi.execute(task)
Example #9
def prune_es_backups(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = PruneESBackupsBackgroundTask(job)
    BackgroundApi.execute(task)
Example #10
File: read_news.py Project: DOAJ/doaj
def read_news(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = ReadNewsBackgroundTask(job)
    BackgroundApi.execute(task)
Example #11
def article_cleanup_sync(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = ArticleCleanupSyncBackgroundTask(job)
    BackgroundApi.execute(task)
Example #12
def suggestion_bulk_edit(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = SuggestionBulkEditBackgroundTask(job)
    BackgroundApi.execute(task)
Example #13
def article_duplicate_report(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = ArticleDuplicateReportBackgroundTask(job)
    BackgroundApi.execute(task)
Example #14
def article_bulk_delete(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = ArticleBulkDeleteBackgroundTask(job)
    BackgroundApi.execute(task)
Example #15
def journal_csv(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = JournalCSVBackgroundTask(job)
    BackgroundApi.execute(task)
Example #16
def read_news(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = ReadNewsBackgroundTask(job)
    BackgroundApi.execute(task)
Example #17
def article_cleanup_sync(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = ArticleCleanupSyncBackgroundTask(job)
    BackgroundApi.execute(task)
Example #18
def request_es_backup(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = RequestESBackupBackgroundTask(job)
    BackgroundApi.execute(task)
Example #19
def journal_bulk_delete(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = JournalBulkDeleteBackgroundTask(job)
    BackgroundApi.execute(task)
Example #20
    def test_public_data_dump(self, name, kwargs):
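        # each kwargs dict supplies one test configuration: clean/prune flags, record types,
        # journal/article counts, batch size, store-write behaviour and the expected job status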

        clean_arg = kwargs.get("clean")
        prune_arg = kwargs.get("prune")
        types_arg = kwargs.get("types")
        journals_arg = kwargs.get("journals")
        articles_arg = kwargs.get("articles")
        batch_size_arg = kwargs.get("batch_size")
        tmp_write_arg = kwargs.get("tmp_write")
        store_write_arg = kwargs.get("store_write")

        status_arg = kwargs.get("status")

        ###############################################
        ## set up

        clean = True if clean_arg == "yes" else False if clean_arg == "no" else None
        prune = True if prune_arg == "yes" else False if prune_arg == "no" else None
        types = types_arg if types_arg != "-" else None

        journal_count = int(journals_arg)
        article_count = int(articles_arg)
        batch_size = int(batch_size_arg)
        journal_file_count = 0 if journal_count == 0 else (journal_count //
                                                           batch_size) + 1
        article_file_count = 0 if article_count == 0 else (article_count //
                                                           batch_size) + 1
        first_article_file_records = 0 if article_count == 0 else batch_size if article_count > batch_size else article_count
        first_journal_file_records = 0 if journal_count == 0 else batch_size if journal_count > batch_size else journal_count

        # add the data to the index first, to maximise the time it has to become available for search
        sources = JournalFixtureFactory.make_many_journal_sources(
            journal_count, in_doaj=True)
        jids = []
        for i in range(len(sources)):
            source = sources[i]
            journal = models.Journal(**source)
            journal.save()
            jids.append((journal.id, journal.last_updated))

        aids = []
        for i in range(article_count):
            source = ArticleFixtureFactory.make_article_source(
                eissn="{x}000-0000".format(x=i),
                pissn="0000-{x}000".format(x=i),
                with_id=False,
                doi="10.123/{x}".format(x=i),
                fulltext="http://example.com/{x}".format(x=i))
            article = models.Article(**source)
            article.save()
            aids.append((article.id, article.last_updated))

        # construct some test data in the local store
        container_id = app.config["STORE_PUBLIC_DATA_DUMP_CONTAINER"]
        localStore = store.StoreLocal(None)
        localStoreFiles = []
        if clean or prune:
            for i in range(5):
                localStore.store(container_id,
                                 "doaj_article_data_2018-01-0" + str(i) +
                                 ".tar.gz",
                                 source_stream=StringIO("test"))
                localStore.store(container_id,
                                 "doaj_journal_data_2018-01-0" + str(i) +
                                 ".tar.gz",
                                 source_stream=StringIO("test"))
            localStoreFiles = localStore.list(container_id)

        app.config["DISCOVERY_RECORDS_PER_FILE"] = batch_size

        # set the mocks for store write failures
        if tmp_write_arg == "fail":
            app.config[
                "STORE_TMP_IMPL"] = StoreMockFactory.no_writes_classpath()

        if store_write_arg == "fail":
            app.config["STORE_IMPL"] = StoreMockFactory.no_writes_classpath()

        # block until all the records are saved
        for jid, lu in jids:
            models.Journal.block(jid, lu, sleep=0.05)
        for aid, lu in aids:
            models.Article.block(aid, lu, sleep=0.05)

        ###########################################################
        # Execution

        job = PublicDataDumpBackgroundTask.prepare("testuser",
                                                   clean=clean,
                                                   prune=prune,
                                                   types=types)
        task = PublicDataDumpBackgroundTask(job)
        BackgroundApi.execute(task)

        # make sure we have a fresh copy of the job
        job = task.background_job
        assert job.status == status_arg

        if job.status != "error":
            article_url = models.cache.Cache.get_public_data_dump().get(
                "article", {}).get("url")
            if types_arg in ["-", "all", "article"]:
                assert article_url is not None
            else:
                assert article_url is None

            journal_url = models.cache.Cache.get_public_data_dump().get(
                "journal", {}).get("url")
            if types_arg in ["-", "all", "journal"]:
                assert journal_url is not None
            else:
                assert journal_url is None

            assert localStore.exists(container_id)
            files = localStore.list(container_id)

            if types_arg in ["-", "all"]:
                assert len(files) == 2
            else:
                assert len(files) == 1

            day_at_start = dates.today()

            if types_arg in ["-", "all", "article"]:
                article_file = "doaj_article_data_" + day_at_start + ".tar.gz"
                assert article_file in files

                stream = localStore.get(container_id, article_file)
                tarball = tarfile.open(fileobj=stream, mode="r:gz")
                members = tarball.getmembers()
                assert len(members) == article_file_count

                if len(members) > 0:
                    f = tarball.extractfile(members[0])
                    data = json.loads(f.read().decode("utf-8"))
                    assert len(data) == first_article_file_records

                    record = data[0]
                    for key in list(record.keys()):
                        assert key in [
                            "admin", "bibjson", "id", "last_updated",
                            "created_date"
                        ]
                    if "admin" in record:
                        for key in list(record["admin"].keys()):
                            assert key in ["ticked", "seal"]

            if types_arg in ["-", "all", "journal"]:
                journal_file = "doaj_journal_data_" + day_at_start + ".tar.gz"
                assert journal_file in files

                stream = localStore.get(container_id, journal_file)
                tarball = tarfile.open(fileobj=stream, mode="r:gz")
                members = tarball.getmembers()
                assert len(members) == journal_file_count

                if len(members) > 0:
                    f = tarball.extractfile(members[0])
                    data = json.loads(f.read().decode("utf-8"))
                    assert len(data) == first_journal_file_records

                    record = data[0]
                    for key in list(record.keys()):
                        assert key in [
                            "admin", "bibjson", "id", "last_updated",
                            "created_date"
                        ]
                    if "admin" in record:
                        for key in list(record["admin"].keys()):
                            assert key in ["ticked", "seal"]

        else:
            # in the case of an error, we expect the tmp store to have been cleaned up
            tmpStore = store.TempStore()
            assert not tmpStore.exists(container_id)

            # in the case of an error, we expect the main store not to have been touched
            # (for the errors that we are checking for)
            if prune and not clean:
                # no matter what the error, if we didn't specify clean then we expect everything
                # to survive
                survived = localStore.list(container_id)
                assert localStoreFiles == survived
            elif clean:
                # if we specified clean, then it's possible the main store was cleaned before the
                # error occurred, in which case it depends on the error.  This reminds us that
                # clean shouldn't be used in production
                if tmp_write_arg == "fail":
                    assert not localStore.exists(container_id)
                else:
                    survived = localStore.list(container_id)
                    assert localStoreFiles == survived
            else:
                # otherwise, we expect the main store to have survived
                assert not localStore.exists(container_id)
Example #21
def run_reports(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = ReportingBackgroundTask(job)
    BackgroundApi.execute(task)
Example #22
File: sitemap.py Project: DOAJ/doaj
"""
use this script if you want to manually (and synchronously) execute the sitemap task
"""
from portality.tasks import sitemap
from portality.core import app
from portality.background import BackgroundApi

if __name__ == "__main__":
    user = app.config.get("SYSTEM_USERNAME")
    job = sitemap.SitemapBackgroundTask.prepare(user)
    task = sitemap.SitemapBackgroundTask(job)
    BackgroundApi.execute(task)


Example #23
def ingest_articles(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = IngestArticlesBackgroundTask(job)
    BackgroundApi.execute(task)
Example #24
File: reporting.py Project: DOAJ/doaj
def run_reports(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = ReportingBackgroundTask(job)
    BackgroundApi.execute(task)
Example #25
def article_duplicate_report(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = ArticleDuplicateReportBackgroundTask(job)
    BackgroundApi.execute(task)
Example #26
def set_in_doaj(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = SetInDOAJBackgroundTask(job)
    BackgroundApi.execute(task)
Example #27
def async_workflow_notifications(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = AsyncWorkflowBackgroundTask(job)
    BackgroundApi.execute(task)
Example #28
def prune_es_backups(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = PruneESBackupsBackgroundTask(job)
    BackgroundApi.execute(task)
Example #29
    def test_public_data_dump(self, name, kwargs):

        clean_arg = kwargs.get("clean")
        prune_arg = kwargs.get("prune")
        types_arg = kwargs.get("types")
        journals_arg = kwargs.get("journals")
        articles_arg = kwargs.get("articles")
        batch_size_arg = kwargs.get("batch_size")
        tmp_write_arg = kwargs.get("tmp_write")
        store_write_arg = kwargs.get("store_write")

        status_arg = kwargs.get("status")

        ###############################################
        ## set up

        clean = True if clean_arg == "yes" else False if clean_arg == "no" else None
        prune = True if prune_arg == "yes" else False if prune_arg == "no" else None
        types = types_arg if types_arg != "-" else None

        journal_count = int(journals_arg)
        article_count = int(articles_arg)
        batch_size = int(batch_size_arg)
        journal_file_count = 0 if journal_count == 0 else (journal_count // batch_size) + 1
        article_file_count = 0 if article_count == 0 else (article_count // batch_size) + 1
        first_article_file_records = 0 if article_count == 0 else batch_size if article_count > batch_size else article_count
        first_journal_file_records = 0 if journal_count == 0 else batch_size if journal_count > batch_size else journal_count

        # add the data to the index first, to maximise the time it has to become available for search
        sources = JournalFixtureFactory.make_many_journal_sources(journal_count, in_doaj=True)
        jids = []
        for i in range(len(sources)):
            source = sources[i]
            journal = models.Journal(**source)
            journal.save()
            jids.append((journal.id, journal.last_updated))

        aids = []
        for i in range(article_count):
            source = ArticleFixtureFactory.make_article_source(
                eissn="{x}000-0000".format(x=i),
                pissn="0000-{x}000".format(x=i),
                with_id=False,
                doi="10.123/{x}".format(x=i),
                fulltext="http://example.com/{x}".format(x=i)
            )
            article = models.Article(**source)
            article.save()
            aids.append((article.id, article.last_updated))

        # construct some test data in the local store
        container_id = app.config["STORE_PUBLIC_DATA_DUMP_CONTAINER"]
        localStore = store.StoreLocal(None)
        localStoreFiles = []
        if clean or prune:
            for i in range(5):
                localStore.store(container_id, "doaj_article_data_2018-01-0" + str(i) + ".tar.gz",
                                 source_stream=StringIO("test"))
                localStore.store(container_id, "doaj_journal_data_2018-01-0" + str(i) + ".tar.gz",
                                 source_stream=StringIO("test"))
            localStoreFiles = localStore.list(container_id)

        app.config["DISCOVERY_RECORDS_PER_FILE"] = batch_size

        # set the mocks for store write failures
        if tmp_write_arg == "fail":
            app.config["STORE_TMP_IMPL"] = StoreMockFactory.no_writes_classpath()

        if store_write_arg == "fail":
            app.config["STORE_IMPL"] = StoreMockFactory.no_writes_classpath()

        # block until all the records are saved
        for jid, lu in jids:
            models.Journal.block(jid, lu, sleep=0.05)
        for aid, lu in aids:
            models.Article.block(aid, lu, sleep=0.05)

        ###########################################################
        # Execution

        job = PublicDataDumpBackgroundTask.prepare("testuser", clean=clean, prune=prune, types=types)
        task = PublicDataDumpBackgroundTask(job)
        BackgroundApi.execute(task)

        # make sure we have a fresh copy of the job
        job = task.background_job
        assert job.status == status_arg

        if job.status != "error":
            article_url = models.cache.Cache.get_public_data_dump().get("article", {}).get("url")
            if types_arg in ["-", "all", "article"]:
                assert article_url is not None
            else:
                assert article_url is None

            journal_url = models.cache.Cache.get_public_data_dump().get("journal", {}).get("url")
            if types_arg in ["-", "all", "journal"]:
                assert journal_url is not None
            else:
                assert journal_url is None

            assert localStore.exists(container_id)
            files = localStore.list(container_id)

            if types_arg in ["-", "all"]:
                assert len(files) == 2
            else:
                assert len(files) == 1

            day_at_start = dates.today()

            if types_arg in ["-", "all", "article"]:
                article_file = "doaj_article_data_" + day_at_start + ".tar.gz"
                assert article_file in files

                stream = localStore.get(container_id, article_file)
                tarball = tarfile.open(fileobj=stream, mode="r:gz")
                members = tarball.getmembers()
                assert len(members) == article_file_count

                if len(members) > 0:
                    f = tarball.extractfile(members[0])
                    data = json.loads(f.read())
                    assert len(data) == first_article_file_records

                    record = data[0]
                    for key in record.keys():
                        assert key in ["admin", "bibjson", "id", "last_updated", "created_date"]
                    if "admin" in record:
                        for key in record["admin"].keys():
                            assert key in ["ticked", "seal"]

            if types_arg in ["-", "all", "journal"]:
                journal_file = "doaj_journal_data_" + day_at_start + ".tar.gz"
                assert journal_file in files

                stream = localStore.get(container_id, journal_file)
                tarball = tarfile.open(fileobj=stream, mode="r:gz")
                members = tarball.getmembers()
                assert len(members) == journal_file_count

                if len(members) > 0:
                    f = tarball.extractfile(members[0])
                    data = json.loads(f.read())
                    assert len(data) == first_journal_file_records

                    record = data[0]
                    for key in record.keys():
                        assert key in ["admin", "bibjson", "id", "last_updated", "created_date"]
                    if "admin" in record:
                        for key in record["admin"].keys():
                            assert key in ["ticked", "seal"]

        else:
            # in the case of an error, we expect the tmp store to have been cleaned up
            tmpStore = store.TempStore()
            assert not tmpStore.exists(container_id)

            # in the case of an error, we expect the main store not to have been touched
            # (for the errors that we are checking for)
            if prune and not clean:
                # no matter what the error, if we didn't specify clean then we expect everything
                # to survive
                survived = localStore.list(container_id)
                assert localStoreFiles == survived
            elif clean:
                # if we specified clean, then it's possible the main store was cleaned before the
                # error occurred, in which case it depends on the error.  This reminds us that
                # clean shouldn't be used in production
                if tmp_write_arg == "fail":
                    assert not localStore.exists(container_id)
                else:
                    survived = localStore.list(container_id)
                    assert localStoreFiles == survived
            else:
                # otherwise, we expect the main store to have survived
                assert not localStore.exists(container_id)
Example #30
def generate_sitemap(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = SitemapBackgroundTask(job)
    BackgroundApi.execute(task)
Example #31
def check_latest_es_backup(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = CheckLatestESBackupBackgroundTask(job)
    BackgroundApi.execute(task)
Example #32
def journal_bulk_edit(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = JournalBulkEditBackgroundTask(job)
    BackgroundApi.execute(task)
Example #33
def set_in_doaj(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = SetInDOAJBackgroundTask(job)
    BackgroundApi.execute(task)
Example #34
from portality.core import app
from portality.tasks import read_news
from portality.background import BackgroundApi

if __name__ == "__main__":
    if app.config.get("SCRIPTS_READ_ONLY_MODE", False):
        print("System is in READ-ONLY mode, script cannot run")
        exit()

    user = app.config.get("SYSTEM_USERNAME")
    job = read_news.ReadNewsBackgroundTask.prepare(user)
    task = read_news.ReadNewsBackgroundTask(job)
    BackgroundApi.execute(task)
Example #35
File: journal_csv.py Project: DOAJ/doaj
def journal_csv(job_id):
    job = models.BackgroundJob.pull(job_id)
    task = JournalCSVBackgroundTask(job)
    BackgroundApi.execute(task)