Пример #1
0
    def prepare(cls, username, **kwargs):
        """
        Build the BackgroundJob record for an article bulk delete.

        The parent class's ``prepare`` is called first with the same arguments
        (NOTE(review): presumably a precondition/permission check - confirm
        against the parent class, which is not visible here).

        :param username: stored on the job as its user
        :param kwargs: must contain 'selection_query' (JSON-serialised into the
            job's reference) and 'ids' (recorded as a job parameter); a missing
            key surfaces as a KeyError
        :return: a BackgroundJob instance representing this task
        :raises BackgroundException: if ``_job_parameter_check`` rejects the
            assembled parameters
        """

        super(ArticleBulkDeleteBackgroundTask, cls).prepare(username, **kwargs)

        # start with the bare job record
        bulk_job = models.BackgroundJob()
        bulk_job.user = username
        bulk_job.action = cls.__action__

        # keep the query that produced the selection as a reference on the job
        references = {}
        query_json = json.dumps(kwargs['selection_query'])
        cls.set_reference(references, "selection_query", query_json)
        bulk_job.reference = references

        # the ids to act on are the only parameter recorded for this task
        job_params = {}
        cls.set_param(job_params, 'ids', kwargs['ids'])

        params_ok = cls._job_parameter_check(job_params)
        if not params_ok:
            message = u"{}.prepare run without sufficient parameters".format(
                cls.__name__)
            raise BackgroundException(message)

        bulk_job.params = job_params
        return bulk_job
Пример #2
0
 def get_task(cls, user=None, run_fail=False, cleanup_fail=False):
     """
     Create a MockBackgroundTask wrapped around a fresh BackgroundJob.

     :param user: if not None, assigned to the job's ``user``
     :param run_fail: passed through to MockBackgroundTask as ``run_fail``
     :param cleanup_fail: passed through to MockBackgroundTask as ``cleanup_fail``
     :return: the constructed MockBackgroundTask
     """
     bg_job = models.BackgroundJob()
     if user is not None:
         bg_job.user = user
     failure_flags = {"run_fail": run_fail, "cleanup_fail": cleanup_fail}
     return MockBackgroundTask(bg_job, **failure_flags)
Пример #3
0
    def prepare(cls, username, **kwargs):
        """
        Build the BackgroundJob record for a journal bulk delete and lock the journals.

        The parent class's ``prepare`` is called first with the same arguments
        (NOTE(review): presumably a precondition/permission check - confirm
        against the parent class, which is not visible here).

        :param username: stored on the job as its user, and used as the lock owner
        :param kwargs: must contain 'selection_query' (JSON-serialised into the
            job's reference) and 'ids' (recorded as a job parameter and locked)
        :return: a BackgroundJob instance representing this task
        :raises BackgroundException: if ``_job_parameter_check`` rejects the
            assembled parameters
        """

        super(JournalBulkDeleteBackgroundTask, cls).prepare(username, **kwargs)

        # start with the bare job record
        delete_job = models.BackgroundJob()
        delete_job.user = username
        delete_job.action = cls.__action__

        # NOTE(review): other prepare() implementations populate the reference
        # via cls.set_reference; here the key is written directly - confirm
        # whether the difference is intentional.
        selection_json = json.dumps(kwargs['selection_query'])
        delete_job.reference = {'selection_query': selection_json}

        job_params = {}
        cls.set_param(job_params, 'ids', kwargs['ids'])

        params_ok = cls._job_parameter_check(job_params)
        if not params_ok:
            raise BackgroundException("{}.prepare run without sufficient parameters".format(cls.__name__))

        delete_job.params = job_params

        # take out locks on all the records (if they are lockable) before handing
        # the job back; batch_lock will raise an exception if this fails
        lock_timeout = app.config.get("BACKGROUND_TASK_LOCK_TIMEOUT", 3600)
        lock.batch_lock('journal', kwargs['ids'], username, timeout=lock_timeout)

        return delete_job
Пример #4
0
    def prepare(cls, username, **kwargs):
        """
        Build the BackgroundJob record for this task from its 'write' and 'prepall' options.

        :param username: stored on the job as its user
        :param kwargs: may contain 'write' (default True) and 'prepall'
            (default False); both are recorded as job parameters
        :return: a BackgroundJob instance representing this task
        :raises BackgroundException: if 'prepall' is truthy while 'write' is falsy
        """

        should_write = kwargs.get("write", True)
        prep_all = kwargs.get("prepall", False)

        # prepping everything without saving the result is rejected outright
        if prep_all and not should_write:
            raise BackgroundException(
                "'prepall' must be used with the 'write' parameter set to True (why prep but not save?)"
            )

        job_params = {}
        cls.set_param(job_params, "write", should_write)
        cls.set_param(job_params, "prepall", prep_all)

        # now the job record itself
        new_job = models.BackgroundJob()
        new_job.user = username
        new_job.action = cls.__action__
        new_job.params = job_params

        # leave a trace on the job when the prepall option is in force
        if prep_all:
            new_job.add_audit_message(
                "'prepall' arg set. 'unchanged' articles will also have their indexes refreshed."
            )

        return new_job
Пример #5
0
    def test_1_http_upload_fail(self):
        """
        http_upload must report failure for both the "http://fail" and "http://except" URLs.

        requests.head / requests.get are replaced with ResponseMockFactory's
        head_fail / get_fail, and the same FileUpload record is pointed at each
        URL in turn before calling ingestarticles.http_upload.
        """
        requests.head = ResponseMockFactory.head_fail
        requests.get = ResponseMockFactory.get_fail

        upload = models.FileUpload()
        upload.set_id()
        upload.upload("testuser", "http://fail", status="exists")

        # remember the target path in self.cleanup_paths
        # (presumably removed again during teardown - not visible here)
        local_path = os.path.join(app.config.get("UPLOAD_DIR"), upload.local_filename)
        self.cleanup_paths.append(local_path)

        background_job = models.BackgroundJob()

        def attempt_and_expect_failure():
            # one upload attempt: it must return False and leave the record
            # failed, with an error message but no details or failure reasons
            outcome = ingestarticles.http_upload(background_job, local_path, upload)

            assert outcome is False
            assert upload.status == "failed"
            assert upload.error is not None and upload.error != ""
            assert upload.error_details is None
            assert list(upload.failure_reasons.keys()) == []

        attempt_and_expect_failure()

        # now try it with an actual exception
        upload.upload("testuser", "http://except", status="exists")
        attempt_and_expect_failure()
Пример #6
0
    def test_3_submit_retry(self):
        """
        Submitting an ingest job twice with retries set to 1 must raise RetryException each
        time, leaving the job "processing" after the first attempt and "error" after the second.
        """
        app.config["HUEY_TASKS"]["ingest_articles"]["retries"] = 1

        upload = models.FileUpload()
        upload.validated("doaj")
        upload.save()

        job = models.BackgroundJob()
        job.params = {
            "ingest_articles__file_upload_id": upload.id,
            "ingest_articles__attempts": 0,
        }
        job.save(blocking=True)

        # this assumes that huey is in always eager mode, and thus submit immediately calls the
        # async task, which in turn calls execute, which ultimately calls run.
        # Each round re-submits the most recently pulled copy of the job; the second round is
        # where the retry limit from the config causes the job to fail.
        expected_rounds = [(1, "processing"), (2, "error")]
        for expected_attempts, expected_status in expected_rounds:
            with self.assertRaises(RetryException):
                ingestarticles.IngestArticlesBackgroundTask.submit(job)

            job = models.BackgroundJob.pull(job.id)
            assert job.params.get("ingest_articles__attempts") == expected_attempts
            assert job.status == expected_status
Пример #7
0
    def prepare(cls, username, **kwargs):
        """
        Build the BackgroundJob record for this task from its 'clean', 'prune' and 'types' options.

        :param username: stored on the job as its user
        :param kwargs: may contain 'clean' and 'prune' (a missing or None value is
            recorded as False) and 'types' (anything other than "all", "journal"
            or "article" is recorded as "all")
        :return: a BackgroundJob instance representing this task
        :raises BackgroundException: if STORE_PUBLIC_DATA_DUMP_CONTAINER is not
            set in the app config
        """
        clean = kwargs.get("clean")
        prune = kwargs.get("prune")
        dump_types = kwargs.get("types")

        # absent and None are treated alike: both fall back to the default
        job_params = {}
        cls.set_param(job_params, 'clean', False if clean is None else clean)
        cls.set_param(job_params, "prune", False if prune is None else prune)
        cls.set_param(
            job_params, "types",
            dump_types if dump_types in ["all", "journal", "article"] else "all")

        if app.config.get("STORE_PUBLIC_DATA_DUMP_CONTAINER") is None:
            raise BackgroundException(
                "You must set STORE_PUBLIC_DATA_DUMP_CONTAINER in the config")

        # now the job record itself
        dump_job = models.BackgroundJob()
        dump_job.user = username
        dump_job.action = cls.__action__
        dump_job.params = job_params
        return dump_job
Пример #8
0
    def prepare(cls, username, **kwargs):
        """
        Build the BackgroundJob record for this task, recording its output options as parameters.

        :param username: stored on the job as its user
        :param kwargs: may contain 'outdir' (default "article_duplicates_" + dates.today()),
            'tmpdir' (default "tmp_article_duplicates_" + dates.today()),
            'email' (default False) and 'article_csv' (default False)
        :return: a BackgroundJob instance representing this task
        """

        duplicates_job = models.BackgroundJob()
        duplicates_job.user = username
        duplicates_job.action = cls.__action__

        job_params = {}

        # the default directory names are built from dates.today()
        outdir = kwargs.get("outdir", "article_duplicates_" + dates.today())
        cls.set_param(job_params, "outdir", outdir)

        email = kwargs.get("email", False)
        cls.set_param(job_params, "email", email)

        tmpdir = kwargs.get("tmpdir", "tmp_article_duplicates_" + dates.today())
        cls.set_param(job_params, "tmpdir", tmpdir)

        article_csv = kwargs.get("article_csv", False)
        cls.set_param(job_params, "article_csv", article_csv)

        duplicates_job.params = job_params
        return duplicates_job
Пример #9
0
def anonymise_background_job(record):
    """
    Replace the 'suggestion_bulk_edit__note' parameter of a background job record with its hash.

    :param record: keyword data from which a models.BackgroundJob is constructed
    :return: the job's ``data`` after the note (if any) has been passed through
        basic_hash; the record itself, untouched, if constructing the job raises
        DataStructureException
    """
    note_key = 'suggestion_bulk_edit__note'

    try:
        bgjob = models.BackgroundJob(**record)
    except DataStructureException:
        # not a structure BackgroundJob accepts - hand it back as it came
        return record

    # nothing to hash when there are no params or no note among them
    if not bgjob.params or note_key not in bgjob.params:
        return bgjob.data

    bgjob.params[note_key] = basic_hash(bgjob.params[note_key])
    return bgjob.data
Пример #10
0
    def test_29_background_job(self):
        """
        A BackgroundJob built from the fixture can be saved and pulled back by id, and a job
        built from an extended fixture has two audit entries after one message is added.
        """
        first_job = models.BackgroundJob(**BackgroundFixtureFactory.example())
        first_job.save()

        # pause before reading back (presumably to let the save become visible to pull)
        time.sleep(2)

        assert models.BackgroundJob.pull(first_job.id) is not None

        # second job: same fixture with extra params and a query reference
        fixture = BackgroundFixtureFactory.example()
        fixture_params = fixture["params"]
        fixture_params["ids"] = ["1", "2", "3"]
        fixture_params["type"] = "suggestion"
        fixture["reference"]["query"] = json.dumps({"query": {"match_all": {}}})

        second_job = models.BackgroundJob(**fixture)
        second_job.save()

        # presumably the fixture already carries one audit entry, hence two in total
        second_job.add_audit_message("message")
        assert len(second_job.audit) == 2
Пример #11
0
    def prepare(cls, username, **kwargs):
        """
        Build the BackgroundJob record for a journal bulk edit and lock the journals.

        The parent class's ``prepare`` is called first with the same arguments
        (NOTE(review): presumably a precondition/permission check - confirm
        against the parent class, which is not visible here).

        :param username: stored on the job as its user, and used as the lock owner
        :param kwargs: must contain 'selection_query' (JSON-serialised into the
            job's reference) and 'ids' (recorded as a parameter and locked); may
            contain 'note' (recorded unless None or empty) and
            'replacement_metadata' (a mapping; its non-None, non-empty values are
            recorded as a JSON string if any remain)
        :return: a BackgroundJob instance representing this task
        :raises BackgroundException: if ``_job_parameter_check`` rejects the
            assembled parameters
        """

        super(JournalBulkEditBackgroundTask, cls).prepare(username, **kwargs)

        # start with the bare job record
        edit_job = models.BackgroundJob()
        edit_job.user = username
        edit_job.action = cls.__action__

        references = {}
        query_json = json.dumps(kwargs['selection_query'])
        cls.set_reference(references, "selection_query", query_json)
        edit_job.reference = references

        job_params = {}

        # the named parameters we know may be there
        cls.set_param(job_params, 'ids', kwargs['ids'])
        note = kwargs.get("note")
        if note is not None and note != "":
            cls.set_param(job_params, 'note', note)

        # the metadata overwrites: blank entries are dropped, and the parameter
        # is only recorded when something is left
        if "replacement_metadata" in kwargs:
            overrides = {
                field: value
                for field, value in kwargs["replacement_metadata"].items()
                if value is not None and value != ""
            }
            if overrides:
                cls.set_param(job_params, 'replacement_metadata',
                              json.dumps(overrides))

        params_ok = cls._job_parameter_check(job_params)
        if not params_ok:
            message = "{}.prepare run without sufficient parameters".format(
                cls.__name__)
            raise BackgroundException(message)

        edit_job.params = job_params

        # take out locks on all the journals before handing the job back;
        # batch_lock will raise an exception if this fails
        lock_timeout = app.config.get("BACKGROUND_TASK_LOCK_TIMEOUT", 3600)
        lock.batch_lock("journal", kwargs['ids'], username, timeout=lock_timeout)

        return edit_job
Пример #12
0
    def prepare(cls, username, **kwargs):
        """
        Build a BackgroundJob record carrying only the user and this task's action.

        :param username: stored on the job as its user
        :param kwargs: accepted for interface compatibility; not read here
        :return: a BackgroundJob instance representing this task
        """

        new_job = models.BackgroundJob()
        new_job.action = cls.__action__
        new_job.user = username
        return new_job
Пример #13
0
    def test_2_run_errors(self):
        """
        IngestArticlesBackgroundTask.run must raise BackgroundException when the job has no
        params set, empty params, or a file upload id of "whatever".
        """
        job = models.BackgroundJob()
        task = ingestarticles.IngestArticlesBackgroundTask(job)

        # first with the job exactly as constructed
        with self.assertRaises(BackgroundException):
            task.run()

        # then with each set of unusable params in turn
        unusable_params = (
            {},
            {"ingest_articles__file_upload_id": "whatever"},
        )
        for candidate in unusable_params:
            job.params = candidate
            with self.assertRaises(BackgroundException):
                task.run()
Пример #14
0
    def prepare(cls, username, **kwargs):
        """
        Build a BackgroundJob record for this task, provided CACHE_DIR is configured.

        :param username: stored on the job as its user
        :param kwargs: accepted for interface compatibility; not read here
        :return: a BackgroundJob instance representing this task
        :raises BackgroundException: if CACHE_DIR is not set in the app config
        """
        if app.config.get("CACHE_DIR") is None:
            raise BackgroundException("You must set CACHE_DIR in the config")

        new_job = models.BackgroundJob()
        new_job.action = cls.__action__
        new_job.user = username
        return new_job
Пример #15
0
    def prepare(cls, username, **kwargs):
        """
        Create the file upload record for an ingest and build the BackgroundJob that refers to it.

        :param username: stored on the job as its user, and passed to the upload helpers
        :param kwargs: 'schema' is required; at least one of 'upload_file' (an
            object with a ``filename`` attribute) or 'url' is required;
            'previous' is optional (default []) and passed to the upload helpers
        :return: a BackgroundJob instance representing this task, with the
            file upload id and an attempts counter of 0 as parameters
        :raises BackgroundException: if UPLOAD_DIR is not configured, if the
            required arguments are missing, or if no file upload id was produced
        """

        if app.config.get("UPLOAD_DIR") is None:
            raise BackgroundException("UPLOAD_DIR is not set in configuration")

        upload_file = kwargs.get("upload_file")
        schema = kwargs.get("schema")
        source_url = kwargs.get("url")
        previous = kwargs.get("previous", [])

        # validate the arguments before anything is created
        if upload_file is None and source_url is None:
            raise BackgroundException(
                "You must specify one of 'upload_file' or 'url' as keyword arguments"
            )
        if schema is None:
            raise BackgroundException(
                "You must specify 'schema' in the keyword arguments")

        # a file with a non-empty filename takes precedence over a url
        file_upload_id = None
        if upload_file is not None and upload_file.filename != "":
            file_upload_id = cls._file_upload(username, upload_file, schema,
                                              previous)
        elif source_url is not None and source_url != "":
            file_upload_id = cls._url_upload(username, source_url, schema,
                                             previous)

        if file_upload_id is None:
            raise BackgroundException("No file upload record was created")

        # with the upload record in hand, prepare the job record
        ingest_job = models.BackgroundJob()
        ingest_job.user = username
        ingest_job.action = cls.__action__

        job_params = {}
        cls.set_param(job_params, "file_upload_id", file_upload_id)
        cls.set_param(job_params, "attempts", 0)
        ingest_job.params = job_params

        return ingest_job
Пример #16
0
    def prepare(cls, username, **kwargs):
        """
        Build a BackgroundJob record for this task, provided email is enabled in the config.

        :param username: user who called this job; stored on the job as its user
        :param kwargs: accepted for interface compatibility; not read here
        :return: a BackgroundJob instance representing this task
        :raises BackgroundException: if ENABLE_EMAIL is unset or falsy in the app config
        """

        email_enabled = app.config.get("ENABLE_EMAIL", False)
        if not email_enabled:
            raise BackgroundException("Email has been disabled in config. Set ENABLE_EMAIL to True to run this task.")

        new_job = models.BackgroundJob()
        new_job.action = cls.__action__
        new_job.user = username
        return new_job
Пример #17
0
    def prepare(cls, username, **kwargs):
        """
        Build the BackgroundJob record for setting journals' in_doaj flag and lock the journals.

        :param username: stored on the job as its user, and used as the lock owner
        :param kwargs: 'journal_ids' (non-empty) and 'in_doaj' (not None) are
            required and recorded as parameters; 'selection_query', if present,
            is JSON-serialised into the job's reference
        :return: a BackgroundJob instance representing this task
        :raises RuntimeError: if 'journal_ids' is None or empty, or 'in_doaj' is None
        """

        flag_job = models.BackgroundJob()
        flag_job.user = username
        flag_job.action = cls.__action__

        journal_ids = kwargs.get("journal_ids")
        in_doaj = kwargs.get("in_doaj")

        job_params = {}
        cls.set_param(job_params, "journal_ids", journal_ids)
        cls.set_param(job_params, "in_doaj", in_doaj)

        # both arguments are mandatory, and the id list must not be empty
        no_journals = journal_ids is None or len(journal_ids) == 0
        if no_journals or in_doaj is None:
            raise RuntimeError(
                u"SetInDOAJBackgroundTask.prepare run without sufficient parameters"
            )

        flag_job.params = job_params

        # the selection query is optional for this task
        if "selection_query" in kwargs:
            references = {}
            query_json = json.dumps(kwargs['selection_query'])
            cls.set_reference(references, "selection_query", query_json)
            flag_job.reference = references

        # take out locks on the journals before handing the job back;
        # batch_lock will raise an exception if this fails
        lock_timeout = app.config.get("BACKGROUND_TASK_LOCK_TIMEOUT", 3600)
        lock.batch_lock("journal", journal_ids, username, timeout=lock_timeout)

        return flag_job
Пример #18
0
    def prepare(cls, username, **kwargs):
        """
        Build the BackgroundJob record for this task, recording its reporting options as parameters.

        :param username: stored on the job as its user
        :param kwargs: may contain 'outdir' (default "report_" + dates.today()),
            'from_date' (default "1970-01-01T00:00:00Z", recorded as "from"),
            'to_date' (default dates.now(), recorded as "to") and
            'email' (default False)
        :return: a BackgroundJob instance representing this task
        """

        report_job = models.BackgroundJob()
        report_job.user = username
        report_job.action = cls.__action__

        job_params = {}

        outdir = kwargs.get("outdir", "report_" + dates.today())
        cls.set_param(job_params, "outdir", outdir)

        # the date range arrives as from_date/to_date but is recorded as from/to
        from_date = kwargs.get("from_date", "1970-01-01T00:00:00Z")
        cls.set_param(job_params, "from", from_date)

        to_date = kwargs.get("to_date", dates.now())
        cls.set_param(job_params, "to", to_date)

        email = kwargs.get("email", False)
        cls.set_param(job_params, "email", email)

        report_job.params = job_params
        return report_job