Пример #1
0
def test_reenqueue_object_success(reenqueue_object, session, redis,
                                  museum_object, museum_package):
    # Mark the package as having gone through the whole workflow and
    # ended up rejected
    for flag in ("downloaded", "packaged", "uploaded", "rejected"):
        setattr(museum_package, flag, True)
    session.commit()

    # Complete a 'confirm_sip' job before the object is re-enqueued
    confirm_queue = get_queue(QueueType.CONFIRM_SIP)
    confirm_queue.enqueue(successful_job, job_id="confirm_sip_123456")
    SimpleWorker(
        [confirm_queue], connection=confirm_queue.connection
    ).work(burst=True)

    finished_registry = FinishedJobRegistry(queue=confirm_queue)
    assert finished_registry.get_job_ids() == ["confirm_sip_123456"]

    result = reenqueue_object(["123456"])
    assert "Object 123456 re-enqueued" in result.stdout

    # A new download job was enqueued
    download_queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    assert "download_object_123456" in download_queue.job_ids

    # The database entry was updated
    db_museum_object = session.query(MuseumObject).filter_by(id=123456).one()
    assert len(db_museum_object.packages) == 1
    assert not db_museum_object.latest_package

    # The earlier finished job was cleared away
    assert finished_registry.get_job_ids() == []
Пример #2
0
def navbar_stats():
    """
    Retrieve object counts used for the navbar
    """
    # Serve the cached response if one exists
    redis_conn = get_redis_connection()
    cached = redis_conn.get("navbar_stats")
    if cached:
        return jsonify(json.loads(cached))

    queues = tuple(
        get_queue(queue_type) for queue_type in (
            QueueType.DOWNLOAD_OBJECT, QueueType.CREATE_SIP,
            QueueType.SUBMIT_SIP, QueueType.CONFIRM_SIP
        )
    )

    # Per-queue pending and processing counts
    stats = {
        "queues": {
            queue.name: {
                "pending": queue.count,
                "processing": StartedJobRegistry(queue=queue).count
            }
            for queue in queues
        }
    }

    # Total number of failed jobs across every queue
    stats["failed"] = sum(
        FailedJobRegistry(queue=queue).count for queue in queues)

    # Cache result for 2 seconds
    redis_conn.set("navbar_stats", json.dumps(stats), ex=2)
    return jsonify(stats)
    def test_list_sips_queues(self, client, session, museum_object_factory,
                              museum_package_factory):
        """
        Test that the queue names for object are provided correctly
        """
        object_a = museum_object_factory(
            id=10, preserved=True, title="Object A")
        object_b = museum_object_factory(id=20, title="Object B")

        package_a = museum_package_factory(
            museum_object=object_a, sip_filename="testA.tar")
        object_a.latest_package = package_a

        museum_package_factory(
            museum_object=object_b, sip_filename="testB.tar")

        session.commit()

        # Enqueue one job per object
        get_queue(QueueType.DOWNLOAD_OBJECT).enqueue(
            successful_job, job_id="download_object_10")
        get_queue(QueueType.SUBMIT_SIP).enqueue(
            successful_job, job_id="submit_sip_20")

        result = client.get("/api/list-sips").json

        # Only object A will report the queue names, since it's the latest
        # package
        assert len(result["results"]) == 2

        first, second = result["results"]
        assert first["filename"] == "testB.tar"
        assert first["queues"] == []
        assert second["filename"] == "testA.tar"
        assert second["queues"] == ["download_object"]
def test_freeze_objects_delete_jobs(session, redis, freeze_objects,
                                    museum_object_factory):
    """
    Freeze object with one pending and one failed job, and ensure
    they are both deleted
    """
    def successful_job():
        return ":)"

    def failing_job():
        raise RuntimeError(":(")

    museum_object_factory(id=123456)

    download_queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    submit_queue = get_queue(QueueType.SUBMIT_SIP)

    # One job stays pending, the other is executed and fails
    download_queue.enqueue(successful_job, job_id="download_object_123456")
    submit_queue.enqueue(failing_job, job_id="submit_sip_123456")
    SimpleWorker(
        [submit_queue], connection=submit_queue.connection
    ).work(burst=True)

    freeze_objects(["--delete-jobs", "--reason", "Deleting job", "123456"])

    # Both queues were emptied
    assert len(download_queue.job_ids) == 0
    assert len(submit_queue.job_ids) == 0

    # The object was frozen with the given reason
    assert session.query(MuseumObject).filter_by(
        id=123456, freeze_reason="Deleting job").count() == 1
Пример #5
0
def test_get_object_id2queue_map(redis):
    """
    Test that 'get_object_id2queue_map' returns a correct dictionary
    """
    download_queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    submit_queue = get_queue(QueueType.SUBMIT_SIP)

    # One pending job, plus one job that is executed and fails
    download_queue.enqueue(successful_job, job_id="download_object_123456")
    submit_queue.enqueue(failing_job, job_id="submit_sip_654321")
    SimpleWorker(
        [submit_queue], connection=submit_queue.connection
    ).work(burst=True)

    queue_map = get_object_id2queue_map([123456, 654321, 111111])

    # The failed job is reported under both its own queue and 'failed';
    # unknown IDs map to an empty list
    assert queue_map[123456] == ["download_object"]
    assert queue_map[654321] == ["submit_sip", "failed"]
    assert queue_map[111111] == []
    def test_unfreeze_objects(self, client, session, museum_object_factory):
        """
        Test unfreezing two objects with a specific reason
        """
        for object_id, reason in ((1, "Test reason A"), (2, "Test reason B"),
                                  (3, "Test reason A"), (4, "Test reason B")):
            museum_object_factory(
                id=object_id, frozen=True, freeze_reason=reason)

        # Auto-completion entries can be found
        result = client.get("/web-ui/unfreeze-objects/")
        assert b"Test reason A" in result.data
        assert b"Test reason B" in result.data

        # Unfreezing by reason A affects objects 1 and 3
        result = client.post("/web-ui/unfreeze-objects/",
                             data={"reason": "Test reason A"},
                             follow_redirects=True)
        assert b"2 object(s) were unfrozen." in result.data

        assert (session.query(MuseumObject).filter(MuseumObject.id.in_(
            [1, 3])).filter_by(frozen=False).count() == 2)

        # Unfreezing alone does not enqueue the objects
        queue = get_queue(QueueType.DOWNLOAD_OBJECT)
        assert len(queue.job_ids) == 0
Пример #7
0
def test_create_sip(session, create_sip, museum_package, create_sip_call):
    """
    Test running the 'create_sip' workflow job
    """
    create_date = datetime.datetime(
        2019, 1, 2, 10, 0, 0, 0, tzinfo=datetime.timezone.utc)
    museum_package.downloaded = True
    museum_package.created_date = create_date
    session.commit()

    create_sip(123456, sip_id="testID")

    # Database should be updated
    db_museum_package = session.query(MuseumPackage).filter_by(
        sip_filename="fake_package-testID.tar").one()

    # 'create_sip' was called correctly
    assert not create_sip_call["update"]
    assert create_sip_call["create_date"] == create_date
    assert not create_sip_call["modify_date"]

    assert db_museum_package.downloaded
    assert db_museum_package.packaged
    assert not db_museum_package.uploaded

    # The next workflow step 'submit_sip' was enqueued
    job = get_queue(QueueType.SUBMIT_SIP).jobs[0]
    assert job.id == "submit_sip_123456"
    assert job.kwargs == {"object_id": 123456, "sip_id": "testID"}
Пример #8
0
def test_enqueue_objects(
        redis, session, enqueue_objects, museum_object_factory):
    """
    Enqueue 20 objects in three batches of 5, 5 and 10
    """
    for object_id in range(0, 20):
        museum_object_factory(
            id=object_id, preserved=False,
            metadata_hash="", attachment_metadata_hash=""
        )

    # First run enqueues five jobs
    result = enqueue_objects(["--object-count", "5"])
    assert "5 object(s) enqueued" in result.stdout

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    assert len(queue.job_ids) == 5

    # Second run enqueues five more
    result = enqueue_objects(["--object-count", "5"])
    assert "5 object(s) enqueued" in result.stdout
    assert len(queue.job_ids) == 10

    # Third run enqueues the remaining ten, even though a larger count
    # was requested
    result = enqueue_objects(["--object-count", "100"])
    assert "10 object(s) enqueued" in result.stdout
    assert len(queue.job_ids) == 20
Пример #9
0
def test_unfreeze_objects(session, unfreeze_objects, museum_object_factory):
    """
    Unfreeze objects first by ID, then by reason alone
    """
    for object_id in (10, 20, 30):
        museum_object_factory(
            id=object_id, frozen=True, freeze_reason="Test reason")

    # Unfreeze only the first object by giving its ID explicitly
    result = unfreeze_objects([
        "--with-object-ids", "10", "--with-reason", "Test reason"
    ])
    assert "1 object(s) were updated" in result.stdout
    assert session.query(
        MuseumObject
    ).filter_by(frozen=False, id=10).count() == 1

    # The remaining two objects are unfrozen by reason only
    result = unfreeze_objects(["--with-reason", "Test reason"])
    assert "2 object(s) were updated" in result.stdout
    assert (
        session.query(MuseumObject)
        .filter_by(frozen=False)
        .filter(MuseumObject.id.in_([20, 30]))
        .count() == 2
    )

    # Unfreezing alone does not enqueue the objects
    queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    assert len(queue.job_ids) == 0
    def test_freeze_objects_already_running(self, session, client,
                                            museum_object_factory):
        """
        Test freezing two objects that already have running jobs
        """
        def successful_job():
            return ":)"

        confirm_queue = get_queue(QueueType.CONFIRM_SIP)
        started_registry = StartedJobRegistry(queue=confirm_queue)

        # Enqueue a job per object and mark each one as started
        for object_id in (5, 10):
            museum_object_factory(id=object_id)
            job = confirm_queue.enqueue(
                successful_job, job_id=f"download_object_{object_id}")
            started_registry.add(job, -1)

        result = client.post("/web-ui/freeze-objects/",
                             data={
                                 "reason": "Test reason",
                                 "object_ids": "10\n5"
                             })

        # The request is refused while jobs are running
        assert (escape(
            "following object IDs have running jobs and can't be frozen: "
            "5, 10").encode("utf-8") in result.data)
Пример #11
0
def test_delete_jobs_for_object_id(redis):
    """
    Both pending and failed jobs for an object ID are deleted
    """
    download_queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    submit_queue = get_queue(QueueType.SUBMIT_SIP)

    download_queue.enqueue(successful_job, job_id="download_object_123456")
    submit_queue.enqueue(failing_job, job_id="submit_sip_123456")
    SimpleWorker(
        [submit_queue], connection=submit_queue.connection
    ).work(burst=True)

    # Both the pending and failed jobs should be cancelled
    assert delete_jobs_for_object_id(123456) == 2

    assert len(download_queue.job_ids) == 0
    assert len(submit_queue.job_ids) == 0

    # A repeated call finds nothing left to delete
    assert delete_jobs_for_object_id(123456) == 0
Пример #12
0
def update_sips(sip_results, sftp):
    """
    Update processed SIPs one-by-one
    """
    # TODO: We could process SIPs in chunks to reduce DB load
    # (eg. 50 SIPs per DB session). However, this requires a bit more
    # complexity and may not be necessary performance-wise.
    confirm_queue = get_queue(QueueType.CONFIRM_SIP)

    # The same queue instance is shared by every update
    for sip_result in sip_results:
        update_sip(sip_result, sftp=sftp, queue=confirm_queue)
def deferred_enqueue_objects(object_count):
    """
    Enqueue given number of objects to the preservation workflow using a
    background RQ job

    :param int object_count: How many objects to enqueue at most

    :returns: The requested object count
    """
    # The actual enqueueing happens later, inside the background job
    enqueue_queue = get_queue(QueueType.ENQUEUE_OBJECTS)
    enqueue_queue.enqueue(
        enqueue_objects, kwargs={"object_count": object_count})

    print(f"{object_count} object(s) will be enqueued")

    return object_count
def enqueue_object(object_id):
    """
    Enqueue a single object.

    This can be called separately outside of 'enqueue_objects'. In this case,
    the caller needs to ensure the workflow is locked.
    """
    object_id = int(object_id)

    # The workflow always starts with the download step
    download_queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    return download_queue.enqueue(
        download_object,
        kwargs={"object_id": object_id},
        job_id=f"download_object_{object_id}"
    )
Пример #15
0
def test_preservation_error(session, create_sip, monkeypatch, museum_package,
                            museum_packages_dir, archive_dir):
    """
    Test that encountering a PreservationError during a 'create_sip'
    job will freeze the object and remove the object from the workflow
    """
    def mock_create_sip(object_id, package_dir, sip_id, create_date,
                        modify_date, update):
        raise PreservationError(detail="Mock error message.",
                                error="Unsupported file format: wad")

    # Create the fake museum package directory
    package_dir = museum_packages_dir / "123456"
    (package_dir / "sip").mkdir(parents=True)
    (package_dir / "reports").mkdir(parents=True)

    monkeypatch.setattr("passari_workflow.jobs.create_sip.main",
                        mock_create_sip)
    museum_package.downloaded = True
    session.commit()

    create_sip(123456, sip_id="testID")

    # Database should be updated
    db_museum_package = session.query(MuseumPackage).filter_by(
        sip_filename="fake_package-testID.tar").one()
    db_museum_object = session.query(MuseumObject).filter_by(id=123456).one()

    assert db_museum_package.downloaded
    assert not db_museum_package.packaged
    assert not db_museum_package.uploaded
    # The package was cancelled
    assert db_museum_package.cancelled

    # The object was frozen automatically with the error as the reason
    assert db_museum_object.frozen
    assert db_museum_object.freeze_reason == "Unsupported file format: wad"
    assert db_museum_object.freeze_source == FreezeSource.AUTOMATIC

    # No new job was enqueued
    assert not get_queue(QueueType.SUBMIT_SIP).job_ids

    # The museum package directory was deleted
    assert not package_dir.is_dir()

    # The log file was archived.
    # We only test for the existence of the directory since the actual method
    # is mocked and only creates a directory.
    assert (archive_dir / "123456").is_dir()
Пример #16
0
def test_get_enqueued_object_ids(redis):
    """
    Pending and failed jobs count as enqueued; finished jobs do not
    """
    queue = get_queue(QueueType.CREATE_SIP)

    # Run two jobs to completion: one succeeds, one fails
    queue.enqueue(successful_job, job_id="create_sip_124578")
    queue.enqueue(failing_job, job_id="create_sip_998877")
    SimpleWorker([queue], connection=queue.connection).work(burst=True)

    # Leave this job pending
    queue.enqueue(successful_job, job_id="create_sip_555555")

    object_ids = get_enqueued_object_ids()

    # The successfully finished job is excluded; the failed and pending
    # jobs are both included
    assert 124578 not in object_ids
    assert 998877 in object_ids
    assert 555555 in object_ids
Пример #17
0
def test_reenqueue_object_package_enqueued(reenqueue_object, session, redis,
                                           museum_object, museum_package):
    """
    Re-enqueueing fails if the object already has a job in the workflow
    """
    for flag in ("downloaded", "packaged", "uploaded", "rejected"):
        setattr(museum_package, flag, True)
    session.commit()

    # Enqueue a job for the object beforehand
    get_queue(QueueType.CREATE_SIP).enqueue(
        print,
        kwargs={"object_id": 123456},
        job_id="create_sip_123456"
    )

    with pytest.raises(ValueError) as exc:
        reenqueue_object(["123456"], success=False)

    assert "Object is still in the workflow" in str(exc.value)
Пример #18
0
def test_unfreeze_objects_enqueue(
        session, unfreeze_objects, museum_object_factory):
    """
    Test that an object is enqueued after unfreezing if the command-line
    flag is used
    """
    museum_object_factory(id=10, frozen=True, freeze_reason="Test reason")

    result = unfreeze_objects(["--with-reason", "Test reason", "--enqueue"])
    assert "1 object(s) were updated" in result.stdout

    unfrozen_count = session.query(
        MuseumObject
    ).filter_by(frozen=False, id=10).count()
    assert unfrozen_count == 1

    # The unfrozen object was enqueued for download
    download_queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    assert "download_object_10" in download_queue.job_ids
def reenqueue_object(object_id: int):
    """
    Re-enqueue rejected object into the workflow

    :param object_id: ID of the museum object to re-enqueue
    :raises ValueError: If the object's latest package wasn't rejected,
                        or if the object still has jobs in the workflow
    """
    object_id = int(object_id)
    connect_db()

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)

    with scoped_session() as db:
        # The inner join means objects without any package are not found
        museum_object = (
            db.query(MuseumObject)
            .join(
                MuseumPackage,
                MuseumObject.latest_package_id == MuseumPackage.id
            )
            .filter(MuseumObject.id == object_id)
            .one()
        )

        if museum_object.latest_package and \
                not museum_object.latest_package.rejected:
            raise ValueError(
                f"Latest package {museum_object.latest_package.sip_filename} "
                f"wasn't rejected"
            )

        object_ids = get_enqueued_object_ids()

        if object_id in object_ids:
            # Plain string literal: no placeholders are needed here
            raise ValueError(
                "Object is still in the workflow and can't be re-enqueued"
            )

        # Detach the rejected package and clear any leftover jobs so the
        # workflow can restart from the download step
        museum_object.latest_package = None

        delete_jobs_for_object_id(object_id)

        queue.enqueue(
            download_object, kwargs={"object_id": object_id},
            job_id=f"download_object_{object_id}"
        )
Пример #20
0
def test_enqueue_objects_with_object_ids(
        redis, session, enqueue_objects, museum_object_factory,
        museum_package_factory):
    """
    Enqueue two specific object IDs
    """
    for object_id in range(0, 20):
        museum_object_factory(
            id=object_id, preserved=False,
            metadata_hash="", attachment_metadata_hash=""
        )

    result = enqueue_objects(["--object-ids", "5,8"])
    assert "2 object(s) enqueued" in result.stdout

    # Exactly the two requested objects were enqueued
    queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    assert len(queue.job_ids) == 2
    assert "download_object_5" in queue.job_ids
    assert "download_object_8" in queue.job_ids
    def test_unfreeze_objects_enqueue(self, client, session,
                                      museum_object_factory):
        """
        Unfreeze an object and enqueue it
        """
        museum_object_factory(id=1, frozen=True, freeze_reason="Test reason A")
        museum_object_factory(id=2, frozen=True, freeze_reason="Test reason B")

        # Unfreeze object 2 and request immediate enqueueing
        result = client.post("/api/unfreeze-objects",
                             data={
                                 "reason": "Test reason B",
                                 "enqueue": "true"
                             })
        assert result.json == {"success": True, "count": 1}

        assert (session.query(MuseumObject).filter_by(
            id=2, frozen=False).count() == 1)

        # The unfrozen object was enqueued for download
        queue = get_queue(QueueType.DOWNLOAD_OBJECT)
        assert set(queue.job_ids) == {"download_object_2"}
    def test_unfreeze_objects_reason(self, client, session,
                                     museum_object_factory):
        """
        Unfreeze two objects using a reason as the filter
        """
        for object_id, reason in ((1, "Test reason A"), (2, "Test reason B"),
                                  (3, "Test reason A"), (4, "Test reason B")):
            museum_object_factory(
                id=object_id, frozen=True, freeze_reason=reason)

        # Only the two objects frozen with reason A are unfrozen
        result = client.post("/api/unfreeze-objects",
                             data={"reason": "Test reason A"})
        assert result.json == {"success": True, "count": 2}

        # NOTE: '== False' is the SQLAlchemy column comparison idiom,
        # not a Python equality check
        assert (session.query(MuseumObject).filter(MuseumObject.id.in_(
            [1, 3])).filter(MuseumObject.frozen == False).count() == 2)

        # Objects are not enqueued unless explicitly requested
        queue = get_queue(QueueType.DOWNLOAD_OBJECT)
        assert len(queue.job_ids) == 0
Пример #23
0
def test_museum_package_missing(redis, session, download_object, museum_object,
                                freeze_time):
    """
    Download a museum object when the museum object directory doesn't exist
    yet
    """
    # Run the 'download_object' job at a known point in time
    freeze_time("2019-02-03 12:00:00")
    download_object(123456)

    # A MuseumPackage should have been created and linked to the object
    db_museum_object = session.query(MuseumObject).filter(
        MuseumObject.id == 123456).first()
    latest_package = db_museum_object.latest_package
    assert latest_package in db_museum_object.packages

    # The frozen time "2019-02-03 12:00:00" is used as the SIP ID and
    # is also part of the SIP filename
    assert latest_package.sip_filename == "fake_package-20190203-120000.tar"
    assert latest_package.sip_id == "20190203-120000"

    assert latest_package.downloaded
    assert not latest_package.packaged

    # Metadata hashes are copied from the latest version of the object
    assert latest_package.metadata_hash == museum_object.metadata_hash
    assert latest_package.attachment_metadata_hash == \
        museum_object.attachment_metadata_hash

    # MuseumAttachments are added
    attachments = latest_package.attachments
    assert len(attachments) == 2
    assert attachments[0].id == 1234560
    assert attachments[1].id == 2469120

    # The next workflow step 'create_sip' was enqueued
    job = get_queue(QueueType.CREATE_SIP).jobs[0]
    assert job.id == "create_sip_123456"
    assert job.kwargs == {
        "object_id": 123456,
        "sip_id": "20190203-120000"
    }
def test_freeze_objects_running_jobs(session, redis, freeze_objects,
                                     museum_object_factory):
    """
    Try freezing two objects when they have running jobs.
    """
    def successful_job():
        return ":)"

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    started_registry = StartedJobRegistry(queue=queue)

    # Enqueue a job per object and mark both as started
    for object_id in (123456, 654321):
        museum_object_factory(id=object_id)
        job = queue.enqueue(
            successful_job, job_id=f"download_object_{object_id}")
        started_registry.add(job, -1)

    with pytest.raises(WorkflowJobRunningError) as exc:
        freeze_objects(["--reason", "Won't succeed", "654321", "123456"],
                       success=False)

    assert "can't be frozen: 123456, 654321" in str(exc.value)
    def test_unfreeze_objects_enqueue(self, client, session,
                                      museum_object_factory):
        """
        Test unfreezing an object and enqueuing it immediately
        """
        museum_object_factory(id=1, frozen=True, freeze_reason="Test reason A")
        museum_object_factory(id=2, frozen=True, freeze_reason="Test reason B")

        # Unfreeze object 2 with the 'enqueue' flag set
        result = client.post("/web-ui/unfreeze-objects/",
                             data={
                                 "reason": "Test reason B",
                                 "enqueue": True
                             },
                             follow_redirects=True)
        assert b"1 object(s) were unfrozen." in result.data

        assert (session.query(MuseumObject).filter_by(
            id=2, frozen=False).count() == 1)

        # The unfrozen object was enqueued for download
        queue = get_queue(QueueType.DOWNLOAD_OBJECT)
        assert set(queue.job_ids) == {"download_object_2"}
    def test_navbar_stats(self, session, client):
        """
        Test that the navbar statistics report per-queue pending and
        processing counts plus the total failed job count
        """
        # Create 1 'download_object' job
        get_queue(QueueType.DOWNLOAD_OBJECT).enqueue(
            successful_job, job_id="download_object_1")

        # Create 2 'create_sip' jobs
        for i in range(2, 4):
            get_queue(QueueType.CREATE_SIP).enqueue(successful_job,
                                                    job_id=f"create_sip_{i}")

        # Create 1 failed 'submit_sip' job
        submit_queue = get_queue(QueueType.SUBMIT_SIP)
        submit_queue.enqueue(failing_job, job_id="submit_sip_4")
        SimpleWorker([submit_queue],
                     connection=submit_queue.connection).work(burst=True)

        # Create 1 started 'confirm_sip' job by adding it to the started
        # registry directly (TTL -1: the entry does not expire)
        confirm_queue = get_queue(QueueType.CONFIRM_SIP)
        started_registry = StartedJobRegistry(queue=confirm_queue)
        job = confirm_queue.enqueue(successful_job, job_id="confirm_sip_5")
        started_registry.add(job, -1)

        result = client.get("/api/navbar-stats").json

        assert result["queues"]["download_object"] \
            == {"processing": 0, "pending": 1}
        assert result["queues"]["create_sip"] == \
            {"processing": 0, "pending": 2}
        assert result["queues"]["submit_sip"] == \
            {"processing": 0, "pending": 0}
        # TODO: In practice, if one worker is working on a job and there are
        # no pending jobs, this should be 'processing': 1, 'pending': 0.
        # How can we mimic a similar situation in this test scenario?
        assert result["queues"]["confirm_sip"] == \
            {"processing": 1, "pending": 1}

        assert result["failed"] == 1
Пример #27
0
def test_preservation_error(session, download_object, monkeypatch,
                            museum_packages_dir, archive_dir, museum_object,
                            museum_package_factory, with_existing_package):
    """
    Test that encountering a PreservationError during a 'download_object'
    job will freeze the object and remove the object from the workflow.

    The test case has been parametrized with two different scenarios:
    one where a MuseumObject already has one preserved package,
    and a second one where no package has been created yet
    """
    # Replacement for the job's 'main' that always fails with a
    # PreservationError
    def mock_download_object(object_id, package_dir, sip_id):
        raise PreservationError(detail="Mock detailed error message",
                                error="Filename was not supported")

    # Create the fake museum package directory
    (museum_packages_dir / "123456" / "sip").mkdir(parents=True)

    monkeypatch.setattr("passari_workflow.jobs.download_object.main",
                        mock_download_object)

    # For the test case with an existing package, create a museum package that
    # was uploaded successfully earlier.
    # The PreservationError should *not* affect this package.
    if with_existing_package:
        db_museum_package = museum_package_factory(
            sip_filename="fake_package-testID2.tar",
            created_date=datetime.datetime(2018,
                                           9,
                                           1,
                                           12,
                                           0,
                                           0,
                                           0,
                                           tzinfo=datetime.timezone.utc),
            preserved=True,
            museum_object=museum_object)
        museum_object.latest_package = db_museum_package

    session.commit()

    # Run the job; the mocked 'main' raises, which should freeze the object
    download_object(123456)

    # Database should be updated
    db_museum_object = session.query(MuseumObject).get(123456)

    assert db_museum_object.frozen
    assert db_museum_object.freeze_reason == "Filename was not supported"
    assert db_museum_object.freeze_source == FreezeSource.AUTOMATIC

    # The previous successful package was not updated.
    # This is because a new package is not created unless the 'download_object'
    # job is successful
    if with_existing_package:
        latest_package = db_museum_object.latest_package

        assert not latest_package.cancelled
        assert latest_package.preserved
        assert latest_package.sip_filename == "fake_package-testID2.tar"
    else:
        assert not db_museum_object.latest_package

    # No new job was enqueued
    queue = get_queue(QueueType.CREATE_SIP)
    assert not queue.job_ids

    # The museum package directory was deleted
    assert not (museum_packages_dir / "123456").is_dir()
Пример #28
0
def download_object(object_id):
    """
    Download an object from MuseumPlus and enqueue the task 'create_sip'
    once the object is downloaded

    :param object_id: ID of the museum object to download
    """
    object_id = int(object_id)
    connect_db()

    # Create a SIP id from the current time
    sip_id = datetime.datetime.now(
        datetime.timezone.utc).strftime("%Y%m%d-%H%M%S")

    try:
        museum_package = main(
            object_id=object_id,
            package_dir=PACKAGE_DIR,
            # 'sip_id' is optional, but giving it as a kwarg ensures the
            # filename of the SIP is correct before it is created.
            sip_id=sip_id)
    except PreservationError as exc:
        # If a PreservationError was raised, freeze the object
        freeze_running_object(object_id=object_id,
                              sip_id=sip_id,
                              freeze_reason=exc.error)
        return
    except OSError as exc:
        if exc.errno == errno.ENOSPC:
            # Re-raise with a more helpful message, chaining the original
            raise OSError(
                errno.ENOSPC,
                "Ran out of disk space. This may have happened because the "
                "package directory ran out of space while downloading a "
                "large attachment. Try removing packages from the directory "
                "and trying again by processing less packages at the same "
                "time.") from exc

        raise

    filename = museum_package.sip_filename

    with scoped_session() as db:
        db_museum_object = db.query(MuseumObject).filter(
            MuseumObject.id == object_id).one()

        db_package = db.query(MuseumPackage).filter_by(
            sip_filename=filename).first()

        # Get the attachments that currently exist for this object
        # and add them to the new MuseumPackage
        attachment_ids = museum_package.museum_object.attachment_ids
        db_attachments = bulk_create_or_get(db, MuseumAttachment,
                                            attachment_ids)

        if not db_package:
            db_package = MuseumPackage(
                sip_filename=filename,
                sip_id=sip_id,
                object_modified_date=(
                    museum_package.museum_object.modified_date),
                downloaded=True,
                metadata_hash=db_museum_object.metadata_hash,
                attachment_metadata_hash=(
                    db_museum_object.attachment_metadata_hash),
                attachments=db_attachments)
            db_package.museum_object = db_museum_object
        else:
            # A package with this filename already exists, meaning the same
            # SIP id was generated twice for the object
            raise EnvironmentError(
                f"Package with filename {filename} already exists")

        db_museum_object.latest_package = db_package

        # Enqueue the next workflow step
        queue = get_queue(QueueType.CREATE_SIP)
        queue.enqueue(create_sip,
                      kwargs={
                          "object_id": object_id,
                          "sip_id": sip_id
                      },
                      job_id=f"create_sip_{object_id}")
Пример #29
0
def overview_stats():
    """
    Retrieve real-time statistics used in the 'Overview' page
    """
    # Serve from the short-lived Redis cache when a recent result exists
    redis = get_redis_connection()
    cached = redis.get("overview_stats")
    if cached:
        return jsonify(json.loads(cached))

    queues = (
        get_queue(QueueType.DOWNLOAD_OBJECT),
        get_queue(QueueType.CREATE_SIP),
        get_queue(QueueType.SUBMIT_SIP),
        get_queue(QueueType.CONFIRM_SIP),
    )
    # Jobs currently waiting in any queue, plus jobs that have failed
    job_count = sum(queue.count for queue in queues)
    failed_count = sum(
        FailedJobRegistry(queue=queue).count for queue in queues
    )

    total_count = db.session.query(MuseumObject).count()

    frozen_count = (
        db.session.query(MuseumObject)
        .filter(MuseumObject.frozen)
        .count()
    )

    # Uploaded but not yet accepted or rejected by the preservation service
    submitted_count = (
        db.session.query(MuseumObject)
        .join(
            MuseumPackage,
            MuseumObject.latest_package_id == MuseumPackage.id
        )
        .filter(and_(
            MuseumObject.latest_package,
            MuseumPackage.rejected == False,
            MuseumPackage.preserved == False,
            MuseumPackage.uploaded
        ))
        .count()
    )

    rejected_count = (
        db.session.query(MuseumObject)
        .join(
            MuseumPackage,
            MuseumObject.latest_package_id == MuseumPackage.id
        )
        .filter(and_(MuseumObject.latest_package, MuseumPackage.rejected))
        .count()
    )

    preserved_count = (
        db.session.query(MuseumObject)
        .with_transformation(MuseumObject.exclude_preservation_pending)
        .filter(MuseumObject.preserved)
        .count()
    )

    # 'pending' is everything not accounted for by any other bucket
    pending_count = int(
        total_count - job_count - failed_count - frozen_count
        - rejected_count - submitted_count - preserved_count
    )
    result = {
        "steps": {
            "pending": {"count": pending_count},
        },
        "total_count": total_count
    }

    # Add the individual queues
    for queue in queues:
        result["steps"][queue.name] = {"count": queue.count}

    # Add counts outside of queues
    for step_name, step_count in (
            ("preserved", preserved_count),
            ("rejected", rejected_count),
            ("submitted", submitted_count),
            ("frozen", frozen_count),
            ("failed", failed_count)):
        result["steps"][step_name] = {"count": step_count}

    # Cache result for 2 seconds
    redis.set("overview_stats", json.dumps(result), ex=2)
    return jsonify(result)
Пример #30
0
def create_sip(object_id, sip_id):
    """
    Create SIP from a downloaded object and enqueue the task 'submit_sip'
    once the object is packaged into a SIP
    """
    object_id = int(object_id)
    connect_db()

    # Decide whether this is a first-time submission or an update of an
    # already-preserved package; the dates passed to the packaging script
    # differ between the two cases.
    created_date = None
    modified_date = None
    with scoped_session() as db:
        newest_preserved = (
            db.query(MuseumPackage)
            .filter(MuseumPackage.museum_object_id == object_id)
            .filter(MuseumPackage.preserved == True)
            .order_by(MuseumPackage.created_date.desc())
            .first()
        )
        latest_package = (
            db.query(MuseumObject)
            .join(
                MuseumPackage,
                MuseumObject.latest_package_id == MuseumPackage.id
            )
            .filter(MuseumObject.id == object_id)
            .one()
            .latest_package
        )

        if newest_preserved:
            # A preserved SIP exists already; this SIP updates it
            print(f"Creating update SIP for Object {object_id}")
            created_date = newest_preserved.created_date
            modified_date = latest_package.created_date
        else:
            # First SIP for this object
            print(f"Creating submission SIP for Object {object_id}")
            created_date = latest_package.created_date

    try:
        # Run the 'create_sip' script
        museum_package = main(
            object_id=object_id, package_dir=PACKAGE_DIR, sip_id=sip_id,
            create_date=created_date, modify_date=modified_date,
            update=bool(modified_date)
        )
    except PreservationError as exc:
        # A PreservationError means the object must be frozen and taken
        # out of the workflow instead of progressing further.
        freeze_running_object(
            object_id=object_id,
            sip_id=sip_id,
            freeze_reason=exc.error
        )
        return
    except OSError as exc:
        if exc.errno == errno.ENOSPC:
            raise OSError(
                errno.ENOSPC,
                "Ran out of disk space. This may have happened because the "
                "package directory ran out of space while downloading a "
                "large attachment. Try removing packages from the directory "
                "and trying again by processing less packages at the same "
                "time."
            )

        raise

    filename = museum_package.sip_filename

    print(f"Created SIP for Object {object_id}, updating database")

    with scoped_session() as db:
        # Mark the package as packaged and point the object at it
        db_package = (
            db.query(MuseumPackage)
            .filter(MuseumPackage.sip_filename == filename)
            .one()
        )
        db_package.packaged = True
        db.query(MuseumObject).filter(
            MuseumObject.id == object_id
        ).update({MuseumObject.latest_package_id: db_package.id})

        # Hand the SIP over to the next workflow step
        get_queue(QueueType.SUBMIT_SIP).enqueue(
            submit_sip, kwargs={"object_id": object_id, "sip_id": sip_id},
            job_id=f"submit_sip_{object_id}"
        )