Exemplo n.º 1
0
    def test_pointless(self):
        """Ingesting a job whose suid has a more recent raw is skipped.

        Two jobs share one suid: the job under test has a raw datestamped
        two hours ago, the other a raw datestamped one hour ago. After
        ingesting the first, it must be marked skipped with the "pointless"
        skip reason recorded in its error context.
        """
        two_hours_ago = pendulum.now().subtract(hours=2)
        stale_job = factories.IngestJobFactory(raw__datestamp=two_hours_ago)

        # Second job on the same suid, with a newer raw.
        one_hour_ago = pendulum.now().subtract(hours=1)
        factories.IngestJobFactory(
            suid=stale_job.suid,
            raw__datestamp=one_hour_ago,
        )

        ingest(job_id=stale_job.id)
        stale_job.refresh_from_db()

        assert stale_job.status == stale_job.STATUS.skipped
        assert (stale_job.error_context ==
                stale_job.SkipReasons.pointless.value)
Exemplo n.º 2
0
    def test_schedule(self, raw_ages, selected_raw, claim, prior_status,
                      superfluous, expected_status):
        """Schedule a single suid and verify the job that comes back.

        One raw datum is created per entry of ``raw_ages`` (each entry is a
        number of days before now). ``selected_raw`` is the index of the raw
        the returned job must reference. When ``prior_status`` is truthy, a
        job with that status is created for that raw beforehand and the
        scheduler must hand back that same job.
        """
        suid = factories.SourceUniqueIdentifierFactory()

        raws = []
        for days_ago in raw_ages:
            datestamp = pendulum.now().subtract(days=days_ago)
            raws.append(
                factories.RawDatumFactory(suid=suid, datestamp=datestamp))
        expected_raw = raws[selected_raw]

        expected_job = None
        if prior_status:
            existing_status = getattr(IngestJob.STATUS, prior_status)
            expected_job = factories.IngestJobFactory(
                raw=expected_raw,
                status=existing_status,
            )

        scheduler = IngestScheduler()
        job = scheduler.schedule(suid, claim=claim, superfluous=superfluous)

        # Only when a job existed beforehand can its identity be checked.
        if expected_job:
            assert job.id == expected_job.id
        assert job.suid_id == suid.id
        assert job.raw_id == expected_raw.id
        assert job.status == getattr(IngestJob.STATUS, expected_status)
        assert job.claimed == claim
Exemplo n.º 3
0
    def test_no_output(self):
        """Ingesting a raw datum with an empty graph sets ``no_output``."""
        empty_graph = json.dumps({'@graph': []})
        raw_datum = factories.RawDatumFactory(datum=empty_graph)
        job = factories.IngestJobFactory(raw=raw_datum)

        # The flag must start out falsy so the final check is meaningful.
        assert not raw_datum.no_output

        ingest(job_id=job.id)
        raw_datum.refresh_from_db()

        assert raw_datum.no_output
Exemplo n.º 4
0
    def test_bulk_reingest(self, mock_ingest):
        """``bulk_reingest`` schedules through ``bulk_schedule`` and queues jobs.

        ``bulk_schedule`` is patched to return ten prepared jobs. The test
        checks that it was called once with the given queryset and
        ``superfluous=True, claim=True``, that the very same list is
        returned, and that ``mock_ingest.delay`` was called once per job
        with the expected keyword arguments.
        """
        patch_target = 'share.ingest.scheduler.IngestScheduler.bulk_schedule'
        with mock.patch(patch_target) as mock_bulk_schedule:
            jobs = [factories.IngestJobFactory() for _ in range(10)]
            mock_bulk_schedule.return_value = jobs
            actual_jobs = IngestScheduler().bulk_reingest(
                mock.sentinel.suid_qs)

        # The mock keeps its recorded calls after the patch is undone.
        mock_bulk_schedule.assert_called_once_with(
            mock.sentinel.suid_qs,
            superfluous=True,
            claim=True,
        )
        assert actual_jobs is jobs

        expected_calls = []
        for scheduled_job in actual_jobs:
            delay_kwargs = {
                'job_id': scheduled_job.id,
                'exhaust': False,
                'superfluous': True,
            }
            # unittest.mock compares a recorded call against a one-element
            # tuple holding a dict as a keyword-arguments-only call.
            expected_calls.append((delay_kwargs, ))
        assert mock_ingest.delay.call_args_list == expected_calls
Exemplo n.º 5
0
    def test_legacy_pipeline(self, legacy, monkeypatch):
        """Check the ingest task's behaviour under ``SHARE_LEGACY_PIPELINE``.

        ``IngestJobConsumer._apply_changes`` is replaced with a mock and the
        setting is forced to ``legacy``. After ingesting a one-node graph:
        with ``legacy`` truthy there is exactly one ``NormalizedData`` row
        and one ``_apply_changes`` call; otherwise there are none of either.
        """
        apply_changes_mock = mock.Mock(return_value=[])
        monkeypatch.setattr(
            'share.tasks.jobs.IngestJobConsumer._apply_changes',
            apply_changes_mock,
        )
        monkeypatch.setattr(
            'django.conf.settings.SHARE_LEGACY_PIPELINE',
            legacy,
        )

        graph = MutableGraph()
        graph.add_node('_:id', 'creativework', title='This is a title')
        datum = json.dumps({'@graph': graph.to_jsonld(in_edges=False)})
        job = factories.IngestJobFactory(raw__datum=datum)

        # throw=True is passed so the test does not depend on the result.
        ingest.apply(kwargs={'job_id': job.id}, throw=True)

        if not legacy:
            assert NormalizedData.objects.count() == 0
            assert not apply_changes_mock.called
            return

        assert NormalizedData.objects.count() == 1
        assert apply_changes_mock.call_count == 1
Exemplo n.º 6
0
    def test_bulk_schedule(self, claim, superfluous):
        """Bulk-schedule all suids and verify the returned jobs.

        Four suids are set up, each with several raws; three of them also
        get a pre-existing job. The scheduler must return one job per suid,
        include every pre-existing job, honour ``claim`` on each job, and,
        when ``superfluous`` is set, leave every job in the created status.
        """
        # Each spec is (raw_ages, selected_raw, job_status):
        #   raw_ages     -- ages in days before now, one raw per entry
        #   selected_raw -- index of the raw the pre-existing job is tied to
        #   job_status   -- status of that job, or None for no job at all
        suid_specs = [
            ([0, 1, 2], 0, 'created'),
            ([5, 4, 2, 3], 2, 'failed'),
            ([2, 1], 1, 'succeeded'),
            ([4, 2], 1, None),
        ]

        suids = set()
        expected_jobs = set()
        for raw_ages, selected_raw, job_status in suid_specs:
            suid = factories.SourceUniqueIdentifierFactory()
            suids.add(suid)

            raws = []
            for days_ago in raw_ages:
                datestamp = pendulum.now().subtract(days=days_ago)
                raws.append(
                    factories.RawDatumFactory(suid=suid, datestamp=datestamp))

            if not job_status:
                continue
            existing_status = getattr(IngestJob.STATUS, job_status)
            expected_jobs.add(
                factories.IngestJobFactory(
                    raw=raws[selected_raw],
                    status=existing_status,
                ))

        scheduler = IngestScheduler()
        actual_jobs = scheduler.bulk_schedule(
            SourceUniqueIdentifier.objects.all(),
            claim=claim,
            superfluous=superfluous,
        )

        assert len(actual_jobs) == len(suids)
        assert expected_jobs.issubset(actual_jobs)
        for scheduled_job in actual_jobs:
            assert bool(scheduled_job.claimed) == claim
            if superfluous:
                assert scheduled_job.status == IngestJob.STATUS.created