def test_autoupdate(self, completions, status, new_version, updated):
    """Run the harvest task on a job after the harvester class VERSION changes.

    The job is created with ``source_config.harvester.version`` while the
    harvester class VERSION is set to 1; the class VERSION is then set to
    ``new_version`` before ``tasks.harvest`` runs. ``updated`` says whether
    the job is expected to end up recorded at ``new_version``.
    """
    config = factories.SourceConfigFactory()

    # Pin the class-level version before the job records it.
    config.harvester.get_class().VERSION = 1

    job_kwargs = {
        'status': status,
        'completions': completions,
        'harvester_version': config.harvester.version,
        'source_config': config,
        'start_date': pendulum.parse('2017-01-01').date(),
    }
    job = factories.HarvestJobFactory(**job_kwargs)

    # Change the class-level version only after the job exists.
    config.harvester.get_class().VERSION = new_version

    tasks.harvest(job_id=job.id)
    job.refresh_from_db()

    if updated:
        assert job.status == HarvestJob.STATUS.succeeded
    elif new_version > 1:
        # Not updated although a newer version exists: skipped as obsolete.
        assert job.status == HarvestJob.STATUS.skipped
        assert job.error_context == HarvestJob.SkipReasons.obsolete.value

    on_new_version = job.harvester_version == new_version
    assert on_new_version == updated
def test_latest_date_null(self):
    """Expect 365 scheduler entries when this test creates no harvest jobs.

    The config is a full harvest with ``earliest_date`` 2017-01-01 and the
    scheduler is asked for everything up to a cutoff of 2018-01-01.
    """
    earliest = pendulum.parse('2017-01-01').date()
    cutoff = pendulum.parse('2018-01-01').date()

    config = factories.SourceConfigFactory(
        full_harvest=True,
        earliest_date=earliest,
    )

    scheduled = HarvestScheduler(config).all(cutoff=cutoff)
    assert len(scheduled) == 365
def test_obsolete(self):
    """Harvest a newer-version job first, then an older-version job.

    Two jobs share the same source config and start date; the harvester
    class VERSION is incremented between their creation. The newer job is
    expected to succeed, and the older one to be skipped with the
    ``obsolete`` skip reason while keeping its original version.
    """
    config = factories.SourceConfigFactory()

    def make_job():
        # Records whatever ``config.harvester.version`` is at call time.
        return factories.HarvestJobFactory(
            harvester_version=config.harvester.version,
            source_config=config,
            start_date=pendulum.parse('2017-01-01').date(),
        )

    older_job = make_job()

    old_version = config.harvester.get_class().VERSION
    config.harvester.get_class().VERSION += 1
    new_version = config.harvester.get_class().VERSION

    newer_job = make_job()

    # Run the newer job before the older one.
    tasks.harvest(job_id=newer_job.id)
    tasks.harvest(job_id=older_job.id)

    older_job.refresh_from_db()
    newer_job.refresh_from_db()

    assert newer_job.status == HarvestJob.STATUS.succeeded
    assert newer_job.harvester_version == new_version

    assert older_job.status == HarvestJob.STATUS.skipped
    assert older_job.harvester_version == old_version
    assert older_job.error_context == HarvestJob.SkipReasons.obsolete.value
def test_resumption_tokens(self, monkeypatch):
    """Expect two records when ``fetch_page`` yields a token, then ``None``.

    ``fetch_page`` is replaced with a mock returning one record plus the
    token ``'token'`` on the first call and one record plus ``None`` on the
    second.
    """
    config = factories.SourceConfigFactory(harvester_kwargs={'metadata_prefix': 'oai_dc'})
    harvester = OAIHarvester(config)

    pages = (
        ([self.OAI_DC_RECORD], 'token'),
        ([self.OAI_DC_RECORD], None),
    )
    fake_fetch_page = mock.Mock(side_effect=pages)
    monkeypatch.setattr(harvester, 'fetch_page', fake_fetch_page)

    fetched = list(harvester.fetch_records(''))
    assert len(fetched) == 2
def test_duplicate_resumption_tokens(self, monkeypatch):
    """Expect a ``ValueError`` when ``fetch_page`` keeps returning one token.

    ``fetch_page`` is mocked to always return one record and the token
    ``'token'``. The test expects exactly one record to be collected before
    the error, and checks the error's ``args`` against the expected message.
    """
    config = factories.SourceConfigFactory(harvester_kwargs={'metadata_prefix': 'oai_dc'})
    harvester = OAIHarvester(config)

    repeating_page = ([self.OAI_DC_RECORD], 'token')
    monkeypatch.setattr(harvester, 'fetch_page', mock.Mock(return_value=repeating_page))

    collected = []
    with pytest.raises(ValueError) as excinfo:
        # Collect one at a time so records seen before the error are kept.
        for record in harvester.fetch_records(''):
            collected.append(record)

    assert len(collected) == 1

    expected_message = 'Found duplicate resumption token "token" from {!r}'.format(harvester)
    assert excinfo.value.args == (expected_message, )
def test_overrides(self, source_config_kwargs, task_kwargs, lock_config):
    """Run the harvest task with the given overrides and expect no exception.

    When ``lock_config`` is truthy, a ``SyncedThread`` wrapping
    ``source_config.acquire_lock`` is started before the task and joined
    afterwards.
    NOTE(review): presumably the thread holds the config's lock while the
    task runs -- confirm against ``SyncedThread``.
    """
    config = factories.SourceConfigFactory(**source_config_kwargs)
    job = factories.HarvestJobFactory(source_config=config)

    lock_thread = None
    if lock_config:
        lock_thread = SyncedThread(config.acquire_lock)
        lock_thread.start()

    try:
        tasks.harvest(job_id=job.id, **task_kwargs)
    finally:
        # Always join a started thread, even if the task raised.
        if lock_thread is not None:
            lock_thread.join()
def test_failure_cases(self, source_config_kwargs, task_kwargs, lock_config, exception):
    """Run the harvest task and expect it to raise ``exception``.

    When ``lock_config`` is truthy, a ``SyncedThread`` wrapping
    ``source_config.acquire_lock`` is started before the task and joined
    afterwards.
    NOTE(review): presumably the thread holds the config's lock while the
    task runs -- confirm against ``SyncedThread``.
    """
    config = factories.SourceConfigFactory(**source_config_kwargs)
    job = factories.HarvestJobFactory(source_config=config)

    lock_thread = None
    if lock_config:
        lock_thread = SyncedThread(config.acquire_lock)
        lock_thread.start()

    try:
        with pytest.raises(exception):
            tasks.harvest(job_id=job.id, **task_kwargs)
    finally:
        # Always join a started thread, whatever the outcome.
        if lock_thread is not None:
            lock_thread.join()
def test_latest_date(self):
    """Expect 365 scheduler entries when the only job is for another config.

    The config under test is a full harvest with ``earliest_date``
    2017-01-01, and the scheduler is queried with a cutoff of 2018-01-01.
    """
    earliest = pendulum.parse('2017-01-01').date()
    config = factories.SourceConfigFactory(full_harvest=True, earliest_date=earliest)

    # We have a harvest job with start_date equal to earliest_date
    # but a different source_config
    unrelated_job_dates = {
        'start_date': pendulum.parse('2017-01-01').date(),
        'end_date': pendulum.parse('2017-01-02').date(),
    }
    factories.HarvestJobFactory(**unrelated_job_dates)

    cutoff = pendulum.parse('2018-01-01').date()
    scheduled = HarvestScheduler(config).all(cutoff=cutoff)
    assert len(scheduled) == 365
def test_caught_up(self):
    """Expect no scheduler entries when a job already starts on the cutoff.

    Two jobs are created for the config: one starting 2017-01-01 (the
    config's ``earliest_date``) and one starting 2018-01-01, which is also
    the cutoff passed to the scheduler.
    """
    config = factories.SourceConfigFactory(
        full_harvest=True,
        earliest_date=pendulum.parse('2017-01-01').date(),
    )

    existing_ranges = (
        ('2017-01-01', '2017-01-02'),
        ('2018-01-01', '2018-01-02'),
    )
    for start, end in existing_ranges:
        factories.HarvestJobFactory(
            source_config=config,
            start_date=pendulum.parse(start).date(),
            end_date=pendulum.parse(end).date(),
        )

    cutoff = pendulum.parse('2018-01-01').date()
    scheduled = HarvestScheduler(config).all(cutoff=cutoff)
    assert len(scheduled) == 0
def make_source_config(context, label, name=None, interval=None, time=None):
    """Create a source config with the given label via ``SourceConfigFactory``.

    Args:
        context: Accepted but not used by this function.
        label: Passed to the factory as ``label``.
        name: When given, the existing ``models.Source`` with this name is
            looked up and used; otherwise a new source is built with
            ``factories.SourceFactory``.
        interval: Optional word ('daily', 'weekly', 'fortnightly', 'yearly'
            or 'monthly'), translated to the ``harvest_interval`` string.
        time: Optional value passed to the factory as ``harvest_after``.

    Raises:
        KeyError: If ``interval`` is given but is not one of the known words.
    """
    interval_strings = {
        'daily': '1 day',
        'weekly': '1 week',
        'fortnightly': '2 weeks',
        'yearly': '1 year',
        'monthly': '1 month',
    }

    source = factories.SourceFactory() if name is None else models.Source.objects.get(name=name)
    factory_kwargs = {'label': label, 'source': source}

    if interval is not None:
        factory_kwargs['harvest_interval'] = interval_strings[interval]

    if time is not None:
        factory_kwargs['harvest_after'] = time

    factories.SourceConfigFactory(**factory_kwargs)
def source_config():
    """Return a new object built by ``factories.SourceConfigFactory`` with defaults."""
    config = factories.SourceConfigFactory()
    return config
def test_ignores_deleted(self):
    """Expect an empty result, not an error, when ``ignore_disabled=True``.

    The source config is built with ``source__is_deleted=True``.
    """
    config = factories.SourceConfigFactory(source__is_deleted=True)
    harvester = config.get_harvester()

    harvested = list(harvester.harvest(ignore_disabled=True))
    assert harvested == []
def test_deleted_source(self):
    """Expect ``HarvesterDisabledError`` when harvesting a deleted source.

    The source config is built with ``source__is_deleted=True`` and
    ``harvest()`` is called without any overrides.
    """
    config = factories.SourceConfigFactory(source__is_deleted=True)

    with pytest.raises(HarvesterDisabledError):
        # Obtaining the harvester stays inside the block, as does consuming
        # the result of harvest().
        harvester = config.get_harvester()
        list(harvester.harvest())
def source_config(self, request):
    """Build a source config from the two flags carried in ``request.param``.

    ``request.param`` is unpacked as ``(config_disabled, source_deleted)``;
    the first is passed to the factory as ``disabled`` and the second as
    ``source__is_deleted``.
    """
    is_disabled, is_deleted = request.param
    factory_kwargs = {
        'disabled': is_disabled,
        'source__is_deleted': is_deleted,
    }
    return factories.SourceConfigFactory(**factory_kwargs)