def test_harvest_abort_failed_jobs_with_source_frequency(self):
    """A 4-day-old running job on a BIWEEKLY source must not be aborted.

    The action is called with ``life_span=3`` while the job was created
    4 days ago; the test expects the job to stay ``Running`` and the
    result message to report zero aborted jobs.
    """
    # prepare
    source = factories.HarvestSourceObj(**SOURCE_DICT.copy())
    job = factories.HarvestJobObj(source=source)

    # Simulate a job that has been running since 4 days ago.
    job.status = 'Running'
    job.created = datetime.datetime.utcnow() - datetime.timedelta(days=4)
    # Switch the source to a biweekly update frequency. Per the original
    # author's note, such a job is only aborted once it has been running
    # for more than two weeks.
    source.frequency = 'BIWEEKLY'
    model.Session.commit()

    # execute
    context = {
        'model': model,
        'session': model.Session,
        'ignore_auth': True,
        'user': '',
    }
    abort_failed_jobs = get_action('harvest_abort_failed_jobs')
    result = abort_failed_jobs(context, {'life_span': 3})

    # verify
    job = harvest_model.HarvestJob.get(job.id)
    assert job.status == 'Running'
    assert job.source_id == source.id
    assert 'Aborted jobs: 0' in result
def test_harvest_abort_failed_jobs_with_include_and_exclude(self):
    """When a source is both included and excluded, ``include`` wins.

    The same source id is passed as ``include`` and ``exclude``; the test
    expects its stale running job to be aborted anyway.
    """
    # prepare
    source = factories.HarvestSourceObj(**SOURCE_DICT.copy())
    job = factories.HarvestJobObj(source=source)

    # Simulate a job that has been running since 4 days ago.
    job.status = 'Running'
    job.created = datetime.datetime.utcnow() - datetime.timedelta(days=4)
    model.Session.commit()

    # execute: include must prevail over exclude
    context = {
        'model': model,
        'session': model.Session,
        'ignore_auth': True,
        'user': '',
    }
    params = {
        'life_span': 3,
        'exclude': source.id,
        'include': source.id,
    }
    result = get_action('harvest_abort_failed_jobs')(context, params)

    # verify
    job = harvest_model.HarvestJob.get(job.id)
    assert job.status in ('Finished', 'Aborted')
    assert job.source_id == source.id
    assert 'Aborted jobs: 1' in result
def test_harvest_sources_job_history_clear(self):
    """Clearing job history for all sources removes jobs and objects only.

    Two sources are created, each with one job, one harvest object and
    one dataset. After calling ``harvest_sources_job_history_clear`` the
    test expects:

    * the result to list both source ids (order is not significant),
    * both sources to still exist,
    * both jobs and both harvest objects to be gone,
    * both datasets to still exist.
    """
    # prepare
    data_dict = SOURCE_DICT.copy()
    source_1 = factories.HarvestSourceObj(**data_dict)
    # The second source needs its own name and URL.
    data_dict['name'] = 'another-source'
    data_dict['url'] = 'http://another-url'
    source_2 = factories.HarvestSourceObj(**data_dict)

    job_1 = factories.HarvestJobObj(source=source_1)
    dataset_1 = ckan_factories.Dataset()
    object_1_ = factories.HarvestObjectObj(job=job_1, source=source_1,
                                           package_id=dataset_1['id'])
    job_2 = factories.HarvestJobObj(source=source_2)
    dataset_2 = ckan_factories.Dataset()
    object_2_ = factories.HarvestObjectObj(job=job_2, source=source_2,
                                           package_id=dataset_2['id'])

    # execute
    context = {'session': model.Session, 'ignore_auth': True, 'user': ''}
    result = get_action('harvest_sources_job_history_clear')(context, {})

    # verify
    # Sort both sides by id so the comparison ignores result ordering.
    assert sorted(result, key=lambda item: item['id']) == sorted(
        [{'id': source_1.id}, {'id': source_2.id}],
        key=lambda item: item['id'])

    source_1_from_db = harvest_model.HarvestSource.get(source_1.id)
    assert source_1_from_db
    assert harvest_model.HarvestJob.get(job_1.id) is None
    assert harvest_model.HarvestObject.get(object_1_.id) is None
    dataset_from_db_1 = model.Package.get(dataset_1['id'])
    assert dataset_from_db_1, 'is None'
    assert dataset_from_db_1.id == dataset_1['id']

    # Look the second source up by its OWN id. The previous version
    # re-fetched source_1 here, so source_2's survival was never checked.
    source_2_from_db = harvest_model.HarvestSource.get(source_2.id)
    assert source_2_from_db
    assert harvest_model.HarvestJob.get(job_2.id) is None
    assert harvest_model.HarvestObject.get(object_2_.id) is None
    dataset_from_db_2 = model.Package.get(dataset_2['id'])
    assert dataset_from_db_2, 'is None'
    assert dataset_from_db_2.id == dataset_2['id']
def test_harvest_abort_failed_jobs_with_exclude_source(self):
    """A stale job on an excluded source is left alone; others are aborted.

    Two sources each get a stale running job. The first source is passed
    as ``exclude``, so the test expects its job to stay ``Running`` while
    the second source's job is aborted.
    """
    # prepare
    source_fields = SOURCE_DICT.copy()
    source1 = factories.HarvestSourceObj(**source_fields)
    job1 = factories.HarvestJobObj(source=source1)

    # The second source needs its own name and URL.
    source_fields['name'] = 'another-source'
    source_fields['url'] = 'http://another-url'
    source2 = factories.HarvestSourceObj(**source_fields)
    job2 = factories.HarvestJobObj(source=source2)

    # Simulate running jobs created 4 and 5 days ago respectively.
    job1.status = 'Running'
    job1.created = datetime.datetime.utcnow() - datetime.timedelta(days=4)
    job2.status = 'Running'
    job2.created = datetime.datetime.utcnow() - datetime.timedelta(days=5)
    model.Session.commit()

    # execute: the first source is excluded, so its job must keep running
    context = {
        'model': model,
        'session': model.Session,
        'ignore_auth': True,
        'user': '',
    }
    params = {'life_span': 3, 'exclude': source1.id}
    result = get_action('harvest_abort_failed_jobs')(context, params)

    # verify
    job1 = harvest_model.HarvestJob.get(job1.id)
    job2 = harvest_model.HarvestJob.get(job2.id)

    assert job1.status == 'Running'
    assert job1.source_id == source1.id

    assert job2.status in ('Finished', 'Aborted')
    assert job2.source_id == source2.id

    assert 'Aborted jobs: 1' in result
def test_harvest_source_clear(self):
    """Clearing a source keeps the source but removes everything under it.

    After ``harvest_source_clear`` the test expects the job, the harvest
    object and the dataset to all be gone, while the source itself can
    still be fetched.
    """
    # prepare
    source = factories.HarvestSourceObj(**SOURCE_DICT.copy())
    job = factories.HarvestJobObj(source=source)
    dataset = ckan_factories.Dataset()
    object_ = factories.HarvestObjectObj(
        job=job,
        source=source,
        package_id=dataset['id'],
    )

    # execute
    context = {'ignore_auth': True, 'user': ''}
    clear_source = get_action('harvest_source_clear')
    result = clear_source(context, {'id': source.id})

    # verify
    assert result == {'id': source.id}
    source = harvest_model.HarvestSource.get(source.id)
    assert source
    assert harvest_model.HarvestJob.get(job.id) is None
    assert harvest_model.HarvestObject.get(object_.id) is None
    assert model.Package.get(dataset['id']) is None
def test_harvest_abort_failed_jobs_without_failed_jobs(self):
    """With no stale running jobs the action reports there is nothing to do.

    The freshly created job is left untouched, so the test expects it to
    still be ``New`` and the action to return its "no jobs" message.
    """
    # prepare
    source = factories.HarvestSourceObj(**SOURCE_DICT.copy())
    job = factories.HarvestJobObj(source=source)

    # execute
    context = {
        'model': model,
        'session': model.Session,
        'ignore_auth': True,
        'user': '',
    }
    abort_failed_jobs = get_action('harvest_abort_failed_jobs')
    result = abort_failed_jobs(context, {'life_span': 3})

    # verify
    job = harvest_model.HarvestJob.get(job.id)
    assert job.status == 'New'
    assert job.source_id == source.id
    assert result == 'There is no jobs to abort'
def test_harvest_abort_failed_jobs_with_unknown_frequency(self):
    """The action must raise when the source has an unknown frequency.

    The source frequency is set to ``'YEARLY'``, which this test treats
    as unknown, and the call is expected to raise.
    """
    # prepare
    source = factories.HarvestSourceObj(**SOURCE_DICT.copy())
    job = factories.HarvestJobObj(source=source)

    # Simulate a job that has been running since 4 days ago.
    job.status = 'Running'
    job.created = datetime.datetime.utcnow() - datetime.timedelta(days=4)
    # Give the source an update frequency the action is expected to reject.
    source.frequency = 'YEARLY'
    model.Session.commit()

    context = {
        'model': model,
        'session': model.Session,
        'ignore_auth': True,
        'user': '',
    }

    # execute / verify: both the action lookup and the call stay inside
    # the ``raises`` block, as in the original.
    with pytest.raises(Exception):
        get_action('harvest_abort_failed_jobs')(context, {'life_span': 3})
def get_source(self):
    """Create a timeout test source and return it with a running job.

    Returns:
        A ``(source, job)`` tuple where ``job`` has been saved with
        status ``'Running'`` and is confirmed to be among the source's
        running jobs.
    """
    source_fields = {
        "url": "http://test.timeout.com",
        "name": "test-source-timeout",
        "title": "Test source timeout",
        "notes": "Notes source timeout",
        "source_type": "test-for-action",
        "frequency": "MANUAL"
    }
    source = harvest_factories.HarvestSourceObj(**source_fields)

    try:
        job = harvest_factories.HarvestJobObj(source=source)
    except HarvestJobExists:
        # A job may already exist for this source (the original author
        # noted they were not sure why); reuse the first one.
        job = source.get_jobs()[0]

    job.status = 'Running'
    job.save()

    # Sanity check: the job must now be listed as running.
    running_jobs = source.get_jobs(status='Running')
    assert_in(job, running_jobs)

    return source, job
def test_harvest_source_job_history_clear(self):
    """Clearing one source's job history keeps the source and its dataset.

    After ``harvest_source_job_history_clear`` the test expects the job
    and the harvest object to be gone, while the source and the dataset
    can still be fetched.
    """
    # prepare
    source = factories.HarvestSourceObj(**SOURCE_DICT.copy())
    job = factories.HarvestJobObj(source=source)
    dataset = ckan_factories.Dataset()
    object_ = factories.HarvestObjectObj(
        job=job,
        source=source,
        package_id=dataset['id'],
    )

    # execute
    context = {'session': model.Session, 'ignore_auth': True, 'user': ''}
    clear_history = get_action('harvest_source_job_history_clear')
    result = clear_history(context, {'id': source.id})

    # verify
    assert result == {'id': source.id}
    source = harvest_model.HarvestSource.get(source.id)
    assert source
    assert harvest_model.HarvestJob.get(job.id) is None
    assert harvest_model.HarvestObject.get(object_.id) is None
    # Unlike a full source clear, the dataset itself must survive.
    dataset_from_db = model.Package.get(dataset['id'])
    assert dataset_from_db, 'is None'
    assert dataset_from_db.id == dataset['id']