def setup(self):
    """Build the fixtures: a harvest source, a saved job, and its controller.

    The source URL comes from ``self.gemini_example.url_for(file_index=0)``
    and the job is attributed to ``self.fixture_user_ref``.  ``job2`` and
    ``source2`` start out as ``None``.
    """
    super(TestHarvestingJob, self).setup()

    first_example_url = self.gemini_example.url_for(file_index=0)
    self.source = HarvestSource(url=first_example_url)

    self.job = HarvestingJob(
        source=self.source,
        user_ref=self.fixture_user_ref,
    )
    # The job is saved before being handed to the controller.
    self.job.save()
    self.controller = HarvestingJobController(self.job)

    # Placeholders for a second job/source; unset by default.
    self.job2 = self.source2 = None
class TestHarvestingJob(HarvesterTestCase):
    """Tests for harvesting jobs driven through ``HarvestingJobController``.

    Every test starts from a saved job (``self.job``) whose source points at
    ``self.gemini_example.url_for(file_index=0)``, plus a controller wrapping
    that job (``self.controller``).
    """

    fixture_user_ref = u'publisheruser1'

    def setup(self):
        """Create the harvest source, the saved job and its controller."""
        super(TestHarvestingJob, self).setup()
        self.source = HarvestSource(
            url=self.gemini_example.url_for(file_index=0)
        )
        self.job = HarvestingJob(
            source=self.source,
            user_ref=self.fixture_user_ref
        )
        self.job.save()
        self.controller = HarvestingJobController(self.job)
        # Optional extra fixtures; teardown deletes whichever one is set.
        self.job2 = None
        self.source2 = None

    def teardown(self):
        """Delete the optional second job/source, then run the base teardown."""
        # NOTE(review): none of the tests visible in this class assign
        # self.job2 or self.source2 (they use locals), so these branches look
        # unreachable from here - confirm whether the tests were meant to
        # register their second job/source for cleanup.
        if self.job2:
            self.delete(self.job2)
        if self.source2:
            self.delete(self.source2)
        super(TestHarvestingJob, self).teardown()

    def _harvest_with_new_job(self, source):
        """Harvest ``source`` through a brand-new job.

        Wraps ``source`` in a fresh ``HarvestingJob`` owned by
        ``fixture_user_ref`` and returns whatever
        ``HarvestingJobController.harvest_documents()`` returns for it.
        """
        new_job = HarvestingJob(
            source=source,
            user_ref=self.fixture_user_ref
        )
        return HarvestingJobController(new_job).harvest_documents()

    def test_create_and_delete_job(self):
        # The job is linked to its source; deleting the job makes it
        # unretrievable but must leave the source in place.
        self.assert_equal(self.job.source_id, self.source.id)
        self.delete_commit(self.job)
        self.assert_raises(Exception, HarvestingJob.get, self.job.id)
        # - check source has not been deleted!
        HarvestSource.get(self.source.id)

    def test_harvest_documents(self):
        # One harvest adds exactly one package and reports it without errors.
        before_count = self.count_packages()
        job = self.controller.harvest_documents()
        after_count = self.count_packages()
        self.assert_equal(after_count, before_count + 1)
        # Validate the report before indexing into it, so a missing or empty
        # report fails an assertion instead of raising a lookup error.
        self.assert_true(job.report)
        self.assert_len(job.report['errors'], 0)
        self.assert_len(job.report['packages'], 1)
        self.assert_equal(
            job.source.documents[0].package.id,
            job.report['packages'][0]
        )

    def test_harvest_documents_twice_unchanged(self):
        # A second harvest of the same, unchanged source reports no packages.
        job = self.controller.harvest_documents()
        self.assert_len(job.report['errors'], 0)
        self.assert_len(job.report['packages'], 1)
        job2 = self._harvest_with_new_job(self.source)
        self.assert_len(job2.report['errors'], 0)
        self.assert_len(job2.report['packages'], 0)

    def test_harvest_documents_twice_changed(self):
        # After the source URL is switched to another example and saved, a
        # second harvest reports one package again.
        job = self.controller.harvest_documents()
        self.assert_len(job.report['errors'], 0)
        self.assert_len(job.report['packages'], 1)
        self.source.url = self.gemini_example.url_for(file_index=2)
        self.source.save()
        job2 = self._harvest_with_new_job(self.source)
        self.assert_len(job2.report['errors'], 0)
        self.assert_len(job2.report['packages'], 1)

    def test_harvest_documents_source_guid_contention(self):
        # Harvest the fixture source first; only its side effects matter here,
        # so the returned job is not kept.
        self.controller.harvest_documents()
        # A different source pointing at example index 2 is then expected to
        # be rejected with a GUID contention error.
        source2 = HarvestSource(
            url=self.gemini_example.url_for(file_index=2)
        )
        job2 = self._harvest_with_new_job(source2)
        errors = job2.report['errors']
        # Fail with an assertion (not an IndexError) when nothing was reported.
        self.assert_true(errors)
        error = errors[0]
        self.assert_contains(error, "Another source is using metadata GUID")

    def test_harvest_bad_source_url(self):
        # Harvesting a bad URL creates no packages and reports one error.
        source = HarvestSource(
            url=self.gemini_example.url_for_bad(0)
        )
        job = HarvestingJob(
            source=source,
            user_ref=self.fixture_user_ref
        )
        before_count = self.count_packages()
        # A job that has not been harvested yet has no report.
        self.assert_false(job.report)
        job = HarvestingJobController(job).harvest_documents()
        after_count = self.count_packages()
        self.assert_equal(after_count, before_count)
        self.assert_len(job.report['packages'], 0)
        self.assert_len(job.report['errors'], 1)
        error = job.report['errors'][0]
        self.assert_contains(error, 'Unable to detect source type from content')