Пример #1
0
 def setup(self):
     """Build the per-test fixtures: a source, a saved job and its controller.

     Runs the parent ``setup`` first, then creates a ``HarvestSource`` for
     the example URL at ``file_index=0``, wraps it in a ``HarvestingJob``
     owned by ``fixture_user_ref``, saves that job and attaches a
     ``HarvestingJobController`` to it. ``job2`` and ``source2`` start out
     as ``None``.
     """
     super(TestHarvestingJob, self).setup()

     example_url = self.gemini_example.url_for(file_index=0)
     self.source = HarvestSource(url=example_url)

     self.job = HarvestingJob(
         source=self.source,
         user_ref=self.fixture_user_ref
     )
     self.job.save()
     self.controller = HarvestingJobController(self.job)

     # Placeholders for a second job/source; nothing is created here.
     self.job2 = self.source2 = None
Пример #2
0
class TestHarvestingJob(HarvesterTestCase):
    """Tests for ``HarvestingJob`` records and ``HarvestingJobController``.

    Each test starts from the fixtures built in ``setup``: a source pointing
    at the example URL for ``file_index=0``, a saved job for that source and
    a controller wrapping the job.
    """

    fixture_user_ref = u'publisheruser1'

    def setup(self):
        """Build the per-test source, saved job and controller."""
        super(TestHarvestingJob, self).setup()

        example_url = self.gemini_example.url_for(file_index=0)
        self.source = HarvestSource(url=example_url)

        self.job = HarvestingJob(
            source=self.source,
            user_ref=self.fixture_user_ref
        )
        self.job.save()
        self.controller = HarvestingJobController(self.job)

        # Placeholders for a second job/source that teardown will delete
        # if a test stores one on the instance.
        self.job2 = self.source2 = None

    def teardown(self):
        """Delete ``job2`` / ``source2`` when set, then run the parent teardown."""
        # NOTE(review): no test visible in this class assigns job2/source2
        # on the instance, so this cleanup looks unused - confirm.
        for attr_name in ('job2', 'source2'):
            extra = getattr(self, attr_name)
            if extra:
                self.delete(extra)
        super(TestHarvestingJob, self).teardown()

    def _harvest_with_new_job(self, source):
        """Wrap ``source`` in a fresh job, harvest it and return the result.

        The job is created for ``fixture_user_ref`` and handed straight to a
        new ``HarvestingJobController``; the return value is whatever that
        controller's ``harvest_documents`` returns.
        """
        fresh_job = HarvestingJob(
            source=source,
            user_ref=self.fixture_user_ref
        )
        return HarvestingJobController(fresh_job).harvest_documents()

    def test_create_and_delete_job(self):
        # The fixture job points at the fixture source; after deleting the
        # job, looking it up again must raise.
        doomed_job = self.job
        self.assert_equal(doomed_job.source_id, self.source.id)
        self.delete_commit(doomed_job)
        self.assert_raises(Exception, HarvestingJob.get, doomed_job.id)
        # The source itself must not have been deleted along with the job.
        HarvestSource.get(self.source.id)

    def test_harvest_documents(self):
        # One harvest is expected to add exactly one package and to report
        # that package's id with no errors.
        packages_before = self.count_packages()
        finished = self.controller.harvest_documents()
        packages_after = self.count_packages()
        self.assert_equal(packages_after, packages_before + 1)

        harvested_id = finished.source.documents[0].package.id
        reported_id = finished.report['packages'][0]
        self.assert_equal(harvested_id, reported_id)

        self.assert_true(finished.report)
        self.assert_len(finished.report['errors'], 0)
        self.assert_len(finished.report['packages'], 1)

    def test_harvest_documents_twice_unchanged(self):
        # Harvesting the same, unmodified source a second time is expected
        # to report no packages and no errors.
        first_run = self.controller.harvest_documents()
        self.assert_len(first_run.report['errors'], 0)
        self.assert_len(first_run.report['packages'], 1)

        second_run = self._harvest_with_new_job(self.source)
        self.assert_len(second_run.report['errors'], 0)
        self.assert_len(second_run.report['packages'], 0)

    def test_harvest_documents_twice_changed(self):
        # After the source URL is switched to the file_index=2 example and
        # saved, a second harvest is expected to report one package again.
        first_run = self.controller.harvest_documents()
        self.assert_len(first_run.report['errors'], 0)
        self.assert_len(first_run.report['packages'], 1)

        self.source.url = self.gemini_example.url_for(file_index=2)
        self.source.save()

        second_run = self._harvest_with_new_job(self.source)
        self.assert_len(second_run.report['errors'], 0)
        self.assert_len(second_run.report['packages'], 1)

    def test_harvest_documents_source_guid_contention(self):
        # Harvest the fixture source first, then harvest a separate source
        # built from the file_index=2 example; the second run is expected
        # to report a GUID-contention error as its first error.
        self.controller.harvest_documents()

        rival_url = self.gemini_example.url_for(file_index=2)
        rival_source = HarvestSource(url=rival_url)
        rival_run = self._harvest_with_new_job(rival_source)

        first_error = rival_run.report['errors'][0]
        self.assert_contains(first_error,
                             "Another source is using metadata GUID")

    def test_harvest_bad_source_url(self):
        # A source built from url_for_bad(0) is expected to add no packages
        # and to yield exactly one error about the undetectable source type.
        bad_source = HarvestSource(url=self.gemini_example.url_for_bad(0))
        pending_job = HarvestingJob(
            source=bad_source,
            user_ref=self.fixture_user_ref
        )

        packages_before = self.count_packages()
        # The report is falsy until the job has been harvested.
        self.assert_false(pending_job.report)
        finished = HarvestingJobController(pending_job).harvest_documents()
        packages_after = self.count_packages()

        self.assert_equal(packages_after, packages_before)
        self.assert_len(finished.report['packages'], 0)
        self.assert_len(finished.report['errors'], 1)
        first_error = finished.report['errors'][0]
        self.assert_contains(first_error,
                             'Unable to detect source type from content')