def test_last_error_free_returns_correct_job(self):
    '''Test that, after a successful job A, last_error_free() returns A.'''
    # Bind a single plugin instance to a local. The original code set
    # 'import_since' on one throwaway FisbrokerPlugin() and read it back from
    # another, which only works if the plugin class is a singleton; using one
    # instance makes the test independent of that implementation detail.
    plugin = FisbrokerPlugin()

    # Run a complete, successful harvest job.
    source, job = self._create_source_and_job()
    object_ids = gather_stage(plugin, job)
    for object_id in object_ids:
        harvest_object = HarvestObject.get(object_id)
        fetch_and_import_stages(plugin, harvest_object)
    job.status = u'Finished'
    job.save()

    # A fresh job should now see `job` as the last error-free one.
    new_job = self._create_job(source.id)
    last_error_free_job = plugin.last_error_free_job(new_job)
    _assert_equal(last_error_free_job, job)

    # the import_since date should be the time job_a finished:
    plugin.source_config['import_since'] = "last_error_free"
    import_since = plugin.get_import_since_date(new_job)
    import_since_expected = (job.gather_started +
                             timedelta(hours=plugin.get_timedelta()))
    _assert_equal(import_since,
                  import_since_expected.strftime("%Y-%m-%dT%H:%M:%S%z"))

    # the query constraints should reflect the import_since date:
    constraint = plugin.get_constraints(new_job)[0]
    expected = PropertyIsGreaterThanOrEqualTo('modified', import_since)
    _assert_equal(constraint.literal, expected.literal)
    _assert_equal(constraint.propertyname, expected.propertyname)
def run_harvest_job(job, harvester):
    """Synchronously run a harvest *job* with *harvester*.

    Emulates the harvest queue machinery: marks the job as running, gathers
    the object ids, fetches/imports each object, then closes the job via the
    'harvest_jobs_run' action. Returns a dict keyed by harvest-object GUID
    with per-object state, report status, dataset (when complete) and errors.
    """
    # In 'harvest_job_create' it would call 'harvest_send_job_to_gather_queue'
    # which would do 2 things to 'run' the job:
    # 1. change the job status to Running
    job.status = 'Running'
    job.save()
    # 2. put the job on the gather queue which is consumed by
    # queue.gather_callback, which determines the harvester and then calls
    # gather_stage. We simply call the gather_stage.
    obj_ids = queue.gather_stage(harvester, job)
    if not isinstance(obj_ids, list):
        # Gather had nothing to do or errored (gather_stage may return a
        # non-list in that case). Carry on with no objects so the job is
        # still closed properly below instead of raising a TypeError.
        obj_ids = []
    # The object ids are put onto the fetch queue, consumed by
    # queue.fetch_callback which calls queue.fetch_and_import_stages
    results_by_guid = {}
    for obj_id in obj_ids:
        harvest_object = harvest_model.HarvestObject.get(obj_id)
        guid = harvest_object.guid
        results_by_guid[guid] = {'obj_id': obj_id}
        queue.fetch_and_import_stages(harvester, harvest_object)
        results_by_guid[guid]['state'] = harvest_object.state
        results_by_guid[guid]['report_status'] = harvest_object.report_status
        if harvest_object.state == 'COMPLETE' and harvest_object.package_id:
            results_by_guid[guid]['dataset'] = \
                toolkit.get_action('package_show')(
                    {'ignore_auth': True},
                    dict(id=harvest_object.package_id))
        results_by_guid[guid]['errors'] = harvest_object.errors
    # Do 'harvest_jobs_run' to change the job status to 'finished'
    toolkit.get_action('harvest_jobs_run')({'ignore_auth': True}, {})
    return results_by_guid
def run_harvest_job(job, harvester):
    """Execute a harvest *job* synchronously using *harvester*.

    Replicates what the cron-driven queue would do and returns a dict,
    keyed by harvest-object GUID, describing each object's outcome.
    """
    # When 'paster harvest run' is called by the regular cron it does 2 things:
    # 1. change the job status to Running
    job.status = "Running"
    job.save()
    # 2. put the job on the gather queue which is consumed by
    # queue.gather_callback, which determines the harvester and then calls
    # gather_stage. We simply call the gather_stage.
    object_ids = queue.gather_stage(harvester, job)
    # The object ids are put onto the fetch queue, consumed by
    # queue.fetch_callback which calls queue.fetch_and_import_stages
    results_by_guid = {}
    for object_id in object_ids:
        obj = harvest_model.HarvestObject.get(object_id)
        entry = {"obj_id": object_id}
        results_by_guid[obj.guid] = entry
        queue.fetch_and_import_stages(harvester, obj)
        entry["state"] = obj.state
        entry["report_status"] = obj.report_status
        if obj.state == "COMPLETE" and obj.package_id:
            entry["dataset"] = toolkit.get_action("package_show")(
                {}, dict(id=obj.package_id)
            )
        entry["errors"] = obj.errors
    # Do 'harvest_jobs_run' to change the job status to 'finished'
    try:
        toolkit.get_action("harvest_jobs_run")({"ignore_auth": True}, {})
    except NoNewHarvestJobError:
        # This is expected
        pass
    return results_by_guid
def test_last_error_free_does_not_return_reimport_job(self):
    '''Test that reimport jobs are ignored for determining the last error-free job.'''

    def _gather_fetch_finish(harvest_job):
        # Run the full gather/fetch/import cycle for one job, then finish it.
        for object_id in gather_stage(FisbrokerPlugin(), harvest_job):
            fetch_and_import_stages(FisbrokerPlugin(), HarvestObject.get(object_id))
        harvest_job.status = u'Finished'
        harvest_job.save()

    # do a successful job
    source, job_a = self._create_source_and_job()
    _gather_fetch_finish(job_a)
    LOG.debug("successful job done ...")

    # do an unsuccessful job
    # This harvest job should fail, because the mock FIS-broker will look for a different
    # file on the second harvest run, will not find it and return a "no_record_found"
    # error.
    job_b = self._create_job(source.id)
    _gather_fetch_finish(job_b)
    LOG.debug("unsuccessful job done ...")

    # reset the mock server's counter
    reset_mock_server(1)

    # do a reimport job
    package_id = "3d-gebaudemodelle-im-level-of-detail-2-lod-2-wms-f2a8a483"
    self._get_test_app().get(
        url="/api/harvest/reimport?id={}".format(package_id),
        headers={'Accept': 'application/json'},
        extra_environ={'REMOTE_USER': self.context['user'].encode('ascii')}
    )
    LOG.debug("reimport job done ...")

    new_job = self._create_job(source.id)
    last_error_free_job = FisbrokerPlugin().last_error_free_job(new_job)
    # job_a should be the last error free job:
    _assert_equal(last_error_free_job.id, job_a.id)
def test_last_error_free_does_not_return_unsuccessful_job(self):
    '''Test that, after a successful job A, followed by an unsuccessful job B,
       last_error_free() returns A.'''
    # Bind a single plugin instance to a local. The original code set
    # 'import_since' on one throwaway FisbrokerPlugin() and read it back from
    # another, which only works if the plugin class is a singleton; using one
    # instance makes the test independent of that implementation detail.
    plugin = FisbrokerPlugin()

    # Run a complete, successful harvest job A.
    source, job_a = self._create_source_and_job()
    object_ids = gather_stage(plugin, job_a)
    for object_id in object_ids:
        harvest_object = HarvestObject.get(object_id)
        fetch_and_import_stages(plugin, harvest_object)
    job_a.status = u'Finished'
    job_a.save()

    # This harvest job should fail, because the mock FIS-broker will look for a different
    # file on the second harvest run, will not find it and return a "no_record_found"
    # error.
    job_b = self._create_job(source.id)
    object_ids = gather_stage(plugin, job_b)
    for object_id in object_ids:
        harvest_object = HarvestObject.get(object_id)
        fetch_and_import_stages(plugin, harvest_object)
    job_b.status = u'Finished'
    job_b.save()

    new_job = self._create_job(source.id)
    last_error_free_job = plugin.last_error_free_job(new_job)
    # job_a should be the last error free job:
    _assert_equal(last_error_free_job, job_a)

    # the import_since date should be the time job_a finished:
    plugin.source_config['import_since'] = "last_error_free"
    import_since = plugin.get_import_since_date(new_job)
    import_since_expected = (job_a.gather_started +
                             timedelta(hours=plugin.get_timedelta()))
    _assert_equal(import_since,
                  import_since_expected.strftime("%Y-%m-%dT%H:%M:%S%z"))

    # the query constraints should reflect the import_since date:
    constraint = plugin.get_constraints(new_job)[0]
    expected = PropertyIsGreaterThanOrEqualTo('modified', import_since)
    _assert_equal(constraint.literal, expected.literal)
    _assert_equal(constraint.propertyname, expected.propertyname)
def run_harvest_job(job, harvester):
    """Run harvest *job* synchronously with *harvester* and return results.

    Mirrors the harvest queue machinery: marks the job as running, gathers
    object ids, fetches/imports each object (honouring an optional
    ``job.force_import`` GUID whitelist), then closes the job via the
    'harvest_jobs_run' action. Returns a dict keyed by harvest-object GUID.
    """
    # In 'harvest_job_create' it would call 'harvest_send_job_to_gather_queue'
    # which would do 2 things to 'run' the job:
    # 1. change the job status to Running
    job.status = 'Running'
    job.save()
    # 2. put the job on the gather queue which is consumed by
    # queue.gather_callback, which determines the harvester and then calls
    # gather_stage. We simply call the gather_stage.
    object_ids = queue.gather_stage(harvester, job)
    if not isinstance(object_ids, list):
        # gather had nothing to do or errored. Carry on to ensure the job is
        # closed properly
        object_ids = []
    # The object ids are put onto the fetch queue, consumed by
    # queue.fetch_callback which calls queue.fetch_and_import_stages
    results_by_guid = {}
    for object_id in object_ids:
        obj = harvest_model.HarvestObject.get(object_id)
        guid = obj.guid
        # force reimport of datasets
        if hasattr(job, 'force_import'):
            if guid not in job.force_import:
                log.info('Skipping: %s', guid)
                continue
            obj.force_import = True
        entry = {'obj_id': object_id}
        results_by_guid[guid] = entry
        queue.fetch_and_import_stages(harvester, obj)
        entry['state'] = obj.state
        entry['report_status'] = obj.report_status
        if obj.state == 'COMPLETE' and obj.package_id:
            entry['dataset'] = toolkit.get_action('package_show')(
                {'ignore_auth': True}, dict(id=obj.package_id))
        entry['errors'] = obj.errors
    # Do 'harvest_jobs_run' to change the job status to 'finished'
    toolkit.get_action('harvest_jobs_run')({'ignore_auth': True}, {})
    return results_by_guid