def run_test_harvester(source_id_or_name, force_import):
    from ckanext.harvest import queue
    from ckanext.harvest.tests import lib
    from ckanext.harvest.logic import HarvestJobExists
    from ckanext.harvest.model import HarvestJob

    context = {
        "model": model,
        "session": model.Session,
        "user": _admin_user()["name"],
    }
    source = tk.get_action("harvest_source_show")(
        context, {"id": source_id_or_name})

    # Determine the job
    try:
        job_dict = tk.get_action("harvest_job_create")(
            context, {"source_id": source["id"]})
    except HarvestJobExists:
        running_jobs = tk.get_action("harvest_job_list")(
            context, {"source_id": source["id"], "status": "Running"})
        if running_jobs:
            print('\nSource "{0}" apparently has a "Running" job:\n{1}'.format(
                source.get("name") or source["id"], running_jobs))

            if six.PY2:
                resp = raw_input("Abort it? (y/n)")
            else:
                resp = input("Abort it? (y/n)")
            if not resp.lower().startswith("y"):
                sys.exit(1)
            job_dict = tk.get_action("harvest_job_abort")(
                context, {"source_id": source["id"]})
        else:
            print("Reusing existing harvest job")
            jobs = tk.get_action("harvest_job_list")(
                context, {"source_id": source["id"], "status": "New"})
            assert len(jobs) == 1, (
                'Multiple "New" jobs for this source! {0}'.format(jobs))
            job_dict = jobs[0]

    job_obj = HarvestJob.get(job_dict["id"])
    if force_import:
        job_obj.force_import = force_import
    harvester = queue.get_harvester(source["source_type"])
    assert harvester, "No harvester found for type: {0}".format(
        source["source_type"])
    lib.run_harvest_job(job_obj, harvester)
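# A minimal usage sketch for the helper above, assuming the surrounding module
# already provides the `model`, `tk`, `six`, `sys`, and `_admin_user` names it
# relies on, and that a harvest source called "my-source" exists. The source
# name and force_import value are illustrative; passing a truthy force_import
# appears to mark the job so the import stage runs even for unchanged objects.
run_test_harvester("my-source", force_import=False)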
def run_test_harvest(self):
    from ckanext.harvest import queue
    from ckanext.harvest.tests import lib
    from ckanext.harvest.logic import HarvestJobExists
    from ckanext.harvest.model import HarvestJob

    # Determine the source
    if len(self.args) >= 2:
        source_id_or_name = unicode(self.args[1])
    else:
        print 'Please provide a source id'
        sys.exit(1)
    context = {
        'model': model,
        'session': model.Session,
        'user': self.admin_user['name']
    }
    source = get_action('harvest_source_show')(context, {
        'id': source_id_or_name
    })

    # Determine the job
    try:
        job_dict = get_action('harvest_job_create')(
            context, {
                'source_id': source['id']
            })
    except HarvestJobExists:
        running_jobs = get_action('harvest_job_list')(
            context, {
                'source_id': source['id'],
                'status': 'Running'
            })
        if running_jobs:
            print '\nSource "%s" apparently has a "Running" job:\n%r' \
                % (source.get('name') or source['id'], running_jobs)
            resp = raw_input('Abort it? (y/n)')
            if not resp.lower().startswith('y'):
                sys.exit(1)
            job_dict = get_action('harvest_job_abort')(
                context, {
                    'source_id': source['id']
                })
        else:
            print 'Reusing existing harvest job'
            jobs = get_action('harvest_job_list')(context, {
                'source_id': source['id'],
                'status': 'New'
            })
            assert len(jobs) == 1, \
                'Multiple "New" jobs for this source! %r' % jobs
            job_dict = jobs[0]

    job_obj = HarvestJob.get(job_dict['id'])
    harvester = queue.get_harvester(source['source_type'])
    assert harvester, \
        'No harvester found for type: %s' % source['source_type']
    lib.run_harvest_job(job_obj, harvester)
def run_test_harvest(self):
    from ckanext.harvest import queue
    from ckanext.harvest.tests import lib
    from ckanext.harvest.logic import HarvestJobExists
    from ckanext.harvest.model import HarvestJob

    # Determine the source
    if len(self.args) >= 2:
        source_id_or_name = unicode(self.args[1])
    else:
        print 'Please provide a source id'
        sys.exit(1)
    context = {'model': model, 'session': model.Session,
               'user': self.admin_user['name']}
    source = get_action('harvest_source_show')(
        context, {'id': source_id_or_name})

    # Determine the job
    try:
        job_dict = get_action('harvest_job_create')(
            context, {'source_id': source['id']})
    except HarvestJobExists:
        running_jobs = get_action('harvest_job_list')(
            context, {'source_id': source['id'], 'status': 'Running'})
        if running_jobs:
            print '\nSource "%s" apparently has a "Running" job:\n%r' \
                % (source.get('name') or source['id'], running_jobs)
            resp = raw_input('Abort it? (y/n)')
            if not resp.lower().startswith('y'):
                sys.exit(1)
            job_dict = get_action('harvest_job_abort')(
                context, {'source_id': source['id']})
        else:
            print 'Reusing existing harvest job'
            jobs = get_action('harvest_job_list')(
                context, {'source_id': source['id'], 'status': 'New'})
            assert len(jobs) == 1, \
                'Multiple "New" jobs for this source! %r' % jobs
            job_dict = jobs[0]

    job_obj = HarvestJob.get(job_dict['id'])
    harvester = queue.get_harvester(source['source_type'])
    assert harvester, \
        'No harvester found for type: %s' % source['source_type']
    lib.run_harvest_job(job_obj, harvester)
def run_test_harvest(self):
    from ckanext.harvest import queue
    from ckanext.harvest.tests import lib
    from ckanext.harvest.logic import HarvestJobExists
    from ckanext.harvest.model import HarvestJob

    # Determine the source
    if len(self.args) >= 2:
        source_id_or_name = unicode(self.args[1])
    else:
        print "Please provide a source id"
        sys.exit(1)
    context = {"model": model, "session": model.Session, "user": self.admin_user["name"]}
    source = get_action("harvest_source_show")(context, {"id": source_id_or_name})

    # Determine the job
    try:
        job_dict = get_action("harvest_job_create")(context, {"source_id": source["id"]})
    except HarvestJobExists:
        running_jobs = get_action("harvest_job_list")(context, {"source_id": source["id"], "status": "Running"})
        if running_jobs:
            print '\nSource "%s" apparently has a "Running" job:\n%r' % (
                source.get("name") or source["id"],
                running_jobs,
            )
            resp = raw_input("Abort it? (y/n)")
            if not resp.lower().startswith("y"):
                sys.exit(1)
            job_dict = get_action("harvest_job_abort")(context, {"source_id": source["id"]})
        else:
            print "Reusing existing harvest job"
            jobs = get_action("harvest_job_list")(context, {"source_id": source["id"], "status": "New"})
            assert len(jobs) == 1, 'Multiple "New" jobs for this source! %r' % jobs
            job_dict = jobs[0]

    job_obj = HarvestJob.get(job_dict["id"])
    harvester = queue.get_harvester(source["source_type"])
    assert harvester, "No harvester found for type: %s" % source["source_type"]
    lib.run_harvest_job(job_obj, harvester)
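# The three run_test_harvest variants above are methods on a paster-style CKAN
# command class, where `self.args` holds the positional command-line arguments
# and `self.admin_user` is set up elsewhere in the class. A rough sketch of
# the surrounding dispatch, assuming the usual ckan.lib.cli.CkanCommand
# pattern from CKAN 2.8 and earlier; the class body here is illustrative and
# not copied from the original modules.
from ckan.lib.cli import CkanCommand


class Harvester(CkanCommand):
    """Example wrapper that routes a `run_test` subcommand to the method."""
    summary = 'Harvester test commands'

    def command(self):
        self._load_config()
        if self.args and self.args[0] == 'run_test':
            self.run_test_harvest()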
def test_harvester(test_config, expected_count):
    """
    Test the harvester by running it for real with mocked requests.

    We need to convert some blocks to helper functions or fixtures,
    but this is an easy way to verify that a harvester does what it's
    supposed to over the course of one or more runs, and we should
    build on it for future tests.
    """
    helpers.reset_db()
    context = {}
    context.setdefault('user', 'test_user')
    context.setdefault('ignore_auth', True)
    context['model'] = model
    context['session'] = model.Session
    user = {}
    user['name'] = 'test_user'
    user['email'] = '*****@*****.**'
    user['password'] = '******'
    helpers.call_action('user_create', context, **user)

    org = {'name': 'gome2_test_org', 'url': 'http://example.com/gome2'}
    owner_org = helpers.call_action('organization_create', context, **org)

    config = json.dumps(test_config)
    source = {
        'url': 'http://example.com/gome2_test_harvester',
        'name': 'gome2_test_harvester',
        'owner_org': owner_org['id'],
        'source_type': 'gome2',
        'config': config
    }
    harvest_source_create(context, source)
    source = harvest_source_show(context, {'id': source['name']})

    job_dict = get_action('harvest_job_create')(context, {
        'source_id': source['id']
    })
    job_obj = HarvestJob.get(job_dict['id'])
    harvester = queue.get_harvester(source['source_type'])

    # Mocked missing_days.php responses, one per GOME2 product, registered
    # for each of the three start dates the harvester requests.
    mock_responses = {
        'GOME2_O3': o3_response,
        'GOME2_NO2': no2_response,
        'GOME2_TropNO2': tropno2_response,
        'GOME2_SO2': so2_response,
        'GOME2_SO2mass': so2mass_response,
    }
    mock_dates = ('2018-01-01', '2018-01-02', '2018-01-03')

    with requests_mock.Mocker(real_http=True) as m:
        for wpid, response in mock_responses.items():
            for start_date in mock_dates:
                m.register_uri(
                    'GET',
                    'https://wdc.dlr.de/data_products/VIEWER/missing_days.php'
                    '?start_date={0}&wpid={1}'.format(start_date, wpid),
                    text=response)
        lib.run_harvest_job(job_obj, harvester)

    source = harvest_source_show(context, {'id': source['name']})
    assert source['status']['last_job']['status'] == 'Finished'
    assert source['status']['last_job']['stats']['added'] == expected_count

    # Re-run the harvester without forcing updates
    job_dict = get_action('harvest_job_create')(context, {
        'source_id': source['id']
    })
    job_obj = HarvestJob.get(job_dict['id'])
    harvester = queue.get_harvester(source['source_type'])

    with requests_mock.Mocker(real_http=True) as m:
        for wpid, response in mock_responses.items():
            for start_date in mock_dates:
                m.register_uri(
                    'GET',
                    'https://wdc.dlr.de/data_products/VIEWER/missing_days.php'
                    '?start_date={0}&wpid={1}'.format(start_date, wpid),
                    text=response)
        lib.run_harvest_job(job_obj, harvester)

    source = harvest_source_show(context, {'id': source['name']})
    assert source['status']['last_job']['status'] == 'Finished'
    assert source['status']['last_job']['stats']['added'] == 0
    assert source['status']['last_job']['stats']['updated'] == 0

    # Verify that the org has the expected number of datasets now
    org_response = helpers.call_action('organization_show', context,
                                       **{'id': org['name']})
    assert org_response['package_count'] == expected_count
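# The test above receives `test_config` and `expected_count` as arguments, so
# in the original module it is presumably driven by pytest parametrization
# that is not part of this excerpt. A minimal sketch of how it might be wired
# up is shown here; the wrapper name, config dicts, and expected counts are
# illustrative assumptions, not values taken from the real gome2 harvester.
import pytest


@pytest.mark.parametrize('test_config,expected_count', [
    # (harvest source config, datasets expected after the first run)
    ({'start_date': '2018-01-01'}, 15),
    ({'start_date': '2018-01-01', 'products': ['GOME2_O3']}, 3),
])
def test_harvester_parametrized(test_config, expected_count):
    # Delegates to the full test defined above.
    test_harvester(test_config, expected_count)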
def test_harvester(self):
    """
    Test the harvester by running it for real with mocked requests.

    We need to convert some blocks to helper functions or fixtures,
    but this is an easy way to verify that a harvester does what it's
    supposed to over the course of one or more runs, and we should
    build on it for future tests.
    """
    helpers.reset_db()
    context = {}
    context.setdefault('user', 'test_user')
    context.setdefault('ignore_auth', True)
    context['model'] = model
    context['session'] = model.Session
    user = {}
    user['name'] = 'test_user'
    user['email'] = '*****@*****.**'
    user['password'] = '******'
    helpers.call_action('user_create', context, **user)

    org = {'name': 'test_org', 'url': 'https://www.example.com'}
    owner_org = helpers.call_action('organization_create', context, **org)

    config_dict = {
        'source': 'esa_scihub',
        'update_all': False,
        'datasets_per_job': 10,
        'timeout': 10,
        'skip_raw': False
    }
    config = json.dumps(config_dict)
    source = {
        'url': 'http://www.scihub.org',
        'name': 'scihub_test_harvester',
        'owner_org': owner_org['id'],
        'source_type': 'esasentinel',
        'config': config
    }
    harvest_source_create(context, source)
    source = harvest_source_show(context, {'id': 'scihub_test_harvester'})

    job_dict = get_action('harvest_job_create')(context, {
        'source_id': source['id']
    })
    job_obj = HarvestJob.get(job_dict['id'])
    harvester = queue.get_harvester(source['source_type'])
    with requests_mock.Mocker(real_http=True) as m:
        m.register_uri('GET', '/dhus/search?q', text=self.raw_results)
        lib.run_harvest_job(job_obj, harvester)

    source = harvest_source_show(context, {'id': 'scihub_test_harvester'})
    assert source['status']['last_job']['status'] == 'Finished'
    assert source['status']['last_job']['stats']['added'] == 10

    # Re-run the harvester
    job_dict = get_action('harvest_job_create')(context, {
        'source_id': source['id']
    })
    job_obj = HarvestJob.get(job_dict['id'])
    harvester = queue.get_harvester(source['source_type'])
    with requests_mock.Mocker(real_http=True) as m:
        m.register_uri('GET', '/dhus/search?q', text=self.raw_results)
        lib.run_harvest_job(job_obj, harvester)

    source = harvest_source_show(context, {'id': 'scihub_test_harvester'})
    assert source['status']['last_job']['status'] == 'Finished'
    assert source['status']['last_job']['stats']['added'] == 0
    assert source['status']['last_job']['stats']['updated'] == 0

    # Re-run the harvester but force updates
    config_dict = {
        'source': 'esa_scihub',
        'update_all': True,
        'datasets_per_job': 10,
        'timeout': 10,
        'skip_raw': False
    }
    config = json.dumps(config_dict)
    source['config'] = config
    harvest_source_update(context, source)
    job_dict = get_action('harvest_job_create')(context, {
        'source_id': source['id']
    })
    job_obj = HarvestJob.get(job_dict['id'])
    harvester = queue.get_harvester(source['source_type'])
    with requests_mock.Mocker(real_http=True) as m:
        m.register_uri('GET', '/dhus/search?q', text=self.raw_results)
        lib.run_harvest_job(job_obj, harvester)

    source = harvest_source_show(context, {'id': 'scihub_test_harvester'})
    assert source['status']['last_job']['status'] == 'Finished'
    assert source['status']['last_job']['stats']['added'] == 0
    assert source['status']['last_job']['stats']['updated'] == 10

    # Verify that the org now has 10 datasets
    org = helpers.call_action('organization_show', context,
                              **{'id': 'test_org'})
    assert org['package_count'] == 10
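# Both tests above note in their docstrings that the user/organisation/source
# setup should move into helper functions or fixtures. A minimal pytest
# fixture sketch along those lines, assuming `helpers` (ckan.tests.helpers)
# and `model` (ckan.model) are imported as in the tests; the fixture name and
# the credential values are illustrative and not part of the existing modules.
import pytest


@pytest.fixture
def harvest_context():
    """Reset the database and return an action context for a test user."""
    helpers.reset_db()
    context = {
        'model': model,
        'session': model.Session,
        'user': 'test_user',
        'ignore_auth': True,
    }
    helpers.call_action('user_create', context,
                        name='test_user',
                        email='test_user@example.com',
                        password='insecure-test-password')
    return context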