def get_expected_fixture(response_file_name):
    expected_record = expected_json_results_from_file(
        'responses/desy',
        response_file_name,
        test_suite='unit',
    )

    return expected_record
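# 'expected_json_results_from_file' is a helper from the project's test
# utilities.  The sketch below is not that implementation; it only shows,
# under an assumed directory layout (tests/<suite>/<path parts...>), what a
# JSON fixture loader of this shape typically does.
import json
import os


def load_expected_json(*path_chunks, test_suite='unit'):
    # Resolve the fixture path relative to this file (layout assumed).
    fixture_path = os.path.join(
        os.path.dirname(__file__), '..', test_suite, *path_chunks
    )
    with open(fixture_path) as fixture_file:
        return json.load(fixture_file)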
    yield {
        'CRAWLER_HOST_URL': 'http://scrapyd:6800',
        'CRAWLER_PROJECT': 'hepcrawl',
        'CRAWLER_ARGUMENTS': {
            'source_file': 'file://' + package_location,
        }
    }

    clean_dir()


@pytest.mark.parametrize('expected_results', [
    expected_json_results_from_file(
        'cds',
        'fixtures',
        'cds_smoke_records_expected.json',
    ),
], ids=[
    'smoke',
])
def test_cds(set_up_local_environment, expected_results):
    crawler = get_crawler_instance(
        set_up_local_environment.get('CRAWLER_HOST_URL'))
    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
def get_configuration_single():
    return {
        'CRAWLER_HOST_URL': 'http://scrapyd:6800',
        'CRAWLER_PROJECT': 'hepcrawl',
        'CRAWLER_ARGUMENTS': {
            'identifier': 'oai:arXiv.org:1401.2122',
            'url': 'http://arxiv-http-server.local/oai2',
        }
    }


@pytest.mark.parametrize('expected_results, config, spider', [
    (
        expected_json_results_from_file(
            'arxiv',
            'fixtures',
            'arxiv_expected.json',
        ),
        get_configuration(),
        'arXiv',
    ),
    (
        expected_json_results_from_file(
            'arxiv',
            'fixtures',
            'arxiv_expected_single.json',
        ),
        get_configuration_single(),
        'arXiv_single',
    ),
],
def cleanup():
    # The test must wait until the docker environment is up (takes about 10
    # seconds).
    sleep(10)
    yield

    clean_dir(path=os.path.join(os.getcwd(), '.scrapy'))
    clean_dir('/tmp/file_urls')
    clean_dir('/tmp/DESY')


@pytest.mark.parametrize('expected_results, settings', [
    (
        expected_json_results_from_file(
            'desy',
            'fixtures',
            'desy_records_ftp_expected.json',
        ),
        get_ftp_settings(),
    ),
    (
        expected_json_results_from_file(
            'desy',
            'fixtures',
            'desy_records_local_expected.json',
        ),
        get_local_settings(),
    ),
], ids=[
    'ftp package',
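# The fixed sleep above only has to outlast the docker start-up time.  A
# polling wait is a more robust alternative; this is a sketch, not project
# code: the helper name and the use of requests are assumptions, while the
# /daemonstatus.json endpoint comes from scrapyd itself and the
# http://scrapyd:6800 address from the surrounding tests.
import time

import requests


def wait_for_scrapyd(url='http://scrapyd:6800', timeout=60):
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # scrapyd answers on daemonstatus.json once it is ready.
            if requests.get(url + '/daemonstatus.json', timeout=2).ok:
                return
        except requests.RequestException:
            pass
        time.sleep(1)
    raise RuntimeError('scrapyd did not come up within %d seconds' % timeout)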
    clean_dir()
    clean_dir(path=os.path.join(os.getcwd(), '.scrapy'))

    _, dirs, files = next(os.walk(package_location))
    for dir_name in dirs:
        clean_dir(os.path.join(package_location, dir_name))
    for file_name in files:
        if not file_name.endswith('.zip'):
            os.unlink(os.path.join(package_location, file_name))


@pytest.mark.parametrize('expected_results, settings', [
    (
        expected_json_results_from_file(
            'wsp',
            'fixtures',
            'wsp_smoke_records.json',
        ),
        get_ftp_settings(),
    ),
    (
        expected_json_results_from_file(
            'wsp',
            'fixtures',
            'wsp_smoke_records.json',
        ),
        get_local_settings(),
    ),
], ids=[
    'ftp',
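# 'clean_dir' is another project test helper.  As a rough stand-in, assuming
# it only wipes a directory tree and tolerates a missing path, it could be as
# small as this (the default path here is illustrative, not the real one):
import shutil


def clean_dir_sketch(path='/tmp/file_urls'):
    # Delete the directory and all of its contents; ignore it if absent.
    shutil.rmtree(path, ignore_errors=True)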
        'CRAWLER_HOST_URL': 'http://scrapyd:6800',
        'CRAWLER_PROJECT': 'hepcrawl',
        'CRAWLER_ARGUMENTS': {
            'identifier': 'oai:arXiv.org:1401.2122',
            'url': 'http://arxiv-http-server.local/oai2',
        }
    }


@pytest.mark.parametrize(
    'expected_results, config, spider',
    [
        (
            expected_json_results_from_file(
                'arxiv',
                'fixtures',
                'arxiv_expected.json',
            ),
            get_configuration(),
            'arXiv',
        ),
        (
            expected_json_results_from_file(
                'arxiv',
                'fixtures',
                'arxiv_expected_single.json',
            ),
            get_configuration_single(),
            'arXiv_single',
        ),
    ],
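# Each parametrized case above pairs an expected-records fixture with a
# crawler configuration and a spider name; the test body then crawls and
# compares the harvested records with the fixture.  The helper below sketches
# that comparison only: sorting both sides and dropping volatile fields is
# the general idea, but the 'acquisition_source.datetime' field stripped here
# is an assumption, not necessarily what the real tests remove.
import copy


def assert_crawl_matches_expected(crawl_results, expected_results):
    def normalize(record):
        record = copy.deepcopy(record)
        # Harvest timestamps change on every run, so they cannot be part of
        # the comparison (field name assumed for illustration).
        record.get('acquisition_source', {}).pop('datetime', None)
        return record

    def sort_key(record):
        return str(sorted(record.items()))

    crawled = sorted((normalize(r) for r in crawl_results), key=sort_key)
    expected = sorted((normalize(r) for r in expected_results), key=sort_key)

    assert crawled == expected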
def get_configuration_single():
    return {
        'CRAWLER_HOST_URL': 'http://*****:*****


@pytest.mark.parametrize('expected_results, config, spider', [
    (
        expected_json_results_from_file(
            'cds',
            'fixtures',
            'cds_expected.json',
        ),
        get_configuration(),
        'CDS',
    ),
    (
        expected_json_results_from_file(
            'cds',
            'fixtures',
            'cds_single_expected.json',
        ),
        get_configuration_single(),
        'CDS_single',
    ),
],
    _, dirs, files = next(os.walk(package_location))
    for dir_name in dirs:
        clean_dir(os.path.join(package_location, dir_name))
    for file_name in files:
        if not file_name.endswith('.zip'):
            os.unlink(os.path.join(package_location, file_name))


@pytest.mark.parametrize(
    'expected_results, settings',
    [
        (
            expected_json_results_from_file(
                'wsp',
                'fixtures',
                'wsp_smoke_records.json',
            ),
            get_ftp_settings(),
        ),
        (
            expected_json_results_from_file(
                'wsp',
                'fixtures',
                'wsp_smoke_records.json',
            ),
            get_local_settings(),
        ),
    ],
    ids=[
        'ftp',
    # seconds).
    sleep(10)
    yield

    clean_dir(path=os.path.join(os.getcwd(), '.scrapy'))
    clean_dir('/tmp/file_urls')
    clean_dir('/tmp/DESY')


@pytest.mark.parametrize(
    'expected_results, settings',
    [
        (
            expected_json_results_from_file(
                'desy',
                'fixtures',
                'desy_records_ftp_expected.json',
            ),
            get_ftp_settings(),
        ),
        (
            expected_json_results_from_file(
                'desy',
                'fixtures',
                'desy_records_local_expected.json',
            ),
            get_local_settings(),
        ),
    ],
    ids=[
    yield {
        'CRAWLER_HOST_URL': 'http://*****:*****


@pytest.mark.parametrize('expected_results', [
    expected_json_results_from_file(
        'arxiv',
        'fixtures',
        'arxiv_smoke_record.json',
    ),
], ids=[
    'smoke',
])
def test_arxiv(set_up_local_environment, expected_results):
    crawler = get_crawler_instance(
        set_up_local_environment.get('CRAWLER_HOST_URL'))
    results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
                'https://http-server.local/contribution?id='
            ),
            'base_proceedings_url': (
                'https://http-server.local/cgi-bin/reader/conf.cgi?confid='
            ),
        }
    }


@pytest.mark.parametrize(
    'expected_results, config',
    [
        (
            expected_json_results_from_file(
                'pos',
                'fixtures',
                'pos_conference_proceedings_records.json',
            ),
            get_configuration(),
        ),
    ],
    ids=[
        'smoke',
    ]
)
def test_pos_conference_paper_record_and_proceedings_record(
    expected_results,
    config,
):
    crawler = get_crawler_instance(config['CRAWLER_HOST_URL'])
        'CRAWLER_HOST_URL': 'http://scrapyd:6800',
        'CRAWLER_PROJECT': 'hepcrawl',
        'CRAWLER_ARGUMENTS': {
            'source_file': 'file://' + package_location,
        }
    }

    clean_dir()


@pytest.mark.parametrize(
    'expected_results',
    [
        expected_json_results_from_file(
            'cds',
            'fixtures',
            'cds_smoke_records_expected.json',
        ),
    ],
    ids=[
        'smoke',
    ]
)
def test_cds(set_up_local_environment, expected_results):
    crawler = get_crawler_instance(
        set_up_local_environment.get('CRAWLER_HOST_URL')
    )
    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
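# 'get_crawler_instance' above is a test helper that talks to the scrapyd
# service at CRAWLER_HOST_URL.  A minimal stand-in, assuming a plain
# python-scrapyd-api client is enough (an assumption about the helper, not a
# statement about the project's actual implementation):
from scrapyd_api import ScrapydAPI


def get_crawler_instance_sketch(crawler_host_url):
    # Return a client bound to the scrapyd HTTP API that runs the spiders.
    return ScrapydAPI(crawler_host_url)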