def test_raise_missing_info(app):
    """Verify NameOrUrlMissing is raised when neither name nor url is given."""
    from invenio_oaiharvester.errors import NameOrUrlMissing

    # Each entry point is invoked without a name or url, in this order.
    incomplete_calls = (
        lambda: list_records(),
        lambda: get_records([]),
    )

    with app.app_context():
        for incomplete_call in incomplete_calls:
            with pytest.raises(NameOrUrlMissing):
                incomplete_call()
def test_raise_wrong_date(app):
    """Verify WrongDateCombination is raised when from_date is after until_date."""
    # NOTE(review): a second definition with this name follows later in the
    # module and shadows this one, so pytest only collects the later one.
    harvest_kwargs = dict(
        metadata_prefix='arXiv',
        from_date='2015-01-18',  # one day later than until_date
        until_date='2015-01-17',
        url='http://export.arxiv.org/oai2',
        name=None,
        setspecs='physics:hep-lat',
    )

    with app.app_context(), pytest.raises(WrongDateCombination):
        list_records(**harvest_kwargs)
def test_raise_wrong_date(app):
    """Verify WrongDateCombination is raised when from_date is after until_date."""
    harvest_kwargs = dict(
        metadata_prefix='arXiv',
        from_date='2015-01-18',  # one day later than until_date
        until_date='2015-01-17',
        url='http://export.arxiv.org/oai2',
        name=None,
        setspecs='physics:hep-lat',
    )

    with app.app_context(), pytest.raises(WrongDateCombination):
        list_records(**harvest_kwargs)
def test_list_records(app, sample_list_xml, sample_list_xml_cs):
    """Check harvesting of records from multiple setspecs.

    One mocked response is registered per set (``cs`` and ``physics``) and
    the harvest is run with ``setspecs='cs physics'``; the combined result
    is expected to contain 190 records.
    """
    # NOTE(review): these mocks only intercept requests while ``responses``
    # is active (decorator or fixture) -- confirm that is set up elsewhere.
    # The pattern uses ``https?`` (optional "s") and escaped dots so only the
    # intended host matches; the previous ``http?`` made the "p" optional.
    responses.add(
        responses.GET,
        re.compile(r'https?://export\.arxiv\.org/oai2.*set=cs.*'),
        body=sample_list_xml_cs,
        content_type='text/xml'
    )
    responses.add(
        responses.GET,
        re.compile(r'https?://export\.arxiv\.org/oai2.*set=physics.*'),
        body=sample_list_xml,
        content_type='text/xml'
    )

    with app.app_context():
        _, records = list_records(
            metadata_prefix='arXiv',
            from_date='2015-01-15',
            until_date='2015-01-20',
            url='http://export.arxiv.org/oai2',
            name=None,
            setspecs='cs physics'
        )
        # 46 cs + 150 physics - 6 dupes == 190
        assert len(records) == 190
def test_model_based_harvesting_list(app, sample_config, sample_list_xml):
    """Test harvesting using a stored configuration looked up by name.

    Asserts that 150 records are returned and that the configuration's
    ``lastrun`` value is later after the harvest than before it.
    """
    # NOTE(review): a second definition with this name follows later in the
    # module and shadows this one, so pytest only collects the later one.
    from invenio_oaiharvester.utils import get_oaiharvest_object

    # NOTE(review): the mock only intercepts requests while ``responses`` is
    # active (decorator or fixture) -- confirm that is set up elsewhere.
    # ``https?`` (optional "s") and escaped dots replace the previous
    # ``http?`` pattern, which made the "p" optional instead.
    responses.add(
        responses.GET,
        re.compile(r'https?://export\.arxiv\.org/oai2.*set=physics.*'),
        body=sample_list_xml,
        content_type='text/xml'
    )

    with app.app_context():
        source = get_oaiharvest_object(sample_config)
        last_updated = source.lastrun
        time.sleep(0.1)  # to allow for date checking to work better

        _, records = list_records(name=sample_config)

        assert len(records) == 150
        assert last_updated < get_oaiharvest_object(sample_config).lastrun
def test_list_no_records(app, sample_empty_set):
    """Check that harvesting yields no records when the mocked set is empty."""
    # NOTE(review): a second definition with this name follows later in the
    # module and shadows this one, so pytest only collects the later one.

    # NOTE(review): the mock only intercepts requests while ``responses`` is
    # active (decorator or fixture) -- confirm that is set up elsewhere.
    # ``https?`` (optional "s") and escaped dots replace the previous
    # ``http?`` pattern, which made the "p" optional instead.
    responses.add(
        responses.GET,
        re.compile(r'https?://export\.arxiv\.org/oai2.*set=physics.*'),
        body=sample_empty_set,
        content_type='text/xml'
    )

    with app.app_context():
        _, records = list_records(
            metadata_prefix='arXiv',
            from_date='2015-01-17',
            until_date='2015-01-17',
            url='http://export.arxiv.org/oai2',
            name=None,
            setspecs='physics:hep-lat'
        )
        assert not records
def test_model_based_harvesting_list(app, sample_config, sample_list_xml):
    """Test harvesting using a stored configuration looked up by name.

    Asserts that 150 records are returned and that the configuration's
    ``lastrun`` value is later after the harvest than before it.
    """
    from invenio_oaiharvester.utils import get_oaiharvest_object

    # NOTE(review): the mock only intercepts requests while ``responses`` is
    # active (decorator or fixture) -- confirm that is set up elsewhere.
    # ``https?`` (optional "s") and escaped dots replace the previous
    # ``http?`` pattern, which made the "p" optional instead.
    responses.add(
        responses.GET,
        re.compile(r'https?://export\.arxiv\.org/oai2.*set=physics.*'),
        body=sample_list_xml,
        content_type='text/xml'
    )

    with app.app_context():
        source = get_oaiharvest_object(sample_config)
        last_updated = source.lastrun
        time.sleep(0.1)  # to allow for date checking to work better

        _, records = list_records(name=sample_config)

        assert len(records) == 150
        assert last_updated < get_oaiharvest_object(sample_config).lastrun
def test_list_no_records(app, sample_empty_set):
    """Check that harvesting yields no records when the mocked set is empty."""
    # NOTE(review): the mock only intercepts requests while ``responses`` is
    # active (decorator or fixture) -- confirm that is set up elsewhere.
    # ``https?`` (optional "s") and escaped dots replace the previous
    # ``http?`` pattern, which made the "p" optional instead.
    responses.add(
        responses.GET,
        re.compile(r'https?://export\.arxiv\.org/oai2.*set=physics.*'),
        body=sample_empty_set,
        content_type='text/xml'
    )

    with app.app_context():
        _, records = list_records(
            metadata_prefix='arXiv',
            from_date='2015-01-17',
            until_date='2015-01-17',
            url='http://export.arxiv.org/oai2',
            name=None,
            setspecs='physics:hep-lat'
        )
        assert not records