def test_list_records(self):
    """Check that ``list_records`` harvests several sets in one call.

    Two mocked OAI-PMH responses (one for the ``cs`` set, one for the
    ``physics`` set) are registered, then both sets are requested through
    a single ``setspecs`` string. The expected total comes from the
    sample fixtures, see the count comment below.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data")

    # Use context managers so the fixture file handles are closed
    # deterministically instead of being left to the garbage collector.
    with open(os.path.join(
        data_dir, "sample_arxiv_response_listrecords_cs.xml"
    )) as cs_file:
        raw_cs_xml = cs_file.read()

    with open(os.path.join(
        data_dir, "sample_arxiv_response_listrecords_physics.xml"
    )) as physics_file:
        raw_physics_xml = physics_file.read()

    # ``https?`` accepts both http and https; the previous ``http?``
    # made the trailing "p" optional, which was a typo.
    responses.add(
        responses.GET,
        re.compile(r'https?://export.arxiv.org/oai2.*set=cs&.*'),
        body=raw_cs_xml,
        content_type='text/xml'
    )
    responses.add(
        responses.GET,
        re.compile(r'https?://export.arxiv.org/oai2.*set=physics&.*'),
        body=raw_physics_xml,
        content_type='text/xml'
    )

    _, records = list_records(
        metadata_prefix='arXiv',
        from_date='2015-01-15',
        until_date='2015-01-20',
        url='http://export.arxiv.org/oai2',
        name=None,
        setspecs='cs physics'
    )

    # 46 cs + 150 physics - 6 dupes == 190
    # assertEqual reports the actual count when the check fails.
    self.assertEqual(len(records), 190)
def test_model_based_harvesting_list(self):
    """Test harvesting using model.

    Looks up the stored ``arXiv`` harvest source, calls ``list_records``
    by name only, and verifies both the number of harvested records and
    that the source's ``lastrun`` value has moved forward afterwards.
    """
    from invenio_oaiharvester.utils import get_oaiharvest_object

    # Remember the timestamp before harvesting so it can be compared
    # against the value stored after the run.
    source = get_oaiharvest_object('arXiv')
    last_updated = source.lastrun

    # Use a context manager so the fixture file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(os.path.join(
        os.path.dirname(__file__),
        "data/sample_arxiv_response_listrecords_physics.xml"
    )) as physics_file:
        raw_physics_xml = physics_file.read()

    # ``https?`` accepts both http and https; the previous ``http?``
    # made the trailing "p" optional, which was a typo.
    # NOTE(review): presumably the stored 'arXiv' source is configured
    # with the ``physics`` set -- confirm against the test fixtures.
    responses.add(
        responses.GET,
        re.compile(r'https?://export.arxiv.org/oai2.*set=physics&.*'),
        body=raw_physics_xml,
        content_type='text/xml'
    )

    _, records = list_records(name='arXiv')

    # Dedicated assertions report the actual values when a check fails.
    self.assertEqual(len(records), 150)
    self.assertLess(last_updated, get_oaiharvest_object('arXiv').lastrun)