def testLoggingMoreThan1000(self):
    '''Check the progress messages logged while harvesting collection 198.

    The registry API and the OAI endpoint are both served by httpretty
    from fixture files. After the harvest the test log handler must hold
    13 records, with the "N records harvested" messages asserted below
    at the given positions.
    '''
    # Read the fixtures through context managers so the file handles are
    # closed deterministically instead of being left to the garbage
    # collector.
    with open(DIR_FIXTURES + '/collection_api_big_test.json') as fixture:
        collection_json = fixture.read()
    with open(DIR_FIXTURES + '/testOAI-2400-records.xml') as fixture:
        oai_xml = fixture.read()
    httpretty.register_uri(
        httpretty.GET,
        "https://registry.cdlib.org/api/v1/collection/198/",
        body=collection_json)
    httpretty.register_uri(
        httpretty.GET,
        re.compile("http://content.cdlib.org/oai?.*"),
        body=oai_xml)
    collection = Collection(
        'https://registry.cdlib.org/api/v1/collection/198/')
    controller = fetcher.HarvestController(
        '*****@*****.**',
        collection,
        config_file=self.config_file,
        profile_path=self.profile_path)
    expected_messages = (
        (1, '[INFO] HarvestController: 100 records harvested'),
        (10, '[INFO] HarvestController: 1000 records harvested'),
        (11, '[INFO] HarvestController: 2000 records harvested'),
        (12, '[INFO] HarvestController: 2400 records harvested'),
    )
    try:
        controller.harvest()
        self.assertEqual(len(self.test_log_handler.records), 13)
        for index, message in expected_messages:
            self.assertEqual(
                self.test_log_handler.formatted_records[index], message)
    finally:
        # Remove the controller's save directory even when an assertion
        # fails, so a failing run does not leave it behind.
        shutil.rmtree(controller.dir_save)
def testAddRegistryData(self):
    '''Unittest the _add_registry_data function

    A plain dict without registry information is passed to
    _add_registry_data. Afterwards it must carry a 'collection' list
    whose first entry holds the collection @id together with nested
    'campus' and 'repository' entries; 'campus' and 'repository' must
    not be added at the top level of the object.
    '''
    # Context managers close the fixture files as soon as they are read.
    with open(DIR_FIXTURES + '/collection_api_test.json') as fixture:
        collection_json = fixture.read()
    with open(DIR_FIXTURES + '/testOAI-128-records.xml') as fixture:
        oai_xml = fixture.read()
    httpretty.register_uri(
        httpretty.GET,
        "https://registry.cdlib.org/api/v1/collection/197/",
        body=collection_json)
    httpretty.register_uri(
        httpretty.GET,
        re.compile("http://content.cdlib.org/oai?.*"),
        body=oai_xml)
    collection = Collection(
        'https://registry.cdlib.org/api/v1/collection/197/')
    self.tearDown_config()  # remove ones setup in setUp
    self.setUp_config(collection)
    controller = fetcher.HarvestController(
        '*****@*****.**',
        collection,
        config_file=self.config_file,
        profile_path=self.profile_path)
    obj = {'id': 'fakey', 'otherdata': 'test'}
    self.assertNotIn('collection', obj)
    controller._add_registry_data(obj)
    self.assertIn('collection', obj)
    registry_entry = obj['collection'][0]
    self.assertEqual(
        registry_entry['@id'],
        'https://registry.cdlib.org/api/v1/collection/197/')
    # Campus and repository live inside the collection entry only.
    self.assertNotIn('campus', obj)
    self.assertIn('campus', registry_entry)
    self.assertNotIn('repository', obj)
    self.assertIn('repository', registry_entry)
    # need to test one without campus
    self.assertEqual(
        registry_entry['campus'][0]['@id'],
        'https://registry.cdlib.org/api/v1/campus/12/')
    self.assertEqual(
        registry_entry['repository'][0]['@id'],
        'https://registry.cdlib.org/api/v1/repository/37/')
def setUp(self):
    '''Build the shared OAI controller and freeze datetime.datetime.now.

    Registers httpretty responses for collection 197 and for the OAI
    endpoint, creates self.collection and self.controller_oai, loads
    self.objset_test_doc from its JSON fixture and finally replaces
    datetime.datetime with a subclass whose now() always returns
    2017-07-14 12:01.
    '''
    super(HarvestControllerTestCase, self).setUp()
    # Context managers close the fixture files as soon as they are read.
    with open(DIR_FIXTURES + '/collection_api_test.json') as fixture:
        collection_json = fixture.read()
    with open(DIR_FIXTURES + '/testOAI-128-records.xml') as fixture:
        oai_xml = fixture.read()
    httpretty.register_uri(
        httpretty.GET,
        "https://registry.cdlib.org/api/v1/collection/197/",
        body=collection_json)
    httpretty.register_uri(
        httpretty.GET,
        re.compile("http://content.cdlib.org/oai?.*"),
        body=oai_xml)
    self.collection = Collection(
        'https://registry.cdlib.org/api/v1/collection/197/')
    config_file, profile_path = self.setUp_config(self.collection)
    # Note: this controller is created before the clock is frozen below.
    self.controller_oai = fetcher.HarvestController(
        '*****@*****.**',
        self.collection,
        profile_path=profile_path,
        config_file=config_file)
    with open(DIR_FIXTURES + '/objset_test_doc.json') as fixture:
        self.objset_test_doc = json.load(fixture)

    class myNow(datetime.datetime):
        '''datetime subclass with a fixed now() for reproducible paths'''

        @classmethod
        def now(cls):
            return cls(2017, 7, 14, 12, 1)

    # Keep the real class so it can be put back after the test.
    # NOTE(review): the restore presumably happens in tearDown, which is
    # not visible here - confirm.
    self.old_dt = datetime.datetime
    datetime.datetime = myNow
def testNuxeoHarvest(self, mock_deepharvest, mock_boto, mock_boto3):
    '''Test the function of the Nuxeo harvest

    The three mock arguments are injected by decorators that are not part
    of this block. The registry API and the Nuxeo document API are served
    by httpretty from fixtures, the object list comes from
    mock_deepharvest and the media JSON from mock_boto. The harvest must
    report 5 records, and the first saved object must carry the registry
    collection data plus the expected state and title.
    '''
    # Read every fixture through a context manager so no file handle is
    # left open.
    with open(DIR_FIXTURES + '/nuxeo_media_structmap.json') as fixture:
        media_json = fixture.read()
    with open(DIR_FIXTURES + '/collection_api_test_nuxeo.json') as fixture:
        collection_json = fixture.read()
    with open(DIR_FIXTURES + '/nuxeo_object_list.json') as fixture:
        object_list = json.load(fixture)
    with open(DIR_FIXTURES + '/nuxeo_doc.json') as fixture:
        nuxeo_doc_json = fixture.read()

    bucket = mock_boto.return_value.get_bucket.return_value
    bucket.get_key.return_value.get_contents_as_string.return_value = \
        media_json
    httpretty.register_uri(
        httpretty.GET,
        'http://registry.cdlib.org/api/v1/collection/19/',
        body=collection_json)
    mock_deepharvest.return_value.fetch_objects.return_value = object_list
    httpretty.register_uri(
        httpretty.GET,
        re.compile('https://example.edu/Nuxeo/site/api/v1/id/.*'),
        body=nuxeo_doc_json)
    self.collection = Collection(
        'http://registry.cdlib.org/api/v1/collection/19/')
    # NOTE(review): the config parser patch is scoped to the config setup
    # and the controller construction; confirm that harvest() does not
    # need it as well.
    with patch(
            'ConfigParser.SafeConfigParser',
            autospec=True) as mock_configparser:
        config_inst = mock_configparser.return_value
        config_inst.get.return_value = 'dublincore,ucldc_schema,picture'
        self.setUp_config(self.collection)
        self.controller = fetcher.HarvestController(
            '*****@*****.**',
            self.collection,
            config_file=self.config_file,
            profile_path=self.profile_path)
    self.assertTrue(hasattr(self.controller, 'harvest'))
    num = self.controller.harvest()
    self.assertEqual(num, 5)
    self.tearDown_config()
    # verify one record has collection and such filled in
    fname = os.listdir(self.controller.dir_save)[0]
    with open(os.path.join(self.controller.dir_save, fname)) as saved:
        saved_objset = json.load(saved)
    saved_obj = saved_objset[0]
    saved_collection = saved_obj['collection'][0]
    self.assertEqual(saved_collection['@id'],
                     u'http://registry.cdlib.org/api/v1/collection/19/')
    self.assertEqual(saved_collection['name'],
                     u'Cochems (Edward W.) Photographs')
    self.assertEqual(saved_collection['title'],
                     u'Cochems (Edward W.) Photographs')
    self.assertEqual(saved_collection['id'], u'19')
    self.assertEqual(saved_collection['dcmi_type'], 'I')
    self.assertEqual(saved_collection['rights_statement'],
                     'a sample rights statement')
    self.assertEqual(saved_collection['rights_status'], 'PD')
    self.assertEqual(saved_obj['state'], 'project')
    self.assertEqual(
        saved_obj['title'],
        'Adeline Cochems having her portrait taken by her father '
        'Edward W, Cochems in Santa Ana, California: Photograph')
def testMARCHarvest(self, mock_boto3):
    '''Test the function of the MARC harvest

    mock_boto3 is injected by a decorator that is not part of this block
    and is not used directly here. The collection's url_harvest is pointed
    at a local fixture through a file: URL, and the harvest must report 10
    records.
    '''
    # Context manager closes the fixture file as soon as it is read.
    with open(DIR_FIXTURES + '/collection_api_test_marc.json') as fixture:
        collection_json = fixture.read()
    httpretty.register_uri(
        httpretty.GET,
        'http://registry.cdlib.org/api/v1/collection/',
        body=collection_json)
    self.collection = Collection(
        'http://registry.cdlib.org/api/v1/collection/')
    self.collection.url_harvest = 'file:' + DIR_FIXTURES + '/marc-test'
    self.setUp_config(self.collection)
    self.controller = fetcher.HarvestController(
        '*****@*****.**',
        self.collection,
        config_file=self.config_file,
        profile_path=self.profile_path)
    self.assertTrue(hasattr(self.controller, 'harvest'))
    num = self.controller.harvest()
    self.assertEqual(num, 10)
    self.tearDown_config()
def testFailsIfNoRecords(self):
    '''Test that the Controller throws an error if no records come back
    from fetcher

    The OAI endpoint is served from the 'testOAI-no-records.xml' fixture
    and harvest() must raise fetcher.NoRecordsFetchedException.
    '''
    # Context managers close the fixture files as soon as they are read.
    with open(DIR_FIXTURES + '/collection_api_test.json') as fixture:
        collection_json = fixture.read()
    with open(DIR_FIXTURES + '/testOAI-no-records.xml') as fixture:
        oai_xml = fixture.read()
    httpretty.register_uri(
        httpretty.GET,
        "https://registry.cdlib.org/api/v1/collection/101/",
        body=collection_json)
    httpretty.register_uri(
        httpretty.GET,
        re.compile("http://content.cdlib.org/oai?.*"),
        body=oai_xml)
    collection = Collection(
        'https://registry.cdlib.org/api/v1/collection/101/')
    controller = fetcher.HarvestController(
        '*****@*****.**',
        collection,
        config_file=self.config_file,
        profile_path=self.profile_path)
    self.assertRaises(fetcher.NoRecordsFetchedException,
                      controller.harvest)
def testOAIHarvest(self):
    '''Test the function of the OAI harvest

    Only checks that a controller can be built for the mocked collection
    and that it exposes a harvest attribute; harvest() itself is not
    called here.
    '''
    # Context managers close the fixture files as soon as they are read.
    with open(DIR_FIXTURES+'/collection_api_test.json') as fixture:
        collection_json = fixture.read()
    with open(DIR_FIXTURES+'/testOAC-url_next-0.xml') as fixture:
        oai_xml = fixture.read()
    httpretty.register_uri(
        httpretty.GET,
        'http://registry.cdlib.org/api/v1/collection/',
        body=collection_json)
    httpretty.register_uri(
        httpretty.GET,
        'http://content.cdlib.org/oai',
        body=oai_xml)
    self.collection = Collection(
        'http://registry.cdlib.org/api/v1/collection/')
    self.setUp_config(self.collection)
    self.controller = fetcher.HarvestController(
        '*****@*****.**',
        self.collection,
        config_file=self.config_file,
        profile_path=self.profile_path)
    self.assertTrue(hasattr(self.controller, 'harvest'))
    # TODO: fix why logbook.TestHandler not working for previous logging
    # self.assertEqual(len(self.test_log_handler.records), 2)
    self.tearDown_config()
def setUp(self):
    '''Create the collection and controller for the OAC JSON tests.

    Registers httpretty responses for collection 178 and for the OAC
    search URL, then stores self.collection and self.controller.
    '''
    super(HarvestOAC_JSON_ControllerTestCase, self).setUp()
    # Context managers close the fixture files as soon as they are read.
    with open(DIR_FIXTURES + '/collection_api_test_oac.json') as fixture:
        collection_json = fixture.read()
    with open(DIR_FIXTURES + '/testOAC.json') as fixture:
        oac_json = fixture.read()
    httpretty.register_uri(
        httpretty.GET,
        "https://registry.cdlib.org/api/v1/collection/178/",
        body=collection_json)
    httpretty.register_uri(
        httpretty.GET,
        'http://dsc.cdlib.org/search?facet=type-tab&style=cui&raw=1&'
        'relation=ark:/13030/tf2v19n928',
        body=oac_json)
    self.collection = Collection(
        'https://registry.cdlib.org/api/v1/collection/178/')
    self.setUp_config(self.collection)
    self.controller = fetcher.HarvestController(
        '*****@*****.**',
        self.collection,
        config_file=self.config_file,
        profile_path=self.profile_path)
def testSaveToS3(self, mock_boto3):
    '''Check the S3 calls made by save_objset_s3.

    mock_boto3 is injected by a decorator that is not part of this block.
    Saving a one-key object set must request the 's3' resource, use the
    'ucldc-ingest' bucket and put a single JSON line under the
    data-fetched key for collection 197.
    '''
    # Context managers close the fixture files as soon as they are read.
    with open(DIR_FIXTURES + '/collection_api_test.json') as fixture:
        collection_json = fixture.read()
    with open(DIR_FIXTURES + '/testOAI-128-records.xml') as fixture:
        oai_xml = fixture.read()
    httpretty.register_uri(
        httpretty.GET,
        "https://registry.cdlib.org/api/v1/collection/197/",
        body=collection_json)
    httpretty.register_uri(
        httpretty.GET,
        re.compile("http://content.cdlib.org/oai?.*"),
        body=oai_xml)
    collection = Collection(
        'https://registry.cdlib.org/api/v1/collection/197/')
    controller = fetcher.HarvestController(
        '*****@*****.**',
        collection,
        config_file=self.config_file,
        profile_path=self.profile_path)
    controller.save_objset_s3({"xxxx": "yyyy"})
    mock_boto3.assert_called_with('s3')
    mock_boto3().Bucket.assert_called_with('ucldc-ingest')
    # NOTE(review): the timestamp in the key presumably comes from the
    # clock frozen in this test case's setUp - confirm.
    mock_boto3().Bucket().put_object.assert_called_with(
        Body='{"xxxx": "yyyy"}\n',
        Key='data-fetched/197/2017-07-14-1201/page-0.jsonl')
def setUp(self):
    '''Create the collection and controller for the Solr harvest tests.

    Registers httpretty responses for collection 183 (GET) and for the
    Solr select endpoint (POST), then stores self.collection and
    self.controller and prints the controller's save directory.
    '''
    super(HarvestSolr_ControllerTestCase, self).setUp()
    # Context managers close the fixture files as soon as they are read.
    with open(DIR_FIXTURES + '/collection_api_solr_harvest.json') as fixture:
        collection_json = fixture.read()
    with open(DIR_FIXTURES +
              '/ucsd-new-feed-missions-bb3038949s-0.xml') as fixture:
        solr_xml = fixture.read()
    httpretty.register_uri(
        httpretty.GET,
        "https://registry.cdlib.org/api/v1/collection/183/",
        body=collection_json)
    httpretty.register_uri(
        httpretty.POST,
        'http://example.edu/solr/blacklight/select',
        body=solr_xml)
    self.collection = Collection(
        'https://registry.cdlib.org/api/v1/collection/183/')
    self.setUp_config(self.collection)
    self.controller = fetcher.HarvestController(
        '*****@*****.**',
        self.collection,
        config_file=self.config_file,
        profile_path=self.profile_path)
    # Call form of print: same output as the print statement for a single
    # argument, and matches the other print call in this file.
    print("DIR SAVE::::: {}".format(self.controller.dir_save))
def setUp(self):
    '''Create the collection and controller for the OAC XML tests.

    Registers httpretty responses for collection 178 and for the OAC
    search URL, then stores self.collection and self.controller and
    prints the controller's save directory.
    '''
    super(HarvestOAC_XML_ControllerTestCase, self).setUp()
    # Context managers close the fixture files as soon as they are read.
    with open(DIR_FIXTURES + '/collection_api_test_oac_xml.json') as fixture:
        collection_json = fixture.read()
    with open(DIR_FIXTURES + '/testOAC-url_next-0.xml') as fixture:
        oac_xml = fixture.read()
    httpretty.register_uri(
        httpretty.GET,
        "https://registry.cdlib.org/api/v1/collection/178/",
        body=collection_json)
    httpretty.register_uri(
        httpretty.GET,
        'http://dsc.cdlib.org/search?facet=type-tab&style=cui&raw=1&'
        'relation=ark:/13030/tf0c600134',
        body=oac_xml)
    self.collection = Collection(
        'https://registry.cdlib.org/api/v1/collection/178/')
    self.setUp_config(self.collection)
    self.controller = fetcher.HarvestController(
        '*****@*****.**',
        self.collection,
        config_file=self.config_file,
        profile_path=self.profile_path)
    # Call form of print: same output as the print statement for a single
    # argument, and matches the other print call in this file.
    print("DIR SAVE::::: {}".format(self.controller.dir_save))
def testHarvestControllerExists(self):
    '''Check the attributes of a freshly built HarvestController.

    The controller for collection 197 must expose an OAIFetcher as
    fetcher, the campus_valid, dc_elements and datetime_start attributes
    and the expected s3path.
    '''
    # Context managers close the fixture files as soon as they are read.
    with open(DIR_FIXTURES + '/collection_api_test.json') as fixture:
        collection_json = fixture.read()
    with open(DIR_FIXTURES + '/testOAI-128-records.xml') as fixture:
        oai_xml = fixture.read()
    httpretty.register_uri(
        httpretty.GET,
        "https://registry.cdlib.org/api/v1/collection/197/",
        body=collection_json)
    httpretty.register_uri(
        httpretty.GET,
        re.compile("http://content.cdlib.org/oai?.*"),
        body=oai_xml)
    collection = Collection(
        'https://registry.cdlib.org/api/v1/collection/197/')
    controller = fetcher.HarvestController(
        '*****@*****.**',
        collection,
        config_file=self.config_file,
        profile_path=self.profile_path)
    try:
        self.assertTrue(hasattr(controller, 'fetcher'))
        self.assertIsInstance(controller.fetcher, fetcher.OAIFetcher)
        for attribute in ('campus_valid', 'dc_elements', 'datetime_start'):
            self.assertTrue(hasattr(controller, attribute))
        print(controller.s3path)
        # NOTE(review): the timestamp in the path presumably comes from
        # the clock frozen in this test case's setUp - confirm.
        self.assertEqual(controller.s3path,
                         'data-fetched/197/2017-07-14-1201/')
    finally:
        # Remove the save directory even when an assertion fails.
        shutil.rmtree(controller.dir_save)