def _get_observations(self, collection, start=None, end=None, obs_file=None):
    """
    Returns one batch of observation ids listed for the collection.

    The repository client is queried with MAXREC set to BATCH_SIZE plus the
    optional START/END bounds. Each listing line is expected to be
    tab-separated with the observation id in the second column and a
    timestamp in the third; shorter lines are logged and skipped.

    Side effect: when at least one complete line was read, ``self._start``
    is set to the parsed timestamp of the last complete line (presumably so
    the caller can request the next batch from there - confirm in caller).

    :param collection: name of the collection (must not be None)
    :param start: earliest observation; sent as START when not None
    :param end: latest observation; sent as END when not None
    :param obs_file: not used by this method
    :return: list of observation ids
    """
    assert collection is not None

    params = {'MAXREC': BATCH_SIZE}
    if start is not None:
        params['START'] = util.date2ivoa(start)
    if end is not None:
        params['END'] = util.date2ivoa(end)

    response = self._repo_client.get(
        (CAOM2REPO_OBS_CAPABILITY_ID, collection), params=params)

    observations = []
    last_datetime = None
    for line in response.text.splitlines():
        columns = line.split('\t')
        if len(columns) >= 3:
            observations.append(columns[1])
            last_datetime = columns[2]
        else:
            # Logger.warn is a deprecated alias of Logger.warning
            self.logger.warning('Incomplete listing line: {}'.format(line))

    if last_datetime is not None:
        # only the timestamp of the last complete line is parsed and kept
        self._start = util.str2ivoa(last_datetime)
    return observations
def _get_obs_from_file(self, obs_file, start, end, halt_on_error):
    """
    Reads observation ids from an iterable of text lines.

    Every non-blank line holds ``<observation id>`` optionally followed by
    ``<last modification date>`` (whitespace separated):

    - one token: the id is always accepted;
    - two tokens: the id is accepted unless the date is before ``start`` or
      after ``end`` (a bound that is None/falsy is not applied);
    - more than two tokens, or any exception raised while handling the
      line (e.g. by ``util.str2ivoa``): the line is logged as FAILED and
      skipped.

    :param obs_file: iterable yielding the lines to parse (e.g. open file)
    :param start: lower bound for the last modification date, or None
    :param end: upper bound for the last modification date, or None
    :param halt_on_error: when true, re-raise the error of the first failed
        line instead of continuing with the next line
    :return: list of accepted observation ids, in file order
    """
    obs = []
    for line in obs_file:
        tokens = line.split()
        if not tokens:
            # blank line
            continue

        obs_id = tokens[0]
        if len(tokens) == 1:
            # line contains the observation id only
            obs.append(obs_id)
            continue

        try:
            # the date is parsed before the token count is checked, so a
            # bad date is reported in preference to the extra-token error
            last_mod_datetime = util.str2ivoa(tokens[1])
            if len(tokens) > 2:
                raise Exception(
                    'Extra token one line: {}'.format(line))
            if (start and last_mod_datetime < start) or \
                    (end and last_mod_datetime > end):
                self.logger.info(
                    'last modified date is out of start/end range: {}'
                    .format(line))
            else:
                # <observation id> <last modification date> within range
                obs.append(obs_id)
        except Exception as e:
            self.logger.error('FAILED {} - Reason: {}'.format(obs_id, e))
            if halt_on_error:
                # bare raise keeps the original traceback
                raise
    return obs
def test_get_obs_from_file(self):
    """
    Checks the ids returned by _get_obs_from_file for the fixture files
    under different start/end bounds, and that a malformed file raises
    when halt_on_error is set.
    """
    client = CAOM2RepoClient(auth.Subject(), logging.DEBUG)

    def read_ids(relative_path, begin, finish, halt):
        # parse a fixture file located relative to this test module
        with open(os.path.join(THIS_DIR, relative_path)) as handle:
            return client._get_obs_from_file(handle, begin, finish, halt)

    def check_leading(ids, *expected_ids):
        # compare only the leading entries, one assertion per position
        for position, expected in enumerate(expected_ids):
            self.assertEqual(expected, ids[position])

    lower_bound = util.str2ivoa('2000-10-9T12:30:00.333')

    # no start or end
    check_leading(read_ids('data/obs_id.txt', None, None, False),
                  'obs_id_1', 'obs_id_2', 'obs_id_3')

    # start only
    check_leading(
        read_ids('data/obs_id.txt',
                 util.str2ivoa('2000-10-11T12:30:00.333'), None, False),
        'obs_id_1')

    # start and end (end in 2016)
    check_leading(
        read_ids('data/obs_id.txt', lower_bound,
                 util.str2ivoa('2016-10-11T12:30:00.333'), False),
        'obs_id_1', 'obs_id_2')

    # start and end (end in 2017)
    check_leading(
        read_ids('data/obs_id.txt', lower_bound,
                 util.str2ivoa('2017-10-11T12:30:00.333'), False),
        'obs_id_1', 'obs_id_2', 'obs_id_3')

    # error in file
    with self.assertRaises(Exception):
        read_ids('data/obs_id_error.txt', lower_bound,
                 util.str2ivoa('2016-10-11T12:30:00.333'), True)
def test_get_obs_from_file(self):
    """
    Checks the ids returned by _get_obs_from_file for the fixture files
    under different start/end bounds, and that a malformed file raises
    when halt_on_error is set.
    """
    level = logging.DEBUG
    visitor = CAOM2RepoClient(auth.Subject(), level)

    # assertEqual is used throughout: the assertEquals alias is deprecated
    # and no longer available in recent unittest versions.

    # no start or end
    with open(os.path.join(THIS_DIR, 'data/obs_id.txt')) as obs_file:
        obs_id_list = visitor._get_obs_from_file(obs_file, None, None,
                                                 False)
        self.assertEqual('obs_id_1', obs_id_list[0])
        self.assertEqual('obs_id_2', obs_id_list[1])
        self.assertEqual('obs_id_3', obs_id_list[2])

    # start only
    with open(os.path.join(THIS_DIR, 'data/obs_id.txt')) as obs_file:
        obs_id_list = visitor._get_obs_from_file(
            obs_file, util.str2ivoa('2000-10-11T12:30:00.333'), None, False)
        self.assertEqual('obs_id_1', obs_id_list[0])

    # start and end (end in 2016)
    with open(os.path.join(THIS_DIR, 'data/obs_id.txt')) as obs_file:
        obs_id_list = visitor._get_obs_from_file(
            obs_file, util.str2ivoa('2000-10-9T12:30:00.333'),
            util.str2ivoa('2016-10-11T12:30:00.333'), False)
        self.assertEqual('obs_id_1', obs_id_list[0])
        self.assertEqual('obs_id_2', obs_id_list[1])

    # start and end (end in 2017)
    with open(os.path.join(THIS_DIR, 'data/obs_id.txt')) as obs_file:
        obs_id_list = visitor._get_obs_from_file(
            obs_file, util.str2ivoa('2000-10-9T12:30:00.333'),
            util.str2ivoa('2017-10-11T12:30:00.333'), False)
        self.assertEqual('obs_id_1', obs_id_list[0])
        self.assertEqual('obs_id_2', obs_id_list[1])
        self.assertEqual('obs_id_3', obs_id_list[2])

    # error in file
    with open(os.path.join(THIS_DIR, 'data/obs_id_error.txt')) as obs_file:
        with self.assertRaises(Exception):
            visitor._get_obs_from_file(
                obs_file, util.str2ivoa('2000-10-9T12:30:00.333'),
                util.str2ivoa('2016-10-11T12:30:00.333'), True)
def test_process(self):
    """
    Exercises CAOM2RepoClient.visit with mocked listings and observations:
    batching, per-observation plugin outcomes, halting on the first error
    and the START/END/MAXREC parameters sent to the repository client.
    """
    core.BATCH_SIZE = 3  # size of the batch is 3

    pass_plugin = os.path.join(THIS_DIR, 'passplugin.py')
    error_plugin = os.path.join(THIS_DIR, 'errorplugin.py')

    def check_counts(outcome, n_visited, n_updated, n_skipped, n_failed):
        # visit() results are unpacked as (visited, updated, skipped, failed)
        (visited, updated, skipped, failed) = outcome
        self.assertEqual(n_visited, len(visited))
        self.assertEqual(n_updated, len(updated))
        self.assertEqual(n_skipped, len(skipped))
        self.assertEqual(n_failed, len(failed))

    def make_observations(*obs_ids):
        # one real SimpleObservation per id, all in collection 'TEST'
        return [SimpleObservation(collection='TEST', observation_id=obs_id)
                for obs_id in obs_ids]

    client = CAOM2RepoClient(auth.Subject(), logging.DEBUG)
    client.get_observation = MagicMock(
        return_value=MagicMock(spec=SimpleObservation))
    client.post_observation = MagicMock()

    # a full batch followed by a partial one
    client._get_observations = MagicMock(
        side_effect=[['a', 'b', 'c'], ['d'], []])
    check_counts(client.visit(pass_plugin, 'cfht'), 4, 4, 0, 0)

    # two full batches followed by an empty one
    client._get_observations = MagicMock(
        side_effect=[['a', 'b', 'c'], ['d', 'e', 'f'], []])
    check_counts(client.visit(pass_plugin, 'cfht'), 6, 6, 0, 0)

    # make it return different status. errorplugin returns according to the
    # id of the observation: True for 'UPDATE', False for 'SKIP' and
    # raises exception for 'ERROR'
    client._get_observations = MagicMock(
        side_effect=[['UPDATE', 'SKIP', 'ERROR'], []])
    client.get_observation = MagicMock(
        side_effect=make_observations('UPDATE', 'SKIP', 'ERROR'))
    check_counts(client.visit(error_plugin, 'cfht'), 3, 1, 1, 1)

    # repeat with other obs
    client._get_observations = MagicMock(
        side_effect=[['UPDATE', 'SKIP', 'ERROR'], ['UPDATE', 'SKIP']])
    client.get_observation = MagicMock(
        side_effect=make_observations(
            'UPDATE', 'SKIP', 'ERROR', 'UPDATE', 'SKIP'))
    check_counts(client.visit(error_plugin, 'cfht'), 5, 2, 2, 1)

    # repeat but halt on first ERROR -> process only 3 observations
    client._get_observations = MagicMock(
        side_effect=[['UPDATE', 'SKIP', 'ERROR'], ['UPDATE', 'SKIP']])
    client.get_observation = MagicMock(
        side_effect=make_observations(
            'UPDATE', 'SKIP', 'ERROR', 'UPDATE', 'SKIP'))
    with self.assertRaises(SystemError):
        client.visit(error_plugin, 'cfht', halt_on_error=True)

    # test with time boundaries
    core.BATCH_SIZE = 3  # size of the batch is 3
    first_listing = MagicMock()
    # NOTE(review): the year '211' in the second record looks like a typo
    # for 2011; the test does not appear to depend on it - confirm.
    first_listing.text = """ARCHIVE\ta\t2011-01-01T11:00:00.000
ARCHIVE\tb\t211-01-01T11:00:10.000
ARCHIVE\tc\t2011-01-01T12:00:00.000"""
    second_listing = MagicMock()
    second_listing.text = """ARCHIVE\td\t2011-02-02T11:00:00.000"""

    client = CAOM2RepoClient(auth.Subject(), logging.DEBUG)
    client.get_observation = MagicMock(
        return_value=MagicMock(spec=SimpleObservation))
    client.post_observation = MagicMock()
    client._repo_client.get = MagicMock(
        side_effect=[first_listing, second_listing])

    start = '2010-10-10T12:00:00.000'
    end = '2012-12-12T11:11:11.000'
    check_counts(
        client.visit(pass_plugin, 'cfht',
                     start=util.str2ivoa(start), end=util.str2ivoa(end)),
        4, 4, 0, 0)

    resource = (core.CURRENT_CAOM2REPO_OBS_CAPABILITY_ID, 'cfht')
    expected_calls = [
        call(resource, params={'START': start, 'END': end, 'MAXREC': 3}),
        # second request starts at the datetime of the last record in the
        # first batch
        call(resource, params={'START': '2011-01-01T12:00:00.000',
                               'END': end,
                               'MAXREC': 3}),
    ]
    client._repo_client.get.assert_has_calls(expected_calls)
def test_process(self):
    """
    Runs CAOM2RepoClient.visit against mocked collaborators and checks the
    sizes of the visited/updated/skipped/failed results for several
    listings, the halt_on_error path, and the parameters of the listing
    requests when start/end bounds are given.
    """
    core.BATCH_SIZE = 3  # size of the batch is 3

    plugin_ok = os.path.join(THIS_DIR, 'passplugin.py')
    plugin_err = os.path.join(THIS_DIR, 'errorplugin.py')
    status_ids = ('UPDATE', 'SKIP', 'ERROR', 'UPDATE', 'SKIP')

    def new_client():
        # client whose observation get/post calls are mocked out
        instance = CAOM2RepoClient(auth.Subject(), logging.DEBUG)
        instance.get_observation = MagicMock(
            return_value=MagicMock(spec=SimpleObservation))
        instance.post_observation = MagicMock()
        return instance

    def assert_sizes(result, visited_n, updated_n, skipped_n, failed_n):
        # result must unpack into exactly four sized collections
        (visited, updated, skipped, failed) = result
        self.assertEqual(visited_n, len(visited))
        self.assertEqual(updated_n, len(updated))
        self.assertEqual(skipped_n, len(skipped))
        self.assertEqual(failed_n, len(failed))

    def observations(count):
        # the first `count` status-named observations in collection 'TEST'
        return [SimpleObservation(collection='TEST', observation_id=name)
                for name in status_ids[:count]]

    visitor = new_client()

    # batches of 3 and 1 ids
    visitor._get_observations = MagicMock(
        side_effect=[['a', 'b', 'c'], ['d'], []])
    assert_sizes(visitor.visit(plugin_ok, 'cfht'), 4, 4, 0, 0)

    # batches of 3 and 3 ids
    visitor._get_observations = MagicMock(
        side_effect=[['a', 'b', 'c'], ['d', 'e', 'f'], []])
    assert_sizes(visitor.visit(plugin_ok, 'cfht'), 6, 6, 0, 0)

    # make it return different status. errorplugin returns according to the
    # id of the observation: True for 'UPDATE', False for 'SKIP' and
    # raises exception for 'ERROR'
    visitor._get_observations = MagicMock(
        side_effect=[['UPDATE', 'SKIP', 'ERROR'], []])
    visitor.get_observation = MagicMock(side_effect=observations(3))
    assert_sizes(visitor.visit(plugin_err, 'cfht'), 3, 1, 1, 1)

    # repeat with other obs
    visitor._get_observations = MagicMock(
        side_effect=[['UPDATE', 'SKIP', 'ERROR'], ['UPDATE', 'SKIP']])
    visitor.get_observation = MagicMock(side_effect=observations(5))
    assert_sizes(visitor.visit(plugin_err, 'cfht'), 5, 2, 2, 1)

    # repeat but halt on first ERROR -> process only 3 observations
    visitor._get_observations = MagicMock(
        side_effect=[['UPDATE', 'SKIP', 'ERROR'], ['UPDATE', 'SKIP']])
    visitor.get_observation = MagicMock(side_effect=observations(5))
    with self.assertRaises(SystemError):
        visitor.visit(plugin_err, 'cfht', halt_on_error=True)

    # test with time boundaries
    core.BATCH_SIZE = 3  # size of the batch is 3
    batch_one = MagicMock()
    batch_one.text = """ARCHIVE\ta\t2011-01-01T11:00:00.000
ARCHIVE\tb\t211-01-01T11:00:10.000
ARCHIVE\tc\t2011-01-01T12:00:00.000"""
    batch_two = MagicMock()
    batch_two.text = """ARCHIVE\td\t2011-02-02T11:00:00.000"""

    visitor = new_client()
    visitor._repo_client.get = MagicMock(side_effect=[batch_one, batch_two])

    start = '2010-10-10T12:00:00.000'
    end = '2012-12-12T11:11:11.000'
    assert_sizes(
        visitor.visit(plugin_ok, 'cfht',
                      start=util.str2ivoa(start), end=util.str2ivoa(end)),
        4, 4, 0, 0)

    target = (core.CAOM2REPO_OBS_CAPABILITY_ID, 'cfht')
    first_params = {'START': start, 'END': end, 'MAXREC': 3}
    # datetime of the last record in the batch becomes the next START
    second_params = {'START': '2011-01-01T12:00:00.000',
                     'END': end,
                     'MAXREC': 3}
    visitor._repo_client.get.assert_has_calls(
        [call(target, params=first_params),
         call(target, params=second_params)])