def test_bad_header(self):
    """
    Ensure that a SampleException is raised when the file header is invalid.

    Two header variants are exercised: one that matches no known header
    layout, and one that matches the coastal (but not the global) layout.
    """
    # This case tests against a header that does not match
    # 0000 0000 0000 0100 0000 0000 0000 0151
    file_path = os.path.join(RESOURCE_PATH, 'E0000001-BAD-HEADER1.DAT')
    # Context manager guarantees the handle is closed even if the
    # assertion fails (the original leaked the file on test failure).
    with open(file_path, 'rb') as self.stream_handle:
        with self.assertRaises(SampleException):
            self.parser = DostaLnWfpParser(self.config, self.start_state,
                                           self.stream_handle,
                                           self.state_callback,
                                           self.pub_callback,
                                           self.exception_callback)

    # This case tests against a header that does not match global, but matches coastal
    # 0001 0000 0000 0000 0001 0001 0000 0000
    file_path = os.path.join(RESOURCE_PATH, 'E0000001-BAD-HEADER2.DAT')
    with open(file_path, 'rb') as self.stream_handle:
        with self.assertRaises(SampleException):
            self.parser = DostaLnWfpParser(self.config, self.start_state,
                                           self.stream_handle,
                                           self.state_callback,
                                           self.pub_callback,
                                           self.exception_callback)
def test_mid_state_start(self):
    """
    Test starting the parser in a state in the middle of processing.
    """
    file_path = os.path.join(RESOURCE_PATH, 'E0000001.DAT')
    # Context manager guarantees the handle is closed even if an
    # assertion fails (the original leaked the file on test failure).
    with open(file_path, 'rb') as self.stream_handle:
        # Moving the file position past the header and two records
        new_state = {StateKey.POSITION:
                     HEADER_BYTES + (WFP_E_GLOBAL_RECOVERED_ENG_DATA_SAMPLE_BYTES * 2)}
        self.parser = DostaLnWfpParser(self.config, new_state,
                                       self.stream_handle,
                                       self.state_callback,
                                       self.pub_callback,
                                       self.exception_callback)
        particles = self.parser.get_records(4)
        # Should end up with 4 particles; assertEqual gives a useful
        # diff on failure, unlike assertTrue(len(...) == 4).
        self.assertEqual(len(particles), 4)
        self.assert_result(self.test_particle1, particles[3])
def test_get_many(self):
    """
    Read test data and pull out multiple data particles at one time.
    Assert that the results are those we expected.
    """
    file_path = os.path.join(RESOURCE_PATH, 'E0000001.DAT')
    # Context manager guarantees the handle is closed even if an
    # assertion fails (the original leaked the file on test failure).
    with open(file_path, 'rb') as self.stream_handle:
        self.parser = DostaLnWfpParser(self.config, self.start_state,
                                       self.stream_handle,
                                       self.state_callback,
                                       self.pub_callback,
                                       self.exception_callback)

        particles = self.parser.get_records(20)
        # Should end up with 20 particles
        self.assertEqual(len(particles), 20)
        self.assert_result(self.test_particle3, particles[19])

        particles = self.parser.get_records(30)
        # Should end up with 30 particles
        self.assertEqual(len(particles), 30)
        self.assert_result(self.test_particle4, particles[29])
def test_simple(self):
    """
    Read test data and pull out data particles one at a time.
    Assert that the results are those we expected.
    """
    file_path = os.path.join(RESOURCE_PATH, 'small.DAT')
    # Context manager guarantees the handle is closed even if an
    # assertion fails (the original leaked the file on test failure).
    with open(file_path, 'rb') as self.stream_handle:
        self.parser = DostaLnWfpParser(self.config, self.start_state,
                                       self.stream_handle,
                                       self.state_callback,
                                       self.pub_callback,
                                       self.exception_callback)
        particles = self.parser.get_records(6)

        # Make sure the fifth particle has the correct values
        self.assert_result(self.test_particle1, particles[5])

        # Compare every particle against the expected yml records.
        test_data = self.get_dict_from_yml('good.yml')
        for i in range(0, 6):
            self.assert_result(test_data['data'][i], particles[i])
def test_set_state(self):
    """
    Test changing to a new state after initializing the parser and
    reading data, as if new data has been found and the state has changed.
    """
    # Renamed 'filepath' to 'file_path' for consistency with the
    # sibling tests in this file.
    file_path = os.path.join(RESOURCE_PATH, 'E0000001.DAT')
    # Context manager guarantees the handle is closed even if an
    # assertion fails (the original leaked the file on test failure).
    with open(file_path, 'rb') as self.stream_handle:
        # Moving the file position past the header and two records
        new_state = {StateKey.POSITION:
                     HEADER_BYTES + (WFP_E_GLOBAL_RECOVERED_ENG_DATA_SAMPLE_BYTES * 2)}
        self.parser = DostaLnWfpParser(self.config, new_state,
                                       self.stream_handle,
                                       self.state_callback,
                                       self.pub_callback,
                                       self.exception_callback)
        particles = self.parser.get_records(4)
        # Should end up with 4 particles
        self.assertEqual(len(particles), 4)
        self.assert_result(self.test_particle1, particles[3])

        # Moving the file position past the header and three records
        new_state = {StateKey.POSITION:
                     HEADER_BYTES + (WFP_E_GLOBAL_RECOVERED_ENG_DATA_SAMPLE_BYTES * 3)}
        self.parser = DostaLnWfpParser(self.config, new_state,
                                       self.stream_handle,
                                       self.state_callback,
                                       self.pub_callback,
                                       self.exception_callback)
        particles = self.parser.get_records(10)
        # Should end up with 10 particles
        self.assertEqual(len(particles), 10)
        self.assert_result(self.test_particle2, particles[9])
def test_bad_data(self):
    """
    Ensure that a SampleException is raised when bad data is encountered.
    """
    file_path = os.path.join(RESOURCE_PATH, 'E0000001-BAD-DATA.DAT')
    # Context manager guarantees the handle is closed even if the
    # expected exception is not raised (the original skipped close()
    # entirely when the assertRaises check failed).
    with open(file_path, 'rb') as self.stream_handle:
        self.parser = DostaLnWfpParser(self.config, self.start_state,
                                       self.stream_handle,
                                       self.state_callback,
                                       self.pub_callback,
                                       self.exception_callback)
        with self.assertRaises(SampleException):
            self.parser.get_records(1)
def create_large_yml(self):
    """
    Create a large yml file corresponding to an actual recovered dataset.
    This is not an actual test - it allows us to create what we need
    for integration testing, i.e. a yml file.
    """
    file_path = os.path.join(RESOURCE_PATH, 'E0000001.DAT')
    # Context manager closes the handle; the original never closed it.
    with open(file_path, 'rb') as self.stream_handle:
        self.parser = DostaLnWfpParser(self.config, self.start_state,
                                       self.stream_handle,
                                       self.state_callback,
                                       self.pub_callback,
                                       self.exception_callback)

        # In a single read, get all particles in this file.
        result = self.parser.get_records(1000)
        self.particle_to_yml(result, 'E0000001.yml')
def test_long_stream(self):
    """
    Test a long stream
    """
    file_path = os.path.join(RESOURCE_PATH, 'E0000002.DAT')
    # Context manager guarantees the handle is closed even if the
    # assertion fails (the original leaked the file on test failure).
    with open(file_path, 'rb') as self.stream_handle:
        self.parser = DostaLnWfpParser(self.config, self.start_state,
                                       self.stream_handle,
                                       self.state_callback,
                                       self.pub_callback,
                                       self.exception_callback)
        particles = self.parser.get_records(1000)
        # Should end up with 683 particles
        self.assertEqual(len(particles), 683)
def test_verify_record_against_yaml(self):
    """
    Read data from a file and pull out data particles one at a time.
    Verify that the results are those we expected.
    """
    file_path = os.path.join(RESOURCE_PATH, 'E0000001.DAT')
    # Context manager guarantees the handle is closed even if the
    # particle comparison fails (the original leaked the file on failure).
    with open(file_path, 'rb') as self.stream_handle:
        self.parser = DostaLnWfpParser(self.config, self.start_state,
                                       self.stream_handle,
                                       self.state_callback,
                                       self.pub_callback,
                                       self.exception_callback)

        # In a single read, get all particles in this file.
        result = self.parser.get_records(1000)
        self.assert_particles(result, 'E0000001.yml', RESOURCE_PATH)
def _build_recovered_parser(self, parser_state, stream_in):
    """
    Build and return the recovered parser
    @param parser_state starting parser state to pass to parser
    @param stream_in Handle of open file to pass to parser
    """
    # Copy the stored config before adding parser-specific keys so the
    # driver's shared _parser_config dict is not mutated as a side
    # effect (the original update() modified it in place on every call).
    config = dict(self._parser_config.get(DataSourceKey.DOSTA_LN_WFP))
    config.update({
        DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.dosta_ln_wfp',
        DataSetDriverConfigKeys.PARTICLE_CLASS: 'DostaLnWfpInstrumentParserDataParticle'
    })
    log.debug("My Config: %s", config)
    parser = DostaLnWfpParser(
        config,
        parser_state,
        stream_in,
        lambda state, ingested: self._save_parser_state(
            state, DataSourceKey.DOSTA_LN_WFP, ingested),
        self._data_callback,
        self._sample_exception_callback)
    return parser
def parse(unused, source_file_path, particle_data_handler):
    """
    Entry point: parse the given WFP E file and feed the resulting
    particles to the supplied particle_data_handler.

    @param unused Ignored (driver interface placeholder)
    @param source_file_path Path of the .DAT file to parse
    @param particle_data_handler Sink object for parsed particles
    @return the particle_data_handler, after the file stream is processed
    """
    log = get_logger()

    # Open in binary mode: the WFP E .DAT files are binary (every other
    # open of these files in this codebase uses 'rb'). Text mode would
    # corrupt the data on Windows and break byte-offset state tracking.
    with open(source_file_path, "rb") as stream_handle:

        def exception_callback(exception):
            log.debug("Exception: %s", exception)
            particle_data_handler.setParticleDataCaptureFailure()

        parser = DostaLnWfpParser(
            {DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.dosta_ln_wfp',
             DataSetDriverConfigKeys.PARTICLE_CLASS: 'DostaLnWfpInstrumentParserDataParticle'},
            {StateKey.POSITION: 0},
            stream_handle,
            lambda state, ingested: None,
            lambda data: None,
            exception_callback
        )

        driver = DataSetDriver(parser, particle_data_handler)
        driver.processFileStream()

    return particle_data_handler