def test_get_many(self):
    """
    Retrieve particles in two batches and verify the last particle of each.

    Requests 20 particles and compares the 20th against the first entry of
    'get_many_one.yml', then requests 30 more from the same parser and
    compares the 30th against the first entry of 'get_many_two.yml'.
    """
    file_path = os.path.join(RESOURCE_PATH, 'ctd_1_20131124T005004_458.mpk')

    # The context manager closes the file even when an assertion below fails.
    with open(file_path, 'rb') as stream_handle:
        state = {StateKey.PARTICLES_RETURNED: 0}
        parser = CtdpfCklMmpCdsParser(self.config, state, stream_handle,
                                      self.state_callback, self.pub_callback)

        particles = parser.get_records(20)

        # Should end up with 20 particles; assertEqual reports the actual
        # count on failure.
        self.assertEqual(len(particles), 20)

        test_data = self.get_dict_from_yml('get_many_one.yml')
        self.assert_result(test_data['data'][0], particles[19])

        particles = parser.get_records(30)

        # Should end up with 30 particles
        self.assertEqual(len(particles), 30)

        test_data = self.get_dict_from_yml('get_many_two.yml')
        self.assert_result(test_data['data'][0], particles[29])
def test_mid_state_start(self):
    """
    Start the parser from a mid-file state and verify one resulting particle.

    The initial state records 20 particles as already returned. Four more
    particles are requested and the fourth is compared against entry 23 of
    'set_state.yml'.
    """
    # Using two concatenated msgpack files to simulate two chunks.
    file_path = os.path.join(RESOURCE_PATH, 'set_state.mpk')

    # The context manager closes the file even when an assertion below fails.
    with open(file_path, 'rb') as stream_handle:
        # Start as if 20 particles had already been returned
        state = {StateKey.PARTICLES_RETURNED: 20}
        parser = CtdpfCklMmpCdsParser(self.config, state, stream_handle,
                                      self.state_callback, self.pub_callback)

        particles = parser.get_records(4)

        log.info(len(particles))

        # Should end up with 4 particles
        self.assertEqual(len(particles), 4)

        test_data = self.get_dict_from_yml('set_state.yml')
        self.assert_result(test_data['data'][23], particles[3])
def test_bad_data_two(self):
    """
    This test verifies that a SampleException is raised when an entire
    msgpack buffer is not msgpack.
    """
    file_path = os.path.join(RESOURCE_PATH, 'not-msg-pack.mpk')

    # The context manager closes the file even if the expected exception is
    # not raised and assertRaises fails.
    with open(file_path, 'rb') as stream_handle:
        state = {StateKey.PARTICLES_RETURNED: 0}
        parser = CtdpfCklMmpCdsParser(self.config, state, stream_handle,
                                      self.state_callback, self.pub_callback)

        with self.assertRaises(SampleException):
            parser.get_records(1)
def test_bad_data_one(self):
    """
    This test verifies that a SampleException is raised when msgpack data
    is malformed.
    """
    file_path = os.path.join(RESOURCE_PATH, 'ctd_1_20131124T005004_BAD.mpk')

    # The context manager closes the file even if the expected exception is
    # not raised and assertRaises fails.
    with open(file_path, 'rb') as stream_handle:
        state = {StateKey.PARTICLES_RETURNED: 0}
        parser = CtdpfCklMmpCdsParser(self.config, state, stream_handle,
                                      self.state_callback, self.pub_callback)

        with self.assertRaises(SampleException):
            parser.get_records(1)
def test_long_stream(self):
    """
    Request 200 particles from a longer stream and verify how many are
    actually returned.
    """
    # Using two concatenated msgpack files to simulate two chunks to get
    # more particles.
    file_path = os.path.join(RESOURCE_PATH, 'ctd_concat.mpk')

    # The context manager closes the file even when the assertion below fails.
    with open(file_path, 'rb') as stream_handle:
        state = {StateKey.PARTICLES_RETURNED: 0}
        parser = CtdpfCklMmpCdsParser(self.config, state, stream_handle,
                                      self.state_callback, self.pub_callback)

        # Attempt to retrieve 200 particles, but we will retrieve less
        particles = parser.get_records(200)

        # Should end up with 172 particles
        self.assertEqual(len(particles), 172)
def test_simple(self):
    """
    This test reads in a small number of particles and verifies the result
    of one of the particles.

    Six particles are requested, each is printed, and the sixth is compared
    against the first entry of 'good.yml'.
    """
    file_path = os.path.join(RESOURCE_PATH, 'ctd_1_20131124T005004_458.mpk')

    # The context manager closes the file even when the comparison below fails.
    with open(file_path, 'rb') as stream_handle:
        state = {StateKey.PARTICLES_RETURNED: 0}
        parser = CtdpfCklMmpCdsParser(self.config, state, stream_handle,
                                      self.state_callback, self.pub_callback)

        particles = parser.get_records(6)

        for particle in particles:
            # Single-argument call form prints the same under the Python 2
            # print statement and is also valid Python 3.
            print(particle.generate_dict())

        test_data = self.get_dict_from_yml('good.yml')
        self.assert_result(test_data['data'][0], particles[5])
def _build_parser(self, stream_handle):
    """
    Create a CtdpfCklMmpCdsParser reading from the given stream.

    The parser is constructed with no initial state and with state and
    publish callbacks that ignore their arguments and return None.

    :param stream_handle: stream object handed to the parser
    :return: the newly constructed CtdpfCklMmpCdsParser
    """
    # Tell the parser which particle module and class to use
    particle_settings = {
        DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.ctdpf_ckl_mmp_cds',
        DataSetDriverConfigKeys.PARTICLE_CLASS: 'CtdpfCklMmpCdsParserDataParticle'
    }

    # No-op callbacks
    ignore_state = lambda state, ingested: None
    ignore_data = lambda data: None

    return CtdpfCklMmpCdsParser(particle_settings,
                                None,
                                stream_handle,
                                ignore_state,
                                ignore_data)
def _build_parser(self, parser_state, infile):
    """
    Build and return the parser.

    The particle module and class entries are written into
    self._parser_config itself (updated in place), and the new parser is
    stored on self._parser before being returned.

    :param parser_state: state passed through to the parser
    :param infile: input file object passed through to the parser
    :return: the CtdpfCklMmpCdsParser stored in self._parser
    """
    config = self._parser_config

    particle_settings = {
        DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.ctdpf_ckl_mmp_cds',
        DataSetDriverConfigKeys.PARTICLE_CLASS: 'CtdpfCklMmpCdsParserDataParticle'
    }
    config.update(particle_settings)

    log.debug("My Config: %s", config)

    new_parser = CtdpfCklMmpCdsParser(config,
                                      parser_state,
                                      infile,
                                      self._save_parser_state,
                                      self._data_callback,
                                      self._sample_exception_callback)
    self._parser = new_parser

    return self._parser
def test_set_state(self):
    """
    Exercise constructing parsers from several different starting states
    over the same open file.

    Steps:
      1. Start from the beginning, retrieve 4 particles and verify the 4th.
      2. Build a new parser from a copy of the first parser's state,
         retrieve 4 more particles and verify the 4th of those.
      3. Restart from the beginning twice: once requesting a single
         particle and once requesting more than are available.
      4. Start from a particles-returned count of 80 and verify that no
         particles are produced.
    """
    # Using the default mspack test file.
    file_path = os.path.join(RESOURCE_PATH, 'set_state.mpk')

    # The context manager closes the file even when an assertion below fails.
    with open(file_path, 'rb') as stream_handle:
        # Start from the beginning (no particles returned yet)
        state = {StateKey.PARTICLES_RETURNED: 0}
        parser = CtdpfCklMmpCdsParser(self.config, state, stream_handle,
                                      self.state_callback, self.pub_callback)

        particles = parser.get_records(4)

        # Should end up with 4 particles
        self.assertEqual(len(particles), 4)

        log.info(parser._state)

        test_data = self.get_dict_from_yml('set_state.yml')
        self.assert_result(test_data['data'][3], particles[3])

        # Continue from where the first parser stopped, using a copy of its
        # state.
        state = copy.copy(parser._state)
        log.info(state)

        parser = CtdpfCklMmpCdsParser(self.config, state, stream_handle,
                                      self.state_callback, self.pub_callback)

        particles = parser.get_records(4)

        # Should end up with 4 particles
        self.assertEqual(len(particles), 4)

        self.assert_result(test_data['data'][7], particles[3])

        # Restart from the beginning and request a single particle
        state = {StateKey.PARTICLES_RETURNED: 0}
        parser = CtdpfCklMmpCdsParser(self.config, state, stream_handle,
                                      self.state_callback, self.pub_callback)

        particles = parser.get_records(1)

        self.assertEqual(len(particles), 1)

        # Restart from the beginning and request more particles than are
        # available; 30 are expected back.
        state = {StateKey.PARTICLES_RETURNED: 0}
        parser = CtdpfCklMmpCdsParser(self.config, state, stream_handle,
                                      self.state_callback, self.pub_callback)

        particles = parser.get_records(1000)

        self.assertEqual(len(particles), 30)

        self.assert_result(test_data['data'][29], particles[29])

        # Provide a bad particles returned count: 80 is more than the 30
        # particles retrieved above, so nothing should come back.
        state = {StateKey.PARTICLES_RETURNED: 80}
        parser = CtdpfCklMmpCdsParser(self.config, state, stream_handle,
                                      self.state_callback, self.pub_callback)

        particles = parser.get_records(1)

        self.assertEqual(len(particles), 0)
def parse(basePythonCodePath, sourceFilePath, particleDataHdlrObj):
    """
    Run the CtdpfCklMmpCdsParser over a source file through a DataSetDriver.

    Function parameters are always bound, so a NameError guard around them
    can never fire; each optional argument is instead checked against None
    so the fallbacks below are actually reachable.

    :param basePythonCodePath: directory appended to sys.path and used to
        locate 'res/config/mi-logging.yml'; os.curdir is used when None
    :param sourceFilePath: path of the file to parse; when None the path is
        taken from sys.argv[1], and the process exits with status 1 if that
        is missing
    :param particleDataHdlrObj: handler passed to the DataSetDriver; a new
        ParticleDataHandler is created when None
    :return: the particle data handler that was passed to the driver
    """
    if basePythonCodePath is None:
        basePythonCodePath = os.curdir
    sys.path.append(basePythonCodePath)

    # Project imports are deferred until sys.path has been extended above.
    from mi.logging import config
    config.add_configuration(os.path.join(basePythonCodePath, 'res', 'config', 'mi-logging.yml'))

    from mi.core.log import get_logger
    log = get_logger()

    from mi.dataset.dataset_driver import DataSetDriver, ParticleDataHandler
    from mi.dataset.parser.ctdpf_ckl_mmp_cds import CtdpfCklMmpCdsParser
    from mi.dataset.dataset_parser import DataSetDriverConfigKeys

    # Kept under its own name so it does not shadow the logging 'config'
    # module imported above.
    parser_config = {
        DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.ctdpf_ckl_mmp_cds',
        DataSetDriverConfigKeys.PARTICLE_CLASS: 'CtdpfCklMmpCdsParserDataParticle'
    }

    if particleDataHdlrObj is None:
        particleDataHdlrObj = ParticleDataHandler()

    if sourceFilePath is None:
        try:
            sourceFilePath = sys.argv[1]
        except IndexError:
            # Single-argument call form works under both Python 2 and 3.
            print("Need a source file path")
            sys.exit(1)

    def state_callback(state, ingested):
        """Ignore parser state updates."""
        pass

    def pub_callback(data):
        """Trace-log the data handed over by the parser."""
        log.trace("Found data: %s", data)

    # The context manager closes the file whether or not processing raises.
    with open(sourceFilePath, 'rb') as stream_handle:
        parser = CtdpfCklMmpCdsParser(parser_config, None, stream_handle,
                                      state_callback, pub_callback)
        driver = DataSetDriver(parser, particleDataHdlrObj)
        driver.processFileStream()

    return particleDataHdlrObj