def given_we_load_data_from_the_source_file_to_the_source_topic(step):
    topic = world.pipeline_config.raw_topic
    test_context = world.test_environment.load_context(EXTRACT_TRANSFORM_CONSUME_SCENARIO)

    knodes = world.pipeline_config.cluster.node_array
    kwriter = telegraf.KafkaIngestRecordWriter(knodes)
    kloader = telegraf.KafkaLoader(topic,
                                   kwriter,
                                   record_type='direct_sales_record',
                                   stream_id='test_stream_id',
                                   asset_id='test_asset_id')
    wloader = WorldLoader(topic,
                          kwriter,
                          record_type='direct_sales_record',
                          stream_id='test_stream_id',
                          asset_id='test_asset_id')

    processor = dmap.WhitespaceCleanupProcessor()
    extractor = dmap.CSVFileDataExtractor(processor, delimiter='|', quotechar='"')
    source_filename = world.pipeline_config.get_file_reference(test_context.test_file_alias)

    # extract the source file twice: once into the in-memory WorldLoader
    # (for later assertions) and once into the Kafka raw topic
    extractor.extract(source_filename, load_function=wloader.load)
    extractor.extract(source_filename, load_function=kloader.load)
    kwriter.sync(0.1)

    for rec in wloader.record_list:
        test_context.loaded_raw_record_list.append(rec)

    test_context.promise_queue_errors = kwriter.process_write_promise_queue()
def and_load_the_core_records(step):
    test_context = world.test_environment.load_context(EXTRACT_TRANSFORM_CONSUME_SCENARIO)

    knodes = world.pipeline_config.cluster.node_array
    kwriter = telegraf.KafkaIngestRecordWriter(knodes)
    topic = world.pipeline_config.staging_topic
    kloader = telegraf.KafkaLoader(topic,
                                   kwriter,
                                   record_type='direct_sales_record',
                                   stream_id='test_stream_id',
                                   asset_id='test_asset_id')
    wloader = WorldLoader(topic,
                          kwriter,
                          record_type='direct_sales_record',
                          stream_id='test_stream_id',
                          asset_id='test_asset_id')

    # push the previously consumed core records to the staging topic,
    # mirroring each write into the in-memory WorldLoader
    for record in test_context.consumed_raw_into_core_record_list:
        wloader.load(record['body'])
        kloader.load(record['body'])
    kwriter.sync(0.1)

    for rec in wloader.record_list:
        test_context.loaded_sst_record_list.append(rec)
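# The WorldLoader used by the steps above is not shown in this excerpt. Below is a
# minimal sketch, assuming (from its usage here) that it mirrors the
# telegraf.KafkaLoader constructor signature and simply captures every record passed
# to load() in an in-memory record_list for later assertions; the actual
# implementation may differ.
class WorldLoader(object):
    def __init__(self, topic, kafka_ingest_record_writer, **kwargs):
        self.topic = topic
        self.writer = kafka_ingest_record_writer
        self.metadata = kwargs  # e.g. record_type, stream_id, asset_id
        self.record_list = []

    def load(self, record):
        # keep a local copy of the record so the test can inspect what was loaded
        self.record_list.append(record)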
def test_loader_can_send_records(self):
    kloader = tg.KafkaLoader(self.target_topic,
                             self.kwriter,
                             pipeline_id='mx_test_pipeline',
                             record_type='mx_test_record')
    local_filename = 'data/sample_objectstore.csv'

    # no field processor needed for this test
    # processor = dmap.WhitespaceCleanupProcessor()
    extractor = dmap.CSVFileDataExtractor(None, delimiter='|', quotechar='"')
    extractor.extract(local_filename, load_function=kloader.load)
    self.kwriter.sync(0.1)

    # records were queued for writing, and every write promise resolved without error
    self.assertTrue(self.kwriter.promise_queue_size > 0)
    self.assertEqual(len(self.kwriter.process_write_promise_queue()), 0)
def when_we_load_n_records_from_the_source_file(step, max_lin):
    test_context = world.test_environment.load_context(DATA_COMMIT_SCENARIO)
    max_lines = int(max_lin)
    topic = world.pipeline_config.get_user_topic('scratch_topic')

    knodes = world.pipeline_config.cluster.node_array
    kwriter = telegraf.KafkaIngestRecordWriter(knodes)
    kloader = telegraf.KafkaLoader(topic,
                                   kwriter,
                                   record_type='direct_sales_record',
                                   stream_id='test_stream_id',
                                   asset_id='test_asset_id')

    processor = dmap.WhitespaceCleanupProcessor()
    extractor = dmap.CSVFileDataExtractor(processor, delimiter='|', quotechar='"')
    source_filename = world.pipeline_config.get_file_reference(test_context.test_file_alias)

    extractor.extract(source_filename, load_function=kloader.load, max_lines=max_lines)
    kwriter.sync(0.1)