def test_record_transformer_builder_throws_exception_on_missing_datasource(self):
    """Building a transformer from a map that names a nonexistent datasource must raise."""
    with self.assertRaises(dmap.NonexistentDatasourceException):
        dmap.RecordTransformerBuilder(
            self.yaml_initfile_path,
            map_name='missing_datasource_map').build()
def test_record_transform_throws_exception_on_missing_lookup_method(self):
    """Transforming with a map that references a missing lookup method must raise."""
    record = {
        'NAME': 'foo',
        'COLOR': 'blue',
        'SKU': '123.456.789',
        'ID': 22
    }
    # the build-then-transform sequence stays inside the assertRaises scope,
    # since the exception may surface at either stage
    with self.assertRaises(dmap.NoSuchLookupMethodException):
        transformer = dmap.RecordTransformerBuilder(
            self.yaml_initfile_path, map_name=INVALID_MAP_NAME).build()
        transformer.transform(record)
def when_we_read_and_transform_the_records(step):
    """BDD step: consume raw records from Kafka, run them through a
    record transformer, and collect the results in the scenario context.

    Depends on module-level `world` state (pipeline config, SQL Server
    credentials, data dir) and on live SQL Server and Kafka services.
    """
    test_context = world.test_environment.load_context(
        EXTRACT_TRANSFORM_CONSUME_SCENARIO)
    # the transformer's datasource lookups are backed by a persistence
    # manager bound to the legacy SQL Server database
    mssql_db = sqldbx.SQLServerDatabase('', 'Legacy')
    db_username = world.mssql_username
    db_password = world.mssql_password
    mssql_db.login(db_username, db_password, schema='mercury')
    pmgr = sqldbx.PersistenceManager(mssql_db)
    # build the transformer from the map file named in the pipeline config
    transform_map_filename = world.pipeline_config.transform_map
    map_file_path = os.path.join(world.data_dir, transform_map_filename)
    transformer_builder = dmap.RecordTransformerBuilder(map_file_path,
                                                        persistence_mgr=pmgr)
    tfmr = transformer_builder.build()
    knodes = world.pipeline_config.cluster.node_array
    # a kafka group is a numbered context shared by some number of consumers
    group = world.pipeline_config.get_user_defined_consumer_group(
        'scratch_group_2')
    topic = world.pipeline_config.raw_topic
    kreader = telegraf.KafkaIngestRecordReader(topic, knodes, group)
    # show how many partitions this topic spans
    metadata = kreader.consumer.partitions_for_topic(topic)
    print '### partitions for topic %s:\n%s' % (topic, '\n'.join(
        [str(p) for p in metadata]))
    # TopicPartition named tuple consists of the topic and a partition number
    tp = TopicPartition(topic, 0)
    # manually assign one or more partitions to the consumer --
    # required if we want to use explicit offsets
    kreader.consumer.assign([tp])
    offset = get_offset(topic)
    # NOTE(review): the consumer is assigned partition 0 above, but the seek
    # below targets whatever partition iteration order yields first from
    # `metadata` -- if that is not 0, seek() would hit an unassigned
    # partition. Looks intentional only if the topic has one partition;
    # confirm against the cluster setup.
    topic_partition = TopicPartition(topic, list(metadata)[0])
    kreader.consumer.seek(topic_partition, offset)
    # the relay accumulates transformed records as the reader consumes
    world_relay = WorldRelay(transformer=tfmr)
    kreader.read(world_relay, log)
    for rec in world_relay.read_list:
        test_context.consumed_raw_into_sst_record_list.append(rec)
def test_record_transform_resolves_record_value_using_lambda(self):
    """A lambda-backed field in the transform map should be evaluated against the source record."""
    src = {
        'ALIAS': 'foo',
        'COLOR': 'blue',
        'SKU': '123.456.789',
        'COUNT': 0,
        'ID': 22
    }
    transformer = dmap.RecordTransformerBuilder(
        self.yaml_initfile_path, map_name='lambda_map').build()
    result = transformer.transform(src)
    # with COUNT at 0, the lambda map should flag the item as not in stock
    self.assertFalse(result['in_stock'])
def setUp(self):
    """Resolve the transform-map YAML via MERCURY_HOME and build a baseline transformer."""
    self.local_env = common.LocalEnvironment('MERCURY_HOME')
    self.local_env.init()
    mercury_home = self.local_env.get_variable('MERCURY_HOME')
    self.yaml_initfile_path = os.path.join(mercury_home, TRANSFORM_YAML_FILE)
    self.log = logging.getLogger(LOG_ID)
    # datasource names exercised by the individual test cases
    self.good_datasource_name = 'SampleDatasource'
    self.bad_datasource_name = 'BadDatasource'
    self.nonexistent_datasource_name = 'NoDatasource'
    # a known-good builder/transformer pair shared by most tests
    self.builder = dmap.RecordTransformerBuilder(
        self.yaml_initfile_path, map_name=VALID_MAP_NAME)
    self.transformer = self.builder.build()
def load_transform_map(map_name: str, yaml_file_path: str):
    """Build and return a record transformer for *map_name* from the given YAML file."""
    builder = dmap.RecordTransformerBuilder(yaml_file_path,
                                            map_name=map_name)
    return builder.build()