def test_external_transforms(self):
  # TODO: Move resolution of the expansion service address into
  # PipelineOptions.
  def get_expansion_service():
    return "localhost:" + str(self.expansion_port)

  with self.create_pipeline() as p:
    res = (
        p
        | GenerateSequence(
            start=1, stop=10, expansion_service=get_expansion_service()))

    assert_that(res, equal_to(list(range(1, 10))))

  # We expect this to fail because we do not have a Kafka cluster handy.
  # Nevertheless, we check that the transform is expanded by the
  # ExpansionService and that the pipeline fails during execution.
  with self.assertRaises(Exception) as ctx:
    with self.create_pipeline() as p:
      # pylint: disable=expression-not-assigned
      (
          p
          | ReadFromKafka(
              consumer_config={
                  'bootstrap.servers': 'notvalid1:7777, notvalid2:3531'
              },
              topics=['topic1', 'topic2'],
              key_deserializer='org.apache.kafka.'
              'common.serialization.'
              'ByteArrayDeserializer',
              value_deserializer='org.apache.kafka.'
              'common.serialization.'
              'LongDeserializer',
              expansion_service=get_expansion_service()))
  self.assertTrue(
      'No resolvable bootstrap urls given in bootstrap.servers' in str(
          ctx.exception),
      'Expected to fail due to invalid bootstrap.servers, but '
      'failed due to:\n%s' % str(ctx.exception))

  # We only test the expansion here; the pipeline is not executed.
  # pylint: disable=expression-not-assigned
  (
      self.create_pipeline()
      | Impulse()
      | Map(lambda input: (1, input))
      | WriteToKafka(
          producer_config={
              'bootstrap.servers': 'localhost:9092, notvalid2:3531'
          },
          topic='topic1',
          key_serializer='org.apache.kafka.'
          'common.serialization.'
          'LongSerializer',
          value_serializer='org.apache.kafka.'
          'common.serialization.'
          'ByteArraySerializer',
          expansion_service=get_expansion_service()))
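
# The test above assumes imports along these lines; a sketch with module paths
# as exposed by the Apache Beam Python SDK (exact locations may differ between
# SDK versions).
from apache_beam import Impulse
from apache_beam import Map
from apache_beam.io.external.generate_sequence import GenerateSequence
from apache_beam.io.kafka import ReadFromKafka
from apache_beam.io.kafka import WriteToKafka
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to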
import json
import typing

import apache_beam as beam
from apache_beam.io.kafka import WriteToKafka


def run_pipeline():
  # `brokers` and `kafka_topic` are assumed to be defined elsewhere
  # (e.g. as module-level configuration).
  with beam.Pipeline() as p:
    (
        p
        | beam.Create([{'a': 'alpha'}, {'b': 'beta'}])
        | 'Convert dict to byte string' >> beam.Map(
            lambda x: (b'', json.dumps(x).encode('utf-8')))
        # WriteToKafka expects a PCollection of (key, value) byte pairs.
        | beam.Map(lambda x: x).with_output_types(typing.Tuple[bytes, bytes])
        # | beam.Map(print)
        | WriteToKafka(
            producer_config={'bootstrap.servers': brokers},
            topic=kafka_topic))
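
# A possible read-back sketch (not in the original snippet) for the JSON
# records written by run_pipeline, assuming the same `brokers` and
# `kafka_topic` values. ReadFromKafka yields (key, value) byte pairs, so the
# JSON payload is decoded from the value. Cross-language Kafka IO needs a
# portable runner with Java available, and the read is unbounded by default.
from apache_beam.io.kafka import ReadFromKafka


def run_read_pipeline():
  with beam.Pipeline() as p:
    (
        p
        | ReadFromKafka(
            consumer_config={
                'bootstrap.servers': brokers,
                'auto.offset.reset': 'earliest'
            },
            topics=[kafka_topic])
        | 'Decode JSON' >> beam.Map(
            lambda kv: json.loads(kv[1].decode('utf-8')))
        | beam.Map(print))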
def build_write_pipeline(self, pipeline):
  _ = (
      pipeline
      | 'Impulse' >> beam.Impulse()
      | 'Generate' >> beam.FlatMap(lambda x: range(1000))  # pylint: disable=range-builtin-not-iterating
      | 'Reshuffle' >> beam.Reshuffle()
      | 'MakeKV' >> beam.Map(
          lambda x: (b'', str(x).encode())).with_output_types(
              typing.Tuple[bytes, bytes])
      | 'WriteToKafka' >> WriteToKafka(
          producer_config={'bootstrap.servers': self.bootstrap_servers},
          topic=self.topic,
          expansion_service=self.expansion_service))
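
# Hypothetical read-side counterpart (not in the original snippet): a sketch of
# how the records written by build_write_pipeline could be consumed and summed,
# assuming the same self.bootstrap_servers, self.topic and
# self.expansion_service attributes. Without bounding the source, the Kafka
# read is unbounded and the pipeline will not terminate on its own.
def build_read_pipeline(self, pipeline):
  _ = (
      pipeline
      | 'ReadFromKafka' >> ReadFromKafka(
          consumer_config={
              'bootstrap.servers': self.bootstrap_servers,
              'auto.offset.reset': 'earliest'
          },
          topics=[self.topic],
          expansion_service=self.expansion_service)
      # ReadFromKafka yields (key, value) byte pairs; the value holds the
      # stringified integer written above.
      | 'DecodeValue' >> beam.Map(lambda kv: int(kv[1].decode()))
      | 'Sum' >> beam.CombineGlobally(sum))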