def flatMap1(kv):
    """Flatten one sensor record into three keyed statistics.

    Returns a list of three KeyValuePairs keyed 0, 1, 2 carrying the
    mean, standard deviation, and log-of-sum of sensors 1-5 from the
    record's value dict. If the value is malformed (missing sensor keys,
    not a dict — e.g. a stop message string) the sentinel value -1 is
    emitted for each statistic instead.
    """
    # Sentinel output used whenever the record cannot be processed.
    flatlist = [KeyValuePair(0, -1), KeyValuePair(1, -1), KeyValuePair(2, -1)]
    val = kv.val
    try:
        sensorVals = [val['sensor1'], val['sensor2'], val['sensor3'],
                      val['sensor4'], val['sensor5']]
        flatlist = [KeyValuePair(0, np.average(sensorVals)),
                    KeyValuePair(1, np.std(sensorVals)),
                    KeyValuePair(2, np.log(np.sum(sensorVals)))]
    except Exception:
        # Best-effort by design: malformed records fall through to the
        # sentinels. Was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit — narrowed to Exception.
        pass
    return flatlist
def flatMapValues(self, func):
    '''Lazily replace each record's value with func(value) — expected to
    yield an iterable — and flatten, so one input record becomes one
    output record per produced value (key is repeated). Returns self for
    chaining.'''
    def explode(kv):
        # One KeyValuePair per element of the transformed value.
        return [KeyValuePair(kv.key, v) for v in func(kv.val)]
    self.stream = chain.from_iterable(map(explode, self.stream))
    return self
def kafkaconsumer(self):
    """Consume messages from the configured Kafka topic forever.

    Each message is deserialized into a KeyValuePair (tagged with its
    partition, offset, and receipt time) and pushed onto the builder
    queue without blocking.
    """
    print("Kafka Consumer START... topic:", self.topic)
    self.consumer.subscribe(self.topic)
    for msg in self.consumer:
        pair = KeyValuePair(serde(msg.key, self.keyserde),
                            serde(msg.value, self.valserde))
        pair.partition = msg.partition
        pair.offset = msg.offset
        # Receipt time, not broker timestamp.
        pair.time = time.time()
        self.builder.put_nowait(pair)
def mapValues(self, func):
    '''Lazily apply func to every record's value, leaving keys
    untouched. Returns self for chaining.'''
    self.stream = (KeyValuePair(kv.key, func(kv.val)) for kv in self.stream)
    return self
def mapValues(self, func):
    '''Apply func to every record's value and re-materialize the result
    into a new backing KStore (named "<current>_mapped"). Returns self
    for chaining.'''
    store_name = self.materialized + '_mapped'
    self.kstores[store_name] = KStore(store_name)
    # Transform values lazily, then populate the new table from them.
    mapped = (KeyValuePair(kv.key, func(kv.val)) for kv in self.stream)
    self.stream = self.table(mapped, self.kstores[store_name])
    self.materialized = store_name
    return self
def make_clicks():
    """Publish a randomly chosen name to the topic forever, pausing a
    random multiple of 0.5 s (1-7.5 s) between sends."""
    producer = KafkaProducer(bootstrap_servers=config['bootstrap.servers'])
    print('make_clicks')
    while True:
        clicked = names[randint(0, 6)]
        producer.send(topics, key='0'.encode('utf-8'),
                      value=clicked.encode('utf-8'))
        time.sleep(randint(2, 15) * 0.5)


if __name__ == '__main__':
    # Run the click generator in its own process.
    clicker = Process(target=make_clicks)
    clicker.start()
    # Builder feeds the stream from the Kafka topic.
    builder = KStreamBuilder(topics, config, keyserde='str', valserde='str')
    views = KStream(builder.builder)
    # Re-key every record by its value so counting groups per user.
    views.map(lambda kv: KeyValuePair(kv.val, kv.val, time=kv.time))
    # Count clicks per user over a 60 s window; keep users with >= 3.
    anomalous_users = views.count(materialized='usercounts',
                                  kwindow=KWindow(60)).filter(lambda kv: kv.val >= 3)
    # Back to a stream, peek for debugging, and publish to Kafka.
    anomalous_users.toStream().peeker(peek).to('anomalous', config,
                                               keyserde='str', valserde='str')
    # Note: KStream can be initialized with a KStreamBuilder or with a
    # stream (generator) plus a Kafka Producer.
Created on Feb 22, 2019 @author: Barnwaldo ''' import numpy as np from core.kbuilder import KStreamBuilder, KStream from core.kutils import KeyValuePair config = {'bootstrap.servers': 'localhost:9092', 'group.id': 'barnwaldo', 'session.timeout.ms': 6000} # config = {'bootstrap.servers': '192.168.21.3:9092', 'group.id': 'barnwaldo', 'session.timeout.ms': 6000} topics = 'endpt00' func1 = (lambda kv: KeyValuePair(kv.key / 10, kv.val if kv.val == ">>> stop message <<<" \ else (kv.val['sensor1'] + kv.val['sensor2'] + kv.val['sensor3'] + kv.val['sensor4'] + kv.val['sensor5']) /5)) func2 = (lambda kv: KeyValuePair(kv.key, kv.val if kv.val == ">>> stop message <<<" \ else (kv.val['sensor1'] + kv.val['sensor2'] + kv.val['sensor3'] + kv.val['sensor4'] + kv.val['sensor5']) /5)) pred1 = (lambda kv: kv.key < 4) pred2 = (lambda kv: kv.key < 7) pred3 = (lambda kv: True) predicates = [pred1, pred2, pred3] def flatMapValues1(val): flatlist = [-1, -1, -1] try: sensorVals = [val['sensor1'], val['sensor2'], val['sensor3'], val['sensor4'], val['sensor5']]
Word count Kafka Streams example - migrated from Java to Python pytreams ''' from core.kbuilder import KStreamBuilder, KStream from core.kutils import KeyValuePair # config info for Kafka broker config = { 'bootstrap.servers': '192.168.21.3:9092', 'group.id': 'barnwaldo', 'session.timeout.ms': 6000 } topics = 'word' if __name__ == '__main__': # define builder for streams/table builder = KStreamBuilder(topics, config, keyserde='str', valserde='str') # initialize Kstream with builder textlines = KStream(builder.builder) # stream with splitting text, flattening and grouping - use Python lambdas in pystreams methods textlines.flatMapValues(lambda text: text.lower().split()).groupBy( lambda kv: KeyValuePair(kv.val, kv.val)) # change stream to table with count on key - materialize is name of db table wordcounts = textlines.count(materialized='wordcounter') # change table back to stream and then send output to Kafka topic wordcounts.toStream().to('counts', config, keyserde='str', valserde='str') # Note that KStream can either be initialized with a KStreambuilder or a stream (generator) with a Kafka Producer