def run(self):
    consumer = KafkaConsumer(bootstrap_servers='localhost:9092', auto_offset_reset='earliest')
    consumer.subscribe(['my-topic'])
    for message in consumer:
        print(message)
def main():
    # To consume latest messages and auto-commit offsets
    consumer = KafkaConsumer(
        REPLICA['TOPIC'],
        group_id=REPLICA['GROUP_ID'],
        bootstrap_servers=REPLICA['BROKER'])  # e.g. ['localhost:9092']; metadata_broker_list is the legacy name
    for message in consumer:
        # message value and key are raw bytes -- decode if necessary!
        # e.g., for unicode: `message.value.decode('utf-8')`
        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                             message.offset, message.key,
                                             message.value))

    # consume earliest available messages, don't commit offsets
    KafkaConsumer(auto_offset_reset='earliest', enable_auto_commit=False)

    # consume json messages
    KafkaConsumer(value_deserializer=lambda m: json.loads(m.decode('ascii')))

    # consume msgpack
    # KafkaConsumer(value_deserializer=msgpack.unpackb)

    # StopIteration if no message after 1sec
    KafkaConsumer(consumer_timeout_ms=1000)

    # Subscribe to a regex topic pattern
    consumer = KafkaConsumer()
    consumer.subscribe(pattern='^awesome.*')
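# A minimal sketch (not from the original snippet) of committing offsets manually when
# enable_auto_commit=False; the topic name, group id and broker address here are
# illustrative assumptions.
from kafka import KafkaConsumer

consumer = KafkaConsumer('my-topic',
                         bootstrap_servers=['localhost:9092'],
                         group_id='manual-commit-group',
                         enable_auto_commit=False,
                         auto_offset_reset='earliest')
for message in consumer:
    print(message.value.decode('utf-8'))
    consumer.commit()  # synchronous commit of the offsets consumed so far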
class KafkaConsumerSpout(Spout):

    def __init__(self):
        super(KafkaConsumerSpout, self).__init__(script=__file__)

    # Consumer for the 'badtopic' kafka topic
    # Server localhost, port 9092 -- can have multiple clusters for the same topic on different ports
    def initialize(self, conf, context):
        self.consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                                      auto_offset_reset='earliest')
        self.db = pymongo.MongoClient()
        self.topic = 'badtopic'
        self.consumer.subscribe([self.topic])

    # The output of this spout: tuple['sentence'] = tweet
    @classmethod
    def declareOutputFields(cls):
        return ['sentence', 'user']

    # Each tweet added to 'badtopic' becomes a tuple
    # For each tuple, data is saved to MongoDB: DB = BOARD<user>, collection = bad
    def nextTuple(self):
        for message in self.consumer:
            algo = message.value
            if len(algo) > 4:
                user = algo[:1]
                if user.isdigit():
                    aux = 'BOARD' + user
                    algo = algo[2:len(algo)]
                    if algo[0] == ' ':
                        algo = algo[1:len(algo)]
                    self.db[aux].bad.insert_one({'tweet': algo})
                    algo = algo.encode('utf-8', 'ignore')
                    storm.emit([algo, user])
def consume_messages(topics):
    # stop iterations after 20 mins
    consumer = KafkaConsumer(bootstrap_servers=[port])
    consumer.subscribe(topics)
    count = 0
    print(port)
    for message in consumer:
        # check for stopping input
        signal.signal(signal.SIGINT, signal_handler)
        incoming_message = json.loads(message.value, object_hook=_tostring)
        incoming_topic = message.topic
        # round trip for consistent values
        # trip_dist = round(incoming_message["trip_distance"][str(0)], 2)
        count = count + 1
        print("--------------")
        print(incoming_message)
        print("--------------")
        new_entry = {"pick_location": {"lat": float(incoming_message["pickup_latitude"]),
                                       "lon": float(incoming_message["pickup_longitude"])},
                     "drop_location": {"lat": float(incoming_message["dropoff_latitude"]),
                                       "lon": float(incoming_message["dropoff_longitude"])},
                     "predicted_timedelta": float(incoming_message["prediction_mins"]),
                     "@hour": incoming_message["pick_up_hour"]}
        print(new_entry)
        es.index(index=incoming_topic, doc_type=incoming_topic[:-1],
                 id=str(count), body=new_entry)
def run(self):
    consumer = KafkaConsumer(bootstrap_servers='172.16.218.128:10021')
    consumer.subscribe(['test'])
    for msg in consumer:
        # iterating the consumer already yields each message; calling next() here again would skip records
        print(msg)
class KafkaReader():

    def __init__(self, topic):
        # To consume latest messages and auto-commit offsets
        self.consumer = KafkaConsumer(group_id='my-group',
                                      bootstrap_servers='localhost:9092',
                                      enable_auto_commit=True)
        self.consumer.subscribe(topics=[topic])
        self.buffer = []

    def read_message(self):
        # print(len(self.buffer))
        # if len(self.buffer) == 0:
        #     self.get_batch()
        # if len(self.buffer) > 0:
        #     return self.buffer.pop(0)
        # else:
        #     return None
        return next(self.consumer)

    def get_batch(self):
        for message in self.consumer:
            # message value and key are raw bytes -- decode if necessary!
            # e.g., for unicode: `message.value.decode('utf-8')`
            self.buffer.append(message)
            print(self.buffer)
def Consumer():
    data = []
    start_time = timer()
    name = multiprocessing.current_process().name
    # print(name, 'Starting')
    while True:
        print(name, 'Starting')
        consumer = KafkaConsumer('topic-weather-stations',
                                 group_id='consumer-weather-data',
                                 bootstrap_servers=['vm1:9092'],
                                 consumer_timeout_ms=15000,
                                 heartbeat_interval_ms=1000)
        consumer.zookeeper_connect = 'vm1:2181'
        try:
            for message in consumer:
                data.append(message.value)
                if len(data) > 15000:
                    insert_weather_stations(data, name)
                    data = []
                else:
                    continue
        finally:
            print(name, 'Exiting now')
            if len(data) > 0:
                insert_weather_stations(data, name)
                data = []
            sys.stdout.flush()
            consumer.close()
def run(self):
    consumer = KafkaConsumer(bootstrap_servers=IP, auto_offset_reset='earliest')
    consumer.subscribe(['archive_test'])
    for message in consumer:
        print(message.value)
def Consumer():
    data = []
    start_time = timer()
    name = multiprocessing.current_process().name
    while True:
        print(name, 'Starting')
        consumer = KafkaConsumer('topic-weather-data',
                                 group_id='consumer-weather-data',
                                 bootstrap_servers=['vm1:9092'],
                                 consumer_timeout_ms=14000,
                                 heartbeat_interval_ms=1000)
        consumer.zookeeper_connect = 'vm1:2181'
        try:
            for message in consumer:
                data.append(message.value)
                if len(data) > 5000:
                    insert_raw_data(data, name)
                    # collect_data(data)
                    data = []
                else:
                    continue
        finally:
            print(name, 'Exiting now', len(data))
            if len(data) > 0:
                try:
                    insert_raw_data(data, name)
                    # collect_data(data)
                    data = []
                except Exception as e:
                    print('Error due to ', e)
            sys.stdout.flush()
            print(name, 'Closing out', timer() - start_time)
            consumer.close()
def main(): parser = argparse.ArgumentParser(description='Feed Apache Samza metrics into Prometheus.') parser.add_argument('--brokers', metavar='BROKERS', type=str, required=True, help='list of comma-separated kafka brokers: host[:port],host[:port],...') parser.add_argument('--port', metavar='PORT', type=int, nargs='?', default=8080, help='port to serve metrics to Prometheus (default: 8080)') parser.add_argument('--topic', metavar='TOPIC', type=str, nargs='?',default='samza-metrics', help='name of topic to consume (default: "samza-metrics")') parser.add_argument('--from-beginning', action='store_const', const=True, help='consume topic from offset 0') parser.add_argument('--ttl', metavar='GAUGES_TTL', type=int, nargs='?', help='time in seconds after which a metric (or label set) is no longer reported when not updated (default: 60s)') args = parser.parse_args() brokers = args.brokers.split(',') consumer = KafkaConsumer(args.topic, group_id=KAFKA_GROUP_ID, bootstrap_servers=brokers) start_http_server(args.port) set_gauges_ttl(args.ttl) if args.from_beginning: consumer.set_topic_partitions((args.topic, 0, 0)) # FIXME: beginning may not be offset 0 start_ttl_watchdog_thread() try: consume_topic(consumer, args.brokers) except KeyboardInterrupt: pass # FIXME : should we close consumer ? print('Shutting down')
def generator(): keep_alive_in_a_row = 0 messages_read = 0 # init batch messages_read_in_batch = 0 current_batch = {partition: [] for partition in partitions} batch_start_time = time.time() with kafka_pool.kafka_client() as client: consumer = KafkaConsumer(topics, kafka_client=client, auto_commit_enable=False, consumer_timeout_ms=200) while True: try: message = consumer.next() # if we read the message - reset keep alive counter keep_alive_in_a_row = 0 # put message to batch messages_read += 1 messages_read_in_batch += 1 current_batch[message.partition].append(message.value.decode('utf-8')) except ConsumerTimeout: pass # check if it's time to send the batch time_since_batch_start = time.time() - batch_start_time latest_offsets = consumer.offsets("fetch") if time_since_batch_start >= opts['batch_flush_timeout'] != 0 or \ messages_read_in_batch >= opts['batch_limit']: yield from process_batch(latest_offsets, current_batch) # if we hit keep alive count limit - close the stream if messages_read_in_batch == 0: if keep_alive_in_a_row >= opts['batch_keep_alive_limit'] != -1: break keep_alive_in_a_row += 1 # init new batch messages_read_in_batch = 0 current_batch = {partition: [] for partition in partitions} batch_start_time = time.time() yield BATCH_SEPARATOR # check if we reached the stream timeout or message count limit time_since_start = time.time() - start if time_since_start >= opts['stream_timeout'] > 0 or 0 < opts['stream_limit'] <= messages_read: if messages_read_in_batch > 0: yield from process_batch(latest_offsets, current_batch) break
class ChangeFeedPillowTest(SimpleTestCase): # note: these tests require a valid kafka setup running def setUp(self): self._fake_couch = FakeCouchDb() # use a 'real' db name here so that we don't cause other # tests down the line to fail. # Specifically KafkaChangeFeedTest.test_multiple_topics_with_partial_checkpoint self._fake_couch.dbname = 'test_commcarehq' with trap_extra_setup(KafkaUnavailableError): self.consumer = KafkaConsumer( topics.CASE, group_id='test-consumer', bootstrap_servers=[settings.KAFKA_URL], consumer_timeout_ms=100, ) self.pillow = get_change_feed_pillow_for_db('fake-changefeed-pillow-id', self._fake_couch) def test_process_change(self): document = { 'doc_type': 'CommCareCase', 'type': 'mother', 'domain': 'kafka-test-domain', } self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document)) message = self.consumer.next() change_meta = change_meta_from_kafka_message(message.value) self.assertEqual(COUCH, change_meta.data_source_type) self.assertEqual(self._fake_couch.dbname, change_meta.data_source_name) self.assertEqual('test-id', change_meta.document_id) self.assertEqual(document['doc_type'], change_meta.document_type) self.assertEqual(document['type'], change_meta.document_subtype) self.assertEqual(document['domain'], change_meta.domain) self.assertEqual(False, change_meta.is_deletion) with self.assertRaises(ConsumerTimeout): self.consumer.next() def test_process_change_with_unicode_domain(self): document = { 'doc_type': 'CommCareCase', 'type': 'mother', 'domain': u'हिंदी', } self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document)) message = self.consumer.next() change_meta = change_meta_from_kafka_message(message.value) self.assertEqual(document['domain'], change_meta.domain) def test_no_domain(self): document = { 'doc_type': 'CommCareCase', 'type': 'mother', 'domain': None, } self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document)) message = self.consumer.next() change_meta = change_meta_from_kafka_message(message.value) self.assertEqual(document['domain'], change_meta.domain)
def step(self):
    # Connect to Cassandra
    cluster = Cluster(['192.168.3.2'], port=9042)
    session = cluster.connect()

    # Link to kafka
    consumer = KafkaConsumer('qc-qualitative-persist',
                             bootstrap_servers="192.168.3.5:9092")

    # Process observations
    for msg in consumer:
        split_msg = string.split(msg.value, "::")
        if len(split_msg) == 9:
            session.execute(
                """
                INSERT INTO observation.observations_qc_qualitative
                    (feature, procedure, observableproperty, year, month,
                     phenomenontimestart, qualifier, qualifiervalue, comment)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
                """,
                (split_msg[0], split_msg[1], split_msg[2], int(split_msg[3]),
                 int(split_msg[4]), int(split_msg[5]), split_msg[6],
                 split_msg[7], split_msg[8]))

    # Close link to kafka
    consumer.close()
    cluster.shutdown()
def check_kafka_events():
    global loopCondition
    from kafka import KafkaConsumer, KafkaClient, SimpleProducer

    warehouse_addr = Conf.getWareHouseAddr()
    # Legacy kafka-python (<=0.9) consumer API: fetch_messages()/task_done()
    consumer = KafkaConsumer("%sResult" % wk.options.warehouse,
                             bootstrap_servers=[warehouse_addr],
                             group_id="cnlab",
                             auto_commit_enable=True,
                             auto_commit_interval_ms=30 * 1000,
                             auto_offset_reset='smallest')

    while loopCondition:
        for message in consumer.fetch_messages():
            print("topic=%s, partition=%s, offset=%s, key=%s " % (message.topic, message.partition,
                                                                  message.offset, message.key))
            task = cPickle.loads(message.value)

            if task.state == Task.TASK_FINISHED:
                print("taskId:%s,success!!!:%s" % (task.id, task.result))
            else:
                print("taskId:%s,failed!!!" % task.id)

            consumer.task_done(message)
            last_data_time = time.time()
            if not loopCondition:
                break
def dump_data( cls, topic=None, timeout=None, poll_timeout=None, enable_auto_commit=False): # TODO: remove this hack # HACK log.debug("Wait 5s to allow kafka node to be ready") time.sleep(5) topic = topic or cls.TOPIC endpoints = list(get_kafka_endpoints()) log.debug("Connect to kafka as consumer - %s", endpoints) if not endpoints: raise RuntimeError("Kafka endpoints not defined") consumer = KafkaConsumer( topic, auto_offset_reset='earliest', enable_auto_commit=enable_auto_commit, value_deserializer=cls.SERIALIZER.loads, bootstrap_servers=endpoints, consumer_timeout_ms=timeout or -1, ) # TODO use native kafka-python poll if poll_timeout: while True: yield list(data.value for data in consumer) time.sleep(poll_timeout / 1000.0) else: for data in consumer: yield data.value consumer.close()
def test_process_change(self): consumer = KafkaConsumer( topics.CASE, group_id='test-consumer', bootstrap_servers=[settings.KAFKA_URL], consumer_timeout_ms=100, ) pillow = ChangeFeedPillow(self._fake_couch, kafka=get_kafka_client(), checkpoint=None) document = { 'doc_type': 'CommCareCase', 'type': 'mother', 'domain': 'kafka-test-domain', } pillow.process_change(Change(id='test-id', sequence_id='3', document=document)) message = consumer.next() change_meta = change_meta_from_kafka_message(message.value) self.assertEqual(COUCH, change_meta.data_source_type) self.assertEqual(self._fake_couch.dbname, change_meta.data_source_name) self.assertEqual('test-id', change_meta.document_id) self.assertEqual(document['doc_type'], change_meta.document_type) self.assertEqual(document['type'], change_meta.document_subtype) self.assertEqual(document['domain'], change_meta.domain) self.assertEqual(False, change_meta.is_deletion) with self.assertRaises(ConsumerTimeout): consumer.next()
def test_end_to_end(kafka_broker): connect_str = 'localhost:' + str(kafka_broker.port) producer = KafkaProducer(bootstrap_servers=connect_str, max_block_ms=10000, value_serializer=str.encode) consumer = KafkaConsumer(bootstrap_servers=connect_str, group_id=None, consumer_timeout_ms=10000, auto_offset_reset='earliest', value_deserializer=bytes.decode) topic = random_string(5) for i in range(1000): producer.send(topic, 'msg %d' % i) producer.flush() producer.close() consumer.subscribe([topic]) msgs = set() for i in range(1000): try: msgs.add(next(consumer).value) except StopIteration: break assert msgs == set(['msg %d' % i for i in range(1000)])
def run(self):
    consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
    # consumer.unsubscribe()
    consumer.subscribe(['tfidf'])
    for message in consumer:
        yield (message)
def test_subscription_copy(self):
    consumer = KafkaConsumer('foo', api_version=(0, 10))
    sub = consumer.subscription()
    assert sub is not consumer.subscription()
    assert sub == set(['foo'])
    sub.add('fizz')
    assert consumer.subscription() == set(['foo'])
def step(self):
    # Connect to Cassandra
    cluster = Cluster(['192.168.3.2'], port=9042)
    session = cluster.connect()

    # Link to kafka
    consumer = KafkaConsumer('observation-persist',
                             bootstrap_servers="192.168.3.5:9092")

    # Process observations
    for msg in consumer:
        split_msg = string.split(msg.value, "::")
        if len(split_msg) == 16:
            session.execute(
                """
                INSERT INTO observation.observations_numeric
                    (feature, procedure, observableproperty, year, month,
                     phenomenontimestart, phenomenontimeend, value, quality, accuracy,
                     status, processing, uncertml, comment, location, parameters)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """,
                (split_msg[0], split_msg[1], split_msg[2], int(split_msg[3]),
                 int(split_msg[4]), int(split_msg[5]), int(split_msg[6]),
                 float(split_msg[7]), split_msg[8], float(split_msg[9]),
                 split_msg[10], split_msg[11], split_msg[12],
                 split_msg[13], split_msg[14], split_msg[15]))

    # Close link to kafka
    consumer.close()
    cluster.shutdown()
class KafkaPythonClient(PythonClient): def __init__(self,topic=topic_name, kafkaHost = kafka_host, zookeeperHost=zookeeper_host): self.config["topic"] = topic self.config["kafkaHost"] = kafkaHost self.config["zookeeperHost"] = zookeeperHost super(KafkaPythonClient, self).__init__() def createProducer(self, kafkaSync): self.config["kafkaSync"] = kafkaSync self.producer = KafkaProducer(bootstrap_servers=self.config["kafkaHost"]) def createConsumer(self): self.consumer = KafkaConsumer(bootstrap_servers=self.config["kafkaHost"], enable_auto_commit=True, auto_offset_reset='latest',consumer_timeout_ms=1000) self.consumer.subscribe([self.config["topic"]]) def produce(self, num_msg=20000): self.msgCount = num_msg for x in range (self.msgCount): self.prtProgress(x, 10000) result = self.producer.send(self.config["topic"], self.msg) if self.config["kafkaSync"] == True: # block for "synchronous" mode: try: result_metadata = result.get(timeout=10) except KafkaError: print "*** KAFKA ERROR ***" pass if (x >= 10000): sys.stdout.write('\n') def consume(self, num_msg): count = 0 for message in self.consumer: count += 1 self.prtProgress(count, 10000) sys.stdout.write('\n') if num_msg > 0: if count != num_msg: print "ERROR: KafkaPythonClient.consume: # of messages not as expected, read: {}, expected: {}".format(count, num_msg) return count def startProducer(self): pass def stopProducer(self): self.beforeFlushTimer(self.timeDict['producer']) if self.config["kafkaSync"] == False: self.producer.flush() def stopConsumer(self): pass def initCount(self): self.consume(0) # for p in self.consumer.partitions_for_topic(self.config['topic']): # tp = TopicPartition(self.config['topic'], p) # self.consumer.assign([tp]) # committed = self.consumer.committed(tp) # consumer.seek_to_end(tp) def finalize(self): pass
class ChangeFeedPillowTest(SimpleTestCase): # note: these tests require a valid kafka setup running def setUp(self): self._fake_couch = FakeCouchDb() self._fake_couch.dbname = 'test-couchdb' with trap_extra_setup(KafkaUnavailableError): self.consumer = KafkaConsumer( topics.CASE, group_id='test-consumer', bootstrap_servers=[settings.KAFKA_URL], consumer_timeout_ms=100, ) self.pillow = ChangeFeedPillow( 'fake-changefeed-pillow-id', self._fake_couch, kafka=get_kafka_client(), checkpoint=None ) def test_process_change(self): document = { 'doc_type': 'CommCareCase', 'type': 'mother', 'domain': 'kafka-test-domain', } self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document)) message = self.consumer.next() change_meta = change_meta_from_kafka_message(message.value) self.assertEqual(COUCH, change_meta.data_source_type) self.assertEqual(self._fake_couch.dbname, change_meta.data_source_name) self.assertEqual('test-id', change_meta.document_id) self.assertEqual(document['doc_type'], change_meta.document_type) self.assertEqual(document['type'], change_meta.document_subtype) self.assertEqual(document['domain'], change_meta.domain) self.assertEqual(False, change_meta.is_deletion) with self.assertRaises(ConsumerTimeout): self.consumer.next() def test_process_change_with_unicode_domain(self): document = { 'doc_type': 'CommCareCase', 'type': 'mother', 'domain': u'हिंदी', } self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document)) message = self.consumer.next() change_meta = change_meta_from_kafka_message(message.value) self.assertEqual(document['domain'], change_meta.domain) def test_no_domain(self): document = { 'doc_type': 'CommCareCase', 'type': 'mother', 'domain': None, } self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document)) message = self.consumer.next() change_meta = change_meta_from_kafka_message(message.value) self.assertEqual(document['domain'], change_meta.domain)
def list_topics():
    # b_host = 'localhost'
    # b_port = '9092'
    b_host = request.args.get('host')
    b_port = request.args.get('port')
    consumer = KafkaConsumer(bootstrap_servers=b_host + ':' + b_port)
    # print(consumer.topics())
    return render_template('broker/list_topics.html', topics=consumer.topics())
def start(self):
    kafka_brokers = '{0}:{1}'.format(self._server, self._port)
    consumer = KC(bootstrap_servers=[kafka_brokers], group_id=self._topic)
    partition = [TopicPartition(self._topic, int(self._id))]
    consumer.assign(partitions=partition)
    consumer.poll()
    return consumer
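# A possible way to drive the assigned consumer returned by start() above (a sketch with
# assumed topic/broker names, not the project's actual caller): poll() returns a dict of
# {TopicPartition: [records]} fetched since the previous call.
from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'], group_id='demo-group')
consumer.assign([TopicPartition('demo-topic', 0)])
while True:
    records = consumer.poll(timeout_ms=1000)
    for tp, messages in records.items():
        for message in messages:
            print(tp.partition, message.offset, message.value)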
def run(self):
    consumer = KafkaConsumer(bootstrap_servers='localhost:9092', auto_offset_reset='latest')
    consumer.subscribe(['parking-test-summary'])
    while True:
        msg = consumer.poll(10)
        time.sleep(5)
        print(msg)
def func(topic, partition):
    i = 0
    consumer = KafkaConsumer(bootstrap_servers='104.154.53.184:6667',
                             group_id='grp-5327',
                             auto_offset_reset='earliest',
                             consumer_timeout_ms=10000)
    consumer.assign([TopicPartition(topic, partition)])
    for msg in consumer:
        i = i + 1
    print(i)
def Consumer():
    # consumer = KafkaConsumer(b"test", group_id=b"my_group",
    #                          metadata_broker_list=["deepc04.acis.ufl.edu:9092"])
    consumer = KafkaConsumer(bootstrap_servers='deepc04.acis.ufl.edu:9092',
                             auto_offset_reset='earliest')
    consumer.subscribe(['test'])
    for message in consumer:
        # This will wait and print messages as they become available
        print(message)
def check_kafka_is_running():
    # Verify we can connect to the Kafka broker and list its topics
    consumer = KafkaConsumer(bootstrap_servers=get_external_ip() + ':' + str(KAFKA_BROKER_PORT),
                             auto_offset_reset='earliest')
    mytopic = consumer.topics()
    return 1
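# A hedged variant (an assumption, not the project's actual helper) that returns a boolean
# instead of raising when no broker is reachable; the default bootstrap address is illustrative.
from kafka import KafkaConsumer
from kafka.errors import NoBrokersAvailable

def kafka_is_running(bootstrap='localhost:9092'):
    try:
        consumer = KafkaConsumer(bootstrap_servers=bootstrap, consumer_timeout_ms=1000)
        consumer.topics()  # metadata round-trip to the broker
        consumer.close()
        return True
    except NoBrokersAvailable:
        return False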
def run(self): print "consummer" consumer = KafkaConsumer(bootstrap_servers='kafka:9092', auto_offset_reset='earliest') print "consummer ... ok" consumer.subscribe(['my-topic']) for message in consumer: print (message)
def start_consumer(kafka_servers, kafka_topic):
    print(kafka_servers)
    consumer = KafkaConsumer(bootstrap_servers=kafka_servers)
    consumer.subscribe([kafka_topic])
    schema = avro.schema.parse(open("DefaultEventRecord.avsc").read())
    reader = avro.io.DatumReader(schema)
    for message in consumer:
        handle_event(message, reader)
""" 读取 kafka 的用户操作数据并打印 """ from kafka import KafkaConsumer topic = 'test-kafka' bootstrap_servers = ['lax0-obd-206.qtlcdn.net:19092] group_id = 'group7' consumer = KafkaConsumer( topic, # topic的名称 group_id=group_id, # 指定此消费者实例属于的组名,可以不指定 bootstrap_servers=bootstrap_servers, # 指定kafka服务器 auto_offset_reset='latest', # 'smallest': 'earliest', 'largest': 'latest' ) for msg in consumer: print(msg.value.decode('utf-8').encode('utf-8').decode('unicode_escape'))
from kafka import KafkaConsumer from kafka import TopicPartition import numpy as np import base64 import bz2 from flask_socketio import SocketIO, Namespace, emit socketio = SocketIO(app) socketio.init_app(app, cors_allowed_origins="*") # from flask.request import namespace consumer = KafkaConsumer(group_id='group1', bootstrap_servers=['192.168.1.185:9092'], fetch_max_bytes=700000) consumer.assign([TopicPartition(topic="test17", partition=0)]) def print_message(): for msg in consumer: if msg != None: # print(msg.value) img = msg.value b64 = bz2.decompress(img) img = base64.b64decode(b64) img = np.asarray(bytearray(img), dtype='uint8').tolist() print(img) emit('data', img)
from kafka import KafkaConsumer
import json

consumer = KafkaConsumer('ilya.kobelev',
                         bootstrap_servers='localhost:9092',
                         value_deserializer=lambda m: json.loads(m.decode('ascii')))

for msg in consumer:
    print(msg.value)
def main():
    consumer = KafkaConsumer('topic_test_cluster',
                             bootstrap_servers=['master:9092'])

    print(consumer.partitions_for_topic('topic_test_cluster'))
    print(consumer.topics())
    print(consumer.subscription())
    print(consumer.assignment())
    print(consumer.beginning_offsets(consumer.assignment()))

    # Read from partition 2, starting at offset 5
    consumer.seek(TopicPartition(topic=u'topic_test_cluster', partition=2), 5)

    for msg in consumer:
        print('%s:%d:%d: key=%s value=%s' % (msg.topic, msg.partition, msg.offset, msg.key, msg.value))
from kafka import KafkaConsumer
import configparser

if __name__ == '__main__':
    # read config
    conf = configparser.ConfigParser()
    conf.read('../config.cfg')
    kafka_topic = conf.get('Kafka', 'topic.in')
    kafka_srv = conf.get('Kafka', 'bootstrap_servers')

    # create and run simple consumer
    consumer = KafkaConsumer(kafka_topic, bootstrap_servers=kafka_srv)
    count = 0
    for msg in consumer:
        count += 1
        print(count)
#!/usr/bin/python
import socket
import dpkt
from dpkt.ip import IP
from dpkt.ethernet import Ethernet
from dpkt.arp import ARP
from kafka import KafkaConsumer


def print_pkt(pkt):
    eth = dpkt.ethernet.Ethernet(pkt)
    if eth.type != dpkt.ethernet.ETH_TYPE_IP:
        print('Non IP Packet type not supported')
    else:
        ip = eth.data
        do_not_fragment = bool(dpkt.ip.IP_DF)
        more_fragments = bool(dpkt.ip.IP_MF)
        fragment_offset = ip.off & dpkt.ip.IP_OFFMASK
        print('IP: %s -> %s (len=%d ttl=%d DF=%d MF=%d offset=%d)\n' %
              (socket.inet_ntoa(ip.src), socket.inet_ntoa(ip.dst), ip.len, ip.ttl,
               do_not_fragment, more_fragments, fragment_offset))


consumer = KafkaConsumer('pcap_test')
for msg in consumer:
    print(msg.key)
    print_pkt(msg.value)
class Master: """Full implementation of Master class""" def __init__(self): self.spiderEnrollChannel = KafkaConsumer( 'spiderEnroll', bootstrap_servers=['18.144.51.15:9092'], group_id='unique') self.urlChannel = KafkaConsumer( 'newUrl', bootstrap_servers=['18.144.51.15:9092'], group_id='unique') self.urls = [] self.spiders = [] # self.lastTimeSend = def loadConfig(self): self.domainPriority = [] # a list of domain ordered by priority self.crawlSpeed = 3 # per second for each spider def checkUrl(self): if len(self.urls) > 0: return else: print('wait for url') for m in self.urlChannel: self.urls.append(m.value.decode('utf-8')) # print(m.value.decode('utf-8')) print('load url: ' + m.value.decode('utf-8')) break def checkSpider(self): if len(self.spiders) > 0: return else: print('wait for spider') for m in self.spiderEnrollChannel: self.spiders.append(m.value.decode('utf-8')) # print(m.value.decode('utf-8')) print('load spider: ' + m.value.decode('utf-8')) break def fetchAllSpiders(self): print("fetch all spiders") msg = self.spiderEnrollChannel.poll() for aKey in msg.keys(): for localMessage in msg[aKey]: print(localMessage.value.decode('utf-8')) self.spiders.append(localMessage.value.decode('utf-8')) print('done') print(self.spiders) def fetchAllUrls(self): print('fetch all urls') msg = self.urlChannel.poll() for aKey in msg.keys(): for localMessage in msg[aKey]: print(localMessage.value.decode('utf-8')) self.urls.append(localMessage.value.decode('utf-8')) print('done') print(self.urls) def sendUrl(self, url): spiderIndex = randint(0, len(spiders)) nextSpider = self.spiders[spiderIndex] print('send url: ' + url + " to " + nextSpider) kafkaHost = '18.144.51.15:9092' p = KafkaProducer(bootstrap_servers=[kafkaHost]) future = p.send(nextSpider, url.encode('ascii')) try: record_metadata = future.get(timeout=10) except KafkaError: # Decide what to do if produce request failed... log.exception() print("send error") # Successful result returns assigned partition and offset # print (record_metadata.topic) # print (record_metadata.partition) # print (record_metadata.offset) def run(self): self.loadConfig() while True: # load config or predefined here # url checks self.checkUrl() # spider checks self.checkSpider() # fetch all spiders self.fetchAllSpiders() # fetch all urls self.fetchAllUrls() # fetch and delete for url in self.urls: self.sendUrl(url)
import re
from datetime import datetime, timedelta

from kafka import KafkaConsumer, TopicPartition


def parse_message(message, ratings_file):
    """Parse a Kafka `message` and save rating to `ratings_file`."""
    m = re.search(
        r'^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}),(\d+),GET /rate/(.*?\d{4})=(\d{1})$',
        message.value)
    if m:
        time, userid, movieid, rating = m.groups()
        ratings_file.write(f"{time},{userid},{movieid},{rating}\n")


if __name__ == "__main__":
    consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'],
                             value_deserializer=lambda x: x.decode('utf-8'),
                             auto_offset_reset='earliest',
                             group_id='group3',
                             enable_auto_commit=True,
                             auto_commit_interval_ms=1000)
    topic_part = TopicPartition('movielog3', 0)
    consumer.assign([topic_part])

    # Seek to the offset closest to 15 minutes ago, and note the current end offset
    begin_datetime = datetime.now() - timedelta(hours=0.25)
    end_datetime = datetime.now()
    start_offset = consumer.offsets_for_times(
        {topic_part: begin_datetime.timestamp() * 1000})
    consumer.seek(topic_part, start_offset[topic_part].offset)
    end_offset = consumer.end_offsets([topic_part])[topic_part]
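    # A possible continuation (an assumption, not the original author's code): consume from
    # the seeked offset up to the end offset captured above and feed each record to
    # parse_message(); 'ratings.csv' is a hypothetical output file name.
    with open('ratings.csv', 'a') as ratings_file:
        for message in consumer:
            parse_message(message, ratings_file)
            if message.offset >= end_offset - 1:  # stop once the snapshot end is reached
                break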
from kafka import KafkaConsumer
from json import loads
import json

consumer = KafkaConsumer('run_application_topic',
                         bootstrap_servers=['localhost:9092'],
                         api_version=(0, 10))

for message in consumer:
    print(message.value)
if i: print("Updated {0} customers".format(i)) if __name__ == "__main__": brokers = get_brokers() producer = KafkaProducer( bootstrap_servers=brokers, value_serializer=lambda m: json.dumps(m).encode("utf-8"), ) customer_consumer = KafkaConsumer( "customers", bootstrap_servers=brokers, value_deserializer=lambda m: json.loads(m.decode("utf-8")), auto_offset_reset="earliest", enable_auto_commit=True, ) invoice_consumer = KafkaConsumer( "invoices", bootstrap_servers=brokers, value_deserializer=lambda m: json.loads(m.decode("utf-8")), auto_offset_reset="earliest", enable_auto_commit=True, ) # Resync the entire customer topic into memory customer_consumer.topics() customer_consumer.seek_to_beginning()
from kafka import KafkaConsumer

consumer = KafkaConsumer('world',
                         group_id='consumer-20171017',
                         bootstrap_servers=['127.0.0.1:9092'])

for msg in consumer:
    recv = "%s:%d:%d:value=%s" % (msg.topic, msg.partition, msg.offset, msg.value)
    print(recv)
DEFAULT_CONFIG = {
    'reconnect_backoff_ms': 50,
    'reconnect_backoff_max_ms': 1000,
    'max_in_flight_requests_per_connection': 5,
    'auto_offset_reset': 'earliest',
    'enable_auto_commit': True,
    'auto_commit_interval_ms': 3000,
    'metadata_max_age_ms': 5 * 60 * 1000,
    'heartbeat_interval_ms': 3000,
    'session_timeout_ms': 30000,
    'max_poll_records': 500
}

# Required: currently all wormhole (CDMQ) instances share the same server address
DEFAULT_CONFIG['bootstrap_servers'] = 'cdmqszentry01.data.mig:10005,cdmqszentry02.data.mig:10069'
# Client/group id: the consumer id requested on the CDMQ console
DEFAULT_CONFIG['client_id'] = cid
DEFAULT_CONFIG['group_id'] = cid

try:
    consumer = KafkaConsumer(topic, **DEFAULT_CONFIG)
    # consumer = KafkaConsumer(**DEFAULT_CONFIG)
    # partition = TopicPartition(topic, 72)
    # consumer.assign([partition])
    # consumer.seek(partition, 0)
    print("begin to consume data")
    # Other supported parameters are listed in the official docs:
    # https://kafka-python.readthedocs.io/en/master/apidoc/KafkaConsumer.html
    for msg in consumer:
        print(msg.partition)
        # print('topic:{} partition:{} value:{}'.format(msg.topic, msg.partition, msg.value))
        # time.sleep(5)
except Exception as e:
    print(str(e))
temp = list() temp.append(label) temp.append(headline) d.append(temp) return d if __name__ == "__main__": sc = SparkContext() sqlContext = SQLContext(sc) consumer = KafkaConsumer('srikar', bootstrap_servers=['localhost:9092'], auto_offset_reset='earliest', enable_auto_commit=True, group_id='test', api_version=(0, 10), consumer_timeout_ms=1000, value_deserializer=lambda x: x.decode('utf-8')) data = [] for i in consumer: tmp = list() tmp.append(i.value.split("||")[0]) tmp.append(i.value.split("||")[1]) data.append(tmp) print(len(data)) df = sqlContext.createDataFrame(data, schema=["category", "text"]) # regular expression tokenizer
#!/usr/bin/env python3
import sys

from kafka import KafkaConsumer, TopicPartition

server = 'localhost:9092'
topic = 'upload'
group_id = 'group1'

print('Connecting to Kafka')
consumer = KafkaConsumer(group_id=group_id, bootstrap_servers=[server])
print('Connected to Kafka')

tp = TopicPartition(topic=topic, partition=0)
consumer.assign([tp])
consumer.seek(tp, 0)

try:
    for message in consumer:
        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                             message.offset, message.key, message.value))
except KeyboardInterrupt:
    sys.exit()
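# An alternative sketch (an assumption, not the original script): kafka-python also provides
# seek_to_beginning()/seek_to_end() helpers that avoid hard-coding offset 0.
from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'], group_id='group1')
tp = TopicPartition('upload', 0)
consumer.assign([tp])
consumer.seek_to_beginning(tp)   # rewind to the earliest available offset
# consumer.seek_to_end(tp)       # or jump to the latest offset instead
for message in consumer:
    print(message.offset, message.value)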
java=java, extractor=extractor) assert isinstance(vectors_as_dict, dict) for t in ['statements_by_count', 'calls_by_count', 'literals_by_count']: m[t] = vectors_as_dict[t] # YUK... side effect m return vectors_as_dict # most callers wont use it... but hey why not... if __name__ == "__main__": group = args.group if len(group) < 1: group = None consumer = KafkaConsumer(args.consume_from, group_id=group, auto_offset_reset=args.start, enable_auto_commit=False, max_poll_interval_ms=60000000, bootstrap_servers=args.bootstrap, value_deserializer=json_value_deserializer()) producer = KafkaProducer(value_serializer=json_value_serializer(), bootstrap_servers=args.bootstrap) setup_signals(cleanup) # 1. process the analysis results topic to get vectors for each javascript artefact which has been processed by 1) kafkaspider AND 2) etl_make_fv save_pidfile('pid.eval.controls', root='.') print( "Creating required index in vet_against_control collection... please wait" ) db.vet_against_control.create_index([('origin_url', pymongo.ASCENDING), ('control_url', pymongo.ASCENDING)], unique=True) print("Index creation complete.")
if __name__ == '__main__': """ Client Setup with python3 ./client.py <host> <port> :return: None """ # Check arguments, host and port args = parse_arguments() host = args.host port = args.port # Setup Kafka consumer consumer = KafkaConsumer(consumer_timeout_ms=500) # Start client process with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as so: try: so.connect((host, port)) print(f'{get_message(so)}', end='', flush=True) while True: ready = select([sys.stdin], [], [], 0.5) if ready[0]: command = input() + '\n' so.sendall(bytes(command, 'utf-8')) raw_response = get_message(so) response, exitOrNot = response_handler(raw_response) if exitOrNot: so.close()
def __init__(self):
    self.consumer = KafkaConsumer('my-topic',
                                  group_id='my-group',
                                  bootstrap_servers=['kafka:9092'],
                                  )
    self.counter = 0
def run(args): try: props = {} for prop in args.consumer_config: k, v = prop.split('=') try: v = int(v) except ValueError: pass if v == 'None': v = None props[k] = v if args.brokers: brokers = start_brokers(args.brokers) props['bootstrap_servers'] = [ '{0}:{1}'.format(broker.host, broker.port) for broker in brokers ] print('---> bootstrap_servers={0}'.format( props['bootstrap_servers'])) print() print('-> Producing records') record = bytes(bytearray(args.record_size)) producer = KafkaProducer( compression_type=args.fixture_compression, **props) for i in xrange(args.num_records): producer.send(topic=args.topic, value=record) producer.flush() producer.close() print('-> OK!') print() print('Initializing Consumer...') props['auto_offset_reset'] = 'earliest' if 'consumer_timeout_ms' not in props: props['consumer_timeout_ms'] = 10000 props['metrics_sample_window_ms'] = args.stats_interval * 1000 for k, v in props.items(): print('---> {0}={1}'.format(k, v)) consumer = KafkaConsumer(args.topic, **props) print('---> group_id={0}'.format(consumer.config['group_id'])) print('---> report stats every {0} secs'.format( args.stats_interval)) print('---> raw metrics? {0}'.format(args.raw_metrics)) timer_stop = threading.Event() timer = StatsReporter(args.stats_interval, consumer, event=timer_stop, raw_metrics=args.raw_metrics) timer.start() print('-> OK!') print() records = 0 for msg in consumer: records += 1 if records >= args.num_records: break print('Consumed {0} records'.format(records)) timer_stop.set() except Exception: exc_info = sys.exc_info() traceback.print_exception(*exc_info) sys.exit(1)
print('Producing')
producer = KafkaProducer(bootstrap_servers='jeainnova2d1:9093',
                         security_protocol='SSL',
                         ssl_cafile='/etc/ssl/certs/jearootca.cer',
                         ssl_certfile='/etc/ssl/certs/testkafka.pem',
                         ssl_keyfile='/etc/ssl/certs/testkafka.pem',
                         ssl_password='******',
                         value_serializer=lambda x: json.dumps(x).encode('utf-8'))
producer.send('iot-readings', {'foo999aabbb': 'bar'})
producer.send('iot-readings', {'foo222aabbb': 'bar'})
producer.flush()

print('Consuming')
consumer = KafkaConsumer('iot-readings',
                         bootstrap_servers='jeainnova2d1:9093',
                         client_id='test_client',
                         group_id="pi-group",
                         security_protocol='SSL',
                         ssl_cafile='/etc/ssl/certs/jearootca.cer',
                         ssl_certfile='/etc/ssl/certs/testkafka.pem',
                         ssl_keyfile='/etc/ssl/certs/testkafka.pem',
                         ssl_password='******',
                         auto_offset_reset='earliest',
                         value_deserializer=lambda x: json.loads(x.decode('utf-8')))

for message in consumer:
    message = message.value
    print(f'message: {message}')
from kafka import KafkaConsumer

KAFKA_SERVER = "localhost:9092"
KAFKA_TOPIC = "test-otis"

print("starting program")
consumer = KafkaConsumer(KAFKA_TOPIC, bootstrap_servers=KAFKA_SERVER)
print("consumer created")

for msg in consumer:
    print(msg)

print("messages printed")
class ETKWorker(object): def __init__(self, master_config, em_paths, logger, worker_id, project_name, kafka_input_args=None, kafka_output_args=None): self.logger = logger self.worker_id = worker_id self.check_interval = 1000 self.exit_sign = False try: kg_schema = KGSchema(master_config) self.etk_ins = ETK(kg_schema, em_paths, logger=logger) except Exception as e: logger.exception('ETK initialization failed') raise e # kafka input self.kafka_input_server = config['input_server'] self.kafka_input_session_timeout = config['input_session_timeout'] self.kafka_input_group_id = config['input_group_id'] self.kafka_input_topic = '{project_name}_in'.format( project_name=project_name) self.kafka_input_args = dict( ) if kafka_input_args is None else kafka_input_args self.kafka_consumer = KafkaConsumer( bootstrap_servers=self.kafka_input_server, group_id=self.kafka_input_group_id, consumer_timeout_ms=self.check_interval, value_deserializer=lambda v: json.loads(v.decode('utf-8')), **self.kafka_input_args) self.kafka_consumer.subscribe([self.kafka_input_topic]) # kafka output self.kafka_output_server = config['output_server'] self.kafka_output_topic = '{project_name}_out'.format( project_name=project_name) self.kafka_output_args = dict( ) if kafka_output_args is None else kafka_output_args self.kafka_producer = KafkaProducer( bootstrap_servers=self.kafka_output_server, value_serializer=lambda v: json.dumps(v).encode('utf-8'), **self.kafka_output_args) self.timeout_count = self.kafka_input_session_timeout / self.check_interval self.current_timeout_count = 0 def process(self): # prev_doc_sent_time = None while not self.exit_sign: # high level api handles batching # will exit once timeout try: for msg in self.kafka_consumer: # force to commit, block till getting response self.kafka_consumer.commit() # get message, clear timeout count self.current_timeout_count = 0 cdr = msg.value # TODO better way to add execution profile # cdr['@execution_profile'] = {'@worker_id': self.worker_id} # doc_arrived_time = time.time() # cdr['@execution_profile']['@doc_arrived_time'] = \ # datetime.utcfromtimestamp(doc_arrived_time).isoformat() # cdr['@execution_profile']['@doc_wait_time'] = \ # 0.0 if not prev_doc_sent_time \ # else float(doc_arrived_time - prev_doc_sent_time) # cdr['@execution_profile']['@doc_length'] = len(json.dumps(cdr)) if 'doc_id' not in cdr or len(cdr['doc_id']) == 0: self.logger.error('invalid cdr: unknown doc_id') continue self.logger.info('processing %s' % cdr['doc_id']) try: # start_run_core_time = time.time() # run etk module doc = self.etk_ins.create_document( cdr, url=cdr['url'] if 'url' in cdr else '', doc_id=cdr['doc_id']) # process_ems returns a list of Documents results = self.etk_ins.process_ems(doc) for result in results: cdr_result = result.cdr_document # TODO remove removing of the provenances fields, once it is working properly cdr_result.pop('provenances', None) if 'type' in cdr_result: cdr_result['type_'] = cdr_result['type'] cdr_result.pop('type') # indexing # TODO indexed_cdr = index_knowledge_graph_fields( cdr_result) if not indexed_cdr: logger.error('indexing in sandpaper failed') continue # cdr = indexed_cdr # cdr['@execution_profile']['@run_core_time'] = \ # float(time.time() - start_run_core_time) # doc_sent_time = time.time() # cdr['@execution_profile']['@doc_sent_time'] = \ # datetime.utcfromtimestamp(doc_sent_time).isoformat() # prev_doc_sent_time = doc_sent_time # cdr['@execution_profile']['@doc_processed_time'] = \ # float(doc_sent_time - doc_arrived_time) # output result r 
= self.kafka_producer.send( self.kafka_output_topic, indexed_cdr) r.get(timeout=60) # wait till sent self.logger.info('{} done'.format( indexed_cdr['doc_id'])) except Exception as e: self.logger.exception('failed at %s' % cdr['doc_id']) except ValueError as e: # I/O operation on closed epoll fd self.logger.info('consumer closed') self.exit_sign = True except StopIteration as e: # timeout self.current_timeout_count += 1 if self.current_timeout_count >= self.timeout_count: self.exit_sign = True except CommitFailedError as e: self.exit_sign = True # https://github.com/dpkp/kafka-python/blob/535d8f6a85969c4e07de0bc81e14513c677995be/kafka/errors.py#L65 # if this worker is dead, restart and reattach to the group g_restart_worker = True def __del__(self): self.logger.info('ETK worker {} is exiting...'.format(self.worker_id)) try: self.kafka_consumer.close() except: pass try: self.kafka_producer.close() except: pass
import csv
import threading

from kafka import KafkaConsumer

# from models.esb_detection.vae import detect as esb_vae
from models.esb_detection.seas_decomp import detect as esb_seas
from models.esb_detection.heuristic import detect as esb_heur
import trace_detector
# from models.kpi_detection.vae import detect as kpi_detection
from server_config import SERVER_CONFIGURATION

# Three topics are available: platform-index, business-index, trace.
# Subscribe to at least one of them.
AVAILABLE_TOPICS = set(['platform-index', 'business-index', 'trace'])
CONSUMER = KafkaConsumer('platform-index', 'business-index', 'trace',
                         bootstrap_servers=[SERVER_CONFIGURATION["KAFKA_QUEUE"], ],
                         auto_offset_reset='latest',
                         enable_auto_commit=False,
                         security_protocol='PLAINTEXT')


def print_sep():
    print('*' * 60)


class PlatformIndex():  # pylint: disable=too-few-public-methods
    '''Structure for platform indices'''

    def __init__(self, data):
        self.item_id = [data['itemid']]
        self.name = [data['name']]
        self.bomc_id = [data['bomc_id']]
        self.timestamp = [data['timestamp']]
def offset_manage_manually_consume():
    """
    Manually set the consumer offset.
    :return:
    """
    consumer = KafkaConsumer(TOPIC, bootstrap_servers=BOOTSTRAP_SERVERS)
    print(consumer.partitions_for_topic(TOPIC))                # partition info for the topic
    print(consumer.topics())                                   # topics available on this kafka server
    print(consumer.subscription())                             # topics this consumer is subscribed to
    print(consumer.assignment())                               # topic/partition assignment of this consumer
    print(consumer.beginning_offsets(consumer.assignment()))   # earliest offsets this consumer can read
    print(consumer.assignment())                               # current assignment again

    # Reset the offset: start consuming partition 0 from offset 235000
    consumer.seek(TopicPartition(topic=u'%s' % TOPIC, partition=0), 235000)
    for message in consumer:
        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                             message.offset, message.key,
                                             message.value))
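# A related sketch (an assumption, not part of the original helper) showing how a specific
# offset can be committed explicitly after seeking; the topic, partition, broker and group
# names are illustrative only.
from kafka import KafkaConsumer, TopicPartition
from kafka.structs import OffsetAndMetadata

consumer = KafkaConsumer(bootstrap_servers='localhost:9092', group_id='manual-offset-group')
tp = TopicPartition('demo-topic', 0)      # hypothetical topic/partition
consumer.assign([tp])
consumer.seek(tp, 235000)
record = next(consumer)                   # read one record from the new position
# commit the offset *after* the record just processed
consumer.commit({tp: OffsetAndMetadata(record.offset + 1, '')})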
from kafka import KafkaConsumer

# If the broker is installed on a separate host machine,
# use that machine's IP address
brokers = ['10.4.17.190:9092']
# brokers = ['135.222.154.160:9092']
# brokers = ['10.4.17.190:9092']

# If the broker is installed on the same host machine, use localhost or 0.0.0.0
# brokers = ['0.0.0.0:9092']

# Connect to the Kafka server and pass the topic we want to consume
topic = 'fitelab_cam_9104'
consumer = KafkaConsumer(topic, group_id='myview', bootstrap_servers=brokers)

print("Start to listen on Kafka broker")
print("It may take some time to receive the first message")
print("")

for message in consumer:
    # message value and key are raw bytes -- decode if necessary!
    # e.g., for unicode: `message.value.decode('utf-8')`
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                         message.offset, message.key,
                                         message.value))
from kafka import KafkaConsumer
from kafka import KafkaProducer

add_producer = KafkaProducer(bootstrap_servers='localhost:9092')
# add_consumer = KafkaConsumer('add_res', bootstrap_servers='localhost:9092', auto_offset_reset='earliest',
#                              group_id="test-consumer-group", enable_auto_commit=True)
add_consumer = KafkaConsumer('add_res',
                             bootstrap_servers='localhost:9092',
                             auto_offset_reset='earliest',
                             group_id="test-consumer-group",
                             enable_auto_commit=True)

while 1:
    num1 = input()
    num2 = input()
    producer_input_str = num1 + "," + num2
    producer_input = bytes(producer_input_str, "UTF-8")
    add_producer.send('add_nums', producer_input)
    for message in add_consumer:
        # print(message.value)
        sum = message.value.decode()
        msg, sum = sum.split('::')
        print(msg)
def message_hangup_recovery():
    """
    Pause and resume message consumption.
    :return:
    """
    consumer = KafkaConsumer(TOPIC, bootstrap_servers=BOOTSTRAP_SERVERS)
    consumer.subscribe(topics=[TOPIC])
    consumer.topics()
    consumer.pause(TopicPartition(topic=u'%s' % TOPIC, partition=0))
    num = 0
    while True:
        print(num)
        # After pause() the consumer cannot read from the partition until resume() is called
        print(consumer.paused())  # partitions currently paused
        msg = consumer.poll(timeout_ms=5)
        print(msg)
        time.sleep(2)
        num = num + 1
        if num == 10:
            print("resume...")
            consumer.resume(TopicPartition(topic=u'%s' % TOPIC, partition=0))
            print("resume......")
    RST_TOPIC,
)
from ret.utilities.trx_updater import trx_updater

producer = KafkaProducer(
    bootstrap_servers=[KAFKA_BROKER_URL],
    # we serialize our data to json for efficient transfer
    value_serializer=lambda msg: json.dumps(msg).encode('utf-8'),
)

logger.debug(f"MML_TOPIC {MML_TOPIC} RST_TOPIC {RST_TOPIC}")

consumer = KafkaConsumer(
    RST_TOPIC,
    bootstrap_servers=[KAFKA_BROKER_URL],
    auto_offset_reset='latest',  # where to start reading the messages at
    enable_auto_commit=True,
    # group_id='event-collector-group-2',  # consumer group id
    # we deserialize our data from json
    value_deserializer=lambda m: json.loads(m.decode('utf-8'))
)


def nbi_processor(time_=None, session_=None, trxs_=None):
    '''
    Receives the query (trxs_) with all the transactions to execute, builds a single
    message to the NBI with all of them, waits for the response message and, based on
    the reply, updates the transactions in the DB (the transactions and rets tables).
    '''
    logger.debug(f"time_ {time_} ENV {ENV}")
#!/usr/bin/env python
# encoding: utf-8
import socket

from kafka import KafkaConsumer
from kafka.errors import KafkaError

import setting

conf = setting.kafka_setting

# context.check_hostname = True
consumer = KafkaConsumer(bootstrap_servers=conf['bootstrap_servers'],
                         group_id=conf['consumer_id'],
                         api_version=(0, 10))

print('consumer starts consuming...')

consumer.subscribe((conf['topic_name'], ))

for message in consumer:
    print(message.topic, message.offset, message.key, message.value, message.partition)
from kafka import KafkaConsumer
import face_recognition
import PIL.Image
import msgpack
import pickle
import cv2
import io

rede = pickle.loads(open("encodings.pickle", "rb").read())

topic = "face-detected-test"
BROKER_URI = '10.10.3.159:9092'
count = 0

consumer = KafkaConsumer(
    topic,
    value_deserializer=lambda m: msgpack.unpackb(m, raw=False),
    bootstrap_servers=BROKER_URI)

for msg in consumer:
    data = msg.value
    label = str(data['true_label'])
    img = data['face_detected']
    i = PIL.Image.open(io.BytesIO(img)).convert("RGB")
    path = "database/" + label + ".png"
    i.save(path)
    unknown_face = face_recognition.load_image_file(path)
    unknown_face_encodings = face_recognition.face_encodings(unknown_face)
    if len(unknown_face_encodings) > 0:
from flask import Flask
from kafka import KafkaConsumer
import mysql.connector
from json import loads

# app = Flask(__name__)

# localhost:9092 to be changed to the url where kafka runs
consumer = KafkaConsumer('Kafka_Topic',
                         bootstrap_servers=['192.168.1.101:9092'],
                         auto_offset_reset='earliest',
                         enable_auto_commit=True,
                         value_deserializer=lambda x: loads(x.decode('utf-8')))
print("t1")

# mydb = mysql.connector.connect(
#     host="localhost",
#     user="******",
#     passwd=""
# )
# cursor = mydb.cursor()
# cursor.execute("use insightdb");
# cursor.execute("select * from lens_master limit 10;");
# list_rows = cursor.fetchall();
# mydb.close()
# collection = mydb.insightdb.kafkaKube

# consumer = None
while (consumer is not None):