def consume_topic(self, callback_url, consumer_group, topic):
    # note: the body uses self.*, so this must be a method; 'self' was missing
    # from the original signature
    consumer = None
    try:
        consumer = SimpleConsumer(self.kafka, consumer_group, topic,
                                  auto_commit=False)
        messages_read = 0
        # we can't read messages infinitely here as we have
        # a lot of topics/subscribers (much more than threadpool size)
        while messages_read < self.max_read_messages_per_cycle:
            # get one message and monitor the time
            start = monitoring.start_time_measure()
            message = consumer.get_message(block=False)
            ms_elapsed = monitoring.stop_time_measure(start)
            self.metrics['kafka_read'].add({'topic': topic}, ms_elapsed)

            # if we don't have messages for this topic/subscriber - quit and give chance to others
            if message is None:
                logging.info('No messages for topic: %s and callback: %s, quitting the thread',
                             topic, callback_url)
                break

            try:
                event = json.loads(message.message.value.decode('utf-8'))
                response_status = self.forward_event(callback_url, event, topic)
                # if status is success - mark message as consumed by this subscriber
                if 200 <= response_status < 300:
                    consumer.commit()
                else:
                    logging.info('Received error response from consumer: %s',
                                 response_status)
            except Exception:
                logging.error("Exception while sending event to consumer")
                logging.error(traceback.format_exc())
            finally:
                messages_read += 1

        return messages_read
    except UnknownTopicOrPartitionError:
        logging.error('Adding %s to skip list', topic)
    except Exception:
        logging.exception('failed to create kafka client')
    finally:
        if consumer is not None:
            consumer.stop()
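# A minimal dispatch sketch (not from the original source): feeds one
# consume_topic call per (callback_url, group, topic) tuple into a bounded
# thread pool, matching the comment above about having far more
# topics/subscribers than threadpool slots. The 'dispatcher' object,
# subscriptions list and pool size are assumptions.
from concurrent.futures import ThreadPoolExecutor

def dispatch_cycle(dispatcher, subscriptions, pool_size=8):
    with ThreadPoolExecutor(max_workers=pool_size) as pool:
        futures = [
            pool.submit(dispatcher.consume_topic, callback_url, group, topic)
            for callback_url, group, topic in subscriptions
        ]
        # each future resolves to the number of messages read in this cycle
        return [f.result() for f in futures]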
class HBaseServer(threading.Thread):
    """HBase thread that will continuously read from Kafka queue."""

    def __init__(self, kafka_url, kafka_topic, hbase_url, hbase_thrift_port, hbase_table):
        threading.Thread.__init__(self)
        self.kafka = KafkaClient(kafka_url)
        self.cons = SimpleConsumer(self.kafka, None, kafka_topic)
        self.cons.seek(0, 2)  # start at the tail of the topic
        self.hbase_connect = happybase.Connection(hbase_url, hbase_thrift_port)
        self.car_table = self.hbase_connect.table(hbase_table)
        self.server_on_flag = True
        self.m = None
        self.payload = None
        self.vin = None
        self.time = None
        self.data = None
        self.row_key = None
        self.count = 0

    def run(self):
        while self.server_on_flag:
            self.m = self.cons.get_message(block=False)
            if self.m is not None:
                self.payload = json.loads(self.m.message.value)
                self.vin = str(self.payload['vin'])
                self.time = str(self.payload['timestamp'])
                self.data = str(self.payload['data'])
                self.row_key = self.vin + self.time
                try:
                    self.car_table.put(self.vin, {'user:mostrecent': self.time})
                    self.car_table.put(self.row_key, {'car:data': self.data})
                    self.count += 1
                    logger.info('HBase Server: key: %s, table: %s, car{data: %s}. Message number: %s',
                                self.row_key, 'rvi', self.data, str(self.count))
                except Exception as e:
                    logger.info('%s, data push into HBase unsuccessful...', e)
            else:
                sleep(0.2)  # original sleep(1/5) is integer division (0) on Python 2

    def shutdown(self):
        self.server_on_flag = False
        logger.info('HBase Server shutting down...')
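# A minimal usage sketch (not from the original source): the broker address,
# topic, Thrift port and table name are all assumptions. The thread tails the
# topic and writes each record to HBase until shutdown() is called.
from time import sleep

server = HBaseServer('localhost:9092', 'rvi', 'localhost', 9090, 'rvi')
server.start()
try:
    while server.is_alive():
        sleep(1)
except KeyboardInterrupt:
    server.shutdown()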
class KafkaTopicQueue:
    def __init__(self, topic, host="localhost:9092"):
        self.topic = topic
        self.group = "group-for-%s" % self.topic
        self.kafka = SimpleClient(host)
        self.producer = SimpleProducer(self.kafka)
        self.consumer = SimpleConsumer(self.kafka, self.group, self.topic)

    def push(self, v):
        self.producer.send_messages(self.topic, v)

    def pop(self):
        item = self.consumer.get_message()
        return item.message.value if item else None
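# A minimal usage sketch (not from the original source): assumes a broker on
# localhost:9092 and a hypothetical topic "demo-queue". pop() returns None
# once the consumer group has drained everything pushed so far.
q = KafkaTopicQueue("demo-queue")
q.push(b"hello")
q.push(b"world")
msg = q.pop()
while msg is not None:
    print(msg)
    msg = q.pop()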
def kafka_consumer(kafka_hosts, schema_host, schema_port, topic, consumer_group="python"):
    """
    Consume records from the given Kafka topic (batch consumption, not real-time).
    :param kafka_hosts:
    :param schema_host:
    :param schema_port:
    :param topic:
    :param consumer_group:
    :return:
    """
    # Fetch the latest schema for the topic.
    topic_schema, topic_schema_id, schema_version = get_latest_schema_info(
        schema_host, schema_port, topic)
    # Consume the Kafka records.
    client = KafkaClient(hosts=kafka_hosts)
    simple_consumer = SimpleConsumer(client, consumer_group, topic,
                                     auto_offset_reset="smallest")
    collect_logs = []  # holds each record's partition, offset and value
    msg_exist = True
    while msg_exist:
        msg = simple_consumer.get_message(get_partition_info=True)
        # print "kafka log:", msg
        # Stop consuming once get_message returns None.
        if msg is None:
            msg_exist = False
        else:
            msg_partition = msg[0]
            msg_offset = msg[1].offset
            msg_value = msg[1].message.value
            # Decode a single record: skip the 5-byte wire-format header
            # (magic byte + schema id), then Avro-decode the remainder.
            bytes_msg = io.BytesIO(msg_value[5:])
            decode_msg = avro.io.BinaryDecoder(bytes_msg)
            recode_msg = avro.io.DatumReader(
                avro.schema.parse(topic_schema)).read(decode_msg)
            # Collect the record's partition, offset and decoded value.
            msg_collect = [msg_partition, msg_offset, recode_msg]
            collect_logs.append(msg_collect)
    collect_logs.sort(key=lambda x: x[0])  # sort by partition id
    print "+++++++Topic: %s+++++++" % topic
    for index, log in enumerate(collect_logs):
        print index, log
    print "Successfully received."
    return collect_logs
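# A minimal usage sketch (not from the original source): the broker list,
# schema-registry host/port and topic name are assumptions. Each entry in the
# returned list is [partition, offset, decoded_record].
collected = kafka_consumer("broker1:9092,broker2:9092",
                           "schema-registry.local", 8081, "my-avro-topic")
for partition, offset, record in collected:
    print partition, offset, record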
class RVIConsumer(threading.Thread):

    def __init__(self, kafka_addr, topic, vin, web_url):
        threading.Thread.__init__(self)
        self.kafka = KafkaClient(kafka_addr)
        self.cons = SimpleConsumer(self.kafka, None, topic)
        self.cons.seek(0, 2)  # start at the tail of the topic
        self.vin = vin
        self.web_url = web_url
        self.flag = True
        self.count = 0
        self.sleep_count = 0
        self.headers = {'Content-Type': 'application/json'}

    def is_running(self):
        return self.flag

    def run(self):
        while self.flag:
            m = self.cons.get_message(block=False)
            if m is not None:
                payload = json.loads(m.message.value)
                if payload['vin'] == self.vin:
                    self.sleep_count = 0
                    payloadtoweb = json.dumps(m.message.value)
                    r = requests.post(self.web_url, data=payloadtoweb, headers=self.headers)
                    if r.status_code == 200:  # original 'is 200' compared identity, not equality
                        print m.message.value + " sent successfully\n"
                    else:
                        print "%s is not available, status code:%d...shutting down now..." % (self.web_url, r.status_code)
                        self.shutdown()
            else:
                if self.sleep_count > 100000:
                    print "No new data for %s... Timing out" % self.vin
                    self.shutdown()
                time.sleep(0.2)  # original 1/5 is integer division (0) on Python 2
                self.sleep_count += 1

    def shutdown(self):
        self.flag = False
        requests.post(self.web_url,
                      data=json.dumps({'vin': self.vin, 'data': 'EOM'}),
                      headers=self.headers)
        print "%s consumer thread shutting down" % self.vin
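# A minimal usage sketch (not from the original source): the broker address,
# topic, VIN and web endpoint are assumptions. The thread forwards matching
# messages to the endpoint until it times out or is shut down.
consumer = RVIConsumer('localhost:9092', 'rvi', 'VIN0001',
                       'http://localhost:5000/vehicle')
consumer.start()
while consumer.is_running():
    time.sleep(1)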
def serve_user(user):
    consumer = SimpleConsumer(CLIENT, 'testing',
                              'user{}_sess{}'.format(user, user))
    msg = consumer.get_message()
    RECEIVE_TIME = time.time()
    color = 'yellow'
    S_R_LAG = RECEIVE_TIME - SEND_TIME if SEND_TIME else None
    if msg:
        print("received message: {} delay: {}".format(msg.message.value.decode(), S_R_LAG))
        if msg.message.value.decode() == 'True':
            color = 'green'
        else:
            color = 'red'
    return render_template('keylog.html', bgcolor=color)
def main():
    """
    Usage:
        dump_to_mongodb dump <topic> --host=<host> [--consumer=<consumer>]
    """
    args = docopt(main.__doc__)
    host = args["--host"]
    print "=> Connecting to {0}...".format(host)
    logger.info("=> Connecting to {0}...".format(host))
    kafka = KafkaClient(host)
    print "=> Connected."
    logger.info("=> Connected.")
    if args["dump"]:
        topic = args["<topic>"]
        consumer_id = args["--consumer"] or "dump_to_mongodb"
        consumer = SimpleConsumer(kafka, consumer_id, topic,
                                  buffer_size=1024 * 200,       # 200kb
                                  fetch_size_bytes=1024 * 200,  # 200kb
                                  max_buffer_size=None)         # eliminate big message errors
        consumer.seek(0, 1)
        while True:
            try:
                message = consumer.get_message()
                if message is None:
                    time.sleep(1)
                    continue
                val = message.message.value
                logger.info("message.message.value== %s " % val)
                print('val==', val)
                try:
                    item = json.loads(val)
                except:
                    continue
                if 'meta' in item and 'collection_name' in item['meta']:
                    _insert_item_to_monggodb(item)
            except:
                traceback.print_exc()
                break
    kafka.close()
    return 0
class FirehoseConsumer(object):
    def __init__(self, kafka_hostport, topic, group=None, **kwargs):
        if not group:
            group = str(uuid.uuid4())

        self.kafka = get_client(kafka_hostport)
        self.consumer = SimpleConsumer(self.kafka, group, topic,
                                       max_buffer_size=1048576 * 32,
                                       **kwargs)

    def get_event(self):
        data = self.consumer.get_message()
        if not data:
            return None

        when, event, delivery, signature, raw = json.loads(data.message.value)
        payload = json.loads(raw)
        return when, event, delivery, signature, payload
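# A minimal usage sketch (not from the original source): the broker address
# and topic name are assumptions. get_event() returns None once get_message
# times out with no data, so a simple poll loop drains the topic.
consumer = FirehoseConsumer('localhost:9092', 'github-firehose')
event = consumer.get_event()
while event is not None:
    when, event_type, delivery, signature, payload = event
    print(when, event_type, delivery)
    event = consumer.get_event()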
def dispatch(self):
    consumer = SimpleConsumer(self.kafka_client, self.consumer_id, self.topic,
                              buffer_size=1024 * 100,       # 100kb
                              fetch_size_bytes=1024 * 100,  # 100kb
                              max_buffer_size=None)         # eliminate big message errors
    consumer.seek(0, 1)
    i = 0
    while True:
        try:
            message = consumer.get_message()
            if message is None:
                print datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), ' message is None:'
                logger.info('message is None.')
                time.sleep(1)
                continue
            val = message.message.value
            try:
                item = json.loads(val)
                i += 1
                self._process_item(item, i % len(self.aria2_clients))
            except:
                print("error happened in loads val to process : %s" % val)
                logger.error("error happened in loads val to process: %s" % val)
                continue
        except:
            traceback.print_exc()
            break
    self.kafka_client.close()
    return 0
def _run(self):
    pcount = 0
    while True:
        try:
            self._logger.info("New KafkaClient %d" % self._partition)
            kafka = KafkaClient(self._brokers, str(os.getpid()))
            try:
                consumer = SimpleConsumer(kafka, self._group, self._topic,
                                          buffer_size=4096 * 4,
                                          max_buffer_size=4096 * 32)
            except Exception as ex:
                template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.info("%s" % messag)
                raise RuntimeError(messag)

            self._logger.info("Starting %d" % self._partition)

            # Find the offset of the last message that has been queued
            consumer.seek(0, 2)
            try:
                mi = consumer.get_message(timeout=0.1)
                consumer.commit()
            except common.OffsetOutOfRangeError:
                mi = None
            self._logger.info("Last Queued for %d is %s" % (self._partition, str(mi)))
            self.start_partition()

            # start reading from last previously processed message
            consumer.seek(0, 1)

            if self._limit:
                raise gevent.GreenletExit

            while True:
                try:
                    mm = consumer.get_message(timeout=None)
                    if mm is None:
                        continue
                    self._logger.debug("%d Reading offset %d" % (self._partition, mm.offset))
                    consumer.commit()
                    pcount += 1
                    if not self.msg_handler(mm):
                        self._logger.info("%d could not handle %s" % (self._partition, str(mm)))
                        raise gevent.GreenletExit
                except TypeError:
                    gevent.sleep(0.1)
                except common.FailedPayloadsError as ex:
                    self._logger.info("Payload Error: %s" % str(ex.args))
                    gevent.sleep(0.1)
        except gevent.GreenletExit:
            break
        except Exception as ex:
            template = "An exception of type {0} occurred. Arguments:\n{1!r}"
            messag = template.format(type(ex).__name__, ex.args)
            self._logger.info("%s : traceback %s" % (messag, traceback.format_exc()))
            self.stop_partition()
            gevent.sleep(2)

    self._logger.info("Stopping %d pcount %d" % (self._partition, pcount))
    return self._partoffset, self._partdb
def _run(self):
    pcount = 0
    pause = False
    while True:
        try:
            if pause:
                gevent.sleep(2)
                pause = False
            self._logger.error("New KafkaClient %s" % self._topic)
            self._kfk = KafkaClient(self._brokers, "kc-" + self._topic)
            self._failed = False
            try:
                consumer = SimpleConsumer(self._kfk, self._group, self._topic,
                                          buffer_size=4096 * 4,
                                          max_buffer_size=4096 * 32)
            except Exception as ex:
                template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("Error: %s trace %s" % (messag, traceback.format_exc()))
                self._failed = True
                raise RuntimeError(messag)

            self._logger.error("Starting %s" % self._topic)

            # Find the offset of the last message that has been queued
            consumer.seek(-1, 2)
            try:
                mi = consumer.get_message(timeout=0.1)
                consumer.commit()
            except common.OffsetOutOfRangeError:
                mi = None
            self._logger.info("Last Queued for %s is %s" % (self._topic, str(mi)))

            # start reading from last previously processed message
            if mi is not None:
                consumer.seek(-1, 1)
            else:
                consumer.seek(0, 0)

            if self._limit:
                raise gevent.GreenletExit

            while True:
                try:
                    mlist = consumer.get_messages(10, timeout=0.5)
                    if not self.msg_handler(mlist):
                        raise gevent.GreenletExit
                    consumer.commit()
                    pcount += len(mlist)
                except TypeError as ex:
                    self._logger.error("Type Error: %s trace %s" % (str(ex.args), traceback.format_exc()))
                    gevent.sleep(0.1)
                except common.FailedPayloadsError as ex:
                    self._logger.error("Payload Error: %s" % str(ex.args))
                    gevent.sleep(0.1)
        except gevent.GreenletExit:
            break
        except AssertionError as ex:
            self._partoffset = ex
            break
        except Exception as ex:
            template = "An exception of type {0} occurred. Arguments:\n{1!r}"
            messag = template.format(type(ex).__name__, ex.args)
            self._logger.error("%s : traceback %s" % (messag, traceback.format_exc()))
            self.stop_partition()
            self._failed = True
            pause = True

    self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
    partdb = self.stop_partition()
    return self._partoffset, partdb
def main():
    """kafkadump: Kafka topic dump utility for debugging.

    Usage:
        kafkadump list --host=<host>
        kafkadump dump <topic> --host=<host> [--consumer=<consumer>]

    Examples:

        List all the topics on your local Kafka instance:

            python kafkadump.py list --host=<kafkahost>:9092

        Dump the contents of a single topic starting from offset 0:

            python kafkadump.py dump test.crawled_firehose --host=<kafkahost>:9092

        Use CTRL+C (SIGINT, KeyboardInterrupt) to stop it from polling Kafka.
        It will end by printing the total records serviced and the raw output
        of the most recent record.

    Options:
        -h --host <host>            Kafka host name where Kafka cluster will be resolved
        -c --consumer <consumer>    Consumer group ID to use for reading messages
    """
    args = docopt(main.__doc__)
    host = args["--host"]
    logging.basicConfig()

    print "=> Connecting to {0}...".format(host)
    kafka = KafkaClient(host)
    print "=> Connected."

    if args["list"]:
        for topic in kafka.topic_partitions.keys():
            print topic
        return 0
    elif args["dump"]:
        topic = args["<topic>"]
        consumer_id = args["--consumer"] or "default"
        consumer = SimpleConsumer(kafka, consumer_id, topic,
                                  buffer_size=1024 * 100,       # 100kb
                                  fetch_size_bytes=1024 * 100,  # 100kb
                                  max_buffer_size=None)         # eliminate big message errors
        consumer.seek(0, 0)
        num_records = 0
        total_bytes = 0
        item = None
        while True:
            try:
                message = consumer.get_message()
                if message is None:
                    time.sleep(1)
                    continue
                val = message.message.value
                item = json.loads(val)
                body_bytes = len(val)  # size of the raw message; len(item) counted dict keys
                print item
                num_records += 1
                total_bytes += body_bytes
            except:  # bare except so CTRL+C ends the loop, per the docstring
                traceback.print_exc()
                break
        total_mbs = float(total_bytes) / (1024 * 1024)
        print
        if item is not None:
            print json.dumps(item, indent=4)
        if num_records == 0:
            num_records = 1
        print num_records, "records", total_mbs, "megabytes", (float(total_bytes) / num_records / 1024), "kb per msg"
        kafka.close()
        return 0
class KafkaSimpleConsumer(object):
    """Base class for consuming from kafka.
    Implements the logic to connect to kafka and consume messages.
    KafkaSimpleConsumer is a wrapper around kafka-python SimpleConsumer
    and relies on it to consume messages from kafka.
    KafkaSimpleConsumer does not catch exceptions raised by kafka-python.

    An instance of this class can be used as an iterator
    to consume messages from kafka.

    .. warning:: This class is considered deprecated in favor of
       :py:class:`yelp_kafka.consumer_group.KafkaConsumerGroup`.

    :param topic: topic to consume from.
    :type topic: string.
    :param config: consumer configuration.
    :type config: dict.
    :param partitions: topic partitions to consume from.
    :type partitions: list.
    """

    def __init__(self, topic, config, partitions=None):
        self.log = logging.getLogger(self.__class__.__name__)
        if not isinstance(topic, six.string_types):
            raise TypeError("Topic must be a string")
        self.topic = kafka_bytestring(topic)
        if partitions and not isinstance(partitions, list):
            raise TypeError("Partitions must be a list")
        self.partitions = partitions
        self.kafka_consumer = None
        self.config = config

    def connect(self):
        """Connect to kafka and create a consumer.
        Uses config parameters to create a kafka-python
        KafkaClient and SimpleConsumer.
        """
        # Instantiate a kafka client connected to kafka.
        self.client = KafkaClient(
            self.config.broker_list,
            client_id=self.config.client_id
        )

        # Create a kafka SimpleConsumer.
        self.kafka_consumer = SimpleConsumer(
            client=self.client, topic=self.topic, partitions=self.partitions,
            **self.config.get_simple_consumer_args()
        )
        self.log.debug(
            "Connected to kafka. Topic %s, partitions %s, %s",
            self.topic,
            self.partitions,
            ','.join(['{0} {1}'.format(k, v) for k, v in
                      six.iteritems(self.config.get_simple_consumer_args())])
        )
        self.kafka_consumer.provide_partition_info()

    def __iter__(self):
        for partition, kafka_message in self.kafka_consumer:
            yield Message(
                partition=partition,
                offset=kafka_message[0],
                key=kafka_message[1].key,
                value=kafka_message[1].value,
            )

    def __enter__(self):
        self.connect()
        return self  # return the instance so "with ... as consumer:" binds it

    def __exit__(self, type, value, tb):
        self.close()

    def close(self):
        """Disconnect from kafka.
        If auto_commit is enabled commit offsets before disconnecting.
        """
        if self.kafka_consumer.auto_commit is True:
            try:
                self.commit()
            except:
                self.log.exception("Commit error. Offsets may not have been committed")
        # Close all the connections to kafka brokers. KafkaClient opens
        # connections to all the partition leaders.
        self.client.close()

    def get_message(self, block=True, timeout=0.1):
        """Get message from kafka. It supports the same arguments of get_message
        in kafka-python SimpleConsumer.

        :param block: If True, the API will block till at least a message is fetched.
        :type block: boolean
        :param timeout: If block is True, the function will block for the specified
                        time (in seconds). If None, it will block forever.

        :returns: a Kafka message
        :rtype: Message namedtuple, which consists of: partition number,
                offset, key, and message value
        """
        fetched_message = self.kafka_consumer.get_message(block, timeout)
        if fetched_message is None:
            # get_message timed out and returned None
            return None
        else:
            partition, kafka_message = fetched_message
            return Message(
                partition=partition,
                offset=kafka_message[0],
                key=kafka_message[1].key,
                value=kafka_message[1].value,
            )

    def commit(self, partitions=None):
        """Commit offset for this consumer group.

        :param partitions: list of partitions to commit, default commits to all
                           partitions.
        :return: True on success, False on failure.
        """
        if partitions:
            return self.kafka_consumer.commit(partitions)
        else:
            return self.kafka_consumer.commit()

    def commit_message(self, message):
        """Commit the message offset for this consumer group. This function does not
        take care of the consumer offset tracking. It should only be used if
        auto_commit is disabled and the commit function never called.

        .. note:: all the messages received before message itself will be committed
                  as consequence.

        :param message: message to commit.
        :type message: Message namedtuple, which consists of: partition number,
                       offset, key, and message value
        :return: True on success, False on failure.
        """
        reqs = [
            OffsetCommitRequest(
                self.topic,
                message.partition,
                message.offset,
                None,
            )
        ]
        try:
            if self.config.offset_storage in [None, 'zookeeper', 'dual']:
                self.client.send_offset_commit_request(self.config.group_id, reqs)
            if self.config.offset_storage in ['kafka', 'dual']:
                self.client.send_offset_commit_request_kafka(self.config.group_id, reqs)
        except KafkaError as e:
            self.log.error("%s saving offsets: %s", e.__class__.__name__, e)
            return False
        else:
            return True
class KafkaPythonClientSimple(PythonClient):
    def __init__(self, topic=topic_name, consumerGroup="perftest",
                 kafkaHost=kafka_host, zookeeperHost=zookeeper_host):
        self.config["topic"] = topic
        self.config["kafkaHost"] = kafkaHost
        self.config["zookeeperHost"] = zookeeperHost
        self.config["consumerGroup"] = consumerGroup
        self.client = SimpleClient(self.config["kafkaHost"])
        super(KafkaPythonClientSimple, self).__init__()

    def createProducer(self, kafkaSync):
        self.config["kafkaSync"] = kafkaSync
        if self.config["kafkaSync"] == True:
            self.producer = SimpleProducer(self.client, async=False)
        else:
            print "ENOIMPL: async not impl. for kafka-python-simple"

    def createConsumer(self):
        self.consumer = SimpleConsumer(self.client,
                                       topic=self.config["topic"],
                                       group=self.config["consumerGroup"],
                                       auto_commit=True,
                                       max_buffer_size=3000000,
                                       iter_timeout=5)

    def produce(self, num_msg=20000):
        self.msgCount = num_msg
        for x in range(self.msgCount):
            self.prtProgress(x, 10000)
            self.producer.send_messages(self.config["topic"], self.msg)
        if x >= 10000:
            sys.stdout.write('\n')

    def consume(self, num_msg=0):
        count = 0
        while True:
            # don't use "for message in self.consumer:" instead of "while..." - much slower!
            message = self.consumer.get_message(block=False, timeout=1)
            if message is None:
                break
            if len(message) == 0:
                break
            count += 1
            self.prtProgress(count, 10000)
        sys.stdout.write('\n')
        if num_msg > 0:
            if count != num_msg:
                print "ERROR: KafkaPythonClientSimple.consume: # of messages not as expected, read: {}, expected: {}".format(count, num_msg)
        return count

    def startProducer(self):
        pass

    def stopProducer(self):
        self.beforeFlushTimer(self.timeDict['producer'])
        self.producer.stop()

    def stopConsumer(self):
        pass

    def initCount(self):
        self.consume(0)

    def finalize(self):
        pass
b"%s" % (args.offset)) try: if args.broker: kclient = KafkaClient("%s" % (args.broker)) # add support for more than 1 parititon consumer = SimpleConsumer(kclient, args.consumer, args.topic, partitions=[0]) consumer.max_buffer_size = None if args.offset: consumer.seek(0, 1) message = consumer.get_message() if message: print "DEBUG: restoring" print("MSG: " + str(message[1][3]) + "\tOFFSET: " + str(message[0]) + "\t KEY: " + str(message.message.key)) if not args.set: zk.set( '/consumers/{0}/offsets/{1}/{2}'.format( args.consumer, args.topic, args.partition), b"%s" % (old_offset)) else: print "Old offset %s" % (old_offset) print "New offset %s" % (args.offset) except: # zk.set('/consumers/{0}/offsets/{1}/{2}'.format(args.consumer, args.topic, args.partition), b"%s" % (old_offset))
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

workers = {}
brokers = "localhost:9092,localhost:9093,localhost:9094"
group = "workers"

kafka = KafkaClient(brokers, str(os.getpid()))
cons = SimpleConsumer(kafka, group, "ctrl")
cons.provide_partition_info()
print "Starting control"

end_ready = False
while end_ready == False:
    try:
        while True:
            # provide_partition_info() makes get_message return (partition, OffsetAndMessage)
            part, mmm = cons.get_message(timeout=None)
            mm = mmm.message
            print "Consumed ctrl " + str(mm)
            if mm.value == "start":
                # workers is keyed by int(mm.key); the original checked the raw
                # string key, so the duplicate check could never fire
                if int(mm.key) in workers:
                    print "Dup partition %s" % mm.key
                    raise ValueError
                else:
                    ph = UveStreamProc(brokers, int(mm.key),
                                       "uve-" + mm.key, "alarm-x" + mm.key, logging)
                    ph.start()
                    workers[int(mm.key)] = ph
            elif mm.value == "stop":
                if int(mm.key) in workers:
                    ph = workers[int(mm.key)]
                    gevent.kill(ph)
if __name__ == '__main__':
    # Streaming context
    conf = SparkConf().setMaster("local[2]")
    sc = SparkContext.getOrCreate(conf=conf)
    ssc = StreamingContext(sc, 1)

    # Kafka consumer for component weights.
    kafka = KafkaClient('localhost:9092')
    # SimpleConsumer expects 'smallest'/'largest' for auto_offset_reset;
    # the original passed 'latest', which belongs to the newer KafkaConsumer API
    consumer = SimpleConsumer(kafka, topic="weights", group="consumer",
                              auto_offset_reset='largest')
    weights = consumer.get_message()[1]  # OffsetAndMessage -> message
    weight = weights.value.decode('utf-8').split(',')

    # Get model
    model = get_model(weight, pretrained=False)

    # load data from Kafka
    directKafkaStream = KafkaUtils.createDirectStream(
        ssc, ["data"], {"metadata.broker.list": "localhost:9092"})

    # parse
    test_data = directKafkaStream.map(lambda line: line[1].split(',')).map(
        lambda row: [int(x) for x in row]).map(parse_point)

    # Predict and Train
    test_data.map(lambda row: [
data, stats = zk.get('/consumers/{0}/offsets/{1}/{2}'.format(args.consumer, args.topic, args.partition))
old_offset = data.decode()
if args.offset:
    zk.set('/consumers/{0}/offsets/{1}/{2}'.format(args.consumer, args.topic, args.partition),
           b"%s" % (args.offset))
try:
    if args.broker:
        kclient = KafkaClient("%s" % (args.broker))
        # add support for more than 1 partition
        consumer = SimpleConsumer(kclient, args.consumer, args.topic, partitions=[0])
        consumer.max_buffer_size = None
        if args.offset:
            consumer.seek(0, 1)
            message = consumer.get_message()
            if message:
                print "DEBUG: restoring"
                print("MSG: " + str(message[1][3]) +
                      "\tOFFSET: " + str(message[0]) +
                      "\t KEY: " + str(message.message.key))
            if not args.set:
                zk.set('/consumers/{0}/offsets/{1}/{2}'.format(args.consumer, args.topic, args.partition),
                       b"%s" % (old_offset))
            else:
                print "Old offset %s" % (old_offset)
                print "New offset %s" % (args.offset)
except:
    # zk.set('/consumers/{0}/offsets/{1}/{2}'.format(args.consumer, args.topic, args.partition), b"%s" % (old_offset))
    pass
def _run(self):
    pcount = 0
    pause = False
    while True:
        try:
            if pause:
                gevent.sleep(2)
                pause = False
            self._logger.error("New KafkaClient %s" % self._topic)
            self._kfk = KafkaClient(self._brokers, "kc-" + self._topic)
            try:
                consumer = SimpleConsumer(self._kfk, self._group, self._topic,
                                          buffer_size=4096 * 4,
                                          max_buffer_size=4096 * 32)
            except Exception as ex:
                template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("Error: %s trace %s" % (messag, traceback.format_exc()))
                raise RuntimeError(messag)

            self._logger.error("Starting %s" % self._topic)

            # Find the offset of the last message that has been queued
            consumer.seek(-1, 2)
            try:
                mi = consumer.get_message(timeout=0.1)
                consumer.commit()
            except common.OffsetOutOfRangeError:
                mi = None
            self._logger.info("Last Queued for %s is %s" % (self._topic, str(mi)))

            # start reading from last previously processed message
            if mi is not None:
                consumer.seek(-1, 1)
            else:
                consumer.seek(0, 0)

            if self._limit:
                raise gevent.GreenletExit

            while True:
                try:
                    mlist = consumer.get_messages(10, timeout=0.5)
                    if not self.msg_handler(mlist):
                        raise gevent.GreenletExit
                    consumer.commit()
                    pcount += len(mlist)
                except TypeError as ex:
                    self._logger.error("Type Error: %s trace %s" % (str(ex.args), traceback.format_exc()))
                    gevent.sleep(0.1)
                except common.FailedPayloadsError as ex:
                    self._logger.error("Payload Error: %s" % str(ex.args))
                    gevent.sleep(0.1)
        except gevent.GreenletExit:
            break
        except AssertionError as ex:
            self._partoffset = ex
            break
        except Exception as ex:
            template = "An exception of type {0} occurred. Arguments:\n{1!r}"
            messag = template.format(type(ex).__name__, ex.args)
            self._logger.error("%s : traceback %s" % (messag, traceback.format_exc()))
            self.stop_partition()
            pause = True

    self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
    partdb = self.stop_partition()
    return self._partoffset, partdb
def _run(self):
    pcount = 0
    while True:
        try:
            self._logger.info("New KafkaClient %d" % self._partition)
            kafka = KafkaClient(self._brokers, str(os.getpid()))
            try:
                consumer = SimpleConsumer(kafka, self._group, self._topic,
                                          buffer_size=4096 * 4,
                                          max_buffer_size=4096 * 32)
            except Exception as ex:
                template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.info("%s" % messag)
                raise gevent.GreenletExit

            self._logger.info("Starting %d" % self._partition)

            # Find the offset of the last message that has been queued
            consumer.seek(0, 2)
            try:
                mi = consumer.get_message(timeout=0.1)
            except common.OffsetOutOfRangeError:
                mi = None
            self._logger.info("Last Queued for %d is %s" % (self._partition, str(mi)))

            # start reading from last previously processed message
            consumer.seek(0, 1)

            if mi is not None:
                count = 0
                self._logger.info("Catching Up %d" % self._partition)
                loff = mi.offset
                coff = 0
                while True:
                    try:
                        mm = consumer.get_message(timeout=None)
                        count += 1
                        if not self.msg_handler(mm):
                            self._logger.info("%d could not process %s" % (self._partition, str(mm)))
                            raise gevent.GreenletExit
                        consumer.commit()
                        coff = mm.offset
                        self._logger.info("Syncing offset %d" % coff)
                        if coff == loff:
                            break
                    except Exception as ex:
                        self._logger.info("Sync Error %s" % str(ex))
                        break
                if coff != loff:
                    self._logger.info("Sync Failed for %d count %d" % (self._partition, count))
                    continue
                else:
                    self._logger.info("Sync Completed for %d count %d" % (self._partition, count))

            if self._limit:
                raise gevent.GreenletExit

            while True:
                try:
                    mm = consumer.get_message(timeout=None)
                    if mm is None:
                        continue
                    consumer.commit()
                    pcount += 1
                    if not self.msg_handler(mm):
                        self._logger.info("%d could not handle %s" % (self._partition, str(mm)))
                        raise gevent.GreenletExit
                except TypeError:
                    gevent.sleep(0.1)
                except common.FailedPayloadsError as ex:
                    self._logger.info("Payload Error: %s" % str(ex.args))
                    gevent.sleep(0.1)
        except gevent.GreenletExit:
            break
        except Exception as ex:
            template = "An exception of type {0} occurred. Arguments:\n{1!r}"
            messag = template.format(type(ex).__name__, ex.args)
            self._logger.info("%s" % messag)
            gevent.sleep(1)

    self._logger.info("Stopping %d pcount %d" % (self._partition, pcount))
    return self._partoffset, self._partdb
def _run(self):
    pcount = 0
    while True:
        try:
            self._logger.error("New KafkaClient %d" % self._partition)
            self._kfk = KafkaClient(self._brokers, str(os.getpid()))
            try:
                consumer = SimpleConsumer(self._kfk, self._group, self._topic,
                                          buffer_size=4096 * 4,
                                          max_buffer_size=4096 * 32)
            except Exception as ex:
                template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.info("%s" % messag)
                raise RuntimeError(messag)

            self._logger.error("Starting %d" % self._partition)

            # Find the offset of the last message that has been queued
            consumer.seek(0, 2)
            try:
                mi = consumer.get_message(timeout=0.1)
                consumer.commit()
            except common.OffsetOutOfRangeError:
                mi = None
            self._logger.info("Last Queued for %d is %s" % (self._partition, str(mi)))

            # start reading from last previously processed message
            if mi is not None:
                consumer.seek(0, 1)
            else:
                consumer.seek(0, 0)

            if self._limit:
                raise gevent.GreenletExit

            while True:
                try:
                    self.resource_check()
                    mlist = consumer.get_messages(10, timeout=0.2)
                    for mm in mlist:
                        if mm is None:
                            continue
                        self._logger.debug("%d Reading offset %d" % (self._partition, mm.offset))
                        consumer.commit()
                        pcount += 1
                        if not self.msg_handler(mm):
                            self._logger.info("%d could not handle %s" % (self._partition, str(mm)))
                            raise gevent.GreenletExit
                except TypeError as ex:
                    self._logger.error("Type Error: %s trace %s" % (str(ex.args), traceback.format_exc()))
                    gevent.sleep(0.1)
                except common.FailedPayloadsError as ex:
                    self._logger.error("Payload Error: %s" % str(ex.args))
                    gevent.sleep(0.1)
        except gevent.GreenletExit:
            break
        except AssertionError as ex:
            self._partoffset = ex
            break
        except Exception as ex:
            template = "An exception of type {0} occurred. Arguments:\n{1!r}"
            messag = template.format(type(ex).__name__, ex.args)
            self._logger.error("%s : traceback %s" % (messag, traceback.format_exc()))
            self.stop_partition()
            gevent.sleep(2)

    partdb = {}
    for coll in self._uvedb.keys():
        partdb[coll] = {}
        for gen in self._uvedb[coll].keys():
            partdb[coll][gen] = {}
            for tab in self._uvedb[coll][gen].keys():
                for rkey in self._uvedb[coll][gen][tab].keys():
                    uk = tab + ":" + rkey
                    partdb[coll][gen][uk] = \
                        set(self._uvedb[coll][gen][tab][rkey].keys())

    self._logger.error("Stopping %d pcount %d" % (self._partition, pcount))
    self.stop_partition()
    return self._partoffset, partdb