class KafkaMetricSender(MetricSender):
    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]
        # default topic
        # self.topic = kafka_config["topic"].encode('utf-8')
        # producer
        self.broker_list = kafka_config["brokerList"]
        self.kafka_client = None
        self.kafka_producer = None

    def open(self):
        self.kafka_client = KafkaClient(self.broker_list, timeout=59)
        self.kafka_producer = SimpleProducer(
            self.kafka_client,
            batch_send=True,
            batch_send_every_n=500,
            batch_send_every_t=30)

    def send(self, msg, topic):
        self.kafka_producer.send_messages(topic, json.dumps(msg))

    def close(self):
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
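# A minimal usage sketch for the sender above (not from the original source):
# the config shape is inferred from the constructor, and the topic and payload
# shown here are hypothetical.
#
#   config = {"output": {"kafka": {"brokerList": "localhost:9092"}}}
#   sender = KafkaMetricSender(config)
#   sender.open()
#   try:
#       sender.send({"metric": "cpu.usage", "value": 0.42}, "metrics")
#   finally:
#       sender.close()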
def test_simple_producer(self):
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    producer = SimpleProducer(self.client, random_start=False)

    # Goes to the first partition (random_start=False)
    resp = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
    self.assert_produce_response(resp, start_offsets[0])

    # Goes to the next partition
    resp = producer.send_messages(self.topic, self.msg("three"))
    self.assert_produce_response(resp, start_offsets[1])

    self.assert_fetch_offset(partitions[0], start_offsets[0],
                             [self.msg("one"), self.msg("two")])
    self.assert_fetch_offset(partitions[1], start_offsets[1],
                             [self.msg("three")])

    # Goes back to the first partition because there's only two partitions
    resp = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))
    self.assert_produce_response(resp, start_offsets[0] + 2)
    self.assert_fetch_offset(partitions[0], start_offsets[0], [
        self.msg("one"), self.msg("two"), self.msg("four"), self.msg("five")
    ])

    producer.stop()
def test_batched_simple_producer__triggers_by_message(self):
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    # Configure batch producer
    batch_messages = 5
    batch_interval = 5
    producer = SimpleProducer(
        self.client,
        batch_send=True,
        batch_send_every_n=batch_messages,
        batch_send_every_t=batch_interval,
        random_start=False)

    # Send 4 messages -- should not trigger a batch
    resp = producer.send_messages(
        self.topic,
        self.msg("one"),
        self.msg("two"),
        self.msg("three"),
        self.msg("four"),
    )

    # Batch mode is async. No ack
    self.assertEqual(len(resp), 0)

    # It hasn't sent yet
    self.assert_fetch_offset(partitions[0], start_offsets[0], [])
    self.assert_fetch_offset(partitions[1], start_offsets[1], [])

    # Send 3 more messages -- should trigger batch on first 5
    resp = producer.send_messages(
        self.topic,
        self.msg("five"),
        self.msg("six"),
        self.msg("seven"),
    )

    # Batch mode is async. No ack
    self.assertEqual(len(resp), 0)

    # send_messages groups all *msgs in a single call to the same partition,
    # so we should see all messages from the first call in one partition
    self.assert_fetch_offset(partitions[0], start_offsets[0], [
        self.msg("one"),
        self.msg("two"),
        self.msg("three"),
        self.msg("four"),
    ])

    # Because we are batching every 5 messages, we should only see one
    self.assert_fetch_offset(partitions[1], start_offsets[1], [
        self.msg("five"),
    ])

    producer.stop()
def test_batched_simple_producer__triggers_by_time(self):
    self.skipTest("Flakey test -- should be refactored or removed")
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    batch_interval = 5
    producer = SimpleProducer(
        self.client,
        async_send=True,
        batch_send_every_n=100,
        batch_send_every_t=batch_interval,
        random_start=False)

    # Send 4 messages and do a fetch
    resp = producer.send_messages(
        self.topic,
        self.msg("one"),
        self.msg("two"),
        self.msg("three"),
        self.msg("four"),
    )

    # Batch mode is async. No ack
    self.assertEqual(len(resp), 0)

    # It hasn't sent yet
    self.assert_fetch_offset(partitions[0], start_offsets[0], [])
    self.assert_fetch_offset(partitions[1], start_offsets[1], [])

    resp = producer.send_messages(
        self.topic,
        self.msg("five"),
        self.msg("six"),
        self.msg("seven"),
    )

    # Batch mode is async. No ack
    self.assertEqual(len(resp), 0)

    # Wait the timeout out
    time.sleep(batch_interval)

    self.assert_fetch_offset(partitions[0], start_offsets[0], [
        self.msg("one"),
        self.msg("two"),
        self.msg("three"),
        self.msg("four"),
    ])

    self.assert_fetch_offset(partitions[1], start_offsets[1], [
        self.msg("five"),
        self.msg("six"),
        self.msg("seven"),
    ])

    producer.stop()
def test_acks_none(self):
    start_offset0 = self.current_offset(self.topic, 0)

    producer = SimpleProducer(self.client,
                              req_acks=SimpleProducer.ACK_NOT_REQUIRED)
    resp = producer.send_messages(self.topic, self.msg("one"))
    self.assertEqual(len(resp), 0)

    self.assert_fetch_offset(0, start_offset0, [self.msg("one")])

    producer.stop()
def test_acks_local_write(self):
    start_offset0 = self.current_offset(self.topic, 0)

    producer = SimpleProducer(self.client,
                              req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE)
    resp = producer.send_messages(self.topic, self.msg("one"))
    self.assert_produce_response(resp, start_offset0)

    self.assert_fetch_offset(0, start_offset0, [self.msg("one")])

    producer.stop()
def main():
    """Run a consumer.

    Two environment variables are expected:

    * CONFIG_URI: A PasteDeploy URI pointing at the configuration for the
      application.
    * QUEUE: The name of the queue to consume (currently one of "events" or
      "errors").

    """
    config_uri = os.environ["CONFIG_URI"]
    config = paste.deploy.loadwsgi.appconfig(config_uri)
    logging.config.fileConfig(config["__file__"])

    queue_name = os.environ["QUEUE"]
    queue = MessageQueue(
        "/" + queue_name,
        max_messages=MAXIMUM_QUEUE_LENGTH,
        max_message_size=MAXIMUM_EVENT_SIZE)

    metrics_client = baseplate.make_metrics_client(config)

    topic_name = config["topic." + queue_name]

    producer_options = {
        "codec": CODEC_GZIP,
        "batch_send_every_n": 20,
        "batch_send_every_t": 0.01,  # 10 milliseconds
    }

    while True:
        try:
            kafka_client = KafkaClient(config["kafka_brokers"])
            kafka_producer = SimpleProducer(kafka_client, **producer_options)
        except KafkaError as exc:
            _LOG.warning("could not connect: %s", exc)
            metrics_client.counter("injector.connection_error").increment()
            time.sleep(_RETRY_DELAY)
            continue

        while True:
            message = queue.get()
            for retry in itertools.count():
                try:
                    kafka_producer.send_messages(topic_name, message)
                except KafkaError as exc:
                    _LOG.warning("failed to send message: %s", exc)
                    metrics_client.counter("injector.error").increment()
                    time.sleep(_RETRY_DELAY)
                else:
                    metrics_client.counter("collected.injector").increment()
                    break

        kafka_producer.stop()
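# For reference, a standalone sketch of the producer configured above: gzip
# compression with batches flushed every 20 messages or every 10 ms. The
# broker address and topic are placeholders, and the CODEC_GZIP import path
# shown is the one used by legacy kafka-python; it may differ by version.
#
#   from kafka import KafkaClient, SimpleProducer
#   from kafka.protocol import CODEC_GZIP
#
#   client = KafkaClient("localhost:9092")
#   producer = SimpleProducer(client, codec=CODEC_GZIP,
#                             batch_send_every_n=20, batch_send_every_t=0.01)
#   producer.send_messages(b"events", b"payload")
#   producer.stop()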
def test_acks_cluster_commit(self):
    start_offset0 = self.current_offset(self.topic, 0)

    producer = SimpleProducer(
        self.client,
        req_acks=SimpleProducer.ACK_AFTER_CLUSTER_COMMIT)
    resp = producer.send_messages(self.topic, self.msg("one"))
    self.assert_produce_response(resp, start_offset0)

    self.assert_fetch_offset(0, start_offset0, [self.msg("one")])

    producer.stop()
def test_async_simple_producer(self):
    partition = self.client.get_partition_ids_for_topic(self.topic)[0]
    start_offset = self.current_offset(self.topic, partition)

    producer = SimpleProducer(self.client, async_send=True, random_start=False)
    resp = producer.send_messages(self.topic, self.msg("one"))
    self.assertEqual(len(resp), 0)

    # flush messages
    producer.stop()

    self.assert_fetch_offset(partition, start_offset, [self.msg("one")])
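# The stop()-before-assert ordering above is the point of this variant: with
# async_send=True, send_messages() only enqueues onto an internal queue and
# returns [], and SimpleProducer.stop() drains that queue before joining the
# background thread. A hedged standalone sketch of the same pattern:
#
#   producer = SimpleProducer(client, async_send=True)
#   producer.send_messages(b"my-topic", b"payload")  # returns [] immediately
#   producer.stop()  # blocks until queued messages are flushed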
def outputPrediction(iter):
    # print "output prediction: len = %d, host = %s, pid = %d, tid = %d" % (len(inputs), socket.gethostname(), os.getpid(), thread.get_ident())
    producer = None
    for activity in iter:
        if producer is None:
            producer = SimpleProducer(KafkaClient(kafkaHost))
        print "[%s] Prediction - user=%s, activity=%s" % (datetime.now(), activity.user_id, activity.activity_label)
        producer.send_messages(topicOut, activity.serialize())
    if producer is not None:
        producer.stop()
def test_batched_simple_producer__triggers_by_time(self):
    start_offset0 = self.current_offset(self.topic, 0)
    start_offset1 = self.current_offset(self.topic, 1)

    producer = SimpleProducer(
        self.client,
        batch_send=True,
        batch_send_every_n=100,
        batch_send_every_t=5,
        random_start=False)

    # Send 4 messages and do a fetch
    resp = producer.send_messages(
        self.topic,
        self.msg("one"),
        self.msg("two"),
        self.msg("three"),
        self.msg("four"),
    )

    # Batch mode is async. No ack
    self.assertEqual(len(resp), 0)

    # It hasn't sent yet
    self.assert_fetch_offset(0, start_offset0, [])
    self.assert_fetch_offset(1, start_offset1, [])

    resp = producer.send_messages(
        self.topic,
        self.msg("five"),
        self.msg("six"),
        self.msg("seven"),
    )

    # Batch mode is async. No ack
    self.assertEqual(len(resp), 0)

    # Wait the timeout out
    time.sleep(5)

    self.assert_fetch_offset(0, start_offset0, [
        self.msg("one"),
        self.msg("two"),
        self.msg("three"),
        self.msg("four"),
    ])

    self.assert_fetch_offset(1, start_offset1, [
        self.msg("five"),
        self.msg("six"),
        self.msg("seven"),
    ])

    producer.stop()
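# A standalone sketch of the time-triggered batch mode exercised above
# (broker address and topic are placeholders):
#
#   client = KafkaClient("localhost:9092")
#   producer = SimpleProducer(client, batch_send=True,
#                             batch_send_every_n=100,  # flush after 100 messages...
#                             batch_send_every_t=5)    # ...or after 5 seconds
#   producer.send_messages(b"my-topic", b"queued, not yet on the broker")
#   time.sleep(5)  # the background thread flushes once the interval elapses
#   producer.stop()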
class WeatherProducer(object):
    def __init__(self, ip_address, topic='weather', port='9092'):
        self.topic = topic
        # was hard-coded to '9092', ignoring the port argument
        self.kafka = KafkaClient(ip_address + ':' + port)
        self.producer = SimpleProducer(self.kafka)

    def publish_weather_data(self):
        metro = MetroDataset()
        while True:
            current_time = datetime.datetime.now().strftime('%d/%m/%y %H:%M')
            date_time = current_time.split(" ")
            time_now = date_time[1]
            time_list = time_now.split(":")
            minutes = time_list[1]
            interval_flag = int(minutes) / 30.0
            if interval_flag == 0.0 or interval_flag == 1.0:
                try:
                    response = self.producer.send_messages(self.topic, json.dumps(metro.publish_data()))
                except LeaderNotAvailableError:
                    time.sleep(1)
                    response = self.producer.send_messages(self.topic, json.dumps(metro.publish_data()))
                print response
                time.sleep(70)
        self.producer.stop()

    def publish_to_file(self):
        metro = MetroDataset()
        data = {}
        while True:
            try:
                current_time = datetime.datetime.now().strftime('%d/%m/%y %H:%M')
                date_time = current_time.split(" ")
                time_now = date_time[1]
                time_list = time_now.split(":")
                minutes = time_list[1]
                interval_flag = int(minutes) / 30.0
                if interval_flag == 0.0 or interval_flag == 1.0:
                    metro_data = metro.publish_data()
                    if data.has_key(metro_data['date']):
                        if not data[metro_data['date']].has_key(metro_data['time']):
                            data[metro_data['date']].update({metro_data['time']: metro_data['data']})
                    else:
                        data[metro_data['date']] = {metro_data['time']: metro_data['data']}
                    print data
            except:
                with open('data.json', 'w') as outfile:
                    json.dump(data, outfile)
                sys.exit()
from kafka import KafkaClient, SimpleProducer


def partitionsLoadBalance():
    '''
    Send the specified message to the kafka topic (test), 5000 times in this case.
    '''
    k1 = KafkaClient('localhost:9092')
    producer = SimpleProducer(k1)
    for i in range(200):
        msg = 'This is %dst test' % (i)
        msg1 = ''
        for j in range(2000):
            msg1 = ':'.join([msg1, msg])
        producer.send_messages(b'test', msg1)
    producer.stop()
def test_async_simple_producer(self):
    partition = self.client.get_partition_ids_for_topic(self.topic)[0]
    start_offset = self.current_offset(self.topic, partition)

    producer = SimpleProducer(self.client, async=True, random_start=False)
    resp = producer.send_messages(self.topic, self.msg("one"))
    self.assertEqual(len(resp), 0)

    # wait for the server to report a new highwatermark
    while self.current_offset(self.topic, partition) == start_offset:
        time.sleep(0.1)

    self.assert_fetch_offset(partition, start_offset, [self.msg("one")])

    producer.stop()
class EnergyProducer(object):
    def __init__(self, ip_address, topic='energy', port='9092'):
        self.topic = topic
        # was hard-coded to '9092', ignoring the port argument
        self.kafka = KafkaClient(ip_address + ':' + port)
        self.producer = SimpleProducer(self.kafka)

    def replay_energy_data(self, WORKING_DIR=PRODUCER_DIR, start=10, end=11, year=2015):
        dataset = EnergyDataset()
        for num_month in range(start, end + 1):
            month = datetime.date(1900, num_month, 1).strftime('%b')
            print month
            data = dataset.replay(PRODUCER_DIR, month.lower(), year)
            response = self.producer.send_messages(self.topic, json.dumps(data))
            print response
        self.producer.stop()
class SimpleProducer(BaseStreamProducer):
    def __init__(self, connection, topic):
        self._connection = connection
        self._topic = topic
        self._create()

    def _create(self):
        self._producer = KafkaSimpleProducer(self._connection, codec=CODEC_SNAPPY)

    def send(self, key, *messages):
        self._producer.send_messages(self._topic, *messages)

    def flush(self):
        self._producer.stop()
        del self._producer
        self._create()

    def get_offset(self, partition_id):
        # Kafka has its own offset management
        raise KeyError
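# A hypothetical driver for the wrapper above (connection string and topic are
# placeholders; KafkaSimpleProducer is the legacy kafka-python SimpleProducer
# imported under an alias):
#
#   connection = KafkaClient("localhost:9092")
#   stream = SimpleProducer(connection, "frontier-events")
#   stream.send(None, b"msg-1", b"msg-2")  # the key is ignored by this producer
#   stream.flush()  # stop() then recreate, forcing queued messages out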
def test_simple_producer(self):
    start_offset0 = self.current_offset(self.topic, 0)
    start_offset1 = self.current_offset(self.topic, 1)

    producer = SimpleProducer(self.client, random_start=False)

    # Goes to the first partition (random_start=False)
    resp = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
    self.assert_produce_response(resp, start_offset0)

    # Goes to the next partition
    resp = producer.send_messages(self.topic, self.msg("three"))
    self.assert_produce_response(resp, start_offset1)

    self.assert_fetch_offset(0, start_offset0, [self.msg("one"), self.msg("two")])
    self.assert_fetch_offset(1, start_offset1, [self.msg("three")])

    # Goes back to the first partition because there's only two partitions
    resp = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))
    self.assert_produce_response(resp, start_offset0 + 2)
    self.assert_fetch_offset(0, start_offset0, [
        self.msg("one"), self.msg("two"), self.msg("four"), self.msg("five")
    ])

    producer.stop()
class HeartBeat:
    def __init__(self, qinfo):
        self.topic = qinfo['kafka_topic']
        self.client = KafkaClient(qinfo['kafka_broke'])
        self.producer = SimpleProducer(self.client, codec=CODEC_SNAPPY)

    def send(self, name, num=1):
        data = {
            "name": name,
            "num": num,
            "time": int(time.time())
        }
        print "***************send********************"
        data_str = json.dumps(data)
        self.producer.send_messages(self.topic, data_str)

    def close(self):
        # stop the producer before closing the client it writes through
        self.producer.stop()
        self.client.close()
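# A hypothetical usage sketch; the qinfo keys are taken from the constructor,
# and the broker address and topic are placeholders:
#
#   hb = HeartBeat({"kafka_broke": "localhost:9092", "kafka_topic": "heartbeat"})
#   hb.send("worker-1")  # publishes {"name": "worker-1", "num": 1, "time": ...}
#   hb.close()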
def simpleTest():
    # Connection test against the Kafka server at bamboo:9192
    try:
        kafka = KafkaClient('bamboo:9192')
    except KafkaUnavailableError as msg:
        print("KafkaUnavailableError:", msg)
        sys.exit(-1)
    except Exception as msg:
        print("Exception:", msg)
        sys.exit(-1)

    producer = SimpleProducer(kafka)
    print(kafka.topics)

    '''
    Note that the application is responsible for encoding messages to type bytes
    '''
    if 'JOB_TEST_1' in kafka.topics:
        producer.send_messages(b'JOB_TEST_1', b'some message')
        producer.send_messages(b'JOB_TEST_1', b'this method', b'is variadic')
        # Send unicode message
        producer.send_messages(b'JOB_TEST_1', '你怎么样?'.encode('utf-8'))
    producer.stop()
def upload_file_to_kafka(self, topic, file_path, **kwargs):
    """
    Utility function to upload the contents of a file to a given kafka topic

    :param topic: Kafka topic to which the file will be uploaded
    :param file_path: Absolute path of the file to be uploaded
    :param kwargs: append - If True, file content is uploaded to the existing
        topic; if the topic is not present, a new one is created. If False and
        the topic is not present, a new topic is created; if the topic is
        already present, an error is returned. Defaults to False.
    :return: True if content was uploaded, else False
    """
    append = kwargs.get('append', False)
    result = False
    producer = None
    try:
        if not append:
            # Check if topic is already present
            if self.kafka.has_metadata_for_topic(topic):
                print 'Error - Kafka topic : ' + topic + ' already present and append is : ' + str(append)
                return False
        # Reached when append is True (topic present or not), or when append
        # is False and the topic is not yet present
        if self._ensure_kafka_topic_exists(topic):
            producer = SimpleProducer(self.kafka, batch_send=True,
                                      batch_send_every_n=20)
            with open(file_path, 'rU') as fh:
                for line in fh:
                    producer.send_messages(topic, line.strip())
            result = True
    except:
        print 'Error - uploading file : ' + file_path + ' to topic : ' + topic
    finally:
        if producer:
            producer.stop()
    return result
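# A hypothetical call of the helper above (instance name, topic, and path are
# placeholders):
#
#   ok = client.upload_file_to_kafka("events", "/tmp/events.log", append=True)
#   # ok is True only if every line of the file was handed to the producer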
class KafkaLogger:
    def __init__(self):
        from kafka import SimpleProducer, KafkaClient
        from kafka.common import LeaderNotAvailableError
        self.kafka_client = KafkaClient(config.KAFKA_SERVER)
        self.kafka = SimpleProducer(self.kafka_client)
        self.head = HeadBuilder("db", "type", "tom", config.DB_NAME)
        try:
            self.kafka.send_messages(config.KAFKA_TOPIC, b"creating topic")
        except LeaderNotAvailableError:
            time.sleep(1)

    def close(self):
        self.kafka.stop(0)
        self.kafka_client.close()

    def start(self):
        pass

    def commit(self):
        pass

    def session_info(self):
        body = bb.session_info()
        now = int(time.time() * 1000)
        header = self.head.create('info', config.TOM, now)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))

    def cpu_sys(self, epoch, user_count, system_count, idle_count, percent):
        "Logs CPU metrics at system level"
        body = bb.cpu_sys(epoch, user_count, system_count, idle_count, percent)
        header = self.head.create('system_cpu', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        # if config.PRINT_CONSOLE: print(reply)

    def cpu_proc(self, epoch, pid, priority, ctx_count, n_threads, cpu_user,
                 cpu_system, percent, pname):
        "Logs CPU metrics at process level"
        epoch *= 1000  # this converts it into milliseconds
        body = bb.cpu_proc(epoch, pid, priority, ctx_count, n_threads,
                           cpu_user, cpu_system, percent, pname)
        header = self.head.create('process_cpu', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        # if config.PRINT_CONSOLE: print(reply)

    def mem_sys(self, epoch, available, percent, used, free, swap_total,
                swap_used, swap_free, swap_in, swap_out, swap_percent):
        "Logs memory metrics at system level"
        epoch *= 1000  # this converts it into milliseconds
        body = bb.mem_sys(epoch, available, percent, used, free, swap_total,
                          swap_used, swap_free, swap_in, swap_out, swap_percent)
        header = self.head.create('system_memory', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        # if config.PRINT_CONSOLE: print(reply)

    def mem_proc(self, epoch, pid, rss, vms, percent, pname):
        "Logs memory metrics at process level"
        epoch *= 1000  # this converts it into milliseconds
        body = bb.mem_proc(epoch, pid, rss, vms, percent, pname)
        header = self.head.create('process_memory', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        # if config.PRINT_CONSOLE: print(reply)

    def io_sys(self, epoch, bytes_sent, bytes_recv, packets_sent, packets_recv,
               errin, errout, dropin, dropout):
        "Logs I/O metrics at system level"
        epoch *= 1000  # this converts it into milliseconds
        body = bb.io_sys(epoch, bytes_sent, bytes_recv, packets_sent,
                         packets_recv, errin, errout, dropin, dropout)
        header = self.head.create('system_io', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        # if config.PRINT_CONSOLE: print(reply)

    def proc_error(self, epoch, pid, name):
        "Logs a process error event"
        epoch *= 1000  # this converts it into milliseconds
        body = bb.proc_error(epoch, pid, name)
        header = self.head.create('event', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        # if config.PRINT_CONSOLE: print(reply)

    def proc_info(self, epoch, pid, name):
        "Logs process info"
        epoch *= 1000  # this converts it into milliseconds
        body = bb.proc_info(epoch, pid, name)
        header = self.head.create('process_info', config.TOM, epoch)
        msg = '{"header": ' + header + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        # if config.PRINT_CONSOLE: print(reply)


# from kafka import SimpleProducer, KafkaClient
#
# # To send messages synchronously
# kafka = KafkaClient("192.168.2.79:9092")
# producer = SimpleProducer(kafka)
#
# # Note that the application is responsible for encoding messages to type str
# producer.send_messages("my-topic", b"some message")
# producer.send_messages("my-topic", b"this method", b"is variadic")
#
# # Send unicode message
# producer.send_messages("my-topic", u'你怎么样?'.encode('utf-8'))
#
# # To send messages asynchronously
# # WARNING: current implementation does not guarantee message delivery on failure!
# # messages can get dropped! Use at your own risk! Or help us improve with a PR!
# producer = SimpleProducer(kafka, async=True)
# producer.send_messages("my-topic", b"async message")
#
# # To wait for acknowledgements
# # ACK_AFTER_LOCAL_WRITE : server will wait till the data is written to
# #                         a local log before sending response
# # ACK_AFTER_CLUSTER_COMMIT : server will block until the message is committed
# #                            by all in sync replicas before sending a response
# producer = SimpleProducer(kafka, async=False,
#                           req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
#                           ack_timeout=2000)
#
# response = producer.send_messages("my-topic", b"another message")
#
# if response:
#     print(response[0].error)
#     print(response[0].offset)
#
# # To send messages in batch. You can use any of the available
# # producers for doing this. The following producer will collect
# # messages in batch and send them to Kafka after 20 messages are
# # collected or every 60 seconds
# # Notes:
# # * If the producer dies before the messages are sent, there will be losses
# # * Call producer.stop() to send the messages and cleanup
# producer = SimpleProducer(kafka, batch_send=True,
#                           batch_send_every_n=20,
#                           batch_send_every_t=60)
        print()
        self.counter += 1
        if self.counter == self.max_count:
            return False
        return True

    def on_error(self, status):
        print(status)
        return False


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Streaming tweets into kafka')
    parser.add_argument('--verbose', type=bool, default=False)
    parser.add_argument('--count', type=int, default=10000)
    args = parser.parse_args()

    # connecting to the kafka server
    kafka_client = SimpleClient("localhost:9092")
    # creating a producer to this server
    kafka_producer = SimpleProducer(kafka_client)
    # creating a standard output listener of tweets
    listener = StdOutListener(producer=kafka_producer, count=args.count,
                              verbose=args.verbose)

    # connecting to the twitter api
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    # streaming tweets from Trump
    stream = Stream(auth, listener)
    stream.filter(track=["trump"])

    kafka_producer.stop()
class KafkaMetricSender(MetricSender):
    start_time = time.time()
    end_time = time.time()

    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]

        # default topic
        self.default_topic = None
        if kafka_config.has_key("default_topic"):
            self.default_topic = kafka_config["default_topic"].encode('utf-8')
        self.component_topic_mapping = {}
        if kafka_config.has_key("component_topic_mapping"):
            self.component_topic_mapping = kafka_config["component_topic_mapping"]

        if not self.default_topic and not bool(self.component_topic_mapping):
            raise Exception("both kafka config 'default_topic' and 'component_topic_mapping' are empty")

        # producer
        self.broker_list = kafka_config["broker_list"]
        self.kafka_client = None
        self.kafka_producer = None
        self.debug_enabled = False
        self.sent_count = 0
        if kafka_config.has_key("debug"):
            self.debug_enabled = bool(kafka_config["debug"])
            logging.info("Overrode output.kafka.debug: " + str(self.debug_enabled))

    def get_topic_id(self, msg):
        if msg.has_key("component"):
            component = msg["component"]
            if self.component_topic_mapping.has_key(component):
                return self.component_topic_mapping[component]
            else:
                return self.default_topic
        else:
            if not self.default_topic:
                raise Exception("no default topic found for unknown-component msg: " + str(msg))
            return self.default_topic

    def open(self):
        logging.info("Opening kafka connection for producer")
        self.kafka_client = KafkaClient(self.broker_list, timeout=50)
        self.kafka_producer = SimpleProducer(
            self.kafka_client,
            batch_send=False,
            batch_send_every_n=500,
            batch_send_every_t=30)
        self.start_time = time.time()

    def send(self, msg):
        if self.debug_enabled:
            logging.info("Send message: " + str(msg))
        self.sent_count += 1
        self.kafka_producer.send_messages(self.get_topic_id(msg), json.dumps(msg))

    def close(self):
        logging.info("Closing kafka connection and producer")
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
        self.end_time = time.time()
        logging.info("Sent " + str(self.sent_count) + " metric events in "
                     + str(self.end_time - self.start_time) + " sec in total")
class KafkaMetricSender(MetricSender):
    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]

        # default topic
        self.default_topic = None
        if kafka_config.has_key("default_topic"):
            self.default_topic = kafka_config["default_topic"].encode('utf-8')
        self.component_topic_mapping = {}
        if kafka_config.has_key("component_topic_mapping"):
            self.component_topic_mapping = kafka_config["component_topic_mapping"]

        if not self.default_topic and not bool(self.component_topic_mapping):
            raise Exception("both kafka config 'default_topic' and 'component_topic_mapping' are empty")

        # producer
        self.broker_list = kafka_config["broker_list"]
        self.kafka_client = None
        self.kafka_producer = None
        self.debug_enabled = False
        self.sent_count = 0
        if kafka_config.has_key("debug"):
            self.debug_enabled = bool(kafka_config["debug"])
            logging.info("Overrode output.kafka.debug: " + str(self.debug_enabled))

    def get_topic_id(self, msg):
        if msg.has_key("component"):
            component = msg["component"]
            if self.component_topic_mapping.has_key(component):
                return self.component_topic_mapping[component]
            else:
                return self.default_topic
        else:
            if not self.default_topic:
                raise Exception("no default topic found for unknown-component msg: " + str(msg))
            return self.default_topic

    def open(self):
        self.kafka_client = KafkaClient(self.broker_list, timeout=59)
        self.kafka_producer = SimpleProducer(
            self.kafka_client,
            batch_send=True,
            batch_send_every_n=500,
            batch_send_every_t=30)

    def send(self, msg):
        if self.debug_enabled:
            logging.info("Send message: " + str(msg))
        self.sent_count += 1
        self.kafka_producer.send_messages(self.get_topic_id(msg), json.dumps(msg))

    def close(self):
        logging.info("Sent " + str(self.sent_count) + " metric events in total")
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
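# The config shape the two topic-mapping senders above expect, inferred from
# their constructors (broker list and topic names are placeholders):
#
#   config = {
#       "output": {
#           "kafka": {
#               "broker_list": "localhost:9092",
#               "default_topic": "metrics_all",
#               "component_topic_mapping": {"namenode": "metrics_hdfs"},
#               "debug": True,
#           }
#       }
#   }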
def test_simple_producer_new_topic(self):
    producer = SimpleProducer(self.client)
    resp = producer.send_messages('new_topic', self.msg('foobar'))
    self.assert_produce_response(resp, 0)
    producer.stop()
class KafkaDriver:
    def __init__(self, driver_args, event_loop):
        self.logger = logging.getLogger('KafkaDriver')  # possible TODO: get logger from invoker
        self.logger.setLevel(logging.INFO)
        console_log_handler = logging.StreamHandler(sys.stdout)
        self.logger.addHandler(console_log_handler)
        self.logger.info("KafkaDriver initialized; driver_args=%s" % (driver_args))

        self.event_loop = event_loop

        if driver_args == "":  # was `is ""`, an identity check that only works by accident
            kafka_server_addr = "localhost:9092"
        else:
            kafka_server_addr = driver_args

        # generate a unique client ID so that Kafka doesn't confuse us with a different instance
        client_id = "KafkaDriver-%d-%d" % (time.time(), os.getpid())
        self.kafka = KafkaClient(kafka_server_addr, client_id=client_id)

        self.queue_name = None

        ## APPEND direction
        self.get_message_stream = None
        # how frequently to check for messages and (space permitting) to add
        # them to the GET message stream, in seconds
        self.MESSAGE_CHECK_FREQ = 0.010
        # how many messages we have sent from various queues
        self.get_message_count = 0
        self.producer = None

        ## GET direction
        self.consumer = None
        self.get_message_count = 0
        # most number of messages that we will request from Kafka at a time
        self.MAX_KAFKA_REQ_BATCH_MSGS = 200

    ######## APPEND direction ########

    # called to tell driver of a new stream of appends that are going to come
    # in; these should go to the end of the named queue
    def prepare_for_append_stream(self, queue_name):
        self.logger.info("KafkaDriver prepare_for_append_stream got: queue_name=%s" % (queue_name))
        self.queue_name = str(queue_name)
        self.producer = SimpleProducer(
            self.kafka,
            async=True,
            req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
            ack_timeout=5000,
            batch_send=True,
            batch_send_every_n=100,
            batch_send_every_t=1000,
            random_start=True
        )

    def append(self, payload, ttl):
        ttl = int(ttl)
        self.logger.debug("KafkaDriver append got: ttl=%d, payload='%s'" % (ttl, payload))
        try:
            self.producer.send_messages(self.queue_name, payload)
        except UnknownTopicOrPartitionError:
            self.logger.warn("Kafka reports unknown topic or invalid partition number: " + str(sys.exc_info()))
            return 500
        except:
            self.logger.warn("Got exception from kafka-python SimpleProducer:" + str(sys.exc_info()))
            return 500
        # if random.uniform(0,1) < self.FRACTION_MSGS_TO_FAKE_APPEND_ERROR:
        #     self.logger.debug("faking error")
        #     return 400
        return 100

    def cancel_append_stream(self):
        self.logger.info("KafkaDriver cancel_append_stream got called")
        self.producer.stop()
        self.producer = None
        self.queue_name = None

    ######## GET direction ########

    # called to tell driver that a new stream of messages is needed for return
    # to a client. message_stream_queue is an instance of MessageStream to use
    # to put messages the driver has available as a response to this request.
    # Other arguments have same meaning as in the Marconi API.
    def init_get_stream(self, get_message_stream, queue_name_spec, starting_marker, echo_requested, include_claimed):
        self.logger.info("KafkaDriver prepare_to_get_messages got: queue_name=%s, echo_requested=%s, include_claimed=%s, starting_marker=%s"
                         % (queue_name_spec, str(echo_requested), str(include_claimed), starting_marker))
        self.logger.info("warning: KafkaDriver ignores echo_requested and include_claimed in GET requests")

        self.consume_group = "cg1"  # default consume group
        if len(starting_marker) > 0:
            self.consume_group = starting_marker
        self.logger.info("consume group=" + self.consume_group)

        # if the queue name ends in "/n", we interpret that as the partition
        # to read from (the original unconditional tuple-unpack raised
        # ValueError when no "/" was present)
        if "/" in queue_name_spec:
            queue_name, partition_part = queue_name_spec.split("/", 1)
            partition = int(partition_part)
            self.logger.info("limiting topic %s to partition %d" % (queue_name, partition))
        else:
            queue_name = queue_name_spec
            partition = None

        self.get_message_stream = get_message_stream
        self.queue_name = str(queue_name)
        self.consumer = SimpleConsumer(
            client=self.kafka,
            group=self.consume_group,
            topic=self.queue_name,
            partitions=None if partition is None else [partition],
            # it seems we cannot do any kind of commit when using kafka-python
            # 0.9.1 with Kafka versions before 0.8.1 because kafka-python will
            # send an OffsetFetchRequest (request type 9) or OffsetCommitRequest
            # (request type 8) which is not supported
            auto_commit=False,
            # in Marconi, messages can be up to 4k
            fetch_size_bytes=self.MAX_KAFKA_REQ_BATCH_MSGS * 4096,
            iter_timeout=None,
        )
        self.logger.debug("KafkaDriver: seeking to head of %s" % (self.queue_name))
        # seek to head of topic; TODO: should get starting position from starting_marker param
        self.consumer.seek(0, 0)

        # kick off periodic attainment of new messages (space permitting)
        self.periodically_check_for_new_messages()

    def periodically_check_for_new_messages(self):
        # self.logger.debug("KafkaDriver.periodically_check_for_new_messages()")
        if self.get_message_stream is not None:  # still providing messages
            self.check_for_new_messages()
            # TODO: call call_soon() rather than call_later() if we got some
            # messages and there is still space available in the MessageStream
            # schedule self to run again after MESSAGE_CHECK_FREQ seconds
            self.new_msg_check_callback = self.event_loop.call_later(
                self.MESSAGE_CHECK_FREQ, self.periodically_check_for_new_messages)

    def check_for_new_messages(self):
        self.logger.debug("KafkaDriver.check_for_new_messages (start): space_used=%d, amount_of_space_avail=%d"
                          % (self.get_message_stream.space_used(), self.get_message_stream.amount_of_space_avail()))
        max_number_of_messages = self.get_message_stream.amount_of_space_avail()
        if max_number_of_messages == 0:
            return  # no space left to add messages, so don't look for any

        # now try to get up to max_number_of_messages messages from the topic,
        # but in a non-blocking manner
        messages = self.consumer.get_messages(count=max_number_of_messages, block=False)
        self.logger.debug("got %d messages from Kafka" % (len(messages)))
        assert len(messages) <= max_number_of_messages

        # add the messages to the message stream
        for message_and_offset in messages:
            self.get_message_count += 1
            offset_str = "%016x" % (message_and_offset.offset)  # make offset into 16 hex chars
            # construct a new message and add it to the stream
            self.get_message_stream.add_message(
                payload=str(message_and_offset.message.value),
                # TODO: this is supposed to be a value we can use as a
                # start_marker, but it doesn't indicate the partition, so it
                # is not unique
                marker=offset_str,
                id=offset_str,
                # we don't store the original TTL so (for now at least) just
                # send max signed 32 bit int
                ttl=(2**31) - 1,
                age=0,
            )
        # self.logger.debug("KafkaDriver.check_for_new_messages (end): space_used=%d, amount_of_space_avail=%d"
        #                   % (self.get_message_stream.space_used(), self.get_message_stream.amount_of_space_avail()))

    # called to let the driver know that no more messages are needed for the
    # previously requested stream of messages and that it should free up any
    # associated resources.
    def cancel_get_stream(self):
        self.new_msg_check_callback.cancel()  # cancel call to periodically_check_for_new_messages()
        self.consumer.stop()
        self.consumer = None
        self.get_message_stream = None
        self.queue_name = None
class KafkaJsonLogger:
    indices = {}

    def __init__(self):
        from kafka import SimpleProducer, KafkaClient
        from kafka.common import LeaderNotAvailableError
        self.kafka_client = KafkaClient(config.KAFKA_SERVER)
        self.kafka = SimpleProducer(self.kafka_client)
        for oid in config.SNMP_OIDS:
            self.indices[oid._name()] = 0
        self.head = HeadBuilder("db", "type", "tom", config.DB_NAME)
        try:
            self.kafka.send_messages(config.KAFKA_TOPIC, b"creating topic")
        except LeaderNotAvailableError:
            time.sleep(1)

    def close(self):
        self.kafka.stop(0)
        self.kafka_client.close()

    def start(self, epoch):
        head = self.head.create('info', config.TOM, epoch)
        body = '{"description" : "started (' + config.SESSION_NAME + ')"' + ', "value" : ' + str(config.SESSION_ID) + '}'
        msg = '{"header": ' + head + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        if config.PRINT_CONSOLE: print(msg)

    def stop(self, epoch):
        head = self.head.create('info', config.TOM, epoch)
        body = '{"description" : "stopped (' + config.SESSION_NAME + ')"' + ', "value" : ' + str(config.SESSION_ID) + '}'
        msg = '{"header": ' + head + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        if config.PRINT_CONSOLE: print(msg)

    def value(self, epoch, oid, name, value):
        # name = oid._name()
        name = name.replace(' ', '_')
        index = self.indices[name]
        index += 1
        self.indices[name] = index
        is_error = False
        str_value = str(value)
        if oid.numeric and not utils.is_number(str_value):
            is_error = True
        head = self.head.create(name, oid.target_name, epoch)
        body = '{"target" : "' + str(oid.target()) + '", ' + \
               '"oid" : "' + str(oid.oid_id)
        if is_error:
            body += '", "error" : "' + str_value + '"}'
        else:
            body += '", "value" : ' + str_value + '}'
        msg = '{"header": ' + head + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        if config.PRINT_CONSOLE: print(msg)

    def error(self, epoch, description):
        head = self.head.create('info', config.TOM, epoch)
        body = '{"error" : "' + description + '"}'
        msg = '{"header": ' + head + ', "body":' + body + '}'
        self.kafka.send_messages(config.KAFKA_TOPIC, msg.encode("utf8"))
        if config.PRINT_CONSOLE: print(msg)
argument_parser.add_argument('--hosts', type=str, default='localhost:9092')
argument_parser.add_argument('--topic', type=str, default='test')
arguments = argument_parser.parse_args()

# getting the text to publish
text_to_publish = arguments.message
# getting the hosts for Kafka
kafka_hosts = arguments.hosts
# getting the topic to publish in
topic_to_publish_in = arguments.topic

# instantiating a Kafka Client
kafka_client = SimpleClient(hosts=kafka_hosts)
# instantiating a Kafka Producer
kafka_producer = SimpleProducer(kafka_client)

# printing what we are doing
print('publishing "{}" \nto topic "{}" on broker(s) "{}"'.format(
    text_to_publish, topic_to_publish_in,
    '", "'.join(kafka_hosts.split(','))))

# publishing the message
kafka_producer.send_messages(topic_to_publish_in, text_to_publish.encode('utf-8'))

# stopping the kafka producer
kafka_producer.stop()
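# A hypothetical invocation of the script above (the file name is assumed;
# the --message flag is defined in an elided earlier line):
#
#   python publish.py --message 'hello kafka' --hosts localhost:9092 --topic test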
class KafkaAvroLogger:
    indices = {}

    def __init__(self):
        from kafka import SimpleProducer, KafkaClient
        from kafka.common import LeaderNotAvailableError
        self.kafka_client = KafkaClient(config.KAFKA_SERVER)
        self.kafka = SimpleProducer(self.kafka_client)

        schema_int_src = pkg_resources.resource_string("pypro.snmp", "pypro_snmp_int.avsc").decode('utf-8')
        schema_float_src = pkg_resources.resource_string("pypro.snmp", "pypro_snmp_float.avsc").decode('utf-8')
        schema_str_src = pkg_resources.resource_string("pypro.snmp", "pypro_snmp_str.avsc").decode('utf-8')
        self.schema_int = avro.schema.Parse(schema_int_src)
        self.schema_float = avro.schema.Parse(schema_float_src)
        self.schema_str = avro.schema.Parse(schema_str_src)

        for oid in config.SNMP_OIDS:
            self.indices[oid._name()] = 0
        try:
            # empty msg to ensure topic is created
            self.kafka.send_messages(config.KAFKA_TOPIC, (0).to_bytes(1, byteorder='big'))
        except LeaderNotAvailableError:
            time.sleep(1)

    def close(self):
        self.kafka.stop(0)
        self.kafka_client.close()

    def start(self, epoch):
        if config.PRINT_CONSOLE: print('starting session logging to kafka with avro')

    def stop(self, epoch):
        if config.PRINT_CONSOLE: print('stopping session logging to kafka with avro')

    def value(self, epoch, oid, name, value):
        name = oid._name()
        index = self.indices[name]
        index += 1
        self.indices[name] = index

        str_value = str(value)
        if oid.is_numeric() and not utils.is_number(str_value):
            self.error(epoch, "Invalid number, received:" + str_value)
            return

        value_int = None
        value_float = None
        value_str = None
        if oid.is_int():
            value_int = int(value)
        elif oid.is_float():
            value_float = float(value)
        else:
            value_str = str(value)

        writer = None
        schema_id = None  # renamed from `id`, which shadowed the builtin
        value = None
        if value_int is not None:
            writer = avro.io.DatumWriter(self.schema_int)
            schema_id = config.AVRO_SCHEMA_INT_ID
            value = value_int
        if value_float is not None:
            writer = avro.io.DatumWriter(self.schema_float)
            schema_id = config.AVRO_SCHEMA_FLOAT_ID
            value = value_float
        if value_str is not None:
            writer = avro.io.DatumWriter(self.schema_str)
            schema_id = config.AVRO_SCHEMA_STR_ID
            value = value_str

        bytes_writer = io.BytesIO()
        bytes_writer.write(schema_id.to_bytes(1, 'big'))
        encoder = avro.io.BinaryEncoder(bytes_writer)
        writer.write({"header": {"type": oid.oid_name, "tom": oid.target_name,
                                 "address": oid.ip, "oid": oid.oid_name,
                                 "time": epoch},
                      "body": {"value": value}}, encoder)
        raw_bytes = bytes_writer.getvalue()
        self.kafka.send_messages(config.KAFKA_TOPIC, raw_bytes)
        if config.PRINT_CONSOLE: print(str(raw_bytes))

    def error(self, epoch, description):
        pass
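# A hedged sketch of the matching consumer-side decode for the framing used
# above: the first byte selects the Avro schema, the remainder is the Avro
# binary record. schemas_by_id is a hypothetical dict mapping the configured
# schema ids to the parsed schema objects from __init__.
#
#   def decode(raw_bytes, schemas_by_id):
#       reader = avro.io.DatumReader(schemas_by_id[raw_bytes[0]])
#       decoder = avro.io.BinaryDecoder(io.BytesIO(raw_bytes[1:]))
#       return reader.read(decoder)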
class KafkaPythonClientSimple(PythonClient):
    def __init__(self, topic=topic_name, consumerGroup="perftest",
                 kafkaHost=kafka_host, zookeeperHost=zookeeper_host):
        self.config["topic"] = topic
        self.config["kafkaHost"] = kafkaHost
        self.config["zookeeperHost"] = zookeeperHost
        self.config["consumerGroup"] = consumerGroup
        self.client = SimpleClient(self.config["kafkaHost"])
        super(KafkaPythonClientSimple, self).__init__()

    def createProducer(self, kafkaSync):
        self.config["kafkaSync"] = kafkaSync
        if self.config["kafkaSync"] == True:
            self.producer = SimpleProducer(self.client, async=False)
        else:
            print "ENOIMPL: async not impl. for kafka-python-simple"

    def createConsumer(self):
        self.consumer = SimpleConsumer(self.client,
                                       topic=self.config["topic"],
                                       group=self.config["consumerGroup"],
                                       auto_commit=True,
                                       max_buffer_size=3000000,
                                       iter_timeout=5)

    def produce(self, num_msg=20000):
        self.msgCount = num_msg
        for x in range(self.msgCount):
            self.prtProgress(x, 10000)
            self.producer.send_messages(self.config["topic"], self.msg)
        if (x >= 10000):
            sys.stdout.write('\n')

    def consume(self, num_msg=0):
        count = 0
        while True:
            # don't use "for message in self.consumer:" instead of "while..." - much slower!
            message = self.consumer.get_message(block=False, timeout=1)
            if message is None:
                # print "consume, msg is None"
                break
            if len(message) == 0:
                # print "consume, len(msg) is 0"
                break
            count += 1
            self.prtProgress(count, 10000)
        sys.stdout.write('\n')
        if num_msg > 0:
            if count != num_msg:
                print "ERROR: KafkaPythonClientSimple.consume: # of messages not as expected, read: {}, expected: {}".format(count, num_msg)
        return count

    def startProducer(self):
        pass

    def stopProducer(self):
        self.beforeFlushTimer(self.timeDict['producer'])
        self.producer.stop()

    def stopConsumer(self):
        pass

    def initCount(self):
        self.consume(0)

    def finalize(self):
        pass
filepath = args.file
topic = args.topic

producer = get_kafka_producer(args.broker, args.async)

# method that sends messages to given topic
send_message = lambda msg: producer.send_messages(topic, msg)

read_lines = 0
read_chars = 0
print "starting"
for l in read_all_from_file_or_dict(filepath, args.lines):
    read_lines += 1
    read_chars += len(l)
    responses = send_message(l)

if read_lines < args.lines:
    print "Not enough lines in file"

print "stopping"
producer.stop()
print "stopped"

print "Read", read_lines, "lines"
print "Read", read_chars, "chars"
def test_batched_simple_producer__triggers_by_message(self):
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    # Configure batch producer
    batch_messages = 5
    batch_interval = 5
    producer = SimpleProducer(
        self.client,
        async_send=True,
        batch_send_every_n=batch_messages,
        batch_send_every_t=batch_interval,
        random_start=False)

    # Send 4 messages -- should not trigger a batch
    resp = producer.send_messages(
        self.topic,
        self.msg("one"),
        self.msg("two"),
        self.msg("three"),
        self.msg("four"),
    )

    # Batch mode is async. No ack
    self.assertEqual(len(resp), 0)

    # It hasn't sent yet
    self.assert_fetch_offset(partitions[0], start_offsets[0], [])
    self.assert_fetch_offset(partitions[1], start_offsets[1], [])

    # Send 3 more messages -- should trigger batch on first 5
    resp = producer.send_messages(
        self.topic,
        self.msg("five"),
        self.msg("six"),
        self.msg("seven"),
    )

    # Batch mode is async. No ack
    self.assertEqual(len(resp), 0)

    # Wait until producer has pulled all messages from internal queue;
    # this should signal that the first batch was sent, and the producer
    # is now waiting for enough messages to batch again (or a timeout)
    timeout = 5
    start = time.time()
    while not producer.queue.empty():
        if time.time() - start > timeout:
            self.fail('timeout waiting for producer queue to empty')
        time.sleep(0.1)

    # send_messages groups all *msgs in a single call to the same partition,
    # so we should see all messages from the first call in one partition
    self.assert_fetch_offset(partitions[0], start_offsets[0], [
        self.msg("one"),
        self.msg("two"),
        self.msg("three"),
        self.msg("four"),
    ])

    # Because we are batching every 5 messages, we should only see one
    self.assert_fetch_offset(partitions[1], start_offsets[1], [
        self.msg("five"),
    ])

    producer.stop()