class DockerExecutor(object):

    def __init__(self, warehouse, warehouse_result):
        self.warehouse = warehouse
        self.warehouse_result = warehouse_result
        self.kafka = KafkaClient(Conf.getWareHouseAddr())
        self.producer = KeyedProducer(self.kafka)
        self.consumer = KafkaConsumer(self.warehouse,
                                      bootstrap_servers=[Conf.getWareHouseAddr()],
                                      group_id="cnlab",
                                      auto_commit_enable=True,
                                      auto_commit_interval_ms=30 * 1000,
                                      auto_offset_reset='smallest')

    def run(self):
        i = 1
        for message in self.consumer.fetch_messages():
            logger.debug("%d,%s:%s:%s: key=%s" % (i, message.topic, message.partition,
                                                  message.offset, message.key))
            task = cPickle.loads(message.value)
            i += 1
            result = task.run(0)
            self.producer.send_messages(self.warehouse_result, task.id,
                                        cPickle.dumps(result))

def test_keyedproducer_message_types(self):
    client = MagicMock()
    client.get_partition_ids_for_topic.return_value = [0, 1]
    producer = KeyedProducer(client)
    topic = b"test-topic"
    key = b"testkey"

    bad_data_types = (u'你怎么样?', 12, ['a', 'list'], ('a', 'tuple'), {'a': 'dict'})
    for m in bad_data_types:
        with self.assertRaises(TypeError):
            logging.debug("attempting to send message of type %s", type(m))
            producer.send_messages(topic, key, m)

    good_data_types = (b'a string!', None)
    for m in good_data_types:
        # This should not raise an exception
        producer.send_messages(topic, key, m)

def test_hashed_partitioner(self):
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
    resp1 = producer.send_messages(self.topic, self.key("1"), self.msg("one"))
    resp2 = producer.send_messages(self.topic, self.key("2"), self.msg("two"))
    resp3 = producer.send_messages(self.topic, self.key("3"), self.msg("three"))
    resp4 = producer.send_messages(self.topic, self.key("3"), self.msg("four"))
    resp5 = producer.send_messages(self.topic, self.key("4"), self.msg("five"))

    offsets = {partitions[0]: start_offsets[0], partitions[1]: start_offsets[1]}
    messages = {partitions[0]: [], partitions[1]: []}

    keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
    resps = [resp1, resp2, resp3, resp4, resp5]
    msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]

    for key, resp, msg in zip(keys, resps, msgs):
        k = hash(key) % 2
        partition = partitions[k]
        offset = offsets[partition]
        self.assert_produce_response(resp, offset)
        offsets[partition] += 1
        messages[partition].append(msg)

    self.assert_fetch_offset(partitions[0], start_offsets[0], messages[partitions[0]])
    self.assert_fetch_offset(partitions[1], start_offsets[1], messages[partitions[1]])

    producer.stop()

def test_keyedproducer_null_payload(self):
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
    key = "test"

    resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
    self.assert_produce_response(resp, start_offsets[0])
    resp = producer.send_messages(self.topic, self.key("key2"), None)
    self.assert_produce_response(resp, start_offsets[1])
    resp = producer.send_messages(self.topic, self.key("key3"), None)
    self.assert_produce_response(resp, start_offsets[0] + 1)
    resp = producer.send_messages(self.topic, self.key("key4"), self.msg("four"))
    self.assert_produce_response(resp, start_offsets[1] + 1)

    self.assert_fetch_offset(partitions[0], start_offsets[0], [self.msg("one"), None])
    self.assert_fetch_offset(partitions[1], start_offsets[1], [None, self.msg("four")])

    producer.stop()

def test_round_robin_partitioner(self):
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
    resp1 = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
    resp2 = producer.send_messages(self.topic, self.key("key2"), self.msg("two"))
    resp3 = producer.send_messages(self.topic, self.key("key3"), self.msg("three"))
    resp4 = producer.send_messages(self.topic, self.key("key4"), self.msg("four"))

    self.assert_produce_response(resp1, start_offsets[0] + 0)
    self.assert_produce_response(resp2, start_offsets[1] + 0)
    self.assert_produce_response(resp3, start_offsets[0] + 1)
    self.assert_produce_response(resp4, start_offsets[1] + 1)

    self.assert_fetch_offset(partitions[0], start_offsets[0],
                             [self.msg("one"), self.msg("three")])
    self.assert_fetch_offset(partitions[1], start_offsets[1],
                             [self.msg("two"), self.msg("four")])

    producer.stop()

import logging
import sys

from kafka import KafkaClient, SimpleProducer, KeyedProducer


class KafkaLoggingHandler(logging.Handler):
    # Written against an older kafka-python API in which the topic was bound
    # to the producer at construction time.

    def __init__(self, host, port, topic, key=None):
        logging.Handler.__init__(self)
        self.kafka_client = KafkaClient(host, port)
        self.key = key
        if key is None:
            self.producer = SimpleProducer(self.kafka_client, topic)
        else:
            self.producer = KeyedProducer(self.kafka_client, topic)

    def emit(self, record):
        # drop kafka logging to avoid infinite recursion
        if record.name == 'kafka':
            return
        try:
            # use default formatting
            msg = self.format(record)
            # produce message
            if self.key is None:
                self.producer.send_messages(msg)
            else:
                self.producer.send(self.key, msg)
        except Exception:
            import traceback
            ei = sys.exc_info()
            traceback.print_exception(ei[0], ei[1], ei[2], None, sys.stderr)
            del ei

    def close(self):
        self.producer.stop()
        logging.Handler.close(self)

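# A minimal usage sketch for the handler above. The host, port, and topic
# values here are placeholders, not taken from the original code: attach the
# handler to a logger and log as usual.
import logging

handler = KafkaLoggingHandler('kafka-broker.example', 9092, 'app-logs')
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))

app_logger = logging.getLogger('my-app')
app_logger.addHandler(handler)
app_logger.setLevel(logging.INFO)
app_logger.info('this record is shipped to Kafka')
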
def message_sender(m):
    """Send each (key, value) pair in the RDD to a Kafka topic."""
    client = SimpleClient('localhost:9092')
    producer = KeyedProducer(client)
    rdds = m.collect()
    for d in rdds:
        # the key must be bytes; str(d[0]).encode() fixes the original
        # bytes.encode(...) call, which is not valid on Python 3
        producer.send_messages('flask', str(d[0]).encode(), d[1])
    return

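# message_sender() collects a whole RDD, which is the shape of a Spark
# Streaming foreachRDD callback. A plausible (assumed) wiring -- the
# `source_dstream` below comes from the surrounding job, not this snippet:
pairs = source_dstream.map(lambda rec: (rec['id'], rec['payload']))
pairs.foreachRDD(message_sender)
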
def sendMsg(topic, lines):
    if len(lines) > 0:
        brokers = 'cdh-slave0:9092,cdh-slave1:9092,cdh-slave2:9092'
        kafka = KafkaClient(brokers)
        producer = KeyedProducer(kafka)
        for line in lines:
            ran = "_" + str(random.randint(0, 10))
            producer.send_messages(topic, topic + ran, line)
        producer.stop()

def keyedProduce(self, topic, key, value):
    kafka = KafkaClient(self.configs["broker_list"].split(","))
    # note: 'async' was renamed 'async_send' in newer kafka-python releases,
    # since async became a reserved word in Python 3.7
    keyedProducer = KeyedProducer(kafka, async=True)
    undone = True
    while undone:
        try:
            keyedProducer.send_messages(topic, key, value)
            undone = False
        except LeaderNotAvailableError:
            sleep(10)
            print("LeaderNotAvailableError")

class KeyedProducer(BaseStreamProducer):

    def __init__(self, connection, topic_done, partitioner_cls, codec):
        self._prod = None
        self._conn = connection
        self._topic_done = topic_done
        self._partitioner_cls = partitioner_cls
        self._codec = codec

    def _connect_producer(self):
        if self._prod is None:
            try:
                self._prod = KafkaKeyedProducer(self._conn,
                                                partitioner=self._partitioner_cls,
                                                codec=self._codec)
            except BrokerResponseError:
                self._prod = None
                logger.warning("Could not connect producer to Kafka server")
                return False
        return True

    def send(self, key, *messages):
        success = False
        max_tries = 5
        if self._connect_producer():
            n_tries = 0
            while not success and n_tries < max_tries:
                try:
                    self._prod.send_messages(self._topic_done, key, *messages)
                    success = True
                except MessageSizeTooLargeError as e:
                    logger.error(str(e))
                    break
                except BrokerResponseError:
                    n_tries += 1
                    logger.warning("Could not send message. Try {0}/{1}".format(
                        n_tries, max_tries))
                    sleep(1.0)
        return success

    def flush(self):
        if self._prod is not None:
            self._prod.stop()

    def get_offset(self, partition_id):
        # Kafka has its own offset management
        raise KeyError

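# A usage sketch for the wrapper above, against the legacy kafka-python API.
# The broker address and topic are placeholders; HashedPartitioner and
# CODEC_NONE are legacy kafka-python names (import paths may vary slightly
# between old releases).
from kafka import KafkaClient, HashedPartitioner
from kafka.protocol import CODEC_NONE

conn = KafkaClient('localhost:9092')
producer = KeyedProducer(conn, 'frontier-done', HashedPartitioner, CODEC_NONE)

ok = producer.send(b'some-key', b'message 1', b'message 2')  # retries transient errors up to 5 times
producer.flush()  # stops the underlying producer, flushing pending batches
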
def keyed_messages():
    '''Keyed messages'''
    from kafka import (KafkaClient, KeyedProducer,
                       Murmur2Partitioner, RoundRobinPartitioner)

    kafka = KafkaClient(KAFKA_SERVER)

    # HashedPartitioner is default (currently uses python hash())
    producer = KeyedProducer(kafka)
    producer.send_messages(b'topic1', b'key1', b'some message')
    producer.send_messages(b'topic1', b'key2', b'this method')

    # Murmur2Partitioner attempts to mirror the java client hashing
    producer = KeyedProducer(kafka, partitioner=Murmur2Partitioner)

    # Or just produce round-robin (or just use SimpleProducer)
    producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner)

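# The default HashedPartitioner mapping is simple enough to illustrate
# without a broker: python hash() of the key, modulo the partition count --
# exactly what test_hashed_partitioner above assumes with `hash(key) % 2`.
# The partition ids here are placeholders.
partitions = [0, 1]

def pick_partition(key, partitions):
    return partitions[hash(key) % len(partitions)]

for key in (b"1", b"2", b"3", b"4"):
    print(key, '->', pick_partition(key, partitions))

# Caveat: on Python 3, hash() of str/bytes is randomized per process unless
# PYTHONHASHSEED is fixed, so this mapping is not stable across runs -- one
# reason to prefer Murmur2Partitioner for compatibility with other clients.
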
def keyedProducerTest2():
    '''Test KeyedProducer.

    @topic: single-replica case (JOB_TEST_1)
    @function: test KeyedProducer by publishing messages to the given broker,
    and verify the error raised after devops-dev1:9193 is shut down.
    '''
    import pdb
    pdb.set_trace()

    kafkaClient = KafkaClient('devops-dev1:9193')
    producer = KeyedProducer(kafkaClient)
    message = "This is a test-"
    index = 0
    while True:
        tmpmsg = message + str(index)
        producer.send_messages(b'JOB_TEST_1', 'keys', tmpmsg)
        index += 1
        time.sleep(1)

def kafkaTasks(self, addr, topic, tasks):
    try:
        from kafka import SimpleProducer, KafkaClient, KeyedProducer
    except ImportError:
        logger.error("kafka-python is not installed")
        raise Exception("kafka-python is not installed")

    kafka_client = None
    try:
        kafka_client = KafkaClient(addr)
        producer = KeyedProducer(kafka_client)
        for task in tasks:
            #self.producer.send_messages(self.warehouse, task.id, json.dumps(task, default=object2dict))
            producer.send_messages(topic, self.manager.name, cPickle.dumps(task))
    finally:
        if kafka_client:
            kafka_client.close()

def main():
    # To send messages synchronously
    kafka = KafkaClient('localhost:9092')
    producer = KeyedProducer(kafka)

    # Ensure that the topic exists
    kafka.ensure_topic_exists('test')

    while True:
        input_str = raw_input("Press enter to send another message, otherwise press 'q' to quit: ")
        if input_str and input_str in "qQ":
            sys.exit(0)
        if not input_str:
            print "No input was provided"
        else:
            producer.send_messages(
                'test',       # topic
                'topic-key',  # key
                "(time: {}, message: {})".format(get_time(), input_str),  # message
            )

def keyedProducerTest3():
    '''Test KeyedProducer.

    @topic: single-replica case (JOB_TEST_1)
    @function: test KeyedProducer by publishing messages to the given broker,
    and verify recovery after devops-dev1:9193 is shut down (wait 10 seconds;
    no manual restart is needed, the client reconnects automatically).
    '''
    import pdb
    pdb.set_trace()

    kafkaClient = KafkaClient('devops-dev1:9193')
    producer = KeyedProducer(kafkaClient)
    message = "This is a test-"
    index = 0
    while True:
        try:
            tmpmsg = message + str(index)
            producer.send_messages(b'JOB_TEST_1', 'keys', tmpmsg)
            index += 1
            time.sleep(1)
        except (FailedPayloadsError, KafkaUnavailableError) as msg:
            print('Caught FailedPayloadsError/KafkaUnavailableError:', msg)
            time.sleep(10)

class KeyedProducer(BaseStreamProducer):

    def __init__(self, connection, topic_done, partitioner_cls):
        self._prod = None
        self._conn = connection
        self._topic_done = topic_done
        self._partitioner_cls = partitioner_cls

    def _connect_producer(self):
        if self._prod is None:
            try:
                self._prod = KafkaKeyedProducer(self._conn,
                                                partitioner=self._partitioner_cls,
                                                codec=CODEC_SNAPPY)
            except BrokerResponseError:
                self._prod = None
                logger.warning("Could not connect producer to Kafka server")
                return False
        return True

    def send(self, key, *messages):
        success = False
        max_tries = 5
        if self._connect_producer():
            n_tries = 0
            while not success and n_tries < max_tries:
                try:
                    self._prod.send_messages(self._topic_done, key, *messages)
                    success = True
                except MessageSizeTooLargeError as e:
                    logger.error(str(e))
                    break
                except BrokerResponseError:
                    n_tries += 1
                    logger.warning("Could not send message. Try {0}/{1}".format(
                        n_tries, max_tries))
                    sleep(1.0)
        return success

def send_messages_to_kafka(self, topic, num_msgs=-1, msg_len=70, msg_interval=0.1,
                           is_multi=True, write=True):
    """
    This function sends dummy messages to Kafka

    :param topic: Name of Kafka topic
    :param num_msgs: Number of messages to be sent to Kafka (-1 means send forever)
    :param msg_len: Length of the message
    :param msg_interval: Interval time between messages
    :param is_multi: True if multi-partitioned Kafka; False if single partition
    :param write: True to also log each message to a local file
    :return: None
    """
    print "Topic:", topic, "#msgs=", num_msgs, "len=", msg_len, "int=", msg_interval, "mult=", is_multi, "w=", write
    if write:
        out = open("/tmp/kafka-input-module-msgs.log", "w")

    kafka_client = KafkaClient(self.broker)
    if is_multi:
        producer = KeyedProducer(kafka_client, partitioner=RoundRobinPartitioner)
        producer_type = 'KeyedProducer'
    else:
        producer = SimpleProducer(kafka_client)
        producer_type = 'SimpleProducer'

    count = 0
    num_msgs = int(num_msgs)
    while count != num_msgs:
        count += 1
        msg = str(count) + ": Message from " + producer_type + " : " + str(datetime.now()) + " " + "#" * msg_len
        msg = msg[:msg_len]
        if is_multi:
            producer.send_messages(topic, "key" + str(count), msg)
        else:
            producer.send_messages(topic, msg)
        if write:
            out.write(msg + "\n")
        time.sleep(msg_interval)
    print "Sent", count, "messages!!"

def test_async_keyed_producer(self):
    partition = self.client.get_partition_ids_for_topic(self.topic)[0]
    start_offset = self.current_offset(self.topic, partition)

    producer = KeyedProducer(self.client,
                             partitioner=RoundRobinPartitioner,
                             async=True)

    resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
    self.assertEqual(len(resp), 0)

    # wait for the server to report a new highwatermark
    while self.current_offset(self.topic, partition) == start_offset:
        time.sleep(0.1)

    self.assert_fetch_offset(partition, start_offset, [self.msg("one")])

    producer.stop()

def test_switch_leader_keyed_producer(self):
    topic = self.topic
    producer = KeyedProducer(self.client, async_send=False)

    # Send 10 random messages
    for _ in range(10):
        key = random_string(3).encode('utf-8')
        msg = random_string(10).encode('utf-8')
        producer.send_messages(topic, key, msg)

    # kill leader for partition 0
    self._kill_leader(topic, 0)

    recovered = False
    started = time.time()
    timeout = 60
    while not recovered and (time.time() - started) < timeout:
        try:
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)
            if producer.partitioners[topic].partition(key) == 0:
                recovered = True
        except (FailedPayloadsError, KafkaConnectionError,
                RequestTimedOutError, NotLeaderForPartitionError):
            log.debug("caught exception sending message -- will retry")
            continue

    # Verify we successfully sent the message
    self.assertTrue(recovered)

    # send some more messages just to make sure no more exceptions
    for _ in range(10):
        key = random_string(3).encode('utf-8')
        msg = random_string(10).encode('utf-8')
        producer.send_messages(topic, key, msg)

with open(file_path) as data_file:
    features = json.load(data_file).get('features')

# To send messages asynchronously
client = KafkaClient('kafka.dev:9092')
producer = KeyedProducer(client, async=True)
topic = 'starbucks'
client.ensure_topic_exists(topic)

# run all?
feature_count = len(features)
# or run some?
#feature_count = 10

interval = 1  # in seconds
index = 5
while index < feature_count:
    time.sleep(interval)
    photos = []
    photos_url = []
    if features[index].get('properties').get('photos'):
        for photo in features[index].get('properties').get('photos').split(','):
            photos += [photo]
            photos_url += ['http://127.0.0.1:8001/starbucks/{}.jpg'.format(photo)]
    if features[index].get('properties').get('photos'):
        features[index]['properties']['photos'] = photos
    if features[index].get('properties').get('photos_url'):
        features[index]['properties']['photos_url'] = photos_url

    producer.send_messages(topic, b'feature', json.dumps(features[index]))
    print "Sent message to topic: {}.".format(topic)
    print features[index]
    index += 1

# To wait for acknowledgements
# ACK_AFTER_LOCAL_WRITE : server will wait till the data is written to
#                         a local log before sending response
# ACK_AFTER_CLUSTER_COMMIT : server will block until the message is committed
#                            by all in sync replicas before sending a response
producer = SimpleProducer(client,
                          async=False,
                          req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
                          ack_timeout=2000,
                          sync_fail_on_error=False)

responses = producer.send_messages('my-topic', b'another message')
for r in responses:
    logging.info(r.offset)

from kafka import (SimpleClient, KeyedProducer,
                   Murmur2Partitioner, RoundRobinPartitioner)

kafka = SimpleClient('localhost:9092')

# HashedPartitioner is default (currently uses python hash())
producer = KeyedProducer(kafka)
producer.send_messages('my-topic', b'key1', b'some message')
producer.send_messages('my-topic', b'key2', b'this method')

# Murmur2Partitioner attempts to mirror the java client hashing
producer = KeyedProducer(kafka, partitioner=Murmur2Partitioner)

# Or just produce round-robin (or just use SimpleProducer)
producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner)

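# The snippets in this section use the legacy producer classes, which were
# removed in kafka-python 2.0. A rough modern equivalent of the keyed sends
# above, as a sketch -- broker address and topic are placeholders:
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='localhost:9092')

# the default partitioner hashes the key with murmur2 (like the Java client),
# so messages with the same key land on the same partition
future = producer.send('my-topic', key=b'key1', value=b'some message')
metadata = future.get(timeout=10)  # block for the ack, like a sync send
print(metadata.topic, metadata.partition, metadata.offset)

producer.flush()
producer.close()
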
def genMessage(msg, key, topic):
    # assumes a module-level `kafka` KafkaClient instance
    producer = KeyedProducer(kafka)
    producer.send_messages(topic, key, msg)

import logging
import os
import random
import time

from kafka import SimpleProducer, KafkaClient, KeyedProducer
from kafka.common import LeaderNotAvailableError

logging.basicConfig()
logger = logging.getLogger('kafka-app')

# IP:PORT of a Kafka broker. The typical port is 9092.
KAFKA_BROKER_IP_PORT = os.getenv('KAFKA_BROKER', 'cdh-slave0:9092')
print "KAFKA BROKER: " + KAFKA_BROKER_IP_PORT

kafka = KafkaClient(KAFKA_BROKER_IP_PORT)
producer = KeyedProducer(kafka)

# Note that the application is responsible for encoding messages to type str
while True:
    print('Sending...')
    logger.info('Sending...')
    try:
        lines = ["xxx", "yyy"]
        for line in lines:
            ran = "_" + str(random.randint(0, 10))
            producer.send_messages("test_sitepvv3", "test_sitepvv3" + ran, line)
    except LeaderNotAvailableError:
        logger.warning('Caught a LeaderNotAvailableError. This seems to happen when auto-creating a new topic.')
        print('Caught a LeaderNotAvailableError. This seems to happen when auto-creating a new topic.')
    time.sleep(3)

# kafka.close()

from kafka.common import ProduceRequest, FailedPayloadsError
from config import KAFKA_URL

kafka = KafkaClient(KAFKA_URL)
producer = SimpleProducer(kafka)

# send basic messages
try:
    producer.send_messages(b'python-sample-topic-1', b'test message 1')
    producer.send_messages(b'python-sample-topic-1', b'test message 2',
                           b'test 3 additional message')
except FailedPayloadsError as e:
    print 'Simple Producer payload exception...'
    print e

# send keyed message
producer2 = KeyedProducer(kafka)
try:
    producer2.send_messages(b'python-sample-topic-1', b'key1',
                            b'test keyed message 1')
except FailedPayloadsError as e:
    print 'Keyed Producer payload exception...'
    print e

# # To send messages asynchronously
# producer = SimpleProducer(kafka, async=True)
# producer.send_messages(b'my-topic', b'async message')

# # To wait for acknowledgements
# # ACK_AFTER_LOCAL_WRITE : server will wait till the data is written to
# #                         a local log before sending response
# # ACK_AFTER_CLUSTER_COMMIT : server will block until the message is committed
# #                            by all in sync replicas before sending a response
# producer = SimpleProducer(kafka, async=False,
#                           req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,

class KafkaBackend(Backend):

    def __init__(self, manager):
        self._manager = manager
        settings = manager.settings

        # Kafka connection parameters
        self._server = settings.get('KAFKA_LOCATION')
        self._topic_todo = settings.get('OUTGOING_TOPIC', "frontier-todo")
        self._topic_done = settings.get('INCOMING_TOPIC', "frontier-done")
        self._group = settings.get('FRONTIER_GROUP', "scrapy-crawler")
        self._get_timeout = float(settings.get('KAFKA_GET_TIMEOUT', 5.0))
        self._partition_id = settings.get('SPIDER_PARTITION_ID')

        # Kafka setup
        self._conn = KafkaClient(self._server)
        self._prod = None
        self._cons = None

        logger = getLogger("kafka")
        handler = StreamHandler()
        logger.addHandler(handler)

        self._connect_consumer()
        self._connect_producer()

        store_content = settings.get('STORE_CONTENT')
        self._encoder = Encoder(manager.request_model, send_body=store_content)
        self._decoder = Decoder(manager.request_model, manager.response_model)

    def _connect_producer(self):
        """If producer is not connected try to connect it now.

        :returns: bool -- True if producer is connected
        """
        if self._prod is None:
            try:
                self._prod = KeyedProducer(self._conn,
                                           partitioner=FingerprintPartitioner,
                                           codec=CODEC_SNAPPY)
            except BrokerResponseError:
                self._prod = None
                if self._manager is not None:
                    self._manager.logger.backend.warning(
                        "Could not connect producer to Kafka server")
                return False
        return True

    def _connect_consumer(self):
        """If consumer is not connected try to connect it now.

        :returns: bool -- True if consumer is connected
        """
        if self._cons is None:
            try:
                self._cons = SimpleConsumer(self._conn,
                                            self._group,
                                            self._topic_todo,
                                            partitions=[self._partition_id],
                                            buffer_size=131072,
                                            max_buffer_size=1048576)
            except BrokerResponseError:
                self._cons = None
                if self._manager is not None:
                    self._manager.logger.backend.warning(
                        "Could not connect consumer to Kafka server")
                return False
        return True

    @classmethod
    def from_manager(clas, manager):
        return clas(manager)

    def frontier_start(self):
        if self._connect_consumer():
            self._manager.logger.backend.info(
                "Successfully connected consumer to " + self._topic_todo)
        else:
            self._manager.logger.backend.warning(
                "Could not connect consumer to {0}. I will try later.".format(
                    self._topic_todo))

    def frontier_stop(self):
        # flush everything if a batch is incomplete
        self._prod.stop()

    def _send_message(self, encoded_message, key, fail_wait_time=1.0, max_tries=5):
        start = time.clock()
        success = False
        if self._connect_producer():
            n_tries = 0
            while not success and n_tries < max_tries:
                try:
                    self._prod.send_messages(self._topic_done, key, encoded_message)
                    success = True
                except MessageSizeTooLargeError as e:
                    self._manager.logger.backend.error(str(e))
                    self._manager.logger.backend.debug("Message: %s" % encoded_message)
                    break
                except BrokerResponseError:
                    n_tries += 1
                    if self._manager is not None:
                        self._manager.logger.backend.warning(
                            "Could not send message. Try {0}/{1}".format(
                                n_tries, max_tries))
                    time.sleep(fail_wait_time)
        return success

import json

from kafka import KafkaClient, KeyedProducer

kafka = KafkaClient('localhost:9092')
producer = KeyedProducer(kafka)

cars = {"cars": [
    {"type": "Feature", "id": 807011662, "geometry": {"type": "Point", "coordinates": [-118.287668, 34.264679]}},
    {"type": "Feature", "id": 1115494253, "geometry": {"type": "Point", "coordinates": [-118.284910, 34.262454]}},
    {"type": "Feature", "id": 1656112982, "geometry": {"type": "Point", "coordinates": [-118.284912, 34.262095]}},
    {"type": "Feature", "id": 103209461, "geometry": {"type": "Point", "coordinates": [-118.294320, 34.263207]}},
    {"type": "Feature", "id": 136105512, "geometry": {"type": "Point", "coordinates": [-118.29356, 34.267076]}},
    {"type": "Feature", "id": 1723962104, "geometry": {"type": "Point", "coordinates": [-118.29080, 34.26690]}},
    {"type": "Feature", "id": 1707068090, "geometry": {"type": "Point", "coordinates": [-118.296152, 34.267168]}},
    {"type": "Feature", "id": 601751984, "geometry": {"type": "Point", "coordinates": [-118.296011, 34.266832]}},
    {"type": "Feature", "id": 822507730, "geometry": {"type": "Point", "coordinates": [-118.296011, 34.266832]}},
    {"type": "Feature", "id": 918577934, "geometry": {"type": "Point", "coordinates": [-118.297265, 34.269539]}},
    {"type": "Feature", "id": 1979841256, "geometry": {"type": "Point", "coordinates": [-118.297931, 34.266868]}},
    {"type": "Feature", "id": 1604018127, "geometry": {"type": "Point", "coordinates": [-118.29356, 34.267076]}},
    {"type": "Feature", "id": 2127593395, "geometry": {"type": "Point", "coordinates": [-118.294693, 34.267501]}},
    {"type": "Feature", "id": 985543876, "geometry": {"type": "Point", "coordinates": [-118.290892, 34.266891]}},
    {"type": "Feature", "id": 961446961, "geometry": {"type": "Point", "coordinates": [-118.293907, 34.265554]}},
    {"type": "Feature", "id": 1254271218, "geometry": {"type": "Point", "coordinates": [-118.288029, 34.264371]}},
    {"type": "Feature", "id": 1437596832, "geometry": {"type": "Point", "coordinates": [-118.294097, 34.262547]}},
]}

#for car in cars['cars']:
#    car['geometry']['coordinates'] = [car['geometry']['coordinates'][0] + .1, car['geometry']['coordinates'][1] + .1]

producer.send_messages(b'cars', b'1', bytes(json.dumps(cars)))
#producer.send_messages(b'people', b'1', b'')
#producer.send_messages(b'cars', b'1', b'{ "type": "Feature", "id": "1234", "geometry": {"type": "Point", "coordinates": [-118.385, 34.065] } }')
#producer.send_messages(b'bicycles', b'1', b'')

"id": 961446961, "geometry": { "type": "Point", "coordinates": [-118.293907, 34.265554] } }, { "type": "Feature", "id": 1254271218, "geometry": { "type": "Point", "coordinates": [-118.288029, 34.264371] } }, { "type": "Feature", "id": 1437596832, "geometry": { "type": "Point", "coordinates": [-118.294097, 34.262547] } }] } #for car in cars['cars']: # car['geometry']['coordinates'] = [car['geometry']['coordinates'][0] + .1,car['geometry']['coordinates'][1] + .1] producer.send_messages(b'cars', b'1', bytes(json.dumps(cars))) #producer.send_messages(b'people', b'1', b'') #producer.send_messages(b'cars', b'1', b'{ "type": "Feature", "id": "1234", "geometry": {"type": "Point", "coordinates": [-118.385, 34.065] } }') #producer.send_messages(b'bicycles', b'1', b'')
print("Connecting to " + kafka_host + "...") # try connecting to Kafka until successful disconnected = True while (disconnected): try: status = KeyedProducer( KafkaClient(kafka_host) ) disconnected = False except Exception as e: pass disconnected = True print("Waiting for " + kafka_host + " to become available...") time.sleep(3) # send initial status messages try: status.send_messages( TPC_REQUEST.encode("UTF8"), "status","check".encode("UTF8")) status.send_messages( TPC_RESPONSE.encode("UTF8"), "status","check".encode("UTF8")) except Exception as e: pass print("Connected to " + kafka_host + ".") # init logger logging.basicConfig(format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s', level=logging.ERROR) # Main loop waits indefinitely for requests # then passes them to processMessage() # while (True): try: # kafka producer
def publish_partition(partition):
    kafka = KafkaClient(args_broadcast.value.kafka_hosts)
    producer = KeyedProducer(kafka,
                             partitioner=RoundRobinPartitioner,
                             async=True,
                             batch_send=True)
    producer.send_messages('popular_users', message_key.value,
                           *[json.dumps(user) for user in partition])

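# publish_partition() is shaped like a PySpark foreachPartition callback.
# A plausible (assumed) call site -- `sc`, `args_broadcast` and `message_key`
# are set up by the surrounding job, not by this snippet:
users = sc.parallelize([{'user': 'a', 'score': 10}, {'user': 'b', 'score': 7}])
users.foreachPartition(publish_partition)
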
print("Connecting to " + kafka_host + "...") # try connecting to Kafka until successful disconnected = True while (disconnected): try: status = KeyedProducer(KafkaClient(kafka_host)) disconnected = False except Exception as e: pass disconnected = True print("Waiting for " + kafka_host + " to become available...") time.sleep(3) # send initial status messages try: status.send_messages(TPC_REQUEST.encode("UTF8"), "status", "check".encode("UTF8")) status.send_messages(TPC_RESPONSE.encode("UTF8"), "status", "check".encode("UTF8")) except Exception as e: pass print("Connected to " + kafka_host + ".") # init logger logging.basicConfig( format= '%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s', level=logging.ERROR) # Main loop waits indefinitely for requests # then passes them to processMessage()
infinitives = ["to make a pie.", "for no apparent reason.", "because the sky is green.", "for a disease.", "to be able to make toast explode.", "to know more about archeology."] def sing_sen_maker(new_word): s_nouns.append(new_word) return ' '.join( [random.choice(s_nouns), random.choice(s_verbs), random.choice(s_nouns).lower() or random.choice(p_nouns).lower(), random.choice(infinitives)]) def id_generator(size=6, chars=charset): return ''.join(random.choice(chars) for _ in range(size)) if __name__ == "__main__": import json from kafka import KafkaClient, KeyedProducer kafka = KafkaClient('localhost:9092') producer = KeyedProducer(kafka) tw_id = 0 for i in range(10000): tag = random.choice(hashtags) message = sing_sen_maker(tag) tw_id = tw_id + 1 tweet = { 'id': tw_id, 'message': message, 'time': time.ctime() } msg = json.dumps(tweet) producer.send_messages(b'topic1', tag, msg) #print "#{0}:{1}".format(tag, message) time.sleep(random.randint(1,50)*0.0001)
# Excerpt from a larger monitoring script: the colour codes (OKGREEN, OKBLUE,
# BOLD, ENDC), logger, METRICS, producer, transaction_list, application,
# traces_url, app_id, tenantId, before, now and the auth cookies are all
# defined earlier in the original file.
req.add_header('Cookie', str(xsrf__cookie) + "; " + str(login__cookie))
httpHandler = urllib2.HTTPHandler()
opener = urllib2.build_opener(httpHandler)
response = opener.open(req)

if response.getcode() == 200:
    print OKGREEN + "[SUCCESS]" + ENDC
    transactions = json.loads(response.read())
    for transaction in transactions['responseList']:
        hash_string = str(str(transaction['id']) + str(transaction['responseTime']) +
                          str(transaction['timeConsuming']))
        if hash_string not in transaction_list:
            transaction_list.append(hash_string)
            print (("\t" + BOLD + "TRANSACTION_NAME : " + ENDC + OKBLUE + str(transaction['transactionName']) + ENDC).ljust(70) +
                   (BOLD + " RESPONSE_TIME : " + ENDC + OKBLUE + str(transaction['responseTime']) + ENDC).ljust(70) +
                   (BOLD + " THROUGHPUT : " + ENDC + OKBLUE + str(transaction['throughput']) + ENDC).ljust(70) +
                   (BOLD + " TIME_CONSUMING : " + ENDC + OKBLUE + str(transaction['timeConsuming']) + ENDC))
            logger.log(METRICS, "\tTRANSACTION_NAME : " + str(transaction['transactionName']) +
                       " RESPONSE_TIME : " + str(transaction['responseTime']) +
                       " THROUGHPUT : " + str(transaction['throughput']) +
                       " TIME_CONSUMING : " + str(transaction['timeConsuming']))
            producer.send_messages(b'picasso-apppulse', b'METRICS',
                                   b"APP_NAME : " + str(application['appName']) +
                                   " APP_ID : " + str(application['appId']) +
                                   " TRANSACTION_NAME : " + str(transaction['transactionName']) +
                                   " RESPONSE_TIME : " + str(transaction['responseTime']) +
                                   " THROUGHPUT : " + str(transaction['throughput']) +
                                   " TIME_CONSUMING : " + str(transaction['timeConsuming']))

        # -----------------TRACES--------------------
        print "GETTING TRANSACTION NAME '%s' TRACES" % (transaction['transactionName']),
        url = (traces_url + str(app_id) + "?TENANTID=" + str(tenantId) +
               "&from=" + str(before) + "&to=" + str(now) +
               "&transactionId=" + str(transaction['id']) +
               "&timeView=pastThirtyMinutes&orderBy=slowest")
        req = urllib2.Request(url)
        req.add_header('X-XSRF-TOKEN', str(xsrf_token))
        req.add_header('Cookie', str(xsrf__cookie) + "; " + str(login__cookie))
        httpHandler = urllib2.HTTPHandler()
        opener = urllib2.build_opener(httpHandler)
        response = opener.open(req)
        if response.getcode() == 200:
            print OKGREEN + "[SUCCESS]" + ENDC
            traces = json.loads(response.read())

from kafka import (KafkaClient, KeyedProducer,
                   HashedPartitioner, RoundRobinPartitioner)

kafka = KafkaClient("localhost:9092")

# HashedPartitioner is default
producer = KeyedProducer(kafka)
producer.send_messages("filmon-topic2", "key1", "some message KEYED")
producer.send_messages("filmon-topic2", "key2", "this method KEYED XXRR")

producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner)

def __produce_kafka_message(client, topic, key, event):
    producer = KeyedProducer(client)
    producer.send_messages(topic, key, event)

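# A possible call site for the helper above; the broker address, topic, key
# and payload are placeholder values. Note that the helper builds a fresh
# KeyedProducer on every call, so reusing a single producer is cheaper when
# sending at high volume.
from kafka import KafkaClient

client = KafkaClient('localhost:9092')
__produce_kafka_message(client, b'events', b'user-42', b'{"action": "login"}')
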