def produce():
    """Publish an endless stream of numbered messages to ``KAFKA_TOPIC``.

    Creates a producer against ``KAFKA_SERVER`` and sends ``"message0"``,
    ``"message1"``, ... forever; this function never returns.
    """
    p = Producer({'bootstrap.servers': KAFKA_SERVER})
    i = 0
    while True:
        try:
            p.produce(KAFKA_TOPIC, "message" + str(i))
        except BufferError:
            # The local queue is full: block briefly while serving delivery
            # reports to free space, then retry the same message.
            p.poll(1)
            continue
        # Serve delivery reports so the local queue cannot grow without bound.
        p.poll(0)
        i += 1
def test_produce_headers_should_fail():
    """ Test that produce() with headers raises NotImplementedError """
    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    with pytest.raises(NotImplementedError) as e:
        p.produce('mytopic', value='somedata', key='a key',
                  headers=[('headerkey', 'headervalue')])
    # Inspect the raised exception itself (e.value), not the ExceptionInfo
    # wrapper, whose string form is not the exception message.
    assert ('Producer message headers requires confluent-kafka-python built for '
            'librdkafka version >=v0.11.4') in str(e.value)
class KafkaPublisher(object):
    """Publishes values to Kafka topics through a confluent_kafka producer."""

    def __init__(self, connection, asynchronous=True):
        """Create the producer from ``connection`` (an empty config if falsy)."""
        # Imported here, so the dependency is only needed on instantiation.
        from confluent_kafka import Producer

        producer_config = connection if connection else {}
        self.producer = Producer(producer_config)
        self.asynchronous = asynchronous

    def publish(self, channel, value, key=None):
        """Produce ``value`` to topic ``channel``; flush unless asynchronous."""
        self.producer.produce(topic=channel, value=value, key=key)
        if self.asynchronous:
            return
        self.producer.flush()
def publish(datasource="USB"):
    """Publish every record of the selected stream to the ``neuronraindata`` topic.

    Args:
        datasource: ``"USB"`` reads the ``USBWWAN_stream`` generator; any
            other value reads the ``KingCobra`` generator.
    """
    p = Producer({'bootstrap.servers': 'localhost:9092'})
    if datasource == "USB":
        inputf = Streaming_AbstractGenerator.StreamAbsGen("USBWWAN_stream", "USBWWAN")
    else:
        inputf = Streaming_AbstractGenerator.StreamAbsGen("KingCobra", "KingCobra")
    try:
        for data in inputf:
            # Single formatted string: prints the same text on Python 2 and 3.
            print("publishing data: %s  in Kafka Topic" % (data,))
            try:
                p.produce('neuronraindata', data.encode('utf-8'))
            except Exception as exc:
                # Publishing stays best-effort, but the failure is reported
                # instead of being silently dropped (and KeyboardInterrupt /
                # SystemExit are no longer swallowed).
                print("failed to publish data: %s" % (exc,))
            # Serve delivery reports so the local queue does not fill up.
            p.poll(0)
    finally:
        # Wait for queued messages; otherwise they may be lost on return.
        p.flush()
def test_fatal():
    """ Test fatal exceptions """
    producer = Producer({'error_cb': error_cb})

    # The test hook raises a KafkaException wrapping a fatal KafkaError.
    with pytest.raises(KafkaException) as raised:
        KafkaError._test_raise_fatal()

    fatal_error = raised.value.args[0]
    assert isinstance(fatal_error, KafkaError)
    assert fatal_error.fatal() is True

    # Touch the producer once so flake8 does not report it as unused.
    producer.poll(0)
def test_produce_timestamp():
    """ Test produce() with timestamp arg """
    settings = {
        'socket.timeout.ms': 10,
        'error_cb': error_cb,
        'message.timeout.ms': 10,
    }
    p = Producer(settings)

    # Timestamps require librdkafka >=v0.9.4
    try:
        p.produce('mytopic', timestamp=1234567)
    except NotImplementedError:
        # Tolerated only when the linked librdkafka is older than v0.9.4.
        if not libversion()[1] < 0x00090400:
            raise

    p.flush()
def test_error_cb():
    """ Test the error callback. """
    global seen_all_brokers_down

    # Point the client at an invalid broker; the error callback is expected
    # to observe ALL_BROKERS_DOWN and set the module-level flag.
    settings = {
        'bootstrap.servers': '127.0.0.1:1',
        'socket.timeout.ms': 10,
        'error_cb': error_cb,
    }
    p = Producer(settings)

    # Poll for at most five seconds, stopping as soon as the flag is set.
    deadline = time.time() + 5
    while not (seen_all_brokers_down or time.time() >= deadline):
        p.poll(1)

    assert seen_all_brokers_down
class KafkaWorkflowCommunicationSender(object):
    """Sends workflow control messages (pause, abort, worker exit) to a Kafka topic."""
    _requires = ['confluent-kafka']

    def __init__(self, message_converter=ProtobufWorkflowCommunicationConverter):
        """Create the Kafka producer from the walkoff configuration.

        Args:
            message_converter: Object used to build the outgoing messages.
        """
        kafka_config = walkoff.config.Config.WORKFLOW_COMMUNICATION_KAFKA_CONFIG
        self.producer = Producer(kafka_config)
        self.topic = walkoff.config.Config.WORKFLOW_COMMUNICATION_KAFKA_TOPIC
        self.message_converter = message_converter

    def shutdown(self):
        """Blocks until all queued messages have been handled by the producer."""
        self.producer.flush()

    @staticmethod
    def _delivery_callback(err, msg):
        """Logs an error when delivery of a message failed."""
        if err is not None:
            logger.error('Kafka message delivery failed: {}'.format(err))

    def pause_workflow(self, workflow_execution_id):
        """Pauses a workflow currently executing.

        Args:
            workflow_execution_id (UUID): The execution ID of the workflow.
        """
        logger.info('Pausing workflow {0}'.format(workflow_execution_id))
        message = self.message_converter.create_workflow_pause_message(workflow_execution_id)
        self._send_workflow_communication_message(message, workflow_execution_id)

    def abort_workflow(self, workflow_execution_id):
        """Aborts a workflow currently executing.

        Args:
            workflow_execution_id (UUID): The execution ID of the workflow.
        """
        logger.info('Aborting running workflow {0}'.format(workflow_execution_id))
        message = self.message_converter.create_workflow_abort_message(workflow_execution_id)
        self._send_workflow_communication_message(message, workflow_execution_id)

    def send_exit_to_workers(self):
        """Sends the exit message over the communication sockets, otherwise worker receiver threads will hang"""
        message = self.message_converter.create_worker_exit_message()
        self._send_workflow_communication_message(message, None)

    def _send_workflow_communication_message(self, message, workflow_id):
        """Sends ``message`` to the configured topic, keyed by ``workflow_id``."""
        self._send_message(message, self.topic, workflow_id)

    def _send_message(self, message, topic, key):
        """Produces ``message`` to ``topic`` and serves pending delivery reports."""
        self.producer.produce(topic, message, key=key, callback=self._delivery_callback)
        # Delivery callbacks only run from poll()/flush(); without this call
        # failures would stay unreported until shutdown().
        self.producer.poll(0)
def open(self):
    """
    Open a connection to the Kafka service.
    Should return False if initialization fails.
    """
    LOG.info("Opening connection to the remote Kafka services at %s" % self.hosts)
    try:
        self._kafka_producer = Producer(**self._conf)
    except Exception as exc:
        # Honor the documented contract: report the failure through the
        # return value instead of letting the exception escape.
        LOG.error("Unable to create the Kafka producer: %s" % (exc,))
        return False
    return True
def __init__(self, execution_db, message_converter=ProtobufWorkflowResultsConverter, socket_id=None):
    """Set up the results producer and mark the sender ready if its status check passes.

    Args:
        execution_db: Stored on the instance as ``execution_db``.
        message_converter: Object used to build outgoing messages.
        socket_id: Stored on the instance as ``id_``.
    """
    settings = walkoff.config.Config

    # Start as not ready; flipped below only when check_status() is truthy.
    self._ready = False
    self.id_ = socket_id

    self.producer = Producer(settings.WORKFLOW_RESULTS_KAFKA_CONFIG)
    self.execution_db = execution_db
    self.topic = settings.WORKFLOW_RESULTS_KAFKA_TOPIC
    self.message_converter = message_converter

    status_ok = self.check_status()
    if status_ok:
        self._ready = True
def connect(self):
    """Create the consumer, subscribe it to the consumer topic, then create the producer."""
    consumer_settings = {
        'bootstrap.servers': self.bootstrap_servers,
        'group.id': self.group_id,
        'default.topic.config': {'auto.offset.reset': 'smallest'},
    }
    self.consumer = Consumer(consumer_settings)

    print("subscribing to %s" % self.consumer_topic)
    subscribed_topics = [self.consumer_topic]
    self.consumer.subscribe(subscribed_topics)
    print("Subscribed to topic %s " % self.consumer_topic)

    producer_settings = {
        'bootstrap.servers': self.bootstrap_servers,
        'group.id': self.group_id,
    }
    self.producer = Producer(producer_settings)
def producer(args, sniff_timeout_ms=500, sniff_promisc=True):
    """
    Captures packets from a network interface and sends them to a Kafka topic.

    The loop stops when the ``finished`` event is set or, if
    ``args.max_packets`` is positive, once that many packets were captured.
    Queued messages are flushed on the way out, even after an exception.
    """
    global producer_args

    # install the SIGINT handler and expose the arguments module-wide
    signal.signal(signal.SIGINT, signal_handler)
    producer_args = args

    # connect to kafka
    logging.info("Connecting to Kafka; %s", args.kafka_configs)
    kafka_producer = Producer(args.kafka_configs)

    # initialize packet capture
    logging.info("Starting packet capture")
    capture = pcapy.open_live(args.interface, args.snaplen, sniff_promisc, sniff_timeout_ms)
    pkts_in = 0

    try:
        while True:
            if finished.is_set():
                break
            if args.max_packets > 0 and pkts_in >= args.max_packets:
                break

            # capture a packet; the header is None when nothing was captured
            pkt_hdr, pkt_raw = capture.next()
            if pkt_hdr is not None:
                logging.debug("Packet received: pkts_in=%d, pkt_len=%s", pkts_in, pkt_hdr.getlen())
                pkts_in += 1
                pkt_ts = timestamp(pkt_hdr)
                kafka_producer.produce(
                    args.kafka_topic,
                    key=pack_ts(pkt_ts),
                    value=pkt_raw,
                    callback=delivery_callback,
                )

                # pretty print every `pretty_print`-th packet, if enabled
                if args.pretty_print > 0 and pkts_in % args.pretty_print == 0:
                    print('Packet received[%s]' % (pkts_in))

            # serve the callback queue
            kafka_producer.poll(0)

    finally:
        # flush all messages
        logging.info("Waiting for '%d' message(s) to flush", len(kafka_producer))
        kafka_producer.flush()

        # pkts_out only exists on the callback once it has run at least once
        pkts_out = getattr(delivery_callback, "pkts_out", 0)
        logging.info("'%d' packet(s) in, '%d' packet(s) out", pkts_in, pkts_out)
def test_dr_msg_errstr():
    """
    Test that the error string for failed messages works (issue #129).
    The underlying problem is that librdkafka reuses the message payload
    for error value on Consumer messages, but on Producer messages the
    payload is the original payload and no rich error string exists.
    """
    p = Producer({"message.timeout.ms": 10})

    def handle_dr(err, msg):
        # The error string must not be affected by either message payload.
        assert err is not None
        assert err.code() == KafkaError._MSG_TIMED_OUT
        assert "Message timed out" in err.str()

    payloads = (
        "This is the message payload",  # Unicode safe string
        "\xc2\xc2",                     # Invalid unicode sequence
    )
    for payload in payloads:
        p.produce('mytopic', payload, on_delivery=handle_dr)

    p.flush()
class Publisher():
    """Wraps a Kafka ``Producer`` and publishes UTF-8 encoded text synchronously."""

    def __init__(self, config=None):
        """Create the underlying producer.

        Args:
            config: Producer configuration dict. When omitted, the default
                broker settings below are used. A fresh dict is built per
                call so the default can never be shared between instances.
        """
        super().__init__()
        if config is None:
            config = {
                'bootstrap.servers': 'pulsing.jhk.org:9092',
                'retries': 3,
                'api.version.request': True,
            }
        self.__producer = Producer(config)
        self.logger = logging.getLogger(__name__)

    def publish(self, topic, data):
        """Encode ``data`` as UTF-8, produce it to ``topic`` and flush."""
        self.logger.debug('publish %s - %s', topic, data)
        self.__producer.produce(topic, data.encode('utf-8'))
        self.__producer.flush()

    @property
    def producer(self):
        """The wrapped producer object."""
        return self.__producer

    def __eq__(self, other):
        # Comparing against a foreign type used to raise AttributeError;
        # defer to Python's default handling instead.
        if not isinstance(other, Publisher):
            return NotImplemented
        return self.__producer == other.__producer

    def __str__(self):
        return str(self.__producer)

    def __hash__(self):
        return hash(self.__producer)
def __init__(self, message_converter=ProtobufWorkflowCommunicationConverter):
    """Build the Kafka producer and remember the topic and message converter.

    Args:
        message_converter: Object used to build outgoing messages.
    """
    settings = walkoff.config.Config
    self.producer = Producer(settings.WORKFLOW_COMMUNICATION_KAFKA_CONFIG)
    self.topic = settings.WORKFLOW_COMMUNICATION_KAFKA_TOPIC
    self.message_converter = message_converter
def __init__(
    self,
    storage_name: str,
    raw_topic: Optional[str],
    replacements_topic: Optional[str],
    max_batch_size: int,
    max_batch_time_ms: int,
    bootstrap_servers: Sequence[str],
    group_id: str,
    commit_log_topic: Optional[str],
    auto_offset_reset: str,
    queued_max_messages_kbytes: int,
    queued_min_messages: int,
    rapidjson_deserialize: bool,
    rapidjson_serialize: bool,
    commit_retry_policy: Optional[RetryPolicy] = None,
) -> None:
    """Resolve the topics, build the producer and metrics, and store the settings.

    Each topic argument that is ``None`` falls back to the topic spec
    reported by the storage's stream loader; the replacements and commit
    log topics stay ``None`` when the loader has no spec for them either.
    When no ``commit_retry_policy`` is given, a basic policy is built with
    the arguments shown below.
    """
    self.storage = get_writable_storage(storage_name)
    self.bootstrap_servers = bootstrap_servers

    stream_loader = self.storage.get_table_writer().get_stream_loader()

    self.raw_topic: Topic
    if raw_topic is None:
        self.raw_topic = Topic(stream_loader.get_default_topic_spec().topic_name)
    else:
        self.raw_topic = Topic(raw_topic)

    self.replacements_topic: Optional[Topic]
    if replacements_topic is None:
        replacement_topic_spec = stream_loader.get_replacement_topic_spec()
        if replacement_topic_spec is None:
            self.replacements_topic = None
        else:
            self.replacements_topic = Topic(replacement_topic_spec.topic_name)
    else:
        self.replacements_topic = Topic(replacements_topic)

    self.commit_log_topic: Optional[Topic]
    if commit_log_topic is None:
        commit_log_topic_spec = stream_loader.get_commit_log_topic_spec()
        if commit_log_topic_spec is None:
            self.commit_log_topic = None
        else:
            self.commit_log_topic = Topic(commit_log_topic_spec.topic_name)
    else:
        self.commit_log_topic = Topic(commit_log_topic)

    # XXX: This can result in a producer being built in cases where it's
    # not actually required.
    producer_settings = {
        "bootstrap.servers": ",".join(self.bootstrap_servers),
        "partitioner": "consistent",
        "message.max.bytes": 50000000,  # 50MB, default is 1MB
    }
    self.producer = Producer(producer_settings)

    metric_tags = {"group": group_id, "storage": storage_name}
    self.metrics = MetricsWrapper(environment.metrics, "consumer", tags=metric_tags)

    self.max_batch_size = max_batch_size
    self.max_batch_time_ms = max_batch_time_ms
    self.group_id = group_id
    self.auto_offset_reset = auto_offset_reset
    self.queued_max_messages_kbytes = queued_max_messages_kbytes
    self.queued_min_messages = queued_min_messages

    if commit_retry_policy is None:
        retriable_codes = (
            KafkaError.REQUEST_TIMED_OUT,
            KafkaError.NOT_COORDINATOR_FOR_GROUP,
            KafkaError._WAIT_COORD,
        )
        commit_retry_policy = BasicRetryPolicy(
            3,
            constant_delay(1),
            lambda e: isinstance(e, KafkaException)
            and e.args[0].code() in retriable_codes,
        )

    self.__commit_retry_policy = commit_retry_policy
    self.__rapidjson_deserialize = rapidjson_deserialize
    self.__rapidjson_serialize = rapidjson_serialize
def __init__(self, config=None):
    """Create the underlying producer and the instance logger.

    Args:
        config: Producer configuration dict. When omitted, the default
            broker settings below are used. A fresh dict is built per call
            so the default can never be shared between instances.
    """
    super().__init__()
    if config is None:
        config = {
            'bootstrap.servers': 'pulsing.jhk.org:9092',
            'retries': 3,
            'api.version.request': True,
        }
    self.__producer = Producer(config)
    self.logger = logging.getLogger(__name__)
# -*- coding: utf-8 -*-
from confluent_kafka import Producer


def delivery_report(err, msg):
    """Report the delivery result of one produced message.

    Invoked once per message, from poll() or flush().
    """
    if err is None:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))
        return
    print('Message delivery failed: {}'.format(err))


p = Producer({'bootstrap.servers': '127.0.0.1:9092'})

some_data_source = ['124', '245']

for data in some_data_source:
    # Run any delivery report callbacks left over from earlier produce() calls.
    p.poll(0)

    # produce() is asynchronous: delivery_report fires later, from the poll()
    # above or the flush() below, once the message was delivered or has
    # permanently failed.
    payload = data.encode('utf-8')
    p.produce('mytopic', payload, callback=delivery_report)

# Block until every outstanding message is delivered and its delivery report
# callback has run.
p.flush()
class KafkaConnector(object):
    """Simple wrapper class to configure a simple kafka consumer
    and producer pair, so that they can be used to perform simple
    filter() and map() operations over the received tweets"""

    def __init__(
        self,
        group_id=None,
        consumer_topic='consumer_limbo',
        producer_topic='consumer_limbo',
        logging_topic='minteressa_stats',
        bootstrap_servers='kafka:9092'
    ):
        """Store the connection settings; no client is created until connect()."""
        self.group_id = group_id
        self.bootstrap_servers = bootstrap_servers
        self.consumer_topic = consumer_topic
        self.producer_topic = producer_topic
        self.logging_topic = logging_topic

        self.consumer = None
        self.producer = None

    def listen(self):
        """Yield every proper message polled from the consumer, forever.

        End-of-partition events are reported on stderr and skipped.

        Raises:
            KafkaException: for any other consumer error.
        """
        while True:
            msg = self.consumer.poll()
            if msg is None:
                continue

            error = msg.error()
            if error:
                # Error or event
                if error.code() != KafkaError._PARTITION_EOF:
                    raise KafkaException(error)
                # End of partition event
                sys.stderr.write(
                    '%% %s [%d] reached end at offset %d\n' % (
                        msg.topic(),
                        msg.partition(),
                        msg.offset()
                    )
                )
            else:
                # Proper message
                sys.stdout.write(
                    '%s [partition-%d] at offset %d with key %s:\n' % (
                        msg.topic(),
                        msg.partition(),
                        msg.offset(),
                        str(msg.key())
                    )
                )
                yield msg

    def connect(self):
        """Create and subscribe the consumer, then create the producer."""
        self.consumer = Consumer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.group_id,
            'default.topic.config': {
                'auto.offset.reset': 'smallest'
            }
        })
        print("subscribing to %s" % self.consumer_topic)
        self.consumer.subscribe([
            self.consumer_topic
        ])
        print("Subscribed to topic %s " % self.consumer_topic)

        # NOTE(review): 'group.id' looks like a consumer-only property and is
        # presumably ignored by the producer - confirm before removing it.
        self.producer = Producer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.group_id
        })

    def send(self, message, producer_topic=None):
        """Produce ``message`` to ``producer_topic`` (default: the instance's producer topic)."""
        producer_topic = producer_topic \
            if producer_topic is not None \
            else self.producer_topic

        self.producer.produce(
            producer_topic,
            message
        )
        # Serve delivery reports without blocking so the local queue drains.
        self.producer.poll(0)

    def log(self, message, logging_topic=None):
        """Produce ``message`` to ``logging_topic`` (default: the instance's logging topic) and flush."""
        logging_topic = logging_topic \
            if logging_topic is not None \
            else self.logging_topic

        self.producer.produce(logging_topic, message)
        self.producer.flush()

    def close(self):
        """Close the consumer and deliver any messages still queued in the producer.

        Safe to call when connect() was never called.
        """
        if self.consumer is not None:
            self.consumer.close()
        if self.producer is not None:
            # Producer has no close() method; flush() delivers whatever is
            # still queued before the object is dropped.
            self.producer.flush()
def _connect(self) -> Producer:
    """Return a new ``Producer`` built from ``self._config``."""
    connection = Producer(self._config)
    return connection
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    # A producer cannot be created without a configuration dict.
    try:
        p = Producer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    p.produce('mytopic')
    p.produce('mytopic', value='somedata', key='a key')

    def on_delivery(err, msg):
        # Print the callback arguments (the original printed the builtin `str`).
        print('delivery', err, msg)
        # Since there is no broker, produced messages should time out.
        assert err.code() == KafkaError._MSG_TIMED_OUT

    p.produce(topic='another_topic', value='testing', partition=9,
              callback=on_delivery)

    p.poll(0.001)

    p.flush(0.002)
    p.flush()

    # Without a broker the metadata request may only time out or fail on
    # the transport level.
    try:
        p.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)
class KafkaProducer(object):
    """Test producer that sends keyed messages and tracks their acknowledgements."""

    def __init__(self, test_number, producer_id, broker_manager, acks_mode, in_flight_limit, print_mod):
        """Store the settings and reset all message-tracking state.

        Args:
            test_number: Test identifier, used in the actor name.
            producer_id: Producer identifier, used in the actor name.
            broker_manager: Provides ``get_bootstrap_servers()``.
            acks_mode: Value for ``request.required.acks``.
            in_flight_limit: Sending pauses while more than this many
                messages are waiting for an acknowledgement.
            print_mod: Ack totals are printed every ``print_mod`` acks.
        """
        self._stopping = False
        self.broker_manager = broker_manager
        self.producer_id = f"PRODUCER(Test:{test_number} Id:{producer_id})"
        self.producer = None
        self.acks_mode = acks_mode
        self.message_type = None
        self.terminate = False
        self.key_count = 1
        self.in_flight_limit = in_flight_limit
        self.print_mod = print_mod

        # message tracking
        self.curr_pos = 0
        self.pos_acks = 0
        self.neg_acks = 0
        # Read by print_final_count(); previously never defined, which made
        # that method raise AttributeError.
        self.undeliverable = 0
        self.no_acks = 0
        self.key_index = 0
        self.keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
        self.val = 1
        self.msg_set = set()
        self.pending_ack = set()

    def create_producer(self):
        """Create the underlying producer with retries disabled."""
        self.producer = Producer({
            'bootstrap.servers': self.broker_manager.get_bootstrap_servers(),
            'message.send.max.retries': 0,
            'default.topic.config': {'request.required.acks': self.acks_mode}
        })

    def get_actor(self):
        """Return the actor name used as prefix for console output."""
        return self.producer_id

    def delivery_report(self, err, msg):
        """Delivery callback: count the ack and remember confirmed values."""
        if self.terminate:
            return

        val = msg.value().decode("utf-8")
        self.pending_ack.remove(val)
        if err:
            self.neg_acks += 1
        else:
            self.pos_acks += 1
            self.msg_set.add(val)

        if (self.neg_acks + self.pos_acks) % self.print_mod == 0:
            console_out(
                f"Pos acks: {self.pos_acks} Neg acks: {self.neg_acks}",
                self.get_actor())

    def configure_as_sequence(self, sequence_count):
        """Send ``key=value`` bodies cycling over ``sequence_count`` keys."""
        self.key_count = sequence_count
        self.message_type = "sequence"

    def configure_as_partitioned_sequence(self, sequence_count):
        """Like a sequence, but each key goes to its own ``<topic>-<key>`` topic."""
        self.key_count = sequence_count
        self.message_type = "partitioned-sequence"

    def start_producing(self, topic, msg_count):
        """Send up to ``msg_count`` messages, stopping early on terminate.

        Sending is throttled while more than ``in_flight_limit`` messages
        are waiting for an acknowledgement.
        """
        for _ in range(msg_count):
            if self.terminate:
                break

            self.producer.poll(0)
            key = self.keys[self.key_index]
            # Derive the target from the ORIGINAL topic on every iteration;
            # rebinding `topic` made the key suffixes pile up ("t-a-b-...").
            target_topic = topic
            if self.message_type == "partitioned-sequence":
                target_topic = f"{topic}-{key}"
                body = f"{key}={self.val}"
            elif self.message_type == "sequence":
                body = f"{key}={self.val}"
            else:
                # A UUID object has no encode(); send its text form.
                body = str(uuid.uuid4())

            while len(self.pending_ack) > self.in_flight_limit:
                time.sleep(0.2)
                self.producer.poll(0)

            self.producer.produce(target_topic,
                                  value=body.encode('utf-8'),
                                  key=key,
                                  callback=self.delivery_report)
            self.pending_ack.add(body)
            self.curr_pos += 1
            self.key_index += 1
            if self.key_index == self.key_count:
                self.key_index = 0
                self.val += 1

    def print_final_count(self):
        """Print the final sent/ack totals."""
        # Whatever is still pending has received neither a positive nor a
        # negative acknowledgement.
        self.no_acks = len(self.pending_ack)
        console_out(
            f"Final Count => Sent: {self.curr_pos} Pos acks: {self.pos_acks} Neg acks: {self.neg_acks} Undeliverable: {self.undeliverable} No Acks: {self.no_acks}",
            self.get_actor())

    def get_msg_set(self):
        """Return the set of positively acknowledged message values."""
        return self.msg_set

    def stop_producing(self):
        """Stop the send loop and print the final totals."""
        self.terminate = True
        self.print_final_count()
from confluent_kafka import Producer

# The producer is configured with a default stream for its topics.
p = Producer({'streams.producer.default.stream': '/demo-stream'})

some_data_source = ["msg1", "msg2", "msg3"]

# Queue every message as UTF-8 bytes, then wait for all of them.
for data in some_data_source:
    payload = data.encode('utf-8')
    p.produce('topic1', payload)

p.flush()
print("function= ", func) result = eval(func) print(result) print() return result #main (creating connection to clients and sending recieving required results) if __name__ == "__main__": c = Consumer({ 'bootstrap.servers': sys.argv[1], 'group.id': '1', 'auto.offset.reset': 'earliest' }) c.subscribe(['server']) p = Producer({'bootstrap.servers': sys.argv[1]}) while True: msg = c.poll(1.0) if msg is None: continue if msg.error(): print("Consumer error: {}".format(msg.error())) continue # decode the message consumed notification = msg.value().decode('utf-8') params = notification.split('$') data = params[0] topic = params[1]
def __init__(self, server_addr: str, topic: str):
    """Create a producer for ``server_addr`` and remember the target ``topic``."""
    producer_settings = {'bootstrap.servers': server_addr}
    self.producer = Producer(producer_settings)
    self.topic = topic
async def produce(topic_name):
    """Produces data into the Kafka Topic

    Sends one serialized ``ClickEvent`` per second, forever.
    """
    p = Producer({"bootstrap.servers": BROKER_URL})
    while True:
        p.produce(topic_name, ClickEvent().serialize())
        # Serve delivery reports without blocking; a producer that is never
        # polled keeps every message accounted for in its local queue.
        p.poll(0)
        await asyncio.sleep(1.0)
class Zeus:
    '''
    Class to hold methods and variables for Inference.
    '''
    def __init__(self):
        '''
        Method called when object of class is created.
        Sets up the frames consumer, the inference producer and the model.
        '''
        # Initialize Frames Topic Consumer
        self.frames_consumer = Consumer({
            'bootstrap.servers': 'localhost:9092',
            'group.id': 'cameras',
            'auto.offset.reset': 'earliest'
        })
        self.frames_consumer.subscribe([frames_topic])

        # Initialize Inference Topic Producer
        self.inference_producer = Producer({
            'bootstrap.servers': 'localhost:9092',
            'message.max.bytes': '10000000'
        })

        # Get options
        self.opt = opts().init()
        self.opt.debug = max(self.opt.debug, 1)

        # Instantiate the Model
        self.detector = CtdetDetector(self.opt)

    def delivery_report(self, err, msg):
        '''
        Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). Only failures are reported.
        '''
        if err is not None:
            print('Message delivery failed: {}'.format(err))

    def infer(self):
        '''
        Method to share inferred knowledge.
        Consumes frame batches, runs the detector on each batch and
        publishes the batch together with the filtered detections.
        Runs until an exception (e.g. KeyboardInterrupt) ends the loop;
        the consumer is closed on the way out.
        '''
        print('Ready for Inference!')
        try:
            while True:
                start = time.time()

                self.inference_producer.poll(0)

                data = self.frames_consumer.poll()
                if data is None:
                    time.sleep(0.01)
                    continue
                if data.error():
                    print("Consumer error: {}".format(data.error()))
                    continue

                # NOTE(security): pickle.loads can execute arbitrary code;
                # only safe while every producer on this topic is trusted.
                data = pickle.loads(data.value())

                # Parse the Data
                batch = data['Batch']               # Batch of Images
                batch_len = data['BatchLength']     # Length of Batch
                height = data['ImageHeight']        # Height of Image
                width = data['ImageWidth']          # Width of Image
                channels = data['Channels']         # Color Channels

                # Get batch in NumPy ndarray
                # batch = np.fromstring(batch, np.uint8).reshape((batch_len, height, width, channels))

                # Perform Inference
                results = self.detector.run(batch)
                results = results['results']

                # Cleanse the result: keep detections above the confidence
                # threshold, grouped per entry of `results`.
                results_scrubbed = list()
                for result in results.keys():
                    classes = list()
                    bbox = list()
                    confidence = list()
                    for cat in range(1, num_classes + 1):
                        for val in results[result][cat]:
                            conf = val[4]
                            if not conf > thresh_conf:
                                continue
                            x1 = val[0]
                            y1 = val[1]
                            x2 = val[2]
                            y2 = val[3]
                            classes.append(cat)
                            confidence.append(conf)
                            bbox.append([x1, y1, x2, y2])
                    results_scrubbed.append([classes, confidence, bbox])

                data = {
                    'Batch': batch,
                    'BatchLength': batch_len,
                    'ImageHeight': height,
                    'ImageWidth': width,
                    'Channels': channels,
                    'Results': results_scrubbed,
                }

                self.inference_producer.produce(inference_topic, pickle.dumps(data), callback=self.delivery_report)
                self.inference_producer.flush()

                print(time.time() - start, end='\r')
        finally:
            # Previously unreachable after the infinite loop: always release
            # the consumer, whatever ends the loop.
            self.frames_consumer.close()
sys.stderr.write( f'%% Message delivered to topic:[{msg.topic()}]-partition:[{msg.partition():d}] @ offset[{msg.offset()}]\n' ) # 主程式進入點 if __name__ == '__main__': # 步驟1. 設定要連線到Kafka集群的相關設定 props = { # Kafka集群在那裡? 'bootstrap.servers': 'localhost:9092', # <-- 置換成要連接的Kafka集群 'error_cb': error_cb # 設定接收error訊息的callback函數 } # 步驟2. 產生一個Kafka的Producer的實例 producer = Producer(**props) # 步驟3. 指定想要發佈訊息的topic名稱 topicName = 'ak03.asyncsending' msgCount = 100000 # 10萬筆 try: print('Start sending messages ...') time_start = int(round(time.time() * 1000)) # produce(topic, [value], [key], [partition], [on_delivery], [timestamp], [headers]) ''' // ** 示範: Asynchronous Send ** // 透過一個Callback函式我們可以非同步地取得由Broker回覆訊息發佈的ack結果 // 這種方法可以取得Broker回覆訊息發佈的ack結果, 同時又可以取得好的throughput (建議的作法) '''
def __init__(self, connection, asynchronous=True):
    """Create the producer from ``connection`` (an empty config if falsy)."""
    # Imported here, so the dependency is only needed on instantiation.
    from confluent_kafka import Producer

    producer_config = connection if connection else {}
    self.producer = Producer(producer_config)
    self.asynchronous = asynchronous
import sys if __name__ == '__main__': if len(sys.argv) != 3: sys.stderr.write('Usage: %s <bootstrap-brokers> <topic>\n' % sys.argv[0]) sys.exit(1) broker = sys.argv[1] topic = sys.argv[2] # Producer configuration # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md conf = {'bootstrap.servers': broker} # Create Producer instance p = Producer(**conf) # Optional per-message delivery callback (triggered by poll() or flush()) # when a message has been successfully delivered or permanently # failed delivery (after retries). def delivery_callback(err, msg): if err: sys.stderr.write('%% Message failed delivery: %s\n' % err) else: sys.stderr.write('%% Message delivered to %s [%d] @ %d\n' % (msg.topic(), msg.partition(), msg.offset())) # Read lines from stdin, produce each line to Kafka for line in sys.stdin: try: # Produce line (without newline)
def __init__(
    self,
    storage_key: StorageKey,
    raw_topic: Optional[str],
    replacements_topic: Optional[str],
    max_batch_size: int,
    max_batch_time_ms: int,
    bootstrap_servers: Sequence[str],
    group_id: str,
    commit_log_topic: Optional[str],
    auto_offset_reset: str,
    queued_max_messages_kbytes: int,
    queued_min_messages: int,
    strategy_factory_type: StrategyFactoryType,
    processes: Optional[int],
    input_block_size: Optional[int],
    output_block_size: Optional[int],
    commit_retry_policy: Optional[RetryPolicy] = None,
    profile_path: Optional[str] = None,
) -> None:
    """Resolve the topics, build the producer and metrics, and store the settings.

    Each topic argument that is ``None`` falls back to the topic spec
    reported by the storage's stream loader; the replacements and commit
    log topics stay ``None`` when the loader has no spec for them either.
    When no ``commit_retry_policy`` is given, a basic policy is built with
    the arguments shown below.

    Raises:
        ValueError: if ``processes`` is given while the strategy factory
            type is not ``StrategyFactoryType.STREAMING``.
    """
    self.storage = get_writable_storage(storage_key)
    self.bootstrap_servers = bootstrap_servers

    stream_loader = self.storage.get_table_writer().get_stream_loader()

    self.raw_topic: Topic
    if raw_topic is None:
        self.raw_topic = Topic(stream_loader.get_default_topic_spec().topic_name)
    else:
        self.raw_topic = Topic(raw_topic)

    self.replacements_topic: Optional[Topic]
    if replacements_topic is None:
        replacement_topic_spec = stream_loader.get_replacement_topic_spec()
        if replacement_topic_spec is None:
            self.replacements_topic = None
        else:
            self.replacements_topic = Topic(replacement_topic_spec.topic_name)
    else:
        self.replacements_topic = Topic(replacements_topic)

    self.commit_log_topic: Optional[Topic]
    if commit_log_topic is None:
        commit_log_topic_spec = stream_loader.get_commit_log_topic_spec()
        if commit_log_topic_spec is None:
            self.commit_log_topic = None
        else:
            self.commit_log_topic = Topic(commit_log_topic_spec.topic_name)
    else:
        self.commit_log_topic = Topic(commit_log_topic)

    # XXX: This can result in a producer being built in cases where it's
    # not actually required.
    producer_settings = {
        "bootstrap.servers": ",".join(self.bootstrap_servers),
        "partitioner": "consistent",
        "message.max.bytes": 50000000,  # 50MB, default is 1MB
    }
    self.producer = Producer(producer_settings)

    metric_tags = {"group": group_id, "storage": storage_key.value}
    self.metrics = MetricsWrapper(environment.metrics, "consumer", tags=metric_tags)

    self.max_batch_size = max_batch_size
    self.max_batch_time_ms = max_batch_time_ms
    self.group_id = group_id
    self.auto_offset_reset = auto_offset_reset
    self.queued_max_messages_kbytes = queued_max_messages_kbytes
    self.queued_min_messages = queued_min_messages
    self.strategy_factory_type = strategy_factory_type
    self.processes = processes
    self.input_block_size = input_block_size
    self.output_block_size = output_block_size
    self.__profile_path = profile_path

    # A process count is only meaningful for the streaming strategy.
    uses_streaming = self.strategy_factory_type is StrategyFactoryType.STREAMING
    if self.processes is not None and not uses_streaming:
        raise ValueError(
            "process count can only be specified when using streaming strategy"
        )

    if commit_retry_policy is None:
        retriable_codes = (
            KafkaError.REQUEST_TIMED_OUT,
            KafkaError.NOT_COORDINATOR_FOR_GROUP,
            KafkaError._WAIT_COORD,
        )
        commit_retry_policy = BasicRetryPolicy(
            3,
            constant_delay(1),
            lambda e: isinstance(e, KafkaException)
            and e.args[0].code() in retriable_codes,
        )

    self.__commit_retry_policy = commit_retry_policy
from confluent_kafka import Producer


def delivery_report(err, msg):
    """Report the delivery result of one produced message.

    Invoked once per message, from poll() or flush().
    """
    if err is None:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))
        return
    print('Message delivery failed: {}'.format(err))


p = Producer({'bootstrap.servers': 'localhost'})

# Queue ten numbered test messages, then wait for their delivery reports.
for x in range(10):
    text = 'das ist nur ein Test: %d' % x
    p.produce('test.foo', text, callback=delivery_report)

p.flush()
from confluent_kafka import Producer
import avro.schema
import avro.io
import io
import random

if __name__ == "__main__":

    conf = {'bootstrap.servers': 'localhost:9092'}
    producer = Producer(**conf)

    # Kafka topic
    topic = "topic-example-200"

    # Path to profile.avsc avro schema
    schema_path = "profile.avsc"
    # Read the schema through a context manager so the file handle is closed.
    with open(schema_path) as schema_file:
        schema = avro.schema.Parse(schema_file.read())

    # The writer depends only on the schema, so build it once.
    writer = avro.io.DatumWriter(schema)

    for i in range(10):
        # Serialize one record into a fresh in-memory buffer.
        bytes_writer = io.BytesIO()
        encoder = avro.io.BinaryEncoder(bytes_writer)
        writer.write(
            {
                "name": "tom",
                "number1": random.randint(0, 10),
                "number2": 2.0
            },
            encoder)
        raw_bytes = bytes_writer.getvalue()
        producer.produce(topic, raw_bytes)

    # Wait until every queued message has been handled.
    producer.flush()
import pyodbc
from confluent_kafka import Producer

print ('---login--- ')
# NOTE(security): the credentials are hard-coded in this connection string;
# they should be moved out of the source (environment / secret store).
conn = pyodbc.connect("DRIVER={ODBC Driver 17 for SQL Server};SERVER=DevSQL01;DATABASE=HackStream;UID=greatscott;PWD=H4ppyFunB4ll;")
try:
    cursor = conn.cursor()
    cursor.execute('SELECT MAX(ActionId) FROM HackStream.dbo.Funnel')
    for row in cursor:
        print (row)
finally:
    # Release the database connection even if the query fails.
    conn.close()
print ('--end--')

p = Producer({'bootstrap.servers': '172.16.43.33:9092'})
p.produce('Funnel', key='hello', value='world')
# flush() reports how many messages are still queued after the timeout.
remaining = p.flush(10)
if remaining:
    print ('%d message(s) were not delivered within 10 seconds' % remaining)
from confluent_kafka import Producer

some_data_source = ['dddd', 'ffff', 'gggg']

p = Producer({'bootstrap.servers': '172.17.0.4'})

# Queue every entry as UTF-8 bytes, then wait for all of them.
for data in some_data_source:
    payload = data.encode('utf-8')
    p.produce('ragnatopic', payload)

p.flush()
img_path = "data/1mb.jpg"
# img_path = "../data/10mb.jpg"
# Read the image in binary mode; the context manager closes the file handle,
# which the original left open.
with open(img_path, 'rb') as image:
    image_read = image.read()
data = base64.b64encode(image_read)

print("size 1: ", len(image_read))
print("size 2: ", len(data))

# broker = "freepsw-template-centos-4cpu-1:9092"
broker = "kafka-test:9092"
topic = "latency-test"

# See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
p = Producer({
    'bootstrap.servers': broker,
    'acks': 1,
    # 'socket.nagle.disable': True,
    'message.max.bytes': 15000000
})

# Consumer-side settings (group id and offset reset).
conf = {
    'bootstrap.servers': broker,
    'group.id': 'my-group',
    'auto.offset.reset': 'latest',
    # 'socket.nagle.disable': True,
    # 'fetch.wait.max.ms': 0,
    # 'heartbeat.interval.ms': '1000',
    'message.max.bytes': 15000000
}

logger = logging.getLogger('consumer')
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
import json

admin = AdminClient({'bootstrap.servers': 'kafka-1:9092'})

# Request creation of the station-update topic unless the cluster metadata
# already lists it.
metadata = admin.list_topics(timeout=10)
topics = metadata.topics
if not ('citibike.station.update.1' in topics):
    admin.create_topics([
        NewTopic(
            'citibike.station.update.1',
            num_partitions=3,
            replication_factor=1,
        ),
    ])

p = Producer({
    'bootstrap.servers': 'kafka-1:9092',
    'api.version.request': True,
})


@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000)
def scrape_station_status():
    """Fetch the station status feed and return the parsed JSON document.

    Raises:
        IOError: if the HTTP status is not 200.
    """
    response = requests.get(
        'https://gbfs.citibikenyc.com/gbfs/en/station_status.json')
    if response.status_code != 200:
        raise IOError('Failed request.')
    print('scraping data')
    body = response.content.decode('utf-8')
    return ujson.loads(body)


scrape_station_status()
# Reads S3 image keys from a text file and produces them to a Kafka topic.

from confluent_kafka import Producer
from time import sleep

if __name__ == "__main__":
    S3_KEYS_PATH = 's3_keys_test.txt'
    KAFKA_SERVER = 'localhost:9092'
    OUTPUT_TOPIC = 'image-s3-keys'

    producer = Producer({"bootstrap.servers": KAFKA_SERVER})

    # Read the keys through a context manager so the file handle is closed.
    with open(S3_KEYS_PATH) as keys_file:
        for i, s3_key in enumerate(map(str.strip, keys_file)):
            producer.produce(OUTPUT_TOPIC, key=s3_key, value=s3_key)
            # Serve delivery reports so the local queue keeps draining.
            producer.poll(0)
            print('%d, key %s' % (i, s3_key))
            sleep(0.0001)

    # Wait until every queued message has been handled.
    producer.flush()
class KafkaDestination(object):
    """ syslog-ng Apache Kafka destination.

    Lifecycle methods (``init``, ``open``, ``is_opened``, ``send``, ``close``,
    ``deinit``) build a producer configuration from the destination options,
    create the producer, and forward each log message to a Kafka topic.
    """

    _kafka_producer = None
    # Class-level defaults only. Each instance copies this dict in
    # ``__init__`` so options set on one destination never leak into another.
    _conf = dict()

    def __init__(self):
        self.hosts = None
        self.topic = None
        self.msg_key = None
        self.partition = None
        self.programs = None
        self.group_id = None
        self.broker_version = None
        self.verbose = False
        self.display_stats = False
        self.producer_config = None
        # Per-instance copy: ``init`` mutates the configuration in place.
        self._conf = dict(self._conf)

    def init(self, args):
        """ This method is called at initialization time.

        Reads the destination options from ``args`` (a mapping of option name
        to string value) and fills in the producer configuration.

        Should return False if initialization fails.
        """
        if 'producer_config' in args:
            try:
                self.producer_config = ast.literal_eval(args['producer_config'])
                self._conf.update(self.producer_config)
            except (ValueError, SyntaxError, TypeError):
                # literal_eval raises SyntaxError on malformed input, and
                # update() raises TypeError/ValueError for non-dict values;
                # all of them mean "not a usable dict literal".
                LOG.error("Given config %s is not in a Python dict format."
                          % args['producer_config'])

        try:
            self.hosts = args['hosts']
            self.topic = args['topic']
            self._conf['bootstrap.servers'] = self.hosts
        except KeyError:
            LOG.error("Missing `hosts` or `topic` option...")
            return False

        if 'msg_key' in args:
            self.msg_key = args['msg_key']
            LOG.info("Message key used will be %s" % self.msg_key)

        if 'partition' in args:
            self.partition = args['partition']
            LOG.info("Partition to produce to %s" % self.partition)

        # optional `programs` parameter to filter out messages
        if 'programs' in args:
            self.programs = parse_str_list(args['programs'])
            LOG.info("Programs to filter against %s" % self.programs)

        if 'group_id' in args:
            self.group_id = args['group_id']
            self._conf['group.id'] = self.group_id
            LOG.info("Broker group_id=%s" % self.group_id)

        if 'broker_version' in args:
            self.broker_version = args['broker_version']
            # Only the major.minor part decides whether the API version is
            # requested from the broker or a fallback version is pinned.
            if '.'.join(self.broker_version.split('.')[:2]) in ('0.10', '0.11'):
                self._conf['api.version.request'] = True
            else:
                self._conf['broker.version.fallback'] = self.broker_version
                self._conf['api.version.request'] = False
            LOG.info("Broker version=%s" % self.broker_version)
        else:
            self.broker_version = DEFAULT_BROKER_VERSION_FALLBACK
            self._conf[
                'broker.version.fallback'] = DEFAULT_BROKER_VERSION_FALLBACK
            self._conf['api.version.request'] = False
            LOG.warning("Default broker version fallback %s "
                        "will be applied here." % DEFAULT_BROKER_VERSION_FALLBACK)

        self._conf['on_delivery'] = delivery_callback
        if 'verbose' in args:
            # provide a global `on_delivery` callback in the `Producer()` config
            # dict better for memory consumptions vs per message callback.
            self.verbose = ast.literal_eval(args['verbose'])
        if not self.verbose:
            # only interested in delivery failures here. We do provide a
            # global on_delivery callback in the Producer() config dict and
            # also set delivery.report.only.error.
            self._conf['delivery.report.only.error'] = True
            LOG.info("Verbose mode is OFF: you will not be able to see "
                     "messages in here. Failures only. Use 'verbose=('True')' "
                     "in your destination options to see successfully "
                     "processed messages in your logs.")

        # display broker stats?
        if 'display_stats' in args:
            self.display_stats = ast.literal_eval(args['display_stats'])
        if self.display_stats:
            self._conf['stats_cb'] = stats_callback
            LOG.info("Broker statistics will be displayed.")

        LOG.info(
            "Initialization of Kafka Python driver w/ args=%s" % self._conf)
        return True

    def open(self):
        """ Open a connection to the Kafka service.

        Should return False if initialization fails.
        """
        LOG.info("Opening connection to the remote Kafka services at %s"
                 % self.hosts)
        self._kafka_producer = Producer(**self._conf)
        return True

    def is_opened(self):
        """ Check if the connection to Kafka is able to receive messages.

        Should return False if target is not open.
        """
        return self._kafka_producer is not None

    def close(self):
        """ Close the connection to the Kafka service.

        Flushes the producer (30 second timeout) if one exists; the producer
        object itself is released in ``deinit``.
        """
        LOG.debug("KafkaDestination.close()....")
        if self._kafka_producer is not None:
            LOG.debug("Flushing producer w/ a timeout of 30 seconds...")
            self._kafka_producer.flush(30)
        return True

    # noinspection PyMethodMayBeStatic
    def deinit(self):
        """ This method is called at deinitialization time.
        """
        LOG.debug("KafkaDestination.deinit()....")
        if self._kafka_producer:
            self._kafka_producer = None
        return True

    def send(self, ro_msg):
        """ Send a message to the target service

        It should return True to indicate success, False will suspend the
        destination for a period specified by the time-reopen() option.

        :param ro_msg: either a dict (`values-pair`) or a syslog-ng
            `LogMessage`-like object exposing FACILITY, PRIORITY, HOST,
            PROGRAM, DATE and MESSAGE attributes.
        :return: True or False
        """

        # do nothing if msg is empty
        if not ro_msg:
            return True

        # no syslog-ng `values-pair` here we dealing with `LogMessage`
        if not isinstance(ro_msg, dict):
            # syslog-ng `LogMessage` is read-only
            # goal is rfc5424 we cannot use values-pair because of memory leaks
            try:
                msg = {'FACILITY': ro_msg.FACILITY,
                       'PRIORITY': ro_msg.PRIORITY,
                       'HOST': ro_msg.HOST,
                       'PROGRAM': ro_msg.PROGRAM,
                       'DATE': ro_msg.DATE,
                       'MESSAGE': ro_msg.MESSAGE}
            except AttributeError:
                LOG.error("Your version of syslog-ng is not supported. "
                          "Please use syslog-ng 3.7.x")
                return False
        else:
            LOG.warning("You are using `values-pair` if you are using "
                        "syslog-ng <= 3.11 it is known to be leaking...")
            msg = ro_msg

        try:
            # check if we do have a program filter defined.
            msg_program = msg['PROGRAM']
            if self.programs is not None:
                if msg_program not in self.programs:
                    # notify of success
                    return True
            if msg_program == 'firewall':
                firewall_msg = msg['MESSAGE']
                msg['MESSAGE'] = parse_firewall_msg(firewall_msg)
            elif msg_program == 'nat':
                nat_msg = msg['MESSAGE']
                msg['MESSAGE'] = parse_nat_msg(nat_msg)
            # convert date string to UNIX timestamp
            msg_date = msg['DATE']
            if msg_date is not None:
                msg['DATE'] = date_str_to_timestamp(msg_date)

            # The payload is the Python string form of the message dict.
            msg_string = str(msg)

            kwargs = {}
            if self.msg_key and self.msg_key in msg.keys():
                kwargs['key'] = msg[self.msg_key]
            if self.partition:
                try:
                    kwargs['partition'] = int(self.partition)
                except ValueError:
                    LOG.warning(
                        "Ignore partition=%s because it is not an int."
                        % self.partition)

            self._kafka_producer.produce(self.topic, msg_string, **kwargs)

            # `poll()` doesn't do any sleeping at all if you give it 0, all
            # it does is grab a mutex, check a queue, and release the mutex.
            # It is okay to call poll(0) after each produce call, the
            # performance impact is negligible, if any.
            self._kafka_producer.poll(0)
        except BufferError:
            LOG.error("Producer queue is full. This message will be discarded. "
                      "%d messages waiting to be delivered.",
                      len(self._kafka_producer))
            # do not return False here as the destination would be closed
            # and we would have to restart syslog-ng
            sleep(5)
            return True
        except (KafkaException, UnicodeEncodeError) as e:
            LOG.error("An error occurred while trying to send messages...  "
                      "See details: %s" % e, exc_info=True)
            sleep(5)
            # do not return False here as the destination would be closed
            # and we would have to restart syslog-ng
            return True

        return True
class KafkaWorkflowResultsSender(object):
    """Forwards workflow events either to Kafka or to in-process listeners.

    When constructed with a ``socket_id`` every event is converted with
    ``message_converter`` and produced to a per-event Kafka topic; without
    one the event is dispatched locally through ``event.send``.
    """

    def __init__(self, execution_db, message_converter=ProtobufWorkflowResultsConverter, socket_id=None):
        self._ready = False

        self.id_ = socket_id

        kafka_config = walkoff.config.Config.WORKFLOW_RESULTS_KAFKA_CONFIG
        self.producer = Producer(kafka_config)
        self.execution_db = execution_db
        self.topic = walkoff.config.Config.WORKFLOW_RESULTS_KAFKA_TOPIC
        self.message_converter = message_converter

        if self.check_status():
            self._ready = True

    def shutdown(self):
        """Block until every queued message has been delivered or has failed."""
        self.producer.flush()

    @staticmethod
    def _delivery_callback(err, msg):
        """Log failed deliveries; successful ones are ignored."""
        if err is not None:
            logger.error('Kafka message delivery failed: {}'.format(err))

    def _format_topic(self, event):
        """Return the topic for ``event``: ``<base topic>.<event name>``."""
        return '{}.{}'.format(self.topic, event.name)

    def handle_event(self, workflow, sender, **kwargs):
        """Listens for the data_sent callback, which signifies that an execution element needs to trigger a
                callback in the main thread.

            Args:
                workflow (Workflow): The Workflow object that triggered the event
                sender (ExecutionElement): The execution element that sent the signal.
                kwargs (dict): Any extra data to send. Must contain ``event``.
        """
        event = kwargs['event']
        if event in [WalkoffEvent.TriggerActionAwaitingData, WalkoffEvent.WorkflowPaused]:
            # Persist the workflow so it can be resumed later.
            saved_workflow = SavedWorkflow.from_workflow(workflow)
            self.execution_db.session.add(saved_workflow)
            self.execution_db.session.commit()
        elif event == WalkoffEvent.ConsoleLog:
            # Console logs are attributed to the action currently executing.
            action = workflow.get_executing_action()
            sender = action

        if self.id_:
            packet_bytes = self.message_converter.event_to_protobuf(sender, workflow, **kwargs)
            self.producer.produce(self._format_topic(event), packet_bytes, callback=self._delivery_callback)
            # Serve pending delivery callbacks now (non-blocking). Without a
            # poll, failures would only be reported when shutdown() flushes.
            self.producer.poll(0)
        else:
            event.send(sender, data=kwargs.get('data', None))

    def is_ready(self):
        """Return True once the sender finished construction successfully."""
        return self._ready

    def check_status(self):
        """Return True when a producer object exists."""
        return self.producer is not None

    def send_ready_message(self):
        """Broadcast a ``WorkerReady`` event through the common workflow signal."""
        WalkoffEvent.CommonWorkflowSignal.send(sender={'id': '1'}, event=WalkoffEvent.WorkerReady)

    def create_workflow_request_message(self, workflow_id, workflow_execution_id, start=None, start_arguments=None,
                                        resume=False, environment_variables=None, user=None):
        """Delegate to ``message_converter.create_workflow_request_message`` with the same arguments."""
        return self.message_converter.create_workflow_request_message(workflow_id, workflow_execution_id, start,
                                                                      start_arguments, resume, environment_variables,
                                                                      user)
them into python code""" with gzip.open(filename, "r") as tweetfile: for row in tweetfile: yield json.loads(row) def delivery_callback (self, err, msg): if err: print('%% Message failed delivery: %s\n' % err) else: print('%% Message delivered to %s [%d]\n' % \ (msg.topic(), msg.partition())) if __name__ == '__main__': kfk = Producer({ 'bootstrap.servers': "kafka:2181", 'group.id': "json_producer" }) time.sleep(10) def delivery_callback (err, msg): if err: sys.stderr.write('%% Message failed delivery: %s\n' % err) else: sys.stderr.write('%% Message delivered to %s [%d]\n' % \ (msg.topic(), msg.partition())) for tweet in get_tweet('examples/tweets-200k.txt.gz'): # if len(tweet['entities']['urls']) > 0 and \ # any(tweet['lang'] in l for l in ['es', 'en']): try: print("%s: %s" % (tweet['user']['screen_name'], tweet['text']))
class ProducerServer:
    """
    Setup Basic Kafka Servers

    Wraps a Kafka producer and admin client configured from the ``producer``
    section of ``conf``, creates the configured topic when needed, and
    streams the rows of a JSON input file to that topic.
    """

    # Tracks existing topics across all Producer instances
    existing_topics = set()

    def __init__(self, conf):
        """
        :param conf: configuration object exposing ``get``, ``getint`` and
            ``getfloat`` with ``(section, option)`` arguments
        """
        self.conf = conf
        self.producer = Producer({
            "bootstrap.servers": conf.get("producer", "bootstrap.servers")
        })
        self.client = AdminClient({
            "bootstrap.servers": conf.get("producer", "bootstrap.servers")
        })
        # Only talk to the broker about a topic once per process.
        if conf.get("producer", "topic_name") not in ProducerServer.existing_topics:
            self.create_topic()
            ProducerServer.existing_topics.add(conf.get("producer", "topic_name"))

    def topic_exists(self, topic_name):
        """Checks if the given topic exists"""
        topics = self.client.list_topics(timeout=5)
        return topics.topics.get(topic_name) is not None

    def create_topic(self):
        """
        Create topic if it doesn't exists
        """
        exists = self.topic_exists(self.conf.get("producer", "topic_name"))
        if exists:
            logger.info(f"topic already exists: {self.conf.get('producer', 'topic_name')}")
        else:
            futures = self.client.create_topics([
                NewTopic(
                    topic=self.conf.get("producer", "topic_name"),
                    num_partitions=self.conf.getint("producer", "num_partitions"),
                    replication_factor=self.conf.getint("producer", "replication_factor")
                )
            ])

            # Only the futures are needed; wait on each to learn the outcome.
            for future in futures.values():
                try:
                    future.result()
                    logger.info(f"topic created: {self.conf.get('producer', 'topic_name')}")
                except Exception as e:
                    logger.error(f"failed to create topic {self.conf.get('producer', 'topic_name')}: {e}")

    @staticmethod
    def serialize_json(json_data):
        """
        Take JSON dictionary object and convert that to string(serialize)
        :param json_data: JSON dictionary object
        :return: JSON string
        """
        return json.dumps(json_data)

    def run(self):
        """
        Read JSON data and produce serialized rows to Kafa Topic

        A KeyboardInterrupt during production still flushes the producer.
        """
        try:
            with open(self.conf.get("producer", "input_file")) as f:
                data = json.load(f)
            logger.info(f"Reading {len(data)} lines from {self.conf.get('producer','input_file')}")
            for row in tqdm(data, total=len(data), desc="Producer:> "):
                # Serve delivery callbacks from earlier produce() calls.
                self.producer.poll(timeout=self.conf.getfloat("producer", "consume_timeout"))
                message = self.serialize_json(row)
                logger.info(f"Serialized Data: {message}")
                self.producer.produce(
                    topic=self.conf.get("producer", "topic_name"),
                    value=message,
                    callback=self.delivery_callback
                )
                time.sleep(0.2)
            logger.info("Processing complete \n Cleaning Producer!")
            self.close()
        except KeyboardInterrupt:
            self.close()

    @staticmethod
    def delivery_callback(err, msg):
        """
        Callback triggered by produce function to check successful delivery of message to broker
        """
        if err is not None:
            logger.error(f"Failed to deliver message: {err}")
        else:
            logger.info(f"Successfully produced message to topic {msg.topic()}")

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        try:
            if self.conf.get("producer", "topic_name"):
                self.producer.flush()
                logger.info("Producer Shutdown!!! ")
        except Exception:
            # Best effort: never raise from close(), but keep the traceback
            # in the log so the failure can be diagnosed.
            logger.exception("producer close incomplete - skipping")
#if (success + fail) % 10000 == 0: # print("Success: " + str(success) + " Failed: " + str(fail)) def printStats(): global sent global success global fail print("Sent: " + str(sent)) print("Delivered: " + str(success)) print("Failed: " + str(fail)) acks_mode = sys.argv[3] p = Producer({'bootstrap.servers': '172.17.0.3:9092,172.17.0.4:9093,172.17.0.5:9094', 'message.send.max.retries': 0, #'batch.num.messages': 1000, #'stats_cb': my_stats_callback, #'statistics.interval.ms': 1000, 'default.topic.config': { 'request.required.acks': acks_mode }}) topic = sys.argv[4] count = int(sys.argv[1]) + 1 wait_period = float(sys.argv[2]) success = 0 fail = 0 sent = 0 atexit.register(printStats) for data in range(1, count): # Trigger any available delivery report callbacks from previous produce() calls p.poll(0)
key = data['ALPACA_API_KEY'] secret_key = data['ALPACA_SECRET_KEY'] base_url = data['PAPER_URL'] yamlfile.close() api = REST( key, secret_key, base_url ) proc = mp.Process(target=alpaca_process_run) proc.start() conf = {'bootstrap.servers': 'localhost:9093'} request_producer = Producer(**conf) symbol_brain_dic = {} while True: symbol = input() if symbol != 'STOP' and symbol != 'STOP_ALL': req_symbols = symbol.split(',') print(req_symbols) for symbol in req_symbols: data = api.get_asset(symbol) if data.shortable and data.easy_to_borrow: cmd = Command() cmd.command_type = CommandType.START_LIVE sc = StrategyConfiguration() sc.type = StrategyType.TICK sc.interval = 50
import ccloud_lib if __name__ == '__main__': # Initialization args = ccloud_lib.parse_args() config_file = args.config_file topic = args.topic conf = ccloud_lib.read_ccloud_config(config_file) # Create Producer instance p = Producer({ 'bootstrap.servers': conf['bootstrap.servers'], 'sasl.mechanisms': 'PLAIN', 'security.protocol': 'SASL_SSL', 'sasl.username': conf['sasl.username'], 'sasl.password': conf['sasl.password'] }) # Create topic if needed # Examples of additional admin API functionality: # https://github.com/confluentinc/confluent-kafka-python/blob/master/examples/adminapi.py a = AdminClient({ 'bootstrap.servers': conf['bootstrap.servers'], 'sasl.mechanisms': 'PLAIN', 'security.protocol': 'SASL_SSL', 'sasl.username': conf['sasl.username'], 'sasl.password': conf['sasl.password'] }) fs = a.create_topics([NewTopic(
print("nb hours: " + str(nb_hours)) # create a table of node instances thr = sumM3Thresholder(nb_hours) # Create Kafka Consumer instance c = Consumer({ 'bootstrap.servers': sys.argv[1], 'group.id': 'sumM3Consumer' }) # Subscribe to topic 'm3Analysis' c.subscribe(['m3Analysis']) # Create a provider instance. p = Producer({'bootstrap.servers': sys.argv[1]}) # Process messages try: while True: try: s3msg_in = sumM3Message() s3msg_in.poll_kafka(c, 300.0) if s3msg_in.topic == "": print("No good message for 300 sec.") else: # Check whether this message triggers a threshold if thr.checkList(s3msg_in): # this message needs re-broadcasting msg = thr.node_list[s3msg_in.node_dns].to_string() print("Sending: " + msg)
# $ python -m venv ccloud_example
# $ source ccloud_example/bin/activate
# $ pip install confluent_kafka
# $ python confluent_cloud.py
# $ deactivate

import uuid

from confluent_kafka import Producer, Consumer

# Connection settings for the cloud cluster; the placeholders must be filled
# in before running.
producer_settings = {
    'bootstrap.servers': '<ccloud bootstrap servers>',
    'broker.version.fallback': '0.10.0.0',
    'api.version.fallback.ms': 0,
    'sasl.mechanisms': 'PLAIN',
    'security.protocol': 'SASL_SSL',
    'sasl.username': '******',
    'sasl.password': '******'
}
p = Producer(producer_settings)


def acked(err, msg):
    """Report the outcome of one message delivery on stdout.

    Used as the per-message delivery callback: prints the topic, partition
    and offset on success, or the error text on failure.
    """
    if err is None:
        print("produced to: {} [{}] @ {}".format(msg.topic(), msg.partition(), msg.offset()))
        return
    print("failed to deliver message: {}".format(err.str()))


p.produce('python-test-topic', value='python test value', callback=acked)
"Sydney-NSW-AU": range(45, 75), "Mumbai-NH-IN": range(70, 100), "London-UK-GB": range(45, 75) } location_hour_offset = [0, -3, 14, 9, 6] weather_types = ["Sunny", "Cloudy", "Fog", "Rain", "Lightning", "Windy"] humidities = range(30, 100) wind_speed_mph = range(0, 20) ET = tz.gettz("America/New_York") starting_datetime = datetime(2018, 6, 1, 0, 0, 0, 0, tzinfo=ET) # Create Kafka Producer # https://docs.confluent.io/current/clients/confluent-kafka-python/#confluent_kafka.Producer.Producer # https://docs.confluent.io/current/installation/configuration/producer-configs.html producer = Producer({"bootstrap.servers": "localhost:9092"}) def acked(err, msg): if err is not None: print(f"Failed to deliver message: {msg.value()}: {err.str()}") else: print(f"Message produced: {msg.value()}") def generate_weather_strings(current_generation: int, current_datetime: datetime) -> Tuple[str, str]: for index, (location, temps) in enumerate(location_mapped_to_temp_in_ranges.items()): current_day = current_generation / 24 base_current_hour = fmod(
help='A TSV file') parser.add_argument( 'kafka_hosts', metavar='h', type=str, nargs=1, help='A comma separated list of Kafka hosts, e.g. 127.0.0.1:9092.') parsed_args = parser.parse_args() tsv_path = parsed_args.tsv_path[0] in_tsv = open(tsv_path, 'r') hosts = parsed_args.kafka_hosts[0] log.info(f'Connecting to Kafka broker(s): {hosts}') reader = csv.DictReader(in_tsv, delimiter='\t') start = time.monotonic() log.info('Beginning production of messages') producer = Producer({'bootstrap.servers': hosts}) count = 0 for idx, row in enumerate(reader): count = idx encoded = _parse_row(row) while True: try: producer.produce('inbound_images', encoded) except BufferError: # Give the producer time to catch up before retrying producer.poll(1) break if idx % 10000 == 0: log.info(f'Produced {idx} messages so far') print(f'Produced {count} at rate {count / (time.monotonic() - start)}/s') in_tsv.close()
def publish(request):
    """Fetch one record from Elasticsearch and publish it to a Kafka topic.

    Reads ``type`` and ``uuid`` from ``request.params``, retrieves the
    matching document from the local Elasticsearch index, builds a JSON
    message from the template that matches the document's resource type and
    produces it to a topic selected from ``request.host`` and that type.

    Returns a dict. It starts as ``{'status': 'Fail', 'message': ...}`` and
    each guarded stage returns it with a stage-specific message on error.
    When the delivery callback reports success it is replaced by a dict with
    ``status`` ``'Success'`` plus the sent ``message``, ``partition`` and
    ``offset``.
    """
    elasticsearch_server = 'http://localhost:9200/clincoded'
    return_object = {'status': 'Fail',
                     'message': 'Unable to deliver message'}

    # Check that required parameters have been provided
    if not('type' in request.params and 'uuid' in request.params):
        return_object['message'] = 'Required parameters missing in request'
        return return_object

    # Attempt to retrieve data (from Elasticsearch)
    try:
        searchRes = requests.get('{}/{}/{}'.format(elasticsearch_server, request.params['type'], request.params['uuid']), timeout=10)

        if searchRes.status_code != requests.codes.ok:
            return_object['message'] = 'Data search failed'
            return return_object

    except Exception as e:
        return_object['message'] = 'Data search could not be completed'
        return return_object

    # Store JSON-encoded content of search result(s)
    try:
        resultJSON = searchRes.json()

    except Exception as e:
        return_object['message'] = 'Retrieved data not in expected format'
        return return_object

    # Check that search found data
    if 'found' not in resultJSON or not(resultJSON['found']):
        return_object['message'] = 'Requested data could not be found'
        return return_object

    # Check that data has expected elements
    # (any missing key, or an unrecognized resource type, is reported as
    # "missing expected elements")
    try:
        data_type_to_publish = resultJSON['_source']['embedded']['resourceType']

        if data_type_to_publish == 'classification':
            evidence_to_publish = resultJSON['_source']['embedded']['resourceParent']['gdm']
            publishing_affiliation = resultJSON['_source']['embedded']['resource']['affiliation']
            evidence_counts_to_publish = resultJSON['_source']['embedded']['resource']['classificationPoints']

        elif data_type_to_publish == 'interpretation':
            evidence_to_publish = resultJSON['_source']['embedded']['resourceParent']['interpretation']

        else:
            raise Exception

    except Exception as e:
        return_object['message'] = 'Retrieved data missing expected elements'
        return return_object

    # Check that message should be sent? (approved status? permission to publish?)

    # Construct message
    try:
        if data_type_to_publish == 'interpretation':
            # Work on a copy so the shared module-level template is untouched.
            message_template = deepcopy(clincoded.messaging.templates.vci_to_dx.message_template)
            data_to_remove = clincoded.messaging.templates.vci_to_dx.data_to_remove
            add_data_to_msg_template(resultJSON['_source']['embedded'], None, None, message_template)

        else:
            message_template = deepcopy(clincoded.messaging.templates.gci_to_dx.message_template)
            classification_points = deepcopy(evidence_counts_to_publish)
            add_data_to_msg_template(resultJSON['_source']['embedded'], gather_evidence(evidence_to_publish, publishing_affiliation),
                gather_evidence_counts(classification_points, True), message_template)
            message = json.dumps(message_template, separators=(',', ':'))

    except Exception as e:
        return_object['message'] = 'Failed to build complete message'
        return return_object

    # Transform message (if necessary, via independent service)
    try:
        if data_type_to_publish == 'interpretation':
            remove_data_from_msg_template(data_to_remove, message_template['interpretation'])
            message_template['interpretation'] = transform_interpretation(message_template['interpretation'], request.host)
            message = json.dumps(message_template, separators=(',', ':'))

    except Exception as e:
        if e.args:
            # NOTE(review): e.args is a tuple, not a string — confirm callers
            # of this function expect that shape for 'message'.
            return_object['message'] = e.args
        else:
            return_object['message'] = 'Failed to build complete message'
        return return_object

    # Configure message delivery parameters
    kafka_cert_pw = ''

    if 'KAFKA_CERT_PW' in os.environ:
        kafka_cert_pw = os.environ['KAFKA_CERT_PW']

    # Defaults target a broker on localhost and the 'test' topic.
    kafka_conf = {'bootstrap.servers': 'localhost:9093',
        'log_level': 0,
        'security.protocol': 'ssl',
        'ssl.key.location': 'etc/certs/client.key',
        'ssl.key.password': kafka_cert_pw,
        'ssl.certificate.location': 'etc/certs/client.crt',
        'ssl.ca.location': 'etc/certs/server.crt'}
    kafka_topic = 'test'
    kafka_timeout = 10

    # Any host other than the local development server uses the data
    # exchange broker and a topic named after the resource type.
    if request.host != 'localhost:6543':
        kafka_conf = {'bootstrap.servers': 'exchange.clinicalgenome.org:9093',
            'log_level': 0,
            'security.protocol': 'ssl',
            'ssl.key.location': 'etc/certs/dataexchange/client.key',
            'ssl.key.password': kafka_cert_pw,
            'ssl.certificate.location': 'etc/certs/dataexchange/client.crt',
            'ssl.ca.location': 'etc/certs/dataexchange/server.crt'}

        if data_type_to_publish == 'interpretation':
            kafka_topic = 'variant_interpretation'
        else:
            kafka_topic = 'gene_validity'

        # Every host except this one gets the '_dev' topic suffix.
        if request.host != 'curation.clinicalgenome.org':
            kafka_topic += '_dev'

    # Send message
    p = Producer(**kafka_conf)

    def delivery_callback(err, msg):
        # Runs while flush() below serves delivery reports; rebinding the
        # enclosing return_object is how the outcome reaches the caller.
        nonlocal return_object
        if err:
            # NOTE(review): err is the error object handed over by the Kafka
            # client, not a str — confirm it serializes as intended.
            return_object['message'] = err

        else:
            return_object = {'status': 'Success',
                         'message': message,
                         'partition': msg.partition(),
                         'offset': msg.offset()}

    try:
        p.produce(kafka_topic, message, callback=delivery_callback)
        # Wait up to kafka_timeout seconds; if no delivery report arrives in
        # time, return_object still holds the initial 'Fail' value.
        p.flush(kafka_timeout)
        return return_object

    except Exception as e:
        return_object['message'] = 'Message delivery failed'
        return return_object
def produce(self):
    """Stream BGP records into a Kafka topic as msgpack messages.

    Every valid record read from the BGP stream is sent as
    ``{'record': {...}}`` with the record time (in milliseconds) as the Kafka
    timestamp. A record with many elements is split into several messages,
    flushing each time the element count exceeds ``MAX_ELEMENT``. Reading
    stops at the first record later than ``self.end``. A final
    ``{'end': True}`` marker is always produced and the producer flushed,
    even when streaming stops because of an error.
    """
    # debug
    next_debug_ms = dt2ts(self.start) * 1000

    bgp_stream = self.getBGPStream()
    topic = self._create_topic()
    producer = Producer({
        'bootstrap.servers': self.config['DEFAULT']['KafkaServer'],
        'default.topic.config': {
            'compression.codec': 'snappy',
            'queue.buffering.max.messages': 1000000,
        }
    })

    def _send(payload, timestamp_ms):
        # Pack the payload and hand it to the producer with the given time.
        producer.produce(topic, msgpack.packb(payload, use_bin_type=True),
                         callback=self._delivery_report, timestamp=timestamp_ms)

    record = BGPRecord()
    bgp_stream.start()

    logging.info(f"[{topic}] start producing")

    try:
        while bgp_stream and bgp_stream.get_next_record(record):
            if record.time > dt2ts(self.end):
                logging.error(f"[{topic}] time window exceed")
                break

            if record.status != "valid":
                continue

            payload = {"rec": getRecordDict(record)}
            record_ms = int(record.time) * 1000
            payload["elements"] = []

            element = record.get_next_elem()
            pending = 0
            while element:
                payload["elements"].append(getElementDict(element))
                pending += 1

                if pending > MAX_ELEMENT:
                    # Chunk is full: ship it and start a fresh element list.
                    _send({'record': payload}, record_ms)
                    producer.poll(0)
                    payload["elements"] = []
                    pending = 0

                element = record.get_next_elem()

            # Ship whatever is left over for this record.
            if payload['elements']:
                _send({'record': payload}, record_ms)

            producer.poll(0)

            # debug
            if record_ms > next_debug_ms:
                logging.debug(
                    f"[{topic}] produced at {ts2dt(next_debug_ms//1000)}")
                next_debug_ms += 3600000

    except Exception as e:
        logging.error(f"[{topic}] exit with error : {e}")
        return

    finally:
        # Always terminate the topic with an end marker stamped at self.end.
        _send({'end': True}, dt2ts(self.end) * 1000)
        producer.poll(0)
        producer.flush()

        logging.info(f"[{topic}] done producing")
def test_produce_headers():
    """ Test produce() with the headers arg.

    Produces with a variety of accepted header collections (lists of tuples
    and dicts, with and without a message key) and checks that malformed
    header arguments raise TypeError.
    """
    # Very short timeouts: no broker is configured for this producer.
    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    binval = pack('hhl', 1, 2, 3)

    # Each entry is one complete ``headers`` argument: first the list-of-tuples
    # forms, then the same cases expressed as dicts.
    headers_to_test = [
        [('headerkey', 'headervalue')],
        [('dupkey', 'dupvalue'), ('empty', ''), ('dupkey', 'dupvalue')],
        [('dupkey', 'dupvalue'), ('dupkey', 'diffvalue')],
        [('key_with_null_value', None)],
        [('binaryval', binval)],
        [('alreadyutf8', u'Småland'.encode('utf-8'))],
        [('isunicode', 'Jämtland')],

        {'headerkey': 'headervalue'},
        {'dupkey': 'dupvalue', 'empty': '', 'dupkey': 'dupvalue'},  # noqa: F601
        {'dupkey': 'dupvalue', 'dupkey': 'diffvalue'},  # noqa: F601
        {'key_with_null_value': None},
        {'binaryval': binval},
        {'alreadyutf8': u'Småland'.encode('utf-8')},
        {'isunicode': 'Jämtland'}
        ]

    for headers in headers_to_test:
        print('headers', type(headers), headers)
        # Exercise both the keyed and the key-less produce() call.
        p.produce('mytopic', value='somedata', key='a key', headers=headers)
        p.produce('mytopic', value='somedata', headers=headers)

    # A flat tuple of strings is not a valid header collection.
    with pytest.raises(TypeError):
        p.produce('mytopic', value='somedata', key='a key', headers=('a', 'b'))

    # ('malformed_header') is just a parenthesized string, not a 2-tuple.
    with pytest.raises(TypeError):
        p.produce('mytopic', value='somedata', key='a key', headers=[('malformed_header')])

    # An int is rejected as a header value.
    with pytest.raises(TypeError):
        p.produce('mytopic', value='somedata', headers={'anint': 1234})

    p.flush()
def main():
    """Produce a single greeting to 'test-topic' and flush the producer."""
    settings = {'bootstrap.servers': 'localhost:9092'}
    client = Producer(settings)
    client.produce('test-topic', b'Hello, Kafka.')
    # client.poll(0)
    client.flush()