def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        p = Producer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    p = Producer({'socket.timeout.ms': 10,
                  'error_cb': error_cb,
                  'message.timeout.ms': 10})

    p.produce('mytopic')
    p.produce('mytopic', value='somedata', key='a key')

    def on_delivery(err, msg):
        print('delivery', err, msg)
        # Since there is no broker, produced messages should time out.
        assert err.code() == KafkaError._MSG_TIMED_OUT

    p.produce(topic='another_topic', value='testing', partition=9,
              callback=on_delivery)

    p.poll(0.001)

    p.flush(0.002)
    p.flush()

    try:
        p.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)
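The config above passes an error_cb that this snippet does not define; a minimal sketch of such a callback (the name matches the snippet, but the body is an assumption) could be:

def error_cb(err):
    # hypothetical global error callback wired in via the 'error_cb' config key;
    # confluent_kafka calls it with a KafkaError describing a client-level error
    print('error_cb', err)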
Example #2
    def kafka_check_topic(self, myTopic):
        print("Connecting as kafka consumer to check for topic: " + myTopic)
        test = False

        conf = {
            "bootstrap.servers": self.bootstrapServerStr,
            "client.id": socket.gethostname(),
            "error_cb": self.kafka_producer_error_cb,
            "security.protocol": self.kafka_security_protocol,
            "sasl.mechanisms": self.kafka_sasl_mechanisms,
            "sasl.username": self.kafka_sasl_username,
            "sasl.password": self.kafka_sasl_password,
            "ssl.ca.location": self.kafka_ssl_ca_location,
            "ssl.certificate.location": self.kafka_ssl_certificate_location,
            "ssl.key.location": self.kafka_ssl_key_location,
            "ssl.key.password": self.kafka_ssl_key_password
        }

        while not test:
            time.sleep(1)
            print("waiting for kafka producer to connect")

            try:
                # shouldn't be used directly: self.kafka_client = kafka.KafkaClient(self.kafka_broker)
                kafka_producer = KafkaProducer(conf)
                kafka_producer.list_topics(topic=myTopic, timeout=1)
                test = True
            except KafkaException as e:
                # print(e.args[0])
                print("waiting for " + myTopic + " topic...")
Example #3
class KafkaProducer:
    def __init__(self,conf):
        self.producer = Producer(conf)

    @cached(cache=TTLCache(maxsize=1024, ttl=60))
    def get_topic_partition_count(self,topic_name):
        cmd = self.producer.list_topics(topic_name)
        tmd = cmd.topics.get(topic_name,None)
        pcount = 0
        if tmd:
            pcount = len(tmd.partitions)
        return pcount

    def send_records(self,topic,records,headers):
        responses = []
        def delivery_report(err, msg):
            """ Called once for each message produced to indicate delivery result.
                Triggered by poll() or flush(). """
            if err is not None:
                LOGGER.info('Message delivery failed: {}'.format(err))
            else:
                LOGGER.info('Message delivered {} {} {} [{}] {}'.format(
                    msg.timestamp(), msg.offset(), msg.topic(), msg.partition(), msg.key()))

            keystr = None if err or not msg.key() else msg.key().decode('UTF-8')
            if not err:
                report = dict(timestamp=msg.timestamp()[1], partition=msg.partition(),
                              offset=msg.offset(), key=keystr)
            else:
                report = dict(error=f"{err}", status="PRODUCER_ERROR")
            responses.append(report)

        partition_count = self.get_topic_partition_count(topic)
        if not partition_count:
            LOGGER.warn(f"Requested topic {topic} does not exist")
            return "TOPIC_NOT_FOUND",dict(reason=f"Topic {topic} not found or not accessible to current user")

        LOGGER.info(f"sending records - {records}")

        for record in records:
            data = json.dumps(record["value"])
            key = record.get('key')
            partition = record.get('partition',None)
            if partition:
                try:
                    partition = int(partition)
                except (TypeError, ValueError):
                    partition = 0
            if partition:
                record_partition = partition % partition_count
                self.producer.produce(topic, value=data, partition=record_partition,
                                      key=key, callback=delivery_report, headers=headers)
            else:
                self.producer.produce(topic, data, key=key,
                                      callback=delivery_report, headers=headers)
            self.producer.poll(.01)
        self.producer.flush()
        LOGGER.info(f"Responses - {responses}")
        retval = {"key_schema_id": None,"value_schema_id": None,"offsets": responses}

        return None, responses
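A hedged usage sketch for send_records() above, mainly to show the expected record shape (broker address, topic name and header value are placeholders, and LOGGER/json are assumed module-level imports):

producer = KafkaProducer({'bootstrap.servers': 'localhost:9092'})
records = [
    {"value": {"user": "alice", "action": "login"}, "key": "alice"},    # keyed record
    {"value": {"user": "bob", "action": "logout"}, "partition": "2"},   # explicit partition
]
err, reports = producer.send_records('example-topic', records, headers={'source': b'demo'})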
Example #4
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        p = Producer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    p = Producer({
        'socket.timeout.ms': 10,
        'error_cb': error_cb,
        'default.topic.config': {
            'message.timeout.ms': 10
        }
    })

    p.produce('mytopic')
    p.produce('mytopic', value='somedata', key='a key')

    def on_delivery(err, msg):
        print('delivery', err, msg)
        # Since there is no broker, produced messages should time out.
        assert err.code() == KafkaError._MSG_TIMED_OUT

    p.produce(topic='another_topic',
              value='testing',
              partition=9,
              callback=on_delivery)

    p.poll(0.001)

    p.flush(0.002)
    p.flush()

    try:
        p.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._TRANSPORT)
Example #5
class KafkaProducer(object):
    """
    Producer
    """
    def __init__(self, kafka_url, topic):
        self.__kafka_url = kafka_url
        self.__topic = topic
        self.producer = Producer({
            'bootstrap.servers': self.__kafka_url,
            'log.connection.close': False,
            'request.required.acks': 0,
            'queue.buffering.max.ms': 5000,
            'queue.buffering.max.messages': 10000,
            'batch.num.messages': 200
        })

        self.create_topic()
        self.__partitions = self.producer.list_topics().topics[self.__topic].partitions

    def create_topic(self, num_partitions=3, replication_factor=1):
        if self.__topic not in self.producer.list_topics().topics:
            ac = AdminClient({'bootstrap.servers': self.__kafka_url})
            futmap = ac.create_topics([NewTopic(self.__topic, num_partitions, replication_factor)])
            # wait for topic creation to finish instead of sleeping blindly
            for future in futmap.values():
                future.result()

    def send_log(self, err, msg):
        pass

    def get_target_partition_id(self, key):
        return hash(key) % len(self.__partitions)

    def send(self, data, key):
        target_partition_id = self.get_target_partition_id(key)
        self.producer.produce(self.__topic, json.dumps(data).encode('utf-8'), partition=target_partition_id,
                              callback=self.send_log)

    def flush(self):
        self.producer.flush()
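A brief usage sketch for the class above (broker address, topic and payload are placeholders):

producer = KafkaProducer('localhost:9092', 'example_topic')
producer.send({'event': 'page_view', 'user_id': 42}, key='user-42')  # routed by hash(key) % partition count
producer.flush()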
Example #6
    def test_create_kafka_topic(self):
        # topic was already created in the CircleCI setup (config.yml step 6); just checking that it's still up
        conf = kafka_utils.read_config('producer_google_chicago_1.config',
                                       'producer_google_chicago_test')
        print("config file path is", conf)
        producer_config = {'bootstrap.servers': conf['bootstrap.servers']}
        p = Producer(producer_config)
        info = p.list_topics()
        topic = info.topics['christian_test'].topic
        partitions = info.topics['christian_test'].partitions
        partition_ids = list(partitions.keys())
        result = (topic, len(partition_ids))

        assert result == ('christian_test', 1)
Example #7
def get_cluster_metadata(bootstrap_servers):
    """
    Return cluster metadata for the cluster specified by bootstrap_servers.

    Parameters
    ----------
    bootstrap_servers: str
        comma-delimited string of Kafka broker host:port, for example "localhost:9092"

    Returns
    -------
        confluent_kafka.admin.ClusterMetadata
    """
    kafka_producer = Producer({"bootstrap.servers": bootstrap_servers})
    cluster_metadata = kafka_producer.list_topics()
    return cluster_metadata
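A short usage sketch for get_cluster_metadata() (the broker address is a placeholder):

metadata = get_cluster_metadata("localhost:9092")
for topic_name, topic_metadata in metadata.topics.items():
    print(topic_name, len(topic_metadata.partitions))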
Example #8
    def create_topics(self, topics: List[Tuple[str, int, int]]) -> None:
        """Creates a list of kafka topics.

        :param topics: List of tuples where:
            1. element: name of the topic to create
            2. element: number of partitions
            3. element: number of replicas in the cluster
        """
        producer = Producer(
            {'bootstrap.servers': svt.conf.get('kafka', 'bootstrap_servers')})
        existing_topics = producer.list_topics().topics
        # only create topics that do not exist yet; compare by topic name
        new_topics = [(name, partitions, replicas)
                      for name, partitions, replicas in topics
                      if name not in existing_topics]
        nts = [NewTopic(name, partitions, replicas)
               for name, partitions, replicas in new_topics]

        if nts:
            self.admin.create_topics(nts)
            log.info(f"Created topics: {new_topics}")
        else:
            log.info(f"Topics: {topics} already exist!")
Example #9
    def _produce(self, key: str, value: str, headers: dict,
                 payload: AsyncProducerPayload) -> None:
        config = {'bootstrap.servers': self.actor.service.address}
        if self.actor.service.ssl:
            config['security.protocol'] = 'SSL'
        producer = Producer(config)

        if payload.enable_topic_creation:
            topics = producer.list_topics(self.topic)
            if topics.topics[self.topic].error is not None:
                _create_topic(self.actor.service.address,
                              self.topic,
                              ssl=self.actor.service.ssl)

        producer.poll(0)
        producer.produce(self.topic,
                         value,
                         key=key,
                         headers=headers,
                         callback=_kafka_delivery_report)
        producer.flush()
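_produce() relies on a _kafka_delivery_report helper defined elsewhere; a minimal sketch of what such a delivery callback might look like (the body is an assumption and presumes a module-level logging import):

def _kafka_delivery_report(err, msg):
    # hypothetical delivery callback: log failures, otherwise note where the message landed
    if err is not None:
        logging.error('Kafka delivery failed: %s', err)
    else:
        logging.debug('Delivered to %s [%d] @ %d', msg.topic(), msg.partition(), msg.offset())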
Example #10
def main():
    args = get_args()
    producer = Producer({
        'bootstrap.servers': args.brokers,
        'client.id': socket.gethostname()
    })
    if args.operation is Operation.PRODUCE:
        for topic in args.topics.split(','):
            topic = topic.strip()
            producer.produce(
                topic=topic,
                value=args.message,
                # bind the current topic; a plain closure would only see the last loop value
                callback=lambda err, msg, topic=topic: message_ack(err, msg, topic))
            producer.poll(5)
    if args.operation is Operation.LIST_TOPICS:
        print(stdiocolours.OKBLUE + "\nTOPICS:" + stdiocolours.ENDC)
        for count, topic in enumerate(producer.list_topics().topics, start=1):
            print(f"{count}: {topic}")
    if args.operation is Operation.WATCH_PRODUCE:
        watch_dir()
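main() hands produce() a message_ack callback that is not included here; a hedged sketch (the reporting format is an assumption):

def message_ack(err, msg, topic):
    # hypothetical delivery callback used by main() above
    if err is not None:
        print(f"delivery to {topic} failed: {err}")
    else:
        print(f"delivered to {msg.topic()} [{msg.partition()}] @ offset {msg.offset()}")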
Example #11
class FanIn(AgentCommon):
    registered = False
    loggerName = None
    # All time is in seconds as float. We use time_ns to get highest resolution
    timet0 = 0
    t0_on_first_mqtt = 0
    MsgCount = 0

    def __init__(self,
                 configFile,
                 debug,
                 encrypt,
                 TopicForThisProcess=False,
                 batching=False,
                 mqttcounter=False,
                 mqttPassthrough=False):
        """
                Class init
        """

        self.sensors = []
        self.mqttTopicList = []

        self.loggerName = "simulator.agent." + __version__ + ".log"

        self.config = self.checkConfigurationFile(
            configFile, ["Daemon", "Logger", "Kafka", "MQTT"])

        self.kafka_broker = self.config.get("Kafka", "kafka_broker")
        self.kafka_port = int(self.config.get("Kafka", "kafka_port"))
        self.kafkaProducerTopic = self.config.get("Kafka",
                                                  "kafkaProducerTopic")
        self.kafka_security_protocol = self.config.get(
            "Kafka", "kafka_security.protocol")
        self.kafka_sasl_mechanisms = self.config.get("Kafka",
                                                     "kafka_sasl.mechanisms")
        self.kafka_sasl_username = self.config.get("Kafka",
                                                   "kafka_sasl.username")
        self.kafka_sasl_password = self.config.get("Kafka",
                                                   "kafka_sasl.password")
        self.kafka_ssl_ca_location = self.config.get("Kafka",
                                                     "kafka_ssl.ca.location")
        self.kafka_ssl_certificate_location = self.config.get(
            "Kafka", "kafka_ssl.certificate.location")
        self.kafka_ssl_key_location = self.config.get(
            "Kafka", "kafka_ssl.key.location")
        self.kafka_ssl_key_password = self.config.get(
            "Kafka", "kafka_ssl.key.password")
        self.myFanIn_mqtt_encryption_enabled = encrypt
        self.mqtt_broker = self.config.get("MQTT", "mqtt_broker")
        self.mqtt_port = int(self.config.get("MQTT", "mqtt_port"))
        self.mqttBatching = batching
        self.enableMQTTbatchCount = mqttcounter
        self.enableMQTTpassthrough = mqttPassthrough

        # create topic list: [ ("topicName1", int(qos1)),("topicName2", int(qos2)) ]
        #                    [ ("ibswitch", 0), ("redfish", 0)]
        addValue = []
        if TopicForThisProcess != False:
            # multi-process version
            value = TopicForThisProcess.split(":")
            addValue.append(value[0])
            addValue.append(int(value[1]))
            self.processID = os.getpid()
            self.threadID = threading.get_ident()
            self.logMPMT = "P-{:d} | T-{:d} |".format(self.processID,
                                                      self.threadID)

            if not self.enableMQTTpassthrough:
                print(
                    "MULTIPROC {:s} Starting FanIn Gateway in its process for {:s}. MQTT batch pass-through is DISABLED."
                    .format(self.logMPMT, TopicForThisProcess))
            else:
                print(
                    "MULTIPROC {:s} Starting FanIn Gateway in its process for {:s}. MQTT batch pass-through is ENABLED."
                    .format(self.logMPMT, TopicForThisProcess))

            self.mqttTopicList.append(addValue)
            self.processID = os.getpid()
            self.myFanInGatewayName = "FanIn-test[" + str(self.processID) + "]"
        else:
            # single threaded version
            value = self.config.get("MQTT", "mqttSingleThreadTopic").split(":")
            addValue.append(value[0])
            addValue.append(int(value[1]))
            self.mqttTopicList.append(addValue)
            self.myFanInGatewayName = "FanIn-test"

        addValue = []
        value = self.config.get("MQTT",
                                "mqttRegistrayionResultTopic").split(":")
        addValue.append(value[0])
        addValue.append(int(value[1]))
        self.mqttTopicList.append(addValue)

        self.bootstrapServerStr = self.kafka_broker + ":" + str(
            self.kafka_port)

        # Register to the framework
        self.myFanInGateway_id = -1
        self.myFanInGateway_debug = debug
        self.myFanInGateway_uuid = str(uuid.uuid4())
        self.myFanInGateway_uid = self.myFanInGatewayName + str(
            random.randint(1, 100001))

        # for thread safe counter
        self.myFanInGateway_threadLock = threading.Lock()

        self.myMQTTregistered = False
        self.kafka_producer = None
        self.kafka_consumer = None

        self.kafka_msg_counter = 1
        self.kafka_msg_ack_received = 0

        super().__init__(configFile, debug)

        #message counter per uuid
        self.myMQTTtopicCounterPerUUID = {}

    def resetLogLevel(self, logLevel):
        """
                Resets the log level 
        """
        self.logger = KrakenMareLogger().getLogger(self.loggerName, logLevel)

    #######################################################################################
    # MQTT agent methods
    # sends MQTT messages to Kafka (in batches)
    # TODO: do we need multiple threads here?
    # TODO: have processing method per client type OR topic for each sensor type to convert messages?
    def mqtt_on_message(self, client, userdata, message):
        if self.myFanInGateway_debug == True:
            print("mqtt_on_message start")

        query_data = []
        k = 0

        if message.topic == self.mqttTopicList[0][0]:
            self.done = False
            if self.timet0 == 0:
                self.timet0 = time.time_ns() / 1000000000

            if self.t0_on_first_mqtt == 0:
                self.t0_on_first_mqtt = time.time_ns() / 1000000000

            # unless passthrough is enabled, decode the MQTT batch and publish each sample to Kafka
            if not self.enableMQTTpassthrough:

                if self.mqttBatching == True:
                    query_data = self.msg_serializer.decode_message(
                        message.payload)
                else:
                    query_data.append(message.payload)

                for data in query_data["tripletBatch"]:
                    # check, if I know agent UUID and adjust my MQTT topic counter accordingly
                    if (self.enableMQTTbatchCount == True):
                        try:
                            # if the stored batch count is exactly one less than the
                            # received count (or equal to it), everything is in order
                            stored_count = self.myMQTTtopicCounterPerUUID[
                                data["sensorUuid"]]
                            received_count = int(data["sensorValue"])
                            if stored_count not in (received_count - 1,
                                                    received_count):
                                print(
                                    "ATTENTION: Missing # of MQTTbatches for agent UUID: "
                                    + str(data["sensorUuid"]) +
                                    " and topic: " +
                                    str(self.mqttTopicList[0][0]) + " is:" +
                                    str(received_count - stored_count))

                            self.myMQTTtopicCounterPerUUID[
                                data["sensorUuid"]] = int(data["sensorValue"])

                            if self.myFanInGateway_debug == True:
                                logMPMT = str("P-{:d} : ".format(os.getpid()))
                                print(logMPMT + self.mqttTopicList[0][0] +
                                      "| UUID: " + str(data["sensorUuid"]) +
                                      "| MQTT batch count: " +
                                      str(self.myMQTTtopicCounterPerUUID[
                                          data["sensorUuid"]]))
                        except KeyError:
                            self.myMQTTtopicCounterPerUUID[
                                data["sensorUuid"]] = int(data["sensorValue"])
                            if int(data["sensorValue"]) != 1:
                                print(
                                    "ATTENTION: Missing # of MQTTbatches for agent UUID: "
                                    + str(data["sensorUuid"]) +
                                    " and topic: " +
                                    str(self.mqttTopicList[0][0]) + " is: " +
                                    str(int(data["sensorValue"]) - 1))

                    try:
                        #                    print(str(data["sensorUuid"]) + ", " + str(data["sensorValue"]))
                        raw_bytes = self.msg_serializer.encode_record_with_schema_id(
                            self.send_time_series_schema_id, data)
                        self.kafka_producer.produce(
                            self.kafkaProducerTopic,
                            raw_bytes,
                            on_delivery=self.kafka_producer_on_delivery,
                        )
                        self.kafka_msg_counter += 1
                        k += 1

                        if self.myFanInGateway_debug == True:
                            print(
                                str(self.kafka_msg_counter) +
                                ":published to Kafka")

                        if self.kafka_msg_counter % 1000 == 0:
                            deltat = time.time_ns() / 1000000000 - self.timet0
                            deltaMsg = self.kafka_msg_counter - self.MsgCount
                            self.MsgCount = self.kafka_msg_counter
                            self.timet0 = time.time_ns() / 1000000000
                            elapsed = (int)(time.time_ns() / 1000000000 -
                                            self.t0_on_first_mqtt)
                            logMPMT = "{:d} secs | Process-{:d} | Thread-{:d} | TopicMqtt-{:s}".format(
                                elapsed,
                                os.getpid(),
                                threading.get_ident(),
                                str(message.topic),
                            )
                            print(
                                logMPMT + " | " + str(self.kafka_msg_counter) +
                                " messages published to Kafka, rate = {:.2f} msg/sec"
                                .format(deltaMsg / deltat))

                    except BufferError as e1:
                        print(
                            "%% Local producer queue is full (%d messages awaiting delivery): try again\n"
                            % len(self.kafka_producer))
                        print(e1)
                    except KafkaException as e2:
                        print(
                            "MQTT message not published to Kafka! Cause is ERROR:"
                        )
                        print(e2)

                if k != 47:
                    print("Samples in last processed message was: " + str(k))

            else:
                #passthrough
                try:
                    # print(str(data["sensorUuid"]) + ", " + str(data["sensorValue"]))
                    self.kafka_producer.produce(
                        self.kafkaProducerTopic,
                        message.payload,
                        on_delivery=self.kafka_producer_on_delivery,
                    )
                    self.kafka_msg_counter += 1

                    if self.myFanInGateway_debug == True:
                        print(
                            str(self.kafka_msg_counter) +
                            ":published to Kafka")

                    if self.kafka_msg_counter % 1000 == 0:
                        deltat = time.time_ns() / 1000000000 - self.timet0
                        deltaMsg = self.kafka_msg_counter - self.MsgCount
                        self.MsgCount = self.kafka_msg_counter
                        self.timet0 = time.time_ns() / 1000000000
                        elapsed = (int)(time.time_ns() / 1000000000 -
                                        self.t0_on_first_mqtt)
                        logMPMT = "{:d} secs | Process-{:d} | Thread-{:d} | TopicMqtt-{:s}".format(
                            elapsed,
                            os.getpid(),
                            threading.get_ident(),
                            str(message.topic),
                        )
                        print(
                            logMPMT + " | " + str(self.kafka_msg_counter) +
                            " MQTT batch messages published to Kafka, rate = {:.2f} msg/sec"
                            .format(deltaMsg / deltat))

                except BufferError as e1:
                    print(
                        "%% Local producer queue is full (%d messages awaiting delivery): try again\n"
                        % len(self.kafka_producer))
                    print(e1)
                except KafkaException as e2:
                    print(
                        "MQTT message not published to Kafka! Cause is ERROR:")
                    print(e2)

            self.mqttMsgTimer = time.time()

        else:
            if self.myFanInGateway_debug == True:
                print("Not ibswitch topic")

    # END MQTT agent methods
    #######################################################################################

    #######################################################################################
    # Kafka agent methods

    # Kafka error printer
    def kafka_producer_error_cb(self, err):
        logMPMT = "P-{:d} | T-{:d} |".format(os.getpid(),
                                             threading.get_ident())
        print("{:s} KAFKA_PROD_CALLBACK_ERR : {:s}".format(logMPMT, str(err)))

    def kafka_producer_on_delivery(self, err, msg):
        if err:
            print(
                "KAFKA_MESSAGE_CALLBACK_ERR : %% Message failed delivery: %s - to %s [%s] @ %s\n"
                % (err, msg.topic(), str(msg.partition()), str(msg.offset())))
        else:
            self.kafka_msg_ack_received += 1
            if self.myFanInGateway_debug == True:
                print("%% Message delivered to %s [%d] @ %d\n" %
                      (msg.topic(), msg.partition(), msg.offset()))

    # connect to Kafka broker as producer to check topic 'myTopic'
    def kafka_check_topic(self, myTopic):
        print("Connecting as kafka consumer to check for topic: " + myTopic)
        test = False

        conf = {
            "bootstrap.servers": self.bootstrapServerStr,
            "client.id": socket.gethostname(),
            "error_cb": self.kafka_producer_error_cb,
            "security.protocol": self.kafka_security_protocol,
            "sasl.mechanisms": self.kafka_sasl_mechanisms,
            "sasl.username": self.kafka_sasl_username,
            "sasl.password": self.kafka_sasl_password,
            "ssl.ca.location": self.kafka_ssl_ca_location,
            "ssl.certificate.location": self.kafka_ssl_certificate_location,
            "ssl.key.location": self.kafka_ssl_key_location,
            "ssl.key.password": self.kafka_ssl_key_password
        }

        while not test:
            time.sleep(1)
            print("waiting for kafka producer to connect")

            try:
                # shouldn't be used directly: self.kafka_client = kafka.KafkaClient(self.kafka_broker)
                kafka_producer = KafkaProducer(conf)
                kafka_producer.list_topics(topic=myTopic, timeout=1)
                test = True
            except KafkaException as e:
                # print(e.args[0])
                print("waiting for " + myTopic + " topic...")

    # connect to Kafka broker as producer

    def kafka_producer_connect(self):
        test = False

        conf = {
            "bootstrap.servers": self.bootstrapServerStr,
            "client.id": socket.gethostname(),
            "error_cb": self.kafka_producer_error_cb,
            "security.protocol": self.kafka_security_protocol,
            "sasl.mechanisms": self.kafka_sasl_mechanisms,
            "sasl.username": self.kafka_sasl_username,
            "sasl.password": self.kafka_sasl_password,
            "ssl.ca.location": self.kafka_ssl_ca_location,
            "ssl.certificate.location": self.kafka_ssl_certificate_location,
            "ssl.key.location": self.kafka_ssl_key_location,
            "ssl.key.password": self.kafka_ssl_key_password,
            "linger.ms": 1000,
            "message.max.bytes": 2560000,
            "queue.buffering.max.messages": 2000000,
        }

        while not test:
            time.sleep(2)
            print("waiting for kafka producer to connect")

            try:
                # shouldn't be used directly: self.kafka_client = kafka.KafkaClient(self.kafka_broker)
                self.kafka_producer = KafkaProducer(conf)
                self.kafka_producer.list_topics(timeout=1)
                test = True
            except KafkaException as e:
                print(e.args[0])
                print("waiting for Kafka brokers..." + self.bootstrapServerStr)

        print(self.__class__.__name__ + "." +
              inspect.currentframe().f_code.co_name + ": producer connected")

    # END Kafka agent methods
    #######################################################################################

    def signal_handler(self, signal, frame):
        self.mqtt_close()
        sys.exit(0)

    # main method of FanIn
    def run(self):
        # local and debug flag are not used from here at the moment

        # self.kafka_check_topic("registration-result")
        self.kafka_check_topic(self.kafkaProducerTopic)
        # self.mqtt_registration()
        self.kafka_producer_connect()
        # TODO: should be own process via process class (from multiprocessing import Process)
        # generate list of mqtt topics to subscribe, used in initial connection and to re-subscribe on re-connect

        mqttSubscriptionTopics = self.mqttTopicList
        print("MQTT topic list:" + str(self.mqttTopicList))

        # start mqtt client
        myLoopForever = False
        myCleanSession = True
        self.mqtt_init(
            self.myFanInGateway_uuid,
            mqttSubscriptionTopics,
            myLoopForever,
            myCleanSession,
            self.myFanIn_mqtt_encryption_enabled,
        )

        # start listening to data
        # self.mqtt_subscription()
        regularLog = 300
        logMPMT = str("P-{:d} : ".format(os.getpid()))
        self.done = False
        self.mqttMsgTimer = time.time()
        while True:
            time.sleep(0.05)
            self.kafka_producer.poll(0)
            regularLog -= 1
            if regularLog <= 0:
                regularLog = 300

            if (self.mqttMsgTimer + 10 < time.time()
                    and not self.done and self.enableMQTTbatchCount):
                for uuid in self.myMQTTtopicCounterPerUUID:
                    print(logMPMT + self.mqttTopicList[0][0] + "| UUID: " +
                          str(uuid) + "| MQTT batch count: " +
                          str(self.myMQTTtopicCounterPerUUID[uuid]))
                    self.done = True

        self.mqtt_close()
        print("FanIn terminated")
Example #12
class KafkaProducer:
    def __init__(self, conf):
        self.producer = Producer(conf)

    def get_topic_list(self, showInternal=True):
        cmd = self.producer.list_topics()
        tmd = cmd.topics
        topic_list = list(tmd.keys())
        if not showInternal:
            topic_list = [t for t in topic_list if not (t and t[0] == '_')]
        return None, topic_list

    def get_topic_partitions(self, topic_name):
        cmd = self.producer.list_topics(topic_name)
        tmd = cmd.topics.get(topic_name, None)
        if not tmd:
            return 'TOPIC_NOT_FOUND', f"{topic_name} not found"

        partitions = [
            dict(partition=partition.id,
                 leader=partition.leader,
                 replicas=[{'broker': replica_id,
                            'leader': replica_id == partition.leader,
                            'in_sync': replica_id in partition.isrs}
                           for replica_id in partition.replicas])
            for partition in tmd.partitions.values()]
        result = dict(name=tmd.topic, partitions=partitions)
        result['configs'] = {
            f'k{i}': f'not implemented v{i}'
            for i in range(5)
        }
        return None, result

    # @cached(cache=TTLCache(maxsize=1024, ttl=60))
    def get_topic_partition_count(self, topic_name):
        cmd = self.producer.list_topics(topic_name)
        tmd = cmd.topics.get(topic_name, None)
        pcount = 0
        if tmd:
            pcount = len(tmd.partitions)
        return pcount

    def send_records(self, topic, records, headers):
        responses = []

        def delivery_report(err, msg):
            """ Called once for each message produced to indicate delivery result.
                Triggered by poll() or flush(). """
            if err is not None:
                LOGGER.info('Message delivery failed: {}'.format(err))
            else:
                LOGGER.info('Message delivered {} {} {} [{}] {}'.format(
                    msg.timestamp(), msg.offset(), msg.topic(),
                    msg.partition(), msg.key()))

            keystr = None if err or not msg.key() else msg.key().decode(
                'UTF-8')
            if not err:
                report = dict(timestamp=msg.timestamp()[1],
                              partition=msg.partition(),
                              offset=msg.offset(),
                              key=keystr)
            else:
                report = dict(error=f"{err}", status="PRODUCER_ERROR")
            responses.append(report)

        partition_count = self.get_topic_partition_count(topic)
        if not partition_count:
            LOGGER.warn(f"Requested topic {topic} does not exist")
            return "TOPIC_NOT_FOUND", dict(
                reason=
                f"Topic {topic} not found or not accessible to current user")

        LOGGER.info(f"sending records - {records}")

        for record in records:
            data = json.dumps(record["value"])
            key = record.get('key')
            partition = record.get('partition', None)
            if partition:
                try:
                    partition = int(partition)
                except (TypeError, ValueError):
                    partition = 0
            if partition:
                record_partition = partition % partition_count
                self.producer.produce(topic,
                                      value=data,
                                      partition=record_partition,
                                      key=key,
                                      callback=delivery_report,
                                      headers=headers)
            else:
                self.producer.produce(topic,
                                      data,
                                      key=key,
                                      callback=delivery_report,
                                      headers=headers)
            self.producer.poll(.01)
        self.producer.flush()
        LOGGER.info(f"Responses - {responses}")
        retval = {
            "key_schema_id": None,
            "value_schema_id": None,
            "offsets": responses
        }
        return None, responses
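A hedged usage sketch for the topic-inspection helpers above (broker address and topic name are placeholders):

producer = KafkaProducer({'bootstrap.servers': 'localhost:9092'})
_, topic_names = producer.get_topic_list(showInternal=False)
err, detail = producer.get_topic_partitions('example-topic')
if err:
    print(detail)               # e.g. "example-topic not found"
else:
    print(detail['partitions'])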
Example #13
class KafkaProducerConfluent:
    """
    Продюсер (Производитель). confluent_kafka
    """
    """
    Инициализация
    """
    def __init__(self,
                 hosts=None,
                 configuration=None,
                 use_tx=False,
                 one_topic_name=None,
                 auto_flush_size=0,
                 flush_is_bad=False):
        """

        :param configuration:
        https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        """

        if configuration is None:
            self.configuration = {
                'client.id':
                default_cfg.DEFAULT_CONNECTION_OPTION_ADMIN['client_id'],
                'socket.timeout.ms':
                default_cfg.DEFAULT_BROKER_TIMEOUT_MS_OPERATIONS
            }

            if use_tx:
                self.configuration['transactional.id'] = str(uuid4())
        else:
            self.configuration = configuration

        if hosts:
            self.configuration['bootstrap.servers'] = hosts
        else:
            if not self.configuration.get('bootstrap.servers'):
                self.configuration[
                    'bootstrap.servers'] = GeneralConfig.KAFKA_URL

        self.use_tx = use_tx
        self.topic_part_itr = None
        self.topic_parts = None
        self.one_topic_name = one_topic_name

        if auto_flush_size:
            self.auto_flush = True
        else:
            self.auto_flush = False

        self.auto_flush_size = auto_flush_size
        self.auto_flush_itr = 0
        self.flush_is_bad = flush_is_bad

    """
    Context manager
    """

    def __enter__(self):

        self.auto_flush_itr = 0
        self.producer = Producer(self.configuration)
        self.update_partition_settings(name_topic=self.one_topic_name)

        if self.use_tx:
            try:
                self.producer.abort_transaction(
                    default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
            except Exception:
                pass

            self.producer.init_transactions(
                default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
            self.producer.begin_transaction()

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Called on exit from the context manager

        :param exc_type:
        :param exc_val:
        :param exc_tb:
        :return:
        """

        self.auto_flush_itr = 0
        if self.use_tx:
            if exc_type:
                self.producer.abort_transaction()
            else:
                # flush is called under the hood by commit_transaction
                self.producer.commit_transaction(
                    default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
        else:
            self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)

        del self

    """
    Helper operations
    """

    def get_list_topics(self):
        """
        All topics
        :return:
        """
        try:
            res = self.producer.list_topics().topics
            return res
        except Exception:
            return None

    def get_one_topic(self, name):
        """
        A single topic, looked up by name
        :param name:
        :return:
        """
        try:
            res = self.producer.list_topics(topic=name).topics
            return res
        except Exception:
            return None

    def update_partition_settings(self, name_topic=None):
        """
        Refresh the partition settings for all topics

        :param name_topic: or restrict the refresh to a single topic
        :return:
        """

        if self.topic_parts is None:
            self.topic_part_itr = {}
            self.topic_parts = {}

        if name_topic is None:
            topics = self.get_list_topics()
        else:
            if self.topic_parts.get(name_topic) is not None:
                self.topic_parts.pop(name_topic)

            topics = self.get_one_topic(name_topic)

        for name, topic_obj in topics.items():
            list_partitions = list(topic_obj.partitions)
            if len(list_partitions) <= 1:
                continue

            self.topic_parts[name] = list_partitions
            self.topic_part_itr[name] = 0

    def put_data(self,
                 key,
                 value,
                 topic=None,
                 callback=None,
                 partition=None,
                 poll_time=0):
        """
        Queue data for the message broker to process.
        To keep it simple, the value is dumped to a JSON string right away; topic name and key must be strings.

        :param key: message key. Leave empty if Kafka's built-in message partitioning is used
        :param value: message value

        :param topic: topic name; if not given, the default topic self.one_topic_name is used
        :param partition: topic partition (int); if not given, load is balanced across partitions

        :param callback: func(err, msg): if err is not None...
        :return:
        """

        dict_args = self._put_validation_and_transform(key=key,
                                                       value=value,
                                                       topic=topic,
                                                       callback=callback,
                                                       partition=partition)

        self._put_data_default(dict_args=dict_args, poll_time=poll_time)

    def _put_validation_and_transform(self,
                                      key,
                                      value,
                                      topic=None,
                                      callback=None,
                                      partition=None):
        """
        Builds the argument dict shared by the different insertion strategies
        """

        if topic is None and self.one_topic_name is None:
            raise AttributeError('NEED TOPIC NAME!')

        if topic is None:
            topic = self.one_topic_name

        dict_args = {
            'topic': str(topic),
            'value': jsd(value),
        }

        if key:
            dict_args['key'] = str(key)

        if callback:
            dict_args['callback'] = callback

        if partition:
            # partition specified explicitly

            dict_args['partition'] = partition
        else:
            # rotate the partition evenly across the topic

            top_name = dict_args['topic']
            topic_parts = self.topic_parts.get(top_name)
            if topic_parts:

                current_position = self.topic_part_itr[top_name]

                if key:
                    # a partition is only needed when a key is present
                    dict_args['partition'] = topic_parts[current_position]

                current_position += 1
                if current_position >= len(topic_parts):
                    current_position = 0

                self.topic_part_itr[top_name] = current_position

        return dict_args

    def _put_data_default(self, dict_args, poll_time=0):
        """
        The original insertion approach, with refinements
        """

        if self.auto_flush:
            # auto-wait for the message buffer to be accepted (third version)

            self.producer.produce(**dict_args)
            self.producer.poll(poll_time)

            self.auto_flush_itr = self.auto_flush_itr + 1
            if self.auto_flush_itr >= self.auto_flush_size:
                self.auto_flush_itr = 0
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
        else:
            if self.flush_is_bad:
                # second version of the algorithm: flush only when needed
                try:
                    self.producer.produce(**dict_args)
                    self.producer.poll(poll_time)
                except BufferError:
                    # wait for Kafka to drain the queue
                    self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
            else:
                # first version
                self.producer.produce(**dict_args)
                self.producer.poll(poll_time)

    def put_data_direct(self,
                        key,
                        value,
                        topic=None,
                        callback=None,
                        partition=None):
        """
        Direct insertion with data transformation. poll() is not used
        """

        dict_args = self._put_validation_and_transform(key=key,
                                                       value=value,
                                                       topic=topic,
                                                       callback=callback,
                                                       partition=partition)

        if self.auto_flush:
            self.producer.produce(**dict_args)

            self.auto_flush_itr = self.auto_flush_itr + 1
            if self.auto_flush_itr >= self.auto_flush_size:
                self.auto_flush_itr = 0
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
        else:
            if self.flush_is_bad:
                try:
                    self.producer.produce(**dict_args)
                except BufferError:
                    # wait for Kafka to drain the queue
                    self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
            else:
                self.producer.produce(**dict_args)
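A hedged usage sketch of the context-manager interface above (broker address, topic name and payloads are placeholders; jsd and default_cfg are assumed to come from the surrounding project):

with KafkaProducerConfluent(hosts='localhost:9092', one_topic_name='events') as producer:
    producer.put_data(key='user-1', value={'action': 'login'})
    producer.put_data(key=None, value={'action': 'heartbeat'})
# __exit__ flushes the producer (or commits the transaction when use_tx=True)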
Example #14
class Publisher:
    """
    A class for publishing bluesky documents to a Kafka broker.

    The intention is that Publisher objects be subscribed to a RunEngine.

    Reference: https://github.com/confluentinc/confluent-kafka-python/issues/137

    There is no default configuration. A reasonable production configuration for use
    with bluesky is Kafka's "idempotent" configuration specified by
        producer_config = {
            "enable.idempotence": True
        }
    This is short for
        producer_config = {
            "acks": "all",                              # acknowledge only after all brokers receive a message
            "retries": sys.maxsize,                     # retry indefinitely
            "max.in.flight.requests.per.connection": 5  # maintain message order *when retrying*
        }

    This means three things:
        1) delivery acknowledgement is not sent until all replicate brokers have received a message
        2) message delivery will be retried indefinitely (messages will not be dropped by the Producer)
        3) message order will be maintained during retries

    A reasonable testing configuration is
        producer_config={
            "acks": 1,
            "request.timeout.ms": 5000,
        }

    Parameters
    ----------
    topic : str
        Topic to which all messages will be published.
    bootstrap_servers: str
        Comma-delimited list of Kafka server addresses as a string such as ``'127.0.0.1:9092'``.
    key : str
        Kafka "key" string. Specify a key to maintain message order. If None is specified
        no ordering will be imposed on messages.
    producer_config : dict, optional
        Dictionary configuration information used to construct the underlying Kafka Producer.
    on_delivery : function(err, msg), optional
        A function to be called after a message has been delivered or after delivery has
        permanently failed.
    flush_on_stop_doc : bool, optional
        False by default, set to True to flush() the underlying Kafka Producer when a stop
        document is published.
    serializer : function, optional
        Function to serialize data. Default is pickle.dumps.

    Example
    -------
    Publish documents from a RunEngine to a Kafka broker on localhost port 9092.

    >>> publisher = Publisher(
    >>>     topic="bluesky.documents",
    >>>     bootstrap_servers='localhost:9092',
    >>>     key="abcdef"
    >>> )
    >>> RE = RunEngine({})
    >>> RE.subscribe(publisher)
    """
    def __init__(
        self,
        topic,
        bootstrap_servers,
        key,
        producer_config=None,
        on_delivery=None,
        flush_on_stop_doc=False,
        serializer=msgpack.dumps,
    ):
        self.topic = topic
        self._bootstrap_servers = bootstrap_servers
        self._key = key
        # in the case that "bootstrap.servers" is included in producer_config
        # combine it with the bootstrap_servers argument
        self._producer_config = dict()
        if producer_config is not None:
            self._producer_config.update(producer_config)
        if "bootstrap.servers" in self._producer_config:
            self._producer_config["bootstrap.servers"] = ",".join([
                bootstrap_servers, self._producer_config["bootstrap.servers"]
            ])
        else:
            self._producer_config["bootstrap.servers"] = bootstrap_servers

        logger.debug("producer configuration: %s", self._producer_config)

        if on_delivery is None:
            self.on_delivery = default_delivery_report
        else:
            self.on_delivery = on_delivery

        self._flush_on_stop_doc = flush_on_stop_doc
        self._producer = Producer(self._producer_config)
        self._serializer = serializer

    def __str__(self):
        return ("bluesky_kafka.Publisher("
                f"topic='{self.topic}',"
                f"key='{self._key}',"
                f"bootstrap_servers='{self._bootstrap_servers}'"
                f"producer_config='{self._producer_config}'"
                ")")

    def get_cluster_metadata(self, timeout=5.0):
        """
        Return information about the Kafka cluster and this Publisher's topic.

        Parameters
        ----------
        timeout: float, optional
            maximum time in seconds to wait before timing out, -1 for infinite timeout,
            default is 5.0s

        Returns
        -------
        cluster_metadata: confluent_kafka.admin.ClusterMetadata
        """
        cluster_metadata = self._producer.list_topics(topic=self.topic,
                                                      timeout=timeout)
        return cluster_metadata

    def __call__(self, name, doc):
        """
        Publish the specified name and document as a Kafka message.

        Flushing the Producer on every stop document guarantees
        that _at the latest_ all documents for a run will be delivered
        to the broker(s) at the end of the run. Without this flush
        the documents for a short run may wait for some time to be
        delivered. The flush call is blocking so it is a bad idea to
        flush after every document but reasonable to flush after a
        stop document since this is the end of the run.

        Parameters
        ----------
        name: str
            Document name, one of "start", "descriptor", "event", "resource", "datum", "stop".
        doc: dict
            event-model document dictionary

        """
        logger.debug(
            "publishing document to Kafka broker(s):"
            "topic: '%s'\n"
            "key:   '%s'\n"
            "name:  '%s'\n"
            "doc:    %s",
            self.topic,
            self._key,
            name,
            doc,
        )
        self._producer.produce(
            topic=self.topic,
            key=self._key,
            value=self._serializer((name, doc)),
            on_delivery=self.on_delivery,
        )
        if self._flush_on_stop_doc and name == "stop":
            self.flush()

    def flush(self):
        """
        Flush all buffered messages to the broker(s).
        """
        logger.debug(
            "flushing Kafka Producer for topic '%s' and key '%s'",
            self.topic,
            self._key,
        )
        self._producer.flush()
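When no on_delivery callback is supplied, Publisher falls back to a default_delivery_report that is not shown in this excerpt; a minimal sketch under the assumption that it simply logs the outcome:

def default_delivery_report(err, msg):
    # hypothetical fallback on_delivery callback for Publisher
    if err is not None:
        logger.error("message delivery failed: %s", err)
    else:
        logger.debug("message delivered to %s [%d] @ %d",
                     msg.topic(), msg.partition(), msg.offset())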
Example #15
class ConfluentKafkaMsgQAPI:
    """
    This class provides API's into interact with Kafka Queue.
    """
    def __init__(self,
                 is_producer=False,
                 is_consumer=False,
                 perform_subscription=False,
                 thread_identifier=None):
        if not is_producer and not is_consumer:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: You need to pick either producer or consumer."
            )
            pass
        self.producer_instance = None
        self.consumer_instance = None
        self.broker_name = None
        self.topic = None
        self.producer_conf = None
        self.consumer_conf = None
        self.is_topic_created = False
        self.perform_subscription = perform_subscription
        self.thread_identifier = thread_identifier
        self.__read_environment_variables()
        if is_producer:
            self.__producer_connect()
        if is_consumer:
            self.__consumer_connect()

    def __read_environment_variables(self):
        """
        This method is used to read the environment variables defined in the OS.
        :return:
        """
        while self.broker_name is None or \
                self.topic is None:
            time.sleep(2)
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: "
                "Trying to read the environment variables...")
            self.broker_name = os.getenv("broker_name_key", default=None)
            self.topic = os.getenv("topic_key", default=None)
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: broker_name={}".format(self.broker_name))
        logging_to_console_and_syslog("ConfluentKafkaMsgQAPI: topic={}".format(
            self.topic))

    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    @staticmethod
    def delivery_callback(err, msg):
        if err:
            logging_to_console_and_syslog('%% Message failed delivery: %s\n' %
                                          err)
        else:
            logging_to_console_and_syslog(
                '%% Message delivered to %s [%d] @ %s\n' %
                (msg.topic(), msg.partition(), str(msg.offset())))

    def __producer_connect(self):
        """
        This method tries to connect to the kafka broker based upon the type of kafka.
        :return:
        """
        while self.producer_instance is None:
            try:
                self.producer_conf = {'bootstrap.servers': self.broker_name}
                # Create Producer instance
                self.producer_instance = Producer(**self.producer_conf)
            except Exception:
                print("Exception in user code:")
                print("-" * 60)
                traceback.print_exc(file=sys.stdout)
                print("-" * 60)
                time.sleep(5)
            else:
                logging_to_console_and_syslog(
                    "ConfluentKafkaMsgQAPI: Successfully "
                    "connected to broker_name={}".format(self.broker_name))

    def __consumer_connect(self):
        status = False
        try:
            if self.perform_subscription:
                self.__consumer_connect_to_broker()
                self.__subscribe_to_a_topic()
                # self.__iterate_over_kafka_consumer_instance_messages()
            else:
                self.__consumer_connect_to_kafka_broker_and_to_a_topic()
                # self.__consumer_poll_for_new_messages()
            status = True
        except Exception:
            logging_to_console_and_syslog(
                "{}:Exception occurred while polling for "
                "a message from kafka Queue. {} ".format(
                    self.thread_identifier,
                    sys.exc_info()[0]))

            print("{}:Exception in user code:".format(self.thread_identifier))
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
        return status

    def enqueue(self, filename):
        """
        This method tries to post a message to the pre-defined kafka topic.
        :param filename:
        :return status False or True:
        """
        status = False

        if filename is None or len(filename) == 0:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: filename is None or invalid")
            return status
        if self.producer_instance is None:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: instance is None")
            return status

        if not self.is_topic_created:
            try:
                if self.producer_instance.list_topics(self.topic, timeout=1.0):
                    logging_to_console_and_syslog(
                        "Found topic name = {} in the zookeeper.".format(
                            self.topic))
                    self.is_topic_created = True
            except KafkaException:
                self.kafka_admin_client = admin.AdminClient(self.producer_conf)
                logging_to_console_and_syslog("Creating topic {}.".format(
                    self.topic))
                ret = self.kafka_admin_client.create_topics(
                    new_topics=[
                        admin.NewTopic(topic=self.topic, num_partitions=1)
                    ],
                    operation_timeout=1.0)
                logging_to_console_and_syslog("ret = {}".format(ret))

        # Asynchronously produce a message, the delivery report callback
        # will be triggered from poll() above, or flush() below, when the message has
        # been successfully delivered or failed permanently.
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Posting filename={} into "
            "kafka broker={}, topic={}".format(filename, self.broker_name,
                                               self.topic))
        value = filename.encode('utf-8')
        try:
            # Produce line (without newline)
            self.producer_instance.produce(
                self.topic,
                value,
                callback=ConfluentKafkaMsgQAPI.delivery_callback)
            status = True
        except BufferError:
            sys.stderr.write('%% Local producer queue is full '
                             '(%d messages awaiting delivery): try again\n' %
                             len(self.producer_instance))
            status = False
        except:
            print("ConfluentKafkaMsgQAPI: Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            status = False
        else:
            event = "ConfluentKafkaMsgQAPI: Posting filename={} into " \
                    "kafka broker={}, topic={}." \
                .format(filename,
                        self.broker_name,
                        self.topic)
            logging_to_console_and_syslog(event)
            # Wait for any outstanding messages to be delivered and delivery report
            # callbacks to be triggered.
            # Serve delivery callback queue.
            # NOTE: Since produce() is an asynchronous API this poll() call
            #       will most likely not serve the delivery callback for the
            #       last produce()d message.
            self.producer_instance.poll(timeout=0.1)
            # Wait until all messages have been delivered
            # sys.stderr.write('%% Waiting for %d deliveries\n' % len(self.producer_instance))
            self.producer_instance.flush(timeout=0.1)

        return status

    def __consumer_connect_to_kafka_broker_and_to_a_topic(self):
        """
        This method tries to connect to the kafka broker.
        :return:
        """
        pass

    def __consumer_poll_for_new_messages(self):

        logging_to_console_and_syslog(
            "{}: Polling the kafka consumer instance for "
            "new messages in the topic {}.".format(self.thread_identifier,
                                                   self.topic))
        # Read messages from Kafka, print to stdout
        try:
            while True:
                msg = self.consumer_instance.poll(timeout=1.0)
                if msg is None:
                    continue
                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    # Proper message
                    sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                                     (msg.topic(), msg.partition(),
                                      msg.offset(), str(msg.key())))
                    print(msg.value())

        except KeyboardInterrupt:
            sys.stderr.write('%% Aborted by user\n')

        finally:
            # Close down consumer to commit final offsets.
            self.consumer_instance.close()
        """
        msg = self.consumer_instance.poll(timeout=5.0)
        if msg is None:
            return None

        if msg.error():
            raise KafkaException(msg.error())
        else:
            logging_to_console_and_syslog("msg = {}".format(msg))

            logging_to_console_and_syslog('Consumer:{}: Rcvd msg {} [{}] at offset {} with key {}: value : {}\n'
                                          .format(self.thread_identifier,
                                                  msg.topic(),
                                                  msg.partition(),
                                                  msg.offset(),
                                                  str(msg.key()),
                                                  str(msg.value()))
                                          )
        return msg.value()
        """
        return None

    def __consumer_connect_to_broker(self):
        """
        This method tries to connect to the kafka broker.
        :return:
        """
        if self.consumer_instance:
            return

        # Consumer configuration
        # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        """
            self.consumer_conf = {'bootstrap.servers': self.broker_name,
                              'group.id': 'kafka-consumer{}'.format(self.thread_identifier),
                              'session.timeout.ms': 6000,
                              'auto.offset.reset': 'earliest'}
        """
        consumer_conf = {
            'bootstrap.servers': self.broker_name,
            'group.id': 'group',
            'session.timeout.ms': 6000,
            'auto.offset.reset': 'earliest'
        }
        consumer_conf['stats_cb'] = stats_cb
        consumer_conf['statistics.interval.ms'] = 0
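        # With statistics.interval.ms set to 0, librdkafka emits no statistics,
        # so the stats_cb above will not be invoked.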

        # Create logger for consumer (logs will be emitted when poll() is called)
        logger = logging.getLogger('consumer')
        logger.setLevel(logging.DEBUG)
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
        logger.addHandler(handler)

        while self.consumer_instance is None:
            try:

                logging_to_console_and_syslog(
                    "Consumer:{}:Trying to connect to broker_name={}".format(
                        self.thread_identifier, self.broker_name))
                # Create Consumer instance
                # Hint: try debug='fetch' to generate some log messages
                self.consumer_instance = Consumer(consumer_conf, logger=logger)
            except:
                logging_to_console_and_syslog(
                    "Consumer:{}:Exception in user code:".format(
                        self.thread_identifier))
                logging_to_console_and_syslog("-" * 60)
                traceback.print_exc(file=sys.stdout)
                logging_to_console_and_syslog("-" * 60)
                time.sleep(5)

        logging_to_console_and_syslog("Consumer:{}:Consumer Successfully "
                                      "connected to broker_name={}".format(
                                          self.thread_identifier,
                                          self.broker_name))

    @staticmethod
    def print_assignment(consumer, partitions):
        logging_to_console_and_syslog('consumer = {}, Assignment {}:'.format(
            repr(consumer), partitions))

    def __subscribe_to_a_topic(self):
        try:
            # Subscribe to topics
            cluster_meta_data = self.consumer_instance.list_topics(self.topic,
                                                                   timeout=0.3)
            logging_to_console_and_syslog("ClusterMetaData={}".format(
                repr(cluster_meta_data)))
            if self.topic not in cluster_meta_data.topics.keys():
                logging_to_console_and_syslog(
                    "Topic {} is "
                    "not found in the ClusterMetaData {}".format(
                        self.topic, repr(cluster_meta_data.topics.keys())))
                raise KafkaException

            def print_assignment(consumer, partitions):
                print('Assignment:', partitions)

            # Subscribe to topics
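            # Consumer.subscribe() expects a list of topic names; on_assign fires
            # when partitions are assigned to this consumer after a rebalance.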
            self.consumer_instance.subscribe([self.topic],
                                             on_assign=print_assignment)
            logging_to_console_and_syslog(
                "Consumer:{}: Subscribed to topic {}.".format(
                    self.thread_identifier, self.topic))
            """
            self.consumer_instance.subscribe(self.topic,
                                             on_assign=ConfluentKafkaMsgQAPI.print_assignment)
            """
        except:
            logging_to_console_and_syslog(
                "Consumer:{}: Exception while subscribing to topic {}: {}".format(
                    self.thread_identifier, self.topic, sys.exc_info()[0]))
        return True

    def __iterate_over_kafka_consumer_instance_messages(self):
        """
        logging_to_console_and_syslog("Consumer:{}: dequeue {}."
                                      .format(self.thread_identifier,
                                           self.topic))
        """
        pass

    def dequeue(self):
        try:
            if self.perform_subscription:
                # logging_to_console_and_syslog("{}:Perform __consumer_poll_for_new_messages."
                #                              .format(self.thread_identifier))
                return self.__consumer_poll_for_new_messages()
            else:
                # logging_to_console_and_syslog("{}:Perform __iterate_over_kafka_consumer_instance_messages."
                #                             .format(self.thread_identifier))
                return self.__iterate_over_kafka_consumer_instance_messages()

        except:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI:Exception occurred while polling for "
                "a message from kafka Queue. {} ".format(sys.exc_info()[0]))

            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI:Exception in user code:")
            logging_to_console_and_syslog("-" * 60)
            traceback.print_exc(file=sys.stdout)
            logging_to_console_and_syslog("-" * 60)

        return None

    def cleanup(self):
        pass
Example #16
from confluent_kafka import Producer
import random
import time
conf = {
    'bootstrap.servers': '131.247.3.206:9092',
    'client.id': 'producerAdil1',
}

producer = Producer(conf)
print(producer.list_topics())
x = random.randint(1, 1000)


def on_callback(err, msg):
    if err:
        print(err)
    else:
        print(msg)


ts = time.time()
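# produce() is asynchronous; the delivery callback may be passed as either
# `callback` or `on_delivery`, and it is served from poll() or flush().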
producer.produce('transactions',
                 key='1',
                 value=f'{ts}: ${x}',
                 on_delivery=on_callback)
producer.flush()
print(x)
def receipt(err, msg):
    """Defines an acknowledgments for the producer and consumer"""
    if err is not None:
        print("Error: {0}".format(err))
    else:
        print("{0}: Message on topic {1} on partition {2} with a value"
              " of {3}".format(
                  time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(msg.timestamp()[1] / 1000)),
                  msg.topic(), msg.partition(),
                  msg.value().decode('utf-8')))


print("Topics available to publish: {0}".format(', '.join(
    p.list_topics().topics)))

for _ in range(10):

    data = {
        'name': fake.name(),
        'age': fake.random_int(min=18, max=101, step=1),
        'street': fake.street_address(),
        'city': fake.city(),
        'state': fake.state(),
        'zip': fake.zipcode(),
        'lng': float(fake.longitude()),
        'lat': float(fake.latitude())
    }

    m = json.dumps(data)
Example #18
class ConfluentKafkaMsgQAPI:
    """
    This class provides APIs to interact with a Kafka queue.
    """
    def __init__(self,
                 is_producer=False,
                 is_consumer=False,
                 perform_subscription=False,
                 thread_identifier=None):
        if not is_producer and not is_consumer:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: You need to pick either producer or consumer."
            )
            pass
        self.producer_instance = None
        self.consumer_instance = None
        self.broker_name = None
        self.topic = None
        self.producer_conf = None
        self.consumer_conf = None
        self.is_topic_created = False
        self.perform_subscription = perform_subscription
        self.thread_identifier = thread_identifier
        self.__read_environment_variables()
        # if is_producer:
        #    self.__producer_connect()
        # if is_consumer:
        #    self.__consumer_connect()

    def __read_environment_variables(self):
        """
        This method is used to read the environment variables defined in the OS.
        :return:
        """
        while self.broker_name is None or \
                self.topic is None:
            time.sleep(2)
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: "
                "Trying to read the environment variables...")
            self.broker_name = os.getenv("broker_name_key", default=None)
            self.topic = os.getenv("topic_key", default=None)
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: broker_name={}".format(self.broker_name))
        logging_to_console_and_syslog("ConfluentKafkaMsgQAPI: topic={}".format(
            self.topic))

    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    @staticmethod
    def delivery_callback(err, msg):
        if err:
            logging_to_console_and_syslog('%% Message failed delivery: %s\n' %
                                          err)
        else:
            logging_to_console_and_syslog(
                '%% Message delivered to %s [%d] @ %s\n' %
                (msg.topic(), msg.partition(), str(msg.offset())))

    def __producer_connect(self):
        """
        This method tries to connect to the kafka broker based upon the type of kafka.
        :return:
        """
        is_connected = False
        if self.producer_instance is None:
            try:
                self.producer_conf = {'bootstrap.servers': self.broker_name}
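                # 'bootstrap.servers' accepts a comma-separated list of host:port pairs.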
                # Create Producer instance
                self.producer_instance = Producer(**self.producer_conf)
                is_connected = True
            except:
                print("Exception in user code:")
                print("-" * 60)
                traceback.print_exc(file=sys.stdout)
                print("-" * 60)
                time.sleep(5)
            else:
                logging_to_console_and_syslog(
                    "ConfluentKafkaMsgQAPI: Successfully "
                    "connected to broker_name={}".format(self.broker_name))
        return is_connected

    def enqueue(self, filename):
        """
        This method tries to post a message to the pre-defined kafka topic.
        :param filename:
        :return status False or True:
        """
        status = False

        if filename is None or len(filename) == 0:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: filename is None or invalid")
            return status
        if self.producer_instance is None:
            logging_to_console_and_syslog(
                "KafkaMsgQAPI: Producer instance is None. Trying to create one.."
            )
            if not self.__producer_connect():
                logging_to_console_and_syslog(
                    "Unable to create producer instance.")
                return status

        if not self.is_topic_created:
            try:
                if self.producer_instance.list_topics(self.topic, timeout=1.0):
                    logging_to_console_and_syslog(
                        "Found topic name = {} in the zookeeper.".format(
                            self.topic))
                    self.is_topic_created = True
            except KafkaException:
                kafka_admin_client = admin.AdminClient(self.producer_conf)
                logging_to_console_and_syslog("Creating topic {}.".format(
                    self.topic))
                ret = kafka_admin_client.create_topics(new_topics=[
                    admin.NewTopic(topic=self.topic, num_partitions=1)
                ],
                                                       operation_timeout=1.0)
                logging_to_console_and_syslog("ret = {}".format(ret))

        # Asynchronously produce a message, the delivery report callback
        # will be triggered from poll() above, or flush() below, when the message has
        # been successfully delivered or failed permanently.
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Posting filename={} into "
            "kafka broker={}, topic={}".format(filename, self.broker_name,
                                               self.topic))
        value = filename.encode('utf-8')
        try:
            # Produce line (without newline)
            self.producer_instance.produce(
                self.topic,
                value,
                callback=ConfluentKafkaMsgQAPI.delivery_callback)
            status = True
        except BufferError:
            sys.stderr.write('%% Local producer queue is full '
                             '(%d messages awaiting delivery): try again\n' %
                             len(self.producer_instance))
            status = False
        except:
            print("ConfluentKafkaMsgQAPI: Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            status = False
        else:
            event = "ConfluentKafkaMsgQAPI: Posting filename={} into " \
                    "kafka broker={}, topic={}." \
                .format(filename,
                        self.broker_name,
                        self.topic)
            logging_to_console_and_syslog(event)
            # Wait for any outstanding messages to be delivered and delivery report
            # callbacks to be triggered.
            # Serve delivery callback queue.
            # NOTE: Since produce() is an asynchronous API this poll() call
            #       will most likely not serve the delivery callback for the
            #       last produce()d message.
            self.producer_instance.poll(timeout=0.1)
            # Wait until all messages have been delivered
            # sys.stderr.write('%% Waiting for %d deliveries\n' % len(self.producer_instance))
            self.producer_instance.flush(timeout=0.1)

        return status

    def __consumer_connect_to_broker(self):
        """
        This method tries to connect to the kafka broker.
        :return:
        """
        is_connected = False

        # Consumer configuration
        # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        """
            self.consumer_conf = {'bootstrap.servers': self.broker_name,
                              'group.id': 'kafka-consumer',
                              'session.timeout.ms': 6000,
                              'auto.offset.reset': 'earliest'}
        """
        if self.consumer_instance is None:
            try:

                logging_to_console_and_syslog(
                    "Consumer:{}:Trying to connect to broker_name={}".format(
                        self.thread_identifier, self.broker_name))
                # Create Consumer instance
                # Hint: try debug='fetch' to generate some log messages
                consumer_conf = {
                    'bootstrap.servers': self.broker_name,
                    'group.id': self.topic,
                    'session.timeout.ms': 6000,
                    'auto.offset.reset': 'earliest'
                }

                # consumer_conf['stats_cb'] = stats_cb
                # consumer_conf['statistics.interval.ms'] = 0
                self.consumer_instance = Consumer(consumer_conf)
                is_connected = True
            except:
                logging_to_console_and_syslog(
                    "Consumer:{}:Exception in user code:".format(
                        self.thread_identifier))
                logging_to_console_and_syslog("-" * 60)
                traceback.print_exc(file=sys.stdout)
                logging_to_console_and_syslog("-" * 60)
                time.sleep(5)

        logging_to_console_and_syslog("Consumer:{}:Consumer Successfully "
                                      "connected to broker_name={}".format(
                                          self.thread_identifier,
                                          self.broker_name))
        return is_connected

    @staticmethod
    def print_assignment(consumer, partitions):
        print('consumer = {}, Assignment {}:'.format(consumer, partitions))

    def dequeue(self):
        conf = {
            'bootstrap.servers': self.broker_name,
            'group.id': self.topic,
            'session.timeout.ms': 6000,
            'auto.offset.reset': 'earliest'
        }
        if not self.consumer_instance:
            self.consumer_instance = Consumer(conf)
            self.consumer_instance.subscribe(
                [self.topic], on_assign=ConfluentKafkaMsgQAPI.print_assignment)
        msg = self.consumer_instance.poll(timeout=1.0)
        if msg is None or msg.error():
            return None
        else:
            logging_to_console_and_syslog(
                '%% %s [%d] at offset %d with key %s:\n' %
                (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))
            msg = msg.value().decode('utf8')
            logging_to_console_and_syslog("msg.value()={}".format(msg))
            self.consumer_instance.close()
            self.consumer_instance = None
            return msg

    def cleanup(self):
        if self.consumer_instance:
            self.consumer_instance.close()
            self.consumer_instance = None