Example #1
    def get_connection(self):
        if not self.bootstrap_server:
            self.app.logger.info(
                "The connection to Kafka is disabled. Check the '.env' file!")
            return True  # True is needed to run in platform-only mode
        try:
            # Check the connectivity and update kafka topics
            self.system_topics = self.k_admin_client.list_topics(
                timeout=3.0).topics
            # create PLATFORM_TOPIC if not already done
            if not self.system_topics.get(PLATFORM_TOPIC, None):
                self.k_admin_client.create_topics(
                    [kafka_admin.NewTopic(PLATFORM_TOPIC, 3, 1)])
                self.app.logger.info(
                    "Created platform logging topic with name '{}'.".format(
                        PLATFORM_TOPIC))

            self.app.logger.debug(
                "Connected to Kafka Bootstrap Servers '{}'.".format(
                    self.bootstrap_server))
            return True
        except cimpl.KafkaException:
            self.app.logger.error(
                "Couldn't connect to Kafka Bootstrap servers.")
            self.app.logger.error(
                "Check the Kafka Bootstrap Servers '{}'!".format(
                    self.bootstrap_server))
            return False
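
This method assumes an already configured admin client on the instance. A minimal sketch of the setup it expects (class name and topic name are hypothetical):

import os
from confluent_kafka import admin as kafka_admin

PLATFORM_TOPIC = "platform.logger"  # hypothetical topic name

class KafkaInterface:  # hypothetical wrapper class
    def __init__(self, app):
        self.app = app
        # An empty value disables Kafka (platform-only mode)
        self.bootstrap_server = os.environ.get("KAFKA_BOOTSTRAP_SERVERS", "")
        if self.bootstrap_server:
            self.k_admin_client = kafka_admin.AdminClient(
                {"bootstrap.servers": self.bootstrap_server})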
Example #2
    def makeConsumer(self,
                     TopicIn,
                     consumerID,
                     topicConfig=None,
                     randomID=False,
                     topicParams=(1, 1)):
        # Set up consumer; optionally add a random suffix to the group id
        if topicConfig is None:
            topicConfig = {'auto.offset.reset': 'earliest'}
        if randomID:
            consumerID = consumerID + '{:04d}'.format(random.randint(0, 9999))
        finTopicIn = self.topicBasename + '.' + TopicIn
        print('Consumer Topic: {}'.format(finTopicIn))
        if finTopicIn not in self.topics:
            print('Warning: Consumer created the topic: {}'.format(finTopicIn))
            new_topic = admin.NewTopic(finTopicIn, topicParams[0],
                                       topicParams[1])
            self.kafka_admin.create_topics([new_topic])

        # Copy the shared connection parameters so that setting the group id
        # does not leak into other clients built from the same object.
        params = dict(self.connectParams)
        params['group.id'] = consumerID  # change this to restart from the first/newest message
        params['default.topic.config'] = topicConfig

        consumer = Consumer(params)
        consumer.subscribe([finTopicIn])
        consumer.poll(0.1)  # trigger the initial partition assignment

        return (finTopicIn, consumer)
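
A hedged usage sketch for the returned pair; the wrapper object and topic name are hypothetical:

topic, consumer = wrapper.makeConsumer('events', 'my-group', randomID=True)
while True:
    msg = consumer.poll(1.0)
    if msg is None:
        continue
    if msg.error():
        print('Consumer error: {}'.format(msg.error()))
        continue
    print('{}: {}'.format(topic, msg.value()))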
Example #3
def create_topic(topics):
    availableTopics = adminClient.list_topics().topics
    topicsToDelete = []

    for topic in topics:
        if topic in availableTopics:
            topicsToDelete.append(topic)

    delete_topic(topicsToDelete)
    time.sleep(0.1)
    newTopics = []
    for topic in topics:
        newTopics.append(ckAdmin.NewTopic(topic, 1, 1))

    fs = adminClient.create_topics(newTopics)
    print(fs)
    exceptions = []
    for topic, f in fs.items():
        try:
            f.result()
            print("Topic {} created".format(topic))
        except Exception as e:
            exceptions.append("Failed to create topic {} : {}".format(
                topic, e))

    if exceptions:
        raise Exception('\n'.join(exceptions))
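
delete_topic() is referenced above but not shown. A minimal sketch of a compatible helper, assuming the same module-level adminClient:

def delete_topic(topics):
    if not topics:
        return
    fs = adminClient.delete_topics(topics, operation_timeout=30)
    for topic, f in fs.items():
        try:
            f.result()  # None on success
            print("Topic {} deleted".format(topic))
        except Exception as e:
            print("Failed to delete topic {}: {}".format(topic, e))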
Example #4
def create_topic(topic, config, num_partitions=1):
    """
    Builds a Kafka topic specification from the given data.
    The NewTopic object only describes the topic; pass it to
    AdminClient.create_topics() to actually create it.
    :param topic: string
    :param config: dict
    :param num_partitions: int
    :return: NewTopic
    """
    return admin.NewTopic(topic=topic, num_partitions=num_partitions,
                          config=config)
Example #5
def to_stream_dir(path_dir, kafka_client):
    producer = Producer(KAFKA_CONFIG)
    for file in os.listdir(path_dir):
        if file.endswith(".tar.gz"):
            topic_name = file.split(".")[0]
            new_topic = admin.NewTopic(topic_name, 1, 1)
            logging.info(f"Creating topic {topic_name}")
            kafka_client.create_topics([new_topic])
            # os.listdir() yields bare names, so join with the directory
            to_stream_tar(os.path.join(path_dir, file), producer, topic_name)
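
to_stream_tar() is not shown. A minimal sketch of a compatible implementation, assuming one message per archive member (hypothetical):

import tarfile

def to_stream_tar(path, producer, topic_name):
    with tarfile.open(path, "r:gz") as tar:
        for member in tar.getmembers():
            if member.isfile():
                data = tar.extractfile(member).read()
                producer.produce(topic_name, data)
    producer.flush()  # block until all messages are delivered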
Example #6
    def create_system_topics(self, system_name):
        # Create the set of Kafka topics for system_name
        if self.bootstrap_server is None:
            self.app.logger.warning(
                "Skipped creating system topics because platform-only mode is used."
            )
            return None
        self.app.logger.debug("Creating Kafka topics for the new system.")
        # Create system topics
        self.k_admin_client.create_topics(
            [  # TODO: set num_partitions to 1
                kafka_admin.NewTopic(system_name + ".log",
                                     num_partitions=3,
                                     replication_factor=1),
                kafka_admin.NewTopic(system_name + ".int",
                                     num_partitions=3,
                                     replication_factor=1),
                kafka_admin.NewTopic(system_name + ".ext",
                                     num_partitions=3,
                                     replication_factor=1)
            ])
        self.app.logger.info(
            "Created system topics for '{}'".format(system_name))
Example #7
    def makeProducer(self, TopicOut, topicParams=(1, 1)):
        # Set up a producer, creating the topic first if it does not exist
        finTopicOut = self.topicBasename + '.' + TopicOut
        print('Producer Topic: {}'.format(finTopicOut))

        if finTopicOut not in self.topics:
            print('Should create Topic: {}'.format(finTopicOut))
            new_topic = admin.NewTopic(finTopicOut, topicParams[0],
                                       topicParams[1])
            self.kafka_admin.create_topics([new_topic])

        return finTopicOut, Producer(self.connectParams)
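
A hedged usage sketch for the returned pair (wrapper object hypothetical):

topic, producer = wrapper.makeProducer('results')
producer.produce(topic, b'payload')
producer.flush()  # block until delivery completes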
Example #8
    def testTopic(self, Topic, topicParams=(1, 1)):
        if not self.connectParams:
            print('Should call setConnection first!')
            return False

        # Refresh the topic list and create the topic if it is missing
        self.topics = self.kafka_admin.list_topics().topics
        if Topic not in self.topics:
            print('Should create topic: {}'.format(Topic))
            new_topic = admin.NewTopic(Topic, topicParams[0], topicParams[1])
            self.kafka_admin.create_topics([new_topic])
        return True
Example #9
def create_topic(cfg: config.Config):
    """Ensure that the required Kafka topic exists."""
    # based on code in https://github.com/confluentinc/confluent-kafka-python
    client = admin.AdminClient(cfg.get_kafka_config())
    topic = admin.NewTopic(cfg.kafka_topic,
                           num_partitions=3,
                           replication_factor=1)
    topic_map = client.create_topics([topic])

    for (topic, future) in topic_map.items():
        try:
            future.result()
            logger.info('Topic %s created', topic)
        except Exception as err:
            if err.args[0].code() != kafka.KafkaError.TOPIC_ALREADY_EXISTS:
                raise
            logger.debug('Topic %s already exists', topic)
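
Because TOPIC_ALREADY_EXISTS is swallowed, the call is safe to repeat. A short usage sketch (the Config object here is hypothetical):

cfg = config.Config()  # hypothetical: provides get_kafka_config() and kafka_topic
create_topic(cfg)  # first call creates the topic
create_topic(cfg)  # later calls only log at debug level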
Example #10
def test_topic_creation():
    """ This method recreates the test topics. """
    print("Recreate test topics.")
    topic_config = {"retention.ms": 3600000}  # store for one hour only
    res_dict = k_admin_client.create_topics(
        [kafka_admin.NewTopic(topic, num_partitions=1, replication_factor=1, config=topic_config)
         for topic in [KAFKA_TOPIC_IN_0, KAFKA_TOPIC_IN_1, KAFKA_TOPIC_OUT]])

    # Wait for each operation to finish.
    for topic, f in res_dict.items():
        try:
            f.result()  # The result itself is None
            print(f"Topic '{topic}' created")
        except Exception as e:
            print(f"Topic '{topic}' couldn't be created: {e}")
    k_admin_client.poll(3.1)  # small timeout for synchronizing

    topics = k_admin_client.list_topics(timeout=3.0).topics
    assert KAFKA_TOPIC_IN_0 in topics
    assert KAFKA_TOPIC_IN_1 in topics
    assert KAFKA_TOPIC_OUT in topics
Example #11
def check_topic_existence(connection, topic):
    """ Check topic existence and create it if needed """

    chk_topic = connection.list_topics().topics

    if chk_topic.get(topic):
        print("\nINFO: %s topic exists." % topic)
    else:
        print("\nINFO: Creating the topic %s" % topic)
        # NewTopic specifies per-topic settings for passing to
        # AdminClient.create_topics().
        setTopic = admin.NewTopic(topic,
                                  num_partitions=1,
                                  replication_factor=1)
        fs = connection.create_topics([setTopic], request_timeout=10)

        for t, f in fs.items():
            try:
                f.result()  # The result itself is None
                print("Topic {} created".format(t))
            except KafkaException as e:
                print("Failed to create topic {}: {}".format(t, e))
                sys.exit()
Example #12
    def create_topics(self):
        topic_list = []

        for topic_name in self.conf.topics:
            topic = self.conf.topics[topic_name]
            print(topic)
            topic_list.append(
                kad.NewTopic(topic.name, topic.num_partitions,
                             topic.replication_factor))

        # create_topics() takes a list, so multiple topics can be
        # created in one call.
        self.admin.create_topics(topic_list)

        sleep(3)  # give the brokers a moment before listing

        self.list_topics()
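
list_topics() is called above but not shown. A minimal sketch of a compatible method (hypothetical):

    def list_topics(self):
        metadata = self.admin.list_topics(timeout=5.0)
        for name, topic in metadata.topics.items():
            print("{} ({} partitions)".format(name, len(topic.partitions)))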
Example #13
    def create_topics(self,
                      topics,
                      number_of_partitions=3,
                      replication_factor=1):
        new_topics = [
            admin.NewTopic(topic,
                           num_partitions=number_of_partitions,
                           replication_factor=replication_factor)
            for topic in topics
        ]
        # Call create_topics to asynchronously create topics, a dict
        # of <topic,future> is returned.
        fs = self.admin_client.create_topics(new_topics)

        # Wait for operation to finish.
        # Timeouts are preferably controlled by passing request_timeout=15.0
        # to the create_topics() call.
        # All futures will finish at the same time.
        for topic, f in fs.items():
            try:
                f.result()  # The result itself is None
                print("Topic {} created".format(topic))
            except Exception as e:
                print("Failed to create topic {}: {}".format(topic, e))
Example #14
def verify_admin():
    """ Verify Admin API """

    a = admin.AdminClient({'bootstrap.servers': bootstrap_servers})
    our_topic = topic + '_admin_' + str(uuid.uuid4())
    num_partitions = 2

    topic_config = {"compression.type": "gzip"}

    #
    # First iteration: validate our_topic creation.
    # Second iteration: create topic.
    #
    for validate in (True, False):
        fs = a.create_topics(
            [admin.NewTopic(our_topic,
                            num_partitions=num_partitions,
                            config=topic_config,
                            replication_factor=1)],
            validate_only=validate,
            operation_timeout=10.0)

        for topic2, f in fs.items():
            f.result()  # trigger exception if there was an error

    #
    # Find the topic in list_topics
    #
    verify_topic_metadata(a, {our_topic: num_partitions})

    #
    # Increase the partition count
    #
    num_partitions += 3
    fs = a.create_partitions(
        [admin.NewPartitions(our_topic, new_total_count=num_partitions)],
        operation_timeout=10.0)

    for topic2, f in fs.items():
        f.result()  # trigger exception if there was an error

    #
    # Verify with list_topics.
    #
    verify_topic_metadata(a, {our_topic: num_partitions})

    def verify_config(expconfig, configs):
        """
        Verify that the config key,values in expconfig are found
        and matches the ConfigEntry in configs.
        """
        for key, expvalue in expconfig.items():
            entry = configs.get(key, None)
            assert entry is not None, "Config {} not found in returned configs".format(
                key)

            assert entry.value == str(expvalue), \
                "Config {} with value {} does not match expected value {}".format(key, entry, expvalue)

    #
    # Get current topic config
    #
    resource = admin.ConfigResource(admin.RESOURCE_TOPIC, our_topic)
    fs = a.describe_configs([resource])
    configs = fs[resource].result()  # will raise exception on failure

    # Verify config matches our expectations
    verify_config(topic_config, configs)

    #
    # Now change the config.
    #
    topic_config["file.delete.delay.ms"] = 12345
    topic_config["compression.type"] = "snappy"

    for key, value in topic_config.items():
        resource.set_config(key, value)

    fs = a.alter_configs([resource])
    fs[resource].result()  # will raise exception on failure

    #
    # Read the config back again and verify.
    #
    fs = a.describe_configs([resource])
    configs = fs[resource].result()  # will raise exception on failure

    # Verify config matches our expectations
    verify_config(topic_config, configs)

    #
    # Delete the topic
    #
    fs = a.delete_topics([our_topic])
    fs[our_topic].result()  # will raise exception on failure
    print("Topic {} marked for deletion".format(our_topic))
Example #15
def to_stream_dir(kafka_client):
    topic_name = "MgC-Topic"
    new_topic = admin.NewTopic(topic_name, 1, 1)
    logging.info(f"Creating topic {topic_name}")
    kafka_client.create_topics([new_topic])
    return topic_name
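
create_topics() only queues the request. A hedged variant that blocks until the topic actually exists, reusing the names from the example:

fs = kafka_client.create_topics([new_topic])
fs[topic_name].result()  # returns None on success, raises on failure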
Example #16
    data = res.json()
    return data


with open(os.path.join(WORKING_DIRECTORY, 'city.list.json')) as json_file:
    cities = json.load(json_file)

chile = []
for index, city in enumerate(cities):
    if city["country"] == "CL":
        chile.append(city)

logging.info(f"Total cities {len(chile)}")

calls = 0
kafka_client = admin.AdminClient(KAFKA_CONFIG)
new_topic = admin.NewTopic("weather", 1, 1)
kafka_client.create_topics([new_topic])
producer = Producer(KAFKA_CONFIG)
while True:
    for index, city in enumerate(chile):
        calls += 1

        query = current_city(city['id'], api_key)
        if query["cod"] != 200:
            logging.error(f"Error in {index} {city}")
            break
        else:
            logging.info(f"calls: {calls} city: {city['name']}")
            producer.produce("weather", pickle.dumps(query))
Example #17
def verify_admin():
    """ Verify Admin API """

    a = admin.AdminClient({'bootstrap.servers': bootstrap_servers})
    our_topic = topic + '_admin_' + str(uuid.uuid4())
    num_partitions = 2

    topic_config = {"compression.type": "gzip"}

    #
    # First iteration: validate our_topic creation.
    # Second iteration: create topic.
    #
    for validate in (True, False):
        fs = a.create_topics(
            [admin.NewTopic(our_topic,
                            num_partitions=num_partitions,
                            config=topic_config,
                            replication_factor=1)],
            validate_only=validate,
            operation_timeout=10.0)

        for topic2, f in fs.items():
            f.result()  # trigger exception if there was an error

    #
    # Find the topic in list_topics
    #
    verify_topic_metadata(a, {our_topic: num_partitions})

    #
    # Increase the partition count
    #
    num_partitions += 3
    fs = a.create_partitions(
        [admin.NewPartitions(our_topic, new_total_count=num_partitions)],
        operation_timeout=10.0)

    for topic2, f in fs.items():
        f.result()  # trigger exception if there was an error

    #
    # Verify with list_topics.
    #
    verify_topic_metadata(a, {our_topic: num_partitions})

    #
    # Verify with list_groups.
    #

    # Produce some messages
    p = confluent_kafka.Producer({"bootstrap.servers": bootstrap_servers})
    p.produce(our_topic, 'Hello Python!', headers=produce_headers)
    p.produce(our_topic, key='Just a key and headers', headers=produce_headers)

    def consume_messages(group_id):
        # Consume messages
        conf = {
            'bootstrap.servers': bootstrap_servers,
            'group.id': group_id,
            'session.timeout.ms': 6000,
            'enable.auto.commit': False,
            'on_commit': print_commit_result,
            'error_cb': error_cb,
            'auto.offset.reset': 'earliest',
            'enable.partition.eof': True
        }
        c = confluent_kafka.Consumer(conf)
        c.subscribe([our_topic])
        eof_reached = dict()
        while True:
            msg = c.poll()
            if msg is None:
                raise Exception(
                    'Got timeout from poll() without a timeout set: %s' % msg)

            if msg.error():
                if (msg.error().code() ==
                        confluent_kafka.KafkaError._PARTITION_EOF):
                    print('Reached end of %s [%d] at offset %d' %
                          (msg.topic(), msg.partition(), msg.offset()))
                    eof_reached[(msg.topic(), msg.partition())] = True
                    if len(eof_reached) == len(c.assignment()):
                        print(
                            'EOF reached for all assigned partitions: exiting')
                        break
                else:
                    print('Consumer error: %s: ignoring' % msg.error())
                    break
            # Commit offset
            c.commit(msg, asynchronous=False)

    group1 = 'test-group-1'
    group2 = 'test-group-2'
    consume_messages(group1)
    consume_messages(group2)
    # list_groups without group argument
    groups = set(group.id for group in a.list_groups(timeout=10))
    assert group1 in groups, "Consumer group {} not found".format(group1)
    assert group2 in groups, "Consumer group {} not found".format(group2)
    # list_groups with group argument
    groups = set(group.id for group in a.list_groups(group1))
    assert group1 in groups, "Consumer group {} not found".format(group1)
    groups = set(group.id for group in a.list_groups(group2))
    assert group2 in groups, "Consumer group {} not found".format(group2)

    def verify_config(expconfig, configs):
        """
        Verify that the config key,values in expconfig are found
        and matches the ConfigEntry in configs.
        """
        for key, expvalue in expconfig.items():
            entry = configs.get(key, None)
            assert entry is not None, "Config {} not found in returned configs".format(
                key)

            assert entry.value == str(expvalue), \
                "Config {} with value {} does not match expected value {}".format(key, entry, expvalue)

    #
    # Get current topic config
    #
    resource = admin.ConfigResource(admin.RESOURCE_TOPIC, our_topic)
    fs = a.describe_configs([resource])
    configs = fs[resource].result()  # will raise exception on failure

    # Verify config matches our expectations
    verify_config(topic_config, configs)

    #
    # Now change the config.
    #
    topic_config["file.delete.delay.ms"] = 12345
    topic_config["compression.type"] = "snappy"

    for key, value in topic_config.items():
        resource.set_config(key, value)

    fs = a.alter_configs([resource])
    fs[resource].result()  # will raise exception on failure

    #
    # Read the config back again and verify.
    #
    fs = a.describe_configs([resource])
    configs = fs[resource].result()  # will raise exception on failure

    # Verify config matches our expectations
    verify_config(topic_config, configs)

    #
    # Delete the topic
    #
    fs = a.delete_topics([our_topic])
    fs[our_topic].result()  # will raise exception on failure
    print("Topic {} marked for deletion".format(our_topic))
Example #18
    def enqueue(self, filename):
        """
        This method tries to post a message to the pre-defined kafka topic.
        :param filename:
        :return status False or True:
        """
        status = False

        if filename is None or len(filename) == 0:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: filename is None or invalid")
            return status
        if self.producer_instance is None:
            logging_to_console_and_syslog(
                "KafkaMsgQAPI: Producer instance is None. Trying to create one.."
            )
            if not self.__producer_connect():
                logging_to_console_and_syslog(
                    "Unable to create producer instance.")
                return status

        if not self.is_topic_created:
            try:
                if self.producer_instance.list_topics(self.topic, timeout=1.0):
                    logging_to_console_and_syslog(
                        "Found topic name = {} on the broker.".format(
                            self.topic))
                    self.is_topic_created = True
            except KafkaException:
                kafka_admin_client = admin.AdminClient(self.producer_conf)
                logging_to_console_and_syslog("Creating topic {}.".format(
                    self.topic))
                ret = kafka_admin_client.create_topics(new_topics=[
                    admin.NewTopic(topic=self.topic, num_partitions=1)
                ],
                                                       operation_timeout=1.0)
                logging_to_console_and_syslog("ret = {}".format(ret))

        # Asynchronously produce a message, the delivery report callback
        # will be triggered from poll() above, or flush() below, when the message has
        # been successfully delivered or failed permanently.
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Posting filename={} into "
            "kafka broker={}, topic={}".format(filename, self.broker_name,
                                               self.topic))
        value = filename.encode('utf-8')
        try:
            # Produce line (without newline)
            self.producer_instance.produce(
                self.topic,
                value,
                callback=ConfluentKafkaMsgQAPI.delivery_callback)
            status = True
        except BufferError:
            sys.stderr.write('%% Local producer queue is full '
                             '(%d messages awaiting delivery): try again\n' %
                             len(self.producer_instance))
            status = False
        except Exception:
            print("ConfluentKafkaMsgQAPI: Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            status = False
        else:
            event = "ConfluentKafkaMsgQAPI: Posting filename={} into " \
                    "kafka broker={}, topic={}." \
                .format(filename,
                        self.broker_name,
                        self.topic)
            logging_to_console_and_syslog(event)
            # Wait for any outstanding messages to be delivered and delivery report
            # callbacks to be triggered.
            # Serve delivery callback queue.
            # NOTE: Since produce() is an asynchronous API this poll() call
            #       will most likely not serve the delivery callback for the
            #       last produce()d message.
            self.producer_instance.poll(timeout=0.1)
            # Wait until all messages have been delivered
            # sys.stderr.write('%% Waiting for %d deliveries\n' % len(self.producer_instance))
            self.producer_instance.flush(timeout=0.1)

        return status
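
delivery_callback is referenced above but not shown. A sketch of the (err, msg) signature that confluent-kafka invokes on delivery; the body is hypothetical:

    @staticmethod
    def delivery_callback(err, msg):
        if err:
            sys.stderr.write('%% Message failed delivery: %s\n' % err)
        else:
            sys.stderr.write('%% Message delivered to %s [%d] @ %d\n' %
                             (msg.topic(), msg.partition(), msg.offset()))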
Example #19
import json
import logging
import pickle

# Imports assumed for the clients used below
from confluent_kafka import Producer, admin
from sseclient import SSEClient  # assumption: the sseclient package provides SSEClient

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(name)s.%(funcName)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S')

STREAM_PATH = "http://stream.pushshift.io/?type=submissions"
KAFKA_CONFIG = {"bootstrap.servers": "localhost:9092"}


def process_message(msg, topic, producer):
    logging.info(f"event: {msg.event} id: {msg.id}")
    data = msg.data
    json_data = json.loads(data)
    producer.produce(topic, pickle.dumps(json_data))


if __name__ == "__main__":
    kafka_client = admin.AdminClient(KAFKA_CONFIG)
    queue = SSEClient(STREAM_PATH)
    new_topic = admin.NewTopic("reddit", 1, 1)
    kafka_client.create_topics([new_topic])
    producer = Producer(KAFKA_CONFIG)
    for msg in queue:
        try:
            process_message(msg, "reddit", producer)
        except Exception as e:
            print(e)
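
A hedged consumer-side counterpart for the "reddit" topic (group id hypothetical):

from confluent_kafka import Consumer

consumer = Consumer({**KAFKA_CONFIG,
                     "group.id": "reddit-reader",
                     "auto.offset.reset": "earliest"})
consumer.subscribe(["reddit"])
while True:
    msg = consumer.poll(1.0)
    if msg is None:
        continue
    if msg.error():
        print(msg.error())
        continue
    print(pickle.loads(msg.value()).get("id"))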