def test_alter_configs_api():
    """ alter_configs() tests, these wont really do anything since there
        is no broker configured. """

    a = AdminClient({"socket.timeout.ms": 10})
    fs = a.alter_configs([ConfigResource(confluent_kafka.admin.RESOURCE_BROKER, "3",
                                         set_config={"some": "config"})])
    # ignore the result

    with pytest.raises(Exception):
        a.alter_configs(None)

    with pytest.raises(Exception):
        a.alter_configs("something")

    with pytest.raises(ValueError):
        a.alter_configs([])

    fs = a.alter_configs([ConfigResource("topic", "mytopic",
                                         set_config={"set": "this",
                                                     "and": "this"}),
                          ConfigResource(confluent_kafka.admin.RESOURCE_GROUP,
                                         "mygroup")],
                         request_timeout=0.123)

    with pytest.raises(KafkaException):
        for f in concurrent.futures.as_completed(iter(fs.values())):
            f.result(timeout=1)
    def create_topic(self, topic, conf):
        """ Create the topic if it doesn't already exist """
        admin = AdminClient(conf)
        fs = admin.create_topics([NewTopic(topic, num_partitions=2, replication_factor=3)])
        f = fs[topic]
        try:
            res = f.result()  # noqa unused variable
        except KafkaException as ex:
            if ex.args[0].code() == KafkaError.TOPIC_ALREADY_EXISTS:
                self.logger.info("Topic {} already exists: good".format(topic))
            else:
                raise
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    with pytest.raises(TypeError):
        a = AdminClient()

    a = AdminClient({"socket.timeout.ms": 10})

    a.poll(0.001)

    try:
        a.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)
Example #4
    except Exception as e:
        print("Consumer error: {}\n".format(e))
    c.close()


try:
    bs = argv[1]
    print('\nbootstrap server: {}'.format(bs))
    bootstrap_server = bs
except IndexError:
    # no bs X-D
    bootstrap_server = 'localhost:9092'
    print('⚠️  No bootstrap server defined, defaulting to {}\n'.format(
        bootstrap_server))

a = AdminClient({'bootstrap.servers': bootstrap_server})
try:
    md = a.list_topics(timeout=10)
    print("""
    Connected to bootstrap server(%s) and it returned metadata for brokers as follows:
    %s
        ---------------------
         This step just confirms that the bootstrap connection was successful. 
         For the consumer to work your client will also need to be able to resolve the broker(s) returned
            in the metadata above.
         If the host(s) shown are not accessible from where your client is running you need to change 
            your advertised.listener configuration on the Kafka broker(s).
    """ % (bootstrap_server, md.brokers))

    try:
        Produce(['foo / ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S')])
Example #5
class BatchedConsumerTest(TestCase):
    def _get_producer(self, topic):
        cluster_name = settings.KAFKA_TOPICS[topic]["cluster"]
        conf = {
            "bootstrap.servers": settings.KAFKA_CLUSTERS[cluster_name]["common"][
                "bootstrap.servers"
            ],
            "session.timeout.ms": 6000,
        }
        return Producer(conf)

    def setUp(self):
        super().setUp()
        self.events_topic = f"events-{uuid.uuid4().hex}"
        self.commit_log_topic = f"events-commit-{uuid.uuid4().hex}"
        self.override_settings_cm = override_settings(
            KAFKA_TOPICS={
                "events": {"cluster": "default", "topic": self.events_topic},
                "snuba-commit-log": {"cluster": "default", "topic": self.commit_log_topic},
            },
        )
        self.override_settings_cm.__enter__()

        cluster_options = kafka_config.get_kafka_admin_cluster_options(
            "default", {"allow.auto.create.topics": "true"}
        )
        self.admin_client = AdminClient(cluster_options)
        wait_for_topics(self.admin_client, [self.events_topic, self.commit_log_topic])

    def tearDown(self):
        super().tearDown()
        self.override_settings_cm.__exit__(None, None, None)
        self.admin_client.delete_topics([self.events_topic, self.commit_log_topic])

    @patch("sentry.eventstream.kafka.postprocessworker.dispatch_post_process_group_task")
    def test_post_process_forwarder_batch_consumer(self, dispatch_post_process_group_task):
        consumer_group = f"consumer-{uuid.uuid1().hex}"
        synchronize_commit_group = f"sync-consumer-{uuid.uuid1().hex}"

        events_producer = self._get_producer("events")
        commit_log_producer = self._get_producer("snuba-commit-log")
        message = json.dumps(kafka_message_payload()).encode()

        eventstream = KafkaEventStream()
        consumer = eventstream._build_consumer(
            consumer_group=consumer_group,
            commit_log_topic=self.commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            commit_batch_size=1,
            initial_offset_reset="earliest",
        )

        # produce message to the events topic
        events_producer.produce(self.events_topic, message)
        assert events_producer.flush(5) == 0, "events producer did not successfully flush queue"

        # Move the committed offset forward for our synchronizing group.
        commit_log_producer.produce(
            self.commit_log_topic,
            key=f"{self.events_topic}:0:{synchronize_commit_group}".encode(),
            value=f"{1}".encode(),
        )
        assert (
            commit_log_producer.flush(5) == 0
        ), "snuba-commit-log producer did not successfully flush queue"

        # Run the loop for some time
        for _ in range(3):
            consumer._run_once()
            time.sleep(1)

        # Verify that the task gets called once
        dispatch_post_process_group_task.assert_called_once_with(
            event_id="fe0ee9a2bc3b415497bad68aaf70dc7f",
            project_id=1,
            group_id=43,
            primary_hash="311ee66a5b8e697929804ceb1c456ffe",
            is_new=False,
            is_regression=None,
            is_new_group_environment=False,
        )
Example #6
    def __init__(self, kafka_hosts: List[str]):
        self.config = {
            "bootstrap.servers": ",".join(kafka_hosts),
        }
        self.client = AdminClient(self.config)
from confluent_kafka.admin import AdminClient

client = AdminClient({"bootstrap.servers": "PLAINTEXT://127.0.0.1:9092"})


def topic_exists(topic):
    """Checks if the given topic exists in Kafka"""

    topic_metadata = client.list_topics(timeout=5)
    return topic in set(t.topic for t in iter(topic_metadata.topics.values()))


def contains_substring(to_test, substr):
    _before, match, _after = to_test.partition(substr)
    return len(match) > 0


def topic_pattern_match(pattern):
    """
        Takes a string `pattern`
        Returns `True` if one or more topic names contains substring `pattern`.
        Returns `False` if not.
    """
    topic_metadata = client.list_topics()
    topics = topic_metadata.topics
    filtered_topics = {
        key: value
        for key, value in topics.items() if contains_substring(key, pattern)
    }
    return len(filtered_topics) > 0
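A minimal usage sketch for the two helpers above, assuming the broker configured for the module-level client is reachable; the topic names are placeholders:

if __name__ == "__main__":
    # Placeholder topic names - replace with topics that exist on your broker.
    print("exact topic present:", topic_exists("my-topic"))
    print("substring match:", topic_pattern_match("my"))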
    def __init__(self, kafkaAddress, schemaRegistryAddress,
                 kafkaConnectAddress, credentialPath, testVersion, enableSSL):
        self.testVersion = testVersion
        self.credentialPath = credentialPath
        with open(self.credentialPath) as f:
            credentialJson = json.load(f)
            testHost = credentialJson["host"]
            testUser = credentialJson["user"]
            testDatabase = credentialJson["database"]
            testSchema = credentialJson["schema"]
            testWarehouse = credentialJson["warehouse"]
            pk = credentialJson["encrypted_private_key"]
            pk_passphrase = credentialJson["private_key_passphrase"]

        self.TEST_DATA_FOLDER = "./test_data/"
        self.httpHeader = {
            'Content-type': 'application/json',
            'Accept': 'application/json'
        }

        self.SEND_INTERVAL = 0.01  # send a record every 10 ms
        self.VERIFY_INTERVAL = 60  # verify every 60 secs
        self.MAX_RETRY = 120  # max wait time 120 mins
        self.MAX_FLUSH_BUFFER_SIZE = 5000  # flush the producer after every 5000 queued records

        self.kafkaConnectAddress = kafkaConnectAddress
        self.schemaRegistryAddress = schemaRegistryAddress
        self.kafkaAddress = kafkaAddress

        if enableSSL:
            print(datetime.now().strftime("\n%H:%M:%S "), "=== Enable SSL ===")
            self.client_config = {
                "bootstrap.servers": kafkaAddress,
                "security.protocol": "SASL_SSL",
                "ssl.ca.location": "./crts/ca-cert",
                "sasl.mechanism": "PLAIN",
                "sasl.username": "******",
                "sasl.password": "******"
            }
        else:
            self.client_config = {"bootstrap.servers": kafkaAddress}

        self.adminClient = AdminClient(self.client_config)
        self.producer = Producer(self.client_config)
        sc_config = self.client_config
        sc_config['schema.registry.url'] = schemaRegistryAddress
        self.avroProducer = AvroProducer(sc_config)

        reg = "[^\/]*snowflakecomputing"  # find the account name
        account = re.findall(reg, testHost)
        if len(account) != 1 or len(account[0]) < 20:
            print(
                datetime.now().strftime("%H:%M:%S "),
                "Format error in 'host' field at profile.json, expecting account.snowflakecomputing.com:443"
            )

        pkb = parsePrivateKey(pk, pk_passphrase)
        self.snowflake_conn = snowflake.connector.connect(
            user=testUser,
            private_key=pkb,
            account=account[0][:-19],
            warehouse=testWarehouse,
            database=testDatabase,
            schema=testSchema)
Example #9
    def __init__(self):
        self.kafka_config = {
            'bootstrap.servers': svt.conf.get('kafka', 'bootstrap_servers')
        }
        self.admin = AdminClient(self.kafka_config)
Example #10
            if config.name in config_dict.keys():
                if config_dict[config.name] != config.value:
                    must_alter = True

    if must_alter:
        fs = admin.alter_configs(
            [ConfigResource("TOPIC", topic, set_config=config_dict)])
        for res, f in fs.items():
            f.result()  # empty, but raises exception on failure
            print("%s configuration successfully altered" % (res))


if __name__ == "__main__":
    while True:
        admin = AdminClient({'bootstrap.servers': brokers})
        print_broker_info(admin, detailed_config=True)
        create_or_ensure_topic(admin, "games", 64, 1)
        ensure_topic_config(
            admin,
            "games",
            {
                "retention.ms": "3600000"  # 1 hour?
            })
        create_or_ensure_topic(admin, "complete_games", 64, 1)
        ensure_topic_config(
            admin,
            "complete_games",
            {
                "retention.ms": "3600000"  # 1 hour?
            })
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas
        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "schema.registry.url": SCHEMA_REGISTRY_URL,
        }

        self.admin_client = AdminClient({"bootstrap.servers": BROKER_URL})

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # Configure the AvroProducer
        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        if self.topic_exists(self.topic_name):
            return

        client = AdminClient({"bootstrap.servers": BROKER_URL})
        futures = client.create_topics([
            NewTopic(topic=self.topic_name,
                     num_partitions=self.num_partitions,
                     replication_factor=self.num_replicas),
        ])

        for topic, future in futures.items():
            try:
                future.result()
                logger.info(f"{self.topic_name} is created")
            except Exception as e:
                logger.error(f"{self.topic_name} is not created: {e}")

    def topic_exists(self, topic_name):
        """Checks if the given topic exists"""
        topic_metadata = self.admin_client.list_topics(timeout=5)
        return topic_name in set(t.topic
                                 for t in iter(topic_metadata.topics.values()))

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        self.producer.flush(timeout=10)

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
Example #12
def topic_exists(topic):
    """Checks if the given topic exists in Kafka"""
    client = AdminClient({"bootstrap.servers": "PLAINTEXT://localhost:9092"})
    topic_metadata = client.list_topics(timeout=5)
    return topic in topic_metadata.topics
Example #13
    # Create Producer instance
    p = Producer({
           'bootstrap.servers': conf['bootstrap.servers'],
           'sasl.mechanisms': 'PLAIN',
           'security.protocol': 'SASL_SSL',
           'sasl.username': conf['sasl.username'],
           'sasl.password': conf['sasl.password']
    })

    # Create topic if needed
    # Examples of additional admin API functionality:
    # https://github.com/confluentinc/confluent-kafka-python/blob/master/examples/adminapi.py
    a = AdminClient({
           'bootstrap.servers': conf['bootstrap.servers'],
           'sasl.mechanisms': 'PLAIN',
           'security.protocol': 'SASL_SSL',
           'sasl.username': conf['sasl.username'],
           'sasl.password': conf['sasl.password']
    })
    fs = a.create_topics([NewTopic(
         topic,
         num_partitions=1,
         replication_factor=3
    )])
    for topic, f in fs.items():
        try:
            f.result()  # The result itself is None
            print("Topic {} created".format(topic))
        except Exception as e:
            # Continue if error code TOPIC_ALREADY_EXISTS, which may be true
            if e.args[0].code() != KafkaError.TOPIC_ALREADY_EXISTS:
def test_create_topics_api():
    """ create_topics() tests, these wont really do anything since there is no
        broker configured. """

    a = AdminClient({"socket.timeout.ms": 10})
    f = a.create_topics([NewTopic("mytopic", 3, 2)],
                        validate_only=True)
    # ignore the result

    with pytest.raises(Exception):
        a.create_topics(None)

    with pytest.raises(Exception):
        a.create_topics("mytopic")

    with pytest.raises(Exception):
        a.create_topics(["mytopic"])

    with pytest.raises(Exception):
        a.create_topics([None, "mytopic"])

    with pytest.raises(Exception):
        a.create_topics([None, NewTopic("mytopic", 1, 2)])

    fs = a.create_topics([NewTopic("mytopic", 3, 2)])
    with pytest.raises(KafkaException):
        for f in concurrent.futures.as_completed(iter(fs.values())):
            f.result(timeout=1)

    fs = a.create_topics([NewTopic("mytopic", 3,
                                   replica_assignment=[[10, 11], [0, 1, 2], [15, 20]],
                                   config={"some": "config"})])
    with pytest.raises(KafkaException):
        for f in concurrent.futures.as_completed(iter(fs.values())):
            f.result(timeout=1)

    fs = a.create_topics([NewTopic("mytopic", 3, 2),
                          NewTopic("othertopic", 1, 10),
                          NewTopic("third", 500, 1, config={"more": "config",
                                                            "anint": 13,
                                                            "config2": "val"})],
                         validate_only=True,
                         request_timeout=0.5,
                         operation_timeout=300.1)

    for f in concurrent.futures.as_completed(iter(fs.values())):
        e = f.exception(timeout=1)
        assert isinstance(e, KafkaException)
        assert e.args[0].code() == KafkaError._TIMED_OUT

    with pytest.raises(TypeError):
        a.create_topics([NewTopic("mytopic", 3, 2)],
                        validate_only="maybe")

    with pytest.raises(ValueError):
        a.create_topics([NewTopic("mytopic", 3, 2)],
                        validate_only=False,
                        request_timeout=-5)

    with pytest.raises(ValueError):
        a.create_topics([NewTopic("mytopic", 3, 2)],
                        operation_timeout=-4.12345678)

    with pytest.raises(TypeError):
        a.create_topics([NewTopic("mytopic", 3, 2)],
                        unknown_operation="it is")

    with pytest.raises(TypeError):
        a.create_topics([NewTopic("mytopic", 3, 2,
                                  config=["fails", "because not a dict"])])
def main():
    mnemonic = os.environ.get('MNEMONIC')
    if mnemonic is None:
        raise Exception('Environment MNEMONIC must be defined.')

    mnemonic = mnemonic.lower().strip()

    # get the command line arguments
    args = handle_arguments()

    # read JSON input data
    actions = read_json_input(args.actions)
    print(actions)

    cluster_config = read_json_input(args.clusterconfig)
    print(cluster_config)

    bootstrap = cluster_config[mnemonic][actions['environment']]

    ####################################################################################################
    # client options
    ####################################################################################################
    # admin_options = read_json_input(args.config)
    admin_options = read_json_input("config.json")
    admin_options.update(bootstrap)
    print(admin_options)

    consumer_options = admin_options.copy()
    consumer_options.update({'group.id': 'safe_delete'})

    producer_options = admin_options
    ####################################################################################################

    # read version from topic
    latest_applied_uid = get_latest_applied(consumer_options, UID_TOPIC_NAME,
                                            READ_TIMEOUT)

    # if not the first time, we check that we haven't applied this one yet
    if latest_applied_uid:
        if int(actions[JSON_INPUT_UID]) <= int(latest_applied_uid):
            print("Already done!")
            return False  # we do nothing if it's been applied already

    # TODO test the env (how to get the current env?)
    # required env received in --> actions[JSON_INPUT_ENV]

    # apply the actions
    a = AdminClient(admin_options)
    success, results = topics_recreate(
        a, actions[JSON_INPUT_COMMANDS][JSON_INPUT_RECREATE_TOPICS])

    print(results)

    # write uid in topic
    set_latest_applied(producer_options, UID_TOPIC_NAME,
                       str(actions[JSON_INPUT_UID]))

    return bool(success)
Example #16
    def run(self):
        logger.debug("Starting snuba query subscriber")
        self.offsets.clear()

        def on_assign(consumer, partitions):
            updated_partitions = []
            for partition in partitions:
                if self.resolve_partition_force_offset:
                    partition = self.resolve_partition_force_offset(partition)
                    updated_partitions.append(partition)

                if partition.offset == OFFSET_INVALID:
                    updated_offset = None
                else:
                    updated_offset = partition.offset
                self.offsets[partition.partition] = updated_offset
            if updated_partitions:
                self.consumer.assign(updated_partitions)
            logger.info(
                "query-subscription-consumer.on_assign",
                extra={
                    "offsets": str(self.offsets),
                    "partitions": str(partitions),
                },
            )

        def on_revoke(consumer, partitions):
            partition_numbers = [
                partition.partition for partition in partitions
            ]
            self.commit_offsets(partition_numbers)
            for partition_number in partition_numbers:
                self.offsets.pop(partition_number, None)
            logger.info(
                "query-subscription-consumer.on_revoke",
                extra={
                    "offsets": str(self.offsets),
                    "partitions": str(partitions),
                },
            )

        self.consumer = Consumer(self.cluster_options)
        if settings.KAFKA_CONSUMER_AUTO_CREATE_TOPICS:
            # This is required for confluent-kafka>=1.5.0, otherwise the topics will
            # not be automatically created.
            admin_client = AdminClient(self.admin_cluster_options)
            wait_for_topics(admin_client, [self.topic])

        self.consumer.subscribe([self.topic],
                                on_assign=on_assign,
                                on_revoke=on_revoke)

        try:
            i = 0
            while True:
                message = self.consumer.poll(0.1)
                if message is None:
                    continue

                error = message.error()
                if error is not None:
                    raise KafkaException(error)

                i = i + 1

                with sentry_sdk.start_transaction(
                        op="handle_message",
                        name="query_subscription_consumer_process_message",
                        sampled=True,
                ), metrics.timer("snuba_query_subscriber.handle_message"):
                    self.handle_message(message)

                # Track latest completed message here, for use in `shutdown` handler.
                self.offsets[message.partition()] = message.offset() + 1

                if i % self.commit_batch_size == 0:
                    logger.debug("Committing offsets")
                    self.commit_offsets()
        except KeyboardInterrupt:
            pass

        self.shutdown()
Example #17
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        # :: Configure the broker properties below. Reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "schema.registry.url": SCHEMA_REGISTRY_URL
        }

        self.client = AdminClient({"bootstrap.servers": BROKER_URL})
        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # :: Configure the AvroProducer
        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""

        # :: Code that creates the topic for this producer if it does not already exist on
        # the Kafka Broker.
        logger.info(
            "-------------- 1] Check if topic exists in kafka (Extra check)  -----------------"
        )
        #exists = False
        exists = self.topic_exists()
        logger.info(f"Topic {self.topic_name} exists: {exists}")
        #         client = AdminClient({"bootstrap.servers": BROKER_URL})
        #         topic_meta = client.list_topics(timeout = 50)
        #         print(str(topic_meta.topics))

        if exists is False:
            print(self.topic_name)
            logger.info("-------------- 2] Topic creation -----------------")
            futures = self.client.create_topics([
                NewTopic(topic=self.topic_name,
                         num_partitions=self.num_partitions,
                         replication_factor=self.num_replicas,
                         config={"compression.type": "lz4"})
            ])

            for topic, future in futures.items():
                try:
                    future.result()
                    print("topic created")
                except Exception as e:
                    print(f"failed to create topic {self.topic_name}: {e}")

    def topic_exists(self):
        #         print("--------------")
        #         client = AdminClient({'bootstrap.servers': self.broker_properties["bootstrap.servers"]})
        topic_metadata = self.client.list_topics()
        logger.info(str(topic_metadata))
        return topic_metadata.topics.get(self.topic_name) is not None

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        # :: Cleanup code for the Producer here
        if self.producer is not None:
            self.producer.flush()
        logger.info("Producer close - with cleanup")

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
def test_create_partitions_api():
    """ create_partitions() tests, these wont really do anything since there
        is no broker configured. """

    a = AdminClient({"socket.timeout.ms": 10})
    fs = a.create_partitions([NewPartitions("mytopic", 50)])
    # ignore the result

    with pytest.raises(TypeError):
        a.create_partitions(None)

    with pytest.raises(Exception):
        a.create_partitions("mytopic")

    with pytest.raises(Exception):
        a.create_partitions([])

    with pytest.raises(Exception):
        a.create_partitions([None, NewPartitions("mytopic", 2)])

    fs = a.create_partitions([NewPartitions("mytopic", 100),
                              NewPartitions("other", 3,
                                            replica_assignment=[[10, 11], [15, 20]])])
    with pytest.raises(KafkaException):
        for f in concurrent.futures.as_completed(iter(fs.values())):
            f.result(timeout=1)

    fs = a.create_partitions([NewPartitions("mytopic", 2),
                              NewPartitions("othertopic", 10),
                              NewPartitions("third", 55,
                                            replica_assignment=[[1, 2, 3, 4, 5, 6, 7], [2]])],
                             validate_only=True,
                             request_timeout=0.5,
                             operation_timeout=300.1)

    for f in concurrent.futures.as_completed(iter(fs.values())):
        e = f.exception(timeout=1)
        assert isinstance(e, KafkaException)
        assert e.args[0].code() == KafkaError._TIMED_OUT
Example #19
    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        AdminClient(self.broker_properties).create_topics(
            [NewTopic(self.topic_name, num_partitions=1)])
class KafkaTest:
    def __init__(self, kafkaAddress, schemaRegistryAddress,
                 kafkaConnectAddress, credentialPath, testVersion, enableSSL):
        self.testVersion = testVersion
        self.credentialPath = credentialPath
        with open(self.credentialPath) as f:
            credentialJson = json.load(f)
            testHost = credentialJson["host"]
            testUser = credentialJson["user"]
            testDatabase = credentialJson["database"]
            testSchema = credentialJson["schema"]
            testWarehouse = credentialJson["warehouse"]
            pk = credentialJson["encrypted_private_key"]
            pk_passphrase = credentialJson["private_key_passphrase"]

        self.TEST_DATA_FOLDER = "./test_data/"
        self.httpHeader = {
            'Content-type': 'application/json',
            'Accept': 'application/json'
        }

        self.SEND_INTERVAL = 0.01  # send a record every 10 ms
        self.VERIFY_INTERVAL = 60  # verify every 60 secs
        self.MAX_RETRY = 120  # max wait time 120 mins
        self.MAX_FLUSH_BUFFER_SIZE = 5000  # flush the producer after every 5000 queued records

        self.kafkaConnectAddress = kafkaConnectAddress
        self.schemaRegistryAddress = schemaRegistryAddress
        self.kafkaAddress = kafkaAddress

        if enableSSL:
            print(datetime.now().strftime("\n%H:%M:%S "), "=== Enable SSL ===")
            self.client_config = {
                "bootstrap.servers": kafkaAddress,
                "security.protocol": "SASL_SSL",
                "ssl.ca.location": "./crts/ca-cert",
                "sasl.mechanism": "PLAIN",
                "sasl.username": "******",
                "sasl.password": "******"
            }
        else:
            self.client_config = {"bootstrap.servers": kafkaAddress}

        self.adminClient = AdminClient(self.client_config)
        self.producer = Producer(self.client_config)
        sc_config = self.client_config
        sc_config['schema.registry.url'] = schemaRegistryAddress
        self.avroProducer = AvroProducer(sc_config)

        reg = "[^\/]*snowflakecomputing"  # find the account name
        account = re.findall(reg, testHost)
        if len(account) != 1 or len(account[0]) < 20:
            print(
                datetime.now().strftime("%H:%M:%S "),
                "Format error in 'host' field at profile.json, expecting account.snowflakecomputing.com:443"
            )

        pkb = parsePrivateKey(pk, pk_passphrase)
        self.snowflake_conn = snowflake.connector.connect(
            user=testUser,
            private_key=pkb,
            account=account[0][:-19],
            warehouse=testWarehouse,
            database=testDatabase,
            schema=testSchema)

    def msgSendInterval(self):
        # sleep self.SEND_INTERVAL before sending the next message
        sleep(self.SEND_INTERVAL)

    def startConnectorWaitTime(self):
        sleep(10)

    def verifyWaitTime(self):
        # sleep self.VERIFY_INTERVAL seconds before verifying the result in the Snowflake DB
        print(datetime.now().strftime("\n%H:%M:%S "),
              "=== Sleep {} secs before verify result in Snowflake DB ===".
              format(self.VERIFY_INTERVAL),
              flush=True)
        sleep(self.VERIFY_INTERVAL)

    def verifyWithRetry(self, func, round):
        retryNum = 0
        while retryNum < self.MAX_RETRY:
            try:
                func(round)
                break
            except test_suit.test_utils.ResetAndRetry:
                retryNum = 0
                print(datetime.now().strftime("%H:%M:%S "),
                      "=== Reset retry count and retry ===",
                      flush=True)
            except test_suit.test_utils.RetryableError as e:
                retryNum += 1
                print(datetime.now().strftime("%H:%M:%S "),
                      "=== Failed, retryable. {}===".format(e.msg),
                      flush=True)
                self.verifyWaitTime()
            except test_suit.test_utils.NonRetryableError as e:
                print(datetime.now().strftime("\n%H:%M:%S "),
                      "=== Non retryable error raised ===\n{}".format(e.msg),
                      flush=True)
                raise test_suit.test_utils.NonRetryableError()
            except snowflake.connector.errors.ProgrammingError as e:
                if e.errno == 2003:
                    retryNum += 1
                    print(datetime.now().strftime("%H:%M:%S "),
                          "=== Failed, table not created ===",
                          flush=True)
                    self.verifyWaitTime()
                else:
                    raise
        if retryNum == self.MAX_RETRY:
            print(datetime.now().strftime("\n%H:%M:%S "),
                  "=== Max retry exceeded ===",
                  flush=True)
            raise test_suit.test_utils.NonRetryableError()

    def createTopics(self, topicName, partitionNum=1, replicationNum=1):
        self.adminClient.create_topics(
            [NewTopic(topicName, partitionNum, replicationNum)])

    def sendBytesData(self, topic, value, key=[], partition=0, headers=[]):
        if len(key) == 0:
            for i, v in enumerate(value):
                self.producer.produce(topic,
                                      value=v,
                                      partition=partition,
                                      headers=headers)
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.producer.flush()
        else:
            for i, (k, v) in enumerate(zip(key, value)):
                self.producer.produce(topic,
                                      value=v,
                                      key=k,
                                      partition=partition,
                                      headers=headers)
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.producer.flush()
        self.producer.flush()

    def sendAvroSRData(self,
                       topic,
                       value,
                       value_schema,
                       key=[],
                       key_schema="",
                       partition=0):
        if len(key) == 0:
            for i, v in enumerate(value):
                self.avroProducer.produce(topic=topic,
                                          value=v,
                                          value_schema=value_schema,
                                          partition=partition)
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.producer.flush()
        else:
            for i, (k, v) in enumerate(zip(key, value)):
                self.avroProducer.produce(topic=topic,
                                          value=v,
                                          value_schema=value_schema,
                                          key=k,
                                          key_schema=key_schema,
                                          partition=partition)
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.producer.flush()
        self.avroProducer.flush()

    def cleanTableStagePipe(self,
                            connectorName,
                            topicName="",
                            partitionNumber=1):
        if topicName == "":
            topicName = connectorName
        tableName = topicName
        stageName = "SNOWFLAKE_KAFKA_CONNECTOR_{}_STAGE_{}".format(
            connectorName, topicName)

        print(datetime.now().strftime("\n%H:%M:%S "),
              "=== Drop table {} ===".format(tableName))
        self.snowflake_conn.cursor().execute(
            "DROP table IF EXISTS {}".format(tableName))

        print(datetime.now().strftime("%H:%M:%S "),
              "=== Drop stage {} ===".format(stageName))
        self.snowflake_conn.cursor().execute(
            "DROP stage IF EXISTS {}".format(stageName))

        for p in range(partitionNumber):
            pipeName = "SNOWFLAKE_KAFKA_CONNECTOR_{}_PIPE_{}_{}".format(
                connectorName, topicName, p)
            print(datetime.now().strftime("%H:%M:%S "),
                  "=== Drop pipe {} ===".format(pipeName))
            self.snowflake_conn.cursor().execute(
                "DROP pipe IF EXISTS {}".format(pipeName))

        print(datetime.now().strftime("%H:%M:%S "), "=== Done ===", flush=True)

    def verifyStageIsCleaned(self, connectorName, topicName=""):
        if topicName == "":
            topicName = connectorName
        stageName = "SNOWFLAKE_KAFKA_CONNECTOR_{}_STAGE_{}".format(
            connectorName, topicName)

        res = self.snowflake_conn.cursor().execute(
            "list @{}".format(stageName)).fetchone()
        if res is not None:
            raise RetryableError("stage not cleaned up ")

    # validate content match gold regex
    def regexMatchOneLine(self, res, goldMetaRegex, goldContentRegex):
        meta = res[0].replace(" ", "").replace("\n", "")
        content = res[1].replace(" ", "").replace("\n", "")
        goldMetaRegex = "^" + goldMetaRegex.replace("\"", "\\\"").replace("{", "\\{").replace("}", "\\}") \
            .replace("[", "\\[").replace("]", "\\]").replace("+", "\\+") + "$"
        goldContentRegex = "^" + goldContentRegex.replace("\"", "\\\"").replace("{", "\\{").replace("}", "\\}") \
            .replace("[", "\\[").replace("]", "\\]").replace("+", "\\+") + "$"
        if re.search(goldMetaRegex, meta) is None:
            raise test_suit.test_utils.NonRetryableError(
                "Record meta data:\n{}\ndoes not match gold regex "
                "label:\n{}".format(meta, goldMetaRegex))
        if re.search(goldContentRegex, content) is None:
            raise test_suit.test_utils.NonRetryableError(
                "Record content:\n{}\ndoes not match gold regex "
                "label:\n{}".format(content, goldContentRegex))

    def updateConnectorConfig(self, fileName, connectorName, configMap):
        with open('./rest_request_generated/' + fileName + '.json') as f:
            c = json.load(f)
            config = c['config']
            for k in configMap:
                config[k] = configMap[k]
        requestURL = "http://{}/connectors/{}/config".format(
            self.kafkaConnectAddress, connectorName)
        r = requests.put(requestURL, json=config, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r,
              " updated connector config")

    def restartConnector(self, connectorName):
        requestURL = "http://{}/connectors/{}/restart".format(
            self.kafkaConnectAddress, connectorName)
        r = requests.post(requestURL, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " restart connector")

    def pauseConnector(self, connectorName):
        requestURL = "http://{}/connectors/{}/pause".format(
            self.kafkaConnectAddress, connectorName)
        r = requests.put(requestURL, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " pause connector")

    def resumeConnector(self, connectorName):
        requestURL = "http://{}/connectors/{}/resume".format(
            self.kafkaConnectAddress, connectorName)
        r = requests.put(requestURL, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " resume connector")

    def deleteConnector(self, connectorName):
        requestURL = "http://{}/connectors/{}".format(self.kafkaConnectAddress,
                                                      connectorName)
        r = requests.delete(requestURL, headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "), r, " delete connector")

    def closeConnector(self, fileName, nameSalt):
        snowflake_connector_name = fileName.split(".")[0] + nameSalt
        delete_url = "http://{}/connectors/{}".format(
            self.kafkaConnectAddress, snowflake_connector_name)
        print(datetime.now().strftime("\n%H:%M:%S "),
              "=== Delete connector {} ===".format(snowflake_connector_name))
        code = requests.delete(delete_url, timeout=10).status_code
        print(datetime.now().strftime("%H:%M:%S "), code)

    def createConnector(self, fileName, nameSalt):
        rest_template_path = "./rest_request_template"
        rest_generate_path = "./rest_request_generated"

        with open(self.credentialPath) as f:
            credentialJson = json.load(f)
            testHost = credentialJson["host"]
            testUser = credentialJson["user"]
            testDatabase = credentialJson["database"]
            testSchema = credentialJson["schema"]
            pk = credentialJson["private_key"]

        print(
            datetime.now().strftime("\n%H:%M:%S "),
            "=== generate sink connector rest reqeuest from {} ===".format(
                rest_template_path))
        if not os.path.exists(rest_generate_path):
            os.makedirs(rest_generate_path)
        snowflake_connector_name = fileName.split(".")[0] + nameSalt

        print(
            datetime.now().strftime("\n%H:%M:%S "),
            "=== Connector Config JSON: {}, Connector Name: {} ===".format(
                fileName, snowflake_connector_name))
        with open("{}/{}".format(rest_template_path, fileName), 'r') as f:
            config = f.read() \
                .replace("SNOWFLAKE_PRIVATE_KEY", pk) \
                .replace("SNOWFLAKE_HOST", testHost) \
                .replace("SNOWFLAKE_USER", testUser) \
                .replace("SNOWFLAKE_DATABASE", testDatabase) \
                .replace("SNOWFLAKE_SCHEMA", testSchema) \
                .replace("CONFLUENT_SCHEMA_REGISTRY", self.schemaRegistryAddress) \
                .replace("SNOWFLAKE_TEST_TOPIC", snowflake_connector_name) \
                .replace("SNOWFLAKE_CONNECTOR_NAME", snowflake_connector_name)
            with open("{}/{}".format(rest_generate_path, fileName), 'w') as fw:
                fw.write(config)

        MAX_RETRY = 20
        retry = 0
        delete_url = "http://{}/connectors/{}".format(
            self.kafkaConnectAddress, snowflake_connector_name)
        post_url = "http://{}/connectors".format(self.kafkaConnectAddress)
        while retry < MAX_RETRY:
            try:
                code = requests.delete(delete_url, timeout=10).status_code
                if code == 404 or code == 200 or code == 201:
                    break
            except:
                pass
            print(
                datetime.now().strftime("\n%H:%M:%S "),
                "=== sleep for 30 secs to wait for kafka connect to accept connection ==="
            )
            sleep(30)
            retry += 1
        if retry == MAX_RETRY:
            errorExit(
                "\n=== max retry exceeded, kafka connect not ready in 10 mins ==="
            )

        r = requests.post(post_url,
                          json=json.loads(config),
                          headers=self.httpHeader)
        print(datetime.now().strftime("%H:%M:%S "),
              json.loads(r.content.decode("utf-8"))["name"], r.status_code)
    def _create_admin_client(self):
        connection_config = {'bootstrap.servers': self.__kafka_hosts}
        client = AdminClient(connection_config)
        return client
    def test_scrape_metadata_on_connection_exception_should_re_raise(self):
        test_config = {'bootstrap.servers': self.__HOST}
        admin_client = AdminClient(test_config)
        scraper = MetadataScraper(self.__HOST, admin_client)
        self.assertRaises(Exception, scraper.get_metadata)
Example #23
        if check and arr[0] >= arr[1] // 2:
            fw.write(f"{line}")

        line = f.readline()

    f.close()
    fw.close()

consumer = Consumer({
    'bootstrap.servers': config['DEFAULT']['KafkaServer'],
    'group.id': 'mygroup',
    'client.id': 'client-1',
    'enable.auto.commit': True,
    'session.timeout.ms': 6000,
    'default.topic.config': {
        'auto.offset.reset': 'smallest'
    },
})
admin_client = AdminClient(
    {'bootstrap.servers': config["DEFAULT"]["KafkaServer"]})
clusterMetaData = consumer.list_topics()

# gather topic name
topics_delete = []
for key in clusterMetaData.topics:
    if topic_header in key:
        topics_delete.append(key)

# delete finished topics
admin_client.delete_topics(topics_delete)
Example #24
class ConfluentAdminClient:

    def __init__(self, kafka_hosts: List[str]):
        self.config = {
            "bootstrap.servers": ",".join(kafka_hosts),
        }
        self.client = AdminClient(self.config)

    @property
    def consumer_config(self):
        return {
            **self.config,
            "max.poll.interval.ms": 10000,
            "auto.offset.reset": "smallest",
            "enable.auto.commit": False,
        }

    @staticmethod
    def _group_offsets(
            offsets: List[Offset]) -> Dict[str, List[Offset]]:
        d: DefaultDict[str, List[Offset]] = defaultdict(list)
        for offset in offsets:
            d[offset.consumer_group].append(offset)
        return d

    @staticmethod
    def _get_offsets(consumer_group, partitions, config,
                     timeout=CONSUMER_OFFSET_TIMEOUT) -> List[Offset]:
        offsets = []
        consumer = ck.Consumer({**config, 'group.id': consumer_group})
        for tp in consumer.committed(partitions, timeout=timeout):
            if tp.offset == -1001:
                continue
            offset = Offset(consumer_group, tp.topic,
                            tp.partition, tp.offset)
            offsets.append(offset)
        consumer.close()
        return offsets

    def _threaded_get_offsets(self, partitions, consumer_groups,
                              no_of_threads) -> List[Offset]:
        offsets: List[Offset] = []
        with ThreadPoolExecutor(max_workers=no_of_threads) as executor:
            futures = {executor.submit(ConfluentAdminClient._get_offsets,
                                       cg, partitions, self.config): cg
                       for cg in consumer_groups}
            for future in as_completed(futures):
                cg = futures[future]
                try:
                    _offsets = future.result()
                except Exception as exc:
                    msg = f'Encountered error when reading consumer offset ' \
                          f'for consumer group: {cg}'
                    raise ConsumerOffsetError(msg) from exc
                offsets.extend(_offsets)
        return offsets

    def get_consumer_groups(self) -> List[str]:
        consumer_groups = self.client.list_groups()
        return [g.id for g in consumer_groups]

    def get_consumer_offsets(
        self, topics: List[str], ignore_group_regex: str = IGNORE_GROUP_REGEX,
        no_of_threads: int = 1
            ) -> List[Offset]:
        broker_topics = self.client.list_topics().topics
        partitions = []
        for topic_name in topics:
            partitions.extend([ck.TopicPartition(topic_name, k)
                               for k in broker_topics[topic_name].partitions])
        consumer_groups = []
        logger.info('Fetch consumer groups from broker')
        for consumer_group in self.get_consumer_groups():
            if re.findall(ignore_group_regex, consumer_group):
                logger.debug(f'Ignoring consumer group: {consumer_group}')
                continue
            consumer_groups.append(consumer_group)
        logger.info(f'Fetch consumer offsets for {len(consumer_groups)} '
                    'consumer groups')
        if no_of_threads == 1:
            offsets: List[Offset] = []
            for cg in consumer_groups:
                _offsets = ConfluentAdminClient._get_offsets(
                    cg, partitions, self.consumer_config,)
                offsets.extend(_offsets)
            return offsets
        return self._threaded_get_offsets(partitions, consumer_groups,
                                          no_of_threads)

    def set_consumer_offsets(self, offsets: List[Offset]):
        grouped_offsets = ConfluentAdminClient._group_offsets(
            offsets)
        for consumer_group, _offsets in grouped_offsets.items():
            consumer = ck.Consumer({**self.consumer_config,
                                    'group.id': consumer_group})
            tps = [ck.TopicPartition(o.topic, o.partition, o.value)
                   for o in _offsets]
            logger.info(f'Set {len(tps)} offsets for consumer '
                        f'group: {consumer_group}')
            consumer.commit(offsets=tps, asynchronous=False)
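A hedged usage sketch for the ConfluentAdminClient class above; the broker address and topic name are placeholders, and it assumes the Offset type and constants imported by the original module:

# Hypothetical usage; broker address and topic name are placeholders.
admin = ConfluentAdminClient(["localhost:9092"])
offsets = admin.get_consumer_offsets(["my-topic"], no_of_threads=4)
for offset in offsets:
    print(offset)
# The captured offsets could later be restored, e.g. after recreating the topic.
admin.set_consumer_offsets(offsets)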
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas

        #
        #
        # TODO: Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        #
        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "schema.registry.url": SCHEMA_REGISTRY_URL
        }

        self.admin_client = AdminClient({"bootstrap.servers": BROKER_URL})

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        # TODO: Configure the AvroProducer
        self.producer = AvroProducer(self.broker_properties,
                                     default_key_schema=self.key_schema,
                                     default_value_schema=self.value_schema)

    def topic_exists(self):
        """Checks if the given topic exists"""
        topic_metadata = self.admin_client.list_topics(timeout=5)
        return self.topic_name in set(
            t.topic for t in iter(topic_metadata.topics.values()))

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""
        #
        #
        # TODO: Write code that creates the topic for this producer if it does not already exist on
        # the Kafka Broker.
        #
        #
        if self.topic_exists():
            logger.info(f"topic {self.topic_name} already exists - skipping creation")
            return

        futures = self.admin_client.create_topics([
            NewTopic(topic=self.topic_name,
                     num_partitions=self.num_partitions,
                     replication_factor=self.num_replicas),
        ])
        for topic, future in futures.items():
            try:
                future.result()
                logger.info(f"topic {topic} created")
            except Exception as e:
                logger.error(f"failed to create topic {topic}: {e}")

    def time_millis(self):
        return int(round(time.time() * 1000))

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""
        #
        #
        # TODO: Write cleanup code for the Producer here
        #
        #
        logger.info("producer close incomplete - skipping")
Example #26
import click

from confluent_kafka.admin import AdminClient, NewTopic
admin_client = AdminClient({"bootstrap.servers": "localhost:9092"})


@click.command()
@click.argument("topic_name")
@click.option("--partitions", default=1)
@click.option("--replication", default=1)
def main(topic_name, partitions, replication):
    print(f"creating topic -- {topic_name}")
    topic_list = []
    topic_list.append(NewTopic(topic_name, partitions, replication))
    admin_client.create_topics(topic_list)


if __name__ == "__main__":
    main()
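create_topics() only queues the request and returns per-topic futures; a sketch of a variant that also surfaces errors (an assumption, not part of the original CLI) might look like this:

# Hypothetical variant of main() that waits on the returned futures.
def main_checked(topic_name, partitions, replication):
    futures = admin_client.create_topics(
        [NewTopic(topic_name, partitions, replication)])
    for name, future in futures.items():
        try:
            future.result()  # None on success, raises on failure
            print(f"created topic -- {name}")
        except Exception as e:
            print(f"could not create topic {name}: {e}")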
class TestImport(unittest.TestCase):
    def setUp(self):
        self.broker = "kafka:9092"
        # self.broker = "localhost:9093"
        self.admin_client = AdminClient({'bootstrap.servers': self.broker})

    def test_000_it(self):
        import py_elastic_int_testing
        assert_that(py_elastic_int_testing, hamcrest.not_none())

    def test_001_list_topics(self):
        # wait 90 seconds until broker is up

        result = self.admin_client.list_topics(timeout=90)

        assert_that(result.orig_broker_name, equal_to("%s/1" % self.broker))

    def test_002_test_create_topic(self):

        new_topics = [NewTopic("testrun", 1, 1)]

        result_futures = self.admin_client.create_topics(new_topics,
                                                         request_timeout=15.0)

        for topic, f in result_futures.items():
            try:
                f.result()  # The result itself is None
                self.assertTrue(True)
            except Exception as e:
                assert_that(str(e), contains_string("already exists."))

    def test_003_create_topic_with_config(self):
        config = {
            "delete.retention.ms": 3600,
            "retention.bytes": 10000,
            "retention.ms": 3600,
        }
        new_topics = [NewTopic("test_config", 1, 1, config=config)]

        result_futures = self.admin_client.create_topics(new_topics=new_topics)

        for topic, f in result_futures.items():
            try:
                f.result()  # The result itself is None
                self.assertTrue(True)
            except Exception as e:
                assert_that(str(e), contains_string("already exists."))

    def test_get_config(self):
        config_resource = [ConfigResource(RESOURCE_TOPIC, "test_config")]
        futures = self.admin_client.describe_configs(config_resource)
        for res, f in futures.items():
            try:
                configs = f.result()

                assert_that(configs["retention.ms"].value, equal_to("3600"))
                assert_that(configs["retention.bytes"].value, equal_to("10000"))
                assert_that(configs["delete.retention.ms"].value,
                            equal_to("3600"))

            except KafkaException as e:
                raise
            except Exception:
                raise

    def test_delete_topics(self):
        topics = ["testrun", "testrun2"]
        futures = self.admin_client.delete_topics(topics, operation_timeout=30)

        for topic, f in futures.items():
            result = f.result()
            self.assertIsNone(result)
Example #28
def launch(api_key, port, data_directory=None, topic='announce'):

    logging.basicConfig(level=logging.DEBUG)

    # Initialize the database
    if data_directory is None:
        data_directory = os.getcwd()
    db = tinydb.TinyDB(os.path.join(data_directory, 'run_db.json'))

    logging.info('Constructing local consumer')
    consumer = Consumer({
        'bootstrap.servers': 'localhost:' + str(port),
        'group.id': 0,
        'auto.offset.reset': 'earliest',
        'sasl.username': '******',
        'sasl.password': api_key,
        'security.protocol': 'sasl_plaintext',
        'sasl.mechanism': 'PLAIN',
    })

    adm_client = AdminClient({
        'bootstrap.servers': 'localhost:' + str(port),
        'group.id': 0,
        'auto.offset.reset': 'earliest',
        'sasl.username': '******',
        'sasl.password': api_key,
        'security.protocol': 'sasl_plaintext',
        'sasl.mechanism': 'PLAIN',
    })

    # Clean up the Kafka board
    try:
        results = adm_client.delete_topics(
            list(consumer.list_topics().topics.keys()))
        for v in results.values():
            v.result()
    except ValueError:
        pass

    # Create the announce topic
    try:
        logging.info('Setting up announce topic')
        tp_future = adm_client.create_topics([NewTopic('announce', 1, 1)])
        tp_future['announce'].result()  # Wait for the future
        logging.info('Topic created!')
    except KafkaException as ex:
        logging.warning(ex)

    logging.info('Connecting to topic: %s', topic)
    consumer.subscribe([topic])

    # Main consumer loop
    while True:
        msg = consumer.poll(0.1)

        # Validate the message is good
        if msg is None:
            continue
        if msg.error():
            logging.error('Topic Consumer Error: %s', msg.error())
            continue
        logging.info('Processing Message')
        process_message(msg.value(), db, data_directory, api_key, port,
                        adm_client)
    def setUp(self):
        self.broker = "kafka:9092"
        # self.broker = "localhost:9093"
        self.admin_client = AdminClient({'bootstrap.servers': self.broker})
Example #30
            ws.close()


def onClose(ws):
    print("connection Closed")



if __name__ == "__main__":
    # create the producer log file
    logging.basicConfig(filename="producer_log.log", level=logging.INFO)

    # sanity check that ZooKeeper and Kafka are running properly
    conf = {'bootstrap.servers': 'localhost:9092'}
    admin_client = AdminClient(conf)
    topics = admin_client.list_topics().topics
    if not topics:
        logging.error("no topics returned from the broker; is Kafka running?")
    else:
        logging.info(topics)
    future = []
    #websocket for data stream
    startTime = time.time()
    socket = "wss://data.alpaca.markets/stream"
    ws = websocket.WebSocketApp(socket,
                                on_open=onOpen,
                                on_close=onClose,
                                on_message=onMsg)
    try:
        producer = KafkaProducer(bootstrap_servers="localhost:9092")
Example #31
def bootstrap(
    *,
    bootstrap_server: Sequence[str],
    kafka: bool,
    migrate: bool,
    force: bool,
    log_level: Optional[str] = None,
) -> None:
    """
    Warning: Not intended to be used in production yet.
    """
    if not force:
        raise click.ClickException("Must use --force to run")

    setup_logging(log_level)

    logger = logging.getLogger("snuba.bootstrap")

    import time

    if kafka:
        logger.debug("Using Kafka with %r", bootstrap_server)
        from confluent_kafka.admin import AdminClient, NewTopic

        attempts = 0
        while True:
            try:
                logger.debug("Attempting to connect to Kafka (attempt %d)", attempts)
                client = AdminClient(
                    get_default_kafka_configuration(
                        bootstrap_servers=bootstrap_server,
                        override_params={"socket.timeout.ms": 1000},
                    )
                )
                client.list_topics(timeout=1)
                break
            except Exception as e:
                logger.error(
                    "Connection to Kafka failed (attempt %d)", attempts, exc_info=e
                )
                attempts += 1
                if attempts == 60:
                    raise
                time.sleep(1)

        topics = {}
        for name in ACTIVE_DATASET_NAMES:
            dataset = get_dataset(name)
            for entity in dataset.get_all_entities():
                writable_storage = entity.get_writable_storage()
                if writable_storage:
                    table_writer = writable_storage.get_table_writer()
                    stream_loader = table_writer.get_stream_loader()
                    for topic_spec in stream_loader.get_all_topic_specs():
                        if topic_spec.topic_name in topics:
                            continue
                        logger.debug(
                            "Adding topic %s to creation list", topic_spec.topic_name
                        )
                        topics[topic_spec.topic_name] = NewTopic(
                            topic_spec.topic_name,
                            num_partitions=topic_spec.partitions_number,
                            replication_factor=topic_spec.replication_factor,
                        )

        logger.debug("Initiating topic creation")
        for topic, future in client.create_topics(
            list(topics.values()), operation_timeout=1
        ).items():
            try:
                future.result()
                logger.info("Topic %s created", topic)
            except Exception as e:
                logger.error("Failed to create topic %s", topic, exc_info=e)

    if migrate:
        check_clickhouse_connections()
        Runner().run_all(force=True)
Example #32
def main():
    client = AdminClient({'bootstrap.servers': 'localhost:9092'})
    try:
        asyncio.run(consume("com.udacity.police-calls"))
    except KeyboardInterrupt as e:
        print('shutting down')
Example #33
def topic_exists(topic):
    """Checks if the given topic exists in Kafka"""
    client = AdminClient({"bootstrap.servers": os.getenv('KAFKA_URL')})
    topic_metadata = client.list_topics(timeout=5)
    return topic in set(t.topic for t in iter(topic_metadata.topics.values()))
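
# A minimal usage sketch (not part of the original snippet): create the topic
# only when it is missing. The topic name and replication settings below are
# illustrative assumptions; KAFKA_URL is read from the environment as above.
import os

from confluent_kafka.admin import AdminClient, NewTopic

if not topic_exists("com.example.purchases"):
    client = AdminClient({"bootstrap.servers": os.getenv("KAFKA_URL")})
    fs = client.create_topics(
        [NewTopic("com.example.purchases", num_partitions=1, replication_factor=1)])
    fs["com.example.purchases"].result()  # block until created; raises on failure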
Example #34
def main():
    admin_client = AdminClient({"bootstrap.servers": "localhost:9092"})
    metadata = admin_client.list_topics()
    for topic in metadata.topics.items():
        print(topic)
Example #35
class Producer:
    """Defines and provides common functionality amongst Producers"""

    # Tracks existing topics across all Producer instances
    existing_topics = set([])

    BROKER_URL = "PLAINTEXT://localhost:9092,PLAINTEXT://localhost:9093,PLAINTEXT://localhost:9094"
    SCHEMA_REGISTRY_URL = "http://localhost:8081"

    client = AdminClient({"bootstrap.servers": BROKER_URL})

    def __init__(
        self,
        topic_name,
        key_schema,
        value_schema=None,
        num_partitions=1,
        num_replicas=1,
    ):
        """Initializes a Producer object with basic settings"""
        self.topic_name = topic_name
        self.key_schema = key_schema
        self.value_schema = value_schema
        self.num_partitions = num_partitions
        self.num_replicas = num_replicas
        self.broker_properties = {
            "bootstrap.servers": self.BROKER_URL,
            "schema.registry.url": self.SCHEMA_REGISTRY_URL
        }

        # If the topic does not already exist, try to create it
        if self.topic_name not in Producer.existing_topics:
            self.create_topic()
            Producer.existing_topics.add(self.topic_name)

        self.producer = AvroProducer(
            {
                "bootstrap.servers": self.BROKER_URL,
                'schema.registry.url': self.SCHEMA_REGISTRY_URL
            },
            default_key_schema=self.key_schema,
            default_value_schema=self.value_schema)

    def create_topic(self):
        """Creates the producer topic if it does not already exist"""

        clusterMetadata = self.client.list_topics(timeout=60)
        if (clusterMetadata.topics.get(self.topic_name) is None):
            logger.info(f"Creating topic {self.topic_name}")
            """Creates the topic with the given topic name"""
            futures = self.client.create_topics([
                NewTopic(
                    topic=self.topic_name,
                    num_partitions=1,  #self.num_partitions,
                    replication_factor=1,  #self.num_replicas,
                    config={
                        "delete.retention.ms": 100,
                        "compression.type": "lz4",
                        "file.delete.delay.ms": 100
                    })
            ])

            for topic, future in futures.items():
                try:
                    future.result()
                    logger.info("topic created")
                except Exception as e:
                    logger.error(
                        f"failed to create topic {self.topic_name}: {e}")
                    raise

    def close(self):
        """Prepares the producer for exit by cleaning up the producer"""

        future = self.client.delete_topics([self.topic_name],
                                           operation_timeout=30)

        # Wait for operation to finish.
        for topic, f in future.items():
            try:
                f.result()  # The result itself is None
                logger.info("Topic {} deleted".format(topic))
            except Exception as e:
                logger.error("Failed to delete topic {}: {}".format(topic, e))

    def time_millis(self):
        """Use this function to get the key for Kafka Events"""
        return int(round(time.time() * 1000))
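
# A hedged usage sketch (not part of the original example): instantiating the
# Producer above with a hypothetical Avro key schema. The schema string and
# topic name are illustrative assumptions, and the brokers / schema registry
# hardcoded in the class must be reachable for this to actually run.
from confluent_kafka import avro

demo_key_schema = avro.loads(
    '{"type": "record", "name": "demo_key",'
    ' "fields": [{"name": "id", "type": "string"}]}'
)
demo_producer = Producer("com.example.demo.events", key_schema=demo_key_schema)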
Example #36
def get_client(kafka_host="localhost:9092"):
    """ Get Kafka admin client for the given host and port
    :param kafka_host: Kafka host:port
    :return: Kafka admin client
    """
    return AdminClient({'bootstrap.servers': kafka_host})
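
# A minimal usage sketch (not part of the original snippet), assuming a broker
# is reachable on localhost:9092:
admin = get_client("localhost:9092")
print(sorted(admin.list_topics(timeout=5).topics.keys()))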
def test_delete_topics_api():
    """ delete_topics() tests, these wont really do anything since there is no
        broker configured. """

    a = AdminClient({"socket.timeout.ms": 10})
    fs = a.delete_topics(["mytopic"])
    # ignore the result

    with pytest.raises(Exception):
        a.delete_topics(None)

    with pytest.raises(Exception):
        a.delete_topics("mytopic")

    with pytest.raises(Exception):
        a.delete_topics([])

    with pytest.raises(ValueError):
        a.delete_topics([None, "mytopic"])

    fs = a.delete_topics(["mytopic", "other"])
    with pytest.raises(KafkaException):
        for f in concurrent.futures.as_completed(iter(fs.values())):
            f.result(timeout=1)

    fs = a.delete_topics(["mytopic", "othertopic", "third"],
                         request_timeout=0.5,
                         operation_timeout=300.1)
    for f in concurrent.futures.as_completed(iter(fs.values())):
        e = f.exception(timeout=1)
        assert isinstance(e, KafkaException)
        assert e.args[0].code() == KafkaError._TIMED_OUT

    with pytest.raises(TypeError):
        a.delete_topics(["mytopic"],
                        validate_only="maybe")