class AbstractConsumer(ABC):
    """Base class for consumers that read from a single topic.

    The constructor assembles the confluent configuration, creates the
    underlying ``confluent_kafka.Consumer`` and subscribes it to the topic.
    Subclasses provide :meth:`consume`.
    """

    def __init__(self, group_id: str, topic_name: str, last: bool):
        """Build the consumer and subscribe it to ``topic_name``.

        :param group_id: value used for the ``group.id`` setting.
        :param topic_name: name of the topic to subscribe to.
        :param last: when truthy ``auto.offset.reset`` becomes ``"latest"``,
            otherwise ``"earliest"``.
        """
        self._config = Config().create_confluent_config()
        consumer_settings = {
            "group.id": group_id,
            "error_cb": raise_for_kafka_error,
            # NOTE(review): the previous comment here spoke about committing
            # offsets manually, yet auto commit is switched on - confirm
            # which of the two is intended.
            "enable.auto.commit": True,
            "enable.partition.eof": False,
            # Offset reset policy is derived from the ``last`` flag.
            "default.topic.config": {
                "auto.offset.reset": "latest" if last else "earliest",
            },
        }
        self._config.update(consumer_settings)
        self._consumer = confluent_kafka.Consumer(self._config)
        self._subscribe(topic_name)

    def _subscribe(self, topic: str) -> None:
        """Subscribe the wrapped consumer to exactly one topic."""
        topics = [topic]
        self._consumer.subscribe(topics)

    @abstractmethod
    def consume(self, amount: int) -> int:
        """Consume messages; implemented by subclasses."""
        pass

    def _consume_single_message(self, timeout=30) -> Optional[Message]:
        """Poll once, pass the result to ``raise_for_message`` and return it.

        :param timeout: forwarded to ``Consumer.poll``.
        """
        polled = self._consumer.poll(timeout=timeout)
        raise_for_message(polled)
        return polled
def test_kafka_python_config(config: Config):
    """``create_kafka_python_config`` yields the expected dict for ``context_5``."""
    config.context_switch("context_5")

    brokers = ["kafka:9094", "kafka1:9094", "kafka2:9094", "kafka3:9094"]
    expected = {
        "bootstrap_servers": brokers,
        "security_protocol": "SASL_SSL",
        "sasl_mechanism": "PLAIN",
        "sasl_plain_username": "******",
        "sasl_plain_password": "******",
        "ssl_cafile": "/my/ca.crt",
        "ssl_certfile": "/my/certificate.crt",
        "ssl_keyfile": "/my/certificate.key",
        "ssl_password": "******",
    }

    actual = config.create_kafka_python_config()

    assert expected == actual
def test_pykafka_config(mocker: mock, config: Config):
    """``create_pykafka_config`` wires the patched SSL config and authenticator.

    ``SslConfig`` and ``PlainAuthenticator`` are patched to return sentinels so
    the resulting dict and the constructor arguments can both be checked.
    """
    ssl_sentinel = mock.sentinel.ssl_config
    ssl_patch = mocker.patch("esque.config.SslConfig", return_value=ssl_sentinel)
    authenticator_sentinel = mock.sentinel.plain_authenticator
    authenticator_patch = mocker.patch(
        "pykafka.sasl_authenticators.PlainAuthenticator",
        return_value=authenticator_sentinel,
    )

    config.context_switch("context_5")

    expected = {
        "hosts": "kafka:9094,kafka1:9094,kafka2:9094,kafka3:9094",
        "sasl_authenticator": authenticator_sentinel,
        "ssl_config": ssl_sentinel,
    }
    actual = config.create_pykafka_config()
    assert expected == actual

    # Both patched constructors must have received the context's settings.
    ssl_patch.assert_called_with(
        cafile="/my/ca.crt",
        certfile="/my/certificate.crt",
        keyfile="/my/certificate.key",
        password="******",
    )
    authenticator_patch.assert_called_with(
        user="******", password="******", security_protocol="SASL_SSL"
    )
def __init__(self):
    """Create the confluent admin client and the pykafka client.

    Both clients are configured from a freshly created ``Config``. The admin
    client is polled once (timeout 1) and the topic controller slot starts
    out empty.
    """
    self._config = Config()

    admin_settings = self._config.create_confluent_config()
    self.confluent_client = AdminClient(admin_settings)

    pykafka_settings = self._config.create_pykafka_config()
    self.pykafka_client = pykafka.client.KafkaClient(
        **pykafka_settings, broker_version="1.0.0"
    )

    self.confluent_client.poll(timeout=1)
    self.__topic_controller = None
def __init__(self):
    """Set the queue sizes and assemble the confluent producer configuration."""
    self.queue_length = 100000
    # Dividing by 0.5 doubles the value (and yields a float).
    # NOTE(review): a limit above queue_length looks unusual - confirm that
    # a multiplication was not intended.
    self.internal_queue_length_limit = self.queue_length / 0.5

    self._config = Config().create_confluent_config()
    producer_settings = {
        "on_delivery": delivery_callback,
        "error_cb": raise_for_kafka_error,
        "queue.buffering.max.messages": self.queue_length,
    }
    self._config.update(producer_settings)
def test_sasl_params(config: Config):
    """SASL parameters are empty at first and populated after switching to ``context_5``."""
    # Before any context switch there are no SASL settings.
    assert config.sasl_params == {}

    config.context_switch("context_5")

    expected_params = {
        "mechanism": "PLAIN",
        "user": "******",
        "password": "******",
    }
    assert config.sasl_params == expected_params
    assert config.sasl_mechanism == "PLAIN"
def test_ssl_params(config: Config):
    """SSL parameters are empty at first and populated after switching to ``context_5``."""
    # Before any context switch there are no SSL settings.
    assert config.ssl_params == {}

    config.context_switch("context_5")

    expected_params = {
        "cafile": "/my/ca.crt",
        "certfile": "/my/certificate.crt",
        "keyfile": "/my/certificate.key",
        "password": "******",
    }
    assert config.ssl_params == expected_params
def test_current_context_bootstrap_servers(config: Config):
    """``bootstrap_servers`` follows the currently selected context."""
    initial_servers = ["localhost:9091"]
    assert config.bootstrap_servers == initial_servers

    config.context_switch("context_3")

    context_3_servers = [
        "node01.cool-domain.com:9093",
        "node02.cool-domain.com:9093",
        "node03.cool-domain.com:9093",
    ]
    assert config.bootstrap_servers == context_3_servers
class Producer(ABC):
    """Abstract producer holding the shared confluent producer configuration.

    Subclasses implement :meth:`produce`.
    """

    def __init__(self):
        """Set the queue sizes and assemble the confluent producer configuration."""
        self.queue_length = 100000
        # Dividing by 0.5 doubles the value (and yields a float).
        # NOTE(review): a limit above queue_length looks unusual - confirm
        # that a multiplication was not intended.
        self.internal_queue_length_limit = self.queue_length / 0.5

        self._config = Config().create_confluent_config()
        producer_settings = {
            "on_delivery": delivery_callback,
            "error_cb": raise_for_kafka_error,
            "queue.buffering.max.messages": self.queue_length,
        }
        self._config.update(producer_settings)

    @abstractmethod
    def produce(self, topic_name: str) -> int:
        """Produce to ``topic_name``; implemented by subclasses."""
        pass
def _create_config(self):
    """Load the shared ``Config`` instance, offering to create a sample file.

    If no config exists the user is asked whether a sample file should be
    created. Declining re-raises the ``ConfigNotExistsException``; accepting
    copies the sample file, optionally opens it in an editor, and then loads
    the config again.
    """
    try:
        self._config = Config.get_instance()
    except ConfigNotExistsException:
        click.echo(f"No config provided in {config_dir()}")

        # Without approval there is nothing to load: propagate the error.
        if not ensure_approval(
            f"Should a sample file be created in {config_dir()}"
        ):
            raise

        config_dir().mkdir(exist_ok=True)
        copyfile(sample_config_path().as_posix(), config_path())

        if ensure_approval("Do you want to modify the config file now?"):
            click.edit(filename=config_path().as_posix())

        self._config = Config.get_instance()
def test_fix_missing_context_config(
    interactive_cli_runner: CliRunner, load_config: config_loader
):
    """``config fix`` repairs a current context that is not among the available ones."""
    load_config(LOAD_BROKEN_CONFIG)

    # Validation is disabled so the broken config can be loaded at all.
    broken_cfg = Config(disable_validation=True)
    assert broken_cfg.current_context not in broken_cfg.available_contexts

    interactive_cli_runner.invoke(
        esque, args=["config", "fix"], catch_exceptions=False
    )

    fixed_cfg = Config.get_instance()
    assert fixed_cfg.current_context in fixed_cfg.available_contexts
def unittest_config(request: FixtureRequest, load_config: config_loader) -> Config:
    """Load the integration test config and return the shared ``Config`` instance.

    When the ``--local`` option is set, the config is switched to the
    ``"local"`` context before being returned.
    """
    _conffile, _ = load_config(LOAD_INTEGRATION_TEST_CONFIG)
    config_instance = Config.get_instance()

    run_locally = request.config.getoption("--local")
    if run_locally:
        config_instance.context_switch("local")

    return config_instance
def __init__(
    self,
    group_id: str,
    topic_name: str,
    output_directory: pathlib.Path,
    last: bool,
    match: str = None,
    initialize_default_output_directory: bool = False,
    enable_auto_commit: bool = True,
):
    """Initialise the parent consumer and register an avro writer under key ``-1``.

    With ``output_directory`` set to ``None`` the writer is a
    ``StdOutAvroWriter``; otherwise it is an ``AvroFileWriter`` targeting
    ``<output_directory>/partition_any``. All arguments are forwarded
    positionally to the parent constructor.
    """
    super().__init__(
        group_id,
        topic_name,
        output_directory,
        last,
        match,
        initialize_default_output_directory,
        enable_auto_commit,
    )

    registry_url = Config.get_instance().schema_registry
    self.schema_registry_client = SchemaRegistryClient(registry_url)

    if output_directory is None:
        fallback_writer = StdOutAvroWriter(
            schema_registry_client=self.schema_registry_client
        )
    else:
        fallback_writer = AvroFileWriter(
            self.output_directory / "partition_any", self.schema_registry_client
        )
    self.writers[-1] = fallback_writer

    # The destination directory is only prepared for file output.
    if self._initialize_default_output_directory:
        if self.output_directory is not None:
            self.writers[-1].init_destination_directory()
def test_validation_called(mocker: mock, load_config: config_loader):
    """Creating a ``Config`` passes the parsed config file to the validator."""
    _conf_path, raw_config = load_config()
    validator_patch = mocker.patch("esque.validation.validate_esque_config")

    Config()

    # The validator must have been called with exactly one positional argument.
    positional_args = validator_patch.call_args[0]
    (validated,) = positional_args
    assert validated == yaml.safe_load(raw_config)
def test_invalid_config(load_config: config_loader):
    """A config file with an unterminated quote makes ``Config()`` raise ``ScannerError``."""
    config_file, original_text = load_config()

    # Append a line with an unterminated double quote.
    config_file.write_text(original_text + '\nasdf:"')

    with pytest.raises(ScannerError):
        Config()
def commit_offsets(self, consumer_id: str, offsets: List[TopicPartition]):
    """Synchronously commit ``offsets`` on behalf of the group ``consumer_id``.

    A short-lived consumer is created just for the commit. It is closed in
    a ``finally`` block so that a failing commit no longer leaves the
    consumer (and its connections) open.

    :param consumer_id: value used for the ``group.id`` setting.
    :param offsets: topic partitions (with offsets) to commit.
    """
    config = Config.get_instance()
    # Settings from the confluent config come last and therefore take
    # precedence over the "group.id" given here if both define it.
    consumer = Consumer({
        "group.id": consumer_id,
        **config.create_confluent_config(),
    })
    try:
        consumer.commit(offsets=offsets, asynchronous=False)
    finally:
        consumer.close()
def test_confluent_config(config: Config):
    """``create_confluent_config`` (with schema registry) matches ``context_5``."""
    config.context_switch("context_5")

    expected = {
        "bootstrap.servers": "kafka:9094,kafka1:9094,kafka2:9094,kafka3:9094",
        "security.protocol": "SASL_SSL",
        "schema.registry.url": "http://schema-registry.example.com",
        "sasl.mechanisms": "PLAIN",
        "sasl.username": "******",
        "sasl.password": "******",
        "ssl.ca.location": "/my/ca.crt",
        "ssl.certificate.location": "/my/certificate.crt",
        "ssl.key.location": "/my/certificate.key",
        "ssl.key.password": "******",
    }

    actual = config.create_confluent_config(include_schema_registry=True)

    assert expected == actual
def _get_partitions(
    self,
    topic: Topic,
    retrieve_last_timestamp: bool,
    get_partition_watermarks: bool = True,
) -> List[Partition]:
    """Collect ``Partition`` objects for every partition of ``topic``.

    :param topic: topic whose partitions are described.
    :param retrieve_last_timestamp: also fetch the timestamp of the message
        at offset ``high - 1`` for non-empty partitions.
    :param get_partition_watermarks: when falsy, watermarks are not queried
        and ``-1`` is used for both of them.
    :return: one ``Partition`` per partition reported in the topic metadata.
    """
    assert (
        get_partition_watermarks or not retrieve_last_timestamp
    ), "Can not retrieve timestamp without partition watermarks"

    consumer_config = Config.get_instance().create_confluent_config()
    consumer_config.update(
        {"group.id": ESQUE_GROUP_ID, "topic.metadata.refresh.interval.ms": "250"}
    )

    with closing(confluent_kafka.Consumer(consumer_config)) as consumer:
        topic_metadata = consumer.list_topics(topic=topic.name).topics[topic.name]

        if not get_partition_watermarks:
            return [
                Partition(pid, -1, -1, meta.isrs, meta.leader, meta.replicas, None)
                for pid, meta in topic_metadata.partitions.items()
            ]

        def fetch_watermarks(pid):
            # Query the low/high watermark of one partition of the topic.
            return consumer.get_watermark_offsets(
                TopicPartition(topic=topic.name, partition=pid)
            )

        collected: List[Partition] = []
        for pid, meta in topic_metadata.partitions.items():
            try:
                low, high = fetch_watermarks(pid)
            except KafkaException:
                # Retry once after a pause; by then the metadata should have
                # been refreshed (this also covers small network delays).
                # A metadata refresh cannot be triggered and awaited explicitly.
                time.sleep(1)
                low, high = fetch_watermarks(pid)

            last_timestamp = None
            if retrieve_last_timestamp and high > low:
                # Position on the last message of the partition and read it.
                consumer.assign(
                    [
                        TopicPartition(
                            topic=topic.name, partition=pid, offset=high - 1
                        )
                    ]
                )
                msg = consumer.poll(timeout=10)
                if msg is not None:
                    # The second timestamp element is divided by 1000.
                    last_timestamp = float(msg.timestamp()[1]) / 1000
                else:
                    logger.warning(
                        f"Due to timeout latest timestamp for topic `{topic.name}` "
                        f"and partition `{pid}` is missing."
                    )

            collected.append(
                Partition(
                    pid, low, high, meta.isrs, meta.leader, meta.replicas, last_timestamp
                )
            )
    return collected
def test_config_too_new(load_config: config_loader):
    """A config whose version exceeds ``CURRENT_VERSION`` raises ``ConfigTooNew``."""
    config_file, config_text = load_config()

    parsed = yaml.safe_load(config_text)
    parsed["version"] = CURRENT_VERSION + 1

    # Write the bumped version back to the config file.
    with config_file.open("w") as stream:
        yaml.dump(parsed, stream)

    with pytest.raises(ConfigTooNew):
        Config()
def config_fix(state: State):
    """Fix simple errors in esque config.

    Fixes simple errors like wrong current_contexts in the esque config when the configs was tampered with manually."""
    try:
        # Re-selecting the current context raises ValidationException when
        # the config is invalid.
        state.config.context_switch(state.config.current_context)
        click.echo("Your config seems fine. 🎉")
    except ValidationException:
        _cfg: Config = Config(disable_validation=True)

        # Only an invalid current context can be repaired automatically.
        if _cfg.current_context in _cfg.available_contexts:
            click.echo(
                "Can't fix this configuration error try fixing it manually."
            )
            return

        click.echo(
            f"Found invalid current context. Switching context to state {_cfg.available_contexts[0]}."
        )
        _cfg.context_switch(_cfg.available_contexts[0])
        Config.set_instance(_cfg)
        state.config.save()
def consumer(topic_object: Topic, consumer_group):
    """Yield a confluent consumer assigned to partition 0 (offset 0) of the topic.

    The consumer is closed once the generator is resumed or finalised, so it
    is no longer left open after use (matching ``target_topic_avro_consumer``,
    which also closes its consumer after the ``yield``).

    :param topic_object: topic whose ``name`` is used for the assignment.
    :param consumer_group: value used for the ``group.id`` setting.
    """
    _config = Config().create_confluent_config()
    _config.update({
        "group.id": consumer_group,
        "error_cb": raise_for_kafka_error,
        # Auto commit is off: offsets are committed explicitly by the caller.
        "enable.auto.commit": False,
        "enable.partition.eof": False,
        "default.topic.config": {
            "auto.offset.reset": "latest"
        },
    })
    _consumer = confluent_kafka.Consumer(_config)
    try:
        _consumer.assign(
            [TopicPartition(topic=topic_object.name, partition=0, offset=0)])
        yield _consumer
    finally:
        # Release the consumer's resources once the caller is done with it.
        _consumer.close()
class Cluster:
    """Bundles a confluent ``AdminClient`` and a pykafka ``KafkaClient``.

    Both clients are configured from the same ``Config`` object; the topic
    controller is created lazily on first access.
    """

    def __init__(self):
        """Create both clients and poll the admin client once (timeout 1)."""
        self._config = Config()

        admin_settings = self._config.create_confluent_config()
        self.confluent_client = AdminClient(admin_settings)

        pykafka_settings = self._config.create_pykafka_config()
        self.pykafka_client = pykafka.client.KafkaClient(
            **pykafka_settings, broker_version="1.0.0"
        )

        self.confluent_client.poll(timeout=1)
        self.__topic_controller = None

    @property
    def topic_controller(self) -> TopicController:
        """The ``TopicController`` for this cluster, created on first access."""
        if self.__topic_controller is not None:
            return self.__topic_controller
        self.__topic_controller = TopicController(self, self._config)
        return self.__topic_controller

    @property
    def bootstrap_servers(self):
        """Bootstrap servers as reported by the config object."""
        return self._config.bootstrap_servers

    def get_metadata(self):
        """Return the result of ``list_topics`` (timeout 1) of the admin client."""
        return self.confluent_client.list_topics(timeout=1)

    @property
    def brokers(self):
        """List of ``{"id", "host", "port"}`` dicts, sorted by broker id."""
        cluster_metadata = self.confluent_client.list_topics(timeout=1)
        broker_infos = [
            {"id": entry.id, "host": entry.host, "port": entry.port}
            for entry in cluster_metadata.brokers.values()
        ]
        broker_infos.sort(key=operator.itemgetter("id"))
        return broker_infos

    def retrieve_config(self, config_type: ConfigResource.Type, id):
        """Describe the config of one resource and return it unpacked.

        :param config_type: resource type passed to ``ConfigResource``.
        :param id: resource identifier; converted with ``str`` before use.
        """
        resource = ConfigResource(config_type, str(id))
        pending = self.confluent_client.describe_configs([resource])

        # Exactly one resource was requested, so exactly one entry is expected.
        ((_requested, config_future),) = pending.items()

        finished = ensure_kafka_futures_done([config_future])
        return unpack_confluent_config(finished.result())
def test_edit_config(
    mocker: mock, interactive_cli_runner: CliRunner, load_config: config_loader
):
    """``config_edit`` stores what the (patched) editor returns."""
    _conf_path, original_text = load_config()

    # Prepare edited content: "context_1" duplicated under the name "dupe".
    edited = yaml.safe_load(original_text)
    contexts = edited["contexts"]
    contexts["dupe"] = contexts["context_1"]
    mocker.patch.object(click, "edit", return_value=yaml.dump(edited))

    outcome = interactive_cli_runner.invoke(config_edit, catch_exceptions=False)
    assert outcome.exit_code == 0

    reloaded = Config()
    assert "dupe" in reloaded.available_contexts
def target_topic_avro_consumer(
    unittest_config: Config, target_topic: Tuple[str, int]
) -> AvroConsumer:
    """Yield an ``AvroConsumer`` assigned to the target topic's partitions at offset 0.

    ``target_topic[0]`` is used as topic name and ``target_topic[1]`` as the
    number of partitions to assign. The consumer is closed after the yield.
    """
    consumer_settings = {
        "group.id": "asdf",
        "enable.auto.commit": False,
        "enable.partition.eof": False,
    }
    # Entries from the confluent config override the defaults above.
    consumer_settings.update(
        unittest_config.create_confluent_config(include_schema_registry=True)
    )
    consumer = AvroConsumer(consumer_settings)

    assignment = []
    for partition_index in range(target_topic[1]):
        assignment.append(
            TopicPartition(topic=target_topic[0], partition=partition_index, offset=0)
        )
    consumer.assign(assignment)

    yield consumer
    consumer.close()
def __init__(self, group_id: str, topic_name: str, last: bool):
    """Build the consumer and subscribe it to ``topic_name``.

    :param group_id: value used for the ``group.id`` setting.
    :param topic_name: name of the topic to subscribe to.
    :param last: when truthy ``auto.offset.reset`` becomes ``"latest"``,
        otherwise ``"earliest"``.
    """
    self._config = Config().create_confluent_config()
    consumer_settings = {
        "group.id": group_id,
        "error_cb": raise_for_kafka_error,
        # NOTE(review): the previous comment here spoke about committing
        # offsets manually, yet auto commit is switched on - confirm which
        # of the two is intended.
        "enable.auto.commit": True,
        "enable.partition.eof": False,
        # Offset reset policy is derived from the ``last`` flag.
        "default.topic.config": {
            "auto.offset.reset": "latest" if last else "earliest",
        },
    }
    self._config.update(consumer_settings)
    self._consumer = confluent_kafka.Consumer(self._config)
    self._subscribe(topic_name)
def __init__(self, topic_name: str, match: str = None):
    """Prepare configuration, logger and rule tree, then create the internal producer.

    :param topic_name: stored as ``_topic_name``.
    :param match: optional expression; when given, a ``RuleTree`` is built
        from it, otherwise ``_rule_tree`` is ``None``.
    """
    self.queue_length = 100000
    # Dividing by 0.5 doubles the value (and yields a float).
    self.internal_queue_length_limit = self.queue_length / 0.5

    self._config = Config.get_instance().create_confluent_config()
    self._setup_config()

    self.logger = logging.getLogger(__name__)
    self._topic_name = topic_name
    self._match = match
    self._producer = None
    self._rule_tree = RuleTree(match) if self._match is not None else None

    self.create_internal_producer()
def __init__(self):
    """Load the config, or interactively bootstrap one and exit.

    If no config exists, the config directory is created, the user is offered
    a sample config file and the chance to edit it, and the process then
    exits with status 0.
    """
    self.no_verify = False
    try:
        self.config = Config()
    except ConfigNotExistsException:
        click.echo(f"No config provided in {config_dir()}")
        # The directory is created regardless of the answers given below.
        config_dir().mkdir(exist_ok=True)
        if ensure_approval(
                f"Should a sample file be created in {config_dir()}"):
            copyfile(sample_config_path().as_posix(), config_path())
        if ensure_approval("Do you want to modify the config file now?"):
            click.edit(filename=config_path().as_posix())
        # No config was loaded in this run, so stop here.
        sys.exit(0)
    # NOTE(review): presumably filled lazily elsewhere - not visible here.
    self._cluster = None
def _setup_config(self):
    """Assemble the confluent consumer configuration from the instance settings.

    Reads ``_last``, ``_group_id`` and ``_enable_auto_commit`` and stores the
    resulting dict in ``_config``.
    """
    reset_policy = "latest" if self._last else "earliest"

    self._config = Config.get_instance().create_confluent_config()
    consumer_settings = {
        "group.id": self._group_id,
        "error_cb": log_error,
        # Whether offsets are committed automatically is decided per instance.
        "enable.auto.commit": self._enable_auto_commit,
        "enable.partition.eof": True,
        # Offset reset policy is derived from the ``_last`` flag.
        "default.topic.config": {"auto.offset.reset": reset_policy},
    }
    self._config.update(consumer_settings)
def randomly_generated_consumer_groups(filled_topic, unittest_config: Config, prefix="") -> str:
    """Create a randomly named consumer group with committed offsets.

    A consumer using the new group id is assigned to partition 0 (offset 0)
    of ``filled_topic``; two messages are consumed and committed one by one.
    The consumer is closed afterwards, also when consuming or committing
    fails, so it is not left open.

    :param filled_topic: topic whose ``name`` is used for the assignment.
    :param unittest_config: config providing the confluent settings.
    :param prefix: string put in front of the eight random letters.
    :return: the generated consumer group id.
    """
    randomly_generated_consumer_group = prefix + "".join(
        random.choices(ascii_letters, k=8))
    _config = unittest_config.create_confluent_config()
    _config.update({
        "group.id": randomly_generated_consumer_group,
        "enable.auto.commit": False,
        "default.topic.config": {
            "auto.offset.reset": "latest"
        },
    })
    _consumer = confluent_kafka.Consumer(_config)
    try:
        _consumer.assign(
            [TopicPartition(topic=filled_topic.name, partition=0, offset=0)])
        for _ in range(2):
            # Raises IndexError if nothing arrives within the timeout.
            msg = _consumer.consume(timeout=10)[0]
            _consumer.commit(msg, asynchronous=False)
    finally:
        _consumer.close()
    return randomly_generated_consumer_group
def __init__(self, working_dir: pathlib.Path):
    """Initialise the parent, add the schema registry URL and create an ``AvroProducer``.

    :param working_dir: forwarded to the parent constructor.
    """
    super().__init__(working_dir)

    registry_url = Config().schema_registry
    self._config.update({"schema.registry.url": registry_url})

    self._producer = AvroProducer(self._config)