class EndToEndLatencyService(PerformanceService):

    logs = {
        "end_to_end_latency_log": {
            "path": "/mnt/end-to-end-latency.log",
            "collect_default": True},
    }

    def __init__(self, context, num_nodes, kafka, security_protocol, topic, num_records,
                 consumer_fetch_max_wait=100, acks=1):
        super(EndToEndLatencyService, self).__init__(context, num_nodes)
        self.kafka = kafka
        self.security_config = SecurityConfig(security_protocol)
        self.security_protocol = security_protocol
        self.args = {
            'topic': topic,
            'num_records': num_records,
            'consumer_fetch_max_wait': consumer_fetch_max_wait,
            'acks': acks
        }

    def _worker(self, idx, node):
        args = self.args.copy()
        self.security_config.setup_node(node)
        if self.security_protocol == SecurityConfig.SSL:
            ssl_config_file = SecurityConfig.SSL_DIR + "/security.properties"
            node.account.create_file(ssl_config_file, str(self.security_config))
        else:
            ssl_config_file = ""
        args.update({
            'zk_connect': self.kafka.zk.connect_setting(),
            'bootstrap_servers': self.kafka.bootstrap_servers(),
            'ssl_config_file': ssl_config_file
        })

        cmd = "/opt/kafka/bin/kafka-run-class.sh kafka.tools.EndToEndLatency "\
              "%(bootstrap_servers)s %(topic)s %(num_records)d "\
              "%(acks)d 20 %(ssl_config_file)s" % args
        cmd += " | tee /mnt/end-to-end-latency.log"

        self.logger.debug("End-to-end latency %d command: %s", idx, cmd)
        results = {}
        for line in node.account.ssh_capture(cmd):
            if line.startswith("Avg latency:"):
                results['latency_avg_ms'] = float(line.split()[2])
            if line.startswith("Percentiles"):
                results['latency_50th_ms'] = float(line.split()[3][:-1])
                results['latency_99th_ms'] = float(line.split()[6][:-1])
                results['latency_999th_ms'] = float(line.split()[9])
        self.results[idx - 1] = results
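# Usage sketch (illustrative, not part of the original source): driving EndToEndLatencyService from a
# ducktape test. The kafkatest import paths, the single-node cluster sizes and the topic name are
# assumptions made for this example.
from ducktape.tests.test import Test

from kafkatest.services.zookeeper import ZookeeperService
from kafkatest.services.kafka import KafkaService
from kafkatest.services.performance import EndToEndLatencyService


class LatencySmokeTest(Test):
    def __init__(self, test_context):
        super(LatencySmokeTest, self).__init__(test_context)
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk,
                                  security_protocol="PLAINTEXT",
                                  topics={"latency-topic": {"partitions": 1, "replication-factor": 1}})

    def test_latency(self):
        self.zk.start()
        self.kafka.start()
        perf = EndToEndLatencyService(self.test_context, num_nodes=1, kafka=self.kafka,
                                      security_protocol="PLAINTEXT",
                                      topic="latency-topic", num_records=10000)
        perf.run()  # run() starts the worker, waits for it to finish, then stops the service
        # _worker() stores per-node results; node index 1 maps to results[0]
        self.logger.info("99.9th percentile latency: %s ms", perf.results[0]['latency_999th_ms'])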
def __init__(self, context, num_nodes, kafka, topic, security_protocol=None, new_consumer=None,
             message_validator=None, from_beginning=True, consumer_timeout_ms=None,
             client_id="console-consumer", jmx_object_names=None, jmx_attributes=[]):
    """
    Args:
        context:                    standard context
        num_nodes:                  number of nodes to use (this should be 1)
        kafka:                      kafka service
        topic:                      consume from this topic
        security_protocol:          security protocol for Kafka connections
        new_consumer:               use new Kafka consumer if True
        message_validator:          function which returns message or None
        from_beginning:             consume from beginning if True, else from the end
        consumer_timeout_ms:        corresponds to consumer.timeout.ms. consumer process ends if time between
                                    successively consumed messages exceeds this timeout. Setting this and
                                    waiting for the consumer to stop is a pretty good way to consume all
                                    messages in a topic.
    """
    JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
    PerformanceService.__init__(self, context, num_nodes)
    self.kafka = kafka
    self.new_consumer = new_consumer
    self.args = {
        'topic': topic,
    }

    self.consumer_timeout_ms = consumer_timeout_ms

    self.from_beginning = from_beginning
    self.message_validator = message_validator
    self.messages_consumed = {idx: [] for idx in range(1, num_nodes + 1)}
    self.client_id = client_id

    # Process client configuration
    self.prop_file = self.render('console_consumer.properties',
                                 consumer_timeout_ms=self.consumer_timeout_ms,
                                 client_id=self.client_id)

    # Add security properties to the config. If security protocol is not specified,
    # use the default in the template properties.
    self.security_config = SecurityConfig(security_protocol, self.prop_file)
    self.security_protocol = self.security_config.security_protocol

    if self.new_consumer is None:
        self.new_consumer = self.security_protocol == SecurityConfig.SSL
    if self.security_protocol == SecurityConfig.SSL and not self.new_consumer:
        raise Exception("SSL protocol is supported only with the new consumer")

    self.prop_file += str(self.security_config)
class KafkaLog4jAppender(BackgroundThreadService):

    logs = {
        "producer_log": {
            "path": "/mnt/kafka_log4j_appender.log",
            "collect_default": False}
    }

    def __init__(self, context, num_nodes, kafka, topic, max_messages=-1, security_protocol="PLAINTEXT"):
        super(KafkaLog4jAppender, self).__init__(context, num_nodes)

        self.kafka = kafka
        self.topic = topic
        self.max_messages = max_messages
        self.security_protocol = security_protocol
        self.security_config = SecurityConfig(security_protocol)

    def _worker(self, idx, node):
        cmd = self.start_cmd(node)
        self.logger.debug("VerifiableLog4jAppender %d command: %s" % (idx, cmd))
        self.security_config.setup_node(node)
        node.account.ssh(cmd)

    def start_cmd(self, node):
        cmd = "/opt/%s/bin/" % kafka_dir(node)
        cmd += "kafka-run-class.sh org.apache.kafka.tools.VerifiableLog4jAppender"
        cmd += " --topic %s --broker-list %s" % (self.topic, self.kafka.bootstrap_servers())
        if self.max_messages > 0:
            cmd += " --max-messages %s" % str(self.max_messages)
        if self.security_protocol == SecurityConfig.SSL:
            cmd += " --security-protocol SSL"
            cmd += " --ssl-truststore-location %s" % str(SecurityConfig.TRUSTSTORE_PATH)
            cmd += " --ssl-truststore-password %s" % str(SecurityConfig.ssl_stores['ssl.truststore.password'])

        cmd += " 2>> /mnt/kafka_log4j_appender.log | tee -a /mnt/kafka_log4j_appender.log &"
        return cmd

    def stop_node(self, node):
        node.account.kill_process("VerifiableLog4jAppender", allow_fail=False)
        if self.worker_threads is None:
            return

        # block until the corresponding thread exits
        if len(self.worker_threads) >= self.idx(node):
            # Need to guard this because stop is preemptively called before the worker threads are added and started
            self.worker_threads[self.idx(node) - 1].join()

    def clean_node(self, node):
        node.account.kill_process("VerifiableLog4jAppender", clean_shutdown=False, allow_fail=False)
        node.account.ssh("rm -rf /mnt/kafka_log4j_appender.log", allow_fail=False)
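# Usage sketch (illustrative, not part of the original source): a test method that produces log4j
# events to a topic via the appender. Assumes a running KafkaService bound to self.kafka inside a
# ducktape test (see the fixture sketched after EndToEndLatencyService above); the module path and
# topic name are assumptions made for this example.
from kafkatest.services.kafka_log4j_appender import KafkaLog4jAppender  # assumed module path

def produce_log4j_messages(self):
    appender = KafkaLog4jAppender(self.test_context, num_nodes=1, kafka=self.kafka,
                                  topic="log4j-topic", max_messages=100,
                                  security_protocol="PLAINTEXT")
    appender.start()
    appender.wait()  # wait() joins the background worker thread once the appender command returns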
class ConsumerPerformanceService(PerformanceService):
    """
        See ConsumerPerformance.scala as the source of truth on these settings, but for reference:

        "zookeeper" "The connection string for the zookeeper connection in the form host:port. Multiple URLS can
                     be given to allow fail-over. This option is only used with the old consumer."

        "broker-list", "A broker list to use for connecting if using the new consumer."

        "topic", "REQUIRED: The topic to consume from."

        "group", "The group id to consume on."

        "fetch-size", "The amount of data to fetch in a single request."

        "from-latest", "If the consumer does not already have an established offset to consume from,
                        start with the latest message present in the log rather than the earliest message."

        "socket-buffer-size", "The size of the tcp RECV size."

        "threads", "Number of processing threads."

        "num-fetch-threads", "Number of fetcher threads. Defaults to 1"

        "new-consumer", "Use the new consumer implementation."

        "consumer.config", "Consumer config properties file."
    """

    # Root directory for persistent output
    PERSISTENT_ROOT = "/mnt/consumer_performance"
    LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
    STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "consumer_performance.stdout")
    STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "consumer_performance.stderr")
    LOG_FILE = os.path.join(LOG_DIR, "consumer_performance.log")
    LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
    CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "consumer.properties")

    logs = {
        "consumer_performance_output": {
            "path": STDOUT_CAPTURE,
            "collect_default": True},
        "consumer_performance_stderr": {
            "path": STDERR_CAPTURE,
            "collect_default": True},
        "consumer_performance_log": {
            "path": LOG_FILE,
            "collect_default": True}
    }

    def __init__(self, context, num_nodes, kafka, security_protocol, topic, messages, new_consumer=False, settings={}):
        super(ConsumerPerformanceService, self).__init__(context, num_nodes)
        self.kafka = kafka
        self.security_config = SecurityConfig(security_protocol)
        self.security_protocol = security_protocol
        self.topic = topic
        self.messages = messages
        self.new_consumer = new_consumer
        self.settings = settings

        # These less-frequently used settings can be updated manually after instantiation
        self.fetch_size = None
        self.socket_buffer_size = None
        self.threads = None
        self.num_fetch_threads = None
        self.group = None
        self.from_latest = None

    @property
    def args(self):
        """Dictionary of arguments used to start the Consumer Performance script."""
        args = {
            'topic': self.topic,
            'messages': self.messages,
        }

        if self.new_consumer:
            args['new-consumer'] = ""
            args['broker-list'] = self.kafka.bootstrap_servers()
        else:
            args['zookeeper'] = self.kafka.zk.connect_setting()

        if self.fetch_size is not None:
            args['fetch-size'] = self.fetch_size

        if self.socket_buffer_size is not None:
            args['socket-buffer-size'] = self.socket_buffer_size

        if self.threads is not None:
            args['threads'] = self.threads

        if self.num_fetch_threads is not None:
            args['num-fetch-threads'] = self.num_fetch_threads

        if self.group is not None:
            args['group'] = self.group

        if self.from_latest:
            args['from-latest'] = ""

        return args

    def start_cmd(self, node):
        cmd = "export LOG_DIR=%s;" % ConsumerPerformanceService.LOG_DIR
        cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\";" % ConsumerPerformanceService.LOG4J_CONFIG
        cmd += " /opt/%s/bin/kafka-consumer-perf-test.sh" % kafka_dir(node)
        for key, value in self.args.items():
            cmd += " --%s %s" % (key, value)
        cmd += " --consumer.config %s" % ConsumerPerformanceService.CONFIG_FILE

        for key, value in self.settings.items():
            cmd += " %s=%s" % (str(key), str(value))

        cmd += " 2>> %(stderr)s | tee -a %(stdout)s" % {
            'stdout': ConsumerPerformanceService.STDOUT_CAPTURE,
            'stderr': ConsumerPerformanceService.STDERR_CAPTURE}
        return cmd

    def _worker(self, idx, node):
        node.account.ssh("mkdir -p %s" % ConsumerPerformanceService.PERSISTENT_ROOT, allow_fail=False)

        log_config = self.render('tools_log4j.properties', log_file=ConsumerPerformanceService.LOG_FILE)
        node.account.create_file(ConsumerPerformanceService.LOG4J_CONFIG, log_config)
        node.account.create_file(ConsumerPerformanceService.CONFIG_FILE, str(self.security_config))
        self.security_config.setup_node(node)

        cmd = self.start_cmd(node)
        self.logger.debug("Consumer performance %d command: %s", idx, cmd)
        last = None
        for line in node.account.ssh_capture(cmd):
            last = line

        # Parse and save the last line's information
        parts = last.split(',')
        self.results[idx - 1] = {
            'total_mb': float(parts[2]),
            'mbps': float(parts[3]),
            'records_per_sec': float(parts[5]),
        }
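# Usage sketch (illustrative, not part of the original source): the less-frequently used options are
# plain attributes, so a test can set them after construction and before run(). Assumes a running
# KafkaService as self.kafka and a topic that already contains data; names are made up for the example.
def run_consumer_perf(self):
    perf = ConsumerPerformanceService(self.test_context, num_nodes=1, kafka=self.kafka,
                                      security_protocol="PLAINTEXT", topic="perf-topic",
                                      messages=1000000, new_consumer=True)
    perf.group = "consumer-perf-group"  # forwarded as --group
    perf.fetch_size = 1024 * 1024       # forwarded as --fetch-size
    perf.run()
    self.logger.info("Consumed at %.2f MB/s", perf.results[0]['mbps'])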
class ConsoleConsumer(JmxMixin, BackgroundThreadService):
    # Root directory for persistent output
    PERSISTENT_ROOT = "/mnt/console_consumer"
    STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "console_consumer.stdout")
    STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "console_consumer.stderr")
    LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
    LOG_FILE = os.path.join(LOG_DIR, "console_consumer.log")
    LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
    CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "console_consumer.properties")

    logs = {
        "consumer_stdout": {
            "path": STDOUT_CAPTURE,
            "collect_default": False},
        "consumer_stderr": {
            "path": STDERR_CAPTURE,
            "collect_default": False},
        "consumer_log": {
            "path": LOG_FILE,
            "collect_default": True}
    }

    def __init__(self, context, num_nodes, kafka, topic, security_protocol=SecurityConfig.PLAINTEXT,
                 new_consumer=False, message_validator=None, from_beginning=True, consumer_timeout_ms=None,
                 version=TRUNK, client_id="console-consumer", jmx_object_names=None, jmx_attributes=[]):
        """
        Args:
            context:                    standard context
            num_nodes:                  number of nodes to use (this should be 1)
            kafka:                      kafka service
            topic:                      consume from this topic
            security_protocol:          security protocol for Kafka connections
            new_consumer:               use new Kafka consumer if True
            message_validator:          function which returns message or None
            from_beginning:             consume from beginning if True, else from the end
            consumer_timeout_ms:        corresponds to consumer.timeout.ms. consumer process ends if time between
                                        successively consumed messages exceeds this timeout. Setting this and
                                        waiting for the consumer to stop is a pretty good way to consume all
                                        messages in a topic.
        """
        JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
        BackgroundThreadService.__init__(self, context, num_nodes)
        self.kafka = kafka
        self.new_consumer = new_consumer
        self.args = {
            'topic': topic,
        }

        self.consumer_timeout_ms = consumer_timeout_ms
        for node in self.nodes:
            node.version = version

        self.from_beginning = from_beginning
        self.message_validator = message_validator
        self.messages_consumed = {idx: [] for idx in range(1, num_nodes + 1)}
        self.client_id = client_id
        self.security_protocol = security_protocol

        # Validate a few configs
        if self.new_consumer is None:
            self.new_consumer = self.security_protocol == SecurityConfig.SSL
        if self.security_protocol == SecurityConfig.SSL and not self.new_consumer:
            raise Exception("SSL protocol is supported only with the new consumer")

    def prop_file(self, node):
        """Return a string which can be used to create a configuration file appropriate for the given node."""
        # Process client configuration
        prop_file = self.render('console_consumer.properties')
        if hasattr(node, "version") and node.version <= LATEST_0_8_2:
            # in 0.8.2.X and earlier, console consumer does not have --timeout-ms option
            # instead, we have to pass it through the config file
            prop_file += "\nconsumer.timeout.ms=%s\n" % str(self.consumer_timeout_ms)

        # Add security properties to the config. If security protocol is not specified,
        # use the default in the template properties.
        self.security_config = SecurityConfig(self.security_protocol, prop_file)
        self.security_protocol = self.security_config.security_protocol
        prop_file += str(self.security_config)
        return prop_file

    def start_cmd(self, node):
        """Return the start command appropriate for the given node."""
        args = self.args.copy()
        args['zk_connect'] = self.kafka.zk.connect_setting()
        args['stdout'] = ConsoleConsumer.STDOUT_CAPTURE
        args['stderr'] = ConsoleConsumer.STDERR_CAPTURE
        args['log_dir'] = ConsoleConsumer.LOG_DIR
        args['log4j_config'] = ConsoleConsumer.LOG4J_CONFIG
        args['config_file'] = ConsoleConsumer.CONFIG_FILE
        args['jmx_port'] = self.jmx_port
        args['kafka_dir'] = kafka_dir(node)
        args['broker_list'] = self.kafka.bootstrap_servers()

        cmd = "export JMX_PORT=%(jmx_port)s; " \
              "export LOG_DIR=%(log_dir)s; " \
              "export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j_config)s\"; " \
              "/opt/%(kafka_dir)s/bin/kafka-console-consumer.sh " \
              "--topic %(topic)s --consumer.config %(config_file)s" % args

        if self.new_consumer:
            cmd += " --new-consumer --bootstrap-server %(broker_list)s" % args
        else:
            cmd += " --zookeeper %(zk_connect)s" % args

        if self.from_beginning:
            cmd += " --from-beginning"

        if self.consumer_timeout_ms is not None:
            # version 0.8.X and below do not support --timeout-ms option
            # This will be added in the properties file instead
            if node.version > LATEST_0_8_2:
                cmd += " --timeout-ms %s" % self.consumer_timeout_ms

        cmd += " 2>> %(stderr)s | tee -a %(stdout)s &" % args
        return cmd

    def pids(self, node):
        try:
            cmd = "ps ax | grep -i console_consumer | grep java | grep -v grep | awk '{print $1}'"
            pid_arr = [pid for pid in node.account.ssh_capture(cmd, allow_fail=True, callback=int)]
            return pid_arr
        except (subprocess.CalledProcessError, ValueError) as e:
            return []

    def alive(self, node):
        return len(self.pids(node)) > 0

    def _worker(self, idx, node):
        node.account.ssh("mkdir -p %s" % ConsoleConsumer.PERSISTENT_ROOT, allow_fail=False)

        # Create and upload config file
        self.logger.info("console_consumer.properties:")
        prop_file = self.prop_file(node)
        self.logger.info(prop_file)
        node.account.create_file(ConsoleConsumer.CONFIG_FILE, prop_file)
        self.security_config.setup_node(node)

        # Create and upload log properties
        log_config = self.render('tools_log4j.properties', log_file=ConsoleConsumer.LOG_FILE)
        node.account.create_file(ConsoleConsumer.LOG4J_CONFIG, log_config)

        # Run and capture output
        cmd = self.start_cmd(node)
        self.logger.debug("Console consumer %d command: %s", idx, cmd)

        consumer_output = node.account.ssh_capture(cmd, allow_fail=False)
        first_line = next(consumer_output, None)

        if first_line is not None:
            self.start_jmx_tool(idx, node)

            for line in itertools.chain([first_line], consumer_output):
                msg = line.strip()
                if self.message_validator is not None:
                    msg = self.message_validator(msg)
                if msg is not None:
                    self.messages_consumed[idx].append(msg)

            self.read_jmx_output(idx, node)

    def start_node(self, node):
        BackgroundThreadService.start_node(self, node)

    def stop_node(self, node):
        node.account.kill_process("console_consumer", allow_fail=True)
        wait_until(lambda: not self.alive(node), timeout_sec=10, backoff_sec=.2,
                   err_msg="Timed out waiting for consumer to stop.")

    def clean_node(self, node):
        if self.alive(node):
            self.logger.warn("%s %s was still alive at cleanup time. Killing forcefully..." %
                             (self.__class__.__name__, node.account))
        JmxMixin.clean_node(self, node)
        node.account.kill_process("java", clean_shutdown=False, allow_fail=True)
        node.account.ssh("rm -rf %s" % ConsoleConsumer.PERSISTENT_ROOT, allow_fail=False)
        self.security_config.clean_node(node)
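# Usage sketch (illustrative, not part of the original source): draining a topic by combining
# consumer_timeout_ms with a wait on the consumer process, as the docstring above suggests. Assumes a
# running KafkaService as self.kafka; the topic name and timeouts are made up for the example.
from ducktape.utils.util import wait_until

def consume_all(self):
    consumer = ConsoleConsumer(self.test_context, num_nodes=1, kafka=self.kafka,
                               topic="output-topic", from_beginning=True,
                               consumer_timeout_ms=10000)
    consumer.start()
    # the console consumer exits on its own once no message arrives for consumer_timeout_ms
    wait_until(lambda: not consumer.alive(consumer.nodes[0]), timeout_sec=120,
               err_msg="Console consumer did not finish draining the topic")
    self.logger.info("Consumed %d messages", len(consumer.messages_consumed[1]))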
class ProducerPerformanceService(JmxMixin, PerformanceService):

    logs = {
        "producer_performance_log": {
            "path": "/mnt/producer-performance.log",
            "collect_default": True},
    }

    def __init__(self, context, num_nodes, kafka, security_protocol, topic, num_records, record_size,
                 throughput, settings={}, intermediate_stats=False, client_id="producer-performance",
                 jmx_object_names=None, jmx_attributes=[]):
        JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
        PerformanceService.__init__(self, context, num_nodes)
        self.kafka = kafka
        self.security_config = SecurityConfig(security_protocol)
        self.security_protocol = security_protocol
        self.args = {
            'topic': topic,
            'num_records': num_records,
            'record_size': record_size,
            'throughput': throughput
        }
        self.settings = settings
        self.intermediate_stats = intermediate_stats
        self.client_id = client_id

    def _worker(self, idx, node):
        args = self.args.copy()
        args.update({'bootstrap_servers': self.kafka.bootstrap_servers(),
                     'jmx_port': self.jmx_port,
                     'client_id': self.client_id})
        cmd = "JMX_PORT=%(jmx_port)d /opt/kafka/bin/kafka-run-class.sh org.apache.kafka.clients.tools.ProducerPerformance " \
              "%(topic)s %(num_records)d %(record_size)d %(throughput)d bootstrap.servers=%(bootstrap_servers)s client.id=%(client_id)s" % args

        self.security_config.setup_node(node)
        if self.security_protocol == SecurityConfig.SSL:
            self.settings.update(self.security_config.properties)

        for key, value in self.settings.items():
            cmd += " %s=%s" % (str(key), str(value))
        cmd += " | tee /mnt/producer-performance.log"

        self.logger.debug("Producer performance %d command: %s", idx, cmd)

        def parse_stats(line):
            parts = line.split(',')
            return {
                'records': int(parts[0].split()[0]),
                'records_per_sec': float(parts[1].split()[0]),
                'mbps': float(parts[1].split('(')[1].split()[0]),
                'latency_avg_ms': float(parts[2].split()[0]),
                'latency_max_ms': float(parts[3].split()[0]),
                'latency_50th_ms': float(parts[4].split()[0]),
                'latency_95th_ms': float(parts[5].split()[0]),
                'latency_99th_ms': float(parts[6].split()[0]),
                'latency_999th_ms': float(parts[7].split()[0]),
            }

        last = None
        producer_output = node.account.ssh_capture(cmd)
        first_line = producer_output.next()
        self.start_jmx_tool(idx, node)
        for line in itertools.chain([first_line], producer_output):
            if self.intermediate_stats:
                try:
                    self.stats[idx - 1].append(parse_stats(line))
                except:
                    # Sometimes there are extraneous log messages
                    pass

            last = line
        try:
            self.results[idx - 1] = parse_stats(last)
        except:
            raise Exception("Unable to parse aggregate performance statistics on node %d: %s" % (idx, last))
        self.read_jmx_output(idx, node)
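# Usage sketch (illustrative, not part of the original source): a throughput run that also samples a
# producer JMX attribute. Assumes a running KafkaService as self.kafka; the MBean name and attribute
# shown are only examples of the jmx_object_names/jmx_attributes hooks, not values from the original
# source, and the extra producer settings are forwarded to the tool as key=value pairs.
def run_producer_perf(self):
    perf = ProducerPerformanceService(
        self.test_context, num_nodes=1, kafka=self.kafka,
        security_protocol="PLAINTEXT", topic="perf-topic",
        num_records=1000000, record_size=100, throughput=-1,  # negative throughput disables throttling in the tool
        settings={'acks': 1},
        jmx_object_names=['kafka.producer:type=producer-metrics,client-id=producer-performance'],
        jmx_attributes=['outgoing-byte-rate'])
    perf.run()
    self.logger.info("Produced %.2f records/sec", perf.results[0]['records_per_sec'])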
class KafkaService(JmxMixin, Service):

    logs = {
        "kafka_log": {
            "path": "/mnt/kafka.log",
            "collect_default": True},
        "kafka_data": {
            "path": "/mnt/kafka-logs",
            "collect_default": False}
    }

    def __init__(self, context, num_nodes, zk, security_protocol=SecurityConfig.PLAINTEXT,
                 interbroker_security_protocol=SecurityConfig.PLAINTEXT,
                 topics=None, quota_config=None, jmx_object_names=None, jmx_attributes=[]):
        """
        :type context
        :type zk: ZookeeperService
        :type topics: dict
        """
        Service.__init__(self, context, num_nodes)
        JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
        self.zk = zk
        if security_protocol == SecurityConfig.SSL or interbroker_security_protocol == SecurityConfig.SSL:
            self.security_config = SecurityConfig(SecurityConfig.SSL)
        else:
            self.security_config = SecurityConfig(SecurityConfig.PLAINTEXT)
        self.security_protocol = security_protocol
        self.interbroker_security_protocol = interbroker_security_protocol
        self.port = 9092 if security_protocol == SecurityConfig.PLAINTEXT else 9093
        self.topics = topics
        self.quota_config = quota_config

    def start(self):
        Service.start(self)

        # Create topics if necessary
        if self.topics is not None:
            for topic, topic_cfg in self.topics.items():
                if topic_cfg is None:
                    topic_cfg = {}

                topic_cfg["topic"] = topic
                self.create_topic(topic_cfg)

    def start_node(self, node):
        props_file = self.render('kafka.properties',
                                 node=node, broker_id=self.idx(node),
                                 port=self.port, security_protocol=self.security_protocol,
                                 quota_config=self.quota_config,
                                 interbroker_security_protocol=self.interbroker_security_protocol)
        self.logger.info("kafka.properties:")
        self.logger.info(props_file)
        node.account.create_file("/mnt/kafka.properties", props_file)

        self.security_config.setup_node(node)

        cmd = "JMX_PORT=%d /opt/kafka/bin/kafka-server-start.sh /mnt/kafka.properties 1>> /mnt/kafka.log 2>> /mnt/kafka.log & echo $! > /mnt/kafka.pid" % self.jmx_port
        self.logger.debug("Attempting to start KafkaService on %s with command: %s" % (str(node.account), cmd))
        with node.account.monitor_log("/mnt/kafka.log") as monitor:
            node.account.ssh(cmd)
            monitor.wait_until("Kafka Server.*started", timeout_sec=30, err_msg="Kafka server didn't finish startup")
        self.start_jmx_tool(self.idx(node), node)
        if len(self.pids(node)) == 0:
            raise Exception("No process ids recorded on node %s" % str(node))

    def pids(self, node):
        """Return process ids associated with running processes on the given node."""
        try:
            return [pid for pid in node.account.ssh_capture("cat /mnt/kafka.pid", callback=int)]
        except:
            return []

    def signal_node(self, node, sig=signal.SIGTERM):
        pids = self.pids(node)
        for pid in pids:
            node.account.signal(pid, sig)

    def signal_leader(self, topic, partition=0, sig=signal.SIGTERM):
        leader = self.leader(topic, partition)
        self.signal_node(leader, sig)

    def stop_node(self, node, clean_shutdown=True):
        pids = self.pids(node)
        sig = signal.SIGTERM if clean_shutdown else signal.SIGKILL

        for pid in pids:
            node.account.signal(pid, sig, allow_fail=False)

        node.account.ssh("rm -f /mnt/kafka.pid", allow_fail=False)

    def clean_node(self, node):
        JmxMixin.clean_node(self, node)
        node.account.kill_process("kafka", clean_shutdown=False, allow_fail=True)
        node.account.ssh("rm -rf /mnt/kafka-logs /mnt/kafka.properties /mnt/kafka.log /mnt/kafka.pid", allow_fail=False)
        self.security_config.clean_node(node)

    def create_topic(self, topic_cfg):
        node = self.nodes[0]  # any node is fine here
        self.logger.info("Creating topic %s with settings %s", topic_cfg["topic"], topic_cfg)

        cmd = "/opt/kafka/bin/kafka-topics.sh --zookeeper %(zk_connect)s --create "\
              "--topic %(topic)s --partitions %(partitions)d --replication-factor %(replication)d" % {
                  'zk_connect': self.zk.connect_setting(),
                  'topic': topic_cfg.get("topic"),
                  'partitions': topic_cfg.get('partitions', 1),
                  'replication': topic_cfg.get('replication-factor', 1)}

        if "configs" in topic_cfg.keys() and topic_cfg["configs"] is not None:
            for config_name, config_value in topic_cfg["configs"].items():
                cmd += " --config %s=%s" % (config_name, str(config_value))

        self.logger.info("Running topic creation command...\n%s" % cmd)
        node.account.ssh(cmd)

        time.sleep(1)
        self.logger.info("Checking to see if topic was properly created...\n%s" % cmd)
        for line in self.describe_topic(topic_cfg["topic"]).split("\n"):
            self.logger.info(line)

    def describe_topic(self, topic):
        node = self.nodes[0]
        cmd = "/opt/kafka/bin/kafka-topics.sh --zookeeper %s --topic %s --describe" % \
              (self.zk.connect_setting(), topic)
        output = ""
        for line in node.account.ssh_capture(cmd):
            output += line
        return output

    def verify_reassign_partitions(self, reassignment):
        """Run the reassign partitions admin tool in "verify" mode"""
        node = self.nodes[0]
        json_file = "/tmp/" + str(time.time()) + "_reassign.json"

        # reassignment to json
        json_str = json.dumps(reassignment)
        json_str = json.dumps(json_str)

        # create command
        cmd = "echo %s > %s && " % (json_str, json_file)
        cmd += "/opt/kafka/bin/kafka-reassign-partitions.sh "\
               "--zookeeper %(zk_connect)s "\
               "--reassignment-json-file %(reassignment_file)s "\
               "--verify" % {'zk_connect': self.zk.connect_setting(),
                             'reassignment_file': json_file}
        cmd += " && sleep 1 && rm -f %s" % json_file

        # send command
        self.logger.info("Verifying partition reassignment...")
        self.logger.debug(cmd)
        output = ""
        for line in node.account.ssh_capture(cmd):
            output += line

        self.logger.debug(output)

        if re.match(".*is in progress.*", output) is not None:
            return False
        return True

    def execute_reassign_partitions(self, reassignment):
        """Run the reassign partitions admin tool in "execute" mode"""
        node = self.nodes[0]
        json_file = "/tmp/" + str(time.time()) + "_reassign.json"

        # reassignment to json
        json_str = json.dumps(reassignment)
        json_str = json.dumps(json_str)

        # create command
        cmd = "echo %s > %s && " % (json_str, json_file)
        cmd += "/opt/kafka/bin/kafka-reassign-partitions.sh "\
               "--zookeeper %(zk_connect)s "\
               "--reassignment-json-file %(reassignment_file)s "\
               "--execute" % {'zk_connect': self.zk.connect_setting(),
                              'reassignment_file': json_file}
        cmd += " && sleep 1 && rm -f %s" % json_file

        # send command
        self.logger.info("Executing partition reassignment...")
        self.logger.debug(cmd)
        output = ""
        for line in node.account.ssh_capture(cmd):
            output += line

        self.logger.debug("Verify partition reassignment:")
        self.logger.debug(output)

    def restart_node(self, node, wait_sec=0, clean_shutdown=True):
        """Restart the given node, waiting wait_sec in between stopping and starting up again."""
        self.stop_node(node, clean_shutdown)
        time.sleep(wait_sec)
        self.start_node(node)

    def leader(self, topic, partition=0):
        """Get the leader replica for the given topic and partition."""
        cmd = "/opt/kafka/bin/kafka-run-class.sh kafka.tools.ZooKeeperMainWrapper -server %s " \
              % self.zk.connect_setting()
        cmd += "get /brokers/topics/%s/partitions/%d/state" % (topic, partition)
        self.logger.debug(cmd)

        node = self.nodes[0]
        self.logger.debug("Querying zookeeper to find leader replica for topic %s: \n%s" % (topic, cmd))
        partition_state = None
        for line in node.account.ssh_capture(cmd):
            match = re.match("^({.+})$", line)
            if match is not None:
                partition_state = match.groups()[0]
                break

        if partition_state is None:
            raise Exception("Error finding partition state for topic %s and partition %d." % (topic, partition))

        partition_state = json.loads(partition_state)
        self.logger.info(partition_state)

        leader_idx = int(partition_state["leader"])
        self.logger.info("Leader for topic %s and partition %d is now: %d" % (topic, partition, leader_idx))
        return self.get_node(leader_idx)

    def bootstrap_servers(self):
        """Get the broker list to connect to Kafka using the specified security protocol"""
        return ','.join([node.account.hostname + ":" + str(self.port) for node in self.nodes])

    def read_jmx_output_all_nodes(self):
        for node in self.nodes:
            self.read_jmx_output(self.idx(node), node)
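# Usage sketch (illustrative, not part of the original source): the shape of the `topics` constructor
# argument that start() feeds into create_topic(). Keys mirror what create_topic() reads: 'partitions',
# 'replication-factor' and an optional 'configs' dict; the concrete topic names and values here are
# made up for the example.
example_topics = {
    "input-topic": {"partitions": 6, "replication-factor": 3},
    "compacted-topic": {
        "partitions": 1,
        "replication-factor": 3,
        "configs": {"cleanup.policy": "compact", "min.insync.replicas": 2}
    },
    "defaults-topic": None,  # start() replaces None with {}, so create_topic() falls back to 1 partition / RF 1
}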
class VerifiableProducer(BackgroundThreadService):

    CONFIG_FILE = "/mnt/verifiable_producer.properties"

    logs = {
        "producer_log": {
            "path": "/mnt/producer.log",
            "collect_default": False}
    }

    def __init__(self, context, num_nodes, kafka, topic, security_protocol=None, max_messages=-1, throughput=100000):
        super(VerifiableProducer, self).__init__(context, num_nodes)

        self.kafka = kafka
        self.topic = topic
        self.max_messages = max_messages
        self.throughput = throughput

        self.acked_values = []
        self.not_acked_values = []

        self.prop_file = ""
        self.security_config = SecurityConfig(security_protocol, self.prop_file)
        self.security_protocol = self.security_config.security_protocol
        self.prop_file += str(self.security_config)

    def _worker(self, idx, node):
        # Create and upload config file
        self.logger.info("verifiable_producer.properties:")
        self.logger.info(self.prop_file)
        node.account.create_file(VerifiableProducer.CONFIG_FILE, self.prop_file)
        self.security_config.setup_node(node)

        cmd = self.start_cmd
        self.logger.debug("VerifiableProducer %d command: %s" % (idx, cmd))

        for line in node.account.ssh_capture(cmd):
            line = line.strip()

            data = self.try_parse_json(line)
            if data is not None:

                with self.lock:
                    if data["name"] == "producer_send_error":
                        data["node"] = idx
                        self.not_acked_values.append(int(data["value"]))

                    elif data["name"] == "producer_send_success":
                        self.acked_values.append(int(data["value"]))

    @property
    def start_cmd(self):
        cmd = "/opt/kafka/bin/kafka-verifiable-producer.sh" \
              " --topic %s --broker-list %s" % (self.topic, self.kafka.bootstrap_servers())
        if self.max_messages > 0:
            cmd += " --max-messages %s" % str(self.max_messages)
        if self.throughput > 0:
            cmd += " --throughput %s" % str(self.throughput)

        cmd += " --producer.config %s" % VerifiableProducer.CONFIG_FILE
        cmd += " 2>> /mnt/producer.log | tee -a /mnt/producer.log &"
        return cmd

    @property
    def acked(self):
        with self.lock:
            return self.acked_values

    @property
    def not_acked(self):
        with self.lock:
            return self.not_acked_values

    @property
    def num_acked(self):
        with self.lock:
            return len(self.acked_values)

    @property
    def num_not_acked(self):
        with self.lock:
            return len(self.not_acked_values)

    def stop_node(self, node):
        node.account.kill_process("VerifiableProducer", allow_fail=False)
        if self.worker_threads is None:
            return

        # block until the corresponding thread exits
        if len(self.worker_threads) >= self.idx(node):
            # Need to guard this because stop is preemptively called before the worker threads are added and started
            self.worker_threads[self.idx(node) - 1].join()

    def clean_node(self, node):
        node.account.kill_process("VerifiableProducer", clean_shutdown=False, allow_fail=False)
        node.account.ssh("rm -rf /mnt/producer.log /mnt/verifiable_producer.properties", allow_fail=False)
        self.security_config.clean_node(node)

    def try_parse_json(self, string):
        """Try to parse a string as json. Return None if not parseable."""
        try:
            record = json.loads(string)
            return record
        except ValueError:
            self.logger.debug("Could not parse as json: %s" % str(string))
            return None
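# Usage sketch (illustrative, not part of the original source): producing a bounded stream and checking
# the acked counters that _worker() accumulates from the tool's JSON output. Assumes a running
# KafkaService as self.kafka; the topic name and message count are made up for the example.
from ducktape.utils.util import wait_until

def run_verifiable_producer(self):
    producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka,
                                  topic="verifiable-topic", max_messages=1000, throughput=100)
    producer.start()
    # every send is eventually reported as either producer_send_success or producer_send_error
    wait_until(lambda: producer.num_acked + producer.num_not_acked >= 1000, timeout_sec=120,
               err_msg="Verifiable producer did not report on all sends in time")
    producer.stop()
    self.logger.info("acked=%d, not acked=%d", producer.num_acked, producer.num_not_acked)
    assert producer.num_acked > 0, "Expected at least one acked message"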
def security_config(self):
    if self.security_protocol == SecurityConfig.SSL or self.interbroker_security_protocol == SecurityConfig.SSL:
        return SecurityConfig(SecurityConfig.SSL)
    else:
        return SecurityConfig(SecurityConfig.PLAINTEXT)