class MigrationToolTest(ReplicationUtils, SetupUtils):

    testModuleAbsPathName = os.path.realpath(__file__)
    testSuiteAbsPathName  = os.path.abspath(os.path.dirname(testModuleAbsPathName))

    def __init__(self, systemTestEnv):

        # SystemTestEnv - provides cluster level environment settings
        #     such as entity_id, hostname, kafka_home, java_home which
        #     are available in a list of dictionary named
        #     "clusterEntityConfigDictList"
        self.systemTestEnv = systemTestEnv

        super(MigrationToolTest, self).__init__(self)

        # dict to pass user-defined attributes to logger argument: "extra"
        # (must be an instance attribute - it is read later as "self.d")
        self.d = {'name_of_class': self.__class__.__name__}

    def signal_handler(self, signum, frame):
        self.log_message("Interrupt detected - User pressed Ctrl+c")

        # perform the necessary cleanup here when user presses Ctrl+c and it may be product specific
        self.log_message("stopping all entities - please wait ...")
        kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv)
        sys.exit(1)

    def runTest(self):

        # ======================================================================
        # get all testcase directories under this testsuite
        # ======================================================================
        testCasePathNameList = system_test_utils.get_dir_paths_with_prefix(
            self.testSuiteAbsPathName, SystemTestEnv.SYSTEM_TEST_CASE_PREFIX)
        testCasePathNameList.sort()

        # =============================================================
        # launch each testcase one by one: testcase_1, testcase_2, ...
        # =============================================================
        for testCasePathName in testCasePathNameList:

            skipThisTestCase = False

            try:
                # ======================================================================
                # A new instance of TestcaseEnv to keep track of this testcase's env vars
                # and initialize some env vars as testCasePathName is available now
                # ======================================================================
                self.testcaseEnv = TestcaseEnv(self.systemTestEnv, self)
                self.testcaseEnv.testSuiteBaseDir = self.testSuiteAbsPathName
                self.testcaseEnv.initWithKnownTestCasePathName(testCasePathName)
                self.testcaseEnv.testcaseArgumentsDict = self.testcaseEnv.testcaseNonEntityDataDict["testcase_args"]

                # ======================================================================
                # SKIP if this case is IN testcase_to_skip.json or NOT IN testcase_to_run.json
                # ======================================================================
                testcaseDirName = self.testcaseEnv.testcaseResultsDict["_test_case_name"]

                if self.systemTestEnv.printTestDescriptionsOnly:
                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
                    continue
                elif self.systemTestEnv.isTestCaseToSkip(self.__class__.__name__, testcaseDirName):
                    self.log_message("Skipping : " + testcaseDirName)
                    skipThisTestCase = True
                    continue
                else:
                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
                    system_test_utils.setup_remote_hosts_with_testcase_level_cluster_config(self.systemTestEnv, testCasePathName)

                # ============================================================================== #
                # ============================================================================== #
                #                  Product Specific Testing Code Starts Here:                    #
                # ============================================================================== #
                # ============================================================================== #

                # initialize self.testcaseEnv with user-defined environment variables (product specific)
                self.testcaseEnv.userDefinedEnvVarDict["zkConnectStr"] = ""
                self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = False
                self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = False

                # initialize signal handler
                signal.signal(signal.SIGINT, self.signal_handler)

                # TestcaseEnv.testcaseConfigsList initialized by reading testcase properties file:
                #   system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json
                self.testcaseEnv.testcaseConfigsList = system_test_utils.get_json_list_data(
                    self.testcaseEnv.testcasePropJsonPathName)

                # clean up data directories specified in zookeeper.properties and kafka_server_<n>.properties
                kafka_system_test_utils.cleanup_data_at_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # create "LOCAL" log directories for metrics, dashboards for each entity under this testcase
                # for collecting logs from remote machines
                kafka_system_test_utils.generate_testcase_log_dirs(self.systemTestEnv, self.testcaseEnv)

                # TestcaseEnv - initialize producer & consumer config / log file pathnames
                kafka_system_test_utils.init_entity_props(self.systemTestEnv, self.testcaseEnv)

                # generate remote hosts log/config dirs if not exist
                kafka_system_test_utils.generate_testcase_log_dirs_in_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # generate properties files for zookeeper, kafka, producer, consumer and mirror-maker:
                # 1. copy system_test/<suite_name>_testsuite/config/*.properties to
                #    system_test/<suite_name>_testsuite/testcase_<n>/config/
                # 2. update all properties files in system_test/<suite_name>_testsuite/testcase_<n>/config
                #    by overriding the settings specified in:
                #    system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json
                kafka_system_test_utils.generate_overriden_props_files(self.testSuiteAbsPathName,
                    self.testcaseEnv, self.systemTestEnv)

                # =============================================
                # preparing all entities to start the test
                # =============================================
                self.log_message("starting zookeepers")
                kafka_system_test_utils.start_zookeepers(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 2s")
                time.sleep(2)

                self.log_message("starting brokers")
                kafka_system_test_utils.start_brokers(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 5s")
                time.sleep(5)

                self.log_message("starting migration tool")
                kafka_system_test_utils.start_migration_tool(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 5s")
                time.sleep(5)

                self.log_message("creating topics")
                kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 5s")
                time.sleep(5)

                # =============================================
                # starting producer
                # =============================================
                self.log_message("starting producer in the background")
                kafka_system_test_utils.start_producer_performance(self.systemTestEnv, self.testcaseEnv, True)
                msgProducingFreeTimeSec = self.testcaseEnv.testcaseArgumentsDict["message_producing_free_time_sec"]
                self.anonLogger.info("sleeping for " + msgProducingFreeTimeSec + " sec to produce some messages")
                time.sleep(int(msgProducingFreeTimeSec))

                # =============================================
                # A while-loop to bounce the migration tool as
                # specified by "num_iteration" in
                # testcase_<n>_properties.json
                # =============================================
                i = 1
                numIterations = int(self.testcaseEnv.testcaseArgumentsDict["num_iteration"])
                bouncedEntityDownTimeSec = 1
                try:
                    bouncedEntityDownTimeSec = int(self.testcaseEnv.testcaseArgumentsDict["bounced_entity_downtime_sec"])
                except (KeyError, ValueError):
                    pass  # take default

                while i <= numIterations:
                    self.log_message("Iteration " + str(i) + " of " + str(numIterations))

                    # =============================================
                    # Bounce Migration Tool
                    # =============================================
                    bounceMigrationTool = self.testcaseEnv.testcaseArgumentsDict["bounce_migration_tool"]
                    self.log_message("bounce_migration_tool flag : " + bounceMigrationTool)

                    if bounceMigrationTool.lower() == "true":
                        clusterConfigList = self.systemTestEnv.clusterEntityConfigDictList
                        migrationToolEntityIdList = system_test_utils.get_data_from_list_of_dicts(
                            clusterConfigList, "role", "migration_tool", "entity_id")

                        stoppedMigrationToolEntityId = migrationToolEntityIdList[0]
                        migrationToolPPid = self.testcaseEnv.entityMigrationToolParentPidDict[stoppedMigrationToolEntityId]

                        self.log_message("stopping migration tool : " + migrationToolPPid)
                        kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, stoppedMigrationToolEntityId, migrationToolPPid)
                        self.anonLogger.info("sleeping for " + str(bouncedEntityDownTimeSec) + " sec")
                        time.sleep(bouncedEntityDownTimeSec)

                        # starting the previously terminated migration tool
                        self.log_message("starting the previously terminated migration tool")
                        kafka_system_test_utils.start_migration_tool(self.systemTestEnv, self.testcaseEnv, stoppedMigrationToolEntityId)
                        self.anonLogger.info("sleeping for 15s")
                        time.sleep(15)

                    i += 1
                # while loop

                # =============================================
                # tell producer to stop
                # =============================================
                self.testcaseEnv.lock.acquire()
                self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = True
                time.sleep(1)
                self.testcaseEnv.lock.release()
                time.sleep(1)

                # =============================================
                # wait for producer thread's update of
                # "backgroundProducerStopped" to be "True"
                # =============================================
                while 1:
                    self.testcaseEnv.lock.acquire()
                    self.logger.info("status of backgroundProducerStopped : [" + \
                        str(self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]) + "]", extra=self.d)
                    if self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]:
                        time.sleep(1)
                        self.logger.info("all producer threads completed", extra=self.d)
                        # release the lock before leaving the loop so the
                        # producer thread is not blocked on it
                        self.testcaseEnv.lock.release()
                        break
                    time.sleep(1)
                    self.testcaseEnv.lock.release()
                    time.sleep(2)

                # =============================================
                # starting consumer
                # =============================================
                self.log_message("starting consumer in the background")
                kafka_system_test_utils.start_console_consumer(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 20s")
                time.sleep(20)

                # =============================================
                # this testcase is completed - stop all entities
                # =============================================
                self.log_message("stopping all entities")
                for entityId, parentPid in self.testcaseEnv.entityBrokerParentPidDict.items():
                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)

                for entityId, parentPid in self.testcaseEnv.entityZkParentPidDict.items():
                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)

                # make sure all entities are stopped
                kafka_system_test_utils.ps_grep_terminate_running_entity(self.systemTestEnv)

                # =============================================
                # collect logs from remote hosts
                # =============================================
                kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # =============================================
                # validate the data matched and checksum
                # =============================================
                self.log_message("validating data matched")
                kafka_system_test_utils.validate_07_08_migrated_data_matched(self.systemTestEnv, self.testcaseEnv)
                kafka_system_test_utils.validate_broker_log_segment_checksum(self.systemTestEnv, self.testcaseEnv)

                # =============================================
                # draw graphs
                # =============================================
                metrics.draw_all_graphs(self.systemTestEnv.METRICS_PATHNAME,
                                        self.testcaseEnv,
                                        self.systemTestEnv.clusterEntityConfigDictList)

                # build dashboard, one for each role
                metrics.build_all_dashboards(self.systemTestEnv.METRICS_PATHNAME,
                                             self.testcaseEnv.testCaseDashboardsDir,
                                             self.systemTestEnv.clusterEntityConfigDictList)

            except Exception as e:
                self.log_message("Exception while running test {0}".format(e))
                traceback.print_exc()
            finally:
                if not skipThisTestCase and not self.systemTestEnv.printTestDescriptionsOnly:
                    self.log_message("stopping all entities - please wait ...")
                    kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv)
class ReplicaBasicTest(ReplicationUtils, SetupUtils):

    testModuleAbsPathName = os.path.realpath(__file__)
    testSuiteAbsPathName  = os.path.abspath(os.path.dirname(testModuleAbsPathName))

    def __init__(self, systemTestEnv):

        # SystemTestEnv - provides cluster level environment settings
        #     such as entity_id, hostname, kafka_home, java_home which
        #     are available in a list of dictionary named
        #     "clusterEntityConfigDictList"
        self.systemTestEnv = systemTestEnv

        super(ReplicaBasicTest, self).__init__(self)

        # dict to pass user-defined attributes to logger argument: "extra"
        # (must be an instance attribute - it is read later as "self.d")
        self.d = {'name_of_class': self.__class__.__name__}

    def signal_handler(self, signum, frame):
        self.log_message("Interrupt detected - User pressed Ctrl+c")

        # perform the necessary cleanup here when user presses Ctrl+c and it may be product specific
        self.log_message("stopping all entities - please wait ...")
        kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv)
        sys.exit(1)

    def runTest(self):

        # ======================================================================
        # get all testcase directories under this testsuite
        # ======================================================================
        testCasePathNameList = system_test_utils.get_dir_paths_with_prefix(
            self.testSuiteAbsPathName, SystemTestEnv.SYSTEM_TEST_CASE_PREFIX)
        testCasePathNameList.sort()

        replicationUtils = ReplicationUtils(self)

        # =============================================================
        # launch each testcase one by one: testcase_1, testcase_2, ...
        # =============================================================
        for testCasePathName in testCasePathNameList:

            skipThisTestCase = False

            try:
                # ======================================================================
                # A new instance of TestcaseEnv to keep track of this testcase's env vars
                # and initialize some env vars as testCasePathName is available now
                # ======================================================================
                self.testcaseEnv = TestcaseEnv(self.systemTestEnv, self)
                self.testcaseEnv.testSuiteBaseDir = self.testSuiteAbsPathName
                self.testcaseEnv.initWithKnownTestCasePathName(testCasePathName)
                self.testcaseEnv.testcaseArgumentsDict = self.testcaseEnv.testcaseNonEntityDataDict["testcase_args"]

                # ======================================================================
                # SKIP if this case is IN testcase_to_skip.json or NOT IN testcase_to_run.json
                # ======================================================================
                testcaseDirName = self.testcaseEnv.testcaseResultsDict["_test_case_name"]

                if self.systemTestEnv.printTestDescriptionsOnly:
                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
                    continue
                elif self.systemTestEnv.isTestCaseToSkip(self.__class__.__name__, testcaseDirName):
                    self.log_message("Skipping : " + testcaseDirName)
                    skipThisTestCase = True
                    continue
                else:
                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
                    system_test_utils.setup_remote_hosts_with_testcase_level_cluster_config(self.systemTestEnv, testCasePathName)

                # ============================================================================== #
                # ============================================================================== #
                #                  Product Specific Testing Code Starts Here:                    #
                # ============================================================================== #
                # ============================================================================== #

                # get optional testcase arguments
                logRetentionTest = "false"
                try:
                    logRetentionTest = self.testcaseEnv.testcaseArgumentsDict["log_retention_test"]
                except KeyError:
                    pass
                consumerMultiTopicsMode = "false"
                try:
                    consumerMultiTopicsMode = self.testcaseEnv.testcaseArgumentsDict["consumer_multi_topics_mode"]
                except KeyError:
                    pass
                autoCreateTopic = "false"
                try:
                    autoCreateTopic = self.testcaseEnv.testcaseArgumentsDict["auto_create_topic"]
                except KeyError:
                    pass

                # initialize self.testcaseEnv with user-defined environment variables (product specific)
                self.testcaseEnv.userDefinedEnvVarDict["zkConnectStr"] = ""
                self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = False
                self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = False
                self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"] = []

                # initialize signal handler
                signal.signal(signal.SIGINT, self.signal_handler)

                # TestcaseEnv.testcaseConfigsList initialized by reading testcase properties file:
                #   system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json
                self.testcaseEnv.testcaseConfigsList = system_test_utils.get_json_list_data(
                    self.testcaseEnv.testcasePropJsonPathName)

                # clean up data directories specified in zookeeper.properties and kafka_server_<n>.properties
                kafka_system_test_utils.cleanup_data_at_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # create "LOCAL" log directories for metrics, dashboards for each entity under this testcase
                # for collecting logs from remote machines
                kafka_system_test_utils.generate_testcase_log_dirs(self.systemTestEnv, self.testcaseEnv)

                # TestcaseEnv - initialize producer & consumer config / log file pathnames
                kafka_system_test_utils.init_entity_props(self.systemTestEnv, self.testcaseEnv)

                # generate remote hosts log/config dirs if not exist
                kafka_system_test_utils.generate_testcase_log_dirs_in_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # generate properties files for zookeeper, kafka, producer, consumer:
                # 1. copy system_test/<suite_name>_testsuite/config/*.properties to
                #    system_test/<suite_name>_testsuite/testcase_<n>/config/
                # 2. update all properties files in system_test/<suite_name>_testsuite/testcase_<n>/config
                #    by overriding the settings specified in:
                #    system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json
                kafka_system_test_utils.generate_overriden_props_files(self.testSuiteAbsPathName,
                    self.testcaseEnv, self.systemTestEnv)

                # =============================================
                # preparing all entities to start the test
                # =============================================
                self.log_message("starting zookeepers")
                kafka_system_test_utils.start_zookeepers(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 2s")
                time.sleep(2)

                self.log_message("starting brokers")
                kafka_system_test_utils.start_brokers(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 5s")
                time.sleep(5)

                if autoCreateTopic.lower() == "false":
                    self.log_message("creating topics")
                    kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv)
                    self.anonLogger.info("sleeping for 5s")
                    time.sleep(5)

                # =============================================
                # start ConsoleConsumer if this is a Log Retention test
                # =============================================
                if logRetentionTest.lower() == "true":
                    self.log_message("starting consumer in the background")
                    kafka_system_test_utils.start_console_consumer(self.systemTestEnv, self.testcaseEnv)
                    time.sleep(1)

                # =============================================
                # starting producer
                # =============================================
                self.log_message("starting producer in the background")
                kafka_system_test_utils.start_producer_performance(self.systemTestEnv, self.testcaseEnv, False)
                msgProducingFreeTimeSec = self.testcaseEnv.testcaseArgumentsDict["message_producing_free_time_sec"]
                self.anonLogger.info("sleeping for " + msgProducingFreeTimeSec + " sec to produce some messages")
                time.sleep(int(msgProducingFreeTimeSec))

                # =============================================
                # A while-loop to bounce leader as specified
                # by "num_iteration" in testcase_<n>_properties.json
                # =============================================
                i = 1
                numIterations    = int(self.testcaseEnv.testcaseArgumentsDict["num_iteration"])
                brokerType       = self.testcaseEnv.testcaseArgumentsDict["broker_type"]
                bounceBrokerFlag = self.testcaseEnv.testcaseArgumentsDict["bounce_broker"]

                while i <= numIterations:
                    self.log_message("Iteration " + str(i) + " of " + str(numIterations))
                    self.log_message("bounce_broker flag : " + bounceBrokerFlag)

                    leaderDict = None
                    controllerDict = None
                    stoppedBrokerEntityId = ""

                    # ==============================================
                    # Find out the entity id for the stopping broker
                    # ==============================================
                    if brokerType == "leader" or brokerType == "follower":
                        self.log_message("looking up leader")
                        leaderDict = kafka_system_test_utils.get_leader_elected_log_line(
                            self.systemTestEnv, self.testcaseEnv, self.leaderAttributesDict)

                        # ==========================
                        # leaderDict looks like this:
                        # ==========================
                        # {'entity_id': u'3',
                        #  'partition': '0',
                        #  'timestamp': 1345050255.8280001,
                        #  'hostname': u'localhost',
                        #  'topic': 'test_1',
                        #  'brokerid': '3'}

                        if brokerType == "leader":
                            stoppedBrokerEntityId = leaderDict["entity_id"]
                            self.log_message("Found leader with entity id: " + stoppedBrokerEntityId)
                        else:  # Follower
                            self.log_message("looking up follower")
                            # a list of all brokers
                            brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts(
                                self.systemTestEnv.clusterEntityConfigDictList, "role", "broker", "entity_id")

                            # we pick the first non-leader broker as the follower
                            firstFollowerEntityId = None
                            for brokerEntityId in brokerEntityIdList:
                                if brokerEntityId != leaderDict["entity_id"]:
                                    firstFollowerEntityId = brokerEntityId
                                    break
                            stoppedBrokerEntityId = firstFollowerEntityId
                            self.log_message("Found follower with entity id: " + stoppedBrokerEntityId)

                    elif brokerType == "controller":
                        self.log_message("looking up controller")
                        controllerDict = kafka_system_test_utils.get_controller_attributes(
                            self.systemTestEnv, self.testcaseEnv)

                        # ==========================
                        # controllerDict looks like this:
                        # ==========================
                        # {'entity_id': u'3',
                        #  'timestamp': 1345050255.8280001,
                        #  'hostname': u'localhost',
                        #  'brokerid': '3'}

                        stoppedBrokerEntityId = controllerDict["entity_id"]
                        self.log_message("Found controller with entity id: " + stoppedBrokerEntityId)

                    # =============================================
                    # Bounce the broker
                    # =============================================
                    if bounceBrokerFlag.lower() == "true":
                        if brokerType == "leader":
                            # validate to see if leader election is successful
                            self.log_message("validating leader election")
                            kafka_system_test_utils.validate_leader_election_successful(
                                self.testcaseEnv, leaderDict, self.testcaseEnv.validationStatusDict)

                            # trigger leader re-election by stopping leader to get re-election latency
                            reelectionLatency = kafka_system_test_utils.get_reelection_latency(
                                self.systemTestEnv, self.testcaseEnv, leaderDict, self.leaderAttributesDict)
                            latencyKeyName = "Leader Election Latency - iter " + str(i) + " brokerid " + leaderDict["brokerid"]
                            self.testcaseEnv.validationStatusDict[latencyKeyName] = \
                                "{0:.2f}".format(reelectionLatency * 1000) + " ms"
                            self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"].append(
                                "{0:.2f}".format(reelectionLatency * 1000))
                        elif brokerType == "follower":
                            # stopping Follower
                            self.log_message("stopping follower with entity id: " + firstFollowerEntityId)
                            kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, firstFollowerEntityId,
                                self.testcaseEnv.entityBrokerParentPidDict[firstFollowerEntityId])
                        elif brokerType == "controller":
                            # stopping Controller
                            self.log_message("stopping controller : " + controllerDict["brokerid"])
                            kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, controllerDict["entity_id"],
                                self.testcaseEnv.entityBrokerParentPidDict[controllerDict["entity_id"]])

                        brokerDownTimeInSec = 5
                        try:
                            brokerDownTimeInSec = int(self.testcaseEnv.testcaseArgumentsDict["broker_down_time_in_sec"])
                        except (KeyError, ValueError):
                            pass  # take default
                        time.sleep(brokerDownTimeInSec)

                        # starting previously terminated broker
                        self.log_message("starting the previously terminated broker")
                        kafka_system_test_utils.start_entity_in_background(self.systemTestEnv, self.testcaseEnv, stoppedBrokerEntityId)
                    else:
                        # GC Pause simulation
                        pauseTime = None
                        try:
                            hostname  = leaderDict["hostname"]
                            pauseTime = self.testcaseEnv.testcaseArgumentsDict["pause_time_in_seconds"]
                            parentPid = self.testcaseEnv.entityBrokerParentPidDict[leaderDict["entity_id"]]
                            pidStack  = system_test_utils.get_remote_child_processes(hostname, parentPid)
                            system_test_utils.simulate_garbage_collection_pause_in_remote_process(hostname, pidStack, pauseTime)
                        except Exception:
                            pass

                    self.anonLogger.info("sleeping for 15s")
                    time.sleep(15)
                    i += 1
                # while loop

                # update Leader Election Latency MIN/MAX to testcaseEnv.validationStatusDict
                # (the latencies are stored as formatted strings - compare numerically)
                self.testcaseEnv.validationStatusDict["Leader Election Latency MIN"] = None
                try:
                    self.testcaseEnv.validationStatusDict["Leader Election Latency MIN"] = \
                        min(self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"], key=float)
                except ValueError:
                    pass  # empty list - no leader election was triggered

                self.testcaseEnv.validationStatusDict["Leader Election Latency MAX"] = None
                try:
                    self.testcaseEnv.validationStatusDict["Leader Election Latency MAX"] = \
                        max(self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"], key=float)
                except ValueError:
                    pass

                # =============================================
                # tell producer to stop
                # =============================================
                self.testcaseEnv.lock.acquire()
                self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = True
                time.sleep(1)
                self.testcaseEnv.lock.release()
                time.sleep(1)

                # =============================================
                # wait for producer thread's update of
                # "backgroundProducerStopped" to be "True"
                # =============================================
                while 1:
                    self.testcaseEnv.lock.acquire()
                    self.logger.info("status of backgroundProducerStopped : [" + \
                        str(self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]) + "]", extra=self.d)
                    if self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]:
                        time.sleep(1)
                        self.logger.info("all producer threads completed", extra=self.d)
                        # release the lock before leaving the loop so the
                        # producer thread is not blocked on it
                        self.testcaseEnv.lock.release()
                        break
                    time.sleep(1)
                    self.testcaseEnv.lock.release()
                    time.sleep(2)

                # =============================================
                # collect logs from remote hosts to find the
                # minimum common offset of a certain log
                # segment file among all replicas
                # =============================================
                minStartingOffsetDict = None
                if logRetentionTest.lower() == "true":
                    self.anonLogger.info("sleeping for 60s to make sure log truncation is completed")
                    time.sleep(60)
                    kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv)
                    minStartingOffsetDict = kafka_system_test_utils.getMinCommonStartingOffset(self.systemTestEnv, self.testcaseEnv)
                    pprint.pprint(minStartingOffsetDict)

                # =============================================
                # starting debug consumer
                # =============================================
                if consumerMultiTopicsMode.lower() == "false":
                    self.log_message("starting debug consumers in the background")
                    kafka_system_test_utils.start_simple_consumer(self.systemTestEnv, self.testcaseEnv, minStartingOffsetDict)
                    self.anonLogger.info("sleeping for 10s")
                    time.sleep(10)

                # =============================================
                # starting console consumer
                # =============================================
                if logRetentionTest.lower() == "false":
                    self.log_message("starting consumer in the background")
                    kafka_system_test_utils.start_console_consumer(self.systemTestEnv, self.testcaseEnv)
                    time.sleep(10)

                # =============================================
                # this testcase is completed - stop all entities
                # =============================================
                self.log_message("stopping all entities")
                for entityId, parentPid in self.testcaseEnv.entityBrokerParentPidDict.items():
                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)

                for entityId, parentPid in self.testcaseEnv.entityZkParentPidDict.items():
                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)

                # make sure all entities are stopped
                kafka_system_test_utils.ps_grep_terminate_running_entity(self.systemTestEnv)

                # =============================================
                # collect logs from remote hosts
                # =============================================
                kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # =============================================
                # validate the data matched and checksum
                # =============================================
                self.log_message("validating data matched")

                if logRetentionTest.lower() == "true":
                    kafka_system_test_utils.validate_data_matched(self.systemTestEnv, self.testcaseEnv, replicationUtils)
                elif consumerMultiTopicsMode.lower() == "true":
                    kafka_system_test_utils.validate_data_matched_in_multi_topics_from_single_consumer_producer(
                        self.systemTestEnv, self.testcaseEnv, replicationUtils)
                else:
                    kafka_system_test_utils.validate_simple_consumer_data_matched_across_replicas(self.systemTestEnv, self.testcaseEnv)
                    kafka_system_test_utils.validate_broker_log_segment_checksum(self.systemTestEnv, self.testcaseEnv)
                    kafka_system_test_utils.validate_data_matched(self.systemTestEnv, self.testcaseEnv, replicationUtils)

                kafka_system_test_utils.validate_index_log(self.systemTestEnv, self.testcaseEnv)

                # =============================================
                # draw graphs
                # =============================================
                metrics.draw_all_graphs(self.systemTestEnv.METRICS_PATHNAME,
                                        self.testcaseEnv,
                                        self.systemTestEnv.clusterEntityConfigDictList)

                # build dashboard, one for each role
                metrics.build_all_dashboards(self.systemTestEnv.METRICS_PATHNAME,
                                             self.testcaseEnv.testCaseDashboardsDir,
                                             self.systemTestEnv.clusterEntityConfigDictList)

            except Exception as e:
                self.log_message("Exception while running test {0}".format(e))
                traceback.print_exc()
            finally:
                if not skipThisTestCase and not self.systemTestEnv.printTestDescriptionsOnly:
                    self.log_message("stopping all entities - please wait ...")
                    kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv)
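# ----------------------------------------------------------------------------
# Illustrative sketch - not part of the test framework. ReplicaBasicTest picks
# the broker to bounce by filtering clusterEntityConfigDictList on its "role"
# field and taking the first entity id that is not the current leader. The
# helper below mimics that selection with plain dictionaries; the exact config
# shape is an assumption based on how the list is queried above.
# ----------------------------------------------------------------------------
def _example_find_first_follower(clusterEntityConfigDictList, leaderEntityId):
    # collect the entity ids of all entries whose role is "broker"
    brokerEntityIdList = [cfg["entity_id"]
                          for cfg in clusterEntityConfigDictList
                          if cfg.get("role") == "broker"]
    # return the first broker that is not the leader, or None if all
    # brokers are the leader (e.g. a single-broker cluster)
    for brokerEntityId in brokerEntityIdList:
        if brokerEntityId != leaderEntityId:
            return brokerEntityId
    return None

# usage sketch:
#   _example_find_first_follower(
#       [{"entity_id": "1", "role": "broker"},
#        {"entity_id": "2", "role": "broker"}], "1")   # -> "2"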
class OffsetManagementTest(ReplicationUtils, SetupUtils):

    testModuleAbsPathName = os.path.realpath(__file__)
    testSuiteAbsPathName  = os.path.abspath(os.path.dirname(testModuleAbsPathName))

    def __init__(self, systemTestEnv):

        # SystemTestEnv - provides cluster level environment settings
        #     such as entity_id, hostname, kafka_home, java_home which
        #     are available in a list of dictionary named
        #     "clusterEntityConfigDictList"
        self.systemTestEnv = systemTestEnv

        super(OffsetManagementTest, self).__init__(self)

        # dict to pass user-defined attributes to logger argument: "extra"
        # (must be an instance attribute - it is read later as "self.d")
        self.d = {'name_of_class': self.__class__.__name__}

    def signal_handler(self, signum, frame):
        self.log_message("Interrupt detected - User pressed Ctrl+c")

        # perform the necessary cleanup here when user presses Ctrl+c and it may be product specific
        self.log_message("stopping all entities - please wait ...")
        kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv)
        sys.exit(1)

    def runTest(self):

        # ======================================================================
        # get all testcase directories under this testsuite
        # ======================================================================
        testCasePathNameList = system_test_utils.get_dir_paths_with_prefix(
            self.testSuiteAbsPathName, SystemTestEnv.SYSTEM_TEST_CASE_PREFIX)
        testCasePathNameList.sort()

        replicationUtils = ReplicationUtils(self)

        # =============================================================
        # launch each testcase one by one: testcase_1, testcase_2, ...
        # =============================================================
        for testCasePathName in testCasePathNameList:

            skipThisTestCase = False

            try:
                # ======================================================================
                # A new instance of TestcaseEnv to keep track of this testcase's env vars
                # and initialize some env vars as testCasePathName is available now
                # ======================================================================
                self.testcaseEnv = TestcaseEnv(self.systemTestEnv, self)
                self.testcaseEnv.testSuiteBaseDir = self.testSuiteAbsPathName
                self.testcaseEnv.initWithKnownTestCasePathName(testCasePathName)
                self.testcaseEnv.testcaseArgumentsDict = self.testcaseEnv.testcaseNonEntityDataDict["testcase_args"]

                # ======================================================================
                # SKIP if this case is IN testcase_to_skip.json or NOT IN testcase_to_run.json
                # ======================================================================
                testcaseDirName = self.testcaseEnv.testcaseResultsDict["_test_case_name"]

                if self.systemTestEnv.printTestDescriptionsOnly:
                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
                    continue
                elif self.systemTestEnv.isTestCaseToSkip(self.__class__.__name__, testcaseDirName):
                    self.log_message("Skipping : " + testcaseDirName)
                    skipThisTestCase = True
                    continue
                else:
                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
                    system_test_utils.setup_remote_hosts_with_testcase_level_cluster_config(self.systemTestEnv, testCasePathName)

                # ============================================================================== #
                # ============================================================================== #
                #                  Product Specific Testing Code Starts Here:                    #
                # ============================================================================== #
                # ============================================================================== #

                # initialize self.testcaseEnv with user-defined environment variables (product specific)
                self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = False
                self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = False

                # initialize signal handler
                signal.signal(signal.SIGINT, self.signal_handler)

                # TestcaseEnv.testcaseConfigsList initialized by reading testcase properties file:
                #   system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json
                self.testcaseEnv.testcaseConfigsList = system_test_utils.get_json_list_data(
                    self.testcaseEnv.testcasePropJsonPathName)

                # clean up data directories specified in zookeeper.properties and kafka_server_<n>.properties
                kafka_system_test_utils.cleanup_data_at_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # create "LOCAL" log directories for metrics, dashboards for each entity under this testcase
                # for collecting logs from remote machines
                kafka_system_test_utils.generate_testcase_log_dirs(self.systemTestEnv, self.testcaseEnv)

                # TestcaseEnv - initialize producer & consumer config / log file pathnames
                kafka_system_test_utils.init_entity_props(self.systemTestEnv, self.testcaseEnv)

                # generate remote hosts log/config dirs if not exist
                kafka_system_test_utils.generate_testcase_log_dirs_in_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # generate properties files for zookeeper, kafka, producer, and consumer:
                # 1. copy system_test/<suite_name>_testsuite/config/*.properties to
                #    system_test/<suite_name>_testsuite/testcase_<n>/config/
                # 2. update all properties files in system_test/<suite_name>_testsuite/testcase_<n>/config
                #    by overriding the settings specified in:
                #    system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json
                kafka_system_test_utils.generate_overriden_props_files(self.testSuiteAbsPathName,
                    self.testcaseEnv, self.systemTestEnv)

                # =============================================
                # preparing all entities to start the test
                # =============================================
                self.log_message("starting zookeepers")
                kafka_system_test_utils.start_zookeepers(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 2s")
                time.sleep(2)

                self.log_message("starting brokers")
                kafka_system_test_utils.start_brokers(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 5s")
                time.sleep(5)

                self.log_message("creating offset topic")
                kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv, "__consumer_offsets", 3, 2)
                self.anonLogger.info("sleeping for 5s")
                time.sleep(5)

                # =============================================
                # starting producer
                # =============================================
                self.log_message("starting producer in the background")
                kafka_system_test_utils.start_producer_performance(self.systemTestEnv, self.testcaseEnv, False)
                msgProducingFreeTimeSec = self.testcaseEnv.testcaseArgumentsDict["message_producing_free_time_sec"]
                self.anonLogger.info("sleeping for " + msgProducingFreeTimeSec + " sec to produce some messages")
                time.sleep(int(msgProducingFreeTimeSec))

                kafka_system_test_utils.start_console_consumers(self.systemTestEnv, self.testcaseEnv)

                kafka_system_test_utils.get_leader_for(self.systemTestEnv, self.testcaseEnv, "__consumer_offsets", 0)

                # =============================================
                # A while-loop to bounce consumers as specified
                # by "num_iteration" in testcase_<n>_properties.json
                # =============================================
                i = 1
                numIterations = int(self.testcaseEnv.testcaseArgumentsDict["num_iteration"])
                bouncedEntityDownTimeSec = 10
                try:
                    bouncedEntityDownTimeSec = int(self.testcaseEnv.testcaseArgumentsDict["bounced_entity_downtime_sec"])
                except (KeyError, ValueError):
                    pass  # take default

                # group1 -> offsets partition 0 // has one consumer; eid: 6
                # group2 -> offsets partition 1 // has four consumers; eid: 7, 8, 9, 10
                offsets_0_leader_entity = kafka_system_test_utils.get_leader_for(
                    self.systemTestEnv, self.testcaseEnv, "__consumer_offsets", 0)
                offsets_1_leader_entity = kafka_system_test_utils.get_leader_for(
                    self.systemTestEnv, self.testcaseEnv, "__consumer_offsets", 1)

                while i <= numIterations:
                    self.log_message("Iteration " + str(i) + " of " + str(numIterations))

                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, offsets_0_leader_entity,
                        self.testcaseEnv.entityBrokerParentPidDict[offsets_0_leader_entity])
                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, offsets_1_leader_entity,
                        self.testcaseEnv.entityBrokerParentPidDict[offsets_1_leader_entity])

                    # =============================================
                    # Bounce consumers if specified in testcase config
                    # =============================================
                    bounceConsumers = self.testcaseEnv.testcaseArgumentsDict["bounce_consumers"]
                    self.log_message("bounce_consumers flag : " + bounceConsumers)

                    if bounceConsumers.lower() == "true":
                        clusterConfigList = self.systemTestEnv.clusterEntityConfigDictList
                        consumerEntityIdList = system_test_utils.get_data_from_list_of_dicts(
                            clusterConfigList, "role", "console_consumer", "entity_id")

                        for stoppedConsumerEntityId in consumerEntityIdList:
                            consumerPPID = self.testcaseEnv.entityConsoleConsumerParentPidDict[stoppedConsumerEntityId]
                            self.log_message("stopping consumer: " + consumerPPID)
                            kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, stoppedConsumerEntityId, consumerPPID)

                        self.anonLogger.info("sleeping for " + str(bouncedEntityDownTimeSec) + " sec")
                        time.sleep(bouncedEntityDownTimeSec)

                        # leaders would have changed during the above bounce.
                        self.log_message("starting the previously terminated consumers.")
                        for stoppedConsumerEntityId in consumerEntityIdList:
                            # starting previously terminated consumer
                            kafka_system_test_utils.start_console_consumers(self.systemTestEnv, self.testcaseEnv, stoppedConsumerEntityId)

                    self.log_message("starting the previously terminated brokers")
                    kafka_system_test_utils.start_entity_in_background(self.systemTestEnv, self.testcaseEnv, offsets_0_leader_entity)
                    kafka_system_test_utils.start_entity_in_background(self.systemTestEnv, self.testcaseEnv, offsets_1_leader_entity)

                    self.anonLogger.info("sleeping for 15s")
                    time.sleep(15)
                    i += 1
                # while loop

                # =============================================
                # tell producer to stop
                # =============================================
                self.testcaseEnv.lock.acquire()
                self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = True
                time.sleep(1)
                self.testcaseEnv.lock.release()
                time.sleep(1)

                # =============================================
                # wait for producer thread's update of
                # "backgroundProducerStopped" to be "True"
                # =============================================
                while 1:
                    self.testcaseEnv.lock.acquire()
                    self.logger.info("status of backgroundProducerStopped : [" + \
                        str(self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]) + "]", extra=self.d)
                    if self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]:
                        time.sleep(1)
                        self.logger.info("all producer threads completed", extra=self.d)
                        # release the lock before leaving the loop so the
                        # producer thread is not blocked on it
                        self.testcaseEnv.lock.release()
                        break
                    time.sleep(1)
                    self.testcaseEnv.lock.release()
                    time.sleep(2)

                self.anonLogger.info("sleeping for 15s")
                time.sleep(15)

                # =============================================
                # this testcase is completed - stop all entities
                # =============================================
                self.log_message("stopping all entities")
                for entityId, parentPid in self.testcaseEnv.entityBrokerParentPidDict.items():
                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)

                for entityId, parentPid in self.testcaseEnv.entityZkParentPidDict.items():
                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)

                # make sure all entities are stopped
                kafka_system_test_utils.ps_grep_terminate_running_entity(self.systemTestEnv)

                # =============================================
                # collect logs from remote hosts
                # =============================================
                kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # =============================================
                # validate the data matched and checksum
                # =============================================
                self.log_message("validating data matched")
                kafka_system_test_utils.validate_data_matched_in_multi_topics_from_single_consumer_producer(
                    self.systemTestEnv, self.testcaseEnv, replicationUtils)

            except Exception as e:
                self.log_message("Exception while running test {0}".format(e))
                traceback.print_exc()
                self.testcaseEnv.validationStatusDict["Test completed"] = "FAILED"
            finally:
                if not skipThisTestCase and not self.systemTestEnv.printTestDescriptionsOnly:
                    self.log_message("stopping all entities - please wait ...")
                    kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv)
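# ----------------------------------------------------------------------------
# Illustrative sketch - not part of the test framework. Each test class in
# this module installs its bound signal_handler method for SIGINT, so that a
# Ctrl+c tears down the remote entities before the process exits. The minimal
# example below shows the same stdlib mechanism with a hypothetical cleanup
# callable standing in for stop_all_remote_running_processes.
# ----------------------------------------------------------------------------
def _example_install_sigint_handler(cleanup):
    import signal
    import sys

    def handler(signum, frame):
        cleanup()      # e.g. stop all remote running processes
        sys.exit(1)    # non-zero exit code: the run was interrupted

    # from now on, Ctrl+c (SIGINT) invokes handler instead of raising
    # KeyboardInterrupt in the main thread
    signal.signal(signal.SIGINT, handler)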
class MirrorMakerTest(ReplicationUtils, SetupUtils):

    testModuleAbsPathName = os.path.realpath(__file__)
    testSuiteAbsPathName = os.path.abspath(os.path.dirname(testModuleAbsPathName))

    def __init__(self, systemTestEnv):

        # SystemTestEnv - provides cluster level environment settings
        # such as entity_id, hostname, kafka_home, java_home which
        # are available in a list of dictionary named
        # "clusterEntityConfigDictList"
        self.systemTestEnv = systemTestEnv

        super(MirrorMakerTest, self).__init__(self)

        # dict to pass user-defined attributes to logger argument: "extra"
        # (stored on the instance so the "extra=self.d" calls below can see it)
        self.d = {'name_of_class': self.__class__.__name__}

    def signal_handler(self, signal, frame):
        self.log_message("Interrupt detected - User pressed Ctrl+c")

        # perform the necessary cleanup here when user presses Ctrl+c and it may be product specific
        self.log_message("stopping all entities - please wait ...")
        kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv)
        sys.exit(1)

    def runTest(self):

        # ======================================================================
        # get all testcase directories under this testsuite
        # ======================================================================
        testCasePathNameList = system_test_utils.get_dir_paths_with_prefix(
            self.testSuiteAbsPathName, SystemTestEnv.SYSTEM_TEST_CASE_PREFIX)
        testCasePathNameList.sort()

        replicationUtils = ReplicationUtils(self)

        # =============================================================
        # launch each testcase one by one: testcase_1, testcase_2, ...
        # =============================================================
        for testCasePathName in testCasePathNameList:

            skipThisTestCase = False

            try:
                # ======================================================================
                # A new instance of TestcaseEnv to keep track of this testcase's env vars
                # and initialize some env vars as testCasePathName is available now
                # ======================================================================
                self.testcaseEnv = TestcaseEnv(self.systemTestEnv, self)
                self.testcaseEnv.testSuiteBaseDir = self.testSuiteAbsPathName
                self.testcaseEnv.initWithKnownTestCasePathName(testCasePathName)
                self.testcaseEnv.testcaseArgumentsDict = self.testcaseEnv.testcaseNonEntityDataDict["testcase_args"]

                # ======================================================================
                # SKIP if this case is IN testcase_to_skip.json or NOT IN testcase_to_run.json
                # ======================================================================
                testcaseDirName = self.testcaseEnv.testcaseResultsDict["_test_case_name"]

                if self.systemTestEnv.printTestDescriptionsOnly:
                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
                    continue
                elif self.systemTestEnv.isTestCaseToSkip(self.__class__.__name__, testcaseDirName):
                    self.log_message("Skipping : " + testcaseDirName)
                    skipThisTestCase = True
                    continue
                else:
                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
                    system_test_utils.setup_remote_hosts_with_testcase_level_cluster_config(
                        self.systemTestEnv, testCasePathName)

                # ============================================================================== #
                # ============================================================================== #
                #                   Product Specific Testing Code Starts Here:                   #
                # ============================================================================== #
                # ============================================================================== #

                # initialize self.testcaseEnv with user-defined environment variables (product specific)
                self.testcaseEnv.userDefinedEnvVarDict["zkConnectStr"] = ""
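                # "stopBackgroundProducer" and "backgroundProducerStopped" form the
                # shutdown handshake with the background producer thread; both flags
                # are read and written only while holding self.testcaseEnv.lock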
                self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = False
                self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = False

                # initialize signal handler
                signal.signal(signal.SIGINT, self.signal_handler)

                # TestcaseEnv.testcaseConfigsList initialized by reading testcase properties file:
                # system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json
                self.testcaseEnv.testcaseConfigsList = system_test_utils.get_json_list_data(
                    self.testcaseEnv.testcasePropJsonPathName)

                # clean up data directories specified in zookeeper.properties and kafka_server_<n>.properties
                kafka_system_test_utils.cleanup_data_at_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # create "LOCAL" log directories for metrics, dashboards for each entity under this testcase
                # for collecting logs from remote machines
                kafka_system_test_utils.generate_testcase_log_dirs(self.systemTestEnv, self.testcaseEnv)

                # TestcaseEnv - initialize producer & consumer config / log file pathnames
                kafka_system_test_utils.init_entity_props(self.systemTestEnv, self.testcaseEnv)

                # generate remote hosts log/config dirs if not exist
                kafka_system_test_utils.generate_testcase_log_dirs_in_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # generate properties files for zookeeper, kafka, producer, consumer and mirror-maker:
                # 1. copy system_test/<suite_name>_testsuite/config/*.properties to
                #    system_test/<suite_name>_testsuite/testcase_<n>/config/
                # 2. update all properties files in system_test/<suite_name>_testsuite/testcase_<n>/config
                #    by overriding the settings specified in:
                #    system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json
                kafka_system_test_utils.generate_overriden_props_files(
                    self.testSuiteAbsPathName, self.testcaseEnv, self.systemTestEnv)

                # =============================================
                # preparing all entities to start the test
                # =============================================
                self.log_message("starting zookeepers")
                kafka_system_test_utils.start_zookeepers(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 2s")
                time.sleep(2)

                self.log_message("starting brokers")
                kafka_system_test_utils.start_brokers(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 5s")
                time.sleep(5)

                self.log_message("creating topics")
                kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 5s")
                time.sleep(5)

                self.log_message("starting mirror makers")
                kafka_system_test_utils.start_mirror_makers(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 10s")
                time.sleep(10)

                # =============================================
                # starting producer
                # =============================================
                self.log_message("starting producer in the background")
                kafka_system_test_utils.start_producer_performance(self.systemTestEnv, self.testcaseEnv, False)
                msgProducingFreeTimeSec = self.testcaseEnv.testcaseArgumentsDict["message_producing_free_time_sec"]
                self.anonLogger.info("sleeping for " + msgProducingFreeTimeSec + " sec to produce some messages")
                time.sleep(int(msgProducingFreeTimeSec))

                # =============================================
                # A while-loop to bounce mirror maker as specified
                # by "num_iterations" in testcase_n_properties.json
                # =============================================
                i = 1
                numIterations = int(self.testcaseEnv.testcaseArgumentsDict["num_iteration"])

                bouncedEntityDownTimeSec = 15
                try:
                    bouncedEntityDownTimeSec = int(self.testcaseEnv.testcaseArgumentsDict[
testcaseArgumentsDict["bounced_entity_downtime_sec"]) except: pass while i <= numIterations: self.log_message("Iteration " + str(i) + " of " + str(numIterations)) # ============================================= # Bounce Mirror Maker if specified in testcase config # ============================================= bounceMirrorMaker = self.testcaseEnv.testcaseArgumentsDict[ "bounce_mirror_maker"] self.log_message("bounce_mirror_maker flag : " + bounceMirrorMaker) if (bounceMirrorMaker.lower() == "true"): clusterConfigList = self.systemTestEnv.clusterEntityConfigDictList mirrorMakerEntityIdList = system_test_utils.get_data_from_list_of_dicts( clusterConfigList, "role", "mirror_maker", "entity_id") stoppedMirrorMakerEntityId = mirrorMakerEntityIdList[0] mirrorMakerPPid = self.testcaseEnv.entityMirrorMakerParentPidDict[ stoppedMirrorMakerEntityId] self.log_message("stopping mirror maker : " + mirrorMakerPPid) kafka_system_test_utils.stop_remote_entity( self.systemTestEnv, stoppedMirrorMakerEntityId, mirrorMakerPPid) self.anonLogger.info("sleeping for " + str(bouncedEntityDownTimeSec) + " sec") time.sleep(bouncedEntityDownTimeSec) # starting previously terminated broker self.log_message( "starting the previously terminated mirror maker") kafka_system_test_utils.start_mirror_makers( self.systemTestEnv, self.testcaseEnv, stoppedMirrorMakerEntityId) self.anonLogger.info("sleeping for 15s") time.sleep(15) i += 1 # while loop # ============================================= # tell producer to stop # ============================================= self.testcaseEnv.lock.acquire() self.testcaseEnv.userDefinedEnvVarDict[ "stopBackgroundProducer"] = True time.sleep(1) self.testcaseEnv.lock.release() time.sleep(1) # ============================================= # wait for producer thread's update of # "backgroundProducerStopped" to be "True" # ============================================= while 1: self.testcaseEnv.lock.acquire() self.logger.info("status of backgroundProducerStopped : [" + \ str(self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]) + "]", extra=self.d) if self.testcaseEnv.userDefinedEnvVarDict[ "backgroundProducerStopped"]: time.sleep(1) self.logger.info("all producer threads completed", extra=self.d) break time.sleep(1) self.testcaseEnv.lock.release() time.sleep(2) self.anonLogger.info("sleeping for 15s") time.sleep(15) self.anonLogger.info("terminate Mirror Maker") cmdStr = "ps auxw | grep Mirror | grep -v grep | tr -s ' ' | cut -f2 -d ' ' | xargs kill -15" subproc = system_test_utils.sys_call_return_subproc(cmdStr) for line in subproc.stdout.readlines(): line = line.rstrip('\n') self.anonLogger.info("#### [" + line + "]") self.anonLogger.info("sleeping for 15s") time.sleep(15) # ============================================= # starting consumer # ============================================= self.log_message("starting consumer in the background") kafka_system_test_utils.start_console_consumer( self.systemTestEnv, self.testcaseEnv) self.anonLogger.info("sleeping for 10s") time.sleep(10) # ============================================= # this testcase is completed - stop all entities # ============================================= self.log_message("stopping all entities") for entityId, parentPid in self.testcaseEnv.entityBrokerParentPidDict.items( ): kafka_system_test_utils.stop_remote_entity( self.systemTestEnv, entityId, parentPid) for entityId, parentPid in self.testcaseEnv.entityZkParentPidDict.items( ): kafka_system_test_utils.stop_remote_entity( self.systemTestEnv, entityId, 
                for entityId, parentPid in self.testcaseEnv.entityZkParentPidDict.items():
                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)

                # make sure all entities are stopped
                kafka_system_test_utils.ps_grep_terminate_running_entity(self.systemTestEnv)

                # =============================================
                # collect logs from remote hosts
                # =============================================
                kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # =============================================
                # validate the data matched and checksum
                # =============================================
                self.log_message("validating data matched")
                kafka_system_test_utils.validate_data_matched(self.systemTestEnv, self.testcaseEnv, replicationUtils)
                kafka_system_test_utils.validate_broker_log_segment_checksum(self.systemTestEnv, self.testcaseEnv, "source")
                kafka_system_test_utils.validate_broker_log_segment_checksum(self.systemTestEnv, self.testcaseEnv, "target")

                # =============================================
                # draw graphs
                # =============================================
                metrics.draw_all_graphs(self.systemTestEnv.METRICS_PATHNAME,
                                        self.testcaseEnv,
                                        self.systemTestEnv.clusterEntityConfigDictList)

                # build dashboard, one for each role
                metrics.build_all_dashboards(self.systemTestEnv.METRICS_PATHNAME,
                                             self.testcaseEnv.testCaseDashboardsDir,
                                             self.systemTestEnv.clusterEntityConfigDictList)

            except Exception as e:
                self.log_message("Exception while running test {0}".format(e))
                traceback.print_exc()

            finally:
                if not skipThisTestCase and not self.systemTestEnv.printTestDescriptionsOnly:
                    self.log_message("stopping all entities - please wait ...")
                    kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv)
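
# Each suite discovers its testcases by scanning the suite directory for
# subdirectories whose names start with SystemTestEnv.SYSTEM_TEST_CASE_PREFIX
# (testcase_1, testcase_2, ...). A rough equivalent of that lookup, assuming
# the prefix is "testcase_":
#
#     import glob, os
#     testCasePathNameList = sorted(
#         p for p in glob.glob(os.path.join(testSuiteAbsPathName, "testcase_*"))
#         if os.path.isdir(p))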
class OffsetManagementTest(ReplicationUtils, SetupUtils):

    testModuleAbsPathName = os.path.realpath(__file__)
    testSuiteAbsPathName = os.path.abspath(os.path.dirname(testModuleAbsPathName))

    def __init__(self, systemTestEnv):
        """
        SystemTestEnv - provides cluster level environment settings
        such as entity_id, hostname, kafka_home, java_home which
        are available in a list of dictionary named
        "clusterEntityConfigDictList"

        :type systemTestEnv: SystemTestEnv
        """
        self.systemTestEnv = systemTestEnv

        super(OffsetManagementTest, self).__init__(self)

        # dict to pass user-defined attributes to logger argument: "extra"
        # (stored on the instance so the "extra=self.d" calls below can see it)
        self.d = {'name_of_class': self.__class__.__name__}

    def signal_handler(self, signal, frame):
        self.log_message("Interrupt detected - User pressed Ctrl+c")

        # perform the necessary cleanup here when user presses Ctrl+c and it may be product specific
        self.log_message("stopping all entities - please wait ...")
        kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv)
        sys.exit(1)

    def runTest(self):

        # ======================================================================
        # get all testcase directories under this testsuite
        # ======================================================================
        testCasePathNameList = system_test_utils.get_dir_paths_with_prefix(
            self.testSuiteAbsPathName, SystemTestEnv.SYSTEM_TEST_CASE_PREFIX)
        testCasePathNameList.sort()

        replicationUtils = ReplicationUtils(self)

        # =============================================================
        # launch each testcase one by one: testcase_1, testcase_2, ...
        # =============================================================
        for testCasePathName in testCasePathNameList:

            skipThisTestCase = False

            try:
                # ======================================================================
                # A new instance of TestcaseEnv to keep track of this testcase's env vars
                # and initialize some env vars as testCasePathName is available now
                # ======================================================================
                self.testcaseEnv = TestcaseEnv(self.systemTestEnv, self)
                self.testcaseEnv.testSuiteBaseDir = self.testSuiteAbsPathName
                self.testcaseEnv.initWithKnownTestCasePathName(testCasePathName)
                self.testcaseEnv.testcaseArgumentsDict = self.testcaseEnv.testcaseNonEntityDataDict["testcase_args"]

                # ======================================================================
                # SKIP if this case is IN testcase_to_skip.json or NOT IN testcase_to_run.json
                # ======================================================================
                testcaseDirName = self.testcaseEnv.testcaseResultsDict["_test_case_name"]

                if self.systemTestEnv.printTestDescriptionsOnly:
                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
                    continue
                elif self.systemTestEnv.isTestCaseToSkip(self.__class__.__name__, testcaseDirName):
                    self.testcaseEnv.validationStatusDict[TEST_COMPLETED] = SKIPPED
                    skipThisTestCase = True
                    continue
                else:
                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
                    system_test_utils.setup_remote_hosts_with_testcase_level_cluster_config(
                        self.systemTestEnv, testCasePathName)

                # ============================================================================== #
                # ============================================================================== #
                #                   Product Specific Testing Code Starts Here:                   #
                # ============================================================================== #
                # ============================================================================== #
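                # scenario: keep a background producer running while repeatedly
                # bouncing the brokers leading __consumer_offsets partitions 0 and 1
                # (and, optionally, the console consumers), then verify that every
                # produced message was consumed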
self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = False self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = False # initialize signal handler signal.signal(signal.SIGINT, self.signal_handler) # TestcaseEnv.testcaseConfigsList initialized by reading testcase properties file: # system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json self.testcaseEnv.testcaseConfigsList = system_test_utils.get_json_list_data( self.testcaseEnv.testcasePropJsonPathName) # clean up data directories specified in zookeeper.properties and kafka_server_<n>.properties kafka_system_test_utils.cleanup_data_at_remote_hosts(self.systemTestEnv, self.testcaseEnv) # create "LOCAL" log directories for metrics, dashboards for each entity under this testcase # for collecting logs from remote machines kafka_system_test_utils.generate_testcase_log_dirs(self.systemTestEnv, self.testcaseEnv) # TestcaseEnv - initialize producer & consumer config / log file pathnames kafka_system_test_utils.init_entity_props(self.systemTestEnv, self.testcaseEnv) # generate remote hosts log/config dirs if not exist kafka_system_test_utils.generate_testcase_log_dirs_in_remote_hosts(self.systemTestEnv, self.testcaseEnv) # generate properties files for zookeeper, kafka, producer, and consumer: # 1. copy system_test/<suite_name>_testsuite/config/*.properties to # system_test/<suite_name>_testsuite/testcase_<n>/config/ # 2. update all properties files in system_test/<suite_name>_testsuite/testcase_<n>/config # by overriding the settings specified in: # system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json kafka_system_test_utils.generate_overriden_props_files(self.testSuiteAbsPathName, self.testcaseEnv, self.systemTestEnv) # ============================================= # preparing all entities to start the test # ============================================= self.log_message("starting zookeepers") kafka_system_test_utils.start_zookeepers(self.systemTestEnv, self.testcaseEnv) self.anonLogger.info("sleeping for 2s") time.sleep(2) self.log_message("starting brokers") kafka_system_test_utils.start_brokers(self.systemTestEnv, self.testcaseEnv) self.anonLogger.info("sleeping for 5s") time.sleep(5) self.log_message("creating offset topic") kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv, "__consumer_offsets", 3, 2) self.anonLogger.info("sleeping for 5s") time.sleep(5) # ============================================= # starting producer # ============================================= self.log_message("starting producer in the background") kafka_system_test_utils.start_producer_performance(self.systemTestEnv, self.testcaseEnv, False) msgProducingFreeTimeSec = self.testcaseEnv.testcaseArgumentsDict["message_producing_free_time_sec"] self.anonLogger.info("sleeping for " + msgProducingFreeTimeSec + " sec to produce some messages") time.sleep(int(msgProducingFreeTimeSec)) kafka_system_test_utils.start_console_consumers(self.systemTestEnv, self.testcaseEnv) kafka_system_test_utils.get_leader_for(self.systemTestEnv, self.testcaseEnv, "__consumer_offsets", 0) # ============================================= # A while-loop to bounce consumers as specified # by "num_iterations" in testcase_n_properties.json # ============================================= i = 1 numIterations = int(self.testcaseEnv.testcaseArgumentsDict["num_iteration"]) bouncedEntityDownTimeSec = 10 try: bouncedEntityDownTimeSec = int(self.testcaseEnv.testcaseArgumentsDict["bounced_entity_downtime_sec"]) 
                except:
                    pass

                # group1 -> offsets partition 0 // has one consumer;   eid: 6
                # group2 -> offsets partition 1 // has four consumers; eid: 7, 8, 9, 10
                offsets_0_leader_entity = kafka_system_test_utils.get_leader_for(
                    self.systemTestEnv, self.testcaseEnv, "__consumer_offsets", 0)
                offsets_1_leader_entity = kafka_system_test_utils.get_leader_for(
                    self.systemTestEnv, self.testcaseEnv, "__consumer_offsets", 1)

                while i <= numIterations:
                    self.log_message("Iteration " + str(i) + " of " + str(numIterations))

                    kafka_system_test_utils.stop_remote_entity(
                        self.systemTestEnv, offsets_0_leader_entity,
                        self.testcaseEnv.entityBrokerParentPidDict[offsets_0_leader_entity])
                    kafka_system_test_utils.stop_remote_entity(
                        self.systemTestEnv, offsets_1_leader_entity,
                        self.testcaseEnv.entityBrokerParentPidDict[offsets_1_leader_entity])

                    # =============================================
                    # Bounce consumers if specified in testcase config
                    # =============================================
                    bounceConsumers = self.testcaseEnv.testcaseArgumentsDict["bounce_consumers"]
                    self.log_message("bounce_consumers flag : " + bounceConsumers)

                    if bounceConsumers.lower() == "true":
                        clusterConfigList = self.systemTestEnv.clusterEntityConfigDictList
                        consumerEntityIdList = system_test_utils.get_data_from_list_of_dicts(
                            clusterConfigList, "role", "console_consumer", "entity_id")
                        for stoppedConsumerEntityId in consumerEntityIdList:
                            consumerPPID = self.testcaseEnv.entityConsoleConsumerParentPidDict[stoppedConsumerEntityId]
                            self.log_message("stopping consumer: " + consumerPPID)
                            kafka_system_test_utils.stop_remote_entity(
                                self.systemTestEnv, stoppedConsumerEntityId, consumerPPID)

                    self.anonLogger.info("sleeping for " + str(bouncedEntityDownTimeSec) + " sec")
                    time.sleep(bouncedEntityDownTimeSec)

                    # leaders would have changed during the above bounce.
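                    # leadership has moved, but offsets_0_leader_entity and
                    # offsets_1_leader_entity still name the stopped broker
                    # processes, so they are the right entities to restart below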
self.log_message("starting the previously terminated consumers.") for stoppedConsumerEntityId in consumerEntityIdList: # starting previously terminated consumer kafka_system_test_utils.start_console_consumers(self.systemTestEnv, self.testcaseEnv, stoppedConsumerEntityId) self.log_message("starting the previously terminated brokers") kafka_system_test_utils.start_entity_in_background(self.systemTestEnv, self.testcaseEnv, offsets_0_leader_entity) kafka_system_test_utils.start_entity_in_background(self.systemTestEnv, self.testcaseEnv, offsets_1_leader_entity) self.anonLogger.info("sleeping for 15s") time.sleep(15) i += 1 # while loop # ============================================= # tell producer to stop # ============================================= self.testcaseEnv.lock.acquire() self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = True time.sleep(1) self.testcaseEnv.lock.release() time.sleep(1) # ============================================= # wait for producer thread's update of # "backgroundProducerStopped" to be "True" # ============================================= while 1: self.testcaseEnv.lock.acquire() self.logger.info("status of backgroundProducerStopped : [" + \ str(self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]) + "]", extra=self.d) if self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]: time.sleep(1) self.logger.info("all producer threads completed", extra=self.d) break time.sleep(1) self.testcaseEnv.lock.release() time.sleep(2) self.anonLogger.info("sleeping for 15s") time.sleep(15) # ============================================= # this testcase is completed - stop all entities # ============================================= self.log_message("stopping all entities") for entityId, parentPid in self.testcaseEnv.entityBrokerParentPidDict.items(): kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid) for entityId, parentPid in self.testcaseEnv.entityZkParentPidDict.items(): kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid) # make sure all entities are stopped kafka_system_test_utils.ps_grep_terminate_running_entity(self.systemTestEnv) # ============================================= # collect logs from remote hosts # ============================================= kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv) # ============================================= # validate the data matched and checksum # ============================================= self.log_message("validating data matched") kafka_system_test_utils.validate_data_matched_in_multi_topics_from_single_consumer_producer(self.systemTestEnv, self.testcaseEnv, replicationUtils) except Exception as e: self.log_message("Exception while running test {0}".format(e)) traceback.print_exc() self.testcaseEnv.validationStatusDict[TEST_COMPLETED] = "FAILED" logger.info("Collecting logs post failure.") kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv) finally: log_test_summary(self.testcaseEnv.validationStatusDict, testcaseDirName) if not skipThisTestCase and not self.systemTestEnv.printTestDescriptionsOnly: self.log_message("stopping all entities - please wait ...") kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv)