def collect_tests(args):
    cpp_test_descriptors = []
    if args.run_cpp_tests:
        cpp_test_descriptors = collect_cpp_tests(args.max_tests, args.cpp_test_program_regexp)

    java_test_descriptors = []
    yb_src_root = yb_dist_tests.global_conf.yb_src_root
    if args.run_java_tests:
        for java_src_root in [os.path.join(yb_src_root, 'java'),
                              os.path.join(yb_src_root, 'ent', 'java')]:
            for dir_path, dir_names, file_names in os.walk(java_src_root):
                rel_dir_path = os.path.relpath(dir_path, java_src_root)
                for file_name in file_names:
                    if (file_name.startswith('Test') and
                        (file_name.endswith('.java') or file_name.endswith('.scala')) or
                        file_name.endswith('Test.java') or
                        file_name.endswith('Test.scala')) and \
                       '/src/test/' in rel_dir_path:
                        test_descriptor_str = os.path.join(rel_dir_path, file_name)
                        if yb_dist_tests.JAVA_TEST_DESCRIPTOR_RE.match(test_descriptor_str):
                            java_test_descriptors.append(
                                yb_dist_tests.TestDescriptor(test_descriptor_str))
                        else:
                            logging.warning("Skipping file (does not match expected pattern): " +
                                            test_descriptor_str)

    # TODO: sort tests in the order of reverse historical execution time. If Spark starts running
    # tasks from the beginning, this will ensure the longest tests start the earliest.
    #
    # Right now we just put Java tests first because those tests are entire test classes and will
    # take longer to run on average.
    return sorted(java_test_descriptors) + sorted(cpp_test_descriptors)

def load_test_list(test_list_path):
    test_descriptors = []
    with open(test_list_path, 'r') as input_file:
        for line in input_file:
            line = line.strip()
            if line:
                test_descriptors.append(yb_dist_tests.TestDescriptor(line))
    return test_descriptors

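# Illustrative sketch (assumption, not part of the original script): the test list file read by
# load_test_list() is expected to contain one test descriptor string per line, with blank lines
# ignored. The file name and entries below are hypothetical:
#
#   tests-util/string_util-test
#   org/yb/client/TestYBClient.java
#
#   test_descriptors = load_test_list('/path/to/test_list.txt')
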
def parallel_run_test(test_descriptor_str):
    """
    This is invoked in parallel to actually run tests.
    """
    adjust_pythonpath()
    from yb import yb_dist_tests, command_util
    global_conf = yb_dist_tests.set_global_conf_from_dict(global_conf_dict)
    global_conf.set_env(propagated_env_vars)
    yb_dist_tests.global_conf = global_conf
    test_descriptor = yb_dist_tests.TestDescriptor(test_descriptor_str)
    os.environ['YB_TEST_ATTEMPT_INDEX'] = str(test_descriptor.attempt_index)
    os.environ['build_type'] = global_conf.build_type
    yb_dist_tests.wait_for_clock_sync()

    # We could use "run_program" here, but it collects all the output in memory, which is not
    # ideal for a large amount of test log output. The "tee" part also makes the output visible
    # in the standard error of the Spark task, which is sometimes helpful for debugging.
    def run_test():
        start_time = time.time()
        exit_code = os.system(
            "bash -c 'set -o pipefail; \"{}\" {} 2>&1 | tee \"{}\"; {}'".format(
                global_conf.get_run_test_script_path(),
                test_descriptor.args_for_run_test,
                test_descriptor.error_output_path,
                'exit ${PIPESTATUS[0]}')) >> 8
        # os.system() returns the shell's wait status; the actual exit code lives in the high
        # 8 bits, so we shift right by 8 to extract it.
        elapsed_time_sec = time.time() - start_time
        logging.info("Test {} ran on {}, rc={}".format(
            test_descriptor, socket.gethostname(), exit_code))
        return exit_code, elapsed_time_sec

    exit_code, elapsed_time_sec = run_test()

    error_output_path = test_descriptor.error_output_path
    failed_without_output = False
    if os.path.isfile(error_output_path) and os.path.getsize(error_output_path) == 0:
        if exit_code == 0:
            # Test succeeded, no error output.
            os.remove(error_output_path)
        else:
            # Test failed without any output! Re-run with "set -x" to diagnose.
            os.environ['YB_DEBUG_RUN_TEST'] = '1'
            exit_code, elapsed_time_sec = run_test()
            del os.environ['YB_DEBUG_RUN_TEST']

            # Also mark this in test results.
            failed_without_output = True

    return yb_dist_tests.TestResult(
        exit_code=exit_code,
        test_descriptor=test_descriptor,
        elapsed_time_sec=elapsed_time_sec,
        failed_without_output=failed_without_output)

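# Illustrative sketch (assumption, not shown in this section): parallel_run_test() is intended
# to be mapped over test descriptor strings by the Spark driver, mirroring how
# parallel_list_test_descriptors is used in collect_cpp_tests() below. A hypothetical driver
# call could look like:
#
#   results = spark_context.parallelize(
#       test_descriptor_strs, numSlices=num_tasks).map(parallel_run_test).collect()
#
# where test_descriptor_strs is a list of descriptor strings and num_tasks is the desired
# Spark task count.
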
def collect_java_tests():
    java_test_list_path = os.path.join(yb_dist_tests.global_conf.build_root, 'java_test_list.txt')
    if not os.path.exists(java_test_list_path):
        raise IOError("Java test list not found at '%s'" % java_test_list_path)
    with open(java_test_list_path) as java_test_list_file:
        java_test_descriptors = [
            yb_dist_tests.TestDescriptor(java_test_str.strip())
            for java_test_str in java_test_list_file.read().split("\n")
            if java_test_str.strip()
        ]
    if not java_test_descriptors:
        raise RuntimeError("Could not find any Java tests listed in '%s'" % java_test_list_path)
    return java_test_descriptors

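# Illustrative sketch (assumption): java_test_list.txt is expected to contain one Java/Scala
# test descriptor per line, with blank lines skipped. The exact entries below are hypothetical
# and only meant to show the shape of the file:
#
#   yb-client/src/test/java/org/yb/client/TestYBClient.java
#   yb-cql/src/test/java/org/yb/cql/TestSelect.java
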
def parallel_run_test(test_descriptor_str):
    """
    This is invoked in parallel to actually run tests.
    """
    adjust_pythonpath()
    from yb import yb_dist_tests, command_util
    global_conf = yb_dist_tests.set_global_conf_from_dict(global_conf_dict)
    global_conf.set_env(propagated_env_vars)
    yb_dist_tests.global_conf = global_conf
    test_descriptor = yb_dist_tests.TestDescriptor(test_descriptor_str)
    os.environ['YB_TEST_ATTEMPT_INDEX'] = str(test_descriptor.attempt_index)
    os.environ['build_type'] = global_conf.build_type
    yb_dist_tests.wait_for_clock_sync()

    start_time = time.time()
    # We could use "run_program" here, but it collects all the output in memory, which is not
    # ideal for a large amount of test log output. The "tee" part also makes the output visible
    # in the standard error of the Spark task, which is sometimes helpful for debugging.
    exit_code = os.system(
        "bash -c 'set -o pipefail; \"{}\" {} 2>&1 | tee \"{}\"'".format(
            global_conf.get_run_test_script_path(),
            test_descriptor.args_for_run_test,
            test_descriptor.error_output_path)) >> 8
    # os.system() returns the shell's wait status; the exit code is in the high 8 bits, so we
    # shift right by 8 to extract it.
    elapsed_time_sec = time.time() - start_time
    logging.info("Test {} ran on {}, rc={}".format(
        test_descriptor, socket.gethostname(), exit_code))

    error_output_path = test_descriptor.error_output_path
    if os.path.isfile(error_output_path) and os.path.getsize(error_output_path) == 0:
        os.remove(error_output_path)

    return yb_dist_tests.TestResult(
        exit_code=exit_code,
        test_descriptor=test_descriptor,
        elapsed_time_sec=elapsed_time_sec)

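# Side note (illustrative helper, not part of the original script): on POSIX, os.system()
# returns a wait status, and the ">> 8" above extracts the exit code of a process that exited
# normally. The standard-library equivalent uses os.WEXITSTATUS(); a minimal sketch with a
# hypothetical helper name:
def exit_code_from_wait_status(wait_status):
    # For a normally exited process, os.WEXITSTATUS(status) == status >> 8.
    if os.WIFEXITED(wait_status):
        return os.WEXITSTATUS(wait_status)
    if os.WIFSIGNALED(wait_status):
        # Mirror the common convention of reporting death-by-signal as a negative number.
        return -os.WTERMSIG(wait_status)
    return -1
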
def collect_cpp_tests(max_tests, cpp_test_program_re_str):
    global_conf = yb_dist_tests.global_conf
    logging.info("Collecting the list of C++ tests")
    start_time_sec = time.time()
    ctest_cmd_result = command_util.run_program(
        ['/bin/bash', '-c',
         'cd "{}" && YB_LIST_CTEST_TESTS_ONLY=1 ctest -j8 --verbose'.format(
             global_conf.build_root)])
    test_programs = []
    test_descriptor_strs = []

    for line in ctest_cmd_result.stdout.split("\n"):
        re_match = CTEST_TEST_PROGRAM_RE.match(line)
        if re_match:
            rel_ctest_prog_path = os.path.relpath(re_match.group(1), global_conf.build_root)
            if is_one_shot_test(rel_ctest_prog_path):
                test_descriptor_strs.append(rel_ctest_prog_path)
            else:
                test_programs.append(rel_ctest_prog_path)

    elapsed_time_sec = time.time() - start_time_sec
    logging.info("Collected %d test programs in %.2f sec" % (
        len(test_programs), elapsed_time_sec))

    if cpp_test_program_re_str:
        cpp_test_program_re = re.compile(cpp_test_program_re_str)
        test_programs = [test_program for test_program in test_programs
                         if cpp_test_program_re.search(test_program)]
        logging.info("Filtered down to %d test programs using regular expression '%s'" %
                     (len(test_programs), cpp_test_program_re_str))

    if max_tests and len(test_programs) > max_tests:
        logging.info("Randomly selecting {} test programs out of {} possible".format(
            max_tests, len(test_programs)))
        random.shuffle(test_programs)
        test_programs = test_programs[:max_tests]

    if not test_programs:
        logging.info("Found no test programs")
        return []

    logging.info("Collecting gtest tests for {} test programs".format(len(test_programs)))
    start_time_sec = time.time()

    if len(test_programs) <= 3:
        app_name_details = ['test programs: [{}]'.format(', '.join(test_programs))]
    else:
        app_name_details = ['{} test programs'.format(len(test_programs))]
    init_spark_context(app_name_details)
    set_global_conf_for_spark_jobs()

    # Use fewer "slices" (tasks) than there are test programs, in the hope of getting some
    # batching.
    num_slices = (len(test_programs) + 1) / 2
    all_test_descriptor_lists = spark_context.parallelize(
        test_programs, numSlices=num_slices).map(parallel_list_test_descriptors).collect()
    elapsed_time_sec = time.time() - start_time_sec
    test_descriptor_strs += [
        test_descriptor_str
        for test_descriptor_str_list in all_test_descriptor_lists
        for test_descriptor_str in test_descriptor_str_list]
    logging.info("Collected the list of %d gtest tests in %.2f sec" % (
        len(test_descriptor_strs), elapsed_time_sec))
    return [yb_dist_tests.TestDescriptor(s) for s in test_descriptor_strs]

def collect_cpp_tests(max_tests, cpp_test_program_filter, cpp_test_program_re_str):
    """
    Collect C++ test programs to run.
    @param max_tests: maximum number of tests to run. Used for debugging.
    @param cpp_test_program_filter: a collection of C++ test program names to be used as a filter
    @param cpp_test_program_re_str: a regular expression string to be used as a filter for the
                                    set of C++ test programs.
    """
    global_conf = yb_dist_tests.global_conf
    logging.info("Collecting the list of C++ test programs")
    start_time_sec = time.time()
    ctest_cmd_result = command_util.run_program(
        ['/bin/bash', '-c',
         'cd "{}" && YB_LIST_CTEST_TESTS_ONLY=1 ctest -j8 --verbose'.format(
             global_conf.build_root)])
    test_programs = []

    for line in ctest_cmd_result.stdout.split("\n"):
        re_match = CTEST_TEST_PROGRAM_RE.match(line)
        if re_match:
            rel_ctest_prog_path = os.path.relpath(re_match.group(1), global_conf.build_root)
            test_programs.append(rel_ctest_prog_path)

    test_programs = sorted(set(test_programs))
    elapsed_time_sec = time.time() - start_time_sec
    logging.info("Collected %d test programs in %.2f sec" % (
        len(test_programs), elapsed_time_sec))

    if cpp_test_program_re_str:
        cpp_test_program_re = re.compile(cpp_test_program_re_str)
        test_programs = [test_program for test_program in test_programs
                         if cpp_test_program_re.search(test_program)]
        logging.info("Filtered down to %d test programs using regular expression '%s'" %
                     (len(test_programs), cpp_test_program_re_str))

    if cpp_test_program_filter:
        cpp_test_program_filter = set(cpp_test_program_filter)
        unfiltered_test_programs = test_programs

        # test_program contains test paths relative to the root directory (including directory
        # names), and cpp_test_program_filter contains basenames only.
        test_programs = sorted(set([
            test_program for test_program in test_programs
            if os.path.basename(test_program) in cpp_test_program_filter
        ]))
        logging.info("Filtered down to %d test programs using the list from test conf file" %
                     len(test_programs))

        if unfiltered_test_programs and not test_programs:
            # This means we've filtered the list of C++ test programs down to an empty set.
            logging.info(
                ("NO MATCHING C++ TEST PROGRAMS FOUND! Test programs from conf file: {}, "
                 "collected from ctest before filtering: {}").format(
                     set_to_comma_sep_str(cpp_test_program_filter),
                     set_to_comma_sep_str(unfiltered_test_programs)))

    if max_tests and len(test_programs) > max_tests:
        logging.info("Randomly selecting {} test programs out of {} possible".format(
            max_tests, len(test_programs)))
        random.shuffle(test_programs)
        test_programs = test_programs[:max_tests]

    if not test_programs:
        logging.info("Found no test programs")
        return []

    fine_granularity_gtest_programs = []
    one_shot_test_programs = []
    for test_program in test_programs:
        if is_one_shot_test(test_program):
            one_shot_test_programs.append(test_program)
        else:
            fine_granularity_gtest_programs.append(test_program)

    logging.info(
        ("Found {} gtest test programs where tests will be run separately, "
         "{} test programs to be run in one shot").format(
            len(fine_granularity_gtest_programs), len(one_shot_test_programs)))

    test_programs = fine_granularity_gtest_programs
    logging.info(
        "Collecting gtest tests for {} test programs where tests will be run separately".format(
            len(test_programs)))

    start_time_sec = time.time()

    all_test_programs = fine_granularity_gtest_programs + one_shot_test_programs
    if len(all_test_programs) <= 5:
        app_name_details = ['test programs: [{}]'.format(', '.join(all_test_programs))]
    else:
        app_name_details = ['{} test programs'.format(len(all_test_programs))]
    init_spark_context(app_name_details)
    set_global_conf_for_spark_jobs()

    # Use fewer "slices" (tasks) than there are test programs, in the hope of getting some
    # batching.
    num_slices = (len(test_programs) + 1) / 2
    all_test_descriptor_lists = run_spark_action(
        lambda: spark_context.parallelize(test_programs, numSlices=num_slices).map(
            parallel_list_test_descriptors).collect())
    elapsed_time_sec = time.time() - start_time_sec

    test_descriptor_strs = one_shot_test_programs + [
        test_descriptor_str
        for test_descriptor_str_list in all_test_descriptor_lists
        for test_descriptor_str in test_descriptor_str_list]
    logging.info("Collected the list of %d gtest tests in %.2f sec" % (
        len(test_descriptor_strs), elapsed_time_sec))
    return [yb_dist_tests.TestDescriptor(s) for s in test_descriptor_strs]

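# Illustrative sketch (assumption, not the original implementation): is_one_shot_test() is used
# above but not defined in this section. One plausible form is a membership check against a
# hard-coded set of test programs that must be run as a single unit; the helper name suffix and
# the set contents below are hypothetical.
ONE_SHOT_TEST_PROGRAMS_EXAMPLE = frozenset(['merge-test'])  # hypothetical contents

def is_one_shot_test_example(rel_binary_path):
    # Compare the basename of the test binary against the (hypothetical) one-shot set.
    return os.path.basename(rel_binary_path) in ONE_SHOT_TEST_PROGRAMS_EXAMPLE
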
def parallel_run_test(test_descriptor_str):
    """
    This is invoked in parallel to actually run tests.
    """
    adjust_pythonpath()
    wait_for_path_to_exist(YB_PYTHONPATH_ENTRY)
    try:
        from yb import yb_dist_tests, command_util
    except ImportError as ex:
        raise ImportError("%s. %s" % (ex.message, get_sys_path_info_str()))

    global_conf = yb_dist_tests.set_global_conf_from_dict(global_conf_dict)
    global_conf.set_env(propagated_env_vars)
    wait_for_path_to_exist(global_conf.build_root)
    yb_dist_tests.global_conf = global_conf
    test_descriptor = yb_dist_tests.TestDescriptor(test_descriptor_str)

    # This is saved in the test result file by process_test_result.py.
    os.environ['YB_TEST_DESCRIPTOR_STR'] = test_descriptor_str

    os.environ['YB_TEST_ATTEMPT_INDEX'] = str(test_descriptor.attempt_index)
    os.environ['build_type'] = global_conf.build_type
    os.environ['YB_RUNNING_TEST_ON_SPARK'] = '1'
    os.environ['BUILD_ROOT'] = global_conf.build_root

    test_started_running_flag_file = os.path.join(
        tempfile.gettempdir(),
        'yb_test_started_running_flag_file_%d_%s' % (
            os.getpid(),
            ''.join('%09d' % random.randrange(0, 1000000000) for i in xrange(4))))

    os.environ['YB_TEST_STARTED_RUNNING_FLAG_FILE'] = test_started_running_flag_file
    os.environ['YB_TEST_EXTRA_ERROR_LOG_PATH'] = test_descriptor.error_output_path

    yb_dist_tests.wait_for_clock_sync()

    # We could use "run_program" here, but it collects all the output in memory, which is not
    # ideal for a large amount of test log output. The "tee" part also makes the output visible
    # in the standard error of the Spark task, which is sometimes helpful for debugging.
    def run_test():
        start_time_sec = time.time()
        runner_oneline = 'set -o pipefail; "%s" %s 2>&1 | tee "%s"; exit ${PIPESTATUS[0]}' % (
            global_conf.get_run_test_script_path(),
            test_descriptor.args_for_run_test,
            test_descriptor.error_output_path)
        process = subprocess.Popen([get_bash_path(), '-c', runner_oneline])

        found_flag_file = False
        while is_pid_running(process.pid):
            elapsed_time_sec = time.time() - start_time_sec
            termination_reason = None
            if elapsed_time_sec > TEST_TIMEOUT_UPPER_BOUND_SEC:
                termination_reason = 'ran longer than %d seconds' % TEST_TIMEOUT_UPPER_BOUND_SEC

            failed_to_launch = False
            if not found_flag_file:
                if os.path.exists(test_started_running_flag_file):
                    found_flag_file = True
                elif elapsed_time_sec > TIME_SEC_TO_START_RUNNING_TEST:
                    termination_reason = (
                        'could not start running the test in %d seconds (file %s not created). '
                        'Ran command: {{ %s }}.') % (
                            TIME_SEC_TO_START_RUNNING_TEST,
                            test_started_running_flag_file,
                            runner_oneline)
                    failed_to_launch = True

            if termination_reason:
                error_msg = "Test %s is being terminated (ran for %.1f seconds), reason: %s" % (
                    test_descriptor, elapsed_time_sec, termination_reason)
                logging.info(error_msg)
                try:
                    os.kill(process.pid, signal.SIGKILL)
                except OSError as os_error:
                    if os_error.errno == errno.ESRCH:
                        logging.info("Process with pid %d disappeared suddenly, that's OK",
                                     process.pid)
                    else:
                        raise os_error

                if failed_to_launch:
                    # This exception should bubble up to Spark and cause it to hopefully re-run
                    # the test on some other node.
                    raise RuntimeError(error_msg)
                break

            time.sleep(0.5)

        exit_code = process.wait()
        elapsed_time_sec = time.time() - start_time_sec
        logging.info("Test {} ran on {} in {:.1f} seconds, rc={}".format(
            test_descriptor, socket.gethostname(), elapsed_time_sec, exit_code))
        return exit_code, elapsed_time_sec

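# Illustrative sketch (assumption, not the original helper): is_pid_running() is referenced
# above but not shown in this section. A common POSIX implementation sends signal 0, which
# performs existence/permission checks without delivering a signal; the helper name suffix is
# hypothetical.
def is_pid_running_sketch(pid):
    import errno
    try:
        os.kill(pid, 0)  # Signal 0: checks existence and permissions only.
        return True
    except OSError as error:
        # ESRCH means no such process; EPERM means it exists but we cannot signal it.
        return error.errno == errno.EPERM
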
def collect_tests(args):
    if args.cpp_test_program_regexp and args.test_conf:
        raise RuntimeError(
            "--cpp_test_program_regexp and --test_conf cannot both be specified at the same "
            "time.")

    test_conf = {}
    if args.test_conf:
        with open(args.test_conf) as test_conf_file:
            test_conf = json.load(test_conf_file)
        if args.run_cpp_tests and not test_conf['run_cpp_tests']:
            logging.info("The test configuration file says that C++ tests should be skipped")
            args.run_cpp_tests = False
        if not test_conf['run_java_tests']:
            logging.info("The test configuration file says that Java tests should be skipped")
            args.run_java_tests = False

    cpp_test_descriptors = []
    if args.run_cpp_tests:
        cpp_test_programs = test_conf.get('cpp_test_programs')
        if args.cpp_test_program_regexp and cpp_test_programs:
            logging.warning(
                ("Ignoring the C++ test program regular expression specified on the "
                 "command line: {}").format(args.cpp_test_program_regexp))
        cpp_test_descriptors = collect_cpp_tests(
            args.max_tests, cpp_test_programs, args.cpp_test_program_regexp)

    java_test_descriptors = []
    yb_src_root = yb_dist_tests.global_conf.yb_src_root
    if args.run_java_tests:
        for java_src_root in [os.path.join(yb_src_root, 'java'),
                              os.path.join(yb_src_root, 'ent', 'java')]:
            for dir_path, dir_names, file_names in os.walk(java_src_root):
                rel_dir_path = os.path.relpath(dir_path, java_src_root)
                for file_name in file_names:
                    if (file_name.startswith('Test') and
                        (file_name.endswith('.java') or file_name.endswith('.scala')) or
                        file_name.endswith('Test.java') or
                        file_name.endswith('Test.scala')) and \
                       '/src/test/' in rel_dir_path:
                        test_descriptor_str = os.path.join(rel_dir_path, file_name)
                        if yb_dist_tests.JAVA_TEST_DESCRIPTOR_RE.match(test_descriptor_str):
                            java_test_descriptors.append(
                                yb_dist_tests.TestDescriptor(test_descriptor_str))
                        else:
                            logging.warning("Skipping file (does not match expected pattern): " +
                                            test_descriptor_str)

    # TODO: sort tests in the order of reverse historical execution time. If Spark starts running
    # tasks from the beginning, this will ensure the longest tests start the earliest.
    #
    # Right now we just put Java tests first because those tests are entire test classes and will
    # take longer to run on average.
    return sorted(java_test_descriptors) + sorted(cpp_test_descriptors)

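# Illustrative sketch (assumption): the file passed via --test_conf is loaded as JSON above, and
# the keys referenced in collect_tests() suggest a shape like the following. The specific values
# and program names are hypothetical:
#
#   {
#     "run_cpp_tests": true,
#     "run_java_tests": false,
#     "cpp_test_programs": ["util_test", "client_test"]
#   }
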