def main():
    parser = argparse.ArgumentParser(description='Run tests on Spark.')
    parser.add_argument('--verbose', action='store_true',
                        help='Enable debug output')
    parser.add_argument('--java', dest='run_java_tests', action='store_true',
                        help='Run Java tests')
    parser.add_argument('--cpp', dest='run_cpp_tests', action='store_true',
                        help='Run C++ tests')
    parser.add_argument('--all', dest='run_all_tests', action='store_true',
                        help='Run tests in all languages')
    parser.add_argument('--test_list',
                        help='A file with a list of tests to run. Useful when e.g. re-running '
                             'failed tests using a file produced with --failed_test_list.')
    parser.add_argument('--build-root', dest='build_root', required=True,
                        help='Build root (e.g. ~/code/yugabyte/build/debug-gcc-dynamic-community)')
    parser.add_argument('--max-tests', type=int, dest='max_tests',
                        help='Maximum number of tests to run. Useful when debugging this script '
                             'for faster iteration. This number of tests will be randomly chosen '
                             'from the test suite.')
    parser.add_argument('--sleep_after_tests', action='store_true',
                        help='Sleep for a while after tests are done before destroying '
                             'SparkContext. This allows examining the Spark app UI.')
    parser.add_argument('--reports-dir', dest='report_base_dir',
                        help='A parent directory for storing build reports (such as per-test '
                             'run times and whether the Spark job succeeded).')
    parser.add_argument('--write_report', action='store_true',
                        help='Actually enable writing build reports. If this is not '
                             'specified, we will only read previous test reports to sort tests '
                             'better.')
    parser.add_argument('--save_report_to_build_dir', action='store_true',
                        help='Save a test report to the build directory directly, in addition '
                             'to any reports saved in the common reports directory. This should '
                             'work even if neither --reports-dir nor --write_report is specified.')
    parser.add_argument('--cpp_test_program_regexp',
                        help='A regular expression to filter C++ test program names on.')
    parser.add_argument('--test_conf',
                        help='A file with a JSON configuration describing what tests to run, '
                             'produced by dependency_graph.py')
    parser.add_argument('--num_repetitions', type=int, default=1,
                        help='Number of times to run each test.')
    parser.add_argument('--failed_test_list',
                        help='A file path to save the list of failed tests to. The format is '
                             'one test descriptor per line.')
    parser.add_argument('--allow_no_tests', action='store_true',
                        help='Allow running with filters that yield no tests to run. Useful when '
                             'debugging.')
    args = parser.parse_args()

    # ---------------------------------------------------------------------------------------------
    # Argument validation.
    if args.run_all_tests:
        args.run_java_tests = True
        args.run_cpp_tests = True

    global verbose
    verbose = args.verbose

    log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format="[%(filename)s:%(lineno)d] %(asctime)s %(levelname)s: %(message)s")

    global_conf = yb_dist_tests.set_global_conf_from_args(args)
    build_root = global_conf.build_root
    yb_src_root = global_conf.yb_src_root

    if not args.run_cpp_tests and not args.run_java_tests:
        fatal_error("At least one of --java or --cpp has to be specified")

    report_base_dir = args.report_base_dir
    write_report = args.write_report
    if report_base_dir and not os.path.isdir(report_base_dir):
        fatal_error("Report base directory '{}' does not exist".format(report_base_dir))

    if write_report and not report_base_dir:
        fatal_error("--write_report specified but the reports directory (--reports-dir) is not")

    if write_report and not is_writable(report_base_dir):
        fatal_error(
            "--write_report specified but the reports directory ('{}') is not writable".format(
                report_base_dir))

    if args.num_repetitions < 1:
        fatal_error("--num_repetitions must be at least 1, got: {}".format(args.num_repetitions))

    failed_test_list_path = args.failed_test_list
    if failed_test_list_path and not is_parent_dir_writable(failed_test_list_path):
        fatal_error(("Parent directory of failed test list destination path ('{}') is not " +
                     "writable").format(args.failed_test_list))

    test_list_path = args.test_list
    if test_list_path and not os.path.isfile(test_list_path):
        fatal_error("File specified by --test_list does not exist or is not a file: '{}'".format(
            test_list_path))

    # ---------------------------------------------------------------------------------------------
    # Start the timer.
    global_start_time = time.time()

    if test_list_path:
        test_descriptors = load_test_list(test_list_path)
    else:
        test_descriptors = collect_tests(args)

    if not test_descriptors and not args.allow_no_tests:
        logging.info("No tests to run")
        return

    propagate_env_vars()

    num_tests = len(test_descriptors)

    if args.max_tests and num_tests > args.max_tests:
        logging.info("Randomly selecting {} tests out of {} possible".format(
            args.max_tests, num_tests))
        random.shuffle(test_descriptors)
        test_descriptors = test_descriptors[:args.max_tests]
        num_tests = len(test_descriptors)

    if args.verbose:
        for test_descriptor in test_descriptors:
            logging.info("Will run test: {}".format(test_descriptor))

    num_repetitions = args.num_repetitions
    total_num_tests = num_tests * num_repetitions
    logging.info("Running {} tests on Spark, {} times each, for a total of {} tests".format(
        num_tests, num_repetitions, total_num_tests))

    if num_repetitions > 1:
        test_descriptors = [
            test_descriptor.with_attempt_index(i)
            for test_descriptor in test_descriptors
            for i in xrange(1, num_repetitions + 1)
        ]

    app_name_details = ['{} tests total'.format(total_num_tests)]
    if num_repetitions > 1:
        app_name_details += ['{} repetitions of {} tests'.format(num_repetitions, num_tests)]
    init_spark_context(app_name_details)
    set_global_conf_for_spark_jobs()

    # By this point, test_descriptors have been duplicated the necessary number of times, with
    # attempt indexes attached to each test descriptor.
    spark_succeeded = False
    if test_descriptors:
        logging.info("Running {} tasks on Spark".format(total_num_tests))
        assert total_num_tests == len(test_descriptors), \
            "total_num_tests={}, len(test_descriptors)={}".format(
                total_num_tests, len(test_descriptors))
        test_names_rdd = spark_context.parallelize(
            [test_descriptor.descriptor_str for test_descriptor in test_descriptors],
            numSlices=total_num_tests)
        results = run_spark_action(lambda: test_names_rdd.map(parallel_run_test).collect())
    else:
        # Allow running zero tests, for testing the reporting logic.
        results = []

    test_exit_codes = set([result.exit_code for result in results])
    global_exit_code = 0 if test_exit_codes == set([0]) else 1

    logging.info("Tests are done, set of exit codes: {}, will return exit code {}".format(
        sorted(test_exit_codes), global_exit_code))

    failures_by_language = defaultdict(int)
    failed_test_desc_strs = []
    for result in results:
        if result.exit_code != 0:
            how_test_failed = ""
            if result.failed_without_output:
                how_test_failed = " without any output"
            logging.info("Test failed{}: {}".format(how_test_failed, result.test_descriptor))
            failures_by_language[result.test_descriptor.language] += 1
            failed_test_desc_strs.append(result.test_descriptor.descriptor_str)

    if failed_test_list_path:
        logging.info("Writing the list of failed tests to '{}'".format(failed_test_list_path))
        with open(failed_test_list_path, 'w') as failed_test_file:
            failed_test_file.write("\n".join(failed_test_desc_strs) + "\n")

    for language, num_failures in failures_by_language.iteritems():
        logging.info("Failures in {} tests: {}".format(language, num_failures))

    total_elapsed_time_sec = time.time() - global_start_time
    logging.info("Total elapsed time: {} sec".format(total_elapsed_time_sec))

    if (report_base_dir and write_report) or args.save_report_to_build_dir:
        save_report(report_base_dir, results, total_elapsed_time_sec, spark_succeeded,
                    save_to_build_dir=args.save_report_to_build_dir)

    if args.sleep_after_tests:
        # This can be used as a way to keep the Spark app running during debugging while examining
        # its UI.
        time.sleep(600)

    sys.exit(global_exit_code)
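

# run_spark_action() is called above but is not part of this excerpt. The sketch below is an
# assumption about its behavior, reconstructed from the inline Py4J error handling used by the
# variant of main() further down; the real helper may instead record success in the
# spark_succeeded flag rather than exiting. Illustrative only.
def run_spark_action(action):
    try:
        results = action()
    except py4j.protocol.Py4JJavaError:
        traceback.print_exc()
        logging.error("Spark job failed to run! Jenkins should probably restart this build.")
        # EX_UNAVAILABLE from sysexits.h: the Spark cluster is probably unavailable.
        sys.exit(69)
    return results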


def main():
    parser = argparse.ArgumentParser(description='Run tests on Spark.')
    parser.add_argument('--verbose', action='store_true',
                        help='Enable debug output')
    parser.add_argument('--java', dest='run_java_tests', action='store_true',
                        help='Run Java tests')
    parser.add_argument('--cpp', dest='run_cpp_tests', action='store_true',
                        help='Run C++ tests')
    parser.add_argument('--all', dest='run_all_tests', action='store_true',
                        help='Run tests in all languages')
    parser.add_argument('--test_list',
                        help='A file with a list of tests to run. Useful when e.g. re-running '
                             'failed tests using a file produced with --failed_test_list.')
    parser.add_argument('--build-root', dest='build_root', required=True,
                        help='Build root (e.g. ~/code/yugabyte/build/debug-gcc-dynamic-community)')
    parser.add_argument('--build-type', dest='build_type', required=False,
                        help='Build type (e.g. debug, release, tsan, or asan)')
    parser.add_argument('--max-tests', type=int, dest='max_tests',
                        help='Maximum number of tests to run. Useful when debugging this script '
                             'for faster iteration. This number of tests will be randomly chosen '
                             'from the test suite.')
    parser.add_argument('--sleep_after_tests', action='store_true',
                        help='Sleep for a while after tests are done before destroying '
                             'SparkContext. This allows examining the Spark app UI.')
    parser.add_argument('--reports-dir', dest='report_base_dir',
                        help='A parent directory for storing build reports (such as per-test '
                             'run times and whether the Spark job succeeded).')
    parser.add_argument('--write_report', action='store_true',
                        help='Actually enable writing build reports. If this is not '
                             'specified, we will only read previous test reports to sort tests '
                             'better.')
    parser.add_argument('--save_report_to_build_dir', action='store_true',
                        help='Save a test report to the build directory directly, in addition '
                             'to any reports saved in the common reports directory. This should '
                             'work even if neither --reports-dir nor --write_report is specified.')
    parser.add_argument('--cpp_test_program_regexp',
                        help='A regular expression to filter C++ test program names on.')
    parser.add_argument('--num_repetitions', type=int, default=1,
                        help='Number of times to run each test.')
    parser.add_argument('--failed_test_list',
                        help='A file path to save the list of failed tests to. The format is '
                             'one test descriptor per line.')
    parser.add_argument('--allow_no_tests', action='store_true',
                        help='Allow running with filters that yield no tests to run. Useful when '
                             'debugging.')
    args = parser.parse_args()

    # ---------------------------------------------------------------------------------------------
    # Argument validation.
    if args.run_all_tests:
        args.run_java_tests = True
        args.run_cpp_tests = True

    global verbose
    verbose = args.verbose

    log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format="[%(filename)s:%(lineno)d] %(asctime)s %(levelname)s: %(message)s")

    global_conf = yb_dist_tests.set_global_conf_from_args(args)
    build_root = global_conf.build_root
    yb_src_root = global_conf.yb_src_root

    if not args.run_cpp_tests and not args.run_java_tests:
        fatal_error("At least one of --java or --cpp has to be specified")

    report_base_dir = args.report_base_dir
    write_report = args.write_report
    if report_base_dir and not os.path.isdir(report_base_dir):
        fatal_error("Report base directory '{}' does not exist".format(report_base_dir))

    if write_report and not report_base_dir:
        fatal_error("--write_report specified but the reports directory (--reports-dir) is not")

    if write_report and not is_writable(report_base_dir):
        fatal_error(
            "--write_report specified but the reports directory ('{}') is not writable".format(
                report_base_dir))

    if args.num_repetitions < 1:
        fatal_error("--num_repetitions must be at least 1, got: {}".format(args.num_repetitions))

    failed_test_list_path = args.failed_test_list
    if failed_test_list_path and not is_parent_dir_writable(failed_test_list_path):
        fatal_error(("Parent directory of failed test list destination path ('{}') is not " +
                     "writable").format(args.failed_test_list))

    test_list_path = args.test_list
    if test_list_path and not os.path.isfile(test_list_path):
        fatal_error("File specified by --test_list does not exist or is not a file: '{}'".format(
            test_list_path))

    # ---------------------------------------------------------------------------------------------
    # Start the timer.
    global_start_time = time.time()

    if test_list_path:
        test_descriptors = load_test_list(test_list_path)
    else:
        test_descriptors = collect_tests(args)

    if not test_descriptors and not args.allow_no_tests:
        logging.info("No tests to run")
        return

    propagate_env_vars()

    num_tests = len(test_descriptors)

    if args.max_tests and num_tests > args.max_tests:
        logging.info("Randomly selecting {} tests out of {} possible".format(
            args.max_tests, num_tests))
        random.shuffle(test_descriptors)
        test_descriptors = test_descriptors[:args.max_tests]
        num_tests = len(test_descriptors)

    if args.verbose:
        for test_descriptor in test_descriptors:
            logging.info("Will run test: {}".format(test_descriptor))

    num_repetitions = args.num_repetitions
    total_num_tests = num_tests * num_repetitions
    logging.info("Running {} tests on Spark, {} times each, for a total of {} tests".format(
        num_tests, num_repetitions, total_num_tests))

    if num_repetitions > 1:
        test_descriptors = [
            test_descriptor.with_attempt_index(i)
            for test_descriptor in test_descriptors
            for i in xrange(1, num_repetitions + 1)
        ]

    app_name_details = ['{} tests total'.format(total_num_tests)]
    if num_repetitions > 1:
        app_name_details += ['{} repetitions of {} tests'.format(num_repetitions, num_tests)]
    init_spark_context(app_name_details)
    set_global_conf_for_spark_jobs()

    # By this point, test_descriptors have been duplicated the necessary number of times, with
    # attempt indexes attached to each test descriptor.
    global_exit_code = None
    spark_succeeded = False
    if test_descriptors:
        logging.info("Running {} tasks on Spark".format(total_num_tests))
        assert total_num_tests == len(test_descriptors), \
            "total_num_tests={}, len(test_descriptors)={}".format(
                total_num_tests, len(test_descriptors))
        test_names_rdd = spark_context.parallelize(
            [test_descriptor.descriptor_str for test_descriptor in test_descriptors],
            numSlices=total_num_tests)
        spark_succeeded = False
        try:
            results = test_names_rdd.map(parallel_run_test).collect()
            spark_succeeded = True
        except py4j.protocol.Py4JJavaError:
            traceback.print_exc()
            # This is EX_UNAVAILABLE from the standard sysexits.h header, which contains an attempt
            # to categorize exit codes. In this case the Spark cluster is probably unavailable.
            global_exit_code = 69
            # No results were collected; fall through with an empty list so the reporting code
            # below still works.
            results = []

        if not spark_succeeded:
            logging.error("Spark job failed to run! Jenkins should probably restart this build.")
    else:
        # Allow running zero tests, for testing the reporting logic.
        results = []

    test_exit_codes = set([result.exit_code for result in results])
    if global_exit_code is None:
        if test_exit_codes == set([0]):
            global_exit_code = 0
        else:
            global_exit_code = 1

    logging.info("Tests are done, set of exit codes: {}, will return exit code {}".format(
        sorted(test_exit_codes), global_exit_code))

    failures_by_language = defaultdict(int)
    failed_test_desc_strs = []
    for result in results:
        if result.exit_code != 0:
            logging.info("Test failed: {}".format(result.test_descriptor))
            failures_by_language[result.test_descriptor.language] += 1
            failed_test_desc_strs.append(result.test_descriptor.descriptor_str)

    if failed_test_list_path:
        logging.info("Writing the list of failed tests to '{}'".format(failed_test_list_path))
        with open(failed_test_list_path, 'w') as failed_test_file:
            failed_test_file.write("\n".join(failed_test_desc_strs) + "\n")

    for language, num_failures in failures_by_language.iteritems():
        logging.info("Failures in {} tests: {}".format(language, num_failures))

    total_elapsed_time_sec = time.time() - global_start_time
    logging.info("Total elapsed time: {} sec".format(total_elapsed_time_sec))

    if (report_base_dir and write_report) or args.save_report_to_build_dir:
        save_report(report_base_dir, results, total_elapsed_time_sec, spark_succeeded,
                    save_to_build_dir=args.save_report_to_build_dir)

    if args.sleep_after_tests:
        # This can be used as a way to keep the Spark app running during debugging while examining
        # its UI.
        time.sleep(600)

    sys.exit(global_exit_code)
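

# The argument validation above uses is_writable() and is_parent_dir_writable(), which are not
# part of this excerpt. A plausible minimal implementation, assuming they are thin wrappers
# around os.access() (an assumption -- the real helpers may behave differently):
def is_writable(path):
    # True if the current user can write to the given path.
    return os.access(path, os.W_OK)


def is_parent_dir_writable(path):
    # True if the directory that would contain 'path' is writable, i.e. the file can be created.
    parent_dir = os.path.dirname(os.path.abspath(path))
    return os.access(parent_dir, os.W_OK)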


def main():
    parser = argparse.ArgumentParser(description='Run tests on Spark.')
    parser.add_argument('--verbose', action='store_true',
                        help='Enable debug output')
    parser.add_argument('--java', dest='run_java_tests', action='store_true',
                        help='Run Java tests')
    parser.add_argument('--cpp', dest='run_cpp_tests', action='store_true',
                        help='Run C++ tests')
    parser.add_argument('--all', dest='run_all_tests', action='store_true',
                        help='Run tests in all languages')
    parser.add_argument('--test_list',
                        help='A file with a list of tests to run. Useful when e.g. re-running '
                             'failed tests using a file produced with --failed_test_list.')
    parser.add_argument('--build-root', dest='build_root', required=True,
                        help='Build root (e.g. ~/code/yugabyte/build/debug-gcc-dynamic-community)')
    parser.add_argument('--build-type', dest='build_type', required=False,
                        help='Build type (e.g. debug, release, tsan, or asan)')
    parser.add_argument('--max-tests', type=int, dest='max_tests',
                        help='Maximum number of tests to run. Useful when debugging this script '
                             'for faster iteration. This number of tests will be randomly chosen '
                             'from the test suite.')
    parser.add_argument('--sleep_after_tests', action='store_true',
                        help='Sleep for a while after tests are done before destroying '
                             'SparkContext. This allows examining the Spark app UI.')
    parser.add_argument('--stats-dir', dest='stats_dir',
                        help='A directory for storing build statistics (such as per-test run '
                             'times).')
    parser.add_argument('--write_stats', action='store_true',
                        help='Actually enable writing build statistics. If this is not '
                             'specified, we will only read previous stats to sort tests better.')
    parser.add_argument('--cpp_test_program_regexp',
                        help='A regular expression to filter C++ test program names on.')
    parser.add_argument('--num_repetitions', type=int, default=1,
                        help='Number of times to run each test.')
    parser.add_argument('--failed_test_list',
                        help='A file path to save the list of failed tests to. The format is '
                             'one test descriptor per line.')
    args = parser.parse_args()

    # ---------------------------------------------------------------------------------------------
    # Argument validation.
    if args.run_all_tests:
        args.run_java_tests = True
        args.run_cpp_tests = True

    global verbose
    verbose = args.verbose

    log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format="[" + os.path.basename(__file__) + "] %(asctime)s %(levelname)s: %(message)s")

    global_conf = yb_dist_tests.set_global_conf_from_args(args)
    build_root = global_conf.build_root
    yb_src_root = global_conf.yb_src_root

    if not args.run_cpp_tests and not args.run_java_tests:
        fatal_error("At least one of --java or --cpp has to be specified")

    stats_dir = args.stats_dir
    write_stats = args.write_stats
    if stats_dir and not os.path.isdir(stats_dir):
        fatal_error("Stats directory '{}' does not exist".format(stats_dir))

    if write_stats and not stats_dir:
        fatal_error("--write_stats specified but the stats directory (--stats-dir) is not")

    if write_stats and not is_writable(stats_dir):
        fatal_error(
            "--write_stats specified but the stats directory ('{}') is not writable".format(
                stats_dir))

    if args.num_repetitions < 1:
        fatal_error("--num_repetitions must be at least 1, got: {}".format(args.num_repetitions))

    failed_test_list_path = args.failed_test_list
    if failed_test_list_path and not is_parent_dir_writable(failed_test_list_path):
        fatal_error(("Parent directory of failed test list destination path ('{}') is not " +
                     "writable").format(args.failed_test_list))

    test_list_path = args.test_list
    if test_list_path and not os.path.isfile(test_list_path):
        fatal_error("File specified by --test_list does not exist or is not a file: '{}'".format(
            test_list_path))

    # ---------------------------------------------------------------------------------------------
    # Start the timer.
    global_start_time = time.time()

    if test_list_path:
        test_descriptors = load_test_list(test_list_path)
    else:
        test_descriptors = collect_tests(args)

    if not test_descriptors:
        logging.info("No tests to run")
        return

    propagate_env_vars()

    # We're only importing PySpark here so that we can debug the part of this script above this
    # line without depending on PySpark libraries.

    num_tests = len(test_descriptors)

    if args.max_tests and num_tests > args.max_tests:
        logging.info("Randomly selecting {} tests out of {} possible".format(
            args.max_tests, num_tests))
        random.shuffle(test_descriptors)
        test_descriptors = test_descriptors[:args.max_tests]
        num_tests = len(test_descriptors)

    if args.verbose:
        for test_descriptor in test_descriptors:
            logging.info("Will run test: {}".format(test_descriptor))

    num_repetitions = args.num_repetitions
    total_num_tests = num_tests * num_repetitions
    logging.info("Running {} tests on Spark, {} times each, for a total of {} tests".format(
        num_tests, num_repetitions, total_num_tests))

    if num_repetitions > 1:
        test_descriptors = [
            test_descriptor.with_attempt_index(i)
            for test_descriptor in test_descriptors
            for i in xrange(1, num_repetitions + 1)
        ]

    init_spark_context()
    set_global_conf_for_spark_jobs()

    # By this point, test_descriptors have been duplicated the necessary number of times, with
    # attempt indexes attached to each test descriptor.
    test_names_rdd = spark_context.parallelize(
        [test_descriptor.descriptor_str for test_descriptor in test_descriptors],
        numSlices=total_num_tests)
    results = test_names_rdd.map(parallel_run_test).collect()

    exit_codes = set([result.exit_code for result in results])
    if exit_codes == set([0]):
        global_exit_code = 0
    else:
        global_exit_code = 1

    logging.info("Tests are done, set of exit codes: {}, will return exit code {}".format(
        sorted(exit_codes), global_exit_code))

    failures_by_language = defaultdict(int)
    failed_test_desc_strs = []
    for result in results:
        if result.exit_code != 0:
            logging.info("Test failed: {}".format(result.test_descriptor))
            failures_by_language[result.test_descriptor.language] += 1
            failed_test_desc_strs.append(result.test_descriptor.descriptor_str)

    if failed_test_list_path:
        logging.info("Writing the list of failed tests to '{}'".format(failed_test_list_path))
        with open(failed_test_list_path, 'w') as failed_test_file:
            failed_test_file.write("\n".join(failed_test_desc_strs) + "\n")

    for language, num_failures in failures_by_language.iteritems():
        logging.info("Failures in {} tests: {}".format(language, num_failures))

    total_elapsed_time_sec = time.time() - global_start_time
    logging.info("Total elapsed time: {} sec".format(total_elapsed_time_sec))

    if stats_dir and write_stats:
        save_stats(stats_dir, results, total_elapsed_time_sec)

    if args.sleep_after_tests:
        # This can be used as a way to keep the Spark app running during debugging while examining
        # its UI.
        time.sleep(600)

    sys.exit(global_exit_code)
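

# Example invocation, for illustration only. The script filename and paths below are
# assumptions, and only flags common to all of the main() variants above are used; adjust to
# your checkout and build configuration:
#
#   python run_tests_on_spark.py \
#       --build-root ~/code/yugabyte/build/debug-gcc-dynamic-community \
#       --cpp --java --num_repetitions 2
#
# The script is assumed to be executed directly, so the usual entry-point guard would be:
if __name__ == '__main__':
    main()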