def cheribsd_setup_args(args: argparse.Namespace):
    """Post-process the parsed command line before the tests are run.

    * Fills in ``args.run_cheribsdtest`` from ``args.xtarget`` when it was left as ``None``.
    * Flattens ``args.kyua_tests_files`` (a list of lists) and checks that every entry is named ``Kyuafile``.
    * When kyua and/or cheribsdtest will run, prepares the host output directory (optionally a timestamped
      sub-directory), records the command line in it and registers it as a writable SMB mount at
      ``/test-results``.

    ``args.test_output_dir`` is always updated to the resolved absolute directory so that later host-side
    users of that value never see an unexpanded ``~`` or ``$VAR``.
    """
    if args.run_cheribsdtest is None:
        # Only hybrid and purecap images have cheribsdtest
        assert isinstance(args.xtarget, CrossCompileTarget)
        args.run_cheribsdtest = args.xtarget.is_hybrid_or_purecap_cheri()
    if args.kyua_tests_files:
        # flatten the list (https://stackoverflow.com/a/45323085/894271):
        args.kyua_tests_files = functools.reduce(operator.iconcat, args.kyua_tests_files, [])
        print(args.kyua_tests_files)
        for file in args.kyua_tests_files:
            if Path(file).name != "Kyuafile":
                boot_cheribsd.failure("Expected a path to a Kyuafile but got: ", file)
    # The output directory only needs to be mounted if we are running kyua and/or cheribsdtest
    if not (args.kyua_tests_files or args.run_cheribsdtest):
        return
    test_output_dir = Path(os.path.expandvars(os.path.expanduser(args.test_output_dir)))
    if not test_output_dir.is_dir():
        boot_cheribsd.failure("Output directory does not exist: ", test_output_dir)
    # Create a timestamped directory (unless explicitly disabled):
    if args.no_timestamped_test_subdir:
        real_output_dir = test_output_dir.absolute()
    else:
        args.timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        real_output_dir = (test_output_dir / args.timestamp).absolute()
    # Always publish the expanded, absolute path (not just in the timestamped case).
    args.test_output_dir = str(real_output_dir)
    boot_cheribsd.run_host_command(["mkdir", "-p", str(real_output_dir)])
    if not boot_cheribsd.PRETEND:
        (real_output_dir / "cmdline").write_text(str(sys.argv))
    args.smb_mount_directories.append(
        boot_cheribsd.SmbMount(real_output_dir, readonly=False, in_target="/test-results"))
def cheribsd_setup_args(args: argparse.Namespace):
    """Post-process the parsed command line before the tests are run.

    Forces SMB to be used instead of SSH and skips the SSH setup. When kyua test files were requested it
    flattens ``args.kyua_tests_files`` (a list of lists), checks that every entry is named ``Kyuafile``,
    prepares the host output directory (optionally a timestamped sub-directory), records the command line
    in it and registers it as a writable SMB mount at ``/kyua-results``.

    ``args.kyua_tests_output`` is always updated to the resolved absolute directory so that later host-side
    users of that value never see an unexpanded ``~`` or ``$VAR``.
    """
    args.use_smb_instead_of_ssh = True  # skip the ssh setup
    args.skip_ssh_setup = True
    if not args.kyua_tests_files:
        return
    # flatten the list (https://stackoverflow.com/a/45323085/894271):
    args.kyua_tests_files = functools.reduce(operator.iconcat, args.kyua_tests_files, [])
    print(args.kyua_tests_files)
    for file in args.kyua_tests_files:
        if Path(file).name != "Kyuafile":
            boot_cheribsd.failure("Expected a path to a Kyuafile but got: ", file)
    test_output_dir = Path(os.path.expandvars(os.path.expanduser(args.kyua_tests_output)))
    if not test_output_dir.is_dir():
        boot_cheribsd.failure("Output directory does not exist: ", test_output_dir)
    # Create a timestamped directory (unless explicitly disabled):
    if args.kyua_tests_output_no_timestamped_subdir:
        real_output_dir = test_output_dir.absolute()
    else:
        args.timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        real_output_dir = (test_output_dir / args.timestamp).absolute()
    # Always publish the expanded, absolute path (not just in the timestamped case).
    args.kyua_tests_output = str(real_output_dir)
    boot_cheribsd.run_host_command(["mkdir", "-p", str(real_output_dir)])
    if not boot_cheribsd.PRETEND:
        (real_output_dir / "cmdline").write_text(str(sys.argv))
    args.smb_mount_directories.append(
        boot_cheribsd.SmbMount(real_output_dir, readonly=False, in_target="/kyua-results"))
def flush_thread(f, qemu: boot_cheribsd.CheriBSDInstance, should_exit_event: threading.Event):
    """Background loop that keeps draining QEMU output until asked to stop.

    Roughly every 0.1s (plus the expect timeout) it flushes ``f`` (if given) and reads from ``qemu`` so
    that trap messages get printed. A "KDB: enter:" match is reported as a kernel panic and recorded in
    the module-level ``KERNEL_PANIC`` flag; EOF from QEMU ends the loop. One last expect is done on exit
    to flush whatever is still buffered.
    """
    global KERNEL_PANIC
    while True:
        if should_exit_event.wait(timeout=0.1):
            break
        if f:
            f.flush()
        if should_exit_event.is_set():
            break
        # keep reading line-by-line to output any QEMU trap messages:
        matched = qemu.expect([pexpect.TIMEOUT, "KDB: enter:", pexpect.EOF, qemu.crlf],
                              timeout=qemu.flush_interval)
        if boot_cheribsd.PRETEND:
            time.sleep(1)
            continue
        if matched == 1:
            boot_cheribsd.failure("GOT KERNEL PANIC!", exit=False)
            boot_cheribsd.debug_kernel_panic(qemu)
            KERNEL_PANIC = True  # TODO: tell lit to abort now....
        elif matched == 2:
            # QEMU exited?
            boot_cheribsd.failure("GOT QEMU EOF!", exit=False)
            break
    # One final expect to flush the buffer:
    qemu.expect([pexpect.TIMEOUT, pexpect.EOF], timeout=1)
    boot_cheribsd.success("QEMU output flushing thread terminated.")
def create_junit_xml(builddir, name, tools):
    """Generate the JUnit XML via ``_create_junit_xml`` and check that it exists.

    Returns True when ``<builddir>/test-results.xml`` was created (its ``<testsuite`` lines are then
    printed using grep), otherwise reports a failure and returns False.
    """
    _create_junit_xml(builddir, name, tools)
    xml_path = Path(builddir) / "test-results.xml"
    if xml_path.exists():
        # Show the summary line(s) of the generated file.
        boot_cheribsd.run_host_command(["grep", "<testsuite", str(xml_path)])
        return True
    boot_cheribsd.failure("Failed to create the JUnit XML file")
    return False
def libcxx_main(barrier: Barrier = None, mp_queue: Queue = None, ssh_port_queue: Queue = None,
                shard_num: int = None):
    """Entry point for running the libcxx lit tests (optionally as one shard of a parallel run).

    When ``mp_queue`` is given, the SSH port is reported via ``ssh_port_queue``, the main process is
    notified about the boot stage, and any exception is forwarded as a FAILURE message on ``mp_queue``
    before being re-raised.
    """

    def set_cmdline_args(args: argparse.Namespace):
        """Argument-adjustment callback handed to run_tests_main()."""
        boot_cheribsd.info("Setting args:", args)
        if mp_queue:
            # check that we don't get a conflict
            mp_debug(args, "Syncing shard ", shard_num, " with main process. Stage: assign SSH port")
            ssh_port_queue.put((args.ssh_port, shard_num))
            run_remote_lit_test.notify_main_process(
                args, run_remote_lit_test.MultiprocessStages.BOOTING_CHERIBSD, mp_queue, barrier)
        is_sharded = shard_num is not None or args.internal_num_shards or args.parallel_jobs
        if args.interact and is_sharded:
            boot_cheribsd.failure("Cannot use --interact with multiple shards")
            sys.exit()
        run_remote_lit_test.adjust_common_cmdline_args(args)

    def run_libcxx_tests(qemu: boot_cheribsd.CheriBSDInstance, args: argparse.Namespace) -> bool:
        """Test callback handed to run_tests_main(): runs the "libcxx" lit suite in a temporary directory."""
        # TODO: do we need lit_extra_args=["-Denable_filesystem=False"]?
        # Some of the tests might fail on a SMBFS directory.
        with tempfile.TemporaryDirectory(prefix="cheribuild-libcxx-tests-") as tempdir:
            return run_remote_lit_test.run_remote_lit_tests("libcxx", qemu, args, tempdir, mp_q=mp_queue,
                                                            barrier=barrier)

    try:
        # need_ssh: we need ssh running to execute the tests
        run_tests_main(test_function=run_libcxx_tests, need_ssh=True, should_mount_builddir=True,
                       argparse_setup_callback=add_cmdline_args,
                       argparse_adjust_args_callback=set_cmdline_args)
    except Exception as e:
        if mp_queue:
            # Tell the main process why this shard is going away before propagating the error.
            boot_cheribsd.failure("GOT EXCEPTION in shard ", shard_num, ": ", sys.exc_info(), exit=False)
            boot_cheribsd.info("".join(traceback.format_tb(e.__traceback__)))
            description = str(type(e)) + ": " + str(e)
            mp_queue.put((run_remote_lit_test.FAILURE, shard_num, description))
        raise
    finally:
        boot_cheribsd.info("Finished running ", " ".join(sys.argv))
def run_subdir(qemu: boot_cheribsd.CheriBSDInstance, subdir: Path, xml: junitparser.JUnitXml,
               successful_tests: list, failed_tests: list, build_dir: Path):
    """Run every ``tst_*`` binary found below ``subdir`` inside QEMU and collect the JUnit results.

    :param qemu: the running instance used to execute the tests
    :param subdir: host directory that is walked (hidden directories are skipped) to find test binaries
    :param xml: aggregate JUnit document; one test suite (or a failure entry) is added per binary
    :param successful_tests: output list, receives each test whose run AND result XML were both fine
    :param failed_tests: output list, receives each test that failed to run or produced unusable XML
    :param build_dir: host directory in which the guest's ``/build/test.xml`` is read back

    A test ends up in exactly one of the two output lists, so their combined length is the number of
    tests that were run.
    """
    tests = []
    for root, dirs, files in os.walk(str(subdir), topdown=True):
        for name in files:
            if not name.startswith("tst_") or name.endswith(".core"):
                continue
            tests.append(Path(root, name))
        # Ignore .moc and .obj directories (in-place so that os.walk does not descend into them):
        dirs[:] = [d for d in dirs if not d.startswith(".")]
    # Ensure that we run the tests in a reproducible order
    for f in sorted(tests):
        starttime = datetime.datetime.utcnow()
        try:
            # TODO: -o /path/to/file -junitxml
            qemu.checked_run(
                "rm -f /build/test.xml && {} -o /build/test.xml,junitxml -o -,txt -v1".format(f), timeout=10)
            endtime = datetime.datetime.utcnow()
            qemu.checked_run("fsync /build/test.xml")
            test_xml = build_dir / "test.xml"
            qt_test = junitparser.JUnitXml.fromfile(str(test_xml))
            if not isinstance(qt_test, junitparser.TestSuite):
                raise ValueError("Got unexpected parse result loading JUnit Xml: " + qt_test.tostring())
            if qt_test.name.lower() != f.name:
                raise ValueError(
                    "Got unexpected test suite name: '{}' instead of '{}'".format(qt_test.name, f.name))
            if not qt_test.time:
                # Fall back to the wall-clock duration if the suite did not report one.
                qt_test.time = (endtime - starttime).total_seconds()
            boot_cheribsd.info("Results for ", f.name, ": ", qt_test)
            xml.add_testsuite(qt_test)
            # Only count the test as successful once its results have been validated and recorded,
            # otherwise it could be listed as both successful and failed.
            successful_tests.append(f)
        except Exception as e:
            if isinstance(e, boot_cheribsd.CheriBSDCommandFailed):
                boot_cheribsd.failure("Failed to run ", f.name, ": ", str(e), exit=False)
            else:
                boot_cheribsd.failure("Error loading JUnit result for", f.name, ": ", str(e), exit=False)
            failed_tests.append(f)
            add_junit_failure(xml, f, str(e), starttime)
            # Kill the process that timed out:
            qemu.sendintr()
            qemu.expect_prompt(timeout=60)
def run_qtbase_tests(qemu: boot_cheribsd.CheriBSDInstance, args: argparse.Namespace):
    """Run the qtbase auto tests selected by ``args.test_subset`` and write a JUnit report.

    Two smoketests are run first, then every test below ``<build_dir>/tests/auto/<test_subset>``.
    Returns True if no test failed.
    """
    # TODO: also run the non-corelib tests
    junit_results = junitparser.JUnitXml()
    passed = []
    failed = []
    build_root = Path(args.build_dir)
    run_started = datetime.datetime.utcnow()
    subset = Path(args.test_subset)
    auto_tests_dir = build_root / "tests/auto"
    selected_dir = auto_tests_dir / subset
    assert selected_dir.is_relative_to(auto_tests_dir), "Invalid path " + str(auto_tests_dir / subset)
    boot_cheribsd.info("Running qtbase tests for ", subset)
    # Start with some basic smoketests:
    for smoketest in ("/build/tests/auto/corelib/tools/qarraydata/tst_qarraydata",
                      "/build/tests/auto/corelib/global/qtendian/tst_qtendian"):
        qemu.checked_run(smoketest)
    run_subdir(qemu, selected_dir, junit_results, successful_tests=passed, failed_tests=failed,
               build_dir=build_root)
    junit_results.time = (datetime.datetime.utcnow() - run_started).total_seconds()
    junit_results.update_statistics()
    boot_cheribsd.info("JUnit results:", junit_results)
    total_run = len(passed) + len(failed)
    boot_cheribsd.info("Ran " + str(total_run), " tests in ", (datetime.datetime.utcnow() - run_started))
    if failed:
        failed_names = "\n\t".join(test.name for test in failed)
        boot_cheribsd.failure("The following ", len(failed), " tests failed:\n\t", failed_names, exit=False)
    # Finally, write the Junit XML file:
    if not boot_cheribsd.PRETEND:
        junit_results.write(args.junit_xml, pretty=True)
        boot_cheribsd.info("Wrote Junit results to ", args.junit_xml)
    return not failed
def set_cmdline_args(args: argparse.Namespace):
    """Argument-adjustment callback for a (possibly sharded) test run.

    ``mp_queue``, ``ssh_port_queue``, ``barrier`` and ``shard_num`` are not parameters; they are resolved
    from the surrounding scope. When ``mp_queue`` is set, the shard's SSH port is reported and the main
    process is notified that this shard is about to boot. ``--interact`` is rejected for sharded runs.
    """
    boot_cheribsd.info("Setting args:", args)
    if mp_queue:
        # check that we don't get a conflict
        mp_debug(args, "Syncing shard ", shard_num, " with main process. Stage: assign SSH port")
        port_and_shard = (args.ssh_port, shard_num)
        ssh_port_queue.put(port_and_shard)
        booting_stage = run_remote_lit_test.MultiprocessStages.BOOTING_CHERIBSD
        run_remote_lit_test.notify_main_process(args, booting_stage, mp_queue, barrier)
    if not args.interact:
        return
    if shard_num is not None or args.internal_num_shards or args.parallel_jobs:
        boot_cheribsd.failure("Cannot use --interact with multiple shards")
        sys.exit()
def wait_or_terminate_all_shards(processes, max_time, timed_out):
    """Wait for all shard processes to finish and forcibly stop those that do not.

    :param processes: iterable of process objects (``join``, ``is_alive``, ``terminate``, ``name``, ``pid``)
    :param max_time: total number of seconds to wait for ALL processes together
    :param timed_out: if true, do not wait at all and go straight to terminating live processes

    Every process that is still alive after the wait is sent terminate() followed by SIGKILL, and an
    error is reported (without exiting) if it survives even that.
    """
    assert max_time > 0 or timed_out
    # Use a monotonic clock so that wall-clock adjustments cannot stretch or shrink the deadline.
    deadline = time.monotonic() + max_time
    for p in processes:
        # don't wait for completion if we've already timed out
        if not timed_out:
            # The budget is shared between all processes, never pass a negative timeout to join().
            remaining_seconds = max(0.0, deadline - time.monotonic())
            try:
                p.join(timeout=remaining_seconds)
            except Exception:
                # Not a bare except: KeyboardInterrupt/SystemExit must still be able to propagate.
                boot_cheribsd.failure("Could not join job ", p.name, " in ", remaining_seconds, " seconds",
                                      exit=False)
                timed_out = True
        if p.is_alive():
            boot_cheribsd.failure("Parallel job ", p.name, " did not exit cleanly!", exit=False)
            p.terminate()
            time.sleep(1)
            try:
                os.kill(p.pid, signal.SIGKILL)
            except ProcessLookupError:
                pass  # the process is already gone, nothing left to kill
            time.sleep(1)
            if p.is_alive():
                boot_cheribsd.failure("ERROR: Could not kill child process ", p.name, ", pid=", p.pid,
                                      exit=False)
def run_remote_lit_tests(testsuite: str, qemu: boot_cheribsd.CheriBSDInstance, args: argparse.Namespace,
                         tempdir: str, mp_q: multiprocessing.Queue = None,
                         barrier: multiprocessing.Barrier = None, llvm_lit_path: str = None,
                         lit_extra_args: list = None) -> bool:
    """Run a lit test suite against ``qemu`` and report the outcome to the main process (if any).

    Thin wrapper around run_remote_lit_tests_impl(): it first checks that ``psutil`` is importable, then
    forwards all arguments. When ``mp_q`` is given, a COMPLETED or FAILURE message tagged with
    ``args.internal_shard`` is put on the queue. Exceptions are always re-raised.
    """
    try:
        import psutil  # noqa: F401
    except ImportError:
        boot_cheribsd.failure("Cannot run lit without `psutil` python module installed", exit=True)
    reports_to_main = bool(mp_q)
    try:
        if reports_to_main:
            assert barrier is not None
        impl_kwargs = dict(testsuite=testsuite, qemu=qemu, args=args, tempdir=tempdir, barrier=barrier,
                           mp_q=mp_q, llvm_lit_path=llvm_lit_path, lit_extra_args=lit_extra_args)
        outcome = run_remote_lit_tests_impl(**impl_kwargs)
        if reports_to_main:
            mp_q.put((COMPLETED, args.internal_shard))
        return outcome
    except Exception as error:
        if reports_to_main:
            boot_cheribsd.failure("GOT EXCEPTION in shard ", args.internal_shard, ": ", sys.exc_info(),
                                  exit=False)
            description = str(type(error)) + ": " + str(error)
            mp_q.put((FAILURE, args.internal_shard, description))
        raise
def run_qtwebkit_tests(qemu: boot_cheribsd.CheriBSDInstance, args: argparse.Namespace) -> bool:
    """Run the QtWebkit smoke tests (and the full layout tests unless ``args.smoketest`` is set).

    Afterwards (even if a command failed) the statistics in ``<build_dir>/results.xml`` are refreshed
    when the full tests were requested and the file exists.

    :return: True if all commands succeeded and the JUnit XML could be updated, False if only the XML
             update failed.
    :raises: whatever the checked commands raise; such an exception is never discarded by the XML
             post-processing step.
    """
    boot_cheribsd.info("Running QtWebkit tests")
    junit_xml_ok = True
    try:
        # Check that jsc + dumprendertree work
        boot_cheribsd.checked_run_cheribsd_command(qemu, "/tmp/jsc --help", timeout=1200)
        # Run a simple javascript loop
        boot_cheribsd.checked_run_cheribsd_command(
            qemu, "/tmp/jsc -e 'for (i = 0; i < 10; i++) print(1 + i);'", timeout=1200)
        boot_cheribsd.checked_run_cheribsd_command(
            qemu, "/tmp/DumpRenderTree -v /tmp/helloworld.html", timeout=1800)
        boot_cheribsd.checked_run_cheribsd_command(
            qemu, "/tmp/DumpRenderTree -p --stdout /build/hello.png "
                  "/tmp/helloworld.html", timeout=1800)
        if not args.smoketest:
            boot_cheribsd.checked_run_cheribsd_command(
                qemu, "/source/Tools/Scripts/run-layout-jsc -j /tmp/jsc -t "
                      "/source/LayoutTests -r /build/results -x /build/results.xml", timeout=None)
    finally:
        tests_xml_path = Path(args.build_dir, 'results.xml')
        try:
            if not args.smoketest and tests_xml_path.exists():
                # Process junit xml file with junitparser to update the number of tests, failures, total time, etc.
                xml = junitparser.JUnitXml.fromfile(str(tests_xml_path))
                xml.update_statistics()
                xml.write()
        except Exception:
            boot_cheribsd.failure("Could not update JUnit XML", tests_xml_path, exit=False)
            # Only remember the failure here: returning from inside `finally` would silently drop an
            # exception raised by the test commands above.
            junit_xml_ok = False
    return junit_xml_ok
def run_cheribsdtest(qemu: boot_cheribsd.QemuCheriBSDInstance, binary_name, old_binary_name,
                     args: argparse.Namespace) -> bool:
    """Run one cheribsdtest binary in the guest and copy its JUnit XML output to the host.

    :param qemu: the running instance
    :param binary_name: name of the test binary in /bin (also used for the XML file name)
    :param old_binary_name: fallback binary name used when /bin/<binary_name> is not executable
    :param args: parsed command line; ``args.test_output_dir`` is the scp destination if SMB failed
    :return: True if the test binary exited with status 0, False on a non-zero exit status, a timeout or
             a failed command.
    """
    try:
        qemu.checked_run("rm -f /tmp/{}.xml".format(binary_name))
        # Generate JUnit XML:
        qemu.run("if [ -x /bin/{0} ]; then /bin/{0} -a -x; else /bin/{1} -a -x; fi > /tmp/{0}.xml".format(
            binary_name, old_binary_name), ignore_cheri_trap=True, cheri_trap_fatal=False, timeout=5 * 60)
        # qemu.run() is unchecked, so query the exit status of the test binary explicitly.
        qemu.sendline("echo EXITCODE=$?")
        qemu.expect(["EXITCODE=(\\d+)\r"], timeout=5, pretend_result=0)
        if boot_cheribsd.PRETEND:
            exit_code = 0
        else:
            print(qemu.match.groups())
            exit_code = int(qemu.match.group(1))
        qemu.expect_prompt()
        if qemu.smb_failed:
            boot_cheribsd.info("SMB mount has failed, performing normal scp")
            host_path = Path(args.test_output_dir, binary_name + ".xml")
            qemu.scp_from_guest("/tmp/{}.xml".format(binary_name), host_path)
        else:
            qemu.checked_run("mv -f /tmp/{}.xml /test-results/{}.xml".format(binary_name, binary_name))
            qemu.run("fsync /test-results/{}.xml".format(binary_name))
        return exit_code == 0
    except boot_cheribsd.CheriBSDCommandTimeout as e:
        boot_cheribsd.failure("Timeout running cheribsdtest: " + str(e), exit=False)
        qemu.sendintr()
        qemu.sendintr()
        # Try to cancel the running command and get back to having a sensible prompt
        qemu.checked_run("pwd")
        time.sleep(10)
        return False
    except boot_cheribsd.CheriBSDCommandFailed as e:
        # e.args is a tuple: look for the text inside each argument instead of requiring one argument
        # to be exactly equal to "command not found".
        if any("command not found" in str(arg) for arg in e.args):
            boot_cheribsd.failure("Cannot find cheribsdtest binary ", binary_name, ": " + str(e), exit=False)
        else:
            boot_cheribsd.failure("Failed to run: " + str(e), exit=False)
        return False
def run_parallel_impl(args: argparse.Namespace, processes: "typing.List[Process]", mp_q: Queue,
                      mp_barrier: Barrier, ssh_port_queue: Queue):
    """Supervise the already-started shard processes until they have all finished or something failed.

    Steps:
      1. Wait on ``mp_barrier`` and read one ``(ssh_port, shard_index)`` pair per shard from
         ``ssh_port_queue``; a missing or duplicated port aborts the run.
      2. Process status messages from ``mp_q`` (COMPLETED / NEXT_STAGE / FAILURE), updating the ``stage``
         attribute of the matching process. All shards must boot within 10 minutes (5 seconds in pretend
         mode) and the whole run must finish within 4 hours.
      3. Give the shards 60 seconds to exit cleanly (or terminate them immediately after an error) and
         report the overall result.

    Shard indices in the messages are 1-based, i.e. shard N is ``processes[N - 1]``.
    """
    timed_out = False
    starttime = datetime.datetime.now()
    ssh_ports = []  # check that we don't have multiple parallel jobs trying to use the same port
    assert not mp_barrier.broken, mp_barrier
    # FIXME: without this sleep it fails in jenkins (is the python version there broken?)
    # Works just fine everywhere else where I test it...
    boot_cheribsd.info("Waiting 5 seconds before releasing barrier")
    time.sleep(5)
    mp_debug(args, "Waiting for SSH port barrier")
    mp_barrier.wait(timeout=10)  # wait for ssh ports to be assigned
    for _ in processes:
        try:
            ssh_port, index = ssh_port_queue.get(timeout=1)
            assert index <= len(processes)
            print("SSH port for ", processes[index - 1].name, "is", ssh_port)
            processes[index - 1].ssh_port = ssh_port
            if ssh_port in ssh_ports:
                timed_out = True  # kill all child processes
                boot_cheribsd.failure("ERROR: reusing the same SSH port in multiple jobs: ", ssh_port,
                                      exit=False)
            # Remember the port, otherwise the duplicate check above can never trigger.
            ssh_ports.append(ssh_port)
        except Empty:
            # This seems to be happening in jenkins? Barrier should ensure that we can read without blocking!
            timed_out = True  # kill all child processes
            boot_cheribsd.failure("ERROR: Could not determine SSH port for one of the processes!", exit=False)

    # wait for the success/failure message from the process:
    # if the shard takes longer than 4 hours to run something went wrong
    start_time = datetime.datetime.utcnow()
    max_test_duration = datetime.timedelta(seconds=4 * 60 * 60)
    test_end_time = start_time + max_test_duration
    # If any shard has not yet booted CheriBSD after 10 minutes something went horribly wrong
    max_boot_time = datetime.timedelta(seconds=10 * 60) if not args.pretend else datetime.timedelta(seconds=5)
    boot_cheribsd.info("Waiting for all shards to boot...")
    boot_end_time = start_time + max_boot_time
    booted_shards = 0
    remaining_processes = processes.copy()
    not_booted_processes = processes.copy()
    retrying_queue_read = False
    while len(remaining_processes) > 0:
        if timed_out:
            for p in remaining_processes:
                p.stage = run_remote_lit_test.MultiprocessStages.TIMED_OUT
            break
        loop_start_time = datetime.datetime.utcnow()
        num_shards_not_booted = len(not_booted_processes)
        if num_shards_not_booted > 0:
            mp_debug(args, "Still waiting for ", num_shards_not_booted, " shards to boot")
            if loop_start_time > boot_end_time:
                timed_out = True
                boot_cheribsd.failure("ERROR: ", num_shards_not_booted, " shards did not boot within ",
                                      max_boot_time, ". Shards remaining: ", remaining_processes, exit=False)
                dump_processes(processes)
                continue  # the timed_out check at the top of the loop performs the cleanup
        mp_debug(args, "Still waiting for ", remaining_processes, " to finish")
        # Compare the current time (not the constant boot deadline) against the overall deadline,
        # otherwise the test timeout can never be reached.
        if loop_start_time > test_end_time:
            timed_out = True
            boot_cheribsd.failure("Reached test timeout of", max_test_duration, " with ",
                                  len(remaining_processes), "shards remaining: ", remaining_processes,
                                  exit=False)
            dump_processes(processes)
            continue
        remaining_test_time = test_end_time - loop_start_time
        max_timeout = 120.0 if not args.pretend else 1.0
        try:
            # Wake up regularly (at least every max_timeout seconds) to re-check the deadlines.
            shard_result = mp_q.get(timeout=min(max(1.0, remaining_test_time.total_seconds()), max_timeout))
            retrying_queue_read = False
            mp_debug(args, "Got message:", shard_result)
            target_process = processes[shard_result[1] - 1]
            if shard_result[0] == run_remote_lit_test.COMPLETED:
                boot_cheribsd.success("===> Shard ", shard_result[1], " completed successfully.")
                mp_debug(args, "Shard ", target_process, "exited!")
                if target_process in remaining_processes:
                    remaining_processes.remove(target_process)
                target_process.stage = run_remote_lit_test.MultiprocessStages.EXITED
            elif shard_result[0] == run_remote_lit_test.NEXT_STAGE:
                mp_debug(args, "===> Shard ", shard_result[1], " reached next stage: ", shard_result[2])
                if target_process.stage == run_remote_lit_test.MultiprocessStages.BOOTING_CHERIBSD:
                    not_booted_processes.remove(target_process)
                    boot_cheribsd.success("Shard ", shard_result[1], " has booted successfully afer ",
                                          loop_start_time - start_time)
                    if len(not_booted_processes) == 0:
                        boot_cheribsd.success(
                            "All shards have booted succesfully. Releasing barrier (num_waiting = ",
                            mp_barrier.n_waiting, ")")
                        assert mp_barrier.n_waiting == len(processes), "{} != {}".format(
                            mp_barrier.n_waiting, len(processes))
                        mp_barrier.wait(timeout=10)
                        boot_cheribsd.success("Barrier has been released, tests should run now.")
                # assert target_process.stage < shard_result[2], "STAGE WENT BACKWARDS?"
                target_process.stage = shard_result[2]
            elif shard_result[0] == run_remote_lit_test.FAILURE:
                previous_stage = target_process.stage
                target_process.stage = run_remote_lit_test.MultiprocessStages.FAILED
                target_process.error_message = shard_result[2]
                if target_process in remaining_processes:
                    remaining_processes.remove(target_process)
                if previous_stage != run_remote_lit_test.MultiprocessStages.RUNNING_TESTS:
                    boot_cheribsd.failure("===> FATAL: Shard ", target_process,
                                          " failed before running tests stage: ", previous_stage,
                                          " -> Aborting all other shards", exit=False)
                    timed_out = True
                    break
                else:
                    boot_cheribsd.failure("===> ERROR: Shard ", shard_result[1],
                                          " failed while running tests: ", shard_result[2], exit=True)
            else:
                boot_cheribsd.failure("===> FATAL: Received invalid shard result message: ", shard_result,
                                      exit=True)
        except Empty:
            mp_debug(args, "Got Empty read from QUEUE. Checking ", remaining_processes)
            for p in list(remaining_processes):
                if not p.is_alive():
                    mp_debug(args, "Found dead process", p)
                    if retrying_queue_read:
                        mp_debug(args, "Already retried read after finding dead process", p)
                        boot_cheribsd.failure("===> ERROR: shard ", p, " died without sending a message!",
                                              exit=False)
                        remaining_processes.remove(p)
                    else:
                        # Try to read from the queue one more time to see if we missed a message
                        retrying_queue_read = True
                        mp_debug(args, "Retrying read after finding dead process", p)
                        break
            continue
        except KeyboardInterrupt:
            dump_processes(processes)
            boot_cheribsd.failure("GOT KEYBOARD INTERRUPT! EXITING!", exit=False)
            return
    if not timed_out:
        if not_booted_processes:
            boot_cheribsd.failure("FATAL: all processes exited but some still not booted? ",
                                  not_booted_processes)
        boot_cheribsd.success("All shards have terminated")
    # If we got an error we should not end up here -> all processes should be in stage exited
    dump_processes(processes)
    # All shards should have completed -> give them 60 seconds to shut down cleanly
    wait_or_terminate_all_shards(processes, max_time=60, timed_out=timed_out)
    if timed_out:
        time.sleep(0.2)
        boot_cheribsd.failure("Error running the test jobs!", exit=True)
    else:
        boot_cheribsd.success("All parallel jobs completed!")
    boot_cheribsd.success("Total execution time for parallel libcxx tests: ",
                          datetime.datetime.now() - starttime)
def run_parallel(args: argparse.Namespace):
    """Run the tests as ``args.parallel_jobs`` shard processes and merge their JUnit XML outputs.

    The kernel and disk image are decompressed once in the main process, then one ``run_shard`` process
    is started per shard and supervised by run_parallel_impl(). Whatever happens, the shards are
    waited for / terminated afterwards and, if ``args.xunit_output`` is set, the per-shard
    ``shard-<N>-<name>`` files are merged into it. Missing files and shards that did not reach the
    EXITED stage are recorded as errors in the merged report.
    """
    if args.pretend:
        boot_cheribsd.PRETEND = True
    boot_cheribsd.MESSAGE_PREFIX = "\033[0;35m" + "main process: \033[0m"
    num_jobs = args.parallel_jobs
    if num_jobs < 1:
        boot_cheribsd.failure("Invalid number of parallel jobs: ", num_jobs, exit=True)
    boot_cheribsd.success("Running ", num_jobs, " parallel jobs")
    # to ensure that all threads have started lit
    mp_barrier = Barrier(parties=num_jobs + 1, timeout=4 * 60 * 60)
    mp_q = Queue()
    ssh_port_queue = Queue()
    # Extract the kernel + disk image in the main process to avoid race condition:
    kernel_path = None
    if args.kernel:
        kernel_path = boot_cheribsd.maybe_decompress(Path(args.kernel), True, True, args)
    disk_image_path = None
    if args.disk_image:
        disk_image_path = boot_cheribsd.maybe_decompress(Path(args.disk_image), True, True, args)
    processes = []
    for shard_num in range(1, num_jobs + 1):
        boot_cheribsd.info(args)
        shard_args = (mp_q, mp_barrier, shard_num, args.parallel_jobs, ssh_port_queue, kernel_path,
                      disk_image_path, args.build_dir)
        shard = Process(target=run_shard, args=shard_args)
        shard.stage = run_remote_lit_test.MultiprocessStages.FINDING_SSH_PORT
        shard.daemon = True  # kill process on parent exit
        shard.name = "<LIBCXX test shard " + str(shard_num) + ">"
        shard.start()
        processes.append(shard)
        atexit.register(shard.terminate)
    dump_processes(processes)
    try:
        return run_parallel_impl(args, processes, mp_q, mp_barrier, ssh_port_queue)
    except BaseException as e:
        boot_cheribsd.info("Got error while running run_parallel_impl (", type(e), "): ", e)
        raise
    finally:
        wait_or_terminate_all_shards(processes, max_time=5, timed_out=False)
        # merge junit xml files
        if args.xunit_output:
            boot_cheribsd.success("Merging JUnit XML outputs")
            merged = junitparser.JUnitXml()
            xunit_file = Path(args.xunit_output).absolute()
            dump_processes(processes)
            for shard_num, shard in enumerate(processes, start=1):
                shard_label = str(shard_num)
                shard_file = xunit_file.with_name("shard-" + shard_label + "-" + xunit_file.name)
                mp_debug(args, shard, shard.stage)
                if shard_file.exists():
                    merged += junitparser.JUnitXml.fromfile(str(shard_file))
                else:
                    # Record the missing per-shard file as an error in the merged report.
                    error_msg = "ERROR: could not find JUnit XML " + str(shard_file) + " for shard " + shard_label
                    boot_cheribsd.failure(error_msg, exit=False)
                    missing_suite = junitparser.TestSuite(name="failed-shard-" + shard_label)
                    missing_case = junitparser.TestCase(name="cannot-find-file")
                    missing_case.classname = "failed-shard-" + shard_label
                    missing_case.result = junitparser.Error(message=error_msg)
                    missing_suite.add_testcase(missing_case)
                    merged.add_testsuite(missing_suite)
                if shard.stage != run_remote_lit_test.MultiprocessStages.EXITED:
                    error_msg = ("ERROR: shard " + shard_label + " did not exit cleanly! Was in stage: " +
                                 shard.stage.value)
                    if hasattr(shard, "error_message"):
                        error_msg += "\nError message:\n" + shard.error_message
                    bad_exit_suite = junitparser.TestSuite(name="bad-exit-shard-" + shard_label)
                    bad_exit_case = junitparser.TestCase(name="bad-exit-status")
                    bad_exit_case.result = junitparser.Error(message=error_msg)
                    bad_exit_suite.add_testcase(bad_exit_case)
                    merged.add_testsuite(bad_exit_suite)
            merged.update_statistics()
            merged.write(str(xunit_file))
            if args.pretend:
                print(xunit_file.read_text())
            boot_cheribsd.success("Done merging JUnit XML outputs into ", xunit_file)
            for label, value in (("Duration: ", merged.time), ("Tests: ", merged.tests),
                                 ("Failures: ", merged.failures), ("Errors: ", merged.errors),
                                 ("Skipped: ", merged.skipped)):
                print(label, value)
def run_cheribsd_test(qemu: boot_cheribsd.CheriBSDInstance, args: argparse.Namespace):
    """Run basic sanity checks and the requested kyua test suites in the booted guest, then power off.

    For every entry of ``args.kyua_tests_files`` the kyua results database is copied to /kyua-results
    in the guest. It is converted to JUnit XML either in the guest (when the host has no ``kyua``) or
    afterwards on the host from ``args.kyua_tests_output``.

    Returns True only if no command failed or timed out, the JUnit post-processing worked and (unless
    --interact/--skip-poweroff was passed) the guest shut down cleanly.
    """
    boot_cheribsd.success("Booted successfully")
    qemu.checked_run("kenv")
    # unchecked since mount_smbfs returns non-zero for --help:
    qemu.run("mount_smbfs --help", cheri_trap_fatal=True)
    # same for ld-cheri-elf.so (but do check for CHERI traps):
    qemu.run("/libexec/ld-cheri-elf.so.1 -h", cheri_trap_fatal=True)

    all_ok = True
    host_has_kyua = shutil.which("kyua") is not None
    guest_db = "/tmp/results.db"
    try:
        # potentially bootstrap kyua for later testing
        if args.bootstrap_kyua or args.kyua_tests_files:
            qemu.checked_run("/sbin/prepare-testsuite.sh", timeout=30 * 60)
            qemu.checked_run("kyua help", timeout=60)

        for index, tests_file in enumerate(args.kyua_tests_files):
            # TODO: is the results file too big for tmpfs? No should be fine, only a few megabytes
            qemu.checked_run("rm -f " + guest_db)
            # Allow up to 24 hours to run the full testsuite
            # Not a checked run since it might return false if some tests fail
            suite_started = datetime.datetime.now()
            kyua_cmd = "kyua test --results-file=" + guest_db + " -k " + shlex.quote(tests_file)
            qemu.run(kyua_cmd, ignore_cheri_trap=True, cheri_trap_fatal=False, timeout=24 * 60 * 60)
            # The first suite gets the plain name, later ones are numbered.
            db_name = "test-results.db" if index == 0 else "test-results-{}.db".format(index)
            results_db = Path("/kyua-results") / db_name
            results_xml = results_db.with_suffix(".xml")
            assert shlex.quote(str(results_db)) == str(results_db), "Should not contain any special chars"
            qemu.checked_run("cp -v " + guest_db + " " + str(results_db))
            qemu.checked_run("fsync " + str(results_db))
            boot_cheribsd.success("Running tests for ", tests_file, " took: ",
                                  datetime.datetime.now() - suite_started)

            # run: kyua report-junit --results-file=test-results.db | vis -os > ${CPU}-${TEST_NAME}-test-results.xml
            # Not sure how much we gain by running it on the host instead.
            # Converting the full test suite to xml can take over an hour (probably a lot faster without the
            # vis -os pipe)
            # TODO: should escape the XML file but that's probably faster on the host
            if host_has_kyua:
                boot_cheribsd.info("KYUA installed on the host, no need to do slow conversion in QEMU")
                continue
            conversion_started = datetime.datetime.now()
            qemu.checked_run("kyua report-junit --results-file=/tmp/results.db > /tmp/results.xml",
                             timeout=200 * 60)
            qemu.checked_run("cp -v /tmp/results.xml " + str(results_xml))
            qemu.checked_run("fsync " + str(results_xml))
            boot_cheribsd.success("Creating JUnit XML ", results_xml, " took: ",
                                  datetime.datetime.now() - conversion_started)
    except boot_cheribsd.CheriBSDCommandTimeout as e:
        boot_cheribsd.failure("Timeout running tests: " + str(e), exit=False)
        for _ in range(2):
            qemu.sendintr()
        # Try to cancel the running command and get back to having a sensible prompt
        qemu.checked_run("pwd")
        time.sleep(10)
        all_ok = False
    except boot_cheribsd.CheriBSDCommandFailed as e:
        boot_cheribsd.failure("Failed to run: " + str(e), exit=False)
        boot_cheribsd.info("Trying to shut down cleanly")
        all_ok = False

    # Update the JUnit stats in the XML file
    if args.kyua_tests_files:
        if not boot_cheribsd.PRETEND:
            time.sleep(2)  # sleep two seconds to ensure the files exist
        junit_dir = Path(args.kyua_tests_output)
        try:
            if host_has_kyua:
                boot_cheribsd.info("Converting kyua databases to JUNitXML in output directory ", junit_dir)
                for host_db in junit_dir.glob("*.db"):
                    convert_kyua_db_to_junit_xml(host_db, host_db.with_suffix(".xml"))
            else:
                boot_cheribsd.info("Updating statistics in JUnit output directory ", junit_dir)
                for host_xml in junit_dir.glob("*.xml"):
                    fixup_kyua_generated_junit_xml(host_xml)
        except Exception as e:
            boot_cheribsd.failure("Could not update stats in ", junit_dir, ": ", e, exit=False)
            all_ok = False

    if args.interact or args.skip_poweroff:
        boot_cheribsd.info("Skipping poweroff step since --interact/--skip-poweroff was passed.")
        return all_ok

    poweroff_started = datetime.datetime.now()
    qemu.sendline("poweroff")
    shutdown_patterns = ["Uptime:", pexpect.TIMEOUT, pexpect.EOF] + boot_cheribsd.FATAL_ERROR_MESSAGES
    matched = qemu.expect(shutdown_patterns, timeout=360)
    if matched != 0:
        reason = "timed out" if matched == 1 else "failed"
        boot_cheribsd.failure("Poweroff " + reason)
        return False
    # 240 secs since it takes a lot longer on a full image (it took 44 seconds after installing kyua, so on a
    # really busy jenkins slave it might be a lot slower)
    matched = qemu.expect([pexpect.TIMEOUT, pexpect.EOF], timeout=240)
    if matched == 0:
        boot_cheribsd.failure("QEMU didn't exit after shutdown!")
        return False
    boot_cheribsd.success("Poweroff took: ", datetime.datetime.now() - poweroff_started)
    return all_ok
def run_cheribsd_test(qemu: boot_cheribsd.CheriBSDInstance, args: argparse.Namespace) -> bool:
    """Run the requested test suites in a booted CheriBSD guest, post-process the results and power off.

    The steps are performed in this order:

    1. Smoke checks (``kenv``, ``mount_smbfs --help``, the run-time linkers) and an SSH connectivity check.
    2. The cheritest binaries, if ``args.run_cheritest`` is set.
    3. One ``kyua test`` run per entry of ``args.kyua_tests_files``; each results database (and, when the host
       has no ``kyua`` binary, a JUnit XML report generated in the guest) is copied to the output directory.
    4. A host-side pass over ``args.test_output_dir`` that converts ``*.db`` files (only if the host has
       ``kyua``) and fixes up every ``*.xml`` file.
    5. ``poweroff`` of the guest, unless ``args.interact`` or ``args.skip_poweroff`` is set.

    :param qemu: the running guest instance that all commands are sent to.
    :param args: parsed command line options. This function reads ``run_cheritest``, ``minimal_image``,
        ``kyua_tests_files``, ``test_output_dir``, ``interact`` and ``skip_poweroff``.
    :return: ``tests_successful`` (``False`` as soon as any check, test run or conversion step failed).
        ``False`` is also returned when the poweroff does not complete and, after a completed poweroff, when
        ``qemu.smb_failed`` is set. When the poweroff step is skipped the SMB status is not taken into account.
    """
    boot_cheribsd.success("Booted successfully")
    qemu.checked_run("kenv")
    # unchecked since mount_smbfs returns non-zero for --help:
    qemu.run("mount_smbfs --help", cheri_trap_fatal=True)
    # same for ld-cheri-elf.so (but do check for CHERI traps):
    if qemu.xtarget.is_cheri_hybrid():
        qemu.run("/libexec/ld-cheri-elf.so.1 -h", cheri_trap_fatal=True)
    qemu.run("/libexec/ld-elf.so.1 -h", cheri_trap_fatal=True)

    # Accumulated result: only ever flipped from True to False below.
    tests_successful = True

    # Check that we can connect to QEMU using SSH. This catches regressions that break SSHD.
    if not qemu.check_ssh_connection():
        tests_successful = False

    # If kyua is available on the host the (slow) database -> JUnit XML conversion is done there instead of
    # inside the guest.
    host_has_kyua = shutil.which("kyua") is not None

    # Run the various cheritest binaries
    if args.run_cheritest:
        # Disable trap dumps while running cheritest (handle both old and new sysctl names until dev is merged):
        qemu.run("sysctl machdep.log_user_cheri_exceptions=0 || sysctl machdep.log_cheri_exceptions=0")
        # The minimal disk image only has the statically linked variants:
        test_binaries = ["cheritest", "cheriabitest"]
        if not args.minimal_image:
            test_binaries.extend([
                "cheriabitest-dynamic", "cheriabitest-dynamic-mt", "cheriabitest-mt", "cheritest-dynamic",
                "cheritest-dynamic-mt", "cheritest-mt"
            ])
        for test in test_binaries:
            if not run_cheritest(qemu, test, args):
                tests_successful = False
                # exit=False: keep going so that the remaining binaries are still run
                boot_cheribsd.failure("At least one test failure in", test, exit=False)
        # Turn the trap dumps back on now that cheritest is done
        qemu.run("sysctl machdep.log_user_cheri_exceptions=1 || sysctl machdep.log_cheri_exceptions=1")

    # Run kyua tests
    try:
        if args.kyua_tests_files:
            qemu.checked_run("kyua help", timeout=60)
            # Try to load the pf module for the pfctl test
            qemu.run("kldstat -m pf || kldload pf || echo 'failed to load pf module'")
        # NOTE(review): iterating here relies on kyua_tests_files being a (possibly empty) list rather than
        # None -- confirm against the argparse default.
        for i, tests_file in enumerate(args.kyua_tests_files):
            # TODO: is the results file too big for tmpfs? No should be fine, only a few megabytes
            qemu.checked_run("rm -f /tmp/results.db")
            # Allow up to 24 hours to run the full testsuite
            # Not a checked run since it might return false if some tests fail
            test_start = datetime.datetime.now()
            qemu.run("kyua test --results-file=/tmp/results.db -k {}".format(shlex.quote(tests_file)),
                     ignore_cheri_trap=True, cheri_trap_fatal=False, timeout=24 * 60 * 60)
            # The first Kyuafile keeps the plain name, later ones get their index appended
            if i == 0:
                result_name = "test-results.db"
            else:
                result_name = "test-results-{}.db".format(i)
            # Guest-side destination path of the results database
            results_db = Path("/test-results/{}".format(result_name))
            results_xml = results_db.with_suffix(".xml")
            # The paths are interpolated unquoted into the shell commands below
            assert shlex.quote(str(results_db)) == str(results_db), "Should not contain any special chars"
            if qemu.smb_failed:
                boot_cheribsd.info("SMB mount has failed, performing normal scp")
                qemu.scp_from_guest("/tmp/results.db", Path(args.test_output_dir, results_db.name))
            else:
                qemu.checked_run("cp -v /tmp/results.db {}".format(results_db))
                qemu.checked_run("fsync " + str(results_db))
            boot_cheribsd.success("Running tests for ", tests_file, " took: ", datetime.datetime.now() - test_start)

            # run: kyua report-junit --results-file=test-results.db | vis -os > ${CPU}-${TEST_NAME}-test-results.xml
            # Not sure how much we gain by running it on the host instead.
            # Converting the full test suite to xml can take over an hour (probably a lot faster without the
            # vis -os pipe)
            # TODO: should escape the XML file but that's probably faster on the host
            if host_has_kyua:
                boot_cheribsd.info("KYUA installed on the host, no need to do slow conversion in QEMU")
            else:
                xml_conversion_start = datetime.datetime.now()
                qemu.checked_run("kyua report-junit --results-file=/tmp/results.db > /tmp/results.xml",
                                 timeout=200 * 60)
                if qemu.smb_failed:
                    boot_cheribsd.info("SMB mount has failed, performing normal scp")
                    qemu.scp_from_guest("/tmp/results.xml", Path(args.test_output_dir, results_xml.name))
                else:
                    qemu.checked_run("cp -v /tmp/results.xml {}".format(results_xml))
                    qemu.checked_run("fsync " + str(results_xml))
                boot_cheribsd.success("Creating JUnit XML ", results_xml, " took: ",
                                      datetime.datetime.now() - xml_conversion_start)
    except boot_cheribsd.CheriBSDCommandTimeout as e:
        boot_cheribsd.failure("Timeout running tests: " + str(e), exit=False)
        qemu.sendintr()
        qemu.sendintr()
        # Try to cancel the running command and get back to having a sensible prompt
        qemu.checked_run("pwd")
        time.sleep(10)
        tests_successful = False
    except boot_cheribsd.CheriBSDCommandFailed as e:
        boot_cheribsd.failure("Failed to run: " + str(e), exit=False)
        boot_cheribsd.info("Trying to shut down cleanly")
        tests_successful = False

    # Update the JUnit stats in the XML files (both kyua and cheritest):
    if args.kyua_tests_files or args.run_cheritest:
        if not boot_cheribsd.PRETEND:
            time.sleep(2)  # sleep two seconds to ensure the files exist
        junit_dir = Path(args.test_output_dir)
        if host_has_kyua:
            try:
                boot_cheribsd.info("Converting kyua databases to JUNitXML in output directory ", junit_dir)
                for host_kyua_db_path in junit_dir.glob("*.db"):
                    convert_kyua_db_to_junit_xml(host_kyua_db_path, host_kyua_db_path.with_suffix(".xml"))
            except Exception as e:
                # A failed conversion is reported and recorded, the remaining steps still run
                boot_cheribsd.failure("Could not convert kyua database in ", junit_dir, ": ", e, exit=False)
                tests_successful = False
        boot_cheribsd.info("Updating statistics in JUnit output directory ", junit_dir)
        for host_xml_path in junit_dir.glob("*.xml"):
            # One try per file so that a single broken XML file does not stop the others being processed
            try:
                fixup_kyua_generated_junit_xml(host_xml_path)  # Despite the name also works for cheritest
            except Exception as e:
                boot_cheribsd.failure("Could not update stats in ", junit_dir, ": ", e, exit=False)
                tests_successful = False

    if args.interact or args.skip_poweroff:
        boot_cheribsd.info("Skipping poweroff step since --interact/--skip-poweroff was passed.")
        return tests_successful

    poweroff_start = datetime.datetime.now()
    qemu.sendline("poweroff")
    # Index 0 ("Uptime: ") is the expected outcome; 1 is a timeout and 2 means the output ended early
    i = qemu.expect(["Uptime: ", pexpect.TIMEOUT, pexpect.EOF], timeout=360)
    if i != 0:
        boot_cheribsd.failure("Poweroff " + ("timed out" if i == 1 else "failed"))
        return False
    # 240 secs since it takes a lot longer on a full image (it took 44 seconds after installing kyua, so on a really
    # busy jenkins slave it might be a lot slower)
    i = qemu.expect([pexpect.TIMEOUT, "Please press any key to reboot.", pexpect.EOF], timeout=240)
    if i == 0:
        boot_cheribsd.failure("QEMU didn't exit after shutdown!")
        return False
    boot_cheribsd.success("Poweroff took: ", datetime.datetime.now() - poweroff_start)
    # A failed SMB mount is treated as a failure even if every test passed
    if tests_successful and qemu.smb_failed:
        boot_cheribsd.info("Tests succeeded, but SMB mount failed -> marking tests as failed.")
        tests_successful = False
    return tests_successful
def adjust_args(args: argparse.Namespace):
    """Validate the parsed command line options: ``--test-command`` is mandatory.

    When no test command was supplied a fatal failure is reported via ``boot_cheribsd.failure``.
    """
    # NOTE(review): the previous comment here spoke of setting variables expected by the reused libcxx
    # infrastructure (parallel jobs are not supported); nothing is set in this function -- confirm whether
    # that comment was stale.
    if args.test_command:
        return
    boot_cheribsd.failure("--test-command must be set!", exit=True)
def run_remote_lit_tests_impl(testsuite: str, qemu: boot_cheribsd.CheriBSDInstance, args: argparse.Namespace,
                              tempdir: str, mp_q: multiprocessing.Queue = None,
                              barrier: multiprocessing.Barrier = None, llvm_lit_path: str = None,
                              lit_extra_args: list = None) -> bool:
    """Run a lit test suite on the host, using the CheriBSD guest (reached over SSH) as the executor.

    An ssh config describing the guest is written to ``<tempdir>/config``, the connection is verified (with a
    ControlMaster if possible, without one otherwise) and ``llvm-lit`` is then run with ``-j1`` from
    ``args.build_dir``. While lit runs, a background thread periodically flushes the QEMU log file.

    :param testsuite: name of the test suite (only used in log messages).
    :param qemu: the running guest instance.
    :param args: parsed command line options (``ssh_port``, ``ssh_key``, ``build_dir``, ``ssh_executor_script``,
        ``use_shared_mount_for_tests``, ``xunit_output``, ``internal_shard`` ... are read here).
    :param tempdir: directory in which the temporary ssh config file is created.
    :param mp_q: queue used to notify the main process about progress; requires ``barrier`` when set.
    :param barrier: barrier passed along with the first progress notification.
    :param llvm_lit_path: path to llvm-lit; defaults to ``bin/llvm-lit`` inside the build directory.
    :param lit_extra_args: additional arguments appended to the lit command line.
    :return: ``True`` if lit completed successfully, ``False`` if it exited with status 1 (test failures).
    :raises subprocess.CalledProcessError: if lit exits with any status other than 0 or 1, or if the initial
        SSH connection check fails.
    """
    qemu.EXIT_ON_KERNEL_PANIC = False  # since we run multiple threads we shouldn't use sys.exit()
    boot_cheribsd.info("PID of QEMU: ", qemu.pid)

    # Failure injection hooks (only active with --pretend) used to exercise the multi-shard error handling
    if args.pretend and os.getenv("FAIL_TIMEOUT_BOOT") and args.internal_shard == 2:
        time.sleep(10)
    if mp_q:
        assert barrier is not None
    notify_main_process(args, MultiprocessStages.TESTING_SSH_CONNECTION, mp_q, barrier=barrier)
    if args.pretend and os.getenv("FAIL_RAISE_EXCEPTION") and args.internal_shard == 1:
        raise RuntimeError("SOMETHING WENT WRONG!")

    qemu.checked_run("cat /root/.ssh/authorized_keys", timeout=20)
    port = args.ssh_port
    # NOTE(review): this value looks like a redacted placeholder (the TODO suggests it used to be the root
    # account) -- confirm before relying on it.
    user = "******"  # TODO: run these tests as non-root!
    test_build_dir = Path(args.build_dir)
    ssh_config_path = Path(tempdir, "config")
    # TODO: move this to boot_cheribsd.py
    # The template is rendered in a single str.format() pass: formatting an already formatted string a second
    # time would choke on any '{' or '}' contained in the substituted home/key paths.
    config_template = """
Host cheribsd-test-instance
        User {user}
        HostName localhost
        Port {port}
        IdentityFile {ssh_key}
        # avoid errors due to changed host key:
        UserKnownHostsFile /dev/null
        StrictHostKeyChecking no
        NoHostAuthenticationForLocalhost yes
        # faster connection by reusing the existing one:
        ControlPath {home}/.ssh/controlmasters/%r@%h:%p
        # ConnectTimeout 20
        # ConnectionAttempts 2
        ControlMaster auto
        ControlPersist {control_persist}
"""

    def write_ssh_config(control_persist: str) -> None:
        # (Re)create the ssh config file with the given ControlPersist setting
        ssh_config_path.write_text(
            config_template.format(user=user, port=port, ssh_key=Path(args.ssh_key).with_suffix(""),
                                   home=Path.home(), control_persist=control_persist))

    # Keep socket open for 10 min (600) or indefinitely (yes)
    write_ssh_config("yes")
    # parents=True: ~/.ssh might not exist yet (e.g. on a fresh CI account)
    Path(Path.home(), ".ssh/controlmasters").mkdir(parents=True, exist_ok=True)
    boot_cheribsd.run_host_command(["cat", str(ssh_config_path)])

    # Check that the config file works:
    def check_ssh_connection(prefix):
        # A monotonic clock is used since only the elapsed time matters here
        connection_test_start = time.monotonic()
        boot_cheribsd.run_host_command(
            ["ssh", "-F", str(ssh_config_path), "cheribsd-test-instance", "-p", str(port), "--", "echo",
             "connection successful"], cwd=str(test_build_dir))
        connection_time = time.monotonic() - connection_test_start
        boot_cheribsd.success(prefix, " successful after ", connection_time, " seconds")

    check_ssh_connection("First SSH connection")
    controlmaster_running = False
    try:
        # Check that controlmaster worked by running ssh -O check
        boot_cheribsd.info("Checking if SSH control master is working.")
        boot_cheribsd.run_host_command(
            ["ssh", "-F", str(ssh_config_path), "cheribsd-test-instance", "-p", str(port), "-O", "check"],
            cwd=str(test_build_dir))
        check_ssh_connection("Second SSH connection (with controlmaster)")
        controlmaster_running = True
    except subprocess.CalledProcessError:
        boot_cheribsd.failure(
            "WARNING: Could not connect to ControlMaster SSH connection. Running tests will be slower", exit=False)
        # Fall back to a config that does not keep a master connection around
        write_ssh_config("no")
        check_ssh_connection("Second SSH connection (without controlmaster)")

    if args.pretend:
        time.sleep(2.5)

    ssh_config_arg = "{tempdir}/config".format(tempdir=tempdir)
    extra_ssh_args = commandline_to_str(("-n", "-4", "-F", ssh_config_arg))
    extra_scp_args = commandline_to_str(("-F", ssh_config_arg))
    ssh_executor_args = [
        args.ssh_executor_script, "--host", "cheribsd-test-instance", "--extra-ssh-args=" + extra_ssh_args
    ]
    if args.use_shared_mount_for_tests:
        # If we have a shared directory use that to massively speed up running tests
        tmpdir_name = args.shared_tmpdir_local.name
        ssh_executor_args.append("--shared-mount-local-path=" + str(args.shared_tmpdir_local))
        ssh_executor_args.append("--shared-mount-remote-path=/build/" + tmpdir_name)
    else:
        # slow executor using scp:
        ssh_executor_args.append("--extra-scp-args=" + extra_scp_args)
    executor = commandline_to_str(ssh_executor_args)
    # TODO: I was previously passing -t -t to ssh. Is this actually needed?
    boot_cheribsd.success("Running", testsuite, "tests with executor", executor)
    notify_main_process(args, MultiprocessStages.RUNNING_TESTS, mp_q)

    # have to use -j1 since otherwise CheriBSD might wedge
    if llvm_lit_path is None:
        llvm_lit_path = str(test_build_dir / "bin/llvm-lit")
    # Note: we require python 3 since otherwise it seems to deadlock in Jenkins
    lit_cmd = [sys.executable, llvm_lit_path, "-j1", "-vv", "-Dexecutor=" + executor, "test"]
    if lit_extra_args:
        lit_cmd.extend(lit_extra_args)
    if args.lit_debug_output:
        lit_cmd.append("--debug")
    # This does not work since it doesn't handle running ssh commands....
    lit_cmd.append("--timeout=120")  # 2 minutes max per test (in case there is an infinite loop)
    xunit_file = None  # type: typing.Optional[Path]
    if args.xunit_output:
        lit_cmd.append("--xunit-xml-output")
        xunit_file = Path(args.xunit_output).absolute()
        if args.internal_shard:
            # Every shard writes its own XML file so that the shards do not overwrite each other
            xunit_file = xunit_file.with_name("shard-" + str(args.internal_shard) + "-" + xunit_file.name)
        lit_cmd.append(str(xunit_file))
    qemu_logfile = qemu.logfile
    if args.internal_shard:
        assert args.internal_num_shards, "Invalid call!"
        lit_cmd.append("--num-shards=" + str(args.internal_num_shards))
        lit_cmd.append("--run-shard=" + str(args.internal_shard))
        if xunit_file:
            assert qemu_logfile is not None, "Should have a valid logfile when running multiple shards"
            boot_cheribsd.success("Writing QEMU output to ", qemu_logfile)

    # Fixme starting lit at the same time does not work!
    # TODO: add the polling to the main thread instead of having another thread?
    # start the qemu output flushing thread so that we can see the kernel panic
    qemu.flush_interval = 15  # flush the logfile every 15 seconds
    should_exit_event = threading.Event()
    t = threading.Thread(target=flush_thread, args=(qemu_logfile, qemu, should_exit_event))
    t.daemon = True
    t.start()
    shard_prefix = "SHARD" + str(args.internal_shard) + ": " if args.internal_shard else ""
    try:
        boot_cheribsd.success("Starting llvm-lit: cd ", test_build_dir, " && ", " ".join(lit_cmd))
        boot_cheribsd.run_host_command(lit_cmd, cwd=str(test_build_dir))
    except subprocess.CalledProcessError as e:
        boot_cheribsd.failure(shard_prefix + "SOME TESTS FAILED: ", e, exit=False)
        # Should only ever return 1 (otherwise something else went wrong!)
        if e.returncode == 1:
            return False
        else:
            raise
    finally:
        if qemu_logfile:
            qemu_logfile.flush()
        if controlmaster_running:
            boot_cheribsd.info("Terminating SSH controlmaster")
            try:
                boot_cheribsd.run_host_command(
                    ["ssh", "-F", str(ssh_config_path), "cheribsd-test-instance", "-p", str(port), "-O", "exit"],
                    cwd=str(test_build_dir))
            except subprocess.CalledProcessError:
                boot_cheribsd.failure("Could not close SSH controlmaster connection.", exit=False)
        # Wake the flush thread up quickly and ask it to terminate
        qemu.flush_interval = 0.1
        should_exit_event.set()
        t.join(timeout=30)
        if t.is_alive():
            boot_cheribsd.failure("Failed to kill flush thread. Interacting with CheriBSD will not work!",
                                  exit=True)
            return False
        if not qemu.isalive():
            boot_cheribsd.failure("QEMU died while running tests! ", qemu, exit=True)
    return True