def check_cgroup_availability(wait=1):
    """
    Check that the cgroup subsystems needed for benchmarking are available
    and that no other tool steals our processes out of their cgroups.

    Some daemons (e.g. cgrulesengd) may move a freshly started process into a
    different cgroup shortly after it starts. To detect this, we execute a
    short sleeping child process, wait the given amount of time, and then
    compare the cgroups the child ended up in with the ones we expect.

    @param wait: non-negative int, seconds the child process sleeps before
        its cgroup membership is inspected
    @raise SystemExit: if the required cgroups are missing or another program
        interfered with our cgroup management
    """
    logging.basicConfig(format="%(levelname)s: %(message)s")
    runexecutor = RunExecutor(use_namespaces=False)
    my_cgroups = runexecutor.cgroups

    # cpuacct, cpuset, and memory are mandatory for this check;
    # freezer is deliberately not required for now.
    required_present = (
        CPUACCT in my_cgroups and CPUSET in my_cgroups and MEMORY in my_cgroups
    )
    if not required_present:
        sys.exit(1)

    with tempfile.NamedTemporaryFile(mode="rt") as tmp:
        runexecutor.execute_run(
            ["sh", "-c", "sleep {0}; cat /proc/self/cgroup".format(wait)],
            tmp.name,
            memlimit=1024 * 1024,  # memlimit forces the swapaccount check
            # cores and memory_nodes force actual usage of the CPUSET subsystem
            cores=util.parse_int_list(my_cgroups.get_value(CPUSET, "cpus")),
            memory_nodes=my_cgroups.read_allowed_memory_banks(),
        )
        # Keep only the real /proc/self/cgroup content: drop blank lines,
        # the echo of our own shell command, and separator lines of dashes.
        command_echo = "sh -c 'sleep {0}; cat /proc/self/cgroup'".format(wait)
        lines = [
            stripped
            for stripped in (raw.strip() for raw in tmp)
            if stripped
            and stripped != command_echo
            and not all(ch == "-" for ch in stripped)
        ]
    task_cgroups = find_my_cgroups(lines)

    problem_found = False
    for subsystem in (CPUACCT, CPUSET, MEMORY, FREEZER):
        if subsystem not in my_cgroups:
            continue
        # The child must live in a "benchmark_" sub-cgroup of our own cgroup;
        # anything else means some other program moved it.
        expected_prefix = os.path.join(my_cgroups[subsystem], "benchmark_")
        if not task_cgroups[subsystem].startswith(expected_prefix):
            logging.warning(
                "Task was in cgroup %s for subsystem %s, "
                "which is not the expected sub-cgroup of %s. "
                "Maybe some other program is interfering with cgroup management?",
                task_cgroups[subsystem],
                subsystem,
                my_cgroups[subsystem],
            )
            problem_found = True

    if problem_found:
        sys.exit(1)
def execute_benchmark(benchmark, output_handler):
    """
    Execute all run sets of the given benchmark locally, distributing the runs
    of each run set over benchmark.num_of_threads worker threads
    (optionally under separate user accounts).

    @param benchmark: the benchmark object whose run sets should be executed
    @param output_handler: handler that receives progress and result output
    @return: always 0
    """
    # NOTE(review): run_sets_executed is counted but not used afterwards in
    # this block — presumably kept for symmetry with other executors; verify.
    run_sets_executed = 0
    logging.debug("I will use %s threads.", benchmark.num_of_threads)

    # Resource *requirements* only make sense when selecting machines;
    # locally we can only enforce the resource *limits*.
    if benchmark.requirements.cpu_model \
            or benchmark.requirements.cpu_cores != benchmark.rlimits.get(CORELIMIT, None) \
            or benchmark.requirements.memory != benchmark.rlimits.get(MEMLIMIT, None):
        logging.warning("Ignoring specified resource requirements in local-execution mode, "
                        "only resource limits are used.")

    my_cgroups = cgroups.find_my_cgroups()

    coreAssignment = None  # cores per run
    memoryAssignment = None  # memory banks per run

    if CORELIMIT in benchmark.rlimits:
        # Core/memory-node pinning needs the cpuset cgroup subsystem.
        if not my_cgroups.require_subsystem(cgroups.CPUSET):
            sys.exit("Cgroup subsystem cpuset is required for limiting the number of CPU cores/memory nodes.")
        coreAssignment = get_cpu_cores_per_run(benchmark.rlimits[CORELIMIT], benchmark.num_of_threads, my_cgroups)
        memoryAssignment = get_memory_banks_per_run(coreAssignment, my_cgroups)

    if MEMLIMIT in benchmark.rlimits:
        # check whether we have enough memory in the used memory banks for all runs
        check_memory_size(benchmark.rlimits[MEMLIMIT], benchmark.num_of_threads, memoryAssignment, my_cgroups)

    if benchmark.num_of_threads > 1 and systeminfo.is_turbo_boost_enabled():
        logging.warning("Turbo boost of CPU is enabled. "
                        "Starting more than one benchmark in parallel affects the CPU frequency "
                        "and thus makes the performance unreliable.")

    # Validate/normalize the optional list of user accounts for parallel runs:
    # a single account is replicated per thread, otherwise there must be at
    # least num_of_threads distinct accounts.
    if benchmark.num_of_threads > 1 and benchmark.config.users:
        if len(benchmark.config.users) == 1:
            logging.warning(
                'Executing multiple parallel benchmarks under same user account. '
                'Consider specifying multiple user accounts for increased separation of runs.')
            benchmark.config.users = [benchmark.config.users[0] for i in range(benchmark.num_of_threads)]
        elif len(benchmark.config.users) < benchmark.num_of_threads:
            sys.exit('Distributing parallel runs to different user accounts was requested, but not enough accounts were given. Please specify {} user accounts, or only one account.'.format(benchmark.num_of_threads))
        elif len(benchmark.config.users) != len(set(benchmark.config.users)):
            sys.exit('Same user account was specified multiple times, please specify {} separate accounts, or only one account.'.format(benchmark.num_of_threads))

    # Start these checks before the runs so they can observe the whole period.
    throttle_check = systeminfo.CPUThrottleCheck()
    swap_check = systeminfo.SwapCheck()

    # iterate over run sets
    for runSet in benchmark.run_sets:

        if STOPPED_BY_INTERRUPT:
            break

        if not runSet.should_be_executed():
            output_handler.output_for_skipping_run_set(runSet)

        elif not runSet.runs:
            output_handler.output_for_skipping_run_set(runSet, "because it has no files")

        else:
            run_sets_executed += 1
            # get times before runSet
            ruBefore = resource.getrusage(resource.RUSAGE_CHILDREN)
            walltime_before = util.read_monotonic_time()
            energyBefore = util.measure_energy()

            output_handler.output_before_run_set(runSet)

            # put all runs into a queue
            for run in runSet.runs:
                _Worker.working_queue.put(run)

            # create some workers
            for i in range(benchmark.num_of_threads):
                cores = coreAssignment[i] if coreAssignment else None
                memBanks = memoryAssignment[i] if memoryAssignment else None
                user = benchmark.config.users[i] if benchmark.config.users else None
                WORKER_THREADS.append(_Worker(benchmark, cores, memBanks, user, output_handler))

            # wait until all tasks are done,
            # instead of queue.join(), we use a loop and sleep(1) to handle KeyboardInterrupt
            finished = False
            while not finished and not STOPPED_BY_INTERRUPT:
                try:
                    # unfinished_tasks must be read while holding the queue's
                    # all_tasks_done condition lock (same protocol as queue.join()).
                    _Worker.working_queue.all_tasks_done.acquire()
                    finished = (_Worker.working_queue.unfinished_tasks == 0)
                finally:
                    _Worker.working_queue.all_tasks_done.release()

                try:
                    time.sleep(0.1)  # sleep some time
                except KeyboardInterrupt:
                    stop()

            # get times after runSet
            walltime_after = util.read_monotonic_time()
            energy = util.measure_energy(energyBefore)
            usedWallTime = walltime_after - walltime_before
            ruAfter = resource.getrusage(resource.RUSAGE_CHILDREN)
            # CPU time of all child processes (user + system) during this run set.
            usedCpuTime = (ruAfter.ru_utime + ruAfter.ru_stime) \
                        - (ruBefore.ru_utime + ruBefore.ru_stime)

            if STOPPED_BY_INTERRUPT:
                output_handler.set_error('interrupted', runSet)
            output_handler.output_after_run_set(runSet, cputime=usedCpuTime, walltime=usedWallTime, energy=energy)

    # Give every worker thread a chance to release its resources.
    for worker in WORKER_THREADS:
        worker.cleanup()

    if throttle_check.has_throttled():
        logging.warning('CPU throttled itself during benchmarking due to overheating. '
                        'Benchmark results are unreliable!')
    if swap_check.has_swapped():
        logging.warning('System has swapped during benchmarking. '
                        'Benchmark results are unreliable!')

    output_handler.output_after_benchmark(STOPPED_BY_INTERRUPT)

    return 0
def execute_benchmark(benchmark, output_handler):
    """
    Execute all run sets of the given benchmark locally, distributing the runs
    of each run set over benchmark.num_of_threads worker threads, with optional
    CPU-core/memory-bank assignment, Intel CAT (pqos) cache allocation, and
    CPU-energy measurement.

    @param benchmark: the benchmark object whose run sets should be executed
    @param output_handler: handler that receives progress and result output
    @return: always 0
    """
    # NOTE(review): run_sets_executed is counted but not used afterwards in
    # this block — presumably kept for symmetry with other executors; verify.
    run_sets_executed = 0
    logging.debug("I will use %s threads.", benchmark.num_of_threads)

    # Resource *requirements* only make sense when selecting machines;
    # locally we can only enforce the resource *limits*.
    if (benchmark.requirements.cpu_model
            or benchmark.requirements.cpu_cores != benchmark.rlimits.get(
                CORELIMIT, None)
            or benchmark.requirements.memory != benchmark.rlimits.get(
                MEMLIMIT, None)):
        logging.warning(
            "Ignoring specified resource requirements in local-execution mode, "
            "only resource limits are used.")

    my_cgroups = cgroups.find_my_cgroups()

    coreAssignment = None  # cores per run
    memoryAssignment = None  # memory banks per run
    cpu_packages = None
    pqos = Pqos(
        show_warnings=True)  # The pqos class instance for cache allocation
    pqos.reset_monitoring()

    if CORELIMIT in benchmark.rlimits:
        # Core/memory-node pinning needs the cpuset cgroup subsystem.
        if not my_cgroups.require_subsystem(cgroups.CPUSET):
            sys.exit(
                "Cgroup subsystem cpuset is required for limiting the number of CPU cores/memory nodes."
            )
        coreAssignment = get_cpu_cores_per_run(
            benchmark.rlimits[CORELIMIT],
            benchmark.num_of_threads,
            benchmark.config.use_hyperthreading,
            my_cgroups,
            benchmark.config.coreset,
        )
        # Partition the L3 cache between the runs' core sets.
        pqos.allocate_l3ca(coreAssignment)
        memoryAssignment = get_memory_banks_per_run(coreAssignment, my_cgroups)
        # All CPU packages that will execute runs; used below to filter the
        # energy measurements to relevant packages only.
        cpu_packages = {
            get_cpu_package_for_core(core)
            for cores_of_run in coreAssignment
            for core in cores_of_run
        }
    elif benchmark.config.coreset:
        sys.exit(
            "Please limit the number of cores first if you also want to limit the set of available cores."
        )

    if MEMLIMIT in benchmark.rlimits:
        # check whether we have enough memory in the used memory banks for all runs
        check_memory_size(
            benchmark.rlimits[MEMLIMIT],
            benchmark.num_of_threads,
            memoryAssignment,
            my_cgroups,
        )

    if benchmark.num_of_threads > 1 and systeminfo.is_turbo_boost_enabled():
        logging.warning(
            "Turbo boost of CPU is enabled. "
            "Starting more than one benchmark in parallel affects the CPU frequency "
            "and thus makes the performance unreliable.")

    # Start these checks before the runs so they can observe the whole period.
    throttle_check = systeminfo.CPUThrottleCheck()
    swap_check = systeminfo.SwapCheck()

    # iterate over run sets
    for runSet in benchmark.run_sets:

        if STOPPED_BY_INTERRUPT:
            break

        if not runSet.should_be_executed():
            output_handler.output_for_skipping_run_set(runSet)

        elif not runSet.runs:
            output_handler.output_for_skipping_run_set(
                runSet, "because it has no files")

        else:
            run_sets_executed += 1
            # get times before runSet
            energy_measurement = EnergyMeasurement.create_if_supported()
            ruBefore = resource.getrusage(resource.RUSAGE_CHILDREN)
            walltime_before = util.read_monotonic_time()
            if energy_measurement:
                energy_measurement.start()

            output_handler.output_before_run_set(runSet)

            # put all runs into a queue
            for run in runSet.runs:
                _Worker.working_queue.put(run)

            # keep a counter of unfinished runs for the below assertion
            unfinished_runs = len(runSet.runs)
            unfinished_runs_lock = threading.Lock()

            def run_finished():
                # Callback invoked by workers; decrements the shared counter
                # under its lock.
                nonlocal unfinished_runs
                with unfinished_runs_lock:
                    unfinished_runs -= 1

            if not containerexecutor.NATIVE_CLONE_CALLBACK_SUPPORTED:
                logging.debug(
                    "Using sys.setswitchinterval() workaround for #435 in container "
                    "mode because native callback is not available.")
                # Remember the old interval so it can be restored after the run set.
                py_switch_interval = sys.getswitchinterval()
                sys.setswitchinterval(1000)

            # create some workers
            # (no more workers than runs, and none at all once interrupted)
            for i in range(min(benchmark.num_of_threads, unfinished_runs)):
                if STOPPED_BY_INTERRUPT:
                    break
                cores = coreAssignment[i] if coreAssignment else None
                memBanks = memoryAssignment[i] if memoryAssignment else None
                WORKER_THREADS.append(
                    _Worker(benchmark, cores, memBanks, output_handler,
                            run_finished))

            # wait until workers are finished (all tasks done or STOPPED_BY_INTERRUPT)
            for worker in WORKER_THREADS:
                worker.join()
            assert unfinished_runs == 0 or STOPPED_BY_INTERRUPT

            # get times after runSet
            walltime_after = util.read_monotonic_time()
            energy = energy_measurement.stop() if energy_measurement else None
            usedWallTime = walltime_after - walltime_before
            ruAfter = resource.getrusage(resource.RUSAGE_CHILDREN)
            # CPU time of all child processes (user + system) during this run set.
            usedCpuTime = (ruAfter.ru_utime + ruAfter.ru_stime) - (
                ruBefore.ru_utime + ruBefore.ru_stime)
            # Report only energy of the CPU packages that actually ran benchmarks.
            if energy and cpu_packages:
                energy = {
                    pkg: energy[pkg]
                    for pkg in energy if pkg in cpu_packages
                }

            if not containerexecutor.NATIVE_CLONE_CALLBACK_SUPPORTED:
                # Restore the interpreter's original thread-switch interval.
                sys.setswitchinterval(py_switch_interval)

            if STOPPED_BY_INTERRUPT:
                output_handler.set_error("interrupted", runSet)
            output_handler.output_after_run_set(runSet,
                                                cputime=usedCpuTime,
                                                walltime=usedWallTime,
                                                energy=energy)

    if throttle_check.has_throttled():
        logging.warning(
            "CPU throttled itself during benchmarking due to overheating. "
            "Benchmark results are unreliable!")
    if swap_check.has_swapped():
        logging.warning("System has swapped during benchmarking. "
                        "Benchmark results are unreliable!")

    # Undo the L3 cache allocation made above.
    pqos.reset_resources()
    output_handler.output_after_benchmark(STOPPED_BY_INTERRUPT)
    return 0
def execute_benchmark(benchmark, output_handler):
    """
    Execute all run sets of the given benchmark locally.

    Validates resource limits against the available cgroup subsystems
    (collecting all problems before aborting via my_cgroups.handle_errors),
    computes per-run core and memory-bank assignments, optionally sets up
    Intel CAT (pqos) cache allocation, and then delegates the execution of
    each run set to _execute_run_set.

    @param benchmark: the benchmark object whose run sets should be executed
    @param output_handler: handler that receives progress and result output
    @return: always 0
    """
    # NOTE(review): run_sets_executed is counted but not used afterwards in
    # this block — presumably kept for symmetry with other executors; verify.
    run_sets_executed = 0
    logging.debug("I will use %s threads.", benchmark.num_of_threads)

    # Resource *requirements* only make sense when selecting machines;
    # locally we can only enforce the resource *limits*.
    if (benchmark.requirements.cpu_model
            or benchmark.requirements.cpu_cores != benchmark.rlimits.cpu_cores
            or benchmark.requirements.memory != benchmark.rlimits.memory):
        logging.warning(
            "Ignoring specified resource requirements in local-execution mode, "
            "only resource limits are used.")

    my_cgroups = cgroups.find_my_cgroups()
    # Subsystems that are needed but unusable; collected so that all problems
    # can be reported together via handle_errors() below.
    required_cgroups = set()

    coreAssignment = None  # cores per run
    memoryAssignment = None  # memory banks per run
    cpu_packages = None
    pqos = Pqos(
        show_warnings=True)  # The pqos class instance for cache allocation
    pqos.reset_monitoring()

    if benchmark.rlimits.cpu_cores:
        if not my_cgroups.require_subsystem(cgroups.CPUSET):
            required_cgroups.add(cgroups.CPUSET)
            logging.error("Cgroup subsystem cpuset is required "
                          "for limiting the number of CPU cores/memory nodes.")
        else:
            coreAssignment = resources.get_cpu_cores_per_run(
                benchmark.rlimits.cpu_cores,
                benchmark.num_of_threads,
                benchmark.config.use_hyperthreading,
                my_cgroups,
                benchmark.config.coreset,
            )
            # Partition the L3 cache between the runs' core sets.
            pqos.allocate_l3ca(coreAssignment)
            memoryAssignment = resources.get_memory_banks_per_run(
                coreAssignment, my_cgroups)
            # All CPU packages that will execute runs (used by _execute_run_set
            # to filter energy measurements).
            cpu_packages = {
                resources.get_cpu_package_for_core(core)
                for cores_of_run in coreAssignment
                for core in cores_of_run
            }
    elif benchmark.config.coreset:
        sys.exit(
            "Please limit the number of cores first if you also want to limit the set of available cores."
        )

    if benchmark.rlimits.memory:
        if not my_cgroups.require_subsystem(cgroups.MEMORY):
            required_cgroups.add(cgroups.MEMORY)
            logging.error(
                "Cgroup subsystem memory is required for memory limit.")
        else:
            # check whether we have enough memory in the used memory banks for all runs
            resources.check_memory_size(
                benchmark.rlimits.memory,
                benchmark.num_of_threads,
                memoryAssignment,
                my_cgroups,
            )

    if benchmark.rlimits.cputime:
        if not my_cgroups.require_subsystem(cgroups.CPUACCT):
            required_cgroups.add(cgroups.CPUACCT)
            logging.error(
                "Cgroup subsystem cpuacct is required for cputime limit.")

    # Abort (or warn) now if any required subsystem was missing.
    my_cgroups.handle_errors(required_cgroups)

    if benchmark.num_of_threads > 1 and systeminfo.is_turbo_boost_enabled():
        logging.warning(
            "Turbo boost of CPU is enabled. "
            "Starting more than one benchmark in parallel affects the CPU frequency "
            "and thus makes the performance unreliable.")

    # Start these checks before the runs so they can observe the whole period.
    throttle_check = systeminfo.CPUThrottleCheck()
    swap_check = systeminfo.SwapCheck()

    # iterate over run sets
    for runSet in benchmark.run_sets:

        if STOPPED_BY_INTERRUPT:
            break

        if not runSet.should_be_executed():
            output_handler.output_for_skipping_run_set(runSet)

        elif not runSet.runs:
            output_handler.output_for_skipping_run_set(
                runSet, "because it has no files")

        else:
            run_sets_executed += 1
            _execute_run_set(
                runSet,
                benchmark,
                output_handler,
                coreAssignment,
                memoryAssignment,
                cpu_packages,
            )

    if throttle_check.has_throttled():
        logging.warning(
            "CPU throttled itself during benchmarking due to overheating. "
            "Benchmark results are unreliable!")
    if swap_check.has_swapped():
        logging.warning("System has swapped during benchmarking. "
                        "Benchmark results are unreliable!")

    # Undo the L3 cache allocation made above.
    pqos.reset_resources()
    output_handler.output_after_benchmark(STOPPED_BY_INTERRUPT)
    return 0