示例#1
0
    def run(plan, progress_file=None):
        """Runs experiments from ``plan`` one at a time and prints run statistics.

        In newest versions of this class the **plan** array must contain experiments
        with computed variables.

        Experiments whose ``exp.status`` is ``'disabled'`` are reported and not run.
        An experiment whose ``exp.log_file`` already exists on disk is skipped
        unless ``Launcher.force_redo`` returns a true value for it. Sending SIGUSR1
        to this process stops the loop before the next experiment starts. After
        the loop, ``__key__=value`` statistics lines are printed to standard output.

        :param list plan: List of experiments to perform (dictionaries of parameters).
        :param progress_file: Passed through to ``ProgressReporter``.
        :raises ValueError: If a parameter that goes on the command line is a list.
        """
        # An experiment is active unless it is explicitly disabled. A missing
        # 'exp.status' counts as active because the main loop below runs it.
        num_active_experiments = sum(
            1 for experiment in plan
            if experiment.get('exp.status') != 'disabled')

        num_experiments = len(plan)
        start_time = datetime.datetime.now()
        stats = {
            "launcher.total_experiments": num_experiments,
            "launcher.active_experiments": num_active_experiments,
            "launcher.failed_experiments": 0,
            "launcher.skipped_experiments": 0,
            "launcher.disabled_experiments": 0,
            "launcher.start_time": str(start_time)
        }
        # See if resource monitor needs to be run. Now, the assumption is that
        # if it's enabled for the first experiment, it's enabled for all others.
        resource_monitor = None
        if num_experiments > 0 and 'monitor.frequency' in plan[
                0] and plan[0]['monitor.frequency'] > 0:
            monitor_cfg = plan[0]
            if not os.path.isdir(monitor_cfg['monitor.pid_folder']):
                os.makedirs(monitor_cfg['monitor.pid_folder'])
            resource_monitor = ResourceMonitor(monitor_cfg['monitor.launcher'],
                                               monitor_cfg['monitor.pid_folder'],
                                               monitor_cfg['monitor.frequency'],
                                               monitor_cfg['monitor.timeseries'])
            # The file must be created beforehand - this is required for docker
            # to keep correct access rights.
            resource_monitor.empty_pid_file()
            resource_monitor.run()
        # It's used for reporting progress to a user
        progress_reporter = ProgressReporter(num_experiments,
                                             num_active_experiments,
                                             progress_file)
        # Setting handler for SIGUSR1 signal. Users can send this signal to this
        # script to gracefully terminate benchmarking process.
        print("--------------------------------------------------------------")
        print("Experimenter pid %d. Run this to gracefully terminate me:" %
              os.getpid())
        print("\tkill -USR1 %d" % os.getpid())
        print("I will terminate myself as soon as current benchmark finishes.")
        print("--------------------------------------------------------------")
        sys.stdout.flush()
        Launcher.must_exit = False

        def _sigusr1_handler(signum, frame):
            # Only raise the flag; the main loop checks it between experiments.
            Launcher.must_exit = True

        signal.signal(signal.SIGUSR1, _sigusr1_handler)
        num_completed_experiments = 0
        # The try/finally guarantees the resource monitor is stopped even if an
        # experiment raises.
        try:
            for idx, experiment in enumerate(plan):
                if Launcher.must_exit:
                    logging.warning(
                        "The SIGUSR1 signal has been caught, gracefully shutting down benchmarking "
                        "process on experiment %d (out of %d)", idx,
                        num_experiments)
                    break
                # 'exp.log_file' is optional: use a placeholder for reporting
                # instead of failing with KeyError when it is absent.
                log_file = experiment.get('exp.log_file')
                log_file_label = experiment.get('exp.log_file', '<none>')
                # Is experiment disabled?
                if experiment.get('exp.status') == 'disabled':
                    logging.info("Disabling experiment, exp.disabled is true")
                    stats['launcher.disabled_experiments'] += 1
                    progress_reporter.report(log_file_label,
                                             'disabled',
                                             counts=False)
                    continue
                # If experiments have been ran, check if we need to re-run.
                if log_file and isfile(log_file) \
                        and not Launcher.force_redo(experiment):
                    logging.info("Skipping experiment, file (%s) exists",
                                 log_file)
                    stats['launcher.skipped_experiments'] += 1
                    progress_reporter.report(log_file_label,
                                             'skipped',
                                             counts=True)
                    continue
                # Track current progress
                progress_reporter.report_active(log_file_label)
                # Get script that runs experiment for this framework. If no
                # 'framework_family' is found, we can try to use exp.framework.
                framework_key = 'exp.framework_family'
                if framework_key not in experiment:
                    framework_key = 'exp.framework'
                framework = experiment[framework_key]
                command = [experiment['%s.launcher' % framework]]
                # Do we need to manipulate arguments for launching process?
                launcher_args_key = '%s.launcher_args' % framework
                if launcher_args_key in experiment:
                    launcher_args = set(
                        experiment[launcher_args_key].split(' '))
                    logging.debug(
                        'Only these arguments will be passed to laucnhing process (%s): %s',
                        command[0], str(launcher_args))
                else:
                    launcher_args = None
                for param, param_val in experiment.items():
                    if launcher_args is not None and param not in launcher_args:
                        continue
                    # Explicit raise instead of assert: asserts are stripped
                    # when Python runs with -O.
                    if isinstance(param_val, list):
                        raise ValueError(
                            "Here, this must not be the list but (%s=%s)" %
                            (param, str(param_val)))
                    if isinstance(param_val, bool):
                        arg_value = 'true' if param_val else 'false'
                    else:
                        arg_value = str(param_val)
                    command.extend(
                        ['--%s' % (param.replace('.', '_')), arg_value])
                # Prepare environmental variables. os.environ.copy() gives a plain
                # dict; a deep copy of os.environ would still be backed by the
                # process environment, so updating it would leak this
                # experiment's variables into the launcher and later experiments.
                env_vars = os.environ.copy()
                env_vars.update(
                    DictUtils.filter_by_key_prefix(experiment,
                                                   'runtime.env.',
                                                   remove_prefix=True))
                # Run experiment in background and wait for complete
                worker = Worker(command, env_vars, experiment)
                worker.work(resource_monitor)
                if worker.ret_code != 0:
                    stats['launcher.failed_experiments'] += 1
                num_completed_experiments += 1
                # Print progress
                if num_completed_experiments % 10 == 0:
                    print("Done %d benchmarks out of %d" %
                          (num_completed_experiments, num_active_experiments))
                progress_reporter.report_active_completed()

            end_time = datetime.datetime.now()
        finally:
            if resource_monitor is not None:
                resource_monitor.stop()

        stats['launcher.end_time'] = str(end_time)
        stats['launcher.hours'] = (end_time -
                                   start_time).total_seconds() / 3600

        for key, val in stats.items():
            print('__%s__=%s' % (key, json.dumps(val)))
        progress_reporter.report_all_completed()
示例#2
0
    def run(plan, progress_file=None):
        """Runs experiments in `plan` one at a time.

        In newest versions of this class the `plan` array must contain experiments with computed variables.

        Experiments whose `exp.status` is 'disabled' or 'inactive' are reported and not run. If an
        experiment's log file already exists, `exp.rerun` decides what happens: 'never' (the default)
        skips it, 'onfail' skips it only when `BenchData.status` of the log file is 'ok', and any other
        value runs it again. Sending SIGUSR1 to this process stops the loop before the next experiment.

        Args:
            plan (list): List of benchmarks to perform (list of dictionaries).
            progress_file (str): A file for a progress reporter. If None, no progress will be reported.

        Raises:
            ValueError: If a parameter that goes on the command line has a list value.
        """
        # Statuses that exclude an experiment from execution. Shared by the
        # counting pass and the main loop so the two cannot drift apart.
        inactive_statuses = ('disabled', 'inactive')
        num_experiments = len(plan)
        # See if resource monitor needs to be run. Now, the assumption is that
        # if it's enabled for the first experiment, it's enabled for all others.
        resource_monitor = None
        if num_experiments > 0 and DictUtils.get(plan[0], 'monitor.frequency',
                                                 0) > 0:
            monitor_cfg = plan[0]
            if not os.path.isdir(monitor_cfg['monitor.pid_folder']):
                os.makedirs(monitor_cfg['monitor.pid_folder'])
            resource_monitor = ResourceMonitor(monitor_cfg['monitor.launcher'],
                                               monitor_cfg['monitor.pid_folder'],
                                               monitor_cfg['monitor.frequency'],
                                               monitor_cfg['monitor.timeseries'])
            # The file must be created beforehand - this is required for docker
            # to keep correct access rights.
            resource_monitor.empty_pid_file()
            resource_monitor.run()
        # It's used for reporting progress to a user
        num_active_experiments = sum(
            1 for experiment in plan
            if DictUtils.get(experiment, 'exp.status',
                             '') not in inactive_statuses)
        progress_tracker = ProgressTracker(num_experiments,
                                           num_active_experiments,
                                           progress_file)
        # Setting handler for SIGUSR1 signal. Users can send this signal to this
        # script to gracefully terminate benchmarking process.
        print("--------------------------------------------------------------")
        print("Experimenter pid %d. Run this to gracefully terminate me:" %
              os.getpid())
        print("\tkill -USR1 %d" % os.getpid())
        print("I will terminate myself as soon as current benchmark finishes.")
        print("--------------------------------------------------------------")
        sys.stdout.flush()
        Launcher.must_exit = False

        def _sigusr1_handler(signum, frame):
            # Only raise the flag; the main loop checks it between experiments.
            Launcher.must_exit = True

        signal.signal(signal.SIGUSR1, _sigusr1_handler)

        # The try/finally guarantees the resource monitor is stopped even if a
        # benchmark raises.
        try:
            for idx, experiment in enumerate(plan):
                if Launcher.must_exit:
                    logging.warning(
                        "The SIGUSR1 signal has been caught, gracefully shutting down benchmarking "
                        "process on experiment %d (out of %d)", idx,
                        num_experiments)
                    break
                # 'exp.log_file' is optional: use a placeholder for reporting
                # instead of failing with KeyError when it is absent.
                log_file_label = DictUtils.get(experiment, 'exp.log_file',
                                               '<none>')
                # Is experiment disabled?
                if DictUtils.get(experiment, 'exp.status',
                                 '') in inactive_statuses:
                    logging.info(
                        "Will not run benchmark, reason: exp.status='%s'",
                        experiment['exp.status'])
                    progress_tracker.report(log_file_label,
                                            exec_status='inactive')
                    continue
                # If experiments have been ran, check if we need to re-run.
                log_file = DictUtils.get(experiment, 'exp.log_file', None)
                if log_file is not None and isfile(log_file):
                    bench_status = None
                    no_rerun_msg = None
                    rerun_condition = DictUtils.get(experiment, 'exp.rerun',
                                                    'never')
                    if rerun_condition == 'never':
                        no_rerun_msg = "Will not run benchmark, reason: log file exists, exp.rerun='never'"
                    elif rerun_condition == 'onfail':
                        bench_status = BenchData.status(log_file)
                        if bench_status == 'ok':
                            no_rerun_msg = "Will not run benchmark, reason: log file exists, exp.status='ok', "\
                                           "exp.rerun='onfail'"
                    if no_rerun_msg is not None:
                        logging.info(no_rerun_msg)
                        progress_tracker.report(log_file,
                                                exec_status='skipped',
                                                bench_status=bench_status)
                        continue
                # Track current progress
                progress_tracker.report_active(log_file_label)
                # Get script that runs experiment for this framework. If no
                # 'framework_family' is found, we can try to use exp.framework.
                framework_key = 'exp.framework_family'
                if framework_key not in experiment:
                    framework_key = 'exp.framework'
                framework = experiment[framework_key]
                command = [experiment['%s.launcher' % framework]]
                # Do we need to manipulate arguments for launching process?
                launcher_args_key = '%s.launcher_args' % framework
                if launcher_args_key in experiment:
                    launcher_args = set(
                        experiment[launcher_args_key].split(' '))
                    logging.debug(
                        'Only these arguments will be passed to launching process (%s): %s',
                        command[0], str(launcher_args))
                else:
                    launcher_args = None
                for param, param_val in experiment.items():
                    if launcher_args is not None and param not in launcher_args:
                        continue
                    if isinstance(param_val, list):
                        raise ValueError(
                            "Here, this must not be the list but (%s=%s)" %
                            (param, str(param_val)))
                    if isinstance(param_val, bool):
                        arg_value = 'true' if param_val else 'false'
                    else:
                        arg_value = ParamUtils.to_string(param_val)
                    command.extend(
                        ['--%s' % (param.replace('.', '_')), arg_value])
                # Prepare environmental variables. os.environ.copy() gives a plain
                # dict; a deep copy of os.environ would still be backed by the
                # process environment, so updating it would leak this
                # benchmark's variables into the launcher and later benchmarks.
                env_vars = os.environ.copy()
                env_vars.update(
                    DictUtils.filter_by_key_prefix(experiment,
                                                   'runtime.env.',
                                                   remove_prefix=True))
                # Run experiment in background and wait for complete
                worker = Worker(command, env_vars, experiment)
                worker.work(resource_monitor)
                # Print progress
                progress_tracker.report(log_file_label,
                                        exec_status='completed')
                if progress_tracker.num_completed_benchmarks() % 10 == 0:
                    print("Done %d benchmarks out of %d" %
                          (progress_tracker.num_completed_benchmarks(),
                           num_active_experiments))
                    sys.stdout.flush()
        finally:
            # Whether all benchmarks ran or one of them raised, the monitor
            # must not be left running.
            if resource_monitor is not None:
                resource_monitor.stop()
        progress_tracker.report_all_completed()
        progress_tracker.print_summary()