Example #1
    def build_cache(self, inputs=None, output=None, output_cols=None):
        self.input_cols = [None] * len(inputs)
        for idx, param in enumerate(inputs):
            self.input_cols[idx] = {"index": idx, "param": param, "width": 0,
                                    "title": DictUtils.get(BenchData.Reporter.TITLES, param, param),
                                    "vals": sorted(self.bench_data.select_values(param))}
        self.output_param = output
        output_cols = output_cols if output_cols else sorted(self.bench_data.select_values(output))
        self.output_cols = [None] * len(output_cols)
        for idx, param_value in enumerate(output_cols):
            self.output_cols[idx] = {"index": idx, "value": param_value, "title": param_value,
                                     "width": len(BenchData.Reporter.to_string(param_value))}
        self.cache = {}
        for bench in self.bench_data.benchmarks():
            if BenchData.status(bench) != "ok":
                continue
            bench_key = []
            for input_col in self.input_cols:
                param_value = DictUtils.get(bench, input_col['param'], None)
                if not param_value:
                    bench_key = []
                    break
                bench_key.append(str(param_value))
            if bench_key:
                output_val = DictUtils.get(bench, self.output_param, None)
                if output_val:
                    bench_key = '.'.join(bench_key + [str(output_val)])
                    if bench_key not in self.cache:
                        self.cache[bench_key] = bench
                    else:
                        raise ValueError("Duplicate benchmark with key = {}".format(bench_key))
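
The composite cache key built above is just the dot-joined string of input parameter values followed by the output parameter value. A minimal standalone sketch of that key construction (not DLBS code; the parameter names and values are made up for illustration):

benchmark = {"exp.model_title": "ResNet50", "exp.replica_batch": 256, "exp.num_gpus": 4}
inputs, output = ["exp.model_title", "exp.replica_batch"], "exp.num_gpus"
# Mirrors the '.'.join(...) above: input values first, then the output parameter value.
key = ".".join([str(benchmark[p]) for p in inputs] + [str(benchmark[output])])
print(key)  # -> ResNet50.256.4
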
Example #2
    def summary(self, params=None):
        """Return summary of benchmarks providing additional info on `params`.

        Args:
            params (list): List of parameters to provide additional info for. If empty, default list is used.

        Returns:
            dict: A summary of benchmarks.
        """
        if not params:
            params = ['exp.node_id', 'exp.node_title', 'exp.gpu_title', 'exp.gpu_id', 'exp.framework_title',
                      'exp.framework_id']
        summary_dict = {
            'num_benchmarks': len(self.__benchmarks),
            'num_failed_benchmarks': 0,
            'num_successful_benchmarks': 0
        }
        for param in params:
            summary_dict[param] = set()

        for bench in self.__benchmarks:
            if DictUtils.get(bench, 'results.time', -1) > 0:
                summary_dict['num_successful_benchmarks'] += 1
            else:
                summary_dict['num_failed_benchmarks'] += 1
            for param in params:
                summary_dict[param].add(DictUtils.get(bench, param, None))

        for param in params:
            summary_dict[param] = list(summary_dict[param])
        return summary_dict
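
A minimal standalone sketch of the same aggregation over plain benchmark dictionaries (not DLBS code; the records and the single `exp.gpu_title` parameter are made up for illustration):

benchmarks = [
    {"exp.gpu_title": "Tesla V100", "results.time": 123.4},
    {"exp.gpu_title": "Tesla P100", "results.time": -1},
]
summary = {"num_benchmarks": len(benchmarks),
           "num_failed_benchmarks": 0,
           "num_successful_benchmarks": 0,
           "exp.gpu_title": set()}
for bench in benchmarks:
    # A benchmark counts as successful when its 'results.time' is a positive number.
    if bench.get("results.time", -1) > 0:
        summary["num_successful_benchmarks"] += 1
    else:
        summary["num_failed_benchmarks"] += 1
    summary["exp.gpu_title"].add(bench.get("exp.gpu_title"))
summary["exp.gpu_title"] = list(summary["exp.gpu_title"])
print(summary)
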
Example #3
    def check_variable_value(self, experiment, var):
        """ Check if variable contains correct value according to parameter info.

        Args:
            experiment (dict): A dictionary with experiment parameters.
            var (str): Name of a parameter.
        """
        if self.param_info is None or var not in self.param_info:
            return
        pi = self.param_info[var]
        ParamUtils.check_value(
            var,  # Parameter name
            experiment[var],  # Parameter value
            DictUtils.get(pi, 'val_domain', None),  # Value domain constraints.
            DictUtils.get(pi, 'val_regexp', None))  # Value regexp constraints.
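
The `param_info` entries are only queried for two keys here, `val_domain` and `val_regexp`. A hypothetical entry illustrating that shape (the exact schema and the `exp.dtype` parameter are assumptions inferred from the `DictUtils.get` calls above):

param_info = {
    "exp.dtype": {
        "val_domain": ["float32", "float16", "int8"],  # allowed values, if the domain is constrained (assumed example)
        "val_regexp": None                             # or a regular expression the value must match
    }
}
# With such an entry, check_variable_value({"exp.dtype": "float16"}, "exp.dtype") would pass validation.
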
Example #4
    def get_header(self):
        header = ""
        for input_col in self.input_cols:
            format_str = "  %-" + str(input_col['width']) + "s"
            header = header + format_str % BenchData.Reporter.to_string(input_col['title'])
        header += "    "
        output_cols_title = " " * len(header) + DictUtils.get(BenchData.Reporter.TITLES,
                                                              self.output_param, self.output_param)
        for output_col in self.output_cols:
            format_str = "%+" + str(output_col['width']) + "s  "
            header = header + format_str % BenchData.Reporter.to_string(output_col['title'])
        return [output_cols_title, header]
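
A standalone sketch of the printf-style width formatting used above (not DLBS code; the column titles and widths are made up):

input_cols = [{"title": "Model", "width": 12}, {"title": "Batch", "width": 6}]
output_cols = [{"title": "1", "width": 8}, {"title": "2", "width": 8}]
header = ""
for col in input_cols:
    header += ("  %-" + str(col["width"]) + "s") % col["title"]   # left-aligned input columns
header += "    "
for col in output_cols:
    header += ("%+" + str(col["width"]) + "s  ") % col["title"]   # right-aligned output columns
print(header)
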
Example #5
    def init(self, **kwargs):
        """Initializes experimenter.

        Args:
            **kwargs (dict): Optional initialization parameters:
                - action (str): Action to perform.
                - config (str): A user-provided configuration file.
                - plan (str): A file for generated benchmark plan.
                - no_validation (bool): If True, do not perform validation.
                - progress_file (str): A path to a progress file (if not None, enables progress reporting).
                - params (dict): User defined parameters.
                - vars (dict): User defined variables.
                - discard_default_config (bool): If True, do not load standard DLBS config.
                - extensions (dict): User provided extensions.

        User-provided parameters (`params`), variables (`vars`) and extensions (`extensions`) overwrite values
        defined in the user configuration file (`config`), if one is present. Information defined in a
        user-provided configuration file (`config`) overwrites the standard DLBS configuration.
        """
        if self.__initialized:
            raise RuntimeError("Experimenter can only be initialized once.")

        self.action = DictUtils.get(kwargs, 'action', 'run')
        self.config_file = DictUtils.get(kwargs, 'config', None)
        self.plan_file = DictUtils.get(kwargs, 'plan', None)
        self.validation = not DictUtils.get(kwargs, 'no_validation', False)
        self.__progress_file = DictUtils.get(kwargs, 'progress_file', None)
        # Get parameters and variables from a command line/user-provided
        self.params.update(DictUtils.get(kwargs, 'params', {}))
        self.variables.update(DictUtils.get(kwargs, 'vars', {}))

        # Load default configuration
        if not DictUtils.get(kwargs, 'discard_default_config', False):
            logging.debug("Loading default configuration")
            _, self.config, self.param_info = ConfigurationLoader.load(
                os.path.join(os.path.dirname(__file__), 'configs'))
        # Load configurations specified on a command line
        self.load_configuration()
        # Add extensions from command line
        DictUtils.ensure_exists(self.config, 'extensions', [])
        self.config['extensions'].extend(
            DictUtils.get(kwargs, 'extensions', []))
        # All's done
        self.__initialized = True
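
The docstring describes a layered precedence: command-line `params`, `vars` and `extensions` override the user configuration file, which in turn overrides the standard DLBS configuration. A standalone sketch of that precedence with plain dictionaries (not DLBS code; the parameter names and values are made up):

default_config = {"exp.framework": "tensorflow", "exp.num_warmup_batches": 1}
user_config = {"exp.num_warmup_batches": 10}   # would come from the `config` file
cli_params = {"exp.framework": "caffe2"}       # would come from `params`
effective = dict(default_config)
effective.update(user_config)   # user config overrides defaults
effective.update(cli_params)    # command-line params override everything else
print(effective)  # {'exp.framework': 'caffe2', 'exp.num_warmup_batches': 10}
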
Example #6
    def select_values(self, key):
        """Return unique values for the `key` across all benchmarks.

        A missing key in a benchmark is considered to be a key having None value.

        Args:
            key (str): A key to return unique values for.

        Returns:
            list: sorted list of values.
        """
        selected = set()
        for benchmark in self.__benchmarks:
            selected.add(DictUtils.get(benchmark, key, None))
        return sorted(list(selected))
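
A standalone sketch of the same selection over plain dictionaries (not DLBS code; the records are made up). Note that the method above inserts None for benchmarks that lack the key, so under Python 3 the final sorted() call would raise a TypeError if None is mixed with string values:

benchmarks = [{"exp.model": "ResNet50"}, {"exp.model": "VGG16"}, {"exp.model": "ResNet50"}]
# Collect unique values into a set, then return them sorted.
values = sorted({bench.get("exp.model") for bench in benchmarks})
print(values)  # ['ResNet50', 'VGG16']
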
Example #7
    def status(arg):
        """ Return status of the benchmark stored in a log file `log_file`.

        Args:
            arg: A name of a log file, a dictionary or an instance of the BenchData class.

        Returns:
            str or None: "ok" for a successful benchmark, "failure" for a failed one, and None in other cases (such as a missing log file).
        """
        if isinstance(arg, Six.string_types):
            bench_data = BenchData.parse(arg)
        elif isinstance(arg, dict):
            bench_data = BenchData([arg], create_copy=False)
        elif isinstance(arg, BenchData):
            bench_data = arg
        else:
            raise TypeError("Invalid argument type (={}). Expecting string, dict or BenchData.".format(type(arg)))
        if len(bench_data) == 1:
            return 'ok' if DictUtils.get(bench_data[0], 'results.time', -1) > 0 else 'failure'
        return None
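
A hypothetical call passing a benchmark dictionary directly (the import path is an assumption; the records are made up):

from dlbs.bench_data import BenchData  # assumed import path

print(BenchData.status({"results.time": 245.7}))  # -> 'ok'
print(BenchData.status({"results.time": -1}))     # -> 'failure'
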
Example #8
    def build_cache(self, inputs=None, output=None, output_cols=None):
        self.input_cols = [None] * len(inputs)
        for idx, param in enumerate(inputs):
            self.input_cols[idx] = {"index": idx, "param": param, "width": 0,
                                    "title": DictUtils.get(BenchData.Reporter.TITLES, param, param),
                                    "vals": sorted(self.bench_data.select_values(param))}
        self.output_param = output
        output_cols = output_cols if output_cols else sorted(self.bench_data.select_values(output))
        self.output_cols = [None] * len(output_cols)
        for idx, param_value in enumerate(output_cols):
            self.output_cols[idx] = {"index": idx, "value": param_value, "title": param_value,
                                     "width": len(BenchData.Reporter.to_string(param_value))}
        self.cache = {}
        print("Number of benchmarks = {}".format(len(self.bench_data.benchmarks())), file=sys.stderr)
        for bench in self.bench_data.benchmarks():
            if BenchData.status(bench) != "ok":
                print("Ignoring failed benchmark: exp.id={}, exp.status={}, results.time={}, exp.model={}, "
                      "exp.replica_batch={}, exp.dtype={}, exp.num_gpus={}.".format(
                          bench.get('exp.id', 'UNKNOWN'), bench.get('exp.status', 'UNKNOWN'),
                          bench.get('results.time', -1), bench.get('exp.model', 'UNKNOWN'),
                          bench.get('exp.replica_batch', 'UNKNOWN'), bench.get('exp.dtype', 'UNKNOWN'),
                          bench.get('exp.num_gpus', -1)),
                      file=sys.stderr)
                continue
            # The 'bench_key' is the composite benchmark ID that includes values of input and output variables,
            # for instance ['VGG16', 128, 2] may stand for [ModelTitle, ReplicaBatch, NumGPUs].
            bench_key = []
            # Build the initial version of the key taking into account input parameters.
            for input_col in self.input_cols:
                # The param_value is the value of an input parameter, e.g. the model title or replica batch size.
                param_value = DictUtils.get(bench, input_col['param'], None)
                if not param_value:
                    bench_key = []
                    break
                bench_key.append(str(param_value))
            if bench_key:
                output_val = DictUtils.get(bench, self.output_param, None)
                if output_val:
                    bench_key = '.'.join(bench_key + [str(output_val)])
                    if bench_key not in self.cache:
                        self.cache[bench_key] = bench
                    else:
                        raise ValueError("Duplicate benchmark with key = {}".format(bench_key))
Example #9
    def run(plan, progress_file=None):
        """Runs experiments in `plan` one at a time.

        In the newest versions of this class, the `plan` array must contain experiments with computed variables.

        Args:
            plan (list): List of benchmarks to perform (list of dictionaries).
            progress_file (str): A file for a progress reporter. If None, no progress will be reported.
        """
        num_experiments = len(plan)
        # See if the resource monitor needs to be run. The assumption is that
        # if it's enabled for the first experiment, it's enabled for all others.
        resource_monitor = None
        if num_experiments > 0 and DictUtils.get(plan[0], 'monitor.frequency',
                                                 0) > 0:
            if not os.path.isdir(plan[0]['monitor.pid_folder']):
                os.makedirs(plan[0]['monitor.pid_folder'])
            resource_monitor = ResourceMonitor(plan[0]['monitor.launcher'],
                                               plan[0]['monitor.pid_folder'],
                                               plan[0]['monitor.frequency'],
                                               plan[0]['monitor.timeseries'])
            # The file must be created beforehand - this is required for docker
            # to keep correct access rights.
            resource_monitor.empty_pid_file()
            resource_monitor.run()
        # It's used for reporting progress to a user
        num_active_experiments = 0
        for experiment in plan:
            if DictUtils.get(experiment, 'exp.status',
                             '') not in ['disabled', 'inactive']:
                num_active_experiments += 1
        progress_tracker = ProgressTracker(num_experiments,
                                           num_active_experiments,
                                           progress_file)
        # Setting handler for SIGUSR1 signal. Users can send this signal to this
        # script to gracefully terminate benchmarking process.
        print("--------------------------------------------------------------")
        print("Experimenter pid %d. Run this to gracefully terminate me:" %
              os.getpid())
        print("\tkill -USR1 %d" % os.getpid())
        print("I will terminate myself as soon as current benchmark finishes.")
        print("--------------------------------------------------------------")
        sys.stdout.flush()
        Launcher.must_exit = False

        def _sigusr1_handler(signum, frame):
            Launcher.must_exit = True

        signal.signal(signal.SIGUSR1, _sigusr1_handler)

        for idx in range(num_experiments):
            if Launcher.must_exit:
                logging.warn(
                    "The SIGUSR1 signal has been caught, gracefully shutting down benchmarking "
                    "process on experiment %d (out of %d)", idx,
                    num_experiments)
                break
            experiment = plan[idx]
            # Is experiment disabled?
            if DictUtils.get(experiment, 'exp.status',
                             '') in ('disabled', 'inactive'):
                logging.info(
                    "Will not run benchmark, reason: exp.status='%s'" %
                    experiment['exp.status'])
                progress_tracker.report(experiment['exp.log_file'],
                                        exec_status='inactive')
                continue
            # If the experiment has been run before, check whether we need to re-run it.
            if DictUtils.get(experiment, 'exp.log_file', None) is not None:
                if isfile(experiment['exp.log_file']):
                    bench_status = None
                    no_rerun_msg = None
                    rerun_condition = DictUtils.get(experiment, 'exp.rerun',
                                                    'never')
                    if rerun_condition == 'never':
                        no_rerun_msg = "Will not run benchmark, reason: log file exists, exp.rerun='never'"
                    elif rerun_condition == 'onfail':
                        bench_status = BenchData.status(
                            experiment['exp.log_file'])
                        if bench_status == 'ok':
                            no_rerun_msg = "Will not run benchmark, reason: log file exists, exp.status='ok', "\
                                           "exp.rerun='onfail'"
                    if no_rerun_msg is not None:
                        logging.info(no_rerun_msg)
                        progress_tracker.report(experiment['exp.log_file'],
                                                exec_status='skipped',
                                                bench_status=bench_status)
                        continue
            # Track current progress
            progress_tracker.report_active(
                DictUtils.get(experiment, 'exp.log_file', '<none>'))
            # Get the script that runs the experiment for this framework. If 'exp.framework_family' is
            # not found, fall back to 'exp.framework'.
            framework_key = 'exp.framework_family'
            if framework_key not in experiment:
                framework_key = 'exp.framework'
            command = [experiment['%s.launcher' % (experiment[framework_key])]]
            # Do we need to manipulate arguments for launching process?
            launcher_args_key = '%s.launcher_args' % experiment[framework_key]
            if launcher_args_key in experiment:
                launcher_args = set(experiment[launcher_args_key].split(' '))
                logging.debug(
                    'Only these arguments will be passed to launching process (%s): %s',
                    command[0], str(launcher_args))
            else:
                launcher_args = None
            for param, param_val in experiment.items():
                if launcher_args is not None and param not in launcher_args:
                    continue
                if isinstance(param_val, list):
                    raise ValueError(
                        "Here, this must not be the list but (%s=%s)" %
                        (param, str(param_val)))
                if not isinstance(param_val, bool):
                    command.extend([
                        '--%s' % (param.replace('.', '_')),
                        ParamUtils.to_string(param_val)
                    ])
                else:
                    command.extend([
                        '--%s' % (param.replace('.', '_')),
                        ('true' if param_val else 'false')
                    ])
            # Prepare environmental variables
            env_vars = copy.deepcopy(os.environ)
            env_vars.update(
                DictUtils.filter_by_key_prefix(experiment,
                                               'runtime.env.',
                                               remove_prefix=True))
            # Run experiment in background and wait for complete
            worker = Worker(command, env_vars, experiment)
            worker.work(resource_monitor)
            # Print progress
            progress_tracker.report(experiment['exp.log_file'],
                                    exec_status='completed')
            if progress_tracker.num_completed_benchmarks() % 10 == 0:
                print("Done %d benchmarks out of %d" %
                      (progress_tracker.num_completed_benchmarks(),
                       num_active_experiments))
                sys.stdout.flush()
        # All benchmarks have been conducted.
        if resource_monitor is not None:
            resource_monitor.stop()
        progress_tracker.report_all_completed()
        progress_tracker.print_summary()
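
A standalone sketch of how experiment parameters are turned into command-line arguments in the loop above (not DLBS code; the launcher path and parameter values are made up, and str() stands in for ParamUtils.to_string):

experiment = {"exp.framework": "tensorflow", "exp.num_gpus": 4, "exp.docker": True}
command = ["./launcher.sh"]
for param, param_val in experiment.items():
    # Each parameter becomes '--name value': dots are replaced with underscores,
    # and booleans are rendered as 'true'/'false'.
    if isinstance(param_val, bool):
        command.extend(["--%s" % param.replace(".", "_"), "true" if param_val else "false"])
    else:
        command.extend(["--%s" % param.replace(".", "_"), str(param_val)])
print(command)  # ['./launcher.sh', '--exp_framework', 'tensorflow', '--exp_num_gpus', '4', '--exp_docker', 'true']
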