Example #1
def correlation(raw_data_df):
    """Get the correlations.

    Args:
        raw_data_df (DataFrame): raw data

    Returns:
        DataFrame: correlations
    """
    data_corr_df = pd.DataFrame()
    if not isinstance(raw_data_df, pd.DataFrame):
        logger.error('DataAnalyzer: the type of raw data is not pd.DataFrame')
        return data_corr_df
    if len(raw_data_df) == 0:
        logger.warning('DataAnalyzer: empty data.')
        return data_corr_df
    try:
        data_corr_df = raw_data_df.corr()
        statistics_error = []
        for column in raw_data_df.columns:
            if column not in data_corr_df.columns and not raw_data_df[column].isnull().all():
                statistics_error.append(column)
        if statistics_error:
            logger.warning(
                'DataAnalyzer: [{}] is missing in correlation results.'.format(
                    ','.join(str(x) for x in statistics_error)))
    except Exception as e:
        logger.error('DataAnalyzer: correlation failed, msg: {}'.format(
            str(e)))
    return data_corr_df
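# Usage sketch (illustrative, not from the original source), assuming the
# function above and its logger dependency are in scope:
import pandas as pd

toy_df = pd.DataFrame({'metric_a': [1.0, 2.0, 3.0], 'metric_b': [2.1, 3.9, 6.0]})
corr_df = correlation(toy_df)
# Pearson correlation of two nearly linear columns is close to 1.
print(corr_df.loc['metric_a', 'metric_b'])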
Example #2
    def _get_baseline_of_metric(self, baseline, metric):
        """Get the baseline value of the metric.

        Args:
            baseline (dict): baseline defined in baseline file
            metric (str): the full name of the metric

        Returns:
            numeric: the baseline value of the metric
        """
        if metric in baseline:
            return baseline[metric]
        elif 'return_code' in metric:
            return 0
        else:
            # exclude rank info, for example, '.*:\d+'->'.*'
            if ':' in metric:
                short = metric.rsplit(':', 1)[0]
            else:
                short = metric.split('/')[0]
            if short in baseline:
                return baseline[short]
            # baseline not defined
            else:
                logger.warning('DataDiagnosis: get baseline - {} baseline not found'.format(metric))
                return -1
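# Illustration (hypothetical metric names, not from the source): stripping the
# rank suffix so per-rank metrics share one baseline entry.
baseline = {'bench/metric': 42.0}
for metric in ('bench/metric:0', 'bench/metric:7'):
    short = metric.rsplit(':', 1)[0]    # '.*:\d+' -> '.*'
    assert baseline.get(short) == 42.0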
Example #3
    def _get_metrics(self, rule, benchmark_rules):
        """Get metrics in the rule.

        Parse metric regex in the rule, and store the (metric, -1) pair
        in _sb_rules[rule]['metrics']

        Args:
            rule (str): the name of the rule
            benchmark_rules (dict): the dict of rules
        """
        metrics_in_rule = benchmark_rules[rule]['metrics']
        benchmark_metrics_dict_in_rule = self._get_metrics_by_benchmarks(metrics_in_rule)
        for benchmark_name in benchmark_metrics_dict_in_rule:
            if benchmark_name not in self._benchmark_metrics_dict:
                logger.warning('RuleBase: get metrics failed - {}'.format(benchmark_name))
                continue
            # get rules and criteria for each metric
            for metric in self._benchmark_metrics_dict[benchmark_name]:
                # metric full name in baseline
                if metric in metrics_in_rule:
                    self._sb_rules[rule]['metrics'][metric] = -1
                    self._enable_metrics.add(metric)
                    continue
                # metric full name not in baseline, use regex to match
                for metric_regex in benchmark_metrics_dict_in_rule[benchmark_name]:
                    if re.search(metric_regex, metric):
                        self._sb_rules[rule]['metrics'][metric] = -1
                        self._enable_metrics.add(metric)
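# Illustration (hypothetical names): a rule may list a regex instead of a full
# metric name; re.search() matches it against every metric of the benchmark.
import re

metric_regex = r'kernel-launch/.*_overhead'
for metric in ('kernel-launch/event_overhead:0', 'kernel-launch/wall_overhead:0'):
    assert re.search(metric_regex, metric)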
Example #4
    def exec(self):
        """Run the SuperBench benchmarks locally."""
        for benchmark_name in self._sb_benchmarks:
            if benchmark_name not in self._sb_enabled:
                continue
            benchmark_config = self._sb_benchmarks[benchmark_name]
            benchmark_results = list()
            self.__create_benchmark_dir(benchmark_name)
            cwd = os.getcwd()
            os.chdir(self.__get_benchmark_dir(benchmark_name))

            monitor = None
            if self.__get_rank_id() == 0 and self._sb_monitor_config and self._sb_monitor_config.enable:
                if self.__get_platform() == Platform.CUDA:
                    monitor = Monitor(
                        None,
                        int(self._sb_monitor_config.sample_duration or 10),
                        int(self._sb_monitor_config.sample_interval or 1),
                        self.__get_monitor_path(benchmark_name),
                    )
                    monitor.start()
                else:
                    logger.warning('Monitor does not support ROCm/CPU platforms.')

            benchmark_real_name = benchmark_name.split(':')[0]
            for framework in benchmark_config.frameworks or [Framework.NONE.value]:
                if benchmark_real_name == 'model-benchmarks' or (
                    ':' not in benchmark_name and benchmark_name.endswith('_models')
                ):
                    for model in benchmark_config.models:
                        full_name = f'{benchmark_name}/{framework}-{model}'
                        logger.info('Executor is going to execute %s.', full_name)
                        context = BenchmarkRegistry.create_benchmark_context(
                            model,
                            platform=self.__get_platform(),
                            framework=Framework(framework.lower()),
                            parameters=self.__get_arguments(benchmark_config.parameters),
                        )
                        result = self.__exec_benchmark(full_name, context)
                        benchmark_results.append(result)
                else:
                    full_name = benchmark_name
                    logger.info('Executor is going to execute %s.', full_name)
                    context = BenchmarkRegistry.create_benchmark_context(
                        benchmark_real_name,
                        platform=self.__get_platform(),
                        framework=Framework(framework.lower()),
                        parameters=self.__get_arguments(benchmark_config.parameters),
                    )
                    result = self.__exec_benchmark(full_name, context)
                    benchmark_results.append(result)

            if monitor:
                monitor.stop()
            self.__write_benchmark_results(benchmark_name, benchmark_results)
            os.chdir(cwd)
Example #5
    def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path):
        """Output data_not_accept_df into jsonl file.

        Args:
            data_not_accept_df (DataFrame): the DataFrame to output
            output_path (str): the path of output jsonl file
        """
        p = Path(output_path)
        try:
            if not isinstance(data_not_accept_df, pd.DataFrame):
                logger.warning('DataDiagnosis: output json data - data_not_accept_df is not DataFrame.')
                return
            if data_not_accept_df.empty:
                logger.warning('DataDiagnosis: output json data - data_not_accept_df is empty.')
                return
            data_not_accept_json = data_not_accept_df.to_json(orient='index')
            data_not_accept = json.loads(data_not_accept_json)
            with p.open('w') as f:
                for node in data_not_accept:
                    line = data_not_accept[node]
                    line['Index'] = node
                    json_str = json.dumps(line)
                    f.write(json_str + '\n')
        except Exception as e:
            logger.error('DataDiagnosis: output json data failed, msg: {}'.format(str(e)))
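# Usage sketch (hypothetical file name): each line of the jsonl output is one
# node's record, with the DataFrame index folded in under the 'Index' key.
import json

with open('diagnosis_summary.jsonl') as f:
    for line in f:
        record = json.loads(line)
        print(record['Index'], record.get('Category'))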
Example #6
    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Return:
            True if _preprocess() succeeds.
        """
        if not super()._preprocess():
            return False

        if len(self._args.precision) == 0:
            self._precision_need_to_run = self._support_precisions
        else:
            self._args.precision = [p.lower() for p in self._args.precision]
            for p in self._args.precision:
                if p not in self._support_precisions:
                    logger.warning(
                        'Unsupported precision - benchmark: {}, precision: {}, expected: {}.'.format(
                            self._name, p, self._support_precisions
                        )
                    )
                else:
                    self._precision_need_to_run.append(p)

        if len(self._precision_need_to_run) == 0:
            self._result.set_return_code(ReturnCode.NO_SUPPORTED_PRECISION)
            return False

        return True
Example #7
def interquartile_range(raw_data_df):
    """Get outlier detection bounds using IQR method.

     The reference of IQR is https://en.wikipedia.org/wiki/Interquartile_range.
     Get the mild and extreme outlier upper and lower value and bound.
     values:
        Mild Outlier: A point beyond inner whiskers on either side
            lower whisker: Q1 - 1.5*IQR
            upper whisker : Q3 + 1.5*IQR
        Extreme Outlier: A point beyond outer whiskers on either side
            lower whisker : Q1 - 3*IQR
            upper whisker : Q3 + 3*IQR
     bounds:
        (values - mean) / mean

    Args:
        raw_data_df (DataFrame): raw data

    Returns:
        DataFrame: data statistics and IQR bound
    """
    if not isinstance(raw_data_df, pd.DataFrame):
        logger.error('DataAnalyzer: the type of raw data is not pd.DataFrame')
        return pd.DataFrame()
    if len(raw_data_df) == 0:
        logger.warning('DataAnalyzer: empty data.')
        return pd.DataFrame()
    data_statistics_df = pd.DataFrame()
    try:
        data_statistics_df = statistic(raw_data_df)
        q1 = data_statistics_df.loc['25%']
        q3 = data_statistics_df.loc['75%']
        iqr = q3 - q1
        mean = data_statistics_df.loc['mean']
        data_statistics_df.loc['mild_outlier_upper'] = q3 + 1.5 * iqr
        data_statistics_df.loc['extreme_outlier_upper'] = q3 + 3 * iqr
        data_statistics_df.loc['mild_outlier_lower'] = q1 - 1.5 * iqr
        data_statistics_df.loc['extreme_outlier_lower'] = q1 - 3 * iqr
        # Express each whisker as a relative deviation from the mean.
        for row in ('mild_outlier_upper', 'extreme_outlier_upper', 'mild_outlier_lower', 'extreme_outlier_lower'):
            data_statistics_df.loc[row + '_bound'] = (data_statistics_df.loc[row] - mean) / mean
    except Exception as e:
        logger.error(
            'DataAnalyzer: interquartile_range failed, msg: {}'.format(str(e)))
    return data_statistics_df
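# Usage sketch (toy data, illustrative only), assuming the function above and
# its statistic()/logger dependencies are in scope:
import pandas as pd

toy_df = pd.DataFrame({'latency': [1.0, 1.1, 0.9, 1.05, 5.0]})
iqr_df = interquartile_range(toy_df)
# The whisker rows sit alongside the describe() statistics.
print(iqr_df.loc[['mild_outlier_upper', 'extreme_outlier_upper'], 'latency'])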
Example #8
def output_excel_raw_data(writer, raw_data_df, sheet_name):
    """Output raw data into 'sheet_name' excel page.

    Args:
        writer (pd.ExcelWriter): ExcelWriter handle (xlsxwriter engine)
        raw_data_df (DataFrame): the DataFrame to output
        sheet_name (str): sheet name of the excel
    """
    # Output the raw data
    if isinstance(raw_data_df, pd.DataFrame) and not raw_data_df.empty:
        raw_data_df.to_excel(writer, sheet_name=sheet_name, index=True)
    else:
        logger.warning('FileHandler: excel_data_output - {} data is empty or not a DataFrame.'.format(sheet_name))
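# Usage sketch (assumed file name), pairing the function above with pandas'
# ExcelWriter on the xlsxwriter engine:
import pandas as pd

toy_df = pd.DataFrame({'metric_a': [1.0, 2.0]})
with pd.ExcelWriter('results.xlsx', engine='xlsxwriter') as writer:
    output_excel_raw_data(writer, toy_df, 'Raw Data')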
Example #9
    def get_vendor(self):
        """Get GPU vendor.

        Returns:
            str: GPU vendor, 'nvidia' or 'amd'; None if no GPU device is detected.
        """
        if Path('/dev/nvidiactl').is_char_device() and Path('/dev/nvidia-uvm').is_char_device():
            if not list(Path('/dev').glob('nvidia[0-9]*')):
                logger.warning('Cannot find NVIDIA GPU device.')
            return 'nvidia'
        if Path('/dev/kfd').is_char_device() and Path('/dev/dri').is_dir():
            if not list(Path('/dev/dri').glob('card*')):
                logger.warning('Cannot find AMD GPU device.')
            return 'amd'
        return None
Example #10
def rotate_dir(target_dir):
    """Rotate directory if it is not empty.

    Args:
        target_dir (Path): Target directory path.
    """
    try:
        if target_dir.is_dir() and any(target_dir.iterdir()):
            logger.warning('Directory %s is not empty.', str(target_dir))
            for i in itertools.count(start=1):
                backup_dir = target_dir.with_name(f'{target_dir.name}.bak{i}')
                if not backup_dir.is_dir():
                    target_dir.rename(backup_dir)
                    break
    except Exception:
        logger.exception('Failed to rotate directory %s.', str(target_dir))
        raise
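# Usage sketch (hypothetical path), matching the Path-based signature above:
from pathlib import Path

output_dir = Path('outputs/run1')
rotate_dir(output_dir)    # a non-empty dir is renamed to outputs/run1.bak1, .bak2, ...
output_dir.mkdir(parents=True, exist_ok=True)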
Example #11
    def register_benchmark(cls, name, class_def, parameters='', platform=None):
        """Register new benchmark, key is the benchmark name.

        Args:
            name (str): internal name of benchmark.
            class_def (Benchmark): class object of benchmark.
            parameters (str): predefined parameters of benchmark.
            platform (Platform): Platform types like CUDA, ROCM.
        """
        if not name or not isinstance(name, str):
            logger.log_and_raise(
                TypeError,
                'Name of registered benchmark is not string - benchmark: {}, type: {}'.format(name, type(name))
            )

        if not issubclass(class_def, Benchmark):
            logger.log_and_raise(
                TypeError,
                'Registered class is not subclass of Benchmark - benchmark: {}, type: {}'.format(name, type(class_def))
            )

        if name not in cls.benchmarks:
            cls.benchmarks[name] = dict()

        if platform:
            if platform not in Platform:
                platform_list = list(map(str, Platform))
                logger.log_and_raise(
                    TypeError, 'Unknown platform - benchmark: {}, supported platforms: {}, but got: {}'.format(
                        name, platform_list, platform
                    )
                )
            if platform in cls.benchmarks[name]:
                logger.warning('Duplicate registration - benchmark: {}, platform: {}'.format(name, platform))

            cls.benchmarks[name][platform] = (class_def, parameters)
        else:
            # If no platform is specified, the benchmark is registered for all platforms.
            for p in Platform:
                if p in cls.benchmarks[name]:
                    logger.warning('Duplicate registration - benchmark: {}, platform: {}'.format(name, p))

                cls.benchmarks[name][p] = (class_def, parameters)

        cls.__parse_and_check_args(name, class_def, parameters)
Example #12
    def __select_benchmark(cls, name, platform):
        """Select benchmark by name and platform.

        Args:
            name (str): internal name of benchmark.
            platform (Platform): Platform type of benchmark.

        Return:
            benchmark_class (Benchmark): class object of benchmark.
            predefine_params (str): predefined parameters set when the benchmark was registered.
        """
        if name not in cls.benchmarks or platform not in cls.benchmarks[name]:
            logger.warning('Benchmark has no implementation, name: {}, platform: {}'.format(name, platform))
            return (None, None)

        (benchmark_class, predefine_params) = cls.benchmarks[name][platform]

        return (benchmark_class, predefine_params)
Example #13
    def run(self):
        """Run the SuperBench benchmarks distributedly."""
        self.check_env()
        for benchmark_name in self._sb_benchmarks:
            if benchmark_name not in self._sb_enabled_benchmarks:
                continue
            benchmark_config = self._sb_benchmarks[benchmark_name]
            for mode in benchmark_config.modes:
                if mode.name == 'local':
                    Parallel(n_jobs=mode.proc_num if mode.parallel else 1)(
                        delayed(self._run_proc)(benchmark_name, mode, {'proc_rank': proc_rank})
                        for proc_rank in range(mode.proc_num)
                    )
                elif mode.name == 'torch.distributed' or mode.name == 'mpi':
                    self._run_proc(benchmark_name, mode, {'proc_rank': 0})
                else:
                    logger.warning('Unknown mode %s.', mode.name)
            self.fetch_results()

        self.__create_results_summary()
Example #14
    def output_all_nodes_results(self, raw_data_df, data_not_accept_df):
        """Output diagnosis results of all nodes.

        Args:
            raw_data_df (DataFrame): raw data
            data_not_accept_df (DataFrame): defective nodes' detailed information

        Returns:
            DataFrame: all nodes' detailed information including ['Accept','#Issues','Category','Issue_Details']
        """
        append_columns = ['Accept', '#Issues', 'Category', 'Issue_Details']
        all_data_df = raw_data_df.astype('float64')

        if data_not_accept_df.shape[0] == 0:
            all_data_df['Accept'] = True
            all_data_df['#Issues'] = 0
            all_data_df['Category'] = None
            all_data_df['Issue_Details'] = None
        else:
            data_not_accept_df['Accept'] = False
            data_not_accept_df['#Issues'] = data_not_accept_df['Defective Details'].map(lambda x: len(x.split(',')))
            data_not_accept_df = data_not_accept_df.rename(columns={'Defective Details': 'Issue_Details'})
            for column in append_columns:
                if column not in data_not_accept_df:
                    logger.warning(
                        'DataDiagnosis: output_all_nodes_results - column {} not found in data_not_accept_df.'.format(
                            column
                        )
                    )
                    all_data_df[column] = None
                else:
                    all_data_df = all_data_df.merge(
                        data_not_accept_df[[column]], left_index=True, right_index=True, how='left'
                    )
            all_data_df['Accept'] = all_data_df['Accept'].replace(np.nan, True)
            all_data_df['#Issues'] = all_data_df['#Issues'].replace(np.nan, 0)

        all_data_df = all_data_df.replace(np.nan, '')

        return all_data_df
Example #15
    def run(self, ansible_config, sudo=False):  # pragma: no cover
        """Run Ansible runner.

        Args:
            ansible_config (dict): Ansible config dict.
            sudo (bool): Run as sudo or not. Defaults to False.

        Returns:
            int: Ansible return code.
        """
        if sudo:
            logger.info('Run as sudo ...')
            ansible_config['cmdline'] += ' --become'
        with tempfile.TemporaryDirectory(prefix='ansible') as tmpdir:
            r = ansible_runner.run(private_data_dir=tmpdir, **ansible_config)
            logger.debug(r.stats)
        if r.rc == 0:
            logger.info('Run succeeded, return code {}.'.format(r.rc))
        else:
            logger.warning('Run failed, return code {}.'.format(r.rc))
        return r.rc
Example #16
    def _get_metrics_by_benchmarks(self, metrics_list):
        """Get mappings of benchmarks:metrics from metrics_list.

        Args:
            metrics_list (list): list of metrics

        Returns:
            dict: metrics organized by benchmarks
        """
        benchmarks_metrics = {}
        for metric in metrics_list:
            if '/' not in metric:
                logger.warning('RuleBase: get_metrics_by_benchmarks - {} does not have benchmark_name'.format(metric))
            else:
                benchmark = metric.split('/')[0]
                # support annotations in benchmark naming
                if ':' in benchmark:
                    benchmark = benchmark.split(':')[0]
                if benchmark not in benchmarks_metrics:
                    benchmarks_metrics[benchmark] = set()
                benchmarks_metrics[benchmark].add(metric)
        return benchmarks_metrics
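# Illustration (hypothetical metric names): grouping metrics by benchmark,
# treating an annotated 'bench:annotation/metric' the same as 'bench/metric'.
metrics = ['nccl-bw/allreduce_bw', 'nccl-bw:8gpu/allreduce_bw']
groups = {}
for metric in metrics:
    benchmark = metric.split('/')[0].split(':')[0]
    groups.setdefault(benchmark, set()).add(metric)
assert groups == {'nccl-bw': {'nccl-bw/allreduce_bw', 'nccl-bw:8gpu/allreduce_bw'}}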
Example #17
    def _benchmark(self):
        """Implementation for benchmarking.

        Return:
            True if the benchmark runs successfully.
        """
        precision_need_to_run = list()
        for precision in self._args.precision:
            # Check if the precision is supported or not.
            if precision not in self._supported_precision:
                logger.warning(
                    'Cannot run with specified precision - model: {}, supported precision: {}, specified precision: {}'
                    .format(self._name, ' '.join([p.value for p in self._supported_precision]), precision)
                )
            else:
                precision_need_to_run.append(precision)

        if len(precision_need_to_run) == 0:
            self._result.set_return_code(ReturnCode.NO_SUPPORTED_PRECISION)
            return False

        for precision in precision_need_to_run:
            for model_action in self._args.model_action:
                self._sub_benchmark_start_time = time.time()
                if model_action == ModelAction.TRAIN:
                    if not self.__train(precision):
                        return False
                elif model_action == ModelAction.INFERENCE:
                    if not self.__inference(precision):
                        return False
                else:
                    logger.warning(
                        'Model action has no implementation yet - model: {}, model_action: {}'.format(
                            self._name, model_action
                        )
                    )

        return True
Example #18
def create_boxplot(raw_data_df, columns, output_dir):
    """Plot the boxplot for selected columns.

    Args:
        raw_data_df (DataFrame): raw data
        columns (list): selected metrics to plot the boxplot
        output_dir (str): the directory of output file
    """
    if not isinstance(raw_data_df, pd.DataFrame):
        logger.error('DataAnalyzer: the type of raw data is not pd.DataFrame')
        return
    if len(raw_data_df) == 0:
        logger.error('DataAnalyzer: empty data for boxplot.')
        return
    if not isinstance(columns, list):
        logger.error('DataAnalyzer: the type of columns should be list.')
        return
    try:
        data_columns = raw_data_df.columns
        # Filter into a new list instead of removing from the list being iterated,
        # which would skip the element following each removal.
        valid_columns = []
        for column in columns:
            if column not in data_columns or raw_data_df[column].dtype != np.dtype('float'):
                logger.warning('DataAnalyzer: invalid column {} for boxplot.'.format(column))
            else:
                valid_columns.append(column)
        n = len(valid_columns)
        for i in range(n):
            sns.set(style='whitegrid')
            plt.subplot(n, 1, i + 1)
            sns.boxplot(x=valid_columns[i], data=raw_data_df, orient='h')
        plt.subplots_adjust(hspace=1)
        plt.savefig(output_dir + '/boxplot.png')
        plt.show()
    except Exception as e:
        logger.error('DataAnalyzer: create_boxplot failed, msg: {}'.format(str(e)))
Example #19
def statistic(raw_data_df):
    """Get the statistics of the raw data.

    The statistics include count, mean, std, min, max, 1%, 5%, 25%, 50%, 75%, 95%, 99%.

    Args:
        raw_data_df (DataFrame): raw data

    Returns:
        DataFrame: data statistics
    """
    data_statistics_df = pd.DataFrame()
    if not isinstance(raw_data_df, pd.DataFrame):
        logger.error('DataAnalyzer: the type of raw data is not pd.DataFrame')
        return data_statistics_df
    if len(raw_data_df) == 0:
        logger.warning('DataAnalyzer: empty data.')
        return data_statistics_df
    try:
        data_statistics_df = raw_data_df.describe()
        data_statistics_df.loc['1%'] = raw_data_df.quantile(0.01)
        data_statistics_df.loc['5%'] = raw_data_df.quantile(0.05)
        data_statistics_df.loc['95%'] = raw_data_df.quantile(0.95)
        data_statistics_df.loc['99%'] = raw_data_df.quantile(0.99)
        statistics_error = []
        for column in raw_data_df.columns:
            if column not in data_statistics_df.columns and not raw_data_df[column].isnull().all():
                statistics_error.append(column)
        if statistics_error:
            logger.warning(
                'DataAnalyzer: [{}] is missing in statistics results.'.format(
                    ','.join(str(x) for x in statistics_error)))
    except Exception as e:
        logger.error('DataAnalyzer: statistic failed, msg: {}'.format(str(e)))
    return data_statistics_df
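# Usage sketch (toy data, illustrative only), assuming the function above is in scope:
import pandas as pd

toy_df = pd.DataFrame({'bandwidth': [10.0, 11.0, 9.5, 10.2, 10.8]})
stats_df = statistic(toy_df)
print(stats_df.loc[['mean', '5%', '95%'], 'bandwidth'])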
Example #20
# Tail of the step loop in a PyTorch model benchmark: record per-step time after warmup.
                    if curr_step > self._args.num_warmup:
                        # Save the step time of every training/inference step, unit is millisecond.
                        duration.append((end - start) * 1000)
                    if self._is_finished(curr_step, end):
                        return duration


# Register CNN benchmarks.
# Reference: https://pytorch.org/vision/0.8/models.html
#            https://github.com/pytorch/vision/tree/v0.8.0/torchvision/models
MODELS = [
    'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161',
    'googlenet', 'inception_v3', 'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0',
    'mnasnet1_3', 'mobilenet_v2', 'resnet18', 'resnet34', 'resnet50',
    'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
    'wide_resnet50_2', 'wide_resnet101_2', 'shufflenet_v2_x0_5',
    'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0',
    'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn',
    'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19'
]

for model in MODELS:
    if hasattr(models, model):
        BenchmarkRegistry.register_benchmark('pytorch-' + model, PytorchCNN, parameters='--model_type ' + model)
    else:
        logger.warning('model missing in torchvision.models - model: {}'.format(model))
Example #21
def output_excel_data_not_accept(writer, data_not_accept_df, rules):
    """Output data_not_accept_df into 'Not Accept' excel page.

    Args:
        writer (pd.ExcelWriter): ExcelWriter handle (xlsxwriter engine)
        data_not_accept_df (DataFrame): the DataFrame to output
        rules (dict): the rules of DataDiagnosis
    """
    # Get the xlsxwriter workbook objects and init the format
    workbook = writer.book
    color_format_red = workbook.add_format({'bg_color': '#FFC7CE', 'font_color': '#9C0006'})
    percent_format = workbook.add_format({'num_format': '0.00%'})

    # Output the not accept
    if isinstance(data_not_accept_df, pd.DataFrame):
        data_not_accept_df.to_excel(writer, sheet_name='Not Accept', index=True)
        if not data_not_accept_df.empty:
            row_start = 1
            row_end = max(row_start, len(data_not_accept_df))
            columns = list(data_not_accept_df.columns)
            worksheet = writer.sheets['Not Accept']

            for rule in rules:
                for metric in rules[rule]['metrics']:
                    # The column index of the metrics should start from 1
                    col_index = columns.index(metric) + 1
                    # Apply percent format for the columns whose rules are variance type.
                    if rules[rule]['function'] == 'variance':
                        worksheet.conditional_format(
                            row_start,
                            col_index,
                            row_end,
                            col_index,    # start_row, start_col, end_row, end_col
                            {
                                'type': 'no_blanks',
                                'format': percent_format
                            }
                        )
                    # Apply red format if the value violates the rule.
                    if rules[rule]['function'] == 'value' or rules[rule]['function'] == 'variance':
                        # Two-character operators must come first in the alternation,
                        # otherwise '<=' would match as '<' with '=...' left as the value.
                        match = re.search(r'(>=|<=|==|!=|>|<)(.+)', rules[rule]['criteria'])
                        if not match:
                            continue
                        symbol = match.group(1)
                        condition = float(match.group(2))
                        worksheet.conditional_format(
                            row_start,
                            col_index,
                            row_end,
                            col_index,    # start_row, start_col, end_row, end_col
                            {
                                'type': 'cell',
                                'criteria': symbol,
                                'value': condition,
                                'format': color_format_red
                            }
                        )

        else:
            logger.warning('FileHandler: excel_data_output - data_not_accept_df is empty.')
    else:
        logger.warning('FileHandler: excel_data_output - data_not_accept_df is not DataFrame.')
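# Illustration of the operator-alternation fix above: two-character operators
# must be tried first, or '<=5' would match '<' and leave '=5' as the value,
# which float() rejects.
import re

match = re.search(r'(>=|<=|==|!=|>|<)(.+)', '<=5')
assert match.group(1) == '<=' and float(match.group(2)) == 5.0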
Example #22
    def __get_mode_command(self, benchmark_name, mode, timeout=None):
        """Get runner command for given mode.

        Args:
            benchmark_name (str): Benchmark name.
            mode (DictConfig): Runner mode.
            timeout (int): The timeout value in seconds.

        Return:
            str: Runner command.
        """
        exec_command = (
            'sb exec --output-dir {output_dir} -c sb.config.yaml -C superbench.enable={name}'
        ).format(
            name=benchmark_name,
            output_dir=self._sb_output_dir,
        )
        if timeout is not None:
            exec_command = 'timeout {timeout} {command}'.format(timeout=timeout, command=exec_command)

        mode_command = exec_command
        if mode.name == 'local':
            mode_command = '{prefix} {command}'.format(
                prefix=mode.prefix.format(proc_rank=mode.proc_rank, proc_num=mode.proc_num),
                command=exec_command,
            )
            mode_command = f'PROC_RANK={mode.proc_rank} {mode_command.strip()}'
        elif mode.name == 'torch.distributed':
            # TODO: replace with torch.distributed.run in v1.9
            # TODO: only supports node_num=1 and node_num=all currently
            torch_dist_params = '' if mode.node_num == 1 else \
                '--nnodes=$NNODES --node_rank=$NODE_RANK --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT '
            mode_command = (
                f'python3 -m torch.distributed.launch'
                f' --use_env --no_python --nproc_per_node={mode.proc_num} {torch_dist_params}{exec_command}'
                f' superbench.benchmarks.{benchmark_name}.parameters.distributed_impl=ddp'
                f' superbench.benchmarks.{benchmark_name}.parameters.distributed_backend=nccl'
            )
        elif mode.name == 'mpi':
            mode_command = (
                'mpirun '  # use default OpenMPI in image
                '-tag-output '  # tag mpi output with [jobid,rank]<stdout/stderr> prefix
                '-allow-run-as-root '  # allow mpirun to run when executed by root user
                '{host_list} '  # use prepared hostfile and launch {proc_num} processes on each node
                '-bind-to numa '  # bind processes to numa
                '{mca_list} {env_list} {command}'
            ).format(
                host_list=f'-host localhost:{mode.proc_num}'
                if mode.node_num == 1 else
                f'-hostfile hostfile -map-by ppr:{mode.proc_num}:node',
                mca_list=' '.join(f'-mca {k} {v}'
                                  for k, v in mode.mca.items()),
                env_list=' '.join(
                    f'-x {k}={str(v).format(proc_rank=mode.proc_rank, proc_num=mode.proc_num)}'
                    if isinstance(v, str) else f'-x {k}'
                    for k, v in mode.env.items()),
                command=exec_command,
            )
        else:
            logger.warning('Unknown mode %s.', mode.name)
        return mode_command.strip()
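# Illustrative result (hypothetical config, mca/env flags omitted): for an 'mpi'
# mode with node_num == 1 and proc_num == 8, the assembled command looks like:
#   mpirun -tag-output -allow-run-as-root -host localhost:8 -bind-to numa \
#     sb exec --output-dir <output_dir> -c sb.config.yaml -C superbench.enable=<name>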