def _get_minddata_pipeline_info(self):
        """Get the op_id/num_workers info of each minddata pipeline operator.

        Returns:
            list[dict], one dict per operator with keys ``op_id`` and
            ``num_workers``.

        Raises:
            ProfilerFileNotFoundException: If the pipeline file does not exist.
            ProfilerRawFileException: If the file is not valid JSON or has no
                ``op_info`` section.
        """
        file_name = self._minddata_pipeline_display_filename.format(
            self._device_id)
        file_path = os.path.join(self._profiling_dir, file_name)
        file_path = validate_and_normalize_path(
            file_path, raise_key="Invalid minddata_pipeline_info file path.")
        if not os.path.exists(file_path):
            log.error('Did not find the minddata_pipeline file: %s', file_path)
            raise ProfilerFileNotFoundException(
                msg='Did not find the minddata_pipeline file:{}'.format(
                    file_path))

        with open(file_path, 'r', encoding='utf-8') as file:
            try:
                minddata_pipeline_info = json.load(file)
            except json.JSONDecodeError as err:
                log.exception(err)
                raise ProfilerRawFileException(
                    "Fail to parse minddata pipeline file") from err

        # Guard against a malformed file: iterating over a missing "op_info"
        # section would raise a confusing TypeError (None is not iterable)
        # instead of a profiler-specific error.
        op_infos = minddata_pipeline_info.get("op_info")
        if op_infos is None:
            raise ProfilerRawFileException(
                "Fail to parse minddata pipeline file")
        return [
            {"op_id": item.get("op_id"), "num_workers": item.get("num_workers")}
            for item in op_infos
        ]
    def _load_point_info(self):
        """Load the step trace point info JSON into ``self._point_info``.

        Prefers the device-specific file
        ``step_trace_point_info_{device_id}.json`` when present, falling back
        to the legacy ``step_trace_point_info.json``. Does nothing when
        neither file exists.
        """
        legacy_path = os.path.join(self._profiling_dir,
                                   'step_trace_point_info.json')
        legacy_path = validate_and_normalize_path(
            legacy_path, raise_key="Invalid step_trace_point_info file path.")

        # If step_trace_point_info_{self._device_id}.json file exist, load this file.
        device_path = os.path.join(
            self._profiling_dir,
            f'step_trace_point_info_{self._device_id}.json')
        device_path = validate_and_normalize_path(
            device_path,
            raise_key="Invalid step_trace_point_info file path.")

        chosen = device_path if os.path.isfile(device_path) else legacy_path
        if not os.path.isfile(chosen):
            return

        with open(chosen, 'r', encoding='utf-8') as file:
            try:
                self._point_info = json.load(file)
            except (json.JSONDecodeError, TypeError) as err:
                log.exception(err)
                raise ProfilerRawFileException(
                    'Fail to parse point info file.')
Exemplo n.º 3
0
    def _get_host_device_rank_relation(self):
        """Get the host/device/rank relation from the rank table file.

        Returns:
            list[list], entries of ``[host_mapping_id, device_id, rank_id]``.

        Raises:
            ProfilerFileNotFoundException: If the rank table file is missing.
            ProfilerRawFileException: If the rank table file is not valid JSON.
        """
        rank_table_file_path = self._get_rank_table_file_path()
        if not os.path.exists(rank_table_file_path):
            log.error('Did not find rank table file under %s',
                      self._cluster_profiler_dir)
            raise ProfilerFileNotFoundException(
                msg='Did not find rank table file')
        with open(rank_table_file_path, 'r', encoding='utf-8') as file:
            try:
                relation_info = json.load(file)
            except json.JSONDecodeError as err:
                log.exception(err)
                # Bug fix: the error was previously only logged, leaving
                # `relation_info` unbound and triggering a NameError below.
                raise ProfilerRawFileException(
                    'Fail to parse rank table file.') from err
        host_device_rank_relation = []
        for server_info in relation_info.get("server_list"):
            server_id = server_info.get("server_id")
            for device_info in server_info.get("device"):
                host_device_rank_relation.append(
                    [server_id,
                     device_info.get("device_id"),
                     device_info.get("rank_id")])

        host_ips_mapping_info = self._get_host_ips_mapping_info()
        for item in host_device_rank_relation:
            # host_ip_index:0,host_mapping_id_index:1
            # target_info is like:[[host_ip, host_mapping_ip]]
            # NOTE(review): an unmatched host ip raises IndexError here —
            # presumably the mapping file covers every server_id; confirm.
            target_info = [i for i in host_ips_mapping_info if item[0] == i[0]]
            item[0] = target_info[0][1]

        return host_device_rank_relation
Exemplo n.º 4
0
 def _load_point_info(self):
     """Load point info."""
     file_path = os.path.join(self._profiling_dir, 'step_trace_point_info.json')
     if os.path.isfile(file_path):
         with open(file_path, 'r', encoding='utf-8') as file:
             try:
                 self._point_info = json.load(file)
             except (json.JSONDecodeError, TypeError) as err:
                 log.exception(err)
                 raise ProfilerRawFileException('Fail to parse point info file.')
Exemplo n.º 5
0
 def parse_and_save(self):
     """Parse the step trace source files and persist the parsed result.

     Raises:
         ProfilerIOException: If reading the source files or writing the
             result fails with an IOError.
     """
     try:
         self._parse(self._get_step_trace_files())
         self._save()
     except IOError as err:
         log.exception(err)
         raise ProfilerIOException()
     else:
         # Only log success when no IOError occurred.
         log.info("Finish to save intermediate result for step trace file.")
Exemplo n.º 6
0
    def __init__(self, profiling_dir, device_id):
        """Initialize the analyser and eagerly load its data.

        Args:
            profiling_dir (str): Directory that holds the profiling files.
            device_id (str): Target device id.

        Raises:
            ProfilerIOException: If loading the underlying file fails.
        """
        # Location of the data to analyse.
        self._profiling_dir = self._normalize_profiling_dir(profiling_dir)
        self._device_id = device_id
        # Result holders, populated by _load() and later queries.
        self._data = []
        self._result = None
        self._size = 0
        self._display_col_names = None
        self._none_filter_condition_key = []

        try:
            self._load()
        except IOError as err:
            logger.exception(err)
            raise ProfilerIOException()
    def _load(self):
        """Read the cpu utilization JSON file into ``self._data``.

        Raises:
            ProfilerFileNotFoundException: If the file does not exist.
            ProfilerRawFileException: If the file is not valid JSON.
        """
        display_name = self._cpu_utilization_display_filename.format(
            self._device_id)
        file_path = validate_and_normalize_path(
            os.path.join(self._profiling_dir, display_name),
            raise_key="Invalid cpu_utilization_info file path.")
        if not os.path.exists(file_path):
            log.error('Did not find the cpu utilization file: %s', file_path)
            raise ProfilerFileNotFoundException(
                msg='Did not find the cpu utilization file.')

        with open(file_path, 'r', encoding='utf-8') as src_file:
            try:
                self._data = json.load(src_file)
            except json.JSONDecodeError as err:
                log.exception(err)
                raise ProfilerRawFileException(
                    "Fail to parse cpu_utilization info file")
Exemplo n.º 8
0
 def minddata_cpu_utilization_proposal(self):
     """Record a proposal when minddata cpu utilization is above threshold.

     Silently returns when the utilization file is absent, or when the
     analyser raises a profiler error (logged only).
     """
     file_path = os.path.join(
         self.profiling_path,
         "minddata_cpu_utilization_{}.json".format(self.device_id))
     # Forward compatibility, it is reasonable that the file does not exist.
     if not os.path.exists(file_path):
         return
     analyser = AnalyserFactory.instance().get_analyser(
         'minddata_cpu_utilization', self.profiling_path, self.device_id)
     try:
         # The maximum value of this cpu_activate_utilization_avg is 100%.
         active_avg = 100 - analyser.get_idle_utilization_avg()
         threshold = 80
         if active_avg > threshold:
             proposal = OrderedDict()
             proposal["minddata_cpu_utilization"] = [active_avg]
             self.__proposal_dict.update(proposal)
     except (ProfilerRawFileException, ProfilerFileNotFoundException) as err:
         log.exception(err)
Exemplo n.º 9
0
    def parse(self):
        """
        Parse the minddata pipeline files.

        Raises:
            ProfilerRawFileException: If fails to parse the raw file of
                minddata pipeline or the file is empty.
        """
        # Explicit utf-8 matches how the other profiler files are read and
        # avoids depending on the platform's default text encoding.
        with open(self._pipeline_path, 'r', encoding='utf-8') as file:
            try:
                pipeline_info = json.load(file)
            except (json.JSONDecodeError, TypeError) as err:
                logger.exception(err)
                raise ProfilerRawFileException(
                    'Fail to parse minddata pipeline file.')
        if not pipeline_info:
            logger.warning('The minddata pipeline file is empty.')
            raise ProfilerRawFileException(
                'The minddata pipeline file is empty.')

        self._parse_and_save(pipeline_info)
Exemplo n.º 10
0
    def _convert_field_type(self, row):
        """
        Convert the field type to the specific type.

        Args:
            row (list): One row data from parsed data.

        Returns:
            list, the converted data.
        """
        try:
            return [
                row[0], row[1], row[2], row[3],
                int(row[4]),
                self._format_float_data(float(row[5])),
                self._format_float_data(float(row[6])),
                self._format_float_data(float(row[7])), row[8]
            ]
        except IndexError as err:
            log.exception(err)
            raise ProfilerRawFileException(
                'failed to get HOST CPU operator detail data.')
Exemplo n.º 11
0
    def _convert_field_type(row):
        """
        Convert the field type to the specific type.

        Args:
            row (list): One row data from parsed data.

        Returns:
            list, the converted data.
        """
        try:
            return [
                row[0],
                int(row[1]),
                int(row[2]),
                float(row[3]),
                float(row[4]),
                float(row[5]) * 100
            ]
        except IndexError as err:
            log.exception(err)
            raise ProfilerRawFileException(
                'failed to get HOST CPU operator type data.')
Exemplo n.º 12
0
    def analyse(self):
        """
        Collect and analyse performance data, called after training or during training.

        Examples:
            >>> from mindinsight.profiler import Profiler
            >>> context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
            >>>                     device_id=int(os.environ["DEVICE_ID"]))
            >>> profiler = Profiler(subgraph='all', is_detail=True, is_show_op_path=False, output_path='./data')
            >>> model = Model(train_network)
            >>> dataset = get_dataset()
            >>> model.train(2, dataset)
            >>> profiler.analyse()
        """

        # Release communication resources first; a missing mindspore install
        # is logged but does not abort the analysis.
        try:
            from mindspore.communication.management import release
            release()
        except ImportError:
            logger.error("Profiling: fail to import release from mindspore.")

        logger.info("begin profiler analyse")

        # Locate the profiling job directory; without it nothing can be parsed.
        job_id = self._get_profiling_job_id()
        if not job_id:
            msg = ("Fail to get profiling job, please check whether job dir was generated under path %s" \
                   % PROFILING_LOG_BASE_PATH)
            raise RuntimeError(msg)

        logger.info("Profiling: job id is %s ", job_id)

        source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id)
        # parse hwts.log.data.45.dev file, and get task profiling data
        hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
        hwts_output_filename = os.path.join(self._output_path, hwts_output_filename)
        hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename)
        result = hwtslog_parser.execute()
        if not result:
            # Downstream steps depend on the hwts output file; stop here.
            logger.error("Profiling: fail to parse hwts log file.")
            return

        # parse Framework file, and get the relation of op and tasks
        framework_parser = FrameworkParser(job_id, self._dev_id, self._output_path)
        framework_parser.parse()
        op_task_dict = framework_parser.to_task_id_full_op_name_dict()
        if not op_task_dict:
            logger.error("Profiling: fail to parse framework files.")
            return

        # get op compute time from hwts data and framework data, write output_op_compute_time.txt
        opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt"
        opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename)
        optime_parser = OPComputeTimeParser(hwts_output_filename, opcompute_output_filename, op_task_dict)
        optime_parser.execute()

        # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
        output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt"
        output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu)
        try:
            aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu)
            aicpu_data_parser.execute()
        except FileNotFoundError as err:
            # AICPU raw data is optional — its absence is tolerated and logged.
            logger.exception(err)

        # analyse op compute time info
        try:
            self._analyser_op_info()
        except MindInsightException as err:
            logger.error(err.message)