Example #1
0
    def _parse_op_nodes(self, node_protos):
        """
        Parse `anf_ir_pb2.NodeProto` object, and create a normal node.

        Args:
            node_protos (list[anf_ir_pb2.NodeProto]): Refer to anf_ir_pb2.NodeProto.
        """
        logger.debug("Start to parse op nodes from proto.")
        for topological_index, node_proto in enumerate(node_protos):
            if not node_proto.name:
                logger.warning(
                    "Finding a node with an empty name will not save it.")
                continue

            if not node_proto.full_name or any(
                    node_proto.full_name.lower().endswith(f'[:{plugin.value.lower()}]')
                    for plugin in PluginNameEnum):
                node_name = Node.create_node_name(
                    scope=node_proto.scope,
                    base_name=f'{node_proto.op_type}{node_proto.name}')
            else:
                node_name = node_proto.full_name

            # The Graphviz plug-in that the UI uses can't handle these special characters.
            check_invalid_character(node_name)

            node = Node(name=node_name,
                        node_id=node_proto.name,
                        topological_index=topological_index)
            node.full_name = node_proto.full_name
            node.type = node_proto.op_type

            self._parse_attributes(node_proto.attribute, node)
            self._parse_inputs(node_proto.input, node)

            node.output_i = node_proto.output_i
            node.scope = node_proto.scope
            node.output_shape = self._get_shape_by_parse_type_proto(
                node_proto.output_type)
            node.output_nums = len(node.output_shape)
            node.output_data_type = self._get_data_type_by_parse_type_proto(
                node_proto.output_type, node)

            self._cache_node(node)
Example #2
0
    def get_train_job_by_plugin(self, train_id, plugin_name):
        """
        Get a train job by train job id.

        If the given train job does not have the given plugin data, the tag list will be empty.

        Args:
            train_id (str): ID of the train job.
            plugin_name (str): Name of the plugin to get tags by.

        Returns:
            TypedDict('TrainJobEntity', {'id': str, 'name': str, 'tags': List[str]}),
                a train job object.

        """
        self._check_status_valid()
        self._check_train_job_exist(train_id, self._loader_pool)

        loader = self._get_loader(train_id)
        if loader is None:
            logger.warning(
                "No valid summary log in train job %s, "
                "or it is not in the cache.", train_id)
            return None

        name = loader.name
        data_loader = loader.data_loader

        tags = []
        try:
            events_data = data_loader.get_events_data()
            tags = events_data.list_tags_by_plugin(plugin_name)
        except KeyError:
            logger.debug(
                "Plugin name %r does not exist "
                "in train job %r, and set tags to empty list.", plugin_name,
                name)
        except AttributeError:
            logger.debug(
                "Train job %r has been deleted or it has not loaded data, "
                "and set tags to empty list.", name)

        result = dict(id=train_id, name=name, tags=tags)
        return result
Example #3
0
    def _parse_pb_file(summary_dir, filename):
        """
        Parse pb file and write content to `EventsData`.

        Args:
            summary_dir (str): Directory that contains the pb file.
            filename (str): Name of the pb file.

        Returns:
            TensorEvent, if the pb file is loaded and the graph is built successfully; otherwise None.
        """
        file_path = FileHandler.join(summary_dir, filename)
        logger.info("Start to load graph from pb file, file path: %s.",
                    file_path)
        filehandler = FileHandler(file_path)
        model_proto = anf_ir_pb2.ModelProto()
        try:
            model_proto.ParseFromString(filehandler.read())
        except ParseError:
            logger.warning(
                "The given file is not a valid pb file, file path: %s.",
                file_path)
            return None

        graph = MSGraph()

        try:
            graph.build_graph(model_proto.graph)
        except Exception as ex:
            # Normally, there are no exceptions, and it is only possible for users on the MindSpore side
            # to dump other non-default graphs.
            logger.error("Build graph failed, file path: %s.", file_path)
            logger.exception(ex)
            raise UnknownError(str(ex))

        tensor_event = TensorEvent(
            wall_time=FileHandler.file_stat(file_path).mtime,
            step=0,
            tag=filename,
            plugin_name=PluginNameEnum.GRAPH.value,
            value=graph,
            filename=filename)

        logger.info("Build graph success, file path: %s.", file_path)
        return tensor_event
Example #4
0
    def parse_files(self, executor, filenames, events_data):
        """
        Load summary file and parse file content.

        Args:
            executor (Executor): The executor instance.
            filenames (list[str]): File name list.
            events_data (EventsData): The container of event data.

        Returns:
            bool, True if all the summary files are finished loading.
        """
        self._events_data = events_data
        summary_files = self.filter_files(filenames)
        summary_files = self.sort_files(summary_files)
        if self._latest_filename in summary_files:
            index = summary_files.index(self._latest_filename)
            summary_files = summary_files[index:]

        for filename in summary_files:
            file_path = FileHandler.join(self._summary_dir, filename)

            if filename != self._latest_filename:
                self._summary_file_handler = FileHandler(file_path, 'rb')
                self._latest_filename = filename
                self._latest_file_size = 0

            new_size = FileHandler.file_stat(file_path).size
            if new_size == self._latest_file_size:
                continue

            try:
                if not self._load_single_file(self._summary_file_handler, executor):
                    self._latest_file_size = self._summary_file_handler.offset
                else:
                    self._latest_file_size = new_size
                # Wait for data in this file to be processed to avoid loading multiple files at the same time.
                logger.info("Parse summary file offset %d, file path: %s.", self._latest_file_size, file_path)
                return False
            except UnknownError as ex:
                logger.warning("Parse summary file failed, detail: %r,"
                               "file path: %s.", str(ex), file_path)
        return True
Example #5
0
    def _get_train_job_item(self, train_id):
        """
        Get train job item.

        Args:
            train_id (str): Specify train id.

        Returns:
            dict, a dict of train job item.
        """
        try:
            train_job = self._data_manager.get_train_job(train_id)
        except exceptions.TrainJobNotExistError:
            logger.warning('Train job %s does not exist', train_id)
            return None

        basic_info = train_job.get_basic_info()
        train_job_item = dict(
            train_id=basic_info.train_id,
            relative_path=basic_info.train_id,
            create_time=basic_info.create_time.strftime('%Y-%m-%d %H:%M:%S'),
            update_time=basic_info.update_time.strftime('%Y-%m-%d %H:%M:%S'),
            profiler_dir=basic_info.profiler_dir,
            cache_status=train_job.cache_status.value,
            profiler_type=basic_info.profiler_type,
            summary_files=basic_info.summary_files,
            graph_files=basic_info.graph_files,
            lineage_files=basic_info.lineage_files
        )

        if train_job.cache_status == CacheStatus.CACHED:
            plugins = self.get_plugins(train_id)
        else:
            plugins = dict(plugins={
                'graph': [],
                'scalar': [],
                'image': [],
                'histogram': [],
            })

        train_job_item.update(plugins)
        return train_job_item
Example #6
0
    def _check_and_normalize_summary_path(self, summary_path):
        """
        Check and normalize summary path.

        Args:
            summary_path (str): A directory path, e.g. '/data/ImageNet/'.

        Returns:
            str, normalized summary path.

        """
        if summary_path is None:
            logger.warning(
                "Summary path is None. It will not init data loader generator."
            )
            raise ParamValueError("Summary path is None.")

        summary_path = os.path.realpath(summary_path)

        return summary_path
Example #7
0
    def get_train_job(self, train_id):
        """
        Get train job by train ID.

        This method overrides parent method.

        Args:
            train_id (str): Train ID for train job.

        Returns:
            CachedTrainJob, single train job; returns None if no data can be found.
        """
        self._check_train_job_exist(train_id, self._loader_pool)

        loader = self._get_loader(train_id)
        if loader is None:
            logger.warning(
                "No valid summary log in train job %s, "
                "or it is not in the cache.", train_id)
            return None

        train_job = loader.to_dict()
        train_job.pop('data_loader')

        plugin_data = {}
        for plugin_name in PluginNameEnum.list_members():
            job = self.get_train_job_by_plugin(train_id,
                                               plugin_name=plugin_name)
            if job is None:
                plugin_data[plugin_name] = []
            else:
                plugin_data[plugin_name] = job['tags']

        train_job.update({DATAVISUAL_PLUGIN_KEY: plugin_data})

        # Will fill basic_info value in future.
        train_job_obj = CachedTrainJob(basic_info=None)
        train_job_obj.set(DATAVISUAL_CACHE_KEY, train_job)

        train_job_obj.cache_status = loader.cache_status

        return train_job_obj
Example #8
0
    def cache_train_jobs(self, train_ids):
        """
        Cache train jobs.

        Args:
            train_ids (list): Specify list of train_ids to be cached.

        Returns:
            list, dicts indicating each train job ID and its current cache status.

        Raises:
            ParamTypeError, if the given train_ids parameter is not of a valid type.
        """
        if not isinstance(train_ids, list):
            logger.error("train_ids must be list.")
            raise ParamTypeError('train_ids', list)

        cache_result = []
        for train_id in train_ids:
            if not isinstance(train_id, str):
                logger.error("train_id must be str.")
                raise ParamTypeError('train_id', str)

            try:
                train_job = self._data_manager.get_train_job(train_id)
            except exceptions.TrainJobNotExistError:
                logger.warning('Train job %s does not exist', train_id)
                continue

            if train_job.cache_status == CacheStatus.NOT_IN_CACHE:
                self._data_manager.cache_train_job(train_id)
                # Update loader cache status to CACHING for consistency in response.
                train_job.cache_status = CacheStatus.CACHING

            cache_result.append(
                dict(
                    train_id=train_id,
                    cache_status=train_job.cache_status.value,
                ))

        return cache_result
Example #9
0
    def _is_valid_summary_directory(self, summary_base_dir, relative_path):
        """
        Check if the given summary directory is valid.

        Args:
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                                starting with "./" .

        Returns:
            bool, indicates if summary directory is valid.
        """
        summary_base_dir = os.path.realpath(summary_base_dir)
        summary_directory = os.path.realpath(
            os.path.join(summary_base_dir, relative_path))

        if not os.path.exists(summary_directory):
            logger.warning('Path of summary directory does not exist.')
            return False

        if not os.path.isdir(summary_directory):
            logger.warning(
                'Path of summary directory is not a valid directory.')
            return False

        try:
            Path(summary_directory).relative_to(Path(summary_base_dir))
        except ValueError:
            logger.warning(
                'Relative path %s is not a subdirectory of summary_base_dir.',
                relative_path)
            return False

        return True
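
A minimal, self-contained sketch of the subdirectory check above (the helper name `is_subdirectory` and the sample paths are illustrative assumptions, not part of the original code):

import os
from pathlib import Path

def is_subdirectory(base_dir, relative_path):
    """Return True if relative_path stays inside base_dir after resolving."""
    base_dir = os.path.realpath(base_dir)
    target = os.path.realpath(os.path.join(base_dir, relative_path))
    try:
        # relative_to() raises ValueError when target escapes base_dir.
        Path(target).relative_to(Path(base_dir))
    except ValueError:
        return False
    return True

# is_subdirectory('/tmp/summaries', './job-01') -> True
# is_subdirectory('/tmp/summaries', '../etc')   -> False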
Example #10
0
    def _parse_op_nodes(self, node_protos):
        """
        Parse `anf_ir_pb2.NodeProto` object, and create a normal node.

        Args:
            node_protos (list[anf_ir_pb2.NodeProto]): Refer to anf_ir_pb2.NodeProto.
        """
        logger.debug("Start to parse op nodes from proto.")
        for node_proto in node_protos:
            if not node_proto.name:
                logger.warning(
                    "Finding a node with an empty name will not save it.")
                continue

            if not node_proto.full_name or any(
                    node_proto.full_name.lower().endswith(f'[:{plugin.value.lower()}]')
                    for plugin in PluginNameEnum):
                node_name = Node.create_node_name(
                    scope=node_proto.scope,
                    base_name=f'{node_proto.op_type}{node_proto.name}')
            else:
                node_name = node_proto.full_name
            node = Node(name=node_name, node_id=node_proto.name)
            node.type = node_proto.op_type
            logger.debug(
                "Foreach graph proto nodes, node id: %s, node name: %s, node def name: %s, "
                "input count: %s", node.node_id, node.name, node_proto.name,
                len(node_proto.input))

            self._parse_attributes(node_proto.attribute, node)
            self._parse_inputs(node_proto.input, node)

            node.output_i = node_proto.output_i
            node.scope = node_proto.scope
            node.output_shape = self._get_shape_by_parse_type_proto(
                node_proto.output_type)
            node.output_data_type = self._get_data_type_by_parse_type_proto(
                node_proto.output_type)

            self._cache_node(node)
Example #11
0
    def _get_train_job_item(self, train_id):
        """
        Get train job item.

        Args:
            train_id (str): Specify train id.

        Returns:
            dict, a dict of train job item.
        """
        try:
            train_job = self._data_manager.get_train_job(train_id)
        except exceptions.TrainJobNotExistError:
            logger.warning('Train job %s does not exist', train_id)
            return None

        basic_info = train_job.get_basic_info()
        train_job_item = dict(
            train_id=basic_info.train_id,
            relative_path=basic_info.train_id,
            create_time=basic_info.create_time.strftime('%Y-%m-%d %H:%M:%S'),
            update_time=basic_info.update_time.strftime('%Y-%m-%d %H:%M:%S'),
            profiler_dir=basic_info.profiler_dir,
            cache_status=train_job.cache_status.value,
            profiler_type=basic_info.profiler_type,
            summary_files=basic_info.summary_files,
            graph_files=basic_info.graph_files,
            lineage_files=basic_info.lineage_files,
            dump_dir=basic_info.dump_dir)

        if train_job.cache_status != CacheStatus.NOT_IN_CACHE:
            plugins = self.get_plugins(train_id, manual_update=False)
        else:
            plugins = dict(plugins={
                plugin: []
                for plugin in PluginNameEnum.list_members()
            })

        train_job_item.update(plugins)
        return train_job_item
Example #12
0
    def load(self, computing_resource_mgr):
        """Load the data when loader is exist.

        Args:
            computing_resource_mgr (ComputingResourceManager): The ComputingResourceManager instance.
        """

        if self._loader is None:
            ms_dataloader = MSDataLoader(self._summary_dir)
            loaders = [ms_dataloader]
            for loader in loaders:
                if loader.filter_valid_files():
                    self._loader = loader
                    break

            if self._loader is None:
                logger.warning(
                    "No valid files can be loaded, summary_dir: %s.",
                    self._summary_dir)
                raise exceptions.SummaryLogPathInvalid()

        self._loader.load(computing_resource_mgr)
Example #13
0
    def _load_single_file(self, file_handler, executor):
        """
        Load a log file data.

        Args:
            file_handler (FileHandler): A file handler.
            executor (Executor): The executor instance.

        Returns:
            bool, True if the summary file is finished loading.
        """
        while True:
            start_offset = file_handler.offset
            try:
                event_str = self._event_load(file_handler)
                if event_str is None:
                    file_handler.reset_offset(start_offset)
                    return True
                if len(event_str) > MAX_EVENT_STRING:
                    logger.warning("file_path: %s, event string: %d exceeds %d and drop it.",
                                   file_handler.file_path, len(event_str), MAX_EVENT_STRING)
                    continue

                future = executor.submit(self._event_parse, event_str, self._latest_filename)

                def _add_tensor_event_callback(future_value):
                    try:
                        tensor_values = future_value.result()
                        for tensor_value in tensor_values:
                            if tensor_value.plugin_name == PluginNameEnum.GRAPH.value:
                                try:
                                    graph_tags = self._events_data.list_tags_by_plugin(PluginNameEnum.GRAPH.value)
                                except KeyError:
                                    graph_tags = []

                                summary_tags = self.filter_files(graph_tags)
                                for tag in summary_tags:
                                    self._events_data.delete_tensor_event(tag)

                            self._events_data.add_tensor_event(tensor_value)
                    except Exception as exc:
                        # Log exception for debugging.
                        logger.exception(exc)
                        raise

                future.add_done_callback(_add_tensor_event_callback)
                return False
            except exceptions.CRCFailedError:
                file_handler.reset_offset(start_offset)
                logger.warning("Check crc faild and ignore this file, file_path=%s, "
                               "offset=%s.", file_handler.file_path, file_handler.offset)
                return True
            except (OSError, DecodeError, exceptions.MindInsightException) as ex:
                logger.warning("Parse log file fail, and ignore this file, detail: %r,"
                               "file path: %s.", str(ex), file_handler.file_path)
                return True
            except Exception as ex:
                logger.exception(ex)
                raise UnknownError(str(ex))
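
The submit/add_done_callback pattern above can be illustrated with the standard-library `concurrent.futures` API; the `Executor` in the original is MindInsight's own wrapper, so this sketch is only an assumed stand-in:

from concurrent.futures import ThreadPoolExecutor

def parse(event_str):
    # Stand-in for _event_parse: pretend parsing just upper-cases the string.
    return event_str.upper()

def on_done(future):
    # Runs when the submitted task finishes; result() re-raises any exception.
    print('parsed:', future.result())

with ThreadPoolExecutor(max_workers=1) as executor:
    future = executor.submit(parse, 'event-bytes')
    future.add_done_callback(on_done)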
Example #14
0
    def _parse_op_nodes(self, node_protos):
        """
        Parse `anf_ir_pb2.NodeProto` object, and create a normal node.

        Args:
            node_protos (list[anf_ir_pb2.NodeProto]): Refer to anf_ir_pb2.NodeProto.
        """
        logger.debug("Start to parse op nodes from proto.")
        for topological_index, node_proto in enumerate(node_protos):
            if not node_proto.name:
                logger.warning(
                    "Finding a node with an empty name will not save it.")
                continue

            node_name = node_proto.name

            # The Graphviz plug-in that the UI uses can't handle these special characters.
            check_invalid_character(node_name)

            node = Node(name=node_name,
                        node_id=node_proto.name,
                        topological_index=topological_index)
            node.full_name = node_proto.full_name
            node.type = node_proto.op_type
            if getattr(node_proto, 'source_address', None):
                node.stack = DebuggerSource.build_stack_from_source_address(
                    node_proto.source_address)
            self._parse_attributes(node_proto.attribute, node)
            self._parse_inputs(node_proto.input, node)

            node.output_i = node_proto.output_i
            node.scope = node_proto.scope
            node.output_shape = self._get_shape_by_parse_type_proto(
                node_proto.output_type)
            node.output_nums = len(node.output_shape)
            node.output_data_type = self._get_data_type_by_parse_type_proto(
                node_proto.output_type, node)

            self._cache_node(node)
Example #15
0
    def parse_files(self, executor, filenames, events_data):
        """
        Load summary file and parse file content.

        Args:
            executor (Executor): The executor instance.
            filenames (list[str]): File name list.
            events_data (EventsData): The container of event data.
        """
        self._events_data = events_data
        summary_files = self.filter_files(filenames)
        summary_files = self.sort_files(summary_files)

        for filename in summary_files:
            if self._latest_filename and \
                    (self._compare_summary_file(self._latest_filename, filename)):
                continue

            file_path = FileHandler.join(self._summary_dir, filename)

            if filename != self._latest_filename:
                self._summary_file_handler = FileHandler(file_path, 'rb')
                self._latest_filename = filename
                self._latest_file_size = 0

            new_size = FileHandler.file_stat(file_path).size
            if new_size == self._latest_file_size:
                continue

            self._latest_file_size = new_size
            try:
                self._load_single_file(self._summary_file_handler, executor)
                # Wait for data in this file to be processed to avoid loading multiple files at the same time.
                executor.wait_all_tasks_finish()
            except UnknownError as ex:
                logger.warning(
                    "Parse summary file failed, detail: %r,"
                    "file path: %s.", str(ex), file_path)
Example #16
0
    def _execute_loader(self, loader_id, executor):
        """
        Load data from data_loader.

        If something goes wrong while loading, log it and delete the loader.

        Args:
            loader_id (str): An ID for `Loader`.
            executor (Executor): The Executor instance.

        Returns:
            bool, True if the loader is finished loading.
        """
        try:
            with self._loader_pool_mutex:
                loader = self._loader_pool.get(loader_id, None)
                if loader is None:
                    logger.debug(
                        "Loader %r has been deleted, will not load data.",
                        loader_id)
                    return True

            loader.cache_status = CacheStatus.CACHING
            if loader.data_loader.load(executor):
                # Update loader cache status to CACHED.
                # Loader with cache status CACHED should remain the same cache status.
                loader.cache_status = CacheStatus.CACHED
                return True
            return False

        except MindInsightException as ex:
            logger.warning(
                "Data loader %r load data failed. "
                "Delete data_loader. Detail: %s", loader_id, ex)

            with self._loader_pool_mutex:
                self._delete_loader(loader_id)
            return True
Example #17
0
    def _parse_summary_value(value, plugin):
        """
        Parse summary value and create corresponding container according to plugin.

        Args:
            value (Summary.Value): Value message in summary file.
            plugin (str): Plugin value.

        Returns:
            Union[Summary.Value, HistogramContainer, TensorContainer, ImageContainer], original summary value
            or an instance of HistogramContainer, TensorContainer or ImageContainer.
        """
        tensor_event_value = getattr(value, plugin)
        if plugin == PluginNameEnum.HISTOGRAM.value:
            tensor_event_value = HistogramContainer(tensor_event_value)
            # Drop steps if original_buckets_count exceeds HistogramContainer.MAX_ORIGINAL_BUCKETS_COUNT
            # to avoid time-consuming re-sample process.
            if tensor_event_value.histogram.original_buckets_count > Histogram.MAX_ORIGINAL_BUCKETS_COUNT:
                logger.info('original_buckets_count exceeds '
                            'HistogramContainer.MAX_ORIGINAL_BUCKETS_COUNT')
                return None

        elif plugin == PluginNameEnum.TENSOR.value:
            tensor_event_value = TensorContainer(tensor_event_value)
            tensor_count = 1
            for d in tensor_event_value.dims:
                tensor_count *= d
            if tensor_count > MAX_TENSOR_COUNT:
                logger.warning(
                    'tag: %s/tensor, dims: %s, tensor count %d exceeds %d, dropping it.',
                    value.tag, tensor_event_value.dims, tensor_count,
                    MAX_TENSOR_COUNT)
                return None

        elif plugin == PluginNameEnum.IMAGE.value:
            tensor_event_value = ImageContainer(tensor_event_value)

        return tensor_event_value
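
A small sketch of the element-count guard above, assuming a plain list of dimensions and a made-up limit; the real code reads dims from `TensorContainer` and compares against `MAX_TENSOR_COUNT`:

def exceeds_limit(dims, max_count):
    """Multiply all dimensions to get the element count, as the loop above does."""
    count = 1
    for dim in dims:
        count *= dim
    return count > max_count

# exceeds_limit([128, 128, 64], 1_000_000) -> True   (1,048,576 elements)
# exceeds_limit([32, 32], 1_000_000)       -> False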
Example #18
0
    def _scan_subdir_entries(self, summary_dict, summary_base_dir, entry_path,
                             entry_name, counter, list_explain):
        """
        Scan subdir entries.

        Args:
            summary_dict (dict): Temporary data structure to hold summary directory info.
            summary_base_dir (str): Path of summary base directory.
            entry_path (str): Path of the entry.
            entry_name (str): Name of entry.
            counter (Counter): An instance of CountLimiter.
            list_explain (bool): Indicates whether to list only the mindexplain folder.

        """
        try:
            subdir_entries = os.scandir(entry_path)
        except PermissionError:
            logger.warning(
                'Path of %s under summary base directory is not accessible.',
                entry_name)
            return

        for subdir_entry in subdir_entries:
            if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                break
            try:
                counter.add()
            except MaxCountExceededError:
                logger.info(
                    'Stop further scanning because overall is False and '
                    'the number of scanned files exceeds the upper limit.')
                break
            subdir_relative_path = os.path.join('.', entry_name)
            if subdir_entry.is_symlink():
                pass
            self._update_summary_dict(summary_dict, summary_base_dir,
                                      subdir_relative_path, subdir_entry,
                                      list_explain)
Example #19
0
    def _parse_consts(self, consts):
        """
        Parse `anf_ir_pb2.NameValueProto` object, and create a const node.

        Args:
            consts (list[anf_ir_pb2.NameValueProto]): Refer to `anf_ir_pb2.NameValueProto` object.
        """
        logger.debug("Start to parse consts from proto.")
        for const in consts:
            if not const.key:
                logger.warning(
                    "Finding a const with an empty key will not save it.")
                continue
            node = Node(name=const.key, node_id=const.key)
            node.type = NodeTypeEnum.CONST.value
            node.add_attr({const.key: str(const.value)})
            if const.value.dtype == DataType.DT_TENSOR:
                shape = []
                for dim in const.value.tensor_val.dims:
                    shape.append(dim)
                node.output_shape = shape

            self._cache_node(node)
Example #20
0
    def get_single_train_job(self, train_id, manual_update=False):
        """
        Get train job by train ID.

        Args:
            train_id (str): Train ID for train job.
            manual_update (bool): Whether to update manually.

        Returns:
            dict, single train job; returns None if no data can be found.
        """
        self._check_status_valid()
        self._check_train_job_exist(train_id, self._loader_pool)

        loader = self._get_loader(train_id, manual_update)
        if loader is None:
            logger.warning(
                "No valid summary log in train job %s, "
                "or it is not in the cache.", train_id)
            return None

        train_job = loader.to_dict()
        train_job.pop('data_loader')

        plugin_data = {}
        for plugin_name in PluginNameEnum.list_members():
            job = self.get_train_job_by_plugin(train_id,
                                               plugin_name=plugin_name)
            if job is None:
                plugin_data[plugin_name] = []
            else:
                plugin_data[plugin_name] = job['tags']

        train_job.update({'tag_mapping': plugin_data})

        return train_job
Example #21
0
    def _load(self, executor):
        """
        Load all valid log files.

        When the file is reloaded, it will continue to load from where it left off.

        Args:
            executor (Executor): The Executor instance.

        Returns:
            bool, True if the train job is finished loading.
        """
        filenames = self.filter_valid_files()
        if not filenames:
            logger.warning("No valid files can be loaded, summary_dir: %s.", self._summary_dir)
            raise exceptions.SummaryLogPathInvalid()
        old_filenames = list(self._valid_filenames)
        self._valid_filenames = filenames
        self._check_files_deleted(filenames, old_filenames)

        finished = True
        for parser in self._parser_list:
            finished = parser.parse_files(executor, filenames, events_data=self._events_data) and finished
        return finished
Example #22
0
    def _parse_consts(self, consts):
        """
        Parse `anf_ir_pb2.NameValueProto` object, and create a const node.

        Args:
            consts (list[anf_ir_pb2.NameValueProto]): Refer to `anf_ir_pb2.NameValueProto` object.
        """
        logger.debug("Start to parse consts from proto.")
        for const in consts:
            if not const.key:
                logger.warning(
                    "Finding a const with an empty key will not save it.")
                continue
            check_invalid_character(const.key)
            node = Node(name=const.key, node_id=const.key)
            node.type = NodeTypeEnum.CONST.value
            if const.value.ByteSize() > self.MAX_NODE_ATTRIBUTE_VALUE_BYTES:
                node.add_attr(
                    {const.key: 'dtype: ' + DataType.Name(const.value.dtype)})
            else:
                node.add_attr({const.key: str(const.value)})

            if const.value.dtype == DataType.DT_TENSOR:
                shape = list(const.value.tensor_val.dims)
                node.output_shape.append(shape)
                if const.value.tensor_val.HasField('data_type'):
                    node.elem_types.append(
                        DataType.Name(const.value.tensor_val.data_type))
            else:
                node.elem_types.append(DataType.Name(const.value.dtype))
                # dim is zero
                node.output_shape.append([])

            node.output_nums = len(node.output_shape)

            self._cache_node(node)
Example #23
0
    def load(self, computing_resource_mgr):
        """
        Load all valid log files.

        When the file is reloaded, it will continue to load from where it left off.

        Args:
            computing_resource_mgr (ComputingResourceManager): The ComputingResourceManager instance.
        """
        logger.debug("Start to load data in ms data loader.")
        filenames = self.filter_valid_files()
        if not filenames:
            logger.warning("No valid files can be loaded, summary_dir: %s.",
                           self._summary_dir)
            raise exceptions.SummaryLogPathInvalid()
        old_filenames = list(self._valid_filenames)
        self._valid_filenames = filenames
        self._check_files_deleted(filenames, old_filenames)

        with computing_resource_mgr.get_executor() as executor:
            for parser in self._parser_list:
                parser.parse_files(executor,
                                   filenames,
                                   events_data=self._events_data)
Example #24
0
    def delete_tensor_event(self, tag):
        """
        Delete the tensor event with the given tag from the in-memory records.

        Args:
            tag (str): The tag name.
        """
        if len(self._deleted_tags) < _MAX_DELETED_TAGS_SIZE:
            self._deleted_tags.add(tag)
        else:
            logger.warning(
                'Too many deleted tags: upper limit %d reached, tag updates may not work correctly hereafter.',
                _MAX_DELETED_TAGS_SIZE)
        logger.info('%r and all related samples are going to be deleted', tag)
        self._tags.remove(tag)
        for plugin_name, lock in self._tags_by_plugin_mutex_lock.items():
            with lock:
                if tag in self._tags_by_plugin[plugin_name]:
                    self._tags_by_plugin[plugin_name].remove(tag)
                    break

        with self._reservoir_mutex_lock:
            if tag in self._reservoir_by_tag:
                self._reservoir_by_tag.pop(tag)
Example #25
0
    def list_summaries(self, summary_base_dir, relative_path='./'):
        """
        Get info of latest summary file within the given summary directory.

        Args:
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                                starting with "./" .

        Returns:
            list, list of summary files, each including the following attributes.
                - file_name (str): Summary file name.
                - create_time (datetime): Creation time of summary file.
                - update_time (datetime): Modification time of summary file.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> summaries = summary_watcher.list_summaries('/summary/base/dir', './job-01')
        """
        if contains_null_byte(summary_base_dir=summary_base_dir,
                              relative_path=relative_path):
            return []

        if not self._is_valid_summary_directory(summary_base_dir,
                                                relative_path):
            return []

        summaries = []
        summary_directory = os.path.realpath(
            os.path.join(summary_base_dir, relative_path))
        try:
            entries = os.scandir(summary_directory)
        except PermissionError:
            logger.error('Path of summary directory is not accessible.')
            raise FileSystemPermissionError(
                'Path of summary directory is not accessible.')

        for entry in entries:
            if entry.is_symlink() or not entry.is_file():
                continue

            pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
            if pattern is None:
                continue

            timestamp = int(pattern.groupdict().get('timestamp'))
            try:
                # extract created time from filename
                ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
            except OverflowError:
                continue

            try:
                stat = entry.stat()
            except FileNotFoundError:
                logger.warning('File %s not found.', entry.name)
                continue

            mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()

            summaries.append({
                'file_name': entry.name,
                'create_time': ctime,
                'update_time': mtime,
            })

        # sort by update time in descending order and filename in ascending order
        summaries.sort(
            key=lambda x: (-int(x['update_time'].timestamp()), x['file_name']))

        return summaries
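
A tiny illustration of the sort key above (the integer timestamps are made up; the real entries hold datetime objects and call `.timestamp()` in the key):

files = [
    {'file_name': 'b.summary', 'update_time': 100},
    {'file_name': 'a.summary', 'update_time': 100},
    {'file_name': 'c.summary', 'update_time': 200},
]
# Negating the timestamp yields descending time; ties fall back to ascending file name.
files.sort(key=lambda x: (-x['update_time'], x['file_name']))
# Resulting order: c.summary (200), a.summary (100), b.summary (100)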
Example #26
0
    def _update_summary_dict(self, summary_dict, summary_base_dir,
                             relative_path, entry):
        """
        Update summary_dict with ctime and mtime.

        Args:
            summary_dict (dict): Temporary data structure to hold summary directory info.
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                                starting with "./" .
            entry (DirEntry): Directory entry instance needed to check with regular expression.
        """
        try:
            stat = entry.stat()
        except FileNotFoundError:
            logger.warning('File %s not found', entry.name)
            return

        ctime = datetime.datetime.fromtimestamp(stat.st_ctime).astimezone()
        mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()

        if entry.is_file():
            summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX,
                                        entry.name)
            pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
            if summary_pattern is None and pb_pattern is None:
                return
            if summary_pattern is not None:
                timestamp = int(summary_pattern.groupdict().get('timestamp'))
                try:
                    # extract created time from filename
                    ctime = datetime.datetime.fromtimestamp(
                        timestamp).astimezone()
                except OverflowError:
                    return
            if relative_path not in summary_dict:
                summary_dict[relative_path] = {
                    'ctime': ctime,
                    'mtime': mtime,
                    'profiler': None,
                }
            elif summary_dict[relative_path]['ctime'] < ctime:
                summary_dict[relative_path].update({
                    'ctime': ctime,
                    'mtime': mtime,
                })
        elif entry.is_dir():
            profiler_pattern = re.search(self.PROFILER_DIRECTORY_REGEX,
                                         entry.name)
            full_dir_path = os.path.join(summary_base_dir, relative_path,
                                         entry.name)
            if profiler_pattern is None or not self._is_valid_profiler_directory(
                    full_dir_path):
                return

            profiler = {
                'directory': os.path.join('.', entry.name),
                'ctime': ctime,
                'mtime': mtime,
            }

            summary_dict[relative_path] = {
                'ctime': ctime,
                'mtime': mtime,
                'profiler': profiler,
            }
Example #27
0
    def generate_loaders(self, loader_pool):
        """
        Generate loaders from the summary path. If the summary path is empty, an empty dict will be returned.

        Args:
            loader_pool (dict[str, LoaderStruct]): Current loader pool in data_manager.

        Returns:
            dict[str, LoaderStruct], a dict of `Loader`.
        """
        loader_dict = {}

        if not FileHandler.exists(self._summary_path):
            logger.warning(
                "Summary path does not exist. It will not start loading events data. "
                "Current path is %r.", self._summary_path)
            return loader_dict

        dir_map_mtime_dict = {}
        min_modify_time = None
        summaries_info = self._summary_watcher.list_summary_directories(
            self._summary_path)

        for item in summaries_info:
            relative_path = item.get("relative_path")
            current_dir = FileHandler.join(self._summary_path, relative_path)
            dataloader = DataLoader(current_dir)

            if not dataloader.has_valid_files():
                logger.debug(
                    "Can not find valid train log file in folder %s , "
                    "will ignore.", relative_path)
                continue

            modify_time = item.get("update_time").timestamp()

            # If the loader already exists in the loader pool with a newer time, use its time.
            loader_id = self._generate_loader_id(relative_path)
            loader = loader_pool.get(loader_id)
            if loader is not None and loader.latest_update_time > modify_time:
                modify_time = loader.latest_update_time

            if not min_modify_time:
                # The first load, init min modify time
                min_modify_time = modify_time

            # We need to find `MAX_DATA_LOADER_SIZE` newly modified folders.
            if len(dir_map_mtime_dict) < MAX_DATA_LOADER_SIZE:
                if modify_time < min_modify_time:
                    min_modify_time = modify_time
                dir_map_mtime_dict.update({relative_path: modify_time})

            else:
                if modify_time >= min_modify_time:
                    dir_map_mtime_dict.update({relative_path: modify_time})

        sorted_dir_tuple = sorted(dir_map_mtime_dict.items(),
                                  key=lambda d: d[1])[-MAX_DATA_LOADER_SIZE:]

        for relative_path, modify_time in sorted_dir_tuple:
            loader_id = self._generate_loader_id(relative_path)
            loader = self._generate_loader_by_relative_path(relative_path)
            loader_dict.update({loader_id: loader})

        return loader_dict
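
A standalone sketch of the folder selection above, assuming `MAX_DATA_LOADER_SIZE` simply bounds how many of the most recently modified folders are kept (the helper name `select_latest` and the sample data are illustrative assumptions):

def select_latest(dir_mtimes, max_size):
    """Return the max_size entries with the largest modification times."""
    return dict(sorted(dir_mtimes.items(), key=lambda item: item[1])[-max_size:])

# select_latest({'a': 10, 'b': 30, 'c': 20}, 2) -> {'c': 20, 'b': 30}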
Example #28
0
    def _update_summary_dict(self, summary_dict, summary_base_dir, relative_path, entry, list_explain):
        """
        Update summary_dict with ctime and mtime.

        Args:
            summary_dict (dict): Temporary data structure to hold summary directory info.
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                                starting with "./" .
            entry (DirEntry): Directory entry instance needed to check with regular expression.
            list_explain (bool): Indicates whether to list only the mindexplain folder.
        """
        try:
            stat = entry.stat()
        except FileNotFoundError:
            logger.warning('File %s not found', entry.name)
            return

        ctime = datetime.datetime.fromtimestamp(stat.st_ctime).astimezone()
        mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()

        if entry.is_file():
            summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
            pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
            if not self._is_valid_pattern_result(summary_pattern, pb_pattern, list_explain, entry):
                return

            if summary_pattern is not None:
                timestamp = int(summary_pattern.groupdict().get('timestamp'))
                try:
                    # extract created time from filename
                    ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
                except OverflowError:
                    return

            if relative_path not in summary_dict:
                summary_dict[relative_path] = _new_entry(ctime, mtime)
            if summary_dict[relative_path]['create_time'] < ctime:
                summary_dict[relative_path].update({
                    'create_time': ctime,
                    'update_time': mtime,
                })
            if not summary_pattern:
                summary_dict[relative_path]['graph_files'] += 1
            elif entry.name.endswith(LINEAGE_SUMMARY_SUFFIX):
                summary_dict[relative_path]['lineage_files'] += 1
            elif entry.name.endswith(EXPLAIN_SUMMARY_SUFFIX):
                summary_dict[relative_path]['explain_files'] += 1
            else:
                summary_dict[relative_path]['summary_files'] += 1
        elif entry.is_dir():
            if list_explain:
                return

            profiler_type, is_find = self._find_profiler_dir(entry, summary_base_dir, relative_path)
            if not is_find:
                return

            profiler = {
                'directory': os.path.join('.', entry.name),
                'create_time': ctime,
                'update_time': mtime,
                "profiler_type": profiler_type
            }

            if relative_path in summary_dict:
                summary_dict[relative_path]['profiler'] = profiler
            else:
                summary_dict[relative_path] = _new_entry(ctime, mtime, profiler)
Example #29
0
    def _event_parse(self, event):
        """
        Transform `Event` data to tensor_event and update it to EventsData.

        Args:
            event (Event): Message event in summary proto, data read from file handler.
        """
        plugins = {
            'scalar_value': PluginNameEnum.SCALAR,
            'image': PluginNameEnum.IMAGE,
            'histogram': PluginNameEnum.HISTOGRAM,
        }

        if event.HasField('summary'):
            for value in event.summary.value:
                for plugin in plugins:
                    if not value.HasField(plugin):
                        continue
                    plugin_name_enum = plugins[plugin]
                    tensor_event_value = getattr(value, plugin)

                    if plugin == 'histogram':
                        tensor_event_value = HistogramContainer(
                            tensor_event_value)
                        # Drop steps if original_buckets_count exceeds HistogramContainer.MAX_ORIGINAL_BUCKETS_COUNT
                        # to avoid time-consuming re-sample process.
                        if tensor_event_value.original_buckets_count > HistogramContainer.MAX_ORIGINAL_BUCKETS_COUNT:
                            logger.warning(
                                'original_buckets_count exceeds '
                                'HistogramContainer.MAX_ORIGINAL_BUCKETS_COUNT'
                            )
                            continue

                    tensor_event = TensorEvent(
                        wall_time=event.wall_time,
                        step=event.step,
                        tag='{}/{}'.format(value.tag, plugin_name_enum.value),
                        plugin_name=plugin_name_enum.value,
                        value=tensor_event_value,
                        filename=self._latest_filename)
                    self._events_data.add_tensor_event(tensor_event)

        elif event.HasField('graph_def'):
            graph = MSGraph()
            graph.build_graph(event.graph_def)
            tensor_event = TensorEvent(wall_time=event.wall_time,
                                       step=event.step,
                                       tag=self._latest_filename,
                                       plugin_name=PluginNameEnum.GRAPH.value,
                                       value=graph,
                                       filename=self._latest_filename)

            try:
                graph_tags = self._events_data.list_tags_by_plugin(
                    PluginNameEnum.GRAPH.value)
            except KeyError:
                graph_tags = []

            summary_tags = self.filter_files(graph_tags)
            for tag in summary_tags:
                self._events_data.delete_tensor_event(tag)

            self._events_data.add_tensor_event(tensor_event)
Example #30
0
    def _event_parse(self, event):
        """
        Transform `Event` data to tensor_event and update it to EventsData.

        Args:
            event (Event): Message event in summary proto, data read from file handler.
        """
        if event.HasField('summary'):
            for value in event.summary.value:
                if value.HasField('scalar_value'):
                    tag = '{}/{}'.format(value.tag, PluginNameEnum.SCALAR.value)
                    tensor_event = TensorEvent(wall_time=event.wall_time,
                                               step=event.step,
                                               tag=tag,
                                               plugin_name=PluginNameEnum.SCALAR.value,
                                               value=value.scalar_value,
                                               filename=self._latest_filename)
                    self._events_data.add_tensor_event(tensor_event)

                if value.HasField('image'):
                    tag = '{}/{}'.format(value.tag, PluginNameEnum.IMAGE.value)
                    tensor_event = TensorEvent(wall_time=event.wall_time,
                                               step=event.step,
                                               tag=tag,
                                               plugin_name=PluginNameEnum.IMAGE.value,
                                               value=value.image,
                                               filename=self._latest_filename)
                    self._events_data.add_tensor_event(tensor_event)

                if value.HasField('histogram'):
                    histogram_msg = HistogramContainer(value.histogram)
                    # Drop steps if original_buckets_count exceeds HistogramContainer.MAX_ORIGINAL_BUCKETS_COUNT
                    # to avoid time-consuming re-sample process.
                    if histogram_msg.original_buckets_count > HistogramContainer.MAX_ORIGINAL_BUCKETS_COUNT:
                        logger.warning('original_buckets_count exceeds HistogramContainer.MAX_ORIGINAL_BUCKETS_COUNT')
                    else:
                        tag = '{}/{}'.format(value.tag, PluginNameEnum.HISTOGRAM.value)
                        tensor_event = TensorEvent(wall_time=event.wall_time,
                                                   step=event.step,
                                                   tag=tag,
                                                   plugin_name=PluginNameEnum.HISTOGRAM.value,
                                                   value=histogram_msg,
                                                   filename=self._latest_filename)
                        self._events_data.add_tensor_event(tensor_event)

        if event.HasField('graph_def'):
            graph_proto = event.graph_def
            graph = MSGraph()
            graph.build_graph(graph_proto)
            tensor_event = TensorEvent(wall_time=event.wall_time,
                                       step=event.step,
                                       tag=self._latest_filename,
                                       plugin_name=PluginNameEnum.GRAPH.value,
                                       value=graph,
                                       filename=self._latest_filename)

            try:
                graph_tags = self._events_data.list_tags_by_plugin(PluginNameEnum.GRAPH.value)
            except KeyError:
                graph_tags = []
            summary_tags = self.filter_files(graph_tags)
            for tag in summary_tags:
                self._events_data.delete_tensor_event(tag)

            self._events_data.add_tensor_event(tensor_event)