Example #1
0
    def load(self):
        """Start loading data from the latest summary file to the loader."""
        self.status = _LoaderStatus.LOADING.value
        filenames = []
        for filename in FileHandler.list_dir(self._loader_info['summary_dir']):
            if FileHandler.is_file(
                    FileHandler.join(self._loader_info['summary_dir'],
                                     filename)):
                filenames.append(filename)
        filenames = ExplainLoader._filter_files(filenames)

        if not filenames:
            raise TrainJobNotExistError(
                'No summary file found in %s, explain job will be deleted.' %
                self._loader_info['summary_dir'])

        is_end = False
        while not is_end and self.status != _LoaderStatus.STOP.value:
            try:
                file_changed, is_end, event_dict = self._parser.list_events(
                    filenames)
            except UnknownError:
                break

            if file_changed:
                logger.info(
                    'Summary file in %s updated, reloading the data in the summary.',
                    self._loader_info['summary_dir'])
                self._clear_job()
            if event_dict:
                self._import_data_from_event(event_dict)
Example #2
0
    def _parse_pb_file(self, filename):
        """
        Parse pb file and write content to `EventsData`.

        Args:
            filename (str): The name of the pb file.
        """
        file_path = FileHandler.join(self._summary_dir, filename)
        logger.info("Start to load graph from pb file, file path: %s.",
                    file_path)
        filehandler = FileHandler(file_path)
        model_proto = anf_ir_pb2.ModelProto()
        try:
            model_proto.ParseFromString(filehandler.read())
        except ParseError:
            logger.warning(
                "The given file is not a valid pb file, file path: %s.",
                file_path)
            return

        graph = MSGraph()
        graph.build_graph(model_proto.graph)
        tensor_event = TensorEvent(wall_time=FileHandler.file_stat(file_path).mtime,
                                   step=0,
                                   tag=filename,
                                   plugin_name=PluginNameEnum.GRAPH.value,
                                   value=graph)
        self._events_data.add_tensor_event(tensor_event)
Example #3
0
 def sort_files(self, filenames):
     """Sort by modify time increments and filenames increments."""
     filenames = sorted(
         filenames,
         key=lambda file: (FileHandler.file_stat(
             FileHandler.join(self._summary_dir, file)).mtime, file))
     return filenames
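
For reference, the ordering above is primarily by modification time, with ties broken by filename. Below is a minimal standalone sketch of the same ordering using only the standard library (os.stat stands in for FileHandler.file_stat; the helper name is illustrative, not part of the module):

import os

def sort_files_standalone(summary_dir, filenames):
    """Sort file names by (modification time, name), both ascending."""
    return sorted(
        filenames,
        key=lambda name: (os.stat(os.path.join(summary_dir, name)).st_mtime, name))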
Example #4
0
    def parse(self):
        """Load summary file and parse file content."""

        summary_file_handler = FileHandler(self.summary_file, 'rb')

        self._file_size = os.path.getsize(self.summary_file)
        # When the parsed size exceeds self._process_info, log the progress.
        self._process_info = self._file_size // INFO_INTERVAL

        parse_summary_logger.info("Loading %s.", self.summary_file)
        result = self._load(summary_file_handler)

        if result:
            warning = ''
            scalar_path = FileHandler.join(self._output, "scalar.csv")
            image_path = FileHandler.join(self._output, IMAGE)

            if not self._image_check:
                warning = warning + " The summary file contains no image."
            else:
                parse_summary_logger.info("Images are written in %s.", image_path)

            if not self._scalar_check:
                warning = warning + " The summary file contains no scalar value."
            else:
                parse_summary_logger.info("Writing scalar data into %s.", scalar_path)

            self._scalar_writer.write()
            if warning:
                parse_summary_logger.warning(warning)

            parse_summary_logger.info("Finished loading %s.", self.summary_file)
Example #5
0
    def load(self):
        """Start loading data from the latest summary file to the loader."""
        filenames = []
        for filename in FileHandler.list_dir(self._loader_info['summary_dir']):
            if FileHandler.is_file(
                    FileHandler.join(self._loader_info['summary_dir'],
                                     filename)):
                filenames.append(filename)
        filenames = ExplainLoader._filter_files(filenames)

        if not filenames:
            raise TrainJobNotExistError(
                'No summary file found in %s, explain job will be deleted.' %
                self._loader_info['summary_dir'])

        is_end = False
        while not is_end:
            is_clean, is_end, event_dict = self._parser.parse_explain(
                filenames)

            if is_clean:
                logger.info(
                    'Summary file in %s updated, reloading the data in the summary.',
                    self._loader_info['summary_dir'])
                self._clear_job()
            if event_dict:
                self._import_data_from_event(event_dict)
Example #6
0
    def parse_files(self, filenames, events_data):
        """
        Load summary file and parse file content.

        Args:
            filenames (list[str]): File name list.
            events_data (EventsData): The container of event data.
        """
        self._events_data = events_data
        summary_files = self.filter_files(filenames)
        summary_files = self.sort_files(summary_files)

        for filename in summary_files:
            if self._latest_filename and \
                    (self._compare_summary_file(self._latest_filename, filename)):
                continue

            file_path = FileHandler.join(self._summary_dir, filename)

            if filename != self._latest_filename:
                self._summary_file_handler = FileHandler(file_path, 'rb')
                self._latest_filename = filename
                self._latest_file_size = 0

            new_size = FileHandler.file_stat(file_path).size
            if new_size == self._latest_file_size:
                continue

            self._latest_file_size = new_size
            try:
                self._load_single_file(self._summary_file_handler)
            except UnknownError as ex:
                logger.warning("Parse summary file failed, detail: %r,"
                               "file path: %s.", str(ex), file_path)
Example #7
0
def phase_pb_file(file_path: str) -> Union[MSGraph, None]:
    """
    Parse pb file to graph.

    Args:
        file_path (str): The file path of pb file.

    Returns:
        MSGraph, the graph if the pb file is loaded and the graph is built successfully, else None.
    """
    if not CONFIG.VERBOSE:
        logger.setLevel(logging.ERROR)
    logger.info("Start to load graph from pb file, file path: %s.", file_path)
    model_proto = anf_ir_pb2.ModelProto()
    try:
        model_proto.ParseFromString(FileHandler(file_path).read())
    except ParseError:
        logger.warning("The given file is not a valid pb file, file path: %s.",
                       file_path)
        return None

    graph = MSGraph()

    try:
        graph.build_graph(model_proto.graph)
    except Exception as ex:
        logger.error("Build graph failed, file path: %s.", file_path)
        logger.exception(ex)
        raise UnknownError(str(ex))

    logger.info("Build graph success, file path: %s.", file_path)
    return graph
Example #8
0
    def parse_files(self, executor, filenames, events_data):
        """
        Load summary file and parse file content.

        Args:
            executor (Executor): The executor instance.
            filenames (list[str]): File name list.
            events_data (EventsData): The container of event data.

        Returns:
            bool, True if all the summary files are finished loading.
        """
        summary_files = self.filter_files(filenames)
        summary_files = self.sort_files(summary_files)
        if self._latest_filename in summary_files:
            index = summary_files.index(self._latest_filename)
            summary_files = summary_files[index:]

        for filename in summary_files:
            file_path = FileHandler.join(self._summary_dir, filename)

            if filename != self._latest_filename:
                self._summary_file_handler = FileHandler(file_path, 'rb')
                self._latest_filename = filename
                self._latest_file_size = 0

            new_size = FileHandler.file_stat(file_path).size
            if new_size == self._latest_file_size:
                continue

            try:
                if not self._load_single_file(self._summary_file_handler,
                                              executor, events_data):
                    self._latest_file_size = self._summary_file_handler.offset
                else:
                    self._latest_file_size = new_size
                # Wait for data in this file to be processed to avoid loading multiple files at the same time.
                logger.debug("Parse summary file offset %d, file path: %s.",
                             self._latest_file_size, file_path)
                return False
            except UnknownError as ex:
                logger.warning(
                    "Parse summary file failed, detail: %r,"
                    "file path: %s.", str(ex), file_path)
        return True
Example #9
0
    def filter_valid_files(self):
        """
        Get a list of valid file names from the summary directory.

        Returns:
            list[str], file name list.
        """
        filenames = []
        for filename in FileHandler.list_dir(self._summary_dir):
            if FileHandler.is_file(FileHandler.join(self._summary_dir, filename)):
                filenames.append(filename)

        valid_filenames = []
        for parser in self._parser_list:
            valid_filenames.extend(parser.filter_files(filenames))

        return list(set(valid_filenames))
Example #10
0
 def _generate_loader_from_relative_path(
         self, relative_path: str) -> ExplainLoader:
     """Generate explain loader from the given relative path."""
     self._check_summary_exist(relative_path)
     current_dir = os.path.realpath(
         FileHandler.join(self._summary_base_dir, relative_path))
     loader_id = self._generate_loader_id(relative_path)
     loader = ExplainLoader(loader_id=loader_id, summary_dir=current_dir)
     return loader
Example #11
0
 def __init__(self, summary_file, output):
     self.summary_file = summary_file
     self._output = output
     self._scalar_writer = ScalarWriter(self._output)
     self._image_writer = ImageWriter(FileHandler.join(self._output, IMAGE))
     self._file_size = 0
     self._process_info = 0
     self._image_check = False
     self._scalar_check = False
Example #12
0
    def _parse_pb_file(summary_dir, filename):
        """
        Parse pb file and write content to `EventsData`.

        Args:
            summary_dir (str): The summary directory that contains the pb file.
            filename (str): The name of the pb file.

        Returns:
            TensorEvent, the tensor event if the pb file is loaded and the graph is built successfully, else None.
        """
        file_path = FileHandler.join(summary_dir, filename)
        logger.info("Start to load graph from pb file, file path: %s.",
                    file_path)
        filehandler = FileHandler(file_path)
        model_proto = anf_ir_pb2.ModelProto()
        try:
            model_proto.ParseFromString(filehandler.read())
        except ParseError:
            logger.warning(
                "The given file is not a valid pb file, file path: %s.",
                file_path)
            return None

        graph = MSGraph()

        try:
            graph.build_graph(model_proto.graph)
        except Exception as ex:
            # Normally, there are no exceptions, and it is only possible for users on the MindSpore side
            # to dump other non-default graphs.
            logger.error("Build graph failed, file path: %s.", file_path)
            logger.exception(ex)
            raise UnknownError(str(ex))

        tensor_event = TensorEvent(
            wall_time=FileHandler.file_stat(file_path).mtime,
            step=0,
            tag=filename,
            plugin_name=PluginNameEnum.GRAPH.value,
            value=graph,
            filename=filename)

        logger.info("Build graph success, file path: %s.", file_path)
        return tensor_event
Example #13
0
    def _generate_loader_by_relative_path(self, relative_path):
        """
        Generate loader by relative path.

        Args:
            relative_path (str): Relative path of a summary directory, e.g. './log1'.

        Returns:
            LoaderStruct, the loader for the given relative path.
        """
        current_dir = os.path.realpath(FileHandler.join(self._summary_path, relative_path))
        data_loader = DataLoader(current_dir)
        loader_id = self._generate_loader_id(relative_path)
        loader = LoaderStruct(loader_id=loader_id,
                              name=self._generate_loader_name(relative_path),
                              path=current_dir,
                              latest_update_time=FileHandler.file_stat(current_dir).mtime,
                              data_loader=data_loader)
        return loader
Example #14
0
    def _set_latest_file(self, filename):
        """
        Check if the file's modification time is newer than the last time it was loaded, and if so, set the time.

        Args:
            filename (str): The file name that needs to be checked and set.

        Returns:
            bool, True if the file was modified later than the last loaded file, otherwise False.
        """
        mtime = FileHandler.file_stat(FileHandler.join(self._summary_dir, filename)).mtime
        if mtime < self._latest_mtime or \
                (mtime == self._latest_mtime and filename <= self._latest_filename):
            return False

        self._latest_mtime = mtime
        self._latest_filename = filename

        return True
Example #15
0
    def run(self, args):
        """
        Execute for start command.

        Args:
            args (Namespace): Parsed arguments to hold customized parameters.
        """
        try:
            date_time = datetime.datetime.now().strftime(
                'output_%Y%m%d_%H%M%S_%f')
            output_path = os.path.join(args.output, date_time)

            summary_dir = args.summary_dir
            if not self._check_dirpath(summary_dir):
                return

            summary_parser = _SummaryParser(summary_dir)
            summary_files = summary_parser.filter_files(
                os.listdir(summary_dir))

            if not summary_files:
                parse_summary_logger.error('Path %s has no summary file.',
                                           summary_dir)
                return

            summary_files = summary_parser.sort_files(summary_files)
            filename = summary_files[-1]

            summary_file = FileHandler.join(summary_dir, filename)

            if not (self._check_filepath(summary_file)
                    and self._check_create_filepath(output_path)
                    and self._check_create_filepath(
                        FileHandler.join(output_path, 'image'))):
                return

            eventparser = EventParser(summary_file, output_path)
            eventparser.parse()

        except Exception as ex:
            parse_summary_logger.error(
                "Parse summary file failed, detail: %r.", str(ex))
            raise UnknownError(str(ex))
Example #16
0
    def _load_pb_files(self, filenames):
        """
        Load and parse the pb files.

        Args:
            filenames (list[str]): File name list, like [filename1, filename2].

        Returns:
            list[str], filename list.
        """
        pb_filenames = self._filter_pb_files(filenames)
        pb_filenames = sorted(
            pb_filenames,
            key=lambda file: FileHandler.file_stat(
                FileHandler.join(self._summary_dir, file)).mtime)
        for filename in pb_filenames:
            mtime = FileHandler.file_stat(
                FileHandler.join(self._summary_dir, filename)).mtime
            if mtime <= self._latest_pb_file_mtime:
                continue
            self._latest_pb_file_mtime = mtime
            self._parse_pb_file(filename)
Example #17
0
    def parse_files(self, executor, filenames, events_data):
        """
        Load summary file and parse file content.

        Args:
            executor (Executor): The executor instance.
            filenames (list[str]): File name list.
            events_data (EventsData): The container of event data.
        """
        self._events_data = events_data
        summary_files = self.filter_files(filenames)
        summary_files = self.sort_files(summary_files)

        for filename in summary_files:
            if self._latest_filename and \
                    (self._compare_summary_file(self._latest_filename, filename)):
                continue

            file_path = FileHandler.join(self._summary_dir, filename)

            if filename != self._latest_filename:
                self._summary_file_handler = FileHandler(file_path, 'rb')
                self._latest_filename = filename
                self._latest_file_size = 0

            new_size = FileHandler.file_stat(file_path).size
            if new_size == self._latest_file_size:
                continue

            self._latest_file_size = new_size
            try:
                self._load_single_file(self._summary_file_handler, executor)
                # Wait for data in this file to be processed to avoid loading multiple files at the same time.
                executor.wait_all_tasks_finish()
            except UnknownError as ex:
                logger.warning(
                    "Parse summary file failed, detail: %r,"
                    "file path: %s.", str(ex), file_path)
Example #18
0
    def check_path(file_path):
        """
        Check argument for file path.

        Args:
            file_path (str): File path.
        """
        if file_path.startswith('~'):
            file_path = os.path.realpath(os.path.expanduser(file_path))

        if not file_path.startswith('/'):
            file_path = os.path.realpath(
                FileHandler.join(os.getcwd(), file_path))

        return os.path.realpath(file_path)
Example #19
0
    def generate_loaders(self, loader_pool):
        """
        Generate loaders from the summary path. If the summary path is empty, an empty dict will be returned.

        Args:
            loader_pool (dict[str, LoaderStruct]): Current loader pool in data_manager.

        Returns:
            dict[str, LoaderStruct], a dict of `Loader`.
        """
        loader_dict = {}

        if not FileHandler.exists(self._summary_path):
            logger.warning(
                "Summary path does not exist. It will not start loading events data. "
                "Current path is %r.", self._summary_path)
            return loader_dict

        dir_map_mtime_dict = {}
        min_modify_time = None
        summaries_info = self._summary_watcher.list_summary_directories(
            self._summary_path)

        for item in summaries_info:
            relative_path = item.get("relative_path")
            current_dir = FileHandler.join(self._summary_path, relative_path)
            dataloader = DataLoader(current_dir)

            if not dataloader.has_valid_files():
                logger.debug(
                    "Can not find valid train log file in folder %s , "
                    "will ignore.", relative_path)
                continue

            modify_time = item.get("update_time").timestamp()

            # If the loader already exists in the loader pool with a newer update time, use that time.
            loader_id = self._generate_loader_id(relative_path)
            loader = loader_pool.get(loader_id)
            if loader is not None and loader.latest_update_time > modify_time:
                modify_time = loader.latest_update_time

            if not min_modify_time:
                # First iteration: initialize the minimum modify time.
                min_modify_time = modify_time

            # We need to find `MAX_DATA_LOADER_SIZE` newly modified folders.
            if len(dir_map_mtime_dict) < MAX_DATA_LOADER_SIZE:
                if modify_time < min_modify_time:
                    min_modify_time = modify_time
                dir_map_mtime_dict.update({relative_path: modify_time})

            else:
                if modify_time >= min_modify_time:
                    dir_map_mtime_dict.update({relative_path: modify_time})

        sorted_dir_tuple = sorted(dir_map_mtime_dict.items(),
                                  key=lambda d: d[1])[-MAX_DATA_LOADER_SIZE:]

        for relative_path, modify_time in sorted_dir_tuple:
            loader_id = self._generate_loader_id(relative_path)
            loader = self._generate_loader_by_relative_path(relative_path)
            loader_dict.update({loader_id: loader})

        return loader_dict
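
For intuition, the selection above keeps roughly the MAX_DATA_LOADER_SIZE most recently modified directories. A minimal standalone sketch of the same idea follows (the helper name and the constant's value here are placeholders, not the module's API):

MAX_DATA_LOADER_SIZE = 30  # placeholder value for illustration only

def pick_latest_dirs(dir_mtime_map, size=MAX_DATA_LOADER_SIZE):
    """Keep the `size` directories with the largest modification times, in ascending order."""
    sorted_dirs = sorted(dir_mtime_map.items(), key=lambda item: item[1])
    return dict(sorted_dirs[-size:])

# Example: with size=2, only the two most recently modified directories survive.
# pick_latest_dirs({'./a': 100, './b': 300, './c': 200}, size=2) -> {'./c': 200, './b': 300}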
Example #20
0
class ExplainParser(_SummaryParser):
    """The summary file parser."""
    def __init__(self, summary_dir):
        super(ExplainParser, self).__init__(summary_dir)
        self._latest_filename = ''

    def parse_explain(self, filenames):
        """
        Load summary file and parse file content.

        Args:
            filenames (list[str]): File name list.

        Returns:
            tuple, the elements of the tuple are:

                - is_clean (bool): True if the latest file has changed and old data should be cleaned.
                - is_end (bool): True if all the summary files are finished loading.
                - event_data (dict): Event data where keys are explanation fields.
        """
        summary_files = self.sort_files(filenames)

        is_end = False
        is_clean = False
        event_data = {}
        filename = summary_files[-1]

        file_path = FileHandler.join(self._summary_dir, filename)
        if filename != self._latest_filename:
            self._summary_file_handler = FileHandler(file_path, 'rb')
            self._latest_filename = filename
            self._latest_file_size = 0
            is_clean = True

        new_size = FileHandler.file_stat(file_path).size
        if new_size == self._latest_file_size:
            is_end = True
            return is_clean, is_end, event_data

        while True:
            start_offset = self._summary_file_handler.offset
            try:
                event_str = self.event_load(self._summary_file_handler)
                if event_str is None:
                    self._summary_file_handler.reset_offset(start_offset)
                    is_end = True
                    return is_clean, is_end, event_data
                if len(event_str) > MAX_EVENT_STRING:
                    logger.warning(
                        "file_path: %s, event string: %d exceeds %d and drop it.",
                        self._summary_file_handler.file_path, len(event_str),
                        MAX_EVENT_STRING)
                    continue

                field_list, tensor_value_list = self._event_decode(event_str)
                for field, tensor_value in zip(field_list, tensor_value_list):
                    event_data[field] = tensor_value
                logger.info("Parse summary file offset %d, file path: %s.",
                            self._summary_file_handler.offset, file_path)
                return is_clean, is_end, event_data

            except (exceptions.CRCFailedError,
                    exceptions.CRCLengthFailedError) as ex:
                self._summary_file_handler.reset_offset(start_offset)
                is_end = True
                logger.warning(
                    "Check crc failed and ignore this file, file_path=%s, offset=%s. Detail: %r.",
                    self._summary_file_handler.file_path,
                    self._summary_file_handler.offset, str(ex))
                return is_clean, is_end, event_data
            except (OSError, DecodeError,
                    exceptions.MindInsightException) as ex:
                is_end = True
                logger.warning(
                    "Parse log file fail, and ignore this file, detail: %r,"
                    "file path: %s.", str(ex),
                    self._summary_file_handler.file_path)
                return is_clean, is_end, event_data
            except Exception as ex:
                logger.exception(ex)
                raise UnknownError(str(ex))

    @staticmethod
    def _event_decode(event_str):
        """
        Transform `Event` data to tensor_event and update it to EventsData.

        Args:
            event_str (str): Message event string in summary proto, data read from file handler.

        Returns:
            tuple, the elements of the result tuple are:

                - field_list (list): Explain fields to be parsed.
                - tensor_value_list (list): Parsed data with respect to the field list.
        """

        logger.debug("Start to parse event string. Event string len: %s.",
                     len(event_str))
        event = summary_pb2.Event.FromString(event_str)
        logger.debug("Deserialize event string completed.")

        fields = {
            'sample_id': ExplainFieldsEnum.SAMPLE_ID,
            'benchmark': ExplainFieldsEnum.BENCHMARK,
            'metadata': ExplainFieldsEnum.METADATA
        }

        tensor_event_value = getattr(event, 'explain')

        field_list = []
        tensor_value_list = []
        for field in fields:
            if not getattr(tensor_event_value, field, False):
                continue

            if ExplainFieldsEnum.METADATA.value == field and not tensor_event_value.metadata.label:
                continue

            tensor_value = None
            if field == ExplainFieldsEnum.SAMPLE_ID.value:
                tensor_value = ExplainParser._add_image_data(
                    tensor_event_value)
            elif field == ExplainFieldsEnum.BENCHMARK.value:
                tensor_value = ExplainParser._add_benchmark(tensor_event_value)
            elif field == ExplainFieldsEnum.METADATA.value:
                tensor_value = ExplainParser._add_metadata(tensor_event_value)
            logger.debug("Event generated, label is %s, step is %s.", field,
                         event.step)
            field_list.append(field)
            tensor_value_list.append(tensor_value)
        return field_list, tensor_value_list

    @staticmethod
    def _add_image_data(tensor_event_value):
        """
        Parse image data based on sample_id in Explain message.

        Args:
            tensor_event_value (Event): The object of Explain message.

        Returns:
            SampleContainer, a named tuple containing sample data.
        """
        inference = InferfenceContainer(
            ground_truth_prob=tensor_event_value.inference.ground_truth_prob,
            ground_truth_prob_sd=tensor_event_value.inference.
            ground_truth_prob_sd,
            ground_truth_prob_itl95_low=tensor_event_value.inference.
            ground_truth_prob_itl95_low,
            ground_truth_prob_itl95_hi=tensor_event_value.inference.
            ground_truth_prob_itl95_hi,
            predicted_label=tensor_event_value.inference.predicted_label,
            predicted_prob=tensor_event_value.inference.predicted_prob,
            predicted_prob_sd=tensor_event_value.inference.predicted_prob_sd,
            predicted_prob_itl95_low=tensor_event_value.inference.
            predicted_prob_itl95_low,
            predicted_prob_itl95_hi=tensor_event_value.inference.
            predicted_prob_itl95_hi)
        sample_data = SampleContainer(
            sample_id=tensor_event_value.sample_id,
            image_path=tensor_event_value.image_path,
            ground_truth_label=tensor_event_value.ground_truth_label,
            inference=inference,
            explanation=tensor_event_value.explanation,
            status=tensor_event_value.status)
        return sample_data

    @staticmethod
    def _add_benchmark(tensor_event_value):
        """
        Parse benchmark data from Explain message.

        Args:
            tensor_event_value (Event): The object of Explain message.

        Returns:
            BenchmarkContainer, a named tuple containing benchmark data.
        """
        benchmark_data = BenchmarkContainer(
            benchmark=tensor_event_value.benchmark,
            status=tensor_event_value.status)

        return benchmark_data

    @staticmethod
    def _add_metadata(tensor_event_value):
        """
        Parse metadata from Explain message.

        Args:
            tensor_event_value (Event): The object of Explain message.

        Returns:
            MetadataContainer, a named tuple containing metadata.
        """
        metadata_value = MetadataContainer(
            metadata=tensor_event_value.metadata,
            status=tensor_event_value.status)

        return metadata_value
Example #21
0
    def parse_explain(self, filenames):
        """
        Load summary file and parse file content.

        Args:
            filenames (list[str]): File name list.

        Returns:
            tuple, the elements of the tuple are:

                - is_clean (bool): True if the latest file has changed and old data should be cleaned.
                - is_end (bool): True if all the summary files are finished loading.
                - event_data (dict): Event data where keys are explanation fields.
        """
        summary_files = self.sort_files(filenames)

        is_end = False
        is_clean = False
        event_data = {}
        filename = summary_files[-1]

        file_path = FileHandler.join(self._summary_dir, filename)
        if filename != self._latest_filename:
            self._summary_file_handler = FileHandler(file_path, 'rb')
            self._latest_filename = filename
            self._latest_file_size = 0
            is_clean = True

        new_size = FileHandler.file_stat(file_path).size
        if new_size == self._latest_file_size:
            is_end = True
            return is_clean, is_end, event_data

        while True:
            start_offset = self._summary_file_handler.offset
            try:
                event_str = self.event_load(self._summary_file_handler)
                if event_str is None:
                    self._summary_file_handler.reset_offset(start_offset)
                    is_end = True
                    return is_clean, is_end, event_data
                if len(event_str) > MAX_EVENT_STRING:
                    logger.warning(
                        "file_path: %s, event string: %d exceeds %d and drop it.",
                        self._summary_file_handler.file_path, len(event_str),
                        MAX_EVENT_STRING)
                    continue

                field_list, tensor_value_list = self._event_decode(event_str)
                for field, tensor_value in zip(field_list, tensor_value_list):
                    event_data[field] = tensor_value
                logger.info("Parse summary file offset %d, file path: %s.",
                            self._summary_file_handler.offset, file_path)
                return is_clean, is_end, event_data

            except (exceptions.CRCFailedError,
                    exceptions.CRCLengthFailedError) as ex:
                self._summary_file_handler.reset_offset(start_offset)
                is_end = True
                logger.warning(
                    "Check crc failed and ignore this file, file_path=%s, offset=%s. Detail: %r.",
                    self._summary_file_handler.file_path,
                    self._summary_file_handler.offset, str(ex))
                return is_clean, is_end, event_data
            except (OSError, DecodeError,
                    exceptions.MindInsightException) as ex:
                is_end = True
                logger.warning(
                    "Parse log file fail, and ignore this file, detail: %r,"
                    "file path: %s.", str(ex),
                    self._summary_file_handler.file_path)
                return is_clean, is_end, event_data
            except Exception as ex:
                logger.exception(ex)
                raise UnknownError(str(ex))
Example #22
0
    def list_events(self, filenames):
        """
        Load summary file and parse file content.

        Args:
            filenames (list[str]): File name list.

        Returns:
            tuple, the elements of the tuple are:

                - file_changed (bool): True if the latest file is changed.
                - is_end (bool): True if all the summary files are finished loading.
                - event_data (dict): Event data where keys are explanation fields.
        """
        summary_files = self.sort_files(filenames)

        is_end = False
        file_changed = False
        event_data = {}
        filename = summary_files[-1]

        file_path = FileHandler.join(self._summary_dir, filename)
        if filename != self._latest_filename:
            self._summary_file_handler = FileHandler(file_path, 'rb')
            self._latest_filename = filename
            self._latest_offset = 0
            file_changed = True

        new_size = FileHandler.file_stat(file_path).size
        if new_size == self._latest_offset:
            is_end = True
            return file_changed, is_end, event_data

        while True:
            start_offset = self._summary_file_handler.offset
            try:
                event_str = self.event_load(self._summary_file_handler)
                if event_str is None:
                    self._summary_file_handler.reset_offset(start_offset)
                    is_end = True
                    return file_changed, is_end, event_data
                if len(event_str) > MAX_EVENT_STRING:
                    logger.warning(
                        "file_path: %s, event string: %d exceeds %d and drop it.",
                        self._summary_file_handler.file_path, len(event_str),
                        MAX_EVENT_STRING)
                    continue

                field_list, tensor_value_list = self._event_decode(event_str)
                for field, tensor_value in zip(field_list, tensor_value_list):
                    event_data[field] = tensor_value

                logger.debug("Parse summary file offset %d, file path: %s.",
                             self._summary_file_handler.offset, file_path)
                return file_changed, is_end, event_data
            except exceptions.CRCLengthFailedError as ex:
                self._summary_file_handler.reset_offset(start_offset)
                is_end = True
                logger.warning(
                    "Check crc failed and reset offset, file_path=%s, offset=%s. Detail: %r.",
                    self._summary_file_handler.file_path,
                    self._summary_file_handler.offset, str(ex))
                return file_changed, is_end, event_data
            except Exception as ex:
                # Note: If an unknown error occurs, we will set the offset to the end of this file,
                # which is equivalent to stopping parsing this file. We do not delete the current job
                # and retain the data that has been successfully parsed.
                self._summary_file_handler.reset_offset(new_size)

                # Notice: If the current job is the latest one in the loader pool and the job is deleted,
                # the job goes into an infinite cycle of load-fail-delete-reload-load-fail-delete.
                # We need to prevent this infinite loop.
                logger.error(
                    "Parse summary file failed, will set offset to the file end. file_path: %s, "
                    "offset: %d, detail: %s.", file_path,
                    self._summary_file_handler.offset, str(ex))
                logger.exception(ex)
                raise UnknownError(str(ex))
            finally:
                self._latest_offset = self._summary_file_handler.offset
Example #23
0
class ExplainParser(_SummaryParser):
    """The summary file parser."""
    def __init__(self, summary_dir):
        super(ExplainParser, self).__init__(summary_dir)
        self._latest_offset = 0

    def list_events(self, filenames):
        """
        Load summary file and parse file content.

        Args:
            filenames (list[str]): File name list.

        Returns:
            tuple, the elements of the tuple are:

                - file_changed (bool): True if the latest file is changed.
                - is_end (bool): True if all the summary files are finished loading.
                - event_data (dict): Event data where keys are explanation fields.
        """
        summary_files = self.sort_files(filenames)

        is_end = False
        file_changed = False
        event_data = {}
        filename = summary_files[-1]

        file_path = FileHandler.join(self._summary_dir, filename)
        if filename != self._latest_filename:
            self._summary_file_handler = FileHandler(file_path, 'rb')
            self._latest_filename = filename
            self._latest_offset = 0
            file_changed = True

        new_size = FileHandler.file_stat(file_path).size
        if new_size == self._latest_offset:
            is_end = True
            return file_changed, is_end, event_data

        while True:
            start_offset = self._summary_file_handler.offset
            try:
                event_str = self.event_load(self._summary_file_handler)
                if event_str is None:
                    self._summary_file_handler.reset_offset(start_offset)
                    is_end = True
                    return file_changed, is_end, event_data
                if len(event_str) > MAX_EVENT_STRING:
                    logger.warning(
                        "file_path: %s, event string: %d exceeds %d and drop it.",
                        self._summary_file_handler.file_path, len(event_str),
                        MAX_EVENT_STRING)
                    continue

                field_list, tensor_value_list = self._event_decode(event_str)
                for field, tensor_value in zip(field_list, tensor_value_list):
                    event_data[field] = tensor_value

                logger.debug("Parse summary file offset %d, file path: %s.",
                             self._summary_file_handler.offset, file_path)
                return file_changed, is_end, event_data
            except exceptions.CRCLengthFailedError as ex:
                self._summary_file_handler.reset_offset(start_offset)
                is_end = True
                logger.warning(
                    "Check crc failed and reset offset, file_path=%s, offset=%s. Detail: %r.",
                    self._summary_file_handler.file_path,
                    self._summary_file_handler.offset, str(ex))
                return file_changed, is_end, event_data
            except Exception as ex:
                # Note: If an unknown error occurs, we will set the offset to the end of this file,
                # which is equivalent to stopping parsing this file. We do not delete the current job
                # and retain the data that has been successfully parsed.
                self._summary_file_handler.reset_offset(new_size)

                # Notice: If the current job is the latest one in the loader pool and the job is deleted,
                # the job goes into an infinite cycle of load-fail-delete-reload-load-fail-delete.
                # We need to prevent this infinite loop.
                logger.error(
                    "Parse summary file failed, will set offset to the file end. file_path: %s, "
                    "offset: %d, detail: %s.", file_path,
                    self._summary_file_handler.offset, str(ex))
                logger.exception(ex)
                raise UnknownError(str(ex))
            finally:
                self._latest_offset = self._summary_file_handler.offset

    @staticmethod
    def _event_decode(event_str):
        """
        Transform `Event` data to tensor_event and update it to EventsData.

        Args:
            event_str (str): Message event string in summary proto, data read from file handler.

        Returns:
            tuple, the elements of the result tuple are:

                - field_list (list): Explain fields to be parsed.
                - tensor_value_list (list): Parsed data with respect to the field list.
        """

        logger.debug("Start to parse event string. Event string len: %s.",
                     len(event_str))
        event = xai_pb2.Event.FromString(event_str)
        logger.debug("Deserialize event string completed.")

        fields = {
            'sample_id': ExplainFieldsEnum.SAMPLE_ID,
            'benchmark': ExplainFieldsEnum.BENCHMARK,
            'metadata': ExplainFieldsEnum.METADATA
        }

        tensor_event_value = getattr(event, 'explain')

        field_list = []
        tensor_value_list = []
        for field in fields:
            if getattr(tensor_event_value, field, None) is None:
                continue

            if ExplainFieldsEnum.METADATA.value == field and not tensor_event_value.metadata.label:
                continue

            tensor_value = None
            if field == ExplainFieldsEnum.SAMPLE_ID.value:
                tensor_value = ExplainParser._add_image_data(
                    tensor_event_value)
            elif field == ExplainFieldsEnum.BENCHMARK.value:
                tensor_value = ExplainParser._add_benchmark(tensor_event_value)
            elif field == ExplainFieldsEnum.METADATA.value:
                tensor_value = ExplainParser._add_metadata(tensor_event_value)
            logger.debug("Event generated, label is %s, step is %s.", field,
                         event.step)
            field_list.append(field)
            tensor_value_list.append(tensor_value)
        return field_list, tensor_value_list

    @staticmethod
    def _add_image_data(tensor_event_value):
        """
        Parse image data based on sample_id in Explain message.

        Args:
            tensor_event_value (Event): The object of Explain message.

        Returns:
            SampleContainer, a named tuple containing sample data.
        """
        inference = InferfenceContainer(
            ground_truth_prob=tensor_event_value.inference.ground_truth_prob,
            ground_truth_prob_sd=tensor_event_value.inference.
            ground_truth_prob_sd,
            ground_truth_prob_itl95_low=tensor_event_value.inference.
            ground_truth_prob_itl95_low,
            ground_truth_prob_itl95_hi=tensor_event_value.inference.
            ground_truth_prob_itl95_hi,
            predicted_label=tensor_event_value.inference.predicted_label,
            predicted_prob=tensor_event_value.inference.predicted_prob,
            predicted_prob_sd=tensor_event_value.inference.predicted_prob_sd,
            predicted_prob_itl95_low=tensor_event_value.inference.
            predicted_prob_itl95_low,
            predicted_prob_itl95_hi=tensor_event_value.inference.
            predicted_prob_itl95_hi)
        sample_data = SampleContainer(
            sample_id=tensor_event_value.sample_id,
            image_path=tensor_event_value.image_path,
            ground_truth_label=tensor_event_value.ground_truth_label,
            inference=inference,
            explanation=tensor_event_value.explanation,
            hierarchical_occlusion=tensor_event_value.hoc,
            status=tensor_event_value.status)
        return sample_data

    @staticmethod
    def _add_benchmark(tensor_event_value):
        """
        Parse benchmark data from Explain message.

        Args:
            tensor_event_value (Event): The object of Explain message.

        Returns:
            BenchmarkContainer, a named tuple containing benchmark data.
        """
        benchmark_data = BenchmarkContainer(
            benchmark=tensor_event_value.benchmark,
            status=tensor_event_value.status)

        return benchmark_data

    @staticmethod
    def _add_metadata(tensor_event_value):
        """
        Parse metadata from Explain message.

        Args:
            tensor_event_value (Event): The object of Explain message.

        Returns:
            MetadataContainer, a named tuple containing metadata.
        """
        metadata_value = MetadataContainer(
            metadata=tensor_event_value.metadata,
            status=tensor_event_value.status)

        return metadata_value
Example #24
0
def abc():
    FileHandler.is_file('aaa')
    print('after')