Пример #1
0
    def is_summary_directory(self, summary_base_dir, relative_path):
        """
        Check if the given summary directory is valid.

        The directory is scanned one level deep, ignoring symlinks. It is reported as valid as soon
        as one entry is found that is either a file whose name matches ``SUMMARY_FILENAME_REGEX`` or
        ``PB_FILENAME_REGEX``, or a directory whose name matches ``PROFILER_DIRECTORY_REGEX`` and
        which ``_is_valid_profiler_directory`` accepts.

        Args:
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                                starting with "./" .

        Returns:
            bool, indicates if the given summary directory is valid.

        Raises:
            FileSystemPermissionError: If the directory can not be scanned due to missing permission.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> is_valid = summary_watcher.is_summary_directory('/summary/base/dir', './job-01')
        """
        if contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
            return False

        if not self._is_valid_summary_directory(summary_base_dir, relative_path):
            return False

        summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path))
        try:
            entries = os.scandir(summary_directory)
        except PermissionError:
            logger.error('Path of summary base directory is not accessible.')
            raise FileSystemPermissionError('Path of summary base directory is not accessible.')

        # The scandir iterator owns an OS directory handle. Because this loop returns early on the
        # first match, the iterator is usually not exhausted, so it must be closed explicitly.
        with entries:
            for entry in entries:
                if entry.is_symlink():
                    continue

                name = entry.name
                if re.search(self.SUMMARY_FILENAME_REGEX, name) is not None and entry.is_file():
                    return True

                if re.search(self.PB_FILENAME_REGEX, name) is not None and entry.is_file():
                    return True

                if re.search(self.PROFILER_DIRECTORY_REGEX, name) is not None and entry.is_dir():
                    full_path = os.path.realpath(os.path.join(summary_directory, name))
                    # Only the first element of the helper's result tells whether it is valid.
                    if self._is_valid_profiler_directory(full_path)[0]:
                        return True

        return False
Пример #2
0
    def _validate_config(self, config_path):
        """
        Load and validate the YAML config file located at config_path.

        The path is normalized first, the file content is parsed with ``yaml.safe_load`` and
        checked by ``_validate_config_schema``. The ``summary_base_dir`` item is then replaced
        by its normalized form and handed to ``_make_summary_base_dir``.

        Args:
            config_path (str): Path of the config file.

        Returns:
            The parsed config info with a normalized ``summary_base_dir`` item.

        Raises:
            FileSystemPermissionError: If the config file can not be opened due to missing permission.
            UnknownError: If any other error occurs while opening or parsing the config file.
        """
        normalized_path = self._normalize_path("config_path", config_path)
        try:
            with open(normalized_path, "r") as stream:
                settings = yaml.safe_load(stream)
        except PermissionError as err:
            message = "Can not open config file. Detail: %s." % str(err)
            raise FileSystemPermissionError(message)
        except Exception as err:
            message = "Detail: %s." % str(err)
            raise UnknownError(message)

        self._validate_config_schema(settings)

        base_dir = self._normalize_path("summary_base_dir", settings.get('summary_base_dir'))
        settings['summary_base_dir'] = base_dir
        self._make_summary_base_dir(settings['summary_base_dir'])
        return settings
Пример #3
0
    def list_summary_directories(self, summary_base_dir, overall=True):
        """
        List summary directories within base directory.

        Files located directly in the base directory are collected under the relative path "./".
        Each non-symlink sub-directory is handed to ``_scan_subdir_entries``. Scanning stops once
        ``MAX_SUMMARY_DIR_COUNT`` directories have been collected, or when the scan counter
        reports that its limit is exceeded.

        Args:
            summary_base_dir (str): Path of summary base directory.
            overall (bool): Limit the total num of scanning if overall is False.

        Returns:
            list, list of summary directory info, each of which including the following attributes.
                - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
                                        starting with "./".
                - create_time (datetime): Creation time of summary file.
                - update_time (datetime): Modification time of summary file.
                - profiler (dict): profiler info, including profiler subdirectory path, profiler creation time and
                                    profiler modification time.

        Raises:
            FileSystemPermissionError: If the summary base directory can not be scanned due to missing permission.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> directories = summary_watcher.list_summary_directories('/summary/base/dir')
        """
        if contains_null_byte(summary_base_dir=summary_base_dir):
            return []

        relative_path = os.path.join('.', '')
        if not self._is_valid_summary_directory(summary_base_dir, relative_path):
            return []

        summary_dict = {}
        counter = Counter(max_count=None if overall else self.MAX_SCAN_COUNT)

        try:
            entries = os.scandir(summary_base_dir)
        except PermissionError:
            logger.error('Path of summary base directory is not accessible.')
            raise FileSystemPermissionError(
                'Path of summary base directory is not accessible.')

        # Both `break` statements below leave the scandir iterator unexhausted, so close it
        # explicitly instead of leaking the underlying directory handle.
        with entries:
            for entry in entries:
                if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                    break
                try:
                    counter.add()
                except MaxCountExceededError:
                    logger.info(
                        'Stop further scanning due to overall is False and '
                        'number of scanned files exceeds upper limit.')
                    break
                if entry.is_symlink():
                    pass
                elif entry.is_file():
                    self._update_summary_dict(summary_dict, summary_base_dir,
                                              relative_path, entry)
                elif entry.is_dir():
                    entry_path = os.path.realpath(
                        os.path.join(summary_base_dir, entry.name))
                    self._scan_subdir_entries(summary_dict, summary_base_dir,
                                              entry_path, entry.name, counter)

        directories = []
        for key, value in summary_dict.items():
            directory = {
                'relative_path': key,
                'profiler': None,
                'create_time': value['ctime'],
                'update_time': value['mtime'],
            }
            profiler = value.get('profiler')
            if profiler is not None:
                directory['profiler'] = {
                    'directory': profiler['directory'],
                    'create_time': profiler['ctime'],
                    'update_time': profiler['mtime'],
                }
            directories.append(directory)

        # sort by update time (whole seconds) in descending order and relative path in ascending order
        directories.sort(key=lambda x: (-int(x['update_time'].timestamp()), x['relative_path']))

        return directories
Пример #4
0
    def list_summaries(self, summary_base_dir, relative_path='./'):
        """
        Get info of the summary files within the given summary directory.

        Only regular, non-symlink files whose name matches ``SUMMARY_FILENAME_REGEX`` are listed.
        Files whose embedded timestamp can not be converted to a datetime, or which vanish
        before they can be inspected, are skipped.

        Args:
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                                starting with "./" .

        Returns:
            list, list of summary file including the following attributes, sorted by update time in
            descending order and then by file name in ascending order.
                - file_name (str): Summary file name.
                - create_time (datetime): Creation time of summary file, extracted from the file name.
                - update_time (datetime): Modification time of summary file.

        Raises:
            FileSystemPermissionError: If the summary directory can not be scanned due to missing permission.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> summaries = summary_watcher.list_summaries('/summary/base/dir', './job-01')
        """
        if contains_null_byte(summary_base_dir=summary_base_dir,
                              relative_path=relative_path):
            return []

        if not self._is_valid_summary_directory(summary_base_dir,
                                                relative_path):
            return []

        summaries = []
        summary_directory = os.path.realpath(
            os.path.join(summary_base_dir, relative_path))
        try:
            entries = os.scandir(summary_directory)
        except PermissionError:
            logger.error('Path of summary directory is not accessible.')
            raise FileSystemPermissionError(
                'Path of summary directory is not accessible.')

        # Close the scandir iterator even if an unexpected error interrupts the loop.
        with entries:
            for entry in entries:
                if entry.is_symlink() or not entry.is_file():
                    continue

                pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
                if pattern is None:
                    continue

                timestamp = int(pattern.groupdict().get('timestamp'))
                try:
                    # extract created time from filename
                    ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
                except (OverflowError, ValueError, OSError):
                    # The timestamp comes from an arbitrary file name. Depending on its size and
                    # the platform, an out-of-range value raises any of these three errors, and
                    # one bad file name must not break the whole listing.
                    continue

                try:
                    stat = entry.stat()
                except FileNotFoundError:
                    logger.warning('File %s not found.', entry.name)
                    continue

                mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()

                summaries.append({
                    'file_name': entry.name,
                    'create_time': ctime,
                    'update_time': mtime,
                })

        # sort by update time (whole seconds) in descending order and filename in ascending order
        summaries.sort(
            key=lambda x: (-int(x['update_time'].timestamp()), x['file_name']))

        return summaries
Пример #5
0
    def query_image_binary(self, train_id, image_path, image_type):
        """
        Query image binary content.

        Images whose type is not 'overlay', and overlay images that are already in RGBA mode,
        are returned as the raw file content. Single channel and RGB overlay images are treated
        as saliency maps (for RGB only the first channel is used): every pixel is blended between
        ``_SALIENCY_CMAP_LOW`` and ``_SALIENCY_CMAP_HI`` according to its saliency, the saliency
        is also used as the alpha channel, and the result is encoded with ``_PNG_FORMAT``.

        Args:
            train_id (str): Job ID.
            image_path (str): Image path relative to explain job's summary directory.
            image_type (str): Image type, 'original' or 'overlay'.

        Returns:
            bytes, image binary.

        Raises:
            FileSystemPermissionError: If the path is forbidden or the file is not readable.
            ImageNotExistError: If the image file does not exist.
            UnknownError: If the file can not be read or decoded as an image, or the overlay image
                mode is not supported.
        """

        abs_image_path = os.path.join(self.job_manager.summary_base_dir,
                                      _clean_train_id_b4_join(train_id),
                                      image_path)

        if self._is_forbidden(abs_image_path):
            raise FileSystemPermissionError("Forbidden.")

        saliency = None
        image_mode = None
        try:

            if image_type != "overlay":
                # no need to convert
                with open(abs_image_path, "rb") as fp:
                    return fp.read()

            # Use the image as a context manager so its file handle is always released.
            # The pixel data has to be read inside the block, which also means that decoding
            # errors of truncated or corrupt files are reported as UnknownError below.
            with Image.open(abs_image_path) as image:
                image_mode = image.mode

                if image_mode == _RGBA_MODE:
                    # It is RGBA already, do not convert.
                    with open(abs_image_path, "rb") as fp:
                        return fp.read()

                if image_mode == _SINGLE_CHANNEL_MODE:
                    saliency = np.asarray(image) / _UINT8_MAX
                elif image_mode == _RGB_MODE:
                    saliency = np.asarray(image)[:, :, 0] / _UINT8_MAX

        except FileNotFoundError:
            raise ImageNotExistError(
                f"train_id:{train_id} path:{image_path} type:{image_type}")
        except PermissionError:
            raise FileSystemPermissionError(
                f"train_id:{train_id} path:{image_path} type:{image_type}")
        except OSError:
            raise UnknownError(
                f"Invalid image file: train_id:{train_id} path:{image_path} type:{image_type}"
            )

        # Raised outside of the try block so that it can never be caught by the handlers above.
        if saliency is None:
            raise UnknownError(f"Invalid overlay image mode:{image_mode}.")

        # Replicate the saliency map into all four channels. The alpha channel is filled as well
        # so that no uninitialized memory takes part in the arithmetic below; it is overwritten
        # with the real alpha value afterwards.
        saliency_stack = np.empty((saliency.shape[0], saliency.shape[1], 4))
        saliency_stack[...] = saliency[:, :, np.newaxis]
        rgba = saliency_stack * _SALIENCY_CMAP_HI
        rgba += (1 - saliency_stack) * _SALIENCY_CMAP_LOW
        rgba[:, :, 3] = saliency * _UINT8_MAX

        overlay = Image.fromarray(np.uint8(rgba), mode=_RGBA_MODE)
        buffer = io.BytesIO()
        overlay.save(buffer, format=_PNG_FORMAT)

        return buffer.getvalue()
Пример #6
0
    def list_summary_directories(self, summary_base_dir, overall=True):
        """
        List summary directories within base directory.

        The base directory itself and its direct sub-directories are scanned; symlinks are
        skipped. Files directly in the base directory are collected under the relative path "./",
        files in a sub-directory under "./<sub-directory name>". Scanning stops once
        ``MAX_SUMMARY_DIR_COUNT`` directories have been collected.

        Args:
            summary_base_dir (str): Path of summary base directory.
            overall (bool): If False, scanning also stops once ``MAX_SCAN_COUNT`` entries
                have been scanned. Default: True.

        Returns:
            list, list of summary directory info, each of which including the following attributes.
                - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
                                        starting with "./".
                - create_time (datetime): Creation time of summary file.
                - update_time (datetime): Modification time of summary file.

        Raises:
            FileSystemPermissionError: If the summary base directory can not be scanned due to missing permission.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> directories = summary_watcher.list_summary_directories('/summary/base/dir')
        """
        if self._contains_null_byte(summary_base_dir=summary_base_dir):
            return []

        if not os.path.exists(summary_base_dir):
            logger.warning('Path of summary base directory not exists.')
            return []

        if not os.path.isdir(summary_base_dir):
            logger.warning(
                'Path of summary base directory is not a valid directory.')
            return []

        summary_dict = {}
        scan_count = 0
        # Relative path used for files located directly in the base directory.
        relative_path = os.path.join('.', '')

        try:
            entries = os.scandir(summary_base_dir)
        except PermissionError:
            logger.error('Path of summary base directory is not accessible.')
            raise FileSystemPermissionError(
                'Path of summary base directory is not accessible.')

        # The loops below may stop early, so both scandir iterators are closed explicitly
        # instead of leaking their directory handles.
        with entries:
            for entry in entries:
                if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                    break
                if entry.is_symlink():
                    pass
                elif entry.is_file():
                    self._update_summary_dict(summary_dict, relative_path, entry)
                elif entry.is_dir():
                    full_path = os.path.realpath(
                        os.path.join(summary_base_dir, entry.name))
                    subdir_relative_path = os.path.join('.', entry.name)

                    try:
                        subdir_entries = os.scandir(full_path)
                    except PermissionError:
                        logger.warning(
                            'Path of %s under summary base directory is not accessible.',
                            entry.name)
                    else:
                        with subdir_entries:
                            for subdir_entry in subdir_entries:
                                if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                                    break
                                if subdir_entry.is_symlink():
                                    pass
                                elif subdir_entry.is_file():
                                    self._update_summary_dict(summary_dict,
                                                              subdir_relative_path,
                                                              subdir_entry)

                                scan_count += 1
                                if not overall and scan_count >= self.MAX_SCAN_COUNT:
                                    break

                # The entry of the base directory itself counts as one scanned item, too.
                scan_count += 1
                if not overall and scan_count >= self.MAX_SCAN_COUNT:
                    logger.info(
                        'Stop further scanning due to overall is False and '
                        'number of scanned files exceeds upper limit.')
                    break

        directories = [{
            'relative_path': key,
            'create_time': value['ctime'],
            'update_time': value['mtime'],
        } for key, value in summary_dict.items()]

        # sort by update time (whole seconds) in descending order and relative path in ascending order
        directories.sort(key=lambda x: (-int(x['update_time'].timestamp()), x['relative_path']))

        return directories