Пример #1
0
    def get_plugins(self, train_id, manual_update=True):
        """
        Fetch the plug-in data recorded for one training job.

        Args:
            train_id (str): Identifier of the training job to look up.
            manual_update (bool): When True, the data manager is asked to cache
                the train job before it is read. Default: True.

        Returns:
            dict, holds a single "plugins" entry, refer to restful api. When no
            plug-in data is available, every plug-in name listed by
            PluginNameEnum is mapped to an empty list.

        Raises:
            QueryStringContainsNullByteError: If the null-byte check on
                train_id reports a hit.
        """
        Validation.check_param_empty(train_id=train_id)
        if contains_null_byte(train_id=train_id):
            raise QueryStringContainsNullByteError("train job id: {} contains null byte.".format(train_id))

        if manual_update:
            self._data_manager.cache_train_job(train_id)
        job = self._data_manager.get_train_job(train_id)

        try:
            found_plugins = job.get_detail(DATAVISUAL_CACHE_KEY).get(DATAVISUAL_PLUGIN_KEY)
        except exceptions.TrainJobDetailNotInCacheError:
            # Detail is not cached: treat it the same as "no plug-ins".
            found_plugins = []

        if found_plugins:
            return {'plugins': found_plugins}

        # Nothing to report: answer with an empty list for every known plug-in.
        return {'plugins': {name: [] for name in PluginNameEnum.list_members()}}
Пример #2
0
    def is_summary_directory(self, summary_base_dir, relative_path):
        """
        Check if the given summary directory is valid.

        A directory qualifies as soon as one of its direct, non-symlink entries is
        a regular file whose name matches SUMMARY_FILENAME_REGEX or
        PB_FILENAME_REGEX, or a sub-directory whose name matches one of the
        profiler directory patterns and which passes the corresponding profiler
        directory check.

        Args:
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                                starting with "./" .

        Returns:
            bool, indicates if the given summary directory is valid.

        Raises:
            FileSystemPermissionError: If the summary directory cannot be scanned
                because of a PermissionError.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> is_valid = summary_watcher.is_summary_directory('/summary/base/dir', './job-01')
        """
        if contains_null_byte(summary_base_dir=summary_base_dir,
                              relative_path=relative_path):
            return False

        if not self._is_valid_summary_directory(summary_base_dir,
                                                relative_path):
            return False

        summary_directory = os.path.realpath(
            os.path.join(summary_base_dir, relative_path))
        try:
            entries = os.scandir(summary_directory)
        except PermissionError:
            logger.error('Path of summary base directory is not accessible.')
            raise FileSystemPermissionError(
                'Path of summary base directory is not accessible.')

        # The loop returns on the first match, so the scandir iterator is
        # usually not exhausted; the with-block guarantees it gets closed.
        with entries:
            for entry in entries:
                if entry.is_symlink():
                    continue

                summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX,
                                            entry.name)
                if summary_pattern is not None and entry.is_file():
                    return True

                pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
                if pb_pattern is not None and entry.is_file():
                    return True

                if not entry.is_dir():
                    continue

                profiler_pattern = re.search(self.PROFILER_DIRECTORY_REGEX,
                                             entry.name)
                cluster_profiler_pattern = re.search(
                    self.CLUSTER_PROFILER_DIRECTORY_REGEX, entry.name)
                if profiler_pattern is None and cluster_profiler_pattern is None:
                    continue

                full_path = os.path.realpath(
                    os.path.join(summary_directory, entry.name))
                # Only the first element of each helper's result is consulted here.
                if self._is_valid_profiler_directory(full_path)[0] or \
                        self._is_valid_cluster_profiler_directory(full_path)[0]:
                    return True
        return False
Пример #3
0
    def list_summary_directories(self, summary_base_dir, overall=True):
        """
        List summary directories within base directory.

        Symlink entries of the base directory are skipped. Scanning stops once
        MAX_SUMMARY_DIR_COUNT directories have been collected or, when overall is
        False, once the scan counter exceeds MAX_SCAN_COUNT.

        Args:
            summary_base_dir (str): Path of summary base directory.
            overall (bool): Limit the total num of scanning if overall is False.

        Returns:
            list, list of summary directory info, each of which including the following attributes.
                - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
                                        starting with "./".
                - create_time (datetime): Creation time of summary file.
                - update_time (datetime): Modification time of summary file.
                - profiler (dict): profiler info, including profiler subdirectory path, profiler creation time and
                                    profiler modification time. None if there is no profiler info.

            The list is sorted by update time in descending order, then by relative path in ascending order.
            An empty list is returned if the base directory fails the null-byte or validity check.

        Raises:
            FileSystemPermissionError: If the base directory cannot be scanned
                because of a PermissionError.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> directories = summary_watcher.list_summary_directories('/summary/base/dir')
        """
        if contains_null_byte(summary_base_dir=summary_base_dir):
            return []

        # Relative path of the base directory itself ('.' joined with an empty component).
        relative_path = os.path.join('.', '')
        if not self._is_valid_summary_directory(summary_base_dir,
                                                relative_path):
            return []

        summary_dict = {}
        counter = Counter(max_count=None if overall else self.MAX_SCAN_COUNT)

        try:
            entries = os.scandir(summary_base_dir)
        except PermissionError:
            logger.error('Path of summary base directory is not accessible.')
            raise FileSystemPermissionError(
                'Path of summary base directory is not accessible.')

        # Both break statements below can leave the scandir iterator unexhausted;
        # the with-block guarantees it gets closed in every case.
        with entries:
            for entry in entries:
                if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                    break
                try:
                    counter.add()
                except MaxCountExceededError:
                    logger.info(
                        'Stop further scanning due to overall is False and '
                        'number of scanned files exceeds upper limit.')
                    break
                if entry.is_symlink():
                    continue
                if entry.is_file():
                    self._update_summary_dict(summary_dict, summary_base_dir,
                                              relative_path, entry)
                elif entry.is_dir():
                    entry_path = os.path.realpath(
                        os.path.join(summary_base_dir, entry.name))
                    self._scan_subdir_entries(summary_dict, summary_base_dir,
                                              entry_path, entry.name, counter)

        directories = []
        for key, value in summary_dict.items():
            directory = {
                'relative_path': key,
                'profiler': None,
                'create_time': value['ctime'],
                'update_time': value['mtime'],
            }
            profiler = value.get('profiler')
            if profiler is not None:
                directory['profiler'] = {
                    'directory': profiler['directory'],
                    'create_time': profiler['ctime'],
                    'update_time': profiler['mtime'],
                }
            directories.append(directory)

        # sort by update time in descending order and relative path in ascending order
        directories.sort(key=lambda x: (-int(x['update_time'].timestamp()), x[
            'relative_path']))

        return directories
Пример #4
0
    def list_summaries(self, summary_base_dir, relative_path='./'):
        """
        List summary files within the given summary directory.

        Only regular, non-symlink files whose name matches SUMMARY_FILENAME_REGEX
        are reported. Files whose embedded timestamp cannot be converted to a
        datetime, or which disappear before they can be stat-ed, are skipped.

        Args:
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                                starting with "./" .

        Returns:
            list, list of summary file including the following attributes.
                - file_name (str): Summary file name.
                - create_time (datetime): Creation time of summary file, taken from the file name.
                - update_time (datetime): Modification time of summary file.

            The list is sorted by update time in descending order, then by file name in ascending order.
            An empty list is returned if the paths fail the null-byte or validity check.

        Raises:
            FileSystemPermissionError: If the summary directory cannot be scanned
                because of a PermissionError.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> summaries = summary_watcher.list_summaries('/summary/base/dir', './job-01')
        """
        if contains_null_byte(summary_base_dir=summary_base_dir,
                              relative_path=relative_path):
            return []

        if not self._is_valid_summary_directory(summary_base_dir,
                                                relative_path):
            return []

        summaries = []
        summary_directory = os.path.realpath(
            os.path.join(summary_base_dir, relative_path))
        try:
            entries = os.scandir(summary_directory)
        except PermissionError:
            logger.error('Path of summary directory is not accessible.')
            raise FileSystemPermissionError(
                'Path of summary directory is not accessible.')

        # Close the scandir iterator even if an unexpected error leaves the loop early.
        with entries:
            for entry in entries:
                if entry.is_symlink() or not entry.is_file():
                    continue

                pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
                if pattern is None:
                    continue

                timestamp = int(pattern.groupdict().get('timestamp'))
                try:
                    # extract created time from filename
                    ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
                except (OverflowError, OSError, ValueError):
                    # An out-of-range timestamp may surface as any of these
                    # depending on its magnitude and the platform; one bad
                    # file name must not abort the whole listing.
                    continue

                try:
                    stat = entry.stat()
                except FileNotFoundError:
                    logger.warning('File %s not found.', entry.name)
                    continue

                mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()

                summaries.append({
                    'file_name': entry.name,
                    'create_time': ctime,
                    'update_time': mtime,
                })

        # sort by update time in descending order and filename in ascending order
        summaries.sort(
            key=lambda x: (-int(x['update_time'].timestamp()), x['file_name']))

        return summaries