def is_summary_directory(self, summary_base_dir, relative_path):
    """
    Check if the given summary directory is valid.

    The directory is reported as valid as soon as one of its direct,
    non-symlink entries is a file whose name matches SUMMARY_FILENAME_REGEX
    or PB_FILENAME_REGEX, or a directory whose name matches
    PROFILER_DIRECTORY_REGEX and which passes the profiler directory check.

    Args:
        summary_base_dir (str): Path of summary base directory.
        relative_path (str): Relative path of summary directory, referring to summary base directory,
            starting with "./" .

    Returns:
        bool, indicates if the given summary directory is valid.

    Raises:
        FileSystemPermissionError: If the summary directory can not be scanned due to permissions.

    Examples:
        >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
        >>> summary_watcher = SummaryWatcher()
        >>> is_valid = summary_watcher.is_summary_directory('/summary/base/dir', './job-01')
    """
    if contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
        return False

    if not self._is_valid_summary_directory(summary_base_dir, relative_path):
        return False

    summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path))
    try:
        entries = os.scandir(summary_directory)
    except PermissionError:
        logger.error('Path of summary base directory is not accessible.')
        raise FileSystemPermissionError('Path of summary base directory is not accessible.')

    # The iterator is usually abandoned early (first match returns), so close
    # it explicitly instead of leaving the directory handle to the garbage collector.
    with entries:
        for entry in entries:
            if entry.is_symlink():
                continue

            summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
            if summary_pattern is not None and entry.is_file():
                return True

            pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
            if pb_pattern is not None and entry.is_file():
                return True

            profiler_pattern = re.search(self.PROFILER_DIRECTORY_REGEX, entry.name)
            if profiler_pattern is not None and entry.is_dir():
                full_path = os.path.realpath(os.path.join(summary_directory, entry.name))
                # Only the first element of the helper's result is used as the validity flag.
                if self._is_valid_profiler_directory(full_path)[0]:
                    return True

    return False
def _validate_config(self, config_path):
    """
    Load the YAML config file, validate it and prepare the summary base directory.

    Args:
        config_path (str): Path of the config file; it is normalized before being opened.

    Returns:
        The loaded config, with 'summary_base_dir' replaced by its normalized path.

    Raises:
        FileSystemPermissionError: If the config file can not be opened due to permissions.
        UnknownError: If any other error occurs while opening or parsing the config file.
    """
    normalized_config_path = self._normalize_path("config_path", config_path)

    try:
        with open(normalized_config_path, "r") as config_file:
            loaded_config = yaml.safe_load(config_file)
    except PermissionError as err:
        detail = str(err)
        raise FileSystemPermissionError(
            "Can not open config file. Detail: %s." % detail)
    except Exception as err:
        detail = str(err)
        raise UnknownError("Detail: %s." % detail)

    self._validate_config_schema(loaded_config)

    # Store the normalized base directory back into the config before creating it.
    raw_base_dir = loaded_config.get('summary_base_dir')
    loaded_config['summary_base_dir'] = self._normalize_path(
        "summary_base_dir", raw_base_dir)
    self._make_summary_base_dir(loaded_config['summary_base_dir'])

    return loaded_config
def list_summary_directories(self, summary_base_dir, overall=True):
    """
    List summary directories within base directory.

    Args:
        summary_base_dir (str): Path of summary base directory.
        overall (bool): Limit the total num of scanning if overall is False.

    Returns:
        list, list of summary directory info, each of which including the following attributes.
            - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
                                    starting with "./".
            - create_time (datetime): Creation time of summary file.
            - update_time (datetime): Modification time of summary file.
            - profiler (dict): profiler info, including profiler subdirectory path, profiler creation time and
                                profiler modification time.

    Raises:
        FileSystemPermissionError: If the summary base directory can not be scanned due to permissions.

    Examples:
        >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
        >>> summary_watcher = SummaryWatcher()
        >>> directories = summary_watcher.list_summary_directories('/summary/base/dir')
    """
    if contains_null_byte(summary_base_dir=summary_base_dir):
        return []

    relative_path = os.path.join('.', '')
    if not self._is_valid_summary_directory(summary_base_dir, relative_path):
        return []

    summary_dict = {}
    # No scan limit when overall is True.
    counter = Counter(max_count=None if overall else self.MAX_SCAN_COUNT)

    try:
        entries = os.scandir(summary_base_dir)
    except PermissionError:
        logger.error('Path of summary base directory is not accessible.')
        raise FileSystemPermissionError(
            'Path of summary base directory is not accessible.')

    # Both limits below leave the loop early, so close the scandir iterator
    # explicitly rather than leaking the directory handle.
    with entries:
        for entry in entries:
            if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                break
            try:
                counter.add()
            except MaxCountExceededError:
                logger.info(
                    'Stop further scanning due to overall is False and '
                    'number of scanned files exceeds upper limit.')
                break
            if entry.is_symlink():
                # Symlinks are never followed.
                pass
            elif entry.is_file():
                self._update_summary_dict(summary_dict, summary_base_dir,
                                          relative_path, entry)
            elif entry.is_dir():
                entry_path = os.path.realpath(
                    os.path.join(summary_base_dir, entry.name))
                self._scan_subdir_entries(summary_dict, summary_base_dir,
                                          entry_path, entry.name, counter)

    directories = []
    for key, value in summary_dict.items():
        directory = {
            'relative_path': key,
            'profiler': None,
            'create_time': value['ctime'],
            'update_time': value['mtime'],
        }
        profiler = value.get('profiler')
        if profiler is not None:
            directory['profiler'] = {
                'directory': profiler['directory'],
                'create_time': profiler['ctime'],
                'update_time': profiler['mtime'],
            }
        directories.append(directory)

    # sort by update time in descending order and relative path in ascending order
    directories.sort(key=lambda x: (-int(x['update_time'].timestamp()),
                                    x['relative_path']))

    return directories
def list_summaries(self, summary_base_dir, relative_path='./'):
    """
    List info of the summary files within the given summary directory.

    Only direct, non-symlink regular files whose name matches
    SUMMARY_FILENAME_REGEX are reported. Files whose embedded timestamp can
    not be converted to a datetime, or which vanish before they can be
    stat'ed, are skipped.

    Args:
        summary_base_dir (str): Path of summary base directory.
        relative_path (str): Relative path of summary directory, referring to summary base directory,
            starting with "./" .

    Returns:
        list, list of summary file including the following attributes.
            - file_name (str): Summary file name.
            - create_time (datetime): Creation time of summary file.
            - update_time (datetime): Modification time of summary file.

    Raises:
        FileSystemPermissionError: If the summary directory can not be scanned due to permissions.

    Examples:
        >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
        >>> summary_watcher = SummaryWatcher()
        >>> summaries = summary_watcher.list_summaries('/summary/base/dir', './job-01')
    """
    if contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
        return []

    if not self._is_valid_summary_directory(summary_base_dir, relative_path):
        return []

    summaries = []
    summary_directory = os.path.realpath(
        os.path.join(summary_base_dir, relative_path))
    try:
        entries = os.scandir(summary_directory)
    except PermissionError:
        logger.error('Path of summary directory is not accessible.')
        raise FileSystemPermissionError(
            'Path of summary directory is not accessible.')

    # Close the iterator even if an unexpected error interrupts the scan.
    with entries:
        for entry in entries:
            if entry.is_symlink() or not entry.is_file():
                continue

            pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
            if pattern is None:
                continue

            timestamp = int(pattern.groupdict().get('timestamp'))
            try:
                # extract created time from filename
                ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
            except (OverflowError, OSError, ValueError):
                # An out-of-range timestamp may surface as any of these,
                # depending on its magnitude and the platform; one badly
                # named file must not abort the whole listing.
                continue

            try:
                stat = entry.stat()
            except FileNotFoundError:
                logger.warning('File %s not found.', entry.name)
                continue

            mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()

            summaries.append({
                'file_name': entry.name,
                'create_time': ctime,
                'update_time': mtime,
            })

    # sort by update time in descending order and filename in ascending order
    summaries.sort(
        key=lambda x: (-int(x['update_time'].timestamp()), x['file_name']))

    return summaries
def query_image_binary(self, train_id, image_path, image_type):
    """
    Query image binary content.

    Non-overlay images, and overlay images that are already RGBA, are
    returned as the raw file bytes. Other overlay images are treated as a
    saliency map, colorized between _SALIENCY_CMAP_LOW and _SALIENCY_CMAP_HI
    with the saliency as alpha channel, and returned as an encoded image.

    Args:
        train_id (str): Job ID.
        image_path (str): Image path relative to explain job's summary directory.
        image_type (str): Image type, 'original' or 'overlay'.

    Returns:
        bytes, image binary.

    Raises:
        FileSystemPermissionError: If the path is forbidden or not readable.
        ImageNotExistError: If the image file does not exist.
        UnknownError: If the file is not a valid image or the overlay image mode is unsupported.
    """
    abs_image_path = os.path.join(self.job_manager.summary_base_dir,
                                  _clean_train_id_b4_join(train_id),
                                  image_path)

    if self._is_forbidden(abs_image_path):
        raise FileSystemPermissionError("Forbidden.")

    try:
        if image_type != "overlay":
            # no need to convert
            with open(abs_image_path, "rb") as fp:
                return fp.read()

        image = Image.open(abs_image_path)

        if image.mode == _RGBA_MODE:
            # It is RGBA already, do not convert.
            # Release the handle held by the image before re-reading the raw bytes.
            image.close()
            with open(abs_image_path, "rb") as fp:
                return fp.read()

    except FileNotFoundError:
        raise ImageNotExistError(
            f"train_id:{train_id} path:{image_path} type:{image_type}")
    except PermissionError:
        raise FileSystemPermissionError(
            f"train_id:{train_id} path:{image_path} type:{image_type}")
    except OSError:
        raise UnknownError(
            f"Invalid image file: train_id:{train_id} path:{image_path} type:{image_type}"
        )

    try:
        if image.mode == _SINGLE_CHANNEL_MODE:
            saliency = np.asarray(image) / _UINT8_MAX
        elif image.mode == _RGB_MODE:
            # Only the first channel is used as the saliency value.
            saliency = np.asarray(image)
            saliency = saliency[:, :, 0] / _UINT8_MAX
        else:
            raise UnknownError(f"Invalid overlay image mode:{image.mode}.")
    finally:
        # The pixel data has been extracted (or the mode rejected); the image
        # and its underlying file are no longer needed.
        image.close()

    saliency_stack = np.empty((saliency.shape[0], saliency.shape[1], 4))
    # Fill every channel, including alpha, so no uninitialized memory takes
    # part in the arithmetic below; alpha is overwritten afterwards anyway.
    saliency_stack[:, :, :] = saliency[:, :, np.newaxis]
    rgba = saliency_stack * _SALIENCY_CMAP_HI
    rgba += (1 - saliency_stack) * _SALIENCY_CMAP_LOW
    rgba[:, :, 3] = saliency * _UINT8_MAX

    overlay = Image.fromarray(np.uint8(rgba), mode=_RGBA_MODE)
    buffer = io.BytesIO()
    overlay.save(buffer, format=_PNG_FORMAT)

    return buffer.getvalue()
def list_summary_directories(self, summary_base_dir, overall=True):
    """
    List summary directories within base directory.

    The base directory itself and its direct subdirectories are scanned;
    symlinks are skipped at both levels.

    Args:
        summary_base_dir (str): Path of summary base directory.
        overall (bool): If False, scanning stops once the number of scanned
            entries reaches MAX_SCAN_COUNT. Default: True.

    Returns:
        list, list of summary directory info, each of which including the following attributes.
            - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
                                    starting with "./".
            - create_time (datetime): Creation time of summary file.
            - update_time (datetime): Modification time of summary file.

    Raises:
        FileSystemPermissionError: If the summary base directory can not be scanned due to permissions.

    Examples:
        >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
        >>> summary_watcher = SummaryWatcher()
        >>> directories = summary_watcher.list_summary_directories('/summary/base/dir')
    """
    if self._contains_null_byte(summary_base_dir=summary_base_dir):
        return []

    if not os.path.exists(summary_base_dir):
        logger.warning('Path of summary base directory not exists.')
        return []

    if not os.path.isdir(summary_base_dir):
        logger.warning(
            'Path of summary base directory is not a valid directory.')
        return []

    summary_dict = {}
    scan_count = 0

    try:
        entries = os.scandir(summary_base_dir)
    except PermissionError:
        logger.error('Path of summary base directory is not accessible.')
        raise FileSystemPermissionError(
            'Path of summary base directory is not accessible.')

    # Relative path used for files located directly in the base directory.
    relative_path = os.path.join('.', '')

    # Every loop below may stop early, so the scandir iterators are closed
    # explicitly instead of leaking their directory handles.
    with entries:
        for entry in entries:
            if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                break
            if entry.is_symlink():
                pass
            elif entry.is_file():
                self._update_summary_dict(summary_dict, relative_path, entry)
            elif entry.is_dir():
                full_path = os.path.realpath(
                    os.path.join(summary_base_dir, entry.name))

                try:
                    subdir_entries = os.scandir(full_path)
                except PermissionError:
                    logger.warning(
                        'Path of %s under summary base directory is not accessible.',
                        entry.name)
                else:
                    subdir_relative_path = os.path.join('.', entry.name)
                    with subdir_entries:
                        for subdir_entry in subdir_entries:
                            if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                                break
                            if subdir_entry.is_symlink():
                                pass
                            elif subdir_entry.is_file():
                                self._update_summary_dict(
                                    summary_dict, subdir_relative_path,
                                    subdir_entry)

                            scan_count += 1
                            if not overall and scan_count >= self.MAX_SCAN_COUNT:
                                # The outer check below fires as well and
                                # stops the whole scan.
                                break

            scan_count += 1
            if not overall and scan_count >= self.MAX_SCAN_COUNT:
                logger.info(
                    'Stop further scanning due to overall is False and '
                    'number of scanned files exceeds upper limit.')
                break

    directories = [{
        'relative_path': key,
        'create_time': value['ctime'],
        'update_time': value['mtime'],
    } for key, value in summary_dict.items()]

    # sort by update time in descending order and relative path in ascending order
    directories.sort(key=lambda x: (-int(x['update_time'].timestamp()),
                                    x['relative_path']))

    return directories