def make_directory(path):
    """
    Make sure a directory exists and return its resolved path.

    Args:
        path (str): The directory path, relative or absolute. It is resolved
            with `os.path.realpath` before being used.

    Returns:
        str, the resolved path.

    Raises:
        LineageParamTypeError: If path is not a non-blank str, or if the
            directory can not be created because of a `PermissionError`.
    """
    # `isinstance` already rejects None, so no separate None check is needed.
    if not isinstance(path, str) or not path.strip():
        log.error("Invalid input path: %r.", path)
        raise LineageParamTypeError("Invalid path type")

    # Convert a relative path to an absolute path.
    path = os.path.realpath(path)
    log.debug("The abs path is %r", path)

    # An existing path is returned as is; its permissions are not checked here.
    if os.path.exists(path):
        return path

    # Only PermissionError is translated; any other OSError raised by
    # os.makedirs propagates to the caller unchanged.
    log.debug("The directory(%s) doesn't exist, will create it", path)
    try:
        os.makedirs(path, exist_ok=True)
    except PermissionError as err:
        log.error("No write permission on the directory(%r), error = %r", path, err)
        raise LineageParamTypeError(
            "No write permission on the directory.") from err
    return path
def validate_filter_key(keys):
    """
    Check that the filter keys are a list of supported lineage keys.

    Args:
        keys (list): The keys used to select parts of the lineage info.

    Raises:
        LineageParamTypeError: If keys is not a list, or one of its elements
            is not a str.
        LineageParamValueError: If any key is not one of the supported keys.
    """
    supported_keys = [
        'metric', 'hyper_parameters', 'algorithm',
        'train_dataset', 'model', 'valid_dataset',
        'dataset_graph'
    ]

    if not isinstance(keys, list):
        log.error("Keys must be list.")
        raise LineageParamTypeError("Keys must be list.")

    # Element types are checked before the values, so a non-str element is
    # always reported as a type error.
    if any(not isinstance(item, str) for item in keys):
        log.error("Element of keys must be str.")
        raise LineageParamTypeError("Element of keys must be str.")

    unsupported = set(keys) - set(supported_keys)
    if unsupported:
        err_msg = "Keys must be in {}.".format(supported_keys)
        log.error(err_msg)
        raise LineageParamValueError(err_msg)
def general_get_summary_lineage(data_manager=None, summary_dir=None, keys=None):
    """
    Get the lineage of one training from data_manager or by parsing summaries.

    `summary_dir` is always required. Without a data_manager the summary
    logs under `summary_dir` are parsed by `LineageParser`; with a
    data_manager the super lineage object is fetched through
    `LineageOrganizer`, and `summary_dir` is validated as a train id.

    Args:
        data_manager (DataManager): Data manager defined as
            mindinsight.datavisual.data_transform.data_manager.DataManager
        summary_dir (str): The summary directory. It contains summary logs for
            one training. When data_manager is given it has to be specified as
            a relative path.
        keys (list[str]): The filter keys of lineage information. They are
            checked with `validate_filter_key`. The acceptable keys are
            `metric`, `hyper_parameters`, `algorithm`, `train_dataset`,
            `model`, `valid_dataset` and `dataset_graph`.
            If it is `None`, all information will be returned. Default: None.
            NOTE(review): `user_defined` used to be listed here as well, but
            the `validate_filter_key` shown in this file rejects it - confirm.

    Returns:
        dict, the lineage information for one training. An empty dict is
        returned when no super lineage object is found.

    Raises:
        LineageParamTypeError: If summary_dir is not specified.
        LineageQuerySummaryDataError: If querying summary data fails.
        LineageParamSummaryPathError: If summary path is invalid (presumably
            raised by the helpers called here - not visible in this function).
        LineageFileNotFoundError: If the summary log file is not found
            (presumably raised by the helpers called here).
    """
    if summary_dir is None:
        if data_manager is None:
            raise LineageParamTypeError(
                "One of data_manager or summary_dir needs to be specified.")
        raise LineageParamTypeError(
            "If data_manager is specified, the summary_dir needs to be "
            "specified as relative path.")

    if keys is not None:
        validate_filter_key(keys)

    if data_manager is not None:
        validate_train_id(summary_dir)
        organizer = LineageOrganizer(data_manager=data_manager)
        super_lineage_obj = organizer.get_super_lineage_obj(summary_dir)
    else:
        # Called for validation only; the normalized path is not used below.
        normalize_summary_dir(summary_dir)
        super_lineage_obj = LineageParser(summary_dir).super_lineage_obj

    if super_lineage_obj is None:
        return {}

    try:
        querier = Querier({summary_dir: super_lineage_obj})
        lineages = querier.get_summary_lineage(summary_dir, keys)
    except (LineageQuerierParamException, LineageParamTypeError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageQuerySummaryDataError("Get summary lineage failed.")
    return lineages[0]
def validate_condition(search_condition):
    """
    Check the paging and sorting parameters of a search condition.

    Only `limit`, `offset`, `sorted_name` and `sorted_type` are inspected
    here; every other entry of the dict is ignored by this function.

    Args:
        search_condition (dict): The search condition.

    Raises:
        LineageParamTypeError: If search_condition is not a dict, or `limit` /
            `offset` is not an int (bool is rejected too).
        LineageParamValueError: If `sorted_name` or `sorted_type` has an
            invalid value, or `sorted_type` is given without `sorted_name`.
    """
    if not isinstance(search_condition, dict):
        log.error("Invalid search_condition type, it should be dict.")
        raise LineageParamTypeError("Invalid search_condition type, "
                                    "it should be dict.")

    for int_field in ("limit", "offset"):
        if int_field not in search_condition:
            continue
        field_value = search_condition.get(int_field)
        # bool is a subclass of int, so it has to be rejected explicitly.
        if isinstance(field_value, bool) or not isinstance(field_value, int):
            type_err_msg = "The {} must be int.".format(int_field)
            log.error(type_err_msg)
            raise LineageParamTypeError(type_err_msg)

    if "sorted_name" in search_condition:
        sorted_name = search_condition.get("sorted_name")
        err_msg = (
            "The sorted_name must be in {} or start with "
            "`metric/` or `user_defined/`."
        ).format(list(FIELD_MAPPING.keys()))

        if not isinstance(sorted_name, str):
            log.error(err_msg)
            raise LineageParamValueError(err_msg)

        is_plain_field = sorted_name in FIELD_MAPPING or sorted_name in ['tag']
        # A prefixed name needs at least one character after the prefix.
        is_prefixed_field = any(
            sorted_name.startswith(prefix) and len(sorted_name) > len(prefix)
            for prefix in ('metric/', 'user_defined/')
        )
        if not is_plain_field and not is_prefixed_field:
            log.error(err_msg)
            raise LineageParamValueError(err_msg)

    if "sorted_type" in search_condition:
        if "sorted_name" not in search_condition:
            log.error("The sorted_name have to exist when sorted_type exists.")
            raise LineageParamValueError(
                "The sorted_name have to exist when sorted_type exists.")

        # None is accepted as well as the two explicit orders.
        allowed_sorted_types = ('ascending', 'descending', None)
        if search_condition.get("sorted_type") not in allowed_sorted_types:
            err_msg = "The sorted_type must be ascending or descending."
            log.error(err_msg)
            raise LineageParamValueError(err_msg)
def _check_objs(self, super_lineage_objs):
    """
    Check the init param of the querier and hand it back unchanged.

    Args:
        super_lineage_objs (dict): The init param to check.

    Returns:
        dict, the same object that was passed in.

    Raises:
        LineageQuerierParamException: If super_lineage_objs is None.
        LineageParamTypeError: If super_lineage_objs is not a dict.
    """
    if super_lineage_objs is None:
        raise LineageQuerierParamException(
            'querier_init_param', 'The querier init param is empty.')
    if isinstance(super_lineage_objs, dict):
        return super_lineage_objs
    raise LineageParamTypeError("Init param should be a dict.")
def check_comparision(self, data, **kwargs):
    """
    Check comparision for all parameters in schema.

    The paging/sorting entries and `lineage_type` are skipped. Every other
    entry must name a supported attribute and map to a dict whose keys are
    compare operators.

    Args:
        data (dict): The search condition, mapping attribute name to its
            compare condition.
        kwargs (dict): Extra keyword arguments. They are not used here.

    Returns:
        dict, the `data` that was passed in.

    Raises:
        LineageParamValueError: If an attribute or a compare operator is not
            supported.
        LineageParamTypeError: If a compare condition is not a dict.
        MindInsightException: If the condition value of a `metric/` attribute
            fails `SearchModelConditionParameter.check_param_value_type`.
    """
    skipped_attrs = ["limit", "offset", "sorted_name", "sorted_type", 'lineage_type']
    compare_operators = ["eq", "lt", "gt", "le", "ge", "in"]

    for attr, condition in data.items():
        if attr in skipped_attrs:
            continue

        if not isinstance(attr, str) or (
                attr not in FIELD_MAPPING
                and not attr.startswith(('metric/', 'user_defined/'))):
            raise LineageParamValueError('The search attribute not supported.')

        if not isinstance(condition, dict):
            raise LineageParamTypeError("The search_condition element {} should be dict."
                                        .format(attr))

        if any(operator not in compare_operators for operator in condition):
            raise LineageParamValueError("The compare condition should be in "
                                         "('eq', 'lt', 'gt', 'le', 'ge', 'in').")

        if not attr.startswith('metric/'):
            continue

        # A bare `metric/` prefix does not name any metric.
        if attr == 'metric/':
            raise LineageParamValueError(
                'The search attribute not supported.'
            )
        try:
            SearchModelConditionParameter.check_param_value_type(condition)
        except ValidationError:
            raise MindInsightException(
                error=LineageErrors.LINEAGE_PARAM_METRIC_ERROR,
                message=LineageErrorMsg.LINEAGE_METRIC_ERROR.value.format(attr)
            )
    return data
def _package_parameter(key, value, message):
    """
    Store one operation parameter in the map of `message` matching its type.

    str, bool, int and float values go to `mapStr`, `mapBool`, `mapInt` and
    `mapDouble`. A non-empty list (for any key except "operations") is
    appended to `mapStrList[key].strValue` with None items replaced by "";
    an empty list stores nothing. None is stored in `mapStr` as "None".

    Args:
        key (str): Operation name.
        value (Union[str, bool, int, float, list, None]): Operation args.
        message (OperationParameter): Operation proto message.

    Raises:
        LineageParamTypeError: If the type of value is not supported, or a
            list is given for the key "operations".
    """
    # bool has to be tested before int because bool is a subclass of int.
    scalar_fields = (
        (str, "mapStr"),
        (bool, "mapBool"),
        (int, "mapInt"),
        (float, "mapDouble"),
    )
    for value_type, field_name in scalar_fields:
        if isinstance(value, value_type):
            getattr(message, field_name)[key] = value
            return

    if isinstance(value, list) and key != "operations":
        if value:
            str_values = ["" if item is None else item for item in value]
            message.mapStrList[key].strValue.extend(str_values)
        return

    if value is None:
        message.mapStr[key] = "None"
        return

    error_msg = "Parameter {} is not supported " \
                "in event package.".format(key)
    log.error(error_msg)
    raise LineageParamTypeError(error_msg)
def _parse_summary_logs(self, summary_path):
    """
    Parse one summary log or a list of summary logs.

    Each path is handed to `self._parse_summary_log` together with an index.
    For a list the index only advances after a parse that returned a truthy
    result, so failed paths do not consume an index.

    Args:
        summary_path (Union[str, list[str]]): The single summary log path or
            a list of summary log path.

    Raises:
        LineageQuerierParamException: If summary_path is empty.
        LineageParamTypeError: If summary_path is neither str nor list.
        LineageSummaryParseException: If `self._lineage_objects` is still
            empty after parsing.
    """
    if not summary_path:
        raise LineageQuerierParamException('summary_path', 'The summary path is empty.')

    if not isinstance(summary_path, (str, list)):
        raise LineageParamTypeError('Summary path is not str or list.')

    if isinstance(summary_path, str):
        self._parse_summary_log(summary_path, 0)
    else:
        next_index = 0
        for log_path in summary_path:
            if self._parse_summary_log(log_path, next_index):
                next_index += 1

    if self._parse_failed_paths:
        logger.info('Parse failed paths: %s', str(self._parse_failed_paths))

    if not self._lineage_objects:
        raise LineageSummaryParseException()
def general_filter_summary_lineage(data_manager=None, summary_base_dir=None, search_condition=None, added=False):
    """
    Filter summary lineage from data_manager or by parsing summaries.

    One of data_manager or summary_base_dir needs to be specified. When a
    data_manager is given, its `summary_base_dir` attribute is used and the
    `summary_base_dir` argument is ignored.

    Args:
        data_manager (DataManager): Data manager defined as
            mindinsight.datavisual.data_transform.data_manager.DataManager
        summary_base_dir (str): The summary base directory. It contains summary
            directories generated by training.
        search_condition (dict): The search condition. `None` is treated as an
            empty condition. Default: None.
        added (bool): Passed through to `Querier.filter_summary_lineage`.
            Default: False.

    Returns:
        The result of `Querier.filter_summary_lineage`. When the summaries
        can not be parsed (LineageSummaryParseException), the empty result
        `{'object': [], 'count': 0}` is returned instead.

    Raises:
        LineageParamTypeError: If neither data_manager nor summary_base_dir
            is specified.
        LineageSearchConditionParamError: If the search condition is invalid.
        LineageParamSummaryPathError: If converting the paths of the search
            condition to absolute paths fails.
        LineageQuerySummaryDataError: If querying summary data fails.
    """
    if data_manager is not None:
        summary_base_dir = data_manager.summary_base_dir
    elif summary_base_dir is not None:
        summary_base_dir = normalize_summary_dir(summary_base_dir)
    else:
        raise LineageParamTypeError(
            "One of data_manager or summary_base_dir needs to be specified.")

    if search_condition is None:
        search_condition = {}

    try:
        validate_condition(search_condition)
        validate_search_model_condition(SearchModelConditionParameter, search_condition)
    except MindInsightException as condition_err:
        log.error(str(condition_err))
        log.exception(condition_err)
        raise LineageSearchConditionParamError(str(condition_err.message))

    try:
        search_condition = _convert_relative_path_to_abspath(
            summary_base_dir, search_condition)
    except (LineageParamValueError, LineageDirNotExistError) as path_err:
        log.error(str(path_err))
        log.exception(path_err)
        raise LineageParamSummaryPathError(str(path_err.message))

    try:
        organizer = LineageOrganizer(data_manager, summary_base_dir)
        querier = Querier(organizer.super_lineage_objs)
        result = querier.filter_summary_lineage(
            condition=search_condition, added=added)
    except LineageSummaryParseException:
        # Nothing could be parsed: report an empty result instead of failing.
        result = {'object': [], 'count': 0}
    except (LineageQuerierParamException, LineageParamTypeError) as query_err:
        log.error(str(query_err))
        log.exception(query_err)
        raise LineageQuerySummaryDataError("Filter summary lineage failed.")

    return result
def test_invalid_search_condition(self, mock_path, mock_valid):
    """Test filter_summary_lineage with an invalid search condition."""
    mock_path.return_value = None
    # The validator mock raises a param type error, which
    # filter_summary_lineage is expected to surface as a
    # LineageSearchConditionParamError with the same message.
    mock_valid.side_effect = LineageParamTypeError(
        'Invalid search_condition type.')

    with self.assertRaisesRegex(LineageSearchConditionParamError,
                                'Invalid search_condition type.'):
        filter_summary_lineage('/path/to/summary/dir', 'invalid_condition')
def validate_user_defined_info(user_defined_info):
    """
    Validate user defined info, delete the item if its key is in lineage.

    Keys must be str and values must be int, str or float. Keys that are
    already defined in `FIELD_MAPPING` are removed from `user_defined_info`
    in place, and an error reporting them is raised afterwards.

    Args:
        user_defined_info (dict): The user defined info. It is modified in
            place when it contains keys that are already defined in lineage.

    Raises:
        LineageParamTypeError: If the type of parameters is invalid.
        LineageParamValueError: If user defined keys have been defined in lineage.
    """
    if not isinstance(user_defined_info, dict):
        log.error("Invalid user defined info. It should be a dict.")
        raise LineageParamTypeError(
            "Invalid user defined info. It should be dict.")

    for key, value in user_defined_info.items():
        if not isinstance(key, str):
            error_msg = "Dict key type {} is not supported in user defined info. " \
                        "Only str is permitted now.".format(type(key))
            log.error(error_msg)
            raise LineageParamTypeError(error_msg)
        # NOTE(review): bool is a subclass of int, so bool values pass this
        # check as well - confirm that this is intended.
        if not isinstance(value, (int, str, float)):
            error_msg = "Dict value type {} is not supported in user defined info. " \
                        "Only str, int and float are permitted now.".format(type(value))
            log.error(error_msg)
            raise LineageParamTypeError(error_msg)

    # All keys are str at this point, so the duplicates can be sorted to keep
    # the error message deterministic.
    duplicated_keys = sorted(
        set(FIELD_MAPPING.keys()) & set(user_defined_info.keys()))
    if duplicated_keys:
        for key in duplicated_keys:
            user_defined_info.pop(key)
        raise LineageParamValueError(
            "There are some keys have defined in lineage. "
            "Duplicated key(s): %s. " % duplicated_keys)