def general_get_summary_lineage(data_manager=None, summary_dir=None, keys=None):
    """
    Fetch the lineage information of a single training.

    The super lineage object is obtained in one of two ways: when
    `data_manager` is given it is looked up through a `LineageOrganizer`
    (with `summary_dir` validated as a train id); otherwise it is parsed
    from the summary logs found under `summary_dir`.

    Args:
        data_manager (DataManager): Data manager defined as
            mindinsight.datavisual.data_transform.data_manager.DataManager.
            Default: None.
        summary_dir (str): The summary directory. It contains summary logs
            for one training. Must always be specified; when `data_manager`
            is given it is expected to be a relative path. Default: None.
        keys (list[str]): The filter keys of lineage information. The
            acceptable keys are `metric`, `user_defined`, `hyper_parameters`,
            `algorithm`, `train_dataset`, `model`, `valid_dataset` and
            `dataset_graph`. If it is `None`, all information will be
            returned. Default: None.

    Returns:
        dict, the lineage information for one training. An empty dict is
        returned when no super lineage object could be obtained.

    Raises:
        LineageParamTypeError: If `summary_dir` is not specified.
        LineageQuerySummaryDataError: If querying summary data fails.
        LineageParamSummaryPathError: If summary path is invalid
            (propagated from the helpers called here).
        LineageFileNotFoundError: If the summary log file is not found
            (propagated from the helpers called here).
    """
    # summary_dir is mandatory in both modes; only the message differs.
    if summary_dir is None:
        if data_manager is None:
            raise LineageParamTypeError(
                "One of data_manager or summary_dir needs to be specified.")
        raise LineageParamTypeError(
            "If data_manager is specified, the summary_dir needs to be "
            "specified as relative path.")

    if keys is not None:
        validate_filter_key(keys)

    if data_manager is not None:
        # Cached mode: summary_dir acts as the train id.
        validate_train_id(summary_dir)
        organizer = LineageOrganizer(data_manager=data_manager)
        super_lineage_obj = organizer.get_super_lineage_obj(summary_dir)
    else:
        # Standalone mode: parse the summary logs directly.
        normalize_summary_dir(summary_dir)
        super_lineage_obj = LineageParser(summary_dir).super_lineage_obj

    if super_lineage_obj is None:
        return {}

    try:
        querier = Querier({summary_dir: super_lineage_obj})
        lineages = querier.get_summary_lineage(summary_dir, keys)
    except (LineageQuerierParamException, LineageParamTypeError) as error:
        log.error(str(error))
        log.exception(error)
        raise LineageQuerySummaryDataError("Get summary lineage failed.")

    # Only one summary directory was queried, so take its single entry.
    return lineages[0]
def update_lineage_object(data_manager, train_id, added_info: dict):
    """
    Update lineage objects about tag and remark.

    The entries of `added_info` are merged over the `added_info` already
    stored on the cached lineage object; existing keys that are not present
    in `added_info` are kept.

    Args:
        data_manager: Object providing `get_brief_train_job(train_id)`, which
            returns the cache item of the train job.
        train_id (str): Id of the train job whose lineage is updated.
        added_info (dict): Key/value pairs to merge into the stored
            `added_info`.

    Raises:
        ParamValueError: If no lineage is cached for the train job.
    """
    validate_train_id(train_id)
    validate_added_info(added_info)

    cache_item = data_manager.get_brief_train_job(train_id)
    if cache_item.get(key=LINEAGE, raise_exception=False) is None:
        logger.warning("Cannot update the lineage for train job %s, because it does not exist.", train_id)
        raise ParamValueError("Cannot update the lineage for train job %s, because it does not exist." % train_id)

    # Do the whole read-merge-write under the key lock. Reading the current
    # added_info before taking the lock would let two concurrent updates
    # each merge into the same stale snapshot, and the later write would
    # silently discard the earlier one.
    with cache_item.lock_key(LINEAGE):
        super_lineage_obj = cache_item.get(key=LINEAGE).super_lineage_obj
        # Build a fresh dict so the previously stored mapping is replaced,
        # never mutated in place.
        merged_info = dict(super_lineage_obj.added_info)
        merged_info.update(added_info)
        super_lineage_obj.added_info = merged_info
def get_dataset_graph():
    """
    Get dataset graph.

    Reads the train id from the current request, queries the lineage of
    that single summary directory and responds with its dataset graph.

    Returns:
        str, the dataset graph information. The JSON body holds
        `dataset_graph` and `summary_dir` when a non-empty dataset graph is
        found, and is an empty object otherwise.

    Raises:
        MindInsightException: If method fails to be called.
        ParamValueError: If summary_dir is invalid.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/datasets/dataset_graph?train_id=xxx
    """
    train_id = get_train_id(request)
    validate_train_id(train_id)

    # Restrict the lineage query to this one train job.
    condition = {'summary_dir': {'in': [train_id]}}

    try:
        lineage_info = filter_summary_lineage(data_manager=DATA_MANAGER, search_condition=condition)
        objects = lineage_info.get('object')
    except MindInsightException as exception:
        # Re-raise with the same error and message but as an HTTP 400.
        raise MindInsightException(exception.error, exception.message, http_code=400)

    if not objects:
        return jsonify({})

    first_lineage = objects[0]
    graph = first_lineage.get('dataset_graph')
    if not graph:
        return jsonify({})

    return jsonify({
        'dataset_graph': graph,
        'summary_dir': first_lineage.get('summary_dir'),
    })