Exemplo n.º 1
0
    def parse_shape(shape, limit=0):
        """
        Convert a textual shape such as "[0, 0, 1:10, :]" into an index tuple.

        Args:
            shape (str): Shape expression wrapped in square brackets. None is passed through unchanged.
            limit (int): Maximum number of entries that may contain `:`. Default: 0, which means that
                there is no limitation.

        Returns:
            Union[None, tuple], None when `shape` is None, otherwise one item per comma separated entry,
                each produced by `str_to_slice_or_int`. An empty pair of brackets gives an empty tuple.

        Raises:
            ParamValueError: If `shape` is not a str wrapped in `[` and `]`, or more than `limit`
                entries contain `:`.
        """
        if shape is None:
            return None

        # Only strings can be stripped; anything else is rejected by the check below.
        text = shape.strip() if isinstance(shape, str) else None
        if text is None or not text.startswith('[') or not text.endswith(']'):
            raise ParamValueError(
                "Invalid shape. The type of shape should be str and start with `[` and "
                "end with `]`. Received: {}.".format(shape))

        inner = text[1:-1]
        entries = inner.split(',')

        ranged_count = len([entry for entry in entries if ':' in entry])
        if limit and ranged_count > limit:
            raise ParamValueError(
                "Invalid shape. At most {} dimensions are specified. Received: {}"
                .format(limit, inner))

        if not inner:
            return tuple()
        return tuple(str_to_slice_or_int(entry.strip()) for entry in entries)
Exemplo n.º 2
0
    def list_tensors(self, train_id, tag):
        """
        Fetch the tensors recorded under a tag of one train job.

        Args:
            train_id (str): ID for train job.
            tag (str): The tag name.

        Returns:
            object, whatever `events_data.tensors(tag)` yields for the loader of the train job.

        Raises:
            ParamValueError: If the train job has no loader in the snapshot loader pool, or the tag
                lookup raises KeyError.
        """
        self._check_status_valid()

        pool = self._get_snapshot_loader_pool()
        if self._is_loader_in_loader_pool(train_id, pool):
            events = pool[train_id].data_loader.get_events_data()
            try:
                return events.tensors(tag)
            except KeyError:
                raise ParamValueError("Can not find any data in this train job by given tag.")

        raise ParamValueError(
            "Can not find any data in loader pool about the train job.")
Exemplo n.º 3
0
def get_profile_device_list():
    """
    Return the device list found in the profiler directory named by the request.

    Returns:
        Response, the JSON encoded device list.

    Raises:
        ParamValueError: If profiler_dir or train_id is missing, or the profiler path fails validation.

    Examples:
        >>> POST http://xxxx/v1/mindinsight/profile/devices
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    if not (profiler_dir and train_id):
        raise ParamValueError("No profiler_dir or train_id.")

    raw_path = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
    try:
        profiler_path = validate_and_normalize_path(raw_path, "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir")

    check_train_job_and_profiler_dir(profiler_path)

    # The helper returns a pair; only its first item is sent back.
    devices, _unused = analyse_device_list_from_profiler_dir(profiler_path)
    return jsonify(devices)
Exemplo n.º 4
0
    def validate_dims_format(dims):
        """
        Check that a dims string looks like "[0, 0, :, :]".

        Nothing is checked when dims is None.

        Args:
            dims (str): Dims of tensor, a bracketed and comma separated list whose entries are
                integers (an optional leading `-` is accepted) or `:`.

        Raises:
            ParamTypeError: If dims is neither None nor a str.
            ParamValueError: If dims is not wrapped in `[` and `]`, or an entry is not an int or `:`.
        """
        if dims is None:
            return
        if not isinstance(dims, str):
            raise ParamTypeError(dims, str)

        dims = dims.strip()
        if not dims.startswith('[') or not dims.endswith(']'):
            raise ParamValueError(
                'The value: {} of dims must be '
                'start with `[` and end with `]`.'.format(dims))

        for raw_entry in dims[1:-1].split(','):
            entry = raw_entry.strip()
            if entry == ":":
                continue
            # Drop a single leading minus sign before testing for digits.
            digits = entry[1:] if entry.startswith('-') else entry
            if not digits.isdigit():
                raise ParamValueError(
                    'The value: {} of dims in the square brackets '
                    'must be int or `:`.'.format(dims))
Exemplo n.º 5
0
    def set_visual_range(self, max_val: float, min_val: float,
                         bins: int) -> None:
        """
        Sets visual range for later re-sampling.

        Why do we need a visual range for histograms? Aligned buckets between steps help users see the trend
        of a tensor, while misaligned buckets might mislead them. For a given tensor, thinner buckets lower
        the count of every bucket and thicker buckets raise it. When such histograms are displayed together,
        a user might think the histogram with thicker buckets has more values, which is misleading. So buckets
        need to be unified across steps, and the visual range is the mechanism used for that.

        Args:
            max_val (float): Max value for visual histogram.
            min_val (float): Min value for visual histogram.
            bins (int): Bins number for visual histogram.

        Raises:
            ParamValueError: If max_val is less than min_val, or bins is less than 1.
        """
        if max_val < min_val:
            # Equal bounds are accepted by this check, so the message only says "less than".
            raise ParamValueError(
                "Invalid input. max_val({}) is less than min_val({})."
                .format(max_val, min_val))

        if bins < 1:
            raise ParamValueError(
                "Invalid input bins({}). Must be greater than 0.".format(bins))

        self._visual_max = max_val
        self._visual_min = min_val
        self._visual_bins = bins

        # Reset the re-sampled buckets to an empty tuple whenever the range changes.
        self._re_sampled_buckets = ()
Exemplo n.º 6
0
    def _calc_intersection_len(self, max1, min1, max2, min2):
        """Calculates intersection length of [min1, max1] and [min2, max2]."""
        if max1 < min1:
            raise ParamValueError(
                "Invalid input. max1({}) is less than min1({}).".format(
                    max1, min1))

        if max2 < min2:
            raise ParamValueError(
                "Invalid input. max2({}) is less than min2({}).".format(
                    max2, min2))

        if min1 <= min2:
            if max1 <= min2:
                # return value must be calculated by max1.__sub__
                return max1 - max1
            if max1 <= max2:
                return max1 - min2
            # max1 > max2
            return max2 - min2

        # min1 > min2
        if max2 <= min1:
            return max2 - max2
        if max2 <= max1:
            return max2 - min1
        return max1 - min1
Exemplo n.º 7
0
def get_cluster_link_info():
    """
    Get cluster link info.

    Returns:
        Response, the cluster link info.

    Raises:
        ParamValueError: If the train id is missing, the cluster profiler dir is invalid, or the request
            body is not valid JSON or is not a JSON object.

    Examples:
        >>> POST http://xxx/v1/mindinsight/profile/search-cluster-link
    """
    train_id = get_train_id(request)
    if not train_id:
        # Without this guard a None train id would make os.path.join raise TypeError.
        raise ParamValueError('No train id.')

    cluster_profiler_dir = os.path.join(settings.SUMMARY_BASE_DIR, train_id)
    try:
        cluster_profiler_dir = validate_and_normalize_path(
            cluster_profiler_dir, 'cluster_profiler')
    except ValidationError:
        raise ParamValueError('Invalid cluster_profiler dir')

    condition = request.stream.read()
    try:
        condition = json.loads(condition) if condition else {}
    except (json.JSONDecodeError, ValueError):
        raise ParamValueError("Json data parse failed.")
    if not isinstance(condition, dict):
        # Valid JSON may still be a list or scalar, which has no `.get`.
        raise ParamValueError("The search condition should be a json object.")

    device_id = condition.get("device_id", "0")
    # Called for validation only; the original value is what gets passed on.
    to_int(device_id, 'device_id')

    analyser = AnalyserFactory.instance().get_analyser('cluster_hccl',
                                                       cluster_profiler_dir,
                                                       device_id)
    link_info = analyser.get_cluster_link_info(condition)
    return jsonify(link_info)
Exemplo n.º 8
0
def get_specific_dims_data(ndarray, dims, tensor_dims):
    """
    Get specific dims data.

    Args:
        ndarray (numpy.ndarray): An ndarray of numpy.
        dims (list): A list of specific dims. An int selects one index on that axis (negative values
            count from the end), None selects the range `0:tensor_dims[k]` of that axis.
        tensor_dims (list): A list of tensor dims.

    Returns:
        numpy.ndarray, an ndarray of specific dims tensor data.

    Raises:
        ParamValueError: If the length of param dims is not equal to the length of tensor dims or
            the index of param dims is out of range.
    """
    if len(dims) != len(tensor_dims):
        raise ParamValueError("The length of param dims: {}, is not equal to the "
                              "length of tensor dims: {}.".format(len(dims), len(tensor_dims)))
    indices = []
    for index, size in zip(dims, tensor_dims):
        if index is None:
            indices.append(slice(0, size))
            continue
        # Negative indices count from the end, so the valid range is [-size, size).
        # Checking both bounds keeps an out-of-range negative index from escaping as a raw IndexError.
        if index >= size or index < -size:
            raise ParamValueError("The index: {} of param dims out of range: {}.".format(index, size))
        indices.append(index)
    return ndarray[tuple(indices)]
Exemplo n.º 9
0
    def get_memory_usage_breakdowns(self, device_type, graph_id, node_id):
        """
        Get memory usage breakdowns for each node.

        Args:
            device_type (str): Device type, e.g., GPU, Ascend.
            graph_id: Graph id, used as the key to look up the graph in the memory details.
            node_id (int): Node id, used as a non-negative index into the graph's breakdowns.

        Returns:
            dict, `{'breakdowns': ...}` holding the breakdown entry of the requested node.

        Raises:
            ParamValueError: If graph_id is not found in the memory details, or node_id does not
                address an existing breakdown entry.
        """
        memory_details = self._get_file_content(device_type, FileType.DETAILS.value)
        if graph_id not in memory_details:
            logger.error('Invalid graph id: %s', graph_id)
            raise ParamValueError('Invalid graph id.')

        graph = memory_details[graph_id]
        # A negative node id is rejected explicitly; otherwise it would index from the end of the list.
        if 'breakdowns' not in graph or not 0 <= node_id < len(graph['breakdowns']):
            logger.error('Invalid node id: %s', node_id)
            raise ParamValueError('Invalid node id.')

        memory_breakdowns = graph['breakdowns'][node_id]

        return {'breakdowns': memory_breakdowns}
Exemplo n.º 10
0
    def get_specific_dims_data(ndarray, dims):
        """
        Index a tensor with the given dims and always hand back an ndarray.

        Args:
            ndarray (numpy.ndarray): An ndarray of numpy.
            dims (tuple): A tuple of specific dims, one item per axis of the tensor.

        Returns:
            numpy.ndarray, an ndarray of specific dims tensor data.

        Raises:
            ParamValueError: If the length of dims differs from the number of axes of the tensor, or
                indexing with dims raises IndexError.
        """
        rank = len(ndarray.shape)
        if rank != len(dims):
            raise ParamValueError(
                "Invalid dims. The length of param dims and tensor shape should be the same."
            )

        try:
            selected = ndarray[dims]
        except IndexError:
            raise ParamValueError(
                "Invalid shape. Shape unmatched. Received: {}, tensor shape: {}"
                .format(dims, ndarray.shape))

        # The indexing result may not be an ndarray (e.g. a scalar), so wrap it when needed.
        return selected if isinstance(selected, np.ndarray) else np.array(selected)
Exemplo n.º 11
0
    def check_reload_interval(reload_interval):
        """
        Ensure the reload interval is a non-negative integer.

        Args:
            reload_interval (int): Reload interval, expected to be >= 0.

        Raises:
            ParamValueError: If reload_interval is not an int or is negative.
        """
        if isinstance(reload_interval, int):
            if reload_interval >= 0:
                return
            raise ParamValueError("The value of reload interval should be >= 0.")

        raise ParamValueError("The value of reload interval should be integer.")
Exemplo n.º 12
0
    def calc_diff_between_two_tensor(first_tensor, second_tensor, tolerance):
        """
        Subtract the second tensor from the first and zero out differences within tolerance.

        Args:
            first_tensor (numpy.ndarray): Specify the first tensor.
            second_tensor (numpy.ndarray): Specify the second tensor.
            tolerance (float): The tolerance of difference between the two tensors, a ratio between 0 and 1.
                The boundary value equals max(abs(min), abs(max)) * tolerance, where min and max are taken
                from the statistics of the difference tensor. Elements whose two inputs are within the
                boundary value of each other are set to zero in the result.

        Returns:
            numpy.ndarray, the first tensor minus the second tensor, with the elements considered close
                set to zero.

        Raises:
            ParamTypeError: If either tensor is not a numpy.ndarray.
            ParamValueError: If the shapes or dtypes of the tensors differ, or tolerance is not between
                0 and 1.
        """
        if not isinstance(first_tensor, np.ndarray):
            raise ParamTypeError('first_tensor', np.ndarray)
        if not isinstance(second_tensor, np.ndarray):
            raise ParamTypeError('second_tensor', np.ndarray)

        first_shape, second_shape = first_tensor.shape, second_tensor.shape
        if first_shape != second_shape:
            raise ParamValueError(
                "the shape: {} of first tensor is not equal to shape: {} of second tensor."
                .format(first_shape, second_shape))

        first_dtype, second_dtype = first_tensor.dtype, second_tensor.dtype
        if first_dtype != second_dtype:
            raise ParamValueError(
                "the dtype: {} of first tensor is not equal to dtype: {} of second tensor."
                .format(first_dtype, second_dtype))

        # Tolerance must lie in [0, 1].
        out_of_range = tolerance < 0 or tolerance > 1
        if out_of_range:
            raise ParamValueError(
                "the tolerance should be between 0 and 1, but got {}".format(
                    tolerance))

        difference = np.subtract(first_tensor, second_tensor)
        difference_stats = TensorUtils.get_statistics_from_tensor(difference)
        largest_magnitude = max(abs(difference_stats.max), abs(difference_stats.min))
        boundary_value = largest_magnitude * tolerance

        # Absolute tolerance only: rtol is disabled so the boundary value is the sole threshold.
        close_mask = np.isclose(first_tensor, second_tensor, atol=boundary_value, rtol=0)
        return np.multiply(difference, ~close_mask)
Exemplo n.º 13
0
    def check_max_threads_count(max_threads_count):
        """
        Ensure the max threads count is a positive integer.

        Args:
            max_threads_count (int): Threads count, expected to be > 0.

        Raises:
            ParamValueError: If max_threads_count is not an int or is not greater than 0.
        """
        if not isinstance(max_threads_count, int):
            raise ParamValueError(
                "The value of max threads count should be integer.")

        if max_threads_count > 0:
            return
        raise ParamValueError(
            "The value of max threads count should be > 0.")
Exemplo n.º 14
0
def get_profile_summary_proposal():
    """
    Build the summary profiling proposal for the profiler directory named by the request.

    Returns:
        CustomResponse, the proposal serialized with json.dumps and served as application/json.

    Raises:
        ParamValueError: If profiler_dir or train_id is missing, or the profiler path fails validation.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/summary/propose
    """
    profiler_dir = get_profiler_dir(request)
    train_id = get_train_id(request)
    device_id = get_device_id(request)
    if not (profiler_dir and train_id):
        raise ParamValueError("No profiler_dir or train_id.")
    # Called for validation only; the original device_id is what gets passed on.
    to_int(device_id, 'device_id')

    raw_path = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
    try:
        profiler_path = validate_and_normalize_path(raw_path, "profiler")
    except ValidationError:
        raise ParamValueError("Invalid profiler dir")

    check_train_job_and_profiler_dir(profiler_path)

    iteration_interval_filter = {
        "mode": "proc",
        "proc_name": "iteration_interval",
        "step_id": 0
    }
    options = {
        'step_trace': {
            "iter_interval": {"filter_condition": iteration_interval_filter}
        }
    }
    proposal_types = ['step_trace', 'minddata', 'minddata_pipeline', 'common']

    composer = ComposeProposal(profiler_path, device_id, proposal_types)
    proposal_info = composer.get_proposal(options)

    # json.dumps is used so the proposal is returned in order.
    return CustomResponse(json.dumps(proposal_info),
                          mimetype='application/json')
Exemplo n.º 15
0
    def get_tensors(self, train_ids, tags, step, dims, detail):
        """
        Collect tensor data for the given train jobs, tags, step, dims and detail.

        The entries of `train_ids` are replaced in place by their URL-unquoted form.

        Args:
            train_ids (list): Specify list of train job ID.
            tags (list): Specify list of tag.
            step (int): Specify step of tag, it's necessary when detail is equal to 'data'.
            dims (str): Specify dims of step, it's necessary when detail is equal to 'data'.
            detail (str): Specify which data to query, available values: 'stats', 'histogram' and 'data'.

        Returns:
            dict, a dict including the `tensors`.

        Raises:
            ParamValueError: If dims is not a str wrapped in `[` and `]` whose entries are ints or `:`.
            UrlDecodeError: If unquoting a train id fails in strict mode.
        """
        Validation.check_param_empty(train_id=train_ids, tag=tags)

        if dims is not None:
            if not isinstance(dims, str):
                raise ParamValueError('The type of dims must be str, but got {}.'.format(type(dims)))
            dims = dims.strip()
            if not dims.startswith('[') or not dims.endswith(']'):
                raise ParamValueError('The value: {} of dims must be '
                                      'start with `[` and end with `]`.'.format(dims))
            for raw_entry in dims[1:-1].split(','):
                entry = raw_entry.strip()
                if entry == ":":
                    continue
                # Drop a single leading minus sign before testing for digits.
                unsigned = entry[1:] if entry.startswith('-') else entry
                if not unsigned.isdigit():
                    raise ParamValueError('The value: {} of dims in the square brackets '
                                          'must be int or `:`.'.format(dims))

        for position, quoted_id in enumerate(train_ids):
            try:
                decoded_id = unquote(quoted_id, errors='strict')
            except UnicodeDecodeError:
                raise UrlDecodeError('Unquote train id error with strict mode')
            train_ids[position] = decoded_id

        collected = []
        for train_id in train_ids:
            collected.extend(self._get_train_tensors(train_id, tags, step, dims, detail))

        return {"tensors": collected}
Exemplo n.º 16
0
def search_model():
    """
    Return model lineage info matching the search condition in the request body.

    The request body is parsed as JSON; an empty body is treated as an empty condition.

    Returns:
        Response, the JSON encoded model lineage information.

    Raises:
        ParamValueError: If parsing the request body as JSON fails.

    Examples:
        >>> POST http://xxxx/v1/mindinsight/models/model_lineage
    """
    raw_condition = request.stream.read()
    try:
        search_condition = json.loads(raw_condition or "{}")
    except Exception:
        raise ParamValueError("Json data parse failed.")

    lineage = _get_lineage_info(lineage_type="model",
                                search_condition=search_condition)
    return jsonify(lineage)
Exemplo n.º 17
0
def convert_array_from_str(dims, limit=0):
    """
    Turn a dims string into a list of ints and None placeholders.

    Args:
        dims (str): Specify dims of tensor.
        limit (int): The max flexible dimension count, default value is 0 which means that there is no limitation.

    Returns:
        list, one item per comma separated entry: None for `:`, otherwise the result of `to_int`.
            For example "[:, :]" gives [None, None].

    Raises:
        ParamValueError: If the number of `:` entries exceeds a non-zero limit.
    """
    body = dims.strip().lstrip('[').rstrip(']')
    entries = [entry.strip() for entry in body.split(',')]

    dims_list = [None if entry == ':' else to_int(entry, "dim") for entry in entries]

    flexible_count = entries.count(':')
    if limit and flexible_count > limit:
        raise ParamValueError("Flexible dimensions cannot exceed limit value: {}, size: {}"
                              .format(limit, flexible_count))
    return dims_list
Exemplo n.º 18
0
def get_datasets_lineage():
    """
    Return dataset lineage info matching the search condition in the request body.

    The request body is parsed as JSON; an empty body is treated as an empty condition.

    Returns:
        Response, the JSON encoded dataset lineage information.

    Raises:
        ParamValueError: If parsing the request body as JSON fails.

    Examples:
        >>> POST http://xxxx/v1/minddata/datasets/dataset_lineage
    """
    raw_condition = request.stream.read()
    try:
        search_condition = json.loads(raw_condition or "{}")
    except Exception:
        raise ParamValueError("Json data parse failed.")

    lineage = _get_lineage_info(lineage_type="dataset",
                                search_condition=search_condition)
    return jsonify(lineage)
Exemplo n.º 19
0
def get_cluster_flops():
    """
    Return the cluster FLOPs of the train job named by the request.

    Returns:
        Response, the JSON encoded cluster FLOPs.

    Raises:
        ParamValueError: If the train id is missing.

    Examples:
        >>> GET http://xxx/v1/mindinsight/profile/cluster-flops
    """
    train_id = get_train_id(request)
    if not train_id:
        raise ParamValueError('No train id.')

    # NOTE(review): errors raised by validate_and_normalize_path are not translated here - confirm intended.
    profiler_path = validate_and_normalize_path(
        os.path.join(settings.SUMMARY_BASE_DIR, train_id), 'cluster_profiler')
    check_train_job_and_profiler_dir(profiler_path)

    flops_analyser = AnalyserFactory.instance().get_analyser('cluster_flops',
                                                             profiler_path)
    return jsonify(flops_analyser.get_flops())
Exemplo n.º 20
0
def get_memory_usage_breakdowns():
    """
    Return the memory breakdowns of one node, selected by the query arguments.

    Query arguments read: dir, device_id, device_type, graph_id and node_id.

    Returns:
        object, the value returned by the analyser's `get_memory_usage_breakdowns`.

    Raises:
        ParamValueError: If device_type is not 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/memory-breakdowns
    """
    query = request.args
    profiler_dir_abs = validate_and_normalize_profiler_path(
        query.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    device_id = query.get("device_id", default='0')
    # Called for validation only; the string value is what the analyser receives.
    to_int(device_id, 'device_id')
    device_type = query.get("device_type", default='ascend')
    graph_id = query.get("graph_id", default='0')
    node_id = to_int(query.get("node_id", default='0'), 'node_id')

    if device_type != 'ascend':
        logger.error(
            "Invalid device_type, Memory Usage only supports Ascend for now.")
        raise ParamValueError("Invalid device_type.")

    memory_analyser = AnalyserFactory.instance().get_analyser(
        'memory_usage', profiler_dir_abs, device_id)
    return memory_analyser.get_memory_usage_breakdowns(device_type, graph_id,
                                                       node_id)
Exemplo n.º 21
0
def query_image():
    """
    Return the binary content of an image selected by train id, path and type.

    Returns:
        object, the value returned by `DatafileEncap.query_image_binary`.

    Raises:
        ParamMissError: If train_id, path or type is missing.
        ParamValueError: If type is not one of 'original', 'overlay' or 'outcome'.
    """
    train_id = get_train_id(request)
    if train_id is None:
        raise ParamMissError("train_id")

    image_path = request.args.get("path")
    if image_path is None:
        raise ParamMissError("path")

    image_type = request.args.get("type")
    if image_type is None:
        raise ParamMissError("type")

    valid_types = ("original", "overlay", "outcome")
    if image_type not in valid_types:
        raise ParamValueError(
            f"type:{image_type}, valid options: 'original' 'overlay' 'outcome'"
        )

    return DatafileEncap(EXPLAIN_MANAGER).query_image_binary(
        train_id, image_path, image_type)
Exemplo n.º 22
0
def get_timeline_detail():
    """
    Return the display timeline selected by the query arguments.

    Query arguments read: dir, device_id, device_type and scope_name_num.

    Returns:
        Response, the JSON encoded detail information of the timeline.

    Raises:
        ParamValueError: If device_type is neither 'gpu' nor 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline
    """
    query = request.args
    profiler_dir_abs = validate_and_normalize_profiler_path(
        query.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    device_id = query.get("device_id", default='0')
    # Called for validation only; the string value is what the analyser receives.
    to_int(device_id, 'device_id')
    device_type = query.get("device_type", default='ascend')
    scope_name_num = query.get("scope_name_num", default='0')

    supported_devices = ['gpu', 'ascend']
    if device_type not in supported_devices:
        logger.info(
            "Invalid device_type, device_type should be gpu or ascend.")
        raise ParamValueError("Invalid device_type.")

    timeline_analyser = AnalyserFactory.instance().get_analyser(
        'timeline', profiler_dir_abs, device_id)
    return jsonify(
        timeline_analyser.get_display_timeline(device_type, scope_name_num))
Exemplo n.º 23
0
def get_timeline_summary():
    """
    Return the timeline summary selected by the query arguments.

    Query arguments read: dir, device_id and device_type.

    Returns:
        object, the value returned by the analyser's `get_timeline_summary`.

    Raises:
        ParamValueError: If device_type is neither 'gpu' nor 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline-summary
    """
    query = request.args
    profiler_dir_abs = validate_and_normalize_profiler_path(
        query.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    device_id = query.get("device_id", default='0')
    # Called for validation only; the string value is what the analyser receives.
    to_int(device_id, 'device_id')
    device_type = query.get("device_type", default='ascend')

    supported_devices = ['gpu', 'ascend']
    if device_type not in supported_devices:
        logger.info(
            "Invalid device_type, device_type should be gpu or ascend.")
        raise ParamValueError("Invalid device_type.")

    timeline_analyser = AnalyserFactory.instance().get_analyser(
        'timeline', profiler_dir_abs, device_id)
    return timeline_analyser.get_timeline_summary(device_type)
Exemplo n.º 24
0
def get_timeline_detail():
    """
    Return the display timeline selected by the query arguments.

    Query arguments read: dir, device_id and device_type.

    Returns:
        Response, the JSON encoded detail information of the timeline.

    Raises:
        ProfilerDirNotFoundException: If the normalized profiler directory does not exist.
        ParamValueError: If device_type is neither 'gpu' nor 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline
    """
    query = request.args
    summary_dir = query.get("dir")
    profiler_dir = validate_and_normalize_profiler_path(
        summary_dir, settings.SUMMARY_BASE_DIR)
    if not os.path.exists(profiler_dir):
        raise ProfilerDirNotFoundException(msg=summary_dir)

    device_id = query.get("device_id", default='0')
    # Called for validation only; the string value is what the analyser receives.
    to_int(device_id, 'device_id')
    device_type = query.get("device_type", default='ascend')

    supported_devices = ['gpu', 'ascend']
    if device_type not in supported_devices:
        logger.info(
            "Invalid device_type, device_type should be gpu or ascend.")
        raise ParamValueError("Invalid device_type.")

    timeline_analyser = AnalyserFactory.instance().get_analyser(
        'timeline', profiler_dir, device_id)
    return jsonify(timeline_analyser.get_display_timeline(device_type))
Exemplo n.º 25
0
    def check_limit(cls, limit, min_value=1, max_value=1000, default_value=100):
        """
        Validate the limit parameter against the range [min_value, max_value].

        Args:
            limit (Union[str, int]): Value can be string number or int. None selects the default.
            min_value (int): Limit should be greater than or equal to this value. Default: 1.
            max_value (int): Limit should be less than or equal to this value. Default: 1000.
            default_value (int): Value returned when limit is None. Default: 100.

        Returns:
            int, `default_value` when limit is None, otherwise the limit converted by `to_int`.

        Raises:
            ParamValueError: If the converted limit lies outside [min_value, max_value].
        """
        if limit is None:
            return default_value

        converted = to_int(limit, 'limit')
        if converted < min_value or converted > max_value:
            raise ParamValueError("'limit' should in [{}, {}].".format(
                min_value, max_value))
        return converted
Exemplo n.º 26
0
    def _get_file_path(self, device_type, file_type):
        """
        Get the path of a memory usage file.

        Args:
            device_type (str): Device type. Only "ascend" is accepted.
            file_type (str): Memory usage file type, compared against `FileType.SUMMARY.value` and
                `FileType.DETAILS.value`.

        Returns:
            str, the validated file path of the memory usage file corresponding to its file_type. If
                file_type matches neither known type, the filename is empty and the path is built from
                the profiling directory alone.

        Raises:
            ParamValueError: If device_type is not "ascend".
        """
        if device_type != "ascend":
            logger.error('Memory Usage only supports Ascend for now. Please check the device type.')
            raise ParamValueError("Invalid device type.")

        # Compare by value, not identity: `is` on strings only works when both happen to be the
        # same object, which is not guaranteed for values coming from requests or files.
        filename = ""
        if file_type == FileType.SUMMARY.value:
            filename = self._summary_filename.format(self._device_id)
        elif file_type == FileType.DETAILS.value:
            filename = self._details_filename.format(self._device_id)

        file_path = os.path.join(self._profiling_dir, filename)
        file_path = validate_and_normalize_path(
            file_path, raise_key='Invalid memory usage file path.'
        )

        return file_path
Exemplo n.º 27
0
    def list_tensors(self, train_id, tag):
        """
        Fetch the tensors recorded under a tag of one train job.

        Args:
            train_id (str): ID for train job.
            tag (str): The tag name.

        Returns:
            list, whatever `events_data.tensors(tag)` yields, or an empty list when the lookup raises
                AttributeError.

        Raises:
            TrainJobNotExistError: If the train job has no loader in the snapshot loader pool.
            ParamValueError: If the tag lookup raises KeyError.
        """
        pool = self._get_snapshot_loader_pool()
        if not self._is_loader_in_loader_pool(train_id, pool):
            raise TrainJobNotExistError(
                "Can not find the given train job in cache.")

        loader = pool[train_id].data_loader
        try:
            return loader.get_events_data().tensors(tag)
        except KeyError:
            raise ParamValueError("Can not find any data in this train job by given tag.")
        except AttributeError:
            # Best effort: fall through and report no tensors.
            logger.debug(
                "Train job %r has been deleted or it has not loaded data, "
                "and set tags to empty list.", train_id)

        return []
Exemplo n.º 28
0
def get_memory_usage_graphics():
    """
    Return the graphic representation of memory usage selected by the query arguments.

    Query arguments read: dir, device_id and device_type.

    Returns:
        object, the value returned by the analyser's `get_memory_usage_graphics`.

    Raises:
        ParamValueError: If device_type is not 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/memory-graphics
    """
    query = request.args
    profiler_dir_abs = validate_and_normalize_profiler_path(
        query.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    device_id = query.get("device_id", default='0')
    # Called for validation only; the string value is what the analyser receives.
    to_int(device_id, 'device_id')
    device_type = query.get("device_type", default='ascend')

    if device_type != 'ascend':
        logger.info(
            "Invalid device_type, Memory Usage only supports Ascend for now.")
        raise ParamValueError("Invalid device_type.")

    memory_analyser = AnalyserFactory.instance().get_analyser(
        'memory_usage', profiler_dir_abs, device_id)
    return memory_analyser.get_memory_usage_graphics(device_type)
Exemplo n.º 29
0
    def get_display_timeline(self, device_type):
        """
        Load the timeline data used for UI display.

        Args:
            device_type (str): Either "ascend" or "gpu"; selects which display filename is used.

        Returns:
            object, the parsed JSON content of the timeline file, or an empty list when the file
                does not exist.

        Raises:
            ParamValueError: If device_type is neither "ascend" nor "gpu".
            ProfilerIOException: If reading or decoding the timeline file fails.
        """
        if device_type not in ("ascend", "gpu"):
            logger.info(
                'device type should be ascend or gpu. Please check the device type.'
            )
            raise ParamValueError("Invalid device_type.")

        if device_type == "ascend":
            filename_template = self._ascend_display_filename
        else:
            filename_template = self._gpu_display_filename
        display_filename = filename_template.format(self._device_id)

        file_path = validate_and_normalize_path(
            os.path.join(self._profiling_dir, display_filename),
            raise_key='Invalid timeline json path.')

        if not os.path.exists(file_path):
            logger.info('No timeline file. Please check the output path.')
            return []

        try:
            with open(file_path, 'r') as f_obj:
                return json.load(f_obj)
        except (IOError, OSError, json.JSONDecodeError) as err:
            logger.error(
                'Error occurred when read timeline display file: %s', err)
            raise ProfilerIOException
Exemplo n.º 30
0
    def get_single_image(self, train_id, tag, step):
        """
        Return the encoded image recorded at a given step of a tag.

        Args:
            train_id (str): The ID of the events data the image belongs to.
            tag (str): The name of the tag the images belongs to.
            step (int): The step of the image in the current reservoir.

        Returns:
            bytes, the `encoded_image` of the first tensor whose step matches.

        Raises:
            ParamValueError: If no tensor with the given step yields an image.
        """
        Validation.check_param_empty(train_id=train_id, tag=tag, step=step)
        step = to_int(step, "step")

        tensors = self._data_manager.list_tensors(train_id, tag)

        # Only the first tensor with a matching step is considered. The default value for a bytes
        # field is normally an empty byte string, see "Optional Fields And Default Values" in the
        # protobuf documentation.
        image = next(
            (tensor.value.encoded_image for tensor in tensors if tensor.step == step),
            None)

        if image is None:
            raise ParamValueError(
                "Can not find the step with given train job id and tag.")

        return image