def _get_by_offset(self, group_condition):
        """Return the list of watchpoint hits on the offset page."""
        limit = group_condition.get('limit')
        offset = group_condition.get('offset')
        if not isinstance(limit, int) or not isinstance(offset, int):
            log.error("Param limit or offset is not a integer")
            raise DebuggerParamValueError(
                "Param limit or offset is not a integer")
        watch_point_hits = []

        total = len(self._ordered_hits)

        if limit * offset >= total and offset != 0:
            log.error("Param offset out of bounds")
            raise DebuggerParamValueError("Param offset out of bounds")

        if total == 0:
            return {}

        for watchpoint_hits in self._ordered_hits[(limit *
                                                   offset):(limit *
                                                            (offset + 1))]:
            self._get_tensors(watchpoint_hits, watch_point_hits)

        return {
            'watch_point_hits': watch_point_hits,
            'offset': offset,
            'total': total
        }
Exemplo n.º 2
0
    def get_tensor_value_by_shape(self, shape=None):
        """
        Get tensor value by shape.

        Args:
            shape (tuple): The index applied to the tensor value. When it is None
                or not a tuple, the whole value is returned.

        Returns:
            Union[None, str, numpy.ndarray], None if the tensor has no value yet,
            the str "Too large to show." if the selected sub-tensor holds more
            elements than `max_number_data_show_on_ui`, otherwise the sub-tensor.

        Raises:
            DebuggerParamValueError: If the length of `shape` differs from the
                length of the tensor shape, or indexing raises IndexError.
        """
        if self._value is None:
            log.warning("%s has no value yet.", self.name)
            return None
        if shape is None or not isinstance(shape, tuple):
            log.info("Get the whole tensor value with shape is %s", shape)
            return self._value
        if len(shape) != len(self.shape):
            log.error("Invalid shape. Received: %s, tensor shape: %s", shape,
                      self.shape)
            raise DebuggerParamValueError("Invalid shape. Shape unmatched.")
        try:
            value = self._value[shape]
        except IndexError as err:
            log.error("Invalid shape. Received: %s, tensor shape: %s", shape,
                      self.shape)
            log.exception(err)
            # Keep the original IndexError attached as the explicit cause.
            raise DebuggerParamValueError(
                "Invalid shape. Shape unmatched.") from err
        if isinstance(value, np.ndarray):
            if value.size > self.max_number_data_show_on_ui:
                # Log the size before the array is replaced by the placeholder.
                log.info(
                    "The tensor size is %s, which is too large to show on UI.",
                    value.size)
                value = "Too large to show."
        else:
            # Indexing down to a single element yields a scalar; wrap it back
            # into an array.
            value = np.asarray(value)
        return value
Exemplo n.º 3
0
def validate_watch_condition_params(watch_condition):
    """
    Validate watch condition parameters.

    Args:
        watch_condition (dict): Watch condition.

            - condition (str): Condition type. Should be in WATCHPOINT_CONDITION_MAPPING.

            - param (Union[int, float]): Condition value. Must be a number for comparison
                conditions and must be absent (or falsy) for 'NAN', 'INF' and 'OVERFLOW'.
                The value must stay finite when converted to np.float32.

    Raises:
        DebuggerParamValueError: If a param is given for a condition that takes none,
            if the param of a comparison condition is not a number, or if the number
            does not fit in np.float32.
    """
    condition = watch_condition.get('condition')
    param = watch_condition.get('param')
    if condition in ('NAN', 'INF', 'OVERFLOW'):
        if param:
            log.error("No param is expected for %s condition.", condition)
            raise DebuggerParamValueError("No param is expected.")
        return

    if not isinstance(param, (float, int)):
        log.error("Number param should be given for condition <%s>.",
                  condition)
        raise DebuggerParamValueError("Number param should be given.")
    try:
        # Overflow to inf is the condition being tested, so silence the
        # floating-point warning NumPy may emit for it.
        with np.errstate(over='ignore'):
            out_of_range = bool(np.isinf(np.float32(param)))
    except OverflowError:
        # A Python int too large to become a float raises instead of turning
        # into inf; treat it as out of range as well.
        out_of_range = True
    if out_of_range:
        log.error("Condition param should be float32.")
        raise DebuggerParamValueError(
            "The value of condition param should be within float32.")
Exemplo n.º 4
0
    def get_tensors_diff(self, tensor_name, shape, tolerance=0):
        """
        Get the comparison between the current step and previous step value of a tensor.

        Args:
            tensor_name (str): The name of tensor for cache.
            shape (tuple): Specify concrete dimensions of shape.
            tolerance: Tolerance of difference between current step tensor and previous
                step tensor. Default value is 0. It is passed unchanged to
                `TensorUtils.calc_diff_between_two_tensor` and compared against the
                tolerance of the cached comparison to decide whether to recompute.

        Raises:
            DebuggerParamValueError: If the current step or previous step tensor cannot
                be found, or the two tensor values differ in shape.

        Returns:
            dict, the retrieved data under the key `tensor_value`.
        """
        current = self.get_valid_tensor_by_name(tensor_name)
        previous = self.get_valid_tensor_by_name(tensor_name, prev=True)
        if not current or not previous:
            log.error("Get current step and previous step for this tensor name %s failed.", tensor_name)
            raise DebuggerParamValueError(f"Get current step and previous step for this tensor name "
                                          f"{tensor_name} failed.")
        current_slice = current.get_tensor_value_by_shape(shape)
        previous_slice = previous.get_tensor_value_by_shape(shape)

        # Basic info of the current tensor, without the fields that do not
        # belong in a comparison reply.
        info = current.get_basic_info()
        for dropped_key in ('has_prev_step', 'value'):
            info.pop(dropped_key)

        # Reuse the cached comparison unless it is missing or was computed
        # with another tolerance.
        comparison = current.tensor_comparison
        if not comparison or comparison.tolerance != tolerance:
            if current.value.shape != previous.value.shape:
                raise DebuggerParamValueError("The shape of these two step tensors is not the same.")
            full_diff = TensorUtils.calc_diff_between_two_tensor(current.value, previous.value, tolerance)
            diff_stats = TensorUtils.get_statistics_from_tensor(full_diff)
            comparison = TensorComparison(tolerance, diff_stats, full_diff)
            current.update_tensor_comparisons(comparison)

        # A slice is either an np.ndarray or a str placeholder.
        both_are_arrays = isinstance(current_slice, np.ndarray) and isinstance(previous_slice, np.ndarray)
        if both_are_arrays:
            diff_slice = comparison.value[shape] if shape else comparison.value
            stacked = np.stack([previous_slice, current_slice, diff_slice], axis=-1)
            info['diff'] = stacked.tolist()
        elif isinstance(current_slice, str):
            info['diff'] = current_slice

        # Add comparison statistics.
        info.update(self._get_comparison_statistics(current, previous))
        return {'tensor_value': info}
Exemplo n.º 5
0
 def validate_tensor_param(name, detail):
     """
     Validate params for retrieve tensor request.

     Args:
         name: The tensor name. Must be a str that contains ':'.
         detail: The requested detail. Only 'data' is accepted.

     Raises:
         DebuggerParamValueError: If either param is invalid.
     """
     name_is_valid = isinstance(name, str) and ':' in name
     if not name_is_valid:
         log.error("Invalid tensor name. Received: %s", name)
         raise DebuggerParamValueError("Invalid tensor name.")

     detail_is_valid = detail == 'data'
     if not detail_is_valid:
         log.error("Invalid detail value. Received: %s", detail)
         raise DebuggerParamValueError("Invalid detail value.")
Exemplo n.º 6
0
    def _parse_pos(self, pos):
        """Get next pos according to input position."""
        elements = pos.split(':')
        try:
            idx = int(elements[-1])
        except ValueError:
            log.error("Invalid index. The index in pos should be digit but get pos:%s", pos)
            raise DebuggerParamValueError("Invalid pos.")

        if idx < 0 or idx >= self.max_limit:
            log.error("Invalid index. The index in pos should between [0, %d)", self.max_limit)
            raise DebuggerParamValueError(f"Invalid pos. {idx}")
        flag = elements[0] if len(elements) == 2 else ''

        return flag, idx
Exemplo n.º 7
0
    def retrieve(self, mode, filter_condition=None):
        """
        Retrieve data according to mode and params.

        Args:
            mode (str): The type of info message. One of 'all', 'node' and
                'watchpoint'.
            filter_condition (dict): The filter condition. Default: None, which
                is handed to the mode handler as an empty dict.

        Returns:
            dict, the retrieved data. While the metadata stream is in pending
            state, the result of `metadata_stream.get()` is returned instead.

        Raises:
            DebuggerParamValueError: If `mode` is not a supported mode.
        """
        log.info(
            "receive retrieve request for mode:%s\n, filter_condition: %s",
            mode, filter_condition)
        mode_mapping = {
            'all': self._retrieve_all,
            'node': self._retrieve_node,
            'watchpoint': self._retrieve_watchpoint,
        }
        # validate param <mode>
        if mode not in mode_mapping:
            # Report the rejected value and the modes that are really accepted.
            log.error(
                "Invalid param <mode>. <mode> should be in %s, but got %s.",
                list(mode_mapping), mode)
            raise DebuggerParamValueError("Invalid mode.")
        # validate backend status
        metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA)
        if metadata_stream.state == ServerStatus.PENDING.value:
            log.info("The backend is in pending status.")
            return metadata_stream.get()

        filter_condition = {} if filter_condition is None else filter_condition
        return mode_mapping[mode](filter_condition)
Exemplo n.º 8
0
    def control(self, params=None):
        """
        Control the training process.

        Args:
            params (dict): The control params. Default: None, which is treated
                as an empty dict and therefore rejected as an invalid mode.

                - mode (str): Acceptable control command, including `continue`,
                    `pause` and `terminate`.

                - level (str): The control granularity, `node` level or `step` level.
                    Default: `step`.

                - steps (int): Specify the steps that training should run.
                    Used when `level` is `step`.

                - name (str): Specify the name of the node. Used when `level` is `node`.

        Returns:
            dict, the response.

        Raises:
            DebuggerParamValueError: If `mode` is not one of the acceptable
                control commands.
        """
        log.info("Receive control request: %s.", params)
        # The signature allows params to be omitted; without this guard the
        # lookup below fails with AttributeError instead of a param error.
        params = {} if params is None else params
        mode = params.get('mode')
        metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA)
        if mode == 'continue':
            return self._continue(metadata_stream, params)
        if mode in ('pause', 'terminate'):
            handler = self._pause if mode == 'pause' else self._terminate
            return handler(metadata_stream)

        log.error("Invalid control mode %s", mode)
        raise DebuggerParamValueError("Invalid control mode.")
Exemplo n.º 9
0
    def download(self, name, prev, graph_name=None, rank_id=0):
        """
        Download the tensor value.

        Args:
            name (str): Node name shown in UI. Must contain ':'.
            prev (bool): The previous step or current step.
            graph_name (Union[str, None]): The graph name, default is: None.
            rank_id (int): The id of rank. Default: 0.

        Returns:
            The result of the download manager lookup for the tensor; per the
            original documentation, the file path and the file name.

        Raises:
            DebuggerParamValueError: If `name` is not a str containing ':'.
        """
        name_is_valid = isinstance(name, str) and ':' in name
        if not name_is_valid:
            log.error("Invalid tensor name. Received: %s", name)
            raise DebuggerParamValueError("Invalid tensor name.")

        _, tensor_name, graph_name = self._get_tensor_name_and_type_by_ui_name(
            name, graph_name, rank_id)
        log.info("Download the tensor value: name: %s", tensor_name)

        all_tensor_streams = self.cache_store.get_stream_handler(Streams.TENSOR)
        rank_tensor_stream = all_tensor_streams.get_tensor_handler_by_rank_id(rank_id)
        target_step = rank_tensor_stream.cur_step
        if prev:
            target_step = target_step - 1

        return rank_tensor_stream.download_mgr.get(
            tensor_name=tensor_name,
            graph_name=graph_name,
            step=target_step,
            rank_id=rank_id)
Exemplo n.º 10
0
    def get(self, filter_condition=None):
        """
        Get full tensor value.

        Args:
            filter_condition (dict): Filter condition. Default: None, which is
                treated as an empty dict.

                - name (str): The full name of tensor.
                - node_type (str): The type of the node.
                - shape: The shape passed on to `tensor.get_full_info`.
                - prev (bool): Whether to get previous tensor.

        Returns:
            dict, the tensor_value.

        Raises:
            DebuggerParamValueError: If no tensor is found for the given name
                at the selected step.
        """
        # The signature allows the condition to be omitted; without this guard
        # the lookups below fail with AttributeError.
        filter_condition = {} if filter_condition is None else filter_condition
        name = filter_condition.get('name')
        node_type = filter_condition.get('node_type')
        shape = filter_condition.get('shape')
        step = self.prev_step if filter_condition.get('prev') else self.cur_step
        tensor = self._get_tensor(name, node_type, step)
        if not tensor:
            log.error("No tensor named %s at the step %s", name, step)
            raise DebuggerParamValueError("No tensor named {}".format(name))
        tensor_info = tensor.get_full_info(shape)
        self._update_has_prev_step_field(tensor_info, name, node_type)
        return {'tensor_value': tensor_info}
Exemplo n.º 11
0
    def find_tensor_file(self, pattern, rank_ids=None, iterations=None):
        """
        Find tensor files.

        Args:
            pattern (str): File name pattern.
            rank_ids (Union[None, list[int]]): Filter condition of rank id. Default: None,
                which selects every rank dir of the data loader.
            iterations (Union[None, list[int]]): Filter condition of iteration id. Default: None.

        Returns:
            OpPathManager, operator path object.

        Raises:
            DebuggerParamValueError: If an element of `rank_ids` is not an int.
        """
        op_path = OpPathManager(pattern)
        if rank_ids is not None:
            selected_dirs = []
            for rank_id in rank_ids:
                if not isinstance(rank_id, int):
                    raise DebuggerParamValueError(
                        "rank_ids should be list of int.")
                selected_dirs.append(self.data_loader.get_rank_dir(rank_id))
        else:
            selected_dirs = self.data_loader.rank_dirs

        # Collect the per-device result of every selected rank.
        for selected_dir in selected_dirs:
            per_device = self.find_tensor_file_per_device(
                pattern, selected_dir.rank_id, iterations)
            op_path.add(per_device)
        return op_path
Exemplo n.º 12
0
    def get_tensor_history(self, node_name, graph_name=None, depth=0):
        """
        Get the tensor history of a specified node.

        Args:
            node_name (str): The debug name of the node.
            graph_name (str): The graph_name. Default: None.
            depth (int): The number of layers the user
                wants to trace. Default is 0.

        Returns:
            dict, the tensor history under the key `tensor_history`.

        Raises:
            DebuggerParamValueError: If the node is a scope type node.
        """
        graph_name, node_name = self._parse_node_name(node_name, graph_name)
        graph = self._get_graph(graph_name=graph_name, node_name=node_name)

        # validate node type, scope node has no tensor history
        if is_scope_type(graph.get_node_type(node_name)):
            log.error("Scope type node has no tensor history.")
            raise DebuggerParamValueError("Invalid leaf node name.")

        history, output_count = graph.get_tensor_history(node_name, depth)

        # The first `output_count` entries are tagged as outputs, the rest as
        # inputs.
        output_entries = history[0:output_count]
        input_entries = history[output_count:]
        self._update_tensor_history(output_entries, 'output', graph_name)
        self._update_tensor_history(input_entries, 'input', graph_name)

        log.debug("Get %d tensors in tensor history for node <%s>.",
                  len(history), node_name)
        return {'tensor_history': history}
Exemplo n.º 13
0
 def _validate_leaf_name(self, node_name):
     """
     Validate if the node is a leaf node.

     Raises:
         DebuggerParamValueError: If the node type is a scope type.
     """
     node_type = self.cache_store.get_stream_handler(
         Streams.GRAPH).get_node_type(node_name)
     if not is_scope_type(node_type):
         return
     log.error("Scope type node has no tensor history.")
     raise DebuggerParamValueError("Invalid leaf node name.")
Exemplo n.º 14
0
    def get(self, filter_condition=None):
        """
        Get full tensor value.

        Args:
            filter_condition (dict): Filter condition. Default: None, which is
                treated as an empty dict.

                - name (str): The full name of tensor.
                - node_type (str): The type of the node.
                - shape: The shape passed on to `tensor.get_full_info`.
                - prev (bool): Whether to get previous tensor.

        Returns:
            dict, the tensor_value and whether need to send view_command.
            `view_cmd` is True only when the tensor status is UNCACHED.

        Raises:
            DebuggerParamValueError: If no tensor is found for the given name
                at the selected step.
        """
        # The signature allows the condition to be omitted; without this guard
        # the lookups below fail with AttributeError.
        filter_condition = {} if filter_condition is None else filter_condition
        name = filter_condition.get('name')
        node_type = filter_condition.get('node_type')
        shape = filter_condition.get('shape')
        step = self.prev_step if filter_condition.get('prev') else self.cur_step
        tensor = self._get_tensor(name, node_type, step)
        if not tensor:
            log.error("No tensor named %s at the step %s", name, step)
            raise DebuggerParamValueError("No tensor named {}".format(name))
        tensor_info = tensor.get_full_info(shape)
        self._update_has_prev_step_field(tensor_info, name, node_type,
                                         self.cur_step)
        res = {'tensor_value': tensor_info, 'view_cmd': False}
        if tensor.status == TensorStatusEnum.UNCACHED.value:
            # Register the uncached tensor as a hold-value tensor and tell the
            # caller that a view command is needed.
            self._add_hold_value_tensors(name, step)
            res['view_cmd'] = True
        return res
    def reset_training_step(self, step_id):
        """
        Reset the training step.

        In online debugger mode the step is left untouched and only the current
        metadata is returned. Otherwise the metadata and tensor streams are moved
        to `step_id`, cached data and commands are cleaned, and the watchpoint
        stream is marked as outdated.

        Args:
            step_id (int): The target step_id.

        Returns:
            dict, metadata info holding the fields 'state', 'enable_recheck' and 'step'.

        Raises:
            DebuggerParamValueError: If `step_id` is greater than the max step number.
        """
        metadata_stream = self._metadata_stream
        # Online mode: log the rejection but reply with the unchanged metadata
        # instead of raising.
        if metadata_stream.debugger_type == DebuggerServerMode.ONLINE.value:
            log.error(
                "'step_id' can not be changed manually in online debugger.")
            return metadata_stream.get(['state', 'enable_recheck', 'step'])
        # NOTE(review): the check accepts step_id == max_step_num while the log
        # message says "less than", and no lower bound is enforced -- confirm
        # the intended range.
        if step_id > metadata_stream.max_step_num:
            log.error("Invalid step_id, step_id should be less than %d.",
                      metadata_stream.max_step_num)
            raise DebuggerParamValueError("Invalid step_id.")
        # The state is SENDING while the streams are being switched and is set
        # to WAITING once the cache has been cleaned.
        metadata_stream.state = ServerStatus.SENDING.value
        metadata_stream.step = step_id
        self._cache_store.get_stream_handler(Streams.TENSOR).set_step(step_id)
        self._cache_store.clean_data()
        self._cache_store.clean_command()
        metadata_stream.enable_recheck = True
        metadata_stream.state = ServerStatus.WAITING.value
        self._cache_store.get_stream_handler(Streams.WATCHPOINT).set_outdated()
        log.debug("Send the Change_training_step CMD.")
        return metadata_stream.get(['state', 'enable_recheck', 'step'])
Exemplo n.º 16
0
 def parse_shape(shape):
     """
     Parse shape.

     Args:
         shape (Union[None, str]): The shape text wrapped in '[' and ']', with
             dimensions separated by ','.

     Returns:
         Union[None, tuple], None when `shape` is None, otherwise one parsed
         entry per dimension (an empty tuple for an empty shape text).

     Raises:
         DebuggerParamValueError: If `shape` is not a bracketed str or holds
             more than two ':'.
     """
     if shape is None:
         return shape

     is_bracketed = (isinstance(shape, str) and shape.startswith('[')
                     and shape.endswith(']'))
     if not is_bracketed:
         log.error("Invalid shape. Received: %s", shape)
         raise DebuggerParamValueError("Invalid shape.")

     shape = shape.strip('[]')
     if shape.count(':') > 2:
         log.error("Invalid shape. At most two dimensions are specified.")
         raise DebuggerParamValueError("Invalid shape.")

     if shape:
         parsed_shape = tuple(
             str_to_slice_or_int(dim) for dim in shape.split(','))
     else:
         parsed_shape = tuple()
     log.info("Parsed shape: %s from %s", parsed_shape, shape)
     return parsed_shape
Exemplo n.º 17
0
 def remove(self, sub_name):
     """
     Remove sub node.

     Args:
         sub_name: The key of the child to drop from `_children`.

     Raises:
         DebuggerParamValueError: If there is no child under `sub_name`.
     """
     try:
         self._children.pop(sub_name)
     except KeyError as missing:
         log.error("Failed to find node %s. %s", sub_name, missing)
         message = "Failed to find node {}".format(sub_name)
         raise DebuggerParamValueError(message)
Exemplo n.º 18
0
def validate_param_type(condition_id, condition_param, param):
    """
    Validate parameter type.

    Args:
        condition_id (str): Condition id. Should be in WATCHPOINT_CONDITION_MAPPING.
        condition_param (ConditionParameter): Condition Parameter object.
        param (dict): Condition parameter value, read from its "value" key.

    Raises:
        DebuggerParamValueError: If a FLOAT64 or INT64 parameter is not given a
            float or int, or a BOOL parameter is not given a bool.
    """
    number_type_names = (ValueTypeEnum.FLOAT64.name, ValueTypeEnum.INT64.name)
    if condition_param.type.name in number_type_names:
        if not isinstance(param.get("value"), (float, int)):
            log.error("Number param should be given for condition: %s",
                      condition_id)
            raise DebuggerParamValueError("Number param should be given.")
    if condition_param.type.name == ValueTypeEnum.BOOL.name:
        if not isinstance(param.get("value"), bool):
            log.error("Bool param should be given for condition: %s", condition_id)
            raise DebuggerParamValueError("Bool param should be given.")
Exemplo n.º 19
0
    def get_watchpoint_by_id(self, watchpoint_id):
        """
        Get watchpoint by watchpoint id.

        Args:
            watchpoint_id: The key of the watchpoint in `_watchpoints`.

        Returns:
            The watchpoint stored under `watchpoint_id`.

        Raises:
            DebuggerParamValueError: If no (truthy) watchpoint is stored under
                `watchpoint_id`.
        """
        watchpoint = self._watchpoints.get(watchpoint_id)
        if not watchpoint:
            # The id is unvalidated here and may not be a number, so it is
            # logged with %s; %d would break log formatting for non-ints.
            log.error("Invalid watchpoint id %s", watchpoint_id)
            raise DebuggerParamValueError(
                "Invalid watchpoint id {}".format(watchpoint_id))

        return watchpoint
Exemplo n.º 20
0
    def tensor_comparisons(self, name, shape, detail='data', tolerance='0'):
        """
        Get tensor comparisons data for given name, detail, shape and tolerance.

        Args:
            name (str): The name of tensor for ui.
            shape (str): Specify concrete dimensions of shape.
            detail (str): Specify which data to query. Current available value is 'data' which means
                          concrete tensor data.
            tolerance (str): Specify tolerance of difference between current step tensor and previous
                             step tensor. Default value is 0.

        Raises:
            DebuggerParamValueError, If node type is not parameter or value of detail is not support.
            DebuggerCompareTensorError, If MindSpore is not in waiting state.

        Returns:
            dict, the retrieved data.
        """
        metadata_state = self.cache_store.get_stream_handler(
            Streams.METADATA).state
        if metadata_state != ServerStatus.WAITING.value:
            log.error(
                "Failed to compare tensors as the MindSpore is not in waiting state."
            )
            raise DebuggerCompareTensorError(
                "Failed to compare tensors as the MindSpore is not in waiting state."
            )

        self.validate_tensor_param(name, detail)
        parsed_shape = self.parse_shape(shape)
        node_type, tensor_name = self._get_tensor_name_and_type_by_ui_name(
            name)
        tolerance = to_float(tolerance, 'tolerance')
        tensor_stream = self.cache_store.get_stream_handler(Streams.TENSOR)

        # Reject unsupported requests first; the comparison itself is last.
        if detail != 'data':
            raise DebuggerParamValueError(
                "The value of detail: {} is not support.".format(detail))
        if node_type != NodeTypeEnum.PARAMETER.value:
            raise DebuggerParamValueError(
                "The node type must be parameter, but got {}.".format(
                    node_type))
        return tensor_stream.get_tensors_diff(tensor_name, parsed_shape,
                                              tolerance)
Exemplo n.º 21
0
 def validate_watchpoint_id(self, watch_point_id):
     """
     Validate watchpoint id.

     Args:
         watch_point_id (int): The id to check. A falsy id such as 0 is
             accepted without being looked up.

     Raises:
         DebuggerParamTypeError: If the id is not an int.
         DebuggerParamValueError: If a non-zero id is not in `_watchpoints`.
     """
     if not isinstance(watch_point_id, int):
         log.error(
             "Invalid watchpoint id %s. The watch point id should be int.",
             watch_point_id)
         raise DebuggerParamTypeError("Watchpoint id should be int type.")

     if not watch_point_id:
         return
     if watch_point_id not in self._watchpoints:
         log.error("Invalid watchpoint id: %d.", watch_point_id)
         raise DebuggerParamValueError(
             "Invalid watchpoint id: {}".format(watch_point_id))
Exemplo n.º 22
0
    def get_node_by_bfs_order(self, node_name=None, ascend=True):
        """
        Traverse the graph in order of breadth-first search by given node.

        Args:
            node_name (str): The name of current chosen leaf node. Default: None.
            ascend (bool): If True, traverse the input nodes;
                If False, traverse the output nodes. Default is True.

        Returns:
            The next node as given by `get_next_node_in_bfs`. Without a
            `node_name`, the first entry of the BFS order is returned, or None
            when `ascend` is False.

        Raises:
            DebuggerParamValueError: If the BFS order is empty or `node_name`
                is not part of it.
        """
        self._graph_exists()
        order = self.bfs_order
        order_size = len(order)

        if not order:
            msg = 'Cannot get the BFS order of the graph!'
            log.error(msg)
            raise DebuggerParamValueError(msg)

        # No start node given: begin at the head of the order, unless the
        # caller explicitly asked for the other direction.
        if node_name is None:
            return None if ascend is False else order[0]

        try:
            position = order.index(node_name)
            log.debug("The index of the node in BFS list is: %d", position)
        except ValueError as err:
            log.error(
                'Cannot find the node: %s. Please check '
                'the node name: %s', node_name, err)
            msg = f'Cannot find the node: {node_name}. ' \
                  f'Please check the node name {err}.'
            raise DebuggerParamValueError(msg)

        return self.get_next_node_in_bfs(position, order_size, ascend)
Exemplo n.º 23
0
 def _validate_node_type(self, node_name):
     """Check the node type in node control."""
     if not node_name:
         return
     node_type = self.cache_store.get_stream_handler(
         Streams.GRAPH).get_node_type(node_name)
     unsupported_types = [item.value for item in list(NodeTypeEnum)]
     if node_type in unsupported_types:
         log.error("Invalid node type. %s", node_name)
         raise DebuggerParamValueError(
             f"The type of node {node_name} is unsupported for "
             "continue to command.")
Exemplo n.º 24
0
 def load(self, tensor_name, graph_name, prev, node_type, tensor=None):
     """
     Load the tensor.

     When the tensor value is already cached, it is saved as a .npy file in a
     fresh temporary directory and registered with the download manager.

     Args:
         tensor_name (str): The tensor name in the form "<node name>:<slot>".
         graph_name (str): The graph name, stored with the registered file.
         prev (bool): If truthy, the step before the current step is used.
         node_type (str): The node type, used for the lookup and the file name.
         tensor: An already resolved tensor. Default: None, which looks the
             tensor up by name, node type and step.

     Returns:
         dict, `{'in_memory': True}` if the cached value was written to a
         file, otherwise `{'in_memory': False}`.

     Raises:
         DebuggerParamValueError: If the tensor is missing or empty, or its
             download size exceeds MAX_CACHE_SPACE.
     """
     self.download_mgr.check_status()
     step = self._cur_step
     if prev:
         step -= 1
     tensor = self._get_tensor(tensor_name, node_type,
                               step) if tensor is None else tensor
     if not tensor or tensor.status == TensorStatusEnum.EMPTY.value:
         log.error("No tensor named %s at the step %s", tensor_name, step)
         raise DebuggerParamValueError(
             "No tensor named {}".format(tensor_name))
     if tensor.download_size > MAX_CACHE_SPACE:
         log.error(
             "Tensor named %s at the step %s is too large to download.",
             tensor_name, step)
         raise DebuggerParamValueError(
             "Tensor named {} at the step {} is too large to download.".
             format(tensor_name, step))
     if tensor.status == TensorStatusEnum.CACHED.value:
         # The directory object is handed to the download manager below, so it
         # is deliberately not used as a context manager here.
         temp_dir = tempfile.TemporaryDirectory(
             dir=self.download_mgr.temp_base_dir)
         os.chmod(temp_dir.name, DIR_MODE)
         node_name, slot = tensor_name.rsplit(':', 1)
         # Keep only the last path component. Taking [-1] also works for a
         # name without any '/' scope, where unpacking two parts would raise.
         node_name = node_name.rsplit('/', 1)[-1]
         file_name = "{}.{}.0.0.{}.output.{}.NONE.npy".format(
             node_type, node_name, round(time.time() * 100), slot)
         file_path = os.path.join(temp_dir.name, file_name)
         np.save(file_path, tensor.value)
         os.chmod(file_path, FILE_MODE)
         tensor_info = {
             "tensor_name": tensor_name,
             "graph_name": graph_name,
             "step": step,
             "rank_id": self._rank_id
         }
         self.download_mgr.add(file_name, file_path, temp_dir,
                               **tensor_info)
         return {'in_memory': True}
     return {'in_memory': False}
Exemplo n.º 25
0
    def tensor_comparisons(self,
                           name,
                           shape,
                           detail='data',
                           tolerance='0',
                           rank_id=0,
                           graph_name=None):
        """
        Get tensor comparisons data for given name, detail, shape and tolerance.

        Args:
            name (str): The name of tensor for ui.
            shape (str): Specify concrete dimensions of shape.
            detail (str): Specify which data to query. Current available value is 'data' which means
                          concrete tensor data.
            tolerance (str): Specify tolerance of difference between current step tensor and previous
                             step tensor. Default value is 0.
            rank_id (int): The id of rank. Default: 0.
            graph_name (str): The graph name. Default: None.

        Returns:
            dict, the retrieved data, without the `view_cmd` entry.

        Raises:
            DebuggerCompareTensorError: If the metadata state is not waiting.
            DebuggerParamValueError: If the node type is not parameter.
        """
        metadata_state = self.cache_store.get_stream_handler(
            Streams.METADATA).state
        if metadata_state != ServerStatus.WAITING.value:
            log.error(
                "Failed to compare tensors as the MindSpore is not in waiting state."
            )
            raise DebuggerCompareTensorError(
                "Failed to compare tensors as the MindSpore is not in waiting state."
            )

        self.validate_tensor_param(name, detail)
        # Limit to query max two dimensions for tensor in table view.
        parsed_shape = TensorUtils.parse_shape(shape,
                                               limit=MAX_DIMENSIONS_FOR_TENSOR)
        node_type, tensor_name, graph_name = self._get_tensor_name_and_type_by_ui_name(
            name, graph_name, rank_id)
        tolerance = to_float(tolerance, 'tolerance')
        rank_tensor_stream = self.cache_store.get_stream_handler(
            Streams.TENSOR).get_tensor_handler_by_rank_id(rank_id)
        cur_step = self.cache_store.get_stream_handler(Streams.METADATA).step

        if node_type != NodeTypeEnum.PARAMETER.value:
            raise DebuggerParamValueError(
                "The node type must be parameter, but got {}.".format(
                    node_type))

        reply = rank_tensor_stream.get_tensors_diff(tensor_name, parsed_shape,
                                                    tolerance, cur_step)
        # `view_cmd` is an internal flag: strip it from the reply and send the
        # view command when it was set.
        needs_view_cmd = reply.pop('view_cmd', False)
        if needs_view_cmd:
            self._send_view_cmd(name, graph_name, rank_id, tensor_name,
                                node_type)
        return reply
Exemplo n.º 26
0
    def _get_by_name(self, group_condition):
        """Return the list of watchpoint hits by the group condition."""
        limit = group_condition.get('limit')
        if not isinstance(limit, int) or limit == 0:
            log.error("Param limit is 0 or not a integer")
            raise DebuggerParamValueError("Param limit is 0 or not a integer")

        index = self._multi_graph_hits.get((group_condition.get('graph_name'),
                                            group_condition.get('node_name')))
        if index is not None:
            group_condition['offset'] = index // limit
            return self._get_by_offset(group_condition)

        return {}
Exemplo n.º 27
0
def validate_watch_condition(watch_condition):
    """
    Validate watch condition.

    Args:
        watch_condition (dict): Watch condition. Its `condition` entry must be a
            key of WATCHPOINT_CONDITION_MAPPING.

    Raises:
        DebuggerParamTypeError: If `watch_condition` is not a dict.
        DebuggerParamValueError: If the condition is unknown. The param check
            delegated to `validate_watch_condition_params` may raise as well.
    """
    if not isinstance(watch_condition, dict):
        log.error("<watch_condition> should be dict. %s received.",
                  watch_condition)
        raise DebuggerParamTypeError("<watch_condition> should be dict.")

    # validate condition
    known_conditions = WATCHPOINT_CONDITION_MAPPING.keys()
    if watch_condition.get('condition') not in known_conditions:
        log.error("Invalid watch condition. Acceptable values are <%s>.",
                  str(known_conditions))
        raise DebuggerParamValueError("Invalid watch condition value.")

    # validate param
    validate_watch_condition_params(watch_condition)