def recheck(self):
        """
        Recheck all watchpoints.

        Marks the metadata stream as sending with recheck disabled, then
        sends the watchpoints and queues a run event of level 'recheck'.
        If that fails, the state is rolled back to waiting and recheck is
        enabled again.

        Returns:
            dict, metadata info for 'state' and 'enable_recheck'.

        Raises:
            DebuggerRecheckError: If the metadata stream reports that recheck
                is currently disabled.
            DebuggerContinueError: If building or queueing the recheck command
                raises a MindInsightException (chained as the cause).
        """
        metadata_stream = self._metadata_stream
        # validate backend status is able to recheck watchpoint
        if not metadata_stream.enable_recheck:
            log.error("Recheck is not available.")
            raise DebuggerRecheckError("Recheck is not available.")
        # flag the request as in flight before anything is sent
        metadata_stream.state = ServerStatus.SENDING.value
        metadata_stream.enable_recheck = False
        # send updated watchpoint and recheck command
        try:
            event = self._construct_run_event({'level': 'recheck'})
            self._send_watchpoints()
            self._cache_store.put_command(event)
        except MindInsightException as err:
            log.error("Failed to send recheck event.")
            log.exception(err)
            # roll back so that the caller may retry the recheck
            metadata_stream.state = ServerStatus.WAITING.value
            metadata_stream.enable_recheck = True
            # keep the original failure attached as the explicit cause
            raise DebuggerContinueError(
                "Failed to send recheck command.") from err
        log.debug("Send the recheck to command queue.")
        return metadata_stream.get(['state', 'enable_recheck'])
 def run(self):
     """
     Start the debugger offline server.

     Initializes the offline server manager, sets the running flag and
     then repeatedly waits for the manager to terminate. The loop ends
     when the running flag is cleared, the manager is no longer runnable,
     or the number of passes reaches ``_MAX_TRY_EXCEPT_COUNT``. In the
     last case the cache store is cleaned and the current metadata is put
     back into it before returning.
     """
     log.info("Initialize Offline Debugger Server for dbg_dir: %s",
              self._context.dbg_dir)
     manager = self._offline_server_manager
     manager.initialize()
     log.info("Start Offline Debugger Server for dbg_dir: %s",
              self._context.dbg_dir)
     self._running.set()

     limit = self._MAX_TRY_EXCEPT_COUNT
     attempts = 0
     while self._running.is_set() and attempts < limit:
         try:
             manager.wait_for_termination()
             if not manager.is_runnable():
                 break
         except MindInsightException as err:
             log.exception(err)
             log.warning(
                 "Error happens during listening on user commands. Restart listening again."
             )
         finally:
             # NOTE(review): this runs on every pass, including successful
             # ones and the ``break`` above, so the counter is the number
             # of passes rather than the number of failures - confirm
             # that this is intended.
             attempts += 1

     # protect server from too much failure commands.
     if attempts != limit:
         return
     store = self._cache_store
     store.clean()
     metadata = store.get_stream_handler(Streams.METADATA).get()
     store.put_data(metadata)
     log.warning("Exception exceed %d times, stop server.", attempts)
Exemplo n.º 3
0
    def continue_training(self, params):
        """
        Send RunCMD to MindSpore.

        Requires the metadata stream to be in the waiting state. The state
        is switched to running, the params are validated, the watchpoints
        are sent and the run event is queued. On failure the state is reset
        to waiting; on success recheck is disabled.

        Args:
            params (dict): The control params.

        Returns:
            dict, metadata info for 'state' and 'enable_recheck'.

        Raises:
            DebuggerContinueError: If the state is not waiting, or if
                validating, building or queueing the run command raises a
                MindInsightException (chained as the cause).
        """
        metadata_stream = self._metadata_stream
        if metadata_stream.state != ServerStatus.WAITING.value:
            log.error("MindSpore is not ready to run. Current state is: %s",
                      metadata_stream.state)
            raise DebuggerContinueError(
                "MindSpore is not ready to run or is running currently.")
        metadata_stream.state = ServerStatus.RUNNING.value
        try:
            self._validate_continue_params(params)
            event = self._construct_run_event(params)
            self._send_watchpoints()
            self._cache_store.put_command(event)
        except MindInsightException as err:
            log.error("Failed to send run event.")
            log.exception(err)
            # nothing was queued, so fall back to the waiting state
            metadata_stream.state = ServerStatus.WAITING.value
            # keep the original failure attached as the explicit cause
            raise DebuggerContinueError(
                "Failed to send run command.") from err
        # only reached when the command was queued successfully
        metadata_stream.enable_recheck = False
        log.debug("Send the RunCMD to command queue.")
        return metadata_stream.get(['state', 'enable_recheck'])
    def continue_training(self, params):
        """
        Send RunCMD to MindSpore.

        Requires the metadata stream to be in the waiting state. The state
        is switched to sending, the params are validated and the
        watchpoints are sent. When the current step is 0 and the watchpoint
        stream is recheckable, a recheck command is queued ahead of the run
        command. On failure the state is reset to waiting; on success
        recheck is disabled.

        Args:
            params (dict): The control params.

        Returns:
            dict, metadata info for 'state' and 'enable_recheck'.

        Raises:
            DebuggerContinueError: If the state is not waiting, or if
                validating, building or queueing the commands raises a
                MindInsightException (chained as the cause).
        """
        metadata_stream = self._metadata_stream
        if metadata_stream.state != ServerStatus.WAITING.value:
            log.error("MindSpore is not ready to run. Current state is: %s",
                      metadata_stream.state)
            raise DebuggerContinueError(
                "MindSpore is not ready to run or is running currently.")
        metadata_stream.state = ServerStatus.SENDING.value
        try:
            self._validate_continue_params(params)
            event = self._construct_run_event(params)
            # whether need to send recheck before continue, especially for
            # initialization watchpoint. Decided before the watchpoints are
            # sent - presumably sending alters the recheckable state; confirm.
            recheck_flag = bool(metadata_stream.step == 0
                                and self._watchpoint_stream.is_recheckable())
            self._send_watchpoints()
            if recheck_flag:
                self._cache_store.put_command(
                    self._construct_run_event({'level': 'recheck'}))
                log.info(
                    "Send recheck command for initialization watchpoints before continue command."
                )
            self._cache_store.put_command(event)
        except MindInsightException as err:
            log.error("Failed to send run event.")
            log.exception(err)
            # fall back to the waiting state so the caller may retry
            metadata_stream.state = ServerStatus.WAITING.value
            # keep the original failure attached as the explicit cause
            raise DebuggerContinueError(
                "Failed to send run command.") from err
        # only reached when the run command was queued successfully
        metadata_stream.enable_recheck = False
        log.debug("Send the RunCMD to command queue.")
        return metadata_stream.get(['state', 'enable_recheck'])
Exemplo n.º 5
0
    def get_tensor_value_by_shape(self, shape=None):
        """
        Get tensor value by shape.

        Args:
            shape (tuple): Index applied to the stored value. It must have
                the same length as ``self.shape``. Any value that is not a
                tuple (including None) selects the whole stored value.

        Returns:
            Union[None, str, numpy.ndarray], None when no value is stored
            yet; the whole stored value when `shape` is not a tuple; the
            string "Too large to show." when the indexed result is an array
            with more elements than ``self.max_number_data_show_on_ui``;
            otherwise the indexed result as an array (non-array results are
            wrapped with ``np.asarray``).

        Raises:
            DebuggerParamValueError: If the length of `shape` differs from
                the length of ``self.shape``, or if indexing raises an
                IndexError (chained as the cause).
        """
        if self._value is None:
            log.warning("%s has no value yet.", self.name)
            return None
        # None is not a tuple, so one check covers both "no shape given"
        # and "shape of an unsupported type".
        if not isinstance(shape, tuple):
            log.info("Get the whole tensor value with shape is %s", shape)
            return self._value
        if len(shape) != len(self.shape):
            log.error("Invalid shape. Received: %s, tensor shape: %s", shape,
                      self.shape)
            raise DebuggerParamValueError("Invalid shape. Shape unmatched.")
        try:
            value = self._value[shape]
        except IndexError as err:
            log.error("Invalid shape. Received: %s, tensor shape: %s", shape,
                      self.shape)
            log.exception(err)
            # keep the original indexing failure attached as the cause
            raise DebuggerParamValueError(
                "Invalid shape. Shape unmatched.") from err
        if not isinstance(value, np.ndarray):
            # indexing produced a non-array result; hand back an array anyway
            return np.asarray(value)
        if value.size > self.max_number_data_show_on_ui:
            log.info(
                "The tensor size is %d, which is too large to show on UI.",
                value.size)
            return "Too large to show."
        return value
Exemplo n.º 6
0
 def record_log(*args, **kwargs):
     """
     Call ``func`` with the given arguments and log any exception it raises.

     ``func`` is a free variable bound outside this block (presumably the
     function being decorated - the enclosing scope is not visible here).

     Returns:
         Whatever ``func`` returns.

     Raises:
         Exception: The exception raised by ``func``, re-raised unchanged
             after it has been logged.
     """
     try:
         return func(*args, **kwargs)
     except Exception as err:
         log.exception(err)
         # bare raise re-raises the active exception without adding an
         # extra traceback entry for this line
         raise
Exemplo n.º 7
0
 def record_log(*args, **kwargs):
     """
     Call ``func`` with the given arguments and convert any failure.

     ``func`` is a free variable bound outside this block (presumably the
     function being decorated - the enclosing scope is not visible here).

     Returns:
         Whatever ``func`` returns.

     Raises:
         DebuggerServerRunningError: If ``func`` raises any Exception. The
             message is ``str(err)`` and the original exception is chained
             as the cause.
     """
     try:
         return func(*args, **kwargs)
     except Exception as err:
         log.exception(err)
         # keep the original failure attached as the explicit cause
         raise DebuggerServerRunningError(str(err)) from err