Exemplo n.º 1
0
def _common_init(config):
    """
    Applies the options shared by the init entry points.

    Currently this only handles ``EnableAutoNumpySharedMemPickling``:
    if that option is set to a true value and the flag in
    ``task_system.SharedMemNumpyConfig`` is not enabled yet, it gets enabled
    and a short notice is printed.

    :param dict[str] config: parsed options; only ``.get`` is used on it
    """
    requested = to_bool(config.get("EnableAutoNumpySharedMemPickling", False))
    # Nothing to do if the option is off or the flag was already switched on earlier.
    if not requested or task_system.SharedMemNumpyConfig["enabled"]:
        return
    task_system.SharedMemNumpyConfig["enabled"] = True
    pid = os.getpid()
    print(
        "SprintExternInterface[pid %i] EnableAutoNumpySharedMemPickling = True"
        % (pid, ))
Exemplo n.º 2
0
def init_better_exchook():
    """
    Installs our own ``sys.excepthook``, which uses :mod:`better_exchook`,
    but adds some special handling for the main thread.

    Additionally, if the environment variable ``DEBUG_WARN_WITH_TRACEBACK``
    is set and ``to_bool`` evaluates it to true,
    ``setup_warn_with_traceback()`` is called.
    """
    from returnn.util.better_exchook import better_exchook

    def excepthook(exc_type, exc_obj, exc_tb):
        """
        Replacement for ``sys.excepthook``.

        :param exc_type: class of the unhandled exception
        :param exc_obj: the exception instance
        :param exc_tb: the traceback
        """
        # noinspection PyBroadException
        try:
            # ``current_thread`` is the non-deprecated spelling of ``currentThread``.
            # noinspection PyUnresolvedReferences,PyProtectedMember
            is_main_thread = isinstance(threading.current_thread(),
                                        threading._MainThread)
        except Exception:  # Can happen at a very late state while quitting.
            if exc_type is KeyboardInterrupt:
                return
        else:
            if is_main_thread:
                if exc_type is KeyboardInterrupt and getattr(
                        sys, "exited", False):
                    # Got SIGINT twice. Can happen.
                    return
                # An unhandled exception in the main thread. This means that we are going to quit now.
                sys.exited = True
        print("Unhandled exception %s in thread %s, proc %i." %
              (exc_type, threading.current_thread(), os.getpid()))
        # KeyboardInterrupt is only reported with the one-liner above, without any traceback.
        if exc_type is KeyboardInterrupt:
            return

        # noinspection PyUnresolvedReferences,PyProtectedMember
        if isinstance(threading.current_thread(), threading._MainThread):
            main_thread_id = thread.get_ident()
            # NOTE(review): exc_type is a class, not an instance, so this isinstance()
            # is always False and the dump below runs for every exception that reaches
            # this point in the main thread. ``issubclass`` was probably intended, but
            # that would change the observable output, so it is kept as-is -- confirm.
            if not isinstance(exc_type, Exception):
                # We are the main thread and we got an exit-exception. This is likely fatal.
                # This usually means an exit. (We ignore non-daemon threads and procs here.)
                # Print the stack of all other threads.
                dump_all_thread_tracebacks(exclude_thread_ids={main_thread_id})

        better_exchook(exc_type, exc_obj, exc_tb, file=sys.stdout)

    sys.excepthook = excepthook

    from returnn.util.basic import to_bool
    if os.environ.get("DEBUG_WARN_WITH_TRACEBACK") and to_bool(
            os.environ.get("DEBUG_WARN_WITH_TRACEBACK")):
        setup_warn_with_traceback()
Exemplo n.º 3
0
 def bool_or_other(self, key, default, index=0):
     """
     Looks up ``key`` and returns it as a bool where possible.

     A typed value is returned unchanged (via ``typed_value``).
     Otherwise the value is converted to a string and passed to ``to_bool``;
     if that raises ``ValueError``, the string itself is returned.
     ``default`` is returned if the key is unknown or the string is empty.

     :param str key:
     :param T default:
     :param int index:
     :return: typed value as-is, or the bool conversion, or the raw string, or default
     :rtype: bool|T|object
     """
     if key in self.typed_dict:
         return self.typed_value(key, default=default, index=index)
     if key in self.dict:
         text = str(self.value(key, None, index))
         if text:
             # Import only once we really need the conversion.
             from returnn.util.basic import to_bool
             try:
                 result = to_bool(text)
             except ValueError:
                 # Not a bool-like string: hand back the string unchanged.
                 result = text
             return result
     return default
Exemplo n.º 4
0
 def bool(self, key, default, index=0):
     """
     Parses the value of the given key as boolean, returning default if not existent.

     A typed value that is an int is converted via the builtin ``bool``;
     any typed value other than None must then be a bool (asserted).
     An untyped value is converted to a string and passed to ``to_bool``;
     an empty string yields ``default``.

     :param str key:
     :param T default:
     :param int index:
     :rtype: bool | T
     """
     if key in self.typed_dict:
         typed = self.typed_value(key, default=default, index=index)
         if isinstance(typed, int):
             typed = bool(typed)
         # None is passed through; everything else has to be a real bool by now.
         assert typed is None or isinstance(typed, bool)
         return typed
     if key in self.dict:
         text = str(self.value(key, None, index))
         if text:
             from returnn.util.basic import to_bool
             return to_bool(text)
     return default
Exemplo n.º 5
0
def init_faulthandler(sigusr1_chain=False):
    """
    Sets up signal handling for debugging purposes.

    * If the env var ``DEBUG_SIGNAL_HANDLER`` is set and true (via ``to_bool``),
      ``install_lib_sig_segfault()`` and ``install_native_signal_handler()`` are called first.
    * On non-Windows platforms, ``install_signal_handler_if_default`` is called
      for SIGUSR1 and SIGUSR2.
    * If the :mod:`faulthandler` module can be imported and is not enabled yet,
      it gets enabled, and (non-Windows only) registered for SIGUSR1.
      If it is already enabled, it is left in its current state.

    :param bool sigusr1_chain: passed as ``chain`` to ``faulthandler.register`` for SIGUSR1.
      Forced to True when ``install_signal_handler_if_default(signal.SIGUSR1)`` returns a true value.
    """
    from returnn.util.basic import to_bool
    have_posix_signals = sys.platform != 'win32'

    # Enable libSigSegfault first, so that we can have both,
    # because faulthandler will also call the original sig handler.
    debug_env = os.environ.get("DEBUG_SIGNAL_HANDLER")
    if debug_env and to_bool(debug_env):
        install_lib_sig_segfault()
        install_native_signal_handler()

    if have_posix_signals:
        # A true return value means it is fine to chain to the SIGUSR1 handler.
        # (Exact semantics are those of install_signal_handler_if_default, defined elsewhere.)
        if install_signal_handler_if_default(signal.SIGUSR1):
            sigusr1_chain = True
        # Same for SIGUSR2; SGE can also send this signal.
        install_signal_handler_if_default(signal.SIGUSR2)

    try:
        import faulthandler
    except ImportError as exc:
        print("faulthandler import error. %s" % exc)
        return

    if faulthandler.is_enabled():
        # Someone enabled it already -- do not touch its configuration.
        return
    faulthandler.enable()
    if have_posix_signals:
        faulthandler.register(signal.SIGUSR1,
                              all_threads=True,
                              chain=sigusr1_chain)
Exemplo n.º 6
0
def init(name,
         reference,
         config,
         sprint_unit=None,
         version_number=None,
         callback=None,
         **kwargs):
    """
    Entry point called by Sprint PythonControl
    (and by our own code, e.g. in getSegmentList() and SprintNnPythonLayer).
    The config string is expected to contain "c2p_fd" and "p2c_fd"
    for the communication with the parent process,
    as well as "minPythonControlVersion".

    :param str name: identifies the caller, e.g. "Sprint.PythonControl"
    :param reference: any object identifying the specific caller instance, if there are multiple
    :param str config: comma-separated "key:value" items, passed over from Sprint
      (configurable via --*.pymod-config)
    :param str sprint_unit: when called by Sprint PythonControl, names the part of Sprint
      which uses this PythonControl, e.g. "FeedForwardTrainer", "SegmentwiseNnTrainer"
      or "NnTrainer.pythonControl"
    :param int|None version_number: set by newer Sprint versions when called by PythonControl
    :param function|None callback: maybe provided by newer Sprint versions.
      Supposed to be called like callback(action, **other_args), where action is a string.
      See the Sprint PythonControl code for the possible actions and arguments.
    :param kwargs: all remaining args are specific for each caller
    :return: the result of ``PythonControl.create(...)``
    """
    global Quiet, Verbose

    # Turn "k1:v1,k2:v2,..." into a dict. Empty items are skipped.
    options = {}
    for item in config.split(","):
        if not item:
            continue
        key, value = item.split(":", 1)
        options[key] = value
    config = options

    if to_bool(config.get("quiet", False)):
        Quiet = True

    log_args = (os.getpid(), name, sprint_unit, version_number, callback,
                reference, config, kwargs)
    print((
        "RETURNN SprintControl[pid %i] init: "
        "name=%r, sprint_unit=%r, version_number=%r, callback=%r, ref=%r, config=%r, kwargs=%r"
    ) % log_args)
    InitTypes.add(name)

    if to_bool(config.get("verbose", False)):
        Verbose = True

    want_shared_mem = to_bool(
        config.get("EnableAutoNumpySharedMemPickling", False))
    if want_shared_mem and not task_system.SharedMemNumpyConfig["enabled"]:
        task_system.SharedMemNumpyConfig["enabled"] = True
        print(
            "RETURNN SprintControl[pid %i] EnableAutoNumpySharedMemPickling = True"
            % (os.getpid(), ))

    # Remaining Sprint interface is in this PythonControl instance.
    child_to_parent_fd = int(config["c2p_fd"])
    parent_to_child_fd = int(config["p2c_fd"])
    min_version = int(config["minPythonControlVersion"])
    return PythonControl.create(c2p_fd=child_to_parent_fd,
                                p2c_fd=parent_to_child_fd,
                                name=name,
                                reference=reference,
                                config=config,
                                sprint_unit=sprint_unit,
                                version_number=version_number,
                                min_version_number=min_version,
                                callback=callback,
                                **kwargs)
Exemplo n.º 7
0
def init_python_trainer(inputDim, outputDim, config, targetMode, **kwargs):
    """
    Called by Sprint when it initializes the PythonTrainer.
    Set trainer = python-trainer in Sprint to enable.
    Note that Sprint will call this, i.e. the trainer init lazily quite late,
    only once it sees the first data.

    The config must contain "action", which is one of "train", "forward" or "nop".
    Optional entries read here are "epoch", "configfile"
    and "EnableAutoNumpySharedMemPickling".

    :type inputDim: int
    :type outputDim: int
    :param str config: config string, passed by Sprint. assumed to be ","-separated "key:value" items
    :param str targetMode: "target-alignment" or "criterion-by-sprint" or so
    :param kwargs: "maxSegmentLength" is read from here, if given
    :return: not expected to return anything
    :rtype: None
    """
    global InputDim, OutputDim, MaxSegmentLength, Task, startTime

    print("SprintInterface[pid %i] init()" % (os.getpid(), ))
    for label, shown in (("inputDim:", inputDim),
                         ("outputDim:", outputDim),
                         ("config:", config),
                         ("targetMode:", targetMode),
                         ("other args:", kwargs)):
        print(label, shown)

    InputDim = inputDim
    OutputDim = outputDim
    MaxSegmentLength = kwargs.get('maxSegmentLength', MaxSegmentLength)

    # Turn "k1:v1,k2:v2,..." into a dict. Empty items are skipped.
    options = {}
    for item in config.split(","):
        if not item:
            continue
        key, value = item.split(":", 1)
        options[key] = value
    config = options

    want_shared_mem = to_bool(
        config.get("EnableAutoNumpySharedMemPickling", False))
    if want_shared_mem and not task_system.SharedMemNumpyConfig["enabled"]:
        task_system.SharedMemNumpyConfig["enabled"] = True
        print(
            "SprintInterface[pid %i] EnableAutoNumpySharedMemPickling = True" %
            (os.getpid(), ))

    epoch = config.get("epoch", None)
    if epoch is not None:
        epoch = int(epoch)
        assert epoch >= 1

    configfile = config.get("configfile", None)

    action = config["action"]
    Task = action
    # "train" keeps the target mode as given by Sprint; the other actions override it.
    if action == "forward":
        assert targetMode in ["criterion-by-sprint", "forward-only"]
        targetMode = "forward"
    elif action == "nop":
        targetMode = None
    elif action != "train":
        assert False, "unknown action: %r" % action

    _init_base(target_mode=targetMode,
               configfile=configfile,
               epoch=epoch,
               sprint_opts=config)
    sprintDataset.set_dimensions(inputDim, outputDim)
    sprintDataset.initialize()

    # Deliberately re-read the global Task here (not the local action).
    if Task == "train":
        _start_train_thread(epoch)
    elif Task == "forward":
        _prepare_forwarding()

    startTime = time.time()