Пример #1
0
def init(config_filename, log_verbosity):
    """
  :param str config_filename: filename to config-file
  :param int log_verbosity:
  """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if config_filename:
        print("Using config file %r." % config_filename)
        assert os.path.exists(config_filename)
    rnn.initConfig(configFilename=config_filename, commandLineOptions=[])
    global config
    config = rnn.config
    config.set("log", None)
    config.set("log_verbosity", log_verbosity)
    config.set("use_tensorflow", True)
    rnn.initLog()
    print("Returnn compile-native-op starting up.", file=log.v1)
    rnn.returnnGreeting()
    rnn.initBackendEngine()
    assert Util.BackendEngine.is_tensorflow_selected(
    ), "this is only for TensorFlow"
    rnn.initFaulthandler()
    rnn.initConfigJsonNetwork()
    if 'network' in config.typed_dict:
        print("Loading network")
        from TFNetwork import TFNetwork
        network = TFNetwork(name="root",
                            config=config,
                            rnd_seed=1,
                            train_flag=False,
                            eval_flag=True,
                            search_flag=False)
        network.construct_from_dict(config.typed_dict["network"])
Пример #2
0
def init(config_str):
    """
  :param str config_str: either filename to config-file, or dict for dataset
  """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if config_str.startswith("{"):
        print("Using dataset %s." % config_str)
        datasetDict = eval(config_str)
        configFilename = None
    else:
        datasetDict = None
        configFilename = config_str
        print("Using config file %r." % configFilename)
        assert os.path.exists(configFilename)
    rnn.initConfig(configFilename=configFilename, commandLineOptions=[])
    global config
    config = rnn.config
    config.set("log", None)
    if datasetDict:
        config.set("train", datasetDict)
    rnn.initLog()
    print("CRNN dump-dataset starting up.", file=log.v1)
    rnn.initFaulthandler()
    rnn.initConfigJsonNetwork()
    rnn.initData()
    rnn.printTaskProperties()
def init(config_filename, log_verbosity):
    """
  :param str config_filename: filename to config-file
  :param int log_verbosity:
  """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if config_filename:
        print("Using config file %r." % config_filename)
        assert os.path.exists(config_filename)
    rnn.initConfig(configFilename=config_filename, commandLineOptions=[])
    global config
    config = rnn.config
    config.set("task", "calculate_wer")
    config.set("log", None)
    config.set("log_verbosity", log_verbosity)
    config.set("use_tensorflow", True)
    rnn.initLog()
    print("Returnn calculate-word-error-rate starting up.", file=log.v1)
    rnn.returnnGreeting()
    rnn.initBackendEngine()
    assert Util.BackendEngine.is_tensorflow_selected(
    ), "this is only for TensorFlow"
    rnn.initFaulthandler()
    rnn.initConfigJsonNetwork()
    rnn.printTaskProperties()
Пример #4
0
def demo():
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options]" % __file__)
    print("example usage: python %s ++pretrain default ++pretrain_construction_algo from_input" % __file__)
  rnn.initConfig(commandLineOptions=sys.argv[1:])
  rnn.config._hack_value_reading_debug()
  rnn.config.update({"log": []})
  rnn.initLog()
  rnn.initBackendEngine()
  if not rnn.config.value("pretrain", ""):
    print("config option 'pretrain' not set, will set it for this demo to 'default'")
    rnn.config.set("pretrain", "default")
  pretrain = pretrainFromConfig(rnn.config)
  print("pretrain: %s" % pretrain)
  num_pretrain_epochs = pretrain.get_train_num_epochs()
  from pprint import pprint
  for epoch in range(1, 1 + num_pretrain_epochs):
    print("epoch %i (of %i) network json:" % (epoch, num_pretrain_epochs))
    net_json = pretrain.get_network_json_for_epoch(epoch)
    pprint(net_json)
  print("done.")
Пример #5
0
def init(config_str, verbosity):
    """
  :param str config_str: either filename to config-file, or dict for dataset
  :param int verbosity:
  """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    datasetDict = None
    configFilename = None
    if config_str.strip().startswith("{"):
        print("Using dataset %s." % config_str)
        datasetDict = eval(config_str.strip())
    elif config_str.endswith(".hdf"):
        datasetDict = {"class": "HDFDataset", "files": [config_str]}
        print("Using dataset %r." % datasetDict)
        assert os.path.exists(config_str)
    else:
        configFilename = config_str
        print("Using config file %r." % configFilename)
        assert os.path.exists(configFilename)
    rnn.initConfig(configFilename=configFilename,
                   default_config={"cache_size": "0"})
    global config
    config = rnn.config
    config.set("log", None)
    config.set("log_verbosity", verbosity)
    if datasetDict:
        config.set("train", datasetDict)
    rnn.initLog()
    print("Returnn dump-dataset starting up.", file=log.v2)
    rnn.returnnGreeting()
    rnn.initFaulthandler()
    rnn.initConfigJsonNetwork()
    rnn.initData()
    rnn.printTaskProperties()
Пример #6
0
def init(config_filename, cmd_line_opts, dataset_config_str):
  """
  :param str config_filename: global config for CRNN
  :param list[str] cmd_line_opts: options for initConfig method
  :param str dataset_config_str: dataset via init_dataset_via_str()
  """
  rnn.initBetterExchook()
  rnn.initThreadJoinHack()
  if config_filename:
    rnn.initConfig(config_filename, cmd_line_opts)
    rnn.initLog()
  else:
    log.initialize(verbosity=[5])
  print >> log.v3, "CRNN dump-dataset starting up."
  rnn.initFaulthandler()
  rnn.initConfigJsonNetwork()
  if config_filename:
    rnn.initData()
    rnn.printTaskProperties()
    assert isinstance(rnn.train_data, Dataset)
    return rnn.train_data
  else:
    assert dataset_config_str
    dataset = init_dataset_via_str(dataset_config_str)
    print >> log.v3, "Source dataset:", dataset.len_info()
    return dataset
Пример #7
0
def test_rnn_initConfig_py_global_var():
  import rnn
  import tempfile
  with tempfile.NamedTemporaryFile(mode="w", suffix=".config", prefix="test_rnn_initConfig") as cfgfile:
    cfgfile.write("""#!rnn.py
task = config.value("task", "train")

test_value = 42

def test_func():
  return task

    """)
    cfgfile.flush()
    rnn.initConfig(commandLineOptions=[cfgfile.name, "--task", "search"])

  assert isinstance(rnn.config, Config)
  pprint(rnn.config.dict)
  pprint(rnn.config.typed_dict)
  assert rnn.config.has("task")
  assert rnn.config.has("test_value")
  assert rnn.config.has("test_func")
  assert_equal(rnn.config.value("task", None), "search")
  assert rnn.config.is_typed("test_value")
  assert_equal(rnn.config.typed_value("test_value"), 42)
  assert rnn.config.is_typed("test_func")
  # So far it's fine.
  # Now something a bit strange.
  # Earlier, this failed, because the command-line overwrote this.
  assert rnn.config.is_typed("task")
  test_func = rnn.config.typed_dict["test_func"]
  assert callable(test_func)
  assert_equal(test_func(), "search")
Пример #8
0
def init(config_filename, cmd_line_opts, dataset_config_str):
    """
  :param str config_filename: global config for CRNN
  :param list[str] cmd_line_opts: options for initConfig method
  :param str dataset_config_str: dataset via init_dataset_via_str()
  """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if config_filename:
        rnn.initConfig(config_filename, cmd_line_opts)
        rnn.initLog()
    else:
        log.initialize(verbosity=[5])
    print("Returnn hdf_dump starting up.", file=log.v3)
    rnn.initFaulthandler()
    if config_filename:
        rnn.initData()
        rnn.printTaskProperties()
        assert isinstance(rnn.train_data, Dataset)
        return rnn.train_data
    else:
        assert dataset_config_str
        dataset = init_dataset_via_str(dataset_config_str)
        print("Source dataset:", dataset.len_info(), file=log.v3)
        return dataset
Пример #9
0
def init(configFilename, commandLineOptions):
    rnn.initBetterExchook()
    rnn.initConfig(configFilename, commandLineOptions)
    global config
    config = rnn.config
    config.set("log", [])
    rnn.initLog()
    print("CRNN dump-dataset starting up.", file=log.v3)
    rnn.initConfigJsonNetwork()
Пример #10
0
def init(configFilename, commandLineOptions):
  rnn.initBetterExchook()
  rnn.initConfig(configFilename, commandLineOptions)
  global config
  config = rnn.config
  config.set("log", [])
  rnn.initLog()
  print >> log.v3, "CRNN dump-dataset starting up."
  rnn.initConfigJsonNetwork()
Пример #11
0
def demo():
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options]" % __file__)
    print("example usage: python %s ++learning_rate_control newbob ++learning_rate_file newbob.data ++learning_rate 0.001" % __file__)
  rnn.initConfig(commandLineOptions=sys.argv[1:])
  rnn.config._hack_value_reading_debug()
  from Pretrain import pretrainFromConfig
  pretrain = pretrainFromConfig(rnn.config)
  first_non_pretrain_epoch = 1
  pretrain_learning_rate = None
  if pretrain:
    first_non_pretrain_epoch = pretrain.get_train_num_epochs() + 1
  log.initialize(verbosity=[5])
  control = loadLearningRateControlFromConfig(rnn.config)
  print("LearningRateControl: %r" % control)
  if not control.epochData:
    print("No epoch data so far.")
    return
  firstEpoch = min(control.epochData.keys())
  if firstEpoch != 1:
    print("Strange, first epoch from epoch data is %i." % firstEpoch)
  print("Error key: %s from %r" % (control.getErrorKey(epoch=firstEpoch), control.epochData[firstEpoch].error))
  if pretrain:
    pretrain_learning_rate = rnn.config.float('pretrain_learning_rate', control.defaultLearningRate)
  maxEpoch = max(control.epochData.keys())
  for epoch in range(1, maxEpoch + 2):  # all epochs [1..maxEpoch+1]
    oldLearningRate = None
    if epoch in control.epochData:
      oldLearningRate = control.epochData[epoch].learningRate
    if epoch < first_non_pretrain_epoch:
      learningRate = pretrain_learning_rate
      s = "Pretrain epoch %i, fixed learning rate: %s (was: %s)" % (epoch, learningRate, oldLearningRate)
    elif first_non_pretrain_epoch > 1 and epoch == first_non_pretrain_epoch:
      learningRate = control.defaultLearningRate
      s = "First epoch after pretrain, epoch %i, fixed learning rate: %s (was %s)" % (epoch, learningRate, oldLearningRate)
    else:
      learningRate = control.calcNewLearnignRateForEpoch(epoch)
      s = "Calculated learning rate for epoch %i: %s (was: %s)" % (epoch, learningRate, oldLearningRate)
    if learningRate < control.minLearningRate:
      learningRate = control.minLearningRate
      s += ", clipped to %s" % learningRate
    s += ", previous relative error: %s" % control.calcRelativeError(epoch - 2, epoch - 1)
    if hasattr(control, "_calcRecentMeanRelativeError"):
      s += ", previous mean relative error: %s" % control._calcRecentMeanRelativeError(epoch)
    print(s)
    # Overwrite new learning rate so that the calculation for further learning rates stays consistent.
    if epoch in control.epochData:
      control.epochData[epoch].learningRate = learningRate
    else:
      control.epochData[epoch] = control.EpochData(learningRate=learningRate)
  print("Finished, last stored epoch was %i." % maxEpoch)
Пример #12
0
def init(configFilename, commandLineOptions):
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    rnn.initConfig(configFilename, commandLineOptions)
    global config
    config = rnn.config
    rnn.initLog()
    print >> log.v3, "CRNN demo-dataset starting up"
    rnn.initFaulthandler()
    rnn.initConfigJsonNetwork()
    rnn.initData()
    rnn.printTaskProperties()
Пример #13
0
def init(configFilename, commandLineOptions):
  rnn.initBetterExchook()
  rnn.initThreadJoinHack()
  rnn.initConfig(configFilename, commandLineOptions)
  global config
  config = rnn.config
  rnn.initLog()
  print >> log.v3, "CRNN demo-dataset starting up"
  rnn.initFaulthandler()
  rnn.initConfigJsonNetwork()
  rnn.initData()
  rnn.printTaskProperties()
Пример #14
0
def init(configFilename, commandLineOptions):
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    rnn.initConfig(configFilename, commandLineOptions)
    global config
    config = rnn.config
    config.set("log", None)
    rnn.initLog()
    print("CRNN dump-dataset starting up.", file=log.v1)
    rnn.initFaulthandler()
    rnn.initConfigJsonNetwork()
    rnn.initData()
    rnn.printTaskProperties()
Пример #15
0
def init(configFilename=None):
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if configFilename:
        rnn.initConfig(configFilename, commandLineOptions=[])
        rnn.initLog()
    else:
        log.initialize()
    print("CRNN collect-orth-symbols starting up.", file=log.v3)
    rnn.initFaulthandler()
    if configFilename:
        rnn.initConfigJsonNetwork()
        rnn.initData()
        rnn.printTaskProperties()
Пример #16
0
def init(configFilename=None):
  rnn.initBetterExchook()
  rnn.initThreadJoinHack()
  if configFilename:
    rnn.initConfig(configFilename, commandLineOptions=[])
    rnn.initLog()
  else:
    log.initialize()
  print >> log.v3, "CRNN collect-orth-symbols starting up."
  rnn.initFaulthandler()
  if configFilename:
    rnn.initConfigJsonNetwork()
    rnn.initData()
    rnn.printTaskProperties()
Пример #17
0
def demo():
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options]" % __file__)
    print("example usage: python %s ++learning_rate_control newbob ++learning_rate_file newbob.data ++learning_rate 0.001" % __file__)
  rnn.initConfig(commandLineOptions=sys.argv[1:])
  from Pretrain import pretrainFromConfig
  pretrain = pretrainFromConfig(rnn.config)
  first_non_pretrain_epoch = 1
  pretrain_learning_rate = None
  if pretrain:
    first_non_pretrain_epoch = pretrain.get_train_num_epochs() + 1
  rnn.config._hack_value_reading_debug()
  log.initialize(verbosity=[5])
  control = loadLearningRateControlFromConfig(rnn.config)
  print("LearningRateControl: %r" % control)
  if not control.epochData:
    print("No epoch data so far.")
    return
  if pretrain:
    pretrain_learning_rate = rnn.config.float('pretrain_learning_rate', control.defaultLearningRate)
  maxEpoch = max(control.epochData.keys())
  for epoch in range(1, maxEpoch + 2):  # all epochs [1..maxEpoch+1]
    oldLearningRate = None
    if epoch in control.epochData:
      oldLearningRate = control.epochData[epoch].learningRate
    if epoch < first_non_pretrain_epoch:
      learningRate = pretrain_learning_rate
      s = "Pretrain epoch %i, fixed learning rate: %s (was: %s)" % (epoch, learningRate, oldLearningRate)
    elif first_non_pretrain_epoch > 1 and epoch == first_non_pretrain_epoch:
      learningRate = control.defaultLearningRate
      s = "First epoch after pretrain, epoch %i, fixed learning rate: %s (was %s)" % (epoch, learningRate, oldLearningRate)
    else:
      learningRate = control.calcLearningRateForEpoch(epoch)
      s = "Calculated learning rate for epoch %i: %s (was: %s)" % (epoch, learningRate, oldLearningRate)
    if learningRate < control.minLearningRate:
      learningRate = control.minLearningRate
      s += ", clipped to %s" % learningRate
    s += ", previous relative error: %s" % control.calcRelativeError(epoch - 2, epoch - 1)
    print(s)
    # Overwrite new learning rate so that the calculation for further learning rates stays consistent.
    if epoch in control.epochData:
      control.epochData[epoch].learningRate = learningRate
    else:
      control.epochData[epoch] = control.EpochData(learningRate=learningRate)
  print("Finished, last stored epoch was %i." % maxEpoch)
Пример #18
0
def initBase(configfile=None, targetMode=None, epoch=None):
  """
  :type configfile: str | None
  """

  global isInitialized
  isInitialized = True
  # Run through in any case. Maybe just to set targetMode.

  global config
  if not config:
    if configfile is None:
      configfile = DefaultSprintCrnnConfig
    assert os.path.exists(configfile)

    rnn.initThreadJoinHack()
    rnn.initConfig(configfile, [])
    config = rnn.config
    rnn.initLog()
    rnn.initConfigJsonNetwork()

  if targetMode:
    setTargetMode(targetMode)

  initDataset()

  if targetMode and targetMode == "forward" and epoch:
    model_filename = config.value('model', '')
    fns = [Engine.epoch_model_filename(model_filename, epoch, is_pretrain) for is_pretrain in [False, True]]
    fns_existing = [fn for fn in fns if os.path.exists(fn)]
    assert len(fns_existing) == 1, "%s not found" % fns
    model_epoch_filename = fns_existing[0]
    config.set('load', model_epoch_filename)
    assert Engine.get_epoch_model(config)[1] == model_epoch_filename

  global engine
  if not engine:
    devices = rnn.initDevices()
    rnn.printTaskProperties(devices)
    rnn.initEngine(devices)
    engine = rnn.engine
    assert isinstance(engine, Engine)
Пример #19
0
def init(config_filename, log_verbosity):
    """
  :param str config_filename: filename to config-file
  :param int log_verbosity:
  """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if config_filename:
        print("Using config file %r." % config_filename)
        assert os.path.exists(config_filename)
    rnn.initConfig(configFilename=config_filename, commandLineOptions=[])
    global config
    config = rnn.config
    config.set("task", "dump")
    config.set("log", None)
    config.set("log_verbosity", log_verbosity)
    rnn.initLog()
    print("Returnn dump-dataset-raw-strings starting up.", file=log.v1)
    rnn.returnnGreeting()
    rnn.initFaulthandler()
Пример #20
0
def test_rnn_initConfig_py_cmd_type():
  import rnn
  import tempfile
  with tempfile.NamedTemporaryFile(mode="w", suffix=".config", prefix="test_rnn_initConfig") as cfgfile:
    cfgfile.write("""#!rnn.py
max_seq_length = {'bpe': 75}

def test_func():
  return max_seq_length

    """)
    cfgfile.flush()
    rnn.initConfig(commandLineOptions=[cfgfile.name, "++max_seq_length", "0"])

  assert isinstance(rnn.config, Config)
  assert rnn.config.has("max_seq_length")
  assert rnn.config.has("test_func")
  assert rnn.config.is_typed("max_seq_length")
  assert rnn.config.is_typed("test_func")
  test_func = rnn.config.typed_dict["test_func"]
  assert callable(test_func)
  assert_equal(test_func(), 0)
Пример #21
0
def demo():
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options]" % __file__)
    print("example usage: python %s ++pretrain default ++pretrain_construction_algo from_input" % __file__)
  rnn.initConfig(commandLineOptions=sys.argv[1:])
  rnn.config._hack_value_reading_debug()
  if not rnn.config.value("pretrain", ""):
    print("config option 'pretrain' not set, will set it for this demo to 'default'")
    rnn.config.set("pretrain", "default")
  pretrain = pretrainFromConfig(rnn.config)
  print("pretrain: %s" % pretrain)
  num_pretrain_epochs = pretrain.get_train_num_epochs()
  from pprint import pprint
  for epoch in range(1, 1 + num_pretrain_epochs):
    print("epoch %i (of %i) network json:" % (epoch, num_pretrain_epochs))
    net_json = pretrain.get_network_json_for_epoch(epoch)
    pprint(net_json)
  print("done.")
Пример #22
0
def initBase(configfile=None, targetMode=None, epoch=None):
    """
  :param str|None configfile: filename, via init(), this is set
  :param str|None targetMode: "forward" or so. via init(), this is set
  :param int epoch: via init(), this is set
  """

    global isInitialized
    isInitialized = True
    # Run through in any case. Maybe just to set targetMode.

    if not getattr(sys, "argv", None):
        # Set some dummy. Some code might want this (e.g. TensorFlow).
        sys.argv = [__file__]

    global config
    if not config:
        # Some subset of what we do in rnn.init().

        rnn.initBetterExchook()
        rnn.initThreadJoinHack()

        if configfile is None:
            configfile = DefaultSprintCrnnConfig
        assert os.path.exists(configfile)
        rnn.initConfig(configFilename=configfile)
        config = rnn.config

        rnn.initLog()
        rnn.returnnGreeting(configFilename=configfile)
        rnn.initBackendEngine()
        rnn.initFaulthandler(sigusr1_chain=True)
        rnn.initConfigJsonNetwork()

        if BackendEngine.is_tensorflow_selected():
            # Use TFEngine.Engine class instead of Engine.Engine.
            import TFEngine
            global Engine
            Engine = TFEngine.Engine

        import atexit
        atexit.register(_at_exit_handler)

    if targetMode:
        setTargetMode(targetMode)

    initDataset()

    if targetMode and targetMode == "forward" and epoch:
        model_filename = config.value('model', '')
        fns = [
            Engine.epoch_model_filename(model_filename, epoch, is_pretrain)
            for is_pretrain in [False, True]
        ]
        fn_postfix = ""
        if BackendEngine.is_tensorflow_selected():
            fn_postfix += ".meta"
        fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
        assert len(fns_existing) == 1, "%s not found" % fns
        model_epoch_filename = fns_existing[0]
        config.set('load', model_epoch_filename)
        assert Engine.get_epoch_model(config)[1] == model_epoch_filename, \
          "%r != %r" % (Engine.get_epoch_model(config), model_epoch_filename)

    global engine
    if not engine:
        devices = rnn.initDevices()
        rnn.printTaskProperties(devices)
        rnn.initEngine(devices)
        engine = rnn.engine
        assert isinstance(engine, Engine)