def init(config_filename, log_verbosity, remaining_args=()):
  """
  Initialize RETURNN (TensorFlow backend) for the "search" task from a config file.
  Sets the module-level ``config`` global.

  :param str config_filename: filename to config-file
  :param int log_verbosity:
  :param list[str] remaining_args: extra command-line options forwarded to init_config
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  print("Using config file %r." % config_filename)
  assert os.path.exists(config_filename)
  rnn.init_config(
    config_filename=config_filename,
    command_line_options=remaining_args,
    extra_updates={
      "use_tensorflow": True,
      "log": None,  # log to stdout, not a file
      "log_verbosity": log_verbosity,
      "task": "search",
    },
    default_config={
      "debug_print_layer_output_template": True,
    }
  )
  global config
  config = rnn.config
  # init_log after init_config: log settings presumably come from the config.
  rnn.init_log()
  print("Returnn %s starting up." % os.path.basename(__file__), file=log.v1)
  rnn.returnn_greeting()
  rnn.init_backend_engine()
  assert Util.BackendEngine.is_tensorflow_selected(), "this is only for TensorFlow"
  rnn.init_faulthandler()
  better_exchook.replace_traceback_format_tb()  # makes some debugging easier
  rnn.init_config_json_network()
# Example #2
def init(config_str, verbosity):
    """
    Set up RETURNN for dump-dataset and load the dataset described by ``config_str``.

    :param str config_str: either filename to config-file, or dict for dataset
    :param int verbosity:
    """
    rnn.init_better_exchook()
    rnn.init_thread_join_hack()
    dataset_dict = None
    config_filename = None
    stripped = config_str.strip()
    if stripped.startswith("{"):
        print("Using dataset %s." % config_str)
        # NOTE(review): eval of a user-provided string; only pass trusted input here.
        dataset_dict = eval(stripped)
    elif config_str.endswith(".hdf"):
        # A bare HDF filename is wrapped into an HDFDataset spec.
        dataset_dict = {"class": "HDFDataset", "files": [config_str]}
        print("Using dataset %r." % dataset_dict)
        assert os.path.exists(config_str)
    else:
        config_filename = config_str
        print("Using config file %r." % config_filename)
        assert os.path.exists(config_filename)
    rnn.init_config(config_filename=config_filename,
                    default_config={"cache_size": "0"})
    global config
    config = rnn.config
    config.set("log", None)
    config.set("log_verbosity", verbosity)
    if dataset_dict:
        # Inline dataset spec overrides the config's train dataset.
        config.set("train", dataset_dict)
    rnn.init_log()
    print("Returnn dump-dataset starting up.", file=log.v2)
    rnn.returnn_greeting()
    rnn.init_faulthandler()
    rnn.init_config_json_network()
    rnn.init_data()
    rnn.print_task_properties()
# Example #3
def init(config_filename, log_verbosity):
    """
    Initialize RETURNN (TensorFlow backend) for compile-tf-graph.
    Sets the module-level ``config`` global.

    :param str config_filename: filename to config-file
    :param int log_verbosity:
    """
    rnn.init_better_exchook()
    rnn.init_thread_join_hack()
    print("Using config file %r." % config_filename)
    assert os.path.exists(config_filename)
    rnn.init_config(
        config_filename=config_filename,
        extra_updates={
            "use_tensorflow": True,
            "log": None,
            "log_verbosity": log_verbosity,
            "task": __file__,  # just extra info for the config
        })
    global config
    config = rnn.config
    rnn.init_log()
    print("Returnn compile-tf-graph starting up.", file=log.v1)
    rnn.returnn_greeting()
    rnn.init_backend_engine()
    assert Util.BackendEngine.is_tensorflow_selected(
    ), "this is only for TensorFlow"
    rnn.init_faulthandler()
    rnn.init_config_json_network()
# Example #4
def init(config_str, verbosity):
  """
  Set up RETURNN for dump-dataset and load the dataset described by ``config_str``.

  :param str config_str: either filename to config-file, or dict for dataset
  :param int verbosity:
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  dataset_dict = None
  config_filename = None
  stripped = config_str.strip()
  if stripped.startswith("{"):
    print("Using dataset %s." % config_str)
    # NOTE(review): eval of a user-provided string; only pass trusted input here.
    dataset_dict = eval(stripped)
  elif config_str.endswith(".hdf"):
    # A bare HDF filename is wrapped into an HDFDataset spec.
    dataset_dict = {"class": "HDFDataset", "files": [config_str]}
    print("Using dataset %r." % dataset_dict)
    assert os.path.exists(config_str)
  else:
    config_filename = config_str
    print("Using config file %r." % config_filename)
    assert os.path.exists(config_filename)
  rnn.init_config(config_filename=config_filename, default_config={"cache_size": "0"})
  global config
  config = rnn.config
  config.set("log", None)
  config.set("log_verbosity", verbosity)
  if dataset_dict:
    # Inline dataset spec overrides the config's train dataset.
    config.set("train", dataset_dict)
  rnn.init_log()
  print("Returnn dump-dataset starting up.", file=log.v2)
  rnn.returnn_greeting()
  rnn.init_faulthandler()
  rnn.init_config_json_network()
  rnn.init_data()
  rnn.print_task_properties()
# Example #5
def test_rnn_init_config_py_global_var():
  """
  A ``#!rnn.py`` config can read command-line overrides at load time via
  ``config.value(...)``; the resulting global stays typed and the command-line
  value ("search") wins over the in-file default ("train").
  """
  import rnn
  import tempfile
  with tempfile.NamedTemporaryFile(mode="w", suffix=".config", prefix="test_rnn_initConfig") as cfgfile:
    cfgfile.write("""#!rnn.py
task = config.value("task", "train")

test_value = 42

def test_func():
  return task

    """)
    cfgfile.flush()  # make content visible before init_config reads the file
    rnn.init_config(command_line_options=[cfgfile.name, "--task", "search"])

  assert isinstance(rnn.config, Config)
  pprint(rnn.config.dict)
  pprint(rnn.config.typed_dict)
  assert rnn.config.has("task")
  assert rnn.config.has("test_value")
  assert rnn.config.has("test_func")
  assert_equal(rnn.config.value("task", None), "search")
  assert rnn.config.is_typed("test_value")
  assert_equal(rnn.config.typed_value("test_value"), 42)
  assert rnn.config.is_typed("test_func")
  # So far it's fine.
  # Now something a bit strange.
  # Earlier, this failed, because the command-line overwrote this.
  assert rnn.config.is_typed("task")
  test_func = rnn.config.typed_dict["test_func"]
  assert callable(test_func)
  assert_equal(test_func(), "search")
# Example #6
def init(config_filename, log_verbosity):
    """
    Initialize RETURNN (TensorFlow backend) for the "calculate_wer" task.
    Sets the module-level ``config`` global. The config file is optional.

    :param str config_filename: filename to config-file
    :param int log_verbosity:
    """
    rnn.init_better_exchook()
    rnn.init_thread_join_hack()
    if config_filename:
        print("Using config file %r." % config_filename)
        assert os.path.exists(config_filename)
    rnn.init_config(config_filename=config_filename, command_line_options=[])
    global config
    config = rnn.config
    config.set("task", "calculate_wer")
    config.set("log", None)
    config.set("log_verbosity", log_verbosity)
    config.set("use_tensorflow", True)
    rnn.init_log()
    print("Returnn calculate-word-error-rate starting up.", file=log.v1)
    rnn.returnn_greeting()
    rnn.init_backend_engine()
    assert Util.BackendEngine.is_tensorflow_selected(
    ), "this is only for TensorFlow"
    rnn.init_faulthandler()
    rnn.init_config_json_network()
    rnn.print_task_properties()
# Example #7
def test_rnn_init_config_py_cmd_type():
    """
    A ``++key value`` command-line override of a typed config entry keeps the
    entry typed: ``max_seq_length`` (a dict in the config) becomes the int 0,
    and functions defined in the config observe the overridden value.
    """
    import rnn
    import tempfile
    with tempfile.NamedTemporaryFile(mode="w",
                                     suffix=".config",
                                     prefix="test_rnn_initConfig") as cfgfile:
        cfgfile.write("""#!rnn.py
max_seq_length = {'bpe': 75}

def test_func():
  return max_seq_length

    """)
        cfgfile.flush()  # make content visible before init_config reads the file
        rnn.init_config(
            command_line_options=[cfgfile.name, "++max_seq_length", "0"])

    assert isinstance(rnn.config, Config)
    assert rnn.config.has("max_seq_length")
    assert rnn.config.has("test_func")
    assert rnn.config.is_typed("max_seq_length")
    assert rnn.config.is_typed("test_func")
    test_func = rnn.config.typed_dict["test_func"]
    assert callable(test_func)
    assert_equal(test_func(), 0)
# Example #8
def init(config_filename, log_verbosity):
    """
    Initialize RETURNN (TensorFlow backend) for compile-native-op; if the config
    defines a network, also construct it. Sets the module-level ``config`` global.

    :param str config_filename: filename to config-file
    :param int log_verbosity:
    """
    rnn.init_better_exchook()
    rnn.init_thread_join_hack()
    if config_filename:
        print("Using config file %r." % config_filename)
        assert os.path.exists(config_filename)
    rnn.init_config(config_filename=config_filename, command_line_options=[])
    global config
    config = rnn.config
    config.set("log", None)
    config.set("log_verbosity", log_verbosity)
    config.set("use_tensorflow", True)
    rnn.init_log()
    print("Returnn compile-native-op starting up.", file=log.v1)
    rnn.returnn_greeting()
    rnn.init_backend_engine()
    assert Util.BackendEngine.is_tensorflow_selected(
    ), "this is only for TensorFlow"
    rnn.init_faulthandler()
    rnn.init_config_json_network()
    if 'network' in config.typed_dict:
        print("Loading network")
        from TFNetwork import TFNetwork
        # Evaluation-mode network (no training, no search), fixed random seed.
        network = TFNetwork(name="root",
                            config=config,
                            rnd_seed=1,
                            train_flag=False,
                            eval_flag=True,
                            search_flag=False)
        network.construct_from_dict(config.typed_dict["network"])
# Example #9
def init(config_filename, cmd_line_opts, dataset_config_str):
    """
    Initialize RETURNN for hdf_dump and return the source dataset, taken either
    from the config's train data or built from ``dataset_config_str``.

    :param str config_filename: global config for CRNN
    :param list[str] cmd_line_opts: options for init_config method
    :param str dataset_config_str: dataset via init_dataset_via_str()
    :return: the dataset to dump
    :rtype: Dataset
    """
    rnn.init_better_exchook()
    rnn.init_thread_join_hack()
    if config_filename:
        rnn.init_config(config_filename, cmd_line_opts)
        rnn.init_log()
    else:
        # No config: fall back to a default-verbosity log.
        log.initialize(verbosity=[5])
    print("Returnn hdf_dump starting up.", file=log.v3)
    rnn.init_faulthandler()
    if config_filename:
        rnn.init_data()
        rnn.print_task_properties()
        assert isinstance(rnn.train_data, Dataset)
        dataset = rnn.train_data
    else:
        assert dataset_config_str
        dataset = init_dataset(dataset_config_str)
    print("Source dataset:", dataset.len_info(), file=log.v3)
    return dataset
# Example #10
def init(config_filename, cmd_line_opts, dataset_config_str):
  """
  Initialize RETURNN for hdf_dump and return the source dataset, taken either
  from the config's train data or built from ``dataset_config_str``.

  :param str config_filename: global config for CRNN
  :param list[str] cmd_line_opts: options for init_config method
  :param str dataset_config_str: dataset via init_dataset_via_str()
  :return: the dataset to dump
  :rtype: Dataset
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  if config_filename:
    rnn.init_config(config_filename, cmd_line_opts)
    rnn.init_log()
  else:
    # No config: fall back to a default-verbosity log.
    log.initialize(verbosity=[5])
  print("Returnn hdf_dump starting up.", file=log.v3)
  rnn.init_faulthandler()
  if config_filename:
    rnn.init_data()
    rnn.print_task_properties()
    assert isinstance(rnn.train_data, Dataset)
    dataset = rnn.train_data
  else:
    assert dataset_config_str
    dataset = init_dataset(dataset_config_str)
  print("Source dataset:", dataset.len_info(), file=log.v3)
  return dataset
# Example #11
def demo():
  """
  Will print out the different network topologies of the specified pretraining scheme.
  Reads the config from ``sys.argv``; falls back to ``pretrain = "default"`` if unset.
  """
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options]" % __file__)
    print("example usage: python %s ++pretrain default ++pretrain_construction_algo from_input" % __file__)
  rnn.init_config(command_line_options=sys.argv[1:])
  # noinspection PyProtectedMember
  rnn.config._hack_value_reading_debug()
  rnn.config.update({"log": []})  # log to stdout only
  rnn.init_log()
  rnn.init_backend_engine()
  if not rnn.config.value("pretrain", ""):
    print("config option 'pretrain' not set, will set it for this demo to 'default'")
    rnn.config.set("pretrain", "default")
  pretrain = pretrain_from_config(rnn.config)
  print("pretrain: %s" % pretrain)
  num_pretrain_epochs = pretrain.get_train_num_epochs()
  from pprint import pprint
  # Dump the network JSON for every pretrain epoch.
  for epoch in range(1, 1 + num_pretrain_epochs):
    print("epoch %i (of %i) network json:" % (epoch, num_pretrain_epochs))
    net_json = pretrain.get_network_json_for_epoch(epoch)
    pprint(net_json)
  print("done.")
# Example #12
def demo():
    """
    Will print out the different network topologies of the specified pretraining scheme.
    Reads the config from ``sys.argv``; falls back to ``pretrain = "default"`` if unset.
    """
    import better_exchook
    better_exchook.install()
    import rnn
    import sys
    if len(sys.argv) <= 1:
        print("usage: python %s [config] [other options]" % __file__)
        print(
            "example usage: python %s ++pretrain default ++pretrain_construction_algo from_input"
            % __file__)
    rnn.init_config(command_line_options=sys.argv[1:])
    # noinspection PyProtectedMember
    rnn.config._hack_value_reading_debug()
    rnn.config.update({"log": []})  # log to stdout only
    rnn.init_log()
    rnn.init_backend_engine()
    if not rnn.config.value("pretrain", ""):
        print(
            "config option 'pretrain' not set, will set it for this demo to 'default'"
        )
        rnn.config.set("pretrain", "default")
    pretrain = pretrain_from_config(rnn.config)
    print("pretrain: %s" % pretrain)
    num_pretrain_epochs = pretrain.get_train_num_epochs()
    from pprint import pprint
    # Dump the network JSON for every pretrain epoch.
    for epoch in range(1, 1 + num_pretrain_epochs):
        print("epoch %i (of %i) network json:" % (epoch, num_pretrain_epochs))
        net_json = pretrain.get_network_json_for_epoch(epoch)
        pprint(net_json)
    print("done.")
# Example #13
def init(configFilename, commandLineOptions):
    """
    Minimal CRNN init for dump-dataset: config, log, JSON network.
    Sets the module-level ``config`` global.

    :param str configFilename: filename to config-file
    :param list[str] commandLineOptions: forwarded to rnn.init_config
    """
    rnn.init_better_exchook()
    rnn.init_config(configFilename, commandLineOptions)
    global config
    config = rnn.config
    config.set("log", [])  # log to stdout only
    rnn.init_log()
    print("CRNN dump-dataset starting up.", file=log.v3)
    rnn.init_config_json_network()
# Example #14
def init(configFilename, commandLineOptions):
  """
  CRNN init for demo-dataset: config, log, network, data.
  Sets the module-level ``config`` global.

  :param str configFilename: filename to config-file
  :param list[str] commandLineOptions: forwarded to rnn.init_config
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  rnn.init_config(configFilename, commandLineOptions)
  global config
  config = rnn.config
  rnn.init_log()
  print("CRNN demo-dataset starting up", file=log.v3)
  rnn.init_faulthandler()
  rnn.init_config_json_network()
  rnn.init_data()
  rnn.print_task_properties()
# Example #15
def init(configFilename=None):
    """
    Initialize RETURNN for collect-words; everything config-related is skipped
    when no config file is given.

    :param str|None configFilename: optional filename to config-file
    """
    rnn.init_better_exchook()
    rnn.init_thread_join_hack()
    if configFilename:
        rnn.init_config(configFilename, command_line_options=[])
        rnn.init_log()
    else:
        # No config: default log setup.
        log.initialize()
    print("Returnn collect-words starting up.", file=log.v3)
    rnn.init_faulthandler()
    if configFilename:
        rnn.init_config_json_network()
        rnn.init_data()
        rnn.print_task_properties()
# Example #16
def init(configFilename=None):
  """
  Initialize RETURNN for collect-words; everything config-related is skipped
  when no config file is given.

  :param str|None configFilename: optional filename to config-file
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  if configFilename:
    rnn.init_config(configFilename, command_line_options=[])
    rnn.init_log()
  else:
    # No config: default log setup.
    log.initialize()
  print("Returnn collect-words starting up.", file=log.v3)
  rnn.init_faulthandler()
  if configFilename:
    rnn.init_config_json_network()
    rnn.init_data()
    rnn.print_task_properties()
# Example #17
def demo():
    """
    Will print out the different network topologies of the specified pretraining scheme.
    With ``--diff``, only the per-epoch diff against the previous epoch's network
    is printed instead of the full JSON.
    """
    import better_exchook
    better_exchook.install()
    import rnn
    import argparse
    from Util import dict_diff_str
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("config")
    arg_parser.add_argument("--diff",
                            action="store_true",
                            help="show diff only")
    arg_parser.add_argument('other_returnn_args',
                            nargs=argparse.REMAINDER,
                            help="config updates or so")
    args = arg_parser.parse_args()
    rnn.init_config(config_filename=args.config,
                    command_line_options=args.other_returnn_args,
                    extra_updates={"log": []})
    # noinspection PyProtectedMember
    rnn.config._hack_value_reading_debug()
    rnn.init_log()
    if not rnn.config.value("pretrain", ""):
        print(
            "config option 'pretrain' not set, will set it for this demo to 'default'"
        )
        rnn.config.set("pretrain", "default")
    pretrain = pretrain_from_config(rnn.config)
    print("pretrain: %s" % pretrain)
    num_pretrain_epochs = pretrain.get_train_num_epochs()
    last_net_json = None
    from pprint import pprint
    for epoch in range(1, 1 + num_pretrain_epochs):
        print("epoch %i (of %i) network json:" % (epoch, num_pretrain_epochs))
        net_json = pretrain.get_network_json_for_epoch(epoch)
        if args.diff:
            # First epoch has nothing to diff against.
            if last_net_json is not None:
                print(dict_diff_str(last_net_json, net_json))
            else:
                print("(initial)")
        else:
            pprint(net_json)
        last_net_json = net_json
    print("done.")
# Example #18
def init(config_filename, log_verbosity):
    """
    Initialize RETURNN for dump-dataset-raw-strings (task "dump").
    Sets the module-level ``config`` global. The config file is optional.

    :param str config_filename: filename to config-file
    :param int log_verbosity:
    """
    rnn.init_better_exchook()
    rnn.init_thread_join_hack()
    if config_filename:
        print("Using config file %r." % config_filename)
        assert os.path.exists(config_filename)
    rnn.init_config(config_filename=config_filename, command_line_options=[])
    global config
    config = rnn.config
    config.set("task", "dump")
    config.set("log", None)
    config.set("log_verbosity", log_verbosity)
    rnn.init_log()
    print("Returnn dump-dataset-raw-strings starting up.", file=log.v1)
    rnn.returnn_greeting()
    rnn.init_faulthandler()
# Example #19
def init(config_filename, log_verbosity):
  """
  Initialize RETURNN for dump-dataset-raw-strings (task "dump").
  Sets the module-level ``config`` global. The config file is optional.

  :param str config_filename: filename to config-file
  :param int log_verbosity:
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  if config_filename:
    print("Using config file %r." % config_filename)
    assert os.path.exists(config_filename)
  rnn.init_config(config_filename=config_filename, command_line_options=[])
  global config
  config = rnn.config
  config.set("task", "dump")
  config.set("log", None)
  config.set("log_verbosity", log_verbosity)
  rnn.init_log()
  print("Returnn dump-dataset-raw-strings starting up.", file=log.v1)
  rnn.returnn_greeting()
  rnn.init_faulthandler()
# Example #20
def init(config_filename, log_verbosity):
  """
  Initialize RETURNN (TensorFlow backend) for compile-native-op.
  Sets the module-level ``config`` global.

  :param str config_filename: filename to config-file
  :param int log_verbosity:
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  print("Using config file %r." % config_filename)
  assert os.path.exists(config_filename)
  rnn.init_config(config_filename=config_filename, command_line_options=[])
  global config
  config = rnn.config
  config.set("log", None)
  config.set("log_verbosity", log_verbosity)
  config.set("use_tensorflow", True)
  rnn.init_log()
  print("Returnn compile-native-op starting up.", file=log.v1)
  rnn.returnn_greeting()
  rnn.init_backend_engine()
  assert Util.BackendEngine.is_tensorflow_selected(), "this is only for TensorFlow"
  rnn.init_faulthandler()
  rnn.init_config_json_network()
# Example #21
def test_rnn_init_config_py_cmd_type():
  """
  A ``++key value`` command-line override of a typed config entry keeps the
  entry typed: ``max_seq_length`` (a dict in the config) becomes the int 0,
  and functions defined in the config observe the overridden value.
  """
  import rnn
  import tempfile
  with tempfile.NamedTemporaryFile(mode="w", suffix=".config", prefix="test_rnn_initConfig") as cfgfile:
    cfgfile.write("""#!rnn.py
max_seq_length = {'bpe': 75}

def test_func():
  return max_seq_length

    """)
    cfgfile.flush()  # make content visible before init_config reads the file
    rnn.init_config(command_line_options=[cfgfile.name, "++max_seq_length", "0"])

  assert isinstance(rnn.config, Config)
  assert rnn.config.has("max_seq_length")
  assert rnn.config.has("test_func")
  assert rnn.config.is_typed("max_seq_length")
  assert rnn.config.is_typed("test_func")
  test_func = rnn.config.typed_dict["test_func"]
  assert callable(test_func)
  assert_equal(test_func(), 0)
# Example #22
def test_rnn_init_config_py_global_var():
    """
    A ``#!rnn.py`` config can read command-line overrides at load time via
    ``config.value(...)``; the resulting global stays typed and the command-line
    value ("search") wins over the in-file default ("train").
    """
    import rnn
    import tempfile
    with tempfile.NamedTemporaryFile(mode="w",
                                     suffix=".config",
                                     prefix="test_rnn_initConfig") as cfgfile:
        cfgfile.write("""#!rnn.py
task = config.value("task", "train")

test_value = 42

def test_func():
  return task

    """)
        cfgfile.flush()  # make content visible before init_config reads the file
        rnn.init_config(
            command_line_options=[cfgfile.name, "--task", "search"])

    assert isinstance(rnn.config, Config)
    pprint(rnn.config.dict)
    pprint(rnn.config.typed_dict)
    assert rnn.config.has("task")
    assert rnn.config.has("test_value")
    assert rnn.config.has("test_func")
    assert_equal(rnn.config.value("task", None), "search")
    assert rnn.config.is_typed("test_value")
    assert_equal(rnn.config.typed_value("test_value"), 42)
    assert rnn.config.is_typed("test_func")
    # So far it's fine.
    # Now something a bit strange.
    # Earlier, this failed, because the command-line overwrote this.
    assert rnn.config.is_typed("task")
    test_func = rnn.config.typed_dict["test_func"]
    assert callable(test_func)
    assert_equal(test_func(), "search")
# Example #23
def _init_base(configfile=None, target_mode=None, epoch=None, sprint_opts=None):
  """
  One-time base initialization for the Sprint/RETURNN bridge: config, logging,
  backend engine, dataset, and the global ``engine``. Safe to call repeatedly;
  config/engine setup only runs on the first call (guarded by the globals).

  :param str|None configfile: filename, via init(), this is set
  :param str|None target_mode: "forward" or so. via init(), this is set
  :param int epoch: via init(), this is set
  :param dict[str,str]|None sprint_opts: optional parameters to override values in configfile
  """

  global isInitialized
  isInitialized = True
  # Run through in any case. Maybe just to set targetMode.

  if not getattr(sys, "argv", None):
     # Set some dummy. Some code might want this (e.g. TensorFlow).
     sys.argv = [__file__]

  global config
  if not config:
    # Some subset of what we do in rnn.init().

    rnn.init_better_exchook()
    rnn.init_thread_join_hack()

    if configfile is None:
      configfile = DefaultSprintCrnnConfig
    assert os.path.exists(configfile)
    rnn.init_config(config_filename=configfile, extra_updates={"task": target_mode})
    config = rnn.config
    if sprint_opts is not None:
      # Sprint-provided options override the config file.
      config.update(sprint_opts)

    rnn.init_log()
    rnn.returnn_greeting(config_filename=configfile)
    rnn.init_backend_engine()
    rnn.init_faulthandler(sigusr1_chain=True)
    rnn.init_config_json_network()

    global Engine
    if BackendEngine.is_tensorflow_selected():
      # Use TFEngine.Engine class instead of Engine.Engine.
      from TFEngine import Engine
    elif BackendEngine.is_theano_selected():
      from Engine import Engine

    import atexit
    atexit.register(_at_exit_handler)

  if target_mode:
    set_target_mode(target_mode)

  _init_dataset()

  if target_mode and target_mode == "forward" and epoch:
    # Resolve the exact model checkpoint for this epoch (pretrain or not);
    # exactly one of the two candidate filenames must exist.
    model_filename = config.value('model', '')
    fns = [EngineBase.epoch_model_filename(model_filename, epoch, is_pretrain) for is_pretrain in [False, True]]
    fn_postfix = ""
    if BackendEngine.is_tensorflow_selected():
      fn_postfix += ".meta"  # TF checkpoints are checked via their .meta file
    fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
    assert len(fns_existing) == 1, "%s not found" % fns
    model_epoch_filename = fns_existing[0]
    config.set('load', model_epoch_filename)
    assert EngineBase.get_epoch_model(config)[1] == model_epoch_filename, (
      "%r != %r" % (EngineBase.get_epoch_model(config), model_epoch_filename))

  global engine
  if not engine:
    devices = rnn.init_theano_devices()
    rnn.print_task_properties(devices)
    rnn.init_engine(devices)
    engine = rnn.engine
    assert isinstance(engine, Engine)
# Example #24
def demo():
  """
  Demo run. Given some learning rate file (with scores / existing lrs), will calculate how lrs would have been set,
  given some config. With ``++check_learning_rates 1``, exits with status 1 on the
  first epoch where the recalculated lr differs from the stored one.
  """
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options] [++check_learning_rates 1]" % __file__)
    print(
      ("example usage: "
       "python %s ++learning_rate_control newbob ++learning_rate_file newbob.data ++learning_rate 0.001") % __file__)
  rnn.init_config(command_line_options=sys.argv[1:])
  # noinspection PyProtectedMember
  rnn.config._hack_value_reading_debug()
  rnn.config.update({"log": []})
  rnn.init_log()
  rnn.init_backend_engine()
  check_lr = rnn.config.bool("check_learning_rates", False)
  from Pretrain import pretrain_from_config
  pretrain = pretrain_from_config(rnn.config)
  first_non_pretrain_epoch = 1
  pretrain_learning_rate = None
  if pretrain:
    first_non_pretrain_epoch = pretrain.get_train_num_epochs() + 1
  log.initialize(verbosity=[5])
  control = load_learning_rate_control_from_config(rnn.config)
  print("LearningRateControl: %r" % control)
  if not control.epoch_data:
    print("No epoch data so far.")
    return
  first_epoch = min(control.epoch_data.keys())
  if first_epoch != 1:
    print("Strange, first epoch from epoch data is %i." % first_epoch)
  print("Error key: %s from %r" % (control.get_error_key(epoch=first_epoch), control.epoch_data[first_epoch].error))
  if pretrain:
    pretrain_learning_rate = rnn.config.float('pretrain_learning_rate', control.default_learning_rate)
  max_epoch = max(control.epoch_data.keys())
  # Replay lr calculation for all epochs [1..max_epoch+1].
  for epoch in range(1, max_epoch + 2):
    old_learning_rate = None
    if epoch in control.epoch_data:
      old_learning_rate = control.epoch_data[epoch].learning_rate
    if epoch < first_non_pretrain_epoch:
      learning_rate = pretrain_learning_rate
      s = "Pretrain epoch %i, fixed learning rate: %s (was: %s)" % (epoch, learning_rate, old_learning_rate)
    elif 1 < first_non_pretrain_epoch == epoch:
      learning_rate = control.default_learning_rate
      s = "First epoch after pretrain, epoch %i, fixed learning rate: %s (was %s)" % (
        epoch, learning_rate, old_learning_rate)
    else:
      learning_rate = control.calc_new_learning_rate_for_epoch(epoch)
      s = "Calculated learning rate for epoch %i: %s (was: %s)" % (epoch, learning_rate, old_learning_rate)
    if learning_rate < control.min_learning_rate:
      learning_rate = control.min_learning_rate
      s += ", clipped to %s" % learning_rate
    s += ", previous relative error: %s" % control.calc_relative_error(epoch - 2, epoch - 1)
    if hasattr(control, "_calc_recent_mean_relative_error"):
      # noinspection PyProtectedMember
      s += ", previous mean relative error: %s" % control._calc_recent_mean_relative_error(epoch)
    print(s)
    if check_lr and old_learning_rate is not None:
      if old_learning_rate != learning_rate:
        print("Learning rate is different in epoch %i!" % epoch)
        sys.exit(1)
    # Overwrite new learning rate so that the calculation for further learning rates stays consistent.
    if epoch in control.epoch_data:
      control.epoch_data[epoch].learning_rate = learning_rate
    else:
      control.epoch_data[epoch] = control.EpochData(learningRate=learning_rate)
  print("Finished, last stored epoch was %i." % max_epoch)
# Example #25
def demo():
  """
  Demo run. Given some learning rate file (with scores / existing lrs), will calculate how lrs would have been set,
  given some config. With ``++check_learning_rates 1``, exits with status 1 on the
  first epoch where the recalculated lr differs from the stored one.
  """
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options] [++check_learning_rates 1]" % __file__)
    print(
      ("example usage: "
       "python %s ++learning_rate_control newbob ++learning_rate_file newbob.data ++learning_rate 0.001") % __file__)
  rnn.init_config(command_line_options=sys.argv[1:])
  # noinspection PyProtectedMember
  rnn.config._hack_value_reading_debug()
  rnn.config.update({"log": []})
  rnn.init_log()
  rnn.init_backend_engine()
  check_lr = rnn.config.bool("check_learning_rates", False)
  from Pretrain import pretrain_from_config
  pretrain = pretrain_from_config(rnn.config)
  first_non_pretrain_epoch = 1
  pretrain_learning_rate = None
  if pretrain:
    first_non_pretrain_epoch = pretrain.get_train_num_epochs() + 1
  log.initialize(verbosity=[5])
  control = load_learning_rate_control_from_config(rnn.config)
  print("LearningRateControl: %r" % control)
  if not control.epoch_data:
    print("No epoch data so far.")
    return
  first_epoch = min(control.epoch_data.keys())
  if first_epoch != 1:
    print("Strange, first epoch from epoch data is %i." % first_epoch)
  print("Error key: %s from %r" % (control.get_error_key(epoch=first_epoch), control.epoch_data[first_epoch].error))
  if pretrain:
    pretrain_learning_rate = rnn.config.float('pretrain_learning_rate', control.default_learning_rate)
  max_epoch = max(control.epoch_data.keys())
  # Replay lr calculation for all epochs [1..max_epoch+1].
  for epoch in range(1, max_epoch + 2):
    old_learning_rate = None
    if epoch in control.epoch_data:
      old_learning_rate = control.epoch_data[epoch].learning_rate
    if epoch < first_non_pretrain_epoch:
      learning_rate = pretrain_learning_rate
      s = "Pretrain epoch %i, fixed learning rate: %s (was: %s)" % (epoch, learning_rate, old_learning_rate)
    elif 1 < first_non_pretrain_epoch == epoch:
      learning_rate = control.default_learning_rate
      s = "First epoch after pretrain, epoch %i, fixed learning rate: %s (was %s)" % (
        epoch, learning_rate, old_learning_rate)
    else:
      learning_rate = control.calc_new_learning_rate_for_epoch(epoch)
      s = "Calculated learning rate for epoch %i: %s (was: %s)" % (epoch, learning_rate, old_learning_rate)
    if learning_rate < control.min_learning_rate:
      learning_rate = control.min_learning_rate
      s += ", clipped to %s" % learning_rate
    s += ", previous relative error: %s" % control.calc_relative_error(epoch - 2, epoch - 1)
    if hasattr(control, "_calc_recent_mean_relative_error"):
      # noinspection PyProtectedMember
      s += ", previous mean relative error: %s" % control._calc_recent_mean_relative_error(epoch)
    print(s)
    if check_lr and old_learning_rate is not None:
      if old_learning_rate != learning_rate:
        print("Learning rate is different in epoch %i!" % epoch)
        sys.exit(1)
    # Overwrite new learning rate so that the calculation for further learning rates stays consistent.
    if epoch in control.epoch_data:
      control.epoch_data[epoch].learning_rate = learning_rate
    else:
      control.epoch_data[epoch] = control.EpochData(learningRate=learning_rate)
  print("Finished, last stored epoch was %i." % max_epoch)
# Example #26
def init(config_str, config_dataset, use_pretrain, epoch, verbosity):
  """
  Initialize RETURNN, resolve the dataset to use (inline dict, HDF file, or a
  dataset from the config), optionally apply pretrain config overwrites for the
  given epoch, and set the module-level ``config`` and ``dataset`` globals.

  :param str config_str: either filename to config-file, or dict for dataset
  :param str|None config_dataset: name of the dataset entry in the config (default "train")
  :param bool use_pretrain: might overwrite config options, or even the dataset
  :param int epoch:
  :param int verbosity:
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  dataset_opts = None
  config_filename = None
  if config_str.strip().startswith("{"):
    print("Using dataset %s." % config_str)
    # NOTE(review): eval of a user-provided string; only pass trusted input here.
    dataset_opts = eval(config_str.strip())
  elif config_str.endswith(".hdf"):
    dataset_opts = {"class": "HDFDataset", "files": [config_str]}
    print("Using dataset %r." % dataset_opts)
    assert os.path.exists(config_str)
  else:
    config_filename = config_str
    print("Using config file %r." % config_filename)
    assert os.path.exists(config_filename)
  rnn.init_config(config_filename=config_filename, default_config={"cache_size": "0"})
  global config
  config = rnn.config
  config.set("log", None)
  config.set("log_verbosity", verbosity)
  rnn.init_log()
  print("Returnn %s starting up." % __file__, file=log.v2)
  rnn.returnn_greeting()
  rnn.init_faulthandler()
  rnn.init_config_json_network()
  Util.BackendEngine.select_engine(config=config)
  if not dataset_opts:
    # No inline dataset: refer to a dataset defined in the config.
    if config_dataset:
      dataset_opts = "config:%s" % config_dataset
    else:
      dataset_opts = "config:train"
  if use_pretrain:
    from Pretrain import pretrain_from_config
    pretrain = pretrain_from_config(config)
    if pretrain:
      print("Using pretrain %s, epoch %i" % (pretrain, epoch), file=log.v2)
      net_dict = pretrain.get_network_json_for_epoch(epoch=epoch)
      if "#config" in net_dict:
        # Apply the pretrain's per-epoch config overwrites, logging each diff.
        config_overwrites = net_dict["#config"]
        print("Pretrain overwrites these config options:", file=log.v2)
        assert isinstance(config_overwrites, dict)
        for key, value in sorted(config_overwrites.items()):
          assert isinstance(key, str)
          orig_value = config.typed_dict.get(key, None)
          if isinstance(orig_value, dict) and isinstance(value, dict):
            diff_str = "\n" + Util.dict_diff_str(orig_value, value)
          elif isinstance(value, dict):
            diff_str = "\n%r ->\n%s" % (orig_value, pformat(value))
          else:
            diff_str = " %r -> %r" % (orig_value, value)
          print("Config key %r for epoch %i:%s" % (key, epoch, diff_str), file=log.v2)
          config.set(key, value)
      else:
        print("No config overwrites for this epoch.", file=log.v2)
    else:
      print("No pretraining used.", file=log.v2)
  elif config.typed_dict.get("pretrain", None):
    print("Not using pretrain.", file=log.v2)
  dataset_default_opts = {}
  Dataset.kwargs_update_from_config(config, dataset_default_opts)
  print("Using dataset:", dataset_opts, file=log.v2)
  global dataset
  dataset = init_dataset(dataset_opts, default_kwargs=dataset_default_opts)
  assert isinstance(dataset, Dataset)
  dataset.init_seq_order(epoch=epoch)
# Example #27
def _init_base(configfile=None,
               target_mode=None,
               epoch=None,
               sprint_opts=None):
    """
    One-time base initialization for the Sprint/RETURNN bridge: config, logging,
    backend engine, dataset, and the global ``engine``. Safe to call repeatedly;
    config/engine setup only runs on the first call (guarded by the globals).

    :param str|None configfile: filename, via init(), this is set
    :param str|None target_mode: "forward" or so. via init(), this is set
    :param int epoch: via init(), this is set
    :param dict[str,str]|None sprint_opts: optional parameters to override values in configfile
    """

    global isInitialized
    isInitialized = True
    # Run through in any case. Maybe just to set targetMode.

    if not getattr(sys, "argv", None):
        # Set some dummy. Some code might want this (e.g. TensorFlow).
        sys.argv = [__file__]

    global config
    if not config:
        # Some subset of what we do in rnn.init().

        rnn.init_better_exchook()
        rnn.init_thread_join_hack()

        if configfile is None:
            configfile = DefaultSprintCrnnConfig
        assert os.path.exists(configfile)
        rnn.init_config(config_filename=configfile,
                        extra_updates={"task": target_mode})
        assert rnn.config
        config = rnn.config
        if sprint_opts is not None:
            # Sprint-provided options override the config file.
            config.update(sprint_opts)

        rnn.init_log()
        rnn.returnn_greeting(config_filename=configfile)
        rnn.init_backend_engine()
        rnn.init_faulthandler(sigusr1_chain=True)
        rnn.init_config_json_network()

        global Engine
        if BackendEngine.is_tensorflow_selected():
            # Use TFEngine.Engine class instead of Engine.Engine.
            from TFEngine import Engine
        elif BackendEngine.is_theano_selected():
            from Engine import Engine

        import atexit
        atexit.register(_at_exit_handler)

    if target_mode:
        set_target_mode(target_mode)

    _init_dataset()

    if target_mode and target_mode == "forward" and epoch:
        # Resolve the exact model checkpoint for this epoch (pretrain or not);
        # exactly one of the two candidate filenames must exist.
        model_filename = config.value('model', '')
        fns = [
            EngineBase.epoch_model_filename(model_filename, epoch, is_pretrain)
            for is_pretrain in [False, True]
        ]
        fn_postfix = ""
        if BackendEngine.is_tensorflow_selected():
            fn_postfix += ".meta"  # TF checkpoints are checked via their .meta file
        fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
        assert len(fns_existing) == 1, "%s not found" % fns
        model_epoch_filename = fns_existing[0]
        config.set('load', model_epoch_filename)
        assert EngineBase.get_epoch_model(config)[1] == model_epoch_filename, (
            "%r != %r" %
            (EngineBase.get_epoch_model(config), model_epoch_filename))

    global engine
    if not engine:
        devices = rnn.init_theano_devices()
        rnn.print_task_properties(devices)
        rnn.init_engine(devices)
        engine = rnn.engine
        assert isinstance(engine, Engine)