Exemplo n.º 1
0
def init_backend_engine():
    """
  Initializes ``engine``, which is either :class:`TFEngine.Engine` or Theano :class:`Engine.Engine`.

  Selects the backend from the global ``config`` via
  :func:`BackendEngine.select_engine`, then performs backend-specific setup
  (version logging, device selection, Horovod GPU pinning, TF thread pools,
  distributed TF). Order of the TF setup steps below is significant.

  :raises NotImplementedError: if neither Theano nor TensorFlow is selected.
  """
    BackendEngine.select_engine(config=config)
    if BackendEngine.is_theano_selected():
        print("Theano:", describe_theano_version(), file=log.v3)
        import returnn.theano.util
        # Apply Theano-specific monkey patches right after selection.
        returnn.theano.util.monkey_patches()
    elif BackendEngine.is_tensorflow_selected():
        print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
        if get_tensorflow_version_tuple()[0] == 0:
            # Major version 0 means TF < 1.0, which is unsupported.
            print("Warning: TF <1.0 is not supported and likely broken.",
                  file=log.v2)
        if os.environ.get("TF_DEVICE"):
            # The TF_DEVICE env var overrides the "device" option from config.
            print("Devices: Use %s via TF_DEVICE instead of %s." %
                  (os.environ.get("TF_DEVICE"),
                   config.opt_typed_value("device")),
                  file=log.v4)
            config.set("device", os.environ.get("TF_DEVICE"))
        if config.is_true("use_horovod"):
            import returnn.tf.horovod
            hvd = returnn.tf.horovod.get_ctx(config=config)
            import socket
            if "gpu" in config.value("device", "") or os.environ.get(
                    "CUDA_VISIBLE_DEVICES", ""):
                # We assume that we want to use a GPU.
                # Pin this process to the GPU matching its Horovod local rank,
                # via tf_session_opts["gpu_options"]["visible_device_list"].
                gpu_opts = config.typed_dict.setdefault("tf_session_opts",
                                                        {}).setdefault(
                                                            "gpu_options", {})
                # A preexisting visible_device_list would conflict with the
                # per-rank assignment below.
                assert "visible_device_list" not in gpu_opts
                gpu_opts["visible_device_list"] = str(hvd.local_rank())
                print("Horovod: Hostname %s, pid %i, using GPU %s." %
                      (socket.gethostname(), os.getpid(),
                       gpu_opts["visible_device_list"]),
                      file=log.v3)
            else:
                if hvd.rank() == 0:  # Don't spam in all ranks.
                    print("Horovod: Not using GPU.", file=log.v3)
            if hvd.rank() == 0:  # Don't spam in all ranks.
                print("Horovod: Reduce type:",
                      hvd.get_reduce_type(),
                      file=log.v3)
        from returnn.tf.util.basic import debug_register_better_repr, setup_tf_thread_pools, print_available_devices
        tf_session_opts = config.typed_value("tf_session_opts", {})
        assert isinstance(tf_session_opts, dict)
        # This must be done after the Horovod logic, such that we only touch the devices we are supposed to touch.
        setup_tf_thread_pools(log_file=log.v3, tf_session_opts=tf_session_opts)
        # Print available devices. Also make sure that get_tf_list_local_devices uses the correct TF session opts.
        print_available_devices(tf_session_opts=tf_session_opts, file=log.v2)
        from returnn.tf.native_op import OpMaker
        # Route native-op build logging through our log stream.
        OpMaker.log_stream = log.v3
        debug_register_better_repr()
        if config.is_true("distributed_tf"):
            import returnn.tf.distributed
            returnn.tf.distributed.init_distributed_tf(config)
    else:
        # Neither backend selected -- nothing else is supported.
        raise NotImplementedError
Exemplo n.º 2
0
def main():
    """
  Main entry.

  Parses command-line options, applies ``opt=value`` overrides onto the
  module-level ``base_settings``, optionally sets up the TF thread pools,
  then runs :func:`benchmark` for every selected LSTM cell type on GPU
  and/or CPU, and finally prints the results sorted by runtime.
  """
    global LstmCellTypes
    print("Benchmarking LSTMs.")
    better_exchook.install()
    print("Args:", " ".join(sys.argv))
    arg_parser = ArgumentParser()
    arg_parser.add_argument("cfg",
                            nargs="*",
                            help="opt=value, opt in %r" %
                            sorted(base_settings.keys()))
    arg_parser.add_argument("--no-cpu", action="store_true")
    arg_parser.add_argument("--no-gpu", action="store_true")
    arg_parser.add_argument("--selected",
                            help="comma-separated list from %r" %
                            LstmCellTypes)
    arg_parser.add_argument("--no-setup-tf-thread-pools", action="store_true")
    args = arg_parser.parse_args()
    for opt in args.cfg:
        key, value = opt.split("=", 1)
        assert key in base_settings
        # Cast the CLI string to the type of the existing default,
        # so e.g. int settings stay int.
        value_type = type(base_settings[key])
        base_settings[key] = value_type(value)
    print("Settings:")
    pprint(base_settings)

    log.initialize(verbosity=[4])
    print("Returnn:", describe_returnn_version(), file=log.v3)
    print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
    print("Python:", sys.version.replace("\n", ""), sys.platform)
    if not args.no_setup_tf_thread_pools:
        setup_tf_thread_pools(log_file=log.v2)
    else:
        print(
            "Not setting up the TF thread pools. Will be done automatically by TF to number of CPU cores."
        )
    if args.no_gpu:
        print("GPU will not be used.")
    else:
        print("GPU available: %r" % is_gpu_available())
    print_available_devices()

    if args.selected:
        # Restrict the benchmarked cell types to the user's selection.
        LstmCellTypes = args.selected.split(",")
    benchmarks = {}
    if not args.no_gpu and is_gpu_available():
        for lstm_unit in LstmCellTypes:
            benchmarks["GPU:" + lstm_unit] = benchmark(lstm_unit=lstm_unit,
                                                       use_gpu=True)
    if not args.no_cpu:
        for lstm_unit in LstmCellTypes:
            if lstm_unit in GpuOnlyCellTypes:
                continue
            benchmarks["CPU:" + lstm_unit] = benchmark(lstm_unit=lstm_unit,
                                                       use_gpu=False)

    print("-" * 20)
    print("Settings:")
    pprint(base_settings)
    print("Final results:")
    # Sort once by (time, unit name): fastest first, name as tie-break.
    # (The original double-sorted via an intermediate list; the inner sort
    # was fully dominated by this key and thus redundant.)
    for lstm_unit, t in sorted(benchmarks.items(),
                               key=lambda item: (item[1], item[0])):
        print("  %s: %s" % (lstm_unit, hms_fraction(t)))
    print("Done.")