def init(config_filename, log_verbosity):
  """
  :param str config_filename: filename of the config file
  :param int log_verbosity: verbosity level for logging
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  if config_filename:
    print("Using config file %r." % config_filename)
    assert os.path.exists(config_filename)
  rnn.init_config(config_filename=config_filename, command_line_options=[])
  global config
  config = rnn.config
  config.set("log", None)
  config.set("log_verbosity", log_verbosity)
  config.set("use_tensorflow", True)
  rnn.init_log()
  print("Returnn compile-native-op starting up.", file=log.v1)
  rnn.returnn_greeting()
  rnn.init_backend_engine()
  assert util.BackendEngine.is_tensorflow_selected(), "this is only for TensorFlow"
  rnn.init_faulthandler()
  rnn.init_config_json_network()
  if "network" in config.typed_dict:
    print("Loading network")
    from returnn.tf.network import TFNetwork
    network = TFNetwork(name="root", config=config, rnd_seed=1, train_flag=False, eval_flag=True, search_flag=False)
    network.construct_from_dict(config.typed_dict["network"])

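# Example call (hypothetical config path), as a tool's main() would do it:
#   init(config_filename="returnn.config", log_verbosity=4)
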
def test_Updater_add_check_numerics_ops():
  class _Layer(DummyLayer):
    def _get_loss_value(self):
      return tf_compat.v1.log(self.x)

  from returnn.tf.network import TFNetwork, ExternData
  from returnn.config import Config

  with make_scope() as session:
    config = Config()
    config.set("debug_add_check_numerics_ops", True)
    network = TFNetwork(extern_data=ExternData(), train_flag=True)
    network.add_layer(name="output", layer_class=_Layer, initial_value=1.0)
    network.initialize_params(session=session)
    updater = Updater(config=config, network=network)
    updater.set_learning_rate(1.0, session=session)
    updater.set_trainable_vars(network.get_trainable_params())
    updater.init_optimizer_vars(session=session)
    # Should succeed.
    session.run(updater.get_optim_op())
    # One gradient descent step from ln(x), x = 1.0: gradient is 1.0 / x, thus x - 1.0 = 0.0.
    assert_almost_equal(session.run(network.get_default_output_layer().output.placeholder), 0.0)
    try:
      # Now it should fail: with x = 0.0, log(x) is -inf and the gradient 1.0 / x is inf,
      # which the check-numerics ops catch.
      session.run(updater.get_optim_op())
    except tf.errors.InvalidArgumentError as exc:
      print("Expected exception: %r" % exc)
    else:
      assert False, "should have raised an exception"

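# The config flag above makes the Updater add TF check-numerics ops to the graph
# (presumably via tf.compat.v1.add_check_numerics_ops()). A minimal standalone
# sketch of the same failure mode, assuming graph-mode TF; not part of the test
# suite, and the function name is made up:
def _sketch_check_numerics_failure():
  import tensorflow as tf
  with tf.compat.v1.Graph().as_default(), tf.compat.v1.Session() as session:
    x = tf.Variable(0.0)  # like the test state after the first update step
    y = tf.compat.v1.log(x)  # log(0.0) = -inf
    # Adds a check_numerics op for every float tensor in the graph.
    check_op = tf.compat.v1.add_check_numerics_ops()
    session.run(tf.compat.v1.global_variables_initializer())
    try:
      session.run([y, check_op])
    except tf.errors.InvalidArgumentError as exc:
      print("check_numerics caught: %r" % exc)
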
def test_Updater_CustomUpdate():
  with make_scope() as session:
    from returnn.tf.network import TFNetwork, ExternData
    from returnn.config import Config
    from returnn.tf.util.basic import CustomUpdate

    config = Config()
    network = TFNetwork(extern_data=ExternData(), train_flag=True)
    layer = network.add_layer(name="output", layer_class=DummyLayer, initial_value=4.0)
    assert isinstance(layer, DummyLayer)
    network.initialize_params(session=session)

    class CustomUpdateAdd13(CustomUpdate):
      def update_var(self, var):
        return tf_compat.v1.assign_add(var, 13.0)

    CustomUpdateAdd13().set_on_var(layer.x)

    updater = Updater(config=config, network=network)
    updater.set_learning_rate(1000.0, session=session)  # should be ignored
    updater.set_trainable_vars(network.get_trainable_params())
    updater.init_optimizer_vars(session=session)
    session.run(updater.get_optim_op())
    # Should have applied CustomUpdateAdd13.
    assert_almost_equal(session.run(network.get_default_output_layer().output.placeholder), 17.0)

def _run_returnn_standalone_net_dict(self):
  print(">>> Constructing RETURNN model, load TF checkpoint, run...")
  with tf.compat.v1.Session() as session:
    from returnn.config import Config
    from returnn.tf.network import TFNetwork

    config = Config({
      "extern_data": {"data": self._returnn_in_data_dict},
      "debug_print_layer_output_template": True,
    })
    network = TFNetwork(config=config, name="root")
    network.construct_from_dict(self._returnn_net_dict)
    network.load_params_from_file(filename=self._tf_checkpoint_save_path, session=session)

    x = network.extern_data.get_default_input_data()
    y = network.get_default_output_layer().output
    feed_dict = self._make_tf_feed_dict(x)
    y_, y_size = session.run((y.placeholder, y.size_placeholder), feed_dict=feed_dict)
    assert isinstance(y_, numpy.ndarray)
    print("Output shape:", y_.shape)
    numpy.testing.assert_allclose(self._out_returnn_np, y_)
    print(">>>> Looks good!")
    print()

def init(config_filename, command_line_options, args):
  """
  :param str config_filename:
  :param list[str] command_line_options:
  :param argparse.Namespace args:
  """
  global config, engine, dataset
  rnn.init(
    config_filename=config_filename, command_line_options=command_line_options,
    config_updates={"log": None, "need_data": False},
    extra_greeting="RETURNN dump-forward starting up.")
  config = rnn.config
  engine = rnn.engine

  dataset_str = args.dataset
  if dataset_str in {"train", "dev", "eval", "search_data"}:
    dataset_str = "config:%s" % dataset_str
  extra_dataset_kwargs = {}
  if args.reset_partition_epoch:
    print("NOTE: We are resetting partition epoch to %i." % (args.reset_partition_epoch,))
    extra_dataset_kwargs["partition_epoch"] = args.reset_partition_epoch
  if args.reset_seq_ordering:
    print("NOTE: We will use %r seq ordering." % (args.reset_seq_ordering,))
    extra_dataset_kwargs["seq_ordering"] = args.reset_seq_ordering
  if args.reset_epoch_wise_filter:
    # The CLI arg is a Python expression given as a string.
    extra_dataset_kwargs["epoch_wise_filter"] = eval(args.reset_epoch_wise_filter)
  dataset = init_dataset(dataset_str, extra_kwargs=extra_dataset_kwargs)
  if hasattr(dataset, "epoch_wise_filter") and args.reset_epoch_wise_filter is None:
    if dataset.epoch_wise_filter:
      print("NOTE: Resetting epoch_wise_filter to None.")
      dataset.epoch_wise_filter = None
  if args.reset_partition_epoch:
    assert dataset.partition_epoch == args.reset_partition_epoch
  if args.reset_seq_ordering:
    assert dataset.seq_ordering == args.reset_seq_ordering

  config.set("task", "eval")
  if args.load:
    config.set("load", args.load)
  epoch, model_epoch_filename = Engine.get_epoch_model(config)
  engine.pretrain = pretrain_from_config(config)
  engine.custom_get_net_dict = config.typed_value("get_network")
  net_dict = engine.get_net_dict_for_epoch(epoch)
  engine.make_tf_session()
  engine.network = TFNetwork(name="root")
  engine.network.construct_layer(net_dict, args.layer)
  print("Load model:", model_epoch_filename)
  engine.network.load_params_from_file(model_epoch_filename, session=engine.tf_session)

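# A hedged sketch (hypothetical values; the real ones come from the tool's
# argparse setup) of the attributes init() reads from the args namespace:
def _sketch_make_args():
  import argparse
  return argparse.Namespace(
    dataset="dev",  # dataset name from the config, or a full dataset spec string
    reset_partition_epoch=1,
    reset_seq_ordering="sorted_reverse",
    reset_epoch_wise_filter=None,  # or a string with a Python dict, passed to eval()
    load=None,  # optional model checkpoint, overrides config "load"
    layer="output")  # the layer to construct for dumping
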
def test_Updater_simple_batch():
  with make_scope() as session:
    from returnn.tf.network import TFNetwork, ExternData
    from returnn.config import Config
    from returnn.datasets.generating import Task12AXDataset

    dataset = Task12AXDataset()
    dataset.init_seq_order(epoch=1)
    extern_data = ExternData()
    extern_data.init_from_dataset(dataset)

    config = Config()
    network = TFNetwork(extern_data=extern_data, train_flag=True)
    network.construct_from_dict({
      "layer1": {"class": "linear", "activation": "tanh", "n_out": 13, "from": "data:data"},
      "layer2": {"class": "linear", "activation": "tanh", "n_out": 13, "from": ["layer1"]},
      "output": {"class": "softmax", "loss": "ce", "target": "classes", "from": ["layer2"]}
    })
    network.initialize_params(session=session)

    updater = Updater(config=config, network=network)
    updater.set_learning_rate(1.0, session=session)
    updater.set_trainable_vars(network.get_trainable_params())
    updater.init_optimizer_vars(session=session)

    from returnn.tf.data_pipeline import FeedDictDataProvider
    batches = dataset.generate_batches(
      recurrent_net=network.recurrent,
      batch_size=100,
      max_seqs=10,
      max_seq_length=sys.maxsize,
      used_data_keys=network.used_data_keys)
    data_provider = FeedDictDataProvider(
      tf_session=session, extern_data=extern_data,
      data_keys=network.used_data_keys,
      dataset=dataset, batches=batches)
    feed_dict, _ = data_provider.get_feed_dict(single_threaded=True)
    session.run(updater.get_optim_op(), feed_dict=feed_dict)

def test_Updater_GradientDescent():
  with make_scope() as session:
    from returnn.tf.network import TFNetwork, ExternData
    from returnn.config import Config

    config = Config()
    network = TFNetwork(extern_data=ExternData(), train_flag=True)
    network.add_layer(name="output", layer_class=DummyLayer, initial_value=5.0, loss_value_factor=3.0)
    network.initialize_params(session=session)

    updater = Updater(config=config, network=network)
    updater.set_learning_rate(1.0, session=session)
    updater.set_trainable_vars(network.get_trainable_params())
    updater.init_optimizer_vars(session=session)
    session.run(updater.get_optim_op())
    # One gradient descent step from 3.0 * x: gradient is 3, thus 5 - 3 = 2.
    assert_almost_equal(session.run(network.get_default_output_layer().output.placeholder), 2.0)

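# The same update written out in plain Python for reference (a sketch, not part
# of the test): plain SGD computes x_new = x - lr * d(loss)/dx.
def _sketch_sgd_step(x=5.0, lr=1.0):
  grad = 3.0  # d(3.0 * x)/dx is the constant 3.0
  return x - lr * grad  # 5.0 - 1.0 * 3.0 = 2.0
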
def test_base_get_output_shape_from_returnn_conv2d_dynamic():
  with Naming.make_instance() as naming:
    assert isinstance(naming, Naming)
    x = torch.Tensor(64, 1, 11, 13)
    x_ = naming.register_tensor(x)
    x_.returnn_data = Data(name="x", shape=(1, None, None), feature_dim_axis=1)
    x_.returnn_axis_from_torch_axis = {0: 0, 1: 1, 2: 2, 3: 3}
    net = TFNetwork(extern_data=ExternData())
    # E.g. conv layer, with padding "same".
    layer = InternalLayer(name="layer", network=net, out_type={"shape": (None, None, 32)})
    torch_shape, returnn_axis_from_torch_axis = torch.nn.Module._base_get_output_shape_from_returnn(
      inputs_flat=[x], layer=layer)
    assert returnn_axis_from_torch_axis == {0: 0, 1: 3, 2: 1, 3: 2}
    assert torch_shape == (64, 32, 11, 13)

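# A small helper sketch (hypothetical, just for illustration) of what such a
# mapping means: Torch axis i reads from RETURNN axis returnn_axis_from_torch_axis[i].
def _torch_shape_from_returnn(returnn_shape, returnn_axis_from_torch_axis):
  return tuple(returnn_shape[returnn_axis_from_torch_axis[i]] for i in range(len(returnn_shape)))

# E.g. the RETURNN conv output above has dims (batch, spatial, spatial, feature)
# = (64, 11, 13, 32); with the mapping {0: 0, 1: 3, 2: 1, 3: 2} this reads back
# in Torch NCHW order as (64, 32, 11, 13), matching the assertion.
assert _torch_shape_from_returnn((64, 11, 13, 32), {0: 0, 1: 3, 2: 1, 3: 2}) == (64, 32, 11, 13)
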
def _run_returnn_standalone_python(self):
  print(">>> Constructing RETURNN model via Python code, load TF checkpoint, run...")
  with tf.compat.v1.Session() as session:
    with Naming.make_instance() as naming:  # we expect this to work with the default settings
      model_func = self._model_func

      # Wrap the model_func in a module.
      # We assume this would be flattened away in the namespace.
      # All named modules should thus have the same names.
      class DummyModule(torch_returnn.nn.Module):
        def get_returnn_name(self) -> str:
          return ""  # also avoid that this name becomes a prefix anywhere

        def forward(self, *inputs):
          return model_func(wrapped_import_torch_returnn, *inputs)

      dummy_mod = DummyModule()
      net_dict = dummy_mod.as_returnn_net_dict(self._returnn_in_data_dict)

      from returnn.config import Config
      from returnn.tf.network import TFNetwork

      config = Config({
        "extern_data": {"data": self._returnn_in_data_dict},
        "debug_print_layer_output_template": True,
      })
      network = TFNetwork(config=config, name="root")
      network.construct_from_dict(net_dict)
      network.load_params_from_file(filename=self._tf_checkpoint_save_path, session=session)

      x = network.extern_data.get_default_input_data()
      y = network.get_default_output_layer().output
      feed_dict = self._make_tf_feed_dict(x)
      y_, y_size = session.run((y.placeholder, y.size_placeholder), feed_dict=feed_dict)
      assert isinstance(y_, numpy.ndarray)
      print("Output shape:", y_.shape)
      numpy.testing.assert_allclose(self._out_returnn_np, y_)
      print(">>>> Looks good!")
      print()

def test_base_get_output_shape_from_returnn_2d_reorder_dynamic():
  with Naming.make_instance() as naming:
    assert isinstance(naming, Naming)
    x = torch.Tensor(64, 1, 11, 13)
    x_ = naming.register_tensor(x)
    x_.returnn_data = Data(name="x", shape=(1, None, None), feature_dim_axis=1, auto_create_placeholders=True)
    x_.returnn_axis_from_torch_axis = {0: 0, 1: 1, 2: 2, 3: 3}
    y_data = x_.returnn_data.copy_move_axis(2, 3)
    assert y_data.get_dim_tag(3) == x_.returnn_data.get_dim_tag(2)
    net = TFNetwork(extern_data=ExternData())
    # E.g. softmax_over_spatial with axis="stag:time1".
    layer = InternalLayer(name="layer", network=net, output=y_data)
    # We expect of all Torch modules that they do not reorder the spatial axes.
    # (If they do, they would explicitly overwrite the output shape logic.)
    torch_shape, returnn_axis_from_torch_axis = torch.nn.Module._base_get_output_shape_from_returnn(
      inputs_flat=[x], layer=layer)
    assert returnn_axis_from_torch_axis == {0: 0, 1: 1, 2: 3, 3: 2}
    assert torch_shape == (64, 1, 11, 13)

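# Follow-up to the _torch_shape_from_returnn sketch above: here the RETURNN dims
# after copy_move_axis(2, 3) are (64, 1, 13, 11), and the composed mapping
# {0: 0, 1: 1, 2: 3, 3: 2} reads them back in Torch order as (64, 1, 11, 13),
# i.e. the RETURNN-side reorder is invisible on the Torch side.
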
def __init__(self, *, parent: Optional[ReturnnContext] = None, name: Optional[str] = None):
  self.parent = parent
  if parent:
    assert name
    self.config = parent.config
    self.tf_name_scope = parent.network.get_absolute_name_scope_prefix() + LayerBase.cls_get_tf_scope_name(name)
    assert parent.network.extern_data.data
    self.sub_net_layer = parent.network.add_layer(
      name=name, layer_class=SubnetworkLayer,
      # This is just a placeholder, will be replaced in define_output.
      sources=[parent.network.get_layer("data")],
      subnetwork={"output": {"class": "copy"}})  # type: SubnetworkLayer
    self._dummy_sub_output = self.sub_net_layer.subnetwork.layers["output"]
  else:
    self.config = Config({
      # "debug_print_layer_output_template": True,
    })
    self.tf_name_scope = ""
    self.sub_net_layer = None
    self._dummy_sub_output = None
  if self.sub_net_layer:
    self.network = self.sub_net_layer.subnetwork
  else:
    assert not parent
    self.network = TFNetwork(
      extern_data=ExternData(), config=self.config, name="root",
      absolute_name_prefix=(self.tf_name_scope + "/") if self.tf_name_scope else "")