Example #1
 def _init_network(self, net_desc, epoch=None):
   if epoch is None:
     epoch = self.epoch
   self._close_tf_session()
   self._reset_graph()
   # The new session will by default use the newly created default graph.
   self._make_tf_session()
   tf.set_random_seed(42)
   from TFUtil import get_global_train_flag_placeholder
   if self.use_dynamic_train_flag:
     train_flag = get_global_train_flag_placeholder()
   else:
     train_flag = False
   if False:  # TODO ...
     extern_data = ExternData()
     extern_data.init_from_config(self.config)
     # TODO...
   network = TFNetwork(
     name="root",
     config=self.config,
     rnd_seed=epoch,
     train_flag=train_flag,
     search_flag=self.use_search_flag)
   network.construct_from_dict(net_desc)
   network.initialize_params(session=self.tf_session)
   network.layers_desc = net_desc
   self.network = network
   if self.train_data:
     # Need to create new Updater because it has the learning_rate var which must be in the current graph.
     self.updater = Updater(config=self.config, tf_session=self.tf_session, network=network)
     self.updater.set_trainable_vars(network.get_trainable_params())
   network.print_network_info()
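For reference, the net_desc passed in here is a network-description dict of the same form that construct_from_dict receives in the test snippets below; a minimal assumed example:

net_desc = {
  "layer1": {"class": "linear", "activation": "tanh", "n_out": 13},
  "output": {"class": "softmax", "loss": "ce", "target": "classes", "from": ["layer1"]},
}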
Example #2
def test_Updater_simple_batch():
    with make_scope() as session:
        from TFNetwork import TFNetwork, ExternData
        from Config import Config
        from GeneratingDataset import Task12AXDataset
        dataset = Task12AXDataset()
        dataset.init_seq_order(epoch=1)
        extern_data = ExternData()
        extern_data.init_from_dataset(dataset)

        config = Config()
        network = TFNetwork(extern_data=extern_data, train_flag=True)
        network.construct_from_dict({
            "layer1": {
                "class": "linear",
                "activation": "tanh",
                "n_out": 13
            },
            "layer2": {
                "class": "linear",
                "activation": "tanh",
                "n_out": 13,
                "from": ["layer1"]
            },
            "output": {
                "class": "softmax",
                "loss": "ce",
                "target": "classes",
                "from": ["layer2"]
            }
        })
        network.initialize_params(session=session)

        updater = Updater(config=config, network=network)
        updater.set_learning_rate(1.0, session=session)
        updater.set_trainable_vars(network.get_trainable_params())
        updater.init_optimizer_vars(session=session)

        from TFDataPipeline import FeedDictDataProvider
        batches = dataset.generate_batches(
            recurrent_net=network.recurrent,
            batch_size=100,
            max_seqs=10,
            max_seq_length=sys.maxsize,
            used_data_keys=network.used_data_keys)
        data_provider = FeedDictDataProvider(tf_session=session,
                                             extern_data=extern_data,
                                             data_keys=network.used_data_keys,
                                             dataset=dataset,
                                             batches=batches)
        feed_dict, _ = data_provider.get_feed_dict(single_threaded=True)
        session.run(updater.get_optim_op(), feed_dict=feed_dict)
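This test (like the other Updater tests in this list) relies on helpers defined at module level in the surrounding test file, such as make_scope(), Updater, DummyLayer, sys and assert_almost_equal. A minimal sketch of what make_scope() presumably amounts to, i.e. a session bound to a fresh graph:

import contextlib
import tensorflow as tf

@contextlib.contextmanager
def make_scope():
  """Yields a tf.Session that uses its own, freshly created graph."""
  with tf.Graph().as_default() as graph:
    with tf.Session(graph=graph) as session:
      yield session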
Example #3
def test_Updater_CustomUpdate():
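    # Assumed context: session, DummyLayer, Updater and assert_almost_equal come from the surrounding test module (e.g. a module-level tf.Session).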
    from TFNetwork import TFNetwork, ExternData
    from Config import Config
    from TFUtil import CustomUpdate

    config = Config()
    network = TFNetwork(extern_data=ExternData(), train_flag=True)
    layer = network.add_layer(name="output",
                              layer_class=DummyLayer,
                              initial_value=4.0)
    assert isinstance(layer, DummyLayer)
    network.initialize_params(session=session)

    class CustomUpdateAdd13(CustomUpdate):
        def update_var(self, var):
            return tf.assign_add(var, 13.0)

    CustomUpdateAdd13().set_on_var(layer.x)

    updater = Updater(config=config, network=network)
    updater.set_learning_rate(1000.0, session=session)  # should be ignored
    updater.set_trainable_vars(network.get_trainable_params())
    updater.init_optimizer_vars(session=session)
    session.run(updater.get_optim_op())
    # Should have applied CustomUpdateAdd13.
    assert_almost_equal(
        session.run(network.get_default_output_layer().output.placeholder),
        17.0)
Example #4
def test_Updater_add_check_numerics_ops():
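    # Assumed context: session, DummyLayer, Updater and assert_almost_equal come from the surrounding test module.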
    class _Layer(DummyLayer):
        def _get_loss_value(self):
            return tf.log(self.x)

    from TFNetwork import TFNetwork, ExternData
    from Config import Config

    config = Config()
    config.set("debug_add_check_numerics_ops", True)
    network = TFNetwork(extern_data=ExternData(), train_flag=True)
    network.add_layer(name="output", layer_class=_Layer, initial_value=1.0)
    network.initialize_params(session=session)

    updater = Updater(config=config, network=network)
    updater.set_learning_rate(1.0, session=session)
    updater.set_trainable_vars(network.get_trainable_params())
    updater.init_optimizer_vars(session=session)
    # Should succeed.
    session.run(updater.get_optim_op())
    # One gradient descent step from ln(x), x = 1.0: gradient is 1.0 / x, thus x - 1.0 = 0.0.
    assert_almost_equal(
        session.run(network.get_default_output_layer().output.placeholder),
        0.0)

    try:
        # Now, should fail.
        session.run(updater.get_optim_op())
    except tf.errors.InvalidArgumentError as exc:
        print("Expected exception: %r" % exc)
    else:
        assert False, "should have raised an exception"
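The debug_add_check_numerics_ops option makes the updater add numeric checks to the graph, so the second step above fails once the loss hits log(0) = -inf. Conceptually this amounts to wrapping tensors in tf.check_numerics; a standalone TF1 illustration (not the RETURNN implementation):

import tensorflow as tf

x = tf.Variable(0.0)
y = tf.check_numerics(tf.log(x), "log(x)")  # raises at run time on inf/nan

with tf.Session() as session:
  session.run(tf.global_variables_initializer())
  try:
    session.run(y)  # log(0) = -inf -> InvalidArgumentError
  except tf.errors.InvalidArgumentError as exc:
    print("check_numerics triggered: %r" % exc)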
Example #5
  def __init__(self, input_data, names, dtypes, extern_data, data_keys):
    """
    :param dict[str,tf.Tensor] input_data:
    :param list[str] names: data_keys + extra info
    :param list[tf.DType|str] dtypes: corresponds to names
    :param ExternData extern_data:
    :param list[str] data_keys:
    """
    from TFUtil import post_control_dependencies

    # The device-scope when this gets called is the default device,
    # so everywhere where we want to do it on CPU, we have to specify it explicitly.
    # StagingArea can be used for async CPU->GPU transfer.
    # It will live on the current device by the current device scope, e.g. the GPU.
    self._tf_staging_area = StagingArea(names=names, dtypes=dtypes)

    with tf.device("/cpu:0"):
      self.staging_size = tf.Variable(0, trainable=False, dtype=tf.int32, name="staging_size")
      self._staging_size_init = tf.variables_initializer([self.staging_size])
      with tf.control_dependencies([tf.assign_add(self.staging_size, 1)]):
        self.stage_put_op = self._tf_staging_area.put(input_data)
      get_updates = [tf.assign_sub(self.staging_size, 1)]
    # This should run on the default device (GPU).
    self.stage_get_op = post_control_dependencies(self._tf_staging_area.get(), updates=get_updates)

    self.output_as_extern_data = ExternData(
      default_input=extern_data.default_input,
      default_target=extern_data.default_target,
     data={key: Data(**extern_data.data[key].get_kwargs()) for key in data_keys})
    for key, data in self.output_as_extern_data.data.items():
      data.placeholder = self.stage_get_op[key]
      data.size_placeholder = {
        axis: self.stage_get_op["%s/size%i" % (key, axis)]
        for axis in data.get_axes_with_size()}
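A standalone sketch of the double-buffering pattern described in the comments above, independent of this class (plain TF1; the StagingArea import path is the TF1-internal one): the put op is placed on the CPU, the get op lives on the default device, and a training step can run both so the next batch is staged while the current one is consumed.

import numpy
import tensorflow as tf
from tensorflow.python.ops.data_flow_ops import StagingArea

x = tf.placeholder(tf.float32, shape=(None, 10), name="x")
staging = StagingArea(names=["x"], dtypes=[tf.float32])
with tf.device("/cpu:0"):
  put_op = staging.put({"x": x})  # copy the fed batch into the staging buffer (CPU)
y = staging.get()["x"]  # created under the default device scope (e.g. the GPU)
loss = tf.reduce_sum(y)

batch = numpy.zeros((8, 10), dtype="float32")
with tf.Session() as session:
  session.run(put_op, feed_dict={x: batch})  # prefetch the first batch
  # Each step computes on the previously staged batch and stages the next one.
  print(session.run([loss, put_op], feed_dict={x: batch})[0])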
Example #6
def test_Updater_multiple_optimizers_and_opts():
  with make_scope() as session:
    from TFNetwork import TFNetwork, ExternData
    from Config import Config
    from GeneratingDataset import Task12AXDataset
    dataset = Task12AXDataset()
    dataset.init_seq_order(epoch=1)
    extern_data = ExternData()
    extern_data.init_from_dataset(dataset)

    config = Config()
    network = TFNetwork(extern_data=extern_data, train_flag=True)
    network.construct_from_dict({
      "layer1": {"class": "linear", "activation": "tanh", "n_out": 13,
                 "updater_opts": {"optimizer": {"class": "Adam"}, "accum_grad_multiple_step": 2}},
      "layer2": {"class": "linear", "activation": "tanh", "n_out": 13, "from": ["layer1"],
                 "updater_opts": {
                   "optimizer": {"class": "Adagrad", "learning_rate_multiplier": 3}, "gradient_noise": 0.1}},
      "output": {"class": "softmax", "loss": "ce", "target": "classes", "from": ["layer2"]}
    })
    network.initialize_params(session=session)

    updater = Updater(config=config, network=network)
    updater.set_learning_rate(1.0, session=session)
    updater.set_trainable_vars(network.get_trainable_params())
    updater.init_optimizer_vars(session=session)

    optim_op = updater.get_optim_op()
    assert isinstance(updater.optimizer, WrapOptimizer)
    assert len(updater.optimizer.optimizers) == 3

    from TFDataPipeline import FeedDictDataProvider
    batches = dataset.generate_batches(
      recurrent_net=network.recurrent,
      batch_size=100,
      max_seqs=10,
      max_seq_length=sys.maxsize,
      used_data_keys=network.used_data_keys)
    data_provider = FeedDictDataProvider(
      tf_session=session, extern_data=extern_data,
      data_keys=network.used_data_keys,
      dataset=dataset, batches=batches)
    feed_dict, _ = data_provider.get_feed_dict(single_threaded=True)
    session.run(optim_op, feed_dict=feed_dict)
Example #7
def set_config_num_inputs_outputs_from_dataset(config, dataset):
  """
  :param Config.Config config:
  :param Dataset dataset:
  """
  config.set("num_inputs", dataset.num_inputs)
  from Util import BackendEngine
  if BackendEngine.is_tensorflow_selected():
    # TF supports more fine-grained specification,
    # however the dataset does not store that in num_outputs.
    from TFNetwork import ExternData
    config.set("num_outputs", {
      key: ExternData.data_kwargs_from_dataset_key(dataset=dataset, key=key)
      for key in dataset.get_data_keys()})
  else:
    config.set("num_outputs", dataset.num_outputs)
Example #8
def set_config_num_inputs_outputs_from_dataset(config, dataset):
  """
  :param Config.Config config:
  :param Dataset dataset:
  """
  from Util import BackendEngine
  if BackendEngine.is_tensorflow_selected():
    # TF supports more fine-grained specification,
    # however the dataset does not store that in num_outputs.
    from TFNetwork import ExternData
    config.set("extern_data", {
      key: ExternData.data_kwargs_from_dataset_key(dataset=dataset, key=key)
      for key in dataset.get_data_keys()})
  else:
    config.set("num_inputs", dataset.num_inputs)
    config.set("num_outputs", dataset.num_outputs)
Example #9
 def __init__(self, data_queues, batch_size, max_seqs, capacity=10):
   """
   :param TFDataQueues data_queues:
   :param int batch_size:
   :param int max_seqs:
   :param int capacity:
   """
   assert not data_queues.with_batch
   self.data_queues = data_queues
   self.batch_size = batch_size
   self.max_seqs = max_seqs
   self.shapes = {key: data.batch_shape for (key, data) in data_queues.extern_data.data.items()}
   for key, data in data_queues.extern_data.data.items():
     assert data.batch_dim_axis == 0, "batch-dim currently is always added at axis 0"
     for axis in data.get_axes_with_size():
       self.shapes["%s/size%i" % (key, axis)] = (None,)  # (batch,)
   self._tf_out_queue = tf.PaddingFIFOQueue(
     capacity=capacity, name="TFBatchingQueue",
     names=data_queues.names, dtypes=data_queues.dtypes,
     shapes=[self.data_queues.shapes[key] for key in data_queues.names])
   self._tf_batch_nums = tf.FIFOQueue(
     capacity=capacity, dtypes=[tf.int32], shapes=[()])
   self._cur_batch_num = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False, name="batch_num")
   self._cur_max_seq_len = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False, name="max_seq_len")
   self._tf_enqueue_loop_op = self._make_enqueue_loop_op()
   from TFUtil import Data
   self.output_as_extern_data = ExternData(
     default_input=data_queues.extern_data.default_input,
     default_target=data_queues.extern_data.default_target,
     data={key: Data(**data.get_kwargs()) for (key, data) in data_queues.extern_data.data.items()})
   self.batch_queue_size = self._tf_batch_nums.size()
   batch_dequeue_op = self._tf_out_queue.dequeue_up_to(n=self._tf_batch_nums.dequeue())
   self.last_seq_idx = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False, name="last_seq_idx")
   self.seq_counter = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False, name="seq_counter")
   self.batch_counter = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False, name="batch_counter")
   default_input_key = self.output_as_extern_data.default_input
   default_input_data = self.output_as_extern_data.data[default_input_key]
   # Use the local batch_dequeue_op here; self.batch_dequeue_op (with the counter updates attached) is set below.
   last_batch_size = tf.shape(batch_dequeue_op[default_input_key])[default_input_data.batch_dim_axis]
   updates = [
     tf.assign(self.last_seq_idx, tf.maximum(self.last_seq_idx, tf.reduce_max(batch_dequeue_op["seq_idx"]))),
     tf.assign_add(self.seq_counter, last_batch_size),
     tf.assign_add(self.batch_counter, 1)]
   self._init_op = tf.variables_initializer([
     self._cur_batch_num, self._cur_max_seq_len,
     self.last_seq_idx, self.seq_counter, self.batch_counter])
   from TFUtil import post_control_dependencies
   self.batch_dequeue_op = post_control_dependencies(batch_dequeue_op, updates=updates)
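For context, tf.PaddingFIFOQueue.dequeue_up_to is what turns the individually enqueued, variable-length sequences into a single zero-padded batch; a tiny standalone TF1 illustration (not RETURNN code):

import tensorflow as tf

queue = tf.PaddingFIFOQueue(capacity=10, dtypes=[tf.float32], shapes=[(None,)])
seq_in = tf.placeholder(tf.float32, shape=(None,))
enqueue_op = queue.enqueue([seq_in])
batch_op = queue.dequeue_up_to(3)  # pads all dequeued sequences to a common length

with tf.Session() as session:
  for seq in [[1.], [1., 2.], [1., 2., 3.]]:
    session.run(enqueue_op, feed_dict={seq_in: seq})
  print(session.run(batch_op))  # shape (3, 3), zero-padded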
Example #10
def test_Updater_GradientDescent():
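    # Assumed context: session, DummyLayer and assert_almost_equal come from the surrounding test module; this snippet uses an older Updater API (tf_session passed to the constructor, no explicit init_optimizer_vars).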
    from TFNetwork import TFNetwork, ExternData
    from Config import Config

    config = Config()
    network = TFNetwork(extern_data=ExternData(), train_flag=True)
    network.add_layer(name="output",
                      layer_class=DummyLayer,
                      initial_value=5.0,
                      loss_value_factor=3.0)
    network.initialize_params(session=session)

    updater = Updater(config=config, tf_session=session, network=network)
    updater.set_learning_rate(1.0)
    updater.set_trainable_vars(network.get_trainable_params())
    session.run(updater.get_optim_op())
    # One gradient descent step from 3.0 * x: gradient is 3, thus 5 - 3 = 2.
    assert_almost_equal(
        session.run(network.get_default_output_layer().output.placeholder),
        2.0)