def test_serialization_deserialization(self):
    """Round-trips network weights through serialize/deserialize.

    A fully connected network with seeded random dimensions is built and
    its weights are initialized. The weights are flattened with
    `networks.serialize_weights`; the result must be one-dimensional and
    contain as many entries as all original weights combined. The flat
    array is then passed to `networks.deserialize_weights`, and every
    restored weight must equal the original exactly.
    """
    np.random.seed(12345)
    # Use the session managed by the test case, as test_fully_connected does,
    # instead of opening a bare tf.Session().
    with self.session() as sess:
      dim_in = np.random.randint(1, 100)
      dim_out = np.random.randint(1, 100)
      num_layers = np.random.randint(1, 10)
      layer_sizes = np.random.randint(1, 100, num_layers)
      gen = networks.FullyConnectedNetworkGenerator(dim_in, dim_out,
                                                    layer_sizes)
      weights = gen.construct_network_weights()
      sess.run(tf.global_variables_initializer())

      # Serialize weights to a 1-d numpy array
      serialized_weights = networks.serialize_weights(sess, weights)
      self.assertEqual(
          len(serialized_weights.shape),
          1,
          msg='Serialized weights should have dimension 1.')

      # The flat array must hold exactly one entry per weight element.
      expected_length = sum(
          np.prod(weights[name].shape) for name in weights)
      self.assertEqual(
          expected_length, serialized_weights.shape[0],
          'Serialized weights should have the same length as original weights.')

      # Deserialize and compare weights
      deserialized_weights = networks.deserialize_weights(
          weights, serialized_weights)
      for name in weights:
        original_weight = weights[name].eval(session=sess)
        np.testing.assert_array_equal(
            original_weight, deserialized_weights[name],
            'Weights should be exactly the same before/after serialization')
  def test_fully_connected(self):
    """Checks that generated fully connected networks never output NaN.

    For ten seeded random input/output sizes, networks are generated from
    random `layer_sizes` arrays of length 1, 2 and 10. Each network is
    initialized and evaluated on a batch of 100 standard-normal inputs,
    and its output must not contain any NaN.
    """
    np.random.seed(12345)
    depths = (1, 2, 10)
    with self.session() as sess:
      for _ in range(10):
        n_in = np.random.randint(1, 100)
        n_out = np.random.randint(1, 100)
        net_input = tf.placeholder(
            tf.float32, shape=(None, n_in), name='input')
        for depth in depths:
          sizes = np.random.randint(1, 100, depth)
          generator = networks.FullyConnectedNetworkGenerator(
              n_in, n_out, sizes)
          params = generator.construct_network_weights()
          net_output = generator.construct_network(net_input, params)

          # Initialize every variable created so far before evaluating.
          sess.run(tf.global_variables_initializer())
          batch = np.random.randn(100, n_in)
          result = sess.run(net_output, feed_dict={net_input: batch})
          self.assertFalse(np.isnan(result).any())
Пример #3
0
    def test_learn_simple_policy(self):
        """Train a gaussian "policy" to react differently to various inputs.

        Inputs are sampled from a 2D Gaussian distribution.
        Outputs are one dimensional.

        Four input clusters (Gaussian noise with std 0.1 around fixed means)
        are each paired with a distinct scalar target. After 1000 Adam steps
        that maximize the policy's log-likelihood of the targets, the policy
        mean evaluated at the cluster centres must match the targets with a
        mean absolute error that rounds to zero at one decimal place.
        """
        input_means = np.array([[-1., -1], [-1, 1], [1, -1], [1, 1]])
        input_std = .1
        output_means = np.array([[0.], [1], [2], [3]])
        samples_per_mean = 100

        network_generator = networks.FullyConnectedNetworkGenerator(
            2, 1, (
                64,
                64,
            ), tf.nn.relu)
        weights = network_generator.construct_network_weights()
        net_in = tf.placeholder(tf.float32, shape=(None, 2), name='input')
        net_out = network_generator.construct_network(net_in, weights)
        policy = policies.GaussianPolicy(net_in, net_out, 1, -5.)

        actions = tf.placeholder(tf.float32, shape=(None, 1), name='actions')
        log_lik = policy.log_likelihood_op(actions)
        optimizer = tf.train.AdamOptimizer(0.001)
        # Minimizing the negative mean log-likelihood maximizes the likelihood.
        minimizer = optimizer.minimize(-tf.reduce_mean(log_lik))

        pol_mean, _ = policy.mean_op()

        # Loop-invariant training data: each cluster centre and its target,
        # repeated once per sample. Only the additive noise changes per step.
        clean_input = np.repeat(input_means, samples_per_mean, axis=0)
        sample_output = np.repeat(output_means, samples_per_mean, axis=0)

        # NOTE(review): no random seed is set in this test, so the sampled
        # noise differs between runs unless seeded elsewhere (e.g. in setUp)
        # -- confirm.
        with self.session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            for _ in range(1000):
                noise = np.random.normal(0, input_std, clean_input.shape)
                sess.run(minimizer, {
                    net_in: clean_input + noise,
                    actions: sample_output
                })
            output_means_res = sess.run(pol_mean, {net_in: input_means})
            mae = np.mean(np.abs(output_means - output_means_res))
            self.assertAlmostEqual(mae, 0, places=1)
Пример #4
0
MOVE_POINT_ROTATE_MAML = dict(
    random_seed=random.randint(0, 1000000),
    num_outer_iterations=1000,
    task_generator=functools.partial(move_point_env.MovePointEnv,
                                     start_pos=(0, 0),
                                     end_pos=(1, 0),
                                     goal_reached_distance=-1,
                                     trial_length=10),
    task_env_modifiers=[{
        '_action_rotation': i
    } for i in np.linspace(-np.pi, np.pi, 5000)],
    network_generator=networks.FullyConnectedNetworkGenerator(
        dim_input=2,
        dim_output=2,
        layer_sizes=(
            50,
            50,
        ),
        activation_fn=tf.nn.tanh),
    input_dims=2,
    pol_log_std_init=-3.,
    output_dims=2,
    reward_disc=0.9,
    learn_offset=False,
    policy=policies.GaussianPolicy,
    tasks_batch_size=10,
    num_inner_rollouts=25,
    outer_optimizer_algo=tf.train.AdamOptimizer,
    advantage_function='returns-values',
    whiten_values=False,
    always_full_rollouts=False,