Example #1
def __init__(self, env):
    self.env = env
    # Actor network: conv stack followed by two fully connected layers.
    self.net = actor_distribution_network.ActorDistributionNetwork(
        self.env.observation_spec(),
        self.env.action_spec(),
        conv_layer_params=[(32, 5, 1), (64, 5, 2),
                           (128, 5, 2), (256, 5, 2)],
        fc_layer_params=(64, 2))
    self.optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=1e-3)
    # Distribution strategy used for training on the available GPU(s).
    self.strategy = strategy_utils.get_strategy(tpu=False, use_gpu=True)
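For context (not part of the example), get_strategy returns a tf.distribute.Strategy, typically a MirroredStrategy when use_gpu=True, and variable-creating code is normally wrapped in its scope. A minimal hedged sketch of that pattern:

import tensorflow as tf
from tf_agents.train.utils import strategy_utils

strategy = strategy_utils.get_strategy(tpu=False, use_gpu=True)
with strategy.scope():
    # Variables created inside the scope are placed/mirrored by the strategy.
    train_step = tf.Variable(0, dtype=tf.int64)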
Example #2
    def test_tpu_strategy(self, mock_tpu_cluster_resolver,
                          mock_experimental_connect_to_cluster,
                          mock_initialize_tpu_system, mock_tpu_strategy):
        resolver = mock.MagicMock()
        mock_tpu_cluster_resolver.return_value = resolver
        mock_strategy = mock.MagicMock()
        mock_tpu_strategy.return_value = mock_strategy

        strategy = strategy_utils.get_strategy(tpu='bns_address',
                                               use_gpu=False)

        mock_tpu_cluster_resolver.assert_called_with(tpu='bns_address')
        mock_experimental_connect_to_cluster.assert_called_with(resolver)
        mock_initialize_tpu_system.assert_called_with(resolver)
        self.assertIs(strategy, mock_strategy)
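The mock.patch decorators that supply the four mock arguments are not shown in the snippet. A plausible reconstruction, assuming get_strategy resolves TPUs through the standard TF APIs that the argument names suggest (an assumption, not the verbatim source):

import tensorflow as tf
from unittest import mock

class StrategyUtilsTest(tf.test.TestCase):

    # Decorators apply bottom-up, so the innermost patch maps to the first
    # mock argument (mock_tpu_cluster_resolver); the patch targets are assumed.
    @mock.patch.object(tf.distribute.experimental, 'TPUStrategy')
    @mock.patch.object(tf.tpu.experimental, 'initialize_tpu_system')
    @mock.patch.object(tf.config, 'experimental_connect_to_cluster')
    @mock.patch.object(tf.distribute.cluster_resolver, 'TPUClusterResolver')
    def test_tpu_strategy(self, mock_tpu_cluster_resolver,
                          mock_experimental_connect_to_cluster,
                          mock_initialize_tpu_system, mock_tpu_strategy):
        ...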
Example #3
def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_v2_behavior()

  gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_bindings)

  strategy = strategy_utils.get_strategy(FLAGS.tpu, FLAGS.use_gpu)

  train(
      FLAGS.root_dir,
      strategy,
      replay_buffer_server_address=FLAGS.replay_buffer_server_address,
      variable_container_server_address=FLAGS.variable_container_server_address,
      create_agent_fn=_create_agent,
      create_env_fn=lambda: suite_mujoco.load('HalfCheetah-v2'),
      num_iterations=FLAGS.num_iterations,
  )
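The FLAGS referenced by main() are defined elsewhere in the binary; a hedged sketch of the absl flag definitions it assumes (the flag names are taken from the snippet, while the defaults and help strings are assumptions):

from absl import flags

flags.DEFINE_string('root_dir', None, 'Root directory for experiment outputs.')
flags.DEFINE_string('replay_buffer_server_address', None,
                    'Address of the Reverb replay buffer server.')
flags.DEFINE_string('variable_container_server_address', None,
                    'Address of the variable container server.')
flags.DEFINE_string('tpu', None, 'BNS address of the TPU to use, if any.')
flags.DEFINE_bool('use_gpu', False, 'Whether to build a GPU MirroredStrategy.')
flags.DEFINE_integer('num_iterations', 1000000, 'Number of training iterations.')
flags.DEFINE_multi_string('gin_file', None, 'Paths to gin configuration files.')
flags.DEFINE_multi_string('gin_bindings', None, 'Gin binding overrides.')

FLAGS = flags.FLAGS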
Example #4
log_interval = 1000  # @param {type:"integer"}

num_eval_episodes = 20  # @param {type:"integer"}
eval_interval = 1000  # @param {type:"integer"}

policy_save_interval = 5000  # @param {type:"integer"}

env = get_tf_wrapped_robo_rugby_env()
print('Observation Spec:')
print(env.time_step_spec().observation)
print('Action Spec:')
print(env.action_spec())
collect_env = get_tf_wrapped_robo_rugby_env()
eval_env = get_tf_wrapped_robo_rugby_env()
objStrategy = strategy_utils.get_strategy(tpu=False, use_gpu=True)

specObservation, specAction, specTimeStep = (
    spec_utils.get_tensor_specs(collect_env))

with objStrategy.scope():
    # Critic network: estimates Q-values that are used to train the actor.
    nnCritic = critic_network.CriticNetwork(
        (specObservation, specAction),
        observation_fc_layer_params=None,
        action_fc_layer_params=None,
        joint_fc_layer_params=HyperParms.critic_joint_fc_layer_params,
        kernel_initializer='glorot_uniform',
        last_kernel_initializer='glorot_uniform')

with objStrategy.scope():
    ...  # the body of this second scope is not included in the snippet
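In the standard TF-Agents SAC setup this example appears to follow, the second scope would build the actor network and the SAC agent. A hedged sketch of that body, continuing the names defined above (HyperParms.actor_fc_layer_params and the optimizer settings are assumptions):

import tensorflow as tf
from tf_agents.agents.sac import sac_agent
from tf_agents.networks import actor_distribution_network
from tf_agents.networks import tanh_normal_projection_network
from tf_agents.train.utils import train_utils

with objStrategy.scope():
    # Actor network emitting a tanh-squashed normal distribution over actions.
    nnActor = actor_distribution_network.ActorDistributionNetwork(
        specObservation,
        specAction,
        fc_layer_params=HyperParms.actor_fc_layer_params,  # assumed attribute
        continuous_projection_net=(
            tanh_normal_projection_network.TanhNormalProjectionNetwork))

    train_step = train_utils.create_train_step()

    objAgent = sac_agent.SacAgent(
        specTimeStep,
        specAction,
        actor_network=nnActor,
        critic_network=nnCritic,
        actor_optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4),
        critic_optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4),
        alpha_optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4),
        target_update_tau=0.005,
        target_update_period=1,
        td_errors_loss_fn=tf.math.squared_difference,
        gamma=0.99,
        reward_scale_factor=1.0,
        train_step_counter=train_step)
    objAgent.initialize()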
Example #5
from tf_agents.networks import actor_distribution_network
from tf_agents.policies import greedy_policy
from tf_agents.policies import py_tf_eager_policy
from tf_agents.policies import random_py_policy
from tf_agents.replay_buffers import reverb_replay_buffer
from tf_agents.replay_buffers import reverb_utils

from tf_agents.environments import tf_py_environment
from tf_agents.agents.sac import sac_agent
from nq_environment_tf import *
from power_switch import *

tf.compat.v1.enable_v2_behavior()

use_gpu = False
strategy = strategy_utils.get_strategy(tpu=False, use_gpu=use_gpu)

tempdir = "nimble_quest_weight_sac_1st"

num_iterations = 1000  # @param {type:"integer"}
initial_collect_steps = 500  # @param {type:"integer"}
collect_steps_per_iteration = 100  # @param {type:"integer"}
replay_buffer_capacity = 70000  # @param {type:"integer"}
batch_size = 200  # @param {type:"integer"}
learning_rate = 1e-3  # @param {type:"number"}
log_interval = 1  # @param {type:"integer"}
num_eval_episodes = 50  # @param {type:"integer"}
eval_interval = 3  # @param {type:"integer"}

policy_save_interval = 5000
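The reverb_replay_buffer and reverb_utils imports above suggest a Reverb-backed replay buffer. A minimal hedged sketch of the usual wiring (the table name and sequence length are assumptions, and an agent is assumed to have been built already):

import reverb

table_name = 'uniform_table'  # assumed table name
table = reverb.Table(
    table_name,
    max_size=replay_buffer_capacity,
    sampler=reverb.selectors.Uniform(),
    remover=reverb.selectors.Fifo(),
    rate_limiter=reverb.rate_limiters.MinSize(1))
reverb_server = reverb.Server([table])

# Replay buffer that reads trajectories from the Reverb table.
reverb_replay = reverb_replay_buffer.ReverbReplayBuffer(
    agent.collect_data_spec,  # `agent` is assumed to exist at this point
    sequence_length=2,
    table_name=table_name,
    local_server=reverb_server)

# Observer that writes collected trajectories into the table.
rb_observer = reverb_utils.ReverbAddTrajectoryObserver(
    reverb_replay.py_client,
    table_name,
    sequence_length=2,
    stride_length=1)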
Example #6
    def test_mirrored_strategy(self, mock_mirrored_strategy):
        mirrored_strategy = mock.MagicMock()
        mock_mirrored_strategy.return_value = mirrored_strategy

        strategy = strategy_utils.get_strategy(False, use_gpu=True)
        self.assertIs(strategy, mirrored_strategy)
Example #7
    def test_get_distribution_strategy_default(self):
        # Get a default strategy to compare against.
        default_strategy = tf.distribute.get_strategy()

        strategy = strategy_utils.get_strategy(tpu=False, use_gpu=False)
        self.assertIsInstance(strategy, type(default_strategy))
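Taken together, these tests pin down how get_strategy chooses a strategy. A hedged sketch of equivalent behavior, reconstructed from the tests above rather than taken from the library source:

import tensorflow as tf

def get_strategy(tpu, use_gpu):
    # TPU address given: resolve, connect, initialize, return a TPUStrategy.
    if tpu:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu)
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        return tf.distribute.experimental.TPUStrategy(resolver)
    # GPU requested: mirror variables across all visible GPUs.
    if use_gpu:
        return tf.distribute.MirroredStrategy()
    # Neither: fall back to the default (no-op) strategy.
    return tf.distribute.get_strategy()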