        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algo_kwargs']
    )
    algorithm.to(ptu.device)
    algorithm.train()


if __name__ == "__main__":
    variant = dict(
        algo_kwargs=dict(
            num_epochs=100,
            max_path_length=50,
            num_eval_steps_per_epoch=1000,
            num_expl_steps_per_train_loop=1000,
            num_trains_per_train_loop=1000,
            min_num_steps_before_training=1000,
            batch_size=128,
        ),
        trainer_kwargs=dict(
            discount=0.99,
        ),
        replay_buffer_kwargs=dict(
            max_size=100000,
            fraction_goals_rollout_goals=0.2,  # equal to k = 4 in HER paper
            fraction_goals_env_goals=0.0,
        ),
    )
    setup_logger('her-dqn-gridworld-experiment', variant=variant)
    experiment(variant)
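# Quick check of the "k = 4" comment above, under my reading of rlkit's
# relabeling buffer: fraction_goals_rollout_goals is the share of sampled
# transitions that keep the goal the rollout was collected with, and the
# remainder (minus fraction_goals_env_goals) is relabeled with future achieved
# goals, giving the 4:1 relabeled-to-original ratio from the HER paper.
k = 4
fraction_goals_rollout_goals = 1 / (1 + k)  # 0.2: keep the original rollout goal
fraction_relabeled = 1 - fraction_goals_rollout_goals - 0.0  # 0.8: future goals
assert fraction_goals_rollout_goals == 0.2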
    policy = TanhGaussianPolicy(
        hidden_sizes=[100, 100],
        obs_dim=obs_dim,
        action_dim=action_dim,
    )
    algorithm = SoftActorCritic(
        env=env,
        policy=policy,
        qf=qf,
        vf=vf,
        **variant['algo_params']
    )
    algorithm.to(ptu.device)
    with torch.autograd.profiler.profile() as prof:
        algorithm.train()
    prof.export_chrome_trace("tmp-torch-chrome-trace.prof")


if __name__ == "__main__":
    # noinspection PyTypeChecker
    variant = dict(
        algo_params=dict(
            num_epochs=10,
            num_steps_per_epoch=1000,
            num_steps_per_eval=300,
            batch_size=64,
            max_path_length=30,
            reward_scale=0.3,
            discount=0.99,
            soft_target_tau=0.001,
        ),
    )
    setup_logger("11-24-profile")
    experiment(variant)
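# Self-contained sketch of the same profiling pattern used above, so it can be
# tried outside this script (train_step is only a stand-in for algorithm.train();
# the file name is arbitrary). The exported trace can be opened in
# chrome://tracing or the Perfetto UI.
import torch


def train_step():
    x = torch.randn(64, 128)
    w = torch.randn(128, 128, requires_grad=True)
    (x @ w).sum().backward()


with torch.autograd.profiler.profile() as prof:
    train_step()
prof.export_chrome_trace("tmp-torch-chrome-trace.prof")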
    algorithm = SoftActorCritic(
        env=env,
        policy=policy,
        qf=qf,
        vf=vf,
        **variant['algo_params']
    )
    if ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()


if __name__ == "__main__":
    # noinspection PyTypeChecker
    variant = dict(
        algo_params=dict(
            num_epochs=1000,
            num_steps_per_epoch=1000,
            num_steps_per_eval=1000,
            batch_size=128,
            max_path_length=999,
            discount=0.99,
            soft_target_tau=0.001,
            policy_lr=3E-4,
            qf_lr=3E-4,
            vf_lr=3E-4,
            use_automatic_entropy_tuning=True,
        ),
        net_size=300,
    )
    setup_logger('test', variant=variant)
    experiment(variant)
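# Note: ptu.gpu_enabled() only returns True after the GPU mode has been switched
# on; a typical way to do that (assuming the usual rlkit.torch.pytorch_util API)
# is sketched below.
import rlkit.torch.pytorch_util as ptu

ptu.set_gpu_mode(True)  # after this, ptu.gpu_enabled() is True and tensors go to CUDA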
            num_steps_per_eval=500,
            max_path_length=200,
            discount=0.99,
            replay_buffer_size=int(1E4),
        ),
        cnn_params=dict(
            kernel_sizes=[3, 3],
            n_channels=[16, 16],
            strides=[2, 2],
            pool_sizes=[1, 1],
            hidden_sizes=[128, 64],
            paddings=[0, 0],
            use_layer_norm=False,
        ),
        env_id='InvertedPendulum-v2',
    )
    setup_logger('name-of-td3-experiment', variant=variant)
    experiment(variant)
    for i in range(2):
        run_experiment(
            experiment,
            variant=variant,
            exp_id=0,
            exp_prefix="TD3-images-inverted-pendulum",
            mode='local',
            # exp_prefix="double-vs-dqn-huber-sweep-cartpole",
            # mode='local',
            # use_gpu=True,
        )
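# Back-of-the-envelope check of the spatial sizes implied by cnn_params above,
# using the standard conv output formula. The input resolution is not shown in
# this snippet, so the 32x32 below is only a hypothetical example.
def conv_out(size, kernel, stride, pad):
    return (size + 2 * pad - kernel) // stride + 1


size = 32  # hypothetical square input width/height
for kernel, stride, pad in zip([3, 3], [2, 2], [0, 0]):
    size = conv_out(size, kernel, stride, pad)
print(size)  # 7: a 32x32 image shrinks to 7x7 after the two stride-2 convs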
    variant = dict(
        algorithm='HER-SAC',
        version='normal',
        algo_kwargs=dict(
            batch_size=128,
            num_epochs=100,
            num_eval_steps_per_epoch=5000,
            num_expl_steps_per_train_loop=1000,
            num_trains_per_train_loop=1000,
            min_num_steps_before_training=1000,
            max_path_length=50,
        ),
        sac_trainer_kwargs=dict(
            discount=0.99,
            soft_target_tau=5e-3,
            target_update_period=1,
            policy_lr=3E-4,
            qf_lr=3E-4,
            reward_scale=1,
            use_automatic_entropy_tuning=True,
        ),
        replay_buffer_kwargs=dict(
            max_size=int(1E6),
            fraction_goals_rollout_goals=0.2,  # equal to k = 4 in HER paper
            fraction_goals_env_goals=0,
        ),
        qf_kwargs=dict(
            hidden_sizes=[400, 300],
        ),
        policy_kwargs=dict(
            hidden_sizes=[400, 300],
        ),
    )
    setup_logger('her-sac-fetch-experiment', variant=variant)
    experiment(variant)
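# Sanity arithmetic on the evaluation budget above: assuming every eval path
# runs to max_path_length, 5000 eval steps / 50 steps per path = 100 evaluation
# rollouts per epoch.
num_eval_steps_per_epoch = 5000
max_path_length = 50
print(num_eval_steps_per_epoch // max_path_length)  # 100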
            'Reacher-v2',
        ],
        'bins': [9],
        'algo_class': [
            DoubleDQN,
        ],
        'learning_rate': [1e-3, 1e-4],
        'qf_criterion_class': [
            HuberLoss,
        ],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )
    setup_logger('dqn-images-experiment', variant=variant)
    experiment(variant)
    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        # for i in range(2):
        run_experiment(
            experiment,
            variant=variant,
            exp_id=exp_id,
            exp_prefix="dqn-Pusher2D-test",
            mode='ec2',
            # use_gpu=False,
            # exp_prefix="double-vs-dqn-huber-sweep-cartpole",
            # mode='local',
            # use_gpu=True,
        )
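# Rough sketch of what the deterministic sweep amounts to: enumerate the
# Cartesian product of the search space on top of the default variant. This is
# only my reading of hyp.DeterministicHyperparameterSweeper, not its actual
# code (in particular, nested/dotted keys are ignored here).
import itertools


def iterate_hyperparameters(search_space, default_parameters):
    keys = list(search_space.keys())
    for values in itertools.product(*(search_space[k] for k in keys)):
        variant = dict(default_parameters)  # shallow copy of the defaults
        variant.update(zip(keys, values))
        yield variant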
        trainer_kwargs=dict(
            discount=0.99,
        ),
        replay_buffer_kwargs=dict(
            max_size=100000,
            fraction_goals_rollout_goals=0.2,
            fraction_goals_env_goals=0.0,
        ),
        qf_kwargs=dict(
            hidden_sizes=[400, 300],
        ),
        policy_kwargs=dict(
            hidden_sizes=[400, 300],
        ),
    )
    setup_logger('her-td3-pusher-0', variant=variant)
    experiment(variant)

    # search_space = {
    # }
    # sweeper = hyp.DeterministicHyperparameterSweeper(
    #     search_space, default_parameters=variant,
    # )
    #
    # n_seeds = 1
    # mode = 'local'
    # exp_prefix = 'dev-{}'.format(
    #     __file__.replace('/', '-').replace('_', '-').split('.')[0]
    # )
    #
    # n_seeds = 5
    # mode = 'sss'
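# Hedged sketch of the launch pattern the commented block above appears to set
# up (exp_prefix, mode, and n_seeds are the variables it defines; run_experiment
# and the sweeper are used the same way as in the live sweep code above).
# for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
#     for _ in range(n_seeds):
#         run_experiment(
#             experiment,
#             exp_prefix=exp_prefix,
#             mode=mode,
#             variant=variant,
#             exp_id=exp_id,
#         )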
            object_high=(x_high - t - t, y_high - t, 0.02),
        ),
        algo_kwargs=dict(
            num_epochs=2000,
            max_path_length=20,
            batch_size=128,
            num_eval_steps_per_epoch=1000,
            num_expl_steps_per_train_loop=1000,
            num_trains_per_train_loop=100,
            min_num_steps_before_training=1000,
        ),
        sac_trainer_kwargs=dict(
            discount=0.99,
            soft_target_tau=5e-3,
            target_update_period=1,
            policy_lr=3E-4,
            qf_lr=3E-4,
            reward_scale=1,
            use_automatic_entropy_tuning=True,
        ),
        replay_buffer_kwargs=dict(
            max_size=int(1E6),
            fraction_goals_rollout_goals=0.2,  # equal to k = 4 in HER paper
            fraction_goals_env_goals=0,
        ),
        qf_kwargs=dict(
            hidden_sizes=[400, 300],
        ),
        policy_kwargs=dict(
            hidden_sizes=[400, 300],
        ),
    )
    setup_logger('her-sac-pusher', variant=variant)
    experiment(variant)