from paint_ppo import main

if __name__ == '__main__':
    configuration = {
        'n_step': 1,
        'evaluation_interval': None,
        'evaluation_num_episodes': 1,
        'timesteps_per_iteration': 1000,
        'tau': 5e-3,
        'buffer_size': 200000,
        'prioritized_replay': False,
        'optimization': {
            'learning_rate': 5e-4,
            'policy_loss_weight': 1.0,
            'Q_loss_weight': 1.0,
            'entropy_loss_weight': 1.0,
        },
        'learning_starts': 1000,
        'sample_batch_size': 20,
        'train_batch_size': 300,
        'worker_side_prioritization': False,
        'min_iter_time_s': 1,
    }
    main('SAC', configuration)
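# For reference, a minimal sketch (not part of this repo) of the soft target
# update that 'tau' above controls: after each optimization step, the target
# network weights move a fraction tau toward the online weights (Polyak
# averaging). With tau=5e-3 the target tracks the online network with a time
# constant of roughly 1/tau = 200 updates.
import numpy as np

def soft_update(target_params, online_params, tau=5e-3):
    """target <- tau * online + (1 - tau) * target, updated in place."""
    for tp, p in zip(target_params, online_params):
        tp[...] = tau * p + (1.0 - tau) * tp

# Example: soft_update([np.zeros((4, 4))], [np.ones((4, 4))]) leaves the
# target at 0.005 everywhere after one step.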
from paint_ppo import main

if __name__ == '__main__':
    configuration = {
        # 'model': {
        #     'custom_model': 'paint_model',
        #     'custom_options': {},
        # },
        'model': {
            'fcnet_hiddens': [256, 128],
            'use_lstm': False,
        },
        'num_workers': 15,
        'sample_batch_size': 20,
    }
    main('A3C', configuration)
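# Roughly the network that 'fcnet_hiddens': [256, 128] asks RLlib to build,
# sketched in PyTorch for brevity (RLlib's built-in fully connected net also
# attaches a value head and uses tanh activations by default; dimensions here
# are illustrative only).
import torch.nn as nn

def build_fcnet(obs_dim, num_outputs, hiddens=(256, 128)):
    layers, last = [], obs_dim
    for size in hiddens:
        layers += [nn.Linear(last, size), nn.Tanh()]
        last = size
    layers.append(nn.Linear(last, num_outputs))
    return nn.Sequential(*layers)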
from paint_ppo import main

if __name__ == '__main__':
    configuration = {
        'num_workers': 15,
        'num_gpus': 1,
        'num_atoms': 1,
        # 'v_min': -120.0,
        # 'v_max': 120.0,
        'dueling': True,
        'double_q': True,
        'hiddens': [256, 128],
        'exploration_final_eps': 0.01,
        'schedule_max_timesteps': 2000000,
        'exploration_fraction': 0.2,
        'timesteps_per_iteration': 1000,
        'target_network_update_freq': 3000,
        'soft_q': False,
        'parameter_noise': False,
        'batch_mode': 'truncate_episodes',
        'buffer_size': 200000,
        'prioritized_replay': True,
        'compress_observations': False,
        'learning_starts': 1000,
        'sample_batch_size': 20,
        'train_batch_size': 32,
    }
    main('APEX', configuration)
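# A sketch of the linear epsilon schedule the three exploration keys above
# define: epsilon anneals from 1.0 down to exploration_final_eps over the
# first exploration_fraction * schedule_max_timesteps environment steps
# (here 0.2 * 2,000,000 = 400,000), then stays at 0.01.
def epsilon_at(t, max_timesteps=2000000, fraction=0.2, final_eps=0.01):
    anneal_steps = fraction * max_timesteps
    progress = min(t / anneal_steps, 1.0)
    return 1.0 + progress * (final_eps - 1.0)

# epsilon_at(0) == 1.0, epsilon_at(200000) == 0.505, epsilon_at(400000) == 0.01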
from paint_ppo import main

if __name__ == '__main__':
    configuration = {
        'num_workers': 15,
        'twin_q': True,
        'policy_delay': 2,
        'smooth_target_policy': True,
        # 'model': {
        #     # 'custom_model': 'paint_layer_model',
        #     # 'custom_options': {},  # extra options to pass to your model
        #     'use_lstm': False,
        # },
        'actor_hiddens': [256, 128],
        'critic_hiddens': [256, 128],
        'timesteps_per_iteration': 1000,
        'target_network_update_freq': 3000,
        'tau': 1e-3,
        'buffer_size': 200000,
        'prioritized_replay': True,
        'learning_starts': 1000,
        'sample_batch_size': 20,
        'train_batch_size': 32,
        'num_gpus': 1,
        # 'num_gpus_per_worker': 0,
    }
    main('APEX_DDPG', configuration)
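# What 'smooth_target_policy': True enables, sketched: TD3-style target policy
# smoothing, where clipped Gaussian noise is added to the target policy's
# action before the target Q-values are computed (twin_q and policy_delay=2
# are the other two TD3 tricks: clipped double-Q and delayed actor updates).
# The sigma/noise_clip values below are the TD3 paper defaults, assumed here
# for illustration.
import numpy as np

def smoothed_target_action(target_policy, next_obs, low, high,
                           sigma=0.2, noise_clip=0.5):
    action = target_policy(next_obs)
    noise = np.clip(np.random.normal(0.0, sigma, size=np.shape(action)),
                    -noise_clip, noise_clip)
    return np.clip(action + noise, low, high)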
from paint_ppo import main

if __name__ == '__main__':
    configuration = {
        'num_workers': 15,
        'num_gpus': 1,
        'batch_mode': 'truncate_episodes',
        'observation_filter': 'NoFilter',
        'lr': 0.0005,
        'sample_batch_size': 50,
        'train_batch_size': 750,
        # 'num_sgd_iter': 16,
        'num_data_loader_buffers': 4,
        # How many train batches should be retained for minibatching. This
        # setting only has an effect if `num_sgd_iter > 1`.
        'minibatch_buffer_size': 4,
        # Set > 0 to enable experience replay. Saved samples will be replayed
        # with a p:1 proportion to new data samples.
        # 'replay_proportion': 10,
        # Number of sample batches to store for replay. The total number of
        # transitions saved will be replay_buffer_num_slots * sample_batch_size.
        # 'replay_buffer_num_slots': 100,
        # Level of queuing for sampling.
        'max_sample_requests_in_flight_per_worker': 1,
        'broadcast_interval': 3,
        'grad_clip': 40.0,
        'vf_loss_coeff': 0.5,
        'entropy_coeff': 0.01,
    }
    main('IMPALA', configuration)
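# All five launchers above delegate to paint_ppo.main, which is not shown in
# this listing. A plausible minimal shape, assuming it hands the config for
# the named trainer to Tune; the environment name 'PaintEnv' and the
# checkpoint_freq value are assumptions for illustration, not the repo's
# actual values.
import ray
from ray import tune

def main(algorithm, configuration):
    ray.init()
    tune.run(
        algorithm,  # RLlib trainer name, e.g. 'SAC', 'A3C', 'APEX', 'IMPALA'
        config=dict(configuration, env='PaintEnv'),
        checkpoint_freq=10,
    )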