            'clip_range': (0.05, 0.3),  # range of the adapted penalty factor
            'scale_constant': 1.2,
        },
    },
    'replay': {
        # 'replay_class': 'FIFOReplay',
        'batch_size': 64,
        'memory_size': 96,
        'sampling_start_size': 64,
        'replay_shards': 1,
    },
    'parameter_publish': {
        'exp_interval': 4096,
    },
})
PPO_DEFAULT_LEARNER_CONFIG.extend(BASE_LEARNER_CONFIG)

PPO_DEFAULT_ENV_CONFIG = Config({
    'env_name': '',
    'action_repeat': 1,
    'pixel_input': False,
    'use_grayscale': False,
    'use_depth': False,
    'frame_stacks': 1,
    'sleep_time': 0,
    'video': {
        'record_video': False,
        'save_folder': None,
        'max_videos': 500,
        'record_every': 5,
    },
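    # Usage sketch (illustration only, not part of the original defaults).
    # Launcher code would typically copy a default Config and override
    # individual fields before starting a run; this assumes Config supports
    # attribute-style access in addition to dict access, consistent with the
    # .extend() calls in this file:
    #
    #   env_config = Config(PPO_DEFAULT_ENV_CONFIG)
    #   env_config.pixel_input = True          # switch to image observations
    #   env_config.frame_stacks = 4            # stack 4 frames for history
    #   env_config.video.record_video = True   # save rollout videos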
    },
    'replay': {
        'batch_size': 512,
        'memory_size': int(1000000 / 3),  # The total replay size is memory_size * replay_shards
        'sampling_start_size': 3000,
        'replay_shards': 3,
    },
    'parameter_publish': {
        # Minimum amount of time (seconds) between two parameter publishes
        'min_publish_interval': 3,
    },
})
DDPG_DEFAULT_LEARNER_CONFIG.extend(BASE_LEARNER_CONFIG)

DDPG_DEFAULT_ENV_CONFIG = Config({
    'env_name': '_str_',
    'num_agents': '_int_',
    'demonstration': None,
    'use_depth': False,
    'render': False,
    'use_demonstration': False,
    # If true, DDPG will expect an image at obs['pixel']['camera0']
    'pixel_input': False,
    'use_grayscale': False,
    # Stacks previous image frames together to provide history information
    'frame_stacks': 3,
    # Each action will be played this number of times. The reward of the
    # consecutive actions will be the reward of the last action in the sequence