Example #1
            'clip_range': (0.05, 0.3),  # range of the adapted penalty factor
            'scale_constant': 1.2,
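            # (assumption) presumably the penalty factor is scaled up or down by
            # scale_constant as the measured KL drifts from its target, then
            # clamped into clip_range, e.g. min(max(factor * 1.2, 0.05), 0.3)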
        },
    },
    'replay': {
        # 'replay_class': 'FIFOReplay',
        'batch_size': 64,
        'memory_size': 96,
        'sampling_start_size': 64,
        'replay_shards': 1,
    },
    'parameter_publish': {
        'exp_interval': 4096,
    },
})
PPO_DEFAULT_LEARNER_CONFIG.extend(BASE_LEARNER_CONFIG)
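
For orientation, a minimal sketch of what an extend()-style merge presumably does here: fill in keys missing from the derived config from the base, without clobbering values already set (an assumption about SURREAL's Config behavior, not its actual implementation).

# Sketch only: dict-backed config with "derived wins" merge semantics.
class SketchConfig(dict):
    def extend(self, base):
        # Copy keys absent from self; recurse into nested dicts so partial
        # overrides (e.g. only 'batch_size' inside 'replay') are preserved.
        for key, base_value in base.items():
            if key not in self:
                self[key] = base_value
            elif isinstance(self[key], dict) and isinstance(base_value, dict):
                SketchConfig.extend(self[key], base_value)

cfg = SketchConfig({'replay': {'batch_size': 64}})
cfg.extend({'replay': {'batch_size': 32, 'replay_shards': 1}, 'log_every': 100})
assert cfg == {'replay': {'batch_size': 64, 'replay_shards': 1}, 'log_every': 100}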

PPO_DEFAULT_ENV_CONFIG = Config({
    'env_name': '',
    'action_repeat': 1,
    'pixel_input': False,
    'use_grayscale': False,
    'use_depth': False,
    'frame_stacks': 1,
    'sleep_time': 0,
    'video': {
        'record_video': False,
        'save_folder': None,
        'max_videos': 500,
        'record_every': 5,
    },
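
A hedged usage sketch: a per-experiment config would presumably set a few fields and then pull in these defaults ('MyEnv-v0' and the override pattern below are assumptions, not part of the listing).

my_env_config = Config({
    'env_name': 'MyEnv-v0',  # hypothetical environment id
    'pixel_input': True,
    'frame_stacks': 4,
})
my_env_config.extend(PPO_DEFAULT_ENV_CONFIG)  # fill remaining fields from the defaults
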
Example #2
    },
    'replay': {
        'batch_size': 512,
        'memory_size': int(1000000 / 3),  # The total replay size is memory_size * replay_shards (see the check below)
        'sampling_start_size': 3000,
        'replay_shards': 3,
    },
    'parameter_publish': {
        # Minimum amount of time (seconds) between two parameter publishes
        'min_publish_interval': 3,
    },
})

DDPG_DEFAULT_LEARNER_CONFIG.extend(BASE_LEARNER_CONFIG)
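
A quick arithmetic check of the replay sizing above: with 3 shards of int(1000000 / 3) transitions each, the total capacity is just under the intended 1M.

# Sanity check (values copied from the config above).
memory_size = int(1000000 / 3)   # 333333 per shard
replay_shards = 3
assert memory_size * replay_shards == 999999  # ~1M transitions in total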

DDPG_DEFAULT_ENV_CONFIG = Config({
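    # '_str_' / '_int_' appear to be required placeholders that the caller must
    # override before launch (an assumption about this Config's conventions)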
    'env_name': '_str_',
    'num_agents': '_int_',
    'demonstration': None,
    'use_depth': False,
    'render': False,
    'use_demonstration': False,
    # If true, DDPG will expect an image at obs['pixel']['camera0']
    'pixel_input': False,
    'use_grayscale': False,
    # Stacks previous image frames together to provide history information
    'frame_stacks': 3,
    # Each action will be played this number of times. The reward of the
    # consecutive actions will be the reward of the last action in the sequence.