Example #1
from paint_ppo import main

if __name__ == '__main__':
    configuration = {
        # NOTE: the opening of this example, including a nested config dict
        # that the original snippet closed here with a stray '},', was
        # truncated in the source listing.

        'n_step': 1,
        
        'evaluation_interval': None,
        'evaluation_num_episodes': 1,

        'timesteps_per_iteration': 1000,
        'tau': 5e-3,

        'buffer_size': 200000,
        'prioritized_replay': False,

        'optimization': {
            'learning_rate': 5e-4,

            'policy_loss_weight': 1.0,
            'Q_loss_weight': 1.0,
            'entropy_loss_weight': 1.0,
        },
        'learning_starts': 1000,

        'sample_batch_size': 20,
        'train_batch_size': 300,

        'worker_side_prioritization': False,
        'min_iter_time_s': 1,

    }
    main('SAC', configuration)
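
Every example in this listing imports `main` from a `paint_ppo` module that is not shown here. Purely as a hedged sketch of how such an entry point might consume these dicts with the 0.x-era RLlib API (the signature matches the calls in these examples, but the `ray.tune.run` wrapper and the `PaintEnv-v0` environment id are assumptions, not from the source):

import ray
from ray import tune

def main(trainer_name, configuration):
    # Hypothetical sketch of paint_ppo.main: launch the named RLlib trainer.
    ray.init()
    tune.run(
        trainer_name,  # RLlib algorithm id, e.g. 'SAC', 'A3C', 'APEX'
        config={
            'env': 'PaintEnv-v0',  # assumed environment id, not in the source
            **configuration,
        },
    )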
Example #2
from paint_ppo import main

if __name__ == '__main__':
    configuration = {
        # 'model': {
        #     'custom_model': 'paint_model',
        #     'custom_options': {},
        # },
        'model': {
            'fcnet_hiddens': [256, 128],
            'use_lstm': False,
        },
        'num_workers': 15,
        'sample_batch_size': 20,
    }
    main('A3C', configuration)
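
The commented-out 'custom_model' block above only works once a model class has been registered with RLlib's ModelCatalog under that name. A minimal sketch, assuming a hypothetical PaintModel class (the module and class names are placeholders, not from the source):

from ray.rllib.models import ModelCatalog

from paint_model import PaintModel  # hypothetical module and class

# Make 'paint_model' resolvable via the 'custom_model' config key.
ModelCatalog.register_custom_model('paint_model', PaintModel)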
Example #3
from paint_ppo import main

if __name__ == '__main__':
    configuration = {
        'num_workers': 15,
        'num_gpus': 1,
        'num_atoms': 1,
        # 'v_min': -120.0,
        # 'v_max': 120.0,
        'dueling': True,
        'double_q': True,
        'hiddens': [256, 128],
        'exploration_final_eps': 0.01,
        "schedule_max_timesteps": 2000000,
        'exploration_fraction': 0.2,
        'timesteps_per_iteration': 1000,
        'target_network_update_freq': 3000,
        'soft_q': False,
        'parameter_noise': False,
        'batch_mode': 'truncate_episodes',
        'buffer_size': 200000,
        'prioritized_replay': True,
        'compress_observations': False,
        'learning_starts': 1000,
        'sample_batch_size': 20,
        'train_batch_size': 32,
    }
    main('APEX', configuration)
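
With 'num_atoms': 1 this Ape-X config runs standard (non-distributional) Q-learning, which is why 'v_min' and 'v_max' are commented out; those bounds only take effect with a distributional head. Switching to C51-style distributional DQN would look like this (the atom count and bounds are illustrative):

# Amend the configuration above to use a distributional Q head.
configuration.update({
    'num_atoms': 51,   # number of support atoms of the return distribution
    'v_min': -120.0,   # lower bound of the distribution support
    'v_max': 120.0,    # upper bound of the distribution support
})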
Example #4
from paint_ppo import main

if __name__ == '__main__':
    configuration = {
        'num_workers': 15,
        'twin_q': True,
        'policy_delay': 2,
        'smooth_target_policy': True,

        # 'model': {
        #     # 'custom_model': 'paint_layer_model',
        #     # 'custom_options': {},  # extra options to pass to your model
        #     'use_lstm': False,
        # },
        'actor_hiddens': [256, 128],
        'critic_hiddens': [256, 128],
        'timesteps_per_iteration': 1000,
        'target_network_update_freq': 3000,
        'tau': 1e-3,
        'buffer_size': 200000,
        'prioritized_replay': True,
        'learning_starts': 1000,
        'sample_batch_size': 20,
        'train_batch_size': 32,
        'num_gpus': 1,
        # 'num_gpus_per_worker': 0,
    }
    main('APEX_DDPG', configuration)
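
'twin_q', 'policy_delay', and 'smooth_target_policy' switch on the three TD3 modifications to DDPG (clipped double-Q learning, delayed policy updates, and target-policy smoothing), so this run is effectively distributed TD3. Reverting to plain Ape-X DDPG would mean restoring the defaults:

# Restore vanilla DDPG behavior (RLlib's defaults for these keys).
configuration.update({
    'twin_q': False,
    'policy_delay': 1,
    'smooth_target_policy': False,
})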
Example #5
from paint_ppo import main

if __name__ == '__main__':
    configuration = {
        # NOTE: the opening of this example, including a nested config dict
        # that the original snippet closed here with a stray '},', was
        # truncated in the source listing.
        'num_workers': 15,
        'num_gpus': 1,
        'batch_mode': 'truncate_episodes',
        'observation_filter': 'NoFilter',
        'lr': 0.0005,
        'sample_batch_size': 50,
        'train_batch_size': 750,

        # 'num_sgd_iter': 16,
        "num_data_loader_buffers": 4,
        # how many train batches should be retained for minibatching. This conf
        # only has an effect if `num_sgd_iter > 1`.
        "minibatch_buffer_size": 4,

        # set >0 to enable experience replay. Saved samples will be replayed with
        # a p:1 proportion to new data samples.
        # 'replay_proportion': 10,
        # number of sample batches to store for replay. The number of transitions
        # saved total will be (replay_buffer_num_slots * sample_batch_size).
        # 'replay_buffer_num_slots': 100,

        # level of queuing for sampling.
        'max_sample_requests_in_flight_per_worker': 1,
        'broadcast_interval': 3,
        'grad_clip': 40.0,
        'vf_loss_coeff': 0.5,
        'entropy_coeff': 0.01,
    }
    main('IMPALA', configuration)
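
As the comments above note, experience replay for IMPALA stays off until 'replay_proportion' is set above zero. Enabling the commented-out settings with their suggested values would look like this:

# Amend the configuration above to replay saved samples at a 10:1
# ratio to new samples, storing 100 * sample_batch_size transitions.
configuration.update({
    'replay_proportion': 10,
    'replay_buffer_num_slots': 100,
})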