Example #1
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algo_kwargs']
    )
    algorithm.to(ptu.device)
    algorithm.train()


if __name__ == "__main__":
    variant = dict(
        algo_kwargs=dict(
            num_epochs=100,
            max_path_length=50,
            num_eval_steps_per_epoch=1000,
            num_expl_steps_per_train_loop=1000,
            num_trains_per_train_loop=1000,
            min_num_steps_before_training=1000,
            batch_size=128,
        ),
        trainer_kwargs=dict(
            discount=0.99,
        ),
        replay_buffer_kwargs=dict(
            max_size=100000,
            fraction_goals_rollout_goals=0.2,  # equal to k = 4 in HER paper
            fraction_goals_env_goals=0.0,
        ),
    )
    setup_logger('her-dqn-gridworld-experiment', variant=variant)
    experiment(variant)
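A quick note on the replay-buffer comment above: with HER-style relabelling, using k relabelled goals per original goal leaves a fraction 1/(k + 1) of original rollout goals, so k = 4 corresponds to fraction_goals_rollout_goals = 0.2. A minimal arithmetic check (plain Python, not part of the script; rollout_goal_fraction is a hypothetical helper):

def rollout_goal_fraction(k):
    # Fraction of sampled goals that remain the original rollout goals.
    return 1.0 / (k + 1)

assert rollout_goal_fraction(4) == 0.2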
Example #2
    policy = TanhGaussianPolicy(
        hidden_sizes=[100, 100],
        obs_dim=obs_dim,
        action_dim=action_dim,
    )
    algorithm = SoftActorCritic(env=env,
                                policy=policy,
                                qf=qf,
                                vf=vf,
                                **variant['algo_params'])
    algorithm.to(ptu.device)
    with torch.autograd.profiler.profile() as prof:
        algorithm.train()
    prof.export_chrome_trace("tmp-torch-chrome-trace.prof")


if __name__ == "__main__":
    # noinspection PyTypeChecker
    variant = dict(algo_params=dict(
        num_epochs=10,
        num_steps_per_epoch=1000,
        num_steps_per_eval=300,
        batch_size=64,
        max_path_length=30,
        reward_scale=0.3,
        discount=0.99,
        soft_target_tau=0.001,
    ), )
    setup_logger("11-24-profile")
    experiment(variant)
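The profiling pattern in this example (torch.autograd.profiler.profile wrapping the training call, then export_chrome_trace) is plain PyTorch and can be tried on a toy workload first; the exported trace opens in Chrome via chrome://tracing. A self-contained sketch, independent of rlkit:

import torch

x = torch.randn(128, 128)
with torch.autograd.profiler.profile() as prof:
    for _ in range(10):
        x = torch.tanh(x @ x)  # arbitrary work so the trace has events
prof.export_chrome_trace("tmp-toy-chrome-trace.json")  # load via chrome://tracing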
Example #3
    algorithm = SoftActorCritic(env=env,
                                policy=policy,
                                qf=qf,
                                vf=vf,
                                **variant['algo_params'])
    if ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()


if __name__ == "__main__":
    # noinspection PyTypeChecker
    variant = dict(
        algo_params=dict(
            num_epochs=1000,
            num_steps_per_epoch=1000,
            num_steps_per_eval=1000,
            batch_size=128,
            max_path_length=999,
            discount=0.99,
            soft_target_tau=0.001,
            policy_lr=3E-4,
            qf_lr=3E-4,
            vf_lr=3E-4,
            use_automatic_entropy_tuning=True,
        ),
        net_size=300,
    )
    setup_logger('test', variant=variant)
    experiment(variant)
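This example uses the older ptu.gpu_enabled() / algorithm.cuda() pair, whereas Examples #1 and #2 move the algorithm with algorithm.to(ptu.device). Assuming a recent rlkit whose pytorch_util exposes set_gpu_mode and a module-level device (an assumption; check your version), the device-based pattern looks roughly like:

import torch
import rlkit.torch.pytorch_util as ptu

ptu.set_gpu_mode(torch.cuda.is_available())  # pick cuda when available, else cpu
print(ptu.device)  # the device that algorithm.to(ptu.device) would target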
Example #4
            num_steps_per_eval=500,
            max_path_length=200,
            discount=0.99,
            replay_buffer_size=int(1E4),
        ),
        cnn_params=dict(
            kernel_sizes=[3, 3],
            n_channels=[16, 16],
            strides=[2, 2],
            pool_sizes=[1, 1],
            hidden_sizes=[128, 64],
            paddings=[0, 0],
            use_layer_norm=False,
        ),
        env_id='InvertedPendulum-v2',
    )
    setup_logger('name-of-td3-experiment', variant=variant)
    experiment(variant)

    for i in range(2):
        run_experiment(
            experiment,
            variant=variant,
            exp_id=0,
            exp_prefix="TD3-images-inverted-pendulum",
            mode='local',
            # exp_prefix="double-vs-dqn-huber-sweep-cartpole",
            # mode='local',
            #use_gpu=True,
        )
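The cnn_params above describe two 3x3 convolutions with stride 2, no padding, and pool size 1 (i.e. effectively no pooling), followed by fully connected layers of width 128 and 64. The spatial size after each convolution follows floor((size + 2 * padding - kernel) / stride) + 1; a small check for a hypothetical 32x32 input (the actual image size is not shown in this snippet):

def conv_out(size, kernel, stride, padding=0):
    return (size + 2 * padding - kernel) // stride + 1

size = 32  # hypothetical input height/width
for k, s, p in zip([3, 3], [2, 2], [0, 0]):
    size = conv_out(size, k, s, p)
    print(size)  # 15 after the first conv, 7 after the second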
Example #5
    variant = dict(
        algorithm='HER-SAC',
        version='normal',
        algo_kwargs=dict(
            batch_size=128,
            num_epochs=100,
            num_eval_steps_per_epoch=5000,
            num_expl_steps_per_train_loop=1000,
            num_trains_per_train_loop=1000,
            min_num_steps_before_training=1000,
            max_path_length=50,
        ),
        sac_trainer_kwargs=dict(
            discount=0.99,
            soft_target_tau=5e-3,
            target_update_period=1,
            policy_lr=3E-4,
            qf_lr=3E-4,
            reward_scale=1,
            use_automatic_entropy_tuning=True,
        ),
        replay_buffer_kwargs=dict(
            max_size=int(1E6),
            fraction_goals_rollout_goals=0.2,  # equal to k = 4 in HER paper
            fraction_goals_env_goals=0,
        ),
        qf_kwargs=dict(hidden_sizes=[400, 300], ),
        policy_kwargs=dict(hidden_sizes=[400, 300], ),
    )
    setup_logger('her-sac-fetch-experiment', variant=variant)
    experiment(variant)
Example #6
            'Reacher-v2',
        ],
        'bins': [9],
        'algo_class': [
            DoubleDQN,
        ],
        'learning_rate': [1e-3, 1e-4],
        'qf_criterion_class': [
            HuberLoss,
        ],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )
    setup_logger('dqn-images-experiment', variant=variant)
    experiment(variant)

    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
        #for i in range(2):
        run_experiment(
            experiment,
            variant=variant,
            exp_id=exp_id,
            exp_prefix="dqn-Pusher2D-test",
            mode='ec2',
            # use_gpu=False,
            # exp_prefix="double-vs-dqn-huber-sweep-cartpole",
            # mode='local',
            # use_gpu=True,
        )
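DeterministicHyperparameterSweeper iterates over every combination of the values in search_space merged into the default variant; of the entries visible here, only learning_rate has more than one value (the env list is truncated above), so the loop launches one run per learning rate. A rough stand-in for that iteration using itertools.product (not rlkit's actual implementation):

from itertools import product

search_space = {'learning_rate': [1e-3, 1e-4], 'bins': [9]}
keys = sorted(search_space)
for exp_id, values in enumerate(product(*(search_space[k] for k in keys))):
    overrides = dict(zip(keys, values))
    print(exp_id, overrides)  # one combination per learning rate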
Example #7
        trainer_kwargs=dict(
            discount=0.99,
        ),
        replay_buffer_kwargs=dict(
            max_size=100000,
            fraction_goals_rollout_goals=0.2,
            fraction_goals_env_goals=0.0,
        ),
        qf_kwargs=dict(
            hidden_sizes=[400, 300],
        ),
        policy_kwargs=dict(
            hidden_sizes=[400, 300],
        ),
    )
    setup_logger('her-td3-pusher-0', variant=variant)
    experiment(variant)
    # search_space = {
    # }
    # sweeper = hyp.DeterministicHyperparameterSweeper(
    #     search_space, default_parameters=variant,
    # )
    #
    # n_seeds = 1
    # mode = 'local'
    # exp_prefix = 'dev-{}'.format(
    #     __file__.replace('/', '-').replace('_', '-').split('.')[0]
    # )
    #
    # n_seeds = 5
    # mode = 'sss'
Example #8
            object_high=(x_high - t - t, y_high - t, 0.02),
        ),
        algo_kwargs=dict(
            num_epochs=2000,
            max_path_length=20,
            batch_size=128,
            num_eval_steps_per_epoch=1000,
            num_expl_steps_per_train_loop=1000,
            num_trains_per_train_loop=100,
            min_num_steps_before_training=1000,
        ),
        sac_trainer_kwargs=dict(
            discount=0.99,
            soft_target_tau=5e-3,
            target_update_period=1,
            policy_lr=3E-4,
            qf_lr=3E-4,
            reward_scale=1,
            use_automatic_entropy_tuning=True,
        ),
        replay_buffer_kwargs=dict(
            max_size=int(1E6),
            fraction_goals_rollout_goals=0.2,  # equal to k = 4 in HER paper
            fraction_goals_env_goals=0,
        ),
        qf_kwargs=dict(hidden_sizes=[400, 300], ),
        policy_kwargs=dict(hidden_sizes=[400, 300], ),
    )
    setup_logger('her-sac-pusher', variant=variant)
    experiment(variant)