Пример #1
0
                use_linear_dynamics=False,
                is_auto_encoder=False,
                batch_size=64,
                lr=1e-3,
            ),
            save_period=50,
        ),
        num_exps_per_instance=3,
    )

    # Sweep grid: two VAE beta values on a single environment id. Keys are
    # dotted paths, presumably resolved into the nested `variant` dict by the
    # sweeper -- confirm against DeterministicHyperparameterSweeper.
    # NOTE(review): the env id is spelled "...Reacher..." here, while the
    # disabled override below refers to "...Reach..." ids -- confirm the
    # spelling is intentional.
    search_space = {
        'train_vae_variant.beta': [.5, 2.5],
        'env_id': ['SawyerPushAndReacherXYEnv-v0'],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )

    # Collect every swept configuration. The explicit loop is kept so the
    # disabled per-env dataset override can be re-enabled in place; the loop
    # variable has its own name so `variant` (the defaults) is not rebound.
    variants = []
    for swept_variant in sweeper.iterate_hyperparameters():
        # if swept_variant['env_id'] == 'SawyerPushAndReachXYEnv-No-Arena-v0':
        #     swept_variant['train_vae_variant']['generate_vae_dataset_kwargs']['dataset_path'] = \
        #         'datasets/SawyerPushAndReachXYEnv-No-Arena-v0_N5000_sawyer_pusher_camera_upright_v3_imsize84_random_oracle_split_0.npy'
        # else:
        #     swept_variant['train_vae_variant']['generate_vae_dataset_kwargs']['dataset_path'] = \
        #         'datasets/SawyerPushAndReachXYEnv-No-Arena-v1_N5000_sawyer_pusher_camera_upright_v3_imsize84_random_oracle_split_0.npy'
        variants.append(swept_variant)

    run_variants(grill_her_td3_full_experiment, variants, run_id=3)
Пример #2
0
        #     tensorboard=True,
        # ),
        load_demos=True,
        pretrain_policy=True,
        pretrain_rl=True,
        # save_pretrained_algorithm=True,
        # snapshot_mode="all",
        env='pen-sparse-v0',
    )

    experiment(variant)


if __name__ == "__main__":
    # The sweeper is given an empty defaults dict; only the swept keys below
    # are supplied from this entry point.
    variant = {}

    # Three entries, distinguished only by 'seedid' (presumably a seed index
    # -- confirm how the experiment consumes it).
    search_space = {
        'seedid': range(3),
    }

    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    # Materialize every swept configuration up front. list() is used rather
    # than a `for variant in ...` loop so the module-level `variant` stays
    # bound to the defaults dict.
    variants = list(sweeper.iterate_hyperparameters())

    run_variants(local_exp, variants, run_id=0)
Пример #3
0
        'seedid': range(5),
        'algo_kwargs.base_kwargs.num_updates_per_env_step': [
            16,
        ],
        'replay_buffer_kwargs.fraction_goals_rollout_goals': [
            0.1,
        ],
        'replay_buffer_kwargs.fraction_goals_env_goals': [
            0.5,
        ],
    }

    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    # Launch settings; the commented-out trio is the alternative local/test
    # configuration.
    # NOTE(review): none of n_seeds / mode / exp_prefix is passed to
    # run_variants() below -- confirm they are still read somewhere,
    # otherwise they are dead assignments.
    # n_seeds = 1
    # mode = 'local'
    # exp_prefix = 'test'

    n_seeds = 1
    mode = 'ec2'
    exp_prefix = 'sawyer_pusher_state_final'

    # Materialize the sweep without reusing the name `variant`, so the
    # defaults dict passed to the sweeper is not rebound.
    variants = list(sweeper.iterate_hyperparameters())

    run_variants(her_td3_experiment, variants, run_id=0)
Пример #4
0
                distance_weight=0,
                skew_dataset=False,
                priority_function_kwargs=dict(
                    decoder_distribution='gaussian_identity_variance',
                    sampling_method='importance_sampling',
                    # sampling_method='true_prior_sampling',
                    num_latents_to_sample=10,
                ),
                use_parallel_dataloading=False,
            ),

            save_period=25,
        ),
    )

    # Every key lists exactly one value, so this grid describes a single
    # configuration on top of the defaults in `variant`.
    search_space = {
        'seedid': range(1),
        # (3 * objects, 3 * colors)
        'train_vae_variant.representation_size': [(16, 16)],
        # THIS IS A LINEAR INTERPOLATION CURRENTLY
        'train_vae_variant.beta': [50],
        # 'train_vae_variant.generate_vae_dataset_kwargs.n_random_steps': [2],#, 50, 100],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )

    # Materialize the sweep without reusing the name `variant`, so the
    # defaults dict passed to the sweeper is not rebound.
    variants = list(sweeper.iterate_hyperparameters())

    run_variants(experiment, variants, run_id=101)
        wrap_mujoco_env=True,
        track_qpos_goal=5,
        do_state_based_exp=False,
        exploration_noise=0.1,
        reward_params=dict(
            type="sparse",
            epsilon=20.0,
        ),
        save_video=False,
    )

    # Number of 'seedid' values in the sweep below.
    n_seeds = 3

    # Hyperparameter grid, keyed by dotted paths into the defaults dict.
    # Single-element lists pin a setting; multi-element lists are swept.
    search_space = {
        # Pinned settings.
        'exploration_type': ['epsilon'],
        'env_kwargs.arm_range': [1.0],
        'algo_kwargs.reward_scale': [1],
        'algo_kwargs.discount': [0.99],
        'exploration_noise': [0.2],
        # Swept settings.
        'reward_params.epsilon': [20.0, 100.0, 5.0],
        'replay_kwargs.fraction_goals_are_env_goals': [0.0, 0.5, 1.0],
        'replay_kwargs.fraction_goals_are_rollout_goals': [0.2, 1.0],
        # 'rdim': [2, 4, 8, 16],
        'seedid': range(n_seeds),
    }

    # Build the sweep over the defaults and pass its result straight to the
    # launcher.
    swept_variants = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    ).iterate_hyperparameters()
    run_variants(experiment, swept_variants, run_id=1)
Пример #6
0
                    sampling_method='importance_sampling',
                    # sampling_method='true_prior_sampling',
                    num_latents_to_sample=10,
                ),
                use_parallel_dataloading=False,
            ),
            save_period=25,
        ),
    )

    # Only 'seedid' takes more than one value; the remaining keys pin one
    # reward type, representation size, beta and random-step count.
    search_space = {
        'seedid': range(3),
        'grill_variant.reward_params.type': ['latent_distance'],
        # (3 * objects, 3 * colors)
        'train_vae_variant.representation_size': [
            (4, 4),
        ],
        'train_vae_variant.beta': [50],
        'train_vae_variant.generate_vae_dataset_kwargs.n_random_steps': [100],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    # Materialize the sweep without reusing the name `variant`, so the
    # defaults dict passed to the sweeper is not rebound.
    variants = list(sweeper.iterate_hyperparameters())

    run_variants(
        grill_her_twin_sac_online_vae_full_experiment,
        variants,
        run_id=10,
    )
Пример #7
0
        'env_id': [
            'SawyerPushDebugLEAP-v3',
            'SawyerPushDebugLEAPPuckRew-v3',
        ],
        'rl_variant.use_masks': [True, False],
        'rl_variant.max_path_length': [200],
        'init_camera': [sawyer_xyz_reacher_camera_v0],
        'rl_variant.vis_kwargs.vis_list': [
            [],
        ],
        # 'plt',
        # ]],
        'rl_variant.algo_kwargs.num_trains_per_train_loop': [
            4000,
        ],
        'rl_variant.es_kwargs.max_sigma': [0.2, 0.5, 0.8],
        'seedid': range(3),
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    # Keep every swept configuration except the masked runs on the PuckRew
    # environment. 'rl_variant.use_masks' is swept over [True, False], so a
    # plain truth test is sufficient.
    variants = [
        swept
        for swept in sweeper.iterate_hyperparameters()
        if not (swept['env_id'] == 'SawyerPushDebugLEAPPuckRew-v3'
                and swept['rl_variant']['use_masks'])
    ]

    run_variants(rl_experiment, variants, run_id=0)
Пример #8
0
        trainer_kwargs=dict(
            lr=1e-3,
            loss_weights=dict(
                mse=1.0,
            )
        ),
        save_period=100,
        slurm_variant=dict(
            timeout_min=48 * 60,
            cpus_per_task=10,
            gpus_per_node=1,
        ),
        num_train_workers=8,
    )

    # Hyperparameter grid, keyed by dotted paths into the defaults dict.
    # Single-element lists pin a setting; multi-element lists are swept.
    search_space = {
        "dataset_kwargs.dataset_name": ["doors2", "pour1"],
        "model_kwargs.delta_features": [True],
        "model_kwargs.pretrained_features": [True, False],
        "model_kwargs.normalize": [True],
        "trainer_kwargs.loss_weights.mse": [0.0, 1.0],
        "trainer_kwargs.loss_weights.classification_gradients": [False, True],
        "trainer_class": [GeometricTimePredictorTrainer],
        "seedid": range(1),
    }

    # Build the sweep over the defaults and pass its result straight to the
    # launcher.
    swept_variants = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    ).iterate_hyperparameters()
    run_variants(train_rfeatures_model, swept_variants, run_id=1)
Пример #9
0
            4000,
        ],
        'env_kwargs.reward_type': [
            'hand_and_puck_distance',
        ],
        'policy_kwargs.min_log_std': [
            -6,
        ],
        'expl_mask_distribution_kwargs.weights': [
            (1, 1, 1),
            (0, 0, 1),
            (0, 1, 0),
        ],
        'eval_mask_distribution_kwargs.weights': [
            (1, 0, 0),
            (0, 1, 0),
            (0, 0, 1),
        ],
    }

    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    # Materialize the sweep without reusing the name `variant`, so the
    # defaults dict passed to the sweeper is not rebound.
    variants = list(sweeper.iterate_hyperparameters())

    # `process_args` is forwarded positionally, unchanged.
    run_variants(experiment, variants, process_args)
Пример #10
0
        #     # 'shaped',
        #     'euclidean',
        # ],
        # 'exploration_type': [
        #     'epsilon',
        #     'ou',
        #     'gaussian',
        # ],
        'seed_id':
        range(n_seeds),
        'replay_buffer_kwargs.fraction_resampled_goals_are_env_goals': [
            0.0,
            0.5,
        ],
        'replay_buffer_kwargs.fraction_goals_are_rollout_goals': [
            0.2,
            1.0,
        ],
        'env_kwargs.randomize_position_on_reset': [False, True],
        'env_kwargs.wall_shape': ["u"],
        'algo_kwargs.her_kwargs.rollout_goal_params.exploration_temperature':
        [1.0, 100.0, 1e-2, 1e-4],
    }
    # Build the sweep from the grid and the defaults, then pass its result
    # straight to the launcher.
    swept_variants = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    ).iterate_hyperparameters()
    run_variants(her_td3_experiment, swept_variants, run_id=0)
Пример #11
0
                skew_config=dict(
                    method='vae_prob',
                    power=0,
                ),
                skew_dataset=False,
                priority_function_kwargs=dict(
                    decoder_distribution='gaussian_identity_variance',
                    sampling_method='importance_sampling',
                    # sampling_method='true_prior_sampling',
                    num_latents_to_sample=10,
                ),
                use_parallel_dataloading=False,
            ),
            save_period=25,
        ),
    )

    # Only 'seedid' is swept (five values); everything else comes from the
    # defaults in `variant`.
    search_space = {
        'seedid': range(5),
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    # Materialize the sweep without reusing the name `variant`, so the
    # defaults dict passed to the sweeper is not rebound.
    variants = list(sweeper.iterate_hyperparameters())

    run_variants(arl_full_experiment, variants, run_id=0)
Пример #12
0
    )

    # Hyperparameter grid, keyed by dotted paths into the defaults dict.
    # Only 'td3_bc_trainer_kwargs.bc_weight' and 'seedid' take more than one
    # value; the commented-out entries are alternative sweeps left disabled.
    search_space = {
        'td3_bc_trainer_kwargs.use_awr': [False],
        # 'td3_bc_trainer_kwargs.demo_beta':[1, 10],
        'td3_bc_trainer_kwargs.bc_weight': [1, 0],
        'td3_bc_trainer_kwargs.rl_weight': [1],
        'algo_kwargs.num_epochs': [1000],
        'algo_kwargs.num_eval_steps_per_epoch': [100],
        'algo_kwargs.num_expl_steps_per_train_loop': [100],
        'algo_kwargs.min_num_steps_before_training': [0],
        # 'td3_bc_trainer_kwargs.add_demos_to_replay_buffer':[True, False],
        # 'td3_bc_trainer_kwargs.num_trains_per_train_loop':[1000, 2000, 4000, 10000, 16000],
        # 'exploration_noise':[0.1, .3, .5],
        # 'pretrain_rl':[True],
        # 'pretrain_policy':[False],
        'pretrain_rl': [False],
        'pretrain_policy': [False],
        'seedid': range(5),
    }

    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )

    # Materialize the sweep without reusing the name `variant`, so the
    # defaults dict passed to the sweeper is not rebound.
    variants = list(sweeper.iterate_hyperparameters())

    run_variants(state_td3bc_experiment, variants, run_id=0)
                skew_config=dict(
                    method='vae_prob',
                    power=0,
                ),
                skew_dataset=False,
                priority_function_kwargs=dict(
                    decoder_distribution='gaussian_identity_variance',
                    sampling_method='importance_sampling',
                    # sampling_method='true_prior_sampling',
                    num_latents_to_sample=10,
                ),
                use_parallel_dataloading=False,
            ),

            save_period=25,
        ),
    )

    # Only 'seedid' is swept (five values); everything else comes from the
    # defaults in `variant`.
    search_space = {
        'seedid': range(5),
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )

    # Materialize the sweep without reusing the name `variant`, so the
    # defaults dict passed to the sweeper is not rebound.
    variants = list(sweeper.iterate_hyperparameters())

    run_variants(grill_her_twin_sac_full_experiment, variants, run_id=0)
Пример #14
0
                    decoder_distribution='gaussian_identity_variance',
                    sampling_method='importance_sampling',
                    # sampling_method='true_prior_sampling',
                    num_latents_to_sample=10,
                ),
                use_parallel_dataloading=False,
            ),

            save_period=25,
        ),
        region="us-west-2",
    )

    # Two 'seedid' values with the off-policy training step count pinned to
    # 10; the commented-out entries are alternative sweeps left disabled.
    search_space = {
        'seedid': range(2),
        'grill_variant.algo_kwargs.rl_offpolicy_num_training_steps': [10],
        # 'grill_variant.reward_params.type':['latent_bound'], #, 'latent_distance'
        # 'train_vae_variant.representation_size': [(6, 8)], #(3 * objects, 3 * colors)
        # 'train_vae_variant.beta': [1],
        # 'train_vae_variant.generate_vae_dataset_kwargs.n_random_steps': [100]
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )

    # Materialize the sweep without reusing the name `variant`, so the
    # defaults dict passed to the sweeper is not rebound.
    variants = list(sweeper.iterate_hyperparameters())

    run_variants(
        grill_her_td3_offpolicy_online_vae_full_experiment,
        variants,
        run_id=0,
    )
Пример #15
0
        ),
        policy_kwargs=dict(
            hidden_sizes=[32, 32],
        ),

        save_video=True,
        dump_video_kwargs=dict(
            save_period=1,
            # imsize=(3, 500, 300),
        ),
        desired_trajectory="/home/anair/ros_ws/src/railrl-private/demos/door_demos_v3/demo_v3_0.pkl",

        logger_variant=dict(
            tensorboard=True,
        ),
        model_path="/home/anair/data/s3doodad/facebook/models/rfeatures/multitask1/run2/id2/itr_4000.pt",
    )

    # A single 'seedid' value: the sweep describes one configuration on top
    # of the defaults in `variant`.
    search_space = {
        'seedid': range(1),
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )

    # Materialize the sweep without reusing the name `variant`, so the
    # defaults dict passed to the sweeper is not rebound.
    variants = list(sweeper.iterate_hyperparameters())

    run_variants(encoder_wrapped_td3bc_experiment, variants, run_id=3)
Пример #16
0
                    method='vae_prob',
                    power=0,
                ),
                skew_dataset=False,
                priority_function_kwargs=dict(
                    decoder_distribution='gaussian_identity_variance',
                    sampling_method='importance_sampling',
                    # sampling_method='true_prior_sampling',
                    num_latents_to_sample=10,
                ),
                use_parallel_dataloading=False,
            ),

            save_period=25,
        ),
    )

    # Sweep five 'seedid' values against four VAE representation sizes.
    search_space = {
        'seedid': range(5),
        'train_vae_variant.representation_size': [2, 4, 8, 16],
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space, default_parameters=variant,
    )

    # Materialize the sweep without reusing the name `variant`, so the
    # defaults dict passed to the sweeper is not rebound.
    variants = list(sweeper.iterate_hyperparameters())

    run_variants(experiment, variants, run_id=2)
Пример #17
0
                priority_function_kwargs=dict(
                    decoder_distribution='gaussian_identity_variance',
                    sampling_method='importance_sampling',
                    # sampling_method='true_prior_sampling',
                    num_latents_to_sample=10,
                ),
                use_parallel_dataloading=False,
            ),
            save_period=25,
        ),
        region="us-west-1",
    )

    # Only 'seedid' is swept (five values); the commented-out entries are
    # alternative sweeps left disabled.
    search_space = {
        'seedid': range(5),
        # 'grill_variant.reward_params.type':['latent_bound'], #, 'latent_distance'
        # 'train_vae_variant.representation_size': [(6, 8)], #(3 * objects, 3 * colors)
        # 'train_vae_variant.beta': [1],
        # 'train_vae_variant.generate_vae_dataset_kwargs.n_random_steps': [100]
    }
    sweeper = hyp.DeterministicHyperparameterSweeper(
        search_space,
        default_parameters=variant,
    )

    # Materialize the sweep without reusing the name `variant`, so the
    # defaults dict passed to the sweeper is not rebound.
    variants = list(sweeper.iterate_hyperparameters())

    run_variants(grill_her_td3_online_vae_full_experiment, variants, run_id=2)