Example #1
def experiment(variant, prev_exp_state=None):

    domain = variant['domain']
    seed = variant['seed']
    goal = variant['goal']

    expl_env = env_producer(domain, seed, goal)

    env_max_action = float(expl_env.action_space.high[0])
    obs_dim = expl_env.observation_space.low.size
    action_dim = expl_env.action_space.low.size
    vae_latent_dim = 2 * action_dim
    mlp_encoder_input_size = 2 * obs_dim + action_dim + 1

    print('------------------------------------------------')
    print('obs_dim', obs_dim)
    print('action_dim', action_dim)
    print('------------------------------------------------')

    # Network module from tiMe

    mlp_encoder = MlpEncoder(hidden_sizes=[200, 200, 200],
                             input_size=mlp_encoder_input_size,
                             output_size=2 * variant['latent_dim'])

    context_encoder = ProbabilisticContextEncoder(mlp_encoder,
                                                  variant['latent_dim'])

    qf1 = FlattenMlp(
        hidden_sizes=variant['Qs_hidden_sizes'],
        input_size=obs_dim + action_dim + variant['latent_dim'],
        output_size=1,
    )
    target_qf1 = FlattenMlp(
        hidden_sizes=variant['Qs_hidden_sizes'],
        input_size=obs_dim + action_dim + variant['latent_dim'],
        output_size=1,
    )
    qf2 = FlattenMlp(
        hidden_sizes=variant['Qs_hidden_sizes'],
        input_size=obs_dim + action_dim + variant['latent_dim'],
        output_size=1,
    )
    target_qf2 = FlattenMlp(
        hidden_sizes=variant['Qs_hidden_sizes'],
        input_size=obs_dim + action_dim + variant['latent_dim'],
        output_size=1,
    )
    vae_decoder = VaeDecoder(
        max_action=env_max_action,
        hidden_sizes=variant['vae_hidden_sizes'],
        input_size=obs_dim + vae_latent_dim + variant['latent_dim'],
        output_size=action_dim,
    )
    perturbation_generator = PerturbationGenerator(
        max_action=env_max_action,
        hidden_sizes=variant['perturbation_hidden_sizes'],
        input_size=obs_dim + action_dim + variant['latent_dim'],
        output_size=action_dim,
    )

    # Load the params obtained by tiMe
    ss = load_gzip_pickle(variant['path_to_snapshot'])
    ss = ss['trainer']

    encoder_state_dict = OrderedDict()
    for key, value in ss['context_encoder_state_dict'].items():
        if 'mlp_encoder' in key:
            encoder_state_dict[key.replace('mlp_encoder.', '')] = value

    mlp_encoder.load_state_dict(encoder_state_dict)

    # All four Q-functions start from the same snapshot weights
    qf1.load_state_dict(ss['Qs_state_dict'])
    target_qf1.load_state_dict(ss['Qs_state_dict'])
    qf2.load_state_dict(ss['Qs_state_dict'])
    target_qf2.load_state_dict(ss['Qs_state_dict'])

    vae_decoder.load_state_dict(ss['vae_decoder_state_dict'])
    perturbation_generator.load_state_dict(ss['perturbation_generator_dict'])

    tiMe_path_collector = tiMeSampler(
        expl_env,
        context_encoder,
        qf1,
        vae_decoder,
        perturbation_generator,
        vae_latent_dim=vae_latent_dim,
        candidate_size=variant['candidate_size'],
    )
    tiMe_path_collector.to(ptu.device)

    # Get producer function for policy
    policy_producer = get_policy_producer(
        obs_dim, action_dim, hidden_sizes=variant['policy_hidden_sizes'])
    # Finished getting producer

    remote_eval_path_collector = RemoteMdpPathCollector.remote(
        domain, seed * 10 + 1, goal, policy_producer)
    expl_path_collector = MdpPathCollector(expl_env)
    replay_buffer = ReplayBuffer(variant['replay_buffer_size'],
                                 ob_space=expl_env.observation_space,
                                 action_space=expl_env.action_space)
    trainer = SACTrainer(policy_producer,
                         qf1=qf1,
                         target_qf1=target_qf1,
                         qf2=qf2,
                         target_qf2=target_qf2,
                         action_space=expl_env.action_space,
                         **variant['trainer_kwargs'])

    algorithm = BatchRLAlgorithm(
        trainer=trainer,
        exploration_data_collector=expl_path_collector,
        remote_eval_data_collector=remote_eval_path_collector,
        tiMe_data_collector=tiMe_path_collector,
        replay_buffer=replay_buffer,
        optimistic_exp_hp=variant['optimistic_exp'],
        **variant['algorithm_kwargs'])

    algorithm.to(ptu.device)

    start_epoch = (prev_exp_state['epoch'] + 1
                   if prev_exp_state is not None else 0)

    algorithm.train(start_epoch)
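
For context, a minimal sketch of how `experiment` might be invoked is shown below. Only the `variant` key names are taken from the function above; every concrete value is an illustrative assumption, not a setting from the original project.

# Hypothetical invocation of `experiment`; key names come from the code
# above, all values are illustrative assumptions.
variant = dict(
    domain='cheetah-vel',                  # assumed domain name
    seed=0,
    goal=0,
    latent_dim=20,
    Qs_hidden_sizes=[256, 256],
    vae_hidden_sizes=[750, 750],
    perturbation_hidden_sizes=[400, 300],
    path_to_snapshot='path/to/itr_200.zip_pkl',
    candidate_size=10,
    policy_hidden_sizes=[256, 256],
    replay_buffer_size=int(1e6),
    trainer_kwargs=dict(discount=0.99),
    optimistic_exp=dict(),
    algorithm_kwargs=dict(num_epochs=100),
)
experiment(variant)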
Example #2
def run_experiment_here(
        experiment_function,
        variant,
        seed=None,
        use_gpu=True,
        gpu_id=0,

        # Logger params:
        snapshot_mode='last',
        snapshot_gap=1,

        force_randomize_seed=False,
        log_dir=None,
):
    """
    Run an experiment locally without any serialization.

    :param experiment_function: Function. `variant` will be passed in as its
    only argument.
    :param exp_prefix: Experiment prefix for the save file.
    :param variant: Dictionary passed in to `experiment_function`.
    :param exp_id: Experiment ID. Should be unique across all
    experiments. Note that one experiment may correspond to multiple seeds,.
    :param seed: Seed used for this experiment.
    :param use_gpu: Run with GPU. By default False.
    :param script_name: Name of the running script
    :param log_dir: If set, set the log directory to this. Otherwise,
    the directory will be auto-generated based on the exp_prefix.
    :return:
    """
    torch.set_num_threads(1)

    if force_randomize_seed or seed is None:
        seed = random.randint(0, 100000)
        variant['seed'] = str(seed)

    log_dir = variant['log_dir']

    # The logger's default mode is to append to the text file
    # if it already exists, so this will not overwrite or erase
    # any existing log file in the same log dir.
    logger.reset()
    setup_logger(
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        log_dir=log_dir,
    )

    # Assume this file is at the top level of the repo
    git_infos = get_git_infos([osp.dirname(__file__)])

    run_experiment_here_kwargs = dict(
        variant=variant,
        seed=seed,
        use_gpu=use_gpu,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        git_infos=git_infos,
    )

    exp_setting = dict(
        run_experiment_here_kwargs=run_experiment_here_kwargs
    )

    exp_setting_pkl_path = osp.join(log_dir, 'experiment.pkl')

    # Check if existing result exists
    prev_exp_state = None

    if osp.isfile(exp_setting_pkl_path):
        # Sanity check to make sure the experimental setting
        # of the saved data and the current experiment run is the same
        prev_exp_setting = load_pkl(exp_setting_pkl_path)

        logger.log(f'Log dir is not empty: {os.listdir(log_dir)}')

        if prev_exp_setting != exp_setting:
            logger.log("""Previous experimental setting is not
                        the same as the current experimental setting.
                        Very risky to try to reload the previous state.
                        Exitting""")
            logger.log(f'Previous: {prev_exp_setting}')
            logger.log(f'Current: {exp_setting}')
            exit(1)

        try:
            logger.log('Trying to restore the state of the experiment program')
            prev_exp_state = load_gzip_pickle(
                osp.join(log_dir, 'params.zip_pkl'))

        except FileNotFoundError:
            logger.log("""There is no previous experiment state available.
                            Do not try to restore.""")

            prev_exp_state = None

    # Log the variant
    logger.log("Variant:")
    logger.log(json.dumps(dict_to_safe_json(variant), indent=2))
    variant_log_path = osp.join(log_dir, 'variant.json')
    logger.log_variant(variant_log_path, variant)

    # Save the current experimental setting
    dump_pkl(exp_setting_pkl_path, exp_setting)
    log_git_infos(git_infos, log_dir)

    logger.log(f'Seed: {seed}')
    set_seed(seed)

    logger.log(f'Using GPU: {use_gpu}')
    set_gpu_mode(use_gpu, gpu_id)

    return experiment_function(variant, prev_exp_state)
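
A minimal sketch of how `run_experiment_here` could drive the `experiment` function from Example #1. Note that the function reads the log directory from `variant['log_dir']`, so that key must be set; the concrete values here are assumptions.

# Hypothetical driver; assumes the `experiment` function and the
# `variant` dict sketched after Example #1.
variant['log_dir'] = './logs/cheetah-vel/seed-0'   # assumed path
run_experiment_here(
    experiment,
    variant,
    seed=0,
    use_gpu=True,
    snapshot_mode='last',
)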
Example #3
def load_from_gzip(self, filename):
    self.storage = np.array(load_gzip_pickle(filename))
    self.buffer_size = len(self.storage)
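
The method in Example #3 clearly belongs to a replay-buffer class. Below is a minimal sketch of a plausible enclosing class; the class name, the `__init__` fields, and the `load_gzip_pickle` implementation are assumptions, and only `load_from_gzip` itself comes from the source.

import gzip
import pickle
import numpy as np

def load_gzip_pickle(filename):
    # Plausible stand-in for the project's helper: a pickle wrapped
    # in gzip compression (assumption, not the original code).
    with gzip.open(filename, 'rb') as f:
        return pickle.load(f)

class ReplayBuffer:  # hypothetical enclosing class; name and fields assumed
    def __init__(self):
        self.storage = np.array([])
        self.buffer_size = 0

    def load_from_gzip(self, filename):
        # Restore the buffer contents from a gzip-pickled snapshot
        self.storage = np.array(load_gzip_pickle(filename))
        self.buffer_size = len(self.storage)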
Example #4
            num_trans_context=variant['num_trans_context'],
            in_mdp_batch_size=variant['in_mdp_batch_size'],
            use_next_obs_in_context=variant['use_next_obs_in_context'],
        )

        buffer_loader_id_list.append(rp_buffer.load_from_gzip.remote(filename))
        bcq_buffers.append(rp_buffer)
    ray.get(buffer_loader_id_list)

    assert len(bcq_buffers) == len(idx_list)

    # Load ensemble parameters
    ensemble_params_list = []
    for idx in idx_list:
        params_dir = ensemble_params_dir + str(idx) + '/itr_200.zip_pkl'
        params = load_gzip_pickle(params_dir)
        ensemble_params_list.extend(
            params['trainer']['network_ensemble_state_dict'])

    # set up logger
    variant['log_dir'] = get_log_dir(variant)

    logger.reset()
    setup_logger(log_dir=variant['log_dir'],
                 snapshot_gap=100,
                 snapshot_mode="gap")

    logger.log(f"Seed: {seed}")
    set_seed(seed)

    logger.log('Using GPU: True')
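
Example #4 loads several buffers in parallel through Ray actors. The sketch below shows that fan-out/gather pattern in a self-contained form; the `Buffer` class and filenames are hypothetical, and only the `.remote()` / `ray.get()` mechanics mirror the fragment above.

import ray
import numpy as np

ray.init()

@ray.remote
class Buffer:  # hypothetical stand-in for the project's remote replay buffer
    def __init__(self):
        self.storage = np.array([])

    def load_from_gzip(self, filename):
        # Placeholder load: the real project reads gzip-pickled transitions
        # via load_gzip_pickle(filename); here we fake a payload so the
        # sketch runs anywhere.
        self.storage = np.zeros(100)
        return filename, len(self.storage)

files = ['buffer_0.zip_pkl', 'buffer_1.zip_pkl']   # assumed filenames
buffers = [Buffer.remote() for _ in files]
# Launch all loads concurrently, then block until every actor finishes,
# mirroring the load_from_gzip.remote(...) / ray.get(...) calls above.
ids = [buf.load_from_gzip.remote(f) for buf, f in zip(buffers, files)]
print(ray.get(ids))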