示例#1
0
    def train(self, **kwargs):
        """
        Run the training algorithm to optimize model parameters for the
        environment provided.

        Any keyword arguments override the preset defaults for the chosen
        training algorithm; the special key 'render_saves' (bool) is consumed
        here and, if True, renders every saved checkpoint after training.
        """
        # Copy the preset before applying user overrides: the original code
        # called .update() on the dict stored in PRESETS itself, permanently
        # polluting the shared defaults for every subsequent train() call.
        preset_kwargs = dict(PRESETS[self.training_alg])
        preset_kwargs.update(kwargs)
        # 'render_saves' is consumed here and must not be forwarded to the
        # algorithm; pop with a default replaces the get-then-pop pair.
        render_saves = preset_kwargs.pop('render_saves', False)

        # dynamically import source code (e.g. import algos.vpg.vpg as mod)
        mod = import_module("algos.{}.{}".format(self.training_alg,
                                                 self.training_alg))
        # e.g. from algos.vpg.vpg import vpg
        method = getattr(mod, self.training_alg)

        if self.actorCritic is None:
            # use the default actorCritic for the algo
            # e.g. import algos.vpg.core as core
            core = import_module("algos.{}.core".format(self.training_alg))
            # e.g. from core import MLPActorCritic as actorCritic
            self.actorCritic = getattr(
                core, DEFAULT_ACTOR_CRITIC[self.training_alg])

        # prepare mpi if self.ncpu > 1 (and supported by chosen RL algorithm)
        mpi_fork(self.ncpu)  # run parallel code with mpi

        # update logger kwargs
        logger_kwargs = setup_logger_kwargs(self.exp_name,
                                            preset_kwargs['seed'])
        preset_kwargs['logger_kwargs'] = logger_kwargs

        # begin training
        method(self.env, actor_critic=self.actorCritic, **preset_kwargs)

        # render all checkpoints user specifies with 'render_saves'
        if render_saves:
            log_dir = os.path.join(logger_kwargs['output_dir'], 'pyt_save')
            # first item in list is the final checkpoint, with no itr in its
            # file name; skip it
            fnames = glob.glob(os.path.join(log_dir, 'model*.pt'))[1:]
            for checkpoint in fnames:
                # get epoch number from file name (raw string for the regex)
                itr = re.search(r'model(.*).pt', checkpoint).group(1)
                render_kwargs = {
                    'filename': '/gym_animation_' + str(itr) + '.mp4',
                    'model_itr': itr
                }
                self.render(save=True,
                            show=False,
                            seed=self.seed,
                            **render_kwargs)
示例#2
0
    def thunk_plus():
        """Resolve 'env_name' into an env factory, fork MPI, then run thunk."""
        # Translate an 'env_name' argument into the 'env_fn' callable the
        # wrapped algorithm expects (gym is imported lazily on demand).
        if 'env_name' in kwargs:
            import gym
            env_name = kwargs.pop('env_name')
            kwargs['env_fn'] = lambda: gym.make(env_name)

        # Fork into multiple processes
        mpi_fork(num_cpu)

        # Run thunk
        thunk(**kwargs)
示例#3
0
def main():
    """Train a PPO agent on the 3DBall env, or evaluate a saved model.

    Set ``model_path`` to None to train; point it at an experiment
    directory to run evaluation episodes instead.
    """
    model_path = "experiments/20210403_19:22:15_ppo"
    # model_path = None
    agent_file = "environments/3DBall_single/3DBall_single.x86_64"

    if model_path is not None:
        # evaluation: single process, inference environment
        mpi_fork(1)
        ppo = PPO(lambda: inference_environment(agent_file), PPOActorCritic)
        test_episodes = 10
        ppo.test_model(model_path, test_episodes)
    else:
        # training: fork workers and run the training environment
        mpi_fork(8)
        ppo = PPO(lambda: train_environment(agent_file), PPOActorCritic)
        ppo.train()
def main():
    """Parse command-line options and launch VPG training on a gym env."""
    import argparse

    parser = argparse.ArgumentParser()
    # (flags, argparse options) pairs, in CLI help order
    cli_options = [
        (['--env'], dict(type=str, default='Pendulum-v0')),
        (['--hid'], dict(type=int, default=64)),
        (['--l'], dict(type=int, default=2)),
        (['--gamma'], dict(type=float, default=0.99)),
        (['--seed', '-s'], dict(type=int, default=0)),
        (['--cpu'], dict(type=int, default=4)),
        (['--steps'], dict(type=int, default=4000)),
        (['--epochs'], dict(type=int, default=70)),
        (['--exp_name'], dict(type=str, default='vpg')),
    ]
    for flags, opts in cli_options:
        parser.add_argument(*flags, **opts)
    args, _ = parser.parse_known_args()

    mpi_fork(args.cpu)  # run parallel code with mpi

    hidden_sizes = [args.hid] * args.l
    vpg(gym.make(args.env),
        ac_kwargs=dict(hidden_sizes=hidden_sizes),
        gamma=args.gamma,
        seed=args.seed,
        steps_per_epoch=args.steps,
        epochs=args.epochs)
示例#5
0
def main():
    """
    Train a PPO agent on the Worm Unity environment when ``model_path`` is
    None; otherwise load the saved model and run evaluation episodes.
    """
    model_path = None
    agent_file = "worm/worm.x86_64"
    if model_path is None:
        cpus = 4
        mpi_fork(cpus)

        # Only rank 0 shows graphics; worker ranks run headless.
        # (Original wrote `True if proc_id() != 0 else False` — the
        # comparison already yields the boolean.)
        no_graphics = proc_id() != 0
        env_fn = lambda: WormGymWrapper(agent_file, no_graphics)
        ppo = PPO(env_fn, PPOActorCritic, epochs=5)
        if proc_id() == 0:
            # Only the root process records the run with mlflow.
            with mlflow.start_run() as run:
                ppo.train()
        else:
            ppo.train()
    else:
        cpus = 1
        mpi_fork(cpus)
        env_fn = lambda: WormGymWrapper(
            agent_file, time_scale=1., no_graphics=False)
        ppo = PPO(env_fn, PPOActorCritic)
        test_episodes = 10
        ppo.test_model(model_path, test_episodes)
示例#6
0
    logger.output_file.close()


if __name__ == '__main__':
    import argparse

    # Command-line interface for launching PPO on a gym environment.
    parser = argparse.ArgumentParser()
    # (flags, argparse options) pairs, in CLI help order
    cli_options = [
        (['--env'], dict(type=str, default='HalfCheetah-v2')),
        (['--hid'], dict(type=int, default=64)),
        (['--l'], dict(type=int, default=2)),
        (['--gamma'], dict(type=float, default=0.99)),
        (['--seed', '-s'], dict(type=int, default=0)),
        (['--cpu'], dict(type=int, default=4)),
        (['--steps'], dict(type=int, default=4000)),
        (['--epochs'], dict(type=int, default=50)),
        (['--exp_name'], dict(type=str, default='ppo')),
    ]
    for flags, opts in cli_options:
        parser.add_argument(*flags, **opts)
    args = parser.parse_args()

    mpi_fork(args.cpu)  # run parallel code with mpi

    from spinup.utils.run_utils import setup_logger_kwargs
    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

    env_builder = lambda: gym.make(args.env)
    ppo(env_builder,
        actor_critic=core.MLPActorCritic,
        ac_kwargs=dict(hidden_sizes=[args.hid] * args.l),
        gamma=args.gamma,
        seed=args.seed,
        steps_per_epoch=args.steps,
        epochs=args.epochs,
        logger_kwargs=logger_kwargs)
示例#7
0
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='HalfCheetah-v2')
    parser.add_argument('--hid', type=int, default=64)
    parser.add_argument('--l', type=int, default=2)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--lam', type=float, default=0.97)
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--cpu', type=int, default=8)
    parser.add_argument('--episodes-per-epoch', type=int, default=40)
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--exp_name', type=str, default='gailt')
    args = parser.parse_args()

    mpi_fork(args.cpu)

    from utils.run_utils import setup_logger_kwargs
    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

    # policyg(lambda: gym.make(args.env), actor_critic=ActorCritic, ac_kwargs=dict(hidden_dims=[args.hid]*args.l),
    #     gamma=args.gamma, lam=args.lam, seed=args.seed, episodes_per_epoch=args.episodes_per_epoch,
    #     epochs=args.epochs, logger_kwargs=logger_kwargs)

    gail(lambda: gym.make(args.env),
         actor_critic=ActorCritic,
         ac_kwargs=dict(hidden_dims=[args.hid] * args.l),
         disc=Discriminator,
         dc_kwargs=dict(hidden_dims=[args.hid] * args.l),
         gamma=args.gamma,
         lam=args.lam,