Example #1
def run_experiment(**kwargs):
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + kwargs.get(
        'exp_name', '')
    print("\n---------- experiment with dir {} ---------------------------".
          format(exp_dir))
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')
    json.dump(kwargs,
              open(exp_dir + '/params.json', 'w'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)
    os.makedirs(exp_dir + '/Data/', exist_ok=True)
    os.makedirs(exp_dir + '/Model/', exist_ok=True)
    os.makedirs(exp_dir + '/Policy/', exist_ok=True)
    json.dump(kwargs,
              open(exp_dir + '/Data/params.json', 'w+'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)
    json.dump(kwargs,
              open(exp_dir + '/Model/params.json', 'w+'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)
    json.dump(kwargs,
              open(exp_dir + '/Policy/params.json', 'w+'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)
    run_base(exp_dir, **kwargs)
Example #2
def run_experiment(**kwargs):
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + kwargs.get(
        'exp_name', 'tmp')
    print("\n---------- experiment with dir {} ---------------------------".
          format(exp_dir))
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')
    json.dump(kwargs,
              open(exp_dir + '/params.json', 'w'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)
    if 'num_data_workers' in kwargs:
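        # NOTE: only 'num_data_workers' is checked; 'num_model_workers' and
        # 'num_policy_workers' are assumed to be present whenever it is.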
        for idx in range(kwargs['num_data_workers']):
            os.makedirs(exp_dir + f'/Data-{idx}/', exist_ok=True)
            json.dump(kwargs,
                      open(exp_dir + f'/Data-{idx}/params.json', 'w+'),
                      indent=2,
                      sort_keys=True,
                      cls=ClassEncoder)
        for idx in range(kwargs['num_model_workers']):
            os.makedirs(exp_dir + f'/Model-{idx}/', exist_ok=True)
            json.dump(kwargs,
                      open(exp_dir + f'/Model-{idx}/params.json', 'w+'),
                      indent=2,
                      sort_keys=True,
                      cls=ClassEncoder)
        for idx in range(kwargs['num_policy_workers']):
            os.makedirs(exp_dir + f'/Policy-{idx}/', exist_ok=True)
            json.dump(kwargs,
                      open(exp_dir + f'/Policy-{idx}/params.json', 'w+'),
                      indent=2,
                      sort_keys=True,
                      cls=ClassEncoder)
    else:
        os.makedirs(exp_dir + '/Data/', exist_ok=True)
        os.makedirs(exp_dir + '/Model/', exist_ok=True)
        os.makedirs(exp_dir + '/Policy/', exist_ok=True)
        json.dump(kwargs,
                  open(exp_dir + '/Data/params.json', 'w+'),
                  indent=2,
                  sort_keys=True,
                  cls=ClassEncoder)
        json.dump(kwargs,
                  open(exp_dir + '/Model/params.json', 'w+'),
                  indent=2,
                  sort_keys=True,
                  cls=ClassEncoder)
        json.dump(kwargs,
                  open(exp_dir + '/Policy/params.json', 'w+'),
                  indent=2,
                  sort_keys=True,
                  cls=ClassEncoder)
    run_base(exp_dir, **kwargs)
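
A minimal sketch of how this multi-worker variant might be launched; the worker counts and exp_name are hypothetical, and in practice run_base would require many more keys:

run_experiment(
    exp_name='parallel-test',   # hypothetical name
    num_data_workers=2,
    num_model_workers=1,
    num_policy_workers=1,
    # ... plus whatever run_base(exp_dir, **kwargs) expects
)
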
Example #3
    def __init__(
        self,
        name,
        exp_dir,
        n_itr,
        stop_cond,
        verbose=True,
    ):
        self.name = name
        logger.configure(dir=exp_dir + '/' + name,
                         format_strs=['csv', 'stdout', 'log'])
        self.n_itr = n_itr
        self.stop_cond = stop_cond
        self.verbose = verbose
        self.step_counter, self.synch_counter = 0, 0
        self.sess = None
Example #4
def run_experiment(**kwargs):
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + kwargs.get('exp_name', '')
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    json.dump(kwargs, open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True, cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)
    with sess.as_default() as sess:
        # Instantiate classes
        set_seed(kwargs['seed'])

        baseline = kwargs['baseline']()

        env = normalize(kwargs['env']()) # Wrappers?

        policy = GaussianMLPPolicy(
            name="meta-policy",
            obs_dim=np.prod(env.observation_space.shape),
            action_dim=np.prod(env.action_space.shape),
            hidden_sizes=kwargs['policy_hidden_sizes'],
            learn_std=kwargs['policy_learn_std'],
            hidden_nonlinearity=kwargs['policy_hidden_nonlinearity'],
            output_nonlinearity=kwargs['policy_output_nonlinearity'],
        )

        dynamics_model = MLPDynamicsEnsemble('dynamics-ensemble',
                                             env=env,
                                             num_models=kwargs['num_models'],
                                             hidden_nonlinearity=kwargs['dynamics_hidden_nonlinearity'],
                                             hidden_sizes=kwargs['dynamics_hidden_sizes'],
                                             output_nonlinearity=kwargs['dynamics_output_nonlinearity'],
                                             learning_rate=kwargs['dynamics_learning_rate'],
                                             batch_size=kwargs['dynamics_batch_size'],
                                             buffer_size=kwargs['dynamics_buffer_size'],
                                             rolling_average_persitency=kwargs['rolling_average_persitency']
                                             )

        env_sampler = Sampler(
            env=env,
            policy=policy,
            num_rollouts=kwargs['num_rollouts'],
            max_path_length=kwargs['max_path_length'],
            n_parallel=kwargs['n_parallel'],
        )

        model_sampler = METRPOSampler(
            env=env,
            policy=policy,
            dynamics_model=dynamics_model,
            num_rollouts=kwargs['imagined_num_rollouts'],
            max_path_length=kwargs['max_path_length'],
            deterministic=kwargs['deterministic'],
        )

        dynamics_sample_processor = ModelSampleProcessor(
            baseline=baseline,
            discount=kwargs['discount'],
            gae_lambda=kwargs['gae_lambda'],
            normalize_adv=kwargs['normalize_adv'],
            positive_adv=kwargs['positive_adv'],
        )

        model_sample_processor = SampleProcessor(
            baseline=baseline,
            discount=kwargs['discount'],
            gae_lambda=kwargs['gae_lambda'],
            normalize_adv=kwargs['normalize_adv'],
            positive_adv=kwargs['positive_adv'],
        )

        algo = TRPO(
            policy=policy,
            step_size=kwargs['step_size'],
        )

        trainer = Trainer(
            algo=algo,
            policy=policy,
            env=env,
            model_sampler=model_sampler,
            env_sampler=env_sampler,
            model_sample_processor=model_sample_processor,
            dynamics_sample_processor=dynamics_sample_processor,
            dynamics_model=dynamics_model,
            n_itr=kwargs['n_itr'],
            dynamics_model_max_epochs=kwargs['dynamics_max_epochs'],
            log_real_performance=kwargs['log_real_performance'],
            steps_per_iter=kwargs['steps_per_iter'],
            sample_from_buffer=kwargs['sample_from_buffer'],
            sess=sess,
        )

        trainer.train()
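
Every key in the sketch below is dereferenced somewhere in the function above; the values, and the baseline/env classes, are placeholders, not settings from any source:

# Hypothetical config for the METRPO-style run_experiment above.
config = {
    'exp_name': '/metrpo-test',   # note the leading '/': this example concatenates without a separator
    'seed': 1,
    'gpu_frac': 0.95,
    'baseline': LinearFeatureBaseline,   # assumed baseline class
    'env': HalfCheetahEnv,               # assumed env class
    # policy
    'policy_hidden_sizes': (64, 64),
    'policy_learn_std': True,
    'policy_hidden_nonlinearity': tf.tanh,
    'policy_output_nonlinearity': None,
    # dynamics ensemble
    'num_models': 5,
    'dynamics_hidden_sizes': (512, 512),
    'dynamics_hidden_nonlinearity': tf.nn.relu,
    'dynamics_output_nonlinearity': None,
    'dynamics_learning_rate': 1e-3,
    'dynamics_batch_size': 256,
    'dynamics_buffer_size': 25000,
    'rolling_average_persitency': 0.99,   # spelling matches the kwarg above
    # sampling
    'num_rollouts': 20,
    'imagined_num_rollouts': 50,
    'max_path_length': 200,
    'n_parallel': 4,
    'deterministic': False,
    # advantage estimation
    'discount': 0.99,
    'gae_lambda': 1.0,
    'normalize_adv': True,
    'positive_adv': False,
    # TRPO / training loop
    'step_size': 0.01,
    'n_itr': 100,
    'dynamics_max_epochs': 50,
    'log_real_performance': True,
    'steps_per_iter': 1,
    'sample_from_buffer': False,
}
run_experiment(**config)
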
Example #5
def run_experiment(**kwargs):
    exp_dir = os.getcwd() + '/data/' + EXP_NAME
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')
    json.dump(kwargs,
              open(exp_dir + '/params.json', 'w'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get(
        'gpu_frac', 0.95)
    sess = tf.Session(config=config)
    with sess.as_default() as sess:

        # Instantiate classes
        set_seed(kwargs['seed'])

        baseline = kwargs['baseline']()

        env = normalize(kwargs['env']())

        policy = GaussianMLPPolicy(
            name="policy",
            obs_dim=np.prod(env.observation_space.shape),
            action_dim=np.prod(env.action_space.shape),
            hidden_sizes=kwargs['hidden_sizes'],
            learn_std=kwargs['learn_std'],
            hidden_nonlinearity=kwargs['hidden_nonlinearity'],
            output_nonlinearity=kwargs['output_nonlinearity'],
            init_std=kwargs['init_std'],
            squashed=kwargs['squashed'])

        # Load policy here
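        # A hypothetical way to fill in the stub above, assuming a pickled
        # policy snapshot under the experiment directory:
        #   policy = joblib.load(exp_dir + '/policy.pkl')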

        sampler = Sampler(
            env=env,
            policy=policy,
            num_rollouts=kwargs['num_rollouts'],
            max_path_length=kwargs['max_path_length'],
            n_parallel=kwargs['n_parallel'],
        )

        sample_processor = SingleSampleProcessor(
            baseline=baseline,
            discount=kwargs['discount'],
            gae_lambda=kwargs['gae_lambda'],
            normalize_adv=kwargs['normalize_adv'],
            positive_adv=kwargs['positive_adv'],
        )

        algo = PPO(
            policy=policy,
            learning_rate=kwargs['learning_rate'],
            clip_eps=kwargs['clip_eps'],
            max_epochs=kwargs['num_ppo_steps'],
            entropy_bonus=kwargs['entropy_bonus'],
        )

        trainer = Trainer(
            algo=algo,
            policy=policy,
            env=env,
            sampler=sampler,
            sample_processor=sample_processor,
            n_itr=kwargs['n_itr'],
            sess=sess,
        )

        trainer.train()
Example #6
    def __call__(
        self,
        exp_dir,
        policy_pickle,
        env_pickle,
        baseline_pickle,
        dynamics_model_pickle,
        feed_dict,
        queue_prev,
        queue,
        queue_next,
        remote,
        start_itr,
        n_itr,
        stop_cond,
        need_query,
        auto_push,
        config,
    ):
        time_start = time.time()

        self.name = current_process().name
        logger.configure(dir=exp_dir + '/' + self.name,
                         format_strs=['csv', 'stdout', 'log'],
                         snapshot_mode=self.snapshot_mode,
                         snapshot_gap=self.snapshot_gap)

        self.n_itr = n_itr
        self.queue_prev = queue_prev
        self.queue = queue
        self.queue_next = queue_next
        self.stop_cond = stop_cond

        # FIXME: specify CPU/GPU usage here

        import tensorflow as tf

        def _init_vars():
            sess = tf.get_default_session()
            sess.run(tf.initializers.global_variables())

        with tf.Session(config=config).as_default():

            self.construct_from_feed_dict(
                policy_pickle,
                env_pickle,
                baseline_pickle,
                dynamics_model_pickle,
                feed_dict,
            )

            _init_vars()

            # warm up
            self.itr_counter = start_itr
            if self.verbose:
                print('{} waiting for starting msg from trainer...'.format(
                    self.name))
            assert remote.recv() == 'prepare start'
            self.prepare_start()
            remote.send('loop ready')
            logger.dumpkvs()
            logger.log("\n============== {} is ready =============".format(
                self.name))

            assert remote.recv() == 'start loop'
            total_push, total_synch, total_step = 0, 0, 0
            while not self.stop_cond.is_set():
                if self.verbose:
                    logger.log(
                        "\n------------------------- {} starting new loop ------------------"
                        .format(self.name))
                if need_query:  # poll
                    time_poll = time.time()
                    queue_prev.put('push')
                    time_poll = time.time() - time_poll
                    logger.logkv('{}-TimePoll'.format(self.name), time_poll)
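                # process_queue drains pending messages; the returned counts say
                # how many push/synch/step operations to account for this loop
                # (inferred from the bookkeeping below).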
                do_push, do_synch, do_step = self.process_queue()
                # step
                if do_step:
                    self.itr_counter += 1
                    self.step()
                    if auto_push:
                        do_push += 1
                        self.push()
                    # assumes auto_push is on, so each step is paired with exactly one push
                    assert do_push == 1
                    assert do_step == 1

                total_push += do_push
                total_synch += do_synch
                total_step += do_step
                logger.logkv(self.name + '-TimeSoFar',
                             time.time() - time_start)
                logger.logkv(self.name + '-TotalPush', total_push)
                logger.logkv(self.name + '-TotalSynch', total_synch)
                logger.logkv(self.name + '-TotalStep', total_step)
                if total_synch > 0:
                    logger.logkv(self.name + '-StepPerSynch',
                                 total_step / total_synch)
                logger.dumpkvs()
                logger.log(
                    "\n========================== {} {}, total {} ==================="
                    .format(
                        self.name,
                        (do_push, do_synch, do_step),
                        (total_push, total_synch, total_step),
                    ))
                self.set_stop_cond()

            remote.send('loop done')

        logger.log("\n================== {} closed ===================".format(
            self.name))

        remote.send('worker closed')
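
For reference, the trainer-side counterpart implied by the handshake strings above; only the message strings come from the worker loop, while the Pipe wiring and the ordering around the training run are assumptions:

# Hypothetical trainer-side view of the worker handshake.
from multiprocessing import Pipe

remote, worker_remote = Pipe()   # worker_remote would be passed as `remote` to __call__ above

remote.send('prepare start')     # worker runs prepare_start()
assert remote.recv() == 'loop ready'

remote.send('start loop')        # worker enters its push/synch/step loop
# ... training runs; a worker eventually sets stop_cond via set_stop_cond() ...

assert remote.recv() == 'loop done'
assert remote.recv() == 'worker closed'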