Пример #1
0
    def __init__(
            self,
            env,
            num_init_random_rollouts=10,
            max_rollout_length=500,
            num_onplicy_iters=10,
            num_onpolicy_rollouts=10,
            training_epochs=60,
            training_batch_size=512,
            render=False,
            mpc_horizon=15,
            num_random_action_selection=4096,
            nn_layers=1):
        """Store the run configuration, gather a random dataset, build the policy.

        Args:
            env: Environment object; stored on the instance and handed to
                both ``utils.RandomPolicy`` and ``ModelBasedPolicy``.
            num_init_random_rollouts: Passed to ``_gather_rollouts`` together
                with the random policy to build the initial dataset.
            max_rollout_length: Stored as ``_max_rollout_length``.
            num_onplicy_iters: Stored as ``_num_onpolicy_iters``. The
                parameter name is misspelled; it is kept as-is so existing
                keyword callers continue to work.
            num_onpolicy_rollouts: Stored as ``_num_onpolicy_rollouts``.
            training_epochs: Stored as ``_training_epochs``.
            training_batch_size: Stored as ``_training_batch_size``.
            render: Stored as ``_render``.
            mpc_horizon: Forwarded to ``ModelBasedPolicy`` as ``horizon``.
            num_random_action_selection: Forwarded to ``ModelBasedPolicy``
                under the same keyword.
            nn_layers: Not referenced anywhere in this constructor.
                NOTE(review): confirm whether it was meant to be forwarded
                to the policy.
        """
        # Plain configuration goes first: _gather_rollouts below presumably
        # reads some of these attributes -- TODO confirm against its body.
        self._env = env
        self._render = render
        self._max_rollout_length = max_rollout_length
        self._training_batch_size = training_batch_size
        self._training_epochs = training_epochs
        self._num_onpolicy_rollouts = num_onpolicy_rollouts
        self._num_onpolicy_iters = num_onplicy_iters

        logger.info('Gathering random dataset')
        random_policy = utils.RandomPolicy(env)
        self._random_dataset = self._gather_rollouts(
            random_policy, num_init_random_rollouts)

        logger.info('Creating policy')
        policy_options = {
            'horizon': mpc_horizon,
            'num_random_action_selection': num_random_action_selection,
        }
        self._policy = ModelBasedPolicy(
            env, self._random_dataset, **policy_options)

        # Clear any timings accumulated during setup and open the 'total'
        # timer.
        timeit.reset()
        timeit.start('total')
Пример #2
0
 def _log(self, dataset):
     """Close the 'total' timer, emit all logs, then restart timing.

     Args:
         dataset: Object whose ``log()`` method is invoked before the
             tabular dump.
     """
     timeit.stop('total')

     # Dataset statistics first, then the accumulated table at info level.
     dataset.log()
     logger.dump_tabular(print_func=logger.info)

     # An empty debug line, followed by the timer report one line at a time.
     logger.debug('')
     timing_report = str(timeit)
     for report_line in timing_report.split('\n'):
         logger.debug(report_line)

     # Start a fresh timing window.
     timeit.reset()
     timeit.start('total')
Пример #3
0
    def _log(self, dataset):
        """Stop timing, write dataset/tabular/timer logs, and restart timing.

        Args:
            dataset: Object exposing a ``log()`` method, called before the
                tabular output is dumped.
        """
        # Close the 'total' timer before anything is reported.
        timeit.stop('total')

        # Info-level output: dataset entries followed by the tabular dump.
        dataset.log()
        logger.dump_tabular(print_func=logger.info)

        # Debug-level output: a blank line, then each line of the timer's
        # string form.
        logger.debug('')
        timer_rows = str(timeit).split('\n')
        for row in timer_rows:
            logger.debug(row)

        # Wipe recorded timings and reopen the 'total' timer.
        timeit.reset()
        timeit.start('total')