Пример #1
0
 def _advance_iteration_variables(self):
     """
     Advance to the next iteration and carry policy state forward.

     Delegates the generic 'cur' -> 'prev' rotation to
     Algorithm._advance_iteration_variables, then, for each of the
     self.M conditions, copies the previous KL step into the new
     trajectory info and deep-copies the previous policy info.
     """
     Algorithm._advance_iteration_variables(self)
     for cond in range(self.M):
         # Read from the previous iteration first, then write to the new one.
         carried_kl_step = self.prev[cond].traj_info.last_kl_step
         self.cur[cond].traj_info.last_kl_step = carried_kl_step
         # Deep copy so later changes to 'cur' do not alias 'prev'.
         carried_pol_info = copy.deepcopy(self.prev[cond].pol_info)
         self.cur[cond].pol_info = carried_pol_info
Пример #2
0
 def _advance_iteration_variables(self):
     """
     Rotate iteration data and seed the new 'cur' entries.

     Algorithm._advance_iteration_variables performs the shared
     bookkeeping; afterwards each condition's last KL step is copied
     from 'prev' and its policy info is deep-copied from 'prev'.
     """
     Algorithm._advance_iteration_variables(self)
     for cond in range(self.M):
         previous_kl = self.prev[cond].traj_info.last_kl_step
         self.cur[cond].traj_info.last_kl_step = previous_kl
         # An independent copy keeps 'prev' untouched by later updates.
         self.cur[cond].pol_info = copy.deepcopy(
             self.prev[cond].pol_info
         )
Пример #3
0
    def __init__(self, hyperparams):
        """
        Build the algorithm from ALG_BADMM defaults overridden by
        'hyperparams'.

        A deep copy of ALG_BADMM is updated with 'hyperparams' and passed
        to Algorithm.__init__. Each of the self.M entries of self.cur then
        receives a PolicyInfo with a policy prior attached, and the policy
        optimizer is constructed from the 'policy_opt' hyperparameters.
        """
        # Illustrative contents of the defaults (values may differ; check
        # the config module):
        # ALG_BADMM = {
        #     'inner_iterations': 4,
        #     'policy_dual_rate': 0.1,
        #     'policy_dual_rate_covar': 0.0,
        #     'fixed_lg_step': 0,
        #     'lg_step_schedule': 10.0,
        #     'ent_reg_schedule': 0.0,
        #     'init_pol_wt': 0.01,
        #     'policy_sample_mode': 'add',
        #     'exp_step_increase': 2.0,
        #     'exp_step_decrease': 0.5,
        #     'exp_step_upper': 0.5,
        #     'exp_step_lower': 1.0,
        # }
        merged = copy.deepcopy(ALG_BADMM)
        # dict.update: keys supplied by the caller override the defaults.
        merged.update(hyperparams)

        # Shared initialization (algorithm.py).
        Algorithm.__init__(self, merged)

        # Example of the expected 'policy_prior' entry:
        # algorithm['policy_prior'] = {
        #     'type': PolicyPriorGMM,
        #     'max_clusters': 20,
        #     'min_samples_per_cluster': 40,
        #     'max_samples': 40,
        # }
        prior_cfg = self._hyperparams['policy_prior']

        # NOTE(review): self.M is presumably hyperparams['conditions']
        # (e.g. 2), self._cond_idx hyperparams['train_conditions'], and
        # self.cur a list of self.M IterationData objects, all set up by
        # Algorithm.__init__ -- confirm in algorithm.py.
        for cond in range(self.M):
            iteration = self.cur[cond]
            # Fresh policy information for this condition.
            iteration.pol_info = PolicyInfo(self._hyperparams)
            # The prior class is built from its own config dict.
            iteration.pol_info.policy_prior = prior_cfg['type'](prior_cfg)

        # Example of the expected 'policy_opt' entry:
        # algorithm['policy_opt'] = {
        #     'type': PolicyOptTf,
        #     'network_params': {
        #         'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES],
        #         'obs_vector_data': [JOINT_ANGLES, JOINT_VELOCITIES],
        #         'sensor_dims': SENSOR_DIMS,
        #     },
        #     'network_model': tf_network,
        #     'iterations': 1000,
        #     'weights_file_prefix': EXP_DIR + 'policy',
        # }
        opt_cfg = self._hyperparams['policy_opt']
        self.policy_opt = opt_cfg['type'](opt_cfg, self.dO, self.dU)
Пример #4
0
    def __init__(self, hyperparams):
        """
        Initialize from ALG_BADMM defaults overridden by 'hyperparams'.

        After the shared Algorithm.__init__ runs on the merged config,
        every condition in self.cur gets a PolicyInfo carrying a policy
        prior, and self.policy_opt is built from the 'policy_opt' entry
        together with self.dO and self.dU.
        """
        merged = copy.deepcopy(ALG_BADMM)
        merged.update(hyperparams)
        Algorithm.__init__(self, merged)

        prior_cfg = self._hyperparams['policy_prior']
        for cond in range(self.M):
            iteration = self.cur[cond]
            iteration.pol_info = PolicyInfo(self._hyperparams)
            # The configured prior class receives its own config dict.
            iteration.pol_info.policy_prior = prior_cfg['type'](prior_cfg)

        opt_cfg = self._hyperparams['policy_opt']
        self.policy_opt = opt_cfg['type'](opt_cfg, self.dO, self.dU)
Пример #5
0
    def __init__(self, hyperparams):
        """
        Initialize from ALG_MDGPS defaults overridden by 'hyperparams'.

        Runs Algorithm.__init__ on the merged config, gives each of the
        self.M entries in self.cur a PolicyInfo with a policy prior, and
        constructs self.policy_opt from the 'policy_opt' hyperparameters,
        self.dO and self.dU.
        """
        merged = copy.deepcopy(ALG_MDGPS)
        merged.update(hyperparams)
        Algorithm.__init__(self, merged)

        for cond in range(self.M):
            iteration = self.cur[cond]
            iteration.pol_info = PolicyInfo(self._hyperparams)
            # Looked up per condition, so a missing 'policy_prior' key
            # only matters when there is at least one condition.
            prior_cfg = self._hyperparams['policy_prior']
            iteration.pol_info.policy_prior = prior_cfg['type'](prior_cfg)

        opt_cfg = self._hyperparams['policy_opt']
        self.policy_opt = opt_cfg['type'](opt_cfg, self.dO, self.dU)
Пример #6
0
    def __init__(self, hyperparams):
        """
        Initialize from ALG_OLGPS defaults overridden by 'hyperparams'.

        Runs Algorithm.__init__ on the merged config, builds
        self.policy_opt from the 'policy_opt' hyperparameters, clears
        self.flag_reset, and stores a PolicyInfo with a policy prior as
        'last_pol' on each of the self.M entries of self.cur.
        """
        merged = copy.deepcopy(ALG_OLGPS)
        merged.update(hyperparams)
        Algorithm.__init__(self, merged)

        opt_cfg = self._hyperparams['policy_opt']
        self.policy_opt = opt_cfg['type'](opt_cfg, self.dO, self.dU)
        self.flag_reset = False

        prior_cfg = self._hyperparams['policy_prior']
        for cond in range(self.M):
            iteration = self.cur[cond]
            iteration.last_pol = PolicyInfo(self._hyperparams)
            # The configured prior class receives its own config dict.
            iteration.last_pol.policy_prior = prior_cfg['type'](prior_cfg)
Пример #7
0
 def __init__(self, hyperparams):
     """
     Initialize from ALG_PI2 defaults overridden by 'hyperparams'.

     The defaults are deep-copied so the module-level ALG_PI2 dict is
     never mutated; the merged result is passed to Algorithm.__init__.
     """
     merged = copy.deepcopy(ALG_PI2)
     # Caller-supplied keys take precedence over the defaults.
     merged.update(hyperparams)
     Algorithm.__init__(self, merged)
Пример #8
0
 def __init__(self, hyperparams):
     """
     Initialize by delegating to Algorithm.__init__.

     'hyperparams' is passed through unchanged; no algorithm-specific
     defaults are merged in here.
     """
     Algorithm.__init__(self, hyperparams)
Пример #9
0
    def __init__(self, hyperparams):
        """
        Initialize via Algorithm.__init__ and build the policy optimizer.

        'hyperparams' is passed through unchanged. The optimizer class is
        taken from the 'type' key of the 'policy_opt' hyperparameters and
        is called with that config dict, self.dO and self.dU.
        """
        Algorithm.__init__(self, hyperparams)

        opt_cfg = self._hyperparams['policy_opt']
        self.policy_opt = opt_cfg['type'](opt_cfg, self.dO, self.dU)