def __init__(self):
    """Initialize the per-iteration trajectory information fields."""
    init_vars = {
        'dynamics': None,              # Dynamics object for the current iteration.
        'x0mu': None,                  # Mean of the initial state, used by the dynamics.
        'x0sigma': None,               # Covariance of the initial state distribution.
        'cc': None,                    # Constant term of the cost estimate.
        'cv': None,                    # Vector term of the cost estimate.
        'Cm': None,                    # Matrix term of the cost estimate.
        'last_kl_step': float('inf'),  # KL step taken on the previous iteration.
    }
    BundleType.__init__(self, init_vars)
def __init__(self):
    """Set up the quantities tracked over a single iteration."""
    # Fields that start unset and are filled in as the iteration runs:
    #   sample_list - list of samples for the current iteration.
    #   traj_info   - current TrajectoryInfo object.
    #   pol_info    - current PolicyInfo object.
    #   traj_distr  - initial trajectory distribution.
    #   cs          - sample costs of the current iteration.
    fields = dict.fromkeys(
        ['sample_list', 'traj_info', 'pol_info', 'traj_distr', 'cs'])
    fields['step_mult'] = 1.0  # KL step multiplier for the current iteration.
    fields['eta'] = 1.0        # Dual variable used in the LQR backward pass.
    BundleType.__init__(self, fields)
def __init__(self):
    """Create the bundle of quantities tracked across one iteration."""
    BundleType.__init__(self, dict(
        sample_list=None,     # List of samples for the current iteration.
        traj_info=None,       # Current TrajectoryInfo object.
        pol_info=None,        # Current PolicyInfo object.
        traj_distr=None,      # Initial trajectory distribution.
        new_traj_distr=None,  # Updated trajectory distribution.
        cs=None,              # Sample costs of the current iteration.
        step_mult=1.0,        # KL step multiplier for the current iteration.
        eta=1.0,              # Dual variable used in LQR backward pass.
    ))
def __init__(self, hyperparams):
    """Allocate policy-linearization storage sized by the problem dimensions.

    Args:
        hyperparams: dict providing 'T' (time horizon), 'dU' (action
            dimension), and 'dX' (state dimension).
    """
    T = hyperparams['T']
    dU = hyperparams['dU']
    dX = hyperparams['dX']
    BundleType.__init__(self, {
        'pol_mu': None,                       # Mean of the current policy output.
        'pol_sig': None,                      # Covariance of the current policy output.
        'pol_K': np.zeros((T, dU, dX)),       # Policy linearization.
        'pol_k': np.zeros((T, dU)),           # Policy linearization.
        'pol_S': np.zeros((T, dU, dU)),       # Policy linearization covariance.
        'chol_pol_S': np.zeros((T, dU, dU)),  # Cholesky decomposition of the covariance.
        'policy_prior': None,                 # Current prior for policy linearization.
    })
def __init__(self):
    """Set up the per-iteration bookkeeping, including IOC cost fields."""
    # All of these start unset and are populated during the iteration:
    #   sample_list        - list of samples for the current iteration.
    #   syn_sample_list    - list of synthetic samples.
    #   traj_info          - current TrajectoryInfo object.
    #   prevcost_traj_info - TrajectoryInfo using the previous IOC cost.
    #   init_pol_info      - initial PolicyInfo object.
    #   pol_info           - current PolicyInfo object.
    #   traj_distr         - initial trajectory distribution.
    #   cs                 - sample costs of the current iteration.
    #   cgt                - ground-truth sample cost of the current iteration.
    fields = dict.fromkeys([
        'sample_list', 'syn_sample_list', 'traj_info', 'prevcost_traj_info',
        'init_pol_info', 'pol_info', 'traj_distr', 'cs', 'cgt',
    ])
    fields['step_mult'] = 1.0  # KL step multiplier for the current iteration.
    fields['eta'] = 1.0        # Dual variable used in the LQR backward pass.
    BundleType.__init__(self, fields)
def __init__(self):
    """Initialize trajectory-level quantities for the current iteration."""
    info = {}
    # Fitted dynamics and initial-state statistics.
    info['dynamics'] = None   # Dynamics object for the current iteration.
    info['x0mu'] = None       # Mean of the initial state, used by the dynamics.
    info['x0sigma'] = None    # Covariance of the initial state distribution.
    # Real-world trajectory statistics and references.
    info['xmu'] = None        # Mean of the real-world trajectory distribution.
    info['ref_x'] = None      # Reference states.
    info['ref_u'] = None      # Reference actions.
    info['xmusigma'] = None   # Covariance of the real-world trajectory distribution.
    # Cost estimate terms and realized costs.
    info['cc'] = None         # Constant term of the cost estimate.
    info['cv'] = None         # Vector term of the cost estimate.
    info['Cm'] = None         # Matrix term of the cost estimate.
    info['cs'] = None         # Actual costs.
    info['last_kl_step'] = float('inf')  # KL step of the previous iteration.
    BundleType.__init__(self, info)
def __init__(self, hyperparams):
    """Allocate per-timestep policy linearization and dual-variable storage.

    Args:
        hyperparams: dict providing 'T', 'dU', 'dX' (horizon, action dim,
            state dim) and 'init_pol_wt' (initial policy weight).
    """
    T = hyperparams['T']
    dU = hyperparams['dU']
    dX = hyperparams['dX']
    zeros = np.zeros
    info = {
        'lambda_k': zeros((T, dU)),            # Dual variables.
        'lambda_K': zeros((T, dU, dX)),        # Dual variables.
        'pol_wt': hyperparams['init_pol_wt'] * np.ones(T),  # Policy weight.
        'pol_mu': None,                        # Mean of the current policy output.
        'pol_sig': None,                       # Covariance of the current policy output.
        'pol_K': zeros((T, dU, dX)),           # Policy linearization.
        'pol_k': zeros((T, dU)),               # Policy linearization.
        'pol_S': zeros((T, dU, dU)),           # Policy linearization covariance.
        'chol_pol_S': zeros((T, dU, dU)),      # Cholesky decomposition of the covariance.
        'prev_kl': None,                       # Previous KL divergence.
        'policy_samples': [],                  # List of current policy samples.
        'policy_prior': None,                  # Current prior for policy linearization.
    }
    BundleType.__init__(self, info)
def __init__(self, hyperparams):
    """Allocate policy, dual-variable, and KL-tracking storage per timestep.

    Args:
        hyperparams: dict providing 'T', 'dU', 'dX' (horizon, action dim,
            state dim) and 'init_pol_wt' (initial policy weight).
    """
    T, dU, dX = hyperparams['T'], hyperparams['dU'], hyperparams['dX']
    # Zero-initialized array fields, keyed by name -> shape.
    array_shapes = [
        ('lambda_k', (T, dU)),        # Dual variables.
        ('lambda_K', (T, dU, dX)),    # Dual variables.
        ('pol_K', (T, dU, dX)),       # Policy linearization.
        ('pol_k', (T, dU)),           # Policy linearization.
        ('pol_S', (T, dU, dU)),       # Policy linearization covariance.
        ('chol_pol_S', (T, dU, dU)),  # Cholesky decomposition of the covariance.
    ]
    info = {name: np.zeros(shape) for name, shape in array_shapes}
    info['pol_wt'] = hyperparams['init_pol_wt'] * np.ones(T)  # Policy weight.
    info['pol_mu'] = None          # Mean of the current policy output.
    info['pol_sig'] = None         # Covariance of the current policy output.
    info['prev_kl'] = None         # Previous KL divergence.
    info['init_kl'] = None         # Initial KL divergence, before the iteration.
    info['policy_samples'] = []    # List of current policy samples.
    info['policy_prior'] = None    # Current prior for policy linearization.
    BundleType.__init__(self, info)