Example #1
 def __init__(self):
     variables = {
         'dynamics': None,  # Dynamics object for the current iteration.
         'x0mu': None,  # Mean for the initial state, used by the dynamics.
         'x0sigma': None,  # Covariance for the initial state distribution.
         'cc': None,  # Cost estimate constant term.
         'cv': None,  # Cost estimate vector term.
         'Cm': None,  # Cost estimate matrix term.
         'last_kl_step': float('inf'),  # KL step of the previous iteration.
     }
     BundleType.__init__(self, variables)
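Every constructor in these examples hands a dict of named fields to BundleType.__init__, but BundleType itself never appears in the snippets. The following is a minimal sketch of what such a base class could look like, assuming its only job is to expose each key of the variables dict as an instance attribute and to reject attributes that were not declared up front; the real implementation behind these examples may differ.

class BundleType(object):
    """Minimal sketch (assumed): expose a dict of named fields as attributes."""

    def __init__(self, variables):
        # Each declared field becomes an attribute, e.g. self.dynamics.
        for var, val in variables.items():
            object.__setattr__(self, var, val)

    def __setattr__(self, key, value):
        # Freeze the field set: only fields declared in __init__ may be assigned.
        if not hasattr(self, key):
            raise AttributeError("%r has no attribute %s" % (self, key))
        object.__setattr__(self, key, value)

With a base class like this, the examples above behave like lightweight, typo-safe records: iteration state is read and updated through plain attribute access, and there is exactly one place listing every field together with a one-line description.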
Example #2
 def __init__(self):
     variables = {
         'sample_list': None,  # List of samples for the current iteration.
         'traj_info': None,  # Current TrajectoryInfo object.
         'pol_info': None,  # Current PolicyInfo object.
         'traj_distr': None,  # Initial trajectory distribution.
         'cs': None,  # Sample costs of the current iteration.
         'step_mult': 1.0,  # KL step multiplier for the current iteration.
         'eta': 1.0,  # Dual variable used in LQR backward pass.
     }
     BundleType.__init__(self, variables)
Example #3
 def __init__(self):
     variables = {
         'sample_list': None,  # List of samples for the current iteration.
         'traj_info': None,  # Current TrajectoryInfo object.
         'pol_info': None,  # Current PolicyInfo object.
         'traj_distr': None,  # Initial trajectory distribution.
         'new_traj_distr': None,  # Updated trajectory distribution.
         'cs': None,  # Sample costs of the current iteration.
         'step_mult': 1.0,  # KL step multiplier for the current iteration.
         'eta': 1.0,  # Dual variable used in LQR backward pass.
     }
     BundleType.__init__(self, variables)
Example #4
 def __init__(self, hyperparams):
     T, dU, dX = hyperparams['T'], hyperparams['dU'], hyperparams['dX']
     variables = {
         'pol_mu': None,  # Mean of the current policy output.
         'pol_sig': None,  # Covariance of the current policy output.
         'pol_K': np.zeros((T, dU, dX)),  # Policy linearization.
         'pol_k': np.zeros((T, dU)),  # Policy linearization.
         'pol_S': np.zeros((T, dU, dU)),  # Policy linearization covariance.
         'chol_pol_S': np.zeros((T, dU, dU)),  # Cholesky decomp of covar.
         'policy_prior': None,  # Current prior for policy linearization.
     }
     BundleType.__init__(self, variables)
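As a hypothetical illustration of how a constructor like the one above might be used, assume it belongs to a class named PolicyInfo that subclasses the BundleType sketched after Example #1 (the class name and the sizes below are assumptions, not taken from the snippet). The shapes of the preallocated arrays then follow directly from T, dU, and dX:

import numpy as np

pol_info = PolicyInfo({'T': 100, 'dU': 7, 'dX': 26})  # illustrative sizes only

print(pol_info.pol_K.shape)       # (100, 7, 26): time-indexed feedback gains
print(pol_info.pol_k.shape)       # (100, 7): time-indexed offset terms
print(pol_info.chol_pol_S.shape)  # (100, 7, 7): one Cholesky factor per step
pol_info.pol_mu = np.zeros((100, 7))  # filled in later from policy rollouts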
Example #5
 def __init__(self):
     variables = {
         'sample_list': None,  # List of samples for the current iteration.
         'syn_sample_list': None,  # List of synthetic samples.
         'traj_info': None,  # Current TrajectoryInfo object.
         'prevcost_traj_info': None,  # Current TrajectoryInfo object using the previous IOC cost.
         'init_pol_info': None,  # Initial PolicyInfo object.
         'pol_info': None,  # Current PolicyInfo object.
         'traj_distr': None,  # Initial trajectory distribution.
         'cs': None,  # Sample costs of the current iteration.
         'cgt': None,  # Ground-truth sample costs of the current iteration.
         'step_mult': 1.0,  # KL step multiplier for the current iteration.
         'eta': 1.0,  # Dual variable used in LQR backward pass.
     }
     BundleType.__init__(self, variables)
Example #6
 def __init__(self):
     variables = {
         'dynamics': None,  # Dynamics object for the current iteration.
         'x0mu': None,  # Mean for the initial state, used by the dynamics.
         'x0sigma': None,  # Covariance for the initial state distribution.
         'xmu': None,  # Mean of the real-world trajectory distribution.
         'ref_x': None,  # Reference states.
         'ref_u': None,  # Reference actions.
         'xmusigma': None,  # Covariance of the real-world trajectory distribution.
         'cc': None,  # Cost estimate constant term.
         'cv': None,  # Cost estimate vector term.
         'Cm': None,  # Cost estimate matrix term.
         'cs': None,  # Actual sample costs.
         'last_kl_step': float('inf'),  # KL step of the previous iteration.
     }
     BundleType.__init__(self, variables)
Example #7
 def __init__(self, hyperparams):
     T, dU, dX = hyperparams['T'], hyperparams['dU'], hyperparams['dX']
     variables = {
         'lambda_k': np.zeros((T, dU)),  # Dual variables.
         'lambda_K': np.zeros((T, dU, dX)),  # Dual variables.
         'pol_wt': hyperparams['init_pol_wt'] * np.ones(T),  # Policy weight.
         'pol_mu': None,  # Mean of the current policy output.
         'pol_sig': None,  # Covariance of the current policy output.
         'pol_K': np.zeros((T, dU, dX)),  # Policy linearization.
         'pol_k': np.zeros((T, dU)),  # Policy linearization.
         'pol_S': np.zeros((T, dU, dU)),  # Policy linearization covariance.
         'chol_pol_S': np.zeros((T, dU, dU)),  # Cholesky decomp of covar.
         'prev_kl': None,  # Previous KL divergence.
         'policy_samples': [],  # List of current policy samples.
         'policy_prior': None,  # Current prior for policy linearization.
     }
     BundleType.__init__(self, variables)
Example #8
 def __init__(self, hyperparams):
     T, dU, dX = hyperparams['T'], hyperparams['dU'], hyperparams['dX']
     variables = {
         'lambda_k': np.zeros((T, dU)),  # Dual variables.
         'lambda_K': np.zeros((T, dU, dX)),  # Dual variables.
         'pol_wt': hyperparams['init_pol_wt'] * np.ones(T),  # Policy weight.
         'pol_mu': None,  # Mean of the current policy output.
         'pol_sig': None,  # Covariance of the current policy output.
         'pol_K': np.zeros((T, dU, dX)),  # Policy linearization.
         'pol_k': np.zeros((T, dU)),  # Policy linearization.
         'pol_S': np.zeros((T, dU, dU)),  # Policy linearization covariance.
         'chol_pol_S': np.zeros((T, dU, dU)),  # Cholesky decomp of covar.
         'prev_kl': None,  # Previous KL divergence.
         'init_kl': None,  # The initial KL divergence, before the iteration.
         'policy_samples': [],  # List of current policy samples.
         'policy_prior': None,  # Current prior for policy linearization.
     }
     BundleType.__init__(self, variables)
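One practical consequence of routing every field through BundleType (as sketched after Example #1) is that the field set is fixed at construction time, so a misspelled field name fails immediately instead of silently creating a new attribute. A hypothetical illustration, again assuming the constructor above sits in a class named PolicyInfo:

pol_info = PolicyInfo({'T': 50, 'dU': 7, 'dX': 26, 'init_pol_wt': 0.01})

pol_info.prev_kl = 1.5                   # fine: 'prev_kl' was declared above
pol_info.pol_wt = 2.0 * pol_info.pol_wt  # fine: reassigning a declared field
try:
    pol_info.pol_weight = None           # typo: never declared in __init__
except AttributeError:
    print('BundleType (as sketched) rejects undeclared attributes')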