def nn_constructor(self, model_init_cfg):
    """Instantiate the model described by ``model_init_cfg`` without adding layers.

    Arguments:
        model_init_cfg: Configuration that must contain ``model_class`` (a
            callable invoked with a DotMap of settings) and ``num_nets``
            (forwarded as ``num_networks``).

    Returns:
        The object returned by ``model_class``. No layers are added and the
        model is not finalized here.
    """
    model_class = get_required_argument(
        model_init_cfg, "model_class", "Must provide model class"
    )
    ensemble_size = get_required_argument(
        model_init_cfg, "num_nets", "Must provide ensemble size"
    )
    settings = DotMap(name="model", num_networks=ensemble_size, sess=self.SESS)
    model = model_class(settings)

    # Template hook: build the network here, for example
    #     model.add(FC(*args))
    #     ...
    #     model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
    return model
def gp_constructor(self, model_init_cfg):
    """Instantiate the GP model described by ``model_init_cfg``.

    Arguments:
        model_init_cfg: Configuration that must contain ``model_class``,
            ``kernel_class`` and ``num_inducing_points``; ``kernel_args`` is
            optional and defaults to an empty dict.

    Returns:
        The object returned by calling ``model_class`` with a DotMap holding
        the name ``"model"``, the kernel settings, the number of inducing
        points and ``self.SESS``.
    """
    model_class = get_required_argument(
        model_init_cfg, "model_class", "Must provide model class"
    )
    kernel_class = get_required_argument(
        model_init_cfg, "kernel_class", "Must provide kernel class"
    )
    kernel_args = model_init_cfg.get("kernel_args", {})
    n_inducing = get_required_argument(
        model_init_cfg, "num_inducing_points", "Must provide number of inducing points."
    )
    settings = DotMap(
        name="model",
        kernel_class=kernel_class,
        kernel_args=kernel_args,
        num_inducing_points=n_inducing,
        sess=self.SESS,
    )
    return model_class(settings)
def value_nn_constructor(self, name, model_init_cfg_val):
    """Create and finalize the value-function network ensemble.

    Arguments:
        name: Name given to the model.
        model_init_cfg_val: Configuration that must contain ``model_class``
            and ``num_nets``; ``load_model`` (default False) and ``model_dir``
            (default None) are optional.

    Returns:
        The finalized model. Layers are only added when ``load_model`` is
        falsy; finalization always happens.
    """
    model_class = get_required_argument(
        model_init_cfg_val, "model_class", "Must provide model class"
    )
    settings = DotMap(
        name=name,
        num_networks=get_required_argument(
            model_init_cfg_val, "num_nets", "Must provide ensemble size"
        ),
        sess=self.SESS,
        load_model=model_init_cfg_val.get("load_model", False),
        model_dir=model_init_cfg_val.get("model_dir", None),
    )
    model = model_class(settings)

    load_existing = model_init_cfg_val.get("load_model", False)
    if not load_existing:
        # Input layer, two identical hidden layers, then the output layer.
        model.add(FC(500, input_dim=self.VALUE_IN, activation='swish', weight_decay=0.0001))
        for _ in range(2):
            model.add(FC(500, activation='swish', weight_decay=0.00025))
        model.add(FC(self.VALUE_OUT, weight_decay=0.0005))

    model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}, suffix="val")
    return model
def nn_constructor(self, model_init_cfg):
    """Create and finalize the dynamics-model network ensemble.

    Arguments:
        model_init_cfg: Configuration that must contain ``model_class`` and
            ``num_nets``; ``load_model`` (default False) and ``model_dir``
            (default None) are optional.

    Returns:
        The finalized model. Layers are only added when ``load_model`` is
        falsy; finalization always happens.
    """
    model_class = get_required_argument(
        model_init_cfg, "model_class", "Must provide model class"
    )
    settings = DotMap(
        name="model",
        num_networks=get_required_argument(
            model_init_cfg, "num_nets", "Must provide ensemble size"
        ),
        sess=self.SESS,
        load_model=model_init_cfg.get("load_model", False),
        model_dir=model_init_cfg.get("model_dir", None),
    )
    model = model_class(settings)

    load_existing = model_init_cfg.get("load_model", False)
    if not load_existing:
        # Input layer, three identical hidden layers, then the output layer.
        model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.00025))
        for _ in range(3):
            model.add(FC(200, activation="swish", weight_decay=0.0005))
        model.add(FC(self.MODEL_OUT, weight_decay=0.00075))

    model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.00075})
    return model
def value_nn_constructor(self, name, model_init_cfg_val):
    """Instantiate the value model described by ``model_init_cfg_val`` without adding layers.

    Arguments:
        name: Name given to the model.
        model_init_cfg_val: Configuration that must contain ``model_class``
            and ``num_nets``; ``load_model`` (default False) and ``model_dir``
            (default None) are optional.

    Returns:
        The object returned by ``model_class``. No layers are added and the
        model is not finalized here.
    """
    model_class = get_required_argument(
        model_init_cfg_val, "model_class", "Must provide model class"
    )
    ensemble_size = get_required_argument(
        model_init_cfg_val, "num_nets", "Must provide ensemble size"
    )
    settings = DotMap(
        name=name,
        num_networks=ensemble_size,
        sess=self.SESS,
        load_model=model_init_cfg_val.get("load_model", False),
        model_dir=model_init_cfg_val.get("model_dir", None),
    )
    model = model_class(settings)

    # Template hook: build the network here, for example
    #     model.add(FC(*args))
    #     ...
    #     model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
    return model
def __init__(self, name, params):
    """Set up the value function: bounds, model, processing hooks and buffers.

    Arguments:
        name: Forwarded as the first argument to the model constructor.
        params: Configuration object (attribute access plus ``.get``):
            .gym_robotics: If truthy, the observation size is read from
                ``env.observation_space.spaces['observation'].low.size``,
                otherwise from ``env.observation_space.shape[0]``.
            .env: Environment providing ``observation_space`` and
                ``action_space``.
            .ac_ub / .ac_lb: (optional) Action bounds; clipped against the
                environment's own bounds.
            .update_fns: (optional) Defaults to ``[]``.
            .per: (optional) Defaults to 1.
            .model_init_cfg_val: Must contain ``model_constructor``, which is
                called as ``model_constructor(name, model_init_cfg_val)``.
            .model_train_cfg: (optional) Defaults to ``{}``.
            .val_buffer_size: (optional) Stored as ``buffer_limit``;
                defaults to None.
            .ign_var: (optional) Defaults to False.
            .obs_preproc / .obs_postproc / .targ_proc: (optional) Processing
                hooks; each defaults to a pass-through.
            .log_cfg.save_all_models: (optional) Defaults to False.
            .prop_cfg.model_pretrained: (optional) Initial value of
                ``has_been_trained``; defaults to False.

    Raises:
        NotImplementedError: If the constructed model's ``is_tf_model`` is
            falsy.
    """
    super().__init__(params)

    if params.gym_robotics:
        self.dO = params.env.observation_space.spaces['observation'].low.size
    else:
        self.dO = params.env.observation_space.shape[0]

    # Never allow user-supplied bounds to exceed what the environment accepts.
    self.ac_ub, self.ac_lb = params.env.action_space.high, params.env.action_space.low
    self.ac_ub = np.minimum(self.ac_ub, params.get("ac_ub", self.ac_ub))
    self.ac_lb = np.maximum(self.ac_lb, params.get("ac_lb", self.ac_lb))
    self.update_fns = params.get("update_fns", [])
    self.per = params.get("per", 1)

    self.model = get_required_argument(
        params.model_init_cfg_val, "model_constructor",
        "Must provide a model constructor."
    )(name, params.model_init_cfg_val)
    self.model_train_cfg = params.get("model_train_cfg", {})
    self.buffer_limit = params.get("val_buffer_size", None)
    self.ign_var = params.get("ign_var", False)

    self.obs_preproc = params.get("obs_preproc", lambda obs: obs)
    self.obs_postproc = params.get("obs_postproc", lambda obs, model_out: model_out)
    self.targ_proc = params.get("targ_proc", lambda val: val)

    self.save_all_models = params.log_cfg.get("save_all_models", False)

    # Controller state variables
    self.has_been_trained = params.prop_cfg.get("model_pretrained", False)
    # Zero-row training input whose width is whatever obs_preproc produces
    # for a single dO-sized observation.
    self.train_in = np.array([]).reshape(
        0, self.obs_preproc(np.zeros([1, self.dO])).shape[-1]
    )
    self.train_targs = np.array([])
    self.buffer = {
        "obs": [],
        "cost": [],
        "next_obs": [],
        "val": [],
        "terminal": [],
    }

    if self.model.is_tf_model:
        self.sy_cur_obs = tf.Variable(np.zeros(self.dO), dtype=tf.float32)
        self.model.sess.run(tf.variables_initializer([self.sy_cur_obs]))
    else:
        raise NotImplementedError()

    if self.save_all_models:
        print(
            "Value function will save all models. (Note: This may be memory-intensive."
        )
    else:
        print("Value function won't save all models")
def bnn_constructor(model_init_cfg):
    """Construct the Bayesian neural network (BNN) ensemble model.

    A new TensorFlow session with GPU memory growth enabled is created and
    handed to the model.

    Arguments:
        model_init_cfg (DotMap): Configuration containing:
            - model_name (str): Required. Name of the model.
            - num_nets (int): Required. Ensemble size.
            - load_model (bool): Optional, defaults to False. When True no
              layers are added here.
            - model_dir: Optional, defaults to None. Forwarded to the model.
            - model_in (int): Number of inputs to the model.
            - model_out (int): Number of outputs of the model.
            - n_layers (int): Number of hidden layers added after the input
              layer (the network has ``n_layers + 1`` layers of
              ``n_neurons`` units before the output layer).
            - n_neurons (int): Number of neurons per hidden layer.
            - learning_rate (float): Learning rate passed to the optimizer.
            - wd_in (float): Weight decay for the input layer.
            - wd_hid (float): Weight decay for the hidden layers.
            - wd_out (float): Weight decay for the output layer.

    Returns:
        The finalized BNN object.
    """
    cfg = tf.ConfigProto()
    cfg.gpu_options.allow_growth = True
    sess = tf.Session(config=cfg)

    model = BNN(
        DotMap(
            # Message fixed: the original said "model name size" (copy-paste).
            name=get_required_argument(
                model_init_cfg, "model_name", "Must provide model name"
            ),
            num_networks=get_required_argument(
                model_init_cfg, "num_nets", "Must provide ensemble size"
            ),
            sess=sess,
            load_model=model_init_cfg.get("load_model", False),
            model_dir=model_init_cfg.get("model_dir", None),
        )
    )

    if not model_init_cfg.get("load_model", False):
        model.add(
            FC(
                model_init_cfg.n_neurons,
                input_dim=model_init_cfg.model_in,
                activation="swish",
                weight_decay=model_init_cfg.wd_in,
            )
        )
        for _ in range(model_init_cfg.n_layers):
            model.add(
                FC(
                    model_init_cfg.n_neurons,
                    activation="swish",
                    weight_decay=model_init_cfg.wd_hid,
                )
            )
        model.add(FC(model_init_cfg.model_out, weight_decay=model_init_cfg.wd_out))

    model.finalize(
        tf.train.AdamOptimizer, {"learning_rate": model_init_cfg.learning_rate}
    )
    return model
def __init__(self, params):
    """Set up the experiment from its configuration.

    Argument:
        params (DotMap): Configuration with the sections below.
            .sim_cfg:
                .env (gym.env): Environment for this experiment. Required.
                .task_hor (int): Task horizon. Required.
                .stochastic (bool): (optional) If True, the agent adds noise
                    to its actions and noise_std must be given.
                    Defaults to False.
                .noise_std (float): For stochastic agents, noise of the form
                    N(0, noise_std^2 I) is added.
            .exp_cfg:
                .ntrain_iters (int): Number of training iterations. Required.
                .nrollouts_per_iter (int): (optional) Rollouts between
                    training iterations. Defaults to 1.
                .ninit_rollouts (int): (optional) Number of initial rollouts.
                    Defaults to 1.
                .policy (controller): Policy to be trained. Required.
            .log_cfg:
                .logdir (str): Parent directory; data is saved under
                    logdir/<date+time of experiment start>. Required.
                .nrecord (int): (optional) Rollouts recorded per iteration.
                    Defaults to 0.
                .neval (int): (optional) Rollouts used for evaluation.
                    Defaults to 1.
    """
    sim_cfg = params.sim_cfg
    self.env = get_required_argument(sim_cfg, "env", "Must provide environment.")
    self.task_hor = get_required_argument(sim_cfg, "task_hor", "Must provide task horizon.")

    if params.sim_cfg.get("stochastic", False):
        noise_std = get_required_argument(
            params.sim_cfg, "noise_std",
            "Must provide noise standard deviation in the case of a stochastic environment."
        )
        agent_cfg = DotMap(env=self.env, noisy_actions=True, noise_stddev=noise_std)
    else:
        agent_cfg = DotMap(env=self.env, noisy_actions=False)
    self.agent = Agent(agent_cfg)

    exp_cfg = params.exp_cfg
    self.ntrain_iters = get_required_argument(
        exp_cfg, "ntrain_iters", "Must provide number of training iterations."
    )
    self.nrollouts_per_iter = exp_cfg.get("nrollouts_per_iter", 1)
    self.ninit_rollouts = exp_cfg.get("ninit_rollouts", 1)
    self.policy = get_required_argument(exp_cfg, "policy", "Must provide a policy.")

    log_cfg = params.log_cfg
    parent_dir = get_required_argument(
        log_cfg, "logdir", "Must provide log parent directory."
    )
    # Underscores instead of colons keep the directory name valid on Windows.
    timestamp = strftime("%Y-%m-%d--%H_%M_%S", localtime())
    self.logdir = os.path.join(parent_dir, timestamp)
    self.nrecord = log_cfg.get("nrecord", 0)
    self.neval = log_cfg.get("neval", 1)
def __init__(self, params):
    """Initializes class instance.

    Arguments:
        params
            .name (str): (optional) Model name. Defaults to "GP".
            .kernel_class (class): Kernel class. Required.
            .kernel_args (args): (optional) Kernel keyword arguments.
                "output_dim" is removed before the rest is passed to
                kernel_class; "input_dim" is read to size the placeholder
                data. The mapping passed in is not modified.
            .num_inducing_points (int): Number of inducing points. Required.
            .sess (tf.compat.v1.Session): (optional) Tensorflow session. If
                missing or None, a new session with GPU memory growth
                enabled is created.
    """
    self.name = params.get("name", "GP")
    self.kernel_class = get_required_argument(
        params, "kernel_class", "Must provide kernel class."
    )
    # Work on a copy: "output_dim" is deleted below, and deleting it from the
    # caller's mapping would break a second construction from the same config.
    self.kernel_args = params.get("kernel_args", {}).copy()
    self.num_inducing_points = get_required_argument(
        params, "num_inducing_points", "Must provide number of inducing points."
    )

    if params.get("sess", None) is None:
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        self._sess = tf.compat.v1.Session(config=config)
    else:
        self._sess = params.get("sess")

    with self._sess.as_default():
        with tf.compat.v1.variable_scope(self.name):
            # output_dim only sizes the placeholder targets; it is not passed
            # to kernel_class.
            output_dim = self.kernel_args["output_dim"]
            del self.kernel_args["output_dim"]
            input_dim = self.kernel_args["input_dim"]
            # The model is created on all-zero placeholder data and inducing
            # points.
            self.model = gpflow.models.SGPR(
                np.zeros([1, input_dim]),
                np.zeros([1, output_dim]),
                kern=self.kernel_class(**self.kernel_args),
                Z=np.zeros([self.num_inducing_points, input_dim]),
            )
            self.model.initialize()
def nn_constructor(self, model_init_cfg, misc=None):
    """Create and finalize the dynamics model from a config-driven architecture.

    Arguments:
        model_init_cfg: Configuration that must contain ``model_class``,
            ``num_nets`` and ``lr``. ``load_model`` (default False) and
            ``model_dir`` (default None) are optional. When ``load_model`` is
            falsy, ``network_shape``, ``activation`` and ``weight_decays``
            are required as well.
        misc: (optional) Forwarded unchanged to the model. Defaults to None.

    Returns:
        The finalized model.
    """
    model_class = get_required_argument(
        model_init_cfg, "model_class", "Must provide model class"
    )
    settings = DotMap(
        name="model",
        num_networks=get_required_argument(
            model_init_cfg, "num_nets", "Must provide ensemble size"
        ),
        sess=self.SESS,
        load_model=model_init_cfg.get("load_model", False),
        model_dir=model_init_cfg.get("model_dir", None),
        misc=misc,
    )
    model = model_class(settings)

    load_existing = model_init_cfg.get("load_model", False)
    if not load_existing:
        print('not building model from scratch')
        shape = get_required_argument(
            model_init_cfg, "network_shape", "network shape missing!"
        )
        act = get_required_argument(
            model_init_cfg, "activation", "acivations missing!"
        )
        decays = get_required_argument(
            model_init_cfg, "weight_decays", "weight decays missing!"
        )
        lr = get_required_argument(model_init_cfg, "lr", "learning rate missing!")
        # Presumably build_model adds the layers that used to be hard-coded
        # here as model.add(FC(...)) calls -- confirm against build_model.
        model = build_model(
            model, self.MODEL_IN, self.MODEL_OUT, shape, act, decays, lr
        )

    # "lr" is looked up again because the branch above may not have run.
    optimizer_args = {
        "learning_rate": get_required_argument(
            model_init_cfg, "lr", "learning rate missing!"
        )
    }
    model.finalize(tf.train.AdamOptimizer, optimizer_args)
    return model
def __init__(self, params):
    """Creates class instance.

    Arguments:
        params
            .env (gym.env): Environment this controller is used for.
            .update_fns (list<func>): (optional) Functions invoked (possibly
                with a tensorflow session) whenever the controller is reset.
                Defaults to [].
            .ac_ub (np.ndarray): (optional) Action upper bounds. Defaults to
                the environment's upper bounds.
            .ac_lb (np.ndarray): (optional) Action lower bounds. Defaults to
                the environment's lower bounds.
            .per (int): (optional) How often the action sequence is
                optimized. Defaults to 1 (every call to act()).
            .prop_cfg
                .model_init_cfg (DotMap): Model initialization parameters.
                    .model_constructor (func): Builds the model when called
                        with model_init_cfg. Required.
                .model_train_cfg (dict): (optional) Training parameters
                    passed to the model on every training call.
                    Defaults to {}.
                .model_pretrained (bool): (optional) If True, the model is
                    assumed to be trained already. Defaults to False.
                .mode (str): Propagation method, one of
                    [E, DS, TSinf, TS1, MM]. Required.
                    See https://arxiv.org/abs/1805.12114 for details.
                .npart (int): Number of particles. Required.
                .ign_var (bool): (optional) Whether the model's variance
                    output is ignored. Defaults to False, and is always True
                    for mode "E".
                .obs_preproc (func): (optional) Modifies observations (2D
                    matrix) before they enter the model. Defaults to
                    lambda obs: obs. Must handle NumPy and Tensorflow arrays.
                .obs_postproc (func): (optional) Computes, from the previous
                    observations and the model predictions, the vectors
                    passed to the observation cost function. Defaults to
                    lambda obs, model_out: model_out. Must handle NumPy and
                    Tensorflow arrays.
                .obs_postproc2 (func): (optional) Turns the output of
                    obs_postproc into the predicted next observations.
                    Defaults to lambda obs: obs. Must handle NumPy and
                    Tensorflow arrays.
                .targ_proc (func): (optional) Maps (obs, next_obs) to the
                    training targets. Defaults to
                    lambda obs, next_obs: next_obs. NumPy only.
            .opt_cfg
                .mode (str): Internal optimizer, one of [CEM, Random].
                    Required.
                .cfg (DotMap): (optional) Optimizer initializer parameters.
                .plan_hor (int): Planning horizon. Required.
                .obs_cost_fn (func): Cost of every observation in a 2D
                    matrix. Required. Must handle NumPy and Tensorflow
                    arrays.
                .ac_cost_fn (func): Cost of every action in a 2D matrix.
                    Required.
            .log_cfg
                .save_all_models (bool): (optional) Save models at every
                    iteration. Defaults to False. Can be very
                    memory-intensive.
                .log_traj_preds (bool): (optional) Save mean and variance of
                    predicted particle trajectories. Defaults to False.
                .log_particles (bool): (optional) Save all predicted particle
                    trajectories. Defaults to False. Takes precedence over
                    log_traj_preds. Can be very memory-intensive.

    Raises:
        ValueError: For an unknown propagation mode, a particle count that is
            not a multiple of the ensemble size (TS1/TSinf), or more than one
            particle with mode "E".
        NotImplementedError: If the model's ``is_tf_model`` is falsy.
    """
    super().__init__(params)

    self.dO = params.env.observation_space.shape[0]
    self.dU = params.env.action_space.shape[0]

    # Environment bounds, tightened by any user-supplied bounds.
    self.ac_ub = params.env.action_space.high
    self.ac_lb = params.env.action_space.low
    self.ac_ub = np.minimum(self.ac_ub, params.get("ac_ub", self.ac_ub))
    self.ac_lb = np.maximum(self.ac_lb, params.get("ac_lb", self.ac_lb))
    self.update_fns = params.get("update_fns", [])
    self.per = params.get("per", 1)

    # --- Propagation settings -------------------------------------------
    prop_cfg = params.prop_cfg
    build_model_fn = get_required_argument(
        prop_cfg.model_init_cfg, "model_constructor",
        "Must provide a model constructor."
    )
    self.model = build_model_fn(prop_cfg.model_init_cfg)
    self.model_train_cfg = prop_cfg.get("model_train_cfg", {})
    self.prop_mode = get_required_argument(
        prop_cfg, "mode", "Must provide propagation method."
    )
    self.npart = get_required_argument(
        prop_cfg, "npart", "Must provide number of particles."
    )
    self.ign_var = prop_cfg.get("ign_var", False) or self.prop_mode == "E"

    self.obs_preproc = prop_cfg.get("obs_preproc", lambda obs: obs)
    self.obs_postproc = prop_cfg.get("obs_postproc", lambda obs, model_out: model_out)
    self.obs_postproc2 = prop_cfg.get("obs_postproc2", lambda next_obs: next_obs)
    self.targ_proc = prop_cfg.get("targ_proc", lambda obs, next_obs: next_obs)

    # --- Optimization settings ------------------------------------------
    opt_section = params.opt_cfg
    self.opt_mode = get_required_argument(
        opt_section, "mode", "Must provide optimization method."
    )
    self.plan_hor = get_required_argument(
        opt_section, "plan_hor", "Must provide planning horizon."
    )
    self.obs_cost_fn = get_required_argument(
        opt_section, "obs_cost_fn", "Must provide cost on observations."
    )
    self.ac_cost_fn = get_required_argument(
        opt_section, "ac_cost_fn", "Must provide cost on actions."
    )

    # --- Logging settings -----------------------------------------------
    log_cfg = params.log_cfg
    self.save_all_models = log_cfg.get("save_all_models", False)
    self.log_traj_preds = log_cfg.get("log_traj_preds", False)
    self.log_particles = log_cfg.get("log_particles", False)

    # --- Argument checks ------------------------------------------------
    if self.prop_mode not in ("E", "DS", "MM", "TS1", "TSinf"):
        raise ValueError("Invalid propagation method.")
    if self.prop_mode in ("TS1", "TSinf") and self.npart % self.model.num_nets != 0:
        raise ValueError(
            "Number of particles must be a multiple of the ensemble size."
        )
    if self.prop_mode == "E" and self.npart != 1:
        raise ValueError(
            "Deterministic propagation methods only need one particle."
        )

    # --- Action sequence optimizer --------------------------------------
    optimizer_kwargs = params.opt_cfg.get("cfg", {})
    optimizer_cls = MPC.optimizers[params.opt_cfg.mode]
    self.optimizer = optimizer_cls(
        sol_dim=self.plan_hor * self.dU,
        lower_bound=np.tile(self.ac_lb, [self.plan_hor]),
        upper_bound=np.tile(self.ac_ub, [self.plan_hor]),
        tf_session=self.model.sess if self.model.is_tf_model else None,
        **optimizer_kwargs
    )

    # --- Controller state variables -------------------------------------
    self.has_been_trained = params.prop_cfg.get("model_pretrained", False)
    self.ac_buf = np.array([]).reshape(0, self.dU)
    self.prev_sol = np.tile((self.ac_lb + self.ac_ub) / 2, [self.plan_hor])
    self.init_var = np.tile(
        np.square(self.ac_ub - self.ac_lb) / 16, [self.plan_hor]
    )
    # Zero-row training arrays; widths come from running the processing
    # hooks on dummy observations.
    proc_obs_width = self.obs_preproc(np.zeros([1, self.dO])).shape[-1]
    self.train_in = np.array([]).reshape(0, self.dU + proc_obs_width)
    targ_width = self.targ_proc(
        np.zeros([1, self.dO]), np.zeros([1, self.dO])
    ).shape[-1]
    self.train_targs = np.array([]).reshape(0, targ_width)

    if not self.model.is_tf_model:
        raise NotImplementedError()
    self.sy_cur_obs = tf.Variable(np.zeros(self.dO), dtype=tf.float32)
    self.ac_seq = tf.placeholder(
        shape=[1, self.plan_hor * self.dU], dtype=tf.float32
    )
    self.pred_cost, self.pred_traj = self._compile_cost(
        self.ac_seq, get_pred_trajs=True
    )
    self.optimizer.setup(self._compile_cost, True)
    self.model.sess.run(tf.variables_initializer([self.sy_cur_obs]))

    summary = "Created an MPC controller, prop mode %s, %d particles. " % (
        self.prop_mode, self.npart
    )
    if self.ign_var:
        summary = summary + "Ignoring variance."
    print(summary)

    if self.save_all_models:
        print(
            "Controller will save all models. (Note: This may be memory-intensive."
        )

    if self.log_particles:
        print(
            "Controller is logging particle predictions (Note: This may be memory-intensive)."
        )
        self.pred_particles = []
    elif self.log_traj_preds:
        print(
            "Controller is logging trajectory prediction statistics (mean+var)."
        )
        self.pred_means, self.pred_vars = [], []
    else:
        print("Trajectory prediction logging is disabled.")
def __init__(self, params):
    """Initializes class instance.

    Argument:
        params (DotMap): A DotMap containing the following:
            .sim_cfg:
                .env (gym.env): Environment for this experiment. Required.
                .task_hor (int): Task horizon. Required.
                .stochastic (bool): (optional) If True, agent adds noise to
                    its actions. Must provide noise_std (see below).
                    Defaults to False.
                .noise_std (float): for stochastic agents, noise of the form
                    N(0, noise_std^2I) will be added.

            .exp_cfg:
                .ntrain_iters (int): Number of training iterations to be
                    performed. Required.
                .nrollouts_per_iter (int): (optional) Number of rollouts done
                    between training iterations. Defaults to 1.
                .policy (controller): Policy that will be trained. Required.
                .value: Value function. Required.
                .value_target: Target value function. Required; it is also
                    assigned to ``value.target``.
                .demo_low_cost (int): Minimum allowed cost for demonstrations
                .demo_high_cost (int): Maximum allowed cost for demonstrations
                .num_demos (int): Number of demonstrations
                .ss_buffer_size (int): Size of buffer of safe states that
                    density model is trained on
                .gym_robotics (bool): Indicates whether env is a gym robotics
                    env, in which case there are some small differences in
                    data loading and environment parameters
                .load_samples (bool): (optional) Defaults to True.
                .demo_load_path: (optional) Defaults to None.
                .use_value (bool): (optional) Defaults to True.
                .teacher: (optional) Defaults to None.

            .log_cfg:
                .logdir (str): Parent of directory path where experiment data
                    will be saved. Experiment will be saved in
                    logdir/<date+time of experiment start>. Required.
                .nrecord (int): (optional) Number of rollouts to record for
                    every iteration. Defaults to 0.
                .neval (int): (optional) Number of rollouts for performance
                    evaluation. Defaults to 1.
    """
    self.env = get_required_argument(params.sim_cfg, "env", "Must provide environment.")

    self.demo_low_cost = params.exp_cfg.demo_low_cost
    self.demo_high_cost = params.exp_cfg.demo_high_cost
    self.num_demos = params.exp_cfg.num_demos
    self.ss_buffer_size = params.exp_cfg.ss_buffer_size
    self.gym_robotics = params.exp_cfg.gym_robotics

    self.task_hor = get_required_argument(
        params.sim_cfg, "task_hor", "Must provide task horizon."
    )
    if params.sim_cfg.get("stochastic", False):
        self.agent = Agent(
            DotMap(
                env=self.env,
                noisy_actions=True,
                noise_stddev=get_required_argument(
                    params.sim_cfg, "noise_std",
                    "Must provide noise standard deviation in the case of a stochastic environment."
                ),
            )
        )
    else:
        self.agent = Agent(DotMap(env=self.env, noisy_actions=False))

    self.ntrain_iters = get_required_argument(
        params.exp_cfg, "ntrain_iters", "Must provide number of training iterations."
    )
    self.nrollouts_per_iter = params.exp_cfg.get("nrollouts_per_iter", 1)
    self.policy = get_required_argument(
        params.exp_cfg, "policy", "Must provide a policy."
    )
    self.value = get_required_argument(
        params.exp_cfg, "value", "Must provide a value function."
    )
    # Message fixed: the original reused the "value" message for this key.
    self.target = get_required_argument(
        params.exp_cfg, "value_target", "Must provide a target value function."
    )
    self.value.target = self.target

    self.logdir = os.path.join(
        get_required_argument(
            params.log_cfg, "logdir", "Must provide log parent directory."
        ),
        # Underscores, not colons: ':' is not allowed in Windows path names.
        strftime("%Y-%m-%d--%H_%M_%S", localtime()),
    )
    self.nrecord = params.log_cfg.get("nrecord", 0)
    self.neval = params.log_cfg.get("neval", 1)

    self.load_samples = params.exp_cfg.get("load_samples", True)
    self.demo_load_path = params.exp_cfg.get("demo_load_path", None)
    self.use_value = params.exp_cfg.get("use_value", True)
    self.teacher = params.exp_cfg.get("teacher")

    self.stabilizable_observations = []
    self.tvalue_schedule = LinearSchedule(3, 3, 500)
    self.stabilized_model = knn(n_neighbors=1)
    self.target_update_freq = 1
def __init__(self, params):
    """Create an MPC controller whose action bounds come from explicit constraints.

    Arguments:
        params
            .dO / .dU: Observation and action dimensions.
            .update_fns: (optional) Defaults to [].
            .per (int): (optional) Defaults to 1.
            .useConstantAction (bool): (optional) Defaults to False. When
                truthy, .constantActionToUse is required.
            .prop_cfg
                .model_init_cfg: Must contain model_constructor, which is
                    called with model_init_cfg to build the model.
                .model_train_cfg: (optional) Defaults to {}.
                .model_pretrained (bool): (optional) Defaults to False.
                .mode (str): Propagation method, one of
                    [E, DS, MM, TS1, TSinf]. Required.
                .npart (int): Number of particles. Required.
                .ign_var (bool): (optional) Defaults to False; always True
                    for mode "E".
                .obs_preproc / .obs_postproc / .obs_postproc2 / .targ_proc:
                    (optional) Processing hooks; each defaults to a
                    pass-through.
            .opt_cfg
                .constrains: Required. constrains[0][0] and constrains[0][1]
                    are used as the action lower and upper bounds; the whole
                    object is forwarded to the optimizer.
                .mode (str): Key into MPC.optimizers. Required.
                .plan_hor (int): Planning horizon. Required.
                .obs_cost_fn (func): Cost on observations. Required.
                .ac_cost_fn (func): Cost on actions. Required.
                .target: Required; stored as self.target.
                .max_resamples (int): (optional) Forwarded to the optimizer.
                    Defaults to 10.
                .cfg: (optional) Extra optimizer keyword arguments.
            .log_cfg
                .save_all_models / .log_traj_preds / .log_particles (bool):
                    (optional) Each defaults to False. log_particles takes
                    precedence over log_traj_preds.

    Raises:
        ValueError: For an unknown propagation mode, a particle count that is
            not a multiple of the ensemble size (TS1/TSinf), or more than one
            particle with mode "E".
        NotImplementedError: If the model's ``is_tf_model`` is falsy.
    """
    super().__init__(params)
    self.dO, self.dU = params.dO, params.dU

    constrains = get_required_argument(
        params.opt_cfg, "constrains", "Must provide the optimisation constrains."
    )
    self.ac_lb = constrains[0][0]
    self.ac_ub = constrains[0][1]
    print("lb", self.ac_lb)
    print("ub", self.ac_ub)

    self.update_fns = params.get("update_fns", [])
    self.per = params.get("per", 1)

    self.model = get_required_argument(
        params.prop_cfg.model_init_cfg, "model_constructor",
        "Must provide a model constructor."
    )(params.prop_cfg.model_init_cfg)
    self.model_train_cfg = params.prop_cfg.get("model_train_cfg", {})
    self.prop_mode = get_required_argument(
        params.prop_cfg, "mode", "Must provide propagation method."
    )
    self.npart = get_required_argument(
        params.prop_cfg, "npart", "Must provide number of particles."
    )
    self.ign_var = params.prop_cfg.get("ign_var", False) or self.prop_mode == "E"

    # Each processing hook defaults to a pass-through.
    self.obs_preproc = params.prop_cfg.get("obs_preproc", lambda obs: obs)
    self.obs_postproc = params.prop_cfg.get(
        "obs_postproc", lambda obs, model_out: model_out
    )
    self.obs_postproc2 = params.prop_cfg.get(
        "obs_postproc2", lambda next_obs: next_obs
    )
    self.targ_proc = params.prop_cfg.get(
        "targ_proc", lambda obs, next_obs: next_obs
    )

    self.opt_mode = get_required_argument(
        params.opt_cfg, "mode", "Must provide optimization method."
    )
    self.plan_hor = get_required_argument(
        params.opt_cfg, "plan_hor", "Must provide planning horizon."
    )
    # Remember the configured horizon separately from plan_hor.
    self.og_plan_hor = self.plan_hor
    self.obs_cost_fn = get_required_argument(
        params.opt_cfg, "obs_cost_fn", "Must provide cost on observations."
    )
    # Message fixed: the original reused the obs_cost_fn message for this key.
    self.target = get_required_argument(
        params.opt_cfg, "target", "Must provide a target."
    )
    self.ac_cost_fn = get_required_argument(
        params.opt_cfg, "ac_cost_fn", "Must provide cost on actions."
    )

    self.save_all_models = params.log_cfg.get("save_all_models", False)
    self.log_traj_preds = params.log_cfg.get("log_traj_preds", False)
    self.log_particles = params.log_cfg.get("log_particles", False)

    self.useConstantAction = params.get("useConstantAction", False)
    if self.useConstantAction:
        self.constantActionToUse = get_required_argument(
            params, "constantActionToUse", "Must provide constant action"
        )

    # Perform argument checks
    if self.prop_mode not in ["E", "DS", "MM", "TS1", "TSinf"]:
        raise ValueError("Invalid propagation method.")
    if self.prop_mode in ["TS1", "TSinf"] and self.npart % self.model.num_nets != 0:
        raise ValueError(
            "Number of particles must be a multiple of the ensemble size."
        )
    if self.prop_mode == "E" and self.npart != 1:
        raise ValueError(
            "Deterministic propagation methods only need one particle."
        )

    # Create action sequence optimizer
    opt_cfg = params.opt_cfg.get("cfg", {})
    self.optimizer = MPC.optimizers[params.opt_cfg.mode](
        sol_dim=self.plan_hor * self.dU,
        constrains=constrains,
        max_resamples=params.opt_cfg.get("max_resamples", 10),
        tf_session=None if not self.model.is_tf_model else self.model.sess,
        **opt_cfg
    )
    self.has_been_trained = params.prop_cfg.get("model_pretrained", False)

    # Init parameters for the optimizer
    # --------------------------------------------------------------------
    # Buffer of actions if not planning at each step; shape (0, dU)
    self.ac_buf = np.array([]).reshape(0, self.dU)
    # Previous solution starts at the midpoint between min and max action
    self.prev_sol = np.tile((self.ac_lb + self.ac_ub) / 2, [self.plan_hor])
    # Initial variance: sigma = (max - min) / 4, so var = (max - min)^2 / 16
    self.init_var = np.tile(
        np.square(self.ac_ub - self.ac_lb) / 16, [self.plan_hor]
    )

    # Init parameters for model training
    # ---------------------------------------------------------------------
    # Shape is (0, n actions + n preprocessed observation features)
    self.train_in = np.array([]).reshape(
        0, self.dU + self.obs_preproc(np.zeros([1, self.dO])).shape[-1]
    )
    # Shape is (0, n targets)
    self.train_targs = np.array([]).reshape(
        0,
        self.targ_proc(np.zeros([1, self.dO]), np.zeros([1, self.dO])).shape[-1]
    )

    if self.model.is_tf_model:
        self.sy_cur_obs = tf.Variable(np.zeros(self.dO), dtype=tf.float32)  # (dO,)
        self.ac_seq = tf.placeholder(
            shape=[1, self.plan_hor * self.dU], dtype=tf.float32
        )  # (1, H*dU)
        self.pred_cost, self.pred_traj = self._compile_cost(
            self.ac_seq, get_pred_trajs=True
        )
        self.optimizer.setup(self._compile_cost, True)
        self.model.sess.run(tf.variables_initializer([self.sy_cur_obs]))
    else:
        raise NotImplementedError()

    print("Created an MPC controller, prop mode %s, %d particles. " %
          (self.prop_mode, self.npart) +
          ("Ignoring variance." if self.ign_var else ""))

    if self.save_all_models:
        print(
            "Controller will save all models. (Note: This may be memory-intensive."
        )
    if self.log_particles:
        print(
            "Controller is logging particle predictions (Note: This may be memory-intensive)."
        )
        self.pred_particles = []
    elif self.log_traj_preds:
        print(
            "Controller is logging trajectory prediction statistics (mean+var)."
        )
        self.pred_means, self.pred_vars = [], []
    else:
        print("Trajectory prediction logging is disabled.")