def _create_mpc(self, itr, x0):
    """ Must initialize MPC """
    sample0 = Sample(meta_data=params, T=1)
    sample0.set_X(x0, t=0)
    self._update_world(sample0, 0)

    self._logger.info('\t\t\tCreating MPC')
    if self._planner_type == 'primitives':
        additional_costs = []
        mpc_policy = PrimitivesMPCPolicyBebop2d(
            self._trajopt,
            self._cost_probcoll,
            additional_costs=additional_costs,
            meta_data=params,
            use_threads=False,
            plot=True,
            epsilon_greedy=params['prediction']['dagger']['epsilon_greedy'])
    elif self._planner_type == 'teleop':
        mpc_policy = TeleopMPCPolicyBebop2d(params)
    else:
        raise NotImplementedError(
            'planner_type {0} not implemented for bebop2d'.format(self._planner_type))

    return mpc_policy
def _run_itr_analyze(self, itr):
    T = 1
    samples = []
    world_infos = []
    mpc_infos = []

    self._conditions.reset()
    for cond in xrange(self._conditions.length):
        for rep in xrange(self._conditions.repeats):
            self._logger.info('\t\tStarting cond {0} rep {1}'.format(cond, rep))

            if (cond == 0 and rep == 0) or self._world.randomize:
                self._reset_world(itr, cond, rep)

            x0 = self._conditions.get_cond(cond, rep=rep)
            sample_T = Sample(meta_data=params, T=T)
            sample_T.set_X(x0, t=0)

            mpc_policy = self._create_mpc(itr, x0)
            control_noise = self._create_control_noise()  # create each time b/c may not be memoryless

            for t in xrange(T):
                self._update_world(sample_T, t)

                x0 = sample_T.get_X(t=t)

                rollout = self._agent.sample_policy(x0, mpc_policy, noise=control_noise, T=1)

                u = rollout.get_U(t=0)
                o = rollout.get_O(t=0)
                u_label = u

                sample_T.set_U(u_label, t=t)
                sample_T.set_O(o, t=t)

                if hasattr(mpc_policy, '_curr_traj'):
                    self._world.update_visualization(sample_T, mpc_policy._curr_traj, t)
            else:
                self._logger.info('\t\t\tLasted for t={0}'.format(t))

            samples.append(sample_T)
            world_infos.append(self._get_world_info())
            mpc_infos.append(mpc_policy.get_info())

            assert samples[-1].isfinite()

    self._itr_save_samples(itr, samples)
    self._itr_save_worlds(itr, world_infos)
    self._itr_save_mpcs(itr, mpc_infos)
def _get_sim_state(self, xt):
    state_sample = Sample(meta_data=params, T=1)
    state_msg = self.sim_state
    state = np.array([
        state_msg.position.x,
        state_msg.position.y,
        state_msg.position.z,
        state_msg.orientation.x,
        state_msg.orientation.y,
        state_msg.orientation.z,
        state_msg.orientation.w
    ])
    state_sample.set_X(state[:3], t=0, sub_state='position')
    state_sample.set_X(state[3:], t=0, sub_state='orientation')
    x = np.nan_to_num(xt) + np.nan_to_num(state_sample.get_X(t=0))
    return x
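# --- Illustrative sketch (not part of the agent code) -------------------------
# _get_sim_state above composes the requested state xt with the simulator pose
# by zeroing NaNs on both sides and adding. This relies on the convention
# (an assumption here) that entries of xt the caller does not know are NaN.
# A standalone demonstration of that arithmetic with made-up numbers:
import numpy as np

def _fill_nan_with_sim_demo():
    xt = np.array([np.nan, np.nan, np.nan, 0.1, 0.2, 0.3, 0.9])   # position unknown
    sim_pose = np.array([1.0, 2.0, 0.5, 0.0, 0.0, 0.0, 0.0])      # simulator position, zero orientation
    filled = np.nan_to_num(xt) + np.nan_to_num(sim_pose)
    # filled == [1.0, 2.0, 0.5, 0.1, 0.2, 0.3, 0.9]
    # Note: this only acts as "fill in the missing entries" when the two
    # vectors never both carry non-zero values in the same slot.
    return filled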
def sample_policy(self, x0, policy, T=None, **policy_args):
    if T is None:
        T = policy._T
    policy_sample = Sample(meta_data=params, T=T)
    noise = policy_args.get('noise', ZeroNoise(params))

    rate = rospy.Rate(1. / params['dt'])
    policy_sample.set_X(x0, t=0)
    for t in xrange(T):
        # get observation and act
        x_t = policy_sample.get_X(t=t)
        o_t = self.get_observation(x_t)
        u_t = policy.act(x_t, o_t, t, noise=noise)
        if params['probcoll']['planner_type'] != 'teleop':  # TODO hack
            self.execute_control(u_t)

        # record
        policy_sample.set_X(x_t, t=t)
        policy_sample.set_O(o_t, t=t)
        policy_sample.set_U(u_t, t=t)

        # propagate dynamics
        if t < T - 1:
            x_tp1 = self._dynamics.evolve(x_t, u_t)
            policy_sample.set_X(x_tp1, t=t + 1)

        rate.sleep()

        # see if collision in the past cycle
        policy_sample.set_O([int(self.coll_callback.get() is not None)],
                            t=t, sub_obs='collision')

    return policy_sample
def act(self, x, obs, t, noise, ref_traj=None):
    T, dt, theta, speed = self._T, self._meta_data['dt'], self.theta, self.speed

    traj = Sample(meta_data=self._meta_data, T=T)

    ### initialize
    traj.set_X(x, t=0)
    traj.set_O(obs, t=0)

    ### create desired path (radiates outward)
    linearvel = [speed * np.cos(theta), speed * np.sin(theta)]
    for t in xrange(T - 1):
        x_t = traj.get_X(t=t)
        x_tp1 = self._dynamics.evolve(x_t, linearvel)
        traj.set_X(x_tp1, t=t + 1)
        traj.set_U(linearvel, t=t)
    traj.set_U(linearvel, t=-1)

    self._curr_traj = traj

    u = traj.get_U(t=0)
    return u + noise.sample(u)
def sample_policy(self, x0, policy, T=None, use_noise=True, only_noise=False, **policy_args):
    if T is None:
        T = policy._T
    rate = rospy.Rate(1. / params['dt'])

    policy_sample = Sample(meta_data=params, T=T)
    policy_sample.set_X(x0, t=0)
    policy_sample_no_noise = Sample(meta_data=params, T=T)
    for t in xrange(T):
        # get observation and act
        x_t = policy_sample.get_X(t=t)
        o_t = self.get_observation(x_t)
        if self.sim:
            x_t = self._get_sim_state(x_t)
        start = time.time()
        u_t, u_t_no_noise = policy.act(x_t, o_t, t, only_noise=only_noise)
        self._logger.debug(time.time() - start)

        # only execute control if no collision
        # TODO is this necessary
        if int(o_t[policy_sample.get_O_idxs(sub_obs='collision')][0]) == 0:
            if use_noise:
                self.execute_control(u_t)
            else:
                self.execute_control(u_t_no_noise)

        # record
        policy_sample.set_X(x_t, t=t)
        policy_sample.set_O(o_t, t=t)
        policy_sample.set_U(u_t, t=t)
        if not only_noise:
            policy_sample_no_noise.set_U(u_t_no_noise, t=t)

        # In sim we do not have cycles
        if self.sim:
            policy_sample.set_O([int(self.sim_coll)], t=t, sub_obs='collision')
        else:
            # propagate dynamics
            if t < T - 1:
                x_tp1 = self._dynamics.evolve(x_t, u_t)
                policy_sample.set_X(x_tp1, t=t + 1)

            rate.sleep()

            # see if collision in the past cycle
            policy_sample.set_O([int(self.coll_callback.get() is not None)],
                                t=t, sub_obs='collision')

    return policy_sample, policy_sample_no_noise
def sample_policy(self, x0, policy, T=None, only_noise=False, **policy_args):
    """
    Run the policy and collect the trajectory data

    :param x0: initial state
    :param policy: to execute
    :param policy_args: e.g. ref_traj, noise, etc
    :rtype: (Sample, Sample) -- with-noise and no-noise trajectories
    """
    if T is None:
        T = policy._T
    policy_sample = Sample(meta_data=params, T=T)
    policy_sample.set_X(x0, t=0)
    policy_sample_no_noise = Sample(meta_data=params, T=T)
    for t in xrange(T):
        # get observation and act
        x_t = policy_sample.get_X(t=t)
        o_t = self.get_observation(x_t)
        u_t, u_t_no_noise = policy.act(x_t, o_t, t)

        # record
        policy_sample.set_X(x_t, t=t)
        policy_sample.set_O(o_t, t=t)
        policy_sample.set_U(u_t, t=t)
        if not only_noise:
            policy_sample_no_noise.set_U(u_t_no_noise, t=t)

        # propagate dynamics
        if t < T - 1:
            x_tp1 = self._dynamics.evolve(x_t, u_t)
            policy_sample.set_X(x_tp1, t=t + 1)

    return policy_sample, policy_sample_no_noise
def plan(self, x0, obs, dynamics, cost_funcs, dU, T,
         init_traj=None, plot_env=None, init_cov=None, config=None):
    """
    :type init_traj: Sample
    :type dynamics: Dynamics
    :type cost_funcs: Cost
    """
    if config is None:
        config = self._config

    self.means, self.covs = [], []

    if init_traj is not None:
        mean = init_traj.get_U().ravel()
    elif 'init_u' in config:
        mean = config['init_u'] * T
    else:
        mean = np.zeros(dU * T)
    lower_bound = config['bounds']['min'] * T if 'bounds' in config else -np.inf * np.ones(dU * T)
    upper_bound = config['bounds']['max'] * T if 'bounds' in config else np.inf * np.ones(dU * T)

    ### run one iteration of CEM with extra samples
    # if init_cov is None:
    cov = config['init_var'] * np.eye(dU * T)
    # else:
    #     cov = config['init_var'] * init_cov
    mean, cov = self._cem_step(x0, obs, mean, cov, config['init_M'], config['K'],
                               lower_bound, upper_bound, dU, T, dynamics, cost_funcs,
                               plot_env=plot_env)
    self.means.append(mean)
    self.covs.append(cov)
    if plot_env:
        pass  # raw_input('CEM iter 0')

    ### remaining iterations
    for iter in xrange(1, config['iters']):
        mean, cov = self._cem_step(x0, obs, mean, cov, config['M'], config['K'],
                                   lower_bound, upper_bound, dU, T, dynamics, cost_funcs,
                                   plot_env=plot_env)
        self.means.append(mean)
        self.covs.append(cov)
        if plot_env:
            pass  # raw_input('CEM iter {0}'.format(iter))

    sample = Sample(meta_data=self._meta_data, T=T)
    U = mean.reshape(T, dU)
    U[:, -1] = 0.  # TODO hack
    sample.set_U(U, t=slice(0, T))
    sample.set_O(obs, t=0)
    sample.set_X(x0, t=0)
    sample.rollout(dynamics)

    return sample
def _cem_step(self, x0, obs, mean, cov, M, K, lower_bound, upper_bound, dU, T,
              dynamics, cost_funcs, plot_env=None):
    """
    :param mean: mean of trajectory controls Gaussian
    :param cov: covariance of trajectory controls Gaussian
    :param obs: observation
    :param M: sample M controls
    :param K: keep K lowest cost trajectories
    :return: mean, cov
    """
    ### sample trajectories
    M_controls = []
    while len(M_controls) < M:
        control = np.random.multivariate_normal(mean, cov)
        if np.all(control < upper_bound) and np.all(control > lower_bound):
            M_controls.append(control)

    samples = []
    for U_flat in M_controls:
        sample = Sample(meta_data=self._meta_data, T=T)
        U = U_flat.reshape(T, dU)
        U[:, -1] = 0.  # TODO hack
        sample.set_U(U, t=slice(0, T))
        sample.set_O(obs, t=0)
        sample.set_X(x0, t=0)
        for sub_control, u_sub in self._config['fixed'].items():
            sample.set_U(np.array([list(u_sub) * T]).T, t=slice(0, T), sub_control=sub_control)
        sample.rollout(dynamics)
        samples.append(sample)

    ### keep lowest K cost trajectories
    costs = [0.] * len(samples)
    for cost_func in cost_funcs:
        if hasattr(cost_func, 'eval_batch'):
            costs = [c + cost_func_approx.J
                     for c, cost_func_approx in zip(costs, cost_func.eval_batch(samples))]
        else:
            costs = [c + cost_func.eval(sample).J
                     for c, sample in zip(costs, samples)]
    samples_costs = sorted(zip(samples, costs), key=lambda x: x[1])
    K_samples = [x[0] for x in samples_costs[:K]]

    ### fit Gaussian
    data = np.vstack([s.get_U().ravel() for s in K_samples])
    new_mean = np.mean(data, axis=0)
    new_cov = np.cov(data, rowvar=0)

    if plot_env:
        plot_env.rave_env.clear_plots()
        colors = [(0, 1, 0)] * K + [(1, 0, 0)] * (M - K)
        for sample, color in zip([x[0] for x in samples_costs], colors):
            for t in xrange(T - 1):
                plot_env.rave_env.plot_segment(
                    sample.get_X(t=t, sub_state='position'),
                    sample.get_X(t=t + 1, sub_state='position'),
                    color=color)

    return new_mean, new_cov
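# --- Illustrative sketch (not part of the planner) -----------------------------
# A minimal, self-contained version of the cross-entropy update that _cem_step
# implements above: sample M control vectors from a Gaussian, keep the K
# cheapest under some cost, and refit the Gaussian to those elites. The
# quadratic cost, dimensions, and iteration count below are made up purely for
# illustration; the real planner rolls out Sample objects through the dynamics
# and scores them with the configured cost_funcs instead.
import numpy as np

def cem_sketch(cost, dim, iters=20, M=64, K=8, init_var=1.0, seed=0):
    rng = np.random.RandomState(seed)
    mean, cov = np.zeros(dim), init_var * np.eye(dim)
    for _ in xrange(iters):
        controls = rng.multivariate_normal(mean, cov, size=M)             # sample M candidates
        elites = controls[np.argsort([cost(u) for u in controls])[:K]]    # keep K lowest-cost
        mean, cov = elites.mean(axis=0), np.cov(elites, rowvar=0)         # refit Gaussian
    return mean

# Example usage: drive a 4-dim control vector toward a made-up target.
# cem_sketch(lambda u: np.sum((u - np.array([1., -1., 0.5, 0.])) ** 2), dim=4)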
def rosbag_to_sample(rosbag_fname):
    print('Parsing {0}'.format(rosbag_fname))
    bebop_topics = params['bebop']['topics']

    ### read all messages
    bag = rosbag.Bag(rosbag_fname, 'r')
    msg_dict = collections.defaultdict(list)
    start_time = None
    for topic, msg, t in bag.read_messages():
        if start_time is None:
            start_time = t.to_sec()
        else:
            assert start_time <= t.to_sec()
        msg_dict[topic].append((t.to_sec() - start_time, msg))
    bag.close()

    # get_measured_vel(rosbag_fname, msg_dict)  # necessary hack

    if len(msg_dict[bebop_topics['start_rollout']]) == 0:
        print('\tNo start_rollout, returning None')
        return None

    ### sort by time
    for k, v in msg_dict.items():
        msg_dict[k] = sorted(v, key=lambda x: x[0])

    ### prune start and end
    is_coll = (bebop_topics['collision'] in msg_dict.keys())
    start_time = msg_dict[bebop_topics['start_rollout']][0][0]
    end_time = msg_dict[bebop_topics['collision']][0][0] if is_coll else np.inf
    for topic, time_msg in msg_dict.items():
        msg_dict[topic] = [(t, msg) for t, msg in time_msg if start_time <= t <= end_time]

    ### necessary info for Samples
    dt = params['dt']
    T = params['prediction']['dagger']['T']

    ### space by dt
    image_time_msgs = spaced_messages(msg_dict[bebop_topics['image']], dt)[-T:]
    cmd_vel_time_msgs = aligned_messages(image_time_msgs, msg_dict[bebop_topics['cmd_vel']], dt)[-T:]
    # measured_vel_time_msgs = aligned_messages(image_time_msgs, msg_dict[bebop_topics['measured_vel']], dt)[-T:]

    ### create sample
    sample = Sample(T=len(image_time_msgs), meta_data=params)
    cvb = cv_bridge.CvBridge()
    for t, (image_time_msg, cmd_vel_time_msg) in enumerate(zip(image_time_msgs, cmd_vel_time_msgs)):
        image_msg = image_time_msg[1]
        cmd_vel_msg = cmd_vel_time_msg[1]
        ### image
        im = AgentBebop2d.process_image(image_msg, cvb)
        sample.set_O(im.ravel(), t=t, sub_obs='camera')
        ### collision
        sample.set_O([0], t=t, sub_obs='collision')
        ### linearvel
        sample.set_X([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t)
        sample.set_U([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t)
    sample.set_O([int(is_coll)], t=-1, sub_obs='collision')
    # sample.set_O([int(np.random.random() > 0.5)], t=-1, sub_obs='collision')  # tmp

    assert sample.isfinite()

    return sample
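# --- Illustrative sketch (assumption) ------------------------------------------
# spaced_messages and aligned_messages are helpers whose implementations are not
# shown here. The sketch below is only a guess at the kind of downsampling
# spaced_messages performs for rosbag_to_sample above: keep messages whose
# timestamps are at least dt apart, so the rosbag stream matches the sample
# rate. The real helpers may differ.
def spaced_messages_sketch(time_msgs, dt):
    """time_msgs: time-sorted list of (timestamp, msg); returns a dt-spaced subset."""
    spaced = []
    for t, msg in time_msgs:
        if len(spaced) == 0 or t - spaced[-1][0] >= dt:
            spaced.append((t, msg))
    return spaced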
def _run_rollout(self, itr):
    T = params['probcoll']['T']
    label_with_noise = params['probcoll']['label_with_noise']

    samples = []
    world_infos = []
    mpc_infos = []

    self._conditions.reset()
    for cond in xrange(self._conditions.length):
        rep = 0
        while rep < self._conditions.repeats:
            self._logger.info('\t\tStarting cond {0} rep {1} itr {2}'.format(cond, rep, itr))
            start = time.time()

            if (cond == 0 and rep == 0) or self._world.randomize:
                self._reset_world(itr, cond, rep)

            x0 = self._conditions.get_cond(cond, rep=rep)
            sample_T = Sample(meta_data=params, T=T)
            sample_T.set_X(x0, t=0)

            for t in xrange(T):
                self._update_world(sample_T, t)

                x0 = sample_T.get_X(t=t)

                rollout, rollout_no_noise = self._agent.sample_policy(
                    x0, self._mpc_policy, T=1, only_noise=label_with_noise)

                o = rollout.get_O(t=0)
                if label_with_noise:
                    u = rollout.get_U(t=0)
                else:
                    u = rollout_no_noise.get_U(t=0)

                sample_T.set_U(u, t=t)
                sample_T.set_O(o, t=t)
                if not self._use_dynamics:
                    sample_T.set_X(rollout.get_X(t=0), t=t)

                if self._world.is_collision(sample_T, t=t):
                    self._logger.warning('\t\t\tCrashed at t={0}'.format(t))
                    break

                if self._use_dynamics:
                    if t < T - 1:
                        x_tp1 = self._dynamics.evolve(x0, u)
                        sample_T.set_X(x_tp1, t=t + 1)

                if hasattr(self._mpc_policy, '_curr_traj'):
                    self._world.update_visualization(sample_T, self._mpc_policy._curr_traj, t)
            else:
                self._logger.info('\t\t\tLasted for t={0}'.format(t))

            sample = sample_T.match(slice(0, t + 1))
            world_info = self._get_world_info()
            mpc_info = self._mpc_policy.get_info()

            if not self._is_good_rollout(sample, t):
                self._logger.warning('\t\t\tNot good rollout. Repeating rollout.')
                continue

            samples.append(sample)
            world_infos.append(world_info)
            mpc_infos.append(mpc_info)

            assert samples[-1].isfinite()

            elapsed = time.time() - start
            self._logger.info('\t\t\tFinished cond {0} rep {1} ({2:.1f}s, {3:.3f}x real-time)'.format(
                cond, rep, elapsed, t * params['dt'] / elapsed))
            rep += 1

    self._itr_save_samples(itr, samples)
    self._itr_save_worlds(itr, world_infos)
    self._itr_save_mpcs(itr, mpc_infos)

    self._reset_world(itr, 0, 0)  # leave the world as it was
def _evaluate_probcoll_model(self):
    ### get prediction model evaluations for all iterations
    itr = 0
    itrs_Xs, itrs_prob_means, itrs_prob_stds, itrs_costs = [], [], [], []
    while True:
        ### load NN
        model_file = self._itr_model_file(itr)
        if not os.path.exists(model_file):
            break
        bootstrap = ProbcollModelPointquad(read_only=True, finalize=False)
        bootstrap.load(model_file=model_file)

        wd = self._load_world_file(itr, 0, 0)
        self._world.env.clear()
        for pose, radius, height in zip(wd['cyl_poses'], wd['cyl_radii'], wd['cyl_heights']):
            self._world.env.add_cylinder(pose, radius, height)
        for pose, extents in zip(wd['box_poses'], wd['box_extents']):
            self._world.env.add_box(pose, extents)

        default_sample = Sample(meta_data=params, T=bootstrap.T)
        for k, v in params['probcoll']['conditions']['default'].items():
            default_sample.set_X(v, t=slice(0, bootstrap.T), sub_state=k)

        ### evaluate on grid
        dx, dy = 30, 30
        positions = np.dstack(np.meshgrid(np.linspace(-0.5, 3.0, dx),
                                          np.linspace(1.0, -1.0, dy))).reshape(dx * dy, 2)
        samples = []
        for pos in positions:
            s = copy.deepcopy(default_sample)
            pos = np.concatenate((pos, [s.get_X(t=0, sub_state='position')[-1]]))
            s.set_X(pos, t=slice(0, bootstrap.T), sub_state='position')
            s.set_O(self._agent.get_observation(s.get_X(t=0), noise=False), t=slice(0, bootstrap.T))
            s.set_U(np.zeros((bootstrap.T, bootstrap.dU)), t=slice(0, bootstrap.T))
            samples.append(s)

        cp_params = params['probcoll']['cost_probcoll']
        eval_cost = cp_params['eval_cost']
        pre_activation = cp_params['pre_activation']

        probs_mean_batch, probs_std_batch = bootstrap.eval_sample_batch(
            samples, num_avg=10, pre_activation=pre_activation)
        bootstrap.close()

        sigmoid = lambda x: (1. / (1. + np.exp(-x)))

        # eval_cost is a string expression evaluated with probs_mean, probs_std,
        # speed, and t in scope
        costs = []
        for probs_mean, probs_std in zip(probs_mean_batch, probs_std_batch):
            cost = 0.
            for t in [bootstrap.T - 1]:
                speed = 1.
                cost += eval(eval_cost)
            costs.append(cost / bootstrap.T)
        costs = np.array(costs)

        # if itr == 3:
        #     import IPython; IPython.embed()

        Xs = np.array([s.get_X(t=0, sub_state='position')[:2] for s in samples]).reshape(dx, dy, 2)
        itrs_Xs.append(Xs)
        if pre_activation:
            probs_mean_batch = sigmoid(probs_mean_batch)
            # probs_std_batch = sigmoid(probs_std_batch)
        probs_std_batch /= probs_std_batch.max()
        itrs_prob_means.append(probs_mean_batch.max(axis=1).reshape(dx, dy))
        itrs_prob_stds.append(probs_std_batch.max(axis=1).reshape(dx, dy))
        itrs_costs.append(costs.reshape(dx, dy))

        # if itr >= 3:
        #     break

        itr += 1

    return itrs_Xs, itrs_prob_means, itrs_prob_stds, itrs_costs
def _evaluate_probcoll_model_groundtruth(self):
    """
    For each primitive, execute the primitive for T timesteps
    to obtain ground truth, and compare label
    """
    ### get prediction model evaluations for all iterations
    itr = 0
    itrs_gt_crashes, itrs_pred_crashes = [], []
    positions, speeds_angles = None, None
    while True:
        ### load NN
        model_file = self._itr_model_file(itr)
        if not os.path.exists(model_file):
            break
        bootstrap = ProbcollModelPointquad(read_only=True, finalize=False)
        bootstrap.load(model_file=model_file)

        wd = self._load_world_file(itr, 0, 0)
        self._world.env.clear()
        for pose, radius, height in zip(wd['cyl_poses'], wd['cyl_radii'], wd['cyl_heights']):
            self._world.env.add_cylinder(pose, radius, height)
        for pose, extents in zip(wd['box_poses'], wd['box_extents']):
            self._world.env.add_box(pose, extents)

        ### create MPC
        cp_params = params['probcoll']['cost']
        cost_cp = CostProbcollPointquad(
            bootstrap, None,
            weight=float(cp_params['weight']),
            eval_cost=cp_params['eval_cost'],
            pre_activation=cp_params['pre_activation'])
        mpc_policy = PrimitivesMPCPolicyPointquad(self._trajopt,
                                                  cost_cp,
                                                  additional_costs=[],
                                                  meta_data=params,
                                                  use_threads=False,
                                                  plot=False)

        default_sample = Sample(meta_data=params, T=bootstrap.T)
        for k, v in params['probcoll']['conditions']['default'].items():
            default_sample.set_X(v, t=slice(0, bootstrap.T), sub_state=k)

        ### speeds/angles
        speeds_angles = [(p.speed, p.theta) for p in mpc_policy._mpc_policies]
        positions = []

        ### evaluate on grid
        dx, dy = 12, 5
        rollouts = []
        for i, xpos in enumerate(np.linspace(-0.5, 1.5, dx)):
            for j, ypos in enumerate(np.linspace(0.6, -0.6, dy)):
                s = copy.deepcopy(default_sample)
                pos = [xpos, ypos, s.get_X(t=0, sub_state='position')[-1]]
                s.set_X(pos, t=slice(0, bootstrap.T), sub_state='position')
                s.set_O(self._agent.get_observation(s.get_X(t=0), noise=False),
                        t=slice(0, bootstrap.T))

                if np.sum(s.get_O(sub_obs='collision', t=0)) > 0:
                    continue
                positions.append((xpos, ypos))

                x0 = s.get_X(t=0)
                rollouts_ij = [self._agent.sample_policy(x0, p, noise=ZeroNoise(params), T=bootstrap.T)
                               for p in mpc_policy._mpc_policies]
                rollouts.append(rollouts_ij)

        rollouts = np.array(rollouts)
        gt_crashes = np.array([r.get_O(sub_obs='collision').sum() > 0
                               for r in rollouts.ravel()]).reshape(rollouts.shape)

        sigmoid = lambda x: 1. / (1. + np.exp(-x))
        pred_crashes = np.array([sigmoid(mean) > 0.5
                                 for mean in bootstrap.eval_sample_batch(rollouts.ravel(),
                                                                         num_avg=10,
                                                                         pre_activation=True)[0]]).reshape(rollouts.shape)
        bootstrap.close()

        itrs_gt_crashes.append(gt_crashes)
        itrs_pred_crashes.append(pred_crashes)

        itr += 1

    return itrs_gt_crashes, itrs_pred_crashes, positions, speeds_angles
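# --- Illustrative sketch (hypothetical downstream use) --------------------------
# _evaluate_probcoll_model_groundtruth returns boolean arrays of ground-truth and
# predicted crashes with matching shapes. How the caller consumes them is not
# shown here; one plausible summary per iteration is a confusion count and
# accuracy, sketched below with numpy only. The function name and output keys
# are made up for illustration.
import numpy as np

def crash_prediction_summary(gt_crashes, pred_crashes):
    gt = np.asarray(gt_crashes).ravel()
    pred = np.asarray(pred_crashes).ravel()
    return {
        'accuracy': float(np.mean(gt == pred)),
        'false_positives': int(np.sum(~gt & pred)),   # predicted crash, flew fine
        'false_negatives': int(np.sum(gt & ~pred)),   # predicted safe, actually crashed
    }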
def _evaluate_prediction_trajectory_model(self):
    ### get prediction model evaluations for all iterations
    itr = 0
    itrs_Xs, itrs_thetas, itrs_speeds, itrs_prob_means, itrs_prob_stds, itrs_costs = [], [], [], [], [], []
    while True:
        ### load NN
        model_file = self._itr_model_file(itr)
        if not os.path.exists(model_file):
            break
        bootstrap = ProbcollModelPointquad(read_only=True, finalize=False)
        bootstrap.load(model_file=model_file)

        wd = self._load_world_file(itr, 0, 0)
        self._world.env.clear()
        for pose, radius, height in zip(wd['cyl_poses'], wd['cyl_radii'], wd['cyl_heights']):
            self._world.env.add_cylinder(pose, radius, height)
        for pose, extents in zip(wd['box_poses'], wd['box_extents']):
            self._world.env.add_box(pose, extents)

        ### create MPC
        cp_params = params['probcoll']['cost_probcoll']
        cost_cp = CostProbcollPointquad(
            bootstrap, None,
            weight=float(cp_params['weight']),
            eval_cost=cp_params['eval_cost'],
            pre_activation=cp_params['pre_activation'])
        mpc_policy = PrimitivesMPCPolicyPointquad(self._trajopt,
                                                  cost_cp,
                                                  additional_costs=[],
                                                  meta_data=params,
                                                  use_threads=False,
                                                  plot=False)

        default_sample = Sample(meta_data=params, T=bootstrap.T)
        for k, v in params['probcoll']['conditions']['default'].items():
            default_sample.set_X(v, t=slice(0, bootstrap.T), sub_state=k)

        ### evaluate on grid
        dx, dy = 20, 20
        samples = []
        itr_costs = []
        thetas, speeds = [], []
        for i, xpos in enumerate(np.linspace(-0.5, 1.5, dx)):
            for j, ypos in enumerate(np.linspace(0.6, -0.6, dy)):
                s = copy.deepcopy(default_sample)
                pos = [xpos, ypos, s.get_X(t=0, sub_state='position')[-1]]
                s.set_X(pos, t=slice(0, bootstrap.T), sub_state='position')
                s.set_O(self._agent.get_observation(s.get_X(t=0), noise=False),
                        t=slice(0, bootstrap.T))

                x = s.get_X(t=0)
                o = s.get_O(t=0)

                # assume straight line trajectories
                p_samples = []
                p_speeds, p_thetas = [], []
                for primitive in mpc_policy._mpc_policies:
                    p_speeds.append(primitive.speed)
                    p_thetas.append(primitive.theta)
                    primitive.act(x, o, t=0, noise=ZeroNoise(params))
                    p_samples.append(primitive._curr_traj)

                prim_costs = mpc_policy._primitives_cost(p_samples)
                min_cost_idx = np.argmin(prim_costs)

                samples.append(p_samples[min_cost_idx])
                itr_costs.append(prim_costs[min_cost_idx])
                thetas.append(p_thetas[min_cost_idx])
                speeds.append(p_speeds[min_cost_idx])

        ### evaluate on grid
        cp_params = params['probcoll']['cost_probcoll']
        eval_cost = cp_params['eval_cost']
        pre_activation = cp_params['pre_activation']

        probs_mean_batch, probs_std_batch = bootstrap.eval_sample_batch(
            samples, num_avg=10, pre_activation=pre_activation)
        bootstrap.close()

        sigmoid = lambda x: (1. / (1. + np.exp(-x)))

        # costs = []
        # for probs_mean, probs_std in zip(probs_mean_batch, probs_std_batch):
        #     cost = 0.
        #     for t in [bootstrap.T - 1]:
        #         speed = 1.
        #         cost += eval(eval_cost)
        #     costs.append(cost / bootstrap.T)
        # costs = np.array(costs)

        itrs_Xs.append(np.array([s.get_X(t=0, sub_state='position')[:2] for s in samples]))
        itrs_thetas.append(np.array(thetas))
        itrs_speeds.append(np.array(speeds) /
                           (params['trajopt']['cost_velocity']['velocity'][0] * dx * dy))  # normalize so fits into grid density
        # itrs_speeds.append(np.ones(len(speeds), dtype=float) / (dx * dy))  # TODO
        if pre_activation:
            probs_mean_batch = sigmoid(probs_mean_batch)
            # probs_std_batch = sigmoid(probs_std_batch)
        probs_std_batch /= probs_std_batch.max()
        itrs_prob_means.append(np.array(probs_mean_batch))
        itrs_prob_stds.append(np.array(probs_std_batch))
        itrs_costs.append(np.array(itr_costs))

        itr += 1
        # if itr >= 2:  # TODO
        #     break

    return itrs_Xs, itrs_thetas, itrs_speeds, itrs_prob_means, itrs_prob_stds, itrs_costs
def _run_testing(self, itr):
    if (itr != 0 and (itr == self._max_iter - 1
                      or itr % params['world']['testing']['itr_freq'] == 0)):
        self._logger.info('Itr {0} testing'.format(itr))
        if self._agent.sim:
            # if self._async_on:
            #     self._logger.debug('Recovering probcoll model')
            #     self._probcoll_model.recover()

            T = params['probcoll']['T']
            conditions = params['world']['testing']['positions']
            samples = []
            reset_pos, reset_quat = self._agent.get_sim_state_data()
            for cond in xrange(len(conditions)):
                self._logger.info('\t\tTesting cond {0} itr {1}'.format(cond, itr))
                start = time.time()
                self._agent.execute_control(None, reset=True, pos=conditions[cond])

                x0 = self._conditions.get_cond(0)
                sample_T = Sample(meta_data=params, T=T)
                sample_T.set_X(x0, t=0)

                for t in xrange(T):
                    x0 = sample_T.get_X(t=t)

                    rollout, rollout_no_noise = self._agent.sample_policy(
                        x0, self._mpc_policy, T=1, use_noise=False)

                    o = rollout.get_O(t=0)
                    u = rollout_no_noise.get_U(t=0)

                    sample_T.set_U(u, t=t)
                    sample_T.set_O(o, t=t)
                    if not self._use_dynamics:
                        sample_T.set_X(rollout.get_X(t=0), t=t)

                    if self._world.is_collision(sample_T, t=t):
                        self._logger.warning('\t\t\tCrashed at t={0}'.format(t))
                        break

                    if self._use_dynamics:
                        if t < T - 1:
                            x_tp1 = self._dynamics.evolve(x0, u)
                            sample_T.set_X(x_tp1, t=t + 1)
                else:
                    self._logger.info('\t\t\tLasted for t={0}'.format(t))

                sample = sample_T.match(slice(0, t + 1))

                if not self._is_good_rollout(sample, t):
                    self._logger.warning('\t\t\tNot good rollout. Repeating rollout.')
                    continue

                samples.append(sample)

                assert samples[-1].isfinite()
                elapsed = time.time() - start
                self._logger.info('\t\t\tFinished cond {0} of testing ({1:.1f}s, {2:.3f}x real-time)'.format(
                    cond, elapsed, t * params['dt'] / elapsed))

            self._itr_save_samples(itr, samples, prefix='testing_')
            self._agent.execute_control(None, reset=True, pos=reset_pos, quat=reset_quat)
        else:
            pass