def get_action(self, model, inputs, goals):
    # Run the precomputed MPPI graph: feed the current observations, goals, and the
    # previous MPPI mean, then fetch the planner outputs.
    assert self._session is not None

    feed_dict = {}
    for name, ph in self._obs_placeholders.get_leaf_items():
        value = np.array(inputs.get_recursive(name))
        if value.shape == tuple():
            value = value[np.newaxis]
        feed_dict[ph] = value
    for name, ph in self._goal_placeholders.get_leaf_items():
        feed_dict[ph] = np.array(goals.get_recursive(name))
    feed_dict[self._mppi_mean_placeholder] = self._mppi_mean_np

    get_action_tf = {}
    for name, tensor in self._get_action_outputs.get_leaf_items():
        get_action_tf[name] = tensor

    get_action_tf_output = self._session.run(get_action_tf, feed_dict=feed_dict)

    get_action = AttrDict()
    for name, value in get_action_tf_output.items():
        get_action.add_recursive(name, value)

    # warm-start the next call with the updated mean
    self._mppi_mean_np = get_action.mppi_mean

    return get_action

def call(self, inputs, obs_lowd=None, training=False):
    # Call each sub-model, splitting the low-dimensional observation features among
    # them, and merge their outputs into a single AttrDict.
    if obs_lowd is None:
        obs_lowd = [None] * len(self._models)
    else:
        obs_lowd = tf.split(obs_lowd, self._obs_lowd_dims, axis=-1)

    outputs = AttrDict()
    for model_i, obs_lowd_i in zip(self._models, obs_lowd):
        outputs_i = model_i(inputs, obs_lowd=obs_lowd_i, training=training)
        for key, value in outputs_i.get_leaf_items():
            outputs.add_recursive(key, value)

    return outputs

def _split_action(self, action):
    """
    :param action (tensor): [..., action_dim]
    :return: AttrDict mapping each action name to its slice of the last dimension
    """
    d = AttrDict()
    idx = 0
    for name in self._env_spec.action_names:
        dim = np.sum(self._env_spec.names_to_shapes.get_recursive(name))
        value = action[..., idx:idx + dim]
        d.add_recursive(name, value)
        idx += dim
    return d

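# Illustrative sketch (not from the source): the slicing performed by _split_action,
# written with numpy and a plain dict instead of AttrDict. The action names and
# 1-D shapes below are hypothetical examples, not taken from the original spec.
import numpy as np

action_names = ['commands/angular_velocity', 'commands/linear_velocity']
names_to_shapes = {'commands/angular_velocity': (1,), 'commands/linear_velocity': (1,)}

action = np.zeros((8, 4, 2))  # [N, horizon, total action dim]

d = {}
idx = 0
for name in action_names:
    dim = int(np.sum(names_to_shapes[name]))
    d[name] = action[..., idx:idx + dim]  # slice out this component's columns
    idx += dim

assert d['commands/linear_velocity'].shape == (8, 4, 1)
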
def _get_done(self):
    # Episode ends on a collision or when the robot is within 2 meters of the goal.
    names = ['collision/any', 'gps/latlong', 'joy']
    obs = AttrDict.from_dict(self._jackal_subscriber.get(names=names))

    is_collision = obs.collision.any
    is_close_to_goal = np.linalg.norm(
        latlong_to_utm(self._goal_latlong) - latlong_to_utm(obs.gps.latlong)) < 2.0

    return is_collision or is_close_to_goal

def _get_position_outputs(self, preprocess_outputs, inputs):
    assert preprocess_outputs.shape.as_list()[-1] == 3

    if self._is_output_gps:
        batch_size = tf.shape(preprocess_outputs)[0]
        position = tf.zeros([batch_size, 3])
        yaw = -inputs.imu.compass_bearing[:, 0] + 0.5 * np.pi  # so that east is 0 degrees
    else:
        position = inputs.jackal.position
        yaw = inputs.jackal.yaw[:, 0]

    output_positions = rotate_to_global(curr_position=position,
                                        curr_yaw=yaw,
                                        local_position=preprocess_outputs)

    outputs = AttrDict()
    outputs.add_recursive('jackal/position', output_positions)
    return outputs

def _get_goal(self, obs):
    goal_utm = latlong_to_utm(self._goal_latlong)
    goal_utm -= obs.gps.utm
    goal_utm = np.append(goal_utm, 0.)

    cost_weights = rospy.get_param(
        '/cost_weights',
        {
            'collision': 1.0,
            'position': 0.0,
            'action_magnitude': 0.001,
            'action_smooth': 0.0,
            'bumpy': 0.8,
            'position_sigmoid_center': 0.6,
            'position_sigmoid_scale': 100.
        })
    for key, weight in cost_weights.items():
        cost_weights[key] = np.ravel(weight).astype(np.float32)

    return AttrDict(position=goal_utm, cost_weights=AttrDict.from_dict(cost_weights))

def _preprocess_action_inputs(self, inputs):
    ### separate out actions
    action_inputs = inputs.filter_recursive(
        lambda key, value: key in self._env_spec.action_names)

    # normalization
    action_inputs.modify_recursive(lambda arr: tf.cast(arr, tf.float32))
    normalized_inputs = self._env_spec.normalize(action_inputs)

    actions = AttrDict()
    for key in self._env_spec.action_names:
        value = normalized_inputs.get_recursive(key)
        if len(value.shape) == 2:
            value = value[..., tf.newaxis]
        elif len(value.shape) == 3:
            pass
        else:
            raise ValueError
        actions.add_recursive(key, value)

    return actions

def get_batch(self, batch_size, horizon):
    # Fetch the next batch from the dataset iterator and split the flat
    # 'inputs/...' and 'outputs/...' keys into separate AttrDicts.
    if self._static_inputs_and_outputs is None:
        self._static_inputs_and_outputs = self._iterator.get_next()
    inputs_and_outputs = self._static_inputs_and_outputs

    inputs = AttrDict()
    outputs = AttrDict()
    for key, value in inputs_and_outputs.items():
        if key.startswith('inputs/'):
            inputs.add_recursive(key.replace('inputs/', ''), value)
        else:
            outputs.add_recursive(key.replace('outputs/', ''), value)

    return inputs, outputs

def _get_outputs(self, preprocess_outputs, inputs, denormalize=True):
    """
    Split the outputs into each prediction component and denormalize

    :param preprocess_outputs (tensor): [batch_size, horizon, dim]
    :return: AttrDict
    """
    ### split and denormalize
    outputs_denormalized = AttrDict()
    start_idx = 0
    for output_observation in self._output_observations:
        name = output_observation.name
        shape = self._env_spec.names_to_shapes.get_recursive(name)
        assert len(shape) == 1, 'Can only predict vector quantities'
        dim = shape[0]
        outputs_slice_denormalized = preprocess_outputs[..., start_idx:start_idx + dim]
        outputs_denormalized.add_recursive(name, outputs_slice_denormalized)
        start_idx += dim
    outputs = self._env_spec.denormalize(outputs_denormalized) if denormalize else outputs_denormalized

    ### make relative
    for output_observation in self._output_observations:
        name = output_observation.name
        is_relative = output_observation.is_relative
        if is_relative:
            value = outputs.get_recursive(name)
            value += inputs.get_recursive(name)[:, tf.newaxis, :]
            outputs.add_recursive(name, value)

    return outputs

def _preprocess_observation_inputs(self, inputs):
    ### separate out observations
    obs_inputs = inputs.filter_recursive(
        lambda key, value: key in self._env_spec.observation_names)

    # normalization
    obs_inputs.modify_recursive(lambda arr: tf.cast(arr, tf.float32))
    normalized_inputs = self._env_spec.normalize(obs_inputs)

    obs_ims = AttrDict()
    obs_vecs = AttrDict()
    for key, value in normalized_inputs.get_leaf_items():
        if len(value.shape) == 1:
            obs_vecs.add_recursive(key, value[:, tf.newaxis])
        elif len(value.shape) == 2:
            obs_vecs.add_recursive(key, value)
        elif len(value.shape) == 4:
            obs_ims.add_recursive(key, value)
        else:
            raise ValueError

    return obs_ims, obs_vecs

def _get_observation(self):
    # Query the ROS subscriber for the current observations (plus UTM position),
    # resize any camera images to the spec's shapes, and convert values to numpy arrays.
    obs_names = set(self.spec.observation_names)
    obs_names.add('gps/utm')
    obs = AttrDict.from_dict(self._jackal_subscriber.get(names=obs_names))

    if 'images/rgb_left' in obs.get_leaf_keys():
        obs.images.rgb_left = self.spec.process_image('images/rgb_left', obs.images.rgb_left)
    if 'images/rgb_right' in obs.get_leaf_keys():
        obs.images.rgb_right = self.spec.process_image('images/rgb_right', obs.images.rgb_right)

    obs.modify_recursive(lambda v: np.asarray(v))
    return obs

def _setup_mppi_graph(self, model, goals):
    ### create placeholders
    obs_placeholders = AttrDict()
    for name in self._env_spec.observation_names:
        shape = list(self._env_spec.names_to_shapes.get_recursive(name))
        dtype = tf.as_dtype(self._env_spec.names_to_dtypes.get_recursive(name))
        ph = tf.placeholder(dtype, shape=shape, name=name)
        obs_placeholders.add_recursive(name, ph)

    goal_placeholders = AttrDict()
    for name, value in goals.get_leaf_items():
        goal_placeholders.add_recursive(
            name,
            tf.placeholder(tf.as_dtype(value.dtype), shape=value.shape, name=name))

    mppi_mean_placeholder = tf.placeholder(
        tf.float32, name='mppi_mean', shape=[model.horizon, self._action_dim])

    ### get obs lowd
    inputs = obs_placeholders.apply_recursive(lambda value: value[tf.newaxis])
    obs_lowd = model.get_obs_lowd(inputs)

    past_mean = mppi_mean_placeholder[0]
    shifted_mean = tf.concat(
        [mppi_mean_placeholder[1:], mppi_mean_placeholder[-1:]], axis=0)

    # sample through time
    delta_limits = 0.5 * (self._action_selection_upper_limits -
                          self._action_selection_lower_limits)
    eps = tf.random_normal(mean=0,
                           stddev=self._sigma * delta_limits,
                           shape=(self._N, model.horizon, self._action_dim))
    actions = []
    for h in range(model.horizon):
        if h == 0:
            action_h = self._beta * (shifted_mean[h, :] + eps[:, h, :]) + \
                       (1. - self._beta) * past_mean
        else:
            action_h = self._beta * (shifted_mean[h, :] + eps[:, h, :]) + \
                       (1. - self._beta) * actions[-1]
        actions.append(action_h)
    actions = tf.stack(actions, axis=1)
    actions = tf.clip_by_value(
        actions,
        self._action_selection_lower_limits[np.newaxis, np.newaxis],
        self._action_selection_upper_limits[np.newaxis, np.newaxis])

    # forward simulate
    actions_split = self._split_action(actions)
    inputs_tiled = inputs.apply_recursive(
        lambda v: tf.tile(v, [self._N] + [1] * (len(v.shape) - 1)))
    for k, v in actions_split.get_leaf_items():
        inputs_tiled.add_recursive(k, v)
    obs_lowd_tiled = tf.tile(obs_lowd, (self._N, 1))

    ### call model and evaluate cost
    model_outputs = model(inputs_tiled, obs_lowd=obs_lowd_tiled)
    model_outputs = model_outputs.filter_recursive(lambda key, value: key[0] != '_')
    costs_per_timestep = self._cost_fn(inputs_tiled, model_outputs,
                                       goal_placeholders, actions_split)
    costs = tf.reduce_mean(costs_per_timestep.total, axis=1)

    # MPPI update
    scores = -costs
    probs = tf.exp(self._gamma * (scores - tf.reduce_max(scores)))
    probs /= tf.reduce_sum(probs) + 1e-10
    new_mppi_mean = tf.reduce_sum(actions * probs[:, tf.newaxis, tf.newaxis], axis=0)

    best_idx = tf.argmin(costs)
    best_actions = self._split_action(new_mppi_mean)
    get_action_outputs = AttrDict(
        cost=costs[best_idx],
        cost_per_timestep=costs_per_timestep.apply_recursive(lambda v: v[best_idx]),
        action=best_actions.apply_recursive(lambda v: v[0]),
        action_sequence=best_actions,
        model_outputs=model_outputs.apply_recursive(lambda v: v[best_idx]),
        all_costs=costs,
        all_costs_per_timestep=costs_per_timestep,
        all_actions=actions_split,
        all_model_outputs=model_outputs,
        mppi_mean=new_mppi_mean,
    )
    for key, value in get_action_outputs.get_leaf_items():
        get_action_outputs.add_recursive(
            key, tf.identity(value, 'get_action_outputs/' + key))

    return obs_placeholders, goal_placeholders, mppi_mean_placeholder, get_action_outputs

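# Illustrative sketch (not from the source): the MPPI mean update built above, written
# in numpy for a single step. N sampled action sequences are scored by their mean cost,
# converted to softmax-style weights with temperature gamma, and averaged into the new
# mean action sequence. All names and values here are stand-ins for the corresponding
# tensors and hyperparameters in _setup_mppi_graph.
import numpy as np

N, horizon, action_dim, gamma = 64, 8, 2, 50.0
actions = np.random.uniform(-1.0, 1.0, size=(N, horizon, action_dim))  # sampled sequences
costs = np.random.uniform(0.0, 1.0, size=(N,))                         # mean cost per sequence

scores = -costs                                  # lower cost -> higher score
probs = np.exp(gamma * (scores - scores.max()))  # subtract the max for numerical stability
probs /= probs.sum() + 1e-10                     # normalize to a distribution over sequences
new_mppi_mean = (actions * probs[:, None, None]).sum(axis=0)  # shape [horizon, action_dim]
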
class EnvSpec(object):

    def __init__(self, names_shapes_limits_dtypes):
        names_shapes_limits_dtypes = list(names_shapes_limits_dtypes)
        names_shapes_limits_dtypes += [('done', (1,), (0, 1), np.bool)]

        self._names_to_shapes = AttrDict()
        self._names_to_limits = AttrDict()
        self._names_to_dtypes = AttrDict()
        for name, shape, limit, dtype in names_shapes_limits_dtypes:
            self._names_to_shapes.add_recursive(name, shape)
            self._names_to_limits.add_recursive(name, limit)
            self._names_to_dtypes.add_recursive(name, dtype)

    @property
    def observation_names(self):
        raise NotImplementedError

    @property
    def output_observation_names(self):
        return self.observation_names

    @property
    def action_names(self):
        raise NotImplementedError

    @property
    def names(self):
        return self.observation_names + self.action_names

    @property
    def names_to_shapes(self):
        return self._names_to_shapes

    @property
    def names_to_limits(self):
        return self._names_to_limits

    @property
    def names_to_dtypes(self):
        return self._names_to_dtypes

    def dims(self, names):
        return np.array([
            np.sum(self.names_to_shapes.get_recursive(name)) for name in names
        ])

    def dim(self, names):
        return np.sum(self.dims(names))

    def normalize(self, inputs):
        """
        :param inputs (AttrDict):
        :return: AttrDict
        """
        inputs_normalized = AttrDict()
        for key, value in inputs.get_leaf_items():
            lower, upper = self.names_to_limits.get_recursive(key)
            lower, upper = np.array(lower), np.array(upper)
            mean = 0.5 * (lower + upper)
            std = 0.5 * (upper - lower)
            value_normalized = (value - mean) / std
            inputs_normalized.add_recursive(key, value_normalized)
        return inputs_normalized

    def denormalize(self, inputs):
        """
        :param inputs (AttrDict):
        :return: AttrDict
        """
        inputs_denormalized = AttrDict()
        for key, value in inputs.get_leaf_items():
            lower, upper = self.names_to_limits.get_recursive(key)
            lower, upper = np.array(lower), np.array(upper)
            mean = 0.5 * (lower + upper)
            std = 0.5 * (upper - lower)
            value_denormalized = value * std + mean
            inputs_denormalized.add_recursive(key, value_denormalized)
        return inputs_denormalized

    def process_image(self, name, image):
        """
        Default behavior: resize the image
        """
        if len(image.shape) == 4:
            return np.array([self.process_image(name, im_i) for im_i in image])

        return imresize(image, self.names_to_shapes.get_recursive(name))

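# Illustrative sketch (not from the source): the normalize/denormalize arithmetic used
# by EnvSpec, round-tripping a single value with hypothetical limits. Each quantity is
# mapped affinely so that its [lower, upper] range becomes [-1, 1].
import numpy as np

lower, upper = np.array([0.0]), np.array([2.0])  # hypothetical limits for one quantity
mean = 0.5 * (lower + upper)                     # 1.0
std = 0.5 * (upper - lower)                      # 1.0

value = np.array([1.5])
value_normalized = (value - mean) / std          # 0.5, i.e. halfway between mean and upper
value_denormalized = value_normalized * std + mean

assert np.allclose(value, value_denormalized)    # the round trip recovers the original value
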
def _process_inputs(self, inputs):
    """
    Separate out observations/actions and normalize

    :param inputs (AttrDict):
    :return: obs_ims (AttrDict), obs_vecs (AttrDict), actions (AttrDict)
    """
    ### separate out observations/actions
    # normalization
    inputs.modify_recursive(lambda arr: tf.cast(arr, tf.float32))
    normalized_inputs = self._env_spec.normalize(inputs)

    obs_ims = AttrDict()
    obs_vecs = AttrDict()
    for key in self._env_spec.observation_names:
        value = normalized_inputs.get_recursive(key)
        if len(value.shape) == 1:
            obs_vecs.add_recursive(key, value[:, tf.newaxis])
        elif len(value.shape) == 2:
            obs_vecs.add_recursive(key, value)
        elif len(value.shape) == 4:
            obs_ims.add_recursive(key, value)
        else:
            raise ValueError

    actions = AttrDict()
    for key in self._env_spec.action_names:
        value = normalized_inputs.get_recursive(key)
        if len(value.shape) == 2:
            value = value[..., tf.newaxis]
        elif len(value.shape) == 3:
            pass
        else:
            raise ValueError
        actions.add_recursive(key, value)

    return obs_ims, obs_vecs, actions