Example #1
    def _get_goal(self):
        goal = AttrDict()

        goal.is_turn = np.ravel(False).astype(np.float32)
        goal.turn_goal = np.ravel(0.).astype(np.float32)

        return goal
Example #2
    def get_batch(self, indices=None, is_tf=True):
        if indices is None:
            indices = np.random.choice(self._valid_start_indices[:-self._horizon],
                                       size=self._batch_size)

        sampled_datadict = self._datadict.leaf_apply(
            lambda arr: np.stack([arr[idx:idx+self._horizon+1] for idx in indices], axis=0))

        inputs = AttrDict()
        outputs = AttrDict()
        for key in self._env_spec.names:
            value = sampled_datadict[key]

            if key in self._env_spec.observation_names:
                inputs[key] = value[:, 0]
            elif key in self._env_spec.action_names:
                inputs[key] = value[:, :-1]

            if key in self._env_spec.output_observation_names:
                outputs[key] = value[:, 1:]

        outputs.done = sampled_datadict.done[:, 1:].cumsum(axis=1).astype(bool)

        if is_tf:
            for d in (inputs, outputs):
                d.leaf_modify(lambda x: tf.convert_to_tensor(x))

        return inputs, outputs
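A minimal usage sketch of the batching above, assuming an already-constructed dataset instance (the variable name `dataset` and the expectation that `is_tf=False` yields plain NumPy arrays are assumptions for illustration):

inputs, outputs = dataset.get_batch(is_tf=False)

# Observations are taken at the first timestep of each sampled window,
# actions span the horizon, and output observations start one step later.
for key, value in inputs.leaf_items():
    print('input ', key, value.shape)
for key, value in outputs.leaf_items():
    print('output', key, value.shape)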
Example #3
    def warm_start(self, model, observation, goal):
        assert not tf.executing_eagerly()

        logger.debug('Setting up CEM graph....')
        self._session = tf.get_default_session()
        assert self._session is not None

        ### create placeholders
        self._obs_placeholders = AttrDict()
        for name in self._env_spec.observation_names:
            shape = list(self._env_spec.names_to_shapes[name])
            dtype = tf.as_dtype(self._env_spec.names_to_dtypes[name])
            ph = tf.placeholder(dtype, shape=shape, name=name)
            self._obs_placeholders[name] = ph

        self._goal_placeholders = AttrDict()
        for name, value in goal.leaf_items():
            self._goal_placeholders[name] = tf.placeholder(tf.as_dtype(
                value.dtype),
                                                           shape=value.shape,
                                                           name=name)

        self._get_action_outputs = self._cem(model, self._obs_placeholders,
                                             self._goal_placeholders)

        logger.debug('CEM graph setup complete')
Example #4
    def call(self, inputs, obs_lowd=None, training=False):
        obs_lowd = obs_lowd if obs_lowd is not None else self.get_obs_lowd(
            inputs, training=training)
        outputs = AttrDict(obs_lowd=obs_lowd)
        if training:
            outputs.kernels = tf_utils.get_kernels(self.layers)

        return outputs.freeze()
Example #5
    def get_batch(self):
        """
        Returns:
            inputs (AttrDict)
            outputs (AttrDict)
        """
        raise NotImplementedError
        inputs = AttrDict()
        outputs = AttrDict()
        return inputs, outputs
Example #6
    def __init__(self, params, names_shapes_limits_dtypes=[]):
        names_shapes_limits_dtypes = list(names_shapes_limits_dtypes)
        names_shapes_limits_dtypes += [('done', (1,), (0, 1), np.bool_)]

        self._names_to_shapes = AttrDict()
        self._names_to_limits = AttrDict()
        self._names_to_dtypes = AttrDict()
        for name, shape, limit, dtype in names_shapes_limits_dtypes:
            self._names_to_shapes[name] = shape
            self._names_to_limits[name] = limit
            self._names_to_dtypes[name] = dtype
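A hedged sketch of what the `names_shapes_limits_dtypes` argument might look like, assuming the constructor above belongs to the `EnvSpec` imported in Example #22; the entry names, shapes, and limits below are illustrative assumptions, not values from the source:

import numpy as np

# Hypothetical entries: (name, shape, (lower, upper) limits, dtype).
names_shapes_limits_dtypes = [
    ('images/front', (96, 192, 3), (0, 255), np.uint8),
    ('commands/turn', (1,), (-1., 1.), np.float32),
]
spec = EnvSpec(AttrDict(), names_shapes_limits_dtypes=names_shapes_limits_dtypes)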
Example #7
    def _get_outputs(self, inputs, rnn_outputs, training=False):
        # bin the actions
        actions = inputs.commands.turn[..., 0]
        lower, upper = -1., 1.
        edges = np.linspace(lower, upper, self._num_collision_bins + 1).astype(np.float32)
        bins = tf.cast(tfp.stats.find_bins(actions, edges, extend_lower_interval=True, extend_upper_interval=True), tf.int32)

        turn_one_hot = tf.one_hot(bins, depth=self._num_collision_bins, axis=2)

        # for training
        all_pre_logits = rnn_outputs
        all_unscaled_logits = tf.nn.softmax(all_pre_logits, axis=-1)
        all_logits = -5 + 10 * all_unscaled_logits # from -5 to 5
        logits = tf.reduce_sum(all_logits * turn_one_hot, axis=-1)
        logits = logits[..., tf.newaxis] # for compatibility

        # for planning
        probs = tf.nn.sigmoid(logits)

        return AttrDict(
            turn_one_hot=turn_one_hot,
            logits=logits,
            probcoll=probs,

            # debugging
            all_logits=all_logits
        )
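The binning step above maps continuous turn commands in [-1, 1] onto discrete bin indices before one-hot encoding. A NumPy-only analogue of that step, with an illustrative bin count and sample values:

import numpy as np

num_bins = 5
edges = np.linspace(-1., 1., num_bins + 1).astype(np.float32)
turn = np.array([-0.9, 0.0, 0.95], dtype=np.float32)
# np.digitize plays the role of tfp.stats.find_bins here; the clip mimics
# the extend_lower_interval / extend_upper_interval behaviour.
bins = np.clip(np.digitize(turn, edges) - 1, 0, num_bins - 1)
turn_one_hot = np.eye(num_bins, dtype=np.float32)[bins]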
Example #8
    def _get_outputs(self, inputs, obs_lowd, training=False):
        # labels
        horizon = 1
        turn = tf.reshape(inputs.commands.turn[..., :horizon, 0], (-1, ))
        bins = tfp.stats.find_bins(turn,
                                   self._bin_edges,
                                   extend_lower_interval=True,
                                   extend_upper_interval=True)

        # prediction
        logits = obs_lowd
        dist = tf.distributions.Categorical(logits)
        log_probs = dist.log_prob(bins)

        # accuracy
        accs = tf.cast(
            tf.equal(tf.argmax(logits, axis=-1), tf.cast(bins, tf.int64)),
            tf.float32)

        return AttrDict(
            logits=logits,
            log_prob=log_probs,
            acc=accs,
            bin_edges=tf.convert_to_tensor(self._bin_edges),
        )
Example #9
    def _load_hdf5(self, fname):
        self._d = AttrDict()
        with h5py.File(fname, 'r') as f:
            for key in self.keys:
                self._d[key] = np.array(f[key])
                if 'image' in key:
                    self._d[key] = np.array(
                        list(np_utils.uncompress_video(self._d[key])))
Example #10
    def _init_setup(self):
        self._fig, axes = plt.subplots(4, 1, figsize=(8, 20))
        ax_observation, ax_policy, ax_turn, ax_model = axes.ravel()

        self._pyblit = AttrDict(
            observation=self._init_setup_observation(ax_observation),
            policy=self._init_setup_policy(ax_policy),
            turn=self._init_setup_turn(ax_turn),
            model=self._init_setup_model(ax_model))

        self._fig_shown = False
Example #11
    def _unwhiten_and_split(self, tensor_whitened, names):
        lower, upper = self._env_spec.limits(names)
        mean = 0.5 * (lower + upper)
        var = 0.5 * (upper - lower)
        tensor = tensor_whitened * var + mean

        return AttrDict.from_dict({
            k: v
            for k, v in zip(
                names, tf.split(tensor, self._env_spec.dims(names), axis=2))
        })
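The unwhitening above maps a value in roughly [-1, 1] back to the raw limits: with mean = (lower + upper) / 2 and var = (upper - lower) / 2, a whitened -1 lands on `lower` and +1 on `upper`. A quick NumPy check with illustrative limits:

import numpy as np

lower, upper = np.float32(-0.5), np.float32(2.0)  # illustrative limits
mean = 0.5 * (lower + upper)
var = 0.5 * (upper - lower)
assert np.isclose(-1.0 * var + mean, lower)
assert np.isclose(1.0 * var + mean, upper)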
Example #12
    def _get_obs(self):
        obs = AttrDict()

        for name in set(self._env_spec.observation_names):
            obs[name] = self._data_traverser.get(name, horizon=1)[0]

        original_image = obs.images.front
        desired_shape = self._env_spec.names_to_shapes.images.front
        image = process_image(original_image, desired_shape, image_rectify=True)
        obs.images.front = image

        return obs
Example #13
    def _load_hdf5s(self):
        hdf5_fnames = file_utils.get_files_ending_with(self._hdf5_folders, '.hdf5')

        # initialize to empty lists
        datadict = AttrDict()
        for key in self._env_spec.names:
            datadict[key] = []
        datadict.done = []
        datadict.hdf5_fname = []
        datadict.rollout_timestep = []

        # concatenate each hdf5
        for hdf5_fname in hdf5_fnames:
            logger.debug('Loading ' + hdf5_fname)
            with h5py.File(hdf5_fname, 'r') as f:
                hdf5_names = file_utils.get_hdf5_leaf_names(f)
                hdf5_lens = np.array([len(f[name]) for name in hdf5_names])
                if len(hdf5_names) == 0:
                    logger.warning('Empty hdf5, skipping!')
                    continue
                if not np.all(hdf5_lens == hdf5_lens[0]):
                    logger.warning('data lengths not all the same, skipping!')
                    continue
                if hdf5_lens[0] == 0:
                    logger.warning('data lengths are 0, skipping!')
                    continue

                for key in self._env_spec.names:
                    assert key in f, '"{0}" not in env spec names'.format(key)
                    value = self._parse_hdf5(key, f[key])
                    datadict[key].append(value)
                datadict.done.append([False] * (len(value) - 1) + [True])
                datadict.hdf5_fname.append([hdf5_fname] * len(value))
                datadict.rollout_timestep.append(np.arange(len(value)))

        # turn every value into a single numpy array
        datadict.leaf_modify(lambda arr_list: np.concatenate(arr_list, axis=0))
        datadict_len = len(datadict.done)
        datadict.leaf_assert(lambda arr: len(arr) == datadict_len)
        logger.debug('Dataset length: {}'.format(datadict_len))

        # everywhere not done
        valid_start_indices = np.where(np.logical_not(datadict.done))[0]

        return datadict, valid_start_indices
Example #14
    def get_action(self, model, observation, goal):
        assert self._session is not None

        feed_dict = self._get_action_feed_dict(observation, goal)

        get_action_tf = {}
        for name, tensor in self._get_action_outputs.leaf_items():
            get_action_tf[name] = tensor

        get_action_tf_output = self._session.run(get_action_tf,
                                                 feed_dict=feed_dict)

        get_action = AttrDict.from_dict(get_action_tf_output)
        get_action.cost_fn = self._cost_fn

        return get_action
Example #15
    def _load_hdf5(self, fname):
        self._d = AttrDict()
        self._image_buffer = None
        self._image_key = None
        with h5py.File(fname, 'r') as f:
            for key in self.keys:
                if 'image' in key:
                    assert self._image_key is None
                    self._image_key = key
                    self._image_buffer = np.array(f[key])
                    self._d[key] = list()
                else:
                    self._d[key] = np.array(f[key])

        assert self._image_buffer is not None
        thread = threading.Thread(target=self._background_video_thread)
        thread.daemon = True
        thread.start()
Example #16
    def _init_setup_observation(self, ax):
        imshow = pyblit.Imshow(ax)
        return AttrDict(imshow=imshow, ax=pyblit.Axis(ax, [imshow]))
Example #17
    def _init_setup_turn(self, ax):
        bar = pyblit.Barh(ax)
        return AttrDict(bar=bar, ax=pyblit.Axis(ax, [bar]))
Example #18
    def _init_setup_model(self, ax):
        imshow = pyblit.Imshow(ax)
        return AttrDict(imshow=imshow, ax=pyblit.Axis(ax, [imshow]))
Example #19
    def _init_setup_model(self, ax):
        batch_line = pyblit.BatchLineCollection(ax)
        return AttrDict(batch_line=batch_line,
                        ax=pyblit.Axis(ax, [batch_line]))
Example #20
    def _cem(self, model, observation, goal):
        observation.leaf_modify(lambda v: tf.convert_to_tensor(v))
        goal.leaf_modify(lambda v: tf.convert_to_tensor(v))

        ### get obs lowd
        inputs = observation.leaf_apply(lambda value: value[tf.newaxis])
        obs_lowd = model.get_obs_lowd(inputs)

        ### CEM setup
        inputs = inputs.leaf_filter(lambda key, value: len(value.shape) < 4)
        action_selection_lower_limits = np.tile(self._action_selection_lower_limits, (self._horizon,))
        action_selection_upper_limits = np.tile(self._action_selection_upper_limits, (self._horizon,))
        action_distribution = tf.contrib.distributions.Uniform(
            action_selection_lower_limits,
            action_selection_upper_limits
        )
        # CEM params
        Ms = [self._M_init] + [self._M] * (self._itrs - 1)
        Ks = [self._K] * (self._itrs - 1) + [1]

        ### keep track of
        all_costs = []
        all_costs_per_timestep = []
        all_actions = []
        all_model_outputs = []

        ### CEM loop
        for M, K in zip(Ms, Ks):
            concat_actions = tf.reshape(
                action_distribution.sample((M,)),
                (M, self._horizon, -1)
            )

            concat_actions = tf.clip_by_value(
                concat_actions,
                np.reshape(action_selection_lower_limits, (self._horizon, self._action_dim)),
                np.reshape(action_selection_upper_limits, (self._horizon, self._action_dim))
            )

            actions = self._split_action(concat_actions)

            inputs_tiled = inputs.leaf_filter(lambda k, v: k in self._env_spec.output_observation_names)
            inputs_tiled = inputs_tiled.leaf_apply(lambda v: tf.tile(v, [M] + [1] * (len(v.shape) - 1)))

            for k, v in actions.leaf_items():
                inputs_tiled[k] = v

            obs_lowd_tiled = tf.tile(obs_lowd, (M, 1))

            ### call model and evaluate cost
            model_outputs = model.call(inputs_tiled, obs_lowd=obs_lowd_tiled)
            model_outputs = model_outputs.leaf_filter(lambda key, value: key[0] != '_')
            costs_per_timestep = self._cost_fn(inputs_tiled, model_outputs, goal, actions)
            costs = tf.reduce_mean(costs_per_timestep.total, axis=1)

            ### keep track
            all_costs.append(costs)
            all_costs_per_timestep.append(costs_per_timestep)
            all_actions.append(actions)
            all_model_outputs.append(model_outputs.leaf_filter(lambda k, v: tf.is_tensor(v)))

            ### get top K
            _, top_indices = tf.nn.top_k(-costs, k=K)
            top_actions = tf.gather(
                tf.reshape(concat_actions, [M, self._horizon * self._action_dim]),
                indices=top_indices
            )

            ### set new distribution based on top k
            mean = tf.reduce_mean(top_actions, axis=0)
            covar = tf.matmul(tf.transpose(top_actions), top_actions) / float(K)
            sigma = covar + self._eps * tf.eye(self._horizon * self._action_dim)

            action_distribution = tf.contrib.distributions.MultivariateNormalFullCovariance(
                loc=mean,
                covariance_matrix=sigma
            )

        all_costs = tf.concat(all_costs, axis=0)
        all_costs_per_timestep = AttrDict.leaf_combine_and_apply(all_costs_per_timestep, lambda arrs: tf.concat(arrs, axis=0))
        all_actions = AttrDict.leaf_combine_and_apply(all_actions, lambda arrs: tf.concat(arrs, axis=0))
        all_model_outputs = AttrDict.leaf_combine_and_apply(all_model_outputs, lambda arrs: tf.concat(arrs, axis=0))

        best_idx = tf.argmin(all_costs)
        best_cost = all_costs[best_idx]
        best_cost_per_timestep = all_costs_per_timestep.leaf_apply(lambda v: v[best_idx])
        best_action_sequence = all_actions.leaf_apply(lambda v: v[best_idx])
        best_action = best_action_sequence.leaf_apply(lambda v: v[0])
        # best_model_outputs = all_model_outputs.leaf_apply(lambda v: v[best_idx])

        get_action_outputs = AttrDict(
            cost=best_cost,
            cost_per_timestep=best_cost_per_timestep,
            action=best_action,
            action_sequence=best_action_sequence,
            # model_outputs=best_model_outputs,

            all_costs=all_costs,
            all_costs_per_timestep=all_costs_per_timestep,
            all_actions=all_actions,
            all_model_outputs=all_model_outputs
        )

        return get_action_outputs
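The loop above follows the standard cross-entropy method pattern: sample action sequences, score them with the model and cost function, keep the lowest-cost samples, and refit the sampling distribution. A minimal NumPy-only sketch of that pattern with an illustrative quadratic cost (none of the names below come from the source):

import numpy as np

def cem_sketch(cost_fn, dim, itrs=3, M=64, K=8, eps=1e-6, seed=0):
    # Sample M candidates, keep the K cheapest, refit mean and covariance.
    rng = np.random.default_rng(seed)
    mean, cov = np.zeros(dim), np.eye(dim)
    for _ in range(itrs):
        samples = rng.multivariate_normal(mean, cov, size=M)
        costs = np.array([cost_fn(s) for s in samples])
        elites = samples[np.argsort(costs)[:K]]
        mean = elites.mean(axis=0)
        cov = np.cov(elites, rowvar=False) + eps * np.eye(dim)
    return mean

best = cem_sketch(lambda a: np.sum((a - 0.3) ** 2), dim=4)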
Example #21
    def _get_obs(self):
        obs = AttrDict()
        for name in self._env_spec.observation_names:
            obs[name] = self._data_traverser.get(name, horizon=1)
        return obs
Example #22
    def get_batch(self, indices=None, is_tf=True):
        if indices is None:
            indices = np.random.choice(self._valid_start_indices[:-self._horizon],
                                       size=self._batch_size)

        sampled_datadict = self._datadict.leaf_apply(
            lambda arr: np.stack([arr[idx:idx+self._horizon+1] for idx in indices], axis=0))

        inputs = AttrDict()
        outputs = AttrDict()
        for key in self._env_spec.names:
            value = sampled_datadict[key]

            if key in self._env_spec.observation_names:
                inputs[key] = value[:, 0]
            elif key in self._env_spec.action_names:
                inputs[key] = value[:, :-1]

            if key in self._env_spec.output_observation_names:
                outputs[key] = value[:, 1:]

        outputs.done = sampled_datadict.done[:, 1:].cumsum(axis=1).astype(bool)

        if is_tf:
            for d in (inputs, outputs):
                d.leaf_modify(lambda x: tf.convert_to_tensor(x))

        return inputs, outputs

    def __len__(self):
        return len(self._datadict.done)


if __name__ == '__main__':
    from sidewalk.envs.env_spec import EnvSpec
    d = Hdf5Dataset(AttrDict(), EnvSpec(AttrDict()))
Example #23
    def _setup_mppi_graph(self, model, goals):
        ### create placeholders
        obs_placeholders = AttrDict()
        for name in self._env_spec.observation_names:
            shape = list(self._env_spec.names_to_shapes[name])
            dtype = tf.as_dtype(self._env_spec.names_to_dtypes[name])
            ph = tf.placeholder(dtype, shape=shape, name=name)
            obs_placeholders[name] = ph

        goal_placeholders = AttrDict()
        for name, value in goals.leaf_items():
            goal_placeholders[name] = tf.placeholder(tf.as_dtype(value.dtype),
                                                     shape=value.shape,
                                                     name=name)

        mppi_mean_placeholder = tf.placeholder(
            tf.float32,
            name='mppi_mean',
            shape=[self._horizon, self._action_dim])

        ### get obs lowd
        inputs = obs_placeholders.leaf_apply(lambda value: value[tf.newaxis])
        obs_lowd = model.get_obs_lowd(inputs)

        past_mean = mppi_mean_placeholder[0]
        shifted_mean = tf.concat(
            [mppi_mean_placeholder[1:], mppi_mean_placeholder[-1:]], axis=0)

        # sample through time
        delta_limits = 0.5 * (self._action_selection_upper_limits -
                              self._action_selection_lower_limits)
        eps = tf.random_normal(mean=0,
                               stddev=self._sigma * delta_limits,
                               shape=(self._N, self._horizon,
                                      self._action_dim))
        actions = []
        for h in range(self._horizon):
            if h == 0:
                # action_h = self._beta * (shifted_mean[h, :] + eps[:, h, :]) + (1. - self._beta) * past_mean
                action_h = self._beta * (past_mean + eps[:, h, :]) + (
                    1. - self._beta) * past_mean
            else:
                action_h = self._beta * (shifted_mean[h, :] + eps[:, h, :]) + (
                    1. - self._beta) * actions[-1]
            actions.append(action_h)
        actions = tf.stack(actions, axis=1)
        actions = tf.clip_by_value(
            actions, self._action_selection_lower_limits[np.newaxis,
                                                         np.newaxis],
            self._action_selection_upper_limits[np.newaxis, np.newaxis])

        # forward simulate
        actions_split = self._split_action(actions)
        inputs_tiled = inputs.leaf_filter(
            lambda k, v: k in self._env_spec.output_observation_names
        ).leaf_apply(lambda v: tf.tile(v, [self._N] + [1] *
                                       (len(v.shape) - 1)))
        inputs_tiled.combine(actions_split)
        inputs_tiled.freeze()
        obs_lowd_tiled = tf.tile(obs_lowd, (self._N, 1))

        ### call model and evaluate cost
        model_outputs = model.call(inputs_tiled, obs_lowd=obs_lowd_tiled)
        costs_per_timestep = self._cost_fn(inputs_tiled, model_outputs,
                                           goal_placeholders, actions_split)
        costs = tf.reduce_mean(costs_per_timestep.total, axis=1)

        # MPPI update
        scores = -costs
        probs = tf.exp(self._gamma * (scores - tf.reduce_max(scores)))
        probs /= tf.reduce_sum(probs) + 1e-10
        new_mppi_mean = tf.reduce_sum(actions *
                                      probs[:, tf.newaxis, tf.newaxis],
                                      axis=0)

        best_idx = tf.argmin(costs)
        best_actions = self._split_action(new_mppi_mean)

        get_action_outputs = AttrDict(
            cost=costs[best_idx],
            cost_per_timestep=costs_per_timestep.leaf_apply(
                lambda v: v[best_idx]),
            action=best_actions.leaf_apply(lambda v: v[0]),
            action_sequence=best_actions,
            # model_outputs=model_outputs.leaf_filter(lambda k, v: tf.is_tensor(v)).leaf_apply(lambda v: v[best_idx]),
            all_costs=costs,
            all_costs_per_timestep=costs_per_timestep,
            all_actions=actions_split,
            all_model_outputs=model_outputs.leaf_filter(
                lambda k, v: tf.is_tensor(v)),
            mppi_mean=new_mppi_mean,
        ).freeze()

        return obs_placeholders, goal_placeholders, mppi_mean_placeholder, get_action_outputs
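The MPPI update at the end reduces to exponentiating the max-shifted (negated) costs, normalizing them into weights, and taking the weighted mean over the sampled action sequences. A few NumPy lines capture that step; the shapes and gamma value here are illustrative:

import numpy as np

def mppi_update(costs, actions, gamma=1.0):
    # costs: (N,), actions: (N, horizon, action_dim) -> (horizon, action_dim)
    scores = -costs
    probs = np.exp(gamma * (scores - scores.max()))
    probs /= probs.sum() + 1e-10
    return (actions * probs[:, None, None]).sum(axis=0)

rng = np.random.default_rng(0)
new_mean = mppi_update(rng.standard_normal(128), rng.standard_normal((128, 8, 2)))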
Example #24
class StaticCEMPolicy(EagerCEMPolicy):
    @abstract.overrides
    def _init_setup(self):
        super()._init_setup()

        # static graph
        self._session = None
        self._obs_placeholders = None
        self._goal_placeholders = None
        self._get_action_outputs = None

    @abstract.overrides
    def warm_start(self, model, observation, goal):
        assert not tf.executing_eagerly()

        logger.debug('Setting up CEM graph....')
        self._session = tf.get_default_session()
        assert self._session is not None

        ### create placeholders
        self._obs_placeholders = AttrDict()
        for name in self._env_spec.observation_names:
            shape = list(self._env_spec.names_to_shapes[name])
            dtype = tf.as_dtype(self._env_spec.names_to_dtypes[name])
            ph = tf.placeholder(dtype, shape=shape, name=name)
            self._obs_placeholders[name] = ph

        self._goal_placeholders = AttrDict()
        for name, value in goal.leaf_items():
            self._goal_placeholders[name] = tf.placeholder(tf.as_dtype(
                value.dtype),
                                                           shape=value.shape,
                                                           name=name)

        self._get_action_outputs = self._cem(model, self._obs_placeholders,
                                             self._goal_placeholders)

        logger.debug('CEM graph setup complete')

    def _get_action_feed_dict(self, observation, goal):
        feed_dict = {}
        for name, ph in self._obs_placeholders.leaf_items():
            value = np.array(observation[name])
            if value.shape == tuple():
                value = value[np.newaxis]
            feed_dict[ph] = value
        for name, ph in self._goal_placeholders.leaf_items():
            feed_dict[ph] = np.array(goal[name])

        return feed_dict

    @abstract.overrides
    def get_action(self, model, observation, goal):
        assert self._session is not None

        feed_dict = self._get_action_feed_dict(observation, goal)

        get_action_tf = {}
        for name, tensor in self._get_action_outputs.leaf_items():
            get_action_tf[name] = tensor

        get_action_tf_output = self._session.run(get_action_tf,
                                                 feed_dict=feed_dict)

        get_action = AttrDict.from_dict(get_action_tf_output)
        get_action.cost_fn = self._cost_fn

        return get_action
Example #25
    def _get_goal(self):
        goal = AttrDict()
        for name in self._env_spec.goal_names:
            goal[name] = self._data_traverser.get(name, horizon=1)
        return goal
Example #26
    def _split_action(self, actions):
        split_actions = AttrDict()
        for name, tensor in zip(self._env_spec.action_names,
                                tf.split(actions, self._env_spec.dims(self._env_spec.action_names), axis=-1)):
            split_actions[name] = tensor
        return split_actions