Example #1
        def augment_fn(states: list):
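            # Randomly augments the camera images in the given batch of states: each transform
            # below is applied with a probability scaled by the augmentation strength `alpha`.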
            state = prepare(states)
            image = utils.to_float(state['state_image'])

            if alpha > 0.0:
                # color jitter
                if aug.tf_chance(seed=seed) < alpha:
                    image = rl.augmentations.simclr.color_jitter(image, strength=alpha, seed=seed)

                # gaussian blur
                if aug.tf_chance(seed=seed) < 0.25 * alpha:
                    blur_size = 3 if aug.tf_chance(seed=seed) >= 0.5 else 5
                    image = aug.tf_gaussian_blur(image, size=blur_size, seed=seed)

                # salt-and-pepper noise
                if aug.tf_chance(seed=seed) < 0.2 * alpha:
                    image = aug.tf_salt_and_pepper_batch(image, amount=0.1)

                # gaussian noise
                if aug.tf_chance(seed=seed) < 0.33 * alpha:
                    image = aug.tf_gaussian_noise_batch(image, amount=0.10, std=0.075, seed=seed)

                image = aug.tf_normalize_batch(image)

                # cutout
                if aug.tf_chance(seed=seed) < 0.15 * alpha:
                    image = aug.tf_cutout_batch(image, size=6, seed=seed)

                # coarse dropout
                if aug.tf_chance(seed=seed) < 0.15 * alpha:
                    image = aug.tf_coarse_dropout_batch(image, size=81, amount=0.04, seed=seed)

            state['state_image'] = image
            return state
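
For reference, a minimal self-contained sketch of the same chance-gated augmentation pattern follows, using only standard TensorFlow image ops; the `aug.*` and `rl.augmentations.*` helpers above are project-specific, so the transforms and probability thresholds below are purely illustrative.

import tensorflow as tf

def chance(seed=None):
    # uniform sample in [0, 1), used to gate each augmentation
    return tf.random.uniform(shape=(), minval=0.0, maxval=1.0, seed=seed)

def simple_augment(image, alpha=0.5, seed=None):
    # convert the input image to float32 in [0, 1]
    image = tf.image.convert_image_dtype(image, tf.float32)

    if chance(seed) < alpha:
        image = tf.image.random_brightness(image, max_delta=0.2 * alpha, seed=seed)

    if chance(seed) < 0.5 * alpha:
        image = tf.image.random_contrast(image, lower=1.0 - 0.4 * alpha, upper=1.0 + 0.4 * alpha, seed=seed)

    if chance(seed) < 0.33 * alpha:
        # additive gaussian noise, clipped back into the valid [0, 1] range
        noise = tf.random.normal(tf.shape(image), stddev=0.075, seed=seed)
        image = tf.clip_by_value(image + noise, 0.0, 1.0)

    return image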
Example #2
    def filter_throttle(s, a, r):
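        # keep only samples with non-negative throttle (a[:, 0]); the reward sequence has one
        # extra terminal element, so its mask is padded with a trailing True to preserve it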
        mask = a[:, 0] >= 0.0

        s = {k: utils.to_float(v)[mask] for k, v in s.items()}

        return s, a[mask], r[tf.concat([mask, [True]], axis=0)]
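
A tiny self-contained example of the masking logic above (dummy values, assuming one more reward than actions, with the terminal reward always kept):

import tensorflow as tf

actions = tf.constant([[0.5, 0.0], [-0.2, 0.1], [0.9, -0.3]])  # 3 actions: (throttle, steering)
rewards = tf.constant([1.0, 2.0, 3.0, 4.0])                    # 4 rewards: one extra, terminal element

mask = actions[:, 0] >= 0.0                                    # [True, False, True]
kept_actions = tf.boolean_mask(actions, mask)                  # 2 actions left
kept_rewards = tf.boolean_mask(rewards, tf.concat([mask, [True]], axis=0))  # [1.0, 3.0, 4.0]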
Example #3
def explore_traces(traces_dir: str, amount=64, seed=None):
    import random
    import tensorflow as tf

    # how many samples are still needed for each steering class
    amounts = dict(left=amount, right=amount, center=amount)

    def filter_throttle(s, a, r):
        mask = a[:, 0] >= 0.0

        s = {k: utils.to_float(v)[mask] for k, v in s.items()}

        return s, a[mask], r[tf.concat([mask, [True]], axis=0)]

    def shuffle_trace(s: dict, a, r):
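        # shuffle states, actions and rewards with the same random permutation;
        # the extra terminal reward is re-appended at the end so lengths stay aligned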
        indices = tf.range(start=0, limit=tf.shape(a)[0], dtype=tf.int32)
        indices = tf.random.shuffle(indices)

        for k, v in s.items():
            s[k] = tf.gather(v, indices)

        a = tf.gather(a, indices)
        r = tf.gather(r, tf.concat([indices, [tf.shape(r)[0] - 1]], axis=0))

        return s, a, r

    def mask_reward(r, mask):
        return r[tf.concat([mask, [True]], axis=0)]

    def filter_steering(s, a, r, t=0.1):
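        # split samples into steering buckets (a[:, 1] is the steering command): left (<= -t),
        # right (>= t) and center (in between), taking at most `amounts[k]` samples per bucket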
        masks = dict(left=a[:, 1] <= -t,
                     right=a[:, 1] >= t,
                     center=(a[:, 1] > -t) & (a[:, 1] < t))

        filtered_data = []

        for k in ['left', 'center', 'right']:
            mask = masks[k]
            taken = int(min(amounts[k], tf.reduce_sum(tf.cast(mask, tf.int32))))
            amounts[k] -= taken

            filtered_data.append(
                dict(state={_k: v[mask][:taken] for _k, v in s.items()},
                     action=a[mask][:taken],
                     reward=mask_reward(r, mask)[:taken]))
        return filtered_data

    random.seed(seed)
    data = None

    while sum(amounts.values()) > 0:
        for j, trace in enumerate(utils.load_traces(traces_dir)):
            print(f'trace-{j}')
            print('amounts:', amounts)
            state, action, reward, _ = utils.unpack_trace(trace)
            state, action, reward = filter_throttle(state,
                                                    utils.to_float(action),
                                                    reward)
            state, action, reward = shuffle_trace(state, action, reward)
            f_data = filter_steering(state, action, reward)

            if data is None:
                data = f_data
            else:
                for i, d in enumerate(f_data):
                    data[i]['state'] = utils.concat_dict_tensor(
                        data[i]['state'], d['state'])
                    data[i]['action'] = tf.concat(
                        [data[i]['action'], d['action']], axis=0)
                    data[i]['reward'] = tf.concat(
                        [data[i]['reward'], d['reward']], axis=0)

            if sum(amounts.values()) <= 0:
                break

    for i, d in enumerate(data):
        print(i, d['action'].shape)

    # concat the left, center, and right parts together; the result `d` is only
    # inspected interactively at the breakpoint below
    d = dict(state=utils.concat_dict_tensor(*list(d['state'] for d in data)),
             action=tf.concat(list(d['action'] for d in data), axis=0),
             reward=tf.concat(list(d['reward'] for d in data), axis=0))

    breakpoint()
Example #4
    def imitation_objective(self, batch, validation=False):
        """Imitation learning objective with `concordance loss` (i.e. a loss that encourages the network to make
           consistent predictions among augmented and non-augmented batches of data)
        """
        states, aug_states, speed, similarity = batch

        true_actions = utils.to_float(states['action'])
        true_values = states['value']

        # prediction on NON-augmented and AUGMENTED states
        policy, value = self.network.imitation_predict(states)
        policy_aug, value_aug = self.network.imitation_predict(aug_states)

        # actions, values, speed, and similarities
        actions, actions_aug = utils.to_float(policy['actions']), utils.to_float(policy_aug['actions'])
        values, values_aug = value['value'], value_aug['value']
        pi_speed, pi_speed_aug = policy['speed'], policy_aug['speed']
        v_speed, v_speed_aug = value['speed'], value_aug['speed']
        pi_similarity, pi_similarity_aug = policy['similarity'], policy_aug['similarity']
        v_similarity, v_similarity_aug = value['similarity'], value_aug['similarity']

        if not validation:
            self.log_actions(actions_pred_imitation=actions, actions_pred_aug_imitation=actions_aug)
            self.log(values_pred_imitation=values, values_pred_aug_imitation=values_aug,
                     speed_pi=pi_speed, speed_pi_aug=pi_speed_aug, speed_v=v_speed, speed_v_aug=v_speed_aug,
                     similarity_pi=pi_similarity, similarity_pi_aug=pi_similarity_aug,
                     similarity_v=v_similarity, similarity_v_aug=v_similarity_aug)

        # policy loss: batch mean of the per-sample sum of absolute action errors, averaged over clean and augmented predictions
        loss_policy = (tf.reduce_mean(tf.reduce_sum(tf.abs(true_actions - actions), axis=1)) +
                       tf.reduce_mean(tf.reduce_sum(tf.abs(true_actions - actions_aug), axis=1))) / 2.0

        loss_value = (tf.reduce_mean(losses.MSE(y_true=true_values, y_pred=values)) +
                      tf.reduce_mean(losses.MSE(y_true=true_values, y_pred=values_aug))) / 2.0

        loss_speed_policy = (tf.reduce_mean(losses.MSE(y_true=speed, y_pred=pi_speed)) +
                             tf.reduce_mean(losses.MSE(y_true=speed, y_pred=pi_speed_aug))) / 2.0
        loss_speed_value = (tf.reduce_mean(losses.MSE(y_true=speed, y_pred=v_speed)) +
                            tf.reduce_mean(losses.MSE(y_true=speed, y_pred=v_speed_aug))) / 2.0

        loss_similarity_policy = (tf.reduce_mean(losses.MSE(y_true=similarity, y_pred=pi_similarity)) +
                                  tf.reduce_mean(losses.MSE(y_true=similarity, y_pred=pi_similarity_aug))) / 2.0
        loss_similarity_value = (tf.reduce_mean(losses.MSE(y_true=similarity, y_pred=v_similarity)) +
                                 tf.reduce_mean(losses.MSE(y_true=similarity, y_pred=v_similarity_aug))) / 2.0

        # concordance loss: make the predictions on clean and augmented inputs as close as possible
        concordance_policy = (tf.reduce_mean(losses.MSE(actions, actions_aug)) +
                              tf.reduce_mean(losses.MSE(pi_speed, pi_speed_aug)) +
                              tf.reduce_mean(losses.MSE(pi_similarity, pi_similarity_aug))) / 3.0

        concordance_value = (tf.reduce_mean(losses.MSE(values, values_aug)) +
                             tf.reduce_mean(losses.MSE(v_speed, v_speed_aug)) +
                             tf.reduce_mean(losses.MSE(v_similarity, v_similarity_aug))) / 3.0

        # total loss
        total_loss_policy = \
            loss_policy + self.aux * (loss_speed_policy + loss_similarity_policy) + self.delta * concordance_policy
        total_loss_value = \
            loss_value + self.aux * (loss_speed_value + loss_similarity_value) + self.eta * concordance_value

        if not validation:
            self.log(loss_policy=loss_policy, loss_value=loss_value, loss_speed_policy=loss_speed_policy,
                     loss_similarity_policy=loss_similarity_policy, loss_speed_value=loss_speed_value,
                     loss_similarity_value=loss_similarity_value,
                     loss_concordance_policy=concordance_policy, loss_concordance_value=concordance_value,
                     # loss_steer=steer_penalty, loss_throttle=throttle_penalty, loss_entropy=entropy_penalty
            )

        return total_loss_policy, total_loss_value
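
In isolation, each concordance term is just an MSE between the predictions made on clean and augmented inputs; a minimal stand-alone sketch (with dummy tensors in place of the network outputs) is:

import tensorflow as tf

def concordance_loss(pred_clean, pred_aug):
    # penalizes disagreement between predictions on clean and augmented views of the same inputs
    return tf.reduce_mean(tf.square(pred_clean - pred_aug))

# dummy stand-ins for e.g. policy['actions'] and policy_aug['actions']
pred_clean = tf.constant([[0.2, -0.1], [0.5, 0.3]])
pred_aug = tf.constant([[0.25, -0.05], [0.45, 0.35]])
loss = concordance_loss(pred_clean, pred_aug)  # scalar MSE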
Example #5
    def imitation_prepare_data(self, batch_size: int, traces_dir: str, num_traces: int, shuffle=False,
                               offset=0) -> (dict, int):
        """Loads data from traces, and builds a batch with balanced actions (e.g. same amount of left and right
           steering etc.)
        """
        def filter_throttle(s, a, r):
            mask = a[:, 0] >= 0.0

            s = {_k: utils.to_float(v)[mask] for _k, v in s.items()}

            return s, a[mask], r[tf.concat([mask, [True]], axis=0)]

        def shuffle_trace(s: dict, a, r):
            indices = tf.range(start=0, limit=tf.shape(a)[0], dtype=tf.int32)
            indices = tf.random.shuffle(indices)

            for _k, v in s.items():
                s[_k] = tf.gather(v, indices)

            a = tf.gather(a, indices)
            r = tf.gather(r, tf.concat([indices, [tf.shape(r)[0] - 1]], axis=0))

            return s, a, r

        def mask_reward(r, mask):
            return r[tf.concat([mask, [True]], axis=0)]

        def filter_steering(s, a, r, t=0.1):
            masks = dict(left=a[:, 1] <= -t,
                         right=a[:, 1] >= t,
                         center=(a[:, 1] > -t) & (a[:, 1] < t))

            filtered_data = []

            for k in ['left', 'center', 'right']:
                mask = masks[k]
                taken = int(min(amounts[k], tf.reduce_sum(tf.cast(mask, tf.int32))))
                amounts[k] -= taken

                filtered_data.append(dict(state={_k: v[mask][:taken] for _k, v in s.items()},
                                          action=a[mask][:taken],
                                          reward=mask_reward(r, mask)[:taken]))
            return filtered_data

        # how many samples are still needed for each steering class
        amounts = dict(left=batch_size, right=batch_size, center=batch_size)
        data = None
        k = offset

        while sum(amounts.values()) > 0:
            for j, trace in enumerate(utils.load_traces(traces_dir, max_amount=num_traces, shuffle=shuffle,
                                                        offset=0 if self.seed is None else offset)):
                k += 1
                trace = utils.unpack_trace(trace, unpack=False)

                states, actions = trace['state'], utils.to_float(trace['action'])
                rewards = utils.to_float(trace['reward'])
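                # auxiliary supervision targets (speed and similarity) for the speed/similarity heads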
                states['speed'] = utils.to_tensor(trace['info_speed'], expand_axis=-1)
                states['similarity'] = utils.to_tensor(trace['info_similarity'], expand_axis=-1)
                states['state_command'] = self.convert_command(states['state_command'])

                # compute (decomposed) returns
                returns = utils.rewards_to_go(rewards, discount=self.gamma)
                states: dict
                states['returns_base'], states['returns_exp'] = tf.map_fn(
                    fn=utils.decompose_number, elems=utils.to_float(returns),
                    dtype=(tf.float32, tf.float32))

                states, actions, rewards = filter_throttle(states, actions, rewards)
                states, actions, rewards = shuffle_trace(states, actions, rewards)
                f_data = filter_steering(states, actions, rewards)

                if data is None:
                    data = f_data
                else:
                    for i, d in enumerate(f_data):
                        # for i in left, center, right...
                        data[i]['state'] = utils.concat_dict_tensor(data[i]['state'], d['state'])
                        data[i]['action'] = tf.concat([data[i]['action'], d['action']], axis=0)
                        data[i]['reward'] = tf.concat([data[i]['reward'], d['reward']], axis=0)

                if sum(amounts.values()) <= 0:
                    break

        # concat left, center, and right parts together
        return dict(state=utils.concat_dict_tensor(*list(d['state'] for d in data)),
                    action=tf.concat(list(d['action'] for d in data), axis=0),
                    reward=tf.concat(list(d['reward'] for d in data), axis=0)), k
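
The returned dict can then be consumed by the training loop; for instance, a hypothetical caller might wrap it into a batched tf.data pipeline (the shapes below are illustrative, not the project's actual ones):

import tensorflow as tf

# stand-in for the dict returned (together with the trace offset) by imitation_prepare_data;
# state, action and reward share the same leading dimension
data = dict(state=dict(state_image=tf.zeros((96, 90, 120, 3))),
            action=tf.zeros((96, 2)),
            reward=tf.zeros((96,)))

dataset = tf.data.Dataset.from_tensor_slices((data['state'], data['action'], data['reward']))
dataset = dataset.shuffle(buffer_size=96).batch(32).prefetch(tf.data.AUTOTUNE)

for states, actions, rewards in dataset:
    ...  # one training step on a balanced mini-batch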