# Common imports assumed by the snippets below. Dataset, Experience,
# TerminationType, DataLoader, DataLoaderConfig, DataSaverConfig and helpers
# such as cprint, get_logger and store_image come from the surrounding
# project and are not reproduced here. The test_* functions are
# unittest.TestCase methods excerpted from the project's test suite.
import os
import shutil
import unittest
from typing import List, Optional, Union

import numpy as np
import torch


def test_dataset_shuffle(self):
    run_length = 10
    dataset = Dataset()
    for run_index in range(3):
        for step_index in range(run_length + run_index):
            dataset.append(
                Experience(observation=torch.as_tensor((len(dataset), )),
                           action=torch.as_tensor((len(dataset), )),
                           reward=torch.as_tensor((0, )),
                           done=torch.as_tensor((1, ))
                           if step_index == run_length + run_index - 1
                           else torch.as_tensor((0, ))))
    self.assertEqual(dataset.observations[0].item(), 0)
    dataset.shuffle()
    self.assertEqual(dataset.observations[0], dataset.actions[0])
    # note: this can fail spuriously if the shuffle happens to leave the
    # first experience in place (probability 1/len(dataset))
    self.assertNotEqual(dataset.observations[0].item(), 0)
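
# For orientation, a minimal, hypothetical sketch of the Dataset container the
# tests above and below assume: four aligned tensor lists plus append/shuffle.
# The real class lives in the project's data module and surely differs.
import random
from dataclasses import dataclass, field


@dataclass
class DatasetSketch:
    observations: list = field(default_factory=list)
    actions: list = field(default_factory=list)
    rewards: list = field(default_factory=list)
    done: list = field(default_factory=list)

    def __len__(self) -> int:
        return len(self.observations)

    def append(self, experience) -> None:
        self.observations.append(experience.observation)
        self.actions.append(experience.action)
        self.rewards.append(experience.reward)
        self.done.append(experience.done)

    def shuffle(self) -> None:
        # apply one permutation to all four lists so each experience keeps
        # its observation/action/reward/done grouped together
        order = list(range(len(self)))
        random.shuffle(order)
        self.observations = [self.observations[i] for i in order]
        self.actions = [self.actions[i] for i in order]
        self.rewards = [self.rewards[i] for i in order]
        self.done = [self.done[i] for i in order]
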
def generate_dataset_by_length(length: int,
                               input_size: tuple = (3, 100, 100),
                               output_size: tuple = (1, ),
                               continuous: bool = True,
                               fixed_input_value: Optional[float] = None,
                               fixed_output_value: Optional[float] = None) -> Dataset:
    """Draw experiences from experience_generator until the dataset holds
    exactly `length` of them, skipping experiences whose termination state
    is still unknown."""
    dataset = Dataset()
    while len(dataset) < length:
        for experience in experience_generator(
                input_size=input_size,
                output_size=output_size,
                continuous=continuous,
                fixed_input_value=fixed_input_value,
                fixed_output_value=fixed_output_value):
            if experience.done != TerminationType.Unknown:
                dataset.append(experience)
            if len(dataset) >= length:
                break
    return dataset
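
# Usage sketch for generate_dataset_by_length (sizes below are illustrative,
# not from the original): request exactly 32 dummy experiences.
demo_set = generate_dataset_by_length(length=32, input_size=(3, 64, 64))
assert len(demo_set) == 32
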
def test_dataset_subsample(self):
    run_length = 10
    subsample = 3
    dataset = Dataset()
    for run_index in range(3):
        for step_index in range(run_length + run_index):
            dataset.append(
                Experience(observation=torch.as_tensor((step_index, )),
                           action=torch.as_tensor((0, )),
                           reward=torch.as_tensor((0, )),
                           done=torch.as_tensor((1, ))
                           if step_index == run_length + run_index - 1
                           else torch.as_tensor((0, ))))
    dataset.subsample(subsample)
    for exp_index in range(len(dataset)):
        self.assertTrue(
            dataset.observations[exp_index].item() % subsample == 0
            or dataset.done[exp_index].item() == 1)
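
# A hypothetical sketch of the subsampling rule the test above verifies:
# keep every n-th step of each run, but always keep terminal experiences so
# episode boundaries survive. The project's Dataset.subsample may differ.
def subsample_indices_sketch(done: list, n: int) -> list:
    kept, step_in_run = [], 0
    for index, d in enumerate(done):
        if step_in_run % n == 0 or int(d) == 1:
            kept.append(index)
        step_in_run = 0 if int(d) == 1 else step_in_run + 1
    return kept
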
def test_dataset_size(self):
    dataset = Dataset()
    dataset.append(
        Experience(observation=torch.as_tensor([0] * 10),
                   action=torch.as_tensor([1] * 3),
                   reward=torch.as_tensor(0),
                   done=torch.as_tensor(2)))
    first_size = dataset.get_memory_size()
    dataset.append(
        Experience(observation=torch.as_tensor([0] * 10),
                   action=torch.as_tensor([1] * 3),
                   reward=torch.as_tensor(0),
                   done=torch.as_tensor(2)))
    self.assertEqual(2 * first_size, dataset.get_memory_size())
    # the same shapes in float32 (4 bytes/element) take half the memory of
    # the default int64 tensors (8 bytes/element) used above
    dataset = Dataset()
    dataset.append(
        Experience(observation=torch.as_tensor([0] * 10,
                                               dtype=torch.float32),
                   action=torch.as_tensor([1] * 3, dtype=torch.float32),
                   reward=torch.as_tensor(0, dtype=torch.float32),
                   done=torch.as_tensor(2, dtype=torch.float32)))
    second_size = dataset.get_memory_size()
    self.assertEqual(first_size, 2 * second_size)
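
# Why the last assertion holds: torch.as_tensor([0] * 10) defaults to int64
# while the second dataset uses float32, so identical shapes cost exactly
# twice the memory. A plausible sketch of the size computation (the real
# get_memory_size may differ):
def memory_size_sketch(tensors) -> int:
    return sum(t.element_size() * t.nelement() for t in tensors)
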
class DataSaver:
    def __init__(self, config: DataSaverConfig):
        self._config = config
        self._logger = get_logger(
            name=get_filename_without_extension(__file__),
            output_path=self._config.output_path,
            quiet=False)
        cprint('initiate', self._logger)

        if not self._config.saving_directory.startswith('/'):
            self._config.saving_directory = os.path.join(
                os.environ['HOME'], self._config.saving_directory)

        if self._config.store_on_ram_only:
            self._dataset = Dataset(max_size=self._config.max_size)

        # used to keep track of the replay buffer size on the file system
        if not self._config.store_on_ram_only \
                and os.path.isdir(os.path.dirname(self._config.saving_directory)) \
                and self._config.max_size != -1:
            raw_data_dir = os.path.dirname(self._config.saving_directory)
            data_loader = DataLoader(config=DataLoaderConfig().create(
                config_dict={
                    'data_directories': [
                        os.path.join(raw_data_dir, run)
                        for run in sorted(os.listdir(raw_data_dir))
                    ],
                    'output_path': self._config.output_path,
                    'store': False  # don't store config
                }))
            data_loader.load_dataset()
            self._frame_counter = len(data_loader.get_dataset())
        else:
            self._frame_counter = 0

    def __len__(self):
        if self._config.store_on_ram_only:
            return len(self._dataset)
        else:
            return self._frame_counter

    def update_saving_directory(self):
        if self._config.separate_raw_data_runs:
            self._config.saving_directory = create_saving_directory(
                self._config.output_path, self._config.saving_directory_tag)

    def get_saving_directory(self):
        return self._config.saving_directory if not self._config.store_on_ram_only else 'ram'

    def get_dataset(self):
        return self._dataset

    def save(self, experience: Experience) -> None:
        if experience.done == TerminationType.Unknown:
            return  # don't save experiences in an unknown state
        if self._config.store_on_ram_only:
            return self._dataset.append(experience)
        else:
            os.makedirs(self._config.saving_directory, exist_ok=True)
            return self._store_in_file_system(experience=experience)

    def _store_in_file_system(self, experience: Experience) -> None:
        for dst, data in zip(['observation', 'action', 'reward', 'done'], [
                experience.observation, experience.action, experience.reward,
                experience.done
        ]):
            if data is not None:
                self._store_frame(data=np.asarray(
                    data.value if isinstance(data, Action) else data),
                                  dst=dst,
                                  time_stamp=experience.time_stamp)

        for key, value in experience.info.items():
            data = np.asarray(value.value) if isinstance(value, Action) else value
            self._store_frame(data=data,
                              dst=f'info_{to_file_name(key)}',
                              time_stamp=experience.time_stamp)

        # mark the end of a run with an empty file named after its termination type
        if experience.done in [
                TerminationType.Success, TerminationType.Failure
        ]:
            os.system(
                f'touch {os.path.join(self._config.saving_directory, experience.done.name)}'
            )
        self._check_dataset_size_on_file_system()

    def _store_frame(self, data: Union[np.ndarray, float], dst: str,
                     time_stamp: int) -> None:
        if not isinstance(data, np.ndarray):
            data = np.asarray(data)
        try:
            if len(data.shape) in [2, 3]:  # image-like frame: store as jpg
                if not os.path.isdir(
                        os.path.join(self._config.saving_directory, dst)):
                    os.makedirs(os.path.join(self._config.saving_directory,
                                             dst),
                                exist_ok=True)
                store_image(
                    data=data,
                    file_name=os.path.join(self._config.saving_directory, dst,
                                           timestamp_to_filename(time_stamp)) +
                    '.jpg')
            elif len(data.shape) in [0, 1]:  # scalar or vector: append to a .data file
                store_array_to_file(data=data,
                                    file_name=os.path.join(
                                        self._config.saving_directory,
                                        dst + '.data'),
                                    time_stamp=time_stamp)
        except Exception as e:
            cprint(f'Failed to store frame: {e}',
                   self._logger,
                   msg_type=MessageType.error)

    def _check_dataset_size_on_file_system(self):
        self._frame_counter += 1
        # if the number of frames exceeds max_size, remove the oldest run
        # and decrease the frame counter accordingly
        if self._config.max_size != -1 \
                and self._frame_counter > self._config.max_size:
            raw_data_dir = os.path.dirname(self._config.saving_directory)
            first_run = sorted(os.listdir(raw_data_dir))[0]
            with open(os.path.join(raw_data_dir, first_run, 'done.data'),
                      'r') as f:
                run_length = len(f.readlines())
            self._frame_counter -= run_length
            shutil.rmtree(os.path.join(raw_data_dir, first_run),
                          ignore_errors=True)
            if not self._config.separate_raw_data_runs:
                cprint(
                    'Reached max buffer size and removed all data. '
                    'Avoid this by setting data_saver_config.separate_raw_data_runs to True.',
                    msg_type=MessageType.warning,
                    logger=self._logger)

    def _get_runs(self) -> list:
        """
        Parse the parent directory of the saving directory for all raw_data
        runs and return a list of absolute paths to these runs.
        """
        raw_data_dir = os.path.dirname(self._config.saving_directory)
        return [
            os.path.join(raw_data_dir, run)
            for run in sorted(os.listdir(raw_data_dir))
        ]

    def create_train_validation_hdf5_files(
            self,
            runs: Optional[List[str]] = None,
            input_size: Optional[List[int]] = None) -> None:
        all_runs = runs if runs is not None else self._get_runs()

        number_of_training_runs = int(self._config.training_validation_split *
                                      len(all_runs))
        train_runs = all_runs[:number_of_training_runs]
        validation_runs = all_runs[number_of_training_runs:]

        for file_name, run_set in zip(['train', 'validation'],
                                      [train_runs, validation_runs]):
            config = DataLoaderConfig().create(
                config_dict={
                    'data_directories': run_set,
                    'output_path': self._config.output_path,
                    'subsample': self._config.subsample_hdf5,
                    'input_size': input_size
                })
            data_loader = DataLoader(config=config)
            data_loader.load_dataset()
            create_hdf5_file_from_dataset(
                filename=os.path.join(self._config.output_path,
                                      file_name + '.hdf5'),
                dataset=data_loader.get_dataset())
            cprint(f'created {file_name}.hdf5', self._logger)

    def empty_raw_data_in_output_directory(self) -> None:
        raw_data_directory = os.path.dirname(self._config.saving_directory)
        if os.path.isdir(raw_data_directory):
            for d in os.listdir(raw_data_directory):
                shutil.rmtree(os.path.join(raw_data_directory, d))

    def clear_buffer(self) -> None:
        self._frame_counter = 0
        if self._config.store_on_ram_only:
            self._dataset = Dataset()
        else:
            self.empty_raw_data_in_output_directory()

    def remove(self):
        # close the logger's file handlers so the log file is released
        for handler in self._logger.handlers:
            handler.close()
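
# Typical DataSaver lifecycle, sketched under the assumption that
# DataSaverConfig follows the same create(config_dict=...) pattern as
# DataLoaderConfig above; field names are taken from their uses in the class,
# concrete values are illustrative.
def data_saver_demo(episodes) -> None:
    saver = DataSaver(config=DataSaverConfig().create(
        config_dict={
            'output_path': '/tmp/experiment',
            'saving_directory': 'experiment/raw_data',
            'store_on_ram_only': False,
            'separate_raw_data_runs': True,
        }))
    for episode in episodes:
        for experience in episode:
            saver.save(experience=experience)
        saver.update_saving_directory()  # start a fresh run directory
    saver.create_train_validation_hdf5_files()
    saver.remove()
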
class PhiWeightTest(unittest.TestCase):
    def setUp(self) -> None:
        self.output_dir = f'{os.environ["PWD"]}/test_dir/{get_filename_without_extension(__file__)}'
        os.makedirs(self.output_dir, exist_ok=True)
        self.batch = Dataset()
        self.durations = [10, 1, 5]
        self.step_reward = torch.as_tensor(1)
        self.end_reward = torch.as_tensor(10)
        for episode in range(3):
            for _ in range(self.durations[episode] - 1):
                self.batch.append(
                    Experience(observation=torch.as_tensor(5),
                               action=torch.as_tensor(5),
                               reward=self.step_reward,
                               done=torch.as_tensor(0)))
            self.batch.append(
                Experience(observation=torch.as_tensor(5),
                           action=torch.as_tensor(5),
                           reward=self.end_reward,
                           done=torch.as_tensor(2)))

    def test_get_returns_on_dataset(self):
        returns = get_returns(self.batch)
        targets = [
            self.end_reward + (duration - 1) * self.step_reward
            for duration in self.durations for _ in range(duration)
        ]
        for r_e, r_t in zip(returns, targets):
            self.assertEqual(r_e, r_t)

    def test_get_reward_to_go(self):
        returns = get_reward_to_go(self.batch)
        # reward-to-go at step t of an episode of length d is
        # end_reward + (d - 1 - t) * step_reward
        targets = [
            self.end_reward + (duration - 1 - t) * self.step_reward
            for duration in self.durations for t in range(duration)
        ]

        for r_e, r_t in zip(returns, targets):
            self.assertEqual(r_e, r_t)

    def test_generalized_advantage_estimate(self):
        # with gae_lambda == 1 and no value --> same as reward-to-go
        rtg_returns = get_generalized_advantage_estimate(
            batch_rewards=self.batch.rewards,
            batch_done=self.batch.done,
            batch_values=[torch.as_tensor(0.)] * len(self.batch),
            discount=1,
            gae_lambda=1)
        for r_e, r_t in zip(rtg_returns, get_reward_to_go(self.batch)):
            self.assertEqual(r_e, r_t)

        one_step_returns = get_generalized_advantage_estimate(
            batch_rewards=self.batch.rewards,
            batch_done=self.batch.done,
            batch_values=[torch.as_tensor(0.)] * len(self.batch),
            discount=1,
            gae_lambda=0)
        targets = [
            self.step_reward if d == 0 else self.end_reward
            for d in self.batch.done
        ]
        for r_e, r_t in zip(one_step_returns, targets):
            self.assertEqual(r_e, r_t)

        gae_returns = get_generalized_advantage_estimate(
            batch_rewards=self.batch.rewards,
            batch_done=self.batch.done,
            batch_values=[torch.as_tensor(0.)] * len(self.batch),
            discount=0.99,
            gae_lambda=0.99)
        for t in range(len(self.batch)):
            self.assertGreaterEqual(gae_returns[t], one_step_returns[t])
            self.assertLessEqual(gae_returns[t], rtg_returns[t])

    def tearDown(self) -> None:
        shutil.rmtree(self.output_dir, ignore_errors=True)
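
# For reference, a standard GAE recursion consistent with the limiting cases
# the test checks (gae_lambda=1 with zero values reduces to reward-to-go,
# gae_lambda=0 to the one-step estimate). A sketch, not necessarily the
# project's implementation:
def gae_sketch(rewards, dones, values, discount, gae_lambda):
    advantages, next_value, next_advantage = [], 0.0, 0.0
    for r, d, v in zip(reversed(rewards), reversed(dones), reversed(values)):
        mask = 0.0 if float(d) != 0 else 1.0  # stop bootstrapping at episode ends
        delta = float(r) + discount * next_value * mask - float(v)
        next_advantage = delta + discount * gae_lambda * mask * next_advantage
        advantages.append(next_advantage)
        next_value = float(v)
    return list(reversed(advantages))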