def reset(self) -> Tuple[Experience, np.ndarray]:
    """
    Reset gazebo, reset the FSM, wait till the FSM is in 'running' state,
    return an experience without reward or action.
    """
    cprint('resetting', self._logger)
    self._reset_filters()
    self._step = 0
    self._return = 0
    if self._config.ros_config.ros_launch_config.gazebo:
        self._reset_gazebo()
    self._reset_publisher.publish(Empty())
    self._clear_experience_values()
    # Spin until the FSM is running and the first observation and terminal state arrived.
    while self.fsm_state != FsmState.Running \
            or self.observation is None \
            or self.terminal_state is None \
            or self.terminal_state is TerminationType.Unknown:
        self._run_shortly()
    self.observation = self._filter_observation(self.observation)
    self._current_experience = Experience(
        done=deepcopy(self.terminal_state),
        observation=deepcopy(self.observation),
        time_stamp=int(rospy.get_time() * 10**3),
        info={})
    self._previous_observation = deepcopy(self.observation)
    return self._current_experience, deepcopy(self.observation)

def reset(self) -> Tuple[Experience, np.ndarray]:
    self._reset_filters()
    observation = self._gym.reset()
    observation = self._filter_observation(observation)
    self._step_count = 0
    self._return = 0
    self.previous_observation = observation.copy()
    return Experience(done=TerminationType.NotDone), observation

def setUp(self) -> None:
    self.output_dir = f'{os.environ["PWD"]}/test_dir/{get_filename_without_extension(__file__)}'
    os.makedirs(self.output_dir, exist_ok=True)
    self.batch = Dataset()
    self.durations = [10, 1, 5]
    self.step_reward = torch.as_tensor(1)
    self.end_reward = torch.as_tensor(10)
    # Each episode consists of duration - 1 step rewards followed by one end reward.
    for episode in range(3):
        for experience in range(self.durations[episode] - 1):
            self.batch.append(
                Experience(observation=torch.as_tensor(5),
                           action=torch.as_tensor(5),
                           reward=self.step_reward,
                           done=torch.as_tensor(0)))
        self.batch.append(
            Experience(observation=torch.as_tensor(5),
                       action=torch.as_tensor(5),
                       reward=self.end_reward,
                       done=torch.as_tensor(2)))

def test_dataset_size(self):
    dataset = Dataset()
    dataset.append(
        Experience(observation=torch.as_tensor([0] * 10),
                   action=torch.as_tensor([1] * 3),
                   reward=torch.as_tensor(0),
                   done=torch.as_tensor(2)))
    first_size = dataset.get_memory_size()
    # A second identical experience should double the memory footprint.
    dataset.append(
        Experience(observation=torch.as_tensor([0] * 10),
                   action=torch.as_tensor([1] * 3),
                   reward=torch.as_tensor(0),
                   done=torch.as_tensor(2)))
    self.assertEqual(2 * first_size, dataset.get_memory_size())
    # float32 elements (4 bytes) take half the memory of the default int64 (8 bytes).
    dataset = Dataset()
    dataset.append(
        Experience(observation=torch.as_tensor([0] * 10, dtype=torch.float32),
                   action=torch.as_tensor([1] * 3, dtype=torch.float32),
                   reward=torch.as_tensor(0, dtype=torch.float32),
                   done=torch.as_tensor(2, dtype=torch.float32)))
    second_size = dataset.get_memory_size()
    self.assertEqual(first_size, 2 * second_size)

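# Hedged sanity sketch (not from the original test suite): the 2x ratios asserted
# above follow from torch's default dtype for Python ints being int64 (8 bytes
# per element) versus the 4 bytes of an explicit float32.
import torch

assert torch.as_tensor([0] * 10).element_size() == 8  # default int64
assert torch.as_tensor([0] * 10, dtype=torch.float32).element_size() == 4
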
def test_dataset_shuffle(self):
    run_length = 10
    dataset = Dataset()
    for run_index in range(3):
        for step_index in range(run_length + run_index):
            dataset.append(
                Experience(
                    observation=torch.as_tensor((len(dataset),)),
                    action=torch.as_tensor((len(dataset),)),
                    reward=torch.as_tensor((0,)),
                    done=torch.as_tensor((0,))
                    if step_index != run_length + run_index - 1
                    else torch.as_tensor((1,))))
    self.assertEqual(dataset.observations[0].item(), 0)
    dataset.shuffle()
    # Observations and actions were appended with equal values,
    # so shuffling must keep the pairs aligned.
    self.assertEqual(dataset.observations[0], dataset.actions[0])
    self.assertNotEqual(dataset.observations[0].item(), 0)

def test_dataset_subsample(self):
    run_length = 10
    subsample = 3
    dataset = Dataset()
    for run_index in range(3):
        for step_index in range(run_length + run_index):
            dataset.append(
                Experience(
                    observation=torch.as_tensor((step_index,)),
                    action=torch.as_tensor((0,)),
                    reward=torch.as_tensor((0,)),
                    done=torch.as_tensor((0,))
                    if step_index != run_length + run_index - 1
                    else torch.as_tensor((1,))))
    dataset.subsample(subsample)
    # Subsampling keeps every subsample-th experience plus all episode-ending ones.
    for exp_index in range(len(dataset)):
        self.assertTrue(
            dataset.observations[exp_index].item() % subsample == 0
            or dataset.done[exp_index].item() == 1)

def load_run(directory: str,
             arrange_according_to_timestamp: bool = False,
             input_size: List[int] = None,
             scope: str = 'default') -> List[Experience]:
    run = {}
    time_stamps = {}
    for x in os.listdir(directory):
        # Strip the '.data' extension so all fields are keyed consistently.
        k = x if not x.endswith('.data') else x[:-5]
        time_stamps[k], run[k] = load_data(
            x, directory,
            size=input_size if k == 'observation' else None,
            scope=scope if k == 'observation' else None)
    if arrange_according_to_timestamp:
        run = arrange_run_according_timestamps(run, time_stamps)
    if len(run.keys()) == 0:
        return []
    return [Experience(
        observation=run['observation'][index] if 'observation' in run.keys() else None,
        action=run['action'][index] if 'action' in run.keys() else None,
        reward=run['reward'][index] if 'reward' in run.keys() else None,
        done=run['done'][index] if 'done' in run.keys() else None)
        for index in range(len(run['observation']))]

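# Hedged usage sketch (assumption, not original code): how load_run is expected
# to be called; the 'runs/run_00001' directory and the (3, 100, 100) input size
# are hypothetical placeholders.
def _example_load_run_usage() -> None:
    experiences = load_run('runs/run_00001',
                           arrange_according_to_timestamp=True,
                           input_size=[3, 100, 100])
    print(f'loaded {len(experiences)} experiences')
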
def step(self, action: Action) -> Tuple[Experience, np.ndarray]:
    self._step_count += 1
    observation, unfiltered_reward, done, info = self._gym.step(action.value)
    observation = self._filter_observation(observation)
    reward = self._filter_reward(unfiltered_reward)
    info['unfiltered_reward'] = unfiltered_reward
    self._return += unfiltered_reward
    # A max_number_of_steps of -1 means no step limit.
    max_steps_reached = self._config.max_number_of_steps != -1 \
        and self._step_count >= self._config.max_number_of_steps
    terminal = TerminationType.Done if done or max_steps_reached \
        else TerminationType.NotDone
    if terminal == TerminationType.Done:
        info['return'] = self._return
    experience = Experience(done=terminal,
                            observation=self.previous_observation.copy(),
                            action=action,
                            reward=reward,
                            time_stamp=self._step_count,
                            info=info)
    if self._config.gym_config.render:
        self._gym.render()
    self.previous_observation = observation.copy()
    return experience, observation.copy()

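# Hedged rollout sketch (assumption, not original code): how reset() and step()
# above are meant to interact; `env` and `sample_action` are hypothetical
# stand-ins for an environment instance and a policy.
def _example_rollout(env, sample_action):
    experience, observation = env.reset()
    while experience.done == TerminationType.NotDone:
        experience, observation = env.step(sample_action(observation))
    return experience.info.get('return')  # cumulative unfiltered reward of the episode
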
def experience_generator(input_size: tuple = (3, 100, 100),
                         output_size: tuple = (1,),
                         continuous: bool = True,
                         fixed_input_value: float = None,
                         fixed_output_value: float = None):
    # Yield a fake episode: a few Unknown warm-up steps, a random number of
    # NotDone steps and one final Success step.
    starting = 5
    running = np.random.randint(10, 12)
    ending = 1
    for step in range(starting + running + ending):
        experience = Experience(info={})
        if step < starting:
            experience.done = TerminationType.Unknown
        elif starting <= step < starting + running:
            experience.done = TerminationType.NotDone
        else:
            experience.done = TerminationType.Success
        experience.time_stamp = step
        experience.observation = np.random.randint(0, 255, size=input_size, dtype=np.uint8) \
            if fixed_input_value is None else fixed_input_value
        if fixed_output_value is not None:
            experience.action = np.asarray(fixed_output_value)
        else:
            if continuous:
                experience.action = np.random.random(output_size)
            else:
                assert len(output_size) == 1
                # Bias the discrete action distribution towards the first action.
                probabilities = [8]
                probabilities += [1] * (output_size[0] - 1)
                probabilities = [p / sum(probabilities) for p in probabilities]
                experience.action = np.asarray(
                    [np.argmax(np.random.multinomial(1, probabilities))])
        experience.reward = np.random.normal()
        yield experience

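# Hedged sketch (assumption): filling a Dataset with one generated episode,
# skipping the warm-up steps, mirroring how the tests above build batches;
# whether Dataset.append accepts numpy values as-is is an assumption.
def _example_fill_dataset():
    dataset = Dataset()
    for experience in experience_generator(input_size=(3, 100, 100),
                                           output_size=(1,),
                                           continuous=True):
        if experience.done != TerminationType.Unknown:
            dataset.append(experience)
    return dataset
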
def _update_current_experience(self) -> bool:
    """
    If all experience fields are updated, store them in the _current_experience
    fields and return True, else return False.
    :return: bool indicating whether all fields are updated
    """
    self._internal_update_terminal_state()  # check count_steps for termination
    if self._config.ros_config.observation != '' and self.observation is None:
        cprint("waiting for observation", self._logger, msg_type=MessageType.debug)
        return False
    if self.reward is None:
        cprint("waiting for reward", self._logger, msg_type=MessageType.debug)
        return False
    if self.terminal_state is None:
        cprint("waiting for terminal state", self._logger, msg_type=MessageType.debug)
        return False
    if self.action is None and self.terminal_state == TerminationType.NotDone:
        # Don't wait for the next action if the episode is finished.
        cprint("waiting for action", self._logger, msg_type=MessageType.debug)
        return False
    if None in [v for v in self.info.values() if not isinstance(v, Iterable)] and \
            self.terminal_state == TerminationType.NotDone:
        # Don't wait for the next info if the episode is finished.
        cprint("waiting for info", self._logger, msg_type=MessageType.debug)
        return False
    self.observation = self._filter_observation(self.observation)
    self.info['unfiltered_reward'] = deepcopy(self.reward)
    self._return += self.reward
    self.reward = self._filter_reward(self.reward)
    if self.terminal_state in [TerminationType.Done,
                               TerminationType.Success,
                               TerminationType.Failure]:
        self.info['return'] = self._return
    self._current_experience = Experience(
        done=deepcopy(self.terminal_state),
        observation=deepcopy(self._previous_observation),
        action=deepcopy(self.action),
        reward=deepcopy(self.reward),
        time_stamp=int(rospy.get_time() * 10**3),
        info={field_name: deepcopy(self.info[field_name])
              for field_name in self.info.keys()})
    cprint(f"update current experience: "
           f"done {self._current_experience.done}, "
           f"reward {self._current_experience.reward}, "
           f"time_stamp {self._current_experience.time_stamp}, "
           f"info: {[k for k in self._current_experience.info.keys()]}",
           self._logger, msg_type=MessageType.debug)
    self._previous_observation = deepcopy(self.observation)
    return True