def store_episode(self, episode_batch, update_stats=True):
    """Store a batch of episodes and optionally refresh the normalizer stats.

    Args:
        episode_batch: dict mapping each key to an array of
            batch_size x (T or T+1) x dim_key. 'o' is of size T+1, others
            are of size T. NOTE(review): 'ag' is sliced with ``[:, 1:, :]``
            exactly like 'o', so it presumably also holds T+1 steps --
            confirm against the rollout code.
        update_stats: when true, transitions sampled from the batch are fed
            to ``self.o_stats`` and ``self.g_stats`` and both normalizers
            are recomputed.

    The caller's ``episode_batch`` is passed to the buffer unchanged and is
    not modified by this method.
    """
    self.buffer.store_episode(episode_batch)
    if not update_stats:
        return

    # Add the "next step" views on a shallow copy so the derived keys needed
    # only for sampling do not leak into the caller's dictionary.
    stats_batch = dict(episode_batch)
    stats_batch['o_2'] = episode_batch['o'][:, 1:, :]
    stats_batch['ag_2'] = episode_batch['ag'][:, 1:, :]

    num_normalizing_transitions = transitions_in_episode_batch(stats_batch)
    transitions = self.sample_transitions(stats_batch,
                                          num_normalizing_transitions)

    # Only 'o' and 'g' feed the statistics, so o_2 / g_2 are left
    # unprocessed.
    transitions['o'], transitions['g'] = self._preprocess_og(
        transitions['o'], transitions['ag'], transitions['g'])

    self.o_stats.update(transitions['o'])
    self.g_stats.update(transitions['g'])

    self.o_stats.recompute_stats()
    self.g_stats.recompute_stats()

def initDemoBuffer(self, demoDataFile, update_stats=True):
    """Load recorded demonstrations into the module-level ``demoBuffer``.

    Reads ``self.num_demo`` episodes from the archive at ``demoDataFile``.
    For every episode, ``self.T`` transitions are taken from the ``'obs'``,
    ``'acs'`` and (when ``self.input_dims`` declares ``info_*`` keys)
    ``'info'`` arrays, plus the final observation at index ``self.T``. Each
    episode is converted to batch-major layout with batch size 1, stored in
    ``demoBuffer``, and the current buffer size is printed.

    Args:
        demoDataFile: path or file object accepted by ``np.load``; expected
            to be an ``.npz`` archive with ``'obs'``, ``'acs'`` and
            ``'info'`` entries.
        update_stats: when true, transitions sampled from each demo episode
            are also fed to ``self.o_stats`` / ``self.g_stats`` and both
            normalizers are recomputed, so the demo data is normalized too.
    """
    prefix = 'info_'
    # Strip only the leading prefix; str.replace would also remove later
    # occurrences of 'info_' inside a key.
    info_keys = [key[len(prefix):] for key in self.input_dims
                 if key.startswith(prefix)]

    # NOTE(security): the per-step entries are Python dicts, i.e. pickled
    # object arrays, so allow_pickle=True is required on NumPy >= 1.16.3.
    # Unpickling can execute arbitrary code -- only load trusted demo files.
    with np.load(demoDataFile, allow_pickle=True) as demo_data:
        # Archive members are decoded on every lookup, so read each array
        # exactly once instead of once per transition.
        demo_obs = demo_data['obs']
        demo_acts = demo_data['acs']
        demo_info = demo_data['info'] if info_keys else None

    for epsd in range(self.num_demo):
        episode_obs = demo_obs[epsd]
        episode_acts = demo_acts[epsd]
        obs, acts, goals, achieved_goals = [], [], [], []
        # Time-major (T, 1, dim): each demo episode is a batch of one, which
        # matches the single-element lists appended for the other keys.
        info_values = [
            np.empty((self.T, 1, self.input_dims[prefix + key]), np.float32)
            for key in info_keys
        ]

        for step in range(self.T):
            step_obs = episode_obs[step]
            obs.append([step_obs.get('observation')])
            acts.append([episode_acts[step]])
            goals.append([step_obs.get('desired_goal')])
            achieved_goals.append([step_obs.get('achieved_goal')])
            for key, values in zip(info_keys, info_values):
                values[step, 0] = demo_info[epsd][step][key]

        # Observations and achieved goals carry one extra, final step.
        last_obs = episode_obs[self.T]
        obs.append([last_obs.get('observation')])
        achieved_goals.append([last_obs.get('achieved_goal')])

        episode = dict(o=obs, u=acts, g=goals, ag=achieved_goals)
        for key, value in zip(info_keys, info_values):
            episode['info_{}'.format(key)] = value

        episode = convert_episode_to_batch_major(episode)
        # demoBuffer is a module-level object; it is only read here, so no
        # ``global`` declaration is needed.
        demoBuffer.store_episode(episode)
        print("Demo buffer size currently ", demoBuffer.get_current_size())

        if update_stats:
            # Add transitions to the normalizer so the demo data is
            # normalized as well.
            episode['o_2'] = episode['o'][:, 1:, :]
            episode['ag_2'] = episode['ag'][:, 1:, :]
            num_normalizing_transitions = transitions_in_episode_batch(episode)
            transitions = self.sample_transitions(episode,
                                                  num_normalizing_transitions)

            # Only 'o' and 'g' feed the statistics, so o_2 / g_2 are left
            # unprocessed.
            transitions['o'], transitions['g'] = self._preprocess_og(
                transitions['o'], transitions['ag'], transitions['g'])

            self.o_stats.update(transitions['o'])
            self.g_stats.update(transitions['g'])

            self.o_stats.recompute_stats()
            self.g_stats.recompute_stats()

        episode.clear()