def convert_action2one_hot(self, a_counts):
    """
    Convert the discrete action indices held in the buffer to one-hot form.

    Meant to be called before training. Does nothing when the buffer has
    no 'a' entry; otherwise the 'a' entry is replaced in place by a new
    list with one converted element per stored action.

    Args:
        a_counts: forwarded unchanged as the second argument of
            ``sth.int2one_hot`` (presumably the number of discrete
            actions -- confirm against that helper).
    """
    # Nothing to convert if no actions were recorded.
    if 'a' not in self.buffer.keys():
        return

    self.buffer['a'] = [
        # Cast to int32 before encoding, exactly as each stored item is.
        sth.int2one_hot(action.astype(np.int32), a_counts)
        for action in self.buffer['a']
    ]
def get_transitions(self, databuffer,
                    data_name_list=('s', 'a', 'r', 's_', 'done')):
    """
    Sample a batch from ``databuffer`` and return it as a name -> data dict.

    When the agent is not continuous and 'a' is one of the requested
    names, the sampled action entry is cast to int32 and passed through
    ``sth.int2one_hot`` before the final conversion.

    Args:
        databuffer: object exposing ``sample()``. Its result is assumed to
            be an indexable, item-assignable sequence whose fields line up
            positionally with ``data_name_list`` -- confirm against the
            buffer implementation.
        data_name_list: names to attach to the sampled fields, in order.
            The default is an immutable tuple so it cannot be shared and
            mutated between calls; any sequence with ``index`` works.

    Returns:
        dict mapping each name to ``self.data_convert(field)``. Pairing
        stops at the shorter of the names and the sampled fields.
    """
    data = databuffer.sample()  # draw a batch from the experience pool

    if not self.is_continuous and 'a' in data_name_list:
        a_idx = data_name_list.index('a')
        actions = data[a_idx].astype(np.int32)
        leading_shape = actions.shape
        # Flatten before encoding, then restore the original leading
        # dimensions with the encoded values on a new trailing axis.
        encoded = sth.int2one_hot(actions.reshape(-1), self.a_counts)
        data[a_idx] = encoded.reshape(leading_shape + (-1,))

    return {
        name: self.data_convert(field)
        for name, field in zip(data_name_list, data)
    }
def get_transitions(self, data_name_list=(
        's', 'visual_s', 'a', 'r', 's_', 'visual_s_', 'done')):
    """
    Sample a batch from ``self.data`` and return it as a name -> data dict.

    When the agent is not continuous and 'a' is one of the requested
    names, the sampled action entry is cast to int32 and passed through
    ``sth.int2one_hot`` (with ``self.a_dim``) before the final conversion.

    Args:
        data_name_list: names to attach to the sampled fields, in order.
            The default is an immutable tuple so it cannot be shared and
            mutated between calls; any sequence with ``index`` works.

    Returns:
        dict mapping each name to ``self.data_convert(field)``. Pairing
        stops at the shorter of the names and the sampled fields.
    """
    # Draw a batch from the experience pool. The result is assumed to be
    # an item-assignable sequence aligned with data_name_list -- confirm
    # against the buffer implementation.
    data = self.data.sample()

    if not self.is_continuous and 'a' in data_name_list:
        a_idx = data_name_list.index('a')
        data[a_idx] = sth.int2one_hot(
            data[a_idx].astype(np.int32), self.a_dim)

    return {
        name: self.data_convert(field)
        for name, field in zip(data_name_list, data)
    }