def read(self, p, episode, rem, flag):
    """Append trajectory point ``p`` behind the current tail of the linked buffer.

    The previous tail ``m`` becomes an interior point linked as
    ``s <-> m <-> p``.  Its drop value is obtained from ``F.sed_op`` and is
    stored in both link tables and pushed onto ``self.heap`` as
    ``(value, m)``.

    Args:
        p: index (in the original trajectory) of the point to append.
        episode: index of the trajectory inside ``self.ori_traj_set``.
        rem: index of a previously removed point; only read when ``flag``
            is truthy.
        flag: when truthy, the point ``rem`` is inserted between ``s`` and
            ``m`` in the point list handed to ``F.sed_op``.
    """
    m = self.link_tail
    self.F_ward[m] = [0.0, p]
    self.B_ward[p] = [0.0, m]
    s = self.B_ward[m][1]
    e = self.F_ward[m][1]

    traj = self.ori_traj_set[episode]
    if flag:
        points = [traj[s], traj[rem], traj[m], traj[e]]
    else:
        points = [traj[s], traj[m], traj[e]]

    # The forward and backward tables hold the same value, so evaluate the
    # error once instead of once per table.
    value = F.sed_op(points)
    self.F_ward[m][0] = value
    self.B_ward[m][0] = value

    # Heap entries are (state_value, point index in the original trajectory).
    heapq.heappush(self.heap, (value, m))
    self.link_tail = p
def run_by_drop_value_2(self, episode, err_bounded, k, total_drop=0):
    """Advance the open window by one point or hand back an observation.

    The error of the window ``[self.origin_index, self.e]`` is computed with
    ``F.sed_op``.

    * Error within ``err_bounded``: the error is appended to
      ``self.observation_container``, ``self.e`` is incremented and ``[]``
      is returned.
    * Error above the bound with fewer than ``k`` points strictly inside the
      window: point ``self.e - 1`` is kept deterministically, the window is
      restarted from it and ``[]`` is returned.
    * Otherwise: ``self.observation_index`` is set to the last ``k`` interior
      indices and the normalized observation is returned with
      ``self.len_ * k`` columns.

    ``total_drop`` is accepted but not used by this method.
    """
    traj = self.ori_traj_set[episode]
    anchor_check = F.sed_op(traj[self.origin_index:self.e + 1])

    if not (anchor_check > err_bounded):
        # Still inside the bound: remember the error and widen the window.
        self.observation_container.append(anchor_check)
        self.e += 1
        return []

    if self.e - self.origin_index - 1 < k:
        # Deterministic rule: too few candidates, keep the last in-bound point.
        kept = self.e - 1
        self.simplified_index.append(kept)
        self.simplified_tra.append(traj[kept])
        self.origin_index = kept
        self.e = kept + 2
        self.observation_container = [0.0]
        return []

    self.conOpw = False
    interior = list(range(self.origin_index + 1, self.e))
    self.observation_index = interior[-k:]
    recent = self.observation_container[-self.len_ * k:]
    normalized = self.states_normalized(recent, self.len_)
    return np.array(normalized).reshape(-1, self.len_ * k)
def run_by_drop_value(self, episode, err_bounded, k, total_drop=0):
    """Advance the open window by one point or hand back a padded observation.

    The error of the window ``[self.origin_index, self.e]`` is computed with
    ``F.sed_op``.  While it stays within ``err_bounded`` the error is
    recorded, ``self.e`` is incremented and ``[]`` is returned.  Once the
    bound is exceeded, the last ``k`` interior indices and the last
    ``self.len_ * k`` recorded errors are taken; either list is padded
    through a ``RingBuffer`` when it is too short, and the observation is
    returned with ``self.len_ * k`` columns.

    ``total_drop`` is accepted but not used by this method.
    """
    window = self.ori_traj_set[episode][self.origin_index:self.e + 1]
    anchor_check = F.sed_op(window)

    if not (anchor_check > err_bounded):
        self.observation_container.append(anchor_check)
        self.e += 1
        return []

    self.conOpw = False

    # Pad the candidate indices up to k entries.
    # NOTE(review): presumably RingBuffer.append keeps returning a truthy
    # value until the buffer is full -- confirm against RingBuffer.
    self.observation_index = list(range(self.origin_index + 1, self.e))[-k:]
    if len(self.observation_index) < k:
        index_ring = RingBuffer(k)
        while index_ring.append(self.observation_index):
            pass
        self.observation_index = index_ring.view

    # Pad the observation values up to len_ * k entries in the same way.
    observation = self.observation_container[-self.len_ * k:]
    if len(observation) < self.len_ * k:
        value_ring = RingBuffer(self.len_ * k)
        while value_ring.append(observation):
            pass
        observation = value_ring.view

    return np.array(observation).reshape(-1, self.len_ * k)
def reset(self, episode, buffer_size):
    """Re-initialise the environment for one trajectory and fill the buffer.

    All reward bookkeeping is cleared, the linked buffer is seeded with
    points 0 and 1, and points ``2 .. buffer_size`` are appended through
    ``self.read``.  The initial ``self.check`` (candidate point indices) and
    ``self.state`` (five feature values) are then built from the smallest
    heap entries.

    Args:
        episode: index of the trajectory inside ``self.ori_traj_set``.
        buffer_size: index of the last point read into the buffer.

    Returns:
        ``(number of points in the trajectory, state as a (1, n) array)``.
    """
    # Reward bookkeeping.
    self.rw = 0.0
    self.INX = 0
    self.heap = []
    self.last_error = 0.0
    self.current = 0.0
    self.c_left = 0
    self.c_right = 0
    self.start, self.end = {}, {}
    self.err_seg, self.err_record = {}, {}

    traj = self.ori_traj_set[episode]
    self.steps = len(traj)

    # Doubly linked buffer: F_ward[i] = [state_value, next point],
    # B_ward[i] = [state_value, previous point].
    self.F_ward = {0: [0.0, 1]}
    self.B_ward = {1: [0.0, 0]}
    self.link_head, self.link_tail = 0, 1
    for point in range(2, buffer_size + 1):
        self.read(point, episode)

    best = heapq.nsmallest(self.n_features, self.heap)
    if len(best) >= self.n_features:
        (v0, i0), (v1, i1), (v2, i2) = best[0], best[1], best[2]
        self.check = [i0, i1, i2]
        if buffer_size + 4 <= self.steps:
            # Errors of the 3- and 4-point windows starting at the buffer end.
            J1 = F.sed_op(traj[buffer_size:buffer_size + 3])
            J2 = F.sed_op(traj[buffer_size:buffer_size + 4])
            self.state = [v0, v1, v2, J1, J2]
        else:
            self.state = [v0, v1, v2, v0, v0]
    else:
        # Too few candidates: repeat the smallest entry to keep the layout.
        (v0, i0), (v1, i1) = best[0], best[1]
        self.check = [i0, i0, i1]
        self.state = [v0, v0, v1, v0, v0]

    return self.steps, np.array(self.state).reshape(1, -1)
def read(self, p, episode):
    """Append trajectory point ``p`` behind the current tail of the linked buffer.

    The previous tail becomes an interior point.  The error of the original
    sub-trajectory between its two neighbours is computed with ``F.sed_op``,
    cached in ``self.err_record`` under ``(left, right)``, stored in both
    link tables and pushed onto ``self.heap`` as ``(value, tail)``.
    """
    tail = self.link_tail
    self.F_ward[tail] = [0.0, p]
    self.B_ward[p] = [0.0, tail]

    left = self.B_ward[tail][1]
    right = self.F_ward[tail][1]
    error = F.sed_op(self.ori_traj_set[episode][left:right + 1])

    self.err_record[(left, right)] = error
    self.F_ward[tail][0] = error
    self.B_ward[tail][0] = error

    # Heap entries are (state_value, point index in the original trajectory).
    heapq.heappush(self.heap, (error, tail))
    self.link_tail = p
def run_by_drop_num(self, episode, err_bounded, k, total_drop=0):
    """Advance the open window by one point or hand back drop-count features.

    While the ``F.sed_op`` error of the window ``[self.origin_index, self.e]``
    stays within ``err_bounded``, ``self.e`` is incremented and ``[]`` is
    returned.  Otherwise ``self.observation_index`` receives the last ``k``
    interior indices and the returned observation holds the last ``k``
    values of the counter range starting at ``total_drop``; both are padded
    to length ``k`` by repeating their last element.
    """
    def pad_to_k(values):
        # Repeat the final element until the list has k entries.
        if len(values) < k:
            values.extend(values[-1:] * (k - len(values)))
        return values

    window = self.ori_traj_set[episode][self.origin_index:self.e + 1]
    if not (F.sed_op(window) > err_bounded):
        self.e += 1
        return []

    self.conOpw = False
    self.observation_index = list(range(self.origin_index + 1, self.e))[-k:]
    pad_to_k(self.observation_index)

    counters = range(total_drop + 0,
                     total_drop + self.e - self.origin_index - 1)
    observation = pad_to_k(list(counters)[-k:])
    return np.array(observation).reshape(-1, k)
def run_by_skip_value_4(self, episode, J, err_bounded):
    """Build a ``J``-wide observation of window errors for the next ``J`` points.

    For every ``i`` in ``[self.e, self.e + J)`` the ``F.sed_op`` error of the
    window ``[self.origin_index, i]`` is appended to the observation
    container, recorded in ``self.err_record[i]``, and ``i`` is appended to
    ``self.observation_index``.  The last ``J`` entries are kept (padded by
    repeating the final one when short), normalized, and shuffled together
    with their indices using a fixed ``random_state``.

    ``err_bounded`` is accepted but not used by this method.

    Returns:
        The shuffled observation reshaped to ``J`` columns.
    """
    traj = self.ori_traj_set[episode]
    self.err_record = {}
    for i in range(self.e, self.e + J):
        error = F.sed_op(traj[self.origin_index:i + 1])
        self.observation_container.append(error)
        self.observation_index.append(i)
        self.err_record[i] = error

    self.observation_index = self.observation_index[-J:]
    observation = self.observation_container[-J:]
    if len(self.observation_index) < J:
        index_gap = J - len(self.observation_index)
        self.observation_index.extend(self.observation_index[-1:] * index_gap)
        value_gap = J - len(observation)
        observation.extend(observation[-1:] * value_gap)

    observation = self.states_normalized(observation, 1)
    column = np.array(observation).reshape(-1, 1)
    # Values and indices are shuffled in lockstep so they stay aligned.
    observation, self.observation_index = shuffle(
        column, self.observation_index, random_state=0)
    return np.array(observation).reshape(-1, J)
def run_by_skip_value_3(self, episode, J):
    """Build a ``J``-wide observation from three-point errors.

    For every ``i`` in ``[self.e, self.e + J)`` that still has a successor in
    the trajectory, the ``F.sed_op`` value of the three points
    ``(origin_index, i, i + 1)`` is appended to the observation container
    and ``i`` to ``self.observation_index``.  The last ``J`` entries are kept
    (padded by repeating the final one when short), normalized, and shuffled
    together with their indices using a fixed ``random_state``.

    Returns:
        The shuffled observation reshaped to ``J`` columns.
    """
    traj = self.ori_traj_set[episode]
    for i in range(self.e, self.e + J):
        if not (i + 1 < len(traj)):
            # No successor point left for this candidate.
            continue
        triple = [traj[self.origin_index], traj[i], traj[i + 1]]
        self.observation_container.append(F.sed_op(triple))
        self.observation_index.append(i)

    self.observation_index = self.observation_index[-J:]
    observation = self.observation_container[-J:]
    if len(self.observation_index) < J:
        index_gap = J - len(self.observation_index)
        self.observation_index.extend(self.observation_index[-1:] * index_gap)
        value_gap = J - len(observation)
        observation.extend(observation[-1:] * value_gap)

    observation = self.states_normalized(observation, 1)
    column = np.array(observation).reshape(-1, 1)
    # Values and indices are shuffled in lockstep so they stay aligned.
    observation, self.observation_index = shuffle(
        column, self.observation_index, random_state=0)
    return np.array(observation).reshape(-1, J)
def reward_update(self, episode, rem):
    """Incrementally update the current maximum error after dropping ``rem``.

    Dropped intervals are tracked by ``self.start`` (left end -> right end),
    ``self.end`` (right end -> left end) and ``self.err_seg``
    (``(left, right)`` -> error from ``F.sed_op``).  Depending on whether
    ``rem`` is already an interval endpoint, a new interval is inserted, an
    existing one is expanded to the left or right, or two neighbours are
    merged.  ``self.current`` and ``self.current_left`` /
    ``self.current_right`` are refreshed accordingly.
    """
    traj = self.ori_traj_set[episode]

    def refresh(now, left, right, *replaced):
        # If last_error matches the error of an interval that was just
        # replaced, the new error takes over when it is not smaller;
        # otherwise the maximum is re-scanned over all intervals.
        for before in replaced:
            if math.isclose(self.last_error, before):
                if now >= before:
                    self.current = now
                    self.current_left, self.current_right = left, right
                else:
                    worst = max(self.err_seg, key=self.err_seg.get)
                    self.current_left, self.current_right = worst
                    self.current = self.err_seg[worst]
                return
        # The previous maximum is untouched: only a larger error replaces it.
        if now >= self.last_error:
            self.current = now
            self.current_left, self.current_right = left, right

    is_left_end = rem in self.start
    is_right_end = rem in self.end

    if not is_left_end and not is_right_end:
        # Interval insert.
        a = self.B_ward[rem][1]
        b = self.F_ward[rem][1]
        self.start[a] = b
        self.end[b] = a
        now = F.sed_op(traj[a:b + 1])
        self.err_seg[(a, b)] = now
        refresh(now, a, b)
    elif is_left_end and not is_right_end:
        # Interval expand left: (rem, c) grows into (a, c).
        a = self.B_ward[rem][1]
        c = self.start[rem]
        before = self.err_seg[(rem, c)]
        now = F.sed_op(traj[a:c + 1])
        del self.err_seg[(rem, c)]
        self.err_seg[(a, c)] = now
        refresh(now, a, c, before)
        self.end[c] = a
        self.start[a] = c
        del self.start[rem]
    elif not is_left_end and is_right_end:
        # Interval expand right: (a, rem) grows into (a, c).
        a = self.end[rem]
        c = self.F_ward[rem][1]
        before = self.err_seg[(a, rem)]
        now = F.sed_op(traj[a:c + 1])
        del self.err_seg[(a, rem)]
        self.err_seg[(a, c)] = now
        refresh(now, a, c, before)
        self.start[a] = c
        self.end[c] = a
        del self.end[rem]
    elif is_left_end and is_right_end:
        # Interval merge: (a, rem) and (rem, c) become (a, c).
        a = self.end[rem]
        c = self.start[rem]
        before_left = self.err_seg[(a, rem)]
        before_right = self.err_seg[(rem, c)]
        now = F.sed_op(traj[a:c + 1])
        del self.err_seg[(a, rem)]
        del self.err_seg[(rem, c)]
        self.err_seg[(a, c)] = now
        refresh(now, a, c, before_left, before_right)
        self.start[a] = c
        self.end[c] = a
        del self.start[rem]
        del self.end[rem]
    else:
        print('Here is a bug!!!')
def step(self, episode, action, index, done, label='T'):
    """Drop the point chosen by ``action`` and return the next state and reward.

    The chosen point ``rem = self.check[action]`` is unlinked from the
    buffer.  The drop values of its neighbours are recomputed with
    ``F.sed_op`` (the head and tail of the buffer are left alone), and when
    ``label == 'T'`` the running error is updated through
    ``self.reward_update``.  Unless ``done``, point ``index + 1`` is read
    into the buffer and ``self.check`` / ``self.state`` are rebuilt from the
    smallest heap entries.

    Args:
        episode: index of the trajectory inside ``self.ori_traj_set``.
        action: position inside ``self.check`` of the point to drop.
        index: index of the most recently read trajectory point.
        done: when true, no further point is read and the state is kept.
        label: the reward bookkeeping is only updated for ``'T'``.

    Returns:
        ``(state as a (1, n) array, reward)`` where the reward is
        ``last_error - current`` after this step.
    """
    rem = self.check[action]  # point index in the original trajectory
    NEXT_P = self.F_ward[rem][1]
    NEXT_V = self.B_ward[NEXT_P][0]
    LAST_P = self.B_ward[rem][1]
    LAST_V = self.F_ward[LAST_P][0]

    if LAST_P > self.link_head:
        # The predecessor's value now has to account for the dropped point.
        self.delete_heap(self.heap, (LAST_V, LAST_P))
        traj = self.ori_traj_set[episode]
        value = F.sed_op([traj[self.B_ward[LAST_P][1]], traj[LAST_P],
                          traj[rem], traj[NEXT_P]])
        self.F_ward[LAST_P][0] = value
        self.B_ward[LAST_P][0] = value
        heapq.heappush(self.heap, (value, LAST_P))

    if NEXT_P < self.link_tail:
        # Same for the successor.
        self.delete_heap(self.heap, (NEXT_V, NEXT_P))
        traj = self.ori_traj_set[episode]
        value = F.sed_op([traj[LAST_P], traj[rem], traj[NEXT_P],
                          traj[self.F_ward[NEXT_P][1]]])
        self.F_ward[NEXT_P][0] = value
        self.B_ward[NEXT_P][0] = value
        heapq.heappush(self.heap, (value, NEXT_P))

    if label == 'T':
        # Must run before rem is unlinked: it reads rem's neighbours.
        self.reward_update(episode, rem)

    # Unlink rem from the buffer and from the heap.
    self.F_ward[LAST_P][1] = NEXT_P
    self.B_ward[NEXT_P][1] = LAST_P
    self.delete_heap(self.heap, (self.F_ward[rem][0], rem))
    del self.F_ward[rem]
    del self.B_ward[rem]

    rw = self.last_error - self.current
    self.last_error = self.current

    if not done:
        # Boundary process: when the dropped point sat right before the
        # tail, read() is told to include it in the old tail's value.
        self.read(index + 1, episode, rem, NEXT_P == self.link_tail)
        if len(self.heap) < self.n_features:
            first, second = self.heap[0], self.heap[1]
            self.check = [first[1], first[1], second[1]]
            self.state = [first[0], first[0], second[0]]
        else:
            t = heapq.nsmallest(self.n_features, self.heap)
            self.check = [t[0][1], t[1][1], t[2][1]]
            self.state = [t[0][0], t[1][0], t[2][0]]

    return np.array(self.state).reshape(1, -1), rw
def step(self, episode, action, index, done, label='T'):
    """Drop the point chosen by ``action`` and return the next state and reward.

    The chosen point ``self.check[action]`` is unlinked from the buffer.  The
    drop values of its neighbours are recomputed with ``F.sed_op`` over the
    original sub-trajectory they would span (cached in ``self.err_record``),
    the running error is updated through ``self.reward_update``, and, unless
    ``done``, point ``index + 1`` is read and ``self.check`` / ``self.state``
    are rebuilt from the smallest heap entries.

    ``label`` is accepted but not used by this variant.

    Returns:
        ``(state as a (1, n) array, reward)`` where the reward is
        ``last_error - current`` after this step.
    """
    rem = self.check[action]  # point index in the original trajectory
    nxt = self.F_ward[rem][1]
    nxt_value = self.B_ward[nxt][0]
    prv = self.B_ward[rem][1]
    prv_value = self.F_ward[prv][0]

    if prv > self.link_head:
        # The predecessor would now span (its own predecessor, nxt).
        self.delete_heap(self.heap, (prv_value, prv))
        left = self.B_ward[prv][1]
        error = F.sed_op(self.ori_traj_set[episode][left:nxt + 1])
        self.err_record[(left, nxt)] = error
        self.F_ward[prv][0] = error
        self.B_ward[prv][0] = error
        heapq.heappush(self.heap, (error, prv))

    if nxt < self.link_tail:
        # The successor would now span (prv, its own successor).
        self.delete_heap(self.heap, (nxt_value, nxt))
        right = self.F_ward[nxt][1]
        error = F.sed_op(self.ori_traj_set[episode][prv:right + 1])
        self.err_record[(prv, right)] = error
        self.F_ward[nxt][0] = error
        self.B_ward[nxt][0] = error
        heapq.heappush(self.heap, (error, nxt))

    # Must run before rem is unlinked: it reads rem's neighbours.
    self.reward_update(episode, rem)

    self.F_ward[prv][1] = nxt
    self.B_ward[nxt][1] = prv
    self.delete_heap(self.heap, (self.F_ward[rem][0], rem))
    del self.F_ward[rem]
    del self.B_ward[rem]

    rw = self.last_error - self.current
    self.last_error = self.current

    if not done:
        self.read(index + 1, episode)
        best = heapq.nsmallest(self.n_features, self.heap)
        if len(best) >= self.n_features:
            self.check = [best[0][1], best[1][1], best[2][1]]
            self.state = [best[0][0], best[1][0], best[2][0]]
        else:
            # Too few candidates: repeat the smallest entry.
            self.check = [best[0][1], best[0][1], best[1][1]]
            self.state = [best[0][0], best[0][0], best[1][0]]

    return np.array(self.state).reshape(1, -1), rw
def reward_update(self, episode, rem, label=''):
    """Incrementally update the current maximum error after dropping points.

    Dropped intervals are tracked by ``self.start`` (left end -> right end),
    ``self.end`` (right end -> left end) and ``self.err_seg``
    (``(left, right)`` -> error from ``F.sed_op``).

    With ``label == 'skip'``, ``rem`` is a pair ``(a, b)`` and the interval
    ``(a, b)`` is inserted directly.  Otherwise ``rem`` is a single point
    index and, depending on whether it is already an interval endpoint, a
    new interval is inserted, an existing one is expanded to the left or
    right, or two neighbours are merged.  ``self.current`` and
    ``self.current_left`` / ``self.current_right`` are refreshed accordingly.
    """
    traj = self.ori_traj_set[episode]

    def refresh(now, left, right, *replaced):
        # If last_error matches the error of an interval that was just
        # replaced, the new error takes over when it is not smaller;
        # otherwise the maximum is re-scanned over all intervals.
        for before in replaced:
            if math.isclose(self.last_error, before):
                if now >= before:
                    self.current = now
                    self.current_left, self.current_right = left, right
                else:
                    worst = max(self.err_seg, key=self.err_seg.get)
                    self.current_left, self.current_right = worst
                    self.current = self.err_seg[worst]
                return
        # The previous maximum is untouched: only a larger error replaces it.
        if now >= self.last_error:
            self.current = now
            self.current_left, self.current_right = left, right

    if label == 'skip':
        # Skipped range: rem carries both endpoints of the new interval.
        a, b = rem[0], rem[1]
        self.start[a] = b
        self.end[b] = a
        now = F.sed_op(traj[a:b + 1])
        self.err_seg[(a, b)] = now
        refresh(now, a, b)
        return

    is_left_end = rem in self.start
    is_right_end = rem in self.end

    if not is_left_end and not is_right_end:
        # Interval insert.
        a = self.B_ward[rem][1]
        b = self.F_ward[rem][1]
        self.start[a] = b
        self.end[b] = a
        now = F.sed_op(traj[a:b + 1])
        self.err_seg[(a, b)] = now
        refresh(now, a, b)
    elif is_left_end and not is_right_end:
        # Interval expand left: (rem, c) grows into (a, c).
        a = self.B_ward[rem][1]
        c = self.start[rem]
        before = self.err_seg[(rem, c)]
        now = F.sed_op(traj[a:c + 1])
        del self.err_seg[(rem, c)]
        self.err_seg[(a, c)] = now
        refresh(now, a, c, before)
        self.end[c] = a
        self.start[a] = c
        del self.start[rem]
    elif not is_left_end and is_right_end:
        # Interval expand right: (a, rem) grows into (a, c).
        a = self.end[rem]
        c = self.F_ward[rem][1]
        before = self.err_seg[(a, rem)]
        now = F.sed_op(traj[a:c + 1])
        del self.err_seg[(a, rem)]
        self.err_seg[(a, c)] = now
        refresh(now, a, c, before)
        self.start[a] = c
        self.end[c] = a
        del self.end[rem]
    elif is_left_end and is_right_end:
        # Interval merge: (a, rem) and (rem, c) become (a, c).
        a = self.end[rem]
        c = self.start[rem]
        before_left = self.err_seg[(a, rem)]
        before_right = self.err_seg[(rem, c)]
        now = F.sed_op(traj[a:c + 1])
        del self.err_seg[(a, rem)]
        del self.err_seg[(rem, c)]
        self.err_seg[(a, c)] = now
        refresh(now, a, c, before_left, before_right)
        self.start[a] = c
        self.end[c] = a
        del self.start[rem]
        del self.end[rem]
    else:
        print('Here is a bug!!!')
def step(self, episode, action, index, done, label='T'):
    """Drop a point (optionally skipping ahead) and return the next state and reward.

    An ``action`` below ``len(self.check)`` drops ``self.check[action]`` and
    then reads point ``index + 1``.  A larger ``action`` is a skip action: it
    drops ``self.check[0]`` and then reads point
    ``self.INX = min(index + 2 + action - len(self.check), last index)``.

    The drop values of the removed point's neighbours are recomputed with
    ``F.sed_op`` (the head and tail of the buffer are left alone).  When
    ``label == 'T'`` the running error is updated through
    ``self.reward_update`` and the reward is accumulated in ``self.rw``.
    Unless ``done``, ``self.check`` / ``self.state`` are rebuilt from the
    smallest heap entries.

    Args:
        episode: index of the trajectory inside ``self.ori_traj_set``.
        action: position inside ``self.check``, or a skip action.
        index: index of the most recently read trajectory point.
        done: when true, no further point is read and the state is kept.
        label: the reward bookkeeping is only updated for ``'T'``.

    Returns:
        ``(state as a (1, n) array, self.rw)``.
    """
    skipping = action >= len(self.check)
    rem = self.check[0] if skipping else self.check[action]  # point index in the original trajectory

    NEXT_P = self.F_ward[rem][1]
    NEXT_V = self.B_ward[NEXT_P][0]
    LAST_P = self.B_ward[rem][1]
    LAST_V = self.F_ward[LAST_P][0]

    if LAST_P > self.link_head:
        # The predecessor's value now has to account for the dropped point.
        self.delete_heap(self.heap, (LAST_V, LAST_P))
        traj = self.ori_traj_set[episode]
        t = F.sed_op([traj[self.B_ward[LAST_P][1]], traj[LAST_P],
                      traj[rem], traj[NEXT_P]])
        self.F_ward[LAST_P][0] = t
        self.B_ward[LAST_P][0] = t
        heapq.heappush(self.heap, (t, LAST_P))

    if NEXT_P < self.link_tail:
        # Same for the successor.
        self.delete_heap(self.heap, (NEXT_V, NEXT_P))
        traj = self.ori_traj_set[episode]
        t = F.sed_op([traj[LAST_P], traj[rem], traj[NEXT_P],
                      traj[self.F_ward[NEXT_P][1]]])
        self.F_ward[NEXT_P][0] = t
        self.B_ward[NEXT_P][0] = t
        heapq.heappush(self.heap, (t, NEXT_P))

    if label == 'T':
        # Must run before rem is unlinked: it reads rem's neighbours.
        self.reward_update(episode, rem)
        self.rw = self.last_error - self.current
        self.last_error = self.current

    # Unlink rem from the buffer and from the heap.
    self.F_ward[LAST_P][1] = NEXT_P
    self.B_ward[NEXT_P][1] = LAST_P
    self.delete_heap(self.heap, (self.F_ward[rem][0], rem))
    del self.F_ward[rem]
    del self.B_ward[rem]

    if not done:
        # Boundary process: when the dropped point sat right before the
        # tail, read() is told to include it in the old tail's value.
        at_tail = NEXT_P == self.link_tail
        if skipping:
            self.INX = min(index + 2 + action - len(self.check),
                           len(self.ori_traj_set[episode]) - 1)
            self.read(self.INX, episode, rem, at_tail)
            if label == 'T':
                # Account for the points skipped between index and INX.
                self.reward_update(episode, [index, self.INX], 'skip')
                self.rw += self.last_error - self.current
                self.last_error = self.current
        else:
            self.read(index + 1, episode, rem, at_tail)

        if len(self.heap) < self.n_features:
            first, second = self.heap[0], self.heap[1]
            self.check = [first[1], first[1], second[1]]
            self.state = [first[0], first[0], second[0]]
        else:
            t = heapq.nsmallest(self.n_features, self.heap)
            self.check = [t[0][1], t[1][1], t[2][1]]
            self.state = [t[0][0], t[1][0], t[2][0]]

    return np.array(self.state).reshape(1, -1), self.rw
def step(self, episode, action, index, done, label='T'):
    """Drop a point (optionally skipping ahead) and return the next state and reward.

    An ``action`` below ``len(self.check)`` drops ``self.check[action]`` and
    then reads point ``index + 1``.  A larger ``action`` is a skip action: it
    drops ``self.check[0]`` and then reads point
    ``self.INX = min(index + 2 + action - len(self.check), last index)``.

    The drop values of the removed point's neighbours are recomputed with
    ``F.sed_op`` over the original sub-trajectory they would span (cached in
    ``self.err_record``).  When ``label == 'T'`` the running error is updated
    through ``self.reward_update`` and the reward is accumulated in
    ``self.rw``.  Unless ``done``, ``self.check`` and the five-value
    ``self.state`` are rebuilt from the smallest heap entries.

    Returns:
        ``(state as a (1, n) array, self.rw)``.
    """
    skipping = action >= len(self.check)
    rem = self.check[0] if skipping else self.check[action]  # point index in the original trajectory

    nxt = self.F_ward[rem][1]
    nxt_value = self.B_ward[nxt][0]
    prv = self.B_ward[rem][1]
    prv_value = self.F_ward[prv][0]

    if prv > self.link_head:
        # The predecessor would now span (its own predecessor, nxt).
        self.delete_heap(self.heap, (prv_value, prv))
        left = self.B_ward[prv][1]
        error = F.sed_op(self.ori_traj_set[episode][left:nxt + 1])
        self.err_record[(left, nxt)] = error
        self.F_ward[prv][0] = error
        self.B_ward[prv][0] = error
        heapq.heappush(self.heap, (error, prv))

    if nxt < self.link_tail:
        # The successor would now span (prv, its own successor).
        self.delete_heap(self.heap, (nxt_value, nxt))
        right = self.F_ward[nxt][1]
        error = F.sed_op(self.ori_traj_set[episode][prv:right + 1])
        self.err_record[(prv, right)] = error
        self.F_ward[nxt][0] = error
        self.B_ward[nxt][0] = error
        heapq.heappush(self.heap, (error, nxt))

    if label == 'T':
        # Must run before rem is unlinked: it reads rem's neighbours.
        self.reward_update(episode, rem)
        self.rw = self.last_error - self.current
        self.last_error = self.current

    self.F_ward[prv][1] = nxt
    self.B_ward[nxt][1] = prv
    self.delete_heap(self.heap, (self.F_ward[rem][0], rem))
    del self.F_ward[rem]
    del self.B_ward[rem]

    if not done:
        if skipping:
            self.INX = min(index + 2 + action - len(self.check),
                           len(self.ori_traj_set[episode]) - 1)
            self.read(self.INX, episode)
            if label == 'T':
                # Account for the points skipped between index and INX.
                self.reward_update(episode, [index, self.INX], 'skip')
                self.rw += self.last_error - self.current
                self.last_error = self.current
        else:
            self.read(index + 1, episode)

        best = heapq.nsmallest(self.n_features, self.heap)
        if len(best) >= self.n_features:
            self.check = [best[0][1], best[1][1], best[2][1]]
            # NOTE(review): self.INX is only refreshed by skip actions, so
            # after a plain removal these look-ahead windows start at the
            # last skip target rather than at index + 1 -- confirm intended.
            if self.INX + 4 <= self.steps:
                traj = self.ori_traj_set[episode]
                J1 = F.sed_op(traj[self.INX:self.INX + 3])
                J2 = F.sed_op(traj[self.INX:self.INX + 4])
                self.state = [best[0][0], best[1][0], best[2][0], J1, J2]
            else:
                self.state = [best[0][0], best[1][0], best[2][0],
                              best[0][0], best[0][0]]
        else:
            # Too few candidates: repeat the smallest entry.
            self.check = [best[0][1], best[0][1], best[1][1]]
            self.state = [best[0][0], best[0][0], best[1][0],
                          best[0][0], best[0][0]]

    return np.array(self.state).reshape(1, -1), self.rw