def construct_RT(self):
    """Fill ``self.RT`` (n x 1) with one value per state.

    For every state index the state vector is read from ``self.sdic``.
    Entries 0-1 are compared as the current position and entries 2-3 as
    the destination:

    * position matches the destination -> ``REWARD``
    * position matches ``START[k]``, where ``k`` is the index of the
      destination in ``DESTINATION`` -> ``COST``
    * anything else -> ``0.5``

    NOTE(review): another ``construct_RT`` is defined right after this
    one in the file; if both sit in the same class body, the later
    definition replaces this one -- confirm which is intended.
    """
    self.RT = np.zeros((self.n, 1))
    for idx in range(self.n):
        vec = self.sdic.get_state(idx)
        here = [vec[0], vec[1]]
        goal = [vec[2], vec[3]]

        # Arrived at the destination: reward and move on to the next state.
        if state.same_loc(here, goal):
            self.RT[idx, 0] = REWARD
            continue

        # Otherwise look up the start paired with this destination.
        origin = START[DESTINATION.index(goal)]
        at_origin = state.same_loc(here, [origin[0], origin[1]])
        self.RT[idx, 0] = COST if at_origin else 0.5
def construct_RT(self):
    """Fill ``self.RT`` (n x 1): ``REWARD`` at the destination, 0 elsewhere.

    A state counts as "at the destination" when ``state.same_loc`` reports
    that entries 0-1 of its state vector match entries 2-3.

    NOTE(review): an earlier ``construct_RT`` precedes this one in the
    file; if both sit in the same class body, this later definition is
    the one that takes effect -- confirm which is intended.
    """
    self.RT = np.zeros((self.n, 1))
    # map() is lazy, so get_state is still called once per index, in order.
    all_states = map(self.sdic.get_state, range(self.n))
    for idx, vec in enumerate(all_states):
        position = [vec[0], vec[1]]
        target = [vec[2], vec[3]]
        if state.same_loc(position, target):
            self.RT[idx, 0] = REWARD
def construct_x0(self):
    """Fill ``self.x0`` (n x 1) with the initial density per state.

    A state receives ``INIT_DENSITY_CORNER`` when its position (entries
    0-1 of the state vector) is one of the ``START`` entries and its
    destination (entries 2-3) matches the ``DESTINATION`` entry stored at
    the same index as that start.  Every other state keeps 0.
    """
    self.x0 = np.zeros((self.n, 1))
    for idx in range(self.n):
        vec = self.sdic.get_state(idx)
        origin = [vec[0], vec[1]]
        if origin in START:
            # START and DESTINATION are paired by index.
            paired_goal = DESTINATION[START.index(origin)]
            if state.same_loc(paired_goal, [vec[2], vec[3]]):
                self.x0[idx, 0] = INIT_DENSITY_CORNER
def construct_G(self):
    """Build ``self.G`` with shape (A, n, n): one transition matrix per action.

    ``self.G[act, i, j]`` is 1 when taking ``act`` in state ``j`` leads to
    state ``i`` and 0 otherwise.

    * ``STAY`` maps every state onto itself, i.e. the identity matrix.
    * For any other action, state ``i`` is a successor of state ``j`` when
      ``state.same_loc`` matches ``i``'s location (entries 0-1) against
      ``self.world.move_consq(<location of j>, act)`` and entries 2, 3 and
      4 of the two state vectors are equal.
    """
    n = self.n
    self.G = np.zeros((self.A, n, n))

    # State vectors do not depend on the action, so look each one up once
    # instead of inside the innermost loop (n lookups rather than A*n*n).
    # NOTE(review): assumes get_state(k) returns the same vector on every
    # call -- confirm against the state dictionary implementation.
    states = [self.sdic.get_state(k) for k in range(n)]
    locations = [[vec[0], vec[1]] for vec in states]

    for act in range(self.A):
        if act == STAY:
            # Staying put results in an identity matrix.
            self.G[act, :, :] = np.eye(n)
            continue

        for j, src in enumerate(states):
            # Hand move_consq its own fresh list, as the original code did,
            # so the cached locations stay untouched whatever it does with
            # its argument.
            result_loc = self.world.move_consq([src[0], src[1]], act)
            for i, dst in enumerate(states):
                if (state.same_loc(result_loc, locations[i])
                        and dst[2] == src[2]
                        and dst[3] == src[3]
                        and dst[4] == src[4]):
                    # Write straight into self.G; no temporary matrix or
                    # deepcopy is needed because element assignment stores
                    # the value in place.
                    self.G[act, i, j] = 1