def step(self, action):
    """Advance the environment by one step using receive-beam index *action*.

    The normalised state (distance, angle, receive beam) is scaled back by
    ``high_obs``, converted to Cartesian coordinates with ``sph2cart``, and
    the UE is displaced by ``ue_v`` along x. The link towards ``gNB[0]`` is
    then evaluated with the selected beam.

    Parameters:
        action - index into ``BeamSet``; must be contained in ``act_space``

    Returns:
        (state, reward, done, {}) where ``state`` is the new normalised state
    """
    assert self.act_space.contains(action), "%r (%s) invalid" % (action, type(action))

    # Undo the normalisation applied when the state was stored.
    prev_dist, prev_ang, _prev_beam = self.state * self.high_obs
    beam = self.BeamSet[action]

    # Move the UE along x and derive the new polar coordinates.
    position = np.array(sph2cart(prev_ang, 0, prev_dist))
    position[0] += self.ue_v
    x, y = position[0], position[1]
    new_dist = np.sqrt(x**2 + y**2)
    new_ang = np.arctan2(y, x)
    self.state = np.array([new_dist, new_ang, beam]) / self.high_obs

    # Evaluate the link at the new position with the chosen receive beam.
    self.mimo_model = MIMO(position, self.gNB[0], self.sc_xyz, self.ch_model,
                           self.ptx, self.N_tx, self.N_rx)
    prev_rate = self.rate
    self.SNR, self.rate = self.mimo_model.Calc_Rate(self.SF_time, np.array([beam, 0]))
    self.steps_done += 1

    rwd = self._reward(prev_rate, prev_dist, new_dist)
    done = self._gameover()
    return self.state, rwd, done, {}
def get_LoS_Rate(self, ue_s):
    """Return the rate reported by ``MIMO.Los_Rate`` for a UE at *ue_s*.

    The model is built towards ``gNB[0]`` with an empty scatterer array and
    the 'fsp' channel model, regardless of the environment's own settings.
    """
    los_model = MIMO(ue_s, self.gNB[0], np.array([]), 'fsp',
                     self.ptx, self.N_tx, self.N_rx)
    _los_snr, los_rate = los_model.Los_Rate()
    return los_rate
def get_Exh_Rate(self, state):
    """Exhaustively search ``BeamSet`` for the best receive beam at *state*.

    Parameters:
        state - normalised (x, y) location; scaled by ``high_obs`` and rounded

    Returns:
        (best_beam, rate_max): the beam with the highest ``Calc_ExhRate`` rate
        and the rate recomputed for that beam with ``noise_flag=False``
    """
    x, y = np.rint(state * self.high_obs)
    if (x == 0) and (y == 0):
        # The origin is shifted by (40, 40) before evaluation
        # (presumably to avoid evaluating the link at distance zero - TODO confirm).
        location = np.array([x + 40, y + 40, 0])
    else:
        location = np.array([x, y, 0])

    model = MIMO(location, self.gNB[0], self.sc_xyz, self.ch_model,
                 self.ptx, self.N_tx, self.N_rx)

    # Rate obtained with every candidate receive beam.
    sweep = (model.Calc_ExhRate(self.SF_time, np.array([beam, 0]))
             for beam in self.BeamSet)
    rates = [rate for _snr, rate in sweep]

    best_beam = self.BeamSet[np.argmax(rates)]
    _snr_max, rate_max = model.Calc_ExhRate(self.SF_time, np.array([best_beam, 0]),
                                            noise_flag=False)
    return best_beam, rate_max  # (Best RBS, Best Rate)
def get_Los_Rate(self, state):
    """Return ``(SNR, rate)`` from ``MIMO.Los_Rate`` for a normalised polar *state*.

    *state* holds (distance, angle, receive beam) scaled by ``high_obs``; the
    beam entry is not used. The model uses an empty scatterer array and the
    'fsp' channel model.
    """
    dist, ue_ang, _rbd = state * self.high_obs
    location = np.array(sph2cart(ue_ang, 0, dist))
    los_model = MIMO(location, self.gNB[0], np.array([]), 'fsp',
                     self.ptx, self.N_tx, self.N_rx)
    snr, rate = los_model.Los_Rate()
    return snr, rate
def step(self, action):
    """Apply a combined (receive beam, move) action on the location grid.

    Parameters:
        action - index accepted by ``act_space``; split into a beam index and
                 a move index by ``decode_action``

    Returns:
        (state, reward, done, {}) where ``state`` is the new normalised (x, y)
    """
    assert self.act_space.contains(action), "%r (%s) invalid" % (action, type(action))

    cur_x, cur_y = np.rint(self.state * self.high_obs)
    beam_ndx, move_ndx = self.decode_action(action)
    vel_x, vel_y = self.choose_vel(move_ndx)
    beam = self.BeamSet[beam_ndx]
    dest_x = self.ue_xdest[0]
    dest_y = self.ue_ydest[0]

    # Displace the UE, clamping only the coordinate that the move changes.
    cand_x = cur_x + vel_x
    cand_y = cur_y + vel_y
    move = self.ue_moves[move_ndx]
    if move == 'L':
        new_x, new_y = max(cand_x, np.min(self.ue_xloc)), cand_y
    elif move == 'U':
        new_x, new_y = cand_x, min(cand_y, np.max(self.ue_yloc))
    elif move == 'R':
        new_x, new_y = min(cand_x, np.max(self.ue_xloc)), cand_y
    elif move == 'D':
        new_x, new_y = cand_x, max(cand_y, np.min(self.ue_yloc))

    # Evaluate the link at the new location with the chosen receive beam.
    self.mimo_model = MIMO(np.array([new_x, new_y, 0]), self.gNB[0], self.sc_xyz,
                           self.ch_model, self.ptx, self.N_tx, self.N_rx)
    self.SNR, self.rate = self.mimo_model.Calc_Rate(self.SF_time, np.array([beam, 0]))
    self.cur_rate = self.rate
    # Distance to the destination measured from the location *before* the move.
    self.cur_dist = np.sqrt((cur_x - dest_x)**2 + (cur_y - dest_y)**2)
    self.state = np.array([new_x, new_y]) / self.high_obs

    # _gameover reads self.steps_done, so it runs before the counter is bumped.
    rwd, done = self._gameover(beam, self.mimo_model.az_aod)

    # The indices are unused; the lookups raise IndexError if the new
    # location is not one of the grid points.
    np.where(self.ue_xloc == new_x)[0][0]
    np.where(self.ue_yloc == new_y)[0][0]

    self.ue_path_rates.append(self.rate)
    self.ue_path.append(np.array([new_x, new_y]))
    self.steps_done += 1
    return self.state, rwd, done, {}
def get_Los_Rate(self, state):
    """Return ``(SNR, rate)`` from ``MIMO.Los_Rate`` for a normalised grid *state*.

    *state* is the (x, y) location divided by ``high_obs``; it is scaled back
    and rounded before use. The model uses an empty scatterer array and the
    'fsp' channel model.
    """
    x, y = np.rint(state * self.high_obs)
    location = np.array([x, y, 0])
    los_model = MIMO(location, self.gNB[0], np.array([]), 'fsp',
                     self.ptx, self.N_tx, self.N_rx)
    snr, rate = los_model.Los_Rate()
    return snr, rate
def step(self, action):
    """Move the UE by ``ue_v`` along x and evaluate the link with *action*.

    Parameters:
        action - indexable collection of receive beams; only ``action[0]`` is
                 used for the rate computation

    Returns:
        (obs, reward, done) - ``obs`` is the list [UE position, action array].
        Note that no info dict is returned.
    """
    # Only the receive beam is changed by the action; the UE always advances.
    self.ue_s[0] += self.ue_v
    rx_beams = np.array(action)
    self.obs[0] = np.array(self.ue_s)
    self.obs[1] = rx_beams

    self.mimo_model = MIMO(self.ue_s, self.gNB[0], self.sc_xyz, self.ch_model,
                           self.ptx, self.N_tx, self.N_rx)
    self.SNR, self.rate = self.mimo_model.Calc_Rate(self.SF_time,
                                                    np.array([rx_beams[0], 0]))

    self.cum_rate += self.rate
    self.count += 1

    rwd = self.Reward()
    done = self.Game_Over()
    return self.obs, rwd, done
def get_Exh_Rate(self, state):
    """Exhaustively search ``BeamSet`` for the best receive beam at *state*.

    Parameters:
        state - normalised (distance, angle, receive beam); the beam entry is
                not used

    Returns:
        (best_beam, best_rate) over all beams in ``BeamSet``
    """
    dist, ue_ang, _rbd = state * self.high_obs
    location = np.array(sph2cart(ue_ang, 0, dist))
    model = MIMO(location, self.gNB[0], self.sc_xyz, self.ch_model,
                 self.ptx, self.N_tx, self.N_rx)

    # Rate obtained with every candidate receive beam.
    sweep = (model.Calc_Rate(self.SF_time, np.array([beam, 0]))
             for beam in self.BeamSet)
    rates = [rate for _snr, rate in sweep]

    return self.BeamSet[np.argmax(rates)], np.max(rates)  # (Best RBS, Best Rate)
def get_Exh_Rate(self, ue_s):
    """Exhaustively search all receive beams for a UE at *ue_s*.

    Builds ``self.mimo_exh_model`` towards ``gNB[0]`` and evaluates
    ``Calc_Rate`` once per beam returned by ``Generate_BeamDir(self.N)``.

    Parameters:
        ue_s - UE position passed straight to ``MIMO``

    Returns:
        (best_beam, best_rate) - the entry of the generated beam list with the
        highest rate, and that rate
    """
    self.mimo_exh_model = MIMO(ue_s, self.gNB[0], self.sc_xyz, self.ch_model,
                               self.ptx, self.N_tx, self.N_rx)

    # Only receive beams are swept; the transmit direction is fixed to 0.
    rbeam_vec = Generate_BeamDir(self.N)
    sweep = (self.mimo_exh_model.Calc_Rate(self.SF_time, np.array([rbeam[0], 0]))
             for rbeam in rbeam_vec)
    exh_rates = [rate for _snr, rate in sweep]

    best_rbeam_ndx = np.argmax(exh_rates)
    return rbeam_vec[best_rbeam_ndx], np.max(exh_rates)
def Compute_RIM(self, ue, omega_vec):
    """Compute the RSSI information for *ue* against every gNB.

    One ``MIMO`` model per entry of ``self.gNB`` is stored in
    ``self.mimo_models``; each contributes ``self.K`` values from
    ``Compute_RSSI(omega_vec, self.TB_r)``.

    Parameters:
        ue        - UE position passed straight to ``MIMO``
        omega_vec - set of receive beams handed to ``Compute_RSSI``

    Returns:
        1-D array of length ``3 * self.K``; slots for gNBs that do not exist
        stay zero.
    """
    self.mimo_models = []
    RIM = np.zeros(3 * self.K)  # (3K,), K receive beams per gNB each instant
    for i, gnb in enumerate(self.gNB):
        # Same 7-argument constructor form as every other MIMO call site in
        # this file (scatterers and channel model come before the tx power).
        model = MIMO(ue, gnb, self.sc_xyz, self.ch_model,
                     self.ptx, self.N_tx, self.N_rx)
        self.mimo_models.append(model)
        RIM[i * self.K:(i + 1) * self.K] = model.Compute_RSSI(omega_vec, self.TB_r)
    return RIM
class RFBeamEnv:
    """Receive-beam selection environment for a UE moving along the x axis.

    The UE starts at the position given to ``reset`` and is displaced by
    ``ue_v`` along x on every ``step``. Each step evaluates the link towards
    ``gNB[0]`` with the receive beam supplied as the action; the reward is the
    rate returned by ``MIMO.Calc_Rate``. The episode ends when the UE position
    equals the goal configured with ``set_goal``.
    """

    def __init__(self, sc_xyz, ch_model):
        """Store the scatterer locations and channel model used for every link."""
        self.N_tx = 1  # Num of transmitter antenna elements
        self.N_rx = 8  # Num of receiver antenna elements
        self.count = 0
        self.ptx = 30  # dB
        self.SF_time = 20  # msec - for 60KHz carrier frequency in 5G
        self.alpha = 0

        # (x1,y1,z1) of UE_source location and its per-step x displacement;
        # both are filled in by reset().
        self.ue_s = None
        self.ue_v = None

        # gNB locations: one serving gNB_1 node, 2 visible gNB_2, gNB_3 nodes
        self.gNB = [[0, 0, 0], [20, 30, 0], [40, 60, 0]]
        self.sc_xyz = sc_xyz
        self.ch_model = ch_model

        self.N = self.N_rx  # Overall beam directions
        self.BeamSet = Generate_BeamDir(self.N)  # Set of all beam directions

        # Observation: [UE position, receive beam(s)], built by reset()
        self.obs = None
        self.rate = None
        self.goal_diff = None
        self.rate_threshold = 0.7  # good enough QoS val (Rate)

        # Created here so Game_Over()/step() never hit a missing attribute
        # when set_goal()/reset() have not been called yet.
        self.goal = None
        self.cum_rate = 0
        self.cum_Los_rate = 0

    def GenAction_Sample(self):
        """Return one random action as produced by ``Gen_RandomBeams(1, self.N)``."""
        return Gen_RandomBeams(1, self.N)

    def step(self, action):
        """Move the UE by ``ue_v`` along x and evaluate the link with *action*.

        Called every time the environment has to be updated into a new state
        based on the applied action.

        Parameters:
            action - indexable collection of receive beams applied by the RL
                     agent; only ``action[0]`` is used for the rate computation

        Returns:
            (obs, reward, done) - ``obs`` is the list [UE position, action
            array], ``reward`` comes from ``Reward`` and ``done`` from
            ``Game_Over``. Note that no info dict is returned.
        """
        # Only the receive beam is changed by the action; the UE always advances.
        self.ue_s[0] += self.ue_v
        self.obs[1] = np.array(action)
        self.obs[0] = np.array(self.ue_s)

        self.mimo_model = MIMO(self.ue_s, self.gNB[0], self.sc_xyz, self.ch_model,
                               self.ptx, self.N_tx, self.N_rx)
        self.SNR, self.rate = self.mimo_model.Calc_Rate(
            self.SF_time, np.array([self.obs[1][0], 0]))

        self.cum_rate += self.rate
        self.count += 1

        rwd = self.Reward()
        done = self.Game_Over()
        return self.obs, rwd, done

    def Reward(self):
        """Return the reward for the last step, i.e. the current rate."""
        return self.rate

    def Game_Over(self):
        """Return True when the UE position equals the goal set by ``set_goal``.

        With no goal configured (``goal`` is None) this returns False.
        """
        return np.array_equal(self.ue_s, self.goal)

    def reset(self, ue, vel):
        """Reset the environment for a UE starting at *ue* with x-velocity *vel*.

        Parameters:
            ue  - starting UE position
            vel - displacement added to the UE x coordinate on every step

        Returns:
            ``np.array`` of the initial observation [UE position, random RX beam]
        """
        self.RB_r = Gen_RandomBeams(1, self.N)  # one random RX beam
        self.obs = [np.array(ue), self.RB_r]
        self.ue_s = np.array(ue)
        self.ue_v = vel
        self.count = 0
        self.rate = 0
        self.cum_rate = 0
        self.cum_Los_rate = 0
        # NOTE(review): if RB_r and ue have different lengths this is a ragged
        # conversion, which recent NumPy versions reject - confirm the shape
        # returned by Gen_RandomBeams.
        return np.array(self.obs)

    def set_goal(self, ue_d):
        """Set the UE position at which the episode terminates."""
        self.goal = np.array(ue_d)

    def set_velocity(self, vel):
        """Set the per-step displacement of the UE along x."""
        self.ue_v = vel

    def set_rate_threshold(self, rate_th):
        """Set the rate regarded as good enough QoS."""
        self.rate_threshold = rate_th

    def get_Rate(self):
        """Return the rate computed by the most recent step."""
        return self.rate

    def get_LoS_Rate(self, ue_s):
        """Return the rate reported by ``MIMO.Los_Rate`` for a UE at *ue_s*.

        The model uses an empty scatterer array and the 'fsp' channel model,
        regardless of the environment's own settings.
        """
        mimo_model = MIMO(ue_s, self.gNB[0], np.array([]), 'fsp',
                          self.ptx, self.N_tx, self.N_rx)
        _los_snr, los_rate = mimo_model.Los_Rate()
        return los_rate

    def get_Exh_Rate(self, ue_s):
        """Exhaustively search all receive beams for a UE at *ue_s*.

        Returns:
            (best_beam, best_rate) - the entry of ``Generate_BeamDir(self.N)``
            with the highest ``Calc_Rate`` rate, and that rate
        """
        self.mimo_exh_model = MIMO(ue_s, self.gNB[0], self.sc_xyz, self.ch_model,
                                   self.ptx, self.N_tx, self.N_rx)

        # Only receive beams are swept; the transmit direction is fixed to 0.
        rbeam_vec = Generate_BeamDir(self.N)
        sweep = (self.mimo_exh_model.Calc_Rate(self.SF_time, np.array([rbeam[0], 0]))
                 for rbeam in rbeam_vec)
        exh_rates = [rate for _snr, rate in sweep]

        best_rbeam_ndx = np.argmax(exh_rates)
        return rbeam_vec[best_rbeam_ndx], np.max(exh_rates)
class UAV_Env_v2(gym.Env):
    """
    Description:
    A UAV moves on a grid of locations around the base station gNB[0]. Every
    step combines a receive-beam choice with a one-cell move of the UAV.

    Observation:
        Type: MultiDiscrete(2,) - the state handed out is (x, y) divided by high_obs
        Num Observation     Min     Max     Step
        0   UAV_xloc        -500    500     50
        1   UAV_yloc        50      500     50

    Action:
        Type: Discrete(Nrx * num(ue_moves))
        action = beam_ndx * num(ue_moves) + move_ndx, moves ordered 'L','R','U','D'

    Reward (see _gameover):
         1.0  destination reached and rate >= rate_threshold
        -1.0  destination reached with rate below the threshold
        exp(-(steps_done-1)/50) * log10(rate+1)  rate >= threshold, not at destination
        -1.0  otherwise

    Starting State:
        obs_space.sample() - a random grid location

    Episode Termination:
        When the UAV is closer than 50 to the destination (see dest_check)
    """

    def __init__(self):
        """Set up antenna, channel and grid parameters and the gym spaces."""
        self.N_tx = 1  # Num of transmitter antenna elements
        self.N_rx = 8  # Num of receiver antenna elements
        self.count = 0
        self.ptx = 30  # dB
        self.SF_time = 20  # msec - for 60KHz carrier frequency in 5G
        self.alpha = 0

        # (x1,y1,z1) of UE_source location
        self.ue_s = None

        self.gNB = np.array([[0, 0, 0]])
        self.sc_xyz = np.array([])
        self.ch_model = 'fsp'

        self.N = self.N_rx  # Overall beam directions
        self.BeamSet = Generate_BeamDir(self.N)  # Set of all beam directions

        self.state = None
        self.rate = None
        self.rate_threshold = None  # good enough QoS val (Rate), set by reset()
        self.Nhops = 5

        # UE information
        self.ue_step = 50
        self.ue_xloc = np.arange(-500, 550, self.ue_step)  # 21 grid points
        self.ue_yloc = np.arange(50, 550, self.ue_step)  # 10 grid points
        self.ue_vx = np.array([50, 100])  # 2 speed parameters (only [0] is used)
        self.ue_vy = np.array([50, 100])  # 2 speed parameters (only [0] is used)
        self.ue_xdest = np.array([np.min(self.ue_xloc)])  # 1 x-dest loc
        self.ue_ydest = np.array([np.min(self.ue_yloc)])  # 1 y-dest loc
        self.ue_xsrc = np.array([np.max(self.ue_xloc)])  # 1 source x-loc
        self.ue_ysrc = np.array([np.max(self.ue_yloc)])  # 1 source y-loc
        self.ue_moves = np.array(['L', 'R', 'U', 'D'])  # moving direction of UAV

        self.seed()

        self.high_obs = np.array([np.max(self.ue_xloc), np.max(self.ue_yloc)])
        self.obs_space = spaces.MultiDiscrete([
            len(self.ue_xloc),  # ue_xloc
            len(self.ue_yloc),  # ue_yloc
        ])
        self.act_space = spaces.Discrete(self.N * len(self.ue_moves))  # n(RBD)*n(ue_moves)

    def seed(self, seed=None):
        """Create ``self.np_random`` from *seed* and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Apply a combined (receive beam, move) action on the location grid.

        Parameters:
            action - index accepted by ``act_space``; split into a beam index
                     and a move index by ``decode_action``

        Returns:
            (state, reward, done, {}) where ``state`` is the new normalised (x, y)
        """
        assert self.act_space.contains(action), "%r (%s) invalid" % (action, type(action))

        cur_x, cur_y = np.rint(self.state * self.high_obs)
        beam_ndx, move_ndx = self.decode_action(action)
        vel_x, vel_y = self.choose_vel(move_ndx)
        beam = self.BeamSet[beam_ndx]
        dest_x = self.ue_xdest[0]
        dest_y = self.ue_ydest[0]

        # Displace the UE, clamping only the coordinate that the move changes.
        cand_x = cur_x + vel_x
        cand_y = cur_y + vel_y
        move = self.ue_moves[move_ndx]
        if move == 'L':
            new_x, new_y = max(cand_x, np.min(self.ue_xloc)), cand_y
        elif move == 'U':
            new_x, new_y = cand_x, min(cand_y, np.max(self.ue_yloc))
        elif move == 'R':
            new_x, new_y = min(cand_x, np.max(self.ue_xloc)), cand_y
        elif move == 'D':
            new_x, new_y = cand_x, max(cand_y, np.min(self.ue_yloc))

        # Evaluate the link at the new location with the chosen receive beam.
        self.mimo_model = MIMO(np.array([new_x, new_y, 0]), self.gNB[0], self.sc_xyz,
                               self.ch_model, self.ptx, self.N_tx, self.N_rx)
        self.SNR, self.rate = self.mimo_model.Calc_Rate(self.SF_time, np.array([beam, 0]))
        self.cur_rate = self.rate
        # Distance to the destination measured from the location *before* the move.
        self.cur_dist = np.sqrt((cur_x - dest_x)**2 + (cur_y - dest_y)**2)
        self.state = np.array([new_x, new_y]) / self.high_obs

        # _gameover reads self.steps_done, so it runs before the counter is bumped.
        rwd, done = self._gameover(beam, self.mimo_model.az_aod)

        # The indices are unused; the lookups raise IndexError if the new
        # location is not one of the grid points.
        np.where(self.ue_xloc == new_x)[0][0]
        np.where(self.ue_yloc == new_y)[0][0]

        self.ue_path_rates.append(self.rate)
        self.ue_path.append(np.array([new_x, new_y]))
        self.steps_done += 1
        return self.state, rwd, done, {}

    def reset(self, rate_thr):
        """Start a new episode at a random grid location.

        Parameters:
            rate_thr - rate threshold used by the reward for this episode

        Returns:
            the normalised start state (x, y) / high_obs
        """
        xloc_ndx, yloc_ndx = self.obs_space.sample()
        start = np.array([self.ue_xloc[xloc_ndx], self.ue_yloc[yloc_ndx]])

        self.steps_done = 0
        self.rate = 0.0
        self.cur_dist = np.inf  # np.Inf alias no longer exists in NumPy 2.0
        self.cur_rate = 0.0
        self.ue_path = [start]  # path is recorded in grid units, not normalised
        self.ue_xsrc = start[0]
        self.ue_ysrc = start[1]
        self.ue_path_rates = []
        self.rate_threshold = rate_thr

        self.state = start / self.high_obs
        return self.state

    def render(self, mode='human', close=False):
        """Plot the path flown so far, annotated with source, destination and rates."""
        from matplotlib.path import Path
        import matplotlib.patches as patches

        verts = [(int(p[0]), int(p[1])) for p in self.ue_path]
        codes = [Path.LINETO] * len(verts)
        codes[0] = Path.MOVETO
        codes[-1] = Path.STOP
        path = Path(verts, codes)

        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(111)
        ax.add_patch(patches.PathPatch(path, facecolor='none', lw=2))

        xs, ys = zip(*verts)
        ax.plot(xs, ys, 'x--', lw=2, color='black')

        bbox = dict(boxstyle="round", fc="0.8")
        arrowprops = dict(
            arrowstyle="->",
            connectionstyle="angle,angleA=0,angleB=90,rad=10")

        offset = 40
        ax.annotate('Src = (%d, %d)' % (self.ue_xsrc, self.ue_ysrc),
                    (self.ue_xsrc, self.ue_ysrc), xytext=(-2 * offset, offset),
                    textcoords='offset points', bbox=bbox, arrowprops=arrowprops)
        ax.annotate('Dest = (%d, %d)' % (self.ue_xdest[0], self.ue_ydest[0]),
                    (self.ue_xdest[0], self.ue_ydest[0]), xytext=(0.5 * offset, -offset),
                    textcoords='offset points', bbox=bbox, arrowprops=arrowprops)

        # Rate i was measured on arrival at path vertex i+1.
        offset = 10
        bbox = dict(boxstyle="round", facecolor='yellow', edgecolor='none')
        for i, rate in enumerate(self.ue_path_rates, start=1):
            ax.annotate('%.2f' % np.around(rate, decimals=2),
                        (verts[i][0], verts[i][1]), xytext=(-2 * offset, offset),
                        textcoords='offset points', bbox=bbox, arrowprops=arrowprops)

        ax.grid()
        ax.set_xticks(self.ue_xloc)
        ax.set_yticks(self.ue_yloc)
        ax.set_title("UAV graph w.r.t gNB [0,0,0]")
        ax.set_xlabel("X direction")
        ax.set_ylabel("Y direction")
        plt.show()
        return

    # Not using this function
    def _reward(self, prev_dist):
        """Return ``10*rate + 3`` when the rate meets the threshold and the UE
        is no farther from the destination than *prev_dist*, else 0.0.

        NOTE(review): ``self.state`` is normalised while the destination is in
        grid units - confirm before reviving this function.
        """
        ue_dist = np.sqrt((self.state[0] - self.ue_xdest) ** 2
                          + (self.state[1] - self.ue_ydest) ** 2)
        if (self.rate >= self.rate_threshold) and (ue_dist <= prev_dist):
            return 10 * self.rate + 3
        return 0.0

    def dest_check(self):
        """Return True when the current location is closer than 50 to the destination."""
        loc = np.rint(self.state * self.high_obs)
        dist = np.sqrt((loc[0] - self.ue_xdest[0]) ** 2
                       + (loc[1] - self.ue_ydest[0]) ** 2)
        return bool(dist < 50)

    def _gameover(self, aoa, aod):
        """Compute the reward and termination flag for the step just taken.

        Parameters:
            aoa, aod - stored on ``self.aoa`` / ``self.aod``; they do not
                       influence the reward

        Returns:
            (reward, done) as listed in the class docstring
        """
        reached = self.dest_check()
        rate_ok = self.rate >= self.rate_threshold

        if reached:
            rwd = 1.0 if rate_ok else -1.0
            done = True
        elif rate_ok:
            # Reward decays with the number of steps already taken.
            rwd = 1.0 * np.exp(-1 * (self.steps_done - 1) / 50) * np.log10(self.rate + 1)
            done = False
        else:
            rwd = -1.0
            done = False

        self.aoa = aoa
        self.aod = aod
        return rwd, done

    def decode_action(self, action_ndx):
        """Split a flat action index into ``(beam_ndx, ue_mv_ndx)``."""
        action_ndx, ue_mv_ndx = divmod(action_ndx, len(self.ue_moves))
        action_ndx, beam_ndx = divmod(action_ndx, self.N)
        assert 0 <= action_ndx <= self.act_space.n
        return (beam_ndx, ue_mv_ndx)

    # Not using this function
    def encode_action(self, beam_ndx, ue_vx_ndx, ue_vy_ndx):
        """Fold a beam index and two velocity indices into one integer.

        NOTE: this is not the inverse of ``decode_action``, which works on
        (beam, move) pairs.
        """
        i = beam_ndx
        i *= self.N
        i += ue_vx_ndx
        i *= len(self.ue_vx)
        i += ue_vy_ndx
        i *= len(self.ue_vy)
        return i

    def choose_vel(self, ue_mv_ndx):
        """Return the ``(vx, vy)`` displacement for move index *ue_mv_ndx*.

        Uses the first entry of ``ue_vx`` / ``ue_vy``; any move other than
        'L', 'U' or 'D' is treated as a move to the right.
        """
        ue_mv = self.ue_moves[ue_mv_ndx]
        if ue_mv == 'L':  # move left
            return -1 * self.ue_vx[0], 0
        if ue_mv == 'U':  # move up
            return 0, self.ue_vy[0]
        if ue_mv == 'D':  # move down
            return 0, -1 * self.ue_vy[0]
        return self.ue_vx[0], 0  # move right

    def get_Los_Rate(self, state):
        """Return ``(SNR, rate)`` from ``MIMO.Los_Rate`` for a normalised *state*.

        The model uses an empty scatterer array and the 'fsp' channel model.
        """
        x, y = np.rint(state * self.high_obs)
        mimo_model = MIMO(np.array([x, y, 0]), self.gNB[0], np.array([]), 'fsp',
                          self.ptx, self.N_tx, self.N_rx)
        snr, rate = mimo_model.Los_Rate()
        return snr, rate

    def get_Exh_Rate(self, state):
        """Exhaustively search ``BeamSet`` for the best receive beam at *state*.

        Returns:
            (best_beam, best_rate) over all beams in ``BeamSet``
        """
        x, y = np.rint(state * self.high_obs)
        mimo_exh_model = MIMO(np.array([x, y, 0]), self.gNB[0], self.sc_xyz,
                              self.ch_model, self.ptx, self.N_tx, self.N_rx)
        sweep = (mimo_exh_model.Calc_Rate(self.SF_time, np.array([rbeam, 0]))
                 for rbeam in self.BeamSet)
        exh_rates = [rate for _snr, rate in sweep]
        best_rbeam_ndx = np.argmax(exh_rates)
        return self.BeamSet[best_rbeam_ndx], np.max(exh_rates)  # (Best RBS, Best Rate)

    def get_Rate(self):
        """Return the rate computed by the most recent step."""
        return self.rate
class UAV_Env(gym.Env):
    """
    Description:
    A UAV moves in a region around the base station gNB[0]. The problem is to
    provide the UAV with the best possible QoS over ``Nhops`` steps.

    Observation:
        Type: Box(3,) - the state handed out is divided by high_obs
        Num Observation     Low     High
        0   distance (D)    30.0    100.0
        1   TBD             0.0     3.14159
        2   RBD             0.0     3.14159

    Action:
        Type: Discrete(Nrx) - index of the receive beam in BeamSet

    Reward:
        ``10*rate + 3`` when the rate exceeds ``rate_threshold``, otherwise 0

    Starting State:
        distance uniform in [30, 50), TBD uniform in [0, 3.14159) and a random
        receive beam from ``Gen_RandomBeams``

    Episode Termination:
        When the UAV has made ``Nhops`` steps from the starting state
    """

    def __init__(self):
        """Set up antenna and channel parameters and the gym spaces."""
        # Antenna / link parameters
        self.N_tx = 1  # Num of transmitter antenna elements
        self.N_rx = 8  # Num of receiver antenna elements
        self.count = 0
        self.ptx = 30  # dB
        self.SF_time = 20  # msec - for 60KHz carrier frequency in 5G
        self.alpha = 0

        # UE: (x1,y1,z1) of UE_source location and x displacement per step
        self.ue_s = None
        self.ue_v = 10

        # Base station and channel
        self.gNB = np.array([[0, 0, 0]])
        self.sc_xyz = np.array([])
        self.ch_model = 'fsp'

        self.N = self.N_rx  # Overall beam directions
        self.BeamSet = Generate_BeamDir(self.N)  # Set of all beam directions

        # Episode bookkeeping
        self.state = None
        self.rate = None
        self.rate_threshold = 0.07  # good enough QoS val (Rate)
        self.Nhops = 5

        self.seed()

        obs_low = np.array([30.0, 0.0, 0.0])
        self.high_obs = np.array([100.0, 3.14159, 3.14159])
        self.obs_space = spaces.Box(low=obs_low, high=self.high_obs)
        self.act_space = spaces.Discrete(self.N)

    def seed(self, seed=None):
        """Create ``self.np_random`` from *seed* and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Advance the environment by one step using receive-beam index *action*.

        Parameters:
            action - index into ``BeamSet``; must be contained in ``act_space``

        Returns:
            (state, reward, done, {}) where ``state`` is the new normalised state
        """
        assert self.act_space.contains(action), "%r (%s) invalid" % (action, type(action))

        # Undo the normalisation applied when the state was stored.
        prev_dist, prev_ang, _prev_beam = self.state * self.high_obs
        beam = self.BeamSet[action]

        # Move the UE along x and derive the new polar coordinates.
        position = np.array(sph2cart(prev_ang, 0, prev_dist))
        position[0] += self.ue_v
        x, y = position[0], position[1]
        new_dist = np.sqrt(x**2 + y**2)
        new_ang = np.arctan2(y, x)
        self.state = np.array([new_dist, new_ang, beam]) / self.high_obs

        # Evaluate the link at the new position with the chosen receive beam.
        self.mimo_model = MIMO(position, self.gNB[0], self.sc_xyz, self.ch_model,
                               self.ptx, self.N_tx, self.N_rx)
        prev_rate = self.rate
        self.SNR, self.rate = self.mimo_model.Calc_Rate(self.SF_time, np.array([beam, 0]))
        self.steps_done += 1

        rwd = self._reward(prev_rate, prev_dist, new_dist)
        done = self._gameover()
        return self.state, rwd, done, {}

    def reset(self):
        """Draw a random start state and return a copy of it (normalised)."""
        start_dist = np.random.uniform(low=30.0, high=50.0)
        start_tbd = np.random.uniform(low=0.0, high=3.14159)
        start_rbd = Gen_RandomBeams(1, self.N)  # one random RX beam
        self.state = np.array([start_dist, start_tbd, start_rbd]) / self.high_obs
        self.steps_done = 0
        self.rate = 0
        return np.array(self.state)

    def render(self, mode='human', close=False):
        """Rendering is not implemented for this environment."""
        pass

    def _reward(self, prev_rate, prev_dist, cur_dist):
        """Return ``10*rate + 3`` when the current rate exceeds the threshold, else 0.

        The three arguments are accepted for interface reasons but not used.
        """
        if not (self.rate > self.rate_threshold):
            return 0
        return 10 * self.rate + 3

    def _gameover(self):
        """Return True once ``Nhops`` steps have been taken."""
        return (self.steps_done == self.Nhops)

    def get_Los_Rate(self, state):
        """Return ``(SNR, rate)`` from ``MIMO.Los_Rate`` for a normalised *state*.

        The model uses an empty scatterer array and the 'fsp' channel model.
        """
        dist, ue_ang, _rbd = state * self.high_obs
        location = np.array(sph2cart(ue_ang, 0, dist))
        los_model = MIMO(location, self.gNB[0], np.array([]), 'fsp',
                         self.ptx, self.N_tx, self.N_rx)
        snr, rate = los_model.Los_Rate()
        return snr, rate

    def get_Exh_Rate(self, state):
        """Exhaustively search ``BeamSet`` for the best receive beam at *state*.

        Returns:
            (best_beam, best_rate) over all beams in ``BeamSet``
        """
        dist, ue_ang, _rbd = state * self.high_obs
        location = np.array(sph2cart(ue_ang, 0, dist))
        model = MIMO(location, self.gNB[0], self.sc_xyz, self.ch_model,
                     self.ptx, self.N_tx, self.N_rx)
        sweep = (model.Calc_Rate(self.SF_time, np.array([beam, 0]))
                 for beam in self.BeamSet)
        rates = [rate for _snr, rate in sweep]
        return self.BeamSet[np.argmax(rates)], np.max(rates)  # (Best RBS, Best Rate)

    def get_Rate(self):
        """Return the rate computed by the most recent step."""
        return self.rate
class UAV_Env_v3(gym.Env):
    """
    Description:
        A UAV moves in a region within the coverage area of the base station.
        The objective of the problem is to guide the UAV (using the gNB) along
        a path that meets a rate requirement, reaching the destination in an
        energy-minimized way as early as possible.

    Observation:
        Type: MultiDiscrete(2,)
        Num  Observation  Min      Max     Step
        1    UAV_xloc     -500.0   500.0   50.0
        2    UAV_yloc     -500.0   500.0   50.0

        Note: ``reset`` and ``step`` return the location divided by
        ``self.high_obs`` (i.e. normalised), not grid indices.

    Action:
        Type: Discrete(Nrx * num(uav_moves))
        Num                   Action
        0                     Bdir0, mov0
        1                     Bdir0, mov1
        2                     Bdir0, mov2
        3                     Bdir0, mov3
        4                     Bdir1, ...
        ...                   ...
        (Nrx-1)*uav_moves     Bdir{Nrx-1}, mov3

    Reward:
        Value computed from rate measurements and distance/step-count
        shaping; the formula depends on the ``meas`` string given to
        ``reset`` ('rate_thr_path', 'rate_path' or 'short_path').

    Starting State:
        The grid location selected by the ``state_indices`` passed to
        ``reset``.

    Episode Termination:
        When the UAV is closer than one grid step to the destination D.
    """

    def __init__(self):
        # Antenna modelling: Uniform Linear Arrays (ULA) are considered
        self.N_tx = 8  # Num of transmitter antenna elements
        self.N_rx = 8  # Num of receiver antenna elements
        self.count = 0
        self.ptx = 30  # dB
        self.SF_time = 20  # msec - for 60KHz carrier frequency in 5G
        self.alpha = 0

        # Base station locations
        self.gNB = np.array([[0, 0, 0]])  # , [20,30,0], [40,60,0]]

        # Channel
        self.sc_xyz = np.array([])
        self.ch_model = 'uma-los'
        self.N = self.N_rx  # Overall beam directions
        self.BeamSet = Generate_BeamDir(self.N)  # Set of all beam directions

        # Observation - RSSI information of states
        self.state = None
        self.rate = None
        self.rate_threshold = None  # good enough QoS val (Rate)

        # UE information
        self.ue_step = 50
        # Grid coordinates -500, -450, ..., 500 on both axes
        self.ue_xloc = np.arange(-500, 550, self.ue_step)
        self.ue_yloc = np.arange(-500, 550, self.ue_step)
        # Candidate speeds; choose_vel() only ever uses element 0
        self.ue_vx = np.array([50, 100])
        self.ue_vy = np.array([50, 100])
        self.ue_xdest = np.array([np.min(self.ue_xloc)])  # 1 x-dest loc
        self.ue_ydest = np.array([400])  # 1 y-dest loc
        self.ue_xsrc = np.array([np.max(self.ue_xloc)])  # 1 source x-loc
        self.ue_ysrc = np.array([np.max(self.ue_yloc)])  # 1 source y-loc
        self.ue_moves = np.array(['L', 'R', 'U', 'D'])  # moving direction of UAV
        self.seed()

        # Observation and Action Spaces
        self.high_obs = np.array([np.max(self.ue_xloc), np.max(self.ue_yloc)])
        self.obs_space = spaces.MultiDiscrete([
            len(self.ue_xloc),  # ue_xloc
            len(self.ue_yloc),  # ue_yloc
        ])
        self.act_space = spaces.Discrete(
            self.N * len(self.ue_moves))  # n(RBD)*n(ue_moves)

    def seed(self, seed=None):
        """Create ``self.np_random`` from ``seed`` and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _dist_to_dest(self, xloc, yloc):
        """Euclidean distance from (xloc, yloc) to the destination point."""
        return np.sqrt((xloc - self.ue_xdest[0])**2 +
                       (yloc - self.ue_ydest[0])**2)

    def step(self, action):
        """Apply one (beam, move) action and return the gym 4-tuple.

        Args:
            action: integer in ``self.act_space``; decoded by
                ``decode_action`` into a beam index and a move index.

        Returns:
            ``(state, reward, done, info)`` where ``state`` is the new
            location divided by ``self.high_obs`` and ``info`` is ``{}``.
            Once ``self.done`` is set, the stored state/reward are returned
            unchanged.
        """
        assert self.act_space.contains(
            action), "%r (%s) invalid" % (action, type(action))

        state = np.rint(self.state * self.high_obs)
        rbd_ndx, ue_mv_ndx = self.decode_action(action)
        ue_vx, ue_vy = self.choose_vel(ue_mv_ndx)
        rbs = self.BeamSet[rbd_ndx]
        ue_xloc, ue_yloc = state

        # Distance to the destination *before* this move is applied.
        self.cur_dist = self._dist_to_dest(ue_xloc, ue_yloc)
        self.cur_state = state

        if self.done:  # reached terminal state
            return self.state, self.rwd, self.done, {}

        # Only the coordinate along the direction of travel is clamped to
        # the grid; the other component of the velocity is zero.
        ue_mv = self.ue_moves[ue_mv_ndx]
        if ue_mv == 'L':
            new_ue_xloc = max(ue_xloc + ue_vx, np.min(self.ue_xloc))
            new_ue_yloc = ue_yloc + ue_vy
        elif ue_mv == 'U':
            new_ue_xloc = ue_xloc + ue_vx
            new_ue_yloc = min(ue_yloc + ue_vy, np.max(self.ue_yloc))
        elif ue_mv == 'R':
            new_ue_xloc = min(ue_xloc + ue_vx, np.max(self.ue_xloc))
            new_ue_yloc = ue_yloc + ue_vy
        elif ue_mv == 'D':
            new_ue_xloc = ue_xloc + ue_vx
            new_ue_yloc = max(ue_yloc + ue_vy, np.min(self.ue_yloc))

        # Approximating (0,0) by (40,40): the UE would otherwise sit exactly
        # on the gNB, to prevent rate->Inf.
        if (new_ue_xloc == 0) and (new_ue_yloc == 0):
            mimo_pos = np.array([40, 40, 0])
        else:
            mimo_pos = np.array([new_ue_xloc, new_ue_yloc, 0])
        self.mimo_model = MIMO(mimo_pos, self.gNB[0], self.sc_xyz,
                               self.ch_model, self.ptx, self.N_tx, self.N_rx)
        self.SNR, self.rate = self.mimo_model.Calc_Rate(
            self.SF_time, np.array([rbs, 0]))  # rkbeam_vec, tbeam_vec

        # Commit the new location BEFORE computing the reward:
        # rate_path_gameover() and short_path_gameover() derive next_dist
        # from self.state, so with the old ordering next_dist always equalled
        # cur_dist and their "moved closer / moved away" branches were
        # unreachable. _gameover() does not read self.state.
        self.state = np.array([new_ue_xloc, new_ue_yloc]) / self.high_obs

        if self.measure == 'rate_thr_path':
            self.rwd, self.done = self._gameover()
        elif self.measure == 'rate_path':
            self.rwd, self.done = self.rate_path_gameover()
        elif self.measure == 'short_path':
            self.rwd, self.done = self.short_path_gameover()
        else:
            print("Err: Incorrect measure str\n")
            self.rwd, self.done = -100.0, True

        self.ue_path_rates.append(self.rate)
        self.ue_path.append(np.array([new_ue_xloc, new_ue_yloc]))
        self.cur_rate = self.rate
        self.prev_dist = self.cur_dist
        self.steps_done += 1

        return self.state, self.rwd, self.done, {}

    def beyond_border(self, ue_xpos, ue_ypos):
        """Return True when the position lies on any edge of the grid."""
        return bool((ue_xpos == np.min(self.ue_xloc))
                    or (ue_xpos == np.max(self.ue_xloc))
                    or (ue_ypos == np.min(self.ue_yloc))
                    or (ue_ypos == np.max(self.ue_yloc)))

    def reset(self, rate_thr, meas, state_indices):
        """Start a new episode.

        Args:
            rate_thr: value stored as ``self.rate_threshold``.
            meas: reward selector used by ``step`` ('rate_thr_path',
                'rate_path' or 'short_path').
            state_indices: ``(xloc_ndx, yloc_ndx)`` indices into
                ``self.ue_xloc`` / ``self.ue_yloc`` for the start location.

        Returns:
            The start location divided by ``self.high_obs``.
        """
        xloc_ndx, yloc_ndx = state_indices
        start = np.array([self.ue_xloc[xloc_ndx], self.ue_yloc[yloc_ndx]])

        self.steps_done = 0
        self.rate = 0.0
        self.ue_path = []
        self.ue_xsrc = start[0]
        self.ue_ysrc = start[1]
        self.ue_path_rates = []
        self.measure = meas
        self.rwd = 0.0
        self.done = False
        self.rate_threshold = rate_thr
        self.state = start / self.high_obs
        # np.inf, not np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.prev_dist = np.inf
        self.cur_rate = 0.0
        return self.state

    def render(self, mode='human', close=False):
        """Plot the visited path and annotate each point with its rate.

        Does nothing when no step has been recorded yet (an empty path
        previously raised IndexError).
        """
        from matplotlib.path import Path
        import matplotlib.patches as patches

        if not self.ue_path:
            return

        verts = [(int(x[0]), int(x[1])) for x in self.ue_path]
        codes = [Path.LINETO] * len(verts)
        codes[0] = Path.MOVETO
        codes[-1] = Path.STOP
        path = Path(verts, codes)

        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(111)
        patch = patches.PathPatch(path, facecolor='none', lw=2)
        ax.add_patch(patch)

        xs, ys = zip(*verts)
        ax.plot(xs, ys, 'x--', lw=2, color='black')

        bbox = dict(boxstyle="round", fc="0.8")
        arrowprops = dict(arrowstyle="->",
                          connectionstyle="angle,angleA=0,angleB=90,rad=10")

        offset = 40
        ax.annotate('Src = (%d, %d)' % (self.ue_xsrc, self.ue_ysrc),
                    (self.ue_xsrc, self.ue_ysrc),
                    xytext=(-2 * offset, offset),
                    textcoords='offset points',
                    bbox=bbox,
                    arrowprops=arrowprops)
        ax.annotate('Dest = (%d, %d)' % (self.ue_xdest[0], self.ue_ydest[0]),
                    (self.ue_xdest[0], self.ue_ydest[0]),
                    xytext=(0.5 * offset, -offset),
                    textcoords='offset points',
                    bbox=bbox,
                    arrowprops=arrowprops)

        offset = 10
        bbox = dict(boxstyle="round", facecolor='yellow', edgecolor='none')
        # ue_path and ue_path_rates are appended together in step(), so they
        # have the same length.
        for path_rate, vert in zip(self.ue_path_rates, verts):
            ax.annotate('%.2f' % np.around(path_rate, decimals=2),
                        (vert[0], vert[1]),
                        xytext=(-2 * offset, offset),
                        textcoords='offset points',
                        bbox=bbox,
                        arrowprops=arrowprops)

        ax.grid()
        ax.set_xticks(self.ue_xloc)
        ax.set_yticks(self.ue_yloc)
        ax.set_title("UAV graph w.r.t gNB [0,0,0]")
        ax.set_xlabel("X direction")
        ax.set_ylabel("Y direction")
        plt.show()
        return

    def dest_check(self):
        """Return True when ``self.cur_dist`` is below one grid step."""
        return bool(self.cur_dist < self.ue_step)

    def _gameover(self):
        """Reward/termination for the 'rate_thr_path' measure.

        Uses ``self.cur_dist`` (distance before the current move) and
        ``self.prev_dist`` (the same quantity from the previous step).
        NOTE(review): this means the distance terms describe the previous
        move rather than the current one - confirm this lag is intended.

        Returns:
            ``(rwd, done)``.
        """
        if self.dest_check():
            return 1.0 * np.log10(8 * self.rate + 1), True

        # Pick the weight of the shaped reward; first matching rule wins.
        if self.cur_rate >= self.rate_threshold:
            weight = 0.6
        elif self.cur_dist < self.prev_dist:
            weight = 0.8
        elif self.cur_dist > self.prev_dist:
            weight = 0.2
        else:
            return 0.0, False

        rwd = weight * np.exp(-self.cur_dist / 1000) * np.exp(
            -2 * (self.steps_done - 1) / 50) * np.log10(8 * self.rate + 1)
        return rwd, False

    # Reward Function for max rate path
    def rate_path_gameover(self):
        """Reward/termination for the 'rate_path' measure.

        ``next_dist`` is the destination distance of ``self.state``; step()
        stores the post-move location there before calling this method.

        Returns:
            ``(rwd, done)``.
        """
        state = np.rint(self.state * self.high_obs)
        next_dist = self._dist_to_dest(state[0], state[1])
        step_decay = np.exp(-1 * (self.steps_done - 1) / 50)

        if self.dest_check():
            rwd = 1.0
            done = True
        elif next_dist < self.cur_dist:
            rwd = 0.5 * step_decay * np.exp(self.rate / 20)
            done = False
        elif next_dist > self.cur_dist:
            rwd = 0.2 * step_decay * np.exp(self.rate / 20)
            done = False
        else:
            # Distance unchanged, e.g. the move was clamped at the border.
            rwd = -1.0 * step_decay * np.exp(-self.rate / 20)
            done = False
        return rwd, done

    # Reward function for shortest path
    def short_path_gameover(self):
        """Reward/termination for the 'short_path' measure.

        ``next_dist`` is the destination distance of ``self.state``; step()
        stores the post-move location there before calling this method.

        Returns:
            ``(rwd, done)``.
        """
        state = np.rint(self.state * self.high_obs)
        next_dist = self._dist_to_dest(state[0], state[1])

        if self.dest_check():
            rwd = 1.0
            done = True
        elif next_dist < self.cur_dist:
            rwd = 0.2
            done = False
        else:
            rwd = -1.0
            done = False
        return rwd, done

    def decode_action(self, action_ndx):
        """Split a flat action index into ``(beam_ndx, ue_mv_ndx)``.

        The move index is the fastest-varying digit:
        ``action = beam_ndx * len(self.ue_moves) + ue_mv_ndx``.
        """
        ue_mv_ndx = action_ndx % len(self.ue_moves)
        action_ndx = action_ndx // len(self.ue_moves)
        beam_ndx = action_ndx % self.N
        action_ndx = action_ndx // self.N
        assert 0 <= action_ndx <= self.act_space.n
        return (beam_ndx, ue_mv_ndx)

    # Not using this function
    def encode_action(self, beam_ndx, ue_vx_ndx, ue_vy_ndx):
        """Combine three indices into one integer (unused).

        NOTE(review): this is not the inverse of ``decode_action`` - it takes
        velocity indices rather than a move index and multiplies after the
        last addition. Kept unchanged because nothing in this class calls it.
        """
        i = beam_ndx
        i *= self.N
        i += ue_vx_ndx
        i *= len(self.ue_vx)
        i += ue_vy_ndx
        i *= len(self.ue_vy)
        return i

    def choose_vel(self, ue_mv_ndx):
        """Map a move index to a ``(ue_vx, ue_vy)`` velocity pair.

        Only the first entry of ``self.ue_vx`` / ``self.ue_vy`` is used.
        """
        ue_mv = self.ue_moves[ue_mv_ndx]
        if ue_mv == 'L':  # move left
            ue_vx = -1 * self.ue_vx[0]
            ue_vy = 0
        elif ue_mv == 'U':  # move up
            ue_vx = 0
            ue_vy = self.ue_vy[0]
        elif ue_mv == 'D':  # move down
            ue_vx = 0
            ue_vy = -1 * self.ue_vy[0]
        else:  # move right
            ue_vx = self.ue_vx[0]
            ue_vy = 0
        return ue_vx, ue_vy

    def get_Los_Rate(self, state):
        """Return ``(SNR, rate)`` from ``MIMO.Los_Rate()`` for a state.

        Args:
            state: normalised (x, y) location; it is multiplied by
                ``self.high_obs`` and rounded before use.
        """
        state = np.rint(state * self.high_obs)
        ue_xloc, ue_yloc = state
        sc_xyz = np.array([])
        ch_model = 'fsp'

        ue_pos = np.array([ue_xloc, ue_yloc, 0])
        # Shift a UE sitting exactly on the gNB at (0,0) to (40,40).
        if (ue_xloc == 0) and (ue_yloc == 0):
            ue_pos = np.array([ue_xloc + 40, ue_yloc + 40, 0])

        mimo_model = MIMO(ue_pos, self.gNB[0], sc_xyz, ch_model, self.ptx,
                          self.N_tx, self.N_rx)
        SNR, rate = mimo_model.Los_Rate()
        return SNR, rate

    def get_Exh_Rate(self, state):
        """Exhaustively search ``self.BeamSet`` for the best beam.

        Args:
            state: normalised (x, y) location; it is multiplied by
                ``self.high_obs`` and rounded before use.

        Returns:
            ``(best_beam, rate_max)``: the beam with the highest
            ``Calc_ExhRate`` rate, and the rate obtained by re-evaluating
            that beam with ``noise_flag=False``.
        """
        state = np.rint(state * self.high_obs)
        ue_xloc, ue_yloc = state

        ue_pos = np.array([ue_xloc, ue_yloc, 0])
        # Shift a UE sitting exactly on the gNB at (0,0) to (40,40).
        if (ue_xloc == 0) and (ue_yloc == 0):
            ue_pos = np.array([ue_xloc + 40, ue_yloc + 40, 0])

        mimo_exh_model = MIMO(ue_pos, self.gNB[0], self.sc_xyz, self.ch_model,
                              self.ptx, self.N_tx, self.N_rx)

        exh_rates = []
        for rbeam in self.BeamSet:
            _, rate = mimo_exh_model.Calc_ExhRate(self.SF_time,
                                                  np.array([rbeam, 0]))
            exh_rates.append(rate)

        best_rbeam_ndx = np.argmax(exh_rates)
        best_beam = self.BeamSet[best_rbeam_ndx]
        _, rate_max = mimo_exh_model.Calc_ExhRate(self.SF_time,
                                                  np.array([best_beam, 0]),
                                                  noise_flag=False)
        return best_beam, rate_max  # (Best RBS, Best Rate)

    def get_Rate(self):
        """Return ``self.cur_rate`` (the rate recorded by the last step)."""
        return self.cur_rate
def step(self, action):
    """Apply one (beam, move) action and return the gym 4-tuple.

    Args:
        action: integer in ``self.act_space``; decoded by
            ``self.decode_action`` into a beam index and a move index.

    Returns:
        ``(state, reward, done, info)`` where ``state`` is the new location
        divided by ``self.high_obs`` and ``info`` is ``{}``. Once
        ``self.done`` is set, the stored state/reward are returned unchanged.
    """
    assert self.act_space.contains(
        action), "%r (%s) invalid" % (action, type(action))

    state = np.rint(self.state * self.high_obs)
    rbd_ndx, ue_mv_ndx = self.decode_action(action)
    ue_vx, ue_vy = self.choose_vel(ue_mv_ndx)
    rbs = self.BeamSet[rbd_ndx]
    ue_xdest = self.ue_xdest[0]
    ue_ydest = self.ue_ydest[0]
    ue_xloc, ue_yloc = state

    # Distance to the destination *before* this move is applied.
    self.cur_dist = np.sqrt((ue_xloc - ue_xdest)**2 +
                            (ue_yloc - ue_ydest)**2)
    self.cur_state = state

    if self.done:  # reached terminal state
        return self.state, self.rwd, self.done, {}

    # Only the coordinate along the direction of travel is clamped to the
    # grid; the other component of the velocity is zero.
    ue_mv = self.ue_moves[ue_mv_ndx]
    if ue_mv == 'L':
        new_ue_xloc = max(ue_xloc + ue_vx, np.min(self.ue_xloc))
        new_ue_yloc = ue_yloc + ue_vy
    elif ue_mv == 'U':
        new_ue_xloc = ue_xloc + ue_vx
        new_ue_yloc = min(ue_yloc + ue_vy, np.max(self.ue_yloc))
    elif ue_mv == 'R':
        new_ue_xloc = min(ue_xloc + ue_vx, np.max(self.ue_xloc))
        new_ue_yloc = ue_yloc + ue_vy
    elif ue_mv == 'D':
        new_ue_xloc = ue_xloc + ue_vx
        new_ue_yloc = max(ue_yloc + ue_vy, np.min(self.ue_yloc))

    # Approximating (0,0) by (40,40): the UE would otherwise sit exactly on
    # the gNB, to prevent rate->Inf.
    if (new_ue_xloc == 0) and (new_ue_yloc == 0):
        mimo_pos = np.array([40, 40, 0])
    else:
        mimo_pos = np.array([new_ue_xloc, new_ue_yloc, 0])
    self.mimo_model = MIMO(mimo_pos, self.gNB[0], self.sc_xyz, self.ch_model,
                           self.ptx, self.N_tx, self.N_rx)
    self.SNR, self.rate = self.mimo_model.Calc_Rate(
        self.SF_time, np.array([rbs, 0]))  # rkbeam_vec, tbeam_vec

    # Commit the new location BEFORE computing the reward: the rate_path and
    # short_path reward functions derive next_dist from self.state, so with
    # the old ordering next_dist always equalled cur_dist and their
    # "moved closer / moved away" branches could never be taken.
    self.state = np.array([new_ue_xloc, new_ue_yloc]) / self.high_obs

    if self.measure == 'rate_thr_path':
        self.rwd, self.done = self._gameover()
    elif self.measure == 'rate_path':
        self.rwd, self.done = self.rate_path_gameover()
    elif self.measure == 'short_path':
        self.rwd, self.done = self.short_path_gameover()
    else:
        print("Err: Incorrect measure str\n")
        self.rwd, self.done = -100.0, True

    self.ue_path_rates.append(self.rate)
    self.ue_path.append(np.array([new_ue_xloc, new_ue_yloc]))
    self.cur_rate = self.rate
    self.prev_dist = self.cur_dist
    self.steps_done += 1

    return self.state, self.rwd, self.done, {}