def __init__(self, aimRotation,actiondim=1): print("init InferenceModel") dtype = torch.float64 torch.set_default_dtype(dtype) # device = torch.device('cuda', index=0) # if torch.cuda.is_available() else device = torch.device('cpu') # if torch.cuda.is_available(): # torch.cuda.set_device(0) parser = argparse.ArgumentParser(description='PyTorch PPO example') parser.add_argument('--env-name', default="continueRealEnvPpo", metavar='G', help='name of the environment to run') parser.add_argument('--version', default="4.3.1.8.1", metavar='G', help='version') args = parser.parse_args() path = os.path.join(assets_dir(), args.version) print(path) randomSeed = 2 render = False state_dim = 64 + 12 +15 # env.observation_space.shape[0]#[0] running_state = ZFilter((state_dim,), clip=5) """define actor and critic""" policy_net =Policy(76+15, 3)#DiscretePolicy(75,5) #Policy(75, 4) value_net = Value(76+15) policy_net.load_state_dict(torch.load(os.path.join(path, 'policy_net_{}_ppo.pth'.format(args.env_name)), 'cpu')) value_net.load_state_dict(torch.load(os.path.join(path, 'value_net_{}_ppo.pth'.format(args.env_name)), 'cpu')) # policy_net = torch.load(os.path.join(path, 'policy_net_{}_ppo.pth'.format(args.env_name)), 'cpu') # value_net = torch.load(os.path.join(path, 'value_net_{}_ppo.pth'.format(args.env_name)), 'cpu') running_state, saveavgreward = pickle.load( open(os.path.join(path, 'running_state_{}_ppo.p'.format(args.env_name)), "rb")) print("get reward {}".format(saveavgreward)) policy_net.to(device) value_net.to(device) self.persistence = Persistence("real_0515_" + args.version) """create agent""" self.agent = DroneAgent(policy_net, value_net, device, running_state=running_state, render=render, num_threads=1) self.lastRotation = aimRotation self.lastLeftRightFeel = [8, 8] self.lastaction = [0, -0.5] self.lasttime = datetime.datetime.now() self.forceWallMenory =0 self.aimRotaion = aimRotation self.finalAimRotaion =aimRotation self.stoptime = 0 self.stoplong = 5 self.lastalphadirect = 0 self.lastalphacos = 0.5 print("init succ")
class InferenceModel(): def __init__(self, actiondim=1): print("init InferenceModel") dtype = torch.float64 torch.set_default_dtype(dtype) # device = torch.device('cuda', index=0) # if torch.cuda.is_available() else device = torch.device('cpu') # if torch.cuda.is_available(): # torch.cuda.set_device(0) parser = argparse.ArgumentParser(description='PyTorch PPO example') parser.add_argument('--env-name', default="continueRealEnvPpo", metavar='G', help='name of the environment to run') parser.add_argument('--version', default="4.2.4.1", metavar='G', help='version') args = parser.parse_args() path = os.path.join(assets_dir(), args.version) print(path) randomSeed = 2 render = False state_dim = 64 + 11 # env.observation_space.shape[0]#[0] running_state = ZFilter((state_dim,), clip=5) """define actor and critic""" policy_net =Policy(75, 2)#DiscretePolicy(75,5) #Policy(75, 4) value_net = Value(75) policy_net.load_state_dict(torch.load(os.path.join(path, 'policy_net_{}_ppo.pth'.format(args.env_name)), 'cpu')) value_net.load_state_dict(torch.load(os.path.join(path, 'value_net_{}_ppo.pth'.format(args.env_name)), 'cpu')) # policy_net = torch.load(os.path.join(path, 'policy_net_{}_ppo.pth'.format(args.env_name)), 'cpu') # value_net = torch.load(os.path.join(path, 'value_net_{}_ppo.pth'.format(args.env_name)), 'cpu') running_state, saveavgreward = pickle.load( open(os.path.join(path, 'running_state_{}_ppo.p'.format(args.env_name)), "rb")) print("get reward {}".format(saveavgreward)) policy_net.to(device) value_net.to(device) self.persistence = Persistence("real_0515_" + args.version) """create agent""" self.agent = DroneAgent(policy_net, value_net, device, running_state=running_state, render=render, num_threads=1) self.lastRotation = 0 self.lastLeftRightFeel = [8, 8] self.lastaction = [0, -0.5] self.lasttime = datetime.datetime.now() print("init succ") def inference(self, imgstate, rotation, aimRotation, time): deepfeel = self.caculateObs(imgstate) state = self.getState(deepfeel, rotation, aimRotation) action, value = self.agent.predictTakeValue(state) info = f"time {time} action {action} critic {value} state {state[64:]} deepfeel avg {np.mean(deepfeel)} value {deepfeel} " info2 = f"action {action} critic {value} state {state[64:]}" print(info2) self.persistence.saveTerminalRecord("stateaction", info) self.lastaction = action return action def inference4(self, imgstate, rotation, aimRotation, time):#仅往前走或者仅拐弯 deepfeel = self.caculateObs(imgstate) state = self.getState(deepfeel, rotation, aimRotation) action, value = self.agent.predictTakeValue(state) info = f"time {time} action {action} critic {value} state {state[64:]} deepfeel avg {np.mean(deepfeel)} value {deepfeel} " info2 = f"action {action} critic {value} state {state[64:]}" print(info2) self.persistence.saveTerminalRecord("stateaction", info) self.lastaction = action if action[1] > 0: action[0] = 0 return action def inference2(self, imgstate, rotation, aimRotation, time):#分开左右 deepfeel = self.caculateObs(imgstate) state = self.getState(deepfeel, rotation, aimRotation) action, value = self.agent.predictTakeValue(state) info = f"time {time} action {action} critic {value} state {state[64:]} deepfeel avg {np.mean(deepfeel)} value {deepfeel} " info2 = f"action {action} critic {value} state {state[64:]}" print(info2) self.persistence.saveTerminalRecord("stateaction", info) self.lastaction = action if action[1] <= 0: if action[2] > action[3]: action[0] = 1 else: action[0] = -1 else: action[0] /= 3 return action def inference3(self, imgstate, rotation, aimRotation, time):# 完全离散化 deepfeel = self.caculateObs(imgstate) state = self.getState(deepfeel, rotation, aimRotation) action, value = self.agent.predictTakeValue(state) info = f"time {time} action {action} critic {value} state {state[64:]} deepfeel avg {np.mean(deepfeel)} value {deepfeel} " info2 = f"action {action} critic {value} state {state[64:]}" print(info2) self.persistence.saveTerminalRecord("stateaction", info) c = -0.5 if action >= 1 and action < 4: c = 0.5 actionsingle = action action = [action, c] self.lastaction = action if actionsingle < 1: action[0] = -1 action[1] = -0.5 elif actionsingle >= 4: action[0] = 1 action[1] = -0.5 elif actionsingle >= 1 and actionsingle < 2: action[0] = -0.5 action[1] = 0.5 elif actionsingle >= 3 and actionsingle < 4: action[0] = 0.5 action[1] = 0.5 else: action[0]= 0 action[1] =0.5 return action def getState(self, deepfeel, rotation, aimRotation): # 极端诡异和空格于制表符显示问题 rotation = math.radians(rotation) aimRotation = math.radians(aimRotation) xDirect = round(math.cos(rotation), 6) yDirect = round(math.sin(rotation), 6) aimDirectX = round(math.cos(aimRotation), 6) aimDirectY = round(math.sin(aimRotation), 6) alphadirect = aimDirectX * yDirect - aimDirectY * xDirect alphacos = aimDirectX * xDirect + aimDirectY * yDirect # 直接用目标与行进方向夹角sin cos作为状态 if alphacos < 0 and alphadirect > 0: alphadirect = 1 if alphacos < 0 and alphadirect < 0: alphadirect = -1 timenow = datetime.datetime.now() internaltime = (timenow - self.lasttime).total_seconds() other = [rotation, xDirect, yDirect, self.lastaction[0], self.lastaction[1], alphadirect, alphacos, self.lastRotation, self.lastLeftRightFeel[0], self.lastLeftRightFeel[1], internaltime] nextstate = [] # print(f"other {other}") for i in deepfeel: nextstate.append(i) for i in other: nextstate.append(i) self.lastRotation = alphadirect # self.lastRotation = rotation self.lasttime = timenow self.lastLeftRightFeel = [nextstate[0], nextstate[63]] return nextstate def caculateObs(self, state, uprange=24, downrange=38): # 压缩转成线 imageCompact = [] for i in range(uprange, downrange): imageCompact.append(state[i][:]) imageCompact = np.array(imageCompact) power = np.min(imageCompact, axis=0) # print(power) for i in range(len(power)): if power[i] > 8: power[i] = 8 elif power[i] < 0.2 and power[i] > 0.000001: power[i] = 0.2 elif power[i] == 0: power[i] = 7 # print(power) return power
class InferenceModel(): def __init__(self, aimRotation, actiondim=1): print("init InferenceModel") dtype = torch.float64 torch.set_default_dtype(dtype) # device = torch.device('cuda', index=0) # if torch.cuda.is_available() else device = torch.device('cpu') # if torch.cuda.is_available(): # torch.cuda.set_device(0) parser = argparse.ArgumentParser(description='PyTorch PPO example') parser.add_argument('--env-name', default="continueRealEnvPpo", metavar='G', help='name of the environment to run') parser.add_argument('--version', default="4.3.1.8.9", metavar='G', help='version') args = parser.parse_args() path = os.path.join(assets_dir(), args.version) print(path) randomSeed = 2 render = False state_dim = 64 + 12 + 15 + ( 64 + 2 + 2) * 3 # env.observation_space.shape[0]#[0] running_state = ZFilter((state_dim, ), clip=5) """define actor and critic""" policy_net = Policy(state_dim, 3) #DiscretePolicy(75,5) #Policy(75, 4) value_net = Value(state_dim) policy_net.load_state_dict( torch.load( os.path.join(path, 'policy_net_{}_ppo.pth'.format(args.env_name)), 'cpu')) value_net.load_state_dict( torch.load( os.path.join(path, 'value_net_{}_ppo.pth'.format(args.env_name)), 'cpu')) # policy_net = torch.load(os.path.join(path, 'policy_net_{}_ppo.pth'.format(args.env_name)), 'cpu') # value_net = torch.load(os.path.join(path, 'value_net_{}_ppo.pth'.format(args.env_name)), 'cpu') running_state, saveavgreward = pickle.load( open( os.path.join(path, 'running_state_{}_ppo.p'.format(args.env_name)), "rb")) print("get reward {}".format(saveavgreward)) policy_net.to(device) value_net.to(device) self.persistence = Persistence("real_0515_" + args.version) """create agent""" self.agent = DroneAgent(policy_net, value_net, device, running_state=running_state, render=render, num_threads=1) self.lastRotation = aimRotation self.lastLeftRightFeel = [8, 8] self.lastaction = [0, -0.5, 0] self.lasttime = datetime.datetime.now() self.forceWallMenory = 0 self.aimRotaion = aimRotation self.aimRotation = aimRotation self.finalAimRotaion = aimRotation self.stoptime = 0 self.stoplong = 5 self.lastalphadirect = 0 self.lastalphacos = 0.5 self.stopfrequency = 0 self.stopfrequencylong = 5 self.saveTimes = 3 self.redirectNum = 0 # 历史内容暂存 self.saveDeepFeels = queue.Queue() self.savelastActions = queue.Queue() self.savelastAngles = queue.Queue() for i in range(3): temp = [8] * 64 #初始观感长预设为8 self.saveDeepFeels.put(temp) self.savelastActions.put([0, 0]) self.savelastAngles.put([0, 1]) self.tempPoCache = queue.Queue() for i in range(20): temp = [-1000, -1000] self.tempPoCache.put(temp) print("init succ") def inference(self, imgstate, rotation, aimRotation, time): deepfeel = self.caculateObs(imgstate) state = self.getState(deepfeel, rotation, self.lastaction[2]) action, value = self.agent.predictTakeValue(state) info = f"time {time} action {action} critic {value} state {state} deepfeel avg {np.mean(deepfeel)} value {deepfeel} " info2 = f"action {action} critic {value} state {state[0:64]}" if action[1] > 0: self.stoptime = 0 else: self.stoptime += 1 self.stopfrequency += 1 print(info2) self.persistence.saveTerminalRecord("stateaction", info) self.lastaction = action.copy() if self.stopfrequency > 0: self.stopfrequency -= 0.5 if self.forceWallMenory > 0: pass return action def sliceWindow(self, deepfeel, sliceSize=8, proValue=2, threshold=1.8): go = int(sliceSize / 2) sliceRes = [] temp = 0 for i in range(0, len(deepfeel) - sliceSize + 1, go): temp = 0 for j in range(0, sliceSize): if deepfeel[i + j] < threshold: if temp == 0: temp = 10 temp = (threshold - deepfeel[i + j]) * proValue sliceRes.append(temp) return sliceRes def judgeForceWall(self, deepfeel, alphacos, max=6.5, avgmax=3.2, twoavgmax=1.8, smallthreshold=0.55): # 待修改 #看局部最优 tempbest = False if self.stopfrequency > self.stopfrequencylong: tempbest = True if alphacos < 0.7: return False totalLength = 0 maxLength = 0 smallnum = 0 for i in deepfeel: if i > maxLength: maxLength = i if i < smallthreshold: smallnum += 1 totalLength += i avgLength = totalLength / len(deepfeel) if avgLength < avgmax and maxLength < max or avgLength < twoavgmax or smallnum > 5 or tempbest: self.forceWallMenory = random.randint(15, 20) return True if self.stoptime > self.stoplong: self.forceWallMenory = random.randint(15, 20) return True return False def chTarget(self, direct): print("更改临时目标") pos = np.array([self.drone.center.x, self.drone.center.y]) ''' aimDirectX = self.aim[0] - pos[0] aimDirectY = self.aim[1] - pos[1] if direct >= 0: self.aim[0] = aimDirectY + pos[0] self.aim[1] = (-1) * aimDirectX + pos[1] else: self.aim[0] = (-1) * aimDirectY + pos[0] self.aim[1] = aimDirectX + pos[1] c = round(math.sin(math.radians(i)),6) d = round(math.cos(math.radians(i)),6) ''' aimDirectX = self.aim[0] - pos[0] aimDirectY = self.aim[1] - pos[1] if direct > 1: direct = 1 elif direct < -1: direct = -1 angle = direct * 80 sinthlta = 1 costhlta = 0 #round(math.cos(math.radians(abs(angle))),6) if direct >= 0: self.aimRotation -= 90 else: self.aimRotation -= 90 if self.aimRotation < -180: self.aimRotation += 360 elif self.aimRotation > 180: self.aimRotation -= 360 def getState(self, deepfeel, rotation, direct): # 极端诡异和空格于制表符显示问题 rotation = math.radians(rotation) sliceRes = self.sliceWindow(deepfeel) aimRotation = math.radians(self.aimRotation) xDirect = round(math.cos(rotation), 6) yDirect = round(math.sin(rotation), 6) aimDirectX = round(math.cos(aimRotation), 6) aimDirectY = round(math.sin(aimRotation), 6) alphadirect = aimDirectX * yDirect - aimDirectY * xDirect alphacos = aimDirectX * xDirect + aimDirectY * yDirect # 直接用目标与行进方向夹角sin cos作为状态 if alphacos < 0 and alphadirect > 0: alphadirect = 1 if alphacos < 0 and alphadirect < 0: alphadirect = -1 ''' judgeState = self.judgeForceWall(deepfeel, alphacos) if judgeState == True: self.chTarget(direct) aimRotation = math.radians(self.aimRotation) xDirect = round(math.cos(rotation), 6) yDirect = round(math.sin(rotation), 6) aimDirectX = round(math.cos(aimRotation), 6) aimDirectY = round(math.sin(aimRotation), 6) alphadirect = aimDirectX * yDirect - aimDirectY * xDirect alphacos = aimDirectX * xDirect + aimDirectY * yDirect # 直接用目标与行进方向夹角sin cos作为状态 if alphacos < 0 and alphadirect > 0: alphadirect = 1 if alphacos < 0 and alphadirect < 0: alphadirect = -1 ''' free = 0 if self.forceWallMenory > 0: free = 5 + self.forceWallMenory stop = 0 if self.stoptime >= self.stoplong: stop = 5 timenow = datetime.datetime.now() internaltime = (timenow - self.lasttime).total_seconds() other = [ xDirect, yDirect, self.lastaction[0], self.lastaction[1], alphadirect, alphacos, self.lastLeftRightFeel[0], self.lastLeftRightFeel[1], free, stop, self.lastalphadirect, self.lastalphacos ] nextstate = [] # print(f"other {other}") for i in deepfeel: nextstate.append(i) for i in other: nextstate.append(i) for i in sliceRes: nextstate.append(i) for i in range(self.saveTimes): queue = self.saveDeepFeels.get() for j in queue: nextstate.append(j) self.saveDeepFeels.put(queue) for i in range(self.saveTimes): queue = self.savelastActions.get() for j in queue: nextstate.append(j) self.savelastActions.put(queue) for i in range(self.saveTimes): queue = self.savelastAngles.get() for j in queue: nextstate.append(j) self.savelastAngles.put(queue) self.savelastAngles.get() self.savelastAngles.put([alphadirect, alphacos]) self.lasttime = timenow self.lastLeftRightFeel = [nextstate[0], nextstate[63]] self.saveDeepFeels.get() self.saveDeepFeels.put(deepfeel.copy()) self.savelastActions.get() self.savelastActions.put([self.lastaction[0], self.lastaction[1]]) if self.forceWallMenory > 0: self.forceWallMenory -= 1 if self.forceWallMenory <= 0: self.aimRotaion = self.finalAimRotaion.copy() print("曾经感受到墙") self.lastalphadirect = alphadirect self.lastalphacos = alphacos return nextstate def caculateObs(self, state, uprange=25, downrange=36): # 压缩转成线 imageCompact = [] for i in range(uprange, downrange): imageCompact.append(state[i][:]) imageCompact = np.array(imageCompact) power = np.min(imageCompact, axis=0) # print(power) for i in range(len(power)): if power[i] > 8: power[i] = 8 elif power[i] < 0.2 and power[i] > 0.000001: power[i] = 0.2 elif power[i] == 0: power[i] = 7 # print(power) return power