Example #1
# Shared imports for all three examples; the project-local names
# (assets_dir, ZFilter, Policy, Value, DroneAgent, Persistence) come from this
# repo's own modules, whose import paths are omitted here.
import argparse
import datetime
import math
import os
import pickle
import queue
import random

import numpy as np
import torch


class InferenceModel():
    def __init__(self, aimRotation, actiondim=1):
        print("init InferenceModel")
        dtype = torch.float64
        torch.set_default_dtype(dtype)
        # device = torch.device('cuda', index=0)  # if torch.cuda.is_available() else
        device = torch.device('cpu')
        # if torch.cuda.is_available():
        #	torch.cuda.set_device(0)

        parser = argparse.ArgumentParser(description='PyTorch PPO example')
        parser.add_argument('--env-name', default="continueRealEnvPpo", metavar='G',
                            help='name of the environment to run')
        parser.add_argument('--version', default="4.3.1.8.1", metavar='G',
                            help='version')

        args = parser.parse_args()
        path = os.path.join(assets_dir(), args.version)
        print(path)


        randomSeed = 2
        render = False
        state_dim = 64 + 12 + 15  # 64 depth readings + 12 scalar features + 15 slice-window features
        running_state = ZFilter((state_dim,), clip=5)
        """define actor and critic"""
        policy_net = Policy(state_dim, 3)  # alternatives tried: DiscretePolicy(75, 5), Policy(75, 4)
        value_net = Value(state_dim)
        policy_net.load_state_dict(torch.load(os.path.join(path, 'policy_net_{}_ppo.pth'.format(args.env_name)), 'cpu'))
        value_net.load_state_dict(torch.load(os.path.join(path, 'value_net_{}_ppo.pth'.format(args.env_name)), 'cpu'))

        # policy_net = torch.load(os.path.join(path, 'policy_net_{}_ppo.pth'.format(args.env_name)), 'cpu')
        # value_net = torch.load(os.path.join(path, 'value_net_{}_ppo.pth'.format(args.env_name)), 'cpu')
        running_state, saveavgreward = pickle.load(
            open(os.path.join(path, 'running_state_{}_ppo.p'.format(args.env_name)), "rb"))
        print("get reward {}".format(saveavgreward))
        policy_net.to(device)
        value_net.to(device)

        self.persistence = Persistence("real_0515_" + args.version)

        """create agent"""
        self.agent = DroneAgent(policy_net, value_net, device, running_state=running_state, render=render,
                                num_threads=1)

        self.lastRotation = aimRotation
        self.lastLeftRightFeel = [8, 8]
        self.lastaction = [0, -0.5]
        self.lasttime = datetime.datetime.now()
        self.forceWallMenory = 0
        self.aimRotaion = aimRotation
        self.finalAimRotaion = aimRotation
        self.stoptime = 0
        self.stoplong = 5
        self.lastalphadirect = 0
        self.lastalphacos = 0.5
        print("init succ")
Example #2
class InferenceModel():
    def __init__(self, actiondim=1):
        print("init InferenceModel")
        dtype = torch.float64
        torch.set_default_dtype(dtype)
        # device = torch.device('cuda', index=0)  # if torch.cuda.is_available() else
        device = torch.device('cpu')
        # if torch.cuda.is_available():
        #	torch.cuda.set_device(0)

        parser = argparse.ArgumentParser(description='PyTorch PPO example')
        parser.add_argument('--env-name', default="continueRealEnvPpo", metavar='G',
                            help='name of the environment to run')
        parser.add_argument('--version', default="4.2.4.1", metavar='G',
                            help='version')

        args = parser.parse_args()
        path = os.path.join(assets_dir(), args.version)
        print(path)

        randomSeed = 2
        render = False
        state_dim = 64 + 11  # 64 depth readings + 11 scalar features
        running_state = ZFilter((state_dim,), clip=5)
        """define actor and critic"""
        policy_net = Policy(state_dim, 2)  # alternatives tried: DiscretePolicy(75, 5), Policy(75, 4)
        value_net = Value(state_dim)
        policy_net.load_state_dict(torch.load(os.path.join(path, 'policy_net_{}_ppo.pth'.format(args.env_name)), 'cpu'))
        value_net.load_state_dict(torch.load(os.path.join(path, 'value_net_{}_ppo.pth'.format(args.env_name)), 'cpu'))

        # policy_net = torch.load(os.path.join(path, 'policy_net_{}_ppo.pth'.format(args.env_name)), 'cpu')
        # value_net = torch.load(os.path.join(path, 'value_net_{}_ppo.pth'.format(args.env_name)), 'cpu')
        running_state, saveavgreward = pickle.load(
            open(os.path.join(path, 'running_state_{}_ppo.p'.format(args.env_name)), "rb"))
        print("get reward {}".format(saveavgreward))
        policy_net.to(device)
        value_net.to(device)

        self.persistence = Persistence("real_0515_" + args.version)

        """create agent"""
        self.agent = DroneAgent(policy_net, value_net, device, running_state=running_state, render=render,
                                num_threads=1)

        self.lastRotation = 0
        self.lastLeftRightFeel = [8, 8]
        self.lastaction = [0, -0.5]
        self.lasttime = datetime.datetime.now()

        print("init succ")

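    # inference / inference2 / inference3 / inference4 are alternative
    # post-processings of the policy output; which one applies depends on the
    # action head the loaded checkpoint was trained with.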
    def inference(self, imgstate, rotation, aimRotation, time):
        deepfeel = self.caculateObs(imgstate)

        state = self.getState(deepfeel, rotation, aimRotation)
        action, value = self.agent.predictTakeValue(state)
        info = f"time {time} action {action} critic {value} state {state[64:]} deepfeel avg {np.mean(deepfeel)} value {deepfeel} "
        info2 = f"action {action} critic {value} state {state[64:]}"
        print(info2)
        self.persistence.saveTerminalRecord("stateaction", info)
        self.lastaction = action
        return action

    def inference4(self, imgstate, rotation, aimRotation, time):  # either go straight or turn, never both
        deepfeel = self.caculateObs(imgstate)

        state = self.getState(deepfeel, rotation, aimRotation)
        action, value = self.agent.predictTakeValue(state)
        info = f"time {time} action {action} critic {value} state {state[64:]} deepfeel avg {np.mean(deepfeel)} value {deepfeel} "
        info2 = f"action {action} critic {value} state {state[64:]}"
        print(info2)
        self.persistence.saveTerminalRecord("stateaction", info)
        self.lastaction = action
        if action[1] > 0:  # moving forward: suppress turning
            action[0] = 0

        return action

    def inference2(self, imgstate, rotation, aimRotation, time):  # separate left/right turn channels
        deepfeel = self.caculateObs(imgstate)

        state = self.getState(deepfeel, rotation, aimRotation)
        action, value = self.agent.predictTakeValue(state)
        info = f"time {time} action {action} critic {value} state {state[64:]} deepfeel avg {np.mean(deepfeel)} value {deepfeel} "
        info2 = f"action {action} critic {value} state {state[64:]}"
        print(info2)
        self.persistence.saveTerminalRecord("stateaction", info)
        self.lastaction = action
        # Assumes a 4-dim action head (action[2]/action[3] would not exist on
        # the 2-dim policy loaded in __init__): when not moving forward, turn
        # hard toward the stronger turn channel; otherwise damp the turn.
        if action[1] <= 0:
            if action[2] > action[3]:
                action[0] = 1
            else:
                action[0] = -1
        else:
            action[0] /= 3
        return action

    def inference3(self, imgstate, rotation, aimRotation, time):  # fully discretized actions
        deepfeel = self.caculateObs(imgstate)

        state = self.getState(deepfeel, rotation, aimRotation)
        action, value = self.agent.predictTakeValue(state)
        info = f"time {time} action {action} critic {value} state {state[64:]} deepfeel avg {np.mean(deepfeel)} value {deepfeel} "
        info2 = f"action {action} critic {value} state {state[64:]}"
        print(info2)
        self.persistence.saveTerminalRecord("stateaction", info)
        # Map the discrete action index (assumes a DiscretePolicy head) to a
        # [turn, speed] pair.
        actionsingle = action
        if actionsingle < 1:
            action = [-1, -0.5]
        elif actionsingle >= 4:
            action = [1, -0.5]
        elif actionsingle < 2:
            action = [-0.5, 0.5]
        elif actionsingle >= 3:
            action = [0.5, 0.5]
        else:
            action = [0, 0.5]
        self.lastaction = action

        return action

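    # Build the 75-dim state: 64 compressed depth readings followed by 11
    # scalars (heading in radians, its unit vector, the last action, the
    # target-angle sin/cos, the previous turn signal, the previous left/right
    # edge depths, and the elapsed time since the last step).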
    def getState(self, deepfeel, rotation, aimRotation):  # original note: weird spaces-vs-tabs display issues here
        rotation = math.radians(rotation)
        aimRotation = math.radians(aimRotation)

        xDirect = round(math.cos(rotation), 6)
        yDirect = round(math.sin(rotation), 6)
        aimDirectX = round(math.cos(aimRotation), 6)
        aimDirectY = round(math.sin(aimRotation), 6)

        # Cross and dot products of the aim and heading unit vectors, i.e. the
        # sin and cos of the target-heading angle, used directly as state.
        alphadirect = aimDirectX * yDirect - aimDirectY * xDirect
        alphacos = aimDirectX * xDirect + aimDirectY * yDirect

        # When the target is behind (cos < 0), saturate the turn signal to +/-1.
        if alphacos < 0 and alphadirect > 0:
            alphadirect = 1
        if alphacos < 0 and alphadirect < 0:
            alphadirect = -1

        timenow = datetime.datetime.now()
        internaltime = (timenow - self.lasttime).total_seconds()

        other = [rotation, xDirect, yDirect, self.lastaction[0], self.lastaction[1], alphadirect, alphacos,
             self.lastRotation, self.lastLeftRightFeel[0], self.lastLeftRightFeel[1], internaltime]
        nextstate = []
        nextstate.extend(deepfeel)
        nextstate.extend(other)
        self.lastRotation = alphadirect  # despite its name, stores the previous turn signal
        self.lasttime = timenow
        self.lastLeftRightFeel = [nextstate[0], nextstate[63]]
        return nextstate


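    # Compress a depth image to a 64-value scan line: for each column, take
    # the minimum depth across a horizontal band of rows (24..37 here).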
    def caculateObs(self, state, uprange=24, downrange=38):  # compress the image into a line
        imageCompact = np.array([state[i][:] for i in range(uprange, downrange)])
        power = np.min(imageCompact, axis=0)

        # Clamp the scan line: cap far readings at 8, floor near-but-valid
        # readings at 0.2, and map exact zeros (invalid returns) to 7.
        for i in range(len(power)):
            if power[i] > 8:
                power[i] = 8
            elif power[i] < 0.2 and power[i] > 0.000001:
                power[i] = 0.2
            elif power[i] == 0:
                power[i] = 7
        return power
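
A minimal driver sketch for Example #2 (assumptions: the checkpoint files referenced in __init__ exist on disk, and a synthetic all-far 40x64 depth image stands in for the real sensor frame; only its shape and the row band 24..37 matter to caculateObs):

model = InferenceModel()
for t in range(3):
    imgstate = np.full((40, 64), 8.0)   # synthetic depth image, everything far
    rotation = 0.0                      # current heading, in degrees
    action = model.inference(imgstate, rotation, aimRotation=90, time=t)
    print(action)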
Example #3
class InferenceModel():
    def __init__(self, aimRotation, actiondim=1):
        print("init InferenceModel")
        dtype = torch.float64
        torch.set_default_dtype(dtype)
        # device = torch.device('cuda', index=0)  # if torch.cuda.is_available() else
        device = torch.device('cpu')
        # if torch.cuda.is_available():
        #	torch.cuda.set_device(0)

        parser = argparse.ArgumentParser(description='PyTorch PPO example')
        parser.add_argument('--env-name',
                            default="continueRealEnvPpo",
                            metavar='G',
                            help='name of the environment to run')
        parser.add_argument('--version',
                            default="4.3.1.8.9",
                            metavar='G',
                            help='version')

        args = parser.parse_args()
        path = os.path.join(assets_dir(), args.version)
        print(path)

        randomSeed = 2
        render = False
        # 64 depth readings + 12 scalars + 15 slice-window features
        # + 3 history frames of (64 depth + 2 action + 2 angle) = 295
        state_dim = 64 + 12 + 15 + (64 + 2 + 2) * 3
        running_state = ZFilter((state_dim, ), clip=5)
        """define actor and critic"""
        policy_net = Policy(state_dim, 3)  # alternatives tried: DiscretePolicy(75, 5), Policy(75, 4)
        value_net = Value(state_dim)
        policy_net.load_state_dict(
            torch.load(
                os.path.join(path,
                             'policy_net_{}_ppo.pth'.format(args.env_name)),
                'cpu'))
        value_net.load_state_dict(
            torch.load(
                os.path.join(path,
                             'value_net_{}_ppo.pth'.format(args.env_name)),
                'cpu'))

        # policy_net = torch.load(os.path.join(path, 'policy_net_{}_ppo.pth'.format(args.env_name)), 'cpu')
        # value_net = torch.load(os.path.join(path, 'value_net_{}_ppo.pth'.format(args.env_name)), 'cpu')
        running_state, saveavgreward = pickle.load(
            open(
                os.path.join(path,
                             'running_state_{}_ppo.p'.format(args.env_name)),
                "rb"))
        print("get reward {}".format(saveavgreward))
        policy_net.to(device)
        value_net.to(device)

        self.persistence = Persistence("real_0515_" + args.version)
        """create agent"""
        self.agent = DroneAgent(policy_net,
                                value_net,
                                device,
                                running_state=running_state,
                                render=render,
                                num_threads=1)

        self.lastRotation = aimRotation
        self.lastLeftRightFeel = [8, 8]
        self.lastaction = [0, -0.5, 0]
        self.lasttime = datetime.datetime.now()
        self.forceWallMenory = 0
        self.aimRotation = aimRotation
        self.finalAimRotaion = aimRotation
        self.stoptime = 0
        self.stoplong = 5
        self.lastalphadirect = 0
        self.lastalphacos = 0.5

        self.stopfrequency = 0
        self.stopfrequencylong = 5
        self.saveTimes = 3
        self.redirectNum = 0
        # rolling caches of recent history
        self.saveDeepFeels = queue.Queue()
        self.savelastActions = queue.Queue()
        self.savelastAngles = queue.Queue()
        for i in range(3):
            temp = [8] * 64  # initial depth line preset to the far value 8
            self.saveDeepFeels.put(temp)
            self.savelastActions.put([0, 0])
            self.savelastAngles.put([0, 1])

        self.tempPoCache = queue.Queue()  # recent-position cache, unused in the methods shown here

        for i in range(20):
            temp = [-1000, -1000]
            self.tempPoCache.put(temp)
        print("init succ")

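    # One inference step for the history-augmented model; the third action
    # component (self.lastaction[2]) is passed to getState as the `direct`
    # hint that chTarget would use if force-wall redirection were enabled.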
    def inference(self, imgstate, rotation, aimRotation, time):
        deepfeel = self.caculateObs(imgstate)

        state = self.getState(deepfeel, rotation, self.lastaction[2])
        action, value = self.agent.predictTakeValue(state)
        info = f"time {time} action {action} critic {value} state {state} deepfeel avg {np.mean(deepfeel)} value {deepfeel} "
        info2 = f"action {action} critic {value} state {state[0:64]}"
        # Track how long, and how often, the drone has been stopped (speed <= 0).
        if action[1] > 0:
            self.stoptime = 0
        else:
            self.stoptime += 1
            self.stopfrequency += 1
        print(info2)
        self.persistence.saveTerminalRecord("stateaction", info)
        self.lastaction = action.copy()

        # Decay the stop-frequency counter each step.
        if self.stopfrequency > 0:
            self.stopfrequency -= 0.5
        if self.forceWallMenory > 0:
            pass  # no-op placeholder; the wall memory is consumed in getState
        return action

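    # Slide an 8-wide window over the 64-value depth line in steps of 4,
    # yielding 15 features; a window contributes 0 unless some reading falls
    # below `threshold`.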
    def sliceWindow(self, deepfeel, sliceSize=8, proValue=2, threshold=1.8):
        go = int(sliceSize / 2)
        sliceRes = []
        for i in range(0, len(deepfeel) - sliceSize + 1, go):
            temp = 0
            for j in range(0, sliceSize):
                # Keep the scaled penetration depth of the last reading in the
                # window that falls below the threshold.
                if deepfeel[i + j] < threshold:
                    temp = (threshold - deepfeel[i + j]) * proValue
            sliceRes.append(temp)
        return sliceRes

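    # Heuristic "stuck against a wall" detector: fires when the depth line is
    # uniformly short, when many readings are very close, when the drone has
    # idled too long, or when stops have become too frequent.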
    def judgeForceWall(self,
                       deepfeel,
                       alphacos,
                       maxfeel=6.5,
                       avgmax=3.2,
                       twoavgmax=1.8,
                       smallthreshold=0.55):  # needs revision
        # Treat "stopped too often" as being trapped in a local optimum.
        tempbest = False
        if self.stopfrequency > self.stopfrequencylong:
            tempbest = True

        if alphacos < 0.7:
            return False
        totalLength = 0
        maxLength = 0
        smallnum = 0
        for i in deepfeel:
            if i > maxLength:
                maxLength = i
            if i < smallthreshold:
                smallnum += 1
            totalLength += i
        avgLength = totalLength / len(deepfeel)
        if (avgLength < avgmax and maxLength < maxfeel) \
                or avgLength < twoavgmax or smallnum > 5 or tempbest:
            self.forceWallMenory = random.randint(15, 20)
            return True
        if self.stoptime > self.stoplong:
            self.forceWallMenory = random.randint(15, 20)
            return True
        return False

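    # When the wall detector fires, rotate the temporary goal heading 90
    # degrees so the agent slides along the obstacle instead of pushing into it.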
    def chTarget(self, direct):
        print("changing temporary target")
        # Clamp the turn hint, then rotate the temporary goal heading 90
        # degrees toward the hinted side and wrap it back into (-180, 180].
        # An earlier variant rotated an aim point around the drone's position
        # instead.
        if direct > 1:
            direct = 1
        elif direct < -1:
            direct = -1
        if direct >= 0:
            self.aimRotation -= 90
        else:
            self.aimRotation += 90
        if self.aimRotation < -180:
            self.aimRotation += 360
        elif self.aimRotation > 180:
            self.aimRotation -= 360

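    # Build the 295-dim state: 64 depth readings, 12 scalars, 15 slice-window
    # features, then 3 cached frames each of depth (64), action (2) and angle
    # pair (2); the caches are read by cycling their FIFO queues.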
    def getState(self, deepfeel, rotation, direct):  # original note: weird spaces-vs-tabs display issues here
        rotation = math.radians(rotation)

        sliceRes = self.sliceWindow(deepfeel)
        aimRotation = math.radians(self.aimRotation)
        xDirect = round(math.cos(rotation), 6)
        yDirect = round(math.sin(rotation), 6)
        aimDirectX = round(math.cos(aimRotation), 6)
        aimDirectY = round(math.sin(aimRotation), 6)

        # Cross and dot products of the aim and heading unit vectors, i.e. the
        # sin and cos of the target-heading angle, used directly as state.
        alphadirect = aimDirectX * yDirect - aimDirectY * xDirect
        alphacos = aimDirectX * xDirect + aimDirectY * yDirect

        # When the target is behind (cos < 0), saturate the turn signal to +/-1.
        if alphacos < 0 and alphadirect > 0:
            alphadirect = 1
        if alphacos < 0 and alphadirect < 0:
            alphadirect = -1
        # Force-wall redirection is currently disabled:
        '''
        judgeState = self.judgeForceWall(deepfeel, alphacos)
        if judgeState == True:
            self.chTarget(direct)
            aimRotation = math.radians(self.aimRotation)
            xDirect = round(math.cos(rotation), 6)
            yDirect = round(math.sin(rotation), 6)
            aimDirectX = round(math.cos(aimRotation), 6)
            aimDirectY = round(math.sin(aimRotation), 6)

            alphadirect = aimDirectX * yDirect - aimDirectY * xDirect
            alphacos = aimDirectX * xDirect + aimDirectY * yDirect  # sin/cos of target-heading angle as state

            if alphacos < 0 and alphadirect > 0:
                alphadirect = 1
            if alphacos < 0 and alphadirect < 0:
                alphadirect = -1
        '''
        # Flag features: `free` is nonzero while the wall-avoidance memory is
        # active; `stop` flags an extended stop.
        free = 0
        if self.forceWallMenory > 0:
            free = 5 + self.forceWallMenory
        stop = 0
        if self.stoptime >= self.stoplong:
            stop = 5

        timenow = datetime.datetime.now()
        internaltime = (timenow - self.lasttime).total_seconds()  # computed but no longer part of the state here

        other = [
            xDirect, yDirect, self.lastaction[0], self.lastaction[1],
            alphadirect, alphacos, self.lastLeftRightFeel[0],
            self.lastLeftRightFeel[1], free, stop, self.lastalphadirect,
            self.lastalphacos
        ]
        nextstate = []
        nextstate.extend(deepfeel)
        nextstate.extend(other)
        nextstate.extend(sliceRes)

        # Append the 3 cached history frames of each kind, cycling each FIFO
        # queue so its contents are preserved.
        for i in range(self.saveTimes):
            hist = self.saveDeepFeels.get()
            nextstate.extend(hist)
            self.saveDeepFeels.put(hist)

        for i in range(self.saveTimes):
            hist = self.savelastActions.get()
            nextstate.extend(hist)
            self.savelastActions.put(hist)

        for i in range(self.saveTimes):
            hist = self.savelastAngles.get()
            nextstate.extend(hist)
            self.savelastAngles.put(hist)

        # Rotate out the oldest cached frame of each kind and cache the
        # current one for the next step.
        self.savelastAngles.get()
        self.savelastAngles.put([alphadirect, alphacos])

        self.lasttime = timenow
        self.lastLeftRightFeel = [nextstate[0], nextstate[63]]

        self.saveDeepFeels.get()
        self.saveDeepFeels.put(deepfeel.copy())

        self.savelastActions.get()
        self.savelastActions.put([self.lastaction[0], self.lastaction[1]])

        if self.forceWallMenory > 0:
            self.forceWallMenory -= 1
            if self.forceWallMenory <= 0:
                # Restore the real goal heading once the wall memory expires.
                self.aimRotation = self.finalAimRotaion
            print("a wall was sensed recently")

        self.lastalphadirect = alphadirect
        self.lastalphacos = alphacos

        return nextstate

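    # Same depth-line compression as in Example #2, but over a narrower band
    # of rows (25..35 instead of 24..37).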
    def caculateObs(self, state, uprange=25, downrange=36):  # compress the image into a line
        imageCompact = np.array([state[i][:] for i in range(uprange, downrange)])
        power = np.min(imageCompact, axis=0)

        # Clamp the scan line: cap far readings at 8, floor near-but-valid
        # readings at 0.2, and map exact zeros (invalid returns) to 7.
        for i in range(len(power)):
            if power[i] > 8:
                power[i] = 8
            elif power[i] < 0.2 and power[i] > 0.000001:
                power[i] = 0.2
            elif power[i] == 0:
                power[i] = 7
        return power
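
For reference, the state_dim arithmetic in Example #3 matches the vector getState assembles; a small self-contained sanity check (plain Python, no project code assumed):

depth = 64                               # compressed scan line
scalars = 12                             # the `other` list in getState
slices = len(range(0, 64 - 8 + 1, 4))    # sliceWindow features -> 15
history = (64 + 2 + 2) * 3               # 3 cached frames of (depth, action, angle pair)
assert depth + scalars + slices + history == 64 + 12 + 15 + (64 + 2 + 2) * 3  # = 295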