Example #1
File: t92.py  Project: Tinky2013/RL
# IsLegalInitPositions, ResetState, IsTerminal, and transPolarToCartesian are
# imported from the project's own modules.
def initializeEnvironment(numOfAgent):
    sheepId = 0
    wolfId = 1
    distractorsIds = list(range(2, numOfAgent))
    distanceToVisualDegreeRatio = 20  # pixels per degree of visual angle
    minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
    minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # keep distractors out of the kill zone at initialization
    isLegalInitPositions = IsLegalInitPositions(sheepId, wolfId, distractorsIds, minInitSheepWolfDistance,
                                                minInitSheepDistractorDistance)
    xBoundary = [0, 640]
    yBoundary = [0, 480]
    resetState = ResetState(xBoundary, yBoundary, numOfAgent, isLegalInitPositions, transPolarToCartesian)

    killzoneRadius = 2.5 * distanceToVisualDegreeRatio
    isTerminal = IsTerminal(sheepId, wolfId, killzoneRadius)

    return resetState, isTerminal
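
A minimal usage sketch for the pair returned above; the agent count and the zero-argument call signatures are assumptions, not part of t92.py:

# Hypothetical driver for the objects built by initializeEnvironment.
resetState, isTerminal = initializeEnvironment(numOfAgent=4)  # assumed agent count
state = resetState()      # assumed zero-argument call returning the initial state
print(isTerminal(state))  # False: legal init keeps the wolf >= 180 px away, outside the 50 px kill zone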
Example #2
    def __call__(self, parameters):
        numOfAgent = parameters['numOfAgent']
        trajectories = []
        for trajectoryId in range(self.numTrajectories):

            forwardOneStep = self.composeFowardOneTimeStepWithRandomSubtlety(
                numOfAgent)

            sheepId = 0
            wolfId = 1
            distractorsIds = list(range(2, numOfAgent))
            distanceToVisualDegreeRatio = 20
            minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
            minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # keep distractors out of the kill zone at initialization
            isLegalInitPositions = IsLegalInitPositions(
                sheepId, wolfId, distractorsIds, minInitSheepWolfDistance,
                minInitSheepDistractorDistance)
            xBoundary = [0, 600]
            yBoundary = [0, 600]
            resetState = ResetState(xBoundary, yBoundary, numOfAgent,
                                    isLegalInitPositions,
                                    transPolarToCartesian)

            killzoneRadius = 2.5 * distanceToVisualDegreeRatio
            isTerminal = IsTerminal(sheepId, wolfId, killzoneRadius)

            numMDPTimeStepPerSecond = 5
            maxRunningSteps = 25 * numMDPTimeStepPerSecond  # 125 steps = 25 s of simulated time
            sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal,
                                                resetState, forwardOneStep)

            # action space: 8 unit vectors evenly spaced around the circle
            numActionDirections = 8
            actionSpace = [
                (np.cos(directionId * 2 * math.pi / numActionDirections),
                 np.sin(directionId * 2 * math.pi / numActionDirections))
                for directionId in range(numActionDirections)
            ]
            randomPolicy = RandomPolicy(actionSpace)
            sampleAction = lambda state: sampleFromDistribution(
                randomPolicy(state))

            trajectory = sampleTrajectory(sampleAction)

            trajectories.append(trajectory)
        return trajectories
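
Example #2 assumes import math and import numpy as np at module level, plus two small helpers defined elsewhere in the project. A plausible minimal sketch of those helpers, assuming a policy maps a state to a dict of action probabilities (the project's actual implementations may differ):

import random


class RandomPolicy:
    """Uniform distribution over a fixed discrete action space (assumed interface)."""

    def __init__(self, actionSpace):
        self.actionSpace = actionSpace

    def __call__(self, state):
        prob = 1 / len(self.actionSpace)
        return {action: prob for action in self.actionSpace}


def sampleFromDistribution(distribution):
    """Draw one action with probability proportional to its weight (assumed dict interface)."""
    actions, weights = zip(*distribution.items())
    return random.choices(actions, weights=weights, k=1)[0]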
Example #3
import numpy as np

# PrepareSheepVelocity, PrepareWolfVelocity, PrepareDistractorVelocity, and the
# other helper classes used below are defined elsewhere in the Tinky2013/RL project.


def composeFowardOneTimeStepWithRandomSubtlety(
    numOfAgent
):  # one MDP time step shared across the project's algorithms; here the number of agents is the variable under evaluation
    # MDP

    # experiment parameters for the environment
    numMDPTimeStepPerSecond = 5  # agents change direction every 200 ms
    distanceToVisualDegreeRatio = 20

    minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio /
                        numMDPTimeStepPerSecond)  # visual deg/s converted to pixels per MDP step
    maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio /
                        numMDPTimeStepPerSecond)
    warmUpTimeSteps = 10 * numMDPTimeStepPerSecond  # 10s to warm up
    prepareSheepVelocity = PrepareSheepVelocity(minSheepSpeed, maxSheepSpeed,
                                                warmUpTimeSteps)

    minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio /
                       numMDPTimeStepPerSecond)
    maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio /
                       numMDPTimeStepPerSecond)
    # wolfSubtleties = [500, 11, 3.3, 1.83, 0.92, 0.31, 0.001]
    wolfSubtleties = [500]  # 0, 30, 60, ..., 180
    initWolfSubtlety = np.random.choice(wolfSubtleties)
    prepareWolfVelocity = PrepareWolfVelocity(minWolfSpeed, maxWolfSpeed,
                                              warmUpTimeSteps,
                                              initWolfSubtlety,
                                              transCartesianToPolar,
                                              transPolarToCartesian)

    minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio /
                             numMDPTimeStepPerSecond)
    maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio /
                             numMDPTimeStepPerSecond)
    prepareDistractorVelocity = PrepareDistractorVelocity(
        minDistractorSpeed, maxDistractorSpeed, warmUpTimeSteps,
        transCartesianToPolar, transPolarToCartesian)

    sheepId = 0
    wolfId = 1
    distractorsIds = list(range(2, numOfAgent))
    prepareAllAgentsVelocities = PrepareAllAgentsVelocities(
        sheepId, wolfId, distractorsIds, prepareSheepVelocity,
        prepareWolfVelocity, prepareDistractorVelocity)

    xBoundary = [0, 640]
    yBoundary = [0, 480]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)

    killzoneRadius = 2.5 * distanceToVisualDegreeRatio
    isTerminal = IsTerminal(sheepId, wolfId, killzoneRadius)

    numFramePerSecond = 30  # visual display fps
    numFramesToInterpolate = int(
        numFramePerSecond / numMDPTimeStepPerSecond - 1
    )  # interpolate each MDP time step into 30/5 - 1 = 5 intermediate frames; the terminal condition is checked at every frame

    transitFunction = TransitWithInterpolation(
        initWolfSubtlety, numFramesToInterpolate, prepareAllAgentsVelocities,
        stayInBoundaryByReflectVelocity, isTerminal)

    aliveBonus = 0.01
    deathPenalty = -1
    rewardFunction = RewardFunctionTerminalPenalty(aliveBonus, deathPenalty,
                                                   isTerminal)

    forwardOneStep = ForwardOneStep(transitFunction, rewardFunction)
    minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
    minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # keep distractors out of the kill zone at initialization
    isLegalInitPositions = IsLegalInitPositions(
        sheepId, wolfId, distractorsIds, minInitSheepWolfDistance,
        minInitSheepDistractorDistance)
    resetState = ResetState(xBoundary, yBoundary, numOfAgent,
                            isLegalInitPositions, transPolarToCartesian)

    return forwardOneStep, resetState, isTerminal
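
The triple returned here is exactly what SampleTrajectory in Example #2 consumes. A sketch of the rollout loop it plausibly runs; the (nextState, reward) return shape of forwardOneStep and the trajectory record are assumptions:

def sampleTrajectorySketch(maxRunningSteps, isTerminal, resetState,
                           forwardOneStep, sampleAction):
    # Hypothetical reconstruction of SampleTrajectory's loop; the project's
    # actual trajectory format may differ.
    state = resetState()
    trajectory = []
    for _ in range(maxRunningSteps):
        if isTerminal(state):
            break
        action = sampleAction(state)
        nextState, reward = forwardOneStep(state, action)  # assumed return shape
        trajectory.append((state, action, reward))
        state = nextState
    return trajectory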