Пример #1
0
def composeFowardOneTimeStepWithRandomSubtlety(numOfAgent, idx):
    """Build the transition, reward and one-step functions of the chasing env.

    Agent 0 is the sheep, agent 1 is the wolf, and agents 2..numOfAgent-1
    are distractors.

    Args:
        numOfAgent: total number of agents; ids from 2 upwards become
            distractors.
        idx: index into the wolf-subtlety table. The value -1 draws one
            entry at random with ``np.random.choice``; any other value is
            used directly as a list index.

    Returns:
        A tuple ``(transitFunction, rewardFunction, forwardOneStep)``.
    """
    stepsPerSecond = 5  # change direction every 200ms
    pixelsPerDegree = 20
    fps = 30  # visual display fps

    def speedPerStep(speedPerSecond):
        # Scale a per-second speed to an integer per-MDP-step speed; the
        # multiply-then-divide order matches the original arithmetic.
        return int(speedPerSecond * pixelsPerDegree / stepsPerSecond)

    # Sheep
    sheepSpeedLow = speedPerStep(17.4)
    sheepSpeedHigh = speedPerStep(23.2)
    warmUpSteps = 10 * stepsPerSecond  # 10s to warm up
    prepareSheepVelocity = PrepareSheepVelocity(sheepSpeedLow, sheepSpeedHigh, warmUpSteps)

    # Wolf
    wolfSpeedLow = speedPerStep(8.7)
    wolfSpeedHigh = speedPerStep(14.5)
    subtletyTable = [500, 11, 3.3, 1.83, 0.92, 0.31, 0.001]  # 0, 30, 60, .. 180
    chosenSubtlety = np.random.choice(subtletyTable) if idx == -1 else subtletyTable[idx]
    prepareWolfVelocity = PrepareWolfVelocity(
        wolfSpeedLow, wolfSpeedHigh, warmUpSteps, chosenSubtlety,
        transCartesianToPolar, transPolarToCartesian)

    # Distractors share the wolf's speed range.
    distractorSpeedLow = speedPerStep(8.7)
    distractorSpeedHigh = speedPerStep(14.5)
    prepareDistractorVelocity = PrepareDistractorVelocity(
        distractorSpeedLow, distractorSpeedHigh, warmUpSteps,
        transCartesianToPolar, transPolarToCartesian)

    sheepId, wolfId = 0, 1
    distractorsIds = [agentId for agentId in range(2, numOfAgent)]
    prepareAllAgentsVelocities = PrepareAllAgentsVelocities(
        sheepId, wolfId, distractorsIds,
        prepareSheepVelocity, prepareWolfVelocity, prepareDistractorVelocity)

    # Arena limits
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity([0, 640], [0, 480])

    killzoneRadius = 2.5 * pixelsPerDegree
    isTerminal = IsTerminal(sheepId, wolfId, killzoneRadius)

    # interpolate each MDP timestep to multiple frames; check terminal for each frame
    interpolatedFrames = int(fps / stepsPerSecond - 1)
    transitFunction = TransitWithInterpolation(
        chosenSubtlety, interpolatedFrames, prepareAllAgentsVelocities,
        stayInBoundaryByReflectVelocity, isTerminal)

    aliveBonus, deathPenalty = 0.01, -1
    rewardFunction = RewardFunctionTerminalPenalty(aliveBonus, deathPenalty, isTerminal)
    forwardOneStep = ForwardOneStep(transitFunction, rewardFunction)

    return transitFunction, rewardFunction, forwardOneStep
Пример #2
0
def main():
    """Sample trajectories for every condition and replay one condition.

    Builds the list of parameter dictionaries from the manipulated
    variables, samples ``numTrajectories`` trajectories per condition with
    ``SampleTrajectoriesForCoditions``, and, when ``visualize`` is on,
    replays the trajectories of the condition at ``visualConditionIndex``
    in a 640x480 pygame window.
    """
    manipulatedVariables = OrderedDict()
    manipulatedVariables['numOfAgent'] = [2]

    # Cartesian product of all manipulated levels, one dict per condition.
    productedValues = it.product(
        *[[(key, value) for value in values]
          for key, values in manipulatedVariables.items()])
    parametersAllCondtion = [
        dict(list(specificValueParameter))
        for specificValueParameter in productedValues
    ]

    numTrajectories = 3
    sampleTrajectoriesForConditions = SampleTrajectoriesForCoditions(
        numTrajectories, composeFowardOneTimeStepWithRandomSubtlety)
    trajectoriesMultipleConditions = [
        sampleTrajectoriesForConditions(para) for para in parametersAllCondtion
    ]

    visualConditionIndex = 0
    trajectoriesToVisualize = trajectoriesMultipleConditions[
        visualConditionIndex]

    visualize = True
    if not visualize:
        return

    # Display
    screenWidth = 640
    screenHeight = 480
    screen = pg.display.set_mode((screenWidth, screenHeight))
    screenColor = THECOLORS['black']
    xBoundary = [0, 640]
    yBoundary = [0, 480]
    lineColor = THECOLORS['white']
    lineWidth = 4
    drawBackground = DrawBackground(screen, screenColor, xBoundary,
                                    yBoundary, lineColor, lineWidth)

    # Take the agent count from the condition being replayed so the drawing
    # setup cannot drift from the sampled trajectories when the manipulated
    # variable changes.
    numOfAgent = parametersAllCondtion[visualConditionIndex]['numOfAgent']
    numDistractors = numOfAgent - 2
    # First colour for agent 0, second for agent 1, white for distractors.
    circleColorSpace = [[0, 255, 0], [255, 0, 0]
                        ] + [[255, 255, 255]] * numDistractors
    circleSize = 10
    positionIndex = [0, 1]
    agentIdsToDraw = list(range(numOfAgent))
    saveImage = False
    dirPYFile = os.path.dirname(__file__)
    imageSavePath = os.path.join(dirPYFile, '..', 'data', 'forDemo')
    if not os.path.exists(imageSavePath):
        os.makedirs(imageSavePath)
    FPS = 30
    drawState = DrawState(FPS, screen, circleColorSpace, circleSize,
                          agentIdsToDraw, positionIndex, saveImage,
                          imageSavePath, drawBackground)

    # MDP Env (same boundaries as the drawn background)
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)

    distanceToVisualDegreeRatio = 20
    killzoneRadius = 2.5 * distanceToVisualDegreeRatio
    sheepId = 0
    wolfId = 1
    isTerminal = IsTerminal(sheepId, wolfId, killzoneRadius)

    numMDPTimeStepPerSecond = 5  # change direction every 200ms
    # interpolate each MDP timestep to multiple frames; check terminal for each frame
    numFramesToInterpolate = int(FPS / numMDPTimeStepPerSecond - 1)

    interpolateStateForVisualization = InterpolateStateForVisualization(
        numFramesToInterpolate, stayInBoundaryByReflectVelocity,
        isTerminal)

    # Positions of state, action and next state inside one trajectory step.
    stateIndexInTimeStep = 0
    actionIndexInTimeStep = 1
    nextStateIndexInTimeStep = 2
    visualizeTraj = VisualizeTraj(stateIndexInTimeStep,
                                  actionIndexInTimeStep,
                                  nextStateIndexInTimeStep, drawState,
                                  interpolateStateForVisualization)

    # Plain loop: the calls are made for their drawing side effect only.
    for trajectory in trajectoriesToVisualize:
        visualizeTraj(trajectory)