def __call__(self, parameters):
    print(parameters)
    numWolves = parameters['numWolves']
    numSheep = 1

    ## MDP Env
    # state is all multi agent state
    # action is all multi agent action
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    numOfAgent = numWolves + numSheep
    reset = Reset(xBoundary, yBoundary, numOfAgent)

    possibleSheepIds = list(range(numSheep))
    possibleWolvesIds = list(range(numSheep, numSheep + numWolves))
    getSheepStatesFromAll = lambda state: np.array(state)[possibleSheepIds]
    getWolvesStatesFromAll = lambda state: np.array(state)[possibleWolvesIds]
    killzoneRadius = 50
    isTerminal = IsTerminal(killzoneRadius, getSheepStatesFromAll, getWolvesStatesFromAll)

    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(xBoundary, yBoundary)
    interpolateOneFrame = InterpolateOneFrame(stayInBoundaryByReflectVelocity)
    numFramesToInterpolate = 3
    transit = TransitWithTerminalCheckOfInterpolation(numFramesToInterpolate, interpolateOneFrame, isTerminal)

    maxRunningSteps = 52
    timeCost = 1 / maxRunningSteps
    terminalBonus = 1
    rewardFunction = RewardFunctionByTerminal(timeCost, terminalBonus, isTerminal)

    forwardOneStep = ForwardOneStep(transit, rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset, forwardOneStep)

    ## MDP Policy
    # Sheep Part

    # Sheep Policy Function
    numSheepPolicyStateSpace = 2 * (numWolves + 1)
    sheepActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
    preyPowerRatio = 12
    sheepIndividualActionSpace = list(map(tuple, np.array(sheepActionSpace) * preyPowerRatio))
    numSheepActionSpace = len(sheepIndividualActionSpace)
    regularizationFactor = 1e-4
    generateSheepModel = GenerateModel(numSheepPolicyStateSpace, numSheepActionSpace, regularizationFactor)
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    sheepNNDepth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initSheepModel = generateSheepModel(sharedWidths * sheepNNDepth, actionLayerWidths, valueLayerWidths,
            resBlockSize, initializationMethod, dropoutRate)
    sheepModelPath = os.path.join('..', '..', 'data', 'preTrainModel',
            'agentId=0.' + str(numWolves) + '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=110_trainSteps=50000')
    sheepNNModel = restoreVariables(initSheepModel, sheepModelPath)
    sheepPolicy = ApproximatePolicy(sheepNNModel, sheepIndividualActionSpace)

    # Sheep Generate Action
    softParameterInPlanningForSheep = 2.5
    softPolicyInPlanningForSheep = SoftDistribution(softParameterInPlanningForSheep)
    softenSheepPolicy = lambda relativeAgentsStatesForSheepPolicy: softPolicyInPlanningForSheep(
            sheepPolicy(relativeAgentsStatesForSheepPolicy))

    sheepChooseActionMethod = sampleFromDistribution
    sheepSampleActions = [SampleActionOnFixedIntention(selfId, possibleWolvesIds, sheepPolicy,
            sheepChooseActionMethod) for selfId in possibleSheepIds]

    # Wolves Part
    # Policy Likelihood function: Wolf Central Control NN Policy Given Intention
    numWolvesStateSpaces = [2 * (numInWe + 1) for numInWe in range(2, numWolves + 1)]
    actionSpace = [(10, 0), (0, 10), (-10, 0), (0, -10)]
    predatorPowerRatio = 8
    wolfIndividualActionSpace = list(map(tuple, np.array(actionSpace) * predatorPowerRatio))
    wolvesCentralControlActionSpaces = [list(it.product(wolfIndividualActionSpace, repeat=numInWe))
            for numInWe in range(2, numWolves + 1)]
    numWolvesCentralControlActionSpaces = [len(wolvesCentralControlActionSpace)
            for wolvesCentralControlActionSpace in wolvesCentralControlActionSpaces]
    regularizationFactor = 1e-4
    generateWolvesCentralControlModels = [GenerateModel(numStateSpace, numActionSpace, regularizationFactor)
            for numStateSpace, numActionSpace in zip(numWolvesStateSpaces, numWolvesCentralControlActionSpaces)]
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    wolfNNDepth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initWolvesCentralControlModels = [generateWolvesCentralControlModel(sharedWidths * wolfNNDepth,
            actionLayerWidths, valueLayerWidths, resBlockSize, initializationMethod, dropoutRate)
            for generateWolvesCentralControlModel in generateWolvesCentralControlModels]
    NNNumSimulations = 250
    wolvesModelPaths = [os.path.join('..', '..', 'data', 'preTrainModel',
            'agentId=' + str(len(actionSpace) * np.sum([10**_ for _ in range(numInWe)]))
            + '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=' + str(NNNumSimulations) + '_trainSteps=50000')
            for numInWe in range(2, numWolves + 1)]
    print(wolvesModelPaths)
    wolvesCentralControlNNModels = [restoreVariables(initWolvesCentralControlModel, wolvesModelPath)
            for initWolvesCentralControlModel, wolvesModelPath in zip(initWolvesCentralControlModels, wolvesModelPaths)]
    wolvesCentralControlPolicies = [ApproximatePolicy(NNModel, actionSpace)
            for NNModel, actionSpace in zip(wolvesCentralControlNNModels, wolvesCentralControlActionSpaces)]

    centralControlPolicyListBasedOnNumAgentsInWe = wolvesCentralControlPolicies  # 0 for two agents in We, 1 for three agents...
    softParameterInInference = 1
    softPolicyInInference = SoftDistribution(softParameterInInference)
    policyForCommittedAgentsInInference = PolicyForCommittedAgent(centralControlPolicyListBasedOnNumAgentsInWe,
            softPolicyInInference, getStateThirdPersonPerspective)
    calCommittedAgentsPolicyLikelihood = CalCommittedAgentsPolicyLikelihood(policyForCommittedAgentsInInference)

    wolfLevel2ActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7)]
    wolfLevel2IndividualActionSpace = list(map(tuple, np.array(wolfLevel2ActionSpace) * predatorPowerRatio))
    wolfLevel2CentralControlActionSpace = list(it.product(wolfLevel2IndividualActionSpace))
    numWolfLevel2ActionSpace = len(wolfLevel2CentralControlActionSpace)
    regularizationFactor = 1e-4
    generatewolfLevel2Models = [GenerateModel(numStateSpace, numWolfLevel2ActionSpace, regularizationFactor)
            for numStateSpace in numWolvesStateSpaces]
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    wolfLevel2NNDepth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initwolfLevel2Models = [generatewolfLevel2Model(sharedWidths * wolfLevel2NNDepth, actionLayerWidths,
            valueLayerWidths, resBlockSize, initializationMethod, dropoutRate)
            for generatewolfLevel2Model in generatewolfLevel2Models]
    wolfLevel2ModelPaths = [os.path.join('..', '..', 'data', 'preTrainModel',
            'agentId=1.' + str(numInWe) + '_depth=9_hierarchy=2_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=' + str(NNNumSimulations) + '_trainSteps=50000')
            for numInWe in range(2, numWolves + 1)]
    wolfLevel2NNModels = [restoreVariables(initwolfLevel2Model, wolfLevel2ModelPath)
            for initwolfLevel2Model, wolfLevel2ModelPath in zip(initwolfLevel2Models, wolfLevel2ModelPaths)]
    wolfLevel2Policies = [ApproximatePolicy(wolfLevel2NNModel, wolfLevel2CentralControlActionSpace)
            for wolfLevel2NNModel in wolfLevel2NNModels]
    level2PolicyListBasedOnNumAgentsInWe = wolfLevel2Policies  # 0 for two agents in We, 1 for three agents...
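    # The level-1 policies above are central-control networks that output a joint action for
    # the whole "we" group, while the level-2 policies map each wolf's egocentric state to an
    # individual action. The loop below samples both and reports how often the individual
    # (level-2) action stays within a fixed distance of its component of the joint (level-1)
    # action.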
    softPolicy = SoftDistribution(2.5)
    totalInSmallRangeFlags = []
    for trial in range(self.numTrajectories):
        state = reset()
        while isTerminal(state):
            state = reset()
        jointActions = sampleFromDistribution(softPolicy(wolvesCentralControlPolicies[numWolves - 2](state)))
        hierarchyActions = []
        weIds = [list(range(numSheep, numWolves + numSheep)) for _ in range(numWolves)]
        for index in range(numWolves):
            weId = weIds[index].copy()
            weId.insert(0, weId.pop(index))
            relativeId = [0] + weId
            action = sampleFromDistribution(softPolicy(wolfLevel2Policies[numWolves - 2](state[relativeId])))
            hierarchyActions.append(action)

        reasonableActionRange = [int(np.linalg.norm(np.array(jointAction) - np.array(hierarchyAction)) <= 8 * predatorPowerRatio)
                for jointAction, hierarchyAction in zip(jointActions, hierarchyActions)
                if jointAction != (0, 0) and hierarchyAction != (0, 0)]
        totalInSmallRangeFlags = totalInSmallRangeFlags + reasonableActionRange

    inSmallRangeRateMean = np.mean(totalInSmallRangeFlags)
    return inSmallRangeRateMean
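
# The inner loop above rebuilds each wolf's egocentric ordering of agent indices: the sheep
# (id 0) stays first, the acting wolf is moved to the front of the "we" group, and the other
# wolves follow. A minimal standalone sketch of that index shuffle (the helper name is
# hypothetical, not part of the repo):
def relativeAgentIds(selfWolfIndex, numWolves, numSheep=1):
    # ids of the wolves in the "we" group, e.g. [1, 2, 3] for 3 wolves and 1 sheep
    weIds = list(range(numSheep, numWolves + numSheep))
    # move the acting wolf to the front of its group
    weIds.insert(0, weIds.pop(selfWolfIndex))
    # sheep id(s) first, then the reordered wolves
    return list(range(numSheep)) + weIds

# e.g. with 3 wolves, the second wolf (index 1) orders the state as [0, 2, 1, 3]
assert relativeAgentIds(1, 3) == [0, 2, 1, 3]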
def main():
    numWolves = 2
    numSheep = 1

    numWolvesStateSpaces = [2 * (numInWe + 1) for numInWe in range(2, numWolves + 1)]
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7)]
    #actionSpace = [(10, 0), (0, 10), (-10, 0), (0, -10)]
    predatorPowerRatio = 8
    wolfIndividualActionSpace = list(map(tuple, np.array(actionSpace) * predatorPowerRatio))
    wolvesCentralControlActionSpaces = [list(it.product(wolfIndividualActionSpace, repeat=numInWe))
            for numInWe in range(2, numWolves + 1)]
    numWolvesCentralControlActionSpaces = [len(wolvesCentralControlActionSpace)
            for wolvesCentralControlActionSpace in wolvesCentralControlActionSpaces]
    regularizationFactor = 1e-4
    generateWolvesCentralControlModels = [GenerateModel(numStateSpace, numActionSpace, regularizationFactor)
            for numStateSpace, numActionSpace in zip(numWolvesStateSpaces, numWolvesCentralControlActionSpaces)]
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    wolfNNDepth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initWolvesCentralControlModels = [generateWolvesCentralControlModel(sharedWidths * wolfNNDepth,
            actionLayerWidths, valueLayerWidths, resBlockSize, initializationMethod, dropoutRate)
            for generateWolvesCentralControlModel in generateWolvesCentralControlModels]
    NNNumSimulations = 250
    wolvesModelPaths = [os.path.join('..', '..', 'data', 'preTrainModel',
            'agentId=' + str(len(actionSpace) * np.sum([10**_ for _ in range(numInWe)]))
            + '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=' + str(NNNumSimulations) + '_trainSteps=50000')
            for numInWe in range(2, numWolves + 1)]
    print(wolvesModelPaths)
    wolvesCentralControlNNModels = [restoreVariables(initWolvesCentralControlModel, wolvesModelPath)
            for initWolvesCentralControlModel, wolvesModelPath in zip(initWolvesCentralControlModels, wolvesModelPaths)]
    wolvesValueFunctionListBasedOnNumAgentsInWe = [ApproximateValue(NNModel)
            for NNModel in wolvesCentralControlNNModels]
    valueFunction = wolvesValueFunctionListBasedOnNumAgentsInWe[numWolves - 2]

    xBoundary = [0, 600]
    yBoundary = [0, 600]
    reset = Reset(xBoundary, yBoundary, numWolves)

    numGridX = 120
    numGridY = 120
    xInterval = (xBoundary[1] - xBoundary[0]) / numGridX
    yInterval = (yBoundary[1] - yBoundary[0]) / numGridY
    sheepXPosition = [(gridIndex + 0.5) * xInterval for gridIndex in range(numGridX)]
    sheepYPosition = [(gridIndex + 0.5) * yInterval for gridIndex in range(numGridY)]

    wolvesState = reset()
    wolvesState = np.array([[300, 350], [550, 400]])
    print(wolvesState)

    levelValues = [sheepXPosition, sheepYPosition]
    levelNames = ["sheepXPosition", "sheepYPosition"]
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    evaluate = lambda df: evaluateValue(df, valueFunction, wolvesState)
    valueResultDf = toSplitFrame.groupby(levelNames).apply(evaluate)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    drawHeatmapPlot(valueResultDf, ax)
    fig.savefig('valueMap2', dpi=300)
    plt.show()
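
# `evaluateValue` and `drawHeatmapPlot` are defined elsewhere in the repo. A minimal sketch
# of what the grouped evaluation is assumed to do (the function name and signature here are
# illustrative): for each candidate sheep position on the grid, prepend it to the fixed
# wolves positions and query the central-control value network.
import numpy as np
import pandas as pd

def evaluateValueSketch(oneConditionDf, valueFunction, wolvesState):
    sheepX = oneConditionDf.index.get_level_values('sheepXPosition')[0]
    sheepY = oneConditionDf.index.get_level_values('sheepYPosition')[0]
    # state ordering assumed by the value network: sheep first, then the wolves
    state = np.concatenate([[[sheepX, sheepY]], wolvesState])
    return pd.Series({'value': valueFunction(state)})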
def __call__(self, parameters):
    print(parameters)
    numWolves = parameters['numWolves']
    numSheep = parameters['numSheep']
    softParameterInInference = parameters['inferenceSoft']
    softParameterInPlanning = parameters['wolfPolicySoft']
    otherCompeteRate = parameters['otherCompeteRate']
    competeDetectionRate = parameters['competeDetectionRate']

    ## MDP Env
    # state is all multi agent state
    # action is all multi agent action
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    numOfAgent = numWolves + numSheep
    reset = Reset(xBoundary, yBoundary, numOfAgent)

    possibleSheepIds = list(range(numSheep))
    possibleWolvesIds = list(range(numSheep, numSheep + numWolves))
    getSheepStatesFromAll = lambda state: np.array(state)[possibleSheepIds]
    getWolvesStatesFromAll = lambda state: np.array(state)[possibleWolvesIds]
    killzoneRadius = 50
    isTerminal = IsTerminal(killzoneRadius, getSheepStatesFromAll, getWolvesStatesFromAll)

    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(xBoundary, yBoundary)
    interpolateOneFrame = InterpolateOneFrame(stayInBoundaryByReflectVelocity)
    numFramesToInterpolate = 3
    transit = TransitWithTerminalCheckOfInterpolation(numFramesToInterpolate, interpolateOneFrame, isTerminal)

    maxRunningSteps = 61
    timeCost = 1 / maxRunningSteps
    terminalBonus = 1
    rewardFunction = RewardFunctionByTerminal(timeCost, terminalBonus, isTerminal)

    forwardOneStep = ForwardOneStep(transit, rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset, forwardOneStep)

    ## MDP Policy
    # Sheep Part

    # Sheep Policy Function
    numSheepPolicyStateSpace = 2 * (numWolves + 1)
    sheepActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
    preyPowerRatio = 12
    sheepIndividualActionSpace = list(map(tuple, np.array(sheepActionSpace) * preyPowerRatio))
    numSheepActionSpace = len(sheepIndividualActionSpace)
    regularizationFactor = 1e-4
    generateSheepModel = GenerateModel(numSheepPolicyStateSpace, numSheepActionSpace, regularizationFactor)
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    sheepNNDepth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initSheepModel = generateSheepModel(sharedWidths * sheepNNDepth, actionLayerWidths, valueLayerWidths,
            resBlockSize, initializationMethod, dropoutRate)
    sheepModelPath = os.path.join('..', '..', 'data', 'preTrainModel',
            'agentId=0.' + str(numWolves) + '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=110_trainSteps=50000')
    sheepNNModel = restoreVariables(initSheepModel, sheepModelPath)
    sheepPolicy = ApproximatePolicy(sheepNNModel, sheepIndividualActionSpace)

    # Sheep Generate Action
    softParameterInPlanningForSheep = 2.0
    softPolicyInPlanningForSheep = SoftDistribution(softParameterInPlanningForSheep)
    softenSheepPolicy = lambda relativeAgentsStatesForSheepPolicy: softPolicyInPlanningForSheep(
            sheepPolicy(relativeAgentsStatesForSheepPolicy))

    sheepChooseActionMethod = sampleFromDistribution
    sheepSampleActions = [SampleActionOnFixedIntention(selfId, possibleWolvesIds, softenSheepPolicy,
            sheepChooseActionMethod) for selfId in possibleSheepIds]

    # Wolves Part
    # Percept Action For Inference
    perceptAction = lambda action: action

    # Policy Likelihood function: Wolf Central Control NN Policy Given Intention
    numWolvesStateSpaces = [2 * (numInWe + 1) for numInWe in range(2, numWolves + 1)]
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7)]
    predatorPowerRatio = 8
    wolfIndividualActionSpace = list(map(tuple, np.array(actionSpace) * predatorPowerRatio))
    wolvesCentralControlActionSpaces = [list(it.product(wolfIndividualActionSpace, repeat=numInWe))
            for numInWe in range(2, numWolves + 1)]
    numWolvesCentralControlActionSpaces = [len(wolvesCentralControlActionSpace)
            for wolvesCentralControlActionSpace in wolvesCentralControlActionSpaces]
    regularizationFactor = 1e-4
    generateWolvesCentralControlModels = [GenerateModel(numStateSpace, numActionSpace, regularizationFactor)
            for numStateSpace, numActionSpace in zip(numWolvesStateSpaces, numWolvesCentralControlActionSpaces)]
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    wolfNNDepth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initWolvesCentralControlModels = [generateWolvesCentralControlModel(sharedWidths * wolfNNDepth,
            actionLayerWidths, valueLayerWidths, resBlockSize, initializationMethod, dropoutRate)
            for generateWolvesCentralControlModel in generateWolvesCentralControlModels]
    NNNumSimulations = 250
    wolvesModelPaths = [os.path.join('..', '..', 'data', 'preTrainModel',
            'agentId=' + str(8 * np.sum([10**_ for _ in range(numInWe)]))
            + '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=' + str(NNNumSimulations) + '_trainSteps=50000')
            for numInWe in range(2, numWolves + 1)]
    print(wolvesModelPaths)
    wolvesCentralControlNNModels = [restoreVariables(initWolvesCentralControlModel, wolvesModelPath)
            for initWolvesCentralControlModel, wolvesModelPath in zip(initWolvesCentralControlModels, wolvesModelPaths)]
    wolvesCentralControlPolicies = [ApproximatePolicy(NNModel, actionSpace)
            for NNModel, actionSpace in zip(wolvesCentralControlNNModels, wolvesCentralControlActionSpaces)]
    centralControlPolicyListBasedOnNumAgentsInWe = wolvesCentralControlPolicies  # 0 for two agents in We, 1 for three agents...
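    # Inference machinery: each wolf maintains a distribution over intentions (which sheep is
    # the goal and who belongs to the "we" group). The likelihood of an observed joint action
    # factorizes into committed agents, assumed to follow the central-control policy, and
    # uncommitted agents, assumed to follow an individual heat-seeking policy.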
    softPolicyInInference = SoftDistribution(softParameterInInference)
    policyForCommittedAgentsInInference = PolicyForCommittedAgent(centralControlPolicyListBasedOnNumAgentsInWe,
            softPolicyInInference, getStateOrActionThirdPersonPerspective)
    concernedAgentsIds = [2]
    calCommittedAgentsPolicyLikelihood = CalCommittedAgentsPolicyLikelihood(concernedAgentsIds,
            policyForCommittedAgentsInInference)

    getGoalStateForIndividualHeatseeking = lambda statesRelative: np.array(statesRelative)[0]
    getSelfStateForIndividualHeatseeking = lambda statesRelative: np.array(statesRelative)[1]
    heatseekingPrecesion = 1.83
    heatSeekingDiscreteStochasticPolicy = HeatSeekingDiscreteStochasticPolicy(heatseekingPrecesion,
            wolfIndividualActionSpace, getSelfStateForIndividualHeatseeking,
            getGoalStateForIndividualHeatseeking)
    policyForUncommittedAgentsInInference = PolicyForUncommittedAgent(possibleWolvesIds,
            heatSeekingDiscreteStochasticPolicy, softPolicyInInference,
            getStateOrActionFirstPersonPerspective)
    calUncommittedAgentsPolicyLikelihood = CalUncommittedAgentsPolicyLikelihood(possibleWolvesIds,
            concernedAgentsIds, policyForUncommittedAgentsInInference)

    # Joint Likelihood
    calJointLikelihood = lambda intention, state, perceivedAction: \
            calCommittedAgentsPolicyLikelihood(intention, state, perceivedAction) * \
            calUncommittedAgentsPolicyLikelihood(intention, state, perceivedAction)

    wolvesValueListBasedOnNumAgentsInWe = [ApproximateValue(NNModel)
            for NNModel in wolvesCentralControlNNModels]
    calIntentionValueGivenState = CalIntentionValueGivenState(wolvesValueListBasedOnNumAgentsInWe)
    softParamterForValue = 0.01
    softValueToBuildDistribution = SoftMax(softParamterForValue)
    adjustIntentionPriorGivenValueOfState = AdjustIntentionPriorGivenValueOfState(calIntentionValueGivenState,
            softValueToBuildDistribution)

    # Sample and Save Trajectory
    trajectoriesWithIntentionDists = []
    for trajectoryId in range(self.numTrajectories):
        # Intention Prior For inference
        otherWolfPossibleIntentionSpaces = {0: [(0, (1, 2))], 1: [(0, ())]}
        otherIntentionType = np.random.choice([1, 0], p=[otherCompeteRate, 1 - otherCompeteRate])
        otherWolfIntentionSpace = otherWolfPossibleIntentionSpaces[otherIntentionType]
        selfPossibleIntentionSpaces = {0: [(0, (1, 2))], 0.5: [(0, (1, 2)), (0, ())], 1: [(0, ())]}
        selfWolfIntentionSpace = selfPossibleIntentionSpaces[competeDetectionRate]
        intentionSpacesForAllWolves = [selfWolfIntentionSpace, otherWolfIntentionSpace]
        wolvesIntentionPriors = [{tuple(intention): 1 / len(allPossibleIntentionsOneWolf)
                for intention in allPossibleIntentionsOneWolf}
                for allPossibleIntentionsOneWolf in intentionSpacesForAllWolves]

        # Infer and update Intention
        variablesForAllWolves = [[intentionSpace] for intentionSpace in intentionSpacesForAllWolves]
        jointHypothesisSpaces = [pd.MultiIndex.from_product(variables, names=['intention'])
                for variables in variablesForAllWolves]
        concernedHypothesisVariable = ['intention']
        priorDecayRate = 1
        softPrior = SoftDistribution(priorDecayRate)
        inferIntentionOneStepList = [InferOneStep(jointHypothesisSpace, concernedHypothesisVariable,
                calJointLikelihood, softPrior) for jointHypothesisSpace in jointHypothesisSpaces]

        chooseIntention = sampleFromDistribution
        valuePriorEndTime = -100
        updateIntentions = [UpdateIntention(intentionPrior, valuePriorEndTime,
                adjustIntentionPriorGivenValueOfState, perceptAction, inferIntentionOneStep, chooseIntention)
                for intentionPrior, inferIntentionOneStep in zip(wolvesIntentionPriors, inferIntentionOneStepList)]

        # reset intention and adjust intention prior attributes tools for multiple trajectory
        intentionResetAttributes = ['timeStep', 'lastState', 'lastAction', 'intentionPrior', 'formerIntentionPriors']
        intentionResetAttributeValues = [dict(zip(intentionResetAttributes,
                [0, None, None, intentionPrior, [intentionPrior]]))
                for intentionPrior in wolvesIntentionPriors]
        resetIntentions = ResetObjects(intentionResetAttributeValues, updateIntentions)
        returnAttributes = ['formerIntentionPriors']
        getIntentionDistributions = GetObjectsValuesOfAttributes(returnAttributes, updateIntentions)
        attributesToRecord = ['lastAction']
        recordActionForUpdateIntention = RecordValuesForObjects(attributesToRecord, updateIntentions)

        # Wolves Generate Action
        softPolicyInPlanning = SoftDistribution(softParameterInPlanning)
        policyForCommittedAgentInPlanning = PolicyForCommittedAgent(centralControlPolicyListBasedOnNumAgentsInWe,
                softPolicyInPlanning, getStateOrActionThirdPersonPerspective)

        policyForUncommittedAgentInPlanning = PolicyForUncommittedAgent(possibleWolvesIds,
                heatSeekingDiscreteStochasticPolicy, softPolicyInPlanning,
                getStateOrActionFirstPersonPerspective)

        wolfChooseActionMethod = sampleFromDistribution
        getSelfActionThirdPersonPerspective = lambda weIds, selfId: list(weIds).index(selfId)
        chooseCommittedAction = GetActionFromJointActionDistribution(wolfChooseActionMethod,
                getSelfActionThirdPersonPerspective)
        chooseUncommittedAction = sampleFromDistribution
        wolvesSampleIndividualActionGivenIntentionList = [SampleIndividualActionGivenIntention(selfId,
                policyForCommittedAgentInPlanning, policyForUncommittedAgentInPlanning,
                chooseCommittedAction, chooseUncommittedAction)
                for selfId in possibleWolvesIds]

        wolvesSampleActions = [SampleActionOnChangableIntention(updateIntention,
                wolvesSampleIndividualActionGivenIntention)
                for updateIntention, wolvesSampleIndividualActionGivenIntention
                in zip(updateIntentions, wolvesSampleIndividualActionGivenIntentionList)]
        allIndividualSampleActions = sheepSampleActions + wolvesSampleActions
        sampleActionMultiAgent = SampleActionMultiagent(allIndividualSampleActions,
                recordActionForUpdateIntention)
        trajectory = sampleTrajectory(sampleActionMultiAgent)
        intentionDistributions = getIntentionDistributions()
        trajectoryWithIntentionDists = [tuple(list(SASRPair) + list(intentionDist))
                for SASRPair, intentionDist in zip(trajectory, intentionDistributions)]
        trajectoriesWithIntentionDists.append(tuple(trajectoryWithIntentionDists))
        resetIntentions()
        #print(intentionDistributions[-1], otherCompeteRate)

    trajectoryFixedParameters = {'sheepPolicySoft': softParameterInPlanningForSheep,
            'wolfPolicySoft': softParameterInPlanning, 'maxRunningSteps': maxRunningSteps,
            'competePolicy': 'heatseeking', 'NNNumSimulations': NNNumSimulations,
            'heatseekingPrecesion': heatseekingPrecesion}
    self.saveTrajectoryByParameters(trajectoriesWithIntentionDists, trajectoryFixedParameters, parameters)
    print(np.mean([len(tra) for tra in trajectoriesWithIntentionDists]))
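
# `InferOneStep` is defined elsewhere in the repo. A minimal sketch of the Bayesian update it
# is assumed to perform over the intention hypotheses, using the joint likelihood built above
# (the function name and the omission of the prior-decay step are simplifying assumptions):
def inferIntentionOneStepSketch(intentionPrior, state, perceivedAction, calJointLikelihood):
    unnormalized = {intention: prior * calJointLikelihood(intention, state, perceivedAction)
            for intention, prior in intentionPrior.items()}
    normalizer = sum(unnormalized.values())
    return {intention: weight / normalizer for intention, weight in unnormalized.items()}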
def __call__(self, parameters):
    print(parameters)
    numWolves = parameters['numWolves']
    numSheep = parameters['numSheep']
    softParamterForValue = parameters['valuePriorSoftMaxBeta']
    valuePriorEndTime = parameters['valuePriorEndTime']

    ## MDP Env
    # state is all multi agent state
    # action is all multi agent action
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    numOfAgent = numWolves + numSheep
    reset = Reset(xBoundary, yBoundary, numOfAgent)

    possibleSheepIds = list(range(numSheep))
    possibleWolvesIds = list(range(numSheep, numSheep + numWolves))
    getSheepStatesFromAll = lambda state: np.array(state)[possibleSheepIds]
    getWolvesStatesFromAll = lambda state: np.array(state)[possibleWolvesIds]
    killzoneRadius = 25
    isTerminal = IsTerminal(killzoneRadius, getSheepStatesFromAll, getWolvesStatesFromAll)

    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(xBoundary, yBoundary)
    interpolateOneFrame = InterpolateOneFrame(stayInBoundaryByReflectVelocity)
    numFramesToInterpolate = 5
    transit = TransitWithTerminalCheckOfInterpolation(numFramesToInterpolate, interpolateOneFrame, isTerminal)

    maxRunningSteps = 52
    timeCost = 1 / maxRunningSteps
    terminalBonus = 1
    rewardFunction = RewardFunctionByTerminal(timeCost, terminalBonus, isTerminal)

    forwardOneStep = ForwardOneStep(transit, rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset, forwardOneStep)

    ## MDP Policy
    # Sheep Part

    # Sheep Policy Function
    numSheepPolicyStateSpace = 2 * (numWolves + 1)
    sheepActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
    preyPowerRatio = 12
    sheepIndividualActionSpace = list(map(tuple, np.array(sheepActionSpace) * preyPowerRatio))
    numSheepActionSpace = len(sheepIndividualActionSpace)
    regularizationFactor = 1e-4
    generateSheepModel = GenerateModel(numSheepPolicyStateSpace, numSheepActionSpace, regularizationFactor)
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    sheepNNDepth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initSheepModel = generateSheepModel(sharedWidths * sheepNNDepth, actionLayerWidths, valueLayerWidths,
            resBlockSize, initializationMethod, dropoutRate)
    sheepModelPath = os.path.join('..', '..', 'data', 'preTrainModel',
            'agentId=0.' + str(numWolves) + '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=110_trainSteps=50000')
    sheepNNModel = restoreVariables(initSheepModel, sheepModelPath)
    sheepPolicy = ApproximatePolicy(sheepNNModel, sheepIndividualActionSpace)

    # Sheep Generate Action
    softParameterInPlanningForSheep = 2.5
    softPolicyInPlanningForSheep = SoftDistribution(softParameterInPlanningForSheep)
    softenSheepPolicy = lambda relativeAgentsStatesForSheepPolicy: softPolicyInPlanningForSheep(
            sheepPolicy(relativeAgentsStatesForSheepPolicy))

    sheepChooseActionMethod = sampleFromDistribution
    sheepSampleActions = [SampleActionOnFixedIntention(selfId, possibleWolvesIds, softenSheepPolicy,
            sheepChooseActionMethod) for selfId in possibleSheepIds]

    # Wolves Part
    # Policy Likelihood function: Wolf Central Control NN Policy Given Intention
    numWolvesStateSpaces = [2 * (numInWe + numSheep) for numInWe in range(2, numWolves + 1)]
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
    predatorPowerRatio = 8
    wolfIndividualActionSpace = list(map(tuple, np.array(actionSpace) * predatorPowerRatio))
    wolvesCentralControlActionSpaces = [list(it.product(wolfIndividualActionSpace, repeat=numInWe))
            for numInWe in range(2, numWolves + 1)]
    numWolvesCentralControlActionSpaces = [len(wolvesCentralControlActionSpace)
            for wolvesCentralControlActionSpace in wolvesCentralControlActionSpaces]
    regularizationFactor = 1e-4
    generateWolvesCentralControlModels = [GenerateModel(numStateSpace, numActionSpace, regularizationFactor)
            for numStateSpace, numActionSpace in zip(numWolvesStateSpaces, numWolvesCentralControlActionSpaces)]
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    wolfNNDepth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initWolvesCentralControlModels = [generateWolvesCentralControlModel(sharedWidths * wolfNNDepth,
            actionLayerWidths, valueLayerWidths, resBlockSize, initializationMethod, dropoutRate)
            for generateWolvesCentralControlModel in generateWolvesCentralControlModels]
    NNNumSimulations = 250
    wolvesModelPaths = [os.path.join('..', '..', 'data', 'preTrainModel',
            'agentId=.' + str(len(actionSpace) * np.sum([10**_ for _ in range(numInWe)]))
            + '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=' + str(NNNumSimulations) + '_trainSteps=50000')
            for numInWe in range(2, numWolves + 1)]
    print(wolvesModelPaths)
    wolvesCentralControlNNModels = [restoreVariables(initWolvesCentralControlModel, wolvesModelPath)
            for initWolvesCentralControlModel, wolvesModelPath in zip(initWolvesCentralControlModels, wolvesModelPaths)]
    wolvesCentralControlPolicies = [ApproximatePolicy(NNModel, actionSpace)
            for NNModel, actionSpace in zip(wolvesCentralControlNNModels, wolvesCentralControlActionSpaces)]

    # Wolves Generate Action
    softParameterInPlanning = 2.5
    softPolicyInPlanning = SoftDistribution(softParameterInPlanning)
    wolvesPolicy = lambda state: wolvesCentralControlPolicies[numWolves - 2](state)
    wolfChooseActionMethod = sampleFromDistribution
    wolvesSampleAction = lambda state: wolfChooseActionMethod(softPolicyInPlanning(wolvesPolicy(state)))

    def sampleAction(state):
        action = list(wolvesSampleAction(state)) + [sheepSampleAction(state)
                for sheepSampleAction in sheepSampleActions]
        return action

    # Sample and Save Trajectory
    trajectories = [sampleTrajectory(sampleAction) for _ in range(self.numTrajectories)]
    wolfType = 'sharedReward'
    trajectoryFixedParameters = {'sheepPolicySoft': softParameterInPlanningForSheep,
            'wolfPolicySoft': softParameterInPlanning, 'maxRunningSteps': maxRunningSteps,
            'hierarchy': 0, 'NNNumSimulations': NNNumSimulations, 'wolfType': wolfType}
    self.saveTrajectoryByParameters(trajectories, trajectoryFixedParameters, parameters)
    print(np.mean([len(tra) for tra in trajectories]))
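
# `SampleTrajectory` is defined elsewhere in the repo. A rough sketch of the rollout loop it
# is assumed to implement with the pieces wired above (an illustrative reconstruction; the
# real class may differ, e.g. in the forwardOneStep signature):
def sampleTrajectorySketch(maxRunningSteps, isTerminal, reset, forwardOneStep, sampleAction):
    state = reset()
    while isTerminal(state):
        state = reset()
    trajectory = []
    for _ in range(maxRunningSteps):
        action = sampleAction(state)
        nextState, reward = forwardOneStep(state, action)
        trajectory.append((state, action, nextState, reward))
        if isTerminal(nextState):
            break
        state = nextState
    return trajectory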