Python BuildGaussianFixCov примеры использования

Язык программирования: Python

Пространство имен/Пакет: src.mathTools.distribution

Класс/Тип: BuildGaussianFixCov

Примеров на hotexamples.com: 4

Найдено 4 примера использования BuildGaussianFixCov на Python. Это лучшие примеры Python-кода для src.mathTools.distribution.BuildGaussianFixCov, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

BuildGaussianFixCov(4)

Основные методы

BuildGaussianFixCov (4)

Пример #1

Показать файл

    def __call__(self, parameters):
        """Sample and save chasing trajectories for one parameter condition.

        Builds the multi-agent chasing environment, restores sheep and wolf
        model variables from files under ``../../data/preTrainModel``
        (relative to this file), composes one action sampler per agent, rolls
        out ``self.numTrajectories`` trajectories and passes them to
        ``self.saveTrajectoryByParameters``.

        Args:
            parameters: mapping with the keys ``'numWolves'``, ``'numSheep'``,
                ``'wolfType'`` (``'sharedReward'`` or ``'individualReward'``)
                and ``'sheepConcern'`` (``'selfSheep'`` or ``'allSheep'``).

        Raises:
            ValueError: if ``'wolfType'`` or ``'sheepConcern'`` holds a value
                other than the ones listed above.
        """
        print(parameters)
        visualizeTraj = False

        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        wolfType = parameters['wolfType']
        sheepConcern = parameters['sheepConcern']

        # Validate the categorical parameters before any model is built, so a
        # typo fails immediately with a clear message instead of surfacing
        # later as an unbound local variable.
        sheepConcernSelfOnlyByName = {'selfSheep': 1, 'allSheep': 0}
        if sheepConcern not in sheepConcernSelfOnlyByName:
            raise ValueError("unsupported sheepConcern {!r}; expected one of {}".format(
                sheepConcern, sorted(sheepConcernSelfOnlyByName)))
        sheepConcernSelfOnly = sheepConcernSelfOnlyByName[sheepConcern]

        wolfPrefixByType = {'sharedReward': 'maddpg', 'individualReward': 'maddpgIndividWolf'}
        if wolfType not in wolfPrefixByType:
            raise ValueError("unsupported wolfType {!r}; expected one of {}".format(
                wolfType, sorted(wolfPrefixByType)))
        prefix = wolfPrefixByType[wolfType]

        ## MDP Env
        # state is all multi agent state # action is all multi agent action
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numWolves + numSheep))
        possibleWolvesIds = wolvesID
        possibleSheepIds = sheepsID

        numAgents = numWolves + numSheep
        numBlocks = 5 - numWolves
        blocksID = list(range(numAgents, numAgents + numBlocks))
        numEntities = numAgents + numBlocks

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2

        sheepMaxSpeed = 1.3 * 1
        wolfMaxSpeed = 1.0 * 1
        blockMaxSpeed = None

        # Per-entity property lists are ordered wolves, then sheep, then blocks.
        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [blockSize] * numBlocks
        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheep + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        reshapeActionInTransit = lambda action: action
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                              getCollisionForce, getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                        entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit, applyActionForce,
                                           applyEnvironForce, integrateState)

        isCollision = IsCollision(getPosFromAgentState)
        collisionRewardWolf = 1
        punishForOutOfBoundForWolf = lambda state: 0
        rewardWolf = RewardCentralControlPunishBond(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState,
                                                    isCollision, punishForOutOfBoundForWolf, collisionRewardWolf)
        # NOTE(review): rewardSheep is constructed but never used below; only
        # rewardWolf is given to ForwardOneStep. Kept in case the constructors
        # have side effects -- confirm and remove if not.
        collisionRewardSheep = -1
        punishForOutOfBoundForSheep = PunishForOutOfBound()
        rewardSheep = RewardCentralControlPunishBond(sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState,
                                                     isCollision, punishForOutOfBoundForSheep, collisionRewardSheep)

        forwardOneStep = ForwardOneStep(transit, rewardWolf)

        reset = ResetMultiAgentChasing(numAgents, numBlocks)
        isTerminal = lambda state: False
        maxRunningSteps = 101
        sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset, forwardOneStep)

        ## MDP Policy
        worldDim = 2
        actionDim = worldDim * 2 + 1

        layerWidth = [64 * (numWolves - 1), 64 * (numWolves - 1)]

        # Shared by the sheep and the wolf model paths; defined once here
        # rather than inside the sheep loop, because the wolf section below
        # needs them as well.
        dirName = os.path.dirname(__file__)
        maxEpisode = 60000
        modelDir = os.path.join(dirName, '..', '..', 'data', 'preTrainModel')

        # Sheep Part
        # ------------ model ------------------------
        # Index 0 ('allSheep') observes every sheep, index 1 ('selfSheep') only one.
        numSheepToObserveWhenSheepSameOrDiff = [numSheep, 1]
        numSheepToObserve = numSheepToObserveWhenSheepSameOrDiff[sheepConcernSelfOnly]

        print(numSheepToObserve)
        sheepModelListOfDiffWolfReward = []
        sheepType = 'mixed'
        if sheepType == 'mixed':
            sheepPrefixList = ['maddpgIndividWolf', 'maddpg']
        else:
            sheepPrefixList = [sheepType]

        # The observation layout does not depend on the model prefix, so it is
        # built once; the loop below only differs in which files are restored.
        wolvesIDForSheepObserve = list(range(numWolves))
        sheepsIDForSheepObserve = list(range(numWolves, numSheepToObserve + numWolves))
        blocksIDForSheepObserve = list(range(numSheepToObserve + numWolves,
                                             numSheepToObserve + numWolves + numBlocks))
        observeOneAgentForSheep = lambda agentID: Observe(agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
                                                          blocksIDForSheepObserve, getPosFromAgentState,
                                                          getVelFromAgentState)
        observeSheep = lambda state: [observeOneAgentForSheep(agentID)(state)
                                      for agentID in range(numWolves + numSheepToObserve)]
        obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
        sheepAgentIDs = range(numWolves, numWolves + numSheepToObserve)
        sheepFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(numWolves, numSheepToObserve, numBlocks, maxEpisode)

        for sheepPrefix in sheepPrefixList:
            # reset() is still called once per prefix, as before, to obtain a
            # sample state from which the observation sizes are read.
            initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
            obsShapeSheep = [obs.shape[0] for obs in initObsForSheepParams]

            buildSheepModels = BuildMADDPGModels(actionDim, numWolves + numSheepToObserve, obsShapeSheep)
            sheepModelsList = [buildSheepModels(layerWidth, agentID) for agentID in sheepAgentIDs]

            print(sheepPrefix)
            sheepModelPaths = [os.path.join(modelDir, sheepPrefix + sheepFileName + str(i) + '60000eps')
                               for i in sheepAgentIDs]

            for model, path in zip(sheepModelsList, sheepModelPaths):
                restoreVariables(model, path)
            sheepModelListOfDiffWolfReward.extend(sheepModelsList)

        # Sheep Policy Function
        reshapeAction = ReshapeAction()
        actOneStepOneModelSheep = ActOneStep(actByPolicyTrainNoisy)

        # Sheep Generate Action
        numAllSheepModels = len(sheepModelListOfDiffWolfReward)

        # Wolves Part

        # ------------ model ------------------------
        wolvesIDForWolfObserve = list(range(numWolves))
        sheepsIDForWolfObserve = list(range(numWolves, numSheep + numWolves))
        blocksIDForWolfObserve = list(range(numSheep + numWolves, numSheep + numWolves + numBlocks))
        observeOneAgentForWolf = lambda agentID: Observe(agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
                                                         blocksIDForWolfObserve, getPosFromAgentState,
                                                         getVelFromAgentState)
        observeWolf = lambda state: [observeOneAgentForWolf(agentID)(state) for agentID in range(numWolves + numSheep)]

        obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
        initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
        obsShapeWolf = [obs.shape[0] for obs in initObsForWolfParams]
        buildWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheep, obsShapeWolf)
        layerWidthForWolf = [64 * (numWolves - 1), 64 * (numWolves - 1)]
        wolfModelsList = [buildWolfModels(layerWidthForWolf, agentID) for agentID in range(numWolves)]

        wolfFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(numWolves, numSheep, numBlocks, maxEpisode)
        wolfModelPaths = [os.path.join(modelDir, prefix + wolfFileName + str(i) + '60000eps')
                          for i in range(numWolves)]
        print(numWolves, obsShapeWolf, wolfModelPaths)

        for model, path in zip(wolfModelsList, wolfModelPaths):
            restoreVariables(model, path)

        # Each wolf samples its action from a Gaussian built by
        # BuildGaussianFixCov (fixed cov of 0.03 ** 2 per dimension) from the
        # model's reshaped output.
        actionDimReshaped = 2
        cov = [0.03 ** 2 for _ in range(actionDimReshaped)]
        buildGaussian = BuildGaussianFixCov(cov)
        actOneStepOneModelWolf = ActOneStep(actByPolicyTrainNoNoisy)
        composeWolfPolicy = lambda wolfModel: lambda state: sampleFromContinuousSpace(buildGaussian(
            tuple(reshapeAction(actOneStepOneModelWolf(wolfModel, observeWolf(state))))))

        wolvesSampleActions = [composeWolfPolicy(wolfModel) for wolfModel in wolfModelsList]

        trajectories = []
        for _ in range(self.numTrajectories):
            # Every sheep draws one of the restored sheep models at random for
            # this trajectory.
            sheepModelsForPolicy = [sheepModelListOfDiffWolfReward[np.random.choice(numAllSheepModels)]
                                    for _ in possibleSheepIds]
            if sheepConcernSelfOnly:
                # The policy returns a one-entry {action: 1} distribution that
                # SampleActionOnFixedIntention samples from.
                composeSheepPolicy = lambda sheepModel: lambda state: {
                    tuple(reshapeAction(actOneStepOneModelSheep(sheepModel, observeSheep(state)))): 1}
                sheepChooseActionMethod = sampleFromDistribution
                sheepSampleActions = [
                    SampleActionOnFixedIntention(selfId, possibleWolvesIds, composeSheepPolicy(sheepModel),
                                                 sheepChooseActionMethod, blocksID)
                    for selfId, sheepModel in zip(possibleSheepIds, sheepModelsForPolicy)]
            else:
                composeSheepPolicy = lambda sheepModel: lambda state: tuple(
                    reshapeAction(actOneStepOneModelSheep(sheepModel, observeSheep(state))))
                sheepSampleActions = [composeSheepPolicy(sheepModel) for sheepModel in sheepModelsForPolicy]

            # Joint action order matches the entity order: wolves first, then sheep.
            allIndividualSampleActions = wolvesSampleActions + sheepSampleActions
            sampleAction = lambda state: [sampleIndividualAction(state)
                                          for sampleIndividualAction in allIndividualSampleActions]
            trajectories.append(sampleTrajectory(sampleAction))

        trajectoryFixedParameters = {'maxRunningSteps': maxRunningSteps}
        self.saveTrajectoryByParameters(trajectories, trajectoryFixedParameters, parameters)
        print(np.mean([len(tra) for tra in trajectories]))

        # visualize
        if visualizeTraj:
            wolfColor = np.array([0.85, 0.35, 0.35])
            sheepColor = np.array([0.35, 0.85, 0.35])
            blockColor = np.array([0.25, 0.25, 0.25])
            entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheep + [blockColor] * numBlocks
            render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
            trajToRender = np.concatenate(trajectories)
            render(trajToRender)

Пример #2

Показать файл

    def __call__(self, parameters):
        """Sample and save trajectories in which one wolf uses a perturbed policy.

        Builds the multi-agent chasing environment, restores sheep and wolf
        model variables from files under ``../../data/preTrainModel``
        (relative to this file), and replaces the policy of the wolf at index
        ``perturbedWolfID`` with one driven by a model that observes only the
        sheep selected by ``perturbedWolfGoalID``. It then rolls out
        ``self.numTrajectories`` trajectories with that perturbed set of wolf
        policies and passes them to ``self.saveTrajectoryByParameters``.

        Args:
            parameters: mapping with the keys ``'numWolves'``, ``'numSheep'``,
                ``'wolfType'`` (``'individualReward'`` selects
                ``wolfSelfish = 1.0``; any other value gives ``0.0``),
                ``'perturbedWolfID'`` and ``'perturbedWolfGoalID'`` (an index
                into the list of sheep ids).
        """
        print(parameters)

        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        numBlocks = 2
        wolfSelfish = 1.0 if parameters[
            'wolfType'] == 'individualReward' else 0.0
        perturbedWolfID = parameters['perturbedWolfID']
        perturbedWolfGoalID = parameters['perturbedWolfGoalID']

        ## MDP Env
        # Entity ids are laid out as wolves, then sheep, then blocks.
        numAgents = numWolves + numSheep
        numEntities = numAgents + numBlocks
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numWolves + numSheep))
        blocksID = list(range(numAgents, numEntities))

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2
        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [
            blockSize
        ] * numBlocks

        # costActionRatio and sheepSpeedMultiplier are also embedded in the
        # model file names below.
        costActionRatio = 0.0
        sheepSpeedMultiplier = 1.0
        sheepMaxSpeed = 1.3 * sheepSpeedMultiplier
        wolfMaxSpeed = 1.0
        blockMaxSpeed = None

        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
            sheepMaxSpeed
        ] * numSheep + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        collisionReward = 1  # for evaluation, count # of bites
        isCollision = IsCollision(getPosFromAgentState)
        rewardAllWolves = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                     isCollision, collisionReward, wolfSelfish)
        # The step reward is the sum over whatever rewardAllWolves returns.
        rewardWolf = lambda state, action, nextState: np.sum(
            rewardAllWolves(state, action, nextState))

        reshapeActionInTransit = lambda action: action
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID,
                                            entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                              entitiesSizeList,
                                              getCollisionForce,
                                              getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList,
                                        massList, entityMaxSpeedList,
                                        getVelFromAgentState,
                                        getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit,
                                           applyActionForce, applyEnvironForce,
                                           integrateState)

        forwardOneStep = ForwardOneStep(transit, rewardWolf)

        reset = ResetMultiAgentChasingWithSeed(numAgents, numBlocks)
        # Episodes never terminate early; isTerminal always returns False.
        isTerminal = lambda state: False
        maxRunningStepsToSample = 101
        sampleTrajectory = SampleTrajectory(maxRunningStepsToSample,
                                            isTerminal, reset, forwardOneStep)

        ## MDP Policy
        worldDim = 2
        actionDim = worldDim * 2 + 1

        layerWidth = [128, 128]
        # maxTimeStep and maxEpisode are only used to build model file names.
        maxTimeStep = 75
        maxEpisode = 60000
        dirName = os.path.dirname(__file__)

        # ------------ sheep recover variables ------------------------
        # One set of sheep models is restored per entry in sheepTypeList; the
        # entry fills the "individ{}" slot of the model file name.
        numSheepToObserve = 1
        sheepModelListOfDiffWolfReward = []
        sheepTypeList = [0.0, 1.0]

        for sheepType in sheepTypeList:
            # These id lists and observe lambdas do not depend on sheepType;
            # observeSheep is used again after the loop by the sheep policies.
            wolvesIDForSheepObserve = list(range(numWolves))
            sheepsIDForSheepObserve = list(
                range(numWolves, numSheepToObserve + numWolves))
            blocksIDForSheepObserve = list(
                range(numSheepToObserve + numWolves,
                      numSheepToObserve + numWolves + numBlocks))
            observeOneAgentForSheep = lambda agentID: Observe(
                agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
                blocksIDForSheepObserve, getPosFromAgentState,
                getVelFromAgentState)
            observeSheep = lambda state: [
                observeOneAgentForSheep(agentID)(state)
                for agentID in range(numWolves + numSheepToObserve)
            ]

            # Observe a sample state once to read off each observation's length.
            obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
            initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
            obsShapeSheep = [
                initObsForSheepParams[obsID].shape[0]
                for obsID in range(len(initObsForSheepParams))
            ]

            buildSheepModels = BuildMADDPGModels(actionDim,
                                                 numWolves + numSheepToObserve,
                                                 obsShapeSheep)
            sheepModelsList = [
                buildSheepModels(layerWidth, agentID)
                for agentID in range(numWolves, numWolves + numSheepToObserve)
            ]

            sheepFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
                numWolves, numSheepToObserve, numBlocks, maxEpisode,
                maxTimeStep, sheepSpeedMultiplier, costActionRatio, sheepType)
            sheepModelPaths = [
                os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                             sheepFileName + str(i))
                for i in range(numWolves, numWolves + numSheepToObserve)
            ]
            # List comprehension used only for its side effect of restoring
            # each model from its path.
            [
                restoreVariables(model, path)
                for model, path in zip(sheepModelsList, sheepModelPaths)
            ]
            sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList

        # # actOneStep = ActOneStep(actByPolicyTrainNoisy) #TODO
        # The same actOneStep is shared by the sheep and both kinds of wolf
        # policies below.
        actOneStep = ActOneStep(actByPolicyTrainNoNoisy)

        numAllSheepModels = len(sheepModelListOfDiffWolfReward)

        # ------------ wolves recover variables ------------------------

        # ------------ Recover one perturbed wolf for comparison -------
        # The perturbed wolf's observation contains all wolves, only the sheep
        # selected by perturbedWolfGoalID, and the blocks.
        numSheepForPerturbedWolf = 1
        wolvesIDForPerturbedWolf = wolvesID
        sheepsIDForPerturbedWolf = [sheepsID[perturbedWolfGoalID]]
        blocksIDForPerturbedWolf = list(
            range(numWolves + numSheep,
                  numEntities))  # skip the unattended sheep id

        observeOneAgentForPerturbedWolf = lambda agentID: Observe(
            agentID, wolvesIDForPerturbedWolf, sheepsIDForPerturbedWolf,
            blocksIDForPerturbedWolf, getPosFromAgentState,
            getVelFromAgentState)
        observePerturbedWolf = lambda state: [
            observeOneAgentForPerturbedWolf(agentID)(state)
            for agentID in wolvesIDForPerturbedWolf + sheepsIDForPerturbedWolf
        ]

        # Unlike the other observers, this one is given the full reset()
        # state, because its id lists refer to ids in the full environment.
        initObsForPerturbedWolfParams = observePerturbedWolf(reset())
        obsShapePerturbedWolf = [
            initObsForPerturbedWolfParams[obsID].shape[0]
            for obsID in range(len(initObsForPerturbedWolfParams))
        ]
        buildPerturbedWolfModels = BuildMADDPGModels(
            actionDim, numWolves + numSheepForPerturbedWolf,
            obsShapePerturbedWolf)
        layerWidthForWolf = [128, 128]
        perturbedWolfModel = buildPerturbedWolfModels(layerWidthForWolf,
                                                      perturbedWolfID)

        # The file name uses numSheepForPerturbedWolf (1) in the sheep slot,
        # unlike the multi-sheep wolf file name further below.
        perturbedWolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
            numWolves, numSheepForPerturbedWolf, numBlocks, maxEpisode,
            maxTimeStep, sheepSpeedMultiplier, costActionRatio, wolfSelfish)
        perturbedWolfModelPath = os.path.join(
            dirName, '..', '..', 'data', 'preTrainModel',
            perturbedWolfFileName + str(perturbedWolfID))
        restoreVariables(perturbedWolfModel, perturbedWolfModelPath)

        # ------------ Recover other wolves trained with multiple goals -------

        wolvesIDForWolfObserve = wolvesID
        sheepsIDForWolfObserve = sheepsID
        blocksIDForWolfObserve = blocksID
        observeOneAgentForWolf = lambda agentID: Observe(
            agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
            blocksIDForWolfObserve, getPosFromAgentState, getVelFromAgentState)
        observeWolf = lambda state: [
            observeOneAgentForWolf(agentID)(state)
            for agentID in range(numWolves + numSheep)
        ]

        obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
        initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
        obsShapeWolf = [
            initObsForWolfParams[obsID].shape[0]
            for obsID in range(len(initObsForWolfParams))
        ]
        buildWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheep,
                                            obsShapeWolf)
        # Same value as the earlier layerWidthForWolf assignment.
        layerWidthForWolf = [128, 128]
        wolfModelsList = [
            buildWolfModels(layerWidthForWolf, agentID)
            for agentID in range(numWolves)
        ]

        wolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
            numWolves, numSheep, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, costActionRatio, wolfSelfish)
        wolfModelPaths = [
            os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                         wolfFileName + str(i)) for i in range(numWolves)
        ]
        # As for the sheep: comprehension used only for its side effect.
        [
            restoreVariables(model, path)
            for model, path in zip(wolfModelsList, wolfModelPaths)
        ]

        # ------------ compose  policy ---------------------
        # The fixed cov entries are (1e-11) ** 2.
        # NOTE(review): presumably chosen so that sampling is practically
        # deterministic -- confirm against BuildGaussianFixCov.
        actionDimReshaped = 2
        cov = [0.00000000001**2 for _ in range(actionDimReshaped)]
        buildGaussian = BuildGaussianFixCov(cov)
        reshapeAction = ReshapeAction()

        # unperturbed policy
        composeWolfPolicy = lambda wolfModel: lambda state: sampleFromContinuousSpace(
            buildGaussian(
                tuple(reshapeAction(actOneStep(wolfModel, observeWolf(state))))
            ))
        wolvesSampleActions = [
            composeWolfPolicy(wolfModel) for wolfModel in wolfModelsList
        ]

        # perturbed policy
        composePerturbedWolfPolicy = lambda perturbedModel: lambda state: sampleFromContinuousSpace(
            buildGaussian(
                tuple(
                    reshapeAction(
                        actOneStep(perturbedModel, observePerturbedWolf(state))
                    ))))
        # Copy the unperturbed list and swap in the perturbed policy at
        # perturbedWolfID; only this perturbed list is used for the rollouts.
        wolvesSampleActionsPerturbed = wolvesSampleActions.copy()
        wolvesSampleActionsPerturbed[
            perturbedWolfID] = composePerturbedWolfPolicy(perturbedWolfModel)

        trajectories = []
        for trajectoryId in range(self.numTrajectories):
            # Each sheep draws one of the restored sheep models at random for
            # this trajectory.
            sheepModelsForPolicy = [
                sheepModelListOfDiffWolfReward[np.random.choice(
                    numAllSheepModels)] for sheepId in sheepsID
            ]
            # The sheep policy returns a one-entry {action: 1} dict, which is
            # passed to SampleActionOnFixedIntention with sampleFromDistribution.
            composeSheepPolicy = lambda sheepModel: lambda state: {
                tuple(
                    reshapeAction(actOneStep(sheepModel, observeSheep(state)))):
                1
            }
            sheepChooseActionMethod = sampleFromDistribution
            sheepSampleActions = [
                SampleActionOnFixedIntention(selfId, wolvesID,
                                             composeSheepPolicy(sheepModel),
                                             sheepChooseActionMethod, blocksID)
                for selfId, sheepModel in zip(sheepsID, sheepModelsForPolicy)
            ]

            # Joint action order: wolves (with the perturbed one) then sheep.
            allIndividualSampleActionsPerturbed = wolvesSampleActionsPerturbed + sheepSampleActions
            sampleActionPerturbed = lambda state: [
                sampleIndividualAction(state) for sampleIndividualAction in
                allIndividualSampleActionsPerturbed
            ]

            trajectory = sampleTrajectory(sampleActionPerturbed)
            trajectories.append(trajectory)

        trajectoryFixedParameters = {
            'maxRunningStepsToSample': maxRunningStepsToSample
        }
        self.saveTrajectoryByParameters(trajectories,
                                        trajectoryFixedParameters, parameters)

Пример #3

Показать файл

    def __call__(self, parameters):
        print(parameters)

        valuePriorEndTime = -100
        deviationFor2DAction = 1.0
        rationalityBetaInInference = 1.0

        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        wolfType = parameters['wolfType']
        wolfSelfish = 0.0 if wolfType == 'sharedAgencyBySharedRewardWolf' else 1.0
        perturbedWolfID = parameters['perturbedWolfID']
        perturbedWolfGoalID = parameters['perturbedWolfGoalID']

        ## MDP Env
        numBlocks = 2
        numAgents = numWolves + numSheep
        numEntities = numAgents + numBlocks
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numWolves + numSheep))
        blocksID = list(range(numAgents, numEntities))

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2
        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [blockSize] * numBlocks

        costActionRatio = 0.0
        sheepSpeedMultiplier = 1.0
        sheepMaxSpeed = 1.3 * sheepSpeedMultiplier
        wolfMaxSpeed = 1.0
        blockMaxSpeed = None

        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheep + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        collisionReward = 1  # for evaluation, count # of bites
        isCollision = IsCollision(getPosFromAgentState)
        rewardAllWolves = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision, collisionReward, wolfSelfish)
        rewardWolf = lambda state, action, nextState: np.sum(rewardAllWolves(state, action, nextState))

        reshapeActionInTransit = lambda action: action
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList, getCollisionForce,
                                              getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList, massList, entityMaxSpeedList,
                                        getVelFromAgentState, getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit, applyActionForce, applyEnvironForce,
                                           integrateState)

        forwardOneStep = ForwardOneStep(transit, rewardWolf)

        reset = ResetMultiAgentChasingWithSeed(numAgents, numBlocks)
        isTerminal = lambda state: False
        maxRunningStepsToSample = 101
        sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, isTerminal, reset, forwardOneStep)

        ## MDP Policy
        worldDim = 2
        actionDim = worldDim * 2 + 1

        layerWidth = [128, 128]
        maxTimeStep = 75
        maxEpisode = 60000
        dirName = os.path.dirname(__file__)

        # ------------ sheep recover variables ------------------------
        numSheepToObserve = 1
        sheepModelListOfDiffWolfReward = []
        sheepTypeList = [0.0, 1.0]

        for sheepType in sheepTypeList:
            wolvesIDForSheepObserve = list(range(numWolves))
            sheepsIDForSheepObserve = list(range(numWolves, numSheepToObserve + numWolves))
            blocksIDForSheepObserve = list(
                range(numSheepToObserve + numWolves, numSheepToObserve + numWolves + numBlocks))
            observeOneAgentForSheep = lambda agentID: Observe(agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
                                                              blocksIDForSheepObserve, getPosFromAgentState,
                                                              getVelFromAgentState)
            observeSheep = lambda state: [observeOneAgentForSheep(agentID)(state) for agentID in
                                          range(numWolves + numSheepToObserve)]

            obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
            initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
            obsShapeSheep = [initObsForSheepParams[obsID].shape[0] for obsID in range(len(initObsForSheepParams))]

            buildSheepModels = BuildMADDPGModels(actionDim, numWolves + numSheepToObserve, obsShapeSheep)
            sheepModelsList = [buildSheepModels(layerWidth, agentID) for agentID in
                               range(numWolves, numWolves + numSheepToObserve)]

            sheepFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
                numWolves, numSheepToObserve, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, costActionRatio,
                sheepType)
            sheepModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel', sheepFileName + str(i)) for i
                               in range(numWolves, numWolves + numSheepToObserve)]
            [restoreVariables(model, path) for model, path in zip(sheepModelsList, sheepModelPaths)]
            sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList

        actOneStep = ActOneStep(actByPolicyTrainNoNoisy)
        numAllSheepModels = len(sheepModelListOfDiffWolfReward)

        # ------------ recover variables for "we" ------------------------
        numAgentsInWe = numWolves
        numSheepInWe = 1
        numBlocksForWe = numBlocks
        wolvesIDForWolfObserve = list(range(numAgentsInWe))
        sheepsIDForWolfObserve = list(range(numAgentsInWe, numSheepInWe + numAgentsInWe))
        blocksIDForWolfObserve = list(
            range(numSheepInWe + numAgentsInWe, numSheepInWe + numAgentsInWe + numBlocksForWe))

        observeOneAgentForWolf = lambda agentID: Observe(agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
                                                         blocksIDForWolfObserve, getPosFromAgentState,
                                                         getVelFromAgentState)
        observeWolf = lambda state: [observeOneAgentForWolf(agentID)(state) for agentID in
                                     range(numAgentsInWe + numSheepInWe)]

        obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
        initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
        obsShapeWolf = [initObsForWolfParams[obsID].shape[0] for obsID in range(len(initObsForWolfParams))]
        buildWolfModels = BuildMADDPGModels(actionDim, numAgentsInWe + numSheepInWe, obsShapeWolf)
        layerWidthForWolf = [128, 128]
        wolfModelsList = [buildWolfModels(layerWidthForWolf, agentID) for agentID in range(numAgentsInWe)]

        wolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
            numWolves, numSheepInWe, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, costActionRatio,
            wolfSelfish)
        wolfModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel', wolfFileName + str(i)) for i in
                          range(numAgentsInWe)]
        [restoreVariables(model, path) for model, path in zip(wolfModelsList, wolfModelPaths)]

        # ------------ compose wolves policy no perturbation ------------------------
        actionDimReshaped = 2
        cov = [deviationFor2DAction ** 2 for _ in range(actionDimReshaped)] # 1
        buildGaussian = BuildGaussianFixCov(cov)
        actOneStep = ActOneStep(actByPolicyTrainNoNoisy)
        reshapeAction = ReshapeAction()
        composeCentralControlPolicy = lambda observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction(
            reshapeAction, observe, actOneStep, buildGaussian)
        wolvesCentralControlPolicy = [composeCentralControlPolicy(observeWolf)(wolfModelsList, numAgentsInWe)]  # input state, return a list of gaussian distributions with cov 1

        softPolicyInInference = lambda distribution: distribution
        getStateThirdPersonPerspective = lambda state, goalId, weIds: getStateOrActionThirdPersonPerspective(state,
                                                                                                             goalId,
                                                                                                             weIds,
                                                                                                             blocksID)  # nochange
        policyForCommittedAgentsInInference = PolicyForCommittedAgent(wolvesCentralControlPolicy, softPolicyInInference,
                                                                      getStateThirdPersonPerspective) # same as wolvesCentralControlPolicy(state)
        concernedAgentsIds = wolvesID
        calCommittedAgentsPolicyLikelihood = CalCommittedAgentsContinuousPolicyLikelihood(concernedAgentsIds,
                                                                                          policyForCommittedAgentsInInference,
                                                                                          rationalityBetaInInference)

        randomActionSpace = [(5, 0), (3.5, 3.5), (0, 5), (-3.5, 3.5), (-5, 0), (-3.5, -3.5), (0, -5), (3.5, -3.5),
                             (0, 0)]
        randomPolicy = RandomPolicy(randomActionSpace)
        getStateFirstPersonPerspective = lambda state, goalId, weIds, selfId: getStateOrActionFirstPersonPerspective(
            state, goalId, weIds, selfId, blocksID)
        policyForUncommittedAgentsInInference = PolicyForUncommittedAgent(wolvesID, randomPolicy, softPolicyInInference,
                                                                          getStateFirstPersonPerspective)  # random policy, returns action distribution
        calUncommittedAgentsPolicyLikelihood = CalUncommittedAgentsPolicyLikelihood(wolvesID, concernedAgentsIds,
                                                                                    policyForUncommittedAgentsInInference)  # returns 1

        # Joint Likelihood
        calJointLikelihood = lambda intention, state, perceivedAction: calCommittedAgentsPolicyLikelihood(intention,
                                                                                                          state,
                                                                                                          perceivedAction) * \
                                                                       calUncommittedAgentsPolicyLikelihood(intention,
                                                                                                            state,
                                                                                                            perceivedAction)  # __* 1

        # ------------ wolves intention ------------------------
        intentionSpacesForAllWolves = [tuple(it.product(sheepsID, [tuple(wolvesID)])) for wolfId in
                                       wolvesID]  # <class 'tuple'>: ((3, (0, 1, 2)), (4, (0, 1, 2)), (5, (0, 1, 2)), (6, (0, 1, 2)))
        print('intentionSpacesForAllWolves', intentionSpacesForAllWolves)
        wolvesIntentionPriors = [
            {tuple(intention): 1 / len(allPossibleIntentionsOneWolf) for intention in allPossibleIntentionsOneWolf} for
            allPossibleIntentionsOneWolf in intentionSpacesForAllWolves]
        perceptSelfAction = SampleNoisyAction(deviationFor2DAction)
        perceptOtherAction = SampleNoisyAction(deviationFor2DAction)
        perceptAction = PerceptImaginedWeAction(wolvesID, perceptSelfAction,
                                                perceptOtherAction)  # input self, others action

        # Infer and update Intention
        variablesForAllWolves = [[intentionSpace] for intentionSpace in intentionSpacesForAllWolves]
        jointHypothesisSpaces = [pd.MultiIndex.from_product(variables, names=['intention']) for variables in
                                 variablesForAllWolves]
        concernedHypothesisVariable = ['intention']
        priorDecayRate = 1
        softPrior = SoftDistribution(priorDecayRate)  # no change
        inferIntentionOneStepList = [InferOneStep(jointHypothesisSpace, concernedHypothesisVariable,
                                                  calJointLikelihood, softPrior) for jointHypothesisSpace in
                                     jointHypothesisSpaces]

        if numSheep == 1:
            inferIntentionOneStepList = [lambda prior, state, action: prior] * 3

        adjustIntentionPriorGivenValueOfState = lambda state: 1
        chooseIntention = sampleFromDistribution
        updateIntentions = [UpdateIntention(intentionPrior, valuePriorEndTime, adjustIntentionPriorGivenValueOfState,
                                            perceptAction, inferIntentionOneStep, chooseIntention)
                            for intentionPrior, inferIntentionOneStep in
                            zip(wolvesIntentionPriors, inferIntentionOneStepList)]

        # reset intention and adjust intention prior attributes tools for multiple trajectory
        intentionResetAttributes = ['timeStep', 'lastState', 'lastAction', 'intentionPrior', 'formerIntentionPriors']
        intentionResetAttributeValues = [
            dict(zip(intentionResetAttributes, [0, None, None, intentionPrior, [intentionPrior]]))
            for intentionPrior in wolvesIntentionPriors]
        resetIntentions = ResetObjects(intentionResetAttributeValues, updateIntentions)
        returnAttributes = ['formerIntentionPriors']
        getIntentionDistributions = GetObjectsValuesOfAttributes(returnAttributes, updateIntentions[1:])
        attributesToRecord = ['lastAction']
        recordActionForUpdateIntention = RecordValuesForObjects(attributesToRecord, updateIntentions)

        # Wovels Generate Action #TODO
        covForPlanning = [0.00000001 for _ in range(actionDimReshaped)]
        # covForPlanning = [0.03 ** 2 for _ in range(actionDimReshaped)]
        buildGaussianForPlanning = BuildGaussianFixCov(covForPlanning)
        composeCentralControlPolicyForPlanning = lambda \
                observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction(reshapeAction,
                                                                                    observe, actOneStep,
                                                                                    buildGaussianForPlanning)
        wolvesCentralControlPoliciesForPlanning = [
            composeCentralControlPolicyForPlanning(observeWolf)(wolfModelsList, numAgentsInWe)]

        centralControlPolicyListBasedOnNumAgentsInWeForPlanning = wolvesCentralControlPoliciesForPlanning  # 0 for two agents in We, 1 for three agents...
        softPolicyInPlanning = lambda distribution: distribution
        policyForCommittedAgentInPlanning = PolicyForCommittedAgent(
            centralControlPolicyListBasedOnNumAgentsInWeForPlanning, softPolicyInPlanning,
            getStateThirdPersonPerspective)

        policyForUncommittedAgentInPlanning = PolicyForUncommittedAgent(wolvesID, randomPolicy, softPolicyInPlanning,
                                                                        getStateFirstPersonPerspective)

        def wolfChooseActionMethod(individualContinuousDistributions):
            centralControlAction = tuple(
                [tuple(sampleFromContinuousSpace(distribution)) for distribution in individualContinuousDistributions])
            return centralControlAction

        getSelfActionIDInThirdPersonPerspective = lambda weIds, selfId: list(weIds).index(selfId)
        chooseCommittedAction = GetActionFromJointActionDistribution(wolfChooseActionMethod,
                                                                     getSelfActionIDInThirdPersonPerspective)
        chooseUncommittedAction = sampleFromDistribution
        wolvesSampleIndividualActionGivenIntentionList = [
            SampleIndividualActionGivenIntention(selfId, policyForCommittedAgentInPlanning,
                                                 policyForUncommittedAgentInPlanning, chooseCommittedAction,
                                                 chooseUncommittedAction)
            for selfId in wolvesID]

        # ------------------- recover one wolf model that only concerns sheep 0 -------------------
        numSheepForPerturbedWolf = 1
        wolvesIDForPerturbedWolf = wolvesID
        sheepsIDForPerturbedWolf = [sheepsID[perturbedWolfGoalID]]
        blocksIDForPerturbedWolf = list(range(numWolves + numSheep, numEntities)) # skip the unattended sheep id

        observeOneAgentForPerturbedWolf = lambda agentID: Observe(agentID, wolvesIDForPerturbedWolf, sheepsIDForPerturbedWolf,
                blocksIDForPerturbedWolf, getPosFromAgentState, getVelFromAgentState)
        observePerturbedWolf = lambda state: [observeOneAgentForPerturbedWolf(agentID)(state) for agentID in wolvesIDForPerturbedWolf + sheepsIDForPerturbedWolf]

        initObsForPerturbedWolfParams = observePerturbedWolf(reset())
        obsShapePerturbedWolf = [initObsForPerturbedWolfParams[obsID].shape[0] for obsID in range(len(initObsForPerturbedWolfParams))]
        buildPerturbedWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheepForPerturbedWolf, obsShapePerturbedWolf)
        layerWidthForWolf = [128, 128]
        perturbedWolfModel = buildPerturbedWolfModels(layerWidthForWolf, perturbedWolfID)

        perturbedWolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
            numWolves, numSheepForPerturbedWolf, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, costActionRatio, wolfSelfish)
        perturbedWolfModelPath = os.path.join(dirName, '..', '..', 'data', 'preTrainModel', perturbedWolfFileName + str(perturbedWolfID))
        restoreVariables(perturbedWolfModel, perturbedWolfModelPath)


        # ------------------- Sample and Save Trajectory -------------------

        wolvesSampleActions = [
            SampleActionOnChangableIntention(updateIntention, wolvesSampleIndividualActionGivenIntention)
            for updateIntention, wolvesSampleIndividualActionGivenIntention in
            zip(updateIntentions, wolvesSampleIndividualActionGivenIntentionList)]

        perturbedWolfSampleActions = lambda state: tuple(reshapeAction(actOneStep(perturbedWolfModel, observePerturbedWolf(state))))
        wolvesSampleActionsPerturbed = wolvesSampleActions#.copy()
        wolvesSampleActionsPerturbed[perturbedWolfID] = perturbedWolfSampleActions


        trajectoriesWithIntentionDists = []
        for trajectoryId in range(self.numTrajectories):
            sheepModelsForPolicy = [sheepModelListOfDiffWolfReward[np.random.choice(numAllSheepModels)] for sheepId in
                                    sheepsID]
            composeSheepPolicy = lambda sheepModel: lambda state: {
                tuple(reshapeAction(actOneStep(sheepModel, observeSheep(state)))): 1}
            sheepChooseActionMethod = sampleFromDistribution
            sheepSampleActions = [SampleActionOnFixedIntention(selfId, wolvesID, composeSheepPolicy(sheepModel),
                                                               sheepChooseActionMethod, blocksID)
                                  for selfId, sheepModel in zip(sheepsID, sheepModelsForPolicy)]
            allIndividualSampleActions = wolvesSampleActions + sheepSampleActions

            sampleActionMultiAgent = SampleActionMultiagent(allIndividualSampleActions, recordActionForUpdateIntention)

            allIndividualSampleActionsPerturbed = wolvesSampleActionsPerturbed + sheepSampleActions
            sampleActionMultiAgentPerturbed = SampleActionMultiagent(allIndividualSampleActionsPerturbed, recordActionForUpdateIntention)

            # trajectory = sampleTrajectory(sampleActionMultiAgentPerturbed)
            trajectory = sampleTrajectory(sampleActionMultiAgentPerturbed)

            intentionDistributions = getIntentionDistributions()
            trajectoryWithIntentionDists = [tuple(list(SASRPair) + list(intentionDist)) for SASRPair, intentionDist in
                                            zip(trajectory, intentionDistributions)]
            trajectoriesWithIntentionDists.append(tuple(trajectoryWithIntentionDists))
            # trajectoriesWithIntentionDists.append(trajectory)
            resetIntentions()
        trajectoryFixedParameters = {'maxRunningStepsToSample': maxRunningStepsToSample}
        self.saveTrajectoryByParameters(trajectoriesWithIntentionDists, trajectoryFixedParameters, parameters)

Пример #4

Показать файл

Файл: sampleTrajectorySharedAngecy.py Проект: ningtangla/ImaginedWe

    def __call__(self, parameters):
        """Sample trajectories of wolves chasing sheep and save them.

        The method builds the multi-agent chasing environment, restores the
        pre-trained sheep and wolf models from ``data/preTrainModel``, wires
        up the wolves' intention inference and action sampling, then samples
        ``self.numTrajectories`` trajectories. Each trajectory step is stored
        together with the wolves' intention distributions and the result is
        passed to ``self.saveTrajectoryByParameters``.

        Args:
            parameters: mapping that must contain the keys ``'numWolves'``,
                ``'numSheep'``, ``'valuePriorSoftMaxBeta'``,
                ``'valuePriorEndTime'``, ``'deviationFor2DAction'``,
                ``'rationalityBetaInInference'``, ``'wolfType'`` and
                ``'sheepConcern'``.

        Returns:
            None. Results are handed to ``self.saveTrajectoryByParameters``.

        Raises:
            ValueError: if ``parameters['sheepConcern']`` is not
                ``'selfSheep'`` or ``'allSheep'``, or if
                ``parameters['wolfType']`` is not a recognized wolf type when
                a wolf model has to be loaded.
        """
        print(parameters)
        visualizeTraj = False

        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        # Read but not used below; kept so a missing key still fails early.
        softParamterForValue = parameters['valuePriorSoftMaxBeta']
        valuePriorEndTime = parameters['valuePriorEndTime']
        deviationFor2DAction = parameters['deviationFor2DAction']
        rationalityBetaInInference = parameters['rationalityBetaInInference']
        wolfType = parameters['wolfType']
        sheepConcern = parameters['sheepConcern']
        print(rationalityBetaInInference)

        ## MDP Env
        # state is all multi agent state # action is all multi agent action
        # Entity ids are laid out as wolves, then sheep, then blocks.
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numWolves + numSheep))
        possibleWolvesIds = wolvesID
        possibleSheepIds = sheepsID

        numAgents = numWolves + numSheep
        numBlocks = 5 - numWolves
        blocksID = list(range(numAgents, numAgents + numBlocks))
        numEntities = numAgents + numBlocks

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2

        sheepMaxSpeed = 1.3 * 1
        wolfMaxSpeed = 1.0 * 1
        blockMaxSpeed = None

        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [
            blockSize
        ] * numBlocks
        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
            sheepMaxSpeed
        ] * numSheep + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        reshapeActionInTransit = lambda action: action
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID,
                                            entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                              entitiesSizeList,
                                              getCollisionForce,
                                              getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList,
                                        massList, entityMaxSpeedList,
                                        getVelFromAgentState,
                                        getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit,
                                           applyActionForce, applyEnvironForce,
                                           integrateState)

        isCollision = IsCollision(getPosFromAgentState)
        collisonRewardWolf = 1
        punishForOutOfBoundForWolf = lambda stata: 0
        rewardWolf = RewardCentralControlPunishBond(
            wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState,
            isCollision, punishForOutOfBoundForWolf, collisonRewardWolf)
        collisonRewardSheep = -1
        punishForOutOfBoundForSheep = PunishForOutOfBound()
        # Constructed but not used below; only rewardWolf drives forwardOneStep.
        rewardSheep = RewardCentralControlPunishBond(
            sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState,
            isCollision, punishForOutOfBoundForSheep, collisonRewardSheep)

        forwardOneStep = ForwardOneStep(transit, rewardWolf)

        reset = ResetMultiAgentChasing(numAgents, numBlocks)
        isTerminal = lambda state: False
        maxRunningSteps = 101
        sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset,
                                            forwardOneStep)

        ## MDP Policy
        worldDim = 2
        actionDim = worldDim * 2 + 1

        layerWidth = [64 * (numWolves - 1), 64 * (numWolves - 1)]

        # Sheep Part
        # ------------ model ------------------------
        # Fail with a clear message instead of an unbound-name error further
        # down when the configuration value is not recognized.
        sheepConcernToSelfOnly = {'selfSheep': 1, 'allSheep': 0}
        if sheepConcern not in sheepConcernToSelfOnly:
            raise ValueError(
                "unknown sheepConcern {!r}; expected 'selfSheep' or "
                "'allSheep'".format(sheepConcern))
        sheepConcernSelfOnly = sheepConcernToSelfOnly[sheepConcern]
        # Index 0: sheep observes all sheep; index 1: sheep observes one sheep.
        numSheepToObserveWhenSheepSameOrDiff = [numSheep, 1]
        numSheepToObserve = numSheepToObserveWhenSheepSameOrDiff[
            sheepConcernSelfOnly]

        print(numSheepToObserve)
        sheepModelListOfDiffWolfReward = []
        sheepType = 'mixed'
        if sheepType == 'mixed':
            sheepPrefixList = ['maddpgIndividWolf', 'maddpg']
        else:
            sheepPrefixList = [sheepType]
        for sheepPrefix in sheepPrefixList:
            # These ids do not depend on sheepPrefix, so the observeSheep
            # closure that is reused after the loop sees the same values
            # regardless of which iteration created it.
            wolvesIDForSheepObserve = list(range(numWolves))
            sheepsIDForSheepObserve = list(
                range(numWolves, numSheepToObserve + numWolves))
            blocksIDForSheepObserve = list(
                range(numSheepToObserve + numWolves,
                      numSheepToObserve + numWolves + numBlocks))
            observeOneAgentForSheep = lambda agentID: Observe(
                agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
                blocksIDForSheepObserve, getPosFromAgentState,
                getVelFromAgentState)
            observeSheep = lambda state: [
                observeOneAgentForSheep(agentID)(state)
                for agentID in range(numWolves + numSheepToObserve)
            ]

            obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
            # An initial observation is taken only to read the per-agent
            # observation sizes needed to build the networks.
            initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
            obsShapeSheep = [
                initObsForSheepParams[obsID].shape[0]
                for obsID in range(len(initObsForSheepParams))
            ]

            buildSheepModels = BuildMADDPGModels(actionDim,
                                                 numWolves + numSheepToObserve,
                                                 obsShapeSheep)
            sheepModelsList = [
                buildSheepModels(layerWidth, agentID)
                for agentID in range(numWolves, numWolves + numSheepToObserve)
            ]

            # dirName and maxEpisode are also used by the wolf part below.
            dirName = os.path.dirname(__file__)
            maxEpisode = 60000
            print(sheepPrefix)
            sheepFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(
                numWolves, numSheepToObserve, numBlocks, maxEpisode)
            sheepModelPaths = [
                os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                             sheepPrefix + sheepFileName + str(i) + '60000eps')
                for i in range(numWolves, numWolves + numSheepToObserve)
            ]

            for model, path in zip(sheepModelsList, sheepModelPaths):
                restoreVariables(model, path)
            sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList

        # Sheep Policy Function
        reshapeAction = ReshapeAction()
        actOneStepOneModelSheep = ActOneStep(actByPolicyTrainNoisy)

        # Sheep Generate Action
        numAllSheepModels = len(sheepModelListOfDiffWolfReward)

        # Wolves Part

        # Intention Prior For inference
        # Every wolf gets the same intention space: each candidate sheep id
        # paired with the tuple of all wolf ids.
        intentionSpacesForAllWolves = [
            tuple(it.product(possibleSheepIds, [tuple(possibleWolvesIds)]))
            for _ in possibleWolvesIds
        ]
        print(intentionSpacesForAllWolves)
        # Uniform prior over each wolf's intention space.
        wolvesIntentionPriors = [{
            tuple(intention): 1 / len(allPossibleIntentionsOneWolf)
            for intention in allPossibleIntentionsOneWolf
        } for allPossibleIntentionsOneWolf in intentionSpacesForAllWolves]
        # Percept Action For Inference
        perceptSelfAction = SampleNoisyAction(deviationFor2DAction)
        perceptOtherAction = SampleNoisyAction(deviationFor2DAction)
        perceptAction = PerceptImaginedWeAction(possibleWolvesIds,
                                                perceptSelfAction,
                                                perceptOtherAction)

        # Policy Likelihood function: Wolf Centrol Control NN Policy Given Intention
        # ------------ model ------------------------
        def buildObserveForWe(numInWe, wolvesIDsInWe, sheepIDsInWe,
                              blockIDsInWe):
            """Return an observe(state) function bound to one size of 'We'.

            A factory is used instead of lambdas defined in the loop body:
            closures created in a loop look up the loop variables when they
            are called, so every stored observer would otherwise use the ids
            of the last iteration.
            """

            def observeWe(state):
                return [
                    Observe(agentID, wolvesIDsInWe, sheepIDsInWe, blockIDsInWe,
                            getPosFromAgentState, getVelFromAgentState)(state)
                    for agentID in range(numInWe + 1)
                ]

            return observeWe

        wolfPrefixByType = {
            'sharedAgencyByIndividualRewardWolf': 'maddpgIndividWolf',
            'sharedAgencyBySharedRewardWolf': 'maddpg',
        }
        weModelsListBaseOnNumInWe = []
        observeListBaseOnNumInWe = []
        # One set of models and one observer per possible number of wolves in
        # "We" (2 .. numWolves), each trained against a single sheep.
        for numAgentInWe in range(2, numWolves + 1):
            numBlocksForWe = 5 - numAgentInWe
            wolvesIDForWolfObserve = list(range(numAgentInWe))
            sheepsIDForWolfObserve = list(range(numAgentInWe,
                                                1 + numAgentInWe))
            blocksIDForWolfObserve = list(
                range(1 + numAgentInWe, 1 + numAgentInWe + numBlocksForWe))
            observeWolf = buildObserveForWe(numAgentInWe,
                                            wolvesIDForWolfObserve,
                                            sheepsIDForWolfObserve,
                                            blocksIDForWolfObserve)
            observeListBaseOnNumInWe.append(observeWolf)

            obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
            initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
            obsShapeWolf = [
                initObsForWolfParams[obsID].shape[0]
                for obsID in range(len(initObsForWolfParams))
            ]
            buildWolfModels = BuildMADDPGModels(actionDim, numAgentInWe + 1,
                                                obsShapeWolf)
            layerWidthForWolf = [
                64 * (numAgentInWe - 1), 64 * (numAgentInWe - 1)
            ]
            wolfModelsList = [
                buildWolfModels(layerWidthForWolf, agentID)
                for agentID in range(numAgentInWe)
            ]

            # Validate at the point of use so an unknown wolfType produces a
            # clear error rather than an unbound-name error.
            if wolfType not in wolfPrefixByType:
                raise ValueError(
                    "unknown wolfType {!r}; expected one of {}".format(
                        wolfType, sorted(wolfPrefixByType)))
            wolfPrefix = wolfPrefixByType[wolfType]
            wolfFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(
                numAgentInWe, 1, numBlocksForWe, maxEpisode)
            wolfModelPaths = [
                os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                             wolfPrefix + wolfFileName + str(i) + '60000eps')
                for i in range(numAgentInWe)
            ]
            print(numAgentInWe, obsShapeWolf, wolfModelPaths)

            for model, path in zip(wolfModelsList, wolfModelPaths):
                restoreVariables(model, path)
            weModelsListBaseOnNumInWe.append(wolfModelsList)

        actionDimReshaped = 2
        # Fixed per-dimension variance used for the inference policy.
        cov = [deviationFor2DAction**2 for _ in range(actionDimReshaped)]
        buildGaussian = BuildGaussianFixCov(cov)
        actOneStepOneModelWolf = ActOneStep(actByPolicyTrainNoNoisy)
        composeCentralControlPolicy = lambda observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction(
            reshapeAction, observe, actOneStepOneModelWolf, buildGaussian)
        # Both lists are indexed by (number of agents in We) - 2.
        wolvesCentralControlPolicies = [
            composeCentralControlPolicy(
                observeListBaseOnNumInWe[numAgentsInWe - 2])(
                    weModelsListBaseOnNumInWe[numAgentsInWe - 2],
                    numAgentsInWe)
            for numAgentsInWe in range(2, numWolves + 1)
        ]

        centralControlPolicyListBasedOnNumAgentsInWe = wolvesCentralControlPolicies  # 0 for two agents in We, 1 for three agents...
        softPolicyInInference = lambda distribution: distribution
        getStateThirdPersonPerspective = lambda state, goalId, weIds: getStateOrActionThirdPersonPerspective(
            state, goalId, weIds, blocksID)
        policyForCommittedAgentsInInference = PolicyForCommittedAgent(
            centralControlPolicyListBasedOnNumAgentsInWe,
            softPolicyInInference, getStateThirdPersonPerspective)
        concernedAgentsIds = possibleWolvesIds
        calCommittedAgentsPolicyLikelihood = CalCommittedAgentsContinuousPolicyLikelihood(
            concernedAgentsIds, policyForCommittedAgentsInInference,
            rationalityBetaInInference)

        randomActionSpace = [(5, 0), (3.5, 3.5), (0, 5), (-3.5, 3.5), (-5, 0),
                             (-3.5, -3.5), (0, -5), (3.5, -3.5), (0, 0)]
        randomPolicy = RandomPolicy(randomActionSpace)
        getStateFirstPersonPerspective = lambda state, goalId, weIds, selfId: getStateOrActionFirstPersonPerspective(
            state, goalId, weIds, selfId, blocksID)
        policyForUncommittedAgentsInInference = PolicyForUncommittedAgent(
            possibleWolvesIds, randomPolicy, softPolicyInInference,
            getStateFirstPersonPerspective)
        calUncommittedAgentsPolicyLikelihood = CalUncommittedAgentsPolicyLikelihood(
            possibleWolvesIds, concernedAgentsIds,
            policyForUncommittedAgentsInInference)

        # Joint Likelihood
        calJointLikelihood = lambda intention, state, perceivedAction: calCommittedAgentsPolicyLikelihood(intention, state, perceivedAction) * \
                calUncommittedAgentsPolicyLikelihood(intention, state, perceivedAction)

        # Infer and update Intention
        variablesForAllWolves = [
            [intentionSpace] for intentionSpace in intentionSpacesForAllWolves
        ]
        jointHypothesisSpaces = [
            pd.MultiIndex.from_product(variables, names=['intention'])
            for variables in variablesForAllWolves
        ]
        concernedHypothesisVariable = ['intention']
        priorDecayRate = 1
        softPrior = SoftDistribution(priorDecayRate)
        inferIntentionOneStepList = [
            InferOneStep(jointHypothesisSpace, concernedHypothesisVariable,
                         calJointLikelihood, softPrior)
            for jointHypothesisSpace in jointHypothesisSpaces
        ]

        if numSheep == 1:
            # With a single sheep the prior is passed through unchanged. One
            # entry per wolf is needed: the zip below would otherwise drop
            # wolves beyond the length of this list.
            inferIntentionOneStepList = [lambda prior, state, action: prior
                                         ] * numWolves

        adjustIntentionPriorGivenValueOfState = lambda state: 1
        chooseIntention = sampleFromDistribution
        updateIntentions = [
            UpdateIntention(intentionPrior, valuePriorEndTime,
                            adjustIntentionPriorGivenValueOfState,
                            perceptAction, inferIntentionOneStep,
                            chooseIntention)
            for intentionPrior, inferIntentionOneStep in zip(
                wolvesIntentionPriors, inferIntentionOneStepList)
        ]

        # Reset intention and adjust intention prior attributes tools for multiple trajectories
        intentionResetAttributes = [
            'timeStep', 'lastState', 'lastAction', 'intentionPrior',
            'formerIntentionPriors'
        ]
        intentionResetAttributeValues = [
            dict(
                zip(intentionResetAttributes,
                    [0, None, None, intentionPrior, [intentionPrior]]))
            for intentionPrior in wolvesIntentionPriors
        ]
        resetIntentions = ResetObjects(intentionResetAttributeValues,
                                       updateIntentions)
        returnAttributes = ['formerIntentionPriors']
        getIntentionDistributions = GetObjectsValuesOfAttributes(
            returnAttributes, updateIntentions)
        attributesToRecord = ['lastAction']
        recordActionForUpdateIntention = RecordValuesForObjects(
            attributesToRecord, updateIntentions)

        # Wolves Generate Action
        # Planning uses its own fixed variance (0.03 ** 2 per dimension),
        # independent of deviationFor2DAction.
        covForPlanning = [0.03**2 for _ in range(actionDimReshaped)]
        buildGaussianForPlanning = BuildGaussianFixCov(covForPlanning)
        composeCentralControlPolicyForPlanning = lambda observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction(
            reshapeAction, observe, actOneStepOneModelWolf,
            buildGaussianForPlanning)
        wolvesCentralControlPoliciesForPlanning = [
            composeCentralControlPolicyForPlanning(
                observeListBaseOnNumInWe[numAgentsInWe - 2])(
                    weModelsListBaseOnNumInWe[numAgentsInWe - 2],
                    numAgentsInWe)
            for numAgentsInWe in range(2, numWolves + 1)
        ]

        centralControlPolicyListBasedOnNumAgentsInWeForPlanning = wolvesCentralControlPoliciesForPlanning  # 0 for two agents in We, 1 for three agents...
        softPolicyInPlanning = lambda distribution: distribution
        policyForCommittedAgentInPlanning = PolicyForCommittedAgent(
            centralControlPolicyListBasedOnNumAgentsInWeForPlanning,
            softPolicyInPlanning, getStateThirdPersonPerspective)

        policyForUncommittedAgentInPlanning = PolicyForUncommittedAgent(
            possibleWolvesIds, randomPolicy, softPolicyInPlanning,
            getStateFirstPersonPerspective)

        def wolfChooseActionMethod(individualContinuousDistributions):
            """Sample one action per distribution and return them as a tuple of tuples."""
            centralControlAction = tuple([
                tuple(sampleFromContinuousSpace(distribution))
                for distribution in individualContinuousDistributions
            ])
            return centralControlAction

        # Position of selfId inside weIds.
        getSelfActionThirdPersonPerspective = lambda weIds, selfId: list(
            weIds).index(selfId)
        chooseCommittedAction = GetActionFromJointActionDistribution(
            wolfChooseActionMethod, getSelfActionThirdPersonPerspective)
        chooseUncommittedAction = sampleFromDistribution
        wolvesSampleIndividualActionGivenIntentionList = [
            SampleIndividualActionGivenIntention(
                selfId, policyForCommittedAgentInPlanning,
                policyForUncommittedAgentInPlanning, chooseCommittedAction,
                chooseUncommittedAction) for selfId in possibleWolvesIds
        ]

        # Sample and Save Trajectory
        trajectoriesWithIntentionDists = []
        for _ in range(self.numTrajectories):
            # Each sheep draws one of the loaded sheep models at random for
            # this trajectory.
            sheepModelsForPolicy = [
                sheepModelListOfDiffWolfReward[np.random.choice(
                    numAllSheepModels)] for _ in possibleSheepIds
            ]
            if sheepConcernSelfOnly:
                # The policy returns a one-entry {action: 1} distribution.
                composeSheepPolicy = lambda sheepModel: lambda state: {
                    tuple(
                        reshapeAction(
                            actOneStepOneModelSheep(sheepModel,
                                                    observeSheep(state)))):
                    1
                }
                sheepChooseActionMethod = sampleFromDistribution
                sheepSampleActions = [
                    SampleActionOnFixedIntention(
                        selfId, possibleWolvesIds,
                        composeSheepPolicy(sheepModel),
                        sheepChooseActionMethod, blocksID) for selfId,
                    sheepModel in zip(possibleSheepIds, sheepModelsForPolicy)
                ]
            else:
                # The policy returns the action tuple directly.
                composeSheepPolicy = lambda sheepModel: lambda state: tuple(
                    reshapeAction(
                        actOneStepOneModelSheep(sheepModel, observeSheep(state)
                                                )))
                sheepSampleActions = [
                    composeSheepPolicy(sheepModel)
                    for sheepModel in sheepModelsForPolicy
                ]

            wolvesSampleActions = [
                SampleActionOnChangableIntention(
                    updateIntention,
                    wolvesSampleIndividualActionGivenIntention)
                for updateIntention, wolvesSampleIndividualActionGivenIntention
                in zip(updateIntentions,
                       wolvesSampleIndividualActionGivenIntentionList)
            ]
            allIndividualSampleActions = wolvesSampleActions + sheepSampleActions
            sampleActionMultiAgent = SampleActionMultiagent(
                allIndividualSampleActions, recordActionForUpdateIntention)
            trajectory = sampleTrajectory(sampleActionMultiAgent)
            intentionDistributions = getIntentionDistributions()
            # Append the intention distributions to each trajectory step.
            trajectoryWithIntentionDists = [
                tuple(list(SASRPair) + list(intentionDist)) for SASRPair,
                intentionDist in zip(trajectory, intentionDistributions)
            ]
            trajectoriesWithIntentionDists.append(
                tuple(trajectoryWithIntentionDists))
            # Restore the intention updaters before the next trajectory.
            resetIntentions()
        trajectoryFixedParameters = {'maxRunningSteps': maxRunningSteps}
        self.saveTrajectoryByParameters(trajectoriesWithIntentionDists,
                                        trajectoryFixedParameters, parameters)
        print(np.mean([len(tra) for tra in trajectoriesWithIntentionDists]))

        # visualize
        if visualizeTraj:
            wolfColor = np.array([0.85, 0.35, 0.35])
            sheepColor = np.array([0.35, 0.85, 0.35])
            blockColor = np.array([0.25, 0.25, 0.25])
            entitiesColorList = [wolfColor] * numWolves + [
                sheepColor
            ] * numSheep + [blockColor] * numBlocks
            render = Render(entitiesSizeList, entitiesColorList, numAgents,
                            getPosFromAgentState)
            trajToRender = np.concatenate(trajectoriesWithIntentionDists)
            render(trajToRender)