def __call__(self, parameters): print(parameters) numWolves = parameters['numWolves'] numSheep = parameters['numSheep'] softParameterInInference = parameters['inferenceSoft'] softParameterInPlanning = parameters['wolfPolicySoft'] otherCompeteRate = parameters['otherCompeteRate'] competeDetectionRate = parameters['competeDetectionRate'] ## MDP Env # state is all multi agent state # action is all multi agent action xBoundary = [0, 600] yBoundary = [0, 600] numOfAgent = numWolves + numSheep reset = Reset(xBoundary, yBoundary, numOfAgent) possibleSheepIds = list(range(numSheep)) possibleWolvesIds = list(range(numSheep, numSheep + numWolves)) getSheepStatesFromAll = lambda state: np.array(state)[possibleSheepIds] getWolvesStatesFromAll = lambda state: np.array(state)[ possibleWolvesIds] killzoneRadius = 50 isTerminal = IsTerminal(killzoneRadius, getSheepStatesFromAll, getWolvesStatesFromAll) stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity( xBoundary, yBoundary) interpolateOneFrame = InterpolateOneFrame( stayInBoundaryByReflectVelocity) numFramesToInterpolate = 3 transit = TransitWithTerminalCheckOfInterpolation( numFramesToInterpolate, interpolateOneFrame, isTerminal) maxRunningSteps = 61 timeCost = 1 / maxRunningSteps terminalBonus = 1 rewardFunction = RewardFunctionByTerminal(timeCost, terminalBonus, isTerminal) forwardOneStep = ForwardOneStep(transit, rewardFunction) sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset, forwardOneStep) ## MDP Policy # Sheep Part # Sheep Policy Function numSheepPolicyStateSpace = 2 * (numWolves + 1) sheepActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)] preyPowerRatio = 12 sheepIndividualActionSpace = list( map(tuple, np.array(sheepActionSpace) * preyPowerRatio)) numSheepActionSpace = len(sheepIndividualActionSpace) regularizationFactor = 1e-4 generateSheepModel = GenerateModel(numSheepPolicyStateSpace, numSheepActionSpace, regularizationFactor) sharedWidths = [128] actionLayerWidths = [128] valueLayerWidths = [128] sheepNNDepth = 9 resBlockSize = 2 dropoutRate = 0.0 initializationMethod = 'uniform' initSheepModel = generateSheepModel(sharedWidths * sheepNNDepth, actionLayerWidths, valueLayerWidths, resBlockSize, initializationMethod, dropoutRate) sheepModelPath = os.path.join( '..', '..', 'data', 'preTrainModel', 'agentId=0.' 
+ str(numWolves) + '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=110_trainSteps=50000' ) sheepNNModel = restoreVariables(initSheepModel, sheepModelPath) sheepPolicy = ApproximatePolicy(sheepNNModel, sheepIndividualActionSpace) # Sheep Generate Action softParameterInPlanningForSheep = 2.0 softPolicyInPlanningForSheep = SoftDistribution( softParameterInPlanningForSheep) softenSheepPolicy = lambda relativeAgentsStatesForSheepPolicy: softPolicyInPlanningForSheep( sheepPolicy(relativeAgentsStatesForSheepPolicy)) sheepChooseActionMethod = sampleFromDistribution sheepSampleActions = [ SampleActionOnFixedIntention(selfId, possibleWolvesIds, softenSheepPolicy, sheepChooseActionMethod) for selfId in possibleSheepIds ] # Wolves Part # Percept Action For Inference perceptAction = lambda action: action # Policy Likelihood function: Wolf Centrol Control NN Policy Given Intention numWolvesStateSpaces = [ 2 * (numInWe + 1) for numInWe in range(2, numWolves + 1) ] actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7)] predatorPowerRatio = 8 wolfIndividualActionSpace = list( map(tuple, np.array(actionSpace) * predatorPowerRatio)) wolvesCentralControlActionSpaces = [ list(it.product(wolfIndividualActionSpace, repeat=numInWe)) for numInWe in range(2, numWolves + 1) ] numWolvesCentralControlActionSpaces = [ len(wolvesCentralControlActionSpace) for wolvesCentralControlActionSpace in wolvesCentralControlActionSpaces ] regularizationFactor = 1e-4 generateWolvesCentralControlModels = [ GenerateModel(numStateSpace, numActionSpace, regularizationFactor) for numStateSpace, numActionSpace in zip( numWolvesStateSpaces, numWolvesCentralControlActionSpaces) ] sharedWidths = [128] actionLayerWidths = [128] valueLayerWidths = [128] wolfNNDepth = 9 resBlockSize = 2 dropoutRate = 0.0 initializationMethod = 'uniform' initWolvesCentralControlModels = [ generateWolvesCentralControlModel(sharedWidths * wolfNNDepth, actionLayerWidths, valueLayerWidths, resBlockSize, initializationMethod, dropoutRate) for generateWolvesCentralControlModel in generateWolvesCentralControlModels ] NNNumSimulations = 250 wolvesModelPaths = [ os.path.join( '..', '..', 'data', 'preTrainModel', 'agentId=' + str(8 * np.sum([10**_ for _ in range(numInWe)])) + '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=' + str(NNNumSimulations) + '_trainSteps=50000') for numInWe in range(2, numWolves + 1) ] print(wolvesModelPaths) wolvesCentralControlNNModels = [ restoreVariables(initWolvesCentralControlModel, wolvesModelPath) for initWolvesCentralControlModel, wolvesModelPath in zip( initWolvesCentralControlModels, wolvesModelPaths) ] wolvesCentralControlPolicies = [ ApproximatePolicy(NNModel, actionSpace) for NNModel, actionSpace in zip(wolvesCentralControlNNModels, wolvesCentralControlActionSpaces) ] centralControlPolicyListBasedOnNumAgentsInWe = wolvesCentralControlPolicies # 0 for two agents in We, 1 for three agents... 
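# ----------------------------------------------------------------------
# Note on SoftDistribution: the sheep planning policy above and the wolf
# inference/planning policies below are wrapped in
# SoftDistribution(softParameter). A minimal sketch of the assumed
# behaviour (the real class is imported from the src modules and may
# differ in detail) is a temperature re-weighting of a discrete action
# distribution:
#
#     def softDistributionSketch(softParameter, actionDist):
#         # raise each probability to the power of softParameter and
#         # renormalise; softParameter > 1 sharpens, < 1 flattens
#         unnormalized = {a: p ** softParameter for a, p in actionDist.items()}
#         total = sum(unnormalized.values())
#         return {a: p / total for a, p in unnormalized.items()}
# ----------------------------------------------------------------------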
softPolicyInInference = SoftDistribution(softParameterInInference) policyForCommittedAgentsInInference = PolicyForCommittedAgent( centralControlPolicyListBasedOnNumAgentsInWe, softPolicyInInference, getStateOrActionThirdPersonPerspective) concernedAgentsIds = [2] calCommittedAgentsPolicyLikelihood = CalCommittedAgentsPolicyLikelihood( concernedAgentsIds, policyForCommittedAgentsInInference) getGoalStateForIndividualHeatseeking = lambda statesRelative: np.array( statesRelative)[0] getSelfStateForIndividualHeatseeking = lambda statesRelative: np.array( statesRelative)[1] heatseekingPrecesion = 1.83 heatSeekingDiscreteStochasticPolicy = HeatSeekingDiscreteStochasticPolicy( heatseekingPrecesion, wolfIndividualActionSpace, getSelfStateForIndividualHeatseeking, getGoalStateForIndividualHeatseeking) policyForUncommittedAgentsInInference = PolicyForUncommittedAgent( possibleWolvesIds, heatSeekingDiscreteStochasticPolicy, softPolicyInInference, getStateOrActionFirstPersonPerspective) calUncommittedAgentsPolicyLikelihood = CalUncommittedAgentsPolicyLikelihood( possibleWolvesIds, concernedAgentsIds, policyForUncommittedAgentsInInference) # Joint Likelihood calJointLikelihood = lambda intention, state, perceivedAction: calCommittedAgentsPolicyLikelihood(intention, state, perceivedAction) * \ calUncommittedAgentsPolicyLikelihood(intention, state, perceivedAction) wolvesValueListBasedOnNumAgentsInWe = [ ApproximateValue(NNModel) for NNModel in wolvesCentralControlNNModels ] calIntentionValueGivenState = CalIntentionValueGivenState( wolvesValueListBasedOnNumAgentsInWe) softParamterForValue = 0.01 softValueToBuildDistribution = SoftMax(softParamterForValue) adjustIntentionPriorGivenValueOfState = AdjustIntentionPriorGivenValueOfState( calIntentionValueGivenState, softValueToBuildDistribution) # Sample and Save Trajectory trajectoriesWithIntentionDists = [] for trajectoryId in range(self.numTrajectories): # Intention Prior For inference otherWolfPossibleIntentionSpaces = {0: [(0, (1, 2))], 1: [(0, ())]} otherIntentionType = np.random.choice( [1, 0], p=[otherCompeteRate, 1 - otherCompeteRate]) otherWolfIntentionSpace = otherWolfPossibleIntentionSpaces[ otherIntentionType] selfPossibleIntentionSpaces = { 0: [(0, (1, 2))], 0.5: [(0, (1, 2)), (0, ())], 1: [(0, ())] } selfWolfIntentionSpace = selfPossibleIntentionSpaces[ competeDetectionRate] intentionSpacesForAllWolves = [ selfWolfIntentionSpace, otherWolfIntentionSpace ] wolvesIntentionPriors = [{ tuple(intention): 1 / len(allPossibleIntentionsOneWolf) for intention in allPossibleIntentionsOneWolf } for allPossibleIntentionsOneWolf in intentionSpacesForAllWolves] # Infer and update Intention variablesForAllWolves = [[ intentionSpace ] for intentionSpace in intentionSpacesForAllWolves] jointHypothesisSpaces = [ pd.MultiIndex.from_product(variables, names=['intention']) for variables in variablesForAllWolves ] concernedHypothesisVariable = ['intention'] priorDecayRate = 1 softPrior = SoftDistribution(priorDecayRate) inferIntentionOneStepList = [ InferOneStep(jointHypothesisSpace, concernedHypothesisVariable, calJointLikelihood, softPrior) for jointHypothesisSpace in jointHypothesisSpaces ] chooseIntention = sampleFromDistribution valuePriorEndTime = -100 updateIntentions = [ UpdateIntention(intentionPrior, valuePriorEndTime, adjustIntentionPriorGivenValueOfState, perceptAction, inferIntentionOneStep, chooseIntention) for intentionPrior, inferIntentionOneStep in zip( wolvesIntentionPriors, inferIntentionOneStepList) ] # reset intention and adjuste intention 
prior attributes tools for multiple trajectory intentionResetAttributes = [ 'timeStep', 'lastState', 'lastAction', 'intentionPrior', 'formerIntentionPriors' ] intentionResetAttributeValues = [ dict( zip(intentionResetAttributes, [0, None, None, intentionPrior, [intentionPrior]])) for intentionPrior in wolvesIntentionPriors ] resetIntentions = ResetObjects(intentionResetAttributeValues, updateIntentions) returnAttributes = ['formerIntentionPriors'] getIntentionDistributions = GetObjectsValuesOfAttributes( returnAttributes, updateIntentions) attributesToRecord = ['lastAction'] recordActionForUpdateIntention = RecordValuesForObjects( attributesToRecord, updateIntentions) # Wovels Generate Action softPolicyInPlanning = SoftDistribution(softParameterInPlanning) policyForCommittedAgentInPlanning = PolicyForCommittedAgent( centralControlPolicyListBasedOnNumAgentsInWe, softPolicyInPlanning, getStateOrActionThirdPersonPerspective) policyForUncommittedAgentInPlanning = PolicyForUncommittedAgent( possibleWolvesIds, heatSeekingDiscreteStochasticPolicy, softPolicyInPlanning, getStateOrActionFirstPersonPerspective) wolfChooseActionMethod = sampleFromDistribution getSelfActionThirdPersonPerspective = lambda weIds, selfId: list( weIds).index(selfId) chooseCommittedAction = GetActionFromJointActionDistribution( wolfChooseActionMethod, getSelfActionThirdPersonPerspective) chooseUncommittedAction = sampleFromDistribution wolvesSampleIndividualActionGivenIntentionList = [ SampleIndividualActionGivenIntention( selfId, policyForCommittedAgentInPlanning, policyForUncommittedAgentInPlanning, chooseCommittedAction, chooseUncommittedAction) for selfId in possibleWolvesIds ] wolvesSampleActions = [ SampleActionOnChangableIntention( updateIntention, wolvesSampleIndividualActionGivenIntention) for updateIntention, wolvesSampleIndividualActionGivenIntention in zip(updateIntentions, wolvesSampleIndividualActionGivenIntentionList) ] allIndividualSampleActions = sheepSampleActions + wolvesSampleActions sampleActionMultiAgent = SampleActionMultiagent( allIndividualSampleActions, recordActionForUpdateIntention) trajectory = sampleTrajectory(sampleActionMultiAgent) intentionDistributions = getIntentionDistributions() trajectoryWithIntentionDists = [ tuple(list(SASRPair) + list(intentionDist)) for SASRPair, intentionDist in zip(trajectory, intentionDistributions) ] trajectoriesWithIntentionDists.append( tuple(trajectoryWithIntentionDists)) resetIntentions() #print(intentionDistributions[-1], otherCompeteRate) trajectoryFixedParameters = { 'sheepPolicySoft': softParameterInPlanningForSheep, 'wolfPolicySoft': softParameterInPlanning, 'maxRunningSteps': maxRunningSteps, 'competePolicy': 'heatseeking', 'NNNumSimulations': NNNumSimulations, 'heatseekingPrecesion': heatseekingPrecesion } self.saveTrajectoryByParameters(trajectoriesWithIntentionDists, trajectoryFixedParameters, parameters) print(np.mean([len(tra) for tra in trajectoriesWithIntentionDists]))
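# ----------------------------------------------------------------------
# For reference: every InferOneStep constructed above performs one Bayesian
# update of a wolf's intention posterior. A minimal sketch of that update,
# assuming the prior is a mapping from intention tuples to probabilities
# (illustrative only; the actual implementation is imported):
#
#     def inferOneStepSketch(prior, state, perceivedAction):
#         unnormalized = {
#             intention: prior[intention]
#                        * calJointLikelihood(intention, state, perceivedAction)
#             for intention in prior}
#         evidence = sum(unnormalized.values())
#         return {intention: p / evidence for intention, p in unnormalized.items()}
#
# With priorDecayRate = 1 the soft prior should leave the updated posterior
# unchanged between steps; a decay rate below 1 would gradually flatten it.
# ----------------------------------------------------------------------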
def __call__(self, parameters): print(parameters) valuePriorEndTime = -100 deviationFor2DAction = 1.0 rationalityBetaInInference = 1.0 numWolves = parameters['numWolves'] numSheep = parameters['numSheep'] wolfType = parameters['wolfType'] wolfSelfish = 0.0 if wolfType == 'sharedAgencyBySharedRewardWolf' else 1.0 perturbedWolfID = parameters['perturbedWolfID'] perturbedWolfGoalID = parameters['perturbedWolfGoalID'] ## MDP Env numBlocks = 2 numAgents = numWolves + numSheep numEntities = numAgents + numBlocks wolvesID = list(range(numWolves)) sheepsID = list(range(numWolves, numWolves + numSheep)) blocksID = list(range(numAgents, numEntities)) sheepSize = 0.05 wolfSize = 0.075 blockSize = 0.2 entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [blockSize] * numBlocks costActionRatio = 0.0 sheepSpeedMultiplier = 1.0 sheepMaxSpeed = 1.3 * sheepSpeedMultiplier wolfMaxSpeed = 1.0 blockMaxSpeed = None entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheep + [blockMaxSpeed] * numBlocks entitiesMovableList = [True] * numAgents + [False] * numBlocks massList = [1.0] * numEntities collisionReward = 1 # for evaluation, count # of bites isCollision = IsCollision(getPosFromAgentState) rewardAllWolves = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision, collisionReward, wolfSelfish) rewardWolf = lambda state, action, nextState: np.sum(rewardAllWolves(state, action, nextState)) reshapeActionInTransit = lambda action: action getCollisionForce = GetCollisionForce() applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList) applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList, getCollisionForce, getPosFromAgentState) integrateState = IntegrateState(numEntities, entitiesMovableList, massList, entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState) transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit, applyActionForce, applyEnvironForce, integrateState) forwardOneStep = ForwardOneStep(transit, rewardWolf) reset = ResetMultiAgentChasingWithSeed(numAgents, numBlocks) isTerminal = lambda state: False maxRunningStepsToSample = 101 sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, isTerminal, reset, forwardOneStep) ## MDP Policy worldDim = 2 actionDim = worldDim * 2 + 1 layerWidth = [128, 128] maxTimeStep = 75 maxEpisode = 60000 dirName = os.path.dirname(__file__) # ------------ sheep recover variables ------------------------ numSheepToObserve = 1 sheepModelListOfDiffWolfReward = [] sheepTypeList = [0.0, 1.0] for sheepType in sheepTypeList: wolvesIDForSheepObserve = list(range(numWolves)) sheepsIDForSheepObserve = list(range(numWolves, numSheepToObserve + numWolves)) blocksIDForSheepObserve = list( range(numSheepToObserve + numWolves, numSheepToObserve + numWolves + numBlocks)) observeOneAgentForSheep = lambda agentID: Observe(agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve, blocksIDForSheepObserve, getPosFromAgentState, getVelFromAgentState) observeSheep = lambda state: [observeOneAgentForSheep(agentID)(state) for agentID in range(numWolves + numSheepToObserve)] obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve initObsForSheepParams = observeSheep(reset()[obsIDsForSheep]) obsShapeSheep = [initObsForSheepParams[obsID].shape[0] for obsID in range(len(initObsForSheepParams))] buildSheepModels = BuildMADDPGModels(actionDim, numWolves + numSheepToObserve, obsShapeSheep) sheepModelsList = [buildSheepModels(layerWidth, agentID) 
for agentID in range(numWolves, numWolves + numSheepToObserve)] sheepFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format( numWolves, numSheepToObserve, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, costActionRatio, sheepType) sheepModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel', sheepFileName + str(i)) for i in range(numWolves, numWolves + numSheepToObserve)] [restoreVariables(model, path) for model, path in zip(sheepModelsList, sheepModelPaths)] sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList actOneStep = ActOneStep(actByPolicyTrainNoNoisy) numAllSheepModels = len(sheepModelListOfDiffWolfReward) # ------------ recover variables for "we" ------------------------ numAgentsInWe = numWolves numSheepInWe = 1 numBlocksForWe = numBlocks wolvesIDForWolfObserve = list(range(numAgentsInWe)) sheepsIDForWolfObserve = list(range(numAgentsInWe, numSheepInWe + numAgentsInWe)) blocksIDForWolfObserve = list( range(numSheepInWe + numAgentsInWe, numSheepInWe + numAgentsInWe + numBlocksForWe)) observeOneAgentForWolf = lambda agentID: Observe(agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve, blocksIDForWolfObserve, getPosFromAgentState, getVelFromAgentState) observeWolf = lambda state: [observeOneAgentForWolf(agentID)(state) for agentID in range(numAgentsInWe + numSheepInWe)] obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve initObsForWolfParams = observeWolf(reset()[obsIDsForWolf]) obsShapeWolf = [initObsForWolfParams[obsID].shape[0] for obsID in range(len(initObsForWolfParams))] buildWolfModels = BuildMADDPGModels(actionDim, numAgentsInWe + numSheepInWe, obsShapeWolf) layerWidthForWolf = [128, 128] wolfModelsList = [buildWolfModels(layerWidthForWolf, agentID) for agentID in range(numAgentsInWe)] wolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format( numWolves, numSheepInWe, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, costActionRatio, wolfSelfish) wolfModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel', wolfFileName + str(i)) for i in range(numAgentsInWe)] [restoreVariables(model, path) for model, path in zip(wolfModelsList, wolfModelPaths)] # ------------ compose wolves policy no perturbation ------------------------ actionDimReshaped = 2 cov = [deviationFor2DAction ** 2 for _ in range(actionDimReshaped)] # 1 buildGaussian = BuildGaussianFixCov(cov) actOneStep = ActOneStep(actByPolicyTrainNoNoisy) reshapeAction = ReshapeAction() composeCentralControlPolicy = lambda observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction( reshapeAction, observe, actOneStep, buildGaussian) wolvesCentralControlPolicy = [composeCentralControlPolicy(observeWolf)(wolfModelsList, numAgentsInWe)] # input state, return a list of gaussian distributions with cov 1 softPolicyInInference = lambda distribution: distribution getStateThirdPersonPerspective = lambda state, goalId, weIds: getStateOrActionThirdPersonPerspective(state, goalId, weIds, blocksID) # nochange policyForCommittedAgentsInInference = PolicyForCommittedAgent(wolvesCentralControlPolicy, softPolicyInInference, getStateThirdPersonPerspective) # same as wolvesCentralControlPolicy(state) concernedAgentsIds = wolvesID calCommittedAgentsPolicyLikelihood = CalCommittedAgentsContinuousPolicyLikelihood(concernedAgentsIds, policyForCommittedAgentsInInference, rationalityBetaInInference) randomActionSpace = [(5, 
0), (3.5, 3.5), (0, 5), (-3.5, 3.5), (-5, 0), (-3.5, -3.5), (0, -5), (3.5, -3.5), (0, 0)] randomPolicy = RandomPolicy(randomActionSpace) getStateFirstPersonPerspective = lambda state, goalId, weIds, selfId: getStateOrActionFirstPersonPerspective( state, goalId, weIds, selfId, blocksID) policyForUncommittedAgentsInInference = PolicyForUncommittedAgent(wolvesID, randomPolicy, softPolicyInInference, getStateFirstPersonPerspective) # random policy, returns action distribution calUncommittedAgentsPolicyLikelihood = CalUncommittedAgentsPolicyLikelihood(wolvesID, concernedAgentsIds, policyForUncommittedAgentsInInference) # returns 1 # Joint Likelihood calJointLikelihood = lambda intention, state, perceivedAction: calCommittedAgentsPolicyLikelihood(intention, state, perceivedAction) * \ calUncommittedAgentsPolicyLikelihood(intention, state, perceivedAction) # __* 1 # ------------ wolves intention ------------------------ intentionSpacesForAllWolves = [tuple(it.product(sheepsID, [tuple(wolvesID)])) for wolfId in wolvesID] # <class 'tuple'>: ((3, (0, 1, 2)), (4, (0, 1, 2)), (5, (0, 1, 2)), (6, (0, 1, 2))) print('intentionSpacesForAllWolves', intentionSpacesForAllWolves) wolvesIntentionPriors = [ {tuple(intention): 1 / len(allPossibleIntentionsOneWolf) for intention in allPossibleIntentionsOneWolf} for allPossibleIntentionsOneWolf in intentionSpacesForAllWolves] perceptSelfAction = SampleNoisyAction(deviationFor2DAction) perceptOtherAction = SampleNoisyAction(deviationFor2DAction) perceptAction = PerceptImaginedWeAction(wolvesID, perceptSelfAction, perceptOtherAction) # input self, others action # Infer and update Intention variablesForAllWolves = [[intentionSpace] for intentionSpace in intentionSpacesForAllWolves] jointHypothesisSpaces = [pd.MultiIndex.from_product(variables, names=['intention']) for variables in variablesForAllWolves] concernedHypothesisVariable = ['intention'] priorDecayRate = 1 softPrior = SoftDistribution(priorDecayRate) # no change inferIntentionOneStepList = [InferOneStep(jointHypothesisSpace, concernedHypothesisVariable, calJointLikelihood, softPrior) for jointHypothesisSpace in jointHypothesisSpaces] if numSheep == 1: inferIntentionOneStepList = [lambda prior, state, action: prior] * 3 adjustIntentionPriorGivenValueOfState = lambda state: 1 chooseIntention = sampleFromDistribution updateIntentions = [UpdateIntention(intentionPrior, valuePriorEndTime, adjustIntentionPriorGivenValueOfState, perceptAction, inferIntentionOneStep, chooseIntention) for intentionPrior, inferIntentionOneStep in zip(wolvesIntentionPriors, inferIntentionOneStepList)] # reset intention and adjust intention prior attributes tools for multiple trajectory intentionResetAttributes = ['timeStep', 'lastState', 'lastAction', 'intentionPrior', 'formerIntentionPriors'] intentionResetAttributeValues = [ dict(zip(intentionResetAttributes, [0, None, None, intentionPrior, [intentionPrior]])) for intentionPrior in wolvesIntentionPriors] resetIntentions = ResetObjects(intentionResetAttributeValues, updateIntentions) returnAttributes = ['formerIntentionPriors'] getIntentionDistributions = GetObjectsValuesOfAttributes(returnAttributes, updateIntentions[1:]) attributesToRecord = ['lastAction'] recordActionForUpdateIntention = RecordValuesForObjects(attributesToRecord, updateIntentions) # Wovels Generate Action #TODO covForPlanning = [0.00000001 for _ in range(actionDimReshaped)] # covForPlanning = [0.03 ** 2 for _ in range(actionDimReshaped)] buildGaussianForPlanning = BuildGaussianFixCov(covForPlanning) 
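# ----------------------------------------------------------------------
# The planning covariance above (1e-8 per dimension) makes the sampled
# central-control action effectively deterministic, whereas the inference
# covariance (deviationFor2DAction ** 2 = 1) keeps the likelihood of
# perceived continuous actions smooth. A minimal sketch of the assumed
# Gaussian wrapper, using scipy for illustration only (the real
# BuildGaussianFixCov is imported):
#
#     import numpy as np
#     from scipy.stats import multivariate_normal
#
#     def buildGaussianFixCovSketch(cov):
#         return lambda mean: multivariate_normal(mean=mean, cov=np.diag(cov))
#
# Each wolf's deterministic MADDPG action then serves as the mean of one
# such Gaussian, and the composed central-control policy returns one
# distribution per agent in the "we".
# ----------------------------------------------------------------------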
composeCentralControlPolicyForPlanning = lambda \ observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction(reshapeAction, observe, actOneStep, buildGaussianForPlanning) wolvesCentralControlPoliciesForPlanning = [ composeCentralControlPolicyForPlanning(observeWolf)(wolfModelsList, numAgentsInWe)] centralControlPolicyListBasedOnNumAgentsInWeForPlanning = wolvesCentralControlPoliciesForPlanning # 0 for two agents in We, 1 for three agents... softPolicyInPlanning = lambda distribution: distribution policyForCommittedAgentInPlanning = PolicyForCommittedAgent( centralControlPolicyListBasedOnNumAgentsInWeForPlanning, softPolicyInPlanning, getStateThirdPersonPerspective) policyForUncommittedAgentInPlanning = PolicyForUncommittedAgent(wolvesID, randomPolicy, softPolicyInPlanning, getStateFirstPersonPerspective) def wolfChooseActionMethod(individualContinuousDistributions): centralControlAction = tuple( [tuple(sampleFromContinuousSpace(distribution)) for distribution in individualContinuousDistributions]) return centralControlAction getSelfActionIDInThirdPersonPerspective = lambda weIds, selfId: list(weIds).index(selfId) chooseCommittedAction = GetActionFromJointActionDistribution(wolfChooseActionMethod, getSelfActionIDInThirdPersonPerspective) chooseUncommittedAction = sampleFromDistribution wolvesSampleIndividualActionGivenIntentionList = [ SampleIndividualActionGivenIntention(selfId, policyForCommittedAgentInPlanning, policyForUncommittedAgentInPlanning, chooseCommittedAction, chooseUncommittedAction) for selfId in wolvesID] # ------------------- recover one wolf model that only concerns sheep 0 ------------------- numSheepForPerturbedWolf = 1 wolvesIDForPerturbedWolf = wolvesID sheepsIDForPerturbedWolf = [sheepsID[perturbedWolfGoalID]] blocksIDForPerturbedWolf = list(range(numWolves + numSheep, numEntities)) # skip the unattended sheep id observeOneAgentForPerturbedWolf = lambda agentID: Observe(agentID, wolvesIDForPerturbedWolf, sheepsIDForPerturbedWolf, blocksIDForPerturbedWolf, getPosFromAgentState, getVelFromAgentState) observePerturbedWolf = lambda state: [observeOneAgentForPerturbedWolf(agentID)(state) for agentID in wolvesIDForPerturbedWolf + sheepsIDForPerturbedWolf] initObsForPerturbedWolfParams = observePerturbedWolf(reset()) obsShapePerturbedWolf = [initObsForPerturbedWolfParams[obsID].shape[0] for obsID in range(len(initObsForPerturbedWolfParams))] buildPerturbedWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheepForPerturbedWolf, obsShapePerturbedWolf) layerWidthForWolf = [128, 128] perturbedWolfModel = buildPerturbedWolfModels(layerWidthForWolf, perturbedWolfID) perturbedWolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format( numWolves, numSheepForPerturbedWolf, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, costActionRatio, wolfSelfish) perturbedWolfModelPath = os.path.join(dirName, '..', '..', 'data', 'preTrainModel', perturbedWolfFileName + str(perturbedWolfID)) restoreVariables(perturbedWolfModel, perturbedWolfModelPath) # ------------------- Sample and Save Trajectory ------------------- wolvesSampleActions = [ SampleActionOnChangableIntention(updateIntention, wolvesSampleIndividualActionGivenIntention) for updateIntention, wolvesSampleIndividualActionGivenIntention in zip(updateIntentions, wolvesSampleIndividualActionGivenIntentionList)] perturbedWolfSampleActions = lambda state: tuple(reshapeAction(actOneStep(perturbedWolfModel, observePerturbedWolf(state)))) wolvesSampleActionsPerturbed = 
wolvesSampleActions  #.copy()
wolvesSampleActionsPerturbed[perturbedWolfID] = perturbedWolfSampleActions

trajectoriesWithIntentionDists = []
for trajectoryId in range(self.numTrajectories):
    sheepModelsForPolicy = [
        sheepModelListOfDiffWolfReward[np.random.choice(numAllSheepModels)]
        for sheepId in sheepsID]
    composeSheepPolicy = lambda sheepModel: lambda state: {
        tuple(reshapeAction(actOneStep(sheepModel, observeSheep(state)))): 1}
    sheepChooseActionMethod = sampleFromDistribution
    sheepSampleActions = [
        SampleActionOnFixedIntention(selfId, wolvesID, composeSheepPolicy(sheepModel),
                                     sheepChooseActionMethod, blocksID)
        for selfId, sheepModel in zip(sheepsID, sheepModelsForPolicy)]

    allIndividualSampleActions = wolvesSampleActions + sheepSampleActions
    sampleActionMultiAgent = SampleActionMultiagent(
        allIndividualSampleActions, recordActionForUpdateIntention)
    allIndividualSampleActionsPerturbed = wolvesSampleActionsPerturbed + sheepSampleActions
    sampleActionMultiAgentPerturbed = SampleActionMultiagent(
        allIndividualSampleActionsPerturbed, recordActionForUpdateIntention)

    # trajectory = sampleTrajectory(sampleActionMultiAgentPerturbed)
    trajectory = sampleTrajectory(sampleActionMultiAgentPerturbed)
    intentionDistributions = getIntentionDistributions()
    trajectoryWithIntentionDists = [
        tuple(list(SASRPair) + list(intentionDist))
        for SASRPair, intentionDist in zip(trajectory, intentionDistributions)]
    trajectoriesWithIntentionDists.append(tuple(trajectoryWithIntentionDists))
    # trajectoriesWithIntentionDists.append(trajectory)
    resetIntentions()

trajectoryFixedParameters = {'maxRunningStepsToSample': maxRunningStepsToSample}
self.saveTrajectoryByParameters(trajectoriesWithIntentionDists, trajectoryFixedParameters, parameters)
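# ----------------------------------------------------------------------
# Caveat: because ".copy()" is commented out where wolvesSampleActionsPerturbed
# is created, it aliases wolvesSampleActions, so installing the perturbed
# wolf's sampler also changes the unperturbed list. That is harmless here,
# since only sampleActionMultiAgentPerturbed is rolled out, but an
# independent unperturbed baseline would need an explicit copy, e.g.:
#
#     wolvesSampleActionsPerturbed = list(wolvesSampleActions)
#     wolvesSampleActionsPerturbed[perturbedWolfID] = perturbedWolfSampleActions
# ----------------------------------------------------------------------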
def __call__(self, parameters): print(parameters) visualizeTraj = False numWolves = parameters['numWolves'] numSheep = parameters['numSheep'] softParamterForValue = parameters['valuePriorSoftMaxBeta'] valuePriorEndTime = parameters['valuePriorEndTime'] deviationFor2DAction = parameters['deviationFor2DAction'] rationalityBetaInInference = parameters['rationalityBetaInInference'] wolfType = parameters['wolfType'] sheepConcern = parameters['sheepConcern'] print(rationalityBetaInInference) ## MDP Env # state is all multi agent state # action is all multi agent action wolvesID = list(range(numWolves)) sheepsID = list(range(numWolves, numWolves + numSheep)) possibleWolvesIds = wolvesID possibleSheepIds = sheepsID numAgents = numWolves + numSheep numBlocks = 5 - numWolves blocksID = list(range(numAgents, numAgents + numBlocks)) numEntities = numAgents + numBlocks sheepSize = 0.05 wolfSize = 0.075 blockSize = 0.2 sheepMaxSpeed = 1.3 * 1 wolfMaxSpeed = 1.0 * 1 blockMaxSpeed = None entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [ blockSize ] * numBlocks entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [ sheepMaxSpeed ] * numSheep + [blockMaxSpeed] * numBlocks entitiesMovableList = [True] * numAgents + [False] * numBlocks massList = [1.0] * numEntities reshapeActionInTransit = lambda action: action getCollisionForce = GetCollisionForce() applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList) applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList, getCollisionForce, getPosFromAgentState) integrateState = IntegrateState(numEntities, entitiesMovableList, massList, entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState) transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit, applyActionForce, applyEnvironForce, integrateState) isCollision = IsCollision(getPosFromAgentState) collisonRewardWolf = 1 punishForOutOfBoundForWolf = lambda stata: 0 rewardWolf = RewardCentralControlPunishBond( wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBoundForWolf, collisonRewardWolf) collisonRewardSheep = -1 punishForOutOfBoundForSheep = PunishForOutOfBound() rewardSheep = RewardCentralControlPunishBond( sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBoundForSheep, collisonRewardSheep) forwardOneStep = ForwardOneStep(transit, rewardWolf) reset = ResetMultiAgentChasing(numAgents, numBlocks) isTerminal = lambda state: False maxRunningSteps = 101 sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset, forwardOneStep) ## MDP Policy worldDim = 2 actionDim = worldDim * 2 + 1 layerWidth = [64 * (numWolves - 1), 64 * (numWolves - 1)] # Sheep Part # ------------ model ------------------------ if sheepConcern == 'selfSheep': sheepConcernSelfOnly = 1 if sheepConcern == 'allSheep': sheepConcernSelfOnly = 0 numSheepToObserveWhenSheepSameOrDiff = [numSheep, 1] numSheepToObserve = numSheepToObserveWhenSheepSameOrDiff[ sheepConcernSelfOnly] print(numSheepToObserve) sheepModelListOfDiffWolfReward = [] sheepType = 'mixed' if sheepType == 'mixed': sheepPrefixList = ['maddpgIndividWolf', 'maddpg'] else: sheepPrefixList = [sheepType] for sheepPrefix in sheepPrefixList: wolvesIDForSheepObserve = list(range(numWolves)) sheepsIDForSheepObserve = list( range(numWolves, numSheepToObserve + numWolves)) blocksIDForSheepObserve = list( range(numSheepToObserve + numWolves, numSheepToObserve + numWolves + numBlocks)) observeOneAgentForSheep = lambda agentID: 
Observe( agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve, blocksIDForSheepObserve, getPosFromAgentState, getVelFromAgentState) observeSheep = lambda state: [ observeOneAgentForSheep(agentID)(state) for agentID in range(numWolves + numSheepToObserve) ] obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve initObsForSheepParams = observeSheep(reset()[obsIDsForSheep]) obsShapeSheep = [ initObsForSheepParams[obsID].shape[0] for obsID in range(len(initObsForSheepParams)) ] buildSheepModels = BuildMADDPGModels(actionDim, numWolves + numSheepToObserve, obsShapeSheep) sheepModelsList = [ buildSheepModels(layerWidth, agentID) for agentID in range(numWolves, numWolves + numSheepToObserve) ] dirName = os.path.dirname(__file__) maxEpisode = 60000 print(sheepPrefix) sheepFileName = "{}wolves{}sheep{}blocks{}eps_agent".format( numWolves, numSheepToObserve, numBlocks, maxEpisode) sheepModelPaths = [ os.path.join(dirName, '..', '..', 'data', 'preTrainModel', sheepPrefix + sheepFileName + str(i) + '60000eps') for i in range(numWolves, numWolves + numSheepToObserve) ] [ restoreVariables(model, path) for model, path in zip(sheepModelsList, sheepModelPaths) ] sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList # Sheep Policy Function reshapeAction = ReshapeAction() actOneStepOneModelSheep = ActOneStep(actByPolicyTrainNoisy) # Sheep Generate Action numAllSheepModels = len(sheepModelListOfDiffWolfReward) # Wolves Part # Intention Prior For inference #createIntentionSpaceGivenSelfId = CreateIntentionSpaceGivenSelfId(possibleSheepIds, possibleWolvesIds) #intentionSpacesForAllWolves = [createAllPossibleIntentionsGivenSelfId(wolfId) # for wolfId in possibleWolvesIds] intentionSpacesForAllWolves = [ tuple(it.product(possibleSheepIds, [tuple(possibleWolvesIds)])) for wolfId in possibleWolvesIds ] print(intentionSpacesForAllWolves) wolvesIntentionPriors = [{ tuple(intention): 1 / len(allPossibleIntentionsOneWolf) for intention in allPossibleIntentionsOneWolf } for allPossibleIntentionsOneWolf in intentionSpacesForAllWolves] # Percept Action For Inference #perceptAction = lambda action: action perceptSelfAction = SampleNoisyAction(deviationFor2DAction) perceptOtherAction = SampleNoisyAction(deviationFor2DAction) perceptAction = PerceptImaginedWeAction(possibleWolvesIds, perceptSelfAction, perceptOtherAction) #perceptAction = lambda action: action # Policy Likelihood function: Wolf Centrol Control NN Policy Given Intention # ------------ model ------------------------ weModelsListBaseOnNumInWe = [] observeListBaseOnNumInWe = [] for numAgentInWe in range(2, numWolves + 1): numBlocksForWe = 5 - numAgentInWe wolvesIDForWolfObserve = list(range(numAgentInWe)) sheepsIDForWolfObserve = list(range(numAgentInWe, 1 + numAgentInWe)) blocksIDForWolfObserve = list( range(1 + numAgentInWe, 1 + numAgentInWe + numBlocksForWe)) observeOneAgentForWolf = lambda agentID: Observe( agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve, blocksIDForWolfObserve, getPosFromAgentState, getVelFromAgentState) observeWolf = lambda state: [ observeOneAgentForWolf(agentID)(state) for agentID in range(numAgentInWe + 1) ] observeListBaseOnNumInWe.append(observeWolf) obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve initObsForWolfParams = observeWolf(reset()[obsIDsForWolf]) obsShapeWolf = [ initObsForWolfParams[obsID].shape[0] for obsID in range(len(initObsForWolfParams)) ] buildWolfModels = BuildMADDPGModels(actionDim, numAgentInWe + 
1, obsShapeWolf) layerWidthForWolf = [ 64 * (numAgentInWe - 1), 64 * (numAgentInWe - 1) ] wolfModelsList = [ buildWolfModels(layerWidthForWolf, agentID) for agentID in range(numAgentInWe) ] if wolfType == 'sharedAgencyByIndividualRewardWolf': wolfPrefix = 'maddpgIndividWolf' if wolfType == 'sharedAgencyBySharedRewardWolf': wolfPrefix = 'maddpg' wolfFileName = "{}wolves{}sheep{}blocks{}eps_agent".format( numAgentInWe, 1, numBlocksForWe, maxEpisode) wolfModelPaths = [ os.path.join(dirName, '..', '..', 'data', 'preTrainModel', wolfPrefix + wolfFileName + str(i) + '60000eps') for i in range(numAgentInWe) ] print(numAgentInWe, obsShapeWolf, wolfModelPaths) [ restoreVariables(model, path) for model, path in zip(wolfModelsList, wolfModelPaths) ] weModelsListBaseOnNumInWe.append(wolfModelsList) actionDimReshaped = 2 cov = [deviationFor2DAction**2 for _ in range(actionDimReshaped)] buildGaussian = BuildGaussianFixCov(cov) actOneStepOneModelWolf = ActOneStep(actByPolicyTrainNoNoisy) #actOneStepOneModelWolf = ActOneStep(actByPolicyTrainNoisy) composeCentralControlPolicy = lambda observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction( reshapeAction, observe, actOneStepOneModelWolf, buildGaussian) wolvesCentralControlPolicies = [ composeCentralControlPolicy( observeListBaseOnNumInWe[numAgentsInWe - 2])( weModelsListBaseOnNumInWe[numAgentsInWe - 2], numAgentsInWe) for numAgentsInWe in range(2, numWolves + 1) ] centralControlPolicyListBasedOnNumAgentsInWe = wolvesCentralControlPolicies # 0 for two agents in We, 1 for three agents... softPolicyInInference = lambda distribution: distribution getStateThirdPersonPerspective = lambda state, goalId, weIds: getStateOrActionThirdPersonPerspective( state, goalId, weIds, blocksID) policyForCommittedAgentsInInference = PolicyForCommittedAgent( centralControlPolicyListBasedOnNumAgentsInWe, softPolicyInInference, getStateThirdPersonPerspective) concernedAgentsIds = possibleWolvesIds calCommittedAgentsPolicyLikelihood = CalCommittedAgentsContinuousPolicyLikelihood( concernedAgentsIds, policyForCommittedAgentsInInference, rationalityBetaInInference) randomActionSpace = [(5, 0), (3.5, 3.5), (0, 5), (-3.5, 3.5), (-5, 0), (-3.5, -3.5), (0, -5), (3.5, -3.5), (0, 0)] randomPolicy = RandomPolicy(randomActionSpace) getStateFirstPersonPerspective = lambda state, goalId, weIds, selfId: getStateOrActionFirstPersonPerspective( state, goalId, weIds, selfId, blocksID) policyForUncommittedAgentsInInference = PolicyForUncommittedAgent( possibleWolvesIds, randomPolicy, softPolicyInInference, getStateFirstPersonPerspective) calUncommittedAgentsPolicyLikelihood = CalUncommittedAgentsPolicyLikelihood( possibleWolvesIds, concernedAgentsIds, policyForUncommittedAgentsInInference) # Joint Likelihood calJointLikelihood = lambda intention, state, perceivedAction: calCommittedAgentsPolicyLikelihood(intention, state, perceivedAction) * \ calUncommittedAgentsPolicyLikelihood(intention, state, perceivedAction) # Infer and update Intention variablesForAllWolves = [ [intentionSpace] for intentionSpace in intentionSpacesForAllWolves ] jointHypothesisSpaces = [ pd.MultiIndex.from_product(variables, names=['intention']) for variables in variablesForAllWolves ] concernedHypothesisVariable = ['intention'] priorDecayRate = 1 softPrior = SoftDistribution(priorDecayRate) inferIntentionOneStepList = [ InferOneStep(jointHypothesisSpace, concernedHypothesisVariable, calJointLikelihood, softPrior) for jointHypothesisSpace in jointHypothesisSpaces ] if numSheep == 1: inferIntentionOneStepList = 
[lambda prior, state, action: prior] * 3
adjustIntentionPriorGivenValueOfState = lambda state: 1
chooseIntention = sampleFromDistribution
updateIntentions = [
    UpdateIntention(intentionPrior, valuePriorEndTime, adjustIntentionPriorGivenValueOfState,
                    perceptAction, inferIntentionOneStep, chooseIntention)
    for intentionPrior, inferIntentionOneStep in zip(wolvesIntentionPriors, inferIntentionOneStepList)]

# reset intention and adjust intention prior attributes: tools for multiple trajectories
intentionResetAttributes = [
    'timeStep', 'lastState', 'lastAction', 'intentionPrior', 'formerIntentionPriors']
intentionResetAttributeValues = [
    dict(zip(intentionResetAttributes, [0, None, None, intentionPrior, [intentionPrior]]))
    for intentionPrior in wolvesIntentionPriors]
resetIntentions = ResetObjects(intentionResetAttributeValues, updateIntentions)
returnAttributes = ['formerIntentionPriors']
getIntentionDistributions = GetObjectsValuesOfAttributes(returnAttributes, updateIntentions)
attributesToRecord = ['lastAction']
recordActionForUpdateIntention = RecordValuesForObjects(attributesToRecord, updateIntentions)

# Wolves Generate Action
covForPlanning = [0.03 ** 2 for _ in range(actionDimReshaped)]
buildGaussianForPlanning = BuildGaussianFixCov(covForPlanning)
composeCentralControlPolicyForPlanning = lambda observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction(
    reshapeAction, observe, actOneStepOneModelWolf, buildGaussianForPlanning)
wolvesCentralControlPoliciesForPlanning = [
    composeCentralControlPolicyForPlanning(observeListBaseOnNumInWe[numAgentsInWe - 2])(
        weModelsListBaseOnNumInWe[numAgentsInWe - 2], numAgentsInWe)
    for numAgentsInWe in range(2, numWolves + 1)]
centralControlPolicyListBasedOnNumAgentsInWeForPlanning = wolvesCentralControlPoliciesForPlanning  # 0 for two agents in We, 1 for three agents...
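# ----------------------------------------------------------------------
# Bookkeeping notes:
# * Between trajectories, resetIntentions() restores each UpdateIntention
#   object to its initial state (timeStep = 0, lastState = None,
#   lastAction = None, the uniform intentionPrior, and formerIntentionPriors
#   re-seeded with that prior), so inference cannot leak across trajectories.
# * When numSheep == 1 the inference step is replaced by an identity update,
#   but the replacement list is hard-coded to length 3; if numWolves is not 3,
#   zip() against wolvesIntentionPriors silently truncates to the shorter
#   list, which is worth checking before changing numWolves.
# ----------------------------------------------------------------------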
softPolicyInPlanning = lambda distribution: distribution policyForCommittedAgentInPlanning = PolicyForCommittedAgent( centralControlPolicyListBasedOnNumAgentsInWeForPlanning, softPolicyInPlanning, getStateThirdPersonPerspective) policyForUncommittedAgentInPlanning = PolicyForUncommittedAgent( possibleWolvesIds, randomPolicy, softPolicyInPlanning, getStateFirstPersonPerspective) def wolfChooseActionMethod(individualContinuousDistributions): centralControlAction = tuple([ tuple(sampleFromContinuousSpace(distribution)) for distribution in individualContinuousDistributions ]) return centralControlAction getSelfActionThirdPersonPerspective = lambda weIds, selfId: list( weIds).index(selfId) chooseCommittedAction = GetActionFromJointActionDistribution( wolfChooseActionMethod, getSelfActionThirdPersonPerspective) chooseUncommittedAction = sampleFromDistribution wolvesSampleIndividualActionGivenIntentionList = [ SampleIndividualActionGivenIntention( selfId, policyForCommittedAgentInPlanning, policyForUncommittedAgentInPlanning, chooseCommittedAction, chooseUncommittedAction) for selfId in possibleWolvesIds ] # Sample and Save Trajectory trajectoriesWithIntentionDists = [] for trajectoryId in range(self.numTrajectories): sheepModelsForPolicy = [ sheepModelListOfDiffWolfReward[np.random.choice( numAllSheepModels)] for sheepId in possibleSheepIds ] if sheepConcernSelfOnly: composeSheepPolicy = lambda sheepModel: lambda state: { tuple( reshapeAction( actOneStepOneModelSheep(sheepModel, observeSheep(state)))): 1 } sheepChooseActionMethod = sampleFromDistribution sheepSampleActions = [ SampleActionOnFixedIntention( selfId, possibleWolvesIds, composeSheepPolicy(sheepModel), sheepChooseActionMethod, blocksID) for selfId, sheepModel in zip(possibleSheepIds, sheepModelsForPolicy) ] else: composeSheepPolicy = lambda sheepModel: lambda state: tuple( reshapeAction( actOneStepOneModelSheep(sheepModel, observeSheep(state) ))) sheepSampleActions = [ composeSheepPolicy(sheepModel) for sheepModel in sheepModelsForPolicy ] wolvesSampleActions = [ SampleActionOnChangableIntention( updateIntention, wolvesSampleIndividualActionGivenIntention) for updateIntention, wolvesSampleIndividualActionGivenIntention in zip(updateIntentions, wolvesSampleIndividualActionGivenIntentionList) ] allIndividualSampleActions = wolvesSampleActions + sheepSampleActions sampleActionMultiAgent = SampleActionMultiagent( allIndividualSampleActions, recordActionForUpdateIntention) trajectory = sampleTrajectory(sampleActionMultiAgent) intentionDistributions = getIntentionDistributions() trajectoryWithIntentionDists = [ tuple(list(SASRPair) + list(intentionDist)) for SASRPair, intentionDist in zip(trajectory, intentionDistributions) ] trajectoriesWithIntentionDists.append( tuple(trajectoryWithIntentionDists)) resetIntentions() #print(intentionDistributions) trajectoryFixedParameters = {'maxRunningSteps': maxRunningSteps} self.saveTrajectoryByParameters(trajectoriesWithIntentionDists, trajectoryFixedParameters, parameters) print(np.mean([len(tra) for tra in trajectoriesWithIntentionDists])) # visualize if visualizeTraj: wolfColor = np.array([0.85, 0.35, 0.35]) sheepColor = np.array([0.35, 0.85, 0.35]) blockColor = np.array([0.25, 0.25, 0.25]) entitiesColorList = [wolfColor] * numWolves + [ sheepColor ] * numSheep + [blockColor] * numBlocks render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState) trajToRender = np.concatenate(trajectoriesWithIntentionDists) render(trajToRender)
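# ----------------------------------------------------------------------
# For reference: a minimal sketch of how one environment step ties planning
# and inference together (illustrative only; SampleActionMultiagent is the
# imported implementation and its exact signatures may differ):
#
#     def sampleActionMultiAgentSketch(state):
#         jointAction = [sampleAction(state) for sampleAction in allIndividualSampleActions]
#         # store each wolf's 'lastAction' so the next step's perceptAction /
#         # InferOneStep can condition on the previous joint action
#         recordActionForUpdateIntention(jointAction)
#         return jointAction
#
# On the following step, each SampleActionOnChangableIntention first lets its
# UpdateIntention object revise the intention posterior from the recorded
# action, samples an intention, and then draws the wolf's action from the
# committed central-control policy for that intention.
# ----------------------------------------------------------------------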