def __call__(self, parameters):
    """Sample chasing trajectories with pretrained MADDPG wolves and sheep.

    Builds the multi-agent chasing MDP, restores pretrained wolf/sheep
    models from ``data/preTrainModel``, samples ``self.numTrajectories``
    trajectories, saves them via ``self.saveTrajectoryByParameters``, and
    optionally renders them.

    Args:
        parameters: dict with keys 'numWolves', 'numSheep', 'wolfType'
            ('sharedReward' | 'individualReward') and 'sheepConcern'
            ('selfSheep' | 'allSheep').

    Raises:
        ValueError: if 'sheepConcern' or 'wolfType' has an unrecognized
            value (previously this surfaced later as a NameError).
    """
    print(parameters)
    visualizeTraj = False
    numWolves = parameters['numWolves']
    numSheep = parameters['numSheep']
    wolfType = parameters['wolfType']
    sheepConcern = parameters['sheepConcern']

    ## MDP Env
    # state is all multi agent state; action is all multi agent action
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numWolves + numSheep))
    possibleWolvesIds = wolvesID
    possibleSheepIds = sheepsID
    numAgents = numWolves + numSheep
    numBlocks = 5 - numWolves  # total of 5 wolves+blocks; presumably keeps entity count fixed — TODO confirm
    blocksID = list(range(numAgents, numAgents + numBlocks))
    numEntities = numAgents + numBlocks

    sheepSize = 0.05
    wolfSize = 0.075
    blockSize = 0.2
    sheepMaxSpeed = 1.3 * 1
    wolfMaxSpeed = 1.0 * 1
    blockMaxSpeed = None  # blocks never move
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [blockSize] * numBlocks
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheep + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    # Physics: forces -> integration -> next state.
    reshapeActionInTransit = lambda action: action  # identity; reshaping happens in the policies
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                          getCollisionForce, getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit,
                                       applyActionForce, applyEnvironForce, integrateState)

    isCollision = IsCollision(getPosFromAgentState)
    collisonRewardWolf = 1
    punishForOutOfBoundForWolf = lambda state: 0  # wolves are never punished for leaving bounds
    rewardWolf = RewardCentralControlPunishBond(wolvesID, sheepsID, entitiesSizeList,
                                                getPosFromAgentState, isCollision,
                                                punishForOutOfBoundForWolf, collisonRewardWolf)
    collisonRewardSheep = -1
    punishForOutOfBoundForSheep = PunishForOutOfBound()
    # NOTE(review): rewardSheep is constructed but never used below; only the
    # wolf reward feeds ForwardOneStep. Kept for parity with the original code.
    rewardSheep = RewardCentralControlPunishBond(sheepsID, wolvesID, entitiesSizeList,
                                                 getPosFromAgentState, isCollision,
                                                 punishForOutOfBoundForSheep, collisonRewardSheep)

    forwardOneStep = ForwardOneStep(transit, rewardWolf)
    reset = ResetMultiAgentChasing(numAgents, numBlocks)
    isTerminal = lambda state: False  # fixed-horizon episodes only
    maxRunningSteps = 101
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset, forwardOneStep)

    ## MDP Policy
    worldDim = 2
    actionDim = worldDim * 2 + 1
    layerWidth = [64 * (numWolves - 1), 64 * (numWolves - 1)]

    # ------------ Sheep models ------------------------
    if sheepConcern == 'selfSheep':
        sheepConcernSelfOnly = 1
    elif sheepConcern == 'allSheep':
        sheepConcernSelfOnly = 0
    else:
        raise ValueError("unrecognized sheepConcern: {}".format(sheepConcern))
    numSheepToObserveWhenSheepSameOrDiff = [numSheep, 1]
    numSheepToObserve = numSheepToObserveWhenSheepSameOrDiff[sheepConcernSelfOnly]
    print(numSheepToObserve)

    sheepModelListOfDiffWolfReward = []
    sheepType = 'mixed'
    if sheepType == 'mixed':
        # Load sheep trained against both individually- and shared-rewarded wolves.
        sheepPrefixList = ['maddpgIndividWolf', 'maddpg']
    else:
        sheepPrefixList = [sheepType]
    for sheepPrefix in sheepPrefixList:
        wolvesIDForSheepObserve = list(range(numWolves))
        sheepsIDForSheepObserve = list(range(numWolves, numSheepToObserve + numWolves))
        blocksIDForSheepObserve = list(range(numSheepToObserve + numWolves,
                                             numSheepToObserve + numWolves + numBlocks))
        observeOneAgentForSheep = lambda agentID: Observe(
            agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
            blocksIDForSheepObserve, getPosFromAgentState, getVelFromAgentState)
        observeSheep = lambda state: [observeOneAgentForSheep(agentID)(state)
                                      for agentID in range(numWolves + numSheepToObserve)]
        obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
        initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
        obsShapeSheep = [initObsForSheepParams[obsID].shape[0]
                         for obsID in range(len(initObsForSheepParams))]
        buildSheepModels = BuildMADDPGModels(actionDim, numWolves + numSheepToObserve, obsShapeSheep)
        sheepModelsList = [buildSheepModels(layerWidth, agentID)
                           for agentID in range(numWolves, numWolves + numSheepToObserve)]
        dirName = os.path.dirname(__file__)
        maxEpisode = 60000
        print(sheepPrefix)
        sheepFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(
            numWolves, numSheepToObserve, numBlocks, maxEpisode)
        # NOTE(review): the trailing '60000eps' duplicates maxEpisode already
        # embedded in sheepFileName — matches the on-disk naming, verify paths exist.
        sheepModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                        sheepPrefix + sheepFileName + str(i) + '60000eps')
                           for i in range(numWolves, numWolves + numSheepToObserve)]
        for model, path in zip(sheepModelsList, sheepModelPaths):
            restoreVariables(model, path)
        sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList

    # Sheep policy function: noisy training policy for exploration-style actions.
    reshapeAction = ReshapeAction()
    actOneStepOneModelSheep = ActOneStep(actByPolicyTrainNoisy)
    numAllSheepModels = len(sheepModelListOfDiffWolfReward)

    # ------------ Wolves models ------------------------
    wolvesIDForWolfObserve = list(range(numWolves))
    sheepsIDForWolfObserve = list(range(numWolves, numSheep + numWolves))
    blocksIDForWolfObserve = list(range(numSheep + numWolves, numSheep + numWolves + numBlocks))
    observeOneAgentForWolf = lambda agentID: Observe(
        agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
        blocksIDForWolfObserve, getPosFromAgentState, getVelFromAgentState)
    observeWolf = lambda state: [observeOneAgentForWolf(agentID)(state)
                                 for agentID in range(numWolves + numSheep)]
    obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
    initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
    obsShapeWolf = [initObsForWolfParams[obsID].shape[0]
                    for obsID in range(len(initObsForWolfParams))]
    buildWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheep, obsShapeWolf)
    layerWidthForWolf = [64 * (numWolves - 1), 64 * (numWolves - 1)]
    wolfModelsList = [buildWolfModels(layerWidthForWolf, agentID) for agentID in range(numWolves)]

    if wolfType == 'sharedReward':
        prefix = 'maddpg'
    elif wolfType == 'individualReward':
        prefix = 'maddpgIndividWolf'
    else:
        raise ValueError("unrecognized wolfType: {}".format(wolfType))
    wolfFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(
        numWolves, numSheep, numBlocks, maxEpisode)
    wolfModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                   prefix + wolfFileName + str(i) + '60000eps')
                      for i in range(numWolves)]
    print(numWolves, obsShapeWolf, wolfModelPaths)
    for model, path in zip(wolfModelsList, wolfModelPaths):
        restoreVariables(model, path)

    # Wolf policy: deterministic action pushed through a small fixed-cov Gaussian.
    actionDimReshaped = 2
    cov = [0.03 ** 2 for _ in range(actionDimReshaped)]
    buildGaussian = BuildGaussianFixCov(cov)
    actOneStepOneModelWolf = ActOneStep(actByPolicyTrainNoNoisy)
    composeWolfPolicy = lambda wolfModel: lambda state: sampleFromContinuousSpace(
        buildGaussian(tuple(reshapeAction(actOneStepOneModelWolf(wolfModel, observeWolf(state))))))
    wolvesSampleActions = [composeWolfPolicy(wolfModel) for wolfModel in wolfModelsList]

    trajectories = []
    for trajectoryId in range(self.numTrajectories):
        # Each sheep is paired with a randomly drawn pretrained model per trajectory.
        sheepModelsForPolicy = [sheepModelListOfDiffWolfReward[np.random.choice(numAllSheepModels)]
                                for sheepId in possibleSheepIds]
        if sheepConcernSelfOnly:
            # Deterministic one-point action distribution per sheep.
            composeSheepPolicy = lambda sheepModel: lambda state: {
                tuple(reshapeAction(actOneStepOneModelSheep(sheepModel, observeSheep(state)))): 1}
            sheepChooseActionMethod = sampleFromDistribution
            sheepSampleActions = [
                SampleActionOnFixedIntention(selfId, possibleWolvesIds,
                                             composeSheepPolicy(sheepModel),
                                             sheepChooseActionMethod, blocksID)
                for selfId, sheepModel in zip(possibleSheepIds, sheepModelsForPolicy)]
        else:
            composeSheepPolicy = lambda sheepModel: lambda state: tuple(
                reshapeAction(actOneStepOneModelSheep(sheepModel, observeSheep(state))))
            sheepSampleActions = [composeSheepPolicy(sheepModel)
                                  for sheepModel in sheepModelsForPolicy]
        allIndividualSampleActions = wolvesSampleActions + sheepSampleActions
        sampleAction = lambda state: [sampleIndividualAction(state)
                                      for sampleIndividualAction in allIndividualSampleActions]
        trajectory = sampleTrajectory(sampleAction)
        trajectories.append(trajectory)

    trajectoryFixedParameters = {'maxRunningSteps': maxRunningSteps}
    self.saveTrajectoryByParameters(trajectories, trajectoryFixedParameters, parameters)
    print(np.mean([len(tra) for tra in trajectories]))

    # Optional visualization of all sampled trajectories.
    if visualizeTraj:
        wolfColor = np.array([0.85, 0.35, 0.35])
        sheepColor = np.array([0.35, 0.85, 0.35])
        blockColor = np.array([0.25, 0.25, 0.25])
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheep + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
        trajToRender = np.concatenate(trajectories)
        render(trajToRender)
def __call__(self, parameters):
    """Sample trajectories where one wolf is replaced by a single-goal model.

    Builds the chasing MDP, restores (a) sheep models, (b) wolves trained on
    all sheep, and (c) one "perturbed" wolf trained to chase only the sheep
    ``perturbedWolfGoalID``. That wolf is swapped in at ``perturbedWolfID``
    and ``self.numTrajectories`` trajectories are sampled and saved.

    Args:
        parameters: dict with keys 'numWolves', 'numSheep', 'wolfType'
            ('individualReward' => selfish wolves), 'perturbedWolfID' and
            'perturbedWolfGoalID'.
    """
    print(parameters)
    numWolves = parameters['numWolves']
    numSheep = parameters['numSheep']
    numBlocks = 2
    wolfSelfish = 1.0 if parameters['wolfType'] == 'individualReward' else 0.0
    perturbedWolfID = parameters['perturbedWolfID']
    perturbedWolfGoalID = parameters['perturbedWolfGoalID']

    ## MDP Env
    numAgents = numWolves + numSheep
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numWolves + numSheep))
    blocksID = list(range(numAgents, numEntities))

    sheepSize = 0.05
    wolfSize = 0.075
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [blockSize] * numBlocks
    costActionRatio = 0.0
    sheepSpeedMultiplier = 1.0
    sheepMaxSpeed = 1.3 * sheepSpeedMultiplier
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None  # blocks never move
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheep + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    collisionReward = 1  # for evaluation, count # of bites
    isCollision = IsCollision(getPosFromAgentState)
    rewardAllWolves = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision,
                                 collisionReward, wolfSelfish)
    # Scalar team reward: sum of the per-wolf rewards.
    rewardWolf = lambda state, action, nextState: np.sum(rewardAllWolves(state, action, nextState))

    # Physics: forces -> integration -> next state.
    reshapeActionInTransit = lambda action: action  # identity; reshaping happens in the policies
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                          getCollisionForce, getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit,
                                       applyActionForce, applyEnvironForce, integrateState)

    forwardOneStep = ForwardOneStep(transit, rewardWolf)
    reset = ResetMultiAgentChasingWithSeed(numAgents, numBlocks)
    isTerminal = lambda state: False  # fixed-horizon episodes only
    maxRunningStepsToSample = 101
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, isTerminal, reset, forwardOneStep)

    ## MDP Policy
    worldDim = 2
    actionDim = worldDim * 2 + 1
    layerWidth = [128, 128]
    maxTimeStep = 75
    maxEpisode = 60000
    dirName = os.path.dirname(__file__)

    # ------------ sheep recover variables ------------------------
    numSheepToObserve = 1
    sheepModelListOfDiffWolfReward = []
    sheepTypeList = [0.0, 1.0]  # sheep trained vs shared (0.0) and selfish (1.0) wolves
    for sheepType in sheepTypeList:
        wolvesIDForSheepObserve = list(range(numWolves))
        sheepsIDForSheepObserve = list(range(numWolves, numSheepToObserve + numWolves))
        blocksIDForSheepObserve = list(range(numSheepToObserve + numWolves,
                                             numSheepToObserve + numWolves + numBlocks))
        observeOneAgentForSheep = lambda agentID: Observe(
            agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
            blocksIDForSheepObserve, getPosFromAgentState, getVelFromAgentState)
        observeSheep = lambda state: [observeOneAgentForSheep(agentID)(state)
                                      for agentID in range(numWolves + numSheepToObserve)]
        obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
        initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
        obsShapeSheep = [initObsForSheepParams[obsID].shape[0]
                         for obsID in range(len(initObsForSheepParams))]
        buildSheepModels = BuildMADDPGModels(actionDim, numWolves + numSheepToObserve, obsShapeSheep)
        sheepModelsList = [buildSheepModels(layerWidth, agentID)
                           for agentID in range(numWolves, numWolves + numSheepToObserve)]
        sheepFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
            numWolves, numSheepToObserve, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, costActionRatio, sheepType)
        sheepModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                        sheepFileName + str(i))
                           for i in range(numWolves, numWolves + numSheepToObserve)]
        for model, path in zip(sheepModelsList, sheepModelPaths):
            restoreVariables(model, path)
        sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList

    # Deterministic (no exploration noise) acting for evaluation.
    actOneStep = ActOneStep(actByPolicyTrainNoNoisy)
    numAllSheepModels = len(sheepModelListOfDiffWolfReward)

    # ------------ Recover one perturbed wolf for comparison -------
    numSheepForPerturbedWolf = 1
    wolvesIDForPerturbedWolf = wolvesID
    sheepsIDForPerturbedWolf = [sheepsID[perturbedWolfGoalID]]
    blocksIDForPerturbedWolf = list(range(numWolves + numSheep, numEntities))  # skip the unattended sheep id
    observeOneAgentForPerturbedWolf = lambda agentID: Observe(
        agentID, wolvesIDForPerturbedWolf, sheepsIDForPerturbedWolf,
        blocksIDForPerturbedWolf, getPosFromAgentState, getVelFromAgentState)
    observePerturbedWolf = lambda state: [
        observeOneAgentForPerturbedWolf(agentID)(state)
        for agentID in wolvesIDForPerturbedWolf + sheepsIDForPerturbedWolf]
    initObsForPerturbedWolfParams = observePerturbedWolf(reset())
    obsShapePerturbedWolf = [initObsForPerturbedWolfParams[obsID].shape[0]
                             for obsID in range(len(initObsForPerturbedWolfParams))]
    buildPerturbedWolfModels = BuildMADDPGModels(actionDim,
                                                 numWolves + numSheepForPerturbedWolf,
                                                 obsShapePerturbedWolf)
    layerWidthForWolf = [128, 128]
    perturbedWolfModel = buildPerturbedWolfModels(layerWidthForWolf, perturbedWolfID)
    perturbedWolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
        numWolves, numSheepForPerturbedWolf, numBlocks, maxEpisode, maxTimeStep,
        sheepSpeedMultiplier, costActionRatio, wolfSelfish)
    perturbedWolfModelPath = os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                          perturbedWolfFileName + str(perturbedWolfID))
    restoreVariables(perturbedWolfModel, perturbedWolfModelPath)

    # ------------ Recover other wolves trained with multiple goals -------
    wolvesIDForWolfObserve = wolvesID
    sheepsIDForWolfObserve = sheepsID
    blocksIDForWolfObserve = blocksID
    observeOneAgentForWolf = lambda agentID: Observe(
        agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
        blocksIDForWolfObserve, getPosFromAgentState, getVelFromAgentState)
    observeWolf = lambda state: [observeOneAgentForWolf(agentID)(state)
                                 for agentID in range(numWolves + numSheep)]
    obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
    initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
    obsShapeWolf = [initObsForWolfParams[obsID].shape[0]
                    for obsID in range(len(initObsForWolfParams))]
    buildWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheep, obsShapeWolf)
    layerWidthForWolf = [128, 128]
    wolfModelsList = [buildWolfModels(layerWidthForWolf, agentID) for agentID in range(numWolves)]
    wolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
        numWolves, numSheep, numBlocks, maxEpisode, maxTimeStep,
        sheepSpeedMultiplier, costActionRatio, wolfSelfish)
    wolfModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                   wolfFileName + str(i)) for i in range(numWolves)]
    print(numWolves, obsShapeWolf, wolfModelPaths)
    for model, path in zip(wolfModelsList, wolfModelPaths):
        restoreVariables(model, path)

    # ------------ compose policy ---------------------
    # Near-zero covariance: effectively deterministic actions through the
    # Gaussian sampler interface.
    actionDimReshaped = 2
    cov = [0.00000000001 ** 2 for _ in range(actionDimReshaped)]
    buildGaussian = BuildGaussianFixCov(cov)
    reshapeAction = ReshapeAction()

    # Unperturbed wolves.
    composeWolfPolicy = lambda wolfModel: lambda state: sampleFromContinuousSpace(
        buildGaussian(tuple(reshapeAction(actOneStep(wolfModel, observeWolf(state))))))
    wolvesSampleActions = [composeWolfPolicy(wolfModel) for wolfModel in wolfModelsList]

    # Perturbed wolf uses its own (goal-restricted) observation.
    composePerturbedWolfPolicy = lambda perturbedModel: lambda state: sampleFromContinuousSpace(
        buildGaussian(tuple(reshapeAction(actOneStep(perturbedModel, observePerturbedWolf(state))))))
    wolvesSampleActionsPerturbed = wolvesSampleActions.copy()  # shallow copy keeps the unperturbed list intact
    wolvesSampleActionsPerturbed[perturbedWolfID] = composePerturbedWolfPolicy(perturbedWolfModel)

    trajectories = []
    for trajectoryId in range(self.numTrajectories):
        # Each sheep is paired with a randomly drawn pretrained model per trajectory.
        sheepModelsForPolicy = [sheepModelListOfDiffWolfReward[np.random.choice(numAllSheepModels)]
                                for sheepId in sheepsID]
        composeSheepPolicy = lambda sheepModel: lambda state: {
            tuple(reshapeAction(actOneStep(sheepModel, observeSheep(state)))): 1}
        sheepChooseActionMethod = sampleFromDistribution
        sheepSampleActions = [
            SampleActionOnFixedIntention(selfId, wolvesID, composeSheepPolicy(sheepModel),
                                         sheepChooseActionMethod, blocksID)
            for selfId, sheepModel in zip(sheepsID, sheepModelsForPolicy)]
        allIndividualSampleActionsPerturbed = wolvesSampleActionsPerturbed + sheepSampleActions
        sampleActionPerturbed = lambda state: [
            sampleIndividualAction(state)
            for sampleIndividualAction in allIndividualSampleActionsPerturbed]
        trajectory = sampleTrajectory(sampleActionPerturbed)
        trajectories.append(trajectory)

    trajectoryFixedParameters = {'maxRunningStepsToSample': maxRunningStepsToSample}
    self.saveTrajectoryByParameters(trajectories, trajectoryFixedParameters, parameters)
def __call__(self, parameters):
    """Sample trajectories with intention-inferring wolves and one perturbed wolf.

    Wolves act through an imagined-"we" central-control policy whose goal
    (which sheep) is inferred online via Bayesian intention updating. One
    wolf (``perturbedWolfID``) is replaced by a single-goal MADDPG model
    chasing ``perturbedWolfGoalID``. Each saved trajectory is augmented
    with the per-step intention distributions.

    Args:
        parameters: dict with keys 'numWolves', 'numSheep', 'wolfType'
            ('sharedAgencyBySharedRewardWolf' => non-selfish wolves),
            'perturbedWolfID' and 'perturbedWolfGoalID'.
    """
    print(parameters)
    valuePriorEndTime = -100
    deviationFor2DAction = 1.0
    rationalityBetaInInference = 1.0
    numWolves = parameters['numWolves']
    numSheep = parameters['numSheep']
    wolfType = parameters['wolfType']
    wolfSelfish = 0.0 if wolfType == 'sharedAgencyBySharedRewardWolf' else 1.0
    perturbedWolfID = parameters['perturbedWolfID']
    perturbedWolfGoalID = parameters['perturbedWolfGoalID']

    ## MDP Env
    numBlocks = 2
    numAgents = numWolves + numSheep
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numWolves + numSheep))
    blocksID = list(range(numAgents, numEntities))

    sheepSize = 0.05
    wolfSize = 0.075
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [blockSize] * numBlocks
    costActionRatio = 0.0
    sheepSpeedMultiplier = 1.0
    sheepMaxSpeed = 1.3 * sheepSpeedMultiplier
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None  # blocks never move
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheep + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    collisionReward = 1  # for evaluation, count # of bites
    isCollision = IsCollision(getPosFromAgentState)
    rewardAllWolves = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision,
                                 collisionReward, wolfSelfish)
    # Scalar team reward: sum of the per-wolf rewards.
    rewardWolf = lambda state, action, nextState: np.sum(rewardAllWolves(state, action, nextState))

    # Physics: forces -> integration -> next state.
    reshapeActionInTransit = lambda action: action  # identity; reshaping happens in the policies
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                          getCollisionForce, getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit,
                                       applyActionForce, applyEnvironForce, integrateState)
    forwardOneStep = ForwardOneStep(transit, rewardWolf)
    reset = ResetMultiAgentChasingWithSeed(numAgents, numBlocks)
    isTerminal = lambda state: False  # fixed-horizon episodes only
    maxRunningStepsToSample = 101
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, isTerminal, reset, forwardOneStep)

    ## MDP Policy
    worldDim = 2
    actionDim = worldDim * 2 + 1
    layerWidth = [128, 128]
    maxTimeStep = 75
    maxEpisode = 60000
    dirName = os.path.dirname(__file__)

    # ------------ sheep recover variables ------------------------
    numSheepToObserve = 1
    sheepModelListOfDiffWolfReward = []
    sheepTypeList = [0.0, 1.0]  # sheep trained vs shared (0.0) and selfish (1.0) wolves
    for sheepType in sheepTypeList:
        wolvesIDForSheepObserve = list(range(numWolves))
        sheepsIDForSheepObserve = list(range(numWolves, numSheepToObserve + numWolves))
        blocksIDForSheepObserve = list(range(numSheepToObserve + numWolves,
                                             numSheepToObserve + numWolves + numBlocks))
        observeOneAgentForSheep = lambda agentID: Observe(
            agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
            blocksIDForSheepObserve, getPosFromAgentState, getVelFromAgentState)
        observeSheep = lambda state: [observeOneAgentForSheep(agentID)(state)
                                      for agentID in range(numWolves + numSheepToObserve)]
        obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
        initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
        obsShapeSheep = [initObsForSheepParams[obsID].shape[0]
                         for obsID in range(len(initObsForSheepParams))]
        buildSheepModels = BuildMADDPGModels(actionDim, numWolves + numSheepToObserve, obsShapeSheep)
        sheepModelsList = [buildSheepModels(layerWidth, agentID)
                           for agentID in range(numWolves, numWolves + numSheepToObserve)]
        sheepFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
            numWolves, numSheepToObserve, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, costActionRatio, sheepType)
        sheepModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                        sheepFileName + str(i))
                           for i in range(numWolves, numWolves + numSheepToObserve)]
        for model, path in zip(sheepModelsList, sheepModelPaths):
            restoreVariables(model, path)
        sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList

    # Deterministic (no exploration noise) acting for evaluation.
    actOneStep = ActOneStep(actByPolicyTrainNoNoisy)
    numAllSheepModels = len(sheepModelListOfDiffWolfReward)

    # ------------ recover variables for "we" ------------------------
    # Central-control wolves observe the full wolf pack plus one imagined sheep.
    numAgentsInWe = numWolves
    numSheepInWe = 1
    numBlocksForWe = numBlocks
    wolvesIDForWolfObserve = list(range(numAgentsInWe))
    sheepsIDForWolfObserve = list(range(numAgentsInWe, numSheepInWe + numAgentsInWe))
    blocksIDForWolfObserve = list(range(numSheepInWe + numAgentsInWe,
                                        numSheepInWe + numAgentsInWe + numBlocksForWe))
    observeOneAgentForWolf = lambda agentID: Observe(
        agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
        blocksIDForWolfObserve, getPosFromAgentState, getVelFromAgentState)
    observeWolf = lambda state: [observeOneAgentForWolf(agentID)(state)
                                 for agentID in range(numAgentsInWe + numSheepInWe)]
    obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
    initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
    obsShapeWolf = [initObsForWolfParams[obsID].shape[0]
                    for obsID in range(len(initObsForWolfParams))]
    buildWolfModels = BuildMADDPGModels(actionDim, numAgentsInWe + numSheepInWe, obsShapeWolf)
    layerWidthForWolf = [128, 128]
    wolfModelsList = [buildWolfModels(layerWidthForWolf, agentID)
                      for agentID in range(numAgentsInWe)]
    wolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
        numWolves, numSheepInWe, numBlocks, maxEpisode, maxTimeStep,
        sheepSpeedMultiplier, costActionRatio, wolfSelfish)
    wolfModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                   wolfFileName + str(i)) for i in range(numAgentsInWe)]
    for model, path in zip(wolfModelsList, wolfModelPaths):
        restoreVariables(model, path)

    # ------------ compose wolves policy no perturbation ------------------------
    actionDimReshaped = 2
    cov = [deviationFor2DAction ** 2 for _ in range(actionDimReshaped)]
    buildGaussian = BuildGaussianFixCov(cov)
    actOneStep = ActOneStep(actByPolicyTrainNoNoisy)
    reshapeAction = ReshapeAction()
    composeCentralControlPolicy = lambda observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction(
        reshapeAction, observe, actOneStep, buildGaussian)
    # Input: state; output: list of Gaussian distributions with the cov above.
    wolvesCentralControlPolicy = [composeCentralControlPolicy(observeWolf)(wolfModelsList, numAgentsInWe)]

    softPolicyInInference = lambda distribution: distribution  # identity softening
    getStateThirdPersonPerspective = lambda state, goalId, weIds: getStateOrActionThirdPersonPerspective(
        state, goalId, weIds, blocksID)
    # Same as wolvesCentralControlPolicy(state) for committed agents.
    policyForCommittedAgentsInInference = PolicyForCommittedAgent(
        wolvesCentralControlPolicy, softPolicyInInference, getStateThirdPersonPerspective)
    concernedAgentsIds = wolvesID
    calCommittedAgentsPolicyLikelihood = CalCommittedAgentsContinuousPolicyLikelihood(
        concernedAgentsIds, policyForCommittedAgentsInInference, rationalityBetaInInference)

    randomActionSpace = [(5, 0), (3.5, 3.5), (0, 5), (-3.5, 3.5), (-5, 0),
                         (-3.5, -3.5), (0, -5), (3.5, -3.5), (0, 0)]
    randomPolicy = RandomPolicy(randomActionSpace)
    getStateFirstPersonPerspective = lambda state, goalId, weIds, selfId: getStateOrActionFirstPersonPerspective(
        state, goalId, weIds, selfId, blocksID)
    # Uncommitted agents act randomly; their likelihood contribution is constant.
    policyForUncommittedAgentsInInference = PolicyForUncommittedAgent(
        wolvesID, randomPolicy, softPolicyInInference, getStateFirstPersonPerspective)
    calUncommittedAgentsPolicyLikelihood = CalUncommittedAgentsPolicyLikelihood(
        wolvesID, concernedAgentsIds, policyForUncommittedAgentsInInference)

    # Joint likelihood = committed * uncommitted.
    calJointLikelihood = lambda intention, state, perceivedAction: \
        calCommittedAgentsPolicyLikelihood(intention, state, perceivedAction) * \
        calUncommittedAgentsPolicyLikelihood(intention, state, perceivedAction)

    # ------------ wolves intention ------------------------
    # One intention space per wolf: (sheep goal, whole wolf pack as "we"),
    # e.g. ((3, (0, 1, 2)), (4, (0, 1, 2)), ...).
    intentionSpacesForAllWolves = [tuple(it.product(sheepsID, [tuple(wolvesID)]))
                                   for wolfId in wolvesID]
    print('intentionSpacesForAllWolves', intentionSpacesForAllWolves)
    wolvesIntentionPriors = [
        {tuple(intention): 1 / len(allPossibleIntentionsOneWolf)
         for intention in allPossibleIntentionsOneWolf}
        for allPossibleIntentionsOneWolf in intentionSpacesForAllWolves]
    perceptSelfAction = SampleNoisyAction(deviationFor2DAction)
    perceptOtherAction = SampleNoisyAction(deviationFor2DAction)
    perceptAction = PerceptImaginedWeAction(wolvesID, perceptSelfAction,
                                            perceptOtherAction)  # input self, others action

    # Infer and update intention.
    variablesForAllWolves = [[intentionSpace] for intentionSpace in intentionSpacesForAllWolves]
    jointHypothesisSpaces = [pd.MultiIndex.from_product(variables, names=['intention'])
                             for variables in variablesForAllWolves]
    concernedHypothesisVariable = ['intention']
    priorDecayRate = 1
    softPrior = SoftDistribution(priorDecayRate)  # decay rate 1 leaves the prior unchanged
    inferIntentionOneStepList = [
        InferOneStep(jointHypothesisSpace, concernedHypothesisVariable,
                     calJointLikelihood, softPrior)
        for jointHypothesisSpace in jointHypothesisSpaces]
    if numSheep == 1:
        # With a single sheep there is nothing to infer: keep the prior.
        # FIX: was hard-coded "* 3"; for numWolves > 3 the later zip() silently
        # truncated updateIntentions to three wolves.
        inferIntentionOneStepList = [lambda prior, state, action: prior] * numWolves
    adjustIntentionPriorGivenValueOfState = lambda state: 1
    chooseIntention = sampleFromDistribution
    updateIntentions = [
        UpdateIntention(intentionPrior, valuePriorEndTime,
                        adjustIntentionPriorGivenValueOfState, perceptAction,
                        inferIntentionOneStep, chooseIntention)
        for intentionPrior, inferIntentionOneStep in zip(wolvesIntentionPriors,
                                                         inferIntentionOneStepList)]

    # Reset/record tools so intention state can be cleared between trajectories.
    intentionResetAttributes = ['timeStep', 'lastState', 'lastAction',
                                'intentionPrior', 'formerIntentionPriors']
    intentionResetAttributeValues = [
        dict(zip(intentionResetAttributes, [0, None, None, intentionPrior, [intentionPrior]]))
        for intentionPrior in wolvesIntentionPriors]
    resetIntentions = ResetObjects(intentionResetAttributeValues, updateIntentions)
    returnAttributes = ['formerIntentionPriors']
    # NOTE(review): updateIntentions[1:] skips wolf 0's intention history —
    # presumably deliberate (perturbed wolf?); verify against downstream analysis.
    getIntentionDistributions = GetObjectsValuesOfAttributes(returnAttributes, updateIntentions[1:])
    attributesToRecord = ['lastAction']
    recordActionForUpdateIntention = RecordValuesForObjects(attributesToRecord, updateIntentions)

    # ------------ wolves generate action (planning) ------------------------
    # Near-zero covariance for planning => effectively deterministic actions.
    covForPlanning = [0.00000001 for _ in range(actionDimReshaped)]
    buildGaussianForPlanning = BuildGaussianFixCov(covForPlanning)
    composeCentralControlPolicyForPlanning = lambda observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction(
        reshapeAction, observe, actOneStep, buildGaussianForPlanning)
    wolvesCentralControlPoliciesForPlanning = [
        composeCentralControlPolicyForPlanning(observeWolf)(wolfModelsList, numAgentsInWe)]
    # Index 0 holds the policy for this "we" size.
    centralControlPolicyListBasedOnNumAgentsInWeForPlanning = wolvesCentralControlPoliciesForPlanning

    softPolicyInPlanning = lambda distribution: distribution  # identity softening
    policyForCommittedAgentInPlanning = PolicyForCommittedAgent(
        centralControlPolicyListBasedOnNumAgentsInWeForPlanning, softPolicyInPlanning,
        getStateThirdPersonPerspective)
    policyForUncommittedAgentInPlanning = PolicyForUncommittedAgent(
        wolvesID, randomPolicy, softPolicyInPlanning, getStateFirstPersonPerspective)

    def wolfChooseActionMethod(individualContinuousDistributions):
        # Sample every wolf's action from its Gaussian and pack as a tuple of tuples.
        centralControlAction = tuple(
            [tuple(sampleFromContinuousSpace(distribution))
             for distribution in individualContinuousDistributions])
        return centralControlAction

    getSelfActionIDInThirdPersonPerspective = lambda weIds, selfId: list(weIds).index(selfId)
    chooseCommittedAction = GetActionFromJointActionDistribution(
        wolfChooseActionMethod, getSelfActionIDInThirdPersonPerspective)
    chooseUncommittedAction = sampleFromDistribution
    wolvesSampleIndividualActionGivenIntentionList = [
        SampleIndividualActionGivenIntention(selfId, policyForCommittedAgentInPlanning,
                                             policyForUncommittedAgentInPlanning,
                                             chooseCommittedAction, chooseUncommittedAction)
        for selfId in wolvesID]

    # ------------------- recover one wolf model that only concerns one sheep -------------------
    numSheepForPerturbedWolf = 1
    wolvesIDForPerturbedWolf = wolvesID
    sheepsIDForPerturbedWolf = [sheepsID[perturbedWolfGoalID]]
    blocksIDForPerturbedWolf = list(range(numWolves + numSheep, numEntities))  # skip the unattended sheep id
    observeOneAgentForPerturbedWolf = lambda agentID: Observe(
        agentID, wolvesIDForPerturbedWolf, sheepsIDForPerturbedWolf,
        blocksIDForPerturbedWolf, getPosFromAgentState, getVelFromAgentState)
    observePerturbedWolf = lambda state: [
        observeOneAgentForPerturbedWolf(agentID)(state)
        for agentID in wolvesIDForPerturbedWolf + sheepsIDForPerturbedWolf]
    initObsForPerturbedWolfParams = observePerturbedWolf(reset())
    obsShapePerturbedWolf = [initObsForPerturbedWolfParams[obsID].shape[0]
                             for obsID in range(len(initObsForPerturbedWolfParams))]
    buildPerturbedWolfModels = BuildMADDPGModels(actionDim,
                                                 numWolves + numSheepForPerturbedWolf,
                                                 obsShapePerturbedWolf)
    layerWidthForWolf = [128, 128]
    perturbedWolfModel = buildPerturbedWolfModels(layerWidthForWolf, perturbedWolfID)
    perturbedWolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
        numWolves, numSheepForPerturbedWolf, numBlocks, maxEpisode, maxTimeStep,
        sheepSpeedMultiplier, costActionRatio, wolfSelfish)
    perturbedWolfModelPath = os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                          perturbedWolfFileName + str(perturbedWolfID))
    restoreVariables(perturbedWolfModel, perturbedWolfModelPath)

    # ------------------- Sample and Save Trajectory -------------------
    wolvesSampleActions = [
        SampleActionOnChangableIntention(updateIntention,
                                         wolvesSampleIndividualActionGivenIntention)
        for updateIntention, wolvesSampleIndividualActionGivenIntention
        in zip(updateIntentions, wolvesSampleIndividualActionGivenIntentionList)]
    perturbedWolfSampleActions = lambda state: tuple(
        reshapeAction(actOneStep(perturbedWolfModel, observePerturbedWolf(state))))
    # FIX: .copy() was commented out, so the assignment below also clobbered
    # wolvesSampleActions; shallow copy keeps the unperturbed list intact
    # (consistent with the sibling perturbed-wolf runner).
    wolvesSampleActionsPerturbed = wolvesSampleActions.copy()
    wolvesSampleActionsPerturbed[perturbedWolfID] = perturbedWolfSampleActions

    trajectoriesWithIntentionDists = []
    for trajectoryId in range(self.numTrajectories):
        # Each sheep is paired with a randomly drawn pretrained model per trajectory.
        sheepModelsForPolicy = [sheepModelListOfDiffWolfReward[np.random.choice(numAllSheepModels)]
                                for sheepId in sheepsID]
        composeSheepPolicy = lambda sheepModel: lambda state: {
            tuple(reshapeAction(actOneStep(sheepModel, observeSheep(state)))): 1}
        sheepChooseActionMethod = sampleFromDistribution
        sheepSampleActions = [
            SampleActionOnFixedIntention(selfId, wolvesID, composeSheepPolicy(sheepModel),
                                         sheepChooseActionMethod, blocksID)
            for selfId, sheepModel in zip(sheepsID, sheepModelsForPolicy)]
        allIndividualSampleActionsPerturbed = wolvesSampleActionsPerturbed + sheepSampleActions
        sampleActionMultiAgentPerturbed = SampleActionMultiagent(
            allIndividualSampleActionsPerturbed, recordActionForUpdateIntention)
        trajectory = sampleTrajectory(sampleActionMultiAgentPerturbed)
        # Attach per-step intention posteriors to each (s, a, s', r) tuple.
        intentionDistributions = getIntentionDistributions()
        trajectoryWithIntentionDists = [
            tuple(list(SASRPair) + list(intentionDist))
            for SASRPair, intentionDist in zip(trajectory, intentionDistributions)]
        trajectoriesWithIntentionDists.append(tuple(trajectoryWithIntentionDists))
        resetIntentions()  # clear intention state before the next trajectory

    trajectoryFixedParameters = {'maxRunningStepsToSample': maxRunningStepsToSample}
    self.saveTrajectoryByParameters(trajectoriesWithIntentionDists,
                                    trajectoryFixedParameters, parameters)
def __call__(self, parameters):
    """Run one experiment condition: build the chasing MDP, load pretrained
    MADDPG sheep/wolf models, sample ``self.numTrajectories`` trajectories with
    intention inference, and save them via ``self.saveTrajectoryByParameters``.

    Args:
        parameters: dict with keys 'numWolves', 'numSheep',
            'valuePriorSoftMaxBeta', 'valuePriorEndTime',
            'deviationFor2DAction', 'rationalityBetaInInference',
            'wolfType', 'sheepConcern'.

    Side effects: reads pretrained model checkpoints from
    data/preTrainModel, prints progress, saves trajectories, and optionally
    renders them when ``visualizeTraj`` is flipped on.
    """
    print(parameters)
    visualizeTraj = False
    numWolves = parameters['numWolves']
    numSheep = parameters['numSheep']
    softParamterForValue = parameters['valuePriorSoftMaxBeta']  # NOTE(review): read but unused below — confirm intent
    valuePriorEndTime = parameters['valuePriorEndTime']
    deviationFor2DAction = parameters['deviationFor2DAction']
    rationalityBetaInInference = parameters['rationalityBetaInInference']
    wolfType = parameters['wolfType']
    sheepConcern = parameters['sheepConcern']
    print(rationalityBetaInInference)

    ## MDP Env
    # state is all multi agent state; action is all multi agent action
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numWolves + numSheep))
    possibleWolvesIds = wolvesID
    possibleSheepIds = sheepsID

    numAgents = numWolves + numSheep
    numBlocks = 5 - numWolves  # total wolves + blocks is kept at 5
    blocksID = list(range(numAgents, numAgents + numBlocks))
    numEntities = numAgents + numBlocks

    sheepSize = 0.05
    wolfSize = 0.075
    blockSize = 0.2
    sheepMaxSpeed = 1.3 * 1
    wolfMaxSpeed = 1.0 * 1
    blockMaxSpeed = None  # blocks are immovable

    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [blockSize] * numBlocks
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheep + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    # Physics / transition of the particle world.
    reshapeActionInTransit = lambda action: action
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                          entitiesSizeList, getCollisionForce,
                                          getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState,
                                    getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit,
                                       applyActionForce, applyEnvironForce,
                                       integrateState)

    # Rewards: wolves get +1 per collision with a sheep, no boundary penalty.
    isCollision = IsCollision(getPosFromAgentState)
    collisonRewardWolf = 1
    punishForOutOfBoundForWolf = lambda state: 0  # fixed typo: was `stata`
    rewardWolf = RewardCentralControlPunishBond(wolvesID, sheepsID,
                                                entitiesSizeList,
                                                getPosFromAgentState,
                                                isCollision,
                                                punishForOutOfBoundForWolf,
                                                collisonRewardWolf)
    collisonRewardSheep = -1
    punishForOutOfBoundForSheep = PunishForOutOfBound()
    # rewardSheep is constructed for parity but trajectories are scored by
    # rewardWolf only (see forwardOneStep below).
    rewardSheep = RewardCentralControlPunishBond(sheepsID, wolvesID,
                                                 entitiesSizeList,
                                                 getPosFromAgentState,
                                                 isCollision,
                                                 punishForOutOfBoundForSheep,
                                                 collisonRewardSheep)
    forwardOneStep = ForwardOneStep(transit, rewardWolf)

    reset = ResetMultiAgentChasing(numAgents, numBlocks)
    isTerminal = lambda state: False  # fixed horizon, no early termination
    maxRunningSteps = 101
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset,
                                        forwardOneStep)

    ## MDP Policy
    worldDim = 2
    actionDim = worldDim * 2 + 1
    layerWidth = [64 * (numWolves - 1), 64 * (numWolves - 1)]

    # ---------------- Sheep models ----------------
    if sheepConcern == 'selfSheep':
        sheepConcernSelfOnly = 1
    if sheepConcern == 'allSheep':
        sheepConcernSelfOnly = 0
    numSheepToObserveWhenSheepSameOrDiff = [numSheep, 1]
    numSheepToObserve = numSheepToObserveWhenSheepSameOrDiff[sheepConcernSelfOnly]

    print(numSheepToObserve)
    sheepModelListOfDiffWolfReward = []
    sheepType = 'mixed'
    if sheepType == 'mixed':
        # load sheep trained against both individually- and shared-rewarded wolves
        sheepPrefixList = ['maddpgIndividWolf', 'maddpg']
    else:
        sheepPrefixList = [sheepType]
    for sheepPrefix in sheepPrefixList:
        wolvesIDForSheepObserve = list(range(numWolves))
        sheepsIDForSheepObserve = list(range(numWolves, numSheepToObserve + numWolves))
        blocksIDForSheepObserve = list(range(numSheepToObserve + numWolves,
                                             numSheepToObserve + numWolves + numBlocks))
        # Bind loop-local values as lambda defaults so the lambdas do not
        # late-bind the loop variables (values are identical on every
        # iteration here, but binding keeps the closures self-contained).
        observeOneAgentForSheep = lambda agentID, \
                wolvesIDs=wolvesIDForSheepObserve, \
                sheepsIDs=sheepsIDForSheepObserve, \
                blocksIDs=blocksIDForSheepObserve: Observe(
                    agentID, wolvesIDs, sheepsIDs, blocksIDs,
                    getPosFromAgentState, getVelFromAgentState)
        observeSheep = lambda state, observeOne=observeOneAgentForSheep, \
                numObserved=numWolves + numSheepToObserve: [
                    observeOne(agentID)(state) for agentID in range(numObserved)]

        obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
        # assumes reset() returns an array indexable by a list of agent ids — TODO confirm
        initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
        obsShapeSheep = [initObsForSheepParams[obsID].shape[0]
                         for obsID in range(len(initObsForSheepParams))]

        buildSheepModels = BuildMADDPGModels(actionDim,
                                             numWolves + numSheepToObserve,
                                             obsShapeSheep)
        sheepModelsList = [buildSheepModels(layerWidth, agentID)
                           for agentID in range(numWolves, numWolves + numSheepToObserve)]

        dirName = os.path.dirname(__file__)
        maxEpisode = 60000
        print(sheepPrefix)
        sheepFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(
            numWolves, numSheepToObserve, numBlocks, maxEpisode)
        sheepModelPaths = [
            os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                         sheepPrefix + sheepFileName + str(i) + '60000eps')
            for i in range(numWolves, numWolves + numSheepToObserve)]
        [restoreVariables(model, path)
         for model, path in zip(sheepModelsList, sheepModelPaths)]
        sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList

    # Sheep policy function
    reshapeAction = ReshapeAction()
    actOneStepOneModelSheep = ActOneStep(actByPolicyTrainNoisy)

    # Sheep generate action
    numAllSheepModels = len(sheepModelListOfDiffWolfReward)

    # ---------------- Wolves part ----------------
    # Intention prior for inference: every intention is (sheepId, all-wolves we).
    intentionSpacesForAllWolves = [
        tuple(it.product(possibleSheepIds, [tuple(possibleWolvesIds)]))
        for wolfId in possibleWolvesIds]
    print(intentionSpacesForAllWolves)
    wolvesIntentionPriors = [
        {tuple(intention): 1 / len(allPossibleIntentionsOneWolf)
         for intention in allPossibleIntentionsOneWolf}
        for allPossibleIntentionsOneWolf in intentionSpacesForAllWolves]

    # Perceived actions are noisy versions of the executed ones.
    perceptSelfAction = SampleNoisyAction(deviationFor2DAction)
    perceptOtherAction = SampleNoisyAction(deviationFor2DAction)
    perceptAction = PerceptImaginedWeAction(possibleWolvesIds,
                                            perceptSelfAction,
                                            perceptOtherAction)

    # Policy likelihood: central-control NN policy given intention, one model
    # set per possible "we" size (2 .. numWolves agents).
    weModelsListBaseOnNumInWe = []
    observeListBaseOnNumInWe = []
    for numAgentInWe in range(2, numWolves + 1):
        numBlocksForWe = 5 - numAgentInWe
        wolvesIDForWolfObserve = list(range(numAgentInWe))
        sheepsIDForWolfObserve = list(range(numAgentInWe, 1 + numAgentInWe))
        blocksIDForWolfObserve = list(range(1 + numAgentInWe,
                                            1 + numAgentInWe + numBlocksForWe))
        # BUGFIX: bind loop state as lambda defaults. Previously these lambdas
        # late-bound numAgentInWe and the ID lists, so every observer stored in
        # observeListBaseOnNumInWe used the *final* we-size, mismatching the
        # per-we-size models it is paired with below.
        observeOneAgentForWolf = lambda agentID, \
                wolvesIDs=wolvesIDForWolfObserve, \
                sheepsIDs=sheepsIDForWolfObserve, \
                blocksIDs=blocksIDForWolfObserve: Observe(
                    agentID, wolvesIDs, sheepsIDs, blocksIDs,
                    getPosFromAgentState, getVelFromAgentState)
        observeWolf = lambda state, observeOne=observeOneAgentForWolf, \
                numInWe=numAgentInWe: [observeOne(agentID)(state)
                                       for agentID in range(numInWe + 1)]
        observeListBaseOnNumInWe.append(observeWolf)

        obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
        initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
        obsShapeWolf = [initObsForWolfParams[obsID].shape[0]
                        for obsID in range(len(initObsForWolfParams))]
        buildWolfModels = BuildMADDPGModels(actionDim, numAgentInWe + 1, obsShapeWolf)
        layerWidthForWolf = [64 * (numAgentInWe - 1), 64 * (numAgentInWe - 1)]
        wolfModelsList = [buildWolfModels(layerWidthForWolf, agentID)
                          for agentID in range(numAgentInWe)]

        if wolfType == 'sharedAgencyByIndividualRewardWolf':
            wolfPrefix = 'maddpgIndividWolf'
        if wolfType == 'sharedAgencyBySharedRewardWolf':
            wolfPrefix = 'maddpg'
        wolfFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(
            numAgentInWe, 1, numBlocksForWe, maxEpisode)
        wolfModelPaths = [
            os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                         wolfPrefix + wolfFileName + str(i) + '60000eps')
            for i in range(numAgentInWe)]
        print(numAgentInWe, obsShapeWolf, wolfModelPaths)
        [restoreVariables(model, path)
         for model, path in zip(wolfModelsList, wolfModelPaths)]
        weModelsListBaseOnNumInWe.append(wolfModelsList)

    actionDimReshaped = 2
    cov = [deviationFor2DAction ** 2 for _ in range(actionDimReshaped)]
    buildGaussian = BuildGaussianFixCov(cov)
    actOneStepOneModelWolf = ActOneStep(actByPolicyTrainNoNoisy)
    composeCentralControlPolicy = lambda observe: \
        ComposeCentralControlPolicyByGaussianOnDeterministicAction(
            reshapeAction, observe, actOneStepOneModelWolf, buildGaussian)
    wolvesCentralControlPolicies = [
        composeCentralControlPolicy(observeListBaseOnNumInWe[numAgentsInWe - 2])(
            weModelsListBaseOnNumInWe[numAgentsInWe - 2], numAgentsInWe)
        for numAgentsInWe in range(2, numWolves + 1)]

    # index 0 holds the two-agents-in-we policy, index 1 three agents, ...
    centralControlPolicyListBasedOnNumAgentsInWe = wolvesCentralControlPolicies
    softPolicyInInference = lambda distribution: distribution
    getStateThirdPersonPerspective = lambda state, goalId, weIds: \
        getStateOrActionThirdPersonPerspective(state, goalId, weIds, blocksID)
    policyForCommittedAgentsInInference = PolicyForCommittedAgent(
        centralControlPolicyListBasedOnNumAgentsInWe, softPolicyInInference,
        getStateThirdPersonPerspective)
    concernedAgentsIds = possibleWolvesIds
    calCommittedAgentsPolicyLikelihood = CalCommittedAgentsContinuousPolicyLikelihood(
        concernedAgentsIds, policyForCommittedAgentsInInference,
        rationalityBetaInInference)

    # Uncommitted agents act uniformly over a fixed 9-direction action set.
    randomActionSpace = [(5, 0), (3.5, 3.5), (0, 5), (-3.5, 3.5), (-5, 0),
                         (-3.5, -3.5), (0, -5), (3.5, -3.5), (0, 0)]
    randomPolicy = RandomPolicy(randomActionSpace)
    getStateFirstPersonPerspective = lambda state, goalId, weIds, selfId: \
        getStateOrActionFirstPersonPerspective(state, goalId, weIds, selfId, blocksID)
    policyForUncommittedAgentsInInference = PolicyForUncommittedAgent(
        possibleWolvesIds, randomPolicy, softPolicyInInference,
        getStateFirstPersonPerspective)
    calUncommittedAgentsPolicyLikelihood = CalUncommittedAgentsPolicyLikelihood(
        possibleWolvesIds, concernedAgentsIds,
        policyForUncommittedAgentsInInference)

    # Joint likelihood = committed * uncommitted likelihoods.
    calJointLikelihood = lambda intention, state, perceivedAction: \
        calCommittedAgentsPolicyLikelihood(intention, state, perceivedAction) * \
        calUncommittedAgentsPolicyLikelihood(intention, state, perceivedAction)

    # Infer and update intention.
    variablesForAllWolves = [[intentionSpace]
                             for intentionSpace in intentionSpacesForAllWolves]
    jointHypothesisSpaces = [
        pd.MultiIndex.from_product(variables, names=['intention'])
        for variables in variablesForAllWolves]
    concernedHypothesisVariable = ['intention']
    priorDecayRate = 1
    softPrior = SoftDistribution(priorDecayRate)
    inferIntentionOneStepList = [
        InferOneStep(jointHypothesisSpace, concernedHypothesisVariable,
                     calJointLikelihood, softPrior)
        for jointHypothesisSpace in jointHypothesisSpaces]
    if numSheep == 1:
        # With a single sheep the intention is unambiguous: replace inference
        # with identity updates. BUGFIX: was hardcoded `* 3`, which silently
        # dropped wolves via zip-truncation whenever numWolves > 3.
        inferIntentionOneStepList = [lambda prior, state, action: prior] * numWolves

    adjustIntentionPriorGivenValueOfState = lambda state: 1
    chooseIntention = sampleFromDistribution
    updateIntentions = [
        UpdateIntention(intentionPrior, valuePriorEndTime,
                        adjustIntentionPriorGivenValueOfState, perceptAction,
                        inferIntentionOneStep, chooseIntention)
        for intentionPrior, inferIntentionOneStep in zip(
            wolvesIntentionPriors, inferIntentionOneStepList)]

    # Tools to reset/inspect intention state between trajectories.
    intentionResetAttributes = ['timeStep', 'lastState', 'lastAction',
                                'intentionPrior', 'formerIntentionPriors']
    intentionResetAttributeValues = [
        dict(zip(intentionResetAttributes,
                 [0, None, None, intentionPrior, [intentionPrior]]))
        for intentionPrior in wolvesIntentionPriors]
    resetIntentions = ResetObjects(intentionResetAttributeValues, updateIntentions)
    returnAttributes = ['formerIntentionPriors']
    getIntentionDistributions = GetObjectsValuesOfAttributes(returnAttributes,
                                                             updateIntentions)
    attributesToRecord = ['lastAction']
    recordActionForUpdateIntention = RecordValuesForObjects(attributesToRecord,
                                                            updateIntentions)

    # Wolves generate action (planning uses a tighter fixed covariance).
    covForPlanning = [0.03 ** 2 for _ in range(actionDimReshaped)]
    buildGaussianForPlanning = BuildGaussianFixCov(covForPlanning)
    composeCentralControlPolicyForPlanning = lambda observe: \
        ComposeCentralControlPolicyByGaussianOnDeterministicAction(
            reshapeAction, observe, actOneStepOneModelWolf,
            buildGaussianForPlanning)
    wolvesCentralControlPoliciesForPlanning = [
        composeCentralControlPolicyForPlanning(
            observeListBaseOnNumInWe[numAgentsInWe - 2])(
                weModelsListBaseOnNumInWe[numAgentsInWe - 2], numAgentsInWe)
        for numAgentsInWe in range(2, numWolves + 1)]

    centralControlPolicyListBasedOnNumAgentsInWeForPlanning = wolvesCentralControlPoliciesForPlanning
    softPolicyInPlanning = lambda distribution: distribution
    policyForCommittedAgentInPlanning = PolicyForCommittedAgent(
        centralControlPolicyListBasedOnNumAgentsInWeForPlanning,
        softPolicyInPlanning, getStateThirdPersonPerspective)
    policyForUncommittedAgentInPlanning = PolicyForUncommittedAgent(
        possibleWolvesIds, randomPolicy, softPolicyInPlanning,
        getStateFirstPersonPerspective)

    def wolfChooseActionMethod(individualContinuousDistributions):
        # Sample one concrete action per per-agent continuous distribution.
        centralControlAction = tuple([
            tuple(sampleFromContinuousSpace(distribution))
            for distribution in individualContinuousDistributions])
        return centralControlAction

    getSelfActionThirdPersonPerspective = lambda weIds, selfId: \
        list(weIds).index(selfId)
    chooseCommittedAction = GetActionFromJointActionDistribution(
        wolfChooseActionMethod, getSelfActionThirdPersonPerspective)
    chooseUncommittedAction = sampleFromDistribution
    wolvesSampleIndividualActionGivenIntentionList = [
        SampleIndividualActionGivenIntention(
            selfId, policyForCommittedAgentInPlanning,
            policyForUncommittedAgentInPlanning, chooseCommittedAction,
            chooseUncommittedAction)
        for selfId in possibleWolvesIds]

    # ---------------- Sample and save trajectories ----------------
    trajectoriesWithIntentionDists = []
    for trajectoryId in range(self.numTrajectories):
        # Each sheep gets a random pretrained model each trajectory.
        sheepModelsForPolicy = [
            sheepModelListOfDiffWolfReward[np.random.choice(numAllSheepModels)]
            for sheepId in possibleSheepIds]
        if sheepConcernSelfOnly:
            composeSheepPolicy = lambda sheepModel: lambda state: {
                tuple(reshapeAction(
                    actOneStepOneModelSheep(sheepModel, observeSheep(state)))): 1}
            sheepChooseActionMethod = sampleFromDistribution
            sheepSampleActions = [
                SampleActionOnFixedIntention(
                    selfId, possibleWolvesIds, composeSheepPolicy(sheepModel),
                    sheepChooseActionMethod, blocksID)
                for selfId, sheepModel in zip(possibleSheepIds,
                                              sheepModelsForPolicy)]
        else:
            composeSheepPolicy = lambda sheepModel: lambda state: tuple(
                reshapeAction(
                    actOneStepOneModelSheep(sheepModel, observeSheep(state))))
            sheepSampleActions = [composeSheepPolicy(sheepModel)
                                  for sheepModel in sheepModelsForPolicy]
        wolvesSampleActions = [
            SampleActionOnChangableIntention(
                updateIntention, wolvesSampleIndividualActionGivenIntention)
            for updateIntention, wolvesSampleIndividualActionGivenIntention
            in zip(updateIntentions,
                   wolvesSampleIndividualActionGivenIntentionList)]
        allIndividualSampleActions = wolvesSampleActions + sheepSampleActions
        sampleActionMultiAgent = SampleActionMultiagent(
            allIndividualSampleActions, recordActionForUpdateIntention)
        trajectory = sampleTrajectory(sampleActionMultiAgent)
        intentionDistributions = getIntentionDistributions()
        # Append each step's intention distributions to its (s, a, s', r) tuple.
        trajectoryWithIntentionDists = [
            tuple(list(SASRPair) + list(intentionDist))
            for SASRPair, intentionDist in zip(trajectory,
                                               intentionDistributions)]
        trajectoriesWithIntentionDists.append(tuple(trajectoryWithIntentionDists))
        resetIntentions()

    trajectoryFixedParameters = {'maxRunningSteps': maxRunningSteps}
    self.saveTrajectoryByParameters(trajectoriesWithIntentionDists,
                                    trajectoryFixedParameters, parameters)
    print(np.mean([len(tra) for tra in trajectoriesWithIntentionDists]))

    # Optional visualization of the sampled trajectories.
    if visualizeTraj:
        wolfColor = np.array([0.85, 0.35, 0.35])
        sheepColor = np.array([0.35, 0.85, 0.35])
        blockColor = np.array([0.25, 0.25, 0.25])
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheep + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        trajToRender = np.concatenate(trajectoriesWithIntentionDists)
        render(trajToRender)