def wolfChooseActionMethod(individualContinuousDistributions):
    """Draw one joint (central-control) wolf action.

    Samples an action from each wolf's continuous action distribution via
    the project helper ``sampleFromContinuousSpace`` (defined elsewhere in
    this module's imports) and packs the per-wolf samples into an immutable
    tuple-of-tuples so it can be used as a hashable joint action.

    Parameters:
        individualContinuousDistributions: iterable of per-wolf continuous
            distributions accepted by ``sampleFromContinuousSpace``.

    Returns:
        tuple[tuple, ...]: one sampled action per wolf, in input order.
    """
    # Use a generator expression directly — the original built a throwaway
    # intermediate list inside tuple(); result is identical.
    centralControlAction = tuple(
        tuple(sampleFromContinuousSpace(distribution))
        for distribution in individualContinuousDistributions)
    return centralControlAction
def __call__(self, parameters):
    """Roll out evaluation trajectories for centrally-controlled wolves vs. sheep.

    Builds the multi-agent chasing MDP (physics transition + rewards), restores
    pre-trained MADDPG wolf and sheep policies from disk, samples
    ``self.numTrajectories`` trajectories, saves them via
    ``self.saveTrajectoryByParameters``, and optionally renders them.

    Parameters (dict keys read from ``parameters``):
        numWolves (int), numSheep (int),
        wolfType: 'sharedReward' or 'individualReward' — selects wolf model prefix,
        sheepConcern: 'selfSheep' or 'allSheep' — selects how many sheep each
            sheep policy observes.

    NOTE(review): relies on instance attributes ``self.numTrajectories`` and
    ``self.saveTrajectoryByParameters`` set elsewhere (class header not visible).
    """
    print(parameters)
    visualizeTraj = False  # flip to True to render the sampled trajectories at the end
    numWolves = parameters['numWolves']
    numSheep = parameters['numSheep']
    wolfType = parameters['wolfType']
    sheepConcern = parameters['sheepConcern']

    ## MDP Env
    # state is all multi agent state
    # action is all multi agent action
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numWolves + numSheep))
    possibleWolvesIds = wolvesID
    possibleSheepIds = sheepsID
    numAgents = numWolves + numSheep
    # NOTE(review): block count is tied to wolf count (5 - numWolves) — confirm
    # this matches the training setup these models came from.
    numBlocks = 5 - numWolves
    blocksID = list(range(numAgents, numAgents + numBlocks))
    numEntities = numAgents + numBlocks

    # Physical properties per entity class (sizes, speed caps; blocks are static).
    sheepSize = 0.05
    wolfSize = 0.075
    blockSize = 0.2
    sheepMaxSpeed = 1.3 * 1
    wolfMaxSpeed = 1.0 * 1
    blockMaxSpeed = None  # blocks never move, so no speed cap applies
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [blockSize] * numBlocks
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheep + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True]* numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    # Transition dynamics: forces -> integration -> next state.
    reshapeActionInTransit = lambda action: action  # identity: actions already env-shaped here
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                          getCollisionForce, getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit, applyActionForce,
                                       applyEnvironForce, integrateState)

    # Rewards: wolves get +1 per catch, sheep get -1 when caught (sheep reward is
    # constructed but only the wolf reward feeds the sampled trajectories below).
    isCollision = IsCollision(getPosFromAgentState)
    collisonRewardWolf = 1
    punishForOutOfBoundForWolf = lambda stata: 0  # wolves unpunished for leaving bounds ('stata' is a harmless typo)
    rewardWolf = RewardCentralControlPunishBond(wolvesID, sheepsID, entitiesSizeList,
                                                getPosFromAgentState, isCollision,
                                                punishForOutOfBoundForWolf, collisonRewardWolf)
    collisonRewardSheep = -1
    punishForOutOfBoundForSheep = PunishForOutOfBound()
    rewardSheep = RewardCentralControlPunishBond(sheepsID, wolvesID,
                                                 entitiesSizeList, getPosFromAgentState, isCollision,
                                                 punishForOutOfBoundForSheep, collisonRewardSheep)

    forwardOneStep = ForwardOneStep(transit, rewardWolf)
    reset = ResetMultiAgentChasing(numAgents, numBlocks)
    isTerminal = lambda state: False  # fixed-length episodes: never terminate early
    maxRunningSteps = 101
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset, forwardOneStep)

    ## MDP Policy
    worldDim = 2
    actionDim = worldDim * 2 + 1  # 2D world: +/- per axis plus a no-op dimension — TODO confirm encoding
    # NOTE(review): layer width scales with (numWolves - 1); zero width if numWolves == 1.
    layerWidth = [64 * (numWolves - 1), 64 * (numWolves - 1)]

    # Sheep Part
    # ------------ model ------------------------
    # sheepConcernSelfOnly selects how many sheep each sheep policy observes:
    # 'selfSheep' -> observe 1 sheep (itself); 'allSheep' -> observe all numSheep.
    if sheepConcern == 'selfSheep':
        sheepConcernSelfOnly = 1
    if sheepConcern == 'allSheep':
        sheepConcernSelfOnly = 0
    numSheepToObserveWhenSheepSameOrDiff = [numSheep, 1]
    numSheepToObserve = numSheepToObserveWhenSheepSameOrDiff[sheepConcernSelfOnly]
    print(numSheepToObserve)

    # Restore sheep models trained against BOTH wolf reward schemes and pool them;
    # a random pooled model is drawn per trajectory below.
    sheepModelListOfDiffWolfReward = []
    sheepType = 'mixed'
    if sheepType == 'mixed':
        sheepPrefixList = ['maddpgIndividWolf', 'maddpg']
    else:
        sheepPrefixList = [sheepType]
    for sheepPrefix in sheepPrefixList:
        # Observation layout as seen by the sheep networks (wolves, then observed
        # sheep, then blocks).
        wolvesIDForSheepObserve = list(range(numWolves))
        sheepsIDForSheepObserve = list(range(numWolves, numSheepToObserve + numWolves))
        blocksIDForSheepObserve = list(range(numSheepToObserve + numWolves,
                                             numSheepToObserve + numWolves + numBlocks))
        observeOneAgentForSheep = lambda agentID: Observe(agentID, wolvesIDForSheepObserve,
                                                          sheepsIDForSheepObserve, blocksIDForSheepObserve,
                                                          getPosFromAgentState, getVelFromAgentState)
        observeSheep = lambda state: [observeOneAgentForSheep(agentID)(state)
                                      for agentID in range(numWolves + numSheepToObserve)]
        obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
        # Probe a reset state to derive per-agent observation sizes for model building.
        initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
        obsShapeSheep = [initObsForSheepParams[obsID].shape[0]
                         for obsID in range(len(initObsForSheepParams))]
        buildSheepModels = BuildMADDPGModels(actionDim, numWolves + numSheepToObserve, obsShapeSheep)
        sheepModelsList = [buildSheepModels(layerWidth, agentID)
                           for agentID in range(numWolves, numWolves + numSheepToObserve)]
        dirName = os.path.dirname(__file__)
        maxEpisode = 60000
        print(sheepPrefix)
        sheepFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(numWolves, numSheepToObserve,
                                                                    numBlocks, maxEpisode)
        sheepModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                        sheepPrefix + sheepFileName + str(i) + '60000eps')
                           for i in range(numWolves, numWolves + numSheepToObserve)]
        # Side-effect-only list comprehension: loads checkpoint weights into each model.
        [restoreVariables(model, path) for model, path in zip(sheepModelsList, sheepModelPaths)]
        sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList

    # Sheep Policy Function
    reshapeAction = ReshapeAction()
    actOneStepOneModelSheep = ActOneStep(actByPolicyTrainNoisy)  # sheep act WITH exploration noise
    # Sheep Generate Action
    numAllSheepModels = len(sheepModelListOfDiffWolfReward)

    # Wolves Part
    # ------------ model ------------------------
    # Wolves observe everything: all wolves, all sheep, all blocks.
    wolvesIDForWolfObserve = list(range(numWolves))
    sheepsIDForWolfObserve = list(range(numWolves, numSheep + numWolves))
    blocksIDForWolfObserve = list(range(numSheep + numWolves, numSheep + numWolves + numBlocks))
    observeOneAgentForWolf = lambda agentID: Observe(agentID, wolvesIDForWolfObserve,
                                                     sheepsIDForWolfObserve, blocksIDForWolfObserve,
                                                     getPosFromAgentState, getVelFromAgentState)
    observeWolf = lambda state: [observeOneAgentForWolf(agentID)(state)
                                 for agentID in range(numWolves + numSheep)]
    obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
    initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
    obsShapeWolf = [initObsForWolfParams[obsID].shape[0]
                    for obsID in range(len(initObsForWolfParams))]
    buildWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheep, obsShapeWolf)
    layerWidthForWolf = [64 * (numWolves - 1), 64 * (numWolves - 1)]
    wolfModelsList = [buildWolfModels(layerWidthForWolf, agentID) for agentID in range(numWolves)]

    # Checkpoint prefix encodes which reward scheme the wolves were trained under.
    if wolfType == 'sharedReward':
        prefix = 'maddpg'
    if wolfType == 'individualReward':
        prefix = 'maddpgIndividWolf'
    wolfFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(numWolves,
                                                               numSheep, numBlocks, maxEpisode)
    wolfModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                   prefix + wolfFileName + str(i) + '60000eps')
                      for i in range(numWolves)]
    print(numWolves, obsShapeWolf, wolfModelPaths)
    [restoreVariables(model, path) for model, path in zip(wolfModelsList, wolfModelPaths)]

    # Wolf policy: deterministic (no-noise) net output, then wrapped in a fixed-cov
    # Gaussian and sampled — i.e. a small stochastic perturbation around the mean.
    actionDimReshaped = 2
    cov = [0.03 ** 2 for _ in range(actionDimReshaped)]
    buildGaussian = BuildGaussianFixCov(cov)
    actOneStepOneModelWolf = ActOneStep(actByPolicyTrainNoNoisy)
    composeWolfPolicy = lambda wolfModel: lambda state: sampleFromContinuousSpace(buildGaussian(
        tuple(reshapeAction(actOneStepOneModelWolf(wolfModel, observeWolf(state))))))
    #actOneStepOneModelWolf = ActOneStep(actByPolicyTrainNoisy)
    #composeWolfPolicy = lambda wolfModel: lambda state: tuple(reshapeAction(actOneStepOneModelSheep(wolfModel, observeWolf(state))))
    wolvesSampleActions = [composeWolfPolicy(wolfModel) for wolfModel in wolfModelsList]

    trajectories = []
    for trajectoryId in range(self.numTrajectories):
        # Each trajectory pairs every sheep with a sheep model drawn at random
        # from the pooled (both-reward-scheme) sheep models.
        sheepModelsForPolicy = [sheepModelListOfDiffWolfReward[np.random.choice(numAllSheepModels)]
                                for sheepId in possibleSheepIds]
        if sheepConcernSelfOnly:
            # Self-concerned sheep: policy returns a degenerate (prob 1) action
            # distribution; SampleActionOnFixedIntention restricts the state each
            # sheep sees to wolves + itself + blocks.
            composeSheepPolicy = lambda sheepModel : lambda state: {tuple(reshapeAction(actOneStepOneModelSheep(sheepModel, observeSheep(state)))): 1}
            sheepChooseActionMethod = sampleFromDistribution
            sheepSampleActions = [SampleActionOnFixedIntention(selfId, possibleWolvesIds,
                                                               composeSheepPolicy(sheepModel),
                                                               sheepChooseActionMethod, blocksID)
                                  for selfId, sheepModel in zip(possibleSheepIds, sheepModelsForPolicy)]
        else:
            # All-sheep-concerned: policy acts directly on the full observation.
            composeSheepPolicy = lambda sheepModel: lambda state: tuple(reshapeAction(actOneStepOneModelSheep(sheepModel, observeSheep(state))))
            sheepSampleActions = [composeSheepPolicy(sheepModel) for sheepModel in sheepModelsForPolicy]
        # Joint action sampler: wolves first, then sheep (matches entity ID order).
        allIndividualSampleActions = wolvesSampleActions + sheepSampleActions
        sampleAction = lambda state: [sampleIndividualAction(state)
                                      for sampleIndividualAction in allIndividualSampleActions]
        trajectory = sampleTrajectory(sampleAction)
        trajectories.append(trajectory)

    trajectoryFixedParameters = {'maxRunningSteps': maxRunningSteps}
    self.saveTrajectoryByParameters(trajectories, trajectoryFixedParameters, parameters)
    print(np.mean([len(tra) for tra in trajectories]))  # mean episode length, for a quick sanity check

    # visualize
    if visualizeTraj:
        wolfColor = np.array([0.85, 0.35, 0.35])
        sheepColor = np.array([0.35, 0.85, 0.35])
        blockColor = np.array([0.25, 0.25, 0.25])
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheep + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
        trajToRender = np.concatenate(trajectories)
        render(trajToRender)
def __call__(self, parameters):
    """Roll out trajectories where ONE wolf is replaced by a single-goal ("perturbed") policy.

    Builds the chasing MDP, restores (a) sheep models trained against both wolf
    selfishness levels, (b) one perturbed wolf trained to chase a single
    designated sheep, and (c) the remaining wolves trained with all sheep
    visible. The perturbed wolf replaces the wolf at ``perturbedWolfID`` in the
    joint policy; ``self.numTrajectories`` trajectories are sampled and saved.

    Parameters (dict keys read from ``parameters``):
        numWolves (int), numSheep (int),
        wolfType: 'individualReward' -> wolfSelfish 1.0, otherwise 0.0,
        perturbedWolfID: index of the wolf to replace,
        perturbedWolfGoalID: index (into sheepsID) of the sheep the perturbed
            wolf was trained to chase.

    NOTE(review): relies on instance attributes ``self.numTrajectories`` and
    ``self.saveTrajectoryByParameters`` set elsewhere (class header not visible).
    """
    print(parameters)
    numWolves = parameters['numWolves']
    numSheep = parameters['numSheep']
    numBlocks = 2
    # Selfishness of the wolf reward: 1.0 = each wolf keeps its own reward,
    # 0.0 = fully shared reward.
    wolfSelfish = 1.0 if parameters[
        'wolfType'] == 'individualReward' else 0.0
    perturbedWolfID = parameters['perturbedWolfID']
    perturbedWolfGoalID = parameters['perturbedWolfGoalID']

    ## MDP Env
    numAgents = numWolves + numSheep
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numWolves + numSheep))
    blocksID = list(range(numAgents, numEntities))

    # Physical properties per entity class (blocks are static obstacles).
    sheepSize = 0.05
    wolfSize = 0.075
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [
        blockSize
    ] * numBlocks
    costActionRatio = 0.0  # action cost disabled for this evaluation
    sheepSpeedMultiplier = 1.0
    sheepMaxSpeed = 1.3 * sheepSpeedMultiplier
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None  # blocks never move
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheep + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    collisionReward = 1  # for evaluation, count # of bites
    isCollision = IsCollision(getPosFromAgentState)
    rewardAllWolves = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                 isCollision, collisionReward, wolfSelfish)
    # Scalar team reward = sum over all wolves' per-wolf rewards.
    rewardWolf = lambda state, action, nextState: np.sum(
        rewardAllWolves(state, action, nextState))

    # Transition dynamics: forces -> integration -> next state.
    reshapeActionInTransit = lambda action: action  # identity: actions already env-shaped
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID,
                                        entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                          entitiesSizeList, getCollisionForce,
                                          getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList,
                                    massList, entityMaxSpeedList,
                                    getVelFromAgentState,
                                    getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit,
                                       applyActionForce, applyEnvironForce,
                                       integrateState)
    forwardOneStep = ForwardOneStep(transit, rewardWolf)

    reset = ResetMultiAgentChasingWithSeed(numAgents, numBlocks)
    isTerminal = lambda state: False  # fixed-length episodes: never terminate early
    maxRunningStepsToSample = 101
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, isTerminal,
                                        reset, forwardOneStep)

    ## MDP Policy
    worldDim = 2
    actionDim = worldDim * 2 + 1  # 2D world: +/- per axis plus a no-op dimension — TODO confirm encoding
    layerWidth = [128, 128]
    maxTimeStep = 75
    maxEpisode = 60000
    dirName = os.path.dirname(__file__)

    # ------------ sheep recover variables ------------------------
    # Restore single-sheep-observing sheep models trained against both wolf
    # selfishness levels (0.0 and 1.0) and pool them; one pooled model is drawn
    # at random per sheep per trajectory below.
    numSheepToObserve = 1
    sheepModelListOfDiffWolfReward = []
    sheepTypeList = [0.0, 1.0]
    for sheepType in sheepTypeList:
        # Observation layout as seen by the sheep networks.
        wolvesIDForSheepObserve = list(range(numWolves))
        sheepsIDForSheepObserve = list(
            range(numWolves, numSheepToObserve + numWolves))
        blocksIDForSheepObserve = list(
            range(numSheepToObserve + numWolves,
                  numSheepToObserve + numWolves + numBlocks))
        observeOneAgentForSheep = lambda agentID: Observe(
            agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
            blocksIDForSheepObserve, getPosFromAgentState,
            getVelFromAgentState)
        observeSheep = lambda state: [
            observeOneAgentForSheep(agentID)(state)
            for agentID in range(numWolves + numSheepToObserve)
        ]
        obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
        # Probe a reset state to derive per-agent observation sizes.
        initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
        obsShapeSheep = [
            initObsForSheepParams[obsID].shape[0]
            for obsID in range(len(initObsForSheepParams))
        ]
        buildSheepModels = BuildMADDPGModels(actionDim,
                                             numWolves + numSheepToObserve,
                                             obsShapeSheep)
        sheepModelsList = [
            buildSheepModels(layerWidth, agentID)
            for agentID in range(numWolves, numWolves + numSheepToObserve)
        ]
        sheepFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
            numWolves, numSheepToObserve, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, costActionRatio, sheepType)
        sheepModelPaths = [
            os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                         sheepFileName + str(i))
            for i in range(numWolves, numWolves + numSheepToObserve)
        ]
        # Side-effect-only list comprehension: loads checkpoint weights.
        [
            restoreVariables(model, path)
            for model, path in zip(sheepModelsList, sheepModelPaths)
        ]
        sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList

    # # actOneStep = ActOneStep(actByPolicyTrainNoisy) #TODO
    actOneStep = ActOneStep(actByPolicyTrainNoNoisy)  # deterministic (no-noise) acting for evaluation
    numAllSheepModels = len(sheepModelListOfDiffWolfReward)

    # ------------ wolves recover variables ------------------------
    # ------------ Recover one perturbed wolf for comparison -------
    # The perturbed wolf was trained seeing only ONE sheep (its goal), so its
    # observation includes all wolves, just that sheep, and the blocks.
    numSheepForPerturbedWolf = 1
    wolvesIDForPerturbedWolf = wolvesID
    sheepsIDForPerturbedWolf = [sheepsID[perturbedWolfGoalID]]
    blocksIDForPerturbedWolf = list(
        range(numWolves + numSheep,
              numEntities))  # skip the unattended sheep id
    observeOneAgentForPerturbedWolf = lambda agentID: Observe(
        agentID, wolvesIDForPerturbedWolf, sheepsIDForPerturbedWolf,
        blocksIDForPerturbedWolf, getPosFromAgentState, getVelFromAgentState)
    observePerturbedWolf = lambda state: [
        observeOneAgentForPerturbedWolf(agentID)(state)
        for agentID in wolvesIDForPerturbedWolf + sheepsIDForPerturbedWolf
    ]
    initObsForPerturbedWolfParams = observePerturbedWolf(reset())
    obsShapePerturbedWolf = [
        initObsForPerturbedWolfParams[obsID].shape[0]
        for obsID in range(len(initObsForPerturbedWolfParams))
    ]
    buildPerturbedWolfModels = BuildMADDPGModels(
        actionDim, numWolves + numSheepForPerturbedWolf, obsShapePerturbedWolf)
    layerWidthForWolf = [128, 128]
    perturbedWolfModel = buildPerturbedWolfModels(layerWidthForWolf,
                                                  perturbedWolfID)
    perturbedWolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
        numWolves, numSheepForPerturbedWolf, numBlocks, maxEpisode,
        maxTimeStep, sheepSpeedMultiplier, costActionRatio, wolfSelfish)
    perturbedWolfModelPath = os.path.join(
        dirName, '..', '..', 'data', 'preTrainModel',
        perturbedWolfFileName + str(perturbedWolfID))
    restoreVariables(perturbedWolfModel, perturbedWolfModelPath)

    # ------------ Recover other wolves trained with multiple goals -------
    # Unperturbed wolves observe everything: all wolves, all sheep, all blocks.
    wolvesIDForWolfObserve = wolvesID
    sheepsIDForWolfObserve = sheepsID
    blocksIDForWolfObserve = blocksID
    observeOneAgentForWolf = lambda agentID: Observe(
        agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
        blocksIDForWolfObserve, getPosFromAgentState, getVelFromAgentState)
    observeWolf = lambda state: [
        observeOneAgentForWolf(agentID)(state)
        for agentID in range(numWolves + numSheep)
    ]
    obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
    initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
    obsShapeWolf = [
        initObsForWolfParams[obsID].shape[0]
        for obsID in range(len(initObsForWolfParams))
    ]
    buildWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheep,
                                        obsShapeWolf)
    layerWidthForWolf = [128, 128]
    wolfModelsList = [
        buildWolfModels(layerWidthForWolf, agentID)
        for agentID in range(numWolves)
    ]
    wolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
        numWolves, numSheep, numBlocks, maxEpisode, maxTimeStep,
        sheepSpeedMultiplier, costActionRatio, wolfSelfish)
    wolfModelPaths = [
        os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                     wolfFileName + str(i)) for i in range(numWolves)
    ]
    [
        restoreVariables(model, path)
        for model, path in zip(wolfModelsList, wolfModelPaths)
    ]

    # ------------ compose policy ---------------------
    # Near-zero covariance: the Gaussian wrap is effectively deterministic here.
    actionDimReshaped = 2
    cov = [0.00000000001**2 for _ in range(actionDimReshaped)]
    buildGaussian = BuildGaussianFixCov(cov)
    reshapeAction = ReshapeAction()
    # unperturbed policy
    composeWolfPolicy = lambda wolfModel: lambda state: sampleFromContinuousSpace(
        buildGaussian(
            tuple(reshapeAction(actOneStep(wolfModel, observeWolf(state))))
        ))
    wolvesSampleActions = [
        composeWolfPolicy(wolfModel) for wolfModel in wolfModelsList
    ]
    # perturbed policy
    composePerturbedWolfPolicy = lambda perturbedModel: lambda state: sampleFromContinuousSpace(
        buildGaussian(
            tuple(
                reshapeAction(
                    actOneStep(perturbedModel, observePerturbedWolf(state))
                ))))
    # Swap the perturbed wolf's policy into a COPY so wolvesSampleActions stays intact.
    wolvesSampleActionsPerturbed = wolvesSampleActions.copy()
    wolvesSampleActionsPerturbed[
        perturbedWolfID] = composePerturbedWolfPolicy(perturbedWolfModel)

    trajectories = []
    for trajectoryId in range(self.numTrajectories):
        # Each trajectory pairs every sheep with a pooled sheep model at random.
        sheepModelsForPolicy = [
            sheepModelListOfDiffWolfReward[np.random.choice(
                numAllSheepModels)] for sheepId in sheepsID
        ]
        # Degenerate (prob 1) action distribution per sheep; the fixed-intention
        # wrapper restricts each sheep's view to wolves + itself + blocks.
        composeSheepPolicy = lambda sheepModel: lambda state: {
            tuple(
                reshapeAction(actOneStep(sheepModel, observeSheep(state)))):
            1
        }
        sheepChooseActionMethod = sampleFromDistribution
        sheepSampleActions = [
            SampleActionOnFixedIntention(selfId, wolvesID,
                                         composeSheepPolicy(sheepModel),
                                         sheepChooseActionMethod, blocksID)
            for selfId, sheepModel in zip(sheepsID, sheepModelsForPolicy)
        ]
        # Joint action sampler: wolves (with the perturbed one swapped in) then sheep.
        allIndividualSampleActionsPerturbed = wolvesSampleActionsPerturbed + sheepSampleActions
        sampleActionPerturbed = lambda state: [
            sampleIndividualAction(state)
            for sampleIndividualAction in allIndividualSampleActionsPerturbed
        ]
        trajectory = sampleTrajectory(sampleActionPerturbed)
        trajectories.append(trajectory)

    trajectoryFixedParameters = {
        'maxRunningStepsToSample': maxRunningStepsToSample
    }
    self.saveTrajectoryByParameters(trajectories, trajectoryFixedParameters,
                                    parameters)