def main():
    """Wire up a 2-agent sheep-vs-wolf chasing task on a 360x360 board.

    Builds the reset/transition objects, the terminal predicate, a pygame
    render, and the discounted-reward pipeline.

    NOTE(review): as visible in this chunk the function only constructs these
    objects (it ends at `accumulateReward`) and never runs an episode or
    returns anything — confirm against version control that the training /
    sampling tail was not lost.
    """
    # Eight discrete movement vectors (the 8 compass directions, magnitude ~10).
    actionSpace = [[10, 0], [7, 7], [0, 10], [-7, 7], [-10, 0], [-7, -7], [0, -10], [7, -7]]
    numActionSpace = len(actionSpace)
    numStateSpace = 4
    # Both agents are centred at (180, 180); the *Noise arrays are handed to
    # the reset objects (presumably to randomise start positions — confirm the
    # exact semantics in ag.SheepPositionReset / ag.WolfPositionReset).
    initSheepPosition = np.array([180, 180])
    initWolfPosition = np.array([180, 180])
    # NOTE(review): the two velocity arrays below are never used in this block.
    initSheepVelocity = np.array([0, 0])
    initWolfVelocity = np.array([0, 0])
    initSheepPositionNoise = np.array([90, 150])
    initWolfPositionNoise = np.array([0, 60])
    sheepPositionReset = ag.SheepPositionReset(initSheepPosition, initSheepPositionNoise)
    wolfPositionReset = ag.WolfPositionReset(initWolfPosition, initWolfPositionNoise)
    # Per-agent state is (x, y) only; positionIndex selects it from the state vector.
    numOneAgentState = 2
    positionIndex = [0, 1]
    xBoundary = [0, 360]
    yBoundary = [0, 360]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
    sheepPositionTransition = ag.SheepPositionTransition(
        numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    wolfPositionTransition = ag.WolfPositionTransition(numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    numAgent = 2
    sheepId = 0
    wolfId = 1
    transitionFunction = env.TransitionFunction(sheepId, wolfId, sheepPositionReset, wolfPositionReset, sheepPositionTransition, wolfPositionTransition)
    # Episode terminates when sheep and wolf are closer than 15 pixels.
    minDistance = 15
    isTerminal = env.IsTerminal(sheepId, wolfId, numOneAgentState, positionIndex, minDistance)
    # Rendering: window sized to the board; first colour (green) is the sheep,
    # the remaining grey entries cover other drawn circles.
    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    screenColor = [255, 255, 255]
    circleColorList = [[50, 255, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50]]
    circleSize = 8
    saveImage = False
    saveImageFile = 'image'
    render = env.Render(numAgent, numOneAgentState, positionIndex, screen, screenColor, circleColorList, circleSize, saveImage, saveImageFile)
    # Reward: -1 per step alive plus +20 on the terminal (capture) step —
    # presumably this main trains/evaluates the wolf's perspective; confirm
    # against reward.RewardFunctionTerminalPenalty.
    aliveBouns = -1
    deathPenalty = 20
    rewardDecay = 0.99
    rewardFunction = reward.RewardFunctionTerminalPenalty(
        sheepId, wolfId, numOneAgentState, positionIndex, aliveBouns, deathPenalty, isTerminal)
    accumulateReward = AccumulateReward(rewardDecay, rewardFunction)
def __call__(self, condition):
    """Run the attention/belief MCTS escape experiment for one condition dict.

    For each simulated subject and each chasing subtlety, builds the full
    multi-agent environment + belief/attention model + particle-widening MCTS,
    runs `numTrial` trajectories, and appends per-time-step and per-condition
    accuracy measures (identity, attention number, sampled identity) to CSV
    files via the save-path factories held on `self`.

    NOTE(review): indentation below is reconstructed from the collapsed
    original; the per-subject aggregation block at the end is placed at the
    `subIndex` loop level because it consumes the dicts filled across the
    subtlety loop — confirm against version control.
    """
    getSavePath = self.getTrajectorySavePathByCondition(condition)
    getCSVSavePath = self.getCSVSavePathByCondition(condition)
    # Unpack the experimental condition.
    attentionType = condition['attentionType']
    alpha = condition['alphaForStateWidening']
    C = condition['CForStateWidening']
    minAttentionDistance = condition['minAttentionDistance']
    rangeAttention = condition['rangeAttention']
    numTree = condition['numTrees']
    numSimulations = condition['numSimulationTimes']
    actionRatio = condition['actionRatio']
    cBase = condition['cBase']
    burnTime = condition['burnTime']
    numSub = 2  # number of simulated subjects
    allResultsIdentity = []
    allResultsAttention = []
    allResultsIdentitySampled = []
    possibleTrialSubtleties = [11.0, 3.3]#[500.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.001]
    for subIndex in range(numSub):
        meanIdentityPerceptionOnConditions = {}
        meanAttentionPerceptionOnConditions = {}
        meanIdentitySampledOnConditions = {}
        for chasingSubtlety in possibleTrialSubtleties:
            print(numTree, chasingSubtlety, numSimulations, attentionType)
            # --- physical environment: 1 sheep + 24 suspects, one of which is the wolf ---
            numAgent = 25
            sheepId = 0
            suspectorIds = list(range(1, numAgent))
            resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(suspectorIds, [chasingSubtlety])
            # Distances below are specified in visual degrees and scaled to pixels.
            distanceToVisualDegreeRatio = 20
            minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
            minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # no distractor in killzone when init
            isLegalInitPositions = ag.IsLegalInitPositions(sheepId, minInitSheepWolfDistance, minInitSheepDistractorDistance)
            xBoundary = [0, 640]
            yBoundary = [0, 480]
            resetAgentPositions = ag.ResetAgentPositions(xBoundary, yBoundary, numAgent, isLegalInitPositions)
            resetPhysicalState = ag.ResetPhysicalState(sheepId, numAgent, resetAgentPositions, resetWolfIdAndSubtlety)
            # 40 render frames per second, 5 MDP decisions per second
            # => 8 frames per unchanged action.
            numFramePerSecond = 40
            numMDPTimeStepPerSecond = 5
            numFrameWithoutActionChange = int(numFramePerSecond/numMDPTimeStepPerSecond)
            # --- agent policies (speeds are visual-degrees/s converted to px/frame) ---
            sheepActionUpdateFrequency = 1
            minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio/numFramePerSecond)
            maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio/numFramePerSecond)
            warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
            sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency, minSheepSpeed, maxSheepSpeed, warmUpTimeSteps, burnTime)
            wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio/numFramePerSecond)
            maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio/numFramePerSecond)
            wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency, minWolfSpeed, maxWolfSpeed, warmUpTimeSteps)
            # Distractors share the wolf's speed range and update frequency.
            distractorActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio/numFramePerSecond)
            maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio/numFramePerSecond)
            distractorPolicy = ag.DistractorPolicy(distractorActionUpdateFrequency, minDistractorSpeed, maxDistractorSpeed, warmUpTimeSteps)
            preparePolicy = ag.PreparePolicy(sheepId, numAgent, sheepPolicy, wolfPolicy, distractorPolicy)
            updatePhysicalState = ag.UpdatePhysicalState(sheepId, numAgent, preparePolicy)
            xBoundary = [0, 640]
            yBoundary = [0, 480]
            checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
            transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(checkBoundaryAndAdjust)
            # NOTE(review): 0.0 * ratio == 0, so capture requires exact contact — confirm intended.
            minDistance = 0.0 * distanceToVisualDegreeRatio
            isTerminal = env.IsTerminal(sheepId, minDistance)
            # screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
            # screenColor = np.array([0, 0, 0])
            # sheepColor = np.array([0, 255, 0])
            # wolfColor = np.array([255, 0, 0])
            # circleSize = 10
            # saveImage = True
            # saveImageFile = 'image3'
            # render = env.Render(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColor, wolfColor, circleSize, saveImage, saveImageFile, isTerminal)
            render = None
            renderOnInSimulation = False
            transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInSimulation)
            renderOnInPlay = False
            transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInPlay)
            # --- attention model parameters per attention condition ---
            # NOTE(review): if attentionType matches none of these, the names
            # below stay unbound and the Attention constructor raises NameError.
            if attentionType == 'idealObserver':
                attentionLimitation= 4
                precisionPerSlot=500.0
                precisionForUntracked=500.0
                memoryratePerSlot=1.0
                memoryrateForUntracked=1.0
            if attentionType == 'preAttention':
                attentionLimitation= 4
                precisionPerSlot=2.5
                precisionForUntracked=2.5
                memoryratePerSlot=0.45
                memoryrateForUntracked=0.45
            if attentionType == 'attention3':
                attentionLimitation= 3
                precisionPerSlot=8.0
                precisionForUntracked=0.01
                memoryratePerSlot=0.7
                memoryrateForUntracked=0.01
            if attentionType == 'hybrid3':
                attentionLimitation= 3
                precisionPerSlot=8.0
                precisionForUntracked=2.5
                memoryratePerSlot=0.7
                memoryrateForUntracked=0.45
            if attentionType == 'attention4':
                attentionLimitation= 4
                precisionPerSlot=8.0
                precisionForUntracked=0.01
                memoryratePerSlot=0.7
                memoryrateForUntracked=0.01
            if attentionType == 'hybrid4':
                attentionLimitation= 4
                precisionPerSlot=8.0
                precisionForUntracked=2.5
                memoryratePerSlot=0.7
                memoryrateForUntracked=0.45
            if attentionType == 'preAttentionMem0.25':
                attentionLimitation= 4
                precisionPerSlot=2.5
                precisionForUntracked=2.5
                memoryratePerSlot=0.25
                memoryrateForUntracked=0.25
            if attentionType == 'preAttentionMem0.65':
                attentionLimitation= 4
                precisionPerSlot=2.5
                precisionForUntracked=2.5
                memoryratePerSlot=0.65
                memoryrateForUntracked=0.65
            if attentionType == 'preAttentionPre0.5':
                attentionLimitation= 4
                precisionPerSlot=0.5
                precisionForUntracked=0.5
                memoryratePerSlot=0.45
                memoryrateForUntracked=0.45
            if attentionType == 'preAttentionPre4.5':
                attentionLimitation= 4
                precisionPerSlot=4.5
                precisionForUntracked=4.5
                memoryratePerSlot=0.45
                memoryrateForUntracked=0.45
            attention = Attention.AttentionToPrecisionAndDecay(precisionPerSlot, precisionForUntracked, memoryratePerSlot, memoryrateForUntracked)
            transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(numAgent)
            # Belief hypothesis space spans all subtleties, not just the trial's one.
            possibleSubtleties = [500.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.001]
            resetBeliefAndAttention = ba.ResetBeliefAndAttention(sheepId, suspectorIds, possibleSubtleties, attentionLimitation, transferMultiAgentStatesToPositionDF, attention)
            # --- distance-based attention prior over slots ---
            maxAttentionDistance = minAttentionDistance + rangeAttention
            attentionMinDistance = minAttentionDistance * distanceToVisualDegreeRatio
            attentionMaxDistance = maxAttentionDistance * distanceToVisualDegreeRatio
            numStandardErrorInDistanceRange = 4
            calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(attentionMinDistance, attentionMaxDistance, numStandardErrorInDistanceRange)
            attentionSwitch = Attention.AttentionSwitch(attentionLimitation, calDistancePriorOnAttentionSlot)
            computePosterior = calPosterior.CalPosteriorLog(minDistance)
            print(attentionLimitation, attentionMinDistance/distanceToVisualDegreeRatio, attentionMaxDistance/distanceToVisualDegreeRatio)
            # In simulation (inside MCTS) belief/attention never update (np.inf);
            # in play they update on fixed schedules.
            attentionSwitchFrequencyInSimulation = np.inf
            beliefUpdateFrequencyInSimulation = np.inf
            updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF, attentionSwitchFrequencyInSimulation, beliefUpdateFrequencyInSimulation, burnTime)
            attentionSwitchFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
            beliefUpdateFrequencyInPlay = int(0.2 * numMDPTimeStepPerSecond)
            updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF, attentionSwitchFrequencyInPlay, beliefUpdateFrequencyInPlay, burnTime)
            updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(0.2 * numMDPTimeStepPerSecond)
            updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInSimulationRoot)
            updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
            updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInSimulation)
            updatePhysicalStateByBeliefFrequencyInPlay = np.inf
            updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInPlay)
            transitionFunctionInSimulation = env.TransitionFunction(resetPhysicalState, resetBeliefAndAttention, updatePhysicalState, transiteStateWithoutActionChangeInSimulation, updateBeliefAndAttentionInSimulation, updatePhysicalStateByBeliefInSimulation)
            transitionFunctionInPlay = env.TransitionFunction(resetPhysicalState, resetBeliefAndAttention, updatePhysicalState, transiteStateWithoutActionChangeInPlay, updateBeliefAndAttentionInPlay, updatePhysicalStateByBeliefInPlay)
            # --- reward for the escaping sheep: small living bonus, -1 on capture ---
            maxRollOutSteps = 5
            aliveBouns = 1/maxRollOutSteps
            deathPenalty = -1
            rewardFunction = reward.RewardFunctionTerminalPenalty(sheepId, aliveBouns, deathPenalty, isTerminal)
            rewardRollout = lambda state, action, nextState: rewardFunction(state, action)
            # --- sheep action space: 8 headings at actionRatio * minSheepSpeed ---
            numActionSpace = 8
            actionInterval = int(360/(numActionSpace))
            actionMagnitude = actionRatio * minSheepSpeed
            actionSpace = [(np.cos(degreeInPolar) * actionMagnitude, np.sin(degreeInPolar) * actionMagnitude) for degreeInPolar in np.arange(0, 360, actionInterval)/180 * math.pi]
            getActionPrior = lambda state : {action: 1/len(actionSpace) for action in actionSpace}
            # --- MCTS with progressive state widening ---
            cInit = 1
            #cBase = 50
            scoreChild = ScoreChild(cInit, cBase)
            selectAction = SelectAction(scoreChild)
            selectNextState = SelectNextState(selectAction)
            initializeChildren = InitializeChildren(actionSpace, transitionFunctionInSimulation, getActionPrior)
            expand = Expand(isTerminal, initializeChildren)
            pWidening = PWidening(alpha, C)
            expandNewState = ExpandNextState(transitionFunctionInSimulation, pWidening)
            rolloutPolicy = lambda state: actionSpace[np.random.choice(range(numActionSpace))]
            rolloutHeuristic = lambda state: 0
            estimateValue = RollOut(rolloutPolicy, maxRollOutSteps, transitionFunctionInSimulation, rewardRollout, isTerminal, rolloutHeuristic)
            numActionPlaned = 1
            outputAction = OutputAction(numActionPlaned, actionSpace)
            #numSimulations = int(numTotalSimulationTimes/numTree)
            #sheepColorInMcts = np.array([0, 255, 0])
            #wolfColorInMcts = np.array([255, 0, 0])
            #distractorColorInMcts = np.array([255, 255, 255])
            #saveImageMCTS = True
            #mctsRender = env.MctsRender(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColorInMcts, wolfColorInMcts, distractorColorInMcts, circleSize, saveImageMCTS, saveImageFile)
            #mctsRenderOn = False
            #mctsRender = None
            #pg.init()
            #mcts = MCTS(numSimulations, selectChild, expand, rollout, backup, selectAction, mctsRender, mctsRenderOn)
            pwMultipleTrees = PWMultipleTrees(numSimulations, selectAction, selectNextState, expand, expandNewState, estimateValue, backup, outputAction)
            # --- run trials for this (subject, subtlety) cell ---
            maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
            makeDiffSimulationRoot = MakeDiffSimulationRoot(isTerminal, updatePhysicalStateByBeliefInSimulationRoot)
            runMCTSTrjactory = RunMCTSTrjactory(maxRunningSteps, numTree, numActionPlaned, sheepActionUpdateFrequency, transitionFunctionInPlay, isTerminal, makeDiffSimulationRoot, render)
            # NOTE(review): rootAction is computed but not used below.
            rootAction = actionSpace[np.random.choice(range(numActionSpace))]
            numTrial = 10
            trajectories = [runMCTSTrjactory(pwMultipleTrees) for trial in range(numTrial)]
            # NOTE(review): savePath is computed but the pickle save is commented out.
            savePath = getSavePath({'chasingSubtlety': chasingSubtlety, 'subIndex': subIndex})
            #tsl.saveToPickle(trajectories, savePath)
            #def getTrueWolfIndentityAcc(trajectory):
            #    AccTrial = []
            #    for timeStepIndex in range(len(trajectory)):
            #        timeStep = trajectory[timeStepIndex]
            #        wolfId = trajectory[0][0][0][3][0]
            #        wolfIdInEach = timeStep[0][0][3][0]
            #        #print(wolfId, '**', wolfIdInEach)
            #        if (timeStepIndex % 3 == 0) and timeStepIndex >= 11:
            #            AccTrial.append(timeStep[0][1][int(wolfIdInEach) - 1])
            #    meanIdentityAcc = np.mean(AccTrial)
            #    #meanIdentityAcc = np.mean(np.array([timeStep[0][1][int(timeStep[0][0][3][0] - 1)] for timeStep in trajectory])[11:])
            #    return meanIdentityAcc
            # Belief accuracy about the true wolf identity at every time step.
            # (timeStep[0] indexing presumably unpacks (physicalState, belief,
            # attention, sampled) — confirm against the trajectory format.)
            getTrueWolfIndentityAcc = lambda trajectory: np.array([timeStep[0][1][int(timeStep[0][0][3][0] - 1)] for timeStep in trajectory])[:]
            identityPerceptionTimeSeries = np.mean([getTrueWolfIndentityAcc(trajectory) for trajectory in trajectories], axis = 0)
            resultsTimeSeries = pd.DataFrame([identityPerceptionTimeSeries], columns = list(range(len(identityPerceptionTimeSeries))))
            savePathIdentitySeries = getCSVSavePath({'chasingSubtlety': chasingSubtlety, 'measure': 'identity'})
            # Append one row per subject; write the header only for the first subject.
            if subIndex == 0:
                resultsTimeSeries.to_csv(savePathIdentitySeries, mode='a')
            else:
                resultsTimeSeries.to_csv(savePathIdentitySeries, mode='a', header=False)
            meanIdentityPerception = np.mean([getTrueWolfIndentityAcc(trajectory) for trajectory in trajectories])
            meanIdentityPerceptionOnConditions.update({chasingSubtlety: meanIdentityPerception})
            print(meanIdentityPerceptionOnConditions)
            # Attention devoted to the true wolf at every time step.
            getTrueWolfAttentionNumber = lambda trajectory: np.array([timeStep[0][2][int(timeStep[0][0][3][0] - 1)] for timeStep in trajectory])[:]
            attentionNumberTimeSeries = np.mean([getTrueWolfAttentionNumber(trajectory) for trajectory in trajectories], axis = 0)
            resultsAttentionTimeSeries = pd.DataFrame([attentionNumberTimeSeries], columns = list(range(len(attentionNumberTimeSeries))))
            savePathAttentionSeries = getCSVSavePath({'chasingSubtlety': chasingSubtlety, 'measure': 'attentionNumber'})
            if subIndex == 0:
                resultsAttentionTimeSeries.to_csv(savePathAttentionSeries, mode='a')
            else:
                resultsAttentionTimeSeries.to_csv(savePathAttentionSeries, mode='a', header=False)
            meanAttentionPerception = np.mean([getTrueWolfAttentionNumber(trajectory) for trajectory in trajectories])
            meanAttentionPerceptionOnConditions.update({chasingSubtlety: meanAttentionPerception})
            # Whether the identity actually sampled for planning equals the true wolf.
            getSampledWolfIndentityAcc = lambda trajectory: np.array([int(int(timeStep[0][3][0][0]) == int(timeStep[0][0][3][0])) for timeStep in trajectory])[:]
            identitySampledTimeSeries = np.mean([getSampledWolfIndentityAcc(trajectory) for trajectory in trajectories], axis = 0)
            resultsSampledTimeSeries = pd.DataFrame([identitySampledTimeSeries], columns = list(range(len(identitySampledTimeSeries))))
            savePathIdentitySampledSeries = getCSVSavePath({'chasingSubtlety': chasingSubtlety, 'measure': 'identitySampled'})
            if subIndex == 0:
                resultsSampledTimeSeries.to_csv(savePathIdentitySampledSeries, mode='a')
            else:
                resultsSampledTimeSeries.to_csv(savePathIdentitySampledSeries, mode='a', header=False)
            meanIdentitySampled = np.mean([getSampledWolfIndentityAcc(trajectory) for trajectory in trajectories])
            meanIdentitySampledOnConditions.update({chasingSubtlety: meanIdentitySampled})
        # Per-subject aggregates: re-written cumulatively after each subject
        # (to_csv without mode='a' overwrites, giving a progressive save).
        allResultsIdentity.append(meanIdentityPerceptionOnConditions)
        resultsIdentity = pd.DataFrame(allResultsIdentity)
        csvSavePathIdentity = getCSVSavePath({'measure': 'identity'})
        resultsIdentity.to_csv(csvSavePathIdentity)
        allResultsAttention.append(meanAttentionPerceptionOnConditions)
        resultsAttention = pd.DataFrame(allResultsAttention)
        csvSavePathAttention = getCSVSavePath({'measure': 'attentionNumber'})
        resultsAttention.to_csv(csvSavePathAttention)
        allResultsIdentitySampled.append(meanIdentitySampledOnConditions)
        resultsIdentitySampled = pd.DataFrame(allResultsIdentitySampled)
        csvSavePathIdentitySampled = getCSVSavePath({'measure': 'identitySampled'})
        resultsIdentitySampled.to_csv(csvSavePathIdentitySampled)
def main():
    """Train an offline advantage actor-critic (TF1 graph mode) on the
    sheep-vs-wolf chasing task, then checkpoint both networks.

    Fixes relative to the previous revision:
      * `actorSaver` / `criticSaver` were referenced at save time but never
        created -> NameError. Each `tf.train.Saver()` is now built inside its
        own graph (a Saver only covers variables of the graph it is created in).
      * The critic's hidden layers used the actor's `numActorFC*Unit` (50)
        although `numCriticFC1..4Unit` (100) were defined and never used; the
        critic now uses its own layer sizes as intended.
      * Actor hidden layer 3 used `numActorFC2Unit` instead of
        `numActorFC3Unit` (same value, so behavior is unchanged).
    """
    #tf.set_random_seed(123)
    #np.random.seed(123)
    # Eight discrete movement vectors (8 compass directions, magnitude ~10).
    actionSpace = [[10, 0], [7, 7], [0, 10], [-7, 7], [-10, 0], [-7, -7], [0, -10], [7, -7]]
    numActionSpace = len(actionSpace)
    numStateSpace = 4
    # Network widths: actor 3x50, critic 4x100.
    numActorFC1Unit = 50
    numActorFC2Unit = 50
    numActorFC3Unit = 50
    numCriticFC1Unit = 100
    numCriticFC2Unit = 100
    numCriticFC3Unit = 100
    numCriticFC4Unit = 100
    learningRateActor = 1e-4
    learningRateCritic = 3e-4

    # ----- actor graph: state -> action distribution, policy-gradient loss -----
    actorGraph = tf.Graph()
    with actorGraph.as_default():
        with tf.name_scope("inputs"):
            state_ = tf.placeholder(tf.float32, [None, numStateSpace], name="state_")
            # NOTE(review): int32 one-hot labels are fed to
            # softmax_cross_entropy_with_logits_v2, which expects labels of the
            # logits' (float) dtype — verify the feed path casts them.
            actionLabel_ = tf.placeholder(tf.int32, [None, numActionSpace], name="actionLabel_")
            advantages_ = tf.placeholder(tf.float32, [None, ], name="advantages_")
        with tf.name_scope("hidden"):
            initWeight = tf.random_uniform_initializer(-0.03, 0.03)
            initBias = tf.constant_initializer(0.01)
            fullyConnected1_ = tf.layers.dense(inputs=state_, units=numActorFC1Unit, activation=tf.nn.relu, kernel_initializer=initWeight, bias_initializer=initBias)
            fullyConnected2_ = tf.layers.dense(inputs=fullyConnected1_, units=numActorFC2Unit, activation=tf.nn.relu, kernel_initializer=initWeight, bias_initializer=initBias)
            fullyConnected3_ = tf.layers.dense(inputs=fullyConnected2_, units=numActorFC3Unit, activation=tf.nn.relu, kernel_initializer=initWeight, bias_initializer=initBias)
            allActionActivation_ = tf.layers.dense(
                inputs=fullyConnected3_, units=numActionSpace, activation=None, kernel_initializer=initWeight, bias_initializer=initBias)
        with tf.name_scope("outputs"):
            actionDistribution_ = tf.nn.softmax(allActionActivation_, name='actionDistribution_')
            # Wrapped in a no-op multiply only to attach a stable tensor name.
            actionEntropy_ = tf.multiply(tfp.distributions.Categorical(
                probs=actionDistribution_).entropy(), 1, name='actionEntropy_')
            negLogProb_ = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=allActionActivation_, labels=actionLabel_, name='negLogProb_')
            # Policy-gradient loss: advantage-weighted negative log-likelihood.
            loss_ = tf.reduce_mean(tf.multiply(negLogProb_, advantages_), name='loss_')
            actorLossSummary = tf.summary.scalar("ActorLoss", loss_)
        with tf.name_scope("train"):
            trainOpt_ = tf.train.AdamOptimizer(learningRateActor, name='adamOpt_').minimize(loss_)
        # Saver must live in this graph to cover the actor's variables.
        actorSaver = tf.train.Saver()
        actorInit = tf.global_variables_initializer()
    actorModel = tf.Session(graph=actorGraph)
    actorModel.run(actorInit)

    # ----- critic graph: state -> scalar value, MSE loss against MC targets -----
    criticGraph = tf.Graph()
    with criticGraph.as_default():
        with tf.name_scope("inputs"):
            state_ = tf.placeholder(tf.float32, [None, numStateSpace], name="state_")
            valueTarget_ = tf.placeholder(tf.float32, [None, 1], name="valueTarget_")
        with tf.name_scope("hidden"):
            initWeight = tf.random_uniform_initializer(-0.03, 0.03)
            initBias = tf.constant_initializer(0.001)
            fullyConnected1_ = tf.layers.dense(inputs=state_, units=numCriticFC1Unit, activation=tf.nn.relu, kernel_initializer=initWeight, bias_initializer=initBias)
            fullyConnected2_ = tf.layers.dense(inputs=fullyConnected1_, units=numCriticFC2Unit, activation=tf.nn.relu, kernel_initializer=initWeight, bias_initializer=initBias)
            fullyConnected3_ = tf.layers.dense(inputs=fullyConnected2_, units=numCriticFC3Unit, activation=tf.nn.relu, kernel_initializer=initWeight, bias_initializer=initBias)
            fullyConnected4_ = tf.layers.dense(inputs=fullyConnected3_, units=numCriticFC4Unit, activation=tf.nn.relu, kernel_initializer=initWeight, bias_initializer=initBias)
        with tf.name_scope("outputs"):
            value_ = tf.layers.dense(inputs=fullyConnected4_, units=1, activation=None, name='value_', kernel_initializer=initWeight, bias_initializer=initBias)
            diff_ = tf.subtract(valueTarget_, value_, name='diff_')
            loss_ = tf.reduce_mean(tf.square(diff_), name='loss_')
            criticLossSummary = tf.summary.scalar("CriticLoss", loss_)
        with tf.name_scope("train"):
            trainOpt_ = tf.train.AdamOptimizer(learningRateCritic, name='adamOpt_').minimize(loss_)
        criticSaver = tf.train.Saver()
        criticInit = tf.global_variables_initializer()
    criticModel = tf.Session(graph=criticGraph)
    criticModel.run(criticInit)

    # ----- environment wiring: 360x360 board, sheep id 0, wolf id 1 -----
    xBoundary = [0, 360]
    yBoundary = [0, 360]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
    initSheepPosition = np.array([180, 180])
    initWolfPosition = np.array([180, 180])
    # NOTE(review): the two velocity arrays are never used in this function.
    initSheepVelocity = np.array([0, 0])
    initWolfVelocity = np.array([0, 0])
    initSheepPositionNoise = np.array([60, 120])
    initWolfPositionNoise = np.array([0, 60])
    sheepPositionReset = ag.SheepPositionReset(initSheepPosition, initSheepPositionNoise)
    wolfPositionReset = ag.WolfPositionReset(initWolfPosition, initWolfPositionNoise)
    numOneAgentState = 2
    positionIndex = [0, 1]
    sheepPositionTransition = ag.SheepPositionTransition(
        numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    wolfPositionTransition = ag.WolfPositionTransition(numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    numAgent = 2
    sheepId = 0
    wolfId = 1
    transitionFunction = env.TransitionFunction(sheepId, wolfId, sheepPositionReset, wolfPositionReset, sheepPositionTransition, wolfPositionTransition)
    minDistance = 15
    isTerminal = env.IsTerminal(sheepId, wolfId, numOneAgentState, positionIndex, minDistance)
    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    screenColor = [255, 255, 255]
    circleColorList = [[50, 255, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50]]
    circleSize = 8
    saveImage = False
    saveImageFile = 'image'
    render = env.Render(numAgent, numOneAgentState, positionIndex, screen, screenColor, circleColorList, circleSize, saveImage, saveImageFile)
    # Reward: -1 per step alive, +20 on the terminal (capture) step.
    aliveBouns = -1
    deathPenalty = 20
    rewardDecay = 0.99
    rewardFunction = reward.RewardFunctionTerminalPenalty(
        sheepId, wolfId, numOneAgentState, positionIndex, aliveBouns, deathPenalty, isTerminal)
    accumulateReward = AccumulateReward(rewardDecay, rewardFunction)

    # ----- training -----
    maxTimeStep = 150
    sampleTrajectory = SampleTrajectory(maxTimeStep, transitionFunction, isTerminal)
    approximatePolicy = ApproximatePolicy(actionSpace)
    trainCritic = TrainCriticMonteCarloTensorflow(accumulateReward)
    estimateAdvantage = EstimateAdvantageMonteCarlo(accumulateReward)
    trainActor = TrainActorMonteCarloTensorflow(actionSpace)
    numTrajectory = 50
    maxEpisode = 602
    actorCritic = OfflineAdvantageActorCritic(numTrajectory, maxEpisode, render)
    # NOTE(review): `approximateValue` is not defined in this function — it
    # must come from module scope; verify it exists (it looks like an
    # `ApproximateValue(...)` construction was dropped at some point).
    trainedActorModel, trainedCriticModel = actorCritic(
        actorModel, criticModel, approximatePolicy, sampleTrajectory, trainCritic, approximateValue, estimateAdvantage, trainActor)

    # ----- checkpoint both trained sessions -----
    savePathActor = 'data/tmpModelActor.ckpt'
    savePathCritic = 'data/tmpModelCritic.ckpt'
    with actorModel.as_default():
        actorSaver.save(trainedActorModel, savePathActor)
    with criticModel.as_default():
        criticSaver.save(trainedCriticModel, savePathCritic)
def evaluate(cInit, cBase):
    """Evaluate MCTS (UCB constants `cInit`, `cBase`) on the chasing task.

    Builds the 180x180 sheep-vs-wolf environment, runs `numTestingIterations`
    MCTS episodes from fresh random starts, and returns the mean episode
    length as a one-element list (interface kept for the experiment driver).

    Changes: hoisted `import datetime` out of the per-iteration loop (it was
    re-executed every episode); dropped the unused locals `numStateSpace`,
    `initSheepVelocity`, `initWolfVelocity`.
    """
    import datetime  # loop-invariant: used only for per-episode timestamps

    # Eight discrete movement vectors (8 compass directions, magnitude ~10).
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7)]
    numActionSpace = len(actionSpace)
    getActionPrior = GetActionPrior(actionSpace)

    # Both agents start centred at (90, 90); *Noise arrays randomise resets.
    initSheepPosition = np.array([90, 90])
    initWolfPosition = np.array([90, 90])
    initSheepPositionNoise = np.array([40, 60])
    initWolfPositionNoise = np.array([0, 20])
    sheepPositionReset = ag.SheepPositionReset(initSheepPosition, initSheepPositionNoise)
    wolfPositionReset = ag.WolfPositionReset(initWolfPosition, initWolfPositionNoise)
    numOneAgentState = 2
    positionIndex = [0, 1]
    xBoundary = [0, 180]
    yBoundary = [0, 180]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
    sheepPositionTransition = ag.SheepPositionTransition(
        numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    wolfSpeed = 7
    wolfPositionTransition = ag.WolfPositionTransition(numOneAgentState, positionIndex, checkBoundaryAndAdjust, wolfSpeed)
    numAgent = 2
    sheepId = 0
    wolfId = 1
    transition = env.TransitionFunction(sheepId, wolfId, sheepPositionReset, wolfPositionReset, sheepPositionTransition, wolfPositionTransition)
    # Episode terminates (capture) within 10 pixels.
    minDistance = 10
    isTerminal = env.IsTerminal(sheepId, wolfId, numOneAgentState, positionIndex, minDistance)

    # Rendering (images are saved each step since saveImage=True).
    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    screenColor = [255, 255, 255]
    circleColorList = [[50, 255, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50]]
    circleSize = 8
    saveImage = True
    saveImageFile = 'image'
    render = env.Render(numAgent, numOneAgentState, positionIndex, screen, screenColor, circleColorList, circleSize, saveImage, saveImageFile)

    # Sheep reward: small living bonus, -1 on capture.
    aliveBouns = 0.05
    deathPenalty = -1
    rewardFunction = reward.RewardFunctionTerminalPenalty(
        sheepId, wolfId, numOneAgentState, positionIndex, aliveBouns, deathPenalty, isTerminal)

    # Hyper-parameters
    numSimulations = 600
    maxRunningSteps = 70

    # MCTS algorithm
    # Select child
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)
    # expand
    initializeChildren = InitializeChildren(actionSpace, transition, getActionPrior)
    expand = Expand(transition, isTerminal, initializeChildren)
    #selectNextRoot = selectNextRoot
    # Rollout
    rolloutPolicy = lambda state: actionSpace[np.random.choice(
        range(numActionSpace))]
    maxRollOutSteps = 50
    rollout = RollOut(rolloutPolicy, maxRollOutSteps, transition, rewardFunction, isTerminal)
    # NOTE(review): `backup` and `selectNextRoot` are not defined in this
    # function — they must come from module scope; verify both exist.
    mcts = MCTS(numSimulations, selectChild, expand, rollout, backup, selectNextRoot)

    runMCTS = RunMCTS(mcts, maxRunningSteps, isTerminal, render)

    # The same randomly drawn action keys every episode's root-node id dict.
    rootAction = actionSpace[np.random.choice(range(numActionSpace))]
    numTestingIterations = 70
    episodeLengths = []
    for step in range(numTestingIterations):
        print(datetime.datetime.now())
        # Calling the transition with (None, None) produces a fresh initial state.
        state, action = None, None
        initState = transition(state, action)
        #optimal = math.ceil((np.sqrt(np.sum(np.power(initState[0:2] - initState[2:4], 2))) - minDistance )/10)
        rootNode = Node(id={rootAction: initState}, num_visited=0, sum_value=0, is_expanded=True)
        episodeLength = runMCTS(rootNode)
        episodeLengths.append(episodeLength)
    meanEpisodeLength = np.mean(episodeLengths)
    print("mean episode length is", meanEpisodeLength)
    return [meanEpisodeLength]
def __call__(self, condition):
    """Run the attention/belief MCTS chasing experiment for one condition.

    For each simulated subject and each trial subtlety level, builds the full
    multi-agent environment, belief/attention machinery and progressive-widening
    MCTS planner, generates trajectories, pickles them, and appends per-subject
    summary measures (identity accuracy, perception accuracy, action deviation,
    velocity difference, escape rate) to CSV files.

    Args:
        condition: dict of experiment parameters (attention type, widening
            constants, tree/simulation counts, softmax parameters, ...).

    NOTE(review): loop nesting below is reconstructed from a whitespace-mangled
    original; the per-subject aggregation/CSV writes are placed after the
    subtlety loop, which matches how the accumulator dicts are used.
    """
    getSavePath = self.getTrajectorySavePathByCondition(condition)
    # Unpack the experimental condition.
    attentionType = condition['attentionType']
    alpha = condition['alphaForStateWidening']      # presumably progressive-widening exponent — TODO confirm
    C = condition['CForStateWidening']              # presumably progressive-widening coefficient — TODO confirm
    minAttentionDistance = condition['minAttentionDistance']
    rangeAttention = condition['rangeAttention']
    numTree = condition['numTrees']
    numSimulations = condition['numSimulationTimes']
    actionRatio = condition['actionRatio']
    cBase = condition['cBase']
    burnTime = condition['burnTime']
    softParaForIdentity = condition['softId']
    softParaForSubtlety = condition['softSubtlety']

    numSub = 5                      # number of simulated subjects
    allIdentityResults = []
    allPerceptionResults = []
    allActionResults = []
    allVelDiffResults = []
    allResults = []
    possibleTrialSubtleties = [0.92, 0.01]
    for subIndex in range(numSub):
        # Per-subject accumulators keyed by chasing subtlety.
        meanIdentiyOnConditions = {}
        meanPerceptionOnConditions = {}
        meanActionOnConditions = {}
        meanVelDiffOnConditions = {}
        meanEscapeOnConditions = {}
        for chasingSubtlety in possibleTrialSubtleties:
            print(numTree, chasingSubtlety, numSimulations, attentionType)

            # --- Environment: 1 sheep + 24 suspects (one is the wolf).
            numAgent = 25
            sheepId = 0
            suspectorIds = list(range(1, numAgent))
            resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(
                suspectorIds, [chasingSubtlety])
            distanceToVisualDegreeRatio = 20    # pixels per visual degree
            minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
            # no distractor in killzone when init
            minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio
            isLegalInitPositions = ag.IsLegalInitPositions(
                sheepId, minInitSheepWolfDistance,
                minInitSheepDistractorDistance)
            xBoundary = [0, 640]
            yBoundary = [0, 480]
            resetAgentPositions = ag.ResetAgentPositions(
                xBoundary, yBoundary, numAgent, isLegalInitPositions)
            resetPhysicalState = ag.ResetPhysicalState(
                sheepId, numAgent, resetAgentPositions, resetWolfIdAndSubtlety)

            # --- Timing: several render frames per MDP decision step.
            numFramePerSecond = 20
            numMDPTimeStepPerSecond = 5
            numFrameWithoutActionChange = int(
                numFramePerSecond / numMDPTimeStepPerSecond)

            # --- Agent policies (speeds converted from visual degrees/sec).
            sheepActionUpdateFrequency = 1
            minSheepSpeed = int(
                17.4 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxSheepSpeed = int(
                23.2 * distanceToVisualDegreeRatio / numFramePerSecond)
            warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
            sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency,
                                         minSheepSpeed, maxSheepSpeed,
                                         warmUpTimeSteps, burnTime)
            wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minWolfSpeed = int(
                8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxWolfSpeed = int(
                14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
            wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency, minWolfSpeed,
                                       maxWolfSpeed, warmUpTimeSteps)
            distractorActionUpdateFrequency = int(
                0.2 * numMDPTimeStepPerSecond)
            minDistractorSpeed = int(
                8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxDistractorSpeed = int(
                14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
            distractorPolicy = ag.DistractorPolicy(
                distractorActionUpdateFrequency, minDistractorSpeed,
                maxDistractorSpeed, warmUpTimeSteps)
            preparePolicy = ag.PreparePolicy(sheepId, numAgent, sheepPolicy,
                                             wolfPolicy, distractorPolicy)
            updatePhysicalState = ag.UpdatePhysicalState(
                sheepId, numAgent, preparePolicy)

            xBoundary = [0, 640]
            yBoundary = [0, 480]
            checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(
                xBoundary, yBoundary)
            transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(
                checkBoundaryAndAdjust)
            minDistance = 2.5 * distanceToVisualDegreeRatio  # capture radius
            isTerminal = env.IsTerminal(sheepId, minDistance)

            # Rendering disabled for batch runs.
            render = None
            renderOnInSimulation = False
            transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(
                numFrameWithoutActionChange, isTerminal,
                transiteMultiAgentMotion, render, renderOnInSimulation)
            renderOnInPlay = False
            transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(
                numFrameWithoutActionChange, isTerminal,
                transiteMultiAgentMotion, render, renderOnInPlay)

            # --- Attention model parameters per attention type.
            # (precision = observation noise precision; memory rate = decay.)
            if attentionType == 'idealObserver':
                attentionLimitation = 1
                precisionPerSlot = 500.0
                precisionForUntracked = 500.0
                memoryratePerSlot = 1.0
                memoryrateForUntracked = 1.0
            if attentionType == 'preAttention':
                attentionLimitation = 1
                precisionPerSlot = 2.5
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.45
                memoryrateForUntracked = 0.45
            if attentionType == 'attention3':
                attentionLimitation = 3
                precisionPerSlot = 8.0
                precisionForUntracked = 0.01
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.01
            if attentionType == 'hybrid3':
                attentionLimitation = 3
                precisionPerSlot = 8.0
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.45
            if attentionType == 'attention4':
                attentionLimitation = 4
                precisionPerSlot = 8.0
                precisionForUntracked = 0.01
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.01
            if attentionType == 'hybrid4':
                attentionLimitation = 4
                precisionPerSlot = 8.0
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.45
            if attentionType == 'preAttentionMem0.25':
                attentionLimitation = 1
                precisionPerSlot = 2.5
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.25
                memoryrateForUntracked = 0.25
            if attentionType == 'preAttentionMem0.65':
                attentionLimitation = 1
                precisionPerSlot = 2.5
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.65
                memoryrateForUntracked = 0.65
            if attentionType == 'preAttentionPre0.5':
                attentionLimitation = 1
                precisionPerSlot = 0.5
                precisionForUntracked = 0.5
                memoryratePerSlot = 0.45
                memoryrateForUntracked = 0.45
            if attentionType == 'preAttentionPre4.5':
                attentionLimitation = 1
                precisionPerSlot = 4.5
                precisionForUntracked = 4.5
                memoryratePerSlot = 0.45
                memoryrateForUntracked = 0.45
            attention = Attention.AttentionToPrecisionAndDecay(
                precisionPerSlot, precisionForUntracked, memoryratePerSlot,
                memoryrateForUntracked)

            # --- Belief over which suspect is the wolf and how subtle it is.
            transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(
                numAgent)
            possibleSubtleties = [500.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.01]
            resetBeliefAndAttention = ba.ResetBeliefAndAttention(
                sheepId, suspectorIds, possibleSubtleties, attentionLimitation,
                transferMultiAgentStatesToPositionDF, attention)

            maxAttentionDistance = minAttentionDistance + rangeAttention
            attentionMinDistance = minAttentionDistance * distanceToVisualDegreeRatio
            attentionMaxDistance = maxAttentionDistance * distanceToVisualDegreeRatio
            numStandardErrorInDistanceRange = 4
            calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(
                attentionMinDistance, attentionMaxDistance,
                numStandardErrorInDistanceRange)
            attentionSwitch = Attention.AttentionSwitch(
                attentionLimitation, calDistancePriorOnAttentionSlot)
            computePosterior = calPosterior.CalPosteriorLog(minDistance)
            print(attentionLimitation,
                  attentionMinDistance / distanceToVisualDegreeRatio,
                  attentionMaxDistance / distanceToVisualDegreeRatio)

            # Belief/attention never updates inside MCTS simulation (np.inf
            # frequency), but does update during play.
            attentionSwitchFrequencyInSimulation = np.inf
            beliefUpdateFrequencyInSimulation = np.inf
            updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(
                attention, computePosterior, attentionSwitch,
                transferMultiAgentStatesToPositionDF,
                attentionSwitchFrequencyInSimulation,
                beliefUpdateFrequencyInSimulation, burnTime)
            attentionSwitchFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
            beliefUpdateFrequencyInPlay = int(0.2 * numMDPTimeStepPerSecond)
            updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(
                attention, computePosterior, attentionSwitch,
                transferMultiAgentStatesToPositionDF,
                attentionSwitchFrequencyInPlay, beliefUpdateFrequencyInPlay,
                burnTime)

            updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(
                0.2 * numMDPTimeStepPerSecond)
            updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInSimulationRoot,
                softParaForIdentity, softParaForSubtlety)
            # Re-sampling variant with soft parameters pinned to 1.
            reUpdatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInSimulationRoot,
                softParaForIdentity=1, softParaForSubtlety=1)
            updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
            updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInSimulation,
                softParaForIdentity, softParaForSubtlety)
            updatePhysicalStateByBeliefFrequencyInPlay = np.inf
            updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInPlay,
                softParaForIdentity, softParaForSubtlety)

            transitionFunctionInSimulation = env.TransitionFunction(
                resetPhysicalState, resetBeliefAndAttention,
                updatePhysicalState,
                transiteStateWithoutActionChangeInSimulation,
                updateBeliefAndAttentionInSimulation,
                updatePhysicalStateByBeliefInSimulation)
            transitionFunctionInPlay = env.TransitionFunction(
                resetPhysicalState, resetBeliefAndAttention,
                updatePhysicalState, transiteStateWithoutActionChangeInPlay,
                updateBeliefAndAttentionInPlay,
                updatePhysicalStateByBeliefInPlay)

            # --- Reward for rollouts: survival bonus + terminal penalty.
            maxRollOutSteps = 5
            aliveBouns = 1 / maxRollOutSteps
            deathPenalty = -1
            rewardFunction = reward.RewardFunctionTerminalPenalty(
                sheepId, aliveBouns, deathPenalty, isTerminal)
            rewardRollout = lambda state, action, nextState: rewardFunction(
                state, action)

            # --- Action space: 8 directions, magnitude scaled by actionRatio.
            numActionSpace = 8
            actionInterval = int(360 / (numActionSpace))
            actionMagnitude = actionRatio * minSheepSpeed * numFramePerSecond
            actionSpace = [
                (np.cos(degreeInPolar) * actionMagnitude,
                 np.sin(degreeInPolar) * actionMagnitude)
                for degreeInPolar in np.arange(0, 360, actionInterval) / 180 * math.pi
            ]
            getActionPrior = lambda state: {
                action: 1 / len(actionSpace) for action in actionSpace
            }

            # --- Progressive-widening MCTS over multiple trees.
            cInit = 1
            scoreChild = ScoreChild(cInit, cBase)
            selectAction = SelectAction(scoreChild)
            selectNextState = SelectNextState(selectAction)
            initializeChildren = InitializeChildren(
                actionSpace, transitionFunctionInSimulation, getActionPrior)
            expand = Expand(isTerminal, initializeChildren)
            pWidening = PWidening(alpha, C)
            expandNewState = ExpandNextState(
                transitionFunctionInSimulation, pWidening)
            rolloutPolicy = lambda state: actionSpace[np.random.choice(
                range(numActionSpace))]
            rolloutHeuristic = lambda state: 0
            estimateValue = RollOut(rolloutPolicy, maxRollOutSteps,
                                    transitionFunctionInSimulation,
                                    rewardRollout, isTerminal,
                                    rolloutHeuristic)
            numActionPlaned = 1
            outputAction = OutputAction(numActionPlaned, actionSpace)
            pwMultipleTrees = PWMultipleTrees(numSimulations, selectAction,
                                              selectNextState, expand,
                                              expandNewState, estimateValue,
                                              backup, outputAction)

            maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
            makeDiffSimulationRoot = MakeDiffSimulationRoot(
                isTerminal, updatePhysicalStateByBeliefInSimulationRoot,
                reUpdatePhysicalStateByBeliefInSimulationRoot)
            runMCTSTrjactory = RunMCTSTrjactory(
                maxRunningSteps, numTree, numActionPlaned,
                sheepActionUpdateFrequency, transitionFunctionInPlay,
                isTerminal, makeDiffSimulationRoot, render)

            rootAction = actionSpace[np.random.choice(range(numActionSpace))]
            numTrial = 10
            trajectories = [
                runMCTSTrjactory(pwMultipleTrees) for trial in range(numTrial)
            ]
            savePath = getSavePath({
                'chasingSubtlety': chasingSubtlety,
                'subIndex': subIndex
            })
            tsl.saveToPickle(trajectories, savePath)
            # NOTE(review): redundant — getCSVSavePath is recomputed here every
            # iteration from the same condition; one assignment would suffice.
            getCSVSavePath = self.getCSVSavePathByCondition(condition)
            startStatsIndex = 1    # skip the first time step(s) in statistics

            # Fraction of belief hypotheses whose wolf identity is correct.
            # Trajectory layout (assumed from indexing — TODO confirm):
            # timeStep[0][0][3] = (wolfId, wolfSubtlety); timeStep[5] holds
            # sampled (id, subtlety) hypotheses.
            def getTrueWolfIdAcc(trajectory):
                AccTrial = []
                for timeStepIndex in range(len(trajectory) - 2):
                    timeStep = trajectory[timeStepIndex]
                    wolfId = timeStep[0][0][3][0]
                    wolfSubtlety = timeStep[0][0][3][1]
                    if timeStepIndex >= startStatsIndex:
                        IdAcc = [
                            int(IdAndSubtlety[0] == wolfId)
                            for IdAndSubtlety in timeStep[5]
                        ]
                        AccTrial.append(IdAcc)
                meanAcc = np.mean(AccTrial)
                return meanAcc

            meanIdentiy = np.mean([
                getTrueWolfIdAcc(trajectory) for trajectory in trajectories
            ])
            meanIdentiyOnConditions.update({chasingSubtlety: meanIdentiy})

            # Fraction of hypotheses correct on BOTH identity and subtlety.
            def getTrueWolfIdSubtletyAcc(trajectory):
                AccTrial = []
                for timeStepIndex in range(len(trajectory) - 2):
                    timeStep = trajectory[timeStepIndex]
                    wolfId = timeStep[0][0][3][0]
                    wolfSubtlety = timeStep[0][0][3][1]
                    if timeStepIndex >= startStatsIndex:
                        IdAndSubtletyAcc = [
                            int((IdAndSubtlety[0] == wolfId) and
                                (IdAndSubtlety[1] == wolfSubtlety))
                            for IdAndSubtlety in timeStep[5]
                        ]
                        AccTrial.append(IdAndSubtletyAcc)
                meanAcc = np.mean(AccTrial)
                return meanAcc

            meanPerception = np.mean([
                getTrueWolfIdSubtletyAcc(trajectory)
                for trajectory in trajectories
            ])
            meanPerceptionOnConditions.update(
                {chasingSubtlety: meanPerception})

            # Mean deviation (in 45-degree buckets) between the action taken
            # and the action that would be taken under the true wolf identity.
            def getActionDeviationLevel(trajectory):
                AccTrial = []
                for timeStepIndex in range(len(trajectory) - 2):
                    timeStep = trajectory[timeStepIndex]
                    actionReal = np.array(timeStep[1])
                    actionOnTruth = np.array(timeStep[4])
                    if timeStepIndex >= startStatsIndex:
                        deviateLevel = round(
                            agf.computeAngleBetweenVectors(
                                actionReal, actionOnTruth) / (math.pi / 4))
                        AccTrial.append(deviateLevel)
                meanAcc = np.mean(AccTrial)
                return meanAcc

            meanAction = np.mean([
                getActionDeviationLevel(trajectory)
                for trajectory in trajectories
            ])
            meanActionOnConditions.update({chasingSubtlety: meanAction})

            # Ratio of velocity error vs. truth-conditioned velocity to the
            # error vs. the opposite-conditioned velocity.
            def getVelocityDiff(trajectory):
                AccTrial = []
                for timeStepIndex in range(len(trajectory) - 2):
                    timeStep = trajectory[timeStepIndex]
                    velReal = np.array(timeStep[0][0][0][1][0])
                    velWithActionOnTruth = np.array(timeStep[2][1][0])
                    velWithActionOppo = np.array(timeStep[3][1][0])
                    if timeStepIndex >= startStatsIndex:
                        velDiffNormWithActionOnTruth = np.linalg.norm(
                            (velReal - velWithActionOnTruth))
                        velDiffNormWithActionOppo = np.linalg.norm(
                            (velReal - velWithActionOppo))
                        velDiffRatio = 1.0 * velDiffNormWithActionOnTruth / velDiffNormWithActionOppo
                        AccTrial.append(velDiffRatio)
                meanAcc = np.mean(AccTrial)
                return meanAcc

            meanVelDiff = np.mean([
                getVelocityDiff(trajectory) for trajectory in trajectories
            ])
            meanVelDiffOnConditions.update({chasingSubtlety: meanVelDiff})

            # Escape = trajectory ran (nearly) the full allowed length.
            getEscapeAcc = lambda trajectory: int(
                len(trajectory) >= (maxRunningSteps - 2))
            meanEscape = np.mean(
                [getEscapeAcc(trajectory) for trajectory in trajectories])
            meanEscapeOnConditions.update({chasingSubtlety: meanEscape})
            print(meanEscapeOnConditions)

        # --- Persist cumulative per-subject results after each subject;
        # files are overwritten with the growing DataFrame each pass.
        allResults.append(meanEscapeOnConditions)
        results = pd.DataFrame(allResults)
        escapeCSVSavePath = getCSVSavePath({'measure': 'escape'})
        results.to_csv(escapeCSVSavePath)
        allIdentityResults.append(meanIdentiyOnConditions)
        identityResults = pd.DataFrame(allIdentityResults)
        identityCSVSavePath = getCSVSavePath({'measure': 'identity'})
        identityResults.to_csv(identityCSVSavePath)
        allPerceptionResults.append(meanPerceptionOnConditions)
        perceptionResults = pd.DataFrame(allPerceptionResults)
        perceptionCSVSavePath = getCSVSavePath({'measure': 'percetion'})
        perceptionResults.to_csv(perceptionCSVSavePath)
        allActionResults.append(meanActionOnConditions)
        actionResults = pd.DataFrame(allActionResults)
        actionCSVSavePath = getCSVSavePath({'measure': 'action'})
        actionResults.to_csv(actionCSVSavePath)
        allVelDiffResults.append(meanVelDiffOnConditions)
        velDiffResults = pd.DataFrame(allVelDiffResults)
        velDiffCSVSavePath = getCSVSavePath({'measure': 'velDiff'})
        velDiffResults.to_csv(velDiffCSVSavePath)
def evaluate(numTree, chasingSubtlety, numTotalSimulationTimes, cInit, cBase):
    """Run one rendered multi-tree MCTS chasing episode set and report results.

    Builds the 25-agent (sheep + 24 suspects) environment with the
    attention/belief model, plans with multi-tree MCTS, and returns mean
    episode length plus the escape rate.

    Args:
        numTree: number of MCTS trees; each gets
            numTotalSimulationTimes / numTree simulations.
        chasingSubtlety: subtlety level of the true wolf's chasing.
        numTotalSimulationTimes: total MCTS simulation budget.
        cInit, cBase: UCB exploration constants for CalculateScore.

    Returns:
        [meanEpisodeLength, escapeRate].

    NOTE(review): loop/conditional nesting near the end is reconstructed from
    a whitespace-mangled original.
    """
    print(numTree, chasingSubtlety, numTotalSimulationTimes, cInit, cBase)

    # --- Action space: 8 unit vectors at 45-degree intervals.
    numActionSpace = 8
    actionInterval = int(360 / numActionSpace)
    actionSpace = [
        (np.cos(degreeInPolar), np.sin(degreeInPolar))
        for degreeInPolar in np.arange(0, 360, actionInterval) / 180 * math.pi
    ]
    getActionPrior = GetActionPrior(actionSpace)

    # --- 2D Env: sheep starts at center with no noise; wolf/distractors
    # spread around the center.
    initSheepPosition = np.array([320, 240])
    initSheepPositionNoise = np.array([0, 0])
    resetSheepState = ag.ResetAgentState(initSheepPosition,
                                         initSheepPositionNoise)
    initWolfOrDistractorPosition = np.array([320, 240])
    initWolfOrDistractorPositionNoise = np.array([125, 230])
    resetWolfOrDistractorState = ag.ResetAgentState(
        initWolfOrDistractorPosition, initWolfOrDistractorPositionNoise)
    numAgent = 25
    sheepId = 0
    suspectorIds = list(range(1, numAgent))
    resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(suspectorIds,
                                                       [chasingSubtlety])
    resetPhysicalState = ag.ResetPhysicalState(sheepId, numAgent,
                                               resetSheepState,
                                               resetWolfOrDistractorState,
                                               resetWolfIdAndSubtlety)

    # --- Timing: 60 render frames/sec, 5 MDP decisions/sec.
    numFramePerSecond = 60
    numMDPTimeStepPerSecond = 5
    numFrameWithoutActionChange = int(
        numFramePerSecond / numMDPTimeStepPerSecond)

    # --- Agent policies (speeds converted from visual degrees/sec).
    sheepActionUpdateFrequency = 1
    distanceToVisualDegreeRatio = 20    # pixels per visual degree
    minSheepSpeed = int(
        17.4 * distanceToVisualDegreeRatio / numFramePerSecond)
    maxSheepSpeed = int(
        23.2 * distanceToVisualDegreeRatio / numFramePerSecond)
    warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
    sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency, minSheepSpeed,
                                 maxSheepSpeed, warmUpTimeSteps)
    wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
    minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
    maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
    wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency, minWolfSpeed,
                               maxWolfSpeed, warmUpTimeSteps)
    distractorActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
    minDistractorSpeed = int(
        8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
    maxDistractorSpeed = int(
        14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
    distractorPolicy = ag.DistractorPolicy(distractorActionUpdateFrequency,
                                           minDistractorSpeed,
                                           maxDistractorSpeed, warmUpTimeSteps)
    preparePolicy = ag.PreparePolicy(sheepId, numAgent, sheepPolicy,
                                     wolfPolicy, distractorPolicy)
    updatePhysicalState = ag.UpdatePhysicalState(numAgent, preparePolicy)

    xBoundary = [0, 640]
    yBoundary = [0, 480]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
    transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(
        checkBoundaryAndAdjust)
    minDistance = 2.5 * distanceToVisualDegreeRatio    # capture radius
    isTerminal = env.IsTerminal(sheepId, minDistance)

    # --- Rendering: on during play, off during MCTS simulation.
    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    screenColor = np.array([255, 255, 255])
    sheepColor = np.array([0, 255, 0])
    wolfColor = np.array([255, 0, 0])
    circleSize = 10
    saveImage = True
    saveImageFile = 'image1'
    render = env.Render(numAgent, screen, screenColor, sheepColor, wolfColor,
                        circleSize, saveImage, saveImageFile)
    renderOnInSimulation = False
    transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(
        numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion,
        render, renderOnInSimulation)
    renderOnInPlay = True
    transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(
        numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion,
        render, renderOnInPlay)

    # --- Attention model: fixed 'hybrid4'-style parameters.
    attentionLimitation = 4
    precisionPerSlot = 8.0
    precisionForUntracked = 2.5
    memoryratePerSlot = 0.7
    memoryrateForUntracked = 0.45
    attention = Attention.AttentionToPrecisionAndDecay(precisionPerSlot,
                                                       precisionForUntracked,
                                                       memoryratePerSlot,
                                                       memoryrateForUntracked)

    # --- Belief over wolf identity and subtlety.
    transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(
        numAgent)
    possibleSubtleties = [500, 11, 3.3, 1.83, 0.92, 0.31]
    resetBeliefAndAttention = ba.ResetBeliefAndAttention(
        sheepId, suspectorIds, possibleSubtleties, attentionLimitation,
        transferMultiAgentStatesToPositionDF, attention)
    maxDistance = 7.5 * distanceToVisualDegreeRatio
    numStandardErrorInDistanceRange = 2
    calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(
        minDistance, maxDistance, numStandardErrorInDistanceRange)
    attentionSwitch = Attention.AttentionSwitch(
        attentionLimitation, calDistancePriorOnAttentionSlot)
    computePosterior = calPosterior.CalPosteriorLog(minDistance)

    # Belief/attention frozen inside simulation; periodic during play.
    attentionSwitchFrequencyInSimulation = np.inf
    beliefUpdateFrequencyInSimulation = np.inf
    updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(
        attention, computePosterior, attentionSwitch,
        transferMultiAgentStatesToPositionDF,
        attentionSwitchFrequencyInSimulation,
        beliefUpdateFrequencyInSimulation)
    attentionSwitchFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
    beliefUpdateFrequencyInPlay = int(0.2 * numMDPTimeStepPerSecond)
    updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(
        attention, computePosterior, attentionSwitch,
        transferMultiAgentStatesToPositionDF, attentionSwitchFrequencyInPlay,
        beliefUpdateFrequencyInPlay)

    updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(
        0.2 * numMDPTimeStepPerSecond)
    updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
        updatePhysicalStateByBeliefFrequencyInSimulationRoot)
    updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
    updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(
        updatePhysicalStateByBeliefFrequencyInSimulation)
    updatePhysicalStateByBeliefFrequencyInPlay = np.inf
    updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(
        updatePhysicalStateByBeliefFrequencyInPlay)

    transitionFunctionInSimulation = env.TransitionFunction(
        resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
        transiteStateWithoutActionChangeInSimulation,
        updateBeliefAndAttentionInSimulation,
        updatePhysicalStateByBeliefInSimulation)
    transitionFunctionInPlay = env.TransitionFunction(
        resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
        transiteStateWithoutActionChangeInPlay, updateBeliefAndAttentionInPlay,
        updatePhysicalStateByBeliefInPlay)

    # --- Reward: survival bonus scaled to rollout length + terminal penalty.
    maxRollOutSteps = 5
    aliveBouns = 1 / maxRollOutSteps
    deathPenalty = -1
    rewardFunction = reward.RewardFunctionTerminalPenalty(
        sheepId, aliveBouns, deathPenalty, isTerminal)

    # --- MCTS algorithm.
    # Select child
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)
    # expand
    initializeChildren = InitializeChildren(actionSpace,
                                            transitionFunctionInSimulation,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)
    # Rollout: uniform-random action policy.
    rolloutPolicy = lambda state: actionSpace[np.random.choice(
        range(numActionSpace))]
    rollout = RollOut(rolloutPolicy, maxRollOutSteps,
                      transitionFunctionInSimulation, rewardFunction,
                      isTerminal)
    numActionPlaned = 1
    selectAction = SelectAction(numActionPlaned, actionSpace)
    # Budget split evenly across trees.
    numSimulations = int(numTotalSimulationTimes / numTree)

    # In-tree rendering for debugging the search.
    sheepColorInMcts = np.array([0, 255, 0])
    wolfColorInMcts = np.array([255, 0, 0])
    distractorColorInMcts = np.array([0, 0, 0])
    mctsRender = env.MctsRender(numAgent, screen, xBoundary[1], yBoundary[1],
                                screenColor, sheepColorInMcts, wolfColorInMcts,
                                distractorColorInMcts, circleSize, saveImage,
                                saveImageFile)
    mctsRenderOn = True
    mcts = MCTS(numSimulations, selectChild, expand, rollout, backup,
                selectAction, mctsRender, mctsRenderOn)

    maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
    makeDiffSimulationRoot = MakeDiffSimulationRoot(
        isTerminal, updatePhysicalStateByBeliefInSimulationRoot)
    runMCTS = RunMCTS(maxRunningSteps, numTree, numActionPlaned,
                      transitionFunctionInPlay, isTerminal,
                      makeDiffSimulationRoot, render)

    rootAction = actionSpace[np.random.choice(range(numActionSpace))]
    numTestingIterations = 1
    episodeLengths = []
    escape = 0
    step = 1
    # Episodes shorter than 1 second are discarded and re-run (step is only
    # advanced, and stats only recorded, for sufficiently long episodes).
    while step <= numTestingIterations:
        import datetime
        print(datetime.datetime.now())
        episodeLength = runMCTS(mcts)
        if episodeLength >= 1 * numMDPTimeStepPerSecond:
            step = step + 1
            episodeLengths.append(episodeLength)
            # Escape = sheep survived to (nearly) the step limit.
            if episodeLength >= maxRunningSteps - 10:
                escape = escape + 1
    meanEpisodeLength = np.mean(episodeLengths)
    print("mean episode length is", meanEpisodeLength,
          escape / numTestingIterations)
    return [meanEpisodeLength, escape / numTestingIterations]
def main():
    """Train and evaluate advantage actor-critic models over network sizes.

    Builds the 2-agent sheep/wolf environment, generates an actor-critic
    model for every (total-neuron-count, depth) combination, trains each with
    offline Monte-Carlo advantage actor-critic, evaluates mean episode reward,
    and draws the comparison across architectures.
    """
    # Eight compass-direction actions.
    actionSpace = [[10, 0], [7, 7], [0, 10], [-7, 7], [-10, 0], [-7, -7],
                   [0, -10], [7, -7]]
    numActionSpace = len(actionSpace)
    numStateSpace = 4           # sheep (x, y) + wolf (x, y)

    # --- Environment.
    xBoundary = [0, 360]
    yBoundary = [0, 360]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
    initSheepPosition = np.array([180, 180])
    initWolfPosition = np.array([180, 180])
    initSheepPositionNoise = np.array([120, 120])
    initWolfPositionNoise = np.array([60, 60])
    sheepPositionReset = ag.SheepPositionReset(initSheepPosition,
                                               initSheepPositionNoise,
                                               checkBoundaryAndAdjust)
    wolfPositionReset = ag.WolfPositionReset(initWolfPosition,
                                             initWolfPositionNoise,
                                             checkBoundaryAndAdjust)
    numOneAgentState = 2
    positionIndex = [0, 1]
    sheepPositionTransition = ag.SheepPositionTransition(
        numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    wolfPositionTransition = ag.WolfPositionTransition(
        numOneAgentState, positionIndex, checkBoundaryAndAdjust)
    numAgent = 2
    sheepId = 0
    wolfId = 1
    transitionFunction = env.TransitionFunction(sheepId, wolfId,
                                                sheepPositionReset,
                                                wolfPositionReset,
                                                sheepPositionTransition,
                                                wolfPositionTransition)
    minDistance = 15            # capture radius
    isTerminal = env.IsTerminal(sheepId, wolfId, numOneAgentState,
                                positionIndex, minDistance)

    # --- Rendering (image saving disabled for training runs).
    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    screenColor = [255, 255, 255]
    circleColorList = [[50, 255, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50]]
    circleSize = 8
    saveImage = False
    saveImageFile = 'image'
    render = env.Render(numAgent, numOneAgentState, positionIndex, screen,
                        screenColor, circleColorList, circleSize, saveImage,
                        saveImageFile)

    # --- Reward: per-step penalty while alive, bonus when the wolf catches
    # the sheep (this script appears to train the chaser's perspective —
    # TODO confirm against reward.RewardFunctionTerminalPenalty).
    aliveBouns = -1
    deathPenalty = 20
    rewardDecay = 0.99
    rewardFunction = reward.RewardFunctionTerminalPenalty(
        sheepId, wolfId, numOneAgentState, positionIndex, aliveBouns,
        deathPenalty, isTerminal)
    accumulateReward = A2CMC.AccumulateReward(rewardDecay, rewardFunction)

    # --- Actor-critic components.
    maxTimeStep = 150
    sampleTrajectory = A2CMC.SampleTrajectory(maxTimeStep, transitionFunction,
                                              isTerminal)
    approximatePolicy = A2CMC.ApproximatePolicy(actionSpace)
    approximateValue = A2CMC.approximateValue
    trainCritic = A2CMC.TrainCriticMonteCarloTensorflow(accumulateReward)
    estimateAdvantage = A2CMC.EstimateAdvantageMonteCarlo(accumulateReward)
    trainActor = A2CMC.TrainActorMonteCarloTensorflow(actionSpace)
    numTrajectory = 5
    maxEpisode = 1

    # --- Generate models: one per (total neurons, depth); width per layer is
    # total / depth so all models have comparable capacity.
    learningRateActor = 1e-4
    learningRateCritic = 3e-4
    hiddenNeuronNumbers = [128, 256, 512, 1024]
    hiddenDepths = [2, 4, 8]
    generateModel = GenerateActorCriticModel(numStateSpace, numActionSpace,
                                             learningRateActor,
                                             learningRateCritic)
    modelDict = {(n, d): generateModel(d, round(n / d))
                 for n, d in it.product(hiddenNeuronNumbers, hiddenDepths)}
    print("Generated graphs")

    # --- Train. (The original constructed this trainer twice with identical
    # arguments; the redundant first instance has been removed.)
    actorCritic = A2CMC.OfflineAdvantageActorCritic(numTrajectory, maxEpisode,
                                                    render)
    modelTrain = lambda actorModel, criticModel: actorCritic(
        actorModel, criticModel, approximatePolicy, sampleTrajectory,
        trainCritic, approximateValue, estimateAdvantage, trainActor)
    trainedModelDict = {
        key: modelTrain(model[0], model[1])
        for key, model in modelDict.items()
    }
    print("Finished training")

    # --- Evaluate each trained (actor, critic) pair by mean episode reward.
    modelEvaluate = Evaluate(numTrajectory, approximatePolicy,
                             sampleTrajectory, rewardFunction)
    meanEpisodeRewards = {
        key: modelEvaluate(model[0], model[1])
        for key, model in trainedModelDict.items()
    }
    print("Finished evaluating")

    # --- Visualize results across architectures.
    independentVariableNames = ['NeuroTotalNumber', 'layerNumber']
    draw(meanEpisodeRewards, independentVariableNames)
def __call__(self, condition):
    """Run the MCTS escape experiment for one experimental condition.

    For each simulated subject and each chasing subtlety, builds the
    multi-agent chasing environment, the belief/attention model selected by
    ``condition['attentionType']``, and a (possibly multi-tree) MCTS planner;
    runs ``numTrial`` trajectories, pickles them, and records the mean escape
    rate (fraction of trajectories reaching ``maxRunningSteps - 1`` steps).
    All per-subject results are finally written to one CSV.

    Parameters
    ----------
    condition : dict
        Must contain 'attentionType', 'minAttentionDistance',
        'maxAttentionDistance', 'numTrees' and 'totalNumSimulationTimes'.
    """
    getSavePath = self.getTrajectorySavePathByCondition(condition)
    attentionType = condition['attentionType']
    minAttentionDistance = condition['minAttentionDistance']
    maxAttentionDistance = condition['maxAttentionDistance']
    numTree = condition['numTrees']
    numTotalSimulationTimes = condition['totalNumSimulationTimes']
    numSub = 10
    allResults = []
    possibleTrialSubtleties = [80.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.01]

    # Hoisted out of the loops: the original re-called pg.init() on every
    # condition iteration; initializing pygame once is sufficient.
    pg.init()

    for subIndex in range(numSub):
        meanEscapeOnConditions = {}
        for chasingSubtlety in possibleTrialSubtleties:
            print(numTree, chasingSubtlety, numTotalSimulationTimes, attentionType)

            # --- Action space: 8 unit vectors evenly spaced on the circle ---
            numActionSpace = 8
            actionInterval = int(360 / (numActionSpace))
            actionSpace = [(np.cos(degreeInPolar), np.sin(degreeInPolar))
                           for degreeInPolar in np.arange(0, 360, actionInterval) / 180 * math.pi]
            getActionPrior = GetActionPrior(actionSpace)

            # --- Agents and initial positions -------------------------------
            numAgent = 25
            sheepId = 0
            suspectorIds = list(range(1, numAgent))
            resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(suspectorIds, [chasingSubtlety])
            distanceToVisualDegreeRatio = 20  # pixels per visual degree
            minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
            # No distractor allowed inside the kill zone at initialization.
            minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio
            isLegalInitPositions = ag.IsLegalInitPositions(
                sheepId, minInitSheepWolfDistance, minInitSheepDistractorDistance)
            xBoundary = [0, 640]
            yBoundary = [0, 480]
            resetAgentPositions = ag.ResetAgentPositions(
                xBoundary, yBoundary, numAgent, isLegalInitPositions)
            resetPhysicalState = ag.ResetPhysicalState(
                sheepId, numAgent, resetAgentPositions, resetWolfIdAndSubtlety)

            # --- Timing and per-agent policies ------------------------------
            numFramePerSecond = 20
            numMDPTimeStepPerSecond = 5
            numFrameWithoutActionChange = int(numFramePerSecond / numMDPTimeStepPerSecond)
            sheepActionUpdateFrequency = 1
            # Speeds are given in visual degrees/second and converted to px/frame.
            minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio / numFramePerSecond)
            warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
            sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency,
                                         minSheepSpeed, maxSheepSpeed, warmUpTimeSteps)
            wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
            wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency,
                                       minWolfSpeed, maxWolfSpeed, warmUpTimeSteps)
            distractorActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
            distractorPolicy = ag.DistractorPolicy(distractorActionUpdateFrequency,
                                                   minDistractorSpeed, maxDistractorSpeed,
                                                   warmUpTimeSteps)
            preparePolicy = ag.PreparePolicy(sheepId, numAgent, sheepPolicy,
                                             wolfPolicy, distractorPolicy)
            updatePhysicalState = ag.UpdatePhysicalState(sheepId, numAgent, preparePolicy)

            # --- Motion transition and terminal check -----------------------
            # (Original redundantly re-assigned xBoundary/yBoundary here with
            # the same values; the assignments above are reused.)
            checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
            transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(checkBoundaryAndAdjust)
            minDistance = 2.5 * distanceToVisualDegreeRatio
            isTerminal = env.IsTerminal(sheepId, minDistance)

            render = None
            renderOnInSimulation = False
            transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(
                numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion,
                render, renderOnInSimulation)
            renderOnInPlay = False
            transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(
                numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion,
                render, renderOnInPlay)

            # --- Attention model parameters per attentionType ---------------
            # NOTE: 'idealObserver' also overrides the attention-distance
            # bounds taken from `condition`. An unrecognized attentionType
            # leaves these names unbound and fails below, as in the original.
            if attentionType == 'idealObserver':
                attentionLimitation = 1
                precisionPerSlot = 100.0
                precisionForUntracked = 100.0
                memoryratePerSlot = 0.99
                memoryrateForUntracked = 0.99
                minAttentionDistance = 50
                maxAttentionDistance = 51
            elif attentionType == 'preAttention':
                attentionLimitation = 1
                precisionPerSlot = 2.5
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.45
                memoryrateForUntracked = 0.45
            elif attentionType == 'attention3':
                attentionLimitation = 3
                precisionPerSlot = 8.0
                precisionForUntracked = 0.01
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.01
            elif attentionType == 'hybrid3':
                attentionLimitation = 3
                precisionPerSlot = 8.0
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.45
            elif attentionType == 'attention4':
                attentionLimitation = 4
                precisionPerSlot = 8.0
                precisionForUntracked = 0.01
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.01
            elif attentionType == 'hybrid4':
                attentionLimitation = 4
                precisionPerSlot = 8.0
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.45
            attention = Attention.AttentionToPrecisionAndDecay(
                precisionPerSlot, precisionForUntracked,
                memoryratePerSlot, memoryrateForUntracked)

            # --- Belief and attention state ----------------------------------
            transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(numAgent)
            possibleSubtleties = [80.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.01]
            resetBeliefAndAttention = ba.ResetBeliefAndAttention(
                sheepId, suspectorIds, possibleSubtleties, attentionLimitation,
                transferMultiAgentStatesToPositionDF, attention)
            attentionMinDistance = minAttentionDistance * distanceToVisualDegreeRatio
            attentionMaxDistance = maxAttentionDistance * distanceToVisualDegreeRatio
            numStandardErrorInDistanceRange = 4
            calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(
                attentionMinDistance, attentionMaxDistance, numStandardErrorInDistanceRange)
            attentionSwitch = Attention.AttentionSwitch(
                attentionLimitation, calDistancePriorOnAttentionSlot)
            computePosterior = calPosterior.CalPosteriorLog(minDistance)
            # In simulation the belief is frozen (np.inf frequencies); in play
            # it is refreshed periodically.
            attentionSwitchFrequencyInSimulation = np.inf
            beliefUpdateFrequencyInSimulation = np.inf
            updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(
                attention, computePosterior, attentionSwitch,
                transferMultiAgentStatesToPositionDF,
                attentionSwitchFrequencyInSimulation, beliefUpdateFrequencyInSimulation)
            attentionSwitchFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
            beliefUpdateFrequencyInPlay = int(0.2 * numMDPTimeStepPerSecond)
            updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(
                attention, computePosterior, attentionSwitch,
                transferMultiAgentStatesToPositionDF,
                attentionSwitchFrequencyInPlay, beliefUpdateFrequencyInPlay)
            updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(0.6 * numMDPTimeStepPerSecond)
            updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInSimulationRoot)
            updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
            updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInSimulation)
            updatePhysicalStateByBeliefFrequencyInPlay = np.inf
            updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInPlay)

            transitionFunctionInSimulation = env.TransitionFunction(
                resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
                transiteStateWithoutActionChangeInSimulation,
                updateBeliefAndAttentionInSimulation, updatePhysicalStateByBeliefInSimulation)
            transitionFunctionInPlay = env.TransitionFunction(
                resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
                transiteStateWithoutActionChangeInPlay,
                updateBeliefAndAttentionInPlay, updatePhysicalStateByBeliefInPlay)

            # --- Reward and MCTS planner ------------------------------------
            maxRollOutSteps = 5
            aliveBouns = 1 / maxRollOutSteps  # per-step reward, normalized by rollout length
            deathPenalty = -1
            rewardFunction = reward.RewardFunctionTerminalPenalty(
                sheepId, aliveBouns, deathPenalty, isTerminal)
            cInit = 1
            cBase = 100
            calculateScore = CalculateScore(cInit, cBase)
            selectChild = SelectChild(calculateScore)
            initializeChildren = InitializeChildren(
                actionSpace, transitionFunctionInSimulation, getActionPrior)
            expand = Expand(isTerminal, initializeChildren)

            def rolloutPolicy(state):
                # Uniform-random rollout action; ignores the state.
                return actionSpace[np.random.choice(range(numActionSpace))]

            rollout = RollOut(rolloutPolicy, maxRollOutSteps,
                              transitionFunctionInSimulation, rewardFunction, isTerminal)
            numActionPlaned = 1
            selectAction = SelectAction(numActionPlaned, actionSpace)
            # Simulation budget is split evenly across the trees.
            numSimulations = int(numTotalSimulationTimes / numTree)
            mctsRenderOn = False
            mctsRender = None
            mcts = MCTS(numSimulations, selectChild, expand, rollout, backup,
                        selectAction, mctsRender, mctsRenderOn)

            maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
            makeDiffSimulationRoot = MakeDiffSimulationRoot(
                isTerminal, updatePhysicalStateByBeliefInSimulationRoot)
            runMCTSTrjactory = RunMCTSTrjactory(
                maxRunningSteps, numTree, numActionPlaned, sheepActionUpdateFrequency,
                transitionFunctionInPlay, isTerminal, makeDiffSimulationRoot, render)
            # Kept although unused: this draw advances the global RNG stream,
            # so removing it would change subsequent random trajectories.
            rootAction = actionSpace[np.random.choice(range(numActionSpace))]

            # --- Run trials, save trajectories, record escape rate -----------
            numTrial = 15
            print(attentionLimitation,
                  attentionMinDistance / distanceToVisualDegreeRatio,
                  attentionMaxDistance / distanceToVisualDegreeRatio)
            trajectories = [runMCTSTrjactory(mcts) for trial in range(numTrial)]
            savePath = getSavePath({'chasingSubtlety': chasingSubtlety, 'subIndex': subIndex})
            tsl.saveToPickle(trajectories, savePath)
            # A trajectory counts as an escape when it lasts (nearly) the full episode.
            meanEscape = np.mean([1 if len(trajectory) >= (maxRunningSteps - 1) else 0
                                  for trajectory in trajectories])
            meanEscapeOnConditions.update({chasingSubtlety: meanEscape})
        print(meanEscapeOnConditions)
        allResults.append(meanEscapeOnConditions)

    # One row per subject, one column per chasing subtlety.
    results = pd.DataFrame(allResults)
    getCSVSavePath = self.getCSVSavePathByCondition(condition)
    csvSavePath = getCSVSavePath({})
    results.to_csv(csvSavePath)