def __call__(self, condition):
    """Run one experimental condition: simulate MCTS-planned sheep trajectories for each
    chasing subtlety, save the trajectories per simulated subject, and write summary CSVs
    for escape, identity, perception, action-deviation, and velocity-difference measures."""
    getSavePath = self.getTrajectorySavePathByCondition(condition)
    attentionType = condition['attType']
    alpha = condition['alpha']
    C = condition['C']
    minAttentionDistance = condition['minAttDist']
    rangeAttention = condition['rangeAtt']
    numTree = condition['numTrees']
    numSimulations = condition['numSim']
    actionRatio = condition['actRatio']
    cBase = condition['cBase']
    burnTime = condition['burnTime']
    softParaForIdentity = condition['softId']
    softParaForSubtlety = condition['softSubtlety']
    damp = condition['damp']
    actionCost = condition['actCost']

    numSub = 10
    allIdentityResults = []
    allPerceptionResults = []
    allActionResults = []
    allVelDiffResults = []
    allResults = []
    possibleTrialSubtleties = [3.3, 1.83, 0.01]  # [500.0, 3.3, 1.83, 0.92, 0.01]
    for subIndex in range(numSub):
        meanIdentityOnConditions = {}
        meanPerceptionOnConditions = {}
        meanActionOnConditions = {}
        meanVelDiffOnConditions = {}
        meanEscapeOnConditions = {}
        for chasingSubtlety in possibleTrialSubtleties:
            print(numTree, chasingSubtlety, numSimulations, attentionType)

            # Agents and initial physical state.
            numAgent = 25
            sheepId = 0
            suspectorIds = list(range(1, numAgent))
            resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(suspectorIds, [chasingSubtlety])
            distanceToVisualDegreeRatio = 20
            minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
            minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # no distractor in kill zone at initialization
            isLegalInitPositions = ag.IsLegalInitPositions(sheepId, minInitSheepWolfDistance, minInitSheepDistractorDistance)
            xBoundary = [0, 640]
            yBoundary = [0, 480]
            resetAgentPositions = ag.ResetAgentPositions(xBoundary, yBoundary, numAgent, isLegalInitPositions)
            resetPhysicalState = ag.ResetPhysicalState(sheepId, numAgent, resetAgentPositions, resetWolfIdAndSubtlety)

            # Timing and agent policies.
            numFramePerSecond = 20
            numMDPTimeStepPerSecond = 5
            numFrameWithoutActionChange = int(numFramePerSecond / numMDPTimeStepPerSecond)
            sheepActionUpdateFrequency = 1
            minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio / numFramePerSecond)
            warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
            sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency, minSheepSpeed, maxSheepSpeed, warmUpTimeSteps, burnTime, damp)
            wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
            wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency, minWolfSpeed, maxWolfSpeed, warmUpTimeSteps)
            distractorActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
            distractorPolicy = ag.DistractorPolicy(distractorActionUpdateFrequency, minDistractorSpeed, maxDistractorSpeed, warmUpTimeSteps)
            preparePolicy = ag.PreparePolicy(sheepId, numAgent, sheepPolicy, wolfPolicy, distractorPolicy)
            updatePhysicalState = ag.UpdatePhysicalState(sheepId, numAgent, preparePolicy)

            # Environment dynamics and termination.
            xBoundary = [0, 640]
            yBoundary = [0, 480]
            checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
            transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(checkBoundaryAndAdjust)
            minDistance = 2.5 * distanceToVisualDegreeRatio
            isTerminal = env.IsTerminal(sheepId, minDistance)

            # screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
            # screenColor = np.array([0, 0, 0])
            # sheepColor = np.array([0, 255, 0])
            # wolfColor = np.array([255, 0, 0])
            # circleSize = 10
            # saveImage = True
            # saveImageFile = 'image3'
            # render = env.Render(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColor, wolfColor, circleSize, saveImage, saveImageFile, isTerminal)
            render = None
            renderOnInSimulation = False
            transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(
                numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInSimulation)
            renderOnInPlay = False
            transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(
                numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInPlay)

            # Attention model parameters for each attention type.
            if attentionType == 'idealObserver':
                attentionLimitation = 1
                precisionPerSlot = 500.0
                precisionForUntracked = 500.0
                memoryratePerSlot = 1.0
                memoryrateForUntracked = 1.0
            if attentionType == 'preAttention':
                attentionLimitation = 1
                precisionPerSlot = 2.5
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.45
                memoryrateForUntracked = 0.45
            if attentionType == 'attention3':
                attentionLimitation = 3
                precisionPerSlot = 8.0
                precisionForUntracked = 0.01
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.01
            if attentionType == 'hybrid3':
                attentionLimitation = 3
                precisionPerSlot = 8.0
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.45
            if attentionType == 'attention4':
                attentionLimitation = 4
                precisionPerSlot = 8.0
                precisionForUntracked = 0.01
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.01
            if attentionType == 'hybrid4':
                attentionLimitation = 4
                precisionPerSlot = 8.0
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.45
            if attentionType == 'preAttentionMem0.25':
                attentionLimitation = 1
                precisionPerSlot = 2.5
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.25
                memoryrateForUntracked = 0.25
            if attentionType == 'preAttentionMem0.65':
                attentionLimitation = 1
                precisionPerSlot = 2.5
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.65
                memoryrateForUntracked = 0.65
            if attentionType == 'preAttentionPre0.5':
                attentionLimitation = 1
                precisionPerSlot = 0.5
                precisionForUntracked = 0.5
                memoryratePerSlot = 0.45
                memoryrateForUntracked = 0.45
            if attentionType == 'preAttentionPre4.5':
                attentionLimitation = 1
                precisionPerSlot = 4.5
                precisionForUntracked = 4.5
                memoryratePerSlot = 0.45
                memoryrateForUntracked = 0.45
            attention = Attention.AttentionToPrecisionAndDecay(precisionPerSlot, precisionForUntracked, memoryratePerSlot, memoryrateForUntracked)

            # Belief and attention over suspect identities and subtleties.
            transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(numAgent)
            possibleSubtleties = [500.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.01]
            resetBeliefAndAttention = ba.ResetBeliefAndAttention(sheepId, suspectorIds, possibleSubtleties, attentionLimitation, transferMultiAgentStatesToPositionDF, attention)
            maxAttentionDistance = minAttentionDistance + rangeAttention
            attentionMinDistance = minAttentionDistance * distanceToVisualDegreeRatio
            attentionMaxDistance = maxAttentionDistance * distanceToVisualDegreeRatio
            numStandardErrorInDistanceRange = 4
            calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(attentionMinDistance, attentionMaxDistance, numStandardErrorInDistanceRange)
            attentionSwitch = Attention.AttentionSwitch(attentionLimitation, calDistancePriorOnAttentionSlot)
            computePosterior = calPosterior.CalPosteriorLog(minDistance)
            print(attentionLimitation, attentionMinDistance / distanceToVisualDegreeRatio, attentionMaxDistance / distanceToVisualDegreeRatio)

            attentionSwitchFrequencyInSimulation = np.inf
            beliefUpdateFrequencyInSimulation = np.inf
            updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(
                attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF,
                attentionSwitchFrequencyInSimulation, beliefUpdateFrequencyInSimulation, burnTime)
            attentionSwitchFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
            beliefUpdateFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
            updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(
                attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF,
                attentionSwitchFrequencyInPlay, beliefUpdateFrequencyInPlay, burnTime)

            updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(0.2 * numMDPTimeStepPerSecond)
            updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInSimulationRoot, softParaForIdentity, softParaForSubtlety)
            reUpdatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(
                updatePhysicalStateByBeliefFrequencyInSimulationRoot, softParaForIdentity=1, softParaForSubtlety=1)
            updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
            # updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(
            #     updatePhysicalStateByBeliefFrequencyInSimulation, softParaForIdentity, softParaForSubtlety)
            updatePhysicalStateByBeliefInSimulation = lambda state: state
            updatePhysicalStateByBeliefFrequencyInPlay = np.inf
            # updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(
            #     updatePhysicalStateByBeliefFrequencyInPlay, softParaForIdentity, softParaForSubtlety)
            updatePhysicalStateByBeliefInPlay = lambda state: state

            transitionFunctionInSimulation = env.TransitionFunction(
                resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
                transiteStateWithoutActionChangeInSimulation, updateBeliefAndAttentionInSimulation,
                updatePhysicalStateByBeliefInSimulation)
            transitionFunctionInPlay = env.TransitionFunction(
                resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
                transiteStateWithoutActionChangeInPlay, updateBeliefAndAttentionInPlay,
                updatePhysicalStateByBeliefInPlay)

            # Sheep action space: a stay action plus eight evenly spaced headings.
            numActionSpace = 8
            actionInterval = int(360 / numActionSpace)
            actionMagnitude = actionRatio * minSheepSpeed * numFramePerSecond
            actionSpaceFull = [(np.cos(degreeInPolar) * actionMagnitude, np.sin(degreeInPolar) * actionMagnitude)
                               for degreeInPolar in np.arange(0, 360, actionInterval) / 180 * math.pi]
            actionSpaceHalf = [(np.cos(degreeInPolar) * actionMagnitude * 0.5, np.sin(degreeInPolar) * actionMagnitude * 0.5)
                               for degreeInPolar in np.arange(0, 360, actionInterval) / 180 * math.pi]
            actionSpace = [(0, 0)] + actionSpaceFull  # + actionSpaceHalf
            getActionPrior = lambda state: {action: 1 / len(actionSpace) for action in actionSpace}

            # Reward and MCTS components with progressive widening.
            maxRollOutSteps = 5
            aliveBonus = 0.2 * 0
            deathPenalty = -1
            rewardFunction = reward.RewardFunctionTerminalPenalty(sheepId, aliveBonus, actionCost, deathPenalty, isTerminal, actionSpace)
            rewardRollout = lambda state, action, nextState: rewardFunction(state, action)
            cInit = 1
            # cBase = 50
            scoreChild = ScoreChild(cInit, cBase)
            selectAction = SelectAction(scoreChild)
            selectNextState = SelectNextState(selectAction)
            initializeChildren = InitializeChildren(actionSpace, transitionFunctionInSimulation, getActionPrior)
            expand = Expand(isTerminal, initializeChildren)
            pWidening = PWidening(alpha, C)
            expandNewState = ExpandNextState(transitionFunctionInSimulation, pWidening)
            rolloutPolicy = lambda state: actionSpace[np.random.choice(range(numActionSpace))]
            rolloutHeuristic = lambda state: 0
            estimateValue = RollOut(rolloutPolicy, maxRollOutSteps, transitionFunctionInSimulation, rewardRollout, isTerminal, rolloutHeuristic)
            numActionPlaned = 1
            outputAction = OutputAction(numActionPlaned, actionSpace)
            # numSimulations = int(numTotalSimulationTimes/numTree)

            # sheepColorInMcts = np.array([0, 255, 0])
            # wolfColorInMcts = np.array([255, 0, 0])
            # distractorColorInMcts = np.array([255, 255, 255])
            # saveImageMCTS = True
            # mctsRender = env.MctsRender(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColorInMcts, wolfColorInMcts, distractorColorInMcts, circleSize, saveImageMCTS, saveImageFile)
            # mctsRenderOn = False
            # mctsRender = None
            # pg.init()
            # mcts = MCTS(numSimulations, selectChild, expand, rollout, backup, selectAction, mctsRender, mctsRenderOn)
            pwMultipleTrees = PWMultipleTrees(numSimulations, selectAction, selectNextState, expand, expandNewState, estimateValue, backup, outputAction)

            # Run the planning trials for this subject and chasing subtlety.
            maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
            makeDiffSimulationRoot = MakeDiffSimulationRoot(isTerminal, updatePhysicalStateByBeliefInSimulationRoot, reUpdatePhysicalStateByBeliefInSimulationRoot)
            runMCTSTrjactory = RunMCTSTrjactory(maxRunningSteps, numTree, numActionPlaned, sheepActionUpdateFrequency, transitionFunctionInPlay, isTerminal, makeDiffSimulationRoot, render)
            rootAction = actionSpace[np.random.choice(range(numActionSpace))]
            numTrial = 10
            trajectories = [runMCTSTrjactory(pwMultipleTrees) for trial in range(numTrial)]
            savePath = getSavePath({'chasingSubtlety': chasingSubtlety, 'subIndex': subIndex})
            tsl.saveToPickle(trajectories, savePath)

            # Summary statistics for this condition, skipping the first time step.
            getCSVSavePath = self.getCSVSavePathByCondition(condition)
            startStatsIndex = 1

            def getTrueWolfIdAcc(trajectory):
                accTrial = []
                for timeStepIndex in range(len(trajectory) - 2):
                    timeStep = trajectory[timeStepIndex]
                    wolfId = timeStep[0][0][3][0]
                    wolfSubtlety = timeStep[0][0][3][1]
                    # print(wolfId, '**', wolfIdInEach)
                    if timeStepIndex >= startStatsIndex:
                        idAcc = np.mean([int(IdAndSubtlety[0] == wolfId) for IdAndSubtlety in timeStep[5]])
                        accTrial.append(idAcc)
                meanAcc = np.mean(accTrial)
                return meanAcc

            meanIdentity = np.mean([getTrueWolfIdAcc(trajectory) for trajectory in trajectories])
            meanIdentityOnConditions.update({chasingSubtlety: meanIdentity})

            def getTrueWolfIdSubtletyAcc(trajectory):
                accTrial = []
                for timeStepIndex in range(len(trajectory) - 2):
                    timeStep = trajectory[timeStepIndex]
                    wolfId = timeStep[0][0][3][0]
                    wolfSubtlety = timeStep[0][0][3][1]
                    if timeStepIndex >= startStatsIndex:
                        idAndSubtletyAcc = np.mean([int((IdAndSubtlety[0] == wolfId) and (IdAndSubtlety[1] == wolfSubtlety))
                                                    for IdAndSubtlety in timeStep[5]])
                        accTrial.append(idAndSubtletyAcc)
                meanAcc = np.mean(accTrial)
                return meanAcc

            meanPerception = np.mean([getTrueWolfIdSubtletyAcc(trajectory) for trajectory in trajectories])
            meanPerceptionOnConditions.update({chasingSubtlety: meanPerception})

            def getActionDeviationLevel(trajectory):
                accTrial = []
                for timeStepIndex in range(len(trajectory) - 2):
                    timeStep = trajectory[timeStepIndex]
                    actionReal = np.array(timeStep[1])
                    actionOnTruth = np.array(timeStep[4])
                    if timeStepIndex >= startStatsIndex:
                        deviateLevel = round(agf.computeAngleBetweenVectors(actionReal, actionOnTruth) / (math.pi / 4))
                        accTrial.append(deviateLevel)
                meanAcc = np.mean(accTrial)
                return meanAcc

            meanAction = np.mean([getActionDeviationLevel(trajectory) for trajectory in trajectories])
            meanActionOnConditions.update({chasingSubtlety: meanAction})

            def getVelocityDiff(trajectory):
                accTrial = []
                for timeStepIndex in range(len(trajectory) - 2):
                    timeStep = trajectory[timeStepIndex]
                    velReal = np.array(timeStep[0][0][0][1][0])
                    velWithActionOnTruth = np.array(timeStep[2][1][0])
                    velWithActionOppo = np.array(timeStep[3][1][0])
                    if timeStepIndex >= startStatsIndex:
                        velDiffNormWithActionOnTruth = np.linalg.norm(velReal - velWithActionOnTruth)
                        velDiffNormWithActionOppo = np.linalg.norm(velReal - velWithActionOppo)
                        velDiffRatio = 1.0 * velDiffNormWithActionOnTruth / velDiffNormWithActionOppo
                        accTrial.append(velDiffRatio)
                meanAcc = np.mean(accTrial)
                return meanAcc

            meanVelDiff = np.mean([getVelocityDiff(trajectory) for trajectory in trajectories])
            meanVelDiffOnConditions.update({chasingSubtlety: meanVelDiff})

            getEscapeAcc = lambda trajectory: int(len(trajectory) >= (maxRunningSteps - 2))
            meanEscape = np.mean([getEscapeAcc(trajectory) for trajectory in trajectories])
            meanEscapeOnConditions.update({chasingSubtlety: meanEscape})

        # Write cumulative per-subject results after each subject finishes.
        allResults.append(meanEscapeOnConditions)
        results = pd.DataFrame(allResults)
        escapeCSVSavePath = getCSVSavePath({'measure': 'escape'})
        results.to_csv(escapeCSVSavePath)

        allIdentityResults.append(meanIdentityOnConditions)
        identityResults = pd.DataFrame(allIdentityResults)
        identityCSVSavePath = getCSVSavePath({'measure': 'identity'})
        identityResults.to_csv(identityCSVSavePath)

        allPerceptionResults.append(meanPerceptionOnConditions)
        perceptionResults = pd.DataFrame(allPerceptionResults)
        perceptionCSVSavePath = getCSVSavePath({'measure': 'perception'})
        perceptionResults.to_csv(perceptionCSVSavePath)

        allActionResults.append(meanActionOnConditions)
        actionResults = pd.DataFrame(allActionResults)
        actionCSVSavePath = getCSVSavePath({'measure': 'action'})
        actionResults.to_csv(actionCSVSavePath)

        allVelDiffResults.append(meanVelDiffOnConditions)
        velDiffResults = pd.DataFrame(allVelDiffResults)
        velDiffCSVSavePath = getCSVSavePath({'measure': 'velDiff'})
        velDiffResults.to_csv(velDiffCSVSavePath)
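
# A standalone sketch (illustration only, not part of the experiment classes above) of how
# the per-subject result dictionaries turn into the saved CSVs: each dictionary maps a
# chasing subtlety to one subject's mean score, and pandas stacks the list of dictionaries
# into one row per subject with one column per subtlety. The toy values and the helper
# name are hypothetical.
def _demoResultAggregation():
    import pandas as pd
    toyEscapeResults = [{3.3: 0.8, 1.83: 0.6, 0.01: 0.4},
                        {3.3: 0.7, 1.83: 0.5, 0.01: 0.5}]
    results = pd.DataFrame(toyEscapeResults)  # rows: subjects; columns: subtleties
    return results.to_csv()  # same call pattern as results.to_csv(escapeCSVSavePath) above
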
def __call__(self, condition):
    """Run one experimental condition: simulate MCTS-planned sheep trajectories for each
    chasing subtlety, save the trajectories per simulated subject, and write per-subject
    escape rates to a CSV."""
    getSavePath = self.getTrajectorySavePathByCondition(condition)
    attentionType = condition['attentionType']
    alpha = condition['alphaForStateWidening']
    C = condition['CForStateWidening']
    cBase = condition['cBase']
    numTree = condition['numTrees']
    numSimulations = condition['numSimulationTimes']
    actionRatio = condition['actionRatio']
    burnTime = condition['burnTime']
    damp = condition['damp']
    actionCost = condition['actionCost']

    numSub = 10
    allResults = []
    possibleTrialSubtleties = [500.0, 3.3, 1.83, 0.92, 0.001]
    for subIndex in range(numSub):
        meanEscapeOnConditions = {}
        for chasingSubtlety in possibleTrialSubtleties:
            print(numTree, chasingSubtlety, numSimulations, attentionType)

            # Agents and initial physical state.
            numAgent = 25
            sheepId = 0
            suspectorIds = list(range(1, numAgent))
            resetWolfIdAndSubtlety = ag.ResetWolfIdAndSubtlety(suspectorIds, [chasingSubtlety])
            distanceToVisualDegreeRatio = 20
            minInitSheepWolfDistance = 9 * distanceToVisualDegreeRatio
            minInitSheepDistractorDistance = 2.5 * distanceToVisualDegreeRatio  # no distractor in kill zone at initialization
            isLegalInitPositions = ag.IsLegalInitPositions(sheepId, minInitSheepWolfDistance, minInitSheepDistractorDistance)
            xBoundary = [0, 640]
            yBoundary = [0, 480]
            resetAgentPositions = ag.ResetAgentPositions(xBoundary, yBoundary, numAgent, isLegalInitPositions)
            resetPhysicalState = ag.ResetPhysicalState(sheepId, numAgent, resetAgentPositions, resetWolfIdAndSubtlety)

            # Timing and agent policies.
            numFramePerSecond = 20
            numMDPTimeStepPerSecond = 5
            numFrameWithoutActionChange = int(numFramePerSecond / numMDPTimeStepPerSecond)
            sheepActionUpdateFrequency = 1
            minSheepSpeed = int(17.4 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxSheepSpeed = int(23.2 * distanceToVisualDegreeRatio / numFramePerSecond)
            warmUpTimeSteps = int(10 * numMDPTimeStepPerSecond)
            sheepPolicy = ag.SheepPolicy(sheepActionUpdateFrequency, minSheepSpeed, maxSheepSpeed, warmUpTimeSteps, burnTime, damp)
            wolfActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minWolfSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxWolfSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
            wolfPolicy = ag.WolfPolicy(wolfActionUpdateFrequency, minWolfSpeed, maxWolfSpeed, warmUpTimeSteps)
            distractorActionUpdateFrequency = int(0.2 * numMDPTimeStepPerSecond)
            minDistractorSpeed = int(8.7 * distanceToVisualDegreeRatio / numFramePerSecond)
            maxDistractorSpeed = int(14.5 * distanceToVisualDegreeRatio / numFramePerSecond)
            distractorPolicy = ag.DistractorPolicy(distractorActionUpdateFrequency, minDistractorSpeed, maxDistractorSpeed, warmUpTimeSteps)
            preparePolicy = ag.PreparePolicy(sheepId, numAgent, sheepPolicy, wolfPolicy, distractorPolicy)
            updatePhysicalState = ag.UpdatePhysicalState(sheepId, numAgent, preparePolicy)

            # Environment dynamics and termination.
            xBoundary = [0, 640]
            yBoundary = [0, 480]
            checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
            transiteMultiAgentMotion = ag.TransiteMultiAgentMotion(checkBoundaryAndAdjust)
            minDistance = 2.5 * distanceToVisualDegreeRatio
            isTerminal = env.IsTerminal(sheepId, minDistance)

            # screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
            # screenColor = np.array([0, 0, 0])
            # sheepColor = np.array([0, 255, 0])
            # wolfColor = np.array([255, 0, 0])
            # circleSize = 10
            # saveImage = True
            # saveImageFile = 'image3'
            # render = env.Render(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColor, wolfColor, circleSize, saveImage, saveImageFile, isTerminal)
            render = None
            renderOnInSimulation = False
            transiteStateWithoutActionChangeInSimulation = env.TransiteStateWithoutActionChange(
                numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInSimulation)
            renderOnInPlay = False
            transiteStateWithoutActionChangeInPlay = env.TransiteStateWithoutActionChange(
                numFrameWithoutActionChange, isTerminal, transiteMultiAgentMotion, render, renderOnInPlay)

            # Attention model parameters for each attention type.
            if attentionType == 'idealObserver':
                attentionLimitation = 1
                precisionPerSlot = 500.0
                precisionForUntracked = 500.0
                memoryratePerSlot = 1.0
                memoryrateForUntracked = 1.0
            if attentionType == 'preAttention':
                attentionLimitation = 1
                precisionPerSlot = 2.5
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.45
                memoryrateForUntracked = 0.45
            if attentionType == 'attention3':
                attentionLimitation = 3
                precisionPerSlot = 8.0
                precisionForUntracked = 0.01
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.01
            if attentionType == 'hybrid3':
                attentionLimitation = 3
                precisionPerSlot = 8.0
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.45
            if attentionType == 'attention4':
                attentionLimitation = 4
                precisionPerSlot = 8.0
                precisionForUntracked = 0.01
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.01
            if attentionType == 'hybrid4':
                attentionLimitation = 4
                precisionPerSlot = 8.0
                precisionForUntracked = 2.5
                memoryratePerSlot = 0.7
                memoryrateForUntracked = 0.45
            attention = Attention.AttentionToPrecisionAndDecay(precisionPerSlot, precisionForUntracked, memoryratePerSlot, memoryrateForUntracked)

            # Belief and attention over suspect identities and subtleties.
            transferMultiAgentStatesToPositionDF = ba.TransferMultiAgentStatesToPositionDF(numAgent)
            possibleSubtleties = [500.0, 11.0, 3.3, 1.83, 0.92, 0.31, 0.001]
            resetBeliefAndAttention = ba.ResetBeliefAndAttention(sheepId, suspectorIds, possibleSubtleties, attentionLimitation, transferMultiAgentStatesToPositionDF, attention)
            minAttentionDistance = 40.0
            rangeAttention = 10.0
            maxAttentionDistance = minAttentionDistance + rangeAttention
            attentionMinDistance = minAttentionDistance * distanceToVisualDegreeRatio
            attentionMaxDistance = maxAttentionDistance * distanceToVisualDegreeRatio
            numStandardErrorInDistanceRange = 4
            calDistancePriorOnAttentionSlot = Attention.CalDistancePriorOnAttentionSlot(attentionMinDistance, attentionMaxDistance, numStandardErrorInDistanceRange)
            attentionSwitch = Attention.AttentionSwitch(attentionLimitation, calDistancePriorOnAttentionSlot)
            computePosterior = calPosterior.CalPosteriorLog(minDistance)

            attentionSwitchFrequencyInSimulation = np.inf
            beliefUpdateFrequencyInSimulation = np.inf
            updateBeliefAndAttentionInSimulation = ba.UpdateBeliefAndAttentionState(
                attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF,
                attentionSwitchFrequencyInSimulation, beliefUpdateFrequencyInSimulation, burnTime)
            attentionSwitchFrequencyInPlay = int(0.6 * numMDPTimeStepPerSecond)
            beliefUpdateFrequencyInPlay = int(0.2 * numMDPTimeStepPerSecond)
            updateBeliefAndAttentionInPlay = ba.UpdateBeliefAndAttentionState(
                attention, computePosterior, attentionSwitch, transferMultiAgentStatesToPositionDF,
                attentionSwitchFrequencyInPlay, beliefUpdateFrequencyInPlay, burnTime)

            updatePhysicalStateByBeliefFrequencyInSimulationRoot = int(0.6 * numMDPTimeStepPerSecond)
            updatePhysicalStateByBeliefInSimulationRoot = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInSimulationRoot)
            updatePhysicalStateByBeliefFrequencyInSimulation = np.inf
            updatePhysicalStateByBeliefInSimulation = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInSimulation)
            updatePhysicalStateByBeliefFrequencyInPlay = np.inf
            updatePhysicalStateByBeliefInPlay = ba.UpdatePhysicalStateImagedByBelief(updatePhysicalStateByBeliefFrequencyInPlay)

            transitionFunctionInSimulation = env.TransitionFunction(
                resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
                transiteStateWithoutActionChangeInSimulation, updateBeliefAndAttentionInSimulation,
                updatePhysicalStateByBeliefInSimulation)
            transitionFunctionInPlay = env.TransitionFunction(
                resetPhysicalState, resetBeliefAndAttention, updatePhysicalState,
                transiteStateWithoutActionChangeInPlay, updateBeliefAndAttentionInPlay,
                updatePhysicalStateByBeliefInPlay)

            # Reward for the rollout phase.
            maxRollOutSteps = 5
            aliveBonus = 1 / maxRollOutSteps
            deathPenalty = -1
            rewardFunction = reward.RewardFunctionTerminalPenalty(sheepId, aliveBonus, actionCost, deathPenalty, isTerminal)
            rewardRollout = lambda state, action, nextState: rewardFunction(state, action)

            # Sheep action space: a stay action plus eight evenly spaced headings.
            numActionSpace = 8
            actionInterval = int(360 / numActionSpace)
            actionMagnitude = actionRatio * minSheepSpeed
            actionSpace = [(0, 0)] + [(np.cos(degreeInPolar) * actionMagnitude, np.sin(degreeInPolar) * actionMagnitude)
                                      for degreeInPolar in np.arange(0, 360, actionInterval) / 180 * math.pi]
            getActionPrior = lambda state: {action: 1 / len(actionSpace) for action in actionSpace}

            # MCTS components with progressive widening.
            cInit = 1
            # cBase = 50
            scoreChild = ScoreChild(cInit, cBase)
            selectAction = SelectAction(scoreChild)
            selectNextState = SelectNextState(selectAction)
            initializeChildren = InitializeChildren(actionSpace, transitionFunctionInSimulation, getActionPrior)
            expand = Expand(isTerminal, initializeChildren)
            pWidening = PWidening(alpha, C)
            expandNewState = ExpandNextState(transitionFunctionInSimulation, pWidening)
            rolloutPolicy = lambda state: actionSpace[np.random.choice(range(numActionSpace))]
            rolloutHeuristic = lambda state: 0
            estimateValue = RollOut(rolloutPolicy, maxRollOutSteps, transitionFunctionInSimulation, rewardRollout, isTerminal, rolloutHeuristic)
            numActionPlaned = 1
            outputAction = OutputAction(numActionPlaned, actionSpace)
            # numSimulations = int(numTotalSimulationTimes/numTree)

            # sheepColorInMcts = np.array([0, 255, 0])
            # wolfColorInMcts = np.array([255, 0, 0])
            # distractorColorInMcts = np.array([255, 255, 255])
            # saveImageMCTS = True
            # mctsRender = env.MctsRender(numAgent, screen, xBoundary[1], yBoundary[1], screenColor, sheepColorInMcts, wolfColorInMcts, distractorColorInMcts, circleSize, saveImageMCTS, saveImageFile)
            # mctsRenderOn = False
            # mctsRender = None
            # pg.init()
            # mcts = MCTS(numSimulations, selectChild, expand, rollout, backup, selectAction, mctsRender, mctsRenderOn)
            pwMultipleTrees = PWMultipleTrees(numSimulations, selectAction, selectNextState, expand, expandNewState, estimateValue, backup, outputAction)

            # Run the planning trials for this subject and chasing subtlety.
            maxRunningSteps = int(25 * numMDPTimeStepPerSecond)
            makeDiffSimulationRoot = MakeDiffSimulationRoot(isTerminal, updatePhysicalStateByBeliefInSimulationRoot)
            runMCTSTrjactory = RunMCTSTrjactory(maxRunningSteps, numTree, numActionPlaned, sheepActionUpdateFrequency, transitionFunctionInPlay, isTerminal, makeDiffSimulationRoot, render)
            rootAction = actionSpace[np.random.choice(range(numActionSpace))]
            numTrial = 15
            print(attentionLimitation, attentionMinDistance / distanceToVisualDegreeRatio, attentionMaxDistance / distanceToVisualDegreeRatio)
            trajectories = [runMCTSTrjactory(pwMultipleTrees) for trial in range(numTrial)]
            savePath = getSavePath({'chasingSubtlety': chasingSubtlety, 'subIndex': subIndex})
            tsl.saveToPickle(trajectories, savePath)

            meanEscape = np.mean([1 if len(trajectory) >= (maxRunningSteps - 1) else 0 for trajectory in trajectories])
            meanEscapeOnConditions.update({chasingSubtlety: meanEscape})
            print(meanEscapeOnConditions)

        # Write cumulative per-subject escape rates after each subject finishes.
        allResults.append(meanEscapeOnConditions)
        results = pd.DataFrame(allResults)
        getCSVSavePath = self.getCSVSavePathByCondition(condition)
        csvSavePath = getCSVSavePath({})
        results.to_csv(csvSavePath)
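
# A standalone sketch (illustration only, not used by the classes above) of how the sheep's
# discrete action space is built in both methods: eight headings spaced 45 degrees apart,
# converted to radians, then scaled (cos, sin) displacement pairs, plus a (0, 0) "stay"
# action. The default actionMagnitude is a hypothetical stand-in for the value computed
# from actionRatio and minSheepSpeed inside __call__.
def _demoActionSpace(actionMagnitude=10.0, numActionSpace=8):
    import math
    import numpy as np
    actionInterval = int(360 / numActionSpace)  # 45-degree spacing
    degreesInPolar = np.arange(0, 360, actionInterval) / 180 * math.pi  # headings in radians
    actionSpace = [(0, 0)] + [(np.cos(d) * actionMagnitude, np.sin(d) * actionMagnitude)
                              for d in degreesInPolar]
    return actionSpace  # 9 actions: stay, plus 8 directions scaled by actionMagnitude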