def composeFowardOneTimeStepWithRandomSubtlety(numOfAgent, idx):
    """Build the transition, reward and one-step functions for one env setup.

    Agent 0 is the sheep, agent 1 is the wolf and agents 2..numOfAgent-1 are
    distractors.

    Args:
        numOfAgent: total number of agents; ids from 2 up to numOfAgent - 1
            are passed on as distractor ids.
        idx: index into the list of wolf subtleties. The value -1 draws one
            subtlety at random with np.random.choice instead.

    Returns:
        The tuple (transitFunction, rewardFunction, forwardOneStep).
    """
    # experiment parameters for env
    numMDPTimeStepPerSecond = 5  # change direction every 200ms
    distanceToVisualDegreeRatio = 20
    warmUpTimeSteps = 10 * numMDPTimeStepPerSecond  # 10s to warm up

    def speedPerTimeStep(speedPerSecond):
        # Scale by the distance/visual-degree ratio, divide by the number of
        # MDP steps per second, then truncate to an int.
        # NOTE(review): presumably converts degrees/s to pixels/step -- confirm.
        return int(speedPerSecond * distanceToVisualDegreeRatio
                   / numMDPTimeStepPerSecond)

    # Sheep
    slowestSheep = speedPerTimeStep(17.4)
    fastestSheep = speedPerTimeStep(23.2)
    prepareSheepVelocity = PrepareSheepVelocity(slowestSheep, fastestSheep,
                                                warmUpTimeSteps)

    # Wolf
    slowestWolf = speedPerTimeStep(8.7)
    fastestWolf = speedPerTimeStep(14.5)
    wolfSubtleties = [500, 11, 3.3, 1.83, 0.92, 0.31, 0.001]  # 0, 30, 60, .. 180
    initWolfSubtlety = (np.random.choice(wolfSubtleties)
                        if idx == -1 else wolfSubtleties[idx])
    prepareWolfVelocity = PrepareWolfVelocity(
        slowestWolf, fastestWolf, warmUpTimeSteps, initWolfSubtlety,
        transCartesianToPolar, transPolarToCartesian)

    # Distractors use the same speed range as the wolf.
    slowestDistractor = speedPerTimeStep(8.7)
    fastestDistractor = speedPerTimeStep(14.5)
    prepareDistractorVelocity = PrepareDistractorVelocity(
        slowestDistractor, fastestDistractor, warmUpTimeSteps,
        transCartesianToPolar, transPolarToCartesian)

    sheepId, wolfId = 0, 1
    distractorsIds = list(range(2, numOfAgent))
    prepareAllAgentsVelocities = PrepareAllAgentsVelocities(
        sheepId, wolfId, distractorsIds, prepareSheepVelocity,
        prepareWolfVelocity, prepareDistractorVelocity)

    xBoundary, yBoundary = [0, 640], [0, 480]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)

    killzoneRadius = 2.5 * distanceToVisualDegreeRatio
    isTerminal = IsTerminal(sheepId, wolfId, killzoneRadius)

    numFramePerSecond = 30  # visual display fps
    # interpolate each MDP timestep to multiple frames; check terminal for
    # each frame
    numFramesToInterpolate = int(
        numFramePerSecond / numMDPTimeStepPerSecond - 1)
    transitFunction = TransitWithInterpolation(
        initWolfSubtlety, numFramesToInterpolate, prepareAllAgentsVelocities,
        stayInBoundaryByReflectVelocity, isTerminal)

    aliveBonus, deathPenalty = 0.01, -1
    rewardFunction = RewardFunctionTerminalPenalty(aliveBonus, deathPenalty,
                                                   isTerminal)

    forwardOneStep = ForwardOneStep(transitFunction, rewardFunction)
    return transitFunction, rewardFunction, forwardOneStep
def main():
    """Sample trajectories for every condition and replay one condition.

    Expands manipulatedVariables into one parameter dict per condition, calls
    the SampleTrajectoriesForCoditions sampler once per dict, then (when
    visualize is True) opens a pygame window and feeds every trajectory of the
    condition selected by visualConditionIndex to VisualizeTraj.
    """
    manipulatedVariables = OrderedDict()
    manipulatedVariables['numOfAgent'] = [2]

    # Cartesian product of all variable levels -> one {name: value} dict per
    # condition.
    productedValues = it.product(
        *[[(key, value) for value in values]
          for key, values in manipulatedVariables.items()])
    parametersAllCondtion = [
        dict(specificValueParameter)
        for specificValueParameter in productedValues
    ]

    numTrajectories = 3
    sampleTrajectoriesForConditions = SampleTrajectoriesForCoditions(
        numTrajectories, composeFowardOneTimeStepWithRandomSubtlety)
    trajectoriesMultipleConditions = [
        sampleTrajectoriesForConditions(para)
        for para in parametersAllCondtion
    ]

    visualConditionIndex = 0
    trajectoriesToVisualize = trajectoriesMultipleConditions[
        visualConditionIndex]

    visualize = True
    if not visualize:
        return

    # Drawing setup
    screenWidth = 640
    screenHeight = 480
    screen = pg.display.set_mode((screenWidth, screenHeight))
    screenColor = THECOLORS['black']
    # Shared by the background drawing and the boundary reflection below.
    xBoundary = [0, 640]
    yBoundary = [0, 480]
    lineColor = THECOLORS['white']
    lineWidth = 4
    drawBackground = DrawBackground(screen, screenColor, xBoundary, yBoundary,
                                    lineColor, lineWidth)

    # Take the agent count from the condition actually being visualized so
    # the colors and ids drawn stay in sync with the sampled trajectories.
    numOfAgent = parametersAllCondtion[visualConditionIndex]['numOfAgent']
    numDistractors = numOfAgent - 2
    # First two colors are green and red, then white for each distractor.
    # NOTE(review): presumably indexed by agent id (0 sheep, 1 wolf) -- confirm.
    circleColorSpace = ([[0, 255, 0], [255, 0, 0]]
                        + [[255, 255, 255]] * numDistractors)
    circleSize = 10
    positionIndex = [0, 1]
    agentIdsToDraw = list(range(numOfAgent))

    saveImage = False
    dirPYFile = os.path.dirname(__file__)
    imageSavePath = os.path.join(dirPYFile, '..', 'data', 'forDemo')
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(imageSavePath, exist_ok=True)

    FPS = 30
    drawState = DrawState(FPS, screen, circleColorSpace, circleSize,
                          agentIdsToDraw, positionIndex, saveImage,
                          imageSavePath, drawBackground)

    # MDP Env
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)

    distanceToVisualDegreeRatio = 20
    killzoneRadius = 2.5 * distanceToVisualDegreeRatio
    sheepId = 0
    wolfId = 1
    isTerminal = IsTerminal(sheepId, wolfId, killzoneRadius)

    numMDPTimeStepPerSecond = 5  # change direction every 200ms
    # interpolate each MDP timestep to multiple frames; check terminal for
    # each frame
    numFramesToInterpolate = int(FPS / numMDPTimeStepPerSecond - 1)
    interpolateStateForVisualization = InterpolateStateForVisualization(
        numFramesToInterpolate, stayInBoundaryByReflectVelocity, isTerminal)

    # Positions of state / action / next state inside one trajectory timestep,
    # as passed on to VisualizeTraj.
    stateIndexInTimeStep = 0
    actionIndexInTimeStep = 1
    nextStateIndexInTimeStep = 2
    visualizeTraj = VisualizeTraj(stateIndexInTimeStep, actionIndexInTimeStep,
                                  nextStateIndexInTimeStep, drawState,
                                  interpolateStateForVisualization)

    # Plain loop: the calls are made for their side effects only.
    for trajectory in trajectoriesToVisualize:
        visualizeTraj(trajectory)