def setUp(self):
    self.actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7)]
    self.sheepId = 0
    self.wolfId = 1
    self.posIndex = [0, 1]
    self.getPredatorPos = GetAgentPosFromState(self.wolfId, self.posIndex)
    self.getPreyPos = GetAgentPosFromState(self.sheepId, self.posIndex)
def setUp(self):
    self.actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7)]
    self.sheepId = 0
    self.wolfId = 1
    self.xPosIndex = [2, 3]
    self.getSheepXPos = GetAgentPosFromState(self.sheepId, self.xPosIndex)
    self.getWolfXPos = GetAgentPosFromState(self.wolfId, self.xPosIndex)
    self.killzoneRadius = 0.5
    self.isTerminal = IsTerminal(self.killzoneRadius, self.getSheepXPos, self.getWolfXPos)
def testMassEffectInTransition(self, state, allAgentsActions, smallMass, largeMass):
    # build two transition functions that differ only in the agents' body mass
    dirName = os.path.dirname(__file__)
    physicsDynamicsPath = os.path.join(dirName, '..', 'env', 'xmls', 'twoAgents.xml')
    agentsBodyMassIndex = [6, 7]

    physicsSmallMassModel = mujoco.load_model_from_path(physicsDynamicsPath)
    physicsSmallMassModel.body_mass[agentsBodyMassIndex] = [smallMass, smallMass]
    physicsLargeMassModel = mujoco.load_model_from_path(physicsDynamicsPath)
    physicsLargeMassModel.body_mass[agentsBodyMassIndex] = [largeMass, largeMass]

    physicsSmallMassSimulation = mujoco.MjSim(physicsSmallMassModel)
    physicsLargeMassSimulation = mujoco.MjSim(physicsLargeMassModel)
    # set_constants() is available in mujoco_py >= 2.0 but not in 1.50
    physicsSmallMassSimulation.set_constants()
    physicsLargeMassSimulation.set_constants()

    sheepId = 0
    wolfId = 1
    xPosIndex = [2, 3]
    getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfXPos = GetAgentPosFromState(wolfId, xPosIndex)
    killzoneRadius = 2
    isTerminal = IsTerminal(killzoneRadius, getSheepXPos, getWolfXPos)

    numSimulationFrames = 20
    transitSmallMassAgents = TransitionFunction(physicsSmallMassSimulation, isTerminal, numSimulationFrames)
    transitLargeMassAgents = TransitionFunction(physicsLargeMassSimulation, isTerminal, numSimulationFrames)

    nextStateInSmallMassTransition = transitSmallMassAgents(state, allAgentsActions)
    nextStateInLargeMassTransition = transitLargeMassAgents(state, allAgentsActions)
    stateChangeInSmallMassTransition = nextStateInSmallMassTransition - state
    stateChangeInLargeMassTransition = nextStateInLargeMassTransition - state

    # with identical actions, the state change should scale inversely with mass
    trueMassRatio = smallMass / largeMass
    self.assertTrue(np.allclose(stateChangeInLargeMassTransition,
                                stateChangeInSmallMassTransition * trueMassRatio))
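# Why the assertion above should hold (a minimal standalone sketch, not part of
# the test suite): with the same applied force over the same number of frames,
# Newton's second law F = m * a gives accelerations, and hence displacements,
# that scale as 1 / mass, so the large-mass state change is roughly the
# small-mass state change times smallMass / largeMass. All numbers below are
# illustrative assumptions, not values taken from the tests.

import numpy as np

smallMass, largeMass = 1.0, 4.0
force = 2.0                      # same force applied in both cases
dt, numFrames = 0.1, 20
# displacement of a body starting at rest under constant acceleration a = F / m
displacement = lambda mass: 0.5 * (force / mass) * (numFrames * dt) ** 2
assert np.isclose(displacement(largeMass), displacement(smallMass) * smallMass / largeMass)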
def setUp(self):
    self.numOfAgent = 2
    self.sheepId = 0
    self.wolfId = 1
    self.posIndex = [0, 1]
    self.xBoundary = [0, 640]
    self.yBoundary = [0, 480]
    self.minDistance = 50
    self.getPreyPos = GetAgentPosFromState(self.sheepId, self.posIndex)
    self.getPredatorPos = GetAgentPosFromState(self.wolfId, self.posIndex)
    self.stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(self.xBoundary, self.yBoundary)
    self.isTerminal = IsTerminal(self.getPredatorPos, self.getPreyPos, self.minDistance)
    self.transition = TransiteForNoPhysics(self.stayInBoundaryByReflectVelocity)
def testGetAgentPosFromState(self, agentId, posIndex, state, groundTruthAgentPos):
    getAgentPosFromState = GetAgentPosFromState(agentId, posIndex)
    agentPos = getAgentPosFromState(state)
    truthValue = np.array_equal(agentPos, groundTruthAgentPos)
    self.assertTrue(truthValue)
def setUp(self):
    self.modelPath = os.path.join(DIRNAME, '..', 'env', 'xmls', 'twoAgents.xml')
    self.model = load_model_from_path(self.modelPath)
    self.simulation = MjSim(self.model)
    self.numJointEachAgent = 2
    self.numAgent = 2
    self.killzoneRadius = 0.5
    self.numSimulationFrames = 20
    self.sheepId = 0
    self.wolfId = 1
    self.xPosIndex = [2, 3]
    self.getSheepPos = GetAgentPosFromState(self.sheepId, self.xPosIndex)
    self.getWolfPos = GetAgentPosFromState(self.wolfId, self.xPosIndex)
    self.isTerminal = IsTerminal(self.killzoneRadius, self.getSheepPos, self.getWolfPos)
    self.minQPos = (-9.7, -9.7)
    self.maxQPos = (9.7, 9.7)
    self.withinBounds = WithinBounds(self.minQPos, self.maxQPos)
def setUp(self):
    self.actionSpace = [(-1, 0), (1, 0), (0, 1), (0, -1), (0, 0)]
    self.rationalityParam = 0.9
    self.lowerBoundAngle = 0
    self.upperBoundAngle = np.pi / 2
    self.actHeatSeeking = ActHeatSeeking(self.actionSpace, computeAngleBetweenVectors,
                                         self.lowerBoundAngle, self.upperBoundAngle)
    self.wolfID = 0
    self.sheepID = 2
    self.masterID = 1
    self.positionIndex = [0, 1]
    self.getWolfPos = GetAgentPosFromState(self.wolfID, self.positionIndex)
    self.getSheepPos = GetAgentPosFromState(self.sheepID, self.positionIndex)
    self.getMasterPos = GetAgentPosFromState(self.masterID, self.positionIndex)
def testGetAgentPosFromTrajectory(self, agentId, timeStep, trajectory, groundTruthAgentPos):
    xPosIndex = [2, 3]
    stateIndex = 0
    getAgentPosFromState = GetAgentPosFromState(agentId, xPosIndex)
    getStateFromTrajectory = GetStateFromTrajectory(timeStep, stateIndex)
    getAgentPosFromTrajectory = GetAgentPosFromTrajectory(getAgentPosFromState, getStateFromTrajectory)
    agentPos = getAgentPosFromTrajectory(trajectory)
    truthValue = np.array_equal(agentPos, groundTruthAgentPos)
    self.assertTrue(truthValue)
def setUp(self):
    self.actionSpace = [(-1, 0), (1, 0), (0, 1), (0, -1), (0, 0)]
    self.lowerBoundAngle = 0
    self.upperBoundAngle = np.pi / 2
    self.lowerBoundary = 1
    self.wolfID = 2
    self.sheepID = 0
    self.masterID = 1
    self.positionIndex = [0, 1]
    self.getWolfPos = GetAgentPosFromState(self.wolfID, self.positionIndex)
    self.getSheepPos = GetAgentPosFromState(self.sheepID, self.positionIndex)
    self.getMasterPos = GetAgentPosFromState(self.masterID, self.positionIndex)

    self.pulledAgentID = self.wolfID
    self.noPullingAgentID = self.sheepID
    self.pullingAgentID = self.masterID
    self.getPulledAgentPos = GetAgentPosFromState(self.pulledAgentID, self.positionIndex)
    self.getNoPullAgentPos = GetAgentPosFromState(self.noPullingAgentID, self.positionIndex)
    self.getPullingAgentPos = GetAgentPosFromState(self.pullingAgentID, self.positionIndex)

    self.forceSpace = [(-1, 0), (1, 0), (0, 1), (0, -1)]
    self.samplePulledForceDirection = SamplePulledForceDirection(computeAngleBetweenVectors, self.forceSpace,
                                                                 self.lowerBoundAngle, self.upperBoundAngle)
    self.distanceForceRatio = 2
    self.getPullingForceValue = GetPullingForceValue(self.distanceForceRatio)
    self.getPulledAgentForce = GetPulledAgentForce(self.getPullingAgentPos, self.getPulledAgentPos,
                                                   self.samplePulledForceDirection, self.getPullingForceValue)
    self.getAgentsForce = GetAgentsForce(self.getPulledAgentForce, self.pulledAgentID,
                                         self.noPullingAgentID, self.pullingAgentID)
def main():
    # manipulated variables
    manipulatedVariables = OrderedDict()
    manipulatedVariables['dataSize'] = [1000, 2000, 3000]
    manipulatedVariables['depth'] = [5, 9]
    manipulatedVariables['trainSteps'] = list(range(0, 50001, 10000))
    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    killzoneRadius = 25
    maxRunningSteps = 100
    numSimulations = 200

    # accumulate rewards for trajectories
    numOfAgent = 3
    sheepId = 0
    wolvesId = 1
    wolfOneId = 1
    wolfTwoId = 2
    xPosIndex = [0, 1]
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
    getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)
    isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
    isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)
    playIsTerminal = lambda state: isTerminalOne(state) or isTerminalTwo(state)
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(xBoundary, yBoundary)
    transit = TransiteForNoPhysics(stayInBoundaryByReflectVelocity)

    playAliveBonus = -1 / maxRunningSteps
    playDeathPenalty = 1
    playKillzoneRadius = killzoneRadius
    playReward = RewardFunctionCompete(playAliveBonus, playDeathPenalty, playIsTerminal)
    decay = 1
    accumulateRewards = AccumulateRewards(decay, playReward)
    addValuesToTrajectory = AddValuesToTrajectory(accumulateRewards)

    # generate trajectories in parallel
    generateTrajectoriesCodeName = 'generateCenterControlTrajectoryByCondition.py'
    evalNumTrials = 500
    numCpuCores = os.cpu_count()
    numCpuToUse = int(0.75 * numCpuCores)
    numCmdList = min(evalNumTrials, numCpuToUse)
    generateTrajectoriesParallel = GenerateTrajectoriesParallel(generateTrajectoriesCodeName, evalNumTrials, numCmdList)

    # run all trials and save trajectories
    generateTrajectoriesParallelFromDf = lambda df: generateTrajectoriesParallel(readParametersFromDf(df))
    # toSplitFrame.groupby(levelNames).apply(generateTrajectoriesParallelFromDf)

    # save evaluation trajectories
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(dirName, '..', '..', '..', 'data', 'evaluateSupervisedLearning',
                                             'multiMCTSAgentResNetNoPhysicsCenterControl',
                                             'evaluateCenterControlTrajByCondition')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)
    trajectoryExtension = '.pickle'
    trajectoryFixedParameters = {'maxRunningSteps': maxRunningSteps, 'numSimulations': numSimulations,
                                 'killzoneRadius': killzoneRadius}
    getTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectoryExtension, trajectoryFixedParameters)
    getTrajectorySavePathFromDf = lambda df: getTrajectorySavePath(readParametersFromDf(df))

    # compute statistics on the trajectories
    fuzzySearchParameterNames = []
    loadTrajectories = LoadTrajectories(getTrajectorySavePath, loadFromPickle, fuzzySearchParameterNames)
    loadTrajectoriesFromDf = lambda df: loadTrajectories(readParametersFromDf(df))
    measurementFunction = lambda trajectory: accumulateRewards(trajectory)[0]
    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf, measurementFunction)
    statisticsDf = toSplitFrame.groupby(levelNames).apply(computeStatistics)

    def calculateSurviveRatio(trajectory):
        # bucket each trajectory by how long the sheep survived
        length = np.array(len(trajectory))
        count = np.array([length < 50, length >= 50 and length < 100, length >= 100])
        return count

    computeNumbers = ComputeStatistics(loadTrajectoriesFromDf, calculateSurviveRatio)
    df = toSplitFrame.groupby(levelNames).apply(computeNumbers)
    print(df)

    fig = plt.figure()
    numRows = 1
    numColumns = 1
    plotCounter = 1
    axForDraw = fig.add_subplot(numRows, numColumns, plotCounter)
    xlabel = ['0-50', '50-100', '100-150']
    x = np.arange(len(xlabel))
    numTrials = 500
    yMean = df['mean'].tolist()
    yErr = np.array(df['std'].tolist()) / (np.sqrt(numTrials) - 1)

    totalWidth, n = 0.6, 3
    width = totalWidth / n
    x = x - (totalWidth - width) / 2
    plt.bar(x, yMean[0], yerr=yErr[0], width=width, label='trainStep0')
    plt.bar(x + width, yMean[1], yerr=yErr[1], width=width, label='trainStep10000')
    plt.bar(x + width * 2, yMean[2], yerr=yErr[2], width=width, label='trainStep30000')
    plt.bar(x + width * 3, yMean[3], yerr=yErr[3], width=width, label='trainStep50000')
    plt.suptitle('dataSize 3000')
    plt.xticks(x, xlabel)
    plt.ylim(0, 1)
    plt.xlabel('living steps')
    plt.legend(loc='best')
    # plt.show()

    # plot the results
    fig = plt.figure()
    numRows = len(manipulatedVariables['depth'])
    numColumns = len(manipulatedVariables['dataSize'])
    plotCounter = 1
    print(statisticsDf)
    for depth, grp in statisticsDf.groupby('depth'):
        grp.index = grp.index.droplevel('depth')
        for dataSize, group in grp.groupby('dataSize'):
            group.index = group.index.droplevel('dataSize')
            axForDraw = fig.add_subplot(numRows, numColumns, plotCounter)
            if plotCounter % numColumns == 1:
                axForDraw.set_ylabel('depth: {}'.format(depth))
            if plotCounter <= numColumns:
                axForDraw.set_title('dataSize: {}'.format(dataSize))
            axForDraw.set_ylim(-1, 1)
            # plt.ylabel('Accumulated rewards')
            maxTrainSteps = manipulatedVariables['trainSteps'][-1]
            plt.plot([0, maxTrainSteps], [0.354] * 2, '--m', color="#1C2833", label='pure MCTS')
            group.plot(ax=axForDraw, y='mean', yerr='std', marker='o', logx=False)
            plotCounter += 1

    plt.suptitle('center control wolves')
    plt.legend(loc='best')
    plt.show()
def main():
    # important parameters
    # manipulated variables
    manipulatedVariables = OrderedDict()
    manipulatedVariables['miniBatchSize'] = [64, 256]
    manipulatedVariables['learningRate'] = [1e-3, 1e-4, 1e-5]
    manipulatedVariables['depth'] = [5, 9, 17]  # [4, 8, 16]
    manipulatedVariables['trainSteps'] = [0, 5000, 10000, 20000, 50000]
    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    # accumulate rewards for trajectories
    sheepId = 0
    wolfId = 1
    xPosIndex = [0, 1]
    getSheepPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfPos = GetAgentPosFromState(wolfId, xPosIndex)
    killzoneRadius = 2
    numSimulations = 150
    maxRunningSteps = 30
    agentId = 1

    playAliveBonus = -1 / maxRunningSteps
    playDeathPenalty = 1
    playKillzoneRadius = killzoneRadius
    playIsTerminal = IsTerminal(playKillzoneRadius, getSheepPos, getWolfPos)
    playReward = RewardFunctionCompete(playAliveBonus, playDeathPenalty, playIsTerminal)
    decay = 1
    accumulateRewards = AccumulateRewards(decay, playReward)
    addValuesToTrajectory = AddValuesToTrajectory(accumulateRewards)

    # generate trajectories in parallel
    # generateTrajectoriesCodeName = 'generateWolfResNNEvaluationTrajectoryFixObstacle.py'
    # generateTrajectoriesCodeName = 'generateWolfNNEvaluationTrajectoryFixObstacle.py'
    # generateTrajectoriesCodeName = 'generateWolfResNNEvaluationTrajectoryMovedObstacle.py'
    generateTrajectoriesCodeName = 'generateWolfResNNEvaluationTrajectoryRandomObstacle.py'
    # generateTrajectoriesCodeName = 'generateWolfNNEvaluationTrajectoryRandomObstacle.py'
    evalNumTrials = 100
    numCpuCores = os.cpu_count()
    numCpuToUse = int(0.75 * numCpuCores)
    numCmdList = min(evalNumTrials, numCpuToUse)
    generateTrajectoriesParallel = GenerateTrajectoriesParallel(generateTrajectoriesCodeName, evalNumTrials, numCmdList)

    # run all trials and save trajectories
    generateTrajectoriesParallelFromDf = lambda df: generateTrajectoriesParallel(readParametersFromDf(df))
    toSplitFrame.groupby(levelNames).apply(generateTrajectoriesParallelFromDf)

    # save evaluation trajectories
    dirName = os.path.dirname(__file__)
    dataFolderName = os.path.join(dirName, '..', '..', '..', 'data', 'multiAgentTrain', 'MCTSRandomObstacle')
    trajectoryDirectory = os.path.join(dataFolderName, 'evaluationTrajectoriesResNNWithObstacle')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)
    trajectoryExtension = '.pickle'
    trajectoryFixedParameters = {'maxRunningSteps': maxRunningSteps, 'numSimulations': numSimulations,
                                 'killzoneRadius': killzoneRadius, 'agentId': agentId}
    getTrajectorySavePath = GetSavePath(trajectoryDirectory, trajectoryExtension, trajectoryFixedParameters)
    getTrajectorySavePathFromDf = lambda df: getTrajectorySavePath(readParametersFromDf(df))

    # compute statistics on the trajectories
    fuzzySearchParameterNames = ['sampleIndex']
    loadTrajectories = LoadTrajectories(getTrajectorySavePath, loadFromPickle, fuzzySearchParameterNames)
    loadTrajectoriesFromDf = lambda df: loadTrajectories(readParametersFromDf(df))
    measurementFunction = lambda trajectory: accumulateRewards(trajectory)[0]
    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf, measurementFunction)
    statisticsDf = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf)

    # manipulatedVariables['miniBatchSize'] = [64, 128]
    # manipulatedVariables['learningRate'] = [1e-3, 1e-4, 1e-5]
    # manipulatedVariables['depth'] = [4, 8, 16]
    # manipulatedVariables['trainSteps'] = [0, 20000, 40000, 60000, 100000, 180000]

    # plot the results
    fig = plt.figure()
    numRows = len(manipulatedVariables['depth'])
    numColumns = len(manipulatedVariables['learningRate'])
    plotCounter = 1
    selfId = 0
    for depth, grp in statisticsDf.groupby('depth'):
        grp.index = grp.index.droplevel('depth')
        for learningRate, group in grp.groupby('learningRate'):
            group.index = group.index.droplevel('learningRate')
            axForDraw = fig.add_subplot(numRows, numColumns, plotCounter)
            if (plotCounter % numColumns == 1) or numColumns == 1:
                axForDraw.set_ylabel('depth: {}'.format(depth))
            if plotCounter <= numColumns:
                axForDraw.set_title('learningRate: {}'.format(learningRate))
            axForDraw.set_ylim(-1, 1)
            drawPerformanceLine(group, axForDraw, selfId)
            plotCounter += 1

    plt.suptitle('SupervisedNNWolfwithRandomWallState')
    plt.legend(loc='best')
    plt.show()
def main():
    DEBUG = 0
    renderOn = 0
    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 5
        endSampleIndex = 7
        agentId = 1
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)

    # check file exists or not
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(dirName, '..', '..', '..', '..', 'data', 'NoPhysics2wolves1sheep',
                                             'trainWolvesTwoCenterControlAction88', 'trajectories')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 250
    killzoneRadius = 150
    fixedParameters = {'agentId': agentId, 'maxRunningSteps': maxRunningSteps,
                       'numSimulations': numSimulations, 'killzoneRadius': killzoneRadius}
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parametersForTrajectoryPath)

    if not os.path.isfile(trajectorySavePath):
        numOfAgent = 3
        xBoundary = [0, 600]
        yBoundary = [0, 600]
        resetState = Reset(xBoundary, yBoundary, numOfAgent)

        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(xBoundary, yBoundary)
        interpolateOneFrame = InterpolateOneFrame(stayInBoundaryByReflectVelocity)
        chooseInterpolatedNextState = lambda interpolatedStates: interpolatedStates[-1]

        sheepId = 0
        wolvesId = 1
        centerControlIndexList = [wolvesId]
        unpackCenterControlAction = UnpackCenterControlAction(centerControlIndexList)
        numFramesToInterpolate = 0
        transit = TransitWithInterpolation(numFramesToInterpolate, interpolateOneFrame,
                                           chooseInterpolatedNextState, unpackCenterControlAction)

        # NNGuidedMCTS init
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
        wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7)]

        preyPowerRatio = 10
        sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))
        predatorPowerRatio = 8
        wolfActionOneSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
        wolfActionTwoSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
        wolvesActionSpace = list(product(wolfActionOneSpace, wolfActionTwoSpace))
        actionSpaceList = [sheepActionSpace, wolvesActionSpace]

        # neural network init
        numStateSpace = 2 * numOfAgent
        numSheepActionSpace = len(sheepActionSpace)
        numWolvesActionSpace = len(wolvesActionSpace)
        regularizationFactor = 1e-4
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace, regularizationFactor)

        # load save dir
        NNModelSaveExtension = ''
        sheepNNModelSaveDirectory = os.path.join(dirName, '..', '..', '..', '..', 'data', 'NoPhysics2wolves1sheep',
                                                 'trainSheepWithTwoHeatSeekingWolves', 'trainedResNNModels')
        sheepNNModelFixedParameters = {'agentId': 0, 'maxRunningSteps': 50, 'numSimulations': 110,
                                       'miniBatchSize': 256, 'learningRate': 0.0001}
        getSheepNNModelSavePath = GetSavePath(sheepNNModelSaveDirectory, NNModelSaveExtension,
                                              sheepNNModelFixedParameters)

        depth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initSheepNNModel = generateSheepModel(sharedWidths * depth, actionLayerWidths, valueLayerWidths,
                                              resBlockSize, initializationMethod, dropoutRate)
        sheepTrainedModelPath = getSheepNNModelSavePath({'trainSteps': 50000, 'depth': depth})
        sheepTrainedModel = restoreVariables(initSheepNNModel, sheepTrainedModelPath)
        sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

        wolfOneId = 1
        wolfTwoId = 2
        xPosIndex = [0, 1]
        getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
        getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
        speed = 120
        # sheepPolicy = HeatSeekingContinuesDeterministicPolicy(getWolfOneXPos, getSheepXPos, speed)

        # MCTS
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        # prior
        getActionPrior = lambda state: {action: 1 / len(wolvesActionSpace) for action in wolvesActionSpace}

        # load chase nn policy
        chooseActionInMCTS = sampleFromDistribution

        def wolvesTransit(state, action):
            return transit(state, [chooseActionInMCTS(sheepPolicy(state)), action])

        # reward function
        wolfOneId = 1
        wolfTwoId = 2
        xPosIndex = [0, 1]
        getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
        getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
        getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)
        isCollidedOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
        isCollidedTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)
        # collisionTimeByAddingCollisionInAllWolves
        calCollisionTimes = lambda state: np.sum([isCollidedOne(state), isCollidedTwo(state)])
        # collisionTimeByBooleanCollisionForAnyWolf
        # calCollisionTimes = lambda state: np.max([isCollidedOne(state), isCollidedTwo(state)])
        calTerminationSignals = calCollisionTimes
        chooseInterpolatedStateByEarlyTermination = ChooseInterpolatedStateByEarlyTermination(calTerminationSignals)

        numFramesToInterpolateInReward = 3
        interpolateStateInReward = TransitWithInterpolation(numFramesToInterpolateInReward, interpolateOneFrame,
                                                            chooseInterpolatedStateByEarlyTermination,
                                                            unpackCenterControlAction)
        aliveBonus = -1 / maxRunningSteps * 10
        deathPenalty = 1
        rewardFunction = RewardFunctionCompeteWithStateInterpolation(aliveBonus, deathPenalty, calCollisionTimes,
                                                                     interpolateStateInReward)

        # initialize children; expand
        initializeChildren = InitializeChildren(wolvesActionSpace, wolvesTransit, getActionPrior)
        isTerminal = lambda state: False
        expand = Expand(isTerminal, initializeChildren)

        # random rollout policy
        def rolloutPolicy(state):
            return [sampleFromDistribution(sheepPolicy(state)),
                    wolvesActionSpace[np.random.choice(range(numWolvesActionSpace))]]

        # rollout
        # rolloutHeuristicWeight = 0
        # minDistance = 400
        # rolloutHeuristic1 = HeuristicDistanceToTarget(rolloutHeuristicWeight, getWolfOneXPos, getSheepXPos, minDistance)
        # rolloutHeuristic2 = HeuristicDistanceToTarget(rolloutHeuristicWeight, getWolfTwoXPos, getSheepXPos, minDistance)
        # rolloutHeuristic = lambda state: (rolloutHeuristic1(state) + rolloutHeuristic2(state)) / 2
        rolloutHeuristic = lambda state: 0
        maxRolloutSteps = 15
        rollout = RollOut(rolloutPolicy, maxRolloutSteps, transit, rewardFunction, isTerminal, rolloutHeuristic)

        wolfPolicy = MCTS(numSimulations, selectChild, expand, rollout, backup, establishSoftmaxActionDist)

        # All agents' policies
        policy = lambda state: [sheepPolicy(state), wolfPolicy(state)]
        chooseActionList = [maxFromDistribution, maxFromDistribution]

        def sampleAction(state):
            actionDists = [sheepPolicy(state), wolfPolicy(state)]
            action = [chooseAction(actionDist)
                      for actionDist, chooseAction in zip(actionDists, chooseActionList)]
            return action

        render = None
        if renderOn:
            import pygame as pg
            from pygame.color import THECOLORS
            screenColor = THECOLORS['black']
            circleColorList = [THECOLORS['green'], THECOLORS['yellow'], THECOLORS['red']]
            circleSize = 10
            saveImage = False
            saveImageDir = os.path.join(dirName, '..', '..', '..', '..', 'data', 'demoImg')
            if not os.path.exists(saveImageDir):
                os.makedirs(saveImageDir)
            screen = pg.display.set_mode([max(xBoundary), max(yBoundary)])
            render = Render(numOfAgent, xPosIndex, screen, screenColor, circleColorList, circleSize,
                            saveImage, saveImageDir)

        forwardOneStep = ForwardOneStep(transit, rewardFunction)
        sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, isTerminal, resetState, forwardOneStep,
                                                      render, renderOn)

        trajectories = [sampleTrajectory(sampleAction)
                        for sampleIndex in range(startSampleIndex, endSampleIndex)]
        print([len(traj) for traj in trajectories])
        saveToPickle(trajectories, trajectorySavePath)
def main():
    parametersForTrajectoryPath = json.loads(sys.argv[1])
    startSampleIndex = int(sys.argv[2])
    endSampleIndex = int(sys.argv[3])
    # parametersForTrajectoryPath['sampleOneStepPerTraj'] = 1  # 0
    # parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)
    trainSteps = int(parametersForTrajectoryPath['trainSteps'])
    depth = int(parametersForTrajectoryPath['depth'])
    dataSize = int(parametersForTrajectoryPath['dataSize'])

    # parametersForTrajectoryPath = {}
    # depth = 5
    # dataSize = 5000
    # trainSteps = 50000
    # startSampleIndex = 0
    # endSampleIndex = 100

    killzoneRadius = 25
    numSimulations = 200
    maxRunningSteps = 100

    fixedParameters = {'maxRunningSteps': maxRunningSteps, 'numSimulations': numSimulations,
                       'killzoneRadius': killzoneRadius}
    trajectorySaveExtension = '.pickle'
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(dirName, '..', '..', '..', 'data', 'evaluateSupervisedLearning',
                                             'multiMCTSAgentResNetNoPhysicsCenterControl',
                                             'evaluateCenterControlTrajByCondition')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parametersForTrajectoryPath)

    if not os.path.isfile(trajectorySavePath):
        numOfAgent = 3
        sheepId = 0
        wolvesId = 1
        wolfOneId = 1
        wolfTwoId = 2
        xPosIndex = [0, 1]
        xBoundary = [0, 600]
        yBoundary = [0, 600]
        reset = Reset(xBoundary, yBoundary, numOfAgent)

        getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
        getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
        getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)
        isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
        isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)
        isTerminal = lambda state: isTerminalOne(state) or isTerminalTwo(state)

        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(xBoundary, yBoundary)
        transit = TransiteForNoPhysics(stayInBoundaryByReflectVelocity)

        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
        preyPowerRatio = 3
        sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))
        predatorPowerRatio = 2
        wolfActionOneSpace = list(map(tuple, np.array(actionSpace) * predatorPowerRatio))
        wolfActionTwoSpace = list(map(tuple, np.array(actionSpace) * predatorPowerRatio))
        wolvesActionSpace = list(it.product(wolfActionOneSpace, wolfActionTwoSpace))

        # neural network init
        numStateSpace = 6
        numSheepActionSpace = len(sheepActionSpace)
        numWolvesActionSpace = len(wolvesActionSpace)
        regularizationFactor = 1e-4
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace, regularizationFactor)

        # load save dir
        NNModelSaveExtension = ''
        NNModelSaveDirectory = os.path.join(dirName, '..', '..', '..', 'data',
                                            'evaluateEscapeMultiChasingNoPhysics',
                                            'trainedResNNModelsMultiStillAction')
        NNModelFixedParameters = {'agentId': 0, 'maxRunningSteps': 150, 'numSimulations': 200,
                                  'miniBatchSize': 256, 'learningRate': 0.0001}
        getNNModelSavePath = GetSavePath(NNModelSaveDirectory, NNModelSaveExtension, NNModelFixedParameters)
        if not os.path.exists(NNModelSaveDirectory):
            os.makedirs(NNModelSaveDirectory)

        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initSheepNNModel = generateSheepModel(sharedWidths * 5, actionLayerWidths, valueLayerWidths,
                                              resBlockSize, initializationMethod, dropoutRate)
        sheepTrainedModelPath = getNNModelSavePath({'trainSteps': 50000, 'depth': 5})
        sheepTrainedModel = restoreVariables(initSheepNNModel, sheepTrainedModelPath)
        sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

        generateWolvesModel = GenerateModel(numStateSpace, numWolvesActionSpace, regularizationFactor)
        initWolvesNNModel = generateWolvesModel(sharedWidths * depth, actionLayerWidths, valueLayerWidths,
                                                resBlockSize, initializationMethod, dropoutRate)
        NNModelSaveDirectory = os.path.join(dirName, '..', '..', '..', 'data', 'evaluateSupervisedLearning',
                                            'multiMCTSAgentResNetNoPhysicsCenterControl', 'trainedResNNModels')
        wolfId = 1
        NNModelFixedParametersWolves = {'agentId': wolfId, 'maxRunningSteps': maxRunningSteps,
                                        'numSimulations': numSimulations, 'miniBatchSize': 256,
                                        'learningRate': 0.0001}
        getNNModelSavePath = GetSavePath(NNModelSaveDirectory, NNModelSaveExtension, NNModelFixedParametersWolves)
        wolvesTrainedModelPath = getNNModelSavePath({'trainSteps': trainSteps, 'depth': depth, 'dataSize': dataSize})
        wolvesTrainedModel = restoreVariables(initWolvesNNModel, wolvesTrainedModelPath)
        wolfPolicy = ApproximatePolicy(wolvesTrainedModel, wolvesActionSpace)

        from exec.evaluateNoPhysicsEnvWithRender import Render
        import pygame as pg
        from pygame.color import THECOLORS
        screenColor = THECOLORS['black']
        circleColorList = [THECOLORS['green'], THECOLORS['red'], THECOLORS['orange']]
        circleSize = 10
        saveImage = False
        saveImageDir = os.path.join(dirName, '..', '..', '..', 'data', 'demoImg')
        if not os.path.exists(saveImageDir):
            os.makedirs(saveImageDir)
        renderOn = False
        render = None
        if renderOn:
            screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
            render = Render(numOfAgent, xPosIndex, screen, screenColor, circleColorList, circleSize,
                            saveImage, saveImageDir)

        chooseActionList = [chooseGreedyAction, chooseGreedyAction]
        sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, transit, isTerminal, reset,
                                                      chooseActionList, render, renderOn)

        # All agents' policies
        policy = lambda state: [sheepPolicy(state), wolfPolicy(state)]
        trajectories = [sampleTrajectory(policy)
                        for sampleIndex in range(startSampleIndex, endSampleIndex)]
        saveToPickle(trajectories, trajectorySavePath)
def main():
    DEBUG = 0
    renderOn = 0
    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 0
        endSampleIndex = 10
        agentId = 1
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)

    # check file exists or not
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
                                             'trainWolvesTwoCenterControlMultiTrees', 'trajectories')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 500
    killzoneRadius = 50
    fixedParameters = {'agentId': agentId, 'maxRunningSteps': maxRunningSteps,
                       'numSimulations': numSimulations, 'killzoneRadius': killzoneRadius}
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parametersForTrajectoryPath)

    if not os.path.isfile(trajectorySavePath):
        numOfAgent = 3
        sheepId = 0
        wolvesId = 1
        wolfOneId = 1
        wolfTwoId = 2
        xPosIndex = [0, 1]
        xBoundary = [0, 600]
        yBoundary = [0, 600]
        getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
        getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
        getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)
        reset = Reset(xBoundary, yBoundary, numOfAgent)

        isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
        isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)
        isTerminal = lambda state: isTerminalOne(state) or isTerminalTwo(state)

        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(xBoundary, yBoundary)
        centerControlIndexList = [wolvesId]
        unpackCenterControlAction = UnpackCenterControlAction(centerControlIndexList)
        transitionFunction = TransiteForNoPhysicsWithCenterControlAction(stayInBoundaryByReflectVelocity)
        numFramesToInterpolate = 3
        transit = TransitWithInterpolateStateWithCenterControlAction(numFramesToInterpolate, transitionFunction,
                                                                     isTerminal, unpackCenterControlAction)

        # NNGuidedMCTS init
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
        wolfActionSpace = actionSpace
        # wolfActionSpace = [(10, 0), (0, 10), (-10, 0), (0, -10), (0, 0)]

        preyPowerRatio = 12
        sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))
        predatorPowerRatio = 8
        wolfActionOneSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
        wolfActionTwoSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
        wolvesActionSpace = list(product(wolfActionOneSpace, wolfActionTwoSpace))
        actionSpaceList = [sheepActionSpace, wolvesActionSpace]

        # neural network init
        numStateSpace = 2 * numOfAgent
        numSheepActionSpace = len(sheepActionSpace)
        numWolvesActionSpace = len(wolvesActionSpace)
        regularizationFactor = 1e-4
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace, regularizationFactor)

        # load save dir
        NNModelSaveExtension = ''
        sheepNNModelSaveDirectory = os.path.join(dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
                                                 'trainSheepWithTwoHeatSeekingWolves', 'trainedResNNModels')
        sheepNNModelFixedParameters = {'agentId': 0, 'maxRunningSteps': 50, 'numSimulations': 110,
                                       'miniBatchSize': 256, 'learningRate': 0.0001}
        getSheepNNModelSavePath = GetSavePath(sheepNNModelSaveDirectory, NNModelSaveExtension,
                                              sheepNNModelFixedParameters)

        depth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initSheepNNModel = generateSheepModel(sharedWidths * depth, actionLayerWidths, valueLayerWidths,
                                              resBlockSize, initializationMethod, dropoutRate)
        sheepTrainedModelPath = getSheepNNModelSavePath({'trainSteps': 50000, 'depth': depth})
        sheepTrainedModel = restoreVariables(initSheepNNModel, sheepTrainedModelPath)
        sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

        # MCTS
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        # prior
        getActionPrior = lambda state: {action: 1 / len(wolvesActionSpace) for action in wolvesActionSpace}

        # load chase nn policy
        temperatureInMCTS = 1
        chooseActionInMCTS = SampleAction(temperatureInMCTS)

        def wolvesTransit(state, action):
            return transit(state, [chooseActionInMCTS(sheepPolicy(state)), action])

        # reward function
        aliveBonus = -1 / maxRunningSteps
        deathPenalty = 1
        rewardFunction = reward.RewardFunctionCompete(aliveBonus, deathPenalty, isTerminal)

        # initialize children; expand
        initializeChildren = InitializeChildren(wolvesActionSpace, wolvesTransit, getActionPrior)
        expand = Expand(isTerminal, initializeChildren)

        # random rollout policy
        def rolloutPolicy(state):
            return wolvesActionSpace[np.random.choice(range(numWolvesActionSpace))]

        # rollout
        rolloutHeuristicWeight = 0
        minDistance = 400
        rolloutHeuristic1 = reward.HeuristicDistanceToTarget(rolloutHeuristicWeight, getWolfOneXPos,
                                                             getSheepXPos, minDistance)
        rolloutHeuristic2 = reward.HeuristicDistanceToTarget(rolloutHeuristicWeight, getWolfTwoXPos,
                                                             getSheepXPos, minDistance)
        rolloutHeuristic = lambda state: (rolloutHeuristic1(state) + rolloutHeuristic2(state)) / 2

        maxRolloutSteps = 15
        rollout = RollOut(rolloutPolicy, maxRolloutSteps, wolvesTransit, rewardFunction, isTerminal,
                          rolloutHeuristic)

        numTree = 4
        numSimulationsPerTree = int(numSimulations / numTree)
        wolfPolicy = StochasticMCTS(numTree, numSimulationsPerTree, selectChild, expand, rollout, backup,
                                    establishSoftmaxActionDistFromMultipleTrees)

        # All agents' policies
        policy = lambda state: [sheepPolicy(state), wolfPolicy(state)]
        chooseActionList = [chooseGreedyAction, chooseGreedyAction]

        render = None
        if renderOn:
            import pygame as pg
            from pygame.color import THECOLORS
            screenColor = THECOLORS['black']
            circleColorList = [THECOLORS['green'], THECOLORS['red'], THECOLORS['red']]
            circleSize = 10
            saveImage = False
            saveImageDir = os.path.join(dirName, '..', '..', '..', '..', 'data', 'demoImg')
            if not os.path.exists(saveImageDir):
                os.makedirs(saveImageDir)
            screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
            render = Render(numOfAgent, xPosIndex, screen, screenColor, circleColorList, circleSize,
                            saveImage, saveImageDir)

        sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, transit, isTerminal, reset,
                                                      chooseActionList, render, renderOn)
        trajectories = [sampleTrajectory(policy)
                        for sampleIndex in range(startSampleIndex, endSampleIndex)]
        print([len(traj) for traj in trajectories])
        saveToPickle(trajectories, trajectorySavePath)
def trainOneCondition(manipulatedVariables):
    depth = int(manipulatedVariables['depth'])

    # Get dataset for training
    dirName = os.path.dirname(__file__)
    dataSetDirectory = os.path.join(dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
                                    'trainWolvesTwoCenterControlMultiTrees', 'trajectories')
    if not os.path.exists(dataSetDirectory):
        os.makedirs(dataSetDirectory)

    dataSetExtension = '.pickle'
    dataSetMaxRunningSteps = 50
    dataSetNumSimulations = 500
    killzoneRadius = 50
    agentId = 1
    wolvesId = 1
    dataSetFixedParameters = {'agentId': agentId, 'maxRunningSteps': dataSetMaxRunningSteps,
                              'numSimulations': dataSetNumSimulations, 'killzoneRadius': killzoneRadius}
    getDataSetSavePath = GetSavePath(dataSetDirectory, dataSetExtension, dataSetFixedParameters)
    print("DATASET LOADED!")

    # accumulate rewards for trajectories
    numOfAgent = 3
    sheepId = 0
    wolvesId = 1
    wolfOneId = 1
    wolfTwoId = 2
    xPosIndex = [0, 1]
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
    getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)
    reset = Reset(xBoundary, yBoundary, numOfAgent)

    isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
    isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)
    playIsTerminal = lambda state: isTerminalOne(state) or isTerminalTwo(state)

    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(xBoundary, yBoundary)
    transit = TransiteForNoPhysics(stayInBoundaryByReflectVelocity)

    playAliveBonus = -1 / dataSetMaxRunningSteps
    playDeathPenalty = 1
    playKillzoneRadius = killzoneRadius
    playReward = RewardFunctionCompete(playAliveBonus, playDeathPenalty, playIsTerminal)
    decay = 1
    accumulateRewards = AccumulateRewards(decay, playReward)
    addValuesToTrajectory = AddValuesToTrajectory(accumulateRewards)

    # pre-process the trajectories
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
    preyPowerRatio = 12
    sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))
    predatorPowerRatio = 8
    actionSpaceOne = [(10, 0), (-10, 0)]
    wolfActionOneSpace = list(map(tuple, np.array(actionSpace) * predatorPowerRatio))
    actionSpaceTwo = [(10, 0), (-10, 0)]
    wolfActionTwoSpace = list(map(tuple, np.array(actionSpace) * predatorPowerRatio))
    wolvesActionSpace = list(it.product(wolfActionOneSpace, wolfActionTwoSpace))
    numActionSpace = len(wolvesActionSpace)

    actionIndex = 1
    actionToOneHot = ActionToOneHot(wolvesActionSpace)
    getTerminalActionFromTrajectory = lambda trajectory: trajectory[-1][actionIndex]
    removeTerminalTupleFromTrajectory = RemoveTerminalTupleFromTrajectory(getTerminalActionFromTrajectory)
    processTrajectoryForNN = ProcessTrajectoryForPolicyValueNet(actionToOneHot, wolvesId)
    preProcessTrajectories = PreProcessTrajectories(addValuesToTrajectory, removeTerminalTupleFromTrajectory,
                                                    processTrajectoryForNN)

    fuzzySearchParameterNames = ['sampleIndex']
    loadTrajectories = LoadTrajectories(getDataSetSavePath, loadFromPickle, fuzzySearchParameterNames)
    loadedTrajectories = loadTrajectories(parameters={})
    # print(loadedTrajectories[0])

    # keep only the first three agents' states in each time step
    filterState = lambda timeStep: (timeStep[0][0:3], timeStep[1], timeStep[2])
    trajectories = [[filterState(timeStep) for timeStep in trajectory] for trajectory in loadedTrajectories]
    print(len(trajectories))

    preProcessedTrajectories = np.concatenate(preProcessTrajectories(trajectories))
    trainData = [list(varBatch) for varBatch in zip(*preProcessedTrajectories)]
    valuedTrajectories = [addValuesToTrajectory(tra) for tra in trajectories]

    # neural network init and save path
    numStateSpace = 6
    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    generateModel = GenerateModel(numStateSpace, numActionSpace, regularizationFactor)

    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    sheepNNModel = generateModel(sharedWidths * depth, actionLayerWidths, valueLayerWidths, resBlockSize,
                                 initializationMethod, dropoutRate)

    initTimeStep = 0
    valueIndex = 3
    trainDataMeanAccumulatedReward = np.mean([tra[initTimeStep][valueIndex] for tra in valuedTrajectories])
    print(trainDataMeanAccumulatedReward)

    # function to train NN model
    terminalThreshold = 1e-10
    lossHistorySize = 10
    initActionCoeff = 1
    initValueCoeff = 1
    initCoeff = (initActionCoeff, initValueCoeff)
    afterActionCoeff = 1
    afterValueCoeff = 1
    afterCoeff = (afterActionCoeff, afterValueCoeff)
    terminalController = lambda evalDict, numSteps: False
    coefficientController = CoefficientCotroller(initCoeff, afterCoeff)
    reportInterval = 10000
    trainStepsIntervel = 10000
    trainReporter = TrainReporter(trainStepsIntervel, reportInterval)
    learningRateDecay = 1
    learningRateDecayStep = 1
    learningRateModifier = lambda learningRate: LearningRateModifier(learningRate, learningRateDecay,
                                                                     learningRateDecayStep)
    getTrainNN = lambda batchSize, learningRate: Train(trainStepsIntervel, batchSize, sampleData,
                                                       learningRateModifier(learningRate), terminalController,
                                                       coefficientController, trainReporter)

    # get path to save trained models
    NNModelFixedParameters = {'agentId': agentId, 'maxRunningSteps': dataSetMaxRunningSteps,
                              'numSimulations': dataSetNumSimulations}
    NNModelSaveDirectory = os.path.join(dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
                                        'trainWolvesTwoCenterControlMultiTrees', 'trainedResNNModels')
    if not os.path.exists(NNModelSaveDirectory):
        os.makedirs(NNModelSaveDirectory)
    NNModelSaveExtension = ''
    getNNModelSavePath = GetSavePath(NNModelSaveDirectory, NNModelSaveExtension, NNModelFixedParameters)

    # function to train models
    numOfTrainStepsIntervel = 6
    trainIntervelIndexes = list(range(numOfTrainStepsIntervel))
    trainModelForConditions = TrainModelForConditions(trainIntervelIndexes, trainStepsIntervel, trainData,
                                                      sheepNNModel, getTrainNN, getNNModelSavePath)
    trainModelForConditions(manipulatedVariables)