def setUp(self):
     self.actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                         (-7, -7), (0, -10), (7, -7)]
     self.sheepId = 0
     self.wolfId = 1
     self.posIndex = [0, 1]
     self.getPredatorPos = GetAgentPosFromState(self.wolfId, self.posIndex)
     self.getPreyPos = GetAgentPosFromState(self.sheepId, self.posIndex)
 def setUp(self):
     self.actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                         (-7, -7), (0, -10), (7, -7)]
     self.sheepId = 0
     self.wolfId = 1
     self.xPosIndex = [2, 3]
     self.getSheepXPos = GetAgentPosFromState(self.sheepId, self.xPosIndex)
     self.getWolfXPos = GetAgentPosFromState(self.wolfId, self.xPosIndex)
     self.killzoneRadius = 0.5
     self.isTerminal = IsTerminal(self.killzoneRadius, self.getSheepXPos,
                                  self.getWolfXPos)
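
The two setUp fixtures above only wire the distance-based terminal check together. Below is a minimal sketch of what this IsTerminal variant might look like, assuming the signature IsTerminal(killzoneRadius, getPreyPos, getPredatorPos) used here; note that other examples below pass the arguments in a different order, so the repository appears to define more than one variant.

import numpy as np

class IsTerminal:
    def __init__(self, killzoneRadius, getPreyPos, getPredatorPos):
        self.killzoneRadius = killzoneRadius
        self.getPreyPos = getPreyPos
        self.getPredatorPos = getPredatorPos

    def __call__(self, state):
        preyPos = np.asarray(self.getPreyPos(state))
        predatorPos = np.asarray(self.getPredatorPos(state))
        # terminal once the predator enters the prey's kill zone
        return np.linalg.norm(predatorPos - preyPos) <= self.killzoneRadius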
Example #3
    def testMassEffectInTransition(self, state, allAgentsActions, smallMass,
                                   largeMass):
        # transition function
        dirName = os.path.dirname(__file__)
        physicsDynamicsPath = os.path.join(dirName, '..', 'env', 'xmls',
                                           'twoAgents.xml')
        agentsBodyMassIndex = [6, 7]
        physicsSmallMassModel = mujoco.load_model_from_path(
            physicsDynamicsPath)
        physicsSmallMassModel.body_mass[agentsBodyMassIndex] = [
            smallMass, smallMass
        ]
        physicsLargeMassModel = mujoco.load_model_from_path(
            physicsDynamicsPath)
        physicsLargeMassModel.body_mass[agentsBodyMassIndex] = [
            largeMass, largeMass
        ]
        physicsSmallMassSimulation = mujoco.MjSim(physicsSmallMassModel)
        physicsLargeMassSimulation = mujoco.MjSim(physicsLargeMassModel)
        # set_constants is available in mujoco_py >= 2.0; it does not exist in 1.50
        physicsSmallMassSimulation.set_constants()
        physicsLargeMassSimulation.set_constants()

        sheepId = 0
        wolfId = 1
        xPosIndex = [2, 3]
        getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
        getWolfXPos = GetAgentPosFromState(wolfId, xPosIndex)
        killzoneRadius = 2
        isTerminal = IsTerminal(killzoneRadius, getSheepXPos, getWolfXPos)

        numSimulationFrames = 20
        transitSmallMassAgents = TransitionFunction(physicsSmallMassSimulation,
                                                    isTerminal,
                                                    numSimulationFrames)
        transitLargeMassAgents = TransitionFunction(physicsLargeMassSimulation,
                                                    isTerminal,
                                                    numSimulationFrames)
        nextStateInSmallMassTransition = transitSmallMassAgents(
            state, allAgentsActions)
        nextStateInLargeMassTransition = transitLargeMassAgents(
            state, allAgentsActions)
        stateChangeInSmallMassTransition = nextStateInSmallMassTransition - state
        stateChangeInLargeMassTransition = nextStateInLargeMassTransition - state

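        # With the same actions applied over the same number of frames, F = m * a
        # implies the resulting state change scales inversely with body mass, so the
        # large-mass change should equal the small-mass change times smallMass / largeMass.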
        trueMassRatio = smallMass / largeMass
        self.assertTrue(
            np.allclose(stateChangeInLargeMassTransition,
                        stateChangeInSmallMassTransition * trueMassRatio))
Example #4
 def setUp(self):
     self.numOfAgent = 2
     self.sheepId = 0
     self.wolfId = 1
     self.posIndex = [0, 1]
     self.xBoundary = [0, 640]
     self.yBoundary = [0, 480]
     self.minDistance = 50
     self.getPreyPos = GetAgentPosFromState(self.sheepId, self.posIndex)
     self.getPredatorPos = GetAgentPosFromState(self.wolfId, self.posIndex)
     self.stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
         self.xBoundary, self.yBoundary)
     self.isTerminal = IsTerminal(self.getPredatorPos, self.getPreyPos,
                                  self.minDistance)
     self.transition = TransiteForNoPhysics(
         self.stayInBoundaryByReflectVelocity)
    def testGetAgentPosFromState(self, agentId, posIndex, state,
                                 groundTruthAgentPos):
        getAgentPosFromState = GetAgentPosFromState(agentId, posIndex)
        agentPos = getAgentPosFromState(state)

        truthValue = np.array_equal(agentPos, groundTruthAgentPos)
        self.assertTrue(truthValue)
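
From the call pattern in this test (construct with an agent id and the indices of that agent's position entries, then call on a full state), GetAgentPosFromState is presumably a small callable along the following lines; this is a sketch inferred from usage, not the repository source.

import numpy as np

class GetAgentPosFromState:
    def __init__(self, agentId, posIndex):
        self.agentId = agentId
        self.posIndex = posIndex

    def __call__(self, state):
        state = np.asarray(state)
        # select this agent's entry, then slice out its position coordinates
        return state[self.agentId][self.posIndex]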
Example #6
 def setUp(self):
     self.modelPath = os.path.join(DIRNAME, '..', 'env', 'xmls',
                                   'twoAgents.xml')
     self.model = load_model_from_path(self.modelPath)
     self.simulation = MjSim(self.model)
     self.numJointEachAgent = 2
     self.numAgent = 2
     self.killzoneRadius = 0.5
     self.numSimulationFrames = 20
     self.sheepId = 0
     self.wolfId = 1
     self.xPosIndex = [2, 3]
     self.getSheepPos = GetAgentPosFromState(self.sheepId, self.xPosIndex)
     self.getWolfPos = GetAgentPosFromState(self.wolfId, self.xPosIndex)
     self.isTerminal = IsTerminal(self.killzoneRadius, self.getSheepPos,
                                  self.getWolfPos)
     self.minQPos = (-9.7, -9.7)
     self.maxQPos = (9.7, 9.7)
     self.withinBounds = WithinBounds(self.minQPos, self.maxQPos)
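
WithinBounds is only constructed in this fixture; a hedged guess at its behaviour, assuming it is called on a position vector and reports whether every coordinate lies inside the per-dimension bounds (the repository implementation may differ):

import numpy as np

class WithinBounds:
    def __init__(self, minQPos, maxQPos):
        self.minQPos = np.asarray(minQPos)
        self.maxQPos = np.asarray(maxQPos)

    def __call__(self, qPos):
        qPos = np.asarray(qPos)
        # true only if every coordinate stays inside its [min, max] interval
        return bool(np.all(qPos >= self.minQPos) and np.all(qPos <= self.maxQPos))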
    def setUp(self):
        self.actionSpace = [(-1, 0), (1, 0), (0, 1), (0, -1), (0, 0)]
        self.rationalityParam = 0.9
        self.lowerBoundAngle = 0
        self.upperBoundAngle = np.pi / 2

        self.actHeatSeeking = ActHeatSeeking(self.actionSpace,
                                             computeAngleBetweenVectors,
                                             self.lowerBoundAngle,
                                             self.upperBoundAngle)
        self.wolfID = 0
        self.sheepID = 2
        self.masterID = 1
        self.positionIndex = [0, 1]

        self.getWolfPos = GetAgentPosFromState(self.wolfID, self.positionIndex)
        self.getSheepPos = GetAgentPosFromState(self.sheepID,
                                                self.positionIndex)
        self.getMasterPos = GetAgentPosFromState(self.masterID,
                                                 self.positionIndex)
    def testGetAgentPosFromTrajectory(self, agentId, timeStep, trajectory,
                                      groundTruthAgentPos):
        xPosIndex = [2, 3]
        stateIndex = 0
        getAgentPosFromState = GetAgentPosFromState(agentId, xPosIndex)
        getStateFromTrajectory = GetStateFromTrajectory(timeStep, stateIndex)
        getAgentPosFromTrajectory = GetAgentPosFromTrajectory(
            getAgentPosFromState, getStateFromTrajectory)
        agentPos = getAgentPosFromTrajectory(trajectory)

        truthValue = np.array_equal(agentPos, groundTruthAgentPos)
        self.assertTrue(truthValue)
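
The two helpers composed in this test can be read off the constructor arguments: GetStateFromTrajectory picks the stored state at a time step, and GetAgentPosFromTrajectory applies the position getter to it. A sketch under those assumptions (a trajectory is taken to be a sequence of time-step tuples whose entry at stateIndex is the state):

class GetStateFromTrajectory:
    def __init__(self, timeStep, stateIndex):
        self.timeStep = timeStep
        self.stateIndex = stateIndex

    def __call__(self, trajectory):
        # the state stored at the requested time step
        return trajectory[self.timeStep][self.stateIndex]


class GetAgentPosFromTrajectory:
    def __init__(self, getAgentPosFromState, getStateFromTrajectory):
        self.getAgentPosFromState = getAgentPosFromState
        self.getStateFromTrajectory = getStateFromTrajectory

    def __call__(self, trajectory):
        state = self.getStateFromTrajectory(trajectory)
        return self.getAgentPosFromState(state)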
    def setUp(self):
        self.actionSpace = [(-1, 0), (1, 0), (0, 1), (0, -1), (0, 0)]
        self.lowerBoundAngle = 0
        self.upperBoundAngle = np.pi / 2
        self.lowerBoundary = 1

        self.wolfID = 2
        self.sheepID = 0
        self.masterID = 1
        self.positionIndex = [0, 1]

        self.getWolfPos = GetAgentPosFromState(self.wolfID, self.positionIndex)
        self.getSheepPos = GetAgentPosFromState(self.sheepID,
                                                self.positionIndex)
        self.getMasterPos = GetAgentPosFromState(self.masterID,
                                                 self.positionIndex)

        self.pulledAgentID = self.wolfID
        self.noPullingAgentID = self.sheepID
        self.pullingAgentID = self.masterID

        self.getPulledAgentPos = GetAgentPosFromState(self.pulledAgentID,
                                                      self.positionIndex)
        self.getNoPullAgentPos = GetAgentPosFromState(self.noPullingAgentID,
                                                      self.positionIndex)
        self.getPullingAgentPos = GetAgentPosFromState(self.pullingAgentID,
                                                       self.positionIndex)

        self.forceSpace = [(-1, 0), (1, 0), (0, 1), (0, -1)]
        self.samplePulledForceDirection = SamplePulledForceDirection(
            computeAngleBetweenVectors, self.forceSpace, self.lowerBoundAngle,
            self.upperBoundAngle)

        self.distanceForceRatio = 2
        self.getPullingForceValue = GetPullingForceValue(
            self.distanceForceRatio)

        self.getPulledAgentForce = GetPulledAgentForce(
            self.getPullingAgentPos, self.getPulledAgentPos,
            self.samplePulledForceDirection, self.getPullingForceValue)

        self.getAgentsForce = GetAgentsForce(self.getPulledAgentForce,
                                             self.pulledAgentID,
                                             self.noPullingAgentID,
                                             self.pullingAgentID)
Example #10
def main():
    # manipulated variables
    manipulatedVariables = OrderedDict()
    manipulatedVariables['dataSize'] = [1000, 2000, 3000]
    manipulatedVariables['depth'] = [5, 9]
    manipulatedVariables['trainSteps'] = list(range(0, 50001, 10000))

    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    killzoneRadius = 25
    maxRunningSteps = 100
    numSimulations = 200
    # accumulate rewards for trajectories
    numOfAgent = 3
    sheepId = 0
    wolvesId = 1

    wolfOneId = 1
    wolfTwoId = 2
    xPosIndex = [0, 1]
    xBoundary = [0, 600]
    yBoundary = [0, 600]

    getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
    getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)

    isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
    isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)
    playIsTerminal = lambda state: isTerminalOne(state) or isTerminalTwo(state)

    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transit = TransiteForNoPhysics(stayInBoundaryByReflectVelocity)

    playAliveBonus = -1 / maxRunningSteps
    playDeathPenalty = 1
    playKillzoneRadius = killzoneRadius
    playReward = RewardFunctionCompete(playAliveBonus, playDeathPenalty,
                                       playIsTerminal)

    decay = 1
    accumulateRewards = AccumulateRewards(decay, playReward)
    addValuesToTrajectory = AddValuesToTrajectory(accumulateRewards)

    # generate trajectory parallel
    generateTrajectoriesCodeName = 'generateCenterControlTrajectoryByCondition.py'
    evalNumTrials = 500
    numCpuCores = os.cpu_count()
    numCpuToUse = int(0.75 * numCpuCores)
    numCmdList = min(evalNumTrials, numCpuToUse)
    generateTrajectoriesParallel = GenerateTrajectoriesParallel(
        generateTrajectoriesCodeName, evalNumTrials, numCmdList)

    # run all trials and save trajectories
    generateTrajectoriesParallelFromDf = lambda df: generateTrajectoriesParallel(
        readParametersFromDf(df))
    # toSplitFrame.groupby(levelNames).apply(generateTrajectoriesParallelFromDf)

    # save evaluation trajectories
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', 'data', 'evaluateSupervisedLearning',
        'multiMCTSAgentResNetNoPhysicsCenterControl',
        'evaluateCenterControlTrajByCondition')

    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)
    trajectoryExtension = '.pickle'

    trajectoryFixedParameters = {
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius
    }

    getTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                        trajectoryExtension,
                                        trajectoryFixedParameters)
    getTrajectorySavePathFromDf = lambda df: getTrajectorySavePath(
        readParametersFromDf(df))

    # compute statistics on the trajectories
    fuzzySearchParameterNames = []
    loadTrajectories = LoadTrajectories(getTrajectorySavePath, loadFromPickle,
                                        fuzzySearchParameterNames)
    loadTrajectoriesFromDf = lambda df: loadTrajectories(
        readParametersFromDf(df))
    measurementFunction = lambda trajectory: accumulateRewards(trajectory)[0]
    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf,
                                          measurementFunction)
    statisticsDf = toSplitFrame.groupby(levelNames).apply(computeStatistics)

    def calculateSurviveRatio(trajectory):
        length = np.array(len(trajectory))
        count = np.array(
            [length < 50, length >= 50 and length < 100, length >= 100])
        return count

    computeNumbers = ComputeStatistics(loadTrajectoriesFromDf,
                                       calculateSurviveRatio)
    df = toSplitFrame.groupby(levelNames).apply(computeNumbers)
    print(df)

    fig = plt.figure()
    numRows = 1
    numColumns = 1
    plotCounter = 1
    axForDraw = fig.add_subplot(numRows, numColumns, plotCounter)
    xlabel = ['0-50', '50-100', '100-150']
    x = np.arange(len(xlabel))

    numTrials = 500
    yMean = df['mean'].tolist()
    yErr = np.array(df['std'].tolist()) / (np.sqrt(numTrials) - 1)

    totalWidth, n = 0.6, 3
    width = totalWidth / n

    x = x - (totalWidth - width) / 2
    plt.bar(x, yMean[0], yerr=yErr[0], width=width, label='trainStep0')
    plt.bar(x + width,
            yMean[1],
            yerr=yErr[1],
            width=width,
            label='trainStep10000')
    plt.bar(x + width * 2,
            yMean[2],
            yerr=yErr[2],
            width=width,
            label='trainStep30000')
    plt.bar(x + width * 3,
            yMean[3],
            yerr=yErr[3],
            width=width,
            label='trainStep50000')
    plt.suptitle('dataSize 3000')
    plt.xticks(x, xlabel)
    plt.ylim(0, 1)
    plt.xlabel('living steps')
    plt.legend(loc='best')
    # plt.show()

    # plot the results
    fig = plt.figure()
    numRows = len(manipulatedVariables['depth'])
    numColumns = len(manipulatedVariables['dataSize'])
    plotCounter = 1
    print(statisticsDf)
    for depth, grp in statisticsDf.groupby('depth'):
        grp.index = grp.index.droplevel('depth')

        for dataSize, group in grp.groupby('dataSize'):
            group.index = group.index.droplevel('dataSize')

            axForDraw = fig.add_subplot(numRows, numColumns, plotCounter)
            if plotCounter % numColumns == 1:
                axForDraw.set_ylabel('depth: {}'.format(depth))
            if plotCounter <= numColumns:
                axForDraw.set_title('dataSize: {}'.format(dataSize))

            axForDraw.set_ylim(-1, 1)
            # plt.ylabel('Accumulated rewards')
            maxTrainSteps = manipulatedVariables['trainSteps'][-1]

            plt.plot([0, maxTrainSteps], [0.354] * 2,
                     '--m',
                     color="#1C2833",
                     label='pure MCTS')

            group.plot(ax=axForDraw,
                       y='mean',
                       yerr='std',
                       marker='o',
                       logx=False)

            plotCounter += 1

    plt.suptitle('center control wolves')
    plt.legend(loc='best')
    plt.show()
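
The measurementFunction above reads accumulateRewards(trajectory)[0], i.e. the accumulated reward from the first time step. A simplified sketch of how AccumulateRewards could compute those discounted returns, assuming the reward function is called per time step on (state, action); this is an assumption about the interface, not the repository code.

import numpy as np

class AccumulateRewards:
    def __init__(self, decay, rewardFunction):
        self.decay = decay
        self.rewardFunction = rewardFunction

    def __call__(self, trajectory):
        rewards = [self.rewardFunction(state, action)
                   for state, action, *rest in trajectory]
        accumulatedRewards = np.zeros(len(rewards))
        runningReward = 0
        # sweep backwards so each entry is the discounted return from that step
        for t in reversed(range(len(rewards))):
            runningReward = rewards[t] + self.decay * runningReward
            accumulatedRewards[t] = runningReward
        return accumulatedRewards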
Example #11
def main():
    # important parameters

    # manipulated variables

    manipulatedVariables = OrderedDict()

    manipulatedVariables['miniBatchSize'] = [64, 256]
    manipulatedVariables['learningRate'] = [1e-3, 1e-4, 1e-5]
    manipulatedVariables['depth'] = [5, 9, 17]  #[4,8,16]#
    manipulatedVariables['trainSteps'] = [0, 5000, 10000, 20000, 50000]
    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    # accumulate rewards for trajectories
    sheepId = 0
    wolfId = 1

    xPosIndex = [0, 1]
    getSheepPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfPos = GetAgentPosFromState(wolfId, xPosIndex)

    killzoneRadius = 2
    numSimulations = 150
    maxRunningSteps = 30
    agentId = 1

    playAliveBonus = -1 / maxRunningSteps
    playDeathPenalty = 1
    playKillzoneRadius = killzoneRadius
    playIsTerminal = IsTerminal(playKillzoneRadius, getSheepPos, getWolfPos)
    playReward = RewardFunctionCompete(playAliveBonus, playDeathPenalty,
                                       playIsTerminal)

    decay = 1
    accumulateRewards = AccumulateRewards(decay, playReward)
    addValuesToTrajectory = AddValuesToTrajectory(accumulateRewards)

    # generate trajectory parallel
    # generateTrajectoriesCodeName = 'generateWolfResNNEvaluationTrajectoryFixObstacle.py'
    # generateTrajectoriesCodeName = 'generateWolfNNEvaluationTrajectoryFixObstacle.py'
    # generateTrajectoriesCodeName = 'generateWolfResNNEvaluationTrajectoryMovedObstacle.py'
    generateTrajectoriesCodeName = 'generateWolfResNNEvaluationTrajectoryRandomObstacle.py'
    # generateTrajectoriesCodeName = 'generateWolfNNEvaluationTrajectoryRandomObstacle.py'
    evalNumTrials = 100
    numCpuCores = os.cpu_count()
    numCpuToUse = int(0.75 * numCpuCores)
    numCmdList = min(evalNumTrials, numCpuToUse)
    generateTrajectoriesParallel = GenerateTrajectoriesParallel(
        generateTrajectoriesCodeName, evalNumTrials, numCmdList)

    # run all trials and save trajectories
    generateTrajectoriesParallelFromDf = lambda df: generateTrajectoriesParallel(
        readParametersFromDf(df))
    toSplitFrame.groupby(levelNames).apply(generateTrajectoriesParallelFromDf)

    # save evaluation trajectories
    dirName = os.path.dirname(__file__)
    dataFolderName = os.path.join(dirName, '..', '..', '..', 'data',
                                  'multiAgentTrain', 'MCTSRandomObstacle')
    trajectoryDirectory = os.path.join(
        dataFolderName, 'evaluationTrajectoriesResNNWithObstacle')

    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)
    trajectoryExtension = '.pickle'

    trajectoryFixedParameters = {
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius,
        'agentId': agentId
    }

    getTrajectorySavePath = GetSavePath(trajectoryDirectory,
                                        trajectoryExtension,
                                        trajectoryFixedParameters)
    getTrajectorySavePathFromDf = lambda df: getTrajectorySavePath(
        readParametersFromDf(df))

    # compute statistics on the trajectories

    fuzzySearchParameterNames = ['sampleIndex']
    loadTrajectories = LoadTrajectories(getTrajectorySavePath, loadFromPickle,
                                        fuzzySearchParameterNames)
    loadTrajectoriesFromDf = lambda df: loadTrajectories(
        readParametersFromDf(df))
    measurementFunction = lambda trajectory: accumulateRewards(trajectory)[0]

    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf,
                                          measurementFunction)
    statisticsDf = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf)

    # manipulatedVariables['miniBatchSize'] = [64, 128]
    # manipulatedVariables['learningRate'] =  [ 1e-3,1e-4,1e-5]
    # manipulatedVariables['depth'] = [4,8,16]
    # manipulatedVariables['trainSteps']=[0,20000,40000,60000,100000,180000]

    # plot the results
    fig = plt.figure()
    numRows = len(manipulatedVariables['depth'])
    numColumns = len(manipulatedVariables['learningRate'])
    plotCounter = 1
    selfId = 0
    for depth, grp in statisticsDf.groupby('depth'):
        grp.index = grp.index.droplevel('depth')

        for learningRate, group in grp.groupby('learningRate'):
            group.index = group.index.droplevel('learningRate')

            axForDraw = fig.add_subplot(numRows, numColumns, plotCounter)
            if (plotCounter % numColumns == 1) or numColumns == 1:
                axForDraw.set_ylabel('depth: {}'.format(depth))
            if plotCounter <= numColumns:
                axForDraw.set_title('learningRate: {}'.format(learningRate))

            axForDraw.set_ylim(-1, 1)
            drawPerformanceLine(group, axForDraw, selfId)
            plotCounter += 1

    plt.suptitle('SupervisedNNWolfwithRandomWallState')
    plt.legend(loc='best')
    plt.show()
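
Each grouped slice of toSplitFrame carries exactly one combination of the manipulated variables in its MultiIndex, and readParametersFromDf turns that slice into a parameter dict for the save path and the trajectory generator. A plausible sketch, assuming that is all the helper does:

def readParametersFromDf(df):
    # every row of a groupby slice shares the same index values,
    # so reading the first row is enough
    parameters = {levelName: df.index.get_level_values(levelName)[0]
                  for levelName in df.index.names}
    return parameters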
Example #12
def main():
    DEBUG = 0
    renderOn = 0
    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 5
        endSampleIndex = 7
        agentId = 1
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                      endSampleIndex)
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                      endSampleIndex)

    # check file exists or not
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', 'NoPhysics2wolves1sheep',
        'trainWolvesTwoCenterControlAction88', 'trajectories')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 250
    killzoneRadius = 150
    fixedParameters = {
        'agentId': agentId,
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius
    }

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension,
                                             fixedParameters)

    trajectorySavePath = generateTrajectorySavePath(
        parametersForTrajectoryPath)

    if not os.path.isfile(trajectorySavePath):
        numOfAgent = 3
        xBoundary = [0, 600]
        yBoundary = [0, 600]
        resetState = Reset(xBoundary, yBoundary, numOfAgent)

        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
            xBoundary, yBoundary)
        interpolateOneFrame = InterpolateOneFrame(
            stayInBoundaryByReflectVelocity)

        chooseInterpolatedNextState = lambda interpolatedStates: interpolatedStates[
            -1]

        sheepId = 0
        wolvesId = 1
        centerControlIndexList = [wolvesId]
        unpackCenterControlAction = UnpackCenterControlAction(
            centerControlIndexList)

        numFramesToInterpolate = 0
        transit = TransitWithInterpolation(numFramesToInterpolate,
                                           interpolateOneFrame,
                                           chooseInterpolatedNextState,
                                           unpackCenterControlAction)

        # NNGuidedMCTS init
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                       (0, -10), (7, -7), (0, 0)]
        wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                           (-7, -7), (0, -10), (7, -7)]

        preyPowerRatio = 10
        sheepActionSpace = list(
            map(tuple,
                np.array(actionSpace) * preyPowerRatio))

        predatorPowerRatio = 8
        wolfActionOneSpace = list(
            map(tuple,
                np.array(wolfActionSpace) * predatorPowerRatio))
        wolfActionTwoSpace = list(
            map(tuple,
                np.array(wolfActionSpace) * predatorPowerRatio))

        wolvesActionSpace = list(
            product(wolfActionOneSpace, wolfActionTwoSpace))

        actionSpaceList = [sheepActionSpace, wolvesActionSpace]

        # neural network init
        numStateSpace = 2 * numOfAgent
        numSheepActionSpace = len(sheepActionSpace)
        numWolvesActionSpace = len(wolvesActionSpace)

        regularizationFactor = 1e-4
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                           regularizationFactor)

        # load save dir
        NNModelSaveExtension = ''
        sheepNNModelSaveDirectory = os.path.join(
            dirName, '..', '..', '..', '..', 'data', 'NoPhysics2wolves1sheep',
            'trainSheepWithTwoHeatSeekingWolves', 'trainedResNNModels')
        sheepNNModelFixedParameters = {
            'agentId': 0,
            'maxRunningSteps': 50,
            'numSimulations': 110,
            'miniBatchSize': 256,
            'learningRate': 0.0001,
        }
        getSheepNNModelSavePath = GetSavePath(sheepNNModelSaveDirectory,
                                              NNModelSaveExtension,
                                              sheepNNModelFixedParameters)

        depth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initSheepNNModel = generateSheepModel(sharedWidths * depth,
                                              actionLayerWidths,
                                              valueLayerWidths, resBlockSize,
                                              initializationMethod,
                                              dropoutRate)

        sheepTrainedModelPath = getSheepNNModelSavePath({
            'trainSteps': 50000,
            'depth': depth
        })
        sheepTrainedModel = restoreVariables(initSheepNNModel,
                                             sheepTrainedModelPath)
        sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

        wolfOneId = 1
        wolfTwoId = 2
        xPosIndex = [0, 1]
        getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
        getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
        speed = 120
        #sheepPolicy = HeatSeekingContinuesDeterministicPolicy(getWolfOneXPos, getSheepXPos, speed)

        # MCTS
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        # prior
        getActionPrior = lambda state: {
            action: 1 / len(wolvesActionSpace)
            for action in wolvesActionSpace
        }

        # load chase nn policy
        chooseActionInMCTS = sampleFromDistribution

        def wolvesTransit(state, action):
            return transit(state,
                           [chooseActionInMCTS(sheepPolicy(state)), action])

        # reward function
        wolfOneId = 1
        wolfTwoId = 2
        xPosIndex = [0, 1]
        getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
        getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
        getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)
        isCollidedOne = IsTerminal(getWolfOneXPos, getSheepXPos,
                                   killzoneRadius)
        isCollidedTwo = IsTerminal(getWolfTwoXPos, getSheepXPos,
                                   killzoneRadius)

        calCollisionTimes = lambda state: np.sum([
            isCollidedOne(state), isCollidedTwo(state)
        ])  # collisionTimeByAddingCollisionInAllWolves
        #calCollisionTimes = lambda state: np.max([isCollidedOne(state), isCollidedTwo(state)]) # collisionTimeByBooleanCollisionForAnyWolf

        calTerminationSignals = calCollisionTimes
        chooseInterpolatedStateByEarlyTermination = ChooseInterpolatedStateByEarlyTermination(
            calTerminationSignals)

        numFramesToInterpolateInReward = 3
        interpolateStateInReward = TransitWithInterpolation(
            numFramesToInterpolateInReward, interpolateOneFrame,
            chooseInterpolatedStateByEarlyTermination,
            unpackCenterControlAction)

        aliveBonus = -1 / maxRunningSteps * 10
        deathPenalty = 1
        rewardFunction = RewardFunctionCompeteWithStateInterpolation(
            aliveBonus, deathPenalty, calCollisionTimes,
            interpolateStateInReward)

        # initialize children; expand
        initializeChildren = InitializeChildren(wolvesActionSpace,
                                                wolvesTransit, getActionPrior)
        isTerminal = lambda state: False
        expand = Expand(isTerminal, initializeChildren)

        # random rollout policy
        def rolloutPolicy(state):
            return [
                sampleFromDistribution(sheepPolicy(state)),
                wolvesActionSpace[np.random.choice(
                    range(numWolvesActionSpace))]
            ]

        # rollout
        #rolloutHeuristicWeight = 0
        #minDistance = 400
        #rolloutHeuristic1 = HeuristicDistanceToTarget(
        #    rolloutHeuristicWeight, getWolfOneXPos, getSheepXPos, minDistance)
        #rolloutHeuristic2 = HeuristicDistanceToTarget(
        #    rolloutHeuristicWeight, getWolfTwoXPos, getSheepXPos, minDistance)

        #rolloutHeuristic = lambda state: (rolloutHeuristic1(state) + rolloutHeuristic2(state)) / 2

        rolloutHeuristic = lambda state: 0
        maxRolloutSteps = 15
        rollout = RollOut(rolloutPolicy, maxRolloutSteps, transit,
                          rewardFunction, isTerminal, rolloutHeuristic)

        wolfPolicy = MCTS(numSimulations, selectChild, expand, rollout, backup,
                          establishSoftmaxActionDist)

        # All agents' policies
        policy = lambda state: [sheepPolicy(state), wolfPolicy(state)]
        chooseActionList = [maxFromDistribution, maxFromDistribution]

        def sampleAction(state):
            actionDists = [sheepPolicy(state), wolfPolicy(state)]
            action = [
                chooseAction(actionDist) for actionDist, chooseAction in zip(
                    actionDists, chooseActionList)
            ]
            return action

        render = None
        if renderOn:
            import pygame as pg
            from pygame.color import THECOLORS
            screenColor = THECOLORS['black']
            circleColorList = [
                THECOLORS['green'], THECOLORS['yellow'], THECOLORS['red']
            ]
            circleSize = 10
            saveImage = False
            saveImageDir = os.path.join(dirName, '..', '..', '..', '..',
                                        'data', 'demoImg')
            if not os.path.exists(saveImageDir):
                os.makedirs(saveImageDir)
            screen = pg.display.set_mode([max(xBoundary), max(yBoundary)])
            render = Render(numOfAgent, xPosIndex, screen, screenColor,
                            circleColorList, circleSize, saveImage,
                            saveImageDir)

        forwardOneStep = ForwardOneStep(transit, rewardFunction)
        sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps,
                                                      isTerminal, resetState,
                                                      forwardOneStep, render,
                                                      renderOn)

        trajectories = [
            sampleTrajectory(sampleAction)
            for sampleIndex in range(startSampleIndex, endSampleIndex)
        ]
        print([len(traj) for traj in trajectories])
        saveToPickle(trajectories, trajectorySavePath)
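
chooseActionInMCTS and chooseActionList above rely on sampleFromDistribution and maxFromDistribution. Hedged sketches of both follow, assuming an action distribution is a dict mapping actions to probabilities; the repository versions may differ.

import numpy as np

def sampleFromDistribution(actionDist):
    actions = list(actionDist.keys())
    probabilities = list(actionDist.values())
    # draw one action according to its probability
    selectedIndex = np.random.choice(len(actions), p=probabilities)
    return actions[selectedIndex]

def maxFromDistribution(actionDist):
    # greedy choice: the action with the highest probability
    return max(actionDist, key=actionDist.get)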
Example #13
def main():
    parametersForTrajectoryPath = json.loads(sys.argv[1])
    startSampleIndex = int(sys.argv[2])
    endSampleIndex = int(sys.argv[3])

    # parametersForTrajectoryPath['sampleOneStepPerTraj']=1 #0
    # parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)

    trainSteps = int(parametersForTrajectoryPath['trainSteps'])
    depth = int(parametersForTrajectoryPath['depth'])
    dataSize = int(parametersForTrajectoryPath['dataSize'])

    # parametersForTrajectoryPath = {}
    # depth = 5
    # dataSize = 5000
    # trainSteps = 50000
    # startSampleIndex = 0
    # endSampleIndex = 100

    killzoneRadius = 25
    numSimulations = 200
    maxRunningSteps = 100

    fixedParameters = {
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius
    }
    trajectorySaveExtension = '.pickle'
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', 'data', 'evaluateSupervisedLearning',
        'multiMCTSAgentResNetNoPhysicsCenterControl',
        'evaluateCenterControlTrajByCondition')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension,
                                             fixedParameters)

    trajectorySavePath = generateTrajectorySavePath(
        parametersForTrajectoryPath)
    if not os.path.isfile(trajectorySavePath):

        numOfAgent = 3
        sheepId = 0
        wolvesId = 1

        wolfOneId = 1
        wolfTwoId = 2
        xPosIndex = [0, 1]
        xBoundary = [0, 600]
        yBoundary = [0, 600]
        reset = Reset(xBoundary, yBoundary, numOfAgent)

        getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
        getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
        getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)

        isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos,
                                   killzoneRadius)
        isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos,
                                   killzoneRadius)
        isTerminal = lambda state: isTerminalOne(state) or isTerminalTwo(state)

        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
            xBoundary, yBoundary)
        transit = TransiteForNoPhysics(stayInBoundaryByReflectVelocity)

        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                       (0, -10), (7, -7), (0, 0)]
        preyPowerRatio = 3
        sheepActionSpace = list(
            map(tuple,
                np.array(actionSpace) * preyPowerRatio))

        predatorPowerRatio = 2
        wolfActionOneSpace = list(
            map(tuple,
                np.array(actionSpace) * predatorPowerRatio))
        wolfActionTwoSpace = list(
            map(tuple,
                np.array(actionSpace) * predatorPowerRatio))
        wolvesActionSpace = list(
            it.product(wolfActionOneSpace, wolfActionTwoSpace))

        # neural network init
        numStateSpace = 6
        numSheepActionSpace = len(sheepActionSpace)
        numWolvesActionSpace = len(wolvesActionSpace)

        regularizationFactor = 1e-4
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                           regularizationFactor)

        # load save dir
        NNModelSaveExtension = ''
        NNModelSaveDirectory = os.path.join(
            dirName, '..', '..', '..', 'data',
            'evaluateEscapeMultiChasingNoPhysics',
            'trainedResNNModelsMultiStillAction')
        NNModelFixedParameters = {
            'agentId': 0,
            'maxRunningSteps': 150,
            'numSimulations': 200,
            'miniBatchSize': 256,
            'learningRate': 0.0001
        }
        getNNModelSavePath = GetSavePath(NNModelSaveDirectory,
                                         NNModelSaveExtension,
                                         NNModelFixedParameters)

        if not os.path.exists(NNModelSaveDirectory):
            os.makedirs(NNModelSaveDirectory)

        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initSheepNNModel = generateSheepModel(sharedWidths * 5,
                                              actionLayerWidths,
                                              valueLayerWidths, resBlockSize,
                                              initializationMethod,
                                              dropoutRate)

        sheepTrainedModelPath = getNNModelSavePath({
            'trainSteps': 50000,
            'depth': 5
        })
        sheepTrainedModel = restoreVariables(initSheepNNModel,
                                             sheepTrainedModelPath)
        sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

        generateWolvesModel = GenerateModel(numStateSpace,
                                            numWolvesActionSpace,
                                            regularizationFactor)
        initWolvesNNModel = generateWolvesModel(sharedWidths * depth,
                                                actionLayerWidths,
                                                valueLayerWidths, resBlockSize,
                                                initializationMethod,
                                                dropoutRate)
        NNModelSaveDirectory = os.path.join(
            dirName, '..', '..', '..', 'data', 'evaluateSupervisedLearning',
            'multiMCTSAgentResNetNoPhysicsCenterControl', 'trainedResNNModels')
        wolfId = 1
        NNModelFixedParametersWolves = {
            'agentId': wolfId,
            'maxRunningSteps': maxRunningSteps,
            'numSimulations': numSimulations,
            'miniBatchSize': 256,
            'learningRate': 0.0001,
        }

        getNNModelSavePath = GetSavePath(NNModelSaveDirectory,
                                         NNModelSaveExtension,
                                         NNModelFixedParametersWolves)
        wolvesTrainedModelPath = getNNModelSavePath({
            'trainSteps': trainSteps,
            'depth': depth,
            'dataSize': dataSize
        })
        wolvesTrainedModel = restoreVariables(initWolvesNNModel,
                                              wolvesTrainedModelPath)
        wolfPolicy = ApproximatePolicy(wolvesTrainedModel, wolvesActionSpace)

        from exec.evaluateNoPhysicsEnvWithRender import Render
        import pygame as pg
        from pygame.color import THECOLORS
        screenColor = THECOLORS['black']
        circleColorList = [
            THECOLORS['green'], THECOLORS['red'], THECOLORS['orange']
        ]
        circleSize = 10

        saveImage = False
        saveImageDir = os.path.join(dirName, '..', '..', '..', 'data',
                                    'demoImg')
        if not os.path.exists(saveImageDir):
            os.makedirs(saveImageDir)
        renderOn = False
        render = None
        if renderOn:
            screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
            render = Render(numOfAgent, xPosIndex, screen, screenColor,
                            circleColorList, circleSize, saveImage,
                            saveImageDir)
        chooseActionList = [chooseGreedyAction, chooseGreedyAction]
        sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, transit,
                                                      isTerminal, reset,
                                                      chooseActionList, render,
                                                      renderOn)
        # All agents' policies
        policy = lambda state: [sheepPolicy(state), wolfPolicy(state)]
        trajectories = [
            sampleTrajectory(policy)
            for sampleIndex in range(startSampleIndex, endSampleIndex)
        ]

        saveToPickle(trajectories, trajectorySavePath)
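
Example #13 above is presumably the worker script behind the parallel evaluation in Example #10: GenerateTrajectoriesParallel is assumed to launch it once per chunk with the condition parameters as a JSON string followed by the start and end sample indices, roughly like the following (script name and parameter values are for illustration only):

    python generateCenterControlTrajectoryByCondition.py '{"trainSteps": 10000, "depth": 5, "dataSize": 1000}' 0 100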
Example #14
def main():
    DEBUG = 0
    renderOn = 0
    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 0
        endSampleIndex = 10
        agentId = 1
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                      endSampleIndex)
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                      endSampleIndex)

    # check file exists or not
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
        'trainWolvesTwoCenterControlMultiTrees', 'trajectories')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 500
    killzoneRadius = 50
    fixedParameters = {
        'agentId': agentId,
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius
    }

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension,
                                             fixedParameters)

    trajectorySavePath = generateTrajectorySavePath(
        parametersForTrajectoryPath)

    if not os.path.isfile(trajectorySavePath):
        numOfAgent = 3
        sheepId = 0
        wolvesId = 1

        wolfOneId = 1
        wolfTwoId = 2

        xPosIndex = [0, 1]
        xBoundary = [0, 600]
        yBoundary = [0, 600]

        getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
        getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
        getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)

        reset = Reset(xBoundary, yBoundary, numOfAgent)

        isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos,
                                   killzoneRadius)
        isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos,
                                   killzoneRadius)

        isTerminal = lambda state: isTerminalOne(state) or isTerminalTwo(state)

        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
            xBoundary, yBoundary)

        centerControlIndexList = [wolvesId]
        unpackCenterControlAction = UnpackCenterControlAction(
            centerControlIndexList)
        transitionFunction = TransiteForNoPhysicsWithCenterControlAction(
            stayInBoundaryByReflectVelocity)

        numFramesToInterpolate = 3
        transit = TransitWithInterpolateStateWithCenterControlAction(
            numFramesToInterpolate, transitionFunction, isTerminal,
            unpackCenterControlAction)

        # NNGuidedMCTS init
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                       (0, -10), (7, -7), (0, 0)]
        wolfActionSpace = actionSpace
        # wolfActionSpace = [(10, 0), (0, 10), (-10, 0), (0, -10), (0, 0)]

        preyPowerRatio = 12
        sheepActionSpace = list(
            map(tuple,
                np.array(actionSpace) * preyPowerRatio))

        predatorPowerRatio = 8
        wolfActionOneSpace = list(
            map(tuple,
                np.array(wolfActionSpace) * predatorPowerRatio))
        wolfActionTwoSpace = list(
            map(tuple,
                np.array(wolfActionSpace) * predatorPowerRatio))

        wolvesActionSpace = list(
            product(wolfActionOneSpace, wolfActionTwoSpace))

        actionSpaceList = [sheepActionSpace, wolvesActionSpace]

        # neural network init
        numStateSpace = 2 * numOfAgent
        numSheepActionSpace = len(sheepActionSpace)
        numWolvesActionSpace = len(wolvesActionSpace)

        regularizationFactor = 1e-4
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                           regularizationFactor)

        # load save dir
        NNModelSaveExtension = ''
        sheepNNModelSaveDirectory = os.path.join(
            dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
            'trainSheepWithTwoHeatSeekingWolves', 'trainedResNNModels')
        sheepNNModelFixedParameters = {
            'agentId': 0,
            'maxRunningSteps': 50,
            'numSimulations': 110,
            'miniBatchSize': 256,
            'learningRate': 0.0001,
        }
        getSheepNNModelSavePath = GetSavePath(sheepNNModelSaveDirectory,
                                              NNModelSaveExtension,
                                              sheepNNModelFixedParameters)

        depth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initSheepNNModel = generateSheepModel(sharedWidths * depth,
                                              actionLayerWidths,
                                              valueLayerWidths, resBlockSize,
                                              initializationMethod,
                                              dropoutRate)

        sheepTrainedModelPath = getSheepNNModelSavePath({
            'trainSteps': 50000,
            'depth': depth
        })
        sheepTrainedModel = restoreVariables(initSheepNNModel,
                                             sheepTrainedModelPath)
        sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

        # MCTS
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        # prior
        getActionPrior = lambda state: {
            action: 1 / len(wolvesActionSpace)
            for action in wolvesActionSpace
        }

        # load chase nn policy
        temperatureInMCTS = 1
        chooseActionInMCTS = SampleAction(temperatureInMCTS)

        def wolvesTransit(state, action):
            return transit(state,
                           [chooseActionInMCTS(sheepPolicy(state)), action])

        # reward function
        aliveBonus = -1 / maxRunningSteps
        deathPenalty = 1
        rewardFunction = reward.RewardFunctionCompete(aliveBonus, deathPenalty,
                                                      isTerminal)

        # initialize children; expand
        initializeChildren = InitializeChildren(wolvesActionSpace,
                                                wolvesTransit, getActionPrior)
        expand = Expand(isTerminal, initializeChildren)

        # random rollout policy
        def rolloutPolicy(state):
            return wolvesActionSpace[np.random.choice(
                range(numWolvesActionSpace))]

        # rollout
        rolloutHeuristicWeight = 0
        minDistance = 400
        rolloutHeuristic1 = reward.HeuristicDistanceToTarget(
            rolloutHeuristicWeight, getWolfOneXPos, getSheepXPos, minDistance)
        rolloutHeuristic2 = reward.HeuristicDistanceToTarget(
            rolloutHeuristicWeight, getWolfTwoXPos, getSheepXPos, minDistance)

        rolloutHeuristic = lambda state: (rolloutHeuristic1(state) +
                                          rolloutHeuristic2(state)) / 2

        maxRolloutSteps = 15
        rollout = RollOut(rolloutPolicy, maxRolloutSteps, wolvesTransit,
                          rewardFunction, isTerminal, rolloutHeuristic)

        numTree = 4
        numSimulationsPerTree = int(numSimulations / numTree)
        wolfPolicy = StochasticMCTS(
            numTree, numSimulationsPerTree, selectChild, expand, rollout,
            backup, establishSoftmaxActionDistFromMultipleTrees)

        # All agents' policies
        policy = lambda state: [sheepPolicy(state), wolfPolicy(state)]
        chooseActionList = [chooseGreedyAction, chooseGreedyAction]

        render = None
        if renderOn:
            import pygame as pg
            from pygame.color import THECOLORS
            screenColor = THECOLORS['black']
            circleColorList = [
                THECOLORS['green'], THECOLORS['red'], THECOLORS['red']
            ]
            circleSize = 10

            saveImage = False
            saveImageDir = os.path.join(dirName, '..', '..', '..', '..',
                                        'data', 'demoImg')
            if not os.path.exists(saveImageDir):
                os.makedirs(saveImageDir)

            screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
            render = Render(numOfAgent, xPosIndex, screen, screenColor,
                            circleColorList, circleSize, saveImage,
                            saveImageDir)

        sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, transit,
                                                      isTerminal, reset,
                                                      chooseActionList, render,
                                                      renderOn)
        trajectories = [
            sampleTrajectory(policy)
            for sampleIndex in range(startSampleIndex, endSampleIndex)
        ]
        print([len(traj) for traj in trajectories])
        saveToPickle(trajectories, trajectorySavePath)
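
Both center-control scripts above wrap the raw transition with UnpackCenterControlAction. Judging from how the wolves' joint action space is built (a product of per-wolf action spaces stored at the wolvesId slot), it presumably flattens such joint actions back into one action per agent before the transition step. A sketch under that assumption, not the repository code:

class UnpackCenterControlAction:
    def __init__(self, centerControlIndexList):
        self.centerControlIndexList = centerControlIndexList

    def __call__(self, centerControlAction):
        unpackedAction = []
        for agentIndex, action in enumerate(centerControlAction):
            if agentIndex in self.centerControlIndexList:
                # a joint (center-control) action: one sub-action per controlled agent
                unpackedAction.extend(list(action))
            else:
                unpackedAction.append(action)
        return unpackedAction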
def trainOneCondition(manipulatedVariables):
    depth = int(manipulatedVariables['depth'])
    # Get dataset for training
    dirName = os.path.dirname(__file__)
    dataSetDirectory = os.path.join(dirName, '..', '..', '..', '..', 'data',
                                    '2wolves1sheep',
                                    'trainWolvesTwoCenterControlMultiTrees',
                                    'trajectories')

    if not os.path.exists(dataSetDirectory):
        os.makedirs(dataSetDirectory)

    dataSetExtension = '.pickle'
    dataSetMaxRunningSteps = 50
    dataSetNumSimulations = 500
    killzoneRadius = 50
    agentId = 1
    wolvesId = 1
    dataSetFixedParameters = {
        'agentId': agentId,
        'maxRunningSteps': dataSetMaxRunningSteps,
        'numSimulations': dataSetNumSimulations,
        'killzoneRadius': killzoneRadius
    }

    getDataSetSavePath = GetSavePath(dataSetDirectory, dataSetExtension,
                                     dataSetFixedParameters)
    print("DATASET LOADED!")

    # accumulate rewards for trajectories
    numOfAgent = 3

    sheepId = 0
    wolvesId = 1

    wolfOneId = 1
    wolfTwoId = 2
    xPosIndex = [0, 1]
    xBoundary = [0, 600]
    yBoundary = [0, 600]

    getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
    getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)

    reset = Reset(xBoundary, yBoundary, numOfAgent)

    isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
    isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)
    playIsTerminal = lambda state: isTerminalOne(state) or isTerminalTwo(state)

    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transit = TransiteForNoPhysics(stayInBoundaryByReflectVelocity)

    playAliveBonus = -1 / dataSetMaxRunningSteps
    playDeathPenalty = 1
    playKillzoneRadius = killzoneRadius

    playReward = RewardFunctionCompete(playAliveBonus, playDeathPenalty,
                                       playIsTerminal)

    decay = 1
    accumulateRewards = AccumulateRewards(decay, playReward)
    addValuesToTrajectory = AddValuesToTrajectory(accumulateRewards)

    # pre-process the trajectories

    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7), (0, 0)]
    preyPowerRatio = 12
    sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))

    predatorPowerRatio = 8

    actionSpaceOne = [(10, 0), (-10, 0)]
    wolfActionOneSpace = list(
        map(tuple,
            np.array(actionSpace) * predatorPowerRatio))

    actionSpaceTwo = [(10, 0), (-10, 0)]
    wolfActionTwoSpace = list(
        map(tuple,
            np.array(actionSpace) * predatorPowerRatio))
    wolvesActionSpace = list(it.product(wolfActionOneSpace,
                                        wolfActionTwoSpace))

    numActionSpace = len(wolvesActionSpace)

    actionIndex = 1
    actionToOneHot = ActionToOneHot(wolvesActionSpace)
    getTerminalActionFromTrajectory = lambda trajectory: trajectory[-1][
        actionIndex]
    removeTerminalTupleFromTrajectory = RemoveTerminalTupleFromTrajectory(
        getTerminalActionFromTrajectory)
    processTrajectoryForNN = ProcessTrajectoryForPolicyValueNet(
        actionToOneHot, wolvesId)

    preProcessTrajectories = PreProcessTrajectories(
        addValuesToTrajectory, removeTerminalTupleFromTrajectory,
        processTrajectoryForNN)

    fuzzySearchParameterNames = ['sampleIndex']
    loadTrajectories = LoadTrajectories(getDataSetSavePath, loadFromPickle,
                                        fuzzySearchParameterNames)
    loadedTrajectories = loadTrajectories(parameters={})
    # print(loadedTrajectories[0])

    # trim the state entry of each time step to its first three rows (the three
    # agents); the remaining elements of the tuple are passed through unchanged
    filterState = lambda timeStep: (timeStep[0][0:3], timeStep[1], timeStep[2])
    trajectories = [[filterState(timeStep) for timeStep in trajectory]
                    for trajectory in loadedTrajectories]
    print(len(trajectories))

    preProcessedTrajectories = np.concatenate(
        preProcessTrajectories(trajectories))

    trainData = [list(varBatch) for varBatch in zip(*preProcessedTrajectories)]
    valuedTrajectories = [addValuesToTrajectory(tra) for tra in trajectories]

    # neural network init and save path
    numStateSpace = 6
    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]

    generateModel = GenerateModel(numStateSpace, numActionSpace,
                                  regularizationFactor)

    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    sheepNNModel = generateModel(sharedWidths * depth, actionLayerWidths,
                                 valueLayerWidths, resBlockSize,
                                 initializationMethod, dropoutRate)

    initTimeStep = 0
    valueIndex = 3
    trainDataMeanAccumulatedReward = np.mean(
        [tra[initTimeStep][valueIndex] for tra in valuedTrajectories])
    print(trainDataMeanAccumulatedReward)

    # function to train NN model
    terminalThreshold = 1e-10
    lossHistorySize = 10
    initActionCoeff = 1
    initValueCoeff = 1
    initCoeff = (initActionCoeff, initValueCoeff)
    afterActionCoeff = 1
    afterValueCoeff = 1
    afterCoeff = (afterActionCoeff, afterValueCoeff)
    terminalController = lambda evalDict, numSteps: False
    coefficientController = CoefficientCotroller(initCoeff, afterCoeff)
    reportInterval = 10000
    trainStepsIntervel = 10000
    trainReporter = TrainReporter(trainStepsIntervel, reportInterval)
    learningRateDecay = 1
    learningRateDecayStep = 1
    learningRateModifier = lambda learningRate: LearningRateModifier(
        learningRate, learningRateDecay, learningRateDecayStep)
    getTrainNN = lambda batchSize, learningRate: Train(
        trainStepsIntervel, batchSize, sampleData,
        learningRateModifier(learningRate), terminalController,
        coefficientController, trainReporter)

    # get path to save trained models
    NNModelFixedParameters = {
        'agentId': agentId,
        'maxRunningSteps': dataSetMaxRunningSteps,
        'numSimulations': dataSetNumSimulations
    }

    NNModelSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
        'trainWolvesTwoCenterControlMultiTrees', 'trainedResNNModels')
    if not os.path.exists(NNModelSaveDirectory):
        os.makedirs(NNModelSaveDirectory)
    NNModelSaveExtension = ''
    getNNModelSavePath = GetSavePath(NNModelSaveDirectory,
                                     NNModelSaveExtension,
                                     NNModelFixedParameters)

    # function to train models
    numOfTrainStepsIntervel = 6
    trainIntervelIndexes = list(range(numOfTrainStepsIntervel))
    trainModelForConditions = TrainModelForConditions(trainIntervelIndexes,
                                                      trainStepsIntervel,
                                                      trainData, sheepNNModel,
                                                      getTrainNN,
                                                      getNNModelSavePath)
    trainModelForConditions(manipulatedVariables)
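
actionToOneHot above builds the policy targets for supervised training. A minimal sketch, assuming it simply one-hot encodes an action over the given (joint) action space; the repository implementation may differ.

import numpy as np

class ActionToOneHot:
    def __init__(self, actionSpace):
        self.actionSpace = actionSpace

    def __call__(self, action):
        oneHot = np.zeros(len(self.actionSpace))
        # mark the index of this action in the action space
        oneHot[self.actionSpace.index(action)] = 1
        return oneHot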