Пример #1
0
def main():
    """Sample trajectories with NN-guided MCTS policies and pickle them.

    Parameters come either from the hard-coded DEBUG values or from the
    command line: ``sys.argv[1]`` is a JSON object providing
    ``iterationIndex``, ``numTrainStepEachIteration`` and
    ``numTrajectoriesPerIteration``; ``sys.argv[2]`` and ``sys.argv[3]`` are
    the start and end sample indices.

    Nothing is done when the trajectory file for this parameter set already
    exists. Otherwise the models saved for ``iterationIndex - 1`` are
    restored for the sheep and for the centrally controlled wolves, one
    trajectory is sampled per index in ``range(startSampleIndex,
    endSampleIndex)`` and the list of trajectories is saved with
    ``saveToPickle``.
    """
    DEBUG = 1
    renderOn = 1

    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 1
        endSampleIndex = 2
        iterationIndex = 2
        numTrainStepEachIteration = 1
        numTrajectoriesPerIteration = 1
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        iterationIndex = int(parametersForTrajectoryPath['iterationIndex'])
        numTrainStepEachIteration = int(parametersForTrajectoryPath['numTrainStepEachIteration'])
        numTrajectoriesPerIteration = int(parametersForTrajectoryPath['numTrajectoriesPerIteration'])
    parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)

    # Output location. exist_ok avoids a FileExistsError when several sampling
    # processes try to create the same directory at the same time.
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(dirName, '..', '..', 'data', 'iterTrain2wolves1sheepMADDPGEnv', 'trajectories')
    os.makedirs(trajectoriesSaveDirectory, exist_ok=True)

    trajectorySaveExtension = '.pickle'

    maxRunningSteps = 50
    numSimulations = 250
    killzoneRadius = 50
    numTree = 2
    fixedParameters = {'maxRunningSteps': maxRunningSteps, 'numSimulations': numSimulations, 'killzoneRadius': killzoneRadius}
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parametersForTrajectoryPath)

    if os.path.isfile(trajectorySavePath):
        # Trajectories for this parameter set were already sampled.
        return

    # env MDP
    sheepsID = [0]
    wolvesID = [1, 2]
    blocksID = []

    numSheeps = len(sheepsID)
    numWolves = len(wolvesID)
    numBlocks = len(blocksID)

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    sheepSize = 0.05
    wolfSize = 0.075
    blockSize = 0.2

    sheepMaxSpeed = 1.3 * 1
    wolfMaxSpeed = 1.0 * 1
    blockMaxSpeed = None

    entitiesSizeList = [sheepSize] * numSheeps + [wolfSize] * numWolves + [blockSize] * numBlocks
    entityMaxSpeedList = [sheepMaxSpeed] * numSheeps + [wolfMaxSpeed] * numWolves + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    centralControlId = 1
    centerControlIndexList = [centralControlId]
    reshapeAction = UnpackCenterControlAction(centerControlIndexList)
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                          getCollisionForce, getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
    interpolateState = TransitMultiAgentChasing(numEntities, reshapeAction, applyActionForce, applyEnvironForce, integrateState)

    numFramesToInterpolate = 1

    def transit(state, action):
        """Advance ``state`` by ``numFramesToInterpolate`` frames.

        The given action is applied on the first frame only; later frames
        use an all-zero action. With zero frames the input state is returned
        unchanged instead of failing on an unbound local.
        """
        for _ in range(numFramesToInterpolate):
            state = interpolateState(state, action)
            action = np.array([(0, 0)] * numAgents)
        return state

    # Episodes never terminate early.
    isTerminal = lambda state: False

    isCollision = IsCollision(getPosFromAgentState)
    collisonRewardWolf = 1
    punishForOutOfBound = PunishForOutOfBound()
    rewardWolf = RewardCentralControlPunishBond(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound, collisonRewardWolf)
    collisonRewardSheep = -1
    rewardSheep = RewardCentralControlPunishBond(sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound, collisonRewardSheep)
    terminalRewardList = [collisonRewardSheep, collisonRewardWolf]
    rewardMultiAgents = [rewardSheep, rewardWolf]

    resetState = ResetMultiAgentChasing(numAgents, numBlocks)

    # policy
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
    wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]

    preyPowerRatio = 0.5
    sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))

    predatorPowerRatio = 0.5
    wolfActionOneSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    wolfActionTwoSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))

    # Joint action space of the two wolves: every pair of individual actions.
    wolvesActionSpace = list(product(wolfActionOneSpace, wolfActionTwoSpace))

    actionSpaceList = [sheepActionSpace, wolvesActionSpace]

    # neural network init
    numStateSpace = 4 * numEntities
    numSheepActionSpace = len(sheepActionSpace)
    numWolvesActionSpace = len(wolvesActionSpace)

    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace, regularizationFactor)
    generateWolvesModel = GenerateModel(numStateSpace, numWolvesActionSpace, regularizationFactor)
    generateModelList = [generateSheepModel, generateWolvesModel]

    sheepDepth = 9
    wolfDepth = 9
    depthList = [sheepDepth, wolfDepth]
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    # Index 0 is the sheep model, index 1 the central-control wolves model.
    sheepId, wolvesId = 0, 1
    trainableAgentIds = [sheepId, wolvesId]

    multiAgentNNmodel = [
        generateModel(sharedWidths * depth, actionLayerWidths, valueLayerWidths, resBlockSize, initializationMethod, dropoutRate)
        for depth, generateModel in zip(depthList, generateModelList)
    ]

    otherAgentApproximatePolicy = [
        lambda NNmodel: ApproximatePolicy(NNmodel, sheepActionSpace),
        lambda NNmodel: ApproximatePolicy(NNmodel, wolvesActionSpace),
    ]

    # NNGuidedMCTS init
    cInit = 1
    cBase = 100
    calculateScore = ScoreChild(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    getApproximatePolicy = [
        lambda NNmodel: ApproximatePolicy(NNmodel, sheepActionSpace),
        lambda NNmodel: ApproximatePolicy(NNmodel, wolvesActionSpace),
    ]
    getApproximateValue = [lambda NNmodel: ApproximateValue(NNmodel), lambda NNmodel: ApproximateValue(NNmodel)]

    def getStateFromNode(node):
        """Return the first value stored in the node's ``id`` mapping."""
        return list(node.id.values())[0]

    chooseActionInMCTS = sampleFromDistribution

    composeMultiAgentTransitInSingleAgentMCTS = ComposeMultiAgentTransitInSingleAgentMCTS(chooseActionInMCTS)
    composeSingleAgentGuidedMCTS = ComposeSingleAgentGuidedMCTS(numTree, numSimulations, actionSpaceList, terminalRewardList, selectChild, isTerminal, transit, getStateFromNode, getApproximatePolicy, getApproximateValue, composeMultiAgentTransitInSingleAgentMCTS)
    prepareMultiAgentPolicy = PrepareMultiAgentPolicy(composeSingleAgentGuidedMCTS, otherAgentApproximatePolicy, trainableAgentIds)

    # load model
    NNModelSaveExtension = ''
    NNModelSaveDirectory = os.path.join(dirName, '..', '..', 'data', 'iterTrain2wolves1sheepMADDPGEnv', 'NNModelRes')
    os.makedirs(NNModelSaveDirectory, exist_ok=True)

    generateNNModelSavePath = GetSavePath(NNModelSaveDirectory, NNModelSaveExtension, fixedParameters)

    # Restore each agent's weights from the previous training iteration.
    for agentId in trainableAgentIds:
        modelPath = generateNNModelSavePath({'iterationIndex': iterationIndex - 1, 'agentId': agentId, 'numTrajectoriesPerIteration': numTrajectoriesPerIteration, 'numTrainStepEachIteration': numTrainStepEachIteration})
        multiAgentNNmodel[agentId] = restoreVariables(multiAgentNNmodel[agentId], modelPath)

    multiAgentPolicy = prepareMultiAgentPolicy(multiAgentNNmodel)
    chooseActionList = [maxFromDistribution, maxFromDistribution]

    def sampleAction(state):
        """Pick one action per policy from its action distribution."""
        actionDists = multiAgentPolicy(state)
        return [chooseAction(actionDist) for actionDist, chooseAction in zip(actionDists, chooseActionList)]

    # The render callback is a no-op, so renderOn has no visible effect here.
    render = lambda state: None
    forwardOneStep = ForwardMultiAgentsOneStep(transit, rewardMultiAgents)
    sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, isTerminal, resetState, forwardOneStep, render, renderOn)

    trajectories = [sampleTrajectory(sampleAction) for _ in range(startSampleIndex, endSampleIndex)]
    print([len(traj) for traj in trajectories])
    saveToPickle(trajectories, trajectorySavePath)
    def __call__(self, parameters):
        """Sample trajectories for one parameter set and save them.

        Builds the chasing environment, the sheep policy and the wolves'
        intention-inference and action-sampling objects, then samples
        ``self.numTrajectories`` trajectories. Each step of a trajectory is
        extended with the intention distributions reported by the wolves'
        ``UpdateIntention`` objects, and the collected trajectories are passed
        to ``self.saveTrajectoryByParameters``.

        Args:
            parameters: mapping read for the keys 'numWolves', 'numSheep',
                'inferenceSoft', 'wolfPolicySoft', 'otherCompeteRate' and
                'competeDetectionRate'. 'otherCompeteRate' is used as a
                probability; 'competeDetectionRate' is used as a lookup key
                into a table that only contains 0, 0.5 and 1.

        Returns:
            None. The mean trajectory length is printed after saving.
        """
        print(parameters)
        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        softParameterInInference = parameters['inferenceSoft']
        softParameterInPlanning = parameters['wolfPolicySoft']
        otherCompeteRate = parameters['otherCompeteRate']
        competeDetectionRate = parameters['competeDetectionRate']

        ## MDP Env
        # state is all multi agent state # action is all multi agent action
        xBoundary = [0, 600]
        yBoundary = [0, 600]
        numOfAgent = numWolves + numSheep
        reset = Reset(xBoundary, yBoundary, numOfAgent)

        # Agent ids: sheep occupy the first numSheep indices, wolves follow.
        possibleSheepIds = list(range(numSheep))
        possibleWolvesIds = list(range(numSheep, numSheep + numWolves))
        getSheepStatesFromAll = lambda state: np.array(state)[possibleSheepIds]
        getWolvesStatesFromAll = lambda state: np.array(state)[
            possibleWolvesIds]
        killzoneRadius = 50
        isTerminal = IsTerminal(killzoneRadius, getSheepStatesFromAll,
                                getWolvesStatesFromAll)

        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
            xBoundary, yBoundary)
        interpolateOneFrame = InterpolateOneFrame(
            stayInBoundaryByReflectVelocity)
        numFramesToInterpolate = 3
        transit = TransitWithTerminalCheckOfInterpolation(
            numFramesToInterpolate, interpolateOneFrame, isTerminal)

        maxRunningSteps = 61
        timeCost = 1 / maxRunningSteps
        terminalBonus = 1
        rewardFunction = RewardFunctionByTerminal(timeCost, terminalBonus,
                                                  isTerminal)

        forwardOneStep = ForwardOneStep(transit, rewardFunction)
        sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset,
                                            forwardOneStep)

        ## MDP Policy
        # Sheep Part

        # Sheep Policy Function
        numSheepPolicyStateSpace = 2 * (numWolves + 1)
        sheepActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                            (-7, -7), (0, -10), (7, -7), (0, 0)]
        preyPowerRatio = 12
        # Scale every base action by preyPowerRatio.
        sheepIndividualActionSpace = list(
            map(tuple,
                np.array(sheepActionSpace) * preyPowerRatio))
        numSheepActionSpace = len(sheepIndividualActionSpace)
        regularizationFactor = 1e-4
        generateSheepModel = GenerateModel(numSheepPolicyStateSpace,
                                           numSheepActionSpace,
                                           regularizationFactor)
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        sheepNNDepth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initSheepModel = generateSheepModel(sharedWidths * sheepNNDepth,
                                            actionLayerWidths,
                                            valueLayerWidths, resBlockSize,
                                            initializationMethod, dropoutRate)
        # The path is relative to the current working directory, and the file
        # name depends on numWolves.
        sheepModelPath = os.path.join(
            '..', '..', 'data', 'preTrainModel',
            'agentId=0.' + str(numWolves) +
            '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=110_trainSteps=50000'
        )
        sheepNNModel = restoreVariables(initSheepModel, sheepModelPath)
        sheepPolicy = ApproximatePolicy(sheepNNModel,
                                        sheepIndividualActionSpace)

        # Sheep Generate Action
        softParameterInPlanningForSheep = 2.0
        softPolicyInPlanningForSheep = SoftDistribution(
            softParameterInPlanningForSheep)
        # Sheep policy output passed through the soft distribution.
        softenSheepPolicy = lambda relativeAgentsStatesForSheepPolicy: softPolicyInPlanningForSheep(
            sheepPolicy(relativeAgentsStatesForSheepPolicy))

        sheepChooseActionMethod = sampleFromDistribution
        # One action sampler per sheep id.
        sheepSampleActions = [
            SampleActionOnFixedIntention(selfId, possibleWolvesIds,
                                         softenSheepPolicy,
                                         sheepChooseActionMethod)
            for selfId in possibleSheepIds
        ]

        # Wolves Part

        # Percept Action For Inference
        # Identity: actions are passed on exactly as given.
        perceptAction = lambda action: action

        # Policy Likelihood function: Wolf Central Control NN Policy Given Intention
        # One entry per group size numInWe = 2 .. numWolves.
        numWolvesStateSpaces = [
            2 * (numInWe + 1) for numInWe in range(2, numWolves + 1)
        ]
        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                       (0, -10), (7, -7)]
        predatorPowerRatio = 8
        wolfIndividualActionSpace = list(
            map(tuple,
                np.array(actionSpace) * predatorPowerRatio))
        # Joint action space for each group size: all combinations of
        # numInWe individual wolf actions.
        wolvesCentralControlActionSpaces = [
            list(it.product(wolfIndividualActionSpace, repeat=numInWe))
            for numInWe in range(2, numWolves + 1)
        ]
        numWolvesCentralControlActionSpaces = [
            len(wolvesCentralControlActionSpace) for
            wolvesCentralControlActionSpace in wolvesCentralControlActionSpaces
        ]
        regularizationFactor = 1e-4
        generateWolvesCentralControlModels = [
            GenerateModel(numStateSpace, numActionSpace, regularizationFactor)
            for numStateSpace, numActionSpace in zip(
                numWolvesStateSpaces, numWolvesCentralControlActionSpaces)
        ]
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        wolfNNDepth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initWolvesCentralControlModels = [
            generateWolvesCentralControlModel(sharedWidths * wolfNNDepth,
                                              actionLayerWidths,
                                              valueLayerWidths, resBlockSize,
                                              initializationMethod,
                                              dropoutRate)
            for generateWolvesCentralControlModel in
            generateWolvesCentralControlModels
        ]
        NNNumSimulations = 250
        # The agentId part of the file name is the digit 8 repeated numInWe
        # times (88 for two wolves, 888 for three).
        # NOTE(review): 8 presumably is len(actionSpace) - confirm.
        wolvesModelPaths = [
            os.path.join(
                '..', '..', 'data', 'preTrainModel',
                'agentId=' + str(8 * np.sum([10**_ for _ in range(numInWe)])) +
                '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations='
                + str(NNNumSimulations) + '_trainSteps=50000')
            for numInWe in range(2, numWolves + 1)
        ]
        print(wolvesModelPaths)
        wolvesCentralControlNNModels = [
            restoreVariables(initWolvesCentralControlModel, wolvesModelPath)
            for initWolvesCentralControlModel, wolvesModelPath in zip(
                initWolvesCentralControlModels, wolvesModelPaths)
        ]
        # The comprehension variable `actionSpace` is local to the
        # comprehension and does not rebind the list defined above.
        wolvesCentralControlPolicies = [
            ApproximatePolicy(NNModel, actionSpace) for NNModel, actionSpace in
            zip(wolvesCentralControlNNModels, wolvesCentralControlActionSpaces)
        ]

        centralControlPolicyListBasedOnNumAgentsInWe = wolvesCentralControlPolicies  # 0 for two agents in We, 1 for three agents...
        softPolicyInInference = SoftDistribution(softParameterInInference)
        policyForCommittedAgentsInInference = PolicyForCommittedAgent(
            centralControlPolicyListBasedOnNumAgentsInWe,
            softPolicyInInference, getStateOrActionThirdPersonPerspective)
        # NOTE(review): id 2 is the last wolf only when numSheep == 1 and
        # numWolves == 2 - confirm this hard-coded id against the callers.
        concernedAgentsIds = [2]
        calCommittedAgentsPolicyLikelihood = CalCommittedAgentsPolicyLikelihood(
            concernedAgentsIds, policyForCommittedAgentsInInference)

        # For the individual heat-seeking policy, element 0 of the relative
        # states is taken as the goal and element 1 as the agent itself.
        getGoalStateForIndividualHeatseeking = lambda statesRelative: np.array(
            statesRelative)[0]
        getSelfStateForIndividualHeatseeking = lambda statesRelative: np.array(
            statesRelative)[1]
        heatseekingPrecesion = 1.83
        heatSeekingDiscreteStochasticPolicy = HeatSeekingDiscreteStochasticPolicy(
            heatseekingPrecesion, wolfIndividualActionSpace,
            getSelfStateForIndividualHeatseeking,
            getGoalStateForIndividualHeatseeking)
        policyForUncommittedAgentsInInference = PolicyForUncommittedAgent(
            possibleWolvesIds, heatSeekingDiscreteStochasticPolicy,
            softPolicyInInference, getStateOrActionFirstPersonPerspective)
        calUncommittedAgentsPolicyLikelihood = CalUncommittedAgentsPolicyLikelihood(
            possibleWolvesIds, concernedAgentsIds,
            policyForUncommittedAgentsInInference)

        # Joint Likelihood
        # Product of the committed and uncommitted agents' likelihoods.
        calJointLikelihood = lambda intention, state, perceivedAction: calCommittedAgentsPolicyLikelihood(intention, state, perceivedAction) * \
                calUncommittedAgentsPolicyLikelihood(intention, state, perceivedAction)

        wolvesValueListBasedOnNumAgentsInWe = [
            ApproximateValue(NNModel)
            for NNModel in wolvesCentralControlNNModels
        ]
        calIntentionValueGivenState = CalIntentionValueGivenState(
            wolvesValueListBasedOnNumAgentsInWe)
        softParamterForValue = 0.01
        softValueToBuildDistribution = SoftMax(softParamterForValue)
        adjustIntentionPriorGivenValueOfState = AdjustIntentionPriorGivenValueOfState(
            calIntentionValueGivenState, softValueToBuildDistribution)

        # Sample and Save Trajectory
        trajectoriesWithIntentionDists = []
        for trajectoryId in range(self.numTrajectories):

            # Intention Prior For inference
            # The other wolf's intention space is drawn per trajectory:
            # type 1 with probability otherCompeteRate, type 0 otherwise.
            otherWolfPossibleIntentionSpaces = {0: [(0, (1, 2))], 1: [(0, ())]}
            otherIntentionType = np.random.choice(
                [1, 0], p=[otherCompeteRate, 1 - otherCompeteRate])
            otherWolfIntentionSpace = otherWolfPossibleIntentionSpaces[
                otherIntentionType]
            # Lookup table: any competeDetectionRate other than 0, 0.5 or 1
            # raises KeyError.
            selfPossibleIntentionSpaces = {
                0: [(0, (1, 2))],
                0.5: [(0, (1, 2)), (0, ())],
                1: [(0, ())]
            }
            selfWolfIntentionSpace = selfPossibleIntentionSpaces[
                competeDetectionRate]
            # Exactly two intention spaces (self, other).
            # NOTE(review): the zips below therefore pair at most two wolves;
            # this looks like it assumes numWolves == 2 - confirm.
            intentionSpacesForAllWolves = [
                selfWolfIntentionSpace, otherWolfIntentionSpace
            ]
            # Uniform prior over each wolf's possible intentions.
            wolvesIntentionPriors = [{
                tuple(intention): 1 / len(allPossibleIntentionsOneWolf)
                for intention in allPossibleIntentionsOneWolf
            } for allPossibleIntentionsOneWolf in intentionSpacesForAllWolves]
            # Infer and update Intention
            variablesForAllWolves = [[
                intentionSpace
            ] for intentionSpace in intentionSpacesForAllWolves]
            jointHypothesisSpaces = [
                pd.MultiIndex.from_product(variables, names=['intention'])
                for variables in variablesForAllWolves
            ]
            concernedHypothesisVariable = ['intention']
            priorDecayRate = 1
            softPrior = SoftDistribution(priorDecayRate)
            inferIntentionOneStepList = [
                InferOneStep(jointHypothesisSpace, concernedHypothesisVariable,
                             calJointLikelihood, softPrior)
                for jointHypothesisSpace in jointHypothesisSpaces
            ]

            chooseIntention = sampleFromDistribution
            valuePriorEndTime = -100
            updateIntentions = [
                UpdateIntention(intentionPrior, valuePriorEndTime,
                                adjustIntentionPriorGivenValueOfState,
                                perceptAction, inferIntentionOneStep,
                                chooseIntention)
                for intentionPrior, inferIntentionOneStep in zip(
                    wolvesIntentionPriors, inferIntentionOneStepList)
            ]

            # Tools to reset the intention objects and read back their
            # recorded intention priors across multiple trajectories.
            intentionResetAttributes = [
                'timeStep', 'lastState', 'lastAction', 'intentionPrior',
                'formerIntentionPriors'
            ]
            # One dict of attribute name -> reset value per wolf.
            intentionResetAttributeValues = [
                dict(
                    zip(intentionResetAttributes,
                        [0, None, None, intentionPrior, [intentionPrior]]))
                for intentionPrior in wolvesIntentionPriors
            ]
            resetIntentions = ResetObjects(intentionResetAttributeValues,
                                           updateIntentions)
            returnAttributes = ['formerIntentionPriors']
            getIntentionDistributions = GetObjectsValuesOfAttributes(
                returnAttributes, updateIntentions)
            attributesToRecord = ['lastAction']
            recordActionForUpdateIntention = RecordValuesForObjects(
                attributesToRecord, updateIntentions)

            # Wolves Generate Action
            softPolicyInPlanning = SoftDistribution(softParameterInPlanning)
            policyForCommittedAgentInPlanning = PolicyForCommittedAgent(
                centralControlPolicyListBasedOnNumAgentsInWe,
                softPolicyInPlanning, getStateOrActionThirdPersonPerspective)

            policyForUncommittedAgentInPlanning = PolicyForUncommittedAgent(
                possibleWolvesIds, heatSeekingDiscreteStochasticPolicy,
                softPolicyInPlanning, getStateOrActionFirstPersonPerspective)

            wolfChooseActionMethod = sampleFromDistribution
            # Position of selfId within weIds.
            getSelfActionThirdPersonPerspective = lambda weIds, selfId: list(
                weIds).index(selfId)
            chooseCommittedAction = GetActionFromJointActionDistribution(
                wolfChooseActionMethod, getSelfActionThirdPersonPerspective)
            chooseUncommittedAction = sampleFromDistribution
            wolvesSampleIndividualActionGivenIntentionList = [
                SampleIndividualActionGivenIntention(
                    selfId, policyForCommittedAgentInPlanning,
                    policyForUncommittedAgentInPlanning, chooseCommittedAction,
                    chooseUncommittedAction) for selfId in possibleWolvesIds
            ]

            wolvesSampleActions = [
                SampleActionOnChangableIntention(
                    updateIntention,
                    wolvesSampleIndividualActionGivenIntention)
                for updateIntention, wolvesSampleIndividualActionGivenIntention
                in zip(updateIntentions,
                       wolvesSampleIndividualActionGivenIntentionList)
            ]
            # Sheep samplers first, then wolves, matching the id layout above.
            allIndividualSampleActions = sheepSampleActions + wolvesSampleActions
            sampleActionMultiAgent = SampleActionMultiagent(
                allIndividualSampleActions, recordActionForUpdateIntention)
            trajectory = sampleTrajectory(sampleActionMultiAgent)
            intentionDistributions = getIntentionDistributions()
            # Append the intention distributions to each trajectory step;
            # zip stops at the shorter of the two sequences.
            trajectoryWithIntentionDists = [
                tuple(list(SASRPair) + list(intentionDist)) for SASRPair,
                intentionDist in zip(trajectory, intentionDistributions)
            ]
            trajectoriesWithIntentionDists.append(
                tuple(trajectoryWithIntentionDists))
            resetIntentions()
            #print(intentionDistributions[-1], otherCompeteRate)
        # Fixed settings handed to the saver together with `parameters`.
        trajectoryFixedParameters = {
            'sheepPolicySoft': softParameterInPlanningForSheep,
            'wolfPolicySoft': softParameterInPlanning,
            'maxRunningSteps': maxRunningSteps,
            'competePolicy': 'heatseeking',
            'NNNumSimulations': NNNumSimulations,
            'heatseekingPrecesion': heatseekingPrecesion
        }
        self.saveTrajectoryByParameters(trajectoriesWithIntentionDists,
                                        trajectoryFixedParameters, parameters)
        # Mean number of steps per sampled trajectory.
        print(np.mean([len(tra) for tra in trajectoriesWithIntentionDists]))
Пример #3
0
def main():
    """Plot the wolves' central-control value over a grid of sheep positions.

    Restores the pre-trained central-control models for group sizes
    2 .. numWolves, takes the value function of the model for ``numWolves``
    wolves, evaluates it with ``evaluateValue`` for every cell centre of a
    120 x 120 grid over the 600 x 600 area with the wolves at fixed positions,
    draws the result with ``drawHeatmapPlot``, saves the figure as
    'valueMap2' and shows it.
    """
    numWolves = 2
    numSheep = 1
    # Group sizes ("number of agents in We") that have a model.
    weSizes = range(2, numWolves + 1)

    # Action spaces: individual wolf actions and their joint combinations.
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7)]
    predatorPowerRatio = 8
    wolfIndividualActionSpace = [
        tuple(scaledAction)
        for scaledAction in np.array(actionSpace) * predatorPowerRatio
    ]
    stateDims = [2 * (size + 1) for size in weSizes]
    jointActionSpaces = [
        list(it.product(wolfIndividualActionSpace, repeat=size))
        for size in weSizes
    ]
    jointActionCounts = [len(jointSpace) for jointSpace in jointActionSpaces]

    # Model generators, one per group size.
    regularizationFactor = 1e-4
    modelGenerators = []
    for stateDim, actionCount in zip(stateDims, jointActionCounts):
        modelGenerators.append(
            GenerateModel(stateDim, actionCount, regularizationFactor))

    # Untrained networks.
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    wolfNNDepth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    untrainedModels = []
    for generate in modelGenerators:
        untrainedModels.append(
            generate(sharedWidths * wolfNNDepth, actionLayerWidths,
                     valueLayerWidths, resBlockSize, initializationMethod,
                     dropoutRate))

    # Saved-model paths; the agentId tag is len(actionSpace) times 1, 11, 111...
    NNNumSimulations = 250
    modelDirectory = os.path.join('..', '..', 'data', 'preTrainModel')
    wolvesModelPaths = []
    for size in weSizes:
        agentIdTag = str(
            len(actionSpace) * np.sum([10**power for power in range(size)]))
        modelName = (
            'agentId=' + agentIdTag +
            '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations='
            + str(NNNumSimulations) + '_trainSteps=50000')
        wolvesModelPaths.append(os.path.join(modelDirectory, modelName))
    print(wolvesModelPaths)

    # Restore weights and wrap every model in a value function.
    restoredModels = [
        restoreVariables(model, path)
        for model, path in zip(untrainedModels, wolvesModelPaths)
    ]
    valueFunctions = [ApproximateValue(model) for model in restoredModels]
    # Index 0 holds the two-wolf model, so numWolves wolves sit at numWolves - 2.
    valueFunction = valueFunctions[numWolves - 2]

    xBoundary = [0, 600]
    yBoundary = [0, 600]
    reset = Reset(xBoundary, yBoundary, numWolves)

    # Cell centres of the evaluation grid.
    numGridX = 120
    numGridY = 120
    xMin, xMax = xBoundary
    yMin, yMax = yBoundary
    xInterval = (xMax - xMin) / numGridX
    yInterval = (yMax - yMin) / numGridY
    sheepXPosition = []
    for gridIndex in range(numGridX):
        sheepXPosition.append((gridIndex + 0.5) * xInterval)
    sheepYPosition = []
    for gridIndex in range(numGridY):
        sheepYPosition.append((gridIndex + 0.5) * yInterval)

    # The state from reset() is replaced by fixed wolf positions right away;
    # the call is kept because its side effects are not visible from here.
    wolvesState = reset()
    wolvesState = np.array([[300, 350], [550, 400]])
    print(wolvesState)

    levelNames = ["sheepXPosition", "sheepYPosition"]
    modelIndex = pd.MultiIndex.from_product(
        [sheepXPosition, sheepYPosition], names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    def evaluate(df):
        """Evaluate one sheep-position group against the fixed wolves state."""
        return evaluateValue(df, valueFunction, wolvesState)

    valueResultDf = toSplitFrame.groupby(levelNames).apply(evaluate)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    drawHeatmapPlot(valueResultDf, ax)

    fig.savefig('valueMap2', dpi=300)
    plt.show()