Python RewardSheep примеры использования

Язык программирования: Python

Пространство имен/Пакет: environment.chasingEnv.multiAgentEnv

Класс/Тип: RewardSheep

Примеров на hotexamples.com: 10

Python RewardSheep - 10 примеров найдено. Это лучшие примеры Python кода для environment.chasingEnv.multiAgentEnv.RewardSheep, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

RewardSheep(10)

Основные методы

RewardSheep (10)

Пример #1

Показать файл

Файл: evaluateVariousCollisionMujocoEnv.py Проект: Frostwoods/ChasingWithRopeMujocoEnv

def simuliateOneParameter(parameterDict, evalNum, randomSeed, dt):
    mujocoVisualize = False
    demoVisualize = False

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2

    wolfColor = np.array([0.85, 0.35, 0.35])
    sheepColor = np.array([0.35, 0.85, 0.35])
    blockColor = np.array([0.25, 0.25, 0.25])

    wolvesID = [0]
    sheepsID = [1]
    blocksID = [2]

    numWolves = len(wolvesID)
    numSheeps = len(sheepsID)
    numBlocks = len(blocksID)

    sheepMaxSpeed = 1.3
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None

    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed
                                                       ] * numSheeps

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    makePropertyList = MakePropertyList(transferNumberListToStr)

    #changeCollisionReleventParameter
    dmin = parameterDict['dmin']
    dmax = parameterDict['dmax']
    width = parameterDict['width']
    midpoint = parameterDict['midpoint']
    power = parameterDict['power']
    timeconst = parameterDict['timeconst']
    dampratio = parameterDict['dampratio']

    geomIds = [1, 2, 3]
    keyNameList = ['@solimp', '@solref']
    valueList = [[[dmin, dmax, width, midpoint, power], [timeconst, dampratio]]
                 ] * len(geomIds)
    collisionParameter = makePropertyList(geomIds, keyNameList, valueList)

    #changeSize
    # geomIds=[1,2]
    # keyNameList=['@size']
    # valueList=[[[0.075,0.075]],[[0.1,0.1]]]
    # sizeParameter=makePropertyList(geomIds,keyNameList,valueList)

    #load env xml and change some geoms' property
    physicsDynamicsPath = os.path.join(
        dirName, '..', '..', 'environment', 'mujocoEnv',
        'variousCollision_numBlocks=1_numSheeps=1_numWolves=1dt={}.xml'.format(
            dt))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())

    envXmlPropertyDictList = [collisionParameter]
    changeEnvXmlPropertFuntionyList = [changeGeomProperty]
    for propertyDict, changeXmlProperty in zip(
            envXmlPropertyDictList, changeEnvXmlPropertFuntionyList):
        envXmlDict = changeXmlProperty(envXmlDict, propertyDict)

    envXml = xmltodict.unparse(envXmlDict)
    # print(envXmlDict['mujoco']['worldbody']['body'][0])
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)

    # MDP function
    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents

    qVelInitNoise = 0
    qPosInitNoise = 1

    # reset=ResetUniformWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents, numBlocks,qPosInitNoise, qVelInitNoise)
    # fixReset=ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents,numBlocks)
    # blocksState=[[0,-0.8,0,0]]
    # reset=lambda :fixReset([-0.5,0.8,-0.5,0,0.5,0.8],[0,0,0,0,0,0],blocksState)

    isTerminal = lambda state: False
    numSimulationFrames = int(0.1 / dt)
    print(numSimulationFrames)
    # dt=0.01
    damping = 2.5
    reshapeAction = lambda action: action
    # transit = TransitionFunctionWithoutXPos(physicsSimulation,numAgents , numSimulationFrames,damping*dt/numSimulationFrames,agentsMaxSpeedList,mujocoVisualize,reshapeAction)

    maxRunningSteps = 10

    # sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, rewardFunc, reset)
    # observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    # observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]
    # initObsForParams = observe(reset())
    # obsShape = [initObsForParams[obsID].shape for obsID in range(len(initObsForParams))]

    class ImpulsePolicy(object):
        """docstring for c"""
        def __init__(self, initAction):
            self.initAction = initAction

        def __call__(self, state, timeStep):
            action = [[0, 0], [0, 0]]
            if timeStep == 0:
                action = self.initAction
            return action

    worldDim = 2
    trajList = []
    startTime = time.time()
    for i in range(evalNum):

        np.random.seed(randomSeed + i)
        initSpeedDirection = np.random.uniform(-np.pi / 2, np.pi / 2, 1)[0]
        initSpeed = np.random.uniform(0, 1, 1)[0]
        initActionDirection = np.random.uniform(-np.pi / 2, np.pi / 2, 1)[0]
        initForce = np.random.uniform(0, 5, 1)[0]
        # print(initSpeedDirection,initSpeed,initActionDirection,initForce)

        fixReset = ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit,
                                       numAgents, numBlocks)
        blocksState = [[0, 0, 0, 0]]  #posX posY velX velY
        reset = lambda: fixReset([-0.28, 0, 8, 8], [
            initSpeed * np.cos(initSpeedDirection), initSpeed * np.sin(
                initSpeedDirection), 0, 0
        ], blocksState)

        transit = TransitionFunctionWithoutXPos(
            physicsSimulation, numAgents, numSimulationFrames,
            damping * dt * numSimulationFrames, agentsMaxSpeedList,
            mujocoVisualize, isTerminal, reshapeAction)

        initAction = [[
            initForce * np.cos(initActionDirection),
            initForce * np.sin(initActionDirection)
        ], [0, 0]]
        impulsePolicy = ImpulsePolicy(initAction)

        sampleTrajectory = SampleTrajectory(maxRunningSteps, transit,
                                            isTerminal, rewardFunc, reset)

        traj = sampleTrajectory(impulsePolicy)
        # print('traj',traj[0])
        trajList.append(traj)

    # saveTraj
    # print(trajList)
    saveTraj = True
    if saveTraj:
        # trajSavePath = os.path.join(dirName,'traj', 'evaluateCollision', 'CollisionMoveTransitDamplingCylinder.pickle')
        trajectorySaveExtension = '.pickle'
        fixedParameters = {
            'isMujoco': 1,
            'isCylinder': 1,
            'randomSeed': randomSeed,
            'evalNum': evalNum
        }
        trajectoriesSaveDirectory = trajSavePath = os.path.join(
            dirName, '..', 'trajectory', 'variousCollsiondt={}'.format(dt))
        generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                                 trajectorySaveExtension,
                                                 fixedParameters)
        trajectorySavePath = generateTrajectorySavePath(parameterDict)
        saveToPickle(trajList, trajectorySavePath)

    # visualize

    if demoVisualize:
        entitiesColorList = [wolfColor] * numWolves + [
            sheepColor
        ] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
    endTime = time.time()
    print("Time taken {} seconds to generate {} trajectories".format(
        (endTime - startTime), evalNum))

Пример #2

Показать файл

Файл: evluateCollisionMujocoEnv.py Проект: Frostwoods/ChasingWithRopeMujocoEnv

def simuliateOneParameter(parameterDict):


    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2


    wolfColor = np.array([0.85, 0.35, 0.35])
    sheepColor = np.array([0.35, 0.85, 0.35])
    blockColor = np.array([0.25, 0.25, 0.25])

    wolvesID = [0,1]
    sheepsID = [2]
    blocksID = [3]

    numWolves = len(wolvesID)
    numSheeps = len(sheepsID)
    numBlocks = len(blocksID)

    sheepMaxSpeed = 1.3
    wolfMaxSpeed =1.0
    blockMaxSpeed = None



    agentsMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    entitiesSizeList = [wolfSize]* numWolves + [sheepSize] * numSheeps + [blockSize]* numBlocks
    entityMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed]* numBlocks
    entitiesMovableList = [True]* numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)

    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    makePropertyList=MakePropertyList(transferNumberListToStr)

    #changeCollisionReleventParameter
    dmin=parameterDict['dmin']
    dmax=parameterDict['dmax']
    width=parameterDict['width']
    midpoint=parameterDict['midpoint']
    power=parameterDict['power']
    timeconst=parameterDict['timeconst']
    dampratio=parameterDict['dampratio']

    geomIds=[1,2]
    keyNameList=['@solimp','@solref']
    valueList=[[[dmin,dmax,width,midpoint,power],[timeconst,dampratio]]]*len(geomIds)
    collisionParameter=makePropertyList(geomIds,keyNameList,valueList)

#changeSize
    # geomIds=[1,2]
    # keyNameList=['@size']
    # valueList=[[[0.075,0.075]],[[0.1,0.1]]]
    # sizeParameter=makePropertyList(geomIds,keyNameList,valueList)

#load env xml and change some geoms' property
    # physicsDynamicsPath=os.path.join(dirName,'multiAgentcollision.xml')
    physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','multiAgentcollision.xml')
    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    print(envXmlDict)
    envXmlPropertyDictList=[collisionParameter]
    changeEnvXmlPropertFuntionyList=[changeGeomProperty]
    for propertyDict,changeXmlProperty in zip(envXmlPropertyDictList,changeEnvXmlPropertFuntionyList):
        envXmlDict=changeXmlProperty(envXmlDict,propertyDict)

    envXml=xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)


    # MDP function
    qPosInit = [0, 0]*numAgents
    qVelInit = [0, 0]*numAgents

    qVelInitNoise = 0
    qPosInitNoise = 1

    # reset=ResetUniformWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents, numBlocks,qPosInitNoise, qVelInitNoise)

    fixReset=ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents,numBlocks)
    blocksState=[[0,0,0,0]]
    reset=lambda :fixReset([-0.5,0.8,-0.5,0,0.5,0.8],[0,0,0,0,0,0],blocksState)


    isTerminal = lambda state: False
    numSimulationFrames = 1
    visualize=False
    # physicsViewer=None
    dt=0.1
    damping=2.5
    transit = TransitionFunctionWithoutXPos(physicsSimulation,numAgents , numSimulationFrames,damping*dt/numSimulationFrames,agentsMaxSpeedList,visualize,isTerminal)


    maxRunningSteps = 100
    sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, rewardFunc, reset)
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]
    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape for obsID in range(len(initObsForParams))]

    worldDim = 2
    evalNum=1

    trajectorySaveExtension = '.pickle'
    fixedParameters = {'isMujoco': 1,'isCylinder':1,'evalNum':evalNum}
    trajectoriesSaveDirectory=trajSavePath = os.path.join(dirName,'..','trajectory')
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parameterDict)


    if not os.path.isfile(trajectorySavePath):
        trajList =list()

        for i in range(evalNum):

            # policy =lambda state: [[-3,0] for agent in range(numAgents)]
            np.random.seed(i)
            # policy =lambda state: [np.random.uniform(-5,5,2) for agent in range(numAgents)]sss
            # policy =lambda state: [[0,1] for agent in range(numAgents)]
            policy =lambda state: [[1,0] ]
            # policy =lambda state: [[np.random.uniform(0,1,1),0] ,[np.random.uniform(-1,0,1),0] ]
            traj = sampleTrajectory(policy)
            # print(i,'traj',[state[1] for state in traj[:2]])
            # print(traj)
            trajList.append( traj)

        # saveTraj
        saveTraj = True
        if saveTraj:
            # trajSavePath = os.path.join(dirName,'traj', 'evaluateCollision', 'CollisionMoveTransitDamplingCylinder.pickle')

            saveToPickle(trajList, trajectorySavePath)

        # visualize

        # physicsViewer.render()
        visualize = True
        if visualize:
            entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [blockColor] * numBlocks
            render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
            render(trajList)

Пример #3

Показать файл

Файл: generateTraj_maddpgMujocoWithRope.py Проект: Frostwoods/ChasingWithRopeMujocoEnv

def generateSingleCondition(condition):
    debug = 0
    if debug:


        damping=2.0
        frictionloss=0.0
        masterForce=1.0

        numWolves = 1
        numSheeps = 1
        numMasters = 1
        maxTimeStep = 25

        saveTraj=False
        saveImage=True
        visualizeMujoco=False
        visualizeTraj = True
        makeVideo=True
    else:

        # print(sys.argv)
        # condition = json.loads(sys.argv[1])
        damping = float(condition['damping'])
        frictionloss = float(condition['frictionloss'])
        masterForce = float(condition['masterForce'])

        numWolves = 1
        numSheeps = 1
        numMasters = 1
        maxTimeStep = 25

        saveTraj=False
        saveImage=True
        visualizeMujoco=False
        visualizeTraj = True
        makeVideo=False

    print("maddpg: , saveTraj: {}, visualize: {},damping; {},frictionloss: {}".format( str(saveTraj), str(visualizeMujoco),damping,frictionloss))


    numAgents = numWolves + numSheeps+numMasters
    numEntities = numAgents + numMasters
    wolvesID = [0]
    sheepsID = [1]
    masterID = [2]

    wolfSize = 0.075
    sheepSize = 0.05
    masterSize = 0.075
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [masterSize] * numMasters

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None


    entitiesMovableList = [True] * numAgents + [False] * numMasters
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision,punishForOutOfBound)


    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    rewardMaster= lambda state, action, nextState: [-reward  for reward in rewardWolf(state, action, nextState)]
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))+list(rewardMaster(state, action, nextState))

    dirName = os.path.dirname(__file__)
    # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','leased.xml')

    makePropertyList=MakePropertyList(transferNumberListToStr)

    geomIds=[1,2,3]
    keyNameList=[0,1]
    valueList=[[damping,damping]]*len(geomIds)
    dampngParameter=makePropertyList(geomIds,keyNameList,valueList)

    changeJointDampingProperty=lambda envDict,geomPropertyDict:changeJointProperty(envDict,geomPropertyDict,'@damping')

    geomIds=[1,2,3]
    keyNameList=[0,1]
    valueList=[[frictionloss,frictionloss]]*len(geomIds)
    frictionlossParameter=makePropertyList(geomIds,keyNameList,valueList)
    changeJointFrictionlossProperty=lambda envDict,geomPropertyDict:changeJointProperty(envDict,geomPropertyDict,'@frictionloss')


    physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','leasedNew.xml')
    # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','leased.xml')


    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())




    envXmlDict = xmltodict.parse(xml_string.strip())
    envXmlPropertyDictList=[dampngParameter,frictionlossParameter]
    changeEnvXmlPropertFuntionyList=[changeJointDampingProperty,changeJointFrictionlossProperty]
    for propertyDict,changeXmlProperty in zip(envXmlPropertyDictList,changeEnvXmlPropertFuntionyList):
        envXmlDict=changeXmlProperty(envXmlDict,propertyDict)





    envXml=xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    # print(physicsSimulation.model.body_pos)

    # print(dir(physicsSimulation.model))
    # print(dir(physicsSimulation.data),physicsSimulation.dataphysicsSimulation.data)

    # print(physicsSimulation.data.qpos,dir(physicsSimulation.data.qpos))
    # print(physicsSimulation.data.qpos,dir(physicsSimulation.data.qpos))
    qPosInit = (0, ) * 24
    qVelInit = (0, ) * 24
    qPosInitNoise = 0.4
    qVelInitNoise = 0
    numAgent = 2
    tiedAgentId = [0, 2]
    ropePartIndex = list(range(3, 12))
    maxRopePartLength = 0.06
    reset = ResetUniformWithoutXPosForLeashed(physicsSimulation, qPosInit, qVelInit, numAgent, tiedAgentId,ropePartIndex, maxRopePartLength, qPosInitNoise, qVelInitNoise)
    numSimulationFrames=10
    isTerminal= lambda state: False
    reshapeActionList = [ReshapeAction(5),ReshapeAction(5),ReshapeAction(masterForce)]
    transit=TransitionFunctionWithoutXPos(physicsSimulation, numSimulationFrames, visualizeMujoco,isTerminal, reshapeActionList)

    # damping=2.5
    # numSimulationFrames =int(0.1/dt)
    # agentsMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps
    # reshapeAction = ReshapeAction()
    # isTerminal = lambda state: False
    # transit = TransitionFunctionWithoutXPos(physicsSimulation,numAgents , numSimulationFrames,damping*dt*numSimulationFrames,agentsMaxSpeedList,visualizeMujoco,isTerminal,reshapeAction)


    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit, isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, masterID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]


    # individStr = 'individ' if individualRewardWolf else 'shared'
    # fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
    #     numWolves, numSheeps, numMasters, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)


    # wolvesModelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg', fileName + str(i) + '60000eps') for i in wolvesID]
    # [restoreVariables(model, path) for model, path in zip(wolvesModel, wolvesModelPaths)]
    #
    # actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    # policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]

    # modelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg_ExpEpsLengthAndSheepSpeed', fileName + str(i)) for i in
    #               range(numAgents)]
    # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG')
    # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','timeconst='+str(timeconst)+'dampratio='+str(dampratio))

    modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPGLeasedFixedEnv2','damping={}_frictionloss={}_masterForce={}'.format(damping,frictionloss,masterForce))
    fileName = "maddpg{}episodes{}step_agent".format(maxEpisode, maxTimeStep)

    modelPaths = [os.path.join(modelFolder,  fileName + str(i) + '60000eps') for i in range(numAgents)]

    [restoreVariables(model, path) for model, path in zip(modelsList, modelPaths)]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]


    trajList = []
    numTrajToSample = 5
    for i in range(numTrajToSample):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))

    # saveTraj
    if saveTraj:
        trajFileName = "maddpg{}wolves{}sheep{}blocks{}eps{}stepSheepSpeed{}{}Traj".format(numWolves, numSheeps, numMasters, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)
        trajSavePath = os.path.join(dirName, '..', 'trajectory', trajFileName)
        saveToPickle(trajList, trajSavePath)


    # visualize
    if visualizeTraj:
        demoFolder = os.path.join(dirName, '..', 'demos', 'mujocoMADDPGLeasedFixedEnv2','damping={}_frictionloss={}_masterForce={}'.format(damping,frictionloss,masterForce))

        if not os.path.exists(demoFolder):
            os.makedirs(demoFolder)
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [masterColor] * numMasters
        render = Render(entitiesSizeList, entitiesColorList, numAgents,demoFolder,saveImage, getPosFromAgentState)
        # print(trajList[0][0],'!!!3',trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)

Пример #4

Показать файл

Файл: testlRopeMujocoEnv.py Проект: Frostwoods/ChasingWithRopeMujocoEnv

def simuliateOneParameter(parameterDict):


    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2


    wolfColor = np.array([0.85, 0.35, 0.35])
    sheepColor = np.array([0.35, 0.85, 0.35])
    blockColor = np.array([0.25, 0.25, 0.25])

    wolvesID = [0,1]
    sheepsID = [2]
    blocksID = [3]

    numWolves = len(wolvesID)
    numSheeps = len(sheepsID)
    numBlocks = len(blocksID)

    sheepMaxSpeed = 1.3
    wolfMaxSpeed =1.0
    blockMaxSpeed = None



    agentsMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    entitiesSizeList = [wolfSize]* numWolves + [sheepSize] * numSheeps + [blockSize]* numBlocks
    entityMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed]* numBlocks
    entitiesMovableList = [True]* numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)

    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    makePropertyList=MakePropertyList(transferNumberListToStr)

    #changeCollisionReleventParameter
    dmin=parameterDict['dmin']
    dmax=parameterDict['dmax']
    width=parameterDict['width']
    midpoint=parameterDict['midpoint']
    power=parameterDict['power']
    timeconst=parameterDict['timeconst']
    dampratio=parameterDict['dampratio']

    geomIds=[1,2,3,4]
    keyNameList=['@solimp','@solref']
    valueList=[[[dmin,dmax,width,midpoint,power],[timeconst,dampratio]]]*len(geomIds)
    collisionParameter=makePropertyList(geomIds,keyNameList,valueList)

#changeSize
    # geomIds=[1,2]
    # keyNameList=['@size']
    # valueList=[[[0.075,0.075]],[[0.1,0.1]]]
    # sizeParameter=makePropertyList(geomIds,keyNameList,valueList)

#load env xml and change some geoms' property
    physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','1leased.xml')

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())

    # envXmlPropertyDictList=[collisionParameter]
    # changeEnvXmlPropertFuntionyList=[changeGeomProperty]
    # for propertyDict,changeXmlProperty in zip(envXmlPropertyDictList,changeEnvXmlPropertFuntionyList):
    #     envXmlDict=changeXmlProperty(envXmlDict,propertyDict)

    envXml=xmltodict.unparse(envXmlDict)
    # print(envXmlDict['mujoco']['worldbody']['body'][0])
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)


    # MDP function
    qPosInit = [0, 0]*numAgents
    qVelInit = [0, 0]*numAgents

    qVelInitNoise = 0
    qPosInitNoise = 1

    # reset=ResetUniformWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents, numBlocks,qPosInitNoise, qVelInitNoise)

    fixReset=ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents,numBlocks)
    blocksState=[[0,-0.8,0,0]]
    reset=lambda :fixReset([-0.5,0.8,-0.5,0,0.5,0.8],[0,0,0,0,0,0],blocksState)


    isTerminal = lambda state: False
    numSimulationFrames = 10
    visualize=True
    # physicsViewer=None
    dt=0.01
    damping=2.5
    reshapeAction=lambda action:action
    physicsViewer = mujoco.MjViewer(physicsSimulation)

    qPosInit = (0, ) * 24
    qVelInit = (0, ) * 24
    qPosInitNoise = 6
    qVelInitNoise = 4
    numAgent = 3
    tiedAgentId = [1, 2]
    ropePartIndex = list(range(3, 12))
    maxRopePartLength = 0.6
    reset = ResetUniformForLeashed(physicsSimulation, qPosInit, qVelInit, numAgent, tiedAgentId, \
            ropePartIndex, maxRopePartLength, qPosInitNoise, qVelInitNoise)
    reset()
    for i in range(6000):
        physicsViewer.render()

Пример #5

Показать файл

def main():
    debug = 0
    if debug:
        numWolves = 3
        numSheeps = 1
        numBlocks = 2
        saveAllmodels = False
        maxTimeStep = 25
        sheepSpeedMultiplier = 1
        sampleMethod = '5'
        learningRateSheepCritic = 0.005
        learningRateSheepActor = 0.005

    else:
        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = 3
        numSheeps = 1
        numBlocks = 2
        saveAllmodels = False
        maxTimeStep = 25
        sheepSpeedMultiplier = 1
        sampleMethod = condition['sampleMethod']
        learningRateSheepCritic = condition['sheepLr']
        learningRateSheepActor = condition['sheepLr']

    print(
        "maddpg: {} wolves, {} sheep, {} blocks, {} episodes with {} steps each eps, sheepSpeed: {}x,  sampleMethod: {}"
        .format(numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
                sheepSpeedMultiplier, str(sampleMethod)))

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier

    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    rewardWolfIndivid = RewardWolfIndividual(wolvesID, sheepsID,
                                             entitiesSizeList, isCollision)
    rewardWolfShared = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                  isCollision)

    rewardFuncIndividWolf = lambda state, action, nextState: \
        list(rewardWolfIndivid(state, action, nextState)) + list(rewardSheep(state, action, nextState))
    rewardFuncSharedWolf = lambda state, action, nextState: \
        list(rewardWolfShared(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    reset = ResetMultiAgentChasing(numAgents, numBlocks)
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]

    reshapeAction = ReshapeAction()
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID,
                                        entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                          entitiesSizeList, getCollisionForce,
                                          getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState,
                                    getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeAction,
                                       applyActionForce, applyEnvironForce,
                                       integrateState)

    isTerminal = lambda state: [False] * numAgents
    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    #------------ models ------------------------

    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsListShared = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]
    sheepModel = [modelsListShared[sheepID] for sheepID in sheepsID]
    modelsListIndivid = [
        buildMADDPGModels(layerWidth, agentID) for agentID in wolvesID
    ] + sheepModel

    trainCriticBySASRWolf = TrainCriticBySASR(
        actByPolicyTargetNoisyForNextState, learningRateWolfCritic, gamma)
    trainCriticWolf = TrainCritic(trainCriticBySASRWolf)
    trainCriticBySASRSheep = TrainCriticBySASR(
        actByPolicyTargetNoisyForNextState, learningRateSheepCritic, gamma)
    trainCriticSheep = TrainCritic(trainCriticBySASRSheep)

    trainActorFromSAWolf = TrainActorFromSA(learningRateWolfActor)
    trainActorWolf = TrainActor(trainActorFromSAWolf)

    trainActorFromSASheep = TrainActorFromSA(learningRateSheepActor)
    trainActorSheep = TrainActor(trainActorFromSASheep)

    trainActorList = [trainActorWolf] * numWolves + [trainActorSheep
                                                     ] * numSheeps
    trainCriticList = [trainCriticWolf] * numWolves + [trainCriticSheep
                                                       ] * numSheeps

    paramUpdateInterval = 1  #
    updateParameters = UpdateParameters(paramUpdateInterval, tau)
    sampleBatchFromMemory = SampleFromMemory(minibatchSize)

    learnInterval = 100
    learningStartBufferSize = minibatchSize * maxTimeStep
    startLearn = StartLearn(learningStartBufferSize, learnInterval)

    trainMADDPGModelsIndivid = TrainMADDPGModelsWithIterSheep(
        updateParameters, trainActorList, trainCriticList,
        sampleBatchFromMemory, startLearn, modelsListIndivid)
    trainMADDPGModelsShared = TrainMADDPGModelsWithIterSheep(
        updateParameters, trainActorList, trainCriticList,
        sampleBatchFromMemory, startLearn, modelsListShared)

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    actOneStepIndivid = lambda allAgentsStates, runTime: [
        actOneStepOneModel(model, allAgentsStates)
        for model in modelsListIndivid
    ]
    actOneStepShared = lambda allAgentsStates, runTime: [
        actOneStepOneModel(model, allAgentsStates)
        for model in modelsListShared
    ]

    sampleOneStepIndivid = SampleOneStep(transit, rewardFuncIndividWolf)
    sampleOneStepShared = SampleOneStep(transit, rewardFuncSharedWolf)

    runDDPGTimeStepIndivid = RunTimeStep(actOneStepIndivid,
                                         sampleOneStepIndivid,
                                         trainMADDPGModelsIndivid,
                                         observe=observe)
    runDDPGTimeStepShared = RunTimeStep(actOneStepShared,
                                        sampleOneStepShared,
                                        trainMADDPGModelsShared,
                                        observe=observe)

    runEpisodeIndivid = RunEpisode(reset, runDDPGTimeStepIndivid, maxTimeStep,
                                   isTerminal)
    runEpisodeShared = RunEpisode(reset, runDDPGTimeStepShared, maxTimeStep,
                                  isTerminal)

    getAgentModelIndivid = lambda agentId: lambda: trainMADDPGModelsIndivid.getTrainedModels(
    )[agentId]
    getModelListIndivid = [getAgentModelIndivid(i) for i in range(numAgents)]
    modelSaveRate = 1000
    individStr = 'individ'
    fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}Lr{}SampleMethod{}{}_agent".format(
        numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
        sheepSpeedMultiplier, learningRateSheepActor, sampleMethod, individStr)
    modelPath = os.path.join(dirName, '..', 'trainedModels',
                             'IterTrainSheep_evalSheeplrAndSampleMethod',
                             fileName)
    saveModelsIndivid = [
        SaveModel(modelSaveRate, saveVariables, getTrainedModel,
                  modelPath + str(i), saveAllmodels)
        for i, getTrainedModel in enumerate(getModelListIndivid)
    ]

    getAgentModelShared = lambda agentId: lambda: trainMADDPGModelsShared.getTrainedModels(
    )[agentId]
    getModelListShared = [getAgentModelShared(i) for i in range(numAgents)]
    individStr = 'shared'
    fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}Lr{}SampleMethod{}{}_agent".format(
        numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
        sheepSpeedMultiplier, learningRateSheepActor, sampleMethod, individStr)
    modelPath = os.path.join(dirName, '..', 'trainedModels',
                             'IterTrainSheep_evalSheeplrAndSampleMethod',
                             fileName)
    saveModelsShared = [
        SaveModel(modelSaveRate, saveVariables, getTrainedModel,
                  modelPath + str(i), saveAllmodels)
        for i, getTrainedModel in enumerate(getModelListShared)
    ]

    maddpgIterSheep = RunAlgorithmWithIterSheep(runEpisodeIndivid,
                                                runEpisodeShared, maxEpisode,
                                                saveModelsIndivid,
                                                saveModelsShared, sampleMethod,
                                                numAgents)

    replayBufferIndivid = getBuffer(bufferSize)
    replayBufferShared = getBuffer(bufferSize)

    meanRewardList, trajectory = maddpgIterSheep(replayBufferShared,
                                                 replayBufferIndivid)

Пример #6

Показать файл

Файл: runMyMADDPGchasing.py Проект: Frostwoods/ChasingWithRopeMujocoEnv

def main():
    debug = 1
    if debug:
        numWolves = 2
        numSheeps = 1
        numBlocks = 1
        saveAllmodels = True
        maxTimeStep = 25
        sheepSpeedMultiplier = 1
        individualRewardWolf = int(False)

    else:
        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = int(condition['numWolves'])
        numSheeps = int(condition['numSheeps'])
        numBlocks = int(condition['numBlocks'])

        maxTimeStep = int(condition['maxTimeStep'])
        sheepSpeedMultiplier = float(condition['sheepSpeedMultiplier'])
        individualRewardWolf = int(condition['individualRewardWolf'])

        saveAllmodels = False

    print("maddpg: {} wolves, {} sheep, {} blocks, {} episodes with {} steps each eps, sheepSpeed: {}x, wolfIndividualReward: {}, save all models: {}".
          format(numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individualRewardWolf, str(saveAllmodels)))


    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [blockSize] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier

    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList, isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)

    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    reset = ResetMultiAgentChasing(numAgents, numBlocks)
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

    reshapeAction = ReshapeAction()
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,getCollisionForce, getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeAction, applyActionForce, applyEnvironForce, integrateState)

    isTerminal = lambda state: [False]* numAgents
    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

#------------ models ------------------------

    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]

    trainCriticBySASR = TrainCriticBySASR(actByPolicyTargetNoisyForNextState, learningRateCritic, gamma)
    trainCritic = TrainCritic(trainCriticBySASR)
    trainActorFromSA = TrainActorFromSA(learningRateActor)
    trainActor = TrainActor(trainActorFromSA)

    paramUpdateInterval = 1 #
    updateParameters = UpdateParameters(paramUpdateInterval, tau)
    sampleBatchFromMemory = SampleFromMemory(minibatchSize)

    learnInterval = 100
    learningStartBufferSize = minibatchSize * maxTimeStep
    startLearn = StartLearn(learningStartBufferSize, learnInterval)

    trainMADDPGModels = TrainMADDPGModelsWithBuffer(updateParameters, trainActor, trainCritic, sampleBatchFromMemory, startLearn, modelsList)

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    actOneStep = lambda allAgentsStates, runTime: [actOneStepOneModel(model, allAgentsStates) for model in modelsList]

    sampleOneStep = SampleOneStep(transit, rewardFunc)
    runDDPGTimeStep = RunTimeStep(actOneStep, sampleOneStep, trainMADDPGModels, observe = observe)

    runEpisode = RunEpisode(reset, runDDPGTimeStep, maxTimeStep, isTerminal)

    getAgentModel = lambda agentId: lambda: trainMADDPGModels.getTrainedModels()[agentId]
    getModelList = [getAgentModel(i) for i in range(numAgents)]
    modelSaveRate = 1000
    individStr = 'individ' if individualRewardWolf else 'shared'
    fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
        numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)

    modelPath = os.path.join(dirName, '..', 'trainedModels', 'maddpg', fileName)
    saveModels = [SaveModel(modelSaveRate, saveVariables, getTrainedModel, modelPath+ str(i), saveAllmodels) for i, getTrainedModel in enumerate(getModelList)]

    maddpg = RunAlgorithm(runEpisode, maxEpisode, saveModels, numAgents)
    replayBuffer = getBuffer(bufferSize)
    meanRewardList, trajectory = maddpg(replayBuffer)

Пример #7

Показать файл

def main():
    debug = 1
    if debug:
        numWolves = 3
        numSheeps = 1
        numBlocks = 2
        timeconst = 0.5
        dampratio = 0.2
        saveTraj = False
        visualizeTraj = True
        maxTimeStep = 25
        sheepSpeedMultiplier = 1.0
        individualRewardWolf = 0
        hasWalls = 1.5
        dt = 0.05

        visualizeMujoco = True

    else:
        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = int(condition['numWolves'])
        numSheeps = int(condition['numSheeps'])
        numBlocks = int(condition['numBlocks'])
        saveTraj = True
        visualizeTraj = False
        maxTimeStep = 50
        sheepSpeedMultiplier = 1.25
        individualRewardWolf = 1

    print(
        "maddpg: {} wolves, {} sheep, {} blocks, saveTraj: {}, visualize: {}".
        format(numWolves, numSheeps, numBlocks, str(saveTraj),
               str(visualizeTraj)))

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks

    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList,
                                          isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                isCollision)

    rewardFunc = lambda state, action, nextState: list(
        rewardWolf(state, action, nextState)) + list(
            rewardSheep(state, action, nextState))
    dirName = os.path.dirname(__file__)
    # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','numBlocks=1_numSheeps=1_numWolves=1.xml')
    if hasWalls:
        # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','hasWalls=1_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'.format(numBlocks,numSheeps,numWolves,timeconst,dampratio))
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'dt={}'.format(dt),
            'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(hasWalls, numBlocks, numSheeps, numWolves, timeconst,
                    dampratio))

        # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','crossTest3w1s2b.xml')

    else:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(numBlocks, numSheeps, numWolves, timeconst, dampratio))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXml = xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    print(physicsSimulation.model.body_pos)

    # print(dir(physicsSimulation.model))
    # print(dir(physicsSimulation.data),physicsSimulation.dataphysicsSimulation.data)

    # print(physicsSimulation.data.qpos,dir(physicsSimulation.data.qpos))
    # print(physicsSimulation.data.qpos,dir(physicsSimulation.data.qpos))
    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0 * hasWalls
    qPosInitNoise = 0.8 * hasWalls
    getBlockRandomPos = lambda: np.random.uniform(-0.7 * hasWalls, +0.7 *
                                                  hasWalls, 2)
    getBlockSpeed = lambda: np.zeros(2)

    numQPos = len(physicsSimulation.data.qpos)
    numQVel = len(physicsSimulation.data.qvel)

    sampleAgentsQPos = lambda: np.asarray(qPosInit) + np.random.uniform(
        low=-qPosInitNoise, high=qPosInitNoise, size=numQPos)
    sampleAgentsQVel = lambda: np.asarray(qVelInit) + np.random.uniform(
        low=-qVelInitNoise, high=qVelInitNoise, size=numQVel)

    minDistance = 0.2 + 2 * blockSize
    isOverlap = IsOverlap(minDistance)

    sampleBlockState = SampleBlockState(numBlocks, getBlockRandomPos,
                                        getBlockSpeed, isOverlap)

    reset = ResetUniformWithoutXPos(physicsSimulation, numAgents, numBlocks,
                                    sampleAgentsQPos, sampleAgentsQVel,
                                    sampleBlockState)

    damping = 2.5
    numSimulationFrames = int(0.1 / dt)
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed
                                                       ] * numSheeps
    reshapeAction = ReshapeAction()
    isTerminal = lambda state: False
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numAgents,
                                            numSimulationFrames,
                                            damping * dt * numSimulationFrames,
                                            agentsMaxSpeedList,
                                            visualizeMujoco, isTerminal,
                                            reshapeAction)

    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit,
                                        isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]

    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]

    # individStr = 'individ' if individualRewardWolf else 'shared'
    # fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
    #     numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)

    # wolvesModelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg', fileName + str(i) + '60000eps') for i in wolvesID]
    # [restoreVariables(model, path) for model, path in zip(wolvesModel, wolvesModelPaths)]
    #
    # actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    # policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]

    # modelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg_ExpEpsLengthAndSheepSpeed', fileName + str(i)) for i in
    #               range(numAgents)]
    # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG')
    # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','timeconst='+str(timeconst)+'dampratio='+str(dampratio))
    if hasWalls:
        # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','hasWalls=1'+'numBlocks='+str(numBlocks)+'numSheeps='+str(numSheeps)+'numWolves='+str(numWolves)+'timeconst='+str(timeconst)+'dampratio='+str(dampratio))
        # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','dt={}'.format(dt),'hasWalls='+str(hasWalls)+'numBlocks='+str(numBlocks)+'numSheeps='+str(numSheeps)+'numWolves='+str(numWolves)+'timeconst='+str(timeconst)+'dampratio='+str(dampratio)+'individualRewardWolf='+str(individualRewardWolf)+'sheepSpeedMultiplier='+str(sheepSpeedMultiplier))
        # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','dt={}'.format(dt),'hasWalls='+str(hasWalls)+'numBlocks='+str(numBlocks)+'numSheeps='+str(numSheeps)+'numWolves='+str(numWolves)+'timeconst='+str(timeconst)+'dampratio='+str(dampratio)+'individualRewardWolf='+str(individualRewardWolf)+'sheepSpeedMultiplier='+str(sheepSpeedMultiplier))

        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPGeavluateWall',
            'dt={}'.format(dt),
            'hasWalls=' + str(hasWalls) + 'numBlocks=' + str(numBlocks) +
            'numSheeps=' + str(numSheeps) + 'numWolves=' + str(numWolves) +
            'timeconst=' + str(timeconst) + 'dampratio=' + str(dampratio) +
            'individualRewardWolf=' + str(individualRewardWolf) +
            'sheepSpeedMultiplier=' + str(sheepSpeedMultiplier))
        individStr = 'individ' if individualRewardWolf else 'shared'
        fileName = "maddpghasWalls={}{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
            hasWalls, numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, individStr)

    else:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPG',
            'numBlocks=' + str(numBlocks) + 'numSheeps=' + str(numSheeps) +
            'numWolves=' + str(numWolves) + 'timeconst=' + str(timeconst) +
            'dampratio=' + str(dampratio))
        fileName = "maddpg{}wolves{}sheep{}blocks60000episodes25stepSheepSpeed1.0shared_agent".format(
            numWolves, numSheeps, numBlocks)
    modelPaths = [
        os.path.join(modelFolder, fileName + str(i) + '60000eps')
        for i in range(numAgents)
    ]

    [
        restoreVariables(model, path)
        for model, path in zip(modelsList, modelPaths)
    ]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [
        actOneStepOneModel(model, observe(allAgentsStates))
        for model in modelsList
    ]

    trajList = []
    numTrajToSample = 50
    for i in range(numTrajToSample):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))

    # saveTraj
    if saveTraj:
        trajFileName = "maddpg{}wolves{}sheep{}blocks{}eps{}stepSheepSpeed{}{}Traj".format(
            numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, individStr)
        trajSavePath = os.path.join(dirName, '..', 'trajectory', trajFileName)
        saveToPickle(trajList, trajSavePath)

    # visualize
    if visualizeTraj:
        entitiesColorList = [wolfColor] * numWolves + [
            sheepColor
        ] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        print(trajList[0][0], '!!!3', trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)

Пример #8

Показать файл

Файл: runMujocoEnvWithRopesMyMADDPGchasing.py Проект: Frostwoods/ChasingWithRopeMujocoEnv

def main():
    debug = 0
    if debug:

        damping = 0.0
        frictionloss = 0.4
        masterForce = 1.0

        numWolves = 1
        numSheeps = 1
        numMasters = 1
        saveAllmodels = True
        maxTimeStep = 25
        visualize = False

    else:
        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = 1
        numSheeps = 1
        numMasters = 1
        damping = float(condition['damping'])
        frictionloss = float(condition['frictionloss'])
        masterForce = float(condition['masterForce'])

        maxTimeStep = 25
        visualize = False
        saveAllmodels = True
    print(
        "maddpg: {} wolves, {} sheep, {} blocks, {} episodes with {} steps each eps,  save all models: {}"
        .format(numWolves, numSheeps, numMasters, maxEpisode, maxTimeStep,
                str(saveAllmodels)))
    print(damping, frictionloss, masterForce)

    modelFolder = os.path.join(
        dirName, '..', 'trainedModels', 'mujocoMADDPGLeasedFixedEnv2',
        'damping={}_frictionloss={}_masterForce={}'.format(
            damping, frictionloss, masterForce))

    if not os.path.exists(modelFolder):
        os.makedirs(modelFolder)

    numAgents = numWolves + numSheeps + numMasters
    numEntities = numAgents + numMasters
    wolvesID = [0]
    sheepsID = [1]
    masterID = [2]

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.075
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numMasters

    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    rewardMaster = lambda state, action, nextState: [
        -reward for reward in rewardWolf(state, action, nextState)
    ]
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))+list(rewardMaster(state, action, nextState))

    makePropertyList = MakePropertyList(transferNumberListToStr)

    geomIds = [1, 2, 3]
    keyNameList = [0, 1]
    valueList = [[damping, damping]] * len(geomIds)
    dampngParameter = makePropertyList(geomIds, keyNameList, valueList)

    changeJointDampingProperty = lambda envDict, geomPropertyDict: changeJointProperty(
        envDict, geomPropertyDict, '@damping')

    geomIds = [1, 2, 3]
    keyNameList = [0, 1]
    valueList = [[frictionloss, frictionloss]] * len(geomIds)
    frictionlossParameter = makePropertyList(geomIds, keyNameList, valueList)
    changeJointFrictionlossProperty = lambda envDict, geomPropertyDict: changeJointProperty(
        envDict, geomPropertyDict, '@frictionloss')

    physicsDynamicsPath = os.path.join(dirName, '..', '..', 'environment',
                                       'mujocoEnv', 'rope', 'leasedNew.xml')
    with open(physicsDynamicsPath) as f:
        xml_string = f.read()

    envXmlDict = xmltodict.parse(xml_string.strip())
    envXmlPropertyDictList = [dampngParameter, frictionlossParameter]
    changeEnvXmlPropertFuntionyList = [
        changeJointDampingProperty, changeJointFrictionlossProperty
    ]
    for propertyDict, changeXmlProperty in zip(
            envXmlPropertyDictList, changeEnvXmlPropertFuntionyList):
        envXmlDict = changeXmlProperty(envXmlDict, propertyDict)

    envXml = xmltodict.unparse(envXmlDict)
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)

    qPosInit = (0, ) * 24
    qVelInit = (0, ) * 24
    qPosInitNoise = 0.6
    qVelInitNoise = 0
    numAgent = 3
    tiedAgentId = [0, 2]
    ropePartIndex = list(range(3, 12))
    maxRopePartLength = 0.06
    reset = ResetUniformWithoutXPosForLeashed(physicsSimulation, qPosInit,
                                              qVelInit, numAgent, tiedAgentId,
                                              ropePartIndex, maxRopePartLength,
                                              qPosInitNoise, qVelInitNoise)

    numSimulationFrames = 10
    isTerminal = lambda state: False
    reshapeActionList = [
        ReshapeAction(5),
        ReshapeAction(5),
        ReshapeAction(masterForce)
    ]
    transit = TransitionFunctionWithoutXPos(physicsSimulation,
                                            numSimulationFrames, visualize,
                                            isTerminal, reshapeActionList)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              masterID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]
    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    #------------ models ------------------------

    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]

    trainCriticBySASR = TrainCriticBySASR(actByPolicyTargetNoisyForNextState,
                                          learningRateCritic, gamma)
    trainCritic = TrainCritic(trainCriticBySASR)
    trainActorFromSA = TrainActorFromSA(learningRateActor)
    trainActor = TrainActor(trainActorFromSA)

    paramUpdateInterval = 1  #
    updateParameters = UpdateParameters(paramUpdateInterval, tau)
    sampleBatchFromMemory = SampleFromMemory(minibatchSize)

    learnInterval = 100
    learningStartBufferSize = minibatchSize * maxTimeStep
    startLearn = StartLearn(learningStartBufferSize, learnInterval)

    trainMADDPGModels = TrainMADDPGModelsWithBuffer(updateParameters,
                                                    trainActor, trainCritic,
                                                    sampleBatchFromMemory,
                                                    startLearn, modelsList)

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    actOneStep = lambda allAgentsStates, runTime: [
        actOneStepOneModel(model, allAgentsStates) for model in modelsList
    ]

    sampleOneStep = SampleOneStep(transit, rewardFunc)
    runDDPGTimeStep = RunTimeStep(actOneStep,
                                  sampleOneStep,
                                  trainMADDPGModels,
                                  observe=observe)

    runEpisode = RunEpisode(reset, runDDPGTimeStep, maxTimeStep, isTerminal)

    getAgentModel = lambda agentId: lambda: trainMADDPGModels.getTrainedModels(
    )[agentId]
    getModelList = [getAgentModel(i) for i in range(numAgents)]
    modelSaveRate = 1000
    fileName = "maddpg{}episodes{}step_agent".format(maxEpisode, maxTimeStep)

    modelPath = os.path.join(modelFolder, fileName)

    saveModels = [
        SaveModel(modelSaveRate, saveVariables, getTrainedModel,
                  modelPath + str(i), saveAllmodels)
        for i, getTrainedModel in enumerate(getModelList)
    ]

    maddpg = RunAlgorithm(runEpisode, maxEpisode, saveModels, numAgents)
    replayBuffer = getBuffer(bufferSize)
    meanRewardList, trajectory = maddpg(replayBuffer)

Пример #9

Показать файл

def simuliateOneParameter(parameterOneCondiiton, evalNum, randomSeed):
    saveTraj = True
    visualizeTraj = False
    visualizeMujoco = False

    numWolves = parameterOneCondiiton['numWolves']
    numSheeps = parameterOneCondiiton['numSheeps']
    numBlocks = parameterOneCondiiton['numBlocks']
    timeconst = parameterOneCondiiton['timeconst']
    dampratio = parameterOneCondiiton['dampratio']
    maxTimeStep = parameterOneCondiiton['maxTimeStep']
    sheepSpeedMultiplier = parameterOneCondiiton['sheepSpeedMultiplier']
    individualRewardWolf = parameterOneCondiiton['individualRewardWolf']
    hasWalls = parameterOneCondiiton['hasWalls']
    dt = parameterOneCondiiton['dt']

    print(
        "maddpg: {} wolves, {} sheep, {} blocks, saveTraj: {}, visualize: {}".
        format(numWolves, numSheeps, numBlocks, str(saveTraj),
               str(visualizeTraj)))

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks

    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList,
                                          isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                isCollision)

    rewardFunc = lambda state, action, nextState: list(
        rewardWolf(state, action, nextState)) + list(
            rewardSheep(state, action, nextState))
    dirName = os.path.dirname(__file__)

    if hasWalls:

        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'dt={}'.format(dt),
            'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(hasWalls, numBlocks, numSheeps, numWolves, timeconst,
                    dampratio))

    else:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(numBlocks, numSheeps, numWolves, timeconst, dampratio))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXml = xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    # print(physicsSimulation.model.body_pos)

    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0 * hasWalls
    qPosInitNoise = 0.8 * hasWalls
    getBlockRandomPos = lambda: np.random.uniform(-0.7 * hasWalls, +0.7 *
                                                  hasWalls, 2)
    getBlockSpeed = lambda: np.zeros(2)

    numQPos = len(physicsSimulation.data.qpos)
    numQVel = len(physicsSimulation.data.qvel)

    sampleAgentsQPos = lambda: np.asarray(qPosInit) + np.random.uniform(
        low=-qPosInitNoise, high=qPosInitNoise, size=numQPos)
    sampleAgentsQVel = lambda: np.asarray(qVelInit) + np.random.uniform(
        low=-qVelInitNoise, high=qVelInitNoise, size=numQVel)

    minDistance = 0.2 + 2 * blockSize
    isOverlap = IsOverlap(minDistance)

    sampleBlockState = SampleBlockState(numBlocks, getBlockRandomPos,
                                        getBlockSpeed, isOverlap)

    reset = ResetUniformWithoutXPos(physicsSimulation, numAgents, numBlocks,
                                    sampleAgentsQPos, sampleAgentsQVel,
                                    sampleBlockState)

    damping = 2.5
    numSimulationFrames = int(0.1 / dt)
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed
                                                       ] * numSheeps
    reshapeAction = ReshapeAction()
    isTerminal = lambda state: False
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numAgents,
                                            numSimulationFrames,
                                            damping * dt * numSimulationFrames,
                                            agentsMaxSpeedList,
                                            visualizeMujoco, isTerminal,
                                            reshapeAction)

    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit,
                                        isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]

    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]

    if hasWalls:

        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPGeavluateWall',
            'dt={}'.format(dt),
            'hasWalls=' + str(hasWalls) + 'numBlocks=' + str(numBlocks) +
            'numSheeps=' + str(numSheeps) + 'numWolves=' + str(numWolves) +
            'timeconst=' + str(timeconst) + 'dampratio=' + str(dampratio) +
            'individualRewardWolf=' + str(individualRewardWolf) +
            'sheepSpeedMultiplier=' + str(sheepSpeedMultiplier))
        individStr = 'individ' if individualRewardWolf else 'shared'
        fileName = "maddpghasWalls={}{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
            hasWalls, numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, individStr)

    else:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPG',
            'numBlocks=' + str(numBlocks) + 'numSheeps=' + str(numSheeps) +
            'numWolves=' + str(numWolves) + 'timeconst=' + str(timeconst) +
            'dampratio=' + str(dampratio))
        fileName = "maddpg{}wolves{}sheep{}blocks60000episodes25stepSheepSpeed1.0shared_agent".format(
            numWolves, numSheeps, numBlocks)
    modelPaths = [
        os.path.join(modelFolder, fileName + str(i) + '60000eps')
        for i in range(numAgents)
    ]

    [
        restoreVariables(model, path)
        for model, path in zip(modelsList, modelPaths)
    ]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [
        actOneStepOneModel(model, observe(allAgentsStates))
        for model in modelsList
    ]

    startTime = time.time()
    trajList = []
    for i in range(evalNum):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))
    endTime = time.time()
    print("Time taken {} seconds to generate {} trajectories".format(
        (endTime - startTime), evalNum))

    # saveTraj
    if saveTraj:

        trajectoryFolder = os.path.join(dirName, '..', 'trajectory',
                                        'evluateWall')
        if not os.path.exists(trajectoryFolder):
            os.makedirs(trajectoryFolder)

        trajectorySaveExtension = '.pickle'
        fixedParameters = {'randomSeed': randomSeed, 'evalNum': evalNum}
        generateTrajectorySavePath = GetSavePath(trajectoryFolder,
                                                 trajectorySaveExtension,
                                                 fixedParameters)
        trajectorySavePath = generateTrajectorySavePath(parameterOneCondiiton)

        saveToPickle(trajList, trajectorySavePath)

    # visualize
    if visualizeTraj:
        entitiesColorList = [wolfColor] * numWolves + [
            sheepColor
        ] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        print(trajList[0][0], '!!!3', trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)

    return endTime - startTime

Пример #10

Показать файл

Файл: runMADDPGchasingMujocoEnvWithWalls.py Проект: Frostwoods/ChasingWithRopeMujocoEnv

def main():
    debug = 1
    if debug:

        numWolves = 2
        numSheeps = 4
        numBlocks = 2
        hasWalls = 1.0

        dt = 0.02
        maxTimeStep = 25
        sheepSpeedMultiplier = 1.0
        individualRewardWolf = int(False)

        mujocoVisualize = False
        saveAllmodels = True

    else:

        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = int(condition['numWolves'])
        numSheeps = int(condition['numSheeps'])
        numBlocks = int(condition['numBlocks'])
        hasWalls = float(condition['hasWalls'])

        dt = float(condition['dt'])
        maxTimeStep = int(condition['maxTimeStep'])
        sheepSpeedMultiplier = float(condition['sheepSpeedMultiplier'])
        individualRewardWolf = int(condition['individualRewardWolf'])

        saveAllmodels = True
        mujocoVisualize = False

    print(
        "maddpg: {} wolves, {} sheep, {} blocks, {} episodes with {} steps each eps, sheepSpeed: {}x, wolfIndividualReward: {}, save all models: {}"
        .format(numWolves, numSheeps, numBlocks, maxEpisode,
                maxTimeStep, sheepSpeedMultiplier, individualRewardWolf,
                str(saveAllmodels)))

    dataMainFolder = os.path.join(dirName, '..', 'trainedModels',
                                  'mujocoMADDPG')
    modelFolder = os.path.join(
        dataMainFolder, 'dt={}'.format(dt),
        'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}_individualRewardWolf={}_sheepSpeedMultiplier={}.xml'
        .format(hasWalls, numBlocks, numSheeps, numWolves,
                individualRewardWolf, sheepSpeedMultiplier))

    if not os.path.exists(modelFolder):
        os.makedirs(modelFolder)

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = lambda state: 0  #PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)
    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList,
                                          isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                isCollision)
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    #------------ mujocoEnv ------------------------

    physicsDynamicsPath = os.path.join(
        dirName, '..', '..', 'environment', 'mujocoEnv', 'dt={}'.format(dt),
        'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}.xml'.format(
            hasWalls, numBlocks, numSheeps, numWolves))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXml = xmltodict.unparse(envXmlDict)
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)

    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0 * hasWalls
    qPosInitNoise = 0.8 * hasWalls
    getBlockRandomPos = lambda: np.random.uniform(-0.7 * hasWalls, +0.7 *
                                                  hasWalls, 2)
    getBlockSpeed = lambda: np.zeros(2)

    numQPos = len(physicsSimulation.data.qpos)
    numQVel = len(physicsSimulation.data.qvel)

    sampleAgentsQPos = lambda: np.asarray(qPosInit) + np.random.uniform(
        low=-qPosInitNoise, high=qPosInitNoise, size=numQPos)
    sampleAgentsQVel = lambda: np.asarray(qVelInit) + np.random.uniform(
        low=-qVelInitNoise, high=qVelInitNoise, size=numQVel)

    minDistance = 0.2 + 2 * blockSize  #>2*wolfSize+2*blockSize
    isOverlap = IsOverlap(minDistance)
    sampleBlockState = SampleBlockState(numBlocks, getBlockRandomPos,
                                        getBlockSpeed, isOverlap)

    reset = ResetUniformWithoutXPos(physicsSimulation, numAgents, numBlocks,
                                    sampleAgentsQPos, sampleAgentsQVel,
                                    sampleBlockState)

    transitTimePerStep = 0.1
    numSimulationFrames = int(transitTimePerStep / dt)

    isTerminal = lambda state: [False] * numAgents
    reshapeAction = ReshapeAction()
    transit = TransitionFunction(physicsSimulation, numAgents,
                                 numSimulationFrames, mujocoVisualize,
                                 isTerminal, reshapeAction)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]
    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    #------------ models ------------------------

    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]

    trainCriticBySASR = TrainCriticBySASR(actByPolicyTargetNoisyForNextState,
                                          learningRateCritic, gamma)
    trainCritic = TrainCritic(trainCriticBySASR)
    trainActorFromSA = TrainActorFromSA(learningRateActor)
    trainActor = TrainActor(trainActorFromSA)

    paramUpdateInterval = 1  #
    updateParameters = UpdateParameters(paramUpdateInterval, tau)
    sampleBatchFromMemory = SampleFromMemory(minibatchSize)

    learnInterval = 100
    learningStartBufferSize = minibatchSize * maxTimeStep
    startLearn = StartLearn(learningStartBufferSize, learnInterval)

    trainMADDPGModels = TrainMADDPGModelsWithBuffer(updateParameters,
                                                    trainActor, trainCritic,
                                                    sampleBatchFromMemory,
                                                    startLearn, modelsList)

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    actOneStep = lambda allAgentsStates, runTime: [
        actOneStepOneModel(model, allAgentsStates) for model in modelsList
    ]

    sampleOneStep = SampleOneStep(transit, rewardFunc)
    runDDPGTimeStep = RunTimeStep(actOneStep,
                                  sampleOneStep,
                                  trainMADDPGModels,
                                  observe=observe)

    runEpisode = RunEpisode(reset, runDDPGTimeStep, maxTimeStep, isTerminal)

    getAgentModel = lambda agentId: lambda: trainMADDPGModels.getTrainedModels(
    )[agentId]
    getModelList = [getAgentModel(i) for i in range(numAgents)]
    modelSaveRate = 1000
    individStr = 'individ' if individualRewardWolf else 'shared'
    fileName = "maddpghasWalls={}{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
        hasWalls, numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
        sheepSpeedMultiplier, individStr)

    modelPath = os.path.join(modelFolder, fileName)

    saveModels = [
        SaveModel(modelSaveRate, saveVariables, getTrainedModel,
                  modelPath + str(i), saveAllmodels)
        for i, getTrainedModel in enumerate(getModelList)
    ]

    maddpg = RunAlgorithm(runEpisode, maxEpisode, saveModels, numAgents)
    replayBuffer = getBuffer(bufferSize)
    meanRewardList, trajectory = maddpg(replayBuffer)