def simuliateOneParameter(parameterDict, evalNum, randomSeed, dt):
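    # Roll out `evalNum` impulse-response trajectories in a two-agent MuJoCo collision
    # environment whose contact (solimp/solref) parameters are taken from parameterDict,
    # then pickle the resulting trajectory list.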
    mujocoVisualize = False
    demoVisualize = False

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2

    wolfColor = np.array([0.85, 0.35, 0.35])
    sheepColor = np.array([0.35, 0.85, 0.35])
    blockColor = np.array([0.25, 0.25, 0.25])

    wolvesID = [0]
    sheepsID = [1]
    blocksID = [2]

    numWolves = len(wolvesID)
    numSheeps = len(sheepsID)
    numBlocks = len(blocksID)

    sheepMaxSpeed = 1.3
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None

    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed
                                                       ] * numSheeps

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    makePropertyList = MakePropertyList(transferNumberListToStr)

    # change collision-relevant parameters
    dmin = parameterDict['dmin']
    dmax = parameterDict['dmax']
    width = parameterDict['width']
    midpoint = parameterDict['midpoint']
    power = parameterDict['power']
    timeconst = parameterDict['timeconst']
    dampratio = parameterDict['dampratio']
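
    # In MuJoCo, (dmin, dmax, width, midpoint, power) form a geom's solimp attribute and
    # (timeconst, dampratio) its solref attribute; together they control contact softness.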

    geomIds = [1, 2, 3]
    keyNameList = ['@solimp', '@solref']
    valueList = [[[dmin, dmax, width, midpoint, power], [timeconst, dampratio]]
                 ] * len(geomIds)
    collisionParameter = makePropertyList(geomIds, keyNameList, valueList)

    #changeSize
    # geomIds=[1,2]
    # keyNameList=['@size']
    # valueList=[[[0.075,0.075]],[[0.1,0.1]]]
    # sizeParameter=makePropertyList(geomIds,keyNameList,valueList)

    # load env xml and change some geoms' properties
    physicsDynamicsPath = os.path.join(
        dirName, '..', '..', 'environment', 'mujocoEnv',
        'variousCollision_numBlocks=1_numSheeps=1_numWolves=1dt={}.xml'.format(
            dt))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())

    envXmlPropertyDictList = [collisionParameter]
    changeEnvXmlPropertFuntionyList = [changeGeomProperty]
    for propertyDict, changeXmlProperty in zip(
            envXmlPropertyDictList, changeEnvXmlPropertFuntionyList):
        envXmlDict = changeXmlProperty(envXmlDict, propertyDict)

    envXml = xmltodict.unparse(envXmlDict)
    # print(envXmlDict['mujoco']['worldbody']['body'][0])
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)

    # MDP function
    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents

    qVelInitNoise = 0
    qPosInitNoise = 1

    # reset=ResetUniformWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents, numBlocks,qPosInitNoise, qVelInitNoise)
    # fixReset=ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents,numBlocks)
    # blocksState=[[0,-0.8,0,0]]
    # reset=lambda :fixReset([-0.5,0.8,-0.5,0,0.5,0.8],[0,0,0,0,0,0],blocksState)

    isTerminal = lambda state: False
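    # Each environment step simulates a fixed 0.1 s of physics, so the number of MuJoCo
    # frames per step scales inversely with the physics timestep dt.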
    numSimulationFrames = int(0.1 / dt)
    print(numSimulationFrames)
    # dt=0.01
    damping = 2.5
    reshapeAction = lambda action: action
    # transit = TransitionFunctionWithoutXPos(physicsSimulation,numAgents , numSimulationFrames,damping*dt/numSimulationFrames,agentsMaxSpeedList,mujocoVisualize,reshapeAction)

    maxRunningSteps = 10

    # sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, rewardFunc, reset)
    # observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    # observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]
    # initObsForParams = observe(reset())
    # obsShape = [initObsForParams[obsID].shape for obsID in range(len(initObsForParams))]

    class ImpulsePolicy(object):
        """docstring for c"""
        def __init__(self, initAction):
            self.initAction = initAction

        def __call__(self, state, timeStep):
            action = [[0, 0], [0, 0]]
            if timeStep == 0:
                action = self.initAction
            return action

    worldDim = 2
    trajList = []
    startTime = time.time()
    for i in range(evalNum):
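        # Seed each rollout so the randomly drawn initial heading, speed, and impulse
        # force are reproducible across parameter settings.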

        np.random.seed(randomSeed + i)
        initSpeedDirection = np.random.uniform(-np.pi / 2, np.pi / 2, 1)[0]
        initSpeed = np.random.uniform(0, 1, 1)[0]
        initActionDirection = np.random.uniform(-np.pi / 2, np.pi / 2, 1)[0]
        initForce = np.random.uniform(0, 5, 1)[0]
        # print(initSpeedDirection,initSpeed,initActionDirection,initForce)

        fixReset = ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit,
                                       numAgents, numBlocks)
        blocksState = [[0, 0, 0, 0]]  #posX posY velX velY
        reset = lambda: fixReset([-0.28, 0, 8, 8], [
            initSpeed * np.cos(initSpeedDirection), initSpeed * np.sin(
                initSpeedDirection), 0, 0
        ], blocksState)

        transit = TransitionFunctionWithoutXPos(
            physicsSimulation, numAgents, numSimulationFrames,
            damping * dt * numSimulationFrames, agentsMaxSpeedList,
            mujocoVisualize, isTerminal, reshapeAction)

        initAction = [[
            initForce * np.cos(initActionDirection),
            initForce * np.sin(initActionDirection)
        ], [0, 0]]
        impulsePolicy = ImpulsePolicy(initAction)

        sampleTrajectory = SampleTrajectory(maxRunningSteps, transit,
                                            isTerminal, rewardFunc, reset)

        traj = sampleTrajectory(impulsePolicy)
        # print('traj',traj[0])
        trajList.append(traj)

    # saveTraj
    # print(trajList)
    saveTraj = True
    if saveTraj:
        # trajSavePath = os.path.join(dirName,'traj', 'evaluateCollision', 'CollisionMoveTransitDamplingCylinder.pickle')
        trajectorySaveExtension = '.pickle'
        fixedParameters = {
            'isMujoco': 1,
            'isCylinder': 1,
            'randomSeed': randomSeed,
            'evalNum': evalNum
        }
        trajectoriesSaveDirectory = trajSavePath = os.path.join(
            dirName, '..', 'trajectory', 'variousCollsiondt={}'.format(dt))
        generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                                 trajectorySaveExtension,
                                                 fixedParameters)
        trajectorySavePath = generateTrajectorySavePath(parameterDict)
        saveToPickle(trajList, trajectorySavePath)

    # visualize

    if demoVisualize:
        entitiesColorList = [wolfColor] * numWolves + [
            sheepColor
        ] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
    endTime = time.time()
    print("Time taken {} seconds to generate {} trajectories".format(
        (endTime - startTime), evalNum))
def simuliateOneParameter(parameterDict):


    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2


    wolfColor = np.array([0.85, 0.35, 0.35])
    sheepColor = np.array([0.35, 0.85, 0.35])
    blockColor = np.array([0.25, 0.25, 0.25])

    wolvesID = [0,1]
    sheepsID = [2]
    blocksID = [3]

    numWolves = len(wolvesID)
    numSheeps = len(sheepsID)
    numBlocks = len(blocksID)

    sheepMaxSpeed = 1.3
    wolfMaxSpeed =1.0
    blockMaxSpeed = None



    agentsMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    entitiesSizeList = [wolfSize]* numWolves + [sheepSize] * numSheeps + [blockSize]* numBlocks
    entityMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed]* numBlocks
    entitiesMovableList = [True]* numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)

    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    makePropertyList=MakePropertyList(transferNumberListToStr)

    # change collision-relevant parameters
    dmin=parameterDict['dmin']
    dmax=parameterDict['dmax']
    width=parameterDict['width']
    midpoint=parameterDict['midpoint']
    power=parameterDict['power']
    timeconst=parameterDict['timeconst']
    dampratio=parameterDict['dampratio']

    geomIds=[1,2]
    keyNameList=['@solimp','@solref']
    valueList=[[[dmin,dmax,width,midpoint,power],[timeconst,dampratio]]]*len(geomIds)
    collisionParameter=makePropertyList(geomIds,keyNameList,valueList)

    # changeSize
    # geomIds=[1,2]
    # keyNameList=['@size']
    # valueList=[[[0.075,0.075]],[[0.1,0.1]]]
    # sizeParameter=makePropertyList(geomIds,keyNameList,valueList)

    # load env xml and change some geoms' properties
    # physicsDynamicsPath=os.path.join(dirName,'multiAgentcollision.xml')
    physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','multiAgentcollision.xml')
    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    print(envXmlDict)
    envXmlPropertyDictList=[collisionParameter]
    changeEnvXmlPropertFuntionyList=[changeGeomProperty]
    for propertyDict,changeXmlProperty in zip(envXmlPropertyDictList,changeEnvXmlPropertFuntionyList):
        envXmlDict=changeXmlProperty(envXmlDict,propertyDict)

    envXml=xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)


    # MDP function
    qPosInit = [0, 0]*numAgents
    qVelInit = [0, 0]*numAgents

    qVelInitNoise = 0
    qPosInitNoise = 1

    # reset=ResetUniformWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents, numBlocks,qPosInitNoise, qVelInitNoise)

    fixReset=ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents,numBlocks)
    blocksState=[[0,0,0,0]]
    reset=lambda :fixReset([-0.5,0.8,-0.5,0,0.5,0.8],[0,0,0,0,0,0],blocksState)


    isTerminal = lambda state: False
    numSimulationFrames = 1
    visualize=False
    # physicsViewer=None
    dt=0.1
    damping=2.5
    transit = TransitionFunctionWithoutXPos(physicsSimulation,numAgents , numSimulationFrames,damping*dt/numSimulationFrames,agentsMaxSpeedList,visualize,isTerminal)


    maxRunningSteps = 100
    sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, rewardFunc, reset)
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]
    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape for obsID in range(len(initObsForParams))]

    worldDim = 2
    evalNum=1

    trajectorySaveExtension = '.pickle'
    fixedParameters = {'isMujoco': 1,'isCylinder':1,'evalNum':evalNum}
    trajectoriesSaveDirectory=trajSavePath = os.path.join(dirName,'..','trajectory')
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parameterDict)


    if not os.path.isfile(trajectorySavePath):
        trajList =list()

        for i in range(evalNum):

            # policy =lambda state: [[-3,0] for agent in range(numAgents)]
            np.random.seed(i)
            # policy =lambda state: [np.random.uniform(-5,5,2) for agent in range(numAgents)]sss
            # policy =lambda state: [[0,1] for agent in range(numAgents)]
            policy =lambda state: [[1,0] ]
            # policy =lambda state: [[np.random.uniform(0,1,1),0] ,[np.random.uniform(-1,0,1),0] ]
            traj = sampleTrajectory(policy)
            # print(i,'traj',[state[1] for state in traj[:2]])
            # print(traj)
            trajList.append( traj)

        # saveTraj
        saveTraj = True
        if saveTraj:
            # trajSavePath = os.path.join(dirName,'traj', 'evaluateCollision', 'CollisionMoveTransitDamplingCylinder.pickle')

            saveToPickle(trajList, trajectorySavePath)

        # visualize

        # physicsViewer.render()
        visualize = True
        if visualize:
            entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [blockColor] * numBlocks
            render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
            render(trajList)
def generateSingleCondition(condition):
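    # Evaluate trained MADDPG agents in the leashed (rope) MuJoCo environment for one
    # (damping, frictionloss, masterForce) condition, optionally rendering the rollouts.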
    debug = 0
    if debug:


        damping=2.0
        frictionloss=0.0
        masterForce=1.0

        numWolves = 1
        numSheeps = 1
        numMasters = 1
        maxTimeStep = 25

        saveTraj=False
        saveImage=True
        visualizeMujoco=False
        visualizeTraj = True
        makeVideo=True
    else:

        # print(sys.argv)
        # condition = json.loads(sys.argv[1])
        damping = float(condition['damping'])
        frictionloss = float(condition['frictionloss'])
        masterForce = float(condition['masterForce'])

        numWolves = 1
        numSheeps = 1
        numMasters = 1
        maxTimeStep = 25

        saveTraj=False
        saveImage=True
        visualizeMujoco=False
        visualizeTraj = True
        makeVideo=False

    print("maddpg: , saveTraj: {}, visualize: {},damping; {},frictionloss: {}".format( str(saveTraj), str(visualizeMujoco),damping,frictionloss))


    numAgents = numWolves + numSheeps+numMasters
    numEntities = numAgents + numMasters
    wolvesID = [0]
    sheepsID = [1]
    masterID = [2]

    wolfSize = 0.075
    sheepSize = 0.05
    masterSize = 0.075
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [masterSize] * numMasters

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None


    entitiesMovableList = [True] * numAgents + [False] * numMasters
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision,punishForOutOfBound)


    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    rewardMaster= lambda state, action, nextState: [-reward  for reward in rewardWolf(state, action, nextState)]
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))+list(rewardMaster(state, action, nextState))

    dirName = os.path.dirname(__file__)
    # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','leased.xml')

    makePropertyList=MakePropertyList(transferNumberListToStr)
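    # Build value lists that overwrite the @damping and @frictionloss attributes of the
    # joints indexed 1-3 in the XML; each entry holds a pair of values, presumably one
    # per joint axis.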

    geomIds=[1,2,3]
    keyNameList=[0,1]
    valueList=[[damping,damping]]*len(geomIds)
    dampngParameter=makePropertyList(geomIds,keyNameList,valueList)

    changeJointDampingProperty=lambda envDict,geomPropertyDict:changeJointProperty(envDict,geomPropertyDict,'@damping')

    geomIds=[1,2,3]
    keyNameList=[0,1]
    valueList=[[frictionloss,frictionloss]]*len(geomIds)
    frictionlossParameter=makePropertyList(geomIds,keyNameList,valueList)
    changeJointFrictionlossProperty=lambda envDict,geomPropertyDict:changeJointProperty(envDict,geomPropertyDict,'@frictionloss')


    physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','leasedNew.xml')
    # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','leased.xml')


    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())

    envXmlPropertyDictList=[dampngParameter,frictionlossParameter]
    changeEnvXmlPropertFuntionyList=[changeJointDampingProperty,changeJointFrictionlossProperty]
    for propertyDict,changeXmlProperty in zip(envXmlPropertyDictList,changeEnvXmlPropertFuntionyList):
        envXmlDict=changeXmlProperty(envXmlDict,propertyDict)

    envXml=xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    # print(physicsSimulation.model.body_pos)

    # print(dir(physicsSimulation.model))
    # print(dir(physicsSimulation.data),physicsSimulation.dataphysicsSimulation.data)

    # print(physicsSimulation.data.qpos,dir(physicsSimulation.data.qpos))
    # print(physicsSimulation.data.qpos,dir(physicsSimulation.data.qpos))
    qPosInit = (0, ) * 24
    qVelInit = (0, ) * 24
    qPosInitNoise = 0.4
    qVelInitNoise = 0
    numAgent = 2
    tiedAgentId = [0, 2]
    ropePartIndex = list(range(3, 12))
    maxRopePartLength = 0.06
    reset = ResetUniformWithoutXPosForLeashed(physicsSimulation, qPosInit, qVelInit, numAgent, tiedAgentId,ropePartIndex, maxRopePartLength, qPosInitNoise, qVelInitNoise)
    numSimulationFrames=10
    isTerminal= lambda state: False
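    # Wolf and sheep actions are rescaled by a fixed factor of 5, while the master's
    # action scale is the masterForce condition under evaluation.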
    reshapeActionList = [ReshapeAction(5),ReshapeAction(5),ReshapeAction(masterForce)]
    transit=TransitionFunctionWithoutXPos(physicsSimulation, numSimulationFrames, visualizeMujoco,isTerminal, reshapeActionList)

    # damping=2.5
    # numSimulationFrames =int(0.1/dt)
    # agentsMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps
    # reshapeAction = ReshapeAction()
    # isTerminal = lambda state: False
    # transit = TransitionFunctionWithoutXPos(physicsSimulation,numAgents , numSimulationFrames,damping*dt*numSimulationFrames,agentsMaxSpeedList,visualizeMujoco,isTerminal,reshapeAction)


    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit, isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, masterID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]


    # individStr = 'individ' if individualRewardWolf else 'shared'
    # fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
    #     numWolves, numSheeps, numMasters, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)


    # wolvesModelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg', fileName + str(i) + '60000eps') for i in wolvesID]
    # [restoreVariables(model, path) for model, path in zip(wolvesModel, wolvesModelPaths)]
    #
    # actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    # policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]

    # modelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg_ExpEpsLengthAndSheepSpeed', fileName + str(i)) for i in
    #               range(numAgents)]
    # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG')
    # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','timeconst='+str(timeconst)+'dampratio='+str(dampratio))

    modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPGLeasedFixedEnv2','damping={}_frictionloss={}_masterForce={}'.format(damping,frictionloss,masterForce))
    fileName = "maddpg{}episodes{}step_agent".format(maxEpisode, maxTimeStep)

    modelPaths = [os.path.join(modelFolder,  fileName + str(i) + '60000eps') for i in range(numAgents)]

    [restoreVariables(model, path) for model, path in zip(modelsList, modelPaths)]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]


    trajList = []
    numTrajToSample = 5
    for i in range(numTrajToSample):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))

    # saveTraj
    if saveTraj:
        trajFileName = "maddpg{}wolves{}sheep{}blocks{}eps{}stepSheepSpeed{}{}Traj".format(numWolves, numSheeps, numMasters, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)
        trajSavePath = os.path.join(dirName, '..', 'trajectory', trajFileName)
        saveToPickle(trajList, trajSavePath)


    # visualize
    if visualizeTraj:
        demoFolder = os.path.join(dirName, '..', 'demos', 'mujocoMADDPGLeasedFixedEnv2','damping={}_frictionloss={}_masterForce={}'.format(damping,frictionloss,masterForce))

        if not os.path.exists(demoFolder):
            os.makedirs(demoFolder)
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [masterColor] * numMasters
        render = Render(entitiesSizeList, entitiesColorList, numAgents,demoFolder,saveImage, getPosFromAgentState)
        # print(trajList[0][0],'!!!3',trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
def simuliateOneParameter(parameterDict):
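    # Load the leashed (rope) environment XML, reset it with randomized positions for the
    # leashed agents, and render the resulting scene in the MuJoCo viewer.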


    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2


    wolfColor = np.array([0.85, 0.35, 0.35])
    sheepColor = np.array([0.35, 0.85, 0.35])
    blockColor = np.array([0.25, 0.25, 0.25])

    wolvesID = [0,1]
    sheepsID = [2]
    blocksID = [3]

    numWolves = len(wolvesID)
    numSheeps = len(sheepsID)
    numBlocks = len(blocksID)

    sheepMaxSpeed = 1.3
    wolfMaxSpeed =1.0
    blockMaxSpeed = None



    agentsMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    entitiesSizeList = [wolfSize]* numWolves + [sheepSize] * numSheeps + [blockSize]* numBlocks
    entityMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed]* numBlocks
    entitiesMovableList = [True]* numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)

    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    makePropertyList=MakePropertyList(transferNumberListToStr)

    # change collision-relevant parameters
    dmin=parameterDict['dmin']
    dmax=parameterDict['dmax']
    width=parameterDict['width']
    midpoint=parameterDict['midpoint']
    power=parameterDict['power']
    timeconst=parameterDict['timeconst']
    dampratio=parameterDict['dampratio']

    geomIds=[1,2,3,4]
    keyNameList=['@solimp','@solref']
    valueList=[[[dmin,dmax,width,midpoint,power],[timeconst,dampratio]]]*len(geomIds)
    collisionParameter=makePropertyList(geomIds,keyNameList,valueList)

    # changeSize
    # geomIds=[1,2]
    # keyNameList=['@size']
    # valueList=[[[0.075,0.075]],[[0.1,0.1]]]
    # sizeParameter=makePropertyList(geomIds,keyNameList,valueList)

    # load env xml and change some geoms' properties
    physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','1leased.xml')

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())

    # envXmlPropertyDictList=[collisionParameter]
    # changeEnvXmlPropertFuntionyList=[changeGeomProperty]
    # for propertyDict,changeXmlProperty in zip(envXmlPropertyDictList,changeEnvXmlPropertFuntionyList):
    #     envXmlDict=changeXmlProperty(envXmlDict,propertyDict)

    envXml=xmltodict.unparse(envXmlDict)
    # print(envXmlDict['mujoco']['worldbody']['body'][0])
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)


    # MDP function
    qPosInit = [0, 0]*numAgents
    qVelInit = [0, 0]*numAgents

    qVelInitNoise = 0
    qPosInitNoise = 1

    # reset=ResetUniformWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents, numBlocks,qPosInitNoise, qVelInitNoise)

    fixReset=ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents,numBlocks)
    blocksState=[[0,-0.8,0,0]]
    reset=lambda :fixReset([-0.5,0.8,-0.5,0,0.5,0.8],[0,0,0,0,0,0],blocksState)


    isTerminal = lambda state: False
    numSimulationFrames = 10
    visualize=True
    # physicsViewer=None
    dt=0.01
    damping=2.5
    reshapeAction=lambda action:action
    physicsViewer = mujoco.MjViewer(physicsSimulation)

    qPosInit = (0, ) * 24
    qVelInit = (0, ) * 24
    qPosInitNoise = 6
    qVelInitNoise = 4
    numAgent = 3
    tiedAgentId = [1, 2]
    ropePartIndex = list(range(3, 12))
    maxRopePartLength = 0.6
    reset = ResetUniformForLeashed(physicsSimulation, qPosInit, qVelInit, numAgent, tiedAgentId, \
            ropePartIndex, maxRopePartLength, qPosInitNoise, qVelInitNoise)
    reset()
    for i in range(6000):
        physicsViewer.render()
def main():
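    # Train MADDPG wolves and sheep with iterative sheep training, comparing individually
    # rewarded and shared-reward wolf models under the chosen sample method and sheep
    # learning rate.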
    debug = 0
    if debug:
        numWolves = 3
        numSheeps = 1
        numBlocks = 2
        saveAllmodels = False
        maxTimeStep = 25
        sheepSpeedMultiplier = 1
        sampleMethod = '5'
        learningRateSheepCritic = 0.005
        learningRateSheepActor = 0.005

    else:
        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = 3
        numSheeps = 1
        numBlocks = 2
        saveAllmodels = False
        maxTimeStep = 25
        sheepSpeedMultiplier = 1
        sampleMethod = condition['sampleMethod']
        learningRateSheepCritic = condition['sheepLr']
        learningRateSheepActor = condition['sheepLr']

    print(
        "maddpg: {} wolves, {} sheep, {} blocks, {} episodes with {} steps each eps, sheepSpeed: {}x,  sampleMethod: {}"
        .format(numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
                sheepSpeedMultiplier, str(sampleMethod)))

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier

    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    rewardWolfIndivid = RewardWolfIndividual(wolvesID, sheepsID,
                                             entitiesSizeList, isCollision)
    rewardWolfShared = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                  isCollision)

    rewardFuncIndividWolf = lambda state, action, nextState: \
        list(rewardWolfIndivid(state, action, nextState)) + list(rewardSheep(state, action, nextState))
    rewardFuncSharedWolf = lambda state, action, nextState: \
        list(rewardWolfShared(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    reset = ResetMultiAgentChasing(numAgents, numBlocks)
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]

    reshapeAction = ReshapeAction()
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID,
                                        entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                          entitiesSizeList, getCollisionForce,
                                          getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState,
                                    getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeAction,
                                       applyActionForce, applyEnvironForce,
                                       integrateState)

    isTerminal = lambda state: [False] * numAgents
    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    #------------ models ------------------------

    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsListShared = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]
    sheepModel = [modelsListShared[sheepID] for sheepID in sheepsID]
    modelsListIndivid = [
        buildMADDPGModels(layerWidth, agentID) for agentID in wolvesID
    ] + sheepModel

    trainCriticBySASRWolf = TrainCriticBySASR(
        actByPolicyTargetNoisyForNextState, learningRateWolfCritic, gamma)
    trainCriticWolf = TrainCritic(trainCriticBySASRWolf)
    trainCriticBySASRSheep = TrainCriticBySASR(
        actByPolicyTargetNoisyForNextState, learningRateSheepCritic, gamma)
    trainCriticSheep = TrainCritic(trainCriticBySASRSheep)

    trainActorFromSAWolf = TrainActorFromSA(learningRateWolfActor)
    trainActorWolf = TrainActor(trainActorFromSAWolf)

    trainActorFromSASheep = TrainActorFromSA(learningRateSheepActor)
    trainActorSheep = TrainActor(trainActorFromSASheep)

    trainActorList = [trainActorWolf] * numWolves + [trainActorSheep
                                                     ] * numSheeps
    trainCriticList = [trainCriticWolf] * numWolves + [trainCriticSheep
                                                       ] * numSheeps

    paramUpdateInterval = 1  #
    updateParameters = UpdateParameters(paramUpdateInterval, tau)
    sampleBatchFromMemory = SampleFromMemory(minibatchSize)

    learnInterval = 100
    learningStartBufferSize = minibatchSize * maxTimeStep
    startLearn = StartLearn(learningStartBufferSize, learnInterval)

    trainMADDPGModelsIndivid = TrainMADDPGModelsWithIterSheep(
        updateParameters, trainActorList, trainCriticList,
        sampleBatchFromMemory, startLearn, modelsListIndivid)
    trainMADDPGModelsShared = TrainMADDPGModelsWithIterSheep(
        updateParameters, trainActorList, trainCriticList,
        sampleBatchFromMemory, startLearn, modelsListShared)

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    actOneStepIndivid = lambda allAgentsStates, runTime: [
        actOneStepOneModel(model, allAgentsStates)
        for model in modelsListIndivid
    ]
    actOneStepShared = lambda allAgentsStates, runTime: [
        actOneStepOneModel(model, allAgentsStates)
        for model in modelsListShared
    ]

    sampleOneStepIndivid = SampleOneStep(transit, rewardFuncIndividWolf)
    sampleOneStepShared = SampleOneStep(transit, rewardFuncSharedWolf)

    runDDPGTimeStepIndivid = RunTimeStep(actOneStepIndivid,
                                         sampleOneStepIndivid,
                                         trainMADDPGModelsIndivid,
                                         observe=observe)
    runDDPGTimeStepShared = RunTimeStep(actOneStepShared,
                                        sampleOneStepShared,
                                        trainMADDPGModelsShared,
                                        observe=observe)

    runEpisodeIndivid = RunEpisode(reset, runDDPGTimeStepIndivid, maxTimeStep,
                                   isTerminal)
    runEpisodeShared = RunEpisode(reset, runDDPGTimeStepShared, maxTimeStep,
                                  isTerminal)

    getAgentModelIndivid = lambda agentId: lambda: trainMADDPGModelsIndivid.getTrainedModels(
    )[agentId]
    getModelListIndivid = [getAgentModelIndivid(i) for i in range(numAgents)]
    modelSaveRate = 1000
    individStr = 'individ'
    fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}Lr{}SampleMethod{}{}_agent".format(
        numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
        sheepSpeedMultiplier, learningRateSheepActor, sampleMethod, individStr)
    modelPath = os.path.join(dirName, '..', 'trainedModels',
                             'IterTrainSheep_evalSheeplrAndSampleMethod',
                             fileName)
    saveModelsIndivid = [
        SaveModel(modelSaveRate, saveVariables, getTrainedModel,
                  modelPath + str(i), saveAllmodels)
        for i, getTrainedModel in enumerate(getModelListIndivid)
    ]

    getAgentModelShared = lambda agentId: lambda: trainMADDPGModelsShared.getTrainedModels(
    )[agentId]
    getModelListShared = [getAgentModelShared(i) for i in range(numAgents)]
    individStr = 'shared'
    fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}Lr{}SampleMethod{}{}_agent".format(
        numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
        sheepSpeedMultiplier, learningRateSheepActor, sampleMethod, individStr)
    modelPath = os.path.join(dirName, '..', 'trainedModels',
                             'IterTrainSheep_evalSheeplrAndSampleMethod',
                             fileName)
    saveModelsShared = [
        SaveModel(modelSaveRate, saveVariables, getTrainedModel,
                  modelPath + str(i), saveAllmodels)
        for i, getTrainedModel in enumerate(getModelListShared)
    ]

    maddpgIterSheep = RunAlgorithmWithIterSheep(runEpisodeIndivid,
                                                runEpisodeShared, maxEpisode,
                                                saveModelsIndivid,
                                                saveModelsShared, sampleMethod,
                                                numAgents)

    replayBufferIndivid = getBuffer(bufferSize)
    replayBufferShared = getBuffer(bufferSize)

    meanRewardList, trajectory = maddpgIterSheep(replayBufferShared,
                                                 replayBufferIndivid)
def main():
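    # Train MADDPG wolves and sheep in the particle chasing environment; the wolf reward
    # is shared or individual depending on the individualRewardWolf flag.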
    debug = 1
    if debug:
        numWolves = 2
        numSheeps = 1
        numBlocks = 1
        saveAllmodels = True
        maxTimeStep = 25
        sheepSpeedMultiplier = 1
        individualRewardWolf = int(False)

    else:
        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = int(condition['numWolves'])
        numSheeps = int(condition['numSheeps'])
        numBlocks = int(condition['numBlocks'])

        maxTimeStep = int(condition['maxTimeStep'])
        sheepSpeedMultiplier = float(condition['sheepSpeedMultiplier'])
        individualRewardWolf = int(condition['individualRewardWolf'])

        saveAllmodels = False

    print("maddpg: {} wolves, {} sheep, {} blocks, {} episodes with {} steps each eps, sheepSpeed: {}x, wolfIndividualReward: {}, save all models: {}".
          format(numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individualRewardWolf, str(saveAllmodels)))


    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [blockSize] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier

    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList, isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)

    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    reset = ResetMultiAgentChasing(numAgents, numBlocks)
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

    reshapeAction = ReshapeAction()
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,getCollisionForce, getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeAction, applyActionForce, applyEnvironForce, integrateState)

    isTerminal = lambda state: [False]* numAgents
    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

#------------ models ------------------------

    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]

    trainCriticBySASR = TrainCriticBySASR(actByPolicyTargetNoisyForNextState, learningRateCritic, gamma)
    trainCritic = TrainCritic(trainCriticBySASR)
    trainActorFromSA = TrainActorFromSA(learningRateActor)
    trainActor = TrainActor(trainActorFromSA)

    paramUpdateInterval = 1 #
    updateParameters = UpdateParameters(paramUpdateInterval, tau)
    sampleBatchFromMemory = SampleFromMemory(minibatchSize)

    learnInterval = 100
    learningStartBufferSize = minibatchSize * maxTimeStep
    startLearn = StartLearn(learningStartBufferSize, learnInterval)

    trainMADDPGModels = TrainMADDPGModelsWithBuffer(updateParameters, trainActor, trainCritic, sampleBatchFromMemory, startLearn, modelsList)

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    actOneStep = lambda allAgentsStates, runTime: [actOneStepOneModel(model, allAgentsStates) for model in modelsList]

    sampleOneStep = SampleOneStep(transit, rewardFunc)
    runDDPGTimeStep = RunTimeStep(actOneStep, sampleOneStep, trainMADDPGModels, observe = observe)

    runEpisode = RunEpisode(reset, runDDPGTimeStep, maxTimeStep, isTerminal)

    getAgentModel = lambda agentId: lambda: trainMADDPGModels.getTrainedModels()[agentId]
    getModelList = [getAgentModel(i) for i in range(numAgents)]
    modelSaveRate = 1000
    individStr = 'individ' if individualRewardWolf else 'shared'
    fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
        numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)

    modelPath = os.path.join(dirName, '..', 'trainedModels', 'maddpg', fileName)
    saveModels = [SaveModel(modelSaveRate, saveVariables, getTrainedModel, modelPath+ str(i), saveAllmodels) for i, getTrainedModel in enumerate(getModelList)]

    maddpg = RunAlgorithm(runEpisode, maxEpisode, saveModels, numAgents)
    replayBuffer = getBuffer(bufferSize)
    meanRewardList, trajectory = maddpg(replayBuffer)
def main():
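    # Load trained MADDPG models for the (optionally walled) MuJoCo chasing environment
    # and sample evaluation trajectories, optionally saving and rendering them.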
    debug = 1
    if debug:
        numWolves = 3
        numSheeps = 1
        numBlocks = 2
        timeconst = 0.5
        dampratio = 0.2
        saveTraj = False
        visualizeTraj = True
        maxTimeStep = 25
        sheepSpeedMultiplier = 1.0
        individualRewardWolf = 0
        hasWalls = 1.5
        dt = 0.05

        visualizeMujoco = True

    else:
        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = int(condition['numWolves'])
        numSheeps = int(condition['numSheeps'])
        numBlocks = int(condition['numBlocks'])
        saveTraj = True
        visualizeTraj = False
        maxTimeStep = 50
        sheepSpeedMultiplier = 1.25
        individualRewardWolf = 1

    print(
        "maddpg: {} wolves, {} sheep, {} blocks, saveTraj: {}, visualize: {}".
        format(numWolves, numSheeps, numBlocks, str(saveTraj),
               str(visualizeTraj)))

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks

    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList,
                                          isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                isCollision)

    rewardFunc = lambda state, action, nextState: list(
        rewardWolf(state, action, nextState)) + list(
            rewardSheep(state, action, nextState))
    dirName = os.path.dirname(__file__)
    # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','numBlocks=1_numSheeps=1_numWolves=1.xml')
    if hasWalls:
        # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','hasWalls=1_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'.format(numBlocks,numSheeps,numWolves,timeconst,dampratio))
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'dt={}'.format(dt),
            'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(hasWalls, numBlocks, numSheeps, numWolves, timeconst,
                    dampratio))

        # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','crossTest3w1s2b.xml')

    else:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(numBlocks, numSheeps, numWolves, timeconst, dampratio))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXml = xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    print(physicsSimulation.model.body_pos)

    # print(dir(physicsSimulation.model))
    # print(dir(physicsSimulation.data),physicsSimulation.dataphysicsSimulation.data)

    # print(physicsSimulation.data.qpos,dir(physicsSimulation.data.qpos))
    # print(physicsSimulation.data.qpos,dir(physicsSimulation.data.qpos))
    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0 * hasWalls
    qPosInitNoise = 0.8 * hasWalls
    getBlockRandomPos = lambda: np.random.uniform(-0.7 * hasWalls, +0.7 *
                                                  hasWalls, 2)
    getBlockSpeed = lambda: np.zeros(2)

    numQPos = len(physicsSimulation.data.qpos)
    numQVel = len(physicsSimulation.data.qvel)

    sampleAgentsQPos = lambda: np.asarray(qPosInit) + np.random.uniform(
        low=-qPosInitNoise, high=qPosInitNoise, size=numQPos)
    sampleAgentsQVel = lambda: np.asarray(qVelInit) + np.random.uniform(
        low=-qVelInitNoise, high=qVelInitNoise, size=numQVel)

    minDistance = 0.2 + 2 * blockSize
    isOverlap = IsOverlap(minDistance)

    sampleBlockState = SampleBlockState(numBlocks, getBlockRandomPos,
                                        getBlockSpeed, isOverlap)

    reset = ResetUniformWithoutXPos(physicsSimulation, numAgents, numBlocks,
                                    sampleAgentsQPos, sampleAgentsQVel,
                                    sampleBlockState)

    damping = 2.5
    numSimulationFrames = int(0.1 / dt)
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed
                                                       ] * numSheeps
    reshapeAction = ReshapeAction()
    isTerminal = lambda state: False
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numAgents,
                                            numSimulationFrames,
                                            damping * dt * numSimulationFrames,
                                            agentsMaxSpeedList,
                                            visualizeMujoco, isTerminal,
                                            reshapeAction)

    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit,
                                        isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]

    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]

    # individStr = 'individ' if individualRewardWolf else 'shared'
    # fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
    #     numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)

    # wolvesModelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg', fileName + str(i) + '60000eps') for i in wolvesID]
    # [restoreVariables(model, path) for model, path in zip(wolvesModel, wolvesModelPaths)]
    #
    # actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    # policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]

    # modelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg_ExpEpsLengthAndSheepSpeed', fileName + str(i)) for i in
    #               range(numAgents)]
    # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG')
    # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','timeconst='+str(timeconst)+'dampratio='+str(dampratio))
    if hasWalls:
        # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','hasWalls=1'+'numBlocks='+str(numBlocks)+'numSheeps='+str(numSheeps)+'numWolves='+str(numWolves)+'timeconst='+str(timeconst)+'dampratio='+str(dampratio))
        # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','dt={}'.format(dt),'hasWalls='+str(hasWalls)+'numBlocks='+str(numBlocks)+'numSheeps='+str(numSheeps)+'numWolves='+str(numWolves)+'timeconst='+str(timeconst)+'dampratio='+str(dampratio)+'individualRewardWolf='+str(individualRewardWolf)+'sheepSpeedMultiplier='+str(sheepSpeedMultiplier))
        # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','dt={}'.format(dt),'hasWalls='+str(hasWalls)+'numBlocks='+str(numBlocks)+'numSheeps='+str(numSheeps)+'numWolves='+str(numWolves)+'timeconst='+str(timeconst)+'dampratio='+str(dampratio)+'individualRewardWolf='+str(individualRewardWolf)+'sheepSpeedMultiplier='+str(sheepSpeedMultiplier))

        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPGeavluateWall',
            'dt={}'.format(dt),
            'hasWalls=' + str(hasWalls) + 'numBlocks=' + str(numBlocks) +
            'numSheeps=' + str(numSheeps) + 'numWolves=' + str(numWolves) +
            'timeconst=' + str(timeconst) + 'dampratio=' + str(dampratio) +
            'individualRewardWolf=' + str(individualRewardWolf) +
            'sheepSpeedMultiplier=' + str(sheepSpeedMultiplier))
        individStr = 'individ' if individualRewardWolf else 'shared'
        fileName = "maddpghasWalls={}{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
            hasWalls, numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, individStr)

    else:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPG',
            'numBlocks=' + str(numBlocks) + 'numSheeps=' + str(numSheeps) +
            'numWolves=' + str(numWolves) + 'timeconst=' + str(timeconst) +
            'dampratio=' + str(dampratio))
        fileName = "maddpg{}wolves{}sheep{}blocks60000episodes25stepSheepSpeed1.0shared_agent".format(
            numWolves, numSheeps, numBlocks)
    modelPaths = [
        os.path.join(modelFolder, fileName + str(i) + '60000eps')
        for i in range(numAgents)
    ]

    [
        restoreVariables(model, path)
        for model, path in zip(modelsList, modelPaths)
    ]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [
        actOneStepOneModel(model, observe(allAgentsStates))
        for model in modelsList
    ]

    trajList = []
    numTrajToSample = 50
    for i in range(numTrajToSample):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))

    # saveTraj
    if saveTraj:
        trajFileName = "maddpg{}wolves{}sheep{}blocks{}eps{}stepSheepSpeed{}{}Traj".format(
            numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, individStr)
        trajSavePath = os.path.join(dirName, '..', 'trajectory', trajFileName)
        saveToPickle(trajList, trajSavePath)

    # visualize
    if visualizeTraj:
        entitiesColorList = [wolfColor] * numWolves + [
            sheepColor
        ] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        print(trajList[0][0], '!!!3', trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
def main():
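    # Train MADDPG wolf, sheep, and master agents in the leashed rope environment for a
    # single (damping, frictionloss, masterForce) condition.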
    debug = 0
    if debug:

        damping = 0.0
        frictionloss = 0.4
        masterForce = 1.0

        numWolves = 1
        numSheeps = 1
        numMasters = 1
        saveAllmodels = True
        maxTimeStep = 25
        visualize = False

    else:
        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = 1
        numSheeps = 1
        numMasters = 1
        damping = float(condition['damping'])
        frictionloss = float(condition['frictionloss'])
        masterForce = float(condition['masterForce'])

        maxTimeStep = 25
        visualize = False
        saveAllmodels = True
    print(
        "maddpg: {} wolves, {} sheep, {} blocks, {} episodes with {} steps each eps,  save all models: {}"
        .format(numWolves, numSheeps, numMasters, maxEpisode, maxTimeStep,
                str(saveAllmodels)))
    print(damping, frictionloss, masterForce)

    modelFolder = os.path.join(
        dirName, '..', 'trainedModels', 'mujocoMADDPGLeasedFixedEnv2',
        'damping={}_frictionloss={}_masterForce={}'.format(
            damping, frictionloss, masterForce))

    if not os.path.exists(modelFolder):
        os.makedirs(modelFolder)

    numAgents = numWolves + numSheeps + numMasters
    numEntities = numAgents + numMasters
    wolvesID = [0]
    sheepsID = [1]
    masterID = [2]

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.075
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numMasters

    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    rewardMaster = lambda state, action, nextState: [
        -reward for reward in rewardWolf(state, action, nextState)
    ]
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))+list(rewardMaster(state, action, nextState))

    makePropertyList = MakePropertyList(transferNumberListToStr)

    geomIds = [1, 2, 3]
    keyNameList = [0, 1]
    valueList = [[damping, damping]] * len(geomIds)
    dampngParameter = makePropertyList(geomIds, keyNameList, valueList)

    changeJointDampingProperty = lambda envDict, geomPropertyDict: changeJointProperty(
        envDict, geomPropertyDict, '@damping')

    geomIds = [1, 2, 3]
    keyNameList = [0, 1]
    valueList = [[frictionloss, frictionloss]] * len(geomIds)
    frictionlossParameter = makePropertyList(geomIds, keyNameList, valueList)
    changeJointFrictionlossProperty = lambda envDict, geomPropertyDict: changeJointProperty(
        envDict, geomPropertyDict, '@frictionloss')

    physicsDynamicsPath = os.path.join(dirName, '..', '..', 'environment',
                                       'mujocoEnv', 'rope', 'leasedNew.xml')
    with open(physicsDynamicsPath) as f:
        xml_string = f.read()

    envXmlDict = xmltodict.parse(xml_string.strip())
    envXmlPropertyDictList = [dampngParameter, frictionlossParameter]
    changeEnvXmlPropertyFunctionList = [
        changeJointDampingProperty, changeJointFrictionlossProperty
    ]
    for propertyDict, changeXmlProperty in zip(
            envXmlPropertyDictList, changeEnvXmlPropertyFunctionList):
        envXmlDict = changeXmlProperty(envXmlDict, propertyDict)

    envXml = xmltodict.unparse(envXmlDict)
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
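    # The environment XML is parsed with xmltodict, edited as nested dicts, serialized back to a
    # string, and then loaded into the MuJoCo bindings (load_model_from_xml / MjSim) to build the
    # simulation used by the transition function below.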

    qPosInit = (0, ) * 24
    qVelInit = (0, ) * 24
    qPosInitNoise = 0.6
    qVelInitNoise = 0
    numAgent = 3
    tiedAgentId = [0, 2]
    ropePartIndex = list(range(3, 12))
    maxRopePartLength = 0.06
    reset = ResetUniformWithoutXPosForLeashed(physicsSimulation, qPosInit,
                                              qVelInit, numAgent, tiedAgentId,
                                              ropePartIndex, maxRopePartLength,
                                              qPosInitNoise, qVelInitNoise)

    numSimulationFrames = 10
    isTerminal = lambda state: False
    reshapeActionList = [
        ReshapeAction(5),
        ReshapeAction(5),
        ReshapeAction(masterForce)
    ]
    transit = TransitionFunctionWithoutXPos(physicsSimulation,
                                            numSimulationFrames, visualize,
                                            isTerminal, reshapeActionList)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              masterID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]
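    # observe returns one observation vector per agent, assembled by Observe from the entities'
    # positions and velocities; obsShape below records the length of each agent's vector.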
    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    #------------ models ------------------------

    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]

    trainCriticBySASR = TrainCriticBySASR(actByPolicyTargetNoisyForNextState,
                                          learningRateCritic, gamma)
    trainCritic = TrainCritic(trainCriticBySASR)
    trainActorFromSA = TrainActorFromSA(learningRateActor)
    trainActor = TrainActor(trainActorFromSA)

    paramUpdateInterval = 1  #
    updateParameters = UpdateParameters(paramUpdateInterval, tau)
    sampleBatchFromMemory = SampleFromMemory(minibatchSize)

    learnInterval = 100
    learningStartBufferSize = minibatchSize * maxTimeStep
    startLearn = StartLearn(learningStartBufferSize, learnInterval)
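    # Training is expected to begin once the replay buffer holds minibatchSize * maxTimeStep
    # transitions, and to run every learnInterval environment steps thereafter.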

    trainMADDPGModels = TrainMADDPGModelsWithBuffer(updateParameters,
                                                    trainActor, trainCritic,
                                                    sampleBatchFromMemory,
                                                    startLearn, modelsList)

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    actOneStep = lambda allAgentsStates, runTime: [
        actOneStepOneModel(model, allAgentsStates) for model in modelsList
    ]

    sampleOneStep = SampleOneStep(transit, rewardFunc)
    runDDPGTimeStep = RunTimeStep(actOneStep,
                                  sampleOneStep,
                                  trainMADDPGModels,
                                  observe=observe)

    runEpisode = RunEpisode(reset, runDDPGTimeStep, maxTimeStep, isTerminal)

    getAgentModel = lambda agentId: lambda: trainMADDPGModels.getTrainedModels(
    )[agentId]
    getModelList = [getAgentModel(i) for i in range(numAgents)]
    modelSaveRate = 1000
    fileName = "maddpg{}episodes{}step_agent".format(maxEpisode, maxTimeStep)

    modelPath = os.path.join(modelFolder, fileName)

    saveModels = [
        SaveModel(modelSaveRate, saveVariables, getTrainedModel,
                  modelPath + str(i), saveAllmodels)
        for i, getTrainedModel in enumerate(getModelList)
    ]

    maddpg = RunAlgorithm(runEpisode, maxEpisode, saveModels, numAgents)
    replayBuffer = getBuffer(bufferSize)
    meanRewardList, trajectory = maddpg(replayBuffer)


# Example 9

def simuliateOneParameter(parameterOneCondition, evalNum, randomSeed):
    saveTraj = True
    visualizeTraj = False
    visualizeMujoco = False

    numWolves = parameterOneCondition['numWolves']
    numSheeps = parameterOneCondition['numSheeps']
    numBlocks = parameterOneCondition['numBlocks']
    timeconst = parameterOneCondition['timeconst']
    dampratio = parameterOneCondition['dampratio']
    maxTimeStep = parameterOneCondition['maxTimeStep']
    sheepSpeedMultiplier = parameterOneCondition['sheepSpeedMultiplier']
    individualRewardWolf = parameterOneCondition['individualRewardWolf']
    hasWalls = parameterOneCondition['hasWalls']
    dt = parameterOneCondition['dt']
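    # Illustrative shape of the condition dict (placeholder values, shown only to document the
    # expected keys):
    #   {'numWolves': 1, 'numSheeps': 1, 'numBlocks': 1, 'timeconst': 0.5, 'dampratio': 0.2,
    #    'maxTimeStep': 25, 'sheepSpeedMultiplier': 1.0, 'individualRewardWolf': 0,
    #    'hasWalls': 1.0, 'dt': 0.02}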

    print(
        "maddpg: {} wolves, {} sheep, {} blocks, saveTraj: {}, visualize: {}".
        format(numWolves, numSheeps, numBlocks, str(saveTraj),
               str(visualizeTraj)))

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks

    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList,
                                          isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                isCollision)

    rewardFunc = lambda state, action, nextState: list(
        rewardWolf(state, action, nextState)) + list(
            rewardSheep(state, action, nextState))
    dirName = os.path.dirname(__file__)

    if hasWalls:

        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'dt={}'.format(dt),
            'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(hasWalls, numBlocks, numSheeps, numWolves, timeconst,
                    dampratio))

    else:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(numBlocks, numSheeps, numWolves, timeconst, dampratio))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXml = xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    # print(physicsSimulation.model.body_pos)

    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0 * hasWalls
    qPosInitNoise = 0.8 * hasWalls
    getBlockRandomPos = lambda: np.random.uniform(-0.7 * hasWalls, +0.7 *
                                                  hasWalls, 2)
    getBlockSpeed = lambda: np.zeros(2)

    numQPos = len(physicsSimulation.data.qpos)
    numQVel = len(physicsSimulation.data.qvel)

    sampleAgentsQPos = lambda: np.asarray(qPosInit) + np.random.uniform(
        low=-qPosInitNoise, high=qPosInitNoise, size=numQPos)
    sampleAgentsQVel = lambda: np.asarray(qVelInit) + np.random.uniform(
        low=-qVelInitNoise, high=qVelInitNoise, size=numQVel)
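    # The noise terms scale with hasWalls: with hasWalls == 0 both are zero, so agents start
    # exactly at qPosInit / qVelInit; with walls present, positions are perturbed uniformly
    # within +/-0.8.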

    minDistance = 0.2 + 2 * blockSize
    isOverlap = IsOverlap(minDistance)

    sampleBlockState = SampleBlockState(numBlocks, getBlockRandomPos,
                                        getBlockSpeed, isOverlap)

    reset = ResetUniformWithoutXPos(physicsSimulation, numAgents, numBlocks,
                                    sampleAgentsQPos, sampleAgentsQVel,
                                    sampleBlockState)

    damping = 2.5
    numSimulationFrames = int(0.1 / dt)
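    # numSimulationFrames = int(0.1 / dt), so each environment step advances the physics by
    # roughly 0.1 s of simulated time regardless of dt (assuming the XML timestep matches dt).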
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed
                                                       ] * numSheeps
    reshapeAction = ReshapeAction()
    isTerminal = lambda state: False
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numAgents,
                                            numSimulationFrames,
                                            damping * dt * numSimulationFrames,
                                            agentsMaxSpeedList,
                                            visualizeMujoco, isTerminal,
                                            reshapeAction)

    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit,
                                        isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]

    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]

    if hasWalls:

        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPGeavluateWall',
            'dt={}'.format(dt),
            'hasWalls=' + str(hasWalls) + 'numBlocks=' + str(numBlocks) +
            'numSheeps=' + str(numSheeps) + 'numWolves=' + str(numWolves) +
            'timeconst=' + str(timeconst) + 'dampratio=' + str(dampratio) +
            'individualRewardWolf=' + str(individualRewardWolf) +
            'sheepSpeedMultiplier=' + str(sheepSpeedMultiplier))
        individStr = 'individ' if individualRewardWolf else 'shared'
        fileName = "maddpghasWalls={}{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
            hasWalls, numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, individStr)

    else:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPG',
            'numBlocks=' + str(numBlocks) + 'numSheeps=' + str(numSheeps) +
            'numWolves=' + str(numWolves) + 'timeconst=' + str(timeconst) +
            'dampratio=' + str(dampratio))
        fileName = "maddpg{}wolves{}sheep{}blocks60000episodes25stepSheepSpeed1.0shared_agent".format(
            numWolves, numSheeps, numBlocks)
    modelPaths = [
        os.path.join(modelFolder, fileName + str(i) + '60000eps')
        for i in range(numAgents)
    ]

    for model, path in zip(modelsList, modelPaths):
        restoreVariables(model, path)

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [
        actOneStepOneModel(model, observe(allAgentsStates))
        for model in modelsList
    ]

    startTime = time.time()
    trajList = []
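    # Each trajectory below is generated under np.random.seed(i), so individual rollouts are
    # reproducible by index.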
    for i in range(evalNum):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))
    endTime = time.time()
    print("Time taken {} seconds to generate {} trajectories".format(
        (endTime - startTime), evalNum))

    # saveTraj
    if saveTraj:

        trajectoryFolder = os.path.join(dirName, '..', 'trajectory',
                                        'evluateWall')
        if not os.path.exists(trajectoryFolder):
            os.makedirs(trajectoryFolder)

        trajectorySaveExtension = '.pickle'
        fixedParameters = {'randomSeed': randomSeed, 'evalNum': evalNum}
        generateTrajectorySavePath = GetSavePath(trajectoryFolder,
                                                 trajectorySaveExtension,
                                                 fixedParameters)
        trajectorySavePath = generateTrajectorySavePath(parameterOneCondition)

        saveToPickle(trajList, trajectorySavePath)

    # visualize
    if visualizeTraj:
        entitiesColorList = [wolfColor] * numWolves + [
            sheepColor
        ] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        print(trajList[0][0], '!!!3', trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)

    return endTime - startTime
def main():
    debug = 1
    if debug:

        numWolves = 2
        numSheeps = 4
        numBlocks = 2
        hasWalls = 1.0

        dt = 0.02
        maxTimeStep = 25
        sheepSpeedMultiplier = 1.0
        individualRewardWolf = int(False)

        mujocoVisualize = False
        saveAllmodels = True

    else:

        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = int(condition['numWolves'])
        numSheeps = int(condition['numSheeps'])
        numBlocks = int(condition['numBlocks'])
        hasWalls = float(condition['hasWalls'])

        dt = float(condition['dt'])
        maxTimeStep = int(condition['maxTimeStep'])
        sheepSpeedMultiplier = float(condition['sheepSpeedMultiplier'])
        individualRewardWolf = int(condition['individualRewardWolf'])
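        # Example command-line argument (illustrative values only; the script name below is hypothetical):
        #   python thisTrainingScript.py '{"numWolves": 2, "numSheeps": 4, "numBlocks": 2,
        #     "hasWalls": 1.0, "dt": 0.02, "maxTimeStep": 25, "sheepSpeedMultiplier": 1.0,
        #     "individualRewardWolf": 0}'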

        saveAllmodels = True
        mujocoVisualize = False

    print(
        "maddpg: {} wolves, {} sheep, {} blocks, {} episodes with {} steps each, sheepSpeed: {}x, wolfIndividualReward: {}, save all models: {}"
        .format(numWolves, numSheeps, numBlocks, maxEpisode,
                maxTimeStep, sheepSpeedMultiplier, individualRewardWolf,
                str(saveAllmodels)))

    dataMainFolder = os.path.join(dirName, '..', 'trainedModels',
                                  'mujocoMADDPG')
    modelFolder = os.path.join(
        dataMainFolder, 'dt={}'.format(dt),
        'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}_individualRewardWolf={}_sheepSpeedMultiplier={}.xml'
        .format(hasWalls, numBlocks, numSheeps, numWolves,
                individualRewardWolf, sheepSpeedMultiplier))

    if not os.path.exists(modelFolder):
        os.makedirs(modelFolder)

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = lambda state: 0  #PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)
    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList,
                                          isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                isCollision)
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    #------------ mujocoEnv ------------------------

    physicsDynamicsPath = os.path.join(
        dirName, '..', '..', 'environment', 'mujocoEnv', 'dt={}'.format(dt),
        'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}.xml'.format(
            hasWalls, numBlocks, numSheeps, numWolves))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXml = xmltodict.unparse(envXmlDict)
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)

    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0 * hasWalls
    qPosInitNoise = 0.8 * hasWalls
    getBlockRandomPos = lambda: np.random.uniform(-0.7 * hasWalls, +0.7 *
                                                  hasWalls, 2)
    getBlockSpeed = lambda: np.zeros(2)

    numQPos = len(physicsSimulation.data.qpos)
    numQVel = len(physicsSimulation.data.qvel)

    sampleAgentsQPos = lambda: np.asarray(qPosInit) + np.random.uniform(
        low=-qPosInitNoise, high=qPosInitNoise, size=numQPos)
    sampleAgentsQVel = lambda: np.asarray(qVelInit) + np.random.uniform(
        low=-qVelInitNoise, high=qVelInitNoise, size=numQVel)

    minDistance = 0.2 + 2 * blockSize  #>2*wolfSize+2*blockSize
    isOverlap = IsOverlap(minDistance)
    sampleBlockState = SampleBlockState(numBlocks, getBlockRandomPos,
                                        getBlockSpeed, isOverlap)

    reset = ResetUniformWithoutXPos(physicsSimulation, numAgents, numBlocks,
                                    sampleAgentsQPos, sampleAgentsQVel,
                                    sampleBlockState)

    transitTimePerStep = 0.1
    numSimulationFrames = int(transitTimePerStep / dt)

    isTerminal = lambda state: [False] * numAgents
    reshapeAction = ReshapeAction()
    transit = TransitionFunction(physicsSimulation, numAgents,
                                 numSimulationFrames, mujocoVisualize,
                                 isTerminal, reshapeAction)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]
    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    #------------ models ------------------------

    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]

    trainCriticBySASR = TrainCriticBySASR(actByPolicyTargetNoisyForNextState,
                                          learningRateCritic, gamma)
    trainCritic = TrainCritic(trainCriticBySASR)
    trainActorFromSA = TrainActorFromSA(learningRateActor)
    trainActor = TrainActor(trainActorFromSA)

    paramUpdateInterval = 1  #
    updateParameters = UpdateParameters(paramUpdateInterval, tau)
    sampleBatchFromMemory = SampleFromMemory(minibatchSize)

    learnInterval = 100
    learningStartBufferSize = minibatchSize * maxTimeStep
    startLearn = StartLearn(learningStartBufferSize, learnInterval)

    trainMADDPGModels = TrainMADDPGModelsWithBuffer(updateParameters,
                                                    trainActor, trainCritic,
                                                    sampleBatchFromMemory,
                                                    startLearn, modelsList)

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    actOneStep = lambda allAgentsStates, runTime: [
        actOneStepOneModel(model, allAgentsStates) for model in modelsList
    ]

    sampleOneStep = SampleOneStep(transit, rewardFunc)
    runDDPGTimeStep = RunTimeStep(actOneStep,
                                  sampleOneStep,
                                  trainMADDPGModels,
                                  observe=observe)

    runEpisode = RunEpisode(reset, runDDPGTimeStep, maxTimeStep, isTerminal)

    getAgentModel = lambda agentId: lambda: trainMADDPGModels.getTrainedModels(
    )[agentId]
    getModelList = [getAgentModel(i) for i in range(numAgents)]
    modelSaveRate = 1000
    individStr = 'individ' if individualRewardWolf else 'shared'
    fileName = "maddpghasWalls={}{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
        hasWalls, numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
        sheepSpeedMultiplier, individStr)

    modelPath = os.path.join(modelFolder, fileName)

    saveModels = [
        SaveModel(modelSaveRate, saveVariables, getTrainedModel,
                  modelPath + str(i), saveAllmodels)
        for i, getTrainedModel in enumerate(getModelList)
    ]

    maddpg = RunAlgorithm(runEpisode, maxEpisode, saveModels, numAgents)
    replayBuffer = getBuffer(bufferSize)
    meanRewardList, trajectory = maddpg(replayBuffer)