Example #1
def main():
    independentVariables = OrderedDict()
    # independentVariables['bufferSize'] = [1000, 5000, 10000, 20000]
    # independentVariables['layerWidth'] = [[32], [32, 32], [64, 64, 64], [128, 128, 128, 128]]
    # independentVariables['varianceDiscount'] = [0.995, 0.9995, 0.99995]

    independentVariables['bufferSize'] = [1000, 5000]
    independentVariables['layerWidth'] = [(32, 32), (32, 32, 32)]
    independentVariables['varianceDiscount'] = [0.995, 0.9995]

    modelSaveDirectory = "../trainedDDPGModels"
    modelSaveExtension = '.ckpt'
    getSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension)

    evaluate = EvaluateNoiseAndMemorySize(getSavePath, saveModel=False)

    levelNames = list(independentVariables.keys())
    levelValues = list(independentVariables.values())
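    # One index level per independent variable: the cross product gives one DataFrame row per
    # parameter combination, and groupby(levelNames).apply(evaluate) runs the evaluation once per row.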
    levelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=levelIndex)
    resultDF = toSplitFrame.groupby(levelNames).apply(evaluate)
    dirName = os.path.dirname(__file__)
    resultPath = os.path.join(
        dirName, '..', '..', 'plots',
        str(list(independentVariables.keys())) + '.pickle')
    saveToPickle(resultDF, resultPath)

    nCols = len(independentVariables['bufferSize'])
    nRows = len(independentVariables['layerWidth'])
    numplot = 1
    axs = []
    figure = plt.figure(dpi=200)
    figure.set_size_inches(7.5, 6)
    figure.suptitle('buffer: ' + str(independentVariables['bufferSize']) +
                    ', layers: ' + str(independentVariables['layerWidth']) +
                    ', varDiscount: ' +
                    str(independentVariables['varianceDiscount']),
                    fontsize=8)

    for layerWidth, outterSubDf in resultDF.groupby('layerWidth'):
        for bufferSize, innerSubDf in outterSubDf.groupby('bufferSize'):
            subplot = figure.add_subplot(nRows, nCols, numplot)
            axs.append(subplot)
            numplot += 1
            plt.ylim([-800, 400])
            innerSubDf.T.plot(ax=subplot)

            subplot.set_title('layerWidth = ' + str(layerWidth) +
                              ' bufferSize = ' + str(bufferSize),
                              fontsize=5)
            subplot.set_ylabel('MeanEpsReward', fontsize=5)
            subplot.set_xlabel('Episode', fontsize=5)
            subplot.tick_params(axis='both', which='major', labelsize=5)
            subplot.tick_params(axis='both', which='minor', labelsize=5)
            plt.legend(loc='best', prop={'size': 5})

    plotPath = os.path.join(dirName, '..', '..', 'plots')
    plt.savefig(os.path.join(plotPath, str(list(independentVariables.keys()))))
    plt.show()
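Example #2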
def main():
    statesdim = env.observation_space.shape[0]
    actiondim = env.action_space.n
    
    fixedParameters = OrderedDict()
    fixedParameters['batchsize'] = 128
    fixedParameters['maxEpisode'] = 400
    fixedParameters['gamma'] = 0.9
    fixedParameters['numberlayers'] = 20
    fixedParameters['stateDim'] = statesdim
    fixedParameters['actionDim'] = actiondim
    fixedParameters['replaceiter'] = 100
    fixedParameters['initepsilon'] = 0.9
    fixedParameters['minepsilon'] = 0.1
    fixedParameters['epsilondec'] = 0.001


    learningRate = [0.1,0.01,0.001]
    buffersize = [1000,5000,20000]

    modelSaveDirectory = "/path/to/logs/trainedDQNModels"
    modelSaveExtension = '.ckpt'
    getSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension)

    evaluateModel = EvaluateLearningrateAndbatchsize(fixedParameters, getSavePath, saveModel= True)

    levelValues = [learningRate,buffersize]
    levelNames = ["learningRate", "buffersize"]
    
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    
    toSplitFrame = pd.DataFrame(index = modelIndex)

    modelResultDf = toSplitFrame.groupby(levelNames).apply(evaluateModel)
    
    

    plotRowNum = len(learningRate)
    plotColNum = len(buffersize)
    plotCounter = 1
    axs = []
    figure = plt.figure(figsize=(12, 10))
    
    for keyCol, outterSubDf in modelResultDf.groupby('buffersize'):
        for keyRow, innerSubDf in outterSubDf.groupby("learningRate"):
            subplot = figure.add_subplot(plotRowNum, plotColNum, plotCounter)
            axs.append(subplot)
            plotCounter += 1
            plt.ylim([0, 300])
            innerSubDf.T.plot(ax=subplot)
    dirName = os.path.dirname(__file__)
    plotPath = os.path.join(dirName,'plots')
    plt.savefig(os.path.join(plotPath, 'CartpoleEvaluation'))
    plt.show()
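Example #3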
def main():
    independentVariables = OrderedDict()
    independentVariables['actionDim'] = [3, 7, 11, 15]
    independentVariables['epsilonIncrease'] = [1e-5, 1e-4, 2e-4, 1e-3]

    # independentVariables['actionDim'] = [3, 15]
    # independentVariables['epsilonIncrease'] = [1e-5, 1e-3]

    modelSaveDirectory = "../trainedDQNModels"
    modelSaveExtension = '.ckpt'
    getSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension)

    evaluate = EvaluateNoiseAndMemorySize(getSavePath, saveModel=False)

    levelNames = list(independentVariables.keys())
    levelValues = list(independentVariables.values())
    levelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=levelIndex)
    resultDF = toSplitFrame.groupby(levelNames).apply(evaluate)

    nCols = len(independentVariables['actionDim'])
    nRows = len(independentVariables['epsilonIncrease'])
    numplot = 1
    axs = []
    figure = plt.figure(figsize=(12, 10))
    for keyCol, outterSubDf in resultDF.groupby('epsilonIncrease'):
        for keyRow, innerSubDf in outterSubDf.groupby('actionDim'):
            subplot = figure.add_subplot(nRows, nCols, numplot)
            axs.append(subplot)
            numplot += 1
            plt.ylim([-1600, 50])
            innerSubDf.T.plot(ax=subplot)

    dirName = os.path.dirname(__file__)
    plotPath = os.path.join(dirName, '..', 'plots')
    plt.savefig(os.path.join(plotPath, 'pendulumEvaluation'))
    plt.show()
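Example #4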
def simuliateOneParameter(parameterDict):


    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2


    wolfColor = np.array([0.85, 0.35, 0.35])
    sheepColor = np.array([0.35, 0.85, 0.35])
    blockColor = np.array([0.25, 0.25, 0.25])

    wolvesID = [0,1]
    sheepsID = [2]
    blocksID = [3]

    numWolves = len(wolvesID)
    numSheeps = len(sheepsID)
    numBlocks = len(blocksID)

    sheepMaxSpeed = 1.3
    wolfMaxSpeed =1.0
    blockMaxSpeed = None



    agentsMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    entitiesSizeList = [wolfSize]* numWolves + [sheepSize] * numSheeps + [blockSize]* numBlocks
    entityMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed]* numBlocks
    entitiesMovableList = [True]* numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)

    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    makePropertyList=MakePropertyList(transferNumberListToStr)

    # change collision-relevant parameters
    dmin=parameterDict['dmin']
    dmax=parameterDict['dmax']
    width=parameterDict['width']
    midpoint=parameterDict['midpoint']
    power=parameterDict['power']
    timeconst=parameterDict['timeconst']
    dampratio=parameterDict['dampratio']

    geomIds=[1,2]
    keyNameList=['@solimp','@solref']
    valueList=[[[dmin,dmax,width,midpoint,power],[timeconst,dampratio]]]*len(geomIds)
    collisionParameter=makePropertyList(geomIds,keyNameList,valueList)
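    # '@solimp' is MuJoCo's [dmin, dmax, width, midpoint, power] and '@solref' is [timeconst, dampratio];
    # both are written into geoms 1 and 2 of the parsed XML below.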

    # changeSize
    # geomIds=[1,2]
    # keyNameList=['@size']
    # valueList=[[[0.075,0.075]],[[0.1,0.1]]]
    # sizeParameter=makePropertyList(geomIds,keyNameList,valueList)

    # load the env XML and change some geoms' properties
    # physicsDynamicsPath=os.path.join(dirName,'multiAgentcollision.xml')
    physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','multiAgentcollision.xml')
    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    print(envXmlDict)
    envXmlPropertyDictList=[collisionParameter]
    changeEnvXmlPropertFuntionyList=[changeGeomProperty]
    for propertyDict,changeXmlProperty in zip(envXmlPropertyDictList,changeEnvXmlPropertFuntionyList):
        envXmlDict=changeXmlProperty(envXmlDict,propertyDict)

    envXml=xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)


    # MDP function
    qPosInit = [0, 0]*numAgents
    qVelInit = [0, 0]*numAgents

    qVelInitNoise = 0
    qPosInitNoise = 1

    # reset=ResetUniformWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents, numBlocks,qPosInitNoise, qVelInitNoise)

    fixReset=ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents,numBlocks)
    blocksState=[[0,0,0,0]]
    reset=lambda :fixReset([-0.5,0.8,-0.5,0,0.5,0.8],[0,0,0,0,0,0],blocksState)


    isTerminal = lambda state: False
    numSimulationFrames = 1
    visualize=False
    # physicsViewer=None
    dt=0.1
    damping=2.5
    transit = TransitionFunctionWithoutXPos(physicsSimulation,numAgents , numSimulationFrames,damping*dt/numSimulationFrames,agentsMaxSpeedList,visualize,isTerminal)


    maxRunningSteps = 100
    sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, rewardFunc, reset)
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]
    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape for obsID in range(len(initObsForParams))]

    worldDim = 2
    evalNum=1

    trajectorySaveExtension = '.pickle'
    fixedParameters = {'isMujoco': 1,'isCylinder':1,'evalNum':evalNum}
    trajectoriesSaveDirectory = os.path.join(dirName, '..', 'trajectory')
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parameterDict)


    if not os.path.isfile(trajectorySavePath):
        trajList =list()

        for i in range(evalNum):

            # policy =lambda state: [[-3,0] for agent in range(numAgents)]
            np.random.seed(i)
            # policy = lambda state: [np.random.uniform(-5, 5, 2) for agent in range(numAgents)]
            # policy =lambda state: [[0,1] for agent in range(numAgents)]
            policy =lambda state: [[1,0] ]
            # policy =lambda state: [[np.random.uniform(0,1,1),0] ,[np.random.uniform(-1,0,1),0] ]
            traj = sampleTrajectory(policy)
            # print(i,'traj',[state[1] for state in traj[:2]])
            # print(traj)
            trajList.append( traj)

        # saveTraj
        saveTraj = True
        if saveTraj:
            # trajSavePath = os.path.join(dirName,'traj', 'evaluateCollision', 'CollisionMoveTransitDamplingCylinder.pickle')

            saveToPickle(trajList, trajectorySavePath)

        # visualize

        # physicsViewer.render()
        visualize = True
        if visualize:
            entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [blockColor] * numBlocks
            render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
            render(trajList)
Example #5
def main():
    numAgents = 2
    stateDim = numAgents * 2
    actionLow = -1
    actionHigh = 1
    actionBound = (actionHigh - actionLow) / 2
    actionDim = 2

    buildActorModel = BuildActorModel(stateDim, actionDim, actionBound)
    actorLayerWidths = [64]
    actorWriter, actorModel = buildActorModel(actorLayerWidths)

    buildCriticModel = BuildCriticModel(stateDim, actionDim)
    criticLayerWidths = [64]
    criticWriter, criticModel = buildCriticModel(criticLayerWidths)

    trainCriticBySASRQ = TrainCriticBySASRQ(learningRateCritic, gamma,
                                            criticWriter)
    trainCritic = TrainCritic(actByPolicyTarget, evaluateCriticTarget,
                              trainCriticBySASRQ)

    trainActorFromGradients = TrainActorFromGradients(learningRateActor,
                                                      actorWriter)
    trainActorOneStep = TrainActorOneStep(actByPolicyTrain,
                                          trainActorFromGradients,
                                          getActionGradients)
    trainActor = TrainActor(trainActorOneStep)

    paramUpdateInterval = 1
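    # Presumably a soft target-network update, theta_target <- tau * theta + (1 - tau) * theta_target,
    # applied every paramUpdateInterval training steps.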
    updateParameters = UpdateParameters(paramUpdateInterval, tau)

    modelList = [actorModel, criticModel]
    actorModel, criticModel = resetTargetParamToTrainParam(modelList)
    trainModels = TrainDDPGModels(updateParameters, trainActor, trainCritic,
                                  actorModel, criticModel)

    noiseInitVariance = 1
    varianceDiscount = .9995
    getNoise = GetExponentialDecayGaussNoise(noiseInitVariance,
                                             varianceDiscount,
                                             noiseDecayStartStep)
    actOneStepWithNoise = ActDDPGOneStep(actionLow, actionHigh,
                                         actByPolicyTrain, actorModel,
                                         getNoise)

    sampleFromMemory = SampleFromMemory(minibatchSize)
    learnFromBuffer = LearnFromBuffer(learningStartBufferSize,
                                      sampleFromMemory, trainModels)

    sheepId = 0
    wolfId = 1
    getSheepPos = GetAgentPosFromState(sheepId)
    getWolfPos = GetAgentPosFromState(wolfId)

    wolfSpeed = 1
    wolfPolicy = HeatSeekingContinuousDeterministicPolicy(
        getWolfPos, getSheepPos, wolfSpeed)
    # wolfPolicy = lambda state: (0, 0)

    xBoundary = (0, 20)
    yBoundary = (0, 20)
    stayWithinBoundary = StayWithinBoundary(xBoundary, yBoundary)
    physicalTransition = TransitForNoPhysics(getIntendedNextState,
                                             stayWithinBoundary)
    transit = TransitWithSingleWolf(physicalTransition, wolfPolicy)

    sheepAliveBonus = 1 / maxTimeStep
    sheepTerminalPenalty = 20
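    # The per-step alive bonus sums to roughly 1 over a full episode; the terminal (caught) penalty is 20.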

    killzoneRadius = 1
    isTerminal = IsTerminal(getWolfPos, getSheepPos, killzoneRadius)
    getBoundaryPunishment = GetBoundaryPunishment(xBoundary,
                                                  yBoundary,
                                                  sheepIndex=0,
                                                  punishmentVal=10)
    rewardSheep = RewardFunctionCompete(sheepAliveBonus, sheepTerminalPenalty,
                                        isTerminal)
    getReward = RewardSheepWithBoundaryHeuristics(rewardSheep,
                                                  getIntendedNextState,
                                                  getBoundaryPunishment,
                                                  getSheepPos)
    sampleOneStep = SampleOneStep(transit, getReward)

    runDDPGTimeStep = RunTimeStep(actOneStepWithNoise, sampleOneStep,
                                  learnFromBuffer)

    # reset = Reset(xBoundary, yBoundary, numAgents)
    # reset = lambda: np.array([10, 3, 15, 8]) #all [-1, -1] action
    # reset = lambda: np.array([15, 8, 10, 3]) # all [1. 1.]
    # reset = lambda: np.array([15, 10, 10, 10])
    reset = lambda: np.array([10, 10, 15, 5])

    runEpisode = RunEpisode(reset, runDDPGTimeStep, maxTimeStep, isTerminal)

    ddpg = RunAlgorithm(runEpisode, maxEpisode)

    replayBuffer = deque(maxlen=int(bufferSize))
    meanRewardList, trajectory = ddpg(replayBuffer)

    trainedActorModel, trainedCriticModel = trainModels.getTrainedModels()

    modelIndex = 0
    actorFixedParam = {'actorModel': modelIndex}
    criticFixedParam = {'criticModel': modelIndex}
    parameters = {
        'wolfSpeed': wolfSpeed,
        'dimension': actionDim,
        'maxEpisode': maxEpisode,
        'maxTimeStep': maxTimeStep,
        'minibatchSize': minibatchSize,
        'gamma': gamma,
        'learningRateActor': learningRateActor,
        'learningRateCritic': learningRateCritic
    }

    modelSaveDirectory = "../trainedDDPGModels"
    modelSaveExtension = '.ckpt'
    getActorSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension,
                                   actorFixedParam)
    getCriticSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension,
                                    criticFixedParam)
    savePathActor = getActorSavePath(parameters)
    savePathCritic = getCriticSavePath(parameters)

    with actorModel.as_default():
        saveVariables(trainedActorModel, savePathActor)
    with criticModel.as_default():
        saveVariables(trainedCriticModel, savePathCritic)

    plotResult = True
    if plotResult:
        plt.plot(list(range(maxEpisode)), meanRewardList)
        plt.show()
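These examples save every model and result to a path derived from its hyperparameters through GetSavePath. The project's own implementation is not shown here; the following is only a minimal, hypothetical sketch of that naming idea (the class name GetSavePathSketch and the exact filename format are assumptions, not the project's API):

import os

class GetSavePathSketch:
    """Hypothetical stand-in: build 'key=value_key=value<ext>' paths inside a directory."""
    def __init__(self, directory, extension, fixedParameters=None):
        self.directory = directory
        self.extension = extension
        self.fixedParameters = fixedParameters or {}

    def __call__(self, parameters):
        # merge fixed and per-run parameters, then sort keys for a stable filename
        allParameters = dict(self.fixedParameters, **parameters)
        name = '_'.join('{}={}'.format(key, allParameters[key]) for key in sorted(allParameters))
        return os.path.join(self.directory, name + self.extension)

# e.g. GetSavePathSketch('../trainedDDPGModels', '.ckpt', {'actorModel': 0})({'gamma': 0.9})
# -> '../trainedDDPGModels/actorModel=0_gamma=0.9.ckpt'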
Example #6
def main():
    statedim = env.observation_space.shape[0]
    actiondim = env.action_space.shape[0]
    actionHigh = env.action_space.high
    actionLow = env.action_space.low
    actionBound = (actionHigh - actionLow)/2
    
    replaybuffer = deque(maxlen=buffersize)
    paramUpdateFrequency = 1
    totalrewards = []
    meanreward = []
    totalreward = []
    
    buildActorModel = BuildActorModel(statedim, actiondim, actionBound)
    actorWriter, actorModel = buildActorModel(actornumberlayers)
    
    buildCriticModel = BuildCriticModel(statedim, actiondim)
    criticWriter, criticModel = buildCriticModel(criticnumberlayers)

    trainCritic = TrainCritic(criticlearningRate, gamma, criticWriter)
    trainActor = TrainActor(actorlearningRate, actorWriter)
    updateParameters = UpdateParameters(tau,paramUpdateFrequency)

    actorModel= ReplaceParameters(actorModel)
    criticModel= ReplaceParameters(criticModel)
    
    trainddpgModels = TrainDDPGModels(updateParameters, trainActor, trainCritic, actorModel, criticModel)
    
    getnoise = GetNoise(noiseDecay,minVar,noiseDacayStep)
    getnoiseaction = GetNoiseAction(actorModel,actionLow, actionHigh)
    learn = Learn(buffersize,batchsize,trainddpgModels,actiondim)
    runtime = 0
    trajectory = []
    noisevar = initnoisevar
    for episode in range(EPISODE):
        state  = env.reset()
        rewards = 0
        for i in range(maxTimeStep):
            env.render()
            noise,noisevar = getnoise(runtime,noisevar)
            noiseaction = getnoiseaction(state,noise)
            nextstate,reward,done,info = env.step(noiseaction)
            learn(replaybuffer,state, noiseaction, nextstate,reward)
            trajectory.append((state, noiseaction, nextstate,reward))
            rewards += reward
            state = nextstate
            runtime += 1
            print(actionHigh,actionLow)
        if i == maxTimeStep-1:
            totalrewards.append(rewards)
            totalreward.append(rewards)
            print('episode: ',episode,'reward:',rewards, 'noisevar',noisevar)
            
        if episode % 100 == 0:
            meanreward.append(np.mean(totalreward))
            print('episode: ',episode,'meanreward:',np.mean(totalreward))
            totalreward = []
    plt.plot(range(EPISODE),totalrewards)
    plt.xlabel('episode')
    plt.ylabel('rewards')
    plt.show()
    # save models
    modelIndex = 0
    actorFixedParam = {'actorModel': modelIndex}
    criticFixedParam = {'criticModel': modelIndex}
    parameters = {'env': Env_name, 'Eps': EPISODE,  'batchsize': batchsize,'buffersize': buffersize,'maxTimeStep':maxTimeStep,
                  'gamma': gamma, 'actorlearningRate': actorlearningRate, 'criticlearningRate': criticlearningRate,
                  'tau': tau, 'noiseDecay': noiseDecay, 'minVar': minVar, 'initnoisevar': initnoisevar}

    modelSaveDirectory = "/path/to/logs/trainedDDPGModels"
    modelSaveExtension = '.ckpt'
    getActorSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension, actorFixedParam)
    getCriticSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension, criticFixedParam)
    savePathActor = getActorSavePath(parameters)
    savePathCritic = getCriticSavePath(parameters)

    with actorModel.as_default():
        saveVariables(actorModel, savePathActor)
    with criticModel.as_default():
        saveVariables(criticModel, savePathCritic)
        
    dirName = os.path.dirname(__file__)
    trajectoryPath = os.path.join(dirName,'trajectory', 'HopperTrajectory.pickle')
    saveToPickle(trajectory, trajectoryPath)
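Example #7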
def main():
    manipulatedVariables = OrderedDict()
    #solimp
    manipulatedVariables['dmin'] = [0.9]
    manipulatedVariables['dmax'] = [0.9999]
    manipulatedVariables['width'] = [0.001]
    manipulatedVariables['midpoint'] = [0.5]#useless
    manipulatedVariables['power'] = [1]#useless
    #solref
    manipulatedVariables['timeconst'] = [0.02,0.4]#[0.1,0.2,0.4,0.6]
    manipulatedVariables['dampratio'] = [0.5]


    # timeconst: 0.1-0.2
    # dmin: 0.5-dmax
    # dmax: 0.9-0.9999
    # dampratio: 0.3-0.8


    productedValues = it.product(*[[(key, value) for value in values] for key, values in manipulatedVariables.items()])
    parametersAllCondtion = [dict(list(specificValueParameter)) for specificValueParameter in productedValues]
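    # it.product over the per-key [(key, value), ...] lists yields one tuple of (key, value) pairs
    # per combination; dict() turns each tuple into a {parameter: value} condition.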

    for parameterOneCondiiton in parametersAllCondtion:
        print(parameterOneCondiiton)
        simuliateOneParameter(parameterOneCondiiton)


    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)
    # save evaluation trajectories
    dirName = os.path.dirname(__file__)
    dataFolderName=os.path.join(dirName,'..')
    trajectoryDirectory = os.path.join(dataFolderName, 'trajectory')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)




    # originEnvTrajSavePath =os.path.join(dirName,'traj','evaluateCollision','collisionMoveOriginBaseLine.pickle')
    originEnvTrajSavePath =os.path.join(dirName,'..','trajectory','collisionMoveOriginBaseLine.pickle')
    originTraj=loadFromPickle(originEnvTrajSavePath)[0]
    originTrajdata=[[],[],[],[]]

    originTrajdata[0]=[s[0][0][:2] for s in originTraj]
    originTrajdata[1]=[s[0][1][:2] for s in originTraj]
    originTrajdata[2]=[s[0][0][2:] for s in originTraj]
    originTrajdata[3]=[s[0][1][2:] for s in originTraj]
    originTraj=loadFromPickle(originEnvTrajSavePath)

    trajectoryExtension = '.pickle'
    fixedParameters = {'isMujoco': 1,'isCylinder':1}

    generateTrajectorySavePath = GetSavePath(trajectoryDirectory, trajectoryExtension, fixedParameters)

    getparameter= GetSavePath('', '', {})
    def parseTraj(traj):
        x=[s[0] for s in traj]
        y=[s[1] for s in traj]
        return x,y
    for i, parameterOneCondiiton in enumerate(parametersAllCondtion):
        mujocoTrajdata=[[],[],[],[]]

        trajectorySavePath = generateTrajectorySavePath(parameterOneCondiiton)
        mujocoTraj=loadFromPickle(trajectorySavePath)
        mujocoTrajdata[0]=[s[0][0][:2] for s in mujocoTraj]
        mujocoTrajdata[1]=[s[0][1][:2] for s in mujocoTraj]
        mujocoTrajdata[2]=[s[0][0][2:] for s in mujocoTraj]
        mujocoTrajdata[3]=[s[0][1][2:] for s in mujocoTraj]
        fig = plt.figure(i)
        numRows = 2
        numColumns = 2
        plotCounterNum = numRows*numColumns
        subName=['wolf0Pos(vs.Sheep)','wolf1Pos(vs.None)','wolf0Vel(vs.Sheep)','wolf1Vel(vs.None)']
        for plotCounter in range(plotCounterNum):

            axForDraw = fig.add_subplot(numRows, numColumns, plotCounter+1)


            axForDraw.set_title(subName[plotCounter])
            # axForDraw.set_ylim(-0.5, 1)
            # axForDraw.set_xlim(-1, 1)
            originX,originY=parseTraj(originTrajdata[plotCounter])
            mujocoX,mujocoY=parseTraj(mujocoTrajdata[plotCounter])

            plt.scatter(range(len(originX)),originX,s=5,c='red',marker='x',label='origin')
            plt.scatter(range(len(mujocoX)),mujocoX,s=5,c='blue',marker='v',label='mujoco')

        numSimulationFrames=10
        plt.suptitle(getparameter(parameterOneCondiiton)+'numSimulationFrames={}'.format(numSimulationFrames))
        # plt.suptitle('OriginEnv')
        plt.legend(loc='best')

    plt.show()
Example #8
def main():
    manipulatedVariables = OrderedDict()
    #solimp
    # manipulatedVariables['dmin'] = [0.9]
    # manipulatedVariables['dmax'] = [0.9999]
    # manipulatedVariables['width'] = [0.001]
    # manipulatedVariables['midpoint'] = [0.5]#useless
    # manipulatedVariables['power'] = [1]#useless
    #solref
    manipulatedVariables['timeconst'] = [0.5]
    manipulatedVariables['dampratio'] = [0.2]
    manipulatedVariables['damping'] = [0.25, 2.5, 5]
    eavluateParmetersList = ['dampratio', 'damping']
    lineParameter = ['timeconst']
    prametersToDrop = list(
        set(manipulatedVariables.keys()) -
        set(eavluateParmetersList + lineParameter))
    print(prametersToDrop, manipulatedVariables)

    # timeconst: 0.1-0.2
    # dmin: 0.5-dmax
    # dmax:0.9-0.9999
    # dampratio:0.3-0.8
    productedValues = it.product(
        *[[(key, value) for value in values]
          for key, values in manipulatedVariables.items()])
    parametersAllCondtion = [
        dict(list(specificValueParameter))
        for specificValueParameter in productedValues
    ]

    evalNum = 1
    randomSeed = 1542
    dt = 0.02  #0.05

    dirName = os.path.dirname(__file__)
    dataFolderName = os.path.join(dirName, '..')
    trajectoryDirectory = os.path.join(dataFolderName, 'trajectory',
                                       'Linedt={}'.format(dt))
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    for parameterOneCondiiton in parametersAllCondtion:
        # print(parameterOneCondiiton,evalNum,randomSeed)
        simuliateOneParameter(parameterOneCondiiton, evalNum, randomSeed, dt)

    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)
    # save evaluation trajectories

    trajectoryExtension = '.pickle'
    trajectoryFixedParameters = {
        'isMujoco': 1,
        'isCylinder': 1,
        'evalNum': evalNum,
        'randomSeed': randomSeed
    }

    generateTrajectorySavePath = GetSavePath(trajectoryDirectory,
                                             trajectoryExtension,
                                             trajectoryFixedParameters)

    originEnvTrajSavePath = os.path.join(dirName, '..', 'trajectory',
                                         'LineMoveOriginBaseLine.pickle')
    originTraj = loadFromPickle(originEnvTrajSavePath)[0]
    originTrajdata = [[], [], [], []]

    originTrajdata[0] = [s[0][0][:2] for s in originTraj]
    originTrajdata[1] = [s[0][0][2:] for s in originTraj]
    originTraj = loadFromPickle(originEnvTrajSavePath)

    fixedOriginEnvTrajSavePath = os.path.join(dirName, '..', 'trajectory',
                                              'LineMoveFixedBaseLine.pickle')
    fixedOriginTraj = loadFromPickle(fixedOriginEnvTrajSavePath)[0]
    fixedOriginTrajdata = [[], [], [], []]

    fixedOriginTrajdata[0] = [s[0][0][:2] for s in fixedOriginTraj]
    fixedOriginTrajdata[1] = [s[0][0][2:] for s in fixedOriginTraj]
    fixedOriginTraj = loadFromPickle(originEnvTrajSavePath)

    # trajectoryExtension = '.pickle'
    # fixedParameters = {'isMujoco': 1,'isCylinder':1}

    # generateTrajectorySavePath = GetSavePath(trajectoryDirectory, trajectoryExtension, fixedParameters)

    getparameter = GetSavePath('', '', {})

    def parseTraj(traj):
        # print(traj)
        x = [s[0] for s in traj]
        y = [s[1] for s in traj]
        return x, y

    for i, parameterOneCondiiton in enumerate(parametersAllCondtion):
        mujocoTrajdata = [[], [], [], []]

        trajectorySavePath = generateTrajectorySavePath(parameterOneCondiiton)
        mujocoTraj = loadFromPickle(trajectorySavePath)
        mujocoTrajdata[0] = [s[0][0][:2] for s in mujocoTraj]
        # mujocoTrajdata[1]=[s[0][1][:2] for s in mujocoTraj]
        mujocoTrajdata[1] = [s[0][0][2:] for s in mujocoTraj]
        # mujocoTrajdata[3]=[s[0][1][2:] for s in mujocoTraj]
        fig = plt.figure(i)
        numRows = 1
        numColumns = 2
        plotCounterNum = numRows * numColumns
        subName = ['Pos', 'Vel']
        for plotCounter in range(plotCounterNum):

            axForDraw = fig.add_subplot(numRows, numColumns, plotCounter + 1)

            axForDraw.set_title(subName[plotCounter])
            # axForDraw.set_ylim(-0.5, 1)
            # axForDraw.set_xlim(-1, 1)
            originX, originY = parseTraj(originTrajdata[plotCounter])
            mujocoX, mujocoY = parseTraj(mujocoTrajdata[plotCounter])
            fixedOriginX, fixedOriginY = parseTraj(
                fixedOriginTrajdata[plotCounter])
            plt.scatter(range(len(originX)),
                        originX,
                        s=5,
                        c='red',
                        marker='x',
                        label='origin')
            plt.scatter(range(len(fixedOriginX)),
                        fixedOriginX,
                        s=5,
                        c='green',
                        marker='x',
                        label='fixedOrigin')
            # print(mujocoX)
            plt.scatter(range(len(mujocoX)),
                        mujocoX,
                        s=5,
                        c='blue',
                        marker='v',
                        label='mujoco')

        numSimulationFrames = int(0.1 / dt)
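        # presumably 0.1 s of simulated time per environment step, split into dt-sized MuJoCo frames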
        plt.suptitle(
            getparameter(parameterOneCondiiton) +
            'numSimulationFrames={}'.format(numSimulationFrames))
        # plt.suptitle('OriginEnv')
        plt.legend(loc='best')

    plt.show()
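Example #9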
def main():
    stateDim = env.observation_space.shape[0]
    actionDim = env.action_space.shape[0]
    actionHigh = env.action_space.high
    actionLow = env.action_space.low
    actionBound = (actionHigh - actionLow) / 2

    buildActorModel = BuildActorModel(stateDim, actionDim, actionBound)
    actorLayerWidths = [30]
    actorWriter, actorModel = buildActorModel(actorLayerWidths)

    buildCriticModel = BuildCriticModel(stateDim, actionDim)
    criticLayerWidths = [30]
    criticWriter, criticModel = buildCriticModel(criticLayerWidths)

    trainCriticBySASRQ = TrainCriticBySASRQ(learningRateCritic, gamma,
                                            criticWriter)
    trainCritic = TrainCritic(actByPolicyTarget, evaluateCriticTarget,
                              trainCriticBySASRQ)

    trainActorFromGradients = TrainActorFromGradients(learningRateActor,
                                                      actorWriter)
    trainActorOneStep = TrainActorOneStep(actByPolicyTrain,
                                          trainActorFromGradients,
                                          getActionGradients)
    trainActor = TrainActor(trainActorOneStep)

    paramUpdateInterval = 1
    updateParameters = UpdateParameters(paramUpdateInterval, tau)

    modelList = [actorModel, criticModel]
    actorModel, criticModel = resetTargetParamToTrainParam(modelList)
    trainModels = TrainDDPGModels(updateParameters, trainActor, trainCritic,
                                  actorModel, criticModel)

    noiseInitVariance = 1  # control exploration
    varianceDiscount = .99995
    noiseDecayStartStep = bufferSize
    minVar = .1
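    # Gaussian exploration noise whose variance decays by varianceDiscount per step once
    # noiseDecayStartStep steps have elapsed, floored at minVar (inferred from the argument names).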
    getNoise = GetExponentialDecayGaussNoise(noiseInitVariance,
                                             varianceDiscount,
                                             noiseDecayStartStep, minVar)
    actOneStepWithNoise = ActDDPGOneStep(actionLow, actionHigh,
                                         actByPolicyTrain, actorModel,
                                         getNoise)

    learningStartBufferSize = minibatchSize
    sampleFromMemory = SampleFromMemory(minibatchSize)
    learnFromBuffer = LearnFromBuffer(learningStartBufferSize,
                                      sampleFromMemory, trainModels)

    transit = TransitGymMountCarContinuous()
    isTerminal = IsTerminalMountCarContin()
    getReward = RewardMountCarContin(isTerminal)
    sampleOneStep = SampleOneStep(transit, getReward)

    runDDPGTimeStep = RunTimeStep(actOneStepWithNoise, sampleOneStep,
                                  learnFromBuffer)

    resetLow = -1
    resetHigh = 0.4
    reset = ResetMountCarContin(seed=None)
    runEpisode = RunEpisode(reset, runDDPGTimeStep, maxTimeStep, isTerminal)

    ddpg = RunAlgorithm(runEpisode, maxEpisode)
    replayBuffer = deque(maxlen=int(bufferSize))
    meanRewardList, trajectory = ddpg(replayBuffer)

    trainedActorModel, trainedCriticModel = trainModels.getTrainedModels()

    # save Model
    modelIndex = 0
    actorFixedParam = {'actorModel': modelIndex}
    criticFixedParam = {'criticModel': modelIndex}
    parameters = {
        'env': ENV_NAME,
        'Eps': maxEpisode,
        'timeStep': maxTimeStep,
        'batch': minibatchSize,
        'gam': gamma,
        'lrActor': learningRateActor,
        'lrCritic': learningRateCritic,
        'noiseVar': noiseInitVariance,
        'varDiscout': varianceDiscount,
        'resetLow': resetLow,
        'High': resetHigh
    }

    modelSaveDirectory = "../trainedDDPGModels"
    modelSaveExtension = '.ckpt'
    getActorSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension,
                                   actorFixedParam)
    getCriticSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension,
                                    criticFixedParam)
    savePathActor = getActorSavePath(parameters)
    savePathCritic = getCriticSavePath(parameters)

    with actorModel.as_default():
        saveVariables(trainedActorModel, savePathActor)
    with criticModel.as_default():
        saveVariables(trainedCriticModel, savePathCritic)

    dirName = os.path.dirname(__file__)
    trajectoryPath = os.path.join(dirName, '..', 'trajectory',
                                  'mountCarTrajectoryOriginalReset1.pickle')
    saveToPickle(trajectory, trajectoryPath)

    # demo & plot
    showDemo = False
    if showDemo:
        visualize = VisualizeMountCarContin()
        visualize(trajectory)

    plotResult = True
    if plotResult:
        plt.plot(list(range(maxEpisode)), meanRewardList)
        plt.show()
Example #10
def simuliateOneParameter(parameterOneCondiiton, evalNum, randomSeed):
    saveTraj = True
    visualizeTraj = False
    visualizeMujoco = False

    numWolves = parameterOneCondiiton['numWolves']
    numSheeps = parameterOneCondiiton['numSheeps']
    numBlocks = parameterOneCondiiton['numBlocks']
    timeconst = parameterOneCondiiton['timeconst']
    dampratio = parameterOneCondiiton['dampratio']
    maxTimeStep = parameterOneCondiiton['maxTimeStep']
    sheepSpeedMultiplier = parameterOneCondiiton['sheepSpeedMultiplier']
    individualRewardWolf = parameterOneCondiiton['individualRewardWolf']
    hasWalls = parameterOneCondiiton['hasWalls']
    dt = parameterOneCondiiton['dt']

    print(
        "maddpg: {} wolves, {} sheep, {} blocks, saveTraj: {}, visualize: {}".
        format(numWolves, numSheeps, numBlocks, str(saveTraj),
               str(visualizeTraj)))

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks

    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList,
                                          isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                isCollision)

    rewardFunc = lambda state, action, nextState: list(
        rewardWolf(state, action, nextState)) + list(
            rewardSheep(state, action, nextState))
    dirName = os.path.dirname(__file__)

    if hasWalls:

        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'dt={}'.format(dt),
            'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(hasWalls, numBlocks, numSheeps, numWolves, timeconst,
                    dampratio))

    else:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(numBlocks, numSheeps, numWolves, timeconst, dampratio))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXml = xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    # print(physicsSimulation.model.body_pos)

    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
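    # initial-state noise and block placement scale with hasWalls (presumably the wall half-width); blocks start at rest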
    qVelInitNoise = 0 * hasWalls
    qPosInitNoise = 0.8 * hasWalls
    getBlockRandomPos = lambda: np.random.uniform(-0.7 * hasWalls, +0.7 *
                                                  hasWalls, 2)
    getBlockSpeed = lambda: np.zeros(2)

    numQPos = len(physicsSimulation.data.qpos)
    numQVel = len(physicsSimulation.data.qvel)

    sampleAgentsQPos = lambda: np.asarray(qPosInit) + np.random.uniform(
        low=-qPosInitNoise, high=qPosInitNoise, size=numQPos)
    sampleAgentsQVel = lambda: np.asarray(qVelInit) + np.random.uniform(
        low=-qVelInitNoise, high=qVelInitNoise, size=numQVel)

    minDistance = 0.2 + 2 * blockSize
    isOverlap = IsOverlap(minDistance)

    sampleBlockState = SampleBlockState(numBlocks, getBlockRandomPos,
                                        getBlockSpeed, isOverlap)

    reset = ResetUniformWithoutXPos(physicsSimulation, numAgents, numBlocks,
                                    sampleAgentsQPos, sampleAgentsQVel,
                                    sampleBlockState)

    damping = 2.5
    numSimulationFrames = int(0.1 / dt)
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed
                                                       ] * numSheeps
    reshapeAction = ReshapeAction()
    isTerminal = lambda state: False
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numAgents,
                                            numSimulationFrames,
                                            damping * dt * numSimulationFrames,
                                            agentsMaxSpeedList,
                                            visualizeMujoco, isTerminal,
                                            reshapeAction)

    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit,
                                        isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]

    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
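    # 5-dimensional action: presumably one no-op component plus force components along +/-x and +/-y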
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]

    if hasWalls:

        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPGeavluateWall',
            'dt={}'.format(dt),
            'hasWalls=' + str(hasWalls) + 'numBlocks=' + str(numBlocks) +
            'numSheeps=' + str(numSheeps) + 'numWolves=' + str(numWolves) +
            'timeconst=' + str(timeconst) + 'dampratio=' + str(dampratio) +
            'individualRewardWolf=' + str(individualRewardWolf) +
            'sheepSpeedMultiplier=' + str(sheepSpeedMultiplier))
        individStr = 'individ' if individualRewardWolf else 'shared'
        fileName = "maddpghasWalls={}{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
            hasWalls, numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, individStr)

    else:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPG',
            'numBlocks=' + str(numBlocks) + 'numSheeps=' + str(numSheeps) +
            'numWolves=' + str(numWolves) + 'timeconst=' + str(timeconst) +
            'dampratio=' + str(dampratio))
        fileName = "maddpg{}wolves{}sheep{}blocks60000episodes25stepSheepSpeed1.0shared_agent".format(
            numWolves, numSheeps, numBlocks)
    modelPaths = [
        os.path.join(modelFolder, fileName + str(i) + '60000eps')
        for i in range(numAgents)
    ]

    [
        restoreVariables(model, path)
        for model, path in zip(modelsList, modelPaths)
    ]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [
        actOneStepOneModel(model, observe(allAgentsStates))
        for model in modelsList
    ]

    startTime = time.time()
    trajList = []
    for i in range(evalNum):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))
    endTime = time.time()
    print("Time taken {} seconds to generate {} trajectories".format(
        (endTime - startTime), evalNum))

    # saveTraj
    if saveTraj:

        trajectoryFolder = os.path.join(dirName, '..', 'trajectory',
                                        'evluateWall')
        if not os.path.exists(trajectoryFolder):
            os.makedirs(trajectoryFolder)

        trajectorySaveExtension = '.pickle'
        fixedParameters = {'randomSeed': randomSeed, 'evalNum': evalNum}
        generateTrajectorySavePath = GetSavePath(trajectoryFolder,
                                                 trajectorySaveExtension,
                                                 fixedParameters)
        trajectorySavePath = generateTrajectorySavePath(parameterOneCondiiton)

        saveToPickle(trajList, trajectorySavePath)

    # visualize
    if visualizeTraj:
        entitiesColorList = [wolfColor] * numWolves + [
            sheepColor
        ] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        print(trajList[0][0], '!!!3', trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)

    return endTime - startTime
Example #11
def main():

    manipulatedVariables = OrderedDict()

    manipulatedVariables['numWolves'] = [3]
    manipulatedVariables['numSheeps'] = [1]
    manipulatedVariables['numBlocks'] = [2]
    manipulatedVariables['maxTimeStep'] = [25]
    manipulatedVariables['sheepSpeedMultiplier'] = [1.0]
    manipulatedVariables['individualRewardWolf'] = [0]
    manipulatedVariables['timeconst'] = [0.5]
    manipulatedVariables['dampratio'] = [0.2]
    manipulatedVariables['hasWalls'] = [1.0, 1.5, 2.0]
    manipulatedVariables['dt'] = [0.05]

    eavluateParmetersList = ['hasWalls', 'dt']
    lineParameter = ['sheepSpeedMultiplier']
    prametersToDrop = list(
        set(manipulatedVariables.keys()) -
        set(eavluateParmetersList + lineParameter))

    print(prametersToDrop, manipulatedVariables)

    productedValues = it.product(
        *[[(key, value) for value in values]
          for key, values in manipulatedVariables.items()])
    parametersAllCondtion = [
        dict(list(specificValueParameter))
        for specificValueParameter in productedValues
    ]

    evalNum = 500
    randomSeed = 1542
    dt = 0.05  #0.05

    dirName = os.path.dirname(__file__)

    trajectoryDirectory = os.path.join(dirName, '..', 'trajectory',
                                       'evluateWall')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)
    time = []
    for parameterOneCondiiton in parametersAllCondtion:
        # print(parameterOneCondiiton,evalNum,randomSeed)

        sampletime = simuliateOneParameter(parameterOneCondiiton, evalNum,
                                           randomSeed)
        time.append(sampletime)

    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)
    # save evaluation trajectories

    trajectoryExtension = '.pickle'
    trajectoryFixedParameters = {'randomSeed': randomSeed, 'evalNum': evalNum}

    getTrajectorySavePath = GetSavePath(trajectoryDirectory,
                                        trajectoryExtension,
                                        trajectoryFixedParameters)
    getTrajectorySavePathFromDf = lambda df: getTrajectorySavePath(
        readParametersFromDf(df))

    # compute statistics on the trajectories

    fuzzySearchParameterNames = []
    loadTrajectories = LoadTrajectories(getTrajectorySavePath, loadFromPickle,
                                        fuzzySearchParameterNames)
    loadTrajectoriesFromDf = lambda df: loadTrajectories(
        readParametersFromDf(df))

    class CheckStateOfofBound(object):
        """docstring for ClassName"""
        def __init__(self, minDistance):
            self.minDistance = minDistance

        def __call__(self, state):
            absloc = np.abs([agent[:2] for agent in state[0][:-2]])
            return np.any(absloc > self.minDistance)

    class ComputeOutOfBounds:
        def __init__(self, getTrajectories, CheckStateOfofBound):
            self.getTrajectories = getTrajectories
            self.CheckStateOfofBound = CheckStateOfofBound

        def __call__(self, oneConditionDf):
            allTrajectories = self.getTrajectories(oneConditionDf)
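            # out-of-bounds threshold: wall half-width plus a 0.05 margin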
            minDistance = readParametersFromDf(
                oneConditionDf)['hasWalls'] + 0.05

            checkStateOfofBound = CheckStateOfofBound(minDistance)
            # allMeasurements = np.array(self.measurementFunction(allTrajectories, minDistance))
            allMeasurements = np.array([
                checkStateOfofBound(state) for traj in allTrajectories
                for state in traj
            ])
            # print(allMeasurements)
            measurementMean = np.mean(allMeasurements)
            measurementStd = np.std(allMeasurements)
            return pd.Series({'mean': measurementMean, 'std': measurementStd})

    computeStatistics = ComputeOutOfBounds(loadTrajectoriesFromDf,
                                           CheckStateOfofBound)
    statisticsDf = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf)

    for i, parameterOneCondiiton in enumerate(parametersAllCondtion):
        print(parameterOneCondiiton, time[i])
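Example #12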
def main():


    manipulatedVariables = OrderedDict()


    manipulatedVariables['damping'] = [0.0]
    manipulatedVariables['frictionloss'] = [0.0]
    manipulatedVariables['masterForce']=[0.0]

    eavluateParmetersList=['frictionloss','damping']
    lineParameter=['masterForce']
    prametersToDrop=list(set(manipulatedVariables.keys())-set(eavluateParmetersList+lineParameter))


    productedValues = it.product(*[[(key, value) for value in values] for key, values in manipulatedVariables.items()])
    conditions = [dict(list(specificValueParameter)) for specificValueParameter in productedValues]
    evalNum=500
    randomSeed=133




    for condition in conditions:
        print(condition)

        generateSingleCondition(condition,evalNum,randomSeed)


    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)
    # save evaluation trajectories




    trajectoriesSaveDirectory=os.path.join(dirName,'..', 'trajectory','evluateRopeFixedEnv2')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)
    trajectoryExtension = '.pickle'
    trajectoryFixedParameters = { 'randomSeed':randomSeed,'evalNum':evalNum}


    getTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectoryExtension, trajectoryFixedParameters)
    getTrajectorySavePathFromDf = lambda df: getTrajectorySavePath(readParametersFromDf(df))

    # compute statistics on the trajectories


    fuzzySearchParameterNames = []
    loadTrajectories = LoadTrajectories(getTrajectorySavePath, loadFromPickle, fuzzySearchParameterNames)
    loadTrajectoriesFromDf = lambda df: loadTrajectories(readParametersFromDf(df))
    def computeV(traj):
        # [print(s1[0][0]-s2[0][0],np.linalg.norm(s1[0][0][2:]-s2[0][0][2:]),np.linalg.norm(s1[0][0][:2]-s2[0][0][:2])) for s1,s2 in zip (baselineTraj,traj) ]
        vel=[np.linalg.norm(agentState[2:]) for state in traj for agentState in state[0]]
        # print (vel)
        return np.mean(vel)

    measurementFunction = computeV
    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf, measurementFunction)
    statisticsDf = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf)


    darwStatisticsDf=DarwStatisticsDf(manipulatedVariables,eavluateParmetersList,lineParameter)
    subtitle='velocity'
    figIndex=0
    ylimMax=1.2
    darwStatisticsDf(statisticsDf,subtitle,figIndex,ylimMax)



    class fliterMeasurement():
        """Apply splitMeasurement to every length-splitLength window of a trajectory."""
        def __init__(self, splitLength, splitMeasurement):
            self.splitLength = splitLength
            self.splitMeasurement = splitMeasurement
        def __call__(self, traj):
            return [self.splitMeasurement(traj[i:i + self.splitLength]) for i in range(len(traj) - self.splitLength)]

    getWolfPos=lambda state :getPosFromAgentState(state[0][0])
    getSheepfPos=lambda state :getPosFromAgentState(state[0][1])
    minDistance=0.35
    isCaught=IsTerminal(minDistance,getWolfPos,getSheepfPos)
    measurementFunction2 =lambda traj: np.mean([isCaught(state) for state in traj])

    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf, measurementFunction2)
    statisticsDf2 = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf2)
    subtitle='caughtRatio(minDistance={})'.format(minDistance)
    figIndex=figIndex+1
    ylimMax=0.2
    darwStatisticsDf(statisticsDf2,subtitle,figIndex,ylimMax)

    getWolfPos=lambda state :getPosFromAgentState(state[0][0])
    getSheepfPos=lambda state :getPosFromAgentState(state[0][1])
    calculateWolfSheepDistance=lambda state:np.linalg.norm(getWolfPos(state)-getSheepfPos(state))
    measurementFunction3 =lambda traj: np.mean([calculateWolfSheepDistance(state) for state in traj])

    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf, measurementFunction3)
    statisticsDf3 = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf3)
    subtitle='WolfSheepDistance'
    figIndex=figIndex+1
    ylimMax=1.6
    darwStatisticsDf(statisticsDf3,subtitle,figIndex,ylimMax)


    getWolfPos=lambda state :getPosFromAgentState(state[0][0])
    getMasterfPos=lambda state :getPosFromAgentState(state[0][2])
    calculateWolfMasterDistance=lambda state:np.linalg.norm(getWolfPos(state)-getMasterfPos(state))
    measurementFunction3 =lambda traj: np.mean([calculateWolfMasterDistance(state) for state in traj])

    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf, measurementFunction3)
    statisticsDf3 = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf3)
    subtitle='WolfMasterDistance'
    figIndex=figIndex+1
    ylimMax=0.6
    darwStatisticsDf(statisticsDf3,subtitle,figIndex,ylimMax)



    getWolfPos=lambda state :getPosFromAgentState(state[0][0])
    getSheepfPos=lambda state :getPosFromAgentState(state[0][1])

    getWolfVel=lambda state :getVelFromAgentState(state[0][0])

    def calculateCrossAngle(vel1,vel2):
        vel1complex=complex(vel1[0],vel1[1])
        vel2complex=complex(vel2[0],vel2[1])
        return np.abs(np.angle(vel2complex/vel1complex))/np.pi*180
    calculateDevation= lambda state:calculateCrossAngle(getWolfPos(state)-getSheepfPos(state),getWolfVel(state))

    measurementFunction3 =lambda traj: np.mean([calculateDevation(state) for state in traj])

    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf, measurementFunction3)
    statisticsDf = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf)
    subtitle='Deviation'
    figIndex=figIndex+1
    darwStatisticsDf(statisticsDf,subtitle,figIndex)

    plt.show()
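Example #13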
def generateSingleCondition(condition,evalNum,randomSeed):
    debug = 0
    if debug:


        damping=2.0
        frictionloss=0.0
        masterForce=1.0

        numWolves = 1
        numSheeps = 1
        numMasters = 1
        maxTimeStep = 25

        saveTraj=False
        saveImage=True
        visualizeMujoco=False
        visualizeTraj = True
        makeVideo=True
    else:

        # print(sys.argv)
        # condition = json.loads(sys.argv[1])
        damping = float(condition['damping'])
        frictionloss = float(condition['frictionloss'])
        masterForce = float(condition['masterForce'])

        numWolves = 1
        numSheeps = 1
        numMasters = 1
        maxTimeStep = 25

        saveTraj=True
        saveImage=False
        visualizeMujoco=False
        visualizeTraj = False
        makeVideo=False

    print("maddpg: , saveTraj: {}, visualize: {},damping; {},frictionloss: {}".format( str(saveTraj), str(visualizeMujoco),damping,frictionloss))


    numAgents = numWolves + numSheeps+numMasters
    numEntities = numAgents + numMasters
    wolvesID = [0]
    sheepsID = [1]
    masterID = [2]

    wolfSize = 0.075
    sheepSize = 0.05
    masterSize = 0.075
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [masterSize] * numMasters

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None


    entitiesMovableList = [True] * numAgents + [False] * numMasters
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision,punishForOutOfBound)


    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    rewardMaster= lambda state, action, nextState: [-reward  for reward in rewardWolf(state, action, nextState)]
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))+list(rewardMaster(state, action, nextState))

    dirName = os.path.dirname(__file__)
    # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','leased.xml')

    makePropertyList = MakePropertyList(transferNumberListToStr)

    geomIds = [1, 2, 3]
    keyNameList = [0, 1]
    valueList = [[damping, damping]] * len(geomIds)
    dampingParameter = makePropertyList(geomIds, keyNameList, valueList)

    changeJointDampingProperty = lambda envDict, geomPropertyDict: changeJointProperty(envDict, geomPropertyDict, '@damping')

    geomIds = [1, 2, 3]
    keyNameList = [0, 1]
    valueList = [[frictionloss, frictionloss]] * len(geomIds)
    frictionlossParameter = makePropertyList(geomIds, keyNameList, valueList)
    changeJointFrictionlossProperty = lambda envDict, geomPropertyDict: changeJointProperty(envDict, geomPropertyDict, '@frictionloss')


    physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','leasedNew.xml')


    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())

    envXmlPropertyDictList = [dampingParameter, frictionlossParameter]
    changeEnvXmlPropertyFunctionList = [changeJointDampingProperty, changeJointFrictionlossProperty]
    for propertyDict, changeXmlProperty in zip(envXmlPropertyDictList, changeEnvXmlPropertyFunctionList):
        envXmlDict = changeXmlProperty(envXmlDict, propertyDict)

    envXml = xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    # print(physicsSimulation.model.body_pos)
    # print(dir(physicsSimulation.model))
    # print(physicsSimulation.data.qpos, dir(physicsSimulation.data.qpos))
    qPosInit = (0, ) * 24
    qVelInit = (0, ) * 24
    qPosInitNoise = 0.4
    qVelInitNoise = 0
    numAgent = 2
    tiedAgentId = [0, 2]
    ropePartIndex = list(range(3, 12))
    maxRopePartLength = 0.06
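    # Per its arguments, the reset below samples initial positions with uniform noise of
    # qPosInitNoise while keeping agents 0 and 2 tied together through rope parts 3-11, each
    # at most maxRopePartLength long.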
    reset = ResetUniformWithoutXPosForLeashed(physicsSimulation, qPosInit, qVelInit, numAgent, tiedAgentId,ropePartIndex, maxRopePartLength, qPosInitNoise, qVelInitNoise)
    numSimulationFrames=10
    isTerminal= lambda state: False
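    # Assumption from ReshapeAction's argument: wolf and sheep actions are scaled by 5 and the
    # master's action by masterForce before being applied in the MuJoCo transition.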
    reshapeActionList = [ReshapeAction(5),ReshapeAction(5),ReshapeAction(masterForce)]
    transit=TransitionFunctionWithoutXPos(physicsSimulation, numSimulationFrames, visualizeMujoco,isTerminal, reshapeActionList)

    # damping=2.5
    # numSimulationFrames =int(0.1/dt)
    # agentsMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps
    # reshapeAction = ReshapeAction()
    # isTerminal = lambda state: False
    # transit = TransitionFunctionWithoutXPos(physicsSimulation,numAgents , numSimulationFrames,damping*dt*numSimulationFrames,agentsMaxSpeedList,visualizeMujoco,isTerminal,reshapeAction)


    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit, isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, masterID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]

    worldDim = 2
    actionDim = worldDim * 2 + 1
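    # Assumption (multiagent-particle convention): the action vector holds one no-op entry plus
    # +/- force along each of the worldDim axes, hence worldDim * 2 + 1 dimensions.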

    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]


    modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPGLeasedFixedEnv2','damping={}_frictionloss={}_masterForce={}'.format(damping,frictionloss,masterForce))
    fileName = "maddpg{}episodes{}step_agent".format(maxEpisode, maxTimeStep)

    modelPaths = [os.path.join(modelFolder,  fileName + str(i) + '60000eps') for i in range(numAgents)]

    [restoreVariables(model, path) for model, path in zip(modelsList, modelPaths)]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]


    trajList = []
    # numTrajToSample = 20
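    # Each trajectory gets its own seed (randomSeed + i), so repeated runs and different
    # conditions are evaluated against the same sequence of random draws.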
    for i in range(evalNum):
        np.random.seed(randomSeed+i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))

    # saveTraj
    if saveTraj:
        trajectorySaveExtension = '.pickle'
        fixedParameters = { 'randomSeed':randomSeed,'evalNum':evalNum}
        trajectoriesSaveDirectory=os.path.join(dirName,'..', 'trajectory','evluateRopeFixedEnv2')
        if not os.path.exists(trajectoriesSaveDirectory):
            os.makedirs(trajectoriesSaveDirectory)
        generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)
        trajectorySavePath = generateTrajectorySavePath(condition)
        saveToPickle(trajList, trajectorySavePath)

        # trajFileName = "maddpg{}wolves{}sheep{}blocks{}eps{}stepSheepSpeed{}{}Traj".format(numWolves, numSheeps, numMasters, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)
        # trajSavePath = os.path.join(dirName, '..', 'trajectory', trajFileName)
        # saveToPickle(trajList, trajSavePath)


    # visualize
    if visualizeTraj:
        demoFolder = os.path.join(dirName, '..', 'demos', 'mujocoMADDPGLeased','damping={}_frictionloss={}_masterForce={}'.format(damping,frictionloss,masterForce))

        if not os.path.exists(demoFolder):
            os.makedirs(demoFolder)
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [masterColor] * numMasters
        render = Render(entitiesSizeList, entitiesColorList, numAgents,demoFolder,saveImage, getPosFromAgentState)
        # print(trajList[0][0],'!!!3',trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
def main():
    stateDim = env.observation_space.shape[0]
    actionDim = 7
    buildModel = BuildModel(stateDim, actionDim)
    layersWidths = [30]
    writer, model = buildModel(layersWidths)

    learningRate = 0.001
    gamma = 0.99
    trainModelBySASRQ = TrainModelBySASRQ(learningRate, gamma, writer)

    paramUpdateInterval = 300
    updateParameters = UpdateParameters(paramUpdateInterval)
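    # Per the class names, the target network is synchronised with the training network every
    # paramUpdateInterval training steps.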
    model = resetTargetParamToTrainParam([model])[0]
    trainModels = TrainDQNModel(getTargetQValue, trainModelBySASRQ, updateParameters, model)

    epsilonMax = 0.9
    epsilonIncrease = 0.0001
    epsilonMin = 0
    bufferSize = 10000
    decayStartStep = bufferSize
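    # Assumption from the argument names: the epsilon schedule only starts changing after
    # decayStartStep steps, i.e. once the replay buffer could have been filled.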
    getEpsilon = GetEpsilon(epsilonMax, epsilonMin, epsilonIncrease, decayStartStep)

    actGreedyByModel = ActGreedyByModel(getTrainQValue, model)
    actRandom = ActRandom(actionDim)
    actByTrainNetEpsilonGreedy = ActByTrainNetEpsilonGreedy(getEpsilon, actGreedyByModel, actRandom)

    minibatchSize = 128
    learningStartBufferSize = minibatchSize
    sampleFromMemory = SampleFromMemory(minibatchSize)
    learnFromBuffer = LearnFromBuffer(learningStartBufferSize, sampleFromMemory, trainModels)

    processAction = ProcessDiscretePendulumAction(actionDim)
    transit = TransitGymPendulum(processAction)
    getReward = RewardGymPendulum(angle_normalize, processAction)
    sampleOneStep = SampleOneStep(transit, getReward)

    runDQNTimeStep = RunTimeStep(actByTrainNetEpsilonGreedy, sampleOneStep, learnFromBuffer, observe)

    reset = ResetGymPendulum(seed)
    maxTimeStep = 200
    runEpisode = RunEpisode(reset, runDQNTimeStep, maxTimeStep, isTerminalGymPendulum)

    maxEpisode = 400
    dqn = RunAlgorithm(runEpisode, maxEpisode)
    replayBuffer = deque(maxlen=int(bufferSize))
    meanRewardList, trajectory = dqn(replayBuffer)

    trainedModel = trainModels.getTrainedModels()

    # save Model
    parameters = {'maxEpisode': maxEpisode, 'maxTimeStep': maxTimeStep, 'minibatchSize': minibatchSize, 'gamma': gamma,
                  'learningRate': learningRate, 'epsilonIncrease': epsilonIncrease , 'epsilonMin': epsilonMin}

    modelSaveDirectory = "../trainedDQNModels"
    modelSaveExtension = '.ckpt'
    getSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension)
    savePath = getSavePath(parameters)

    with trainedModel.as_default():
        saveVariables(trainedModel, savePath)

    dirName = os.path.dirname(__file__)
    trajectoryPath = os.path.join(dirName, '..', 'trajectory', 'pendulumDQNTrajectory.pickle')
    saveToPickle(trajectory, trajectoryPath)

    plotResult = True
    if plotResult:
        plt.plot(list(range(maxEpisode)), meanRewardList)
        plt.show()

    showDemo = False
    if showDemo:
        visualize = VisualizeGymPendulum()
        visualize(trajectory)
def main():
    env = MtCarDiscreteEnvSetup()
    visualize = visualizeMtCarDiscrete()
    reset = resetMtCarDiscrete(1234)
    transition = MtCarDiscreteTransition()
    rewardMtCar = MtCarDiscreteReward()
    isterminal = MtCarDiscreteIsTerminal()

    statesdim = env.observation_space.shape[0]
    actiondim = env.action_space.n
    replaybuffer = deque(maxlen=buffersize)
    runepsilon = initepsilon
    totalrewards = []
    meanreward = []
    trajectory = []
    totalreward = []

    buildmodel = BuildModel(statesdim, actiondim)
    Writer, DQNmodel = buildmodel(numberlayers)
    replaceParameters = ReplaceParameters(replaceiter)
    trainModel = TrainModel(learningRate, gamma, Writer)
    trainDQNmodel = TrainDQNmodel(replaceParameters, trainModel, DQNmodel)
    learn = Learn(buffersize, batchsize, trainDQNmodel, actiondim)

    for episode in range(EPISODE):
        state = reset()
        rewards = 0
        runtime = 0
        while True:
            action = learn.Getaction(DQNmodel, runepsilon, state)
            nextstate = transition(state, action)
            done = isterminal(nextstate)
            reward = rewardMtCar(state, action, nextstate, done)
            learn.ReplayMemory(replaybuffer, state, action, reward, nextstate,
                               done)
            trajectory.append((state, action, reward, nextstate))
            rewards += reward
            state = nextstate
            runtime += 1
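            # Episodes are truncated after 200 steps, matching gym's default time limit for
            # MountainCar.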
            if runtime == 200:
                totalrewards.append(rewards)
                totalreward.append(rewards)
                runtime = 0
                print('episode: ', episode, 'reward:', rewards, 'epsilon:',
                      runepsilon)
                break
            if done:
                totalrewards.append(rewards)
                totalreward.append(rewards)
                print('episode: ', episode, 'reward:', rewards, 'epsilon:',
                      runepsilon)
                break
        runepsilon = epsilonDec(runepsilon, minepsilon, epsilondec)
        if episode % 100 == 0:
            meanreward.append(np.mean(totalreward))
            print('episode: ', episode, 'meanreward:', np.mean(totalreward))
            totalreward = []
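    # meanreward holds one average per 100 episodes, so the x-axis below is rescaled to actual
    # episode numbers.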
    episode = 100 * (np.arange(len(meanreward)))
    plt.plot(episode, meanreward)
    plt.xlabel('episode')
    plt.ylabel('rewards')
    plt.ylim([-200, -50])
    plt.show()

    # save Model
    modelIndex = 0
    DQNFixedParam = {'DQNmodel': modelIndex}
    parameters = {
        'env': ENV_NAME,
        'Eps': EPISODE,
        'batch': batchsize,
        'buffersize': buffersize,
        'gam': gamma,
        'learningRate': learningRate,
        'replaceiter': replaceiter,
        'epsilondec': epsilondec,
        'minepsilon': minepsilon,
        'initepsilon': initepsilon
    }

    modelSaveDirectory = "/path/to/logs/trainedDQNModels"
    modelSaveExtension = '.ckpt'
    getSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension,
                              DQNFixedParam)
    savePathDQN = getSavePath(parameters)

    with DQNmodel.as_default():
        saveVariables(DQNmodel, savePathDQN)

    dirName = os.path.dirname(__file__)
    trajectoryPath = os.path.join(dirName, 'trajectory',
                                  'mountCarTrajectory.pickle')
    saveToPickle(trajectory, trajectoryPath)
def main():
    manipulatedVariables = OrderedDict()
    #solimp
    manipulatedVariables['dmin'] = [0.9]
    manipulatedVariables['dmax'] = [0.9999]
    manipulatedVariables['width'] = [0.001]
    manipulatedVariables['midpoint'] = [0.5]  #useless
    manipulatedVariables['power'] = [1]  #useless
    #solref
    manipulatedVariables['timeconst'] = [
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0
    ]
    manipulatedVariables['dampratio'] = [0.2, 0.4, 0.6, 0.8, 1.0]

    evaluateParametersList = ['dampratio', 'dmax']
    lineParameter = ['timeconst']
    parametersToDrop = list(
        set(manipulatedVariables.keys()) -
        set(evaluateParametersList + lineParameter))
    print(parametersToDrop, manipulatedVariables)

    # timeconst: 0.1-0.2
    # dmin: 0.5-dimax
    # dmax:0.9-0.9999
    # dampratio:0.3-0.8
    productValues = it.product(
        *[[(key, value) for value in values]
          for key, values in manipulatedVariables.items()])
    parametersAllConditions = [
        dict(list(specificValueParameter))
        for specificValueParameter in productValues
    ]

    evalNum = 500
    randomSeed = 1542
    dt = 0.05  #0.05

    dirName = os.path.dirname(__file__)
    dataFolderName = os.path.join(dirName, '..')
    trajectoryDirectory = os.path.join(dataFolderName, 'trajectory',
                                       'variousCollsiondt={}'.format(dt))
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    for parameterOneCondition in parametersAllConditions:
        # print(parameterOneCondition, evalNum, randomSeed)
        simulateOneParameter(parameterOneCondition, evalNum, randomSeed, dt)

    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)
    # save evaluation trajectories

    trajectoryExtension = '.pickle'
    trajectoryFixedParameters = {
        'isMujoco': 1,
        'isCylinder': 1,
        'evalNum': evalNum,
        'randomSeed': randomSeed
    }

    getTrajectorySavePath = GetSavePath(trajectoryDirectory,
                                        trajectoryExtension,
                                        trajectoryFixedParameters)
    getTrajectorySavePathFromDf = lambda df: getTrajectorySavePath(
        readParametersFromDf(df))

    # compute statistics on the trajectories

    fuzzySearchParameterNames = []
    loadTrajectories = LoadTrajectories(getTrajectorySavePath, loadFromPickle,
                                        fuzzySearchParameterNames)
    loadTrajectoriesFromDf = lambda df: loadTrajectories(
        readParametersFromDf(df))

    def compareXPos(traj, baselineTraj):
        SE = [
            np.linalg.norm(s1[0][0][:2] - s2[0][0][:2])**2
            for s1, s2 in zip(baselineTraj, traj)
        ]

        return SE

    originEnvTrajSavePath = os.path.join(
        dirName, '..', 'trajectory', 'variousCollsion',
        'collisionMoveOriginBaseLine_evalNum={}_randomSeed={}.pickle'.format(
            evalNum, randomSeed))
    baselineTrajs = loadFromPickle(originEnvTrajSavePath)
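    # The baseline trajectories come from the unmodified environment ('collisionMoveOriginBaseLine');
    # the squared position / velocity errors computed below measure how far each contact-parameter
    # setting drifts from that baseline.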

    class CompareTrajectories:
        def __init__(self, baselineTrajs, calculateStatistics):
            self.baselineTrajs = baselineTrajs
            self.calculateStatistics = calculateStatistics

        def __call__(self, trajectories):
            allMeasurements = np.array([
                self.calculateStatistics(traj, baselineTraj)
                for traj, baselineTraj in zip(trajectories, self.baselineTrajs)
            ])
            measurementMean = allMeasurements.mean(axis=0)
            measurementStd = allMeasurements.std(axis=0)
            return pd.Series({'mean': measurementMean, 'std': measurementStd})

    measurementFunction = CompareTrajectories(baselineTrajs, compareXPos)

    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf,
                                          measurementFunction)
    statisticsDf = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf)

    fig = plt.figure(0)

    rowName, columnName = evaluateParametersList
    numRows = len(manipulatedVariables[rowName])
    numColumns = len(manipulatedVariables[columnName])
    plotCounter = 1
    selfId = 0
    for rowVar, grp in statisticsDf.groupby(rowName):
        grp.index = grp.index.droplevel(rowName)

        for columVar, group in grp.groupby(columnName):
            group.index = group.index.droplevel(columnName)

            axForDraw = fig.add_subplot(numRows, numColumns, plotCounter)
            if (plotCounter % numColumns == 1) or numColumns == 1:
                axForDraw.set_ylabel(rowName + ': {}'.format(rowVar))
            if plotCounter <= numColumns:
                axForDraw.set_title(columnName + ': {}'.format(columVar))

            axForDraw.set_ylim(0, 0.02)
            drawPerformanceLine(group, axForDraw, lineParameter,
                                parametersToDrop)
            plotCounter += 1

    plt.suptitle('MSEforPos,dt={}'.format(dt))
    plt.legend(loc='best')

    # plt.show()

    def compareV(traj, baselineTraj):
        # [print(s1[0][0]-s2[0][0],np.linalg.norm(s1[0][0][2:]-s2[0][0][2:]),np.linalg.norm(s1[0][0][:2]-s2[0][0][:2])) for s1,s2 in zip (baselineTraj,traj) ]
        SE = [
            np.linalg.norm(s1[0][0][2:] - s2[0][0][2:])**2
            for s1, s2 in zip(baselineTraj, traj)
        ]
        return SE

    measurementFunction2 = CompareTrajectories(baselineTrajs, compareV)

    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf,
                                          measurementFunction2)
    statisticsDf2 = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf2)

    fig = plt.figure(1)

    numRows = len(manipulatedVariables[rowName])
    numColumns = len(manipulatedVariables[columnName])
    plotCounter = 1
    for rowVar, grp in statisticsDf2.groupby(rowName):
        grp.index = grp.index.droplevel(rowName)

        for columVar, group in grp.groupby(columnName):
            group.index = group.index.droplevel(columnName)

            axForDraw = fig.add_subplot(numRows, numColumns, plotCounter)
            if (plotCounter % numColumns == 1) or numColumns == 1:
                axForDraw.set_ylabel(rowName + ': {}'.format(rowVar))
            if plotCounter <= numColumns:
                axForDraw.set_title(columnName + ': {}'.format(columVar))

            axForDraw.set_ylim(0, 0.4)
            drawPerformanceLine(group, axForDraw, lineParameter,
                                parametersToDrop)
            plotCounter += 1

    plt.suptitle('MSEforSpeed,dt={}'.format(dt))
    plt.legend(loc='best')
    plt.show()
def simulateOneParameter(parameterDict, evalNum, randomSeed, dt):
    mujocoVisualize = False
    demoVisualize = False

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2

    wolfColor = np.array([0.85, 0.35, 0.35])
    sheepColor = np.array([0.35, 0.85, 0.35])
    blockColor = np.array([0.25, 0.25, 0.25])

    wolvesID = [0]
    sheepsID = [1]
    blocksID = [2]

    numWolves = len(wolvesID)
    numSheeps = len(sheepsID)
    numBlocks = len(blocksID)

    sheepMaxSpeed = 1.3
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None

    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed
                                                       ] * numSheeps

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    makePropertyList = MakePropertyList(transferNumberListToStr)

    # change collision-relevant parameters
    dmin = parameterDict['dmin']
    dmax = parameterDict['dmax']
    width = parameterDict['width']
    midpoint = parameterDict['midpoint']
    power = parameterDict['power']
    timeconst = parameterDict['timeconst']
    dampratio = parameterDict['dampratio']

    geomIds = [1, 2, 3]
    keyNameList = ['@solimp', '@solref']
    valueList = [[[dmin, dmax, width, midpoint, power], [timeconst, dampratio]]
                 ] * len(geomIds)
    collisionParameter = makePropertyList(geomIds, keyNameList, valueList)
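    # solimp (dmin, dmax, width, midpoint, power) and solref (timeconst, dampratio) are MuJoCo's
    # contact-solver impedance and reference parameters; the same values are written to geoms
    # 1, 2 and 3.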

    #changeSize
    # geomIds=[1,2]
    # keyNameList=['@size']
    # valueList=[[[0.075,0.075]],[[0.1,0.1]]]
    # sizeParameter=makePropertyList(geomIds,keyNameList,valueList)

    #load env xml and change some geoms' property
    physicsDynamicsPath = os.path.join(
        dirName, '..', '..', 'environment', 'mujocoEnv',
        'variousCollision_numBlocks=1_numSheeps=1_numWolves=1dt={}.xml'.format(
            dt))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())

    envXmlPropertyDictList = [collisionParameter]
    changeEnvXmlPropertyFunctionList = [changeGeomProperty]
    for propertyDict, changeXmlProperty in zip(
            envXmlPropertyDictList, changeEnvXmlPropertyFunctionList):
        envXmlDict = changeXmlProperty(envXmlDict, propertyDict)

    envXml = xmltodict.unparse(envXmlDict)
    # print(envXmlDict['mujoco']['worldbody']['body'][0])
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)

    # MDP function
    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents

    qVelInitNoise = 0
    qPosInitNoise = 1

    # reset=ResetUniformWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents, numBlocks,qPosInitNoise, qVelInitNoise)
    # fixReset=ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents,numBlocks)
    # blocksState=[[0,-0.8,0,0]]
    # reset=lambda :fixReset([-0.5,0.8,-0.5,0,0.5,0.8],[0,0,0,0,0,0],blocksState)

    isTerminal = lambda state: False
    numSimulationFrames = int(0.1 / dt)
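    # Each environment step spans 0.1 s of simulated time, split into int(0.1 / dt) MuJoCo
    # sub-steps, so the control interval stays fixed while dt varies.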
    print(numSimulationFrames)
    # dt=0.01
    damping = 2.5
    reshapeAction = lambda action: action
    # transit = TransitionFunctionWithoutXPos(physicsSimulation,numAgents , numSimulationFrames,damping*dt/numSimulationFrames,agentsMaxSpeedList,mujocoVisualize,reshapeAction)

    maxRunningSteps = 10

    # sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, rewardFunc, reset)
    # observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    # observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]
    # initObsForParams = observe(reset())
    # obsShape = [initObsForParams[obsID].shape for obsID in range(len(initObsForParams))]

    class ImpulsePolicy(object):
        """Apply the given initial action on the first time step, then output zero force."""
        def __init__(self, initAction):
            self.initAction = initAction

        def __call__(self, state, timeStep):
            action = [[0, 0], [0, 0]]
            if timeStep == 0:
                action = self.initAction
            return action

    worldDim = 2
    trajList = []
    startTime = time.time()
    for i in range(evalNum):

        np.random.seed(randomSeed + i)
        initSpeedDirection = np.random.uniform(-np.pi / 2, np.pi / 2, 1)[0]
        initSpeed = np.random.uniform(0, 1, 1)[0]
        initActionDirection = np.random.uniform(-np.pi / 2, np.pi / 2, 1)[0]
        initForce = np.random.uniform(0, 5, 1)[0]
        # print(initSpeedDirection,initSpeed,initActionDirection,initForce)

        fixReset = ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit,
                                       numAgents, numBlocks)
        blocksState = [[0, 0, 0, 0]]  #posX posY velX velY
        reset = lambda: fixReset([-0.28, 0, 8, 8], [
            initSpeed * np.cos(initSpeedDirection), initSpeed * np.sin(
                initSpeedDirection), 0, 0
        ], blocksState)
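        # Reading of the reset arguments: the wolf starts at (-0.28, 0) with a random speed and
        # heading, the sheep is parked far away at (8, 8), and the block sits at the origin,
        # isolating a single wolf-block collision per trajectory.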

        transit = TransitionFunctionWithoutXPos(
            physicsSimulation, numAgents, numSimulationFrames,
            damping * dt * numSimulationFrames, agentsMaxSpeedList,
            mujocoVisualize, isTerminal, reshapeAction)

        initAction = [[
            initForce * np.cos(initActionDirection),
            initForce * np.sin(initActionDirection)
        ], [0, 0]]
        impulsePolicy = ImpulsePolicy(initAction)

        sampleTrajectory = SampleTrajectory(maxRunningSteps, transit,
                                            isTerminal, rewardFunc, reset)

        traj = sampleTrajectory(impulsePolicy)
        # print('traj',traj[0])
        trajList.append(traj)

    # saveTraj
    # print(trajList)
    saveTraj = True
    if saveTraj:
        # trajSavePath = os.path.join(dirName,'traj', 'evaluateCollision', 'CollisionMoveTransitDamplingCylinder.pickle')
        trajectorySaveExtension = '.pickle'
        fixedParameters = {
            'isMujoco': 1,
            'isCylinder': 1,
            'randomSeed': randomSeed,
            'evalNum': evalNum
        }
        trajectoriesSaveDirectory = os.path.join(
            dirName, '..', 'trajectory', 'variousCollsiondt={}'.format(dt))
        generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                                 trajectorySaveExtension,
                                                 fixedParameters)
        trajectorySavePath = generateTrajectorySavePath(parameterDict)
        saveToPickle(trajList, trajectorySavePath)

    # visualize

    if demoVisualize:
        entitiesColorList = [wolfColor] * numWolves + [
            sheepColor
        ] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
    endTime = time.time()
    print("Time taken {} seconds to generate {} trajectories".format(
        (endTime - startTime), evalNum))
Example #18
0
def main():
    stateDim = env.observation_space.shape[0]
    actionDim = env.action_space.shape[0]
    actionHigh = env.action_space.high
    actionLow = env.action_space.low
    actionBound = (actionHigh - actionLow) / 2
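    # actionBound is half the action range; DDPG implementations typically scale the tanh actor
    # output by this value (assumption about how it is used downstream).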

    fixedParameters = OrderedDict()
    fixedParameters['batchSize'] = 128
    fixedParameters['learningRateActor'] = 0.001
    fixedParameters['learningRateCritic'] = 0.001
    fixedParameters['maxEpisode'] = 200
    fixedParameters['maxRunSteps'] = 200
    fixedParameters['tau'] = 0.01
    fixedParameters['gamma'] = 0.9
    fixedParameters['actorLayerWidths'] = [30]
    fixedParameters['criticLayerWidths'] = [30]
    fixedParameters['stateDim'] = stateDim
    fixedParameters['actionDim'] = actionDim
    fixedParameters['actionBound'] = actionBound
    fixedParameters['actionHigh'] = actionHigh
    fixedParameters['actionLow'] = actionLow
    fixedParameters['paramUpdateInterval'] = 1
    fixedParameters['varianceDiscount'] = .9995
    fixedParameters['noiseDecayStartStep'] = 10000
    fixedParameters['learningStartStep'] = fixedParameters['batchSize']
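    # Assumption from the names: exploration-noise variance is multiplied by varianceDiscount per
    # step once noiseDecayStartStep steps have passed, and learning starts after one batch of
    # transitions has been collected.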

    independentVariables = OrderedDict()
    independentVariables['noiseInitVariance'] = [1, 2, 3, 4]
    independentVariables['memorySize'] = [1000, 5000, 10000, 20000]

    modelSaveDirectory = "../trainedDDPGModels"
    modelSaveExtension = '.ckpt'
    getSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension)

    evaluate = EvaluateNoiseAndMemorySize(fixedParameters,
                                          getSavePath,
                                          saveModel=False)

    levelNames = list(independentVariables.keys())
    levelValues = list(independentVariables.values())
    levelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=levelIndex)
    resultDF = toSplitFrame.groupby(levelNames).apply(evaluate)

    nCols = len(independentVariables['noiseInitVariance'])
    nRows = len(independentVariables['memorySize'])
    numplot = 1
    axs = []
    figure = plt.figure(figsize=(12, 10))
    for keyCol, outterSubDf in resultDF.groupby('memorySize'):
        for keyRow, innerSubDf in outterSubDf.groupby('noiseInitVariance'):
            subplot = figure.add_subplot(nRows, nCols, numplot)
            axs.append(subplot)
            numplot += 1
            plt.ylim([-1600, 50])
            innerSubDf.T.plot(ax=subplot)

    dirName = os.path.dirname(__file__)
    plotPath = os.path.join(dirName, '..', 'plots')
    plt.savefig(os.path.join(plotPath, 'pendulumEvaluation'))
    plt.show()