# Tail of a keyword-argument list; the call it belongs to opens before this
# chunk (presumably the call that builds `agent` -- confirm against the lines above).
# Flags visible here: train=False, restoreModel=True, saveModel=False,
# render=True, showGraphs=False. NOTE(review): this looks like a replay /
# evaluation run of a previously saved model -- verify against the callee.
# rewardScaling is the constant 10.0 ** -0.75 (about 0.178).
# Once the call closes, agent.execute() is invoked and its return value is
# converted with str() and printed after the "Total Reward: " prefix.
priorityExponent=0.0, batchSize=64, maxEpisodes=4096, trainSteps=1024, minStepsBeforeTraining=4096, rewardScaling=(10.0 ** -0.75), actionShift=0.0, stepsPerUpdate=1, render=True, showGraphs=False, saveModel=False, saveModelToS3=False, restoreModel=True, train=False, testSteps=1024, maxMinutes=360, targetEntropy=-4.0, maxGradientNorm=5.0, meanRegularizationConstant=0.0, varianceRegularizationConstant=0.0, randomStartSteps=0, gradientSteps=1, initialExtraNoise=0, extraNoiseDecay=0, evaluationEvery=25, numFinalEvaluations=10, maxTrainSteps = 1000000 ) print("Total Reward: "+str(agent.execute()))
# Tail of a keyword-argument list; the call it belongs to opens before this
# chunk (presumably the call that builds `agent` -- confirm against the lines above).
# Flags visible here: train=True, restoreModel=False, saveModel=True,
# render=False, showGraphs=True. NOTE(review): this looks like a from-scratch
# training run that saves its model -- verify against the callee.
# minStepsBeforeTraining and randomStartSteps are both set to 10000;
# rewardScaling is the constant 10.0 ** -0.75 (about 0.178).
# Once the call closes, the return value of agent.execute() is stored in `results`.
batchSize=64, nStep=3, frameSkip=2, maxEpisodes=4096, trainSteps=1024, maxTrainSteps=6000000, minStepsBeforeTraining=10000, rewardScaling=(10.0**-0.75), actionShift=0.0, stepsPerUpdate=1, render=False, showGraphs=True, saveModel=True, saveModelToS3=False, restoreModel=False, train=True, testSteps=1024, maxMinutes=360, targetEntropy=-4.0, maxGradientNorm=5.0, meanRegularizationConstant=0.0, varianceRegularizationConstant=0.0, randomStartSteps=10000, gradientSteps=1, initialExtraNoise=0, extraNoiseDecay=0, evaluationEvery=25, numFinalEvaluations=10) results = agent.execute()
qNetworkSize=[256, 256], valueNetworkSize=[256, 256], entropyCoefficient=entropyCoefficient, valueNetworkLearningRate=learningRate, policyNetworkLearningRate=learningRate, qNetworkLearningRate=learningRate, tau=0.005, gamma=0.99, maxMemoryLength=int(1e6), priorityExponent=0, batchSize=256, maxGradientNorm=5, maxEpisodes=1024, trainSteps=1024, minStepsBeforeTraining=4096, rewardScaling=rewardScaling, actionScaling=actionScaling, actionShift=0.0, stepsPerUpdate=1, render=True, showGraphs=True, meanRegularizationConstant=weightRegularizationConstant, varianceRegularizationConstant=weightRegularizationConstant, testSteps=1024, maxMinutes=600) result = agent.execute() print("Result: " + result) except: print("Error evaluating parameters") result = -20000