示例#1
0
def objective(args):
    """Hyperparameter-search objective: train an Agent on LunarLander-v2.

    Args:
        args: the learning rate to evaluate (a single float, as passed by
            the hyperparameter optimizer).

    Returns:
        The negated sum of per-run performance scores across
        NUM_TESTS_FOR_NOISE runs (negated so a minimizer maximizes score).
    """
    # Number of repeated runs used to average out training noise.
    NUM_TESTS_FOR_NOISE = 1
    env = gym.make('LunarLander-v2')
    learningRate = args
    totalResult = 0
    for i in range(NUM_TESTS_FOR_NOISE):
        sess = tf.Session()
        try:
            a = Agent(
                sess=sess,
                env=env,
                numAvailableActions=4,
                numObservations=8,
                rewardsMovingAverageSampleLength=20,
                gamma=1,
                nStepUpdate=1,
                includeIntermediatePairs=False,
                maxRunningMinutes=30,

                # test parameters
                episodesPerTest=1,
                numTestPeriods=40000,
                numTestsPerTestPeriod=30,
                episodeStepLimit=1024,
                intermediateTests=False,

                render=False,
                showGraph=False,

                # hyperparameters
                valueMin=-400.0,
                valueMax=300.0,
                numAtoms=14,
                maxMemoryLength=100000,
                batchSize=256,
                networkSize=[128, 128, 256],
                learningRate=learningRate,
                priorityExponent=0,
                epsilonInitial=2,
                epsilonDecay=.9987,
                minFramesForTraining=2048,
                noisyLayers=False,
                maxGradientNorm=4,
                minExploration=.15,
            )
            testResults = np.array(a.execute())
            # Score the run by the mean of its 4 best test results
            # (argpartition puts the 4 largest of -testResults, i.e. the 4
            # highest scores, at the front without a full sort).
            performance = np.mean(testResults[np.argpartition(-testResults,range(4))[:4]])
            totalResult = totalResult + performance
        finally:
            # Close the per-iteration session so repeated runs don't leak
            # TensorFlow graph/session resources.
            sess.close()
    # NOTE(review): logs only the LAST run's performance, not the total —
    # harmless while NUM_TESTS_FOR_NOISE == 1; confirm intent before raising it.
    print(str(learningRate)+","+str(performance))
    env.close()  # release the gym environment's resources
    return -totalResult
示例#2
0
    # agentName="agent_842763505",

    # hyperparameters
    rewardScaling=rewardScaling,
    nStepReturns=1,
    maxMemoryLength=int(1e6),
    batchSize=64,
    learningRate=6.25e-4,
    priorityExponent=0,
    epsilonInitial=1,
    epsilonDecay=.999975,
    minExploration=.01,
    maxExploration=1.0,
    minFramesForTraining=2048,
    maxGradientNorm=5,
    preNetworkSize=[128, 128],
    postNetworkSize=[256],
    numQuantiles=8,
    embeddingDimension=16,
    kappa=1.0,
    trainingIterations=3,
    tau=0.001)
# Run the agent once and persist this experiment's score.
performance = a.execute()[0]
cur = db.cursor()
try:
    # Parameterized query: the previous str.format interpolation was
    # vulnerable to SQL injection and broke on quotes in experimentName.
    # NOTE(review): assumes a paramstyle='format' driver (e.g. MySQLdb);
    # use '?' placeholders instead if db is sqlite3 — confirm the driver.
    cur.execute(
        "insert into experiments (label, x1, x2, x3, x4, y) values (%s, %s, %s, %s, %s, %s)",
        (experimentName, rewardScaling, 0, 0, 0, performance))
    db.commit()
finally:
    # Ensure the cursor and connection are released even if the insert
    # or commit raises.
    cur.close()
    db.close()
示例#3
0
    intermediateTests=False,
    render=False,
    showGraph=True,
    saveModel=True,
    loadModel=False,
    disableRandomActions=False,
    disableTraining=False,
    # agentName="agent_223120799",

    # hyperparameters
    rewardScaling=pow(10, -.75),
    nStepReturns=1,
    maxMemoryLength=int(1e6),
    batchSize=64,
    learningRate=6.25e-4,
    priorityExponent=0,
    epsilonInitial=1,
    epsilonDecay=.999,
    minExploration=.01,
    maxExploration=1.0,
    minFramesForTraining=2048,
    maxGradientNorm=5,
    preNetworkSize=[256, 256],
    postNetworkSize=[512],
    numQuantiles=32,
    embeddingDimension=64,
    kappa=1.0,
    trainingIterations=3,
    tau=0.001)
# Run the configured agent; presumably returns per-test scores — confirm
# against Agent.execute's definition.
testResults = a.execute()