Example #1
    agent.learner.gd.alpha = 0.3 #step size of \mu adaptation
    agent.learner.gdSig.alpha = 0.15 #step size of \sigma adaptation
    agent.learner.gd.momentum = 0.0
    batch=2 #number of samples per gradient estimate
    #create experiment
    experiment = EpisodicExperiment(task, agent)
    prnts=1 #frequency of console output
    epis=2000/batch/prnts
    
    #actual roll outs
    filename="dataSPLA08NoRew"+repr(int(random.random()*1000000.0))+".dat"
    wf = open(filename, 'wb')
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch) #execute batch episodes
            agent.learn() #learn from the gathered experience
            agent.reset() #reset agent and environment
        #print out related data
        stp = (updates+1)*batch*prnts
        print "Step: ", runs, "/", stp, "Best: ", agent.learner.best, "Base: ", agent.learner.baseline, "Reward: ", agent.learner.reward   
        wf.write(repr(stp)+"\n") 
        wf.write(repr(agent.learner.baseline[0])+"\n") 
        if useGraphics:
            pl.addData(0,float(stp),agent.learner.baseline)
            pl.addData(1,float(stp),agent.learner.best)
            pl.update()

        #if updates/100 == float(updates)/100.0:
        #    saveWeights("walk.wgt", agent.learner.original)  
    wf.close()      
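
All of these excerpts pick up after the task, the policy network, and the learning agent have already been constructed (variables such as task, agent, runs, useGraphics and pl come from that earlier part of each example file). A minimal sketch of the assumed preamble follows; the makeTask and makeLearner helpers are hypothetical, and the concrete agent and learner classes depend on the PyBrain revision these files target (the "dataSPLA..." filenames suggest the SPLA finite-difference learner), so treat them as placeholders rather than the original code:

    import random    #used by the examples to randomize output filenames
    from pybrain.rl.experiments import EpisodicExperiment
    from pybrain.rl.agents import LearningAgent
    from pybrain.tools.shortcuts import buildNetwork

    task = makeTask()    #hypothetical helper: build the environment-specific episodic task
    #small feed-forward policy mapping observations (task.outdim) to actions (task.indim)
    net = buildNetwork(task.outdim, 4, task.indim)
    learner = makeLearner()    #hypothetical helper: the gradient-based learner exposing
                               #.gd, .gdSig, .epsilon, .initSigmas(), .baseline, .best, ...
    agent = LearningAgent(net, learner)    #the original files may use a learner-specific agent class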
Example #2
    agent.learner.gdSig.alpha = 0.1
    agent.learner.gd.momentum = 0.9
    agent.learner.epsilon = 6.0
    agent.learner.initSigmas()
    # agent.learner.rprop = True
    experiment = EpisodicExperiment(task, agent)
    batch = 16
    prnts = 10
    epis = 50000 / batch / prnts
    save = False

    rl = []
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
            agent.learn()
            agent.reset()
        print "Parameters: ", agent.learner.original
        print "Epsilon   : ", agent.learner.sigList
        print "Step: ", runs, "/", (
            updates + 1
        ) * batch * prnts, "Best: ", agent.learner.best, "Base: ", agent.learner.baseline, "Reward: ", agent.learner.reward
        print ""
        rl.append(float(agent.learner.baseline))
    if save:
        fnStart = "dataCartMom"
        fnExp = repr(int(agent.learner.gd.alpha * 100)) + "m" + repr(
            int(agent.learner.gdSig.alpha * 100)) + "s" + repr(
                batch / 2) + "b" + repr(int(agent.learner.epsilon * 10)) + "e"
        fnIdent = "SPLA" + repr(int(random.random() * 1000000.0))
        filename = fnStart + fnExp + fnIdent + ".dat"
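
The excerpt stops right after assembling the output filename; how the baselines collected in rl are written to that file is not shown. A minimal, hypothetical continuation of the if save: block (the original file may use a different format) could look like this:

        #hypothetical continuation: one baseline value per line
        wf = open(filename, 'w')
        for r in rl:
            wf.write(repr(r) + "\n")
        wf.close()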
Example #3
    agent.learner.gdSig.alpha = 0.085  #step size of \sigma adaptation
    agent.learner.gd.momentum = 0.0

    #Loading weights
    if loadNet:
        agent.learner.original = loadWeights("grasp.wgt")
        agent.learner.gd.init(agent.learner.original)
        agent.learner.epsilon = 0.2
        agent.learner.initSigmas()

    batch = 2  #number of samples per gradient estimate
    #create experiment
    experiment = EpisodicExperiment(task, agent)
    prnts = 1  #frequency of console output
    epis = 5000000 / batch / prnts

    #actual roll outs
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)  #execute batch episodes
        agent.learn()  #learn from the gathered experience
            agent.reset()  #reset agent and environment
        #print out related data
        print "Step: ", runs, "/", (
            updates + 1) * batch * prnts, "Best: ", agent.learner.best,
        print "Base: ", agent.learner.baseline, "Reward: ", agent.learner.reward
        #Saving weights
        if saveNet:
            if updates % 100 == 0:  #save a snapshot every 100 updates
                saveWeights(saveName, agent.learner.original)
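
The loadWeights and saveWeights helpers used above are not part of the excerpt; they are defined elsewhere in the example file. A minimal, hypothetical pickle-based stand-in (the original helpers may use a different serialization format):

    import pickle

    def saveWeights(fname, weights):
        #hypothetical stand-in: serialize the learner's parameter vector to disk
        with open(fname, 'wb') as f:
            pickle.dump(weights, f)

    def loadWeights(fname):
        #hypothetical stand-in: restore a previously saved parameter vector
        with open(fname, 'rb') as f:
            return pickle.load(f)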
Example #4
File: cartSPLA.py Project: HKou/pybrain
    agent.learner.gdSig.alpha = 0.1
    agent.learner.gd.momentum = 0.9
    agent.learner.epsilon = 6.0
    agent.learner.initSigmas()
    # agent.learner.rprop = True
    experiment = EpisodicExperiment(task, agent)
    batch = 16
    prnts = 10
    epis = 50000 / batch / prnts
    save = False

    rl = []
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
            agent.learn()
            agent.reset()
        print "Parameters: ", agent.learner.original
        print "Epsilon   : ", agent.learner.sigList
        print "Step: ", runs, "/", (
            updates + 1
        ) * batch * prnts, "Best: ", agent.learner.best, "Base: ", agent.learner.baseline, "Reward: ", agent.learner.reward
        print ""
        rl.append(float(agent.learner.baseline))
    if save:
        fnStart = "dataCartMom"
        fnExp = (
            repr(int(agent.learner.gd.alpha * 100))
            + "m"
            + repr(int(agent.learner.gdSig.alpha * 100))
            + "s"