def buildSimulation(alpha, gamma, lambdaa, importance, expName='test0'):
    """Build the hand-wired RL / grid-world network and return it.

    A ``nef.Network`` is created and added to Nengo, an ``rl_sarsa.qlambdaASM``
    node and a ``gridworld.benchmarkA`` node are placed in it and connected to
    each other in both directions, and four constant inputs (alpha, gamma,
    lambda, importance) are wired to the matching terminations of the RL node.
    A ``ProsperitySaver`` constructed with ``'data_<expName>.txt'`` is attached
    to the prosperity origin of the RL node.

    alpha, gamma, lambdaa, importance -- initial values of the four inputs
    expName -- experiment name embedded in the string given to the saver
    """
    network = nef.Network('HandWired parameters of RL node to bias')
    network.add_to_nengo()

    # Alternative (disabled):
    # agent = rl_sarsa.qlambda("RL", noStateVars=2, noActions=4, noValues=20, logPeriod=2000)
    agent = rl_sarsa.qlambdaASM("RL", noStateVars=2, noActions=4,
                                noValues=20, logPeriod=2000)
    grid = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC", 10000)

    # place both nodes into the network
    network.add(agent)
    network.add(grid)

    # world -> RL, then RL -> world
    network.connect(grid.getOrigin(QLambda.topicDataIn),
                    agent.newTerminationFor(QLambda.topicDataIn))
    network.connect(agent.getOrigin(QLambda.topicDataOut),
                    grid.getTermination(QLambda.topicDataOut))

    # (input name, initial value, RL topic the input is wired to)
    parameters = [
        ('alpha', alpha, QLambda.topicAlpha),
        ('gamma', gamma, QLambda.topicGamma),
        ('lambda', lambdaa, QLambda.topicLambda),
        ('importance', importance, QLambda.topicImportance),
    ]

    # define all parameter sources first (controllable from the simulation
    # window) ...
    for label, value, _topic in parameters:
        network.make_input(label, [value])

    # ... and only then connect them to the RL node, as the original did
    for label, _value, topic in parameters:
        network.connect(label, agent.getTermination(topic))

    recorder = network.add(ProsperitySaver('data_' + expName + '.txt'))
    network.connect(agent.getOrigin(QLambda.topicProsperity),
                    recorder.getTermination("data"))
    return network
def addModel(net, modelName='model0'):
    """Add one independent model to ``net`` and return its components.

    The model consists of an RL node (``rl_sarsa.qlambdaASM``), a grid world
    (``gridworld.benchmarkA``), a motivation source (``motivation.basic``) and
    a ``ProsperitySaver``. Two constant inputs named ``<modelName>_decay`` and
    ``<modelName>_importance`` are created as well.

    Returns the list
    ``[rl, world, source, saver, modelName, tstates, modelName]``; the model
    name appears at both index 4 and index 6, exactly as in the original.
    """
    # ------------------------------------------------------------ components
    agent = rl_sarsa.qlambdaASM(
        modelName,
        noStateVars=2,
        noActions=4,
        noValues=15,
        logPeriod=2000,
        synchronous=False,
        classname="org.hanns.rl.discrete.ros.sarsa.config.QLambdaCoverageRewardFile")
    grid = gridworld.benchmarkA(modelName + "_map_15x15",
                                mapName="BenchmarkGridWorldNodeD",
                                logPeriod=100000)
    drive = motivation.basic(modelName, 1, Motivation.DEF_DECAY,
                             logPeriod=10000)

    for node in (agent, grid, drive):
        net.add(node)  # place them into the network

    # ----------------------------------------------------------- connections
    # Transform matrix connecting the state dimensions of the grid world to
    # the RL dataIn termination. Per the original note the first dimension is
    # the reward, so this is an identity without the first dimension (the
    # reward is deliberately not connected directly).
    tstates = [
        [0, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ]
    net.connect(grid.getOrigin(QLambda.topicDataIn),
                agent.newTerminationFor(QLambda.topicDataIn, tstates))  # world -> rl (states)
    net.connect(agent.getOrigin(QLambda.topicDataOut),
                grid.getTermination(QLambda.topicDataOut))  # rl -> world (actions)

    # The reward reaches the RL node through the motivation source.
    # Matrices are indexed [y][x] (y is the second dimension).
    reward_to_drive = [[1], [0], [0]]
    net.connect(grid.getOrigin(QLambda.topicDataIn),
                drive.newTerminationFor(Motivation.topicDataIn, reward_to_drive))  # world -> motivation (reward)

    # Transform matrix connecting the first dimension of the motivation output
    # (reward) to the first dimension of the RL dataIn (reward).
    drive_to_reward = [
        [1, 0, 0],
        [0, 0, 0],
    ]
    net.connect(drive.getOrigin(Motivation.topicDataOut),
                agent.newTerminationFor(QLambda.topicDataIn, drive_to_reward))  # motivation (r) -> rl (reward)

    # Importance input of the RL node is fed by the second row of the
    # motivation output.
    drive_to_importance = [[0], [1]]
    net.connect(drive.getOrigin(Motivation.topicDataOut),
                agent.newTerminationFor(QLambda.topicImportance, drive_to_importance))  # motivation -> rl (importance)

    # Grid world is also attached to the importance input of the RL node, with
    # an all-zero transform.
    world_to_importance = [[0], [0], [0]]
    net.connect(grid.getOrigin(QLambda.topicDataIn),
                agent.newTerminationFor(QLambda.topicImportance, world_to_importance))  # world -> rl (importance)

    # Decay port of the motivation source, driven by a constant 0.01
    # (Motivation.DEF_DECAY was the disabled alternative in the original).
    decay_input = modelName + '_decay'
    net.make_input(decay_input, [0.01])
    net.connect(decay_input, drive.getTermination(Motivation.topicDecay))

    # ------------------------------------------------------------------ misc
    importance_input = modelName + '_importance'
    net.make_input(importance_input, [0])
    net.connect(importance_input, agent.getTermination(QLambda.topicImportance))

    recorder = net.add(ProsperitySaver(modelName + '_saver.txt'))  # TODO no need for name here?
    net.connect(agent.getOrigin(QLambda.topicProsperity),
                recorder.getTermination("RLprosperity"))

    # return all new components (modelName intentionally left in both slots so
    # that positional indexing by callers keeps working)
    return [agent, grid, drive, recorder, modelName, tstates, modelName]
def buildSimulation(alpha, gamma, lambdaa, importance, expName='test0'):
    """Create the RL / grid-world network with hand-set parameters.

    Builds a ``nef.Network``, adds it to Nengo, inserts an
    ``rl_sarsa.qlambdaASM`` node (class
    ``org.hanns.rl.discrete.ros.sarsa.QLambda``, ``prospLen=1``) and a
    ``gridworld.benchmarkA`` node, connects them both ways, and wires four
    constant inputs to the alpha, gamma, lambda and importance terminations of
    the RL node. A ``ProsperitySaver`` built with ``'data_<expName>.txt'`` is
    connected to the prosperity origin of the RL node.

    Returns the network.
    """
    sim = nef.Network('HandWired parameters of RL node to bias')
    sim.add_to_nengo()

    learner = rl_sarsa.qlambdaASM(
        "RL",
        noStateVars=2,
        noActions=4,
        noValues=20,
        logPeriod=2000,
        classname="org.hanns.rl.discrete.ros.sarsa.QLambda",
        prospLen=1)
    environment = gridworld.benchmarkA(
        "map_20x20", "BenchmarkGridWorldNodeC", 10000)

    # place them into the network
    sim.add(learner)
    sim.add(environment)

    # connect them together: world -> RL
    world_out = environment.getOrigin(QLambda.topicDataIn)
    learner_in = learner.newTerminationFor(QLambda.topicDataIn)
    sim.connect(world_out, learner_in)

    # ... and RL -> world
    learner_out = learner.getOrigin(QLambda.topicDataOut)
    world_in = environment.getTermination(QLambda.topicDataOut)
    sim.connect(learner_out, world_in)

    # define the parameter sources (controllable from the simulation window)
    sim.make_input('alpha', [alpha])
    sim.make_input('gamma', [gamma])
    sim.make_input('lambda', [lambdaa])
    sim.make_input('importance', [importance])

    # connect signal sources to the RL node
    alpha_port = learner.getTermination(QLambda.topicAlpha)
    sim.connect('alpha', alpha_port)
    gamma_port = learner.getTermination(QLambda.topicGamma)
    sim.connect('gamma', gamma_port)
    lambda_port = learner.getTermination(QLambda.topicLambda)
    sim.connect('lambda', lambda_port)
    importance_port = learner.getTermination(QLambda.topicImportance)
    sim.connect('importance', importance_port)

    # attach the saver to the prosperity output of the RL node
    log = sim.add(ProsperitySaver('data_' + expName + '.txt'))
    prosperity = learner.getOrigin(QLambda.topicProsperity)
    sim.connect(prosperity, log.getTermination("data"))

    return sim
def buildSimulation(alpha, gamma, lambdaa, importance, expName="test0"):
    """Assemble and return the hand-wired RL simulation network.

    The network holds an ``rl_sarsa.qlambdaASM`` node and a
    ``gridworld.benchmarkA`` node connected in a loop, four constant inputs
    ("alpha", "gamma", "lambda", "importance") feeding the corresponding
    terminations of the RL node, and a ``ProsperitySaver`` (constructed with
    ``"data_<expName>.txt"``) fed from the prosperity origin of the RL node.
    """
    network = nef.Network("HandWired parameters of RL node to bias")
    network.add_to_nengo()

    agent = rl_sarsa.qlambdaASM(
        "RL",
        noStateVars=2,
        noActions=4,
        noValues=20,
        logPeriod=2000,
        classname="org.hanns.rl.discrete.ros.sarsa.QLambda",
        prospLen=1,
    )
    grid = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC", 10000)

    # place them into the network
    for node in (agent, grid):
        network.add(node)

    # connect them together (world -> RL first, then RL -> world)
    network.connect(
        grid.getOrigin(QLambda.topicDataIn),
        agent.newTerminationFor(QLambda.topicDataIn),
    )
    network.connect(
        agent.getOrigin(QLambda.topicDataOut),
        grid.getTermination(QLambda.topicDataOut),
    )

    # Input names in wiring order, with their initial values and RL topics.
    input_names = ["alpha", "gamma", "lambda", "importance"]
    initial_value = {
        "alpha": alpha,
        "gamma": gamma,
        "lambda": lambdaa,
        "importance": importance,
    }
    rl_topic = {
        "alpha": QLambda.topicAlpha,
        "gamma": QLambda.topicGamma,
        "lambda": QLambda.topicLambda,
        "importance": QLambda.topicImportance,
    }

    # define the parameter sources (controllable from the simulation window)
    for name in input_names:
        network.make_input(name, [initial_value[name]])

    # connect signal sources to the RL node
    for name in input_names:
        network.connect(name, agent.getTermination(rl_topic[name]))

    recorder = network.add(ProsperitySaver("data_" + expName + ".txt"))
    network.connect(
        agent.getOrigin(QLambda.topicProsperity),
        recorder.getTermination("data"),
    )
    return network