Пример #1
0
def buildExperiment(alpha, gamma, lambdaa, importance):
    """Build a network coupling a Q-lambda RL node with a grid-world node.

    The four arguments become constant one-dimensional inputs named
    "alpha", "gamma", "lambda" and "importance", each connected to the
    matching termination of the RL node.

    Returns the assembled ``nef.Network``.
    """
    network = nef.Network("HandWired parameters of RL node to bias")
    network.add_to_nengo()

    agent = rl_sarsa.qlambda(
        "RL", noStateVars=2, noActions=4, noValues=20, logPeriod=2000
    )
    environment = gridworld.benchmarkA(
        "map_20x20", "BenchmarkGridWorldNodeC", 10000
    )
    # Both nodes have to be part of the network before they are wired.
    network.add(agent)
    network.add(environment)

    # Closed loop: world output -> RL data input, RL data output -> world.
    network.connect(
        environment.getOrigin(QLambda.topicDataIn),
        agent.newTerminationFor(QLambda.topicDataIn),
    )
    network.connect(
        agent.getOrigin(QLambda.topicDataOut),
        environment.getTermination(QLambda.topicDataOut),
    )

    # Parameter sources (controllable from the simulation window).
    for label, value in (
        ("alpha", alpha),
        ("gamma", gamma),
        ("lambda", lambdaa),
        ("importance", importance),
    ):
        network.make_input(label, [value])

    # Route every parameter source to its termination on the RL node.
    for label, topic in (
        ("alpha", QLambda.topicAlpha),
        ("gamma", QLambda.topicGamma),
        ("lambda", QLambda.topicLambda),
        ("importance", QLambda.topicImportance),
    ):
        network.connect(label, agent.getTermination(topic))

    return network
Пример #2
0
def buildSimulation(alpha, gamma, lambdaa, importance, expName='test0'):
    """Build the RL / grid-world network and attach a prosperity saver.

    ``alpha``, ``gamma``, ``lambdaa`` and ``importance`` are exposed as
    constant inputs and connected to the corresponding terminations of the
    RL node. The RL node's prosperity origin is connected to a
    ``ProsperitySaver`` constructed with 'data_<expName>.txt'.

    Returns the assembled ``nef.Network``.
    """
    net = nef.Network('HandWired parameters of RL node to bias')
    net.add_to_nengo()

    # The ASM variant of the Q-lambda node is used here (the plain
    # rl_sarsa.qlambda node was the earlier choice).
    learner = rl_sarsa.qlambdaASM(
        "RL", noStateVars=2, noActions=4, noValues=20, logPeriod=2000)
    grid = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC", 10000)
    net.add(learner)
    net.add(grid)

    # Data loop between the world and the RL node.
    net.connect(grid.getOrigin(QLambda.topicDataIn),
                learner.newTerminationFor(QLambda.topicDataIn))
    net.connect(learner.getOrigin(QLambda.topicDataOut),
                grid.getTermination(QLambda.topicDataOut))

    # Parameter sources (controllable from the simulation window).
    labels = ['alpha', 'gamma', 'lambda', 'importance']
    for label, value in zip(labels, [alpha, gamma, lambdaa, importance]):
        net.make_input(label, [value])

    # Feed each source into the RL node's matching termination.
    topics = [QLambda.topicAlpha, QLambda.topicGamma,
              QLambda.topicLambda, QLambda.topicImportance]
    for label, topic in zip(labels, topics):
        net.connect(label, learner.getTermination(topic))

    # Send the prosperity output of the RL node to the saver node.
    recorder = net.add(ProsperitySaver('data_' + expName + '.txt'))
    net.connect(learner.getOrigin(QLambda.topicProsperity),
                recorder.getTermination("data"))
    return net
Пример #3
0
def buildExperiment(alpha, gamma, lambdaa, importance):
    """Wire a Q-lambda RL node to a grid world with hand-set parameters.

    Each argument is published as a constant input ('alpha', 'gamma',
    'lambda', 'importance') and connected to the RL node termination of
    the corresponding topic. The finished ``nef.Network`` is returned.
    """
    net = nef.Network('HandWired parameters of RL node to bias')
    net.add_to_nengo()

    rl_node = rl_sarsa.qlambda(
        "RL", noStateVars=2, noActions=4, noValues=20, logPeriod=2000)
    world_node = gridworld.benchmarkA(
        "map_20x20", "BenchmarkGridWorldNodeC", 10000)
    net.add(rl_node)
    net.add(world_node)

    # world -> RL
    world_out = world_node.getOrigin(QLambda.topicDataIn)
    rl_in = rl_node.newTerminationFor(QLambda.topicDataIn)
    net.connect(world_out, rl_in)

    # RL -> world
    rl_out = rl_node.getOrigin(QLambda.topicDataOut)
    world_in = world_node.getTermination(QLambda.topicDataOut)
    net.connect(rl_out, world_in)

    # Parameter sources (controllable from the simulation window).
    net.make_input('alpha', [alpha])
    net.make_input('gamma', [gamma])
    net.make_input('lambda', [lambdaa])
    net.make_input('importance', [importance])

    # Hook the sources up to the RL node, one termination at a time.
    alpha_in = rl_node.getTermination(QLambda.topicAlpha)
    net.connect('alpha', alpha_in)
    gamma_in = rl_node.getTermination(QLambda.topicGamma)
    net.connect('gamma', gamma_in)
    lambda_in = rl_node.getTermination(QLambda.topicLambda)
    net.connect('lambda', lambda_in)
    importance_in = rl_node.getTermination(QLambda.topicImportance)
    net.connect('importance', importance_in)

    return net
def addModel(net, modelName='model0'):
    """
    Create one independent model inside ``net`` and return its components.

    The model is made of a Q-lambda ASM node, a grid-world node, a motivation
    source and a ``ProsperitySaver``. All of them are added to ``net`` and
    connected here; node and input names are derived from ``modelName``.

    Returns the list ``[rl, world, source, saver, modelName, tstates,
    modelName]`` (``modelName`` is present twice), which can be used to
    configure the model afterwards.
    """
    # ---------------------------------------------------------- components
    agent = rl_sarsa.qlambdaASM(
        modelName,
        noStateVars=2,
        noActions=4,
        noValues=15,
        logPeriod=2000,
        synchronous=False,
        classname="org.hanns.rl.discrete.ros.sarsa.config.QLambdaCoverageRewardFile",
    )
    grid = gridworld.benchmarkA(
        modelName + "_map_15x15",
        mapName="BenchmarkGridWorldNodeD",
        logPeriod=100000,
    )
    drive = motivation.basic(modelName, 1, Motivation.DEF_DECAY, logPeriod=10000)

    # Place the nodes into the network (RL node, world, motivation source).
    for node in (agent, grid, drive):
        net.add(node)

    # ------------------------------------------------------------- wiring
    # world -> rl (states): 3x3 identity with the first diagonal entry left
    # at zero. The first dimension is the reward, which must not be
    # connected to the RL node directly.
    state_transform = [
        [0, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ]
    net.connect(
        grid.getOrigin(QLambda.topicDataIn),
        agent.newTerminationFor(QLambda.topicDataIn, state_transform),
    )
    # rl -> world (actions)
    net.connect(
        agent.getOrigin(QLambda.topicDataOut),
        grid.getTermination(QLambda.topicDataOut),
    )

    # world -> motivation (reward): only the first world dimension is used.
    reward_to_drive = [[1], [0], [0]]
    net.connect(
        grid.getOrigin(QLambda.topicDataIn),
        drive.newTerminationFor(Motivation.topicDataIn, reward_to_drive),
    )

    # motivation (reward) -> rl (reward): first motivation dimension goes to
    # the first dimension of the RL data input.
    drive_to_reward = [
        [1, 0, 0],
        [0, 0, 0],
    ]
    net.connect(
        drive.getOrigin(Motivation.topicDataOut),
        agent.newTerminationFor(QLambda.topicDataIn, drive_to_reward),
    )

    # motivation -> rl (importance): second motivation dimension drives the
    # importance input of the RL node.
    drive_to_importance = [[0], [1]]
    net.connect(
        drive.getOrigin(Motivation.topicDataOut),
        agent.newTerminationFor(QLambda.topicImportance, drive_to_importance),
    )

    # world -> rl (importance): connected through an all-zero transform.
    world_to_importance = [[0], [0], [0]]
    net.connect(
        grid.getOrigin(QLambda.topicDataIn),
        agent.newTerminationFor(QLambda.topicImportance, world_to_importance),
    )

    # Constant source for the decay port of the motivation node
    # (0.01 is used here instead of Motivation.DEF_DECAY).
    decay_input = modelName + '_decay'
    net.make_input(decay_input, [0.01])
    net.connect(decay_input, drive.getTermination(Motivation.topicDecay))

    # --------------------------------------------------------------- misc
    importance_input = modelName + '_importance'
    net.make_input(importance_input, [0])
    net.connect(importance_input, agent.getTermination(QLambda.topicImportance))

    # TODO: is the name needed here?
    recorder = net.add(ProsperitySaver(modelName + '_saver.txt'))
    net.connect(
        agent.getOrigin(QLambda.topicProsperity),
        recorder.getTermination("RLprosperity"),
    )

    # Hand back every new component.
    return [agent, grid, drive, recorder, modelName, state_transform, modelName]
Пример #5
0
def buildSimulation(alpha, gamma, lambdaa, importance, expName='test0'):
    """Assemble the RL / grid-world simulation and record RL prosperity.

    The RL node is the ASM Q-lambda node created with an explicit
    ``classname`` and ``prospLen=1``. The four numeric arguments are exposed
    as constant inputs and connected to the RL node's alpha, gamma, lambda
    and importance terminations. The prosperity origin of the RL node is
    connected to a ``ProsperitySaver`` built with 'data_<expName>.txt'.

    Returns the assembled ``nef.Network``.
    """
    net = nef.Network('HandWired parameters of RL node to bias')
    net.add_to_nengo()

    rl_settings = dict(
        noStateVars=2,
        noActions=4,
        noValues=20,
        logPeriod=2000,
        classname="org.hanns.rl.discrete.ros.sarsa.QLambda",
        prospLen=1,
    )
    learner = rl_sarsa.qlambdaASM("RL", **rl_settings)
    arena = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC", 10000)
    net.add(learner)
    net.add(arena)

    # Data exchange between the world and the RL node, in both directions.
    net.connect(arena.getOrigin(QLambda.topicDataIn),
                learner.newTerminationFor(QLambda.topicDataIn))
    net.connect(learner.getOrigin(QLambda.topicDataOut),
                arena.getTermination(QLambda.topicDataOut))

    # Parameter sources (controllable from the simulation window).
    constants = [
        ('alpha', alpha),
        ('gamma', gamma),
        ('lambda', lambdaa),
        ('importance', importance),
    ]
    for source_name, value in constants:
        net.make_input(source_name, [value])

    # Connect each source to the termination of the same topic.
    routing = [
        ('alpha', QLambda.topicAlpha),
        ('gamma', QLambda.topicGamma),
        ('lambda', QLambda.topicLambda),
        ('importance', QLambda.topicImportance),
    ]
    for source_name, topic in routing:
        net.connect(source_name, learner.getTermination(topic))

    output_file = 'data_' + expName + '.txt'
    recorder = net.add(ProsperitySaver(output_file))
    net.connect(learner.getOrigin(QLambda.topicProsperity),
                recorder.getTermination("data"))
    return net
Пример #6
0
def buildSimulation(alpha, gamma, lambdaa, importance, expName="test0"):
    """Create the hand-wired RL simulation network with a prosperity saver.

    Builds an ASM Q-lambda node and a grid-world node, connects them in a
    loop, feeds the four given constants into the RL node's parameter
    terminations and connects the RL prosperity origin to a
    ``ProsperitySaver`` constructed with "data_<expName>.txt".

    Returns the assembled ``nef.Network``.
    """
    net = nef.Network("HandWired parameters of RL node to bias")
    net.add_to_nengo()

    rl_node = rl_sarsa.qlambdaASM(
        "RL", noStateVars=2, noActions=4, noValues=20, logPeriod=2000,
        classname="org.hanns.rl.discrete.ros.sarsa.QLambda", prospLen=1)
    world_node = gridworld.benchmarkA(
        "map_20x20", "BenchmarkGridWorldNodeC", 10000)
    net.add(rl_node)
    net.add(world_node)

    # world -> RL, then RL -> world
    net.connect(
        world_node.getOrigin(QLambda.topicDataIn),
        rl_node.newTerminationFor(QLambda.topicDataIn),
    )
    net.connect(
        rl_node.getOrigin(QLambda.topicDataOut),
        world_node.getTermination(QLambda.topicDataOut),
    )

    # One row per parameter: input name, constant value, RL termination topic.
    parameters = (
        ("alpha", alpha, QLambda.topicAlpha),
        ("gamma", gamma, QLambda.topicGamma),
        ("lambda", lambdaa, QLambda.topicLambda),
        ("importance", importance, QLambda.topicImportance),
    )

    # Parameter sources (controllable from the simulation window). All
    # inputs are created before any of them is connected.
    for name, value, _topic in parameters:
        net.make_input(name, [value])
    for name, _value, topic in parameters:
        net.connect(name, rl_node.getTermination(topic))

    saver_node = net.add(ProsperitySaver("data_" + expName + ".txt"))
    net.connect(
        rl_node.getOrigin(QLambda.topicProsperity),
        saver_node.getTermination("data"),
    )
    return net
Пример #7
0
# Create the NeuralModule which implements discrete RL algorithm - Q(lambda) - Q-learning with eligibility traces
#
# Start the benchmark B
#
# by Jaroslav Vitku [[email protected]]

import nef
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup as NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils as RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa
import gridworld

net = nef.Network(
    'Demo of SARSA RL module interacting with the simulator of discrete 2D world with obstacles and rewards'
)
net.add_to_nengo()

# RL node: 2 state variables, 4 actions, 20 values per variable (xsize=20).
# The factory receives the network itself, so there is no explicit
# net.add(rl) here.
rl = rl_sarsa.qlambdaASMConfigured(
    "RL", net, noStateVars=2, noActions=4, noValues=20
)

# Grid-world simulator node.
world = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC")
net.add(world)

# Data loop: world -> RL and RL -> world.
net.connect(
    world.getOrigin(QLambda.topicDataIn),
    rl.newTerminationFor(QLambda.topicDataIn),
)
net.connect(
    rl.getOrigin(QLambda.topicDataOut),
    world.getTermination(QLambda.topicDataOut),
)

print('Configuration complete.')
Пример #8
0
#
# Start the benchmark B
#
# by Jaroslav Vitku [[email protected]]

import nef
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup as NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils as RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa
import gridworld

net = nef.Network(
    'Demo of SARSA RL module interacting with the simulator of discrete 2D world with obstacles and rewards'
)
net.add_to_nengo()

# RL node: 2 state variables, 4 actions, 30 values per variable (xsize=30).
# The factory receives the network itself, so there is no explicit
# net.add(rl) here.
rl = rl_sarsa.qlambdaASMConfigured(
    "RL", net, noStateVars=2, noActions=4, noValues=30
)

# TODO: this benchmark does not seem to work at the moment.
world = gridworld.benchmarkA("map_30x30", "BenchmarkGridWorldNodeB")
net.add(world)

# Data loop: world -> RL and RL -> world.
net.connect(
    world.getOrigin(QLambda.topicDataIn),
    rl.newTerminationFor(QLambda.topicDataIn),
)
net.connect(
    rl.getOrigin(QLambda.topicDataOut),
    world.getTermination(QLambda.topicDataOut),
)

print('Configuration complete.')
Пример #9
0
import nef
from ca.nengo.math.impl import FourierFunction
from ca.nengo.model.impl import FunctionInput
from ca.nengo.model import Units
from ctu.nengoros.modules.impl import DefaultNeuralModule as NeuralModule
from ctu.nengoros.comm.nodeFactory import NodeGroup as NodeGroup
from ctu.nengoros.comm.rosutils import RosUtils as RosUtils
from org.hanns.rl.discrete.ros.sarsa import QLambda
import rl_sarsa
import gridworld

# Network that hosts the RL node and the grid-world simulator.
DEMO_TITLE_NOTE = None  # placeholder removed below; see note
del DEMO_TITLE_NOTE
net = nef.Network('Demo of SARSA RL module interacting with the simulator of discrete 2D world with obstacles and rewards')
net.add_to_nengo()

# RL node: 2 state variables, 4 actions, xsize=20. The network is passed to
# the factory, so the node is not added with net.add() here.
rl = rl_sarsa.qlambdaASMConfigured("RL", net, noStateVars=2, noActions=4, noValues=20)

# Grid-world simulator node.
world = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC")
net.add(world)

# Data loop: world -> RL, then RL -> world.
net.connect(world.getOrigin(QLambda.topicDataIn), rl.newTerminationFor(QLambda.topicDataIn))
net.connect(rl.getOrigin(QLambda.topicDataOut), world.getTermination(QLambda.topicDataOut))

print('Configuration complete.')