Пример #1
0
def buildSimulation(alpha, gamma, lambdaa, importance, expName='test0'):
    """
    Create a network holding one RL node and one grid world, wire them
    together and feed the RL node's parameter terminations from inputs.

    Arguments:
      alpha, gamma, lambdaa, importance -- values placed (each as a
          one-element list) into the inputs named 'alpha', 'gamma',
          'lambda' and 'importance'
      expName -- inserted into the string handed to ProsperitySaver
          ('data_' + expName + '.txt')

    Returns the assembled network.
    """
    net = nef.Network('HandWired parameters of RL node to bias')
    net.add_to_nengo()

    # NOTE: an earlier (commented-out) variant built the RL node with
    # rl_sarsa.qlambda(...) using the same arguments.
    rl = rl_sarsa.qlambdaASM("RL", noStateVars=2, noActions=4, noValues=20, logPeriod=2000)
    world = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC", 10000)

    # place both nodes into the network (RL first, then the world)
    for node in (rl, world):
        net.add(node)

    # world -> rl over topicDataIn, rl -> world over topicDataOut
    net.connect(
        world.getOrigin(QLambda.topicDataIn),
        rl.newTerminationFor(QLambda.topicDataIn))
    net.connect(
        rl.getOrigin(QLambda.topicDataOut),
        world.getTermination(QLambda.topicDataOut))

    # parameter sources (controllable from the simulation window);
    # a tuple keeps the creation/connection order fixed
    parameters = (
        ('alpha', alpha, QLambda.topicAlpha),
        ('gamma', gamma, QLambda.topicGamma),
        ('lambda', lambdaa, QLambda.topicLambda),
        ('importance', importance, QLambda.topicImportance),
    )

    # first create every input ...
    for inputName, value, _ in parameters:
        net.make_input(inputName, [value])

    # ... then attach each one to the matching termination of the RL node
    for inputName, _, topic in parameters:
        net.connect(inputName, rl.getTermination(topic))

    # route the RL node's prosperity origin into the saver's "data" termination
    saver = net.add(ProsperitySaver('data_' + expName + '.txt'))
    net.connect(rl.getOrigin(QLambda.topicProsperity), saver.getTermination("data"))
    return net
def addModel(net, modelName='model0'):
    """
    Create one RL node, one grid world and one motivation source, add them
    to `net` and wire them to each other.

    Connections made (transform notes are carried over from the original
    authors' comments):
      world    -> rl        topicDataIn, states only (reward row zeroed)
      rl       -> world     topicDataOut (actions)
      world    -> source    reward into the motivation source
      source   -> rl        reward into topicDataIn
      source   -> rl        motivation into topicImportance
      world    -> rl        topicImportance through an all-zero transform
      <modelName>_decay      -> source decay termination (value 0.01)
      <modelName>_importance -> rl importance termination (value 0)
      rl prosperity          -> ProsperitySaver termination "RLprosperity"

    Returns the list
      [rl, world, source, saver, modelName, tstates, modelName]
    (modelName is present twice), which can be used for configuring the model.
    """
    # ---------------------------------------------------------- components
    rl = rl_sarsa.qlambdaASM(
        modelName,
        noStateVars=2,
        noActions=4,
        noValues=15,
        logPeriod=2000,
        synchronous=False,
        classname="org.hanns.rl.discrete.ros.sarsa.config.QLambdaCoverageRewardFile")
    world = gridworld.benchmarkA(
        modelName + "_map_15x15",
        mapName="BenchmarkGridWorldNodeD",
        logPeriod=100000)
    source = motivation.basic(modelName, 1, Motivation.DEF_DECAY, logPeriod=10000)

    # place them into the network, in creation order
    for component in (rl, world, source):
        net.add(component)

    # ---------------------------------------------------------- wiring
    # world -> rl (states): identity on the last two dimensions; the first
    # dimension is the reward and must not be connected directly
    tstates = [[0, 0, 0],
               [0, 1, 0],
               [0, 0, 1]]
    net.connect(
        world.getOrigin(QLambda.topicDataIn),
        rl.newTerminationFor(QLambda.topicDataIn, tstates))

    # rl -> world (actions)
    net.connect(
        rl.getOrigin(QLambda.topicDataOut),
        world.getTermination(QLambda.topicDataOut))

    # world -> motivation (reward): only the first row is non-zero
    treward = [[1],
               [0],
               [0]]
    net.connect(
        world.getOrigin(QLambda.topicDataIn),
        source.newTerminationFor(Motivation.topicDataIn, treward))

    # motivation (reward) -> rl (reward): first dim to first dim
    trr = [[1, 0, 0],
           [0, 0, 0]]
    net.connect(
        source.getOrigin(Motivation.topicDataOut),
        rl.newTerminationFor(QLambda.topicDataIn, trr))

    # motivation -> rl (importance): second row carries the motivation
    timportance = [[0],
                   [1]]
    net.connect(
        source.getOrigin(Motivation.topicDataOut),
        rl.newTerminationFor(QLambda.topicImportance, timportance))

    # world -> rl (importance): the transform is entirely zero
    tsi = [[0],
           [0],
           [0]]
    net.connect(
        world.getOrigin(QLambda.topicDataIn),
        rl.newTerminationFor(QLambda.topicImportance, tsi))

    # decay port of the motivation source; 0.01 is used here rather than
    # Motivation.DEF_DECAY
    decayInput = modelName + '_decay'
    net.make_input(decayInput, [0.01])
    net.connect(decayInput, source.getTermination(Motivation.topicDecay))

    # ---------------------------------------------------------- misc
    importanceInput = modelName + '_importance'
    net.make_input(importanceInput, [0])
    net.connect(importanceInput, rl.getTermination(QLambda.topicImportance))

    # TODO (from the original): is the name needed here?
    saver = net.add(ProsperitySaver(modelName + '_saver.txt'))
    net.connect(
        rl.getOrigin(QLambda.topicProsperity),
        saver.getTermination("RLprosperity"))

    # hand back every new component
    return [rl, world, source, saver, modelName, tstates, modelName]
Пример #3
0
def buildSimulation(alpha, gamma, lambdaa, importance, expName='test0'):
    """
    Build the hand-wired experiment network: an RL node (QLambda class,
    prospLen=1) coupled with a grid world, with its parameters fed by inputs.

    Arguments:
      alpha, gamma, lambdaa, importance -- values wrapped in one-element
          lists and used for the inputs 'alpha', 'gamma', 'lambda' and
          'importance'
      expName -- used to form the string 'data_' + expName + '.txt' that is
          passed to ProsperitySaver

    Returns the network.
    """
    net = nef.Network('HandWired parameters of RL node to bias')
    net.add_to_nengo()

    rl = rl_sarsa.qlambdaASM("RL",
                             noStateVars=2,
                             noActions=4,
                             noValues=20,
                             logPeriod=2000,
                             classname="org.hanns.rl.discrete.ros.sarsa.QLambda",
                             prospLen=1)
    world = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC", 10000)

    # register the nodes: RL node first, grid world second
    for node in (rl, world):
        net.add(node)

    # couple the two nodes in both directions
    net.connect(world.getOrigin(QLambda.topicDataIn), rl.newTerminationFor(QLambda.topicDataIn))
    net.connect(rl.getOrigin(QLambda.topicDataOut), world.getTermination(QLambda.topicDataOut))

    # (input name, value, RL termination topic) -- order is significant
    wiring = [
        ('alpha', alpha, QLambda.topicAlpha),
        ('gamma', gamma, QLambda.topicGamma),
        ('lambda', lambdaa, QLambda.topicLambda),
        ('importance', importance, QLambda.topicImportance),
    ]

    # parameter sources (controllable from the simulation window)
    for label, value, _topic in wiring:
        net.make_input(label, [value])

    # signal sources -> RL node
    for label, _value, topic in wiring:
        net.connect(label, rl.getTermination(topic))

    # prosperity origin of the RL node goes to the saver's "data" termination
    saver = net.add(ProsperitySaver('data_' + expName + '.txt'))
    net.connect(rl.getOrigin(QLambda.topicProsperity), saver.getTermination("data"))
    return net
Пример #4
0
def buildSimulation(alpha, gamma, lambdaa, importance, expName="test0"):
    """
    Set up a network in which a grid world and an RL node exchange data and
    the RL node's alpha/gamma/lambda/importance terminations are driven by
    named inputs.

    Arguments:
      alpha, gamma, lambdaa, importance -- each is passed as a one-element
          list to make_input under the names "alpha", "gamma", "lambda"
          and "importance"
      expName -- becomes part of the ProsperitySaver argument
          ("data_" + expName + ".txt")

    Returns the network that was built.
    """
    net = nef.Network("HandWired parameters of RL node to bias")
    net.add_to_nengo()

    rl = rl_sarsa.qlambdaASM(
        "RL",
        noStateVars=2,
        noActions=4,
        noValues=20,
        logPeriod=2000,
        classname="org.hanns.rl.discrete.ros.sarsa.QLambda",
        prospLen=1,
    )
    world = gridworld.benchmarkA("map_20x20", "BenchmarkGridWorldNodeC", 10000)

    # place them into the network
    net.add(rl)
    net.add(world)

    # world -> rl
    worldOut = world.getOrigin(QLambda.topicDataIn)
    rlIn = rl.newTerminationFor(QLambda.topicDataIn)
    net.connect(worldOut, rlIn)

    # rl -> world
    rlOut = rl.getOrigin(QLambda.topicDataOut)
    worldIn = world.getTermination(QLambda.topicDataOut)
    net.connect(rlOut, worldIn)

    # parallel tuples: input name / initial value / RL termination topic
    names = ("alpha", "gamma", "lambda", "importance")
    values = (alpha, gamma, lambdaa, importance)
    topics = (
        QLambda.topicAlpha,
        QLambda.topicGamma,
        QLambda.topicLambda,
        QLambda.topicImportance,
    )

    # parameter sources (controllable from the simulation window)
    for name, value in zip(names, values):
        net.make_input(name, [value])

    # hook every source up to the RL node
    for name, topic in zip(names, topics):
        net.connect(name, rl.getTermination(topic))

    # store prosperity: RL prosperity origin -> saver "data" termination
    saver = net.add(ProsperitySaver("data_" + expName + ".txt"))
    prosperityOut = rl.getOrigin(QLambda.topicProsperity)
    net.connect(prosperityOut, saver.getTermination("data"))
    return net