Example #1
import os

import rlglue.RLGlue as RLGlue


def offlineDemo():
    # Score the untrained agent, then truncate any old results file.
    this_score = evaluateAgent()
    printScore(0, this_score)
    theFile = open("results.csv", "w")
    theFile.close()
    # Remove any archive left over from a previous run.
    if os.path.isfile("Archive.csv"):
        os.remove("Archive.csv")
    # Alternate 50 learning episodes with one frozen-policy evaluation, 200 times.
    for i in range(200):
        for j in range(50):
            RLGlue.RL_episode(0)  # a step limit of 0 means no limit
            RLGlue.RL_env_message("stop print")
            if j % 20 == 0 and i > 0:
                RLGlue.RL_env_message("print")
        this_score = evaluateAgent()
        printScore((i + 1) * 50, this_score)
        theFile = open("results.csv", "a")
        theFile.write("%d\t%.2f\t%.2f\n" %
                      ((i) * 50, this_score[0], this_score[1]))
        theFile.close()
    os.rename('results.csv', 'Archive.csv')
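evaluateAgent and printScore are not defined in this snippet. A minimal sketch of compatible helpers, modeled on the standard RL-Glue sample experiment (the "freeze learning"/"unfreeze learning" messages, episode count, and step limit are assumptions):

import math

def evaluateAgent():
    # Freeze the policy, run 10 evaluation episodes, and return the
    # mean and standard deviation of the per-episode return.
    RLGlue.RL_agent_message("freeze learning")
    n = 10
    total = 0.0
    total_sq = 0.0
    for i in range(n):
        RLGlue.RL_episode(5000)
        this_return = RLGlue.RL_return()
        total += this_return
        total_sq += this_return ** 2
    mean = total / n
    variance = max(0.0, (total_sq - n * mean * mean) / (n - 1.0))
    RLGlue.RL_agent_message("unfreeze learning")
    return mean, math.sqrt(variance)

def printScore(afterEpisodes, score):
    print "%d\t\t%.2f\t\t%.2f" % (afterEpisodes, score[0], score[1])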
Example #2
def demo():
    # S is a comma-separated start-state string defined by the caller
    # (see Example #14, which builds one before calling demo()).
    statistics = []
    episodeLength = 100
    #this_score = evaluateAgent()
    #printScore(0, this_score)
    #statistics.append(this_score)

    for i in range(1, 1000):
        RLGlue.RL_env_message("set-start-state " + S)
        RLGlue.RL_start()  # likely redundant: RL_episode() starts its own episode
        RLGlue.RL_episode(episodeLength)
        this_return = RLGlue.RL_return()
        print "%d\t\t%.2f" % (i, this_return)
        statistics.append(this_return)
        # Rewrite the whole CSV each episode so partial results survive a crash.
        saveResultToCSV(statistics, "MyResults_sarsa1000_ver2.csv")
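saveResultToCSV is also defined elsewhere. A minimal sketch of a compatible helper (the one-row-per-episode format is an assumption):

import csv

def saveResultToCSV(statistics, fileName):
    # Rewrite the whole file on each call, one (episode, return) row
    # per recorded episode.
    f = open(fileName, "w")
    writer = csv.writer(f)
    for episode, this_return in enumerate(statistics):
        writer.writerow([episode + 1, this_return])
    f.close()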
Example #3
def preload(envNameString):
    # Ask the environment shell for its list of loadable environments.
    theRequest = "TO=%d FROM=%d CMD=%d VALTYPE=%d VALS=NULL" % (
        ENVSHELL, BENCHMARK, LISTQUERY, NOVALUE)
    theResponse = RLGlue.RL_env_message(theRequest)
    # The payload is everything after the final '=' in the response.
    lastEqualsPos = theResponse.rfind("=")
    thePayLoad = theResponse[lastEqualsPos + 1:]
    if thePayLoad[-1] == ':':
        thePayLoad = thePayLoad[:-1]
    items = thePayLoad.split(':')
    theNames = []
    theParamHolders = []
    # Fields alternate name:params; the loop skips items[0], which the
    # shell apparently uses as a header/count field.
    for i in range(1, len(items), 2):
        theNames.append(items[i])
        theParamHolders.append(ParameterHolder(items[i + 1]))

    indexOfMyEnv = None
    for i in range(len(theNames)):
        if theNames[i] == envNameString:
            indexOfMyEnv = i

    if indexOfMyEnv is None:
        raise ValueError("unknown environment: " + envNameString)
    return theParamHolders[indexOfMyEnv]
Example #4

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
tester = glue_test("test_message")

task_spec = RLGlue.RL_init()

tester.check_fail("empty" != RLGlue.RL_env_message(None))

tester.check_fail("empty" != RLGlue.RL_env_message(""))

tester.check_fail("empty" != RLGlue.RL_agent_message(None))

tester.check_fail("empty" != RLGlue.RL_agent_message(""))

tester.check_fail("" != RLGlue.RL_env_message("empty"))

tester.check_fail("" != RLGlue.RL_agent_message("empty"))

theResponse = RLGlue.RL_env_message("null")
# a "null" message should yield None or an empty-string response
tester.check_fail(theResponse is not None and theResponse != "")

theResponse = RLGlue.RL_agent_message("null")
tester.check_fail(theResponse is not None and theResponse != "")
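These tests rely on the glue_test helper from the RL-Glue codec test suite. A minimal sketch of the interface they assume (the counters and output format are assumptions):

class glue_test:
    def __init__(self, name):
        self.name = name
        self.tests = 0
        self.failures = 0

    def check_fail(self, didFail):
        # Count one test; record a failure whenever the condition is true.
        self.tests += 1
        if didFail:
            self.failures += 1
            print "%s: check %d failed" % (self.name, self.tests)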
Example #5

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
tester = glue_test("test_1")

task_spec = RLGlue.RL_init()

RLGlue.RL_start()

roat = RLGlue.RL_step()  # roat: reward, observation, action, terminal
tester.check_fail("one|1.|one" != RLGlue.RL_env_message("one"))
tester.check_fail("one|1.|one" != RLGlue.RL_agent_message("one"))
tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)
tester.check_fail(len(roat.o.intArray) != 1)
tester.check_fail(len(roat.o.doubleArray) != 0)
tester.check_fail(len(roat.o.charArray) != 0)
tester.check_fail(roat.o.intArray[0] != 0)

roat = RLGlue.RL_step()
tester.check_fail("two|2.2.|two" != RLGlue.RL_env_message("two"))
tester.check_fail("two|2.2.|two" != RLGlue.RL_agent_message("two"))
tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)
tester.check_fail(len(roat.o.intArray) != 1)
tester.check_fail(len(roat.o.doubleArray) != 0)
Example #6
		print "\t", msg 
		RLGlue.RL_agent_message(msg)
	
	# Run experiments
	for i in xrange(settings.instances):
		print "Running experiment #%d with %d episodes..." % (i + 1, settings.episodes),
		sys.stdout.flush()
		experiment.run()
		
		# Experiment completed, show summary
		print "Done!"
		print str(experiment)
		
	
	# Store data to file
	env_name = RLGlue.RL_env_message('name')
	data_file = env_name + '_' + time.strftime('%Y-%m-%d_%H:%M:%S.dat')
	data_path = os.path.join(settings.results_dir, data_file)
	
	print
	print "Storing results into %s..." % (data_path),
	
	""" Save result data to file """
	f = open(data_path, 'w')
	
	f.write("# Settings:\n")
	for k in dir(settings):
		if k.startswith('__'):
			continue
		f.write("#   %s = %s\n" % (k, getattr(settings, k)))
	
Example #7
# Run experiment
if __name__ == "__main__":

    import settings

    # Create a new experiment
    experiment = Experiment(**settings.experiment)

    # Set up environment
    print
    print "Environment settings:"
    for k, v in settings.environment.items():
        msg = 'set %s %s' % (k, v)
        print "  ", msg
        RLGlue.RL_env_message(msg)

    # Set up agent
    print "Agent settings:"
    for k, v in settings.agent.items():
        msg = 'set %s %s' % (k, v)
        print "  ", msg
        RLGlue.RL_agent_message(msg)

    print

    # Run experiments
    for i in xrange(settings.experiment['instances']):
        experiment.run()

        #print str(experiment)
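The settings module this script reads is not shown. A hypothetical settings.py matching the names it uses (all keys and values below are invented for illustration):

# settings.py (hypothetical)
experiment = {'instances': 10, 'episodes': 100}  # unpacked into Experiment(**...)
environment = {'noise': 0.1}                     # sent as "set noise 0.1"
agent = {'epsilon': 0.05, 'alpha': 0.5}          # sent as "set epsilon 0.05", etc.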
Example #8
taskSpec = RLGlue.RL_init()
print("RL_init called, the environment sent task spec: " + taskSpec)

print("\n\n----------Sending some sample messages----------")

# Talk to the agent and environment a bit...
responseMessage = RLGlue.RL_agent_message("what is your name?")
print("Agent responded to \"what is your name?\" with: " + responseMessage)

responseMessage = RLGlue.RL_agent_message(
    "If at first you don't succeed; call it version 1.0")
print(
    "Agent responded to \"If at first you don't succeed; call it version 1.0\" with: "
    + responseMessage + "\n")

responseMessage = RLGlue.RL_env_message("what is your name?")
print("Environment responded to \"what is your name?\" with: " +
      responseMessage)
responseMessage = RLGlue.RL_env_message(
    "If at first you don't succeed; call it version 1.0")
print(
    "Environment responded to \"If at first you don't succeed; call it version 1.0\" with: "
    + responseMessage)

print("\n\n----------Running a few episodes----------")
runEpisode(100)
runEpisode(100)
runEpisode(100)
runEpisode(100)
runEpisode(100)
runEpisode(1)
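runEpisode is not defined in this snippet; Example #12 below gives a compatible definition (a stepLimit of 0 means the episode runs until it terminates on its own).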
Example #9
# Compute the average number of steps as well as the average reward.

import rlglue.RLGlue as RLGlue
import sys
import matplotlib.pyplot as plt

#def q_experiment():
#	for i in

RLGlue.RL_init()
'''for i in range(100):
	RLGlue.RL_episode(0)
	print RLGlue.RL_return()
'''
#q_experiment()
'''

avg_steps_a = []
avg_reward_a = []
RLGlue.RL_env_message("set-start-state 0");
for i in range (100):
	num_of_steps = 0
	reward = 0 
	for j  in range(50):
		RLGlue.RL_episode(0)
		num_of_steps = num_of_steps + RLGlue.RL_num_steps()
		reward = reward + RLGlue.RL_return()
	avg_reward_a.append(reward/50)
	avg_steps_a.append(num_of_steps/50)
'''
'''
Example #10
def load(envNameString, theParams):
    # Ask the environment shell to load the named environment with the
    # given (string-serialized) parameters.
    loadPayLoad = envNameString + ":" + theParams.stringSerialize()
    theRequest = "TO=%d FROM=%d CMD=%d VALTYPE=%d VALS=%s" % (
        ENVSHELL, BENCHMARK, LOADQUERY, STRINGLIST, loadPayLoad)
    RLGlue.RL_env_message(theRequest)
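Together with preload from Example #3, a plausible usage (the environment name is invented for illustration):

params = preload("MountainCar")  # fetch the environment's ParameterHolder
load("MountainCar", params)      # ask the shell to load it with those parameters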
Example #11
RLGlue.RL_agent_message("save_policy results.dat")

print "\nCalling RL_cleanup and RL_init to clear the agent's memory..."

RLGlue.RL_cleanup()
RLGlue.RL_init()

print "Evaluating the agent's default policy:\n\t\tMean Return\tStandardDeviation\n------------------------------------------------------"
single_evaluation()

print "\nLoading up the value function we saved earlier."
RLGlue.RL_agent_message("load_policy results.dat")

print "Evaluating the agent after loading the value function:\n\t\tMean Return\tStandardDeviation\n------------------------------------------------------"
single_evaluation()

print "Telling the environment to use fixed start state of 2,3."
RLGlue.RL_env_message("set-start-state 2 3")
RLGlue.RL_start()
print "Telling the environment to print the current state to the screen."
RLGlue.RL_env_message("print-state")
print "Evaluating the agent a few times from a fixed start state of 2,3:\n\t\tMean Return\tStandardDeviation\n-------------------------------------------"
single_evaluation()

print "Evaluating the agent again with the random start state:\n\t\tMean Return\tStandardDeviation\n-----------------------------------------------------"
RLGlue.RL_env_message("set-random-start-state")
single_evaluation()

RLGlue.RL_cleanup()
print "\nProgram Complete."
Example #12
def runEpisode(stepLimit):
    # stepLimit of 0 implies no limit
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    print "Episode " + str(whichEpisode) + "\t " + str(
        totalSteps) + " steps \t" + str(totalReward) + " total reward\t "

    whichEpisode = whichEpisode + 1


whichEpisode = 0  # episode counter used by runEpisode (initial value assumed)
RLGlue.RL_init()
#RLGlue.RL_env_message("dumptmatrix tmatrixperfect.dat")
RLGlue.RL_env_message("printabstractstates")
for i in xrange(NO_EPISODES):  # NO_EPISODES is defined elsewhere in the original script
    runEpisode(0)
'''
returnVsEpisode = np.zeros(NO_EPISODES)
timeVsEpisode = np.zeros(NO_EPISODES)

def calculateCoords(state):
    return [state%12,state/12]

policy = [12*[4*[0]] for i in xrange(12)]

for run in xrange(NO_RUNS):
    print "Run: "+str(run+1)
    RLGlue.RL_init()
Example #13
import sys

import matplotlib.pyplot as plt
import rlglue.RLGlue as RLGlue

#def q_experiment():
#	for i in

RLGlue.RL_init()
'''for i in range(100):
	RLGlue.RL_episode(0)
	print RLGlue.RL_return()
'''
#q_experiment()

avg_steps_a = []
avg_reward_a = []
RLGlue.RL_env_message("set-start-state 0")
for i in range(100):
    num_of_steps = 0
    reward = 0
    for j in range(50):
        RLGlue.RL_episode(0)
        num_of_steps = num_of_steps + RLGlue.RL_num_steps()
        reward = reward + RLGlue.RL_return()
    avg_reward_a.append(reward / 50.0)      # float division for correct averages
    avg_steps_a.append(num_of_steps / 50.0)
'''

avg_steps_b = []
avg_reward_b = []
RLGlue.RL_env_message("set-start-state 1");
for i in range (100):
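matplotlib is imported above but never used in the live part of the snippet. A possible way to plot the collected averages:

plt.plot(avg_reward_a, label="avg reward (start state 0)")
plt.plot(avg_steps_a, label="avg steps (start state 0)")
plt.xlabel("batch of 50 episodes")
plt.legend()
plt.show()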
Example #14
#
# Just do a single evaluateAgent and print it
#
#def single_evaluation():
#    this_score = evaluateAgent()
#    printScore(0, this_score)

import random

from numpy import array

import rlglue.RLGlue as RLGlue

RLGlue.RL_init()
print "Telling the environment to use fixed start state."
nbrReaches = 7
habitatSize = 4
S = array([random.randint(1, 3) for i in xrange(nbrReaches * habitatSize)])
#S=array([1,1,2, 1, 3, 3, 1])
#S=[1,2,3,3,2,1,3,2,2,3,2,1,2,2,3,2,2,1,3,1,1,2,2,3,3,2,1,1]
S = ",".join(map(str, S))  # serialize the state as a comma-separated string
print S
RLGlue.RL_env_message("set-start-state " + S)
RLGlue.RL_start()

print "Starting offline demo\n----------------------------\nWill alternate learning for 10 episodes, then freeze policy and evaluate for 10 episodes.\n"
print "After Episode\tMean Return\tStandard Deviation\n-------------------------------------------------------------------------"
demo()

print "Evaluating the agent again with the random start state:\n\t\tMean Return\tStandardDeviation\n-----------------------------------------------------"
RLGlue.RL_env_message("set-random-start-state")
#single_evaluation()

RLGlue.RL_cleanup()
print "\nProgram Complete."
Example #15
"""
Manual experiment for testing the environment
"""

import sys
import os
import time

from rlglue import RLGlue

# Initialize RL Glue
RLGlue.RL_init()

RLGlue.RL_env_message('debug=True')

RLGlue.RL_start()

running = True
while running:
    result = RLGlue.RL_step()
    running = not result.terminal

steps = RLGlue.RL_num_steps()
R = RLGlue.RL_return()

print 'Experiment ended after %d steps with a return of %.2f' % (steps, R)

RLGlue.RL_cleanup()
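The manual stepping loop above is equivalent to letting RL-Glue drive the episode itself, as the other examples do:

RLGlue.RL_episode(0)  # 0 = no step limit; runs until the episode terminates
steps = RLGlue.RL_num_steps()
R = RLGlue.RL_return()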