Пример #1
0
    def run_epoch(self, epoch, num_steps, prefix, collect_reward=False):
        """ Run one 'epoch' of training or testing, where an epoch is defined
        by the number of steps executed.  Prints a progress report after
        every trial

        Arguments:
           num_steps - steps per epoch
           prefix - string to print ('training' or 'testing')

        """
        steps_left = num_steps
        if prefix == "training" or not collect_reward:
            while steps_left > 0:
                print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                sys.stdout.flush()
                terminal = RLGlue.RL_episode(steps_left)
                if not terminal:
                    RLGlue.RL_agent_message("episode_end")
                steps_left -= RLGlue.RL_num_steps()
        elif prefix == "testing":
            total_reward = 0
            episode_counter = 0
            terminal = False
            while steps_left > 0:
                if terminal:
                    print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                    sys.stdout.flush()
                roat = RLGlue.RL_step()
                reward = roat.r
                terminal = roat.terminal
                total_reward += reward
                episode_counter += terminal
                steps_left -= 1
            return total_reward, episode_counter
Пример #2
0
def recordTrajectory():
    """Start an episode and collect every step result up to the terminal one.

    Returns:
        A list of the objects returned by ``RLGlue.RL_step()``, in call
        order.  The last entry is the first one whose ``terminal`` attribute
        is truthy.
    """
    RLGlue.RL_start()
    steps = [RLGlue.RL_step()]
    # Keep stepping until the most recent result is flagged terminal.
    while not steps[-1].terminal:
        steps.append(RLGlue.RL_step())
    return steps
Пример #3
0
    def start(self):
        """Run the experiment's main step loop, then clean up RL-Glue.

        Initialises RL-Glue, exchanges parameters with the agent (the reply
        is stored in ``self.agent_params``), starts the first episode and
        then calls ``RLGlue.RL_step()`` while ``self.step <= self.steps``.
        A new episode is started whenever a step is terminal, and
        ``self.n_train_episodes`` is incremented each time.

        Reads the module-level ``args`` for ``report_freq``, ``eval_freq``
        and ``save_freq``.
        """
        print("\nExperiment starting!")
        taskSpec = RLGlue.RL_init()
        print(taskSpec)

        exp_params_for_agent = {}
        self.agent_params = self.message_agent(MessageType.exchange_params,
                                               exp_params_for_agent)

        # Small predicates kept next to the loop that uses them.  Each one
        # must *return* its result; without the return they always yield
        # None and the corresponding branch can never run.
        def should_report():
            return self.step % args.report_freq == 0

        def should_evaluate():
            return (self.step % args.eval_freq == 0 and
                    self.step > self.agent_params['learn_start'])

        def should_save():
            return self.step % args.save_freq == 0

        RLGlue.RL_start()

        while self.step <= self.steps:
            observ_action_term = RLGlue.RL_step()
            # NOTE(review): nothing else in this loop advances self.step, so
            # it is counted here, once per RL_step call, to let the loop
            # terminate.  Confirm no other code is meant to own this counter.
            self.step += 1

            # If game ends, start another
            if observ_action_term.terminal:
                # Not sure if we need to clean up after every episode, don't think so
                RLGlue.RL_start()
                self.n_train_episodes += 1

            if should_report():
                # TODO assert agent steps is equal
                print('Steps: {}'.format(self.step))
                self.message_agent(MessageType.report)

            if should_evaluate():
                pass

            if should_save():
                pass

        print("A job well done.")
        RLGlue.RL_cleanup()
Пример #4
0
#  $Revision: 617 $
#  $Date: 2009-02-05 11:24:12 +0200 (Thu, 05 Feb 2009) $
#  $Author: gabalz $
#  $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_1_experiment.py $

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
# Test driver for "test_1": steps through an episode and verifies, after each
# step, the message replies and the contents of the step result.
# NOTE(review): check_fail presumably records a failure when its argument is
# true -- confirm against glue_test.
tester = glue_test("test_1")

task_spec = RLGlue.RL_init()

RLGlue.RL_start()

# Step 1: both the environment and the agent must answer "one" with
# "one|1.|one"; the step is non-terminal with reward 1.0 and the observation
# holds exactly one int (value 0) and no doubles or chars.
roat = RLGlue.RL_step()
tester.check_fail("one|1.|one" != RLGlue.RL_env_message("one"))
tester.check_fail("one|1.|one" != RLGlue.RL_agent_message("one"))
tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)
tester.check_fail(len(roat.o.intArray) != 1)
tester.check_fail(len(roat.o.doubleArray) != 0)
tester.check_fail(len(roat.o.charArray) != 0)
tester.check_fail(roat.o.intArray[0] != 0)

# Step 2: the expected reply to "two" is "two|2.2.|two"; the step is again
# non-terminal with reward 1.0 and a single-int observation.
roat = RLGlue.RL_step()
tester.check_fail("two|2.2.|two" != RLGlue.RL_env_message("two"))
tester.check_fail("two|2.2.|two" != RLGlue.RL_agent_message("two"))
tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)
tester.check_fail(len(roat.o.intArray) != 1)
Пример #5
0
        tester.check_fail(len(startTuple.a.charArray) != 0)

        tester.check_fail(len(startTuple.o.intArray) != 0)
        tester.check_fail(len(startTuple.o.doubleArray) != 0)
        tester.check_fail(len(startTuple.o.charArray) != 0)
    else:
        tester.check_fail(len(startTuple.a.intArray) != 7)
        tester.check_fail(len(startTuple.a.doubleArray) != 3)
        tester.check_fail(len(startTuple.a.charArray) != 1)

        tester.check_fail(len(startTuple.o.intArray) != 2)
        tester.check_fail(len(startTuple.o.doubleArray) != 4)
        tester.check_fail(len(startTuple.o.charArray) != 5)

    # Take five steps; each must be non-terminal with zero reward.
    for whichStep in range(0, 5):
        stepTuple = RLGlue.RL_step()
        tester.check_fail(stepTuple.terminal != 0)
        tester.check_fail(stepTuple.r != 0)

        # whichEpisode is set outside this fragment.  When it is even, the
        # action and observation arrays are all expected to be empty.
        if (whichEpisode % 2 == 0):
            tester.check_fail(len(stepTuple.a.intArray) != 0)
            tester.check_fail(len(stepTuple.a.doubleArray) != 0)
            tester.check_fail(len(stepTuple.a.charArray) != 0)

            tester.check_fail(len(stepTuple.o.intArray) != 0)
            tester.check_fail(len(stepTuple.o.doubleArray) != 0)
            tester.check_fail(len(stepTuple.o.charArray) != 0)
        else:
            # When it is odd, the action must carry 7 ints, 3 doubles and
            # 1 char.
            tester.check_fail(len(stepTuple.a.intArray) != 7)
            tester.check_fail(len(stepTuple.a.doubleArray) != 3)
            tester.check_fail(len(stepTuple.a.charArray) != 1)
Пример #6
0
print("\n\n----------Stepping through an episode----------")
# Calling RL_init() again starts over with another experiment.
taskSpec = RLGlue.RL_init()

# Drive this episode one step at a time instead of a whole episode at once.
startResponse = RLGlue.RL_start()

firstObservation = startResponse.o.intArray[0]
firstAction = startResponse.a.intArray[0]
first_pair_msg = ("First observation and action were: " +
                  str(firstObservation) + " and: " + str(firstAction))
print(first_pair_msg)

# Keep stepping until a step is flagged terminal.  At least one step is
# always taken.  State/action pairs could optionally be printed here from
# stepResponse.o.intArray[0] and stepResponse.a.intArray[0].
while True:
    stepResponse = RLGlue.RL_step()
    if stepResponse.terminal == 1:
        break

print("\n\n----------Summary----------")

totalSteps = RLGlue.RL_num_steps()
totalReward = RLGlue.RL_return()
summary_msg = ("It ran for " + str(totalSteps) +
               " steps, total reward was: " + str(totalReward))
print(summary_msg)
RLGlue.RL_cleanup()
Пример #7
0
#  $Revision: 617 $
#  $Date: 2009-02-05 04:24:12 -0500 (Thu, 05 Feb 2009) $
#  $Author: gabalz $
#  $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_1_experiment.py $

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
# Driver for "test_1": every check_fail call below is handed the negation of
# an expectation about the current step.
tester = glue_test("test_1")

task_spec = RLGlue.RL_init()

RLGlue.RL_start()

# First step: the "one" message replies, then the step result itself.
roat = RLGlue.RL_step()
tester.check_fail("one|1.|one" != RLGlue.RL_env_message("one"))
tester.check_fail("one|1.|one" != RLGlue.RL_agent_message("one"))
tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)
tester.check_fail(len(roat.o.intArray) != 1)
tester.check_fail(len(roat.o.doubleArray) != 0)
tester.check_fail(len(roat.o.charArray) != 0)
tester.check_fail(roat.o.intArray[0] != 0)

# Second step: the "two" message replies, then the step result itself.
roat = RLGlue.RL_step()
tester.check_fail("two|2.2.|two" != RLGlue.RL_env_message("two"))
tester.check_fail("two|2.2.|two" != RLGlue.RL_agent_message("two"))
tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)
tester.check_fail(len(roat.o.intArray) != 1)
Пример #8
0
"""
Manual experiment for testing the environment
"""

import sys
import os
import time

from rlglue import RLGlue

# Initialize RL Glue
RLGlue.RL_init()

# Send the environment the 'debug=True' message before the episode starts
# (how the message is interpreted is up to the environment).
RLGlue.RL_env_message('debug=True')

RLGlue.RL_start()

# Step until a result is flagged terminal.  The step results themselves are
# not needed; the totals are read back from RL-Glue below.
while not RLGlue.RL_step().terminal:
    pass

steps = RLGlue.RL_num_steps()
R = RLGlue.RL_return()

# Single-argument parenthesised print: same output under the Python 2 print
# statement, and also valid on Python 3.
print('Experiment ended after %d steps with a return of %d' % (steps, R))

RLGlue.RL_cleanup()