Example #1
    def run_epoch(self, epoch, num_steps, prefix, collect_reward=False):
        """ Run one 'epoch' of training or testing, where an epoch is defined
        by the number of steps executed.  Prints a progress report after
        every trial

        Arguments:
           num_steps - steps per epoch
           prefix - string to print ('training' or 'testing')

        """
        steps_left = num_steps
        if prefix == "training" or not collect_reward:
            while steps_left > 0:
                print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                sys.stdout.flush()
                terminal = RLGlue.RL_episode(steps_left)
                if not terminal:
                    RLGlue.RL_agent_message("episode_end")
                steps_left -= RLGlue.RL_num_steps()
        elif prefix == "testing":
            total_reward = 0
            episode_counter = 0
            terminal = False
            while steps_left > 0:
                if terminal:
                    print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                    sys.stdout.flush()
                # RL_step returns a (reward, observation, action, terminal) struct
                roat = RLGlue.RL_step()
                reward = roat.r
                terminal = roat.terminal
                total_reward += reward
                episode_counter += terminal  # terminal is 0/1, so this counts completed episodes
                steps_left -= 1
            return total_reward, episode_counter
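
A typical driver alternates the two phases; a minimal sketch, assuming a hypothetical experiment object exp exposing the run_epoch method above and hypothetical num_epochs/train_steps/test_steps settings:

# Hypothetical driver loop: alternate training and reward-collecting testing.
RLGlue.RL_init()
for epoch in range(1, num_epochs + 1):
    exp.run_epoch(epoch, train_steps, "training")
    total_reward, episodes = exp.run_epoch(
        epoch, test_steps, "testing", collect_reward=True)
    print("epoch {}: {} test episodes, {} total test reward".format(
        epoch, episodes, total_reward))
RLGlue.RL_cleanup()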
Example #2
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(10000)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        #print "Episode " + str(learningEpisode) + "/" + str(whichEpisode) + "\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "
        print "Episode %d/%d\t %d steps \t %.1f total reward\t" % (
            learningEpisode, whichEpisode, totalSteps, totalReward)
    else:
        #print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "
        print "Evaluation ::\t %d steps \t %.1f total reward" % (totalSteps,
                                                                 totalReward)
        with open('eval_dump.json', 'a') as f:
            json.dump(
                {
                    "Steps": totalSteps,
                    "Episode": whichEpisode,
                    "Reward": totalReward
                }, f)
            f.write('\n')
        return totalSteps
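
Each evaluation appends one JSON object per line, so eval_dump.json can be read back as JSON Lines; a minimal sketch:

import json

# Read back the evaluation dump written by runEpisode above.
with open('eval_dump.json') as f:
    evals = [json.loads(line) for line in f if line.strip()]
for e in evals:
    print("episode {Episode}: {Steps} steps, {Reward} reward".format(**e))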
Example #3
    def run_episode(self):
        """ Run a single episode """
        # Update epsilon
        '''
        phase_len = self.episodes / 3
        if self.episode_number == phase_len * 2:
            # Start low phase
            RLGlue.RL_agent_message('set epsilon %f' % (self.epsilon_low))
        elif self.episode_number >= phase_len and self.episode_number < phase_len * 2:
            # In decr phase
            epsilon = float(RLGlue.RL_agent_message('get epsilon'))
            epsilon += (self.epsilon_decr - self.epsilon_high) / phase_len
            RLGlue.RL_agent_message('set epsilon %f' % (epsilon))
        elif self.episode_number == 0:
            # Start high phase
            RLGlue.RL_agent_message('set epsilon %f' % (self.epsilon_high))
        '''
        terminal = RLGlue.RL_episode(0)  # 0 - run until terminal
        steps = RLGlue.RL_num_steps()
        reward = RLGlue.RL_return()

        #print "\nEpisode %d\t %d steps\t reward: %d" % (self.episode_number, steps, reward)
        #print "Episode "+str(episode_number)+"\t "+str(totalSteps)+ " steps \t" + str(totalReward) + " total reward\t " + str(terminal) + " natural end"

        # Incrementally average the return and step count at this episode
        # index across independent instances of the experiment
        self.returns[self.episode_number] = (
            reward + self.returns[self.episode_number] *
            (self.instance - 1)) / self.instance
        self.steps[self.episode_number] = (steps +
                                           self.steps[self.episode_number] *
                                           (self.instance - 1)) / self.instance

        self.episode_number += 1
Example #4
def runEpisode(step_limit):
    global which_episode
    global total_win
    global total_draw
    global total_lose
    global ns_epoch
    global pcts_win
    global pcts_win_or_draw
    global pcts_lose

    which_episode += 1

    # Play one game (episode)
    terminal = RLGlue.RL_episode(step_limit)

    # Get the number of steps taken and the return for the game
    total_steps = RLGlue.RL_num_steps()
    total_reward = RLGlue.RL_return()

    # Classify this game's outcome by its total reward
    r_win = 1.0
    r_draw = -0.5
    r_lose = -1.0

    if total_reward == r_win:
        total_win += 1
    elif total_reward == r_draw:
        total_draw += 1
    elif total_reward == r_lose:
        total_lose += 1

    print("Episode " + str(which_episode) + "\t " + str(total_steps) +
          " steps \t" + str(total_reward) + " total reward\t " +
          str(terminal) + " natural end")

    # Tally the results every 100 episodes
    record_interval = 100

    if which_episode % record_interval == 0:
        line = 'Episode: {}, {} wins, {} draws, {} loses'.format(
            which_episode, total_win, total_draw, total_lose)
        print(
            '---------------------------------------------------------------')
        print(line)
        print(
            '---------------------------------------------------------------')

        # Append the tally to a file
        with open('result.txt', 'a') as f:
            f.writelines(line + '\n')

        ns_epoch.append(which_episode)
        pcts_win.append(float(total_win) / record_interval * 100)
        pcts_win_or_draw.append(
            float(total_win + total_draw) / record_interval * 100)
        pcts_lose.append(float(total_lose) / record_interval * 100)

        total_win = 0
        total_draw = 0
        total_lose = 0
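
The ns_epoch and pcts_* lists built above lend themselves to a learning-curve plot; a minimal sketch, assuming matplotlib is available:

import matplotlib.pyplot as plt

# Plot the outcome percentages recorded every 100 episodes.
plt.plot(ns_epoch, pcts_win, label='win %')
plt.plot(ns_epoch, pcts_win_or_draw, label='win or draw %')
plt.plot(ns_epoch, pcts_lose, label='lose %')
plt.xlabel('Episode')
plt.ylabel('Percentage over last 100 episodes')
plt.legend()
plt.show()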
Example #5
def run_epoch(epoch, num_steps, prefix):
    steps_left = num_steps
    while steps_left > 0:
        print prefix + " epoch: ", epoch, "steps_left: ", steps_left
        terminal = RLGlue.RL_episode(steps_left)
        if not terminal:
            # Episode was cut off by the step budget; tell the agent so it
            # can finish its end-of-episode bookkeeping.
            RLGlue.RL_agent_message("episode_end")
        steps_left -= RLGlue.RL_num_steps()
Example #6
def runEpisode(stepLimit, trial):
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    print "Experiment " + str(trial + 1) + "\t Episode " + str(whichEpisode) + "\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t " + str(terminal) + " natural end"

    whichEpisode = whichEpisode + 1
Example #7
def run_experiment(maxsteps=100, numeps=1):
    taskSpec = RLGlue.RL_init()
    for ep in range(numeps):
        terminal = RLGlue.RL_episode(maxsteps)
        totalSteps = RLGlue.RL_num_steps()
        totalReward = RLGlue.RL_return()
        print "Episode " + str(ep) + "\t " + str(
            totalSteps) + " steps \t" + str(
                totalReward) + " total reward\t " + str(
                    terminal) + " natural end"
    RLGlue.RL_cleanup()
Example #8
def runEpisode(stepLimit):
    # stepLimit of 0 implies no limit
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    print "Episode " + str(whichEpisode) + "\t " + str(
        totalSteps) + " steps \t" + str(totalReward) + " total reward\t "

    whichEpisode = whichEpisode + 1
Example #9
def runEpisode(stepLimit):
    global whichEpisode
    RLGlue.RL_agent_message('reset')  # agent-defined message (not core RL-Glue API)
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    print("Episode " + str(whichEpisode)),
    print("\t " + str(totalSteps)),
    print(" steps \t" + str(totalReward)),
    print " total reward\t " + str(terminal) + " natural end"

    RLGlue.RL_agent_message('episode_end')

    whichEpisode = whichEpisode + 1
Example #10
    def run_episode(self):
        """ Run a single episode """
        terminal = RLGlue.RL_episode(10)
        steps = RLGlue.RL_num_steps()
        reward = RLGlue.RL_return()

        #print "Episode %d\t %d steps\t reward: %d" % (self.episode_number, steps, reward)

        self.total_reward += reward

        # Update the running average of reward-per-episode, averaged
        # incrementally across independent instances of the experiment
        x = self.total_reward / (self.episode_number + 1)
        self.results[self.episode_number] += (x - self.results[self.episode_number]) / self.instance

        self.episode_number += 1
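
The update above is the standard incremental-mean rule: after n samples, mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n. A quick self-contained check in plain Python (no RL-Glue required):

# Verify the incremental-mean update matches the batch mean.
samples = [3.0, -1.0, 4.0, 1.5]
mean = 0.0
for n, x in enumerate(samples, start=1):
    mean += (x - mean) / n
assert abs(mean - sum(samples) / len(samples)) < 1e-12
print(mean)  # 1.875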
Example #11
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        print("Episode " + str(learningEpisode) + "\t " + str(totalSteps) +
              " steps \t" + str(totalReward) + " total reward\t ")
    else:
        print("Evaluation ::\t " + str(totalSteps) + " steps \t" +
              str(totalReward) + " total reward\t ")
Example #12
def run_epoch(epoch, num_steps, prefix):
    """ Run one 'epoch' of training or testing, where an epoch is defined
    by the number of steps executed.  Prints a progress report after
    every trial.

    Arguments:
       num_steps - steps per epoch
       prefix - string to print ('training' or 'testing')

    """
    steps_left = num_steps
    while steps_left > 0:
        print prefix + " epoch: ", epoch, "steps_left: ", steps_left
        terminal = RLGlue.RL_episode(steps_left)
        if not terminal:
            # Notify the agent when the step budget cuts an episode short.
            RLGlue.RL_agent_message("episode_end")
        steps_left -= RLGlue.RL_num_steps()
Example #13
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        print "Episode " + str(learningEpisode) + "\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "
    else:
        print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "

        # Append this evaluation's reward, tagged with the learning-episode
        # count, to a CSV file
        list_csv = [str(learningEpisode), str(totalReward)]
        with open('reward.csv', 'a') as f_csv:
            writer_r = csv.writer(f_csv, lineterminator='\n')
            writer_r.writerow(list_csv)
Example #14
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        logger.info("{},{},{},{}".format(
            dt.now().strftime("%Y-%m-%d_%H:%M:%S"), learningEpisode,
            totalSteps, totalReward))
        print "Episode " + str(learningEpisode) + "\t " + str(
            totalSteps) + " steps \t" + str(
                totalReward) + " total reward\t " + dt.now().strftime(
                    "%Y%m%d_%H%M%S")
    else:
        print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(
            totalReward) + " total reward\t "
Example #15
def main():
    whichTrainingMDP = 1
    # Uncomment ONE of the following lines to choose your experiment
    #loadTetris(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,19]
    #loadHelicopter(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,9]
    #loadAcrobot(whichTrainingMDP); #put the desired parameter set in where MDP is in [1,49] #0 is standard acrobot
    #loadPolyathlon(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,5]
    loadMario(True, True, 121, 0, 99, whichTrainingMDP)

    # ...and then just run the experiment:
    RLGlue.RL_init()
    episodesToRun = 10
    totalSteps = 0
    for i in range(episodesToRun):
        RLGlue.RL_episode(20000)
        thisSteps = RLGlue.RL_num_steps()
        print "Total steps in episode %d is %d" % (i, thisSteps)
        totalSteps += thisSteps

    print "Total steps : %d\n" % (totalSteps)
    RLGlue.RL_cleanup()
Example #16
def run_episode(training=True):
    global total_episode, learned_episode, total_time, learned_steps, csv_episode, highscore, num_finished_eval_episode, evaluation_scores
    start_time = time.time()
    RLGlue.RL_episode(0)
    num_steps = RLGlue.RL_num_steps()
    total_reward = RLGlue.RL_return()
    total_episode += 1
    elapsed_time = time.time() - start_time
    total_time += elapsed_time
    epoch = int(learned_steps / time_steps_per_epoch)

    if training:
        learned_steps += num_steps
        learned_episode += 1
        sec = int(elapsed_time)
        total_minutes = int(total_time / 60)
        csv_episode.append([learned_episode, total_reward, num_steps, sec, total_minutes, epoch, learned_steps])
        if total_reward > highscore:
            highscore = total_reward
            csv_training_highscore.append([learned_episode, highscore, total_minutes, epoch])
        print "Episode:", learned_episode, "epoch:", epoch, "num_steps:", num_steps, "total_reward:", total_reward, "time:", sec, "sec", "total_time:", total_minutes, "min"

    return num_steps, total_reward
Example #17
#  $Revision$
#  $Date$
#  $Author$
#  $HeadURL$

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
tester = glue_test("test_rl_episode")

task_spec = RLGlue.RL_init()

# With no step limit (0), the test environment runs to its natural end,
# which for this test takes 5 steps.
isTerminal = RLGlue.RL_episode(0)
tester.check_fail(isTerminal != 1)
tester.check_fail(RLGlue.RL_num_steps() != 5)

isTerminal = RLGlue.RL_episode(1)
tester.check_fail(isTerminal != 0)
tester.check_fail(RLGlue.RL_num_steps() != 1)

isTerminal = RLGlue.RL_episode(2)
tester.check_fail(isTerminal != 0)
tester.check_fail(RLGlue.RL_num_steps() != 2)

isTerminal = RLGlue.RL_episode(4)
tester.check_fail(isTerminal != 0)
tester.check_fail(RLGlue.RL_num_steps() != 4)

# A limit equal to the natural episode length still reports a cutoff here.
isTerminal = RLGlue.RL_episode(5)
tester.check_fail(isTerminal != 0)
Example #18
"""
Manual experiment for testing the environment
"""

import sys
import os
import time

from rlglue import RLGlue

# Initialize RL Glue
RLGlue.RL_init()

RLGlue.RL_env_message('debug=True')

RLGlue.RL_start()

running = True
reward = 0
while running:
    result = RLGlue.RL_step()
    reward += result.r  # tally per-step reward; should match RL_return()
    running = not result.terminal

steps = RLGlue.RL_num_steps()
R = RLGlue.RL_return()

print 'Experiment ended after %d steps with a return of %d' % (steps, R)

RLGlue.RL_cleanup()
Example #19
def runEpisode(stepLimit):
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    print "Experiment " + str(args.expid) + "\t Episode " + str(
        whichEpisode) + "\t " + str(totalSteps) + " steps \t" + str(
            totalReward) + " total reward\t " + str(terminal) + " natural end"

    whichEpisode = whichEpisode + 1


# Main program starts here

# Remember that a stepLimit of 0 means there is no limit at all!
for t in range(args.numtrials):
    print 'trial: ' + str(t)
    whichEpisode = 0
    taskSpec = RLGlue.RL_init()
    steps = np.zeros(args.numeps)
    rews = np.zeros(args.numeps)
    for ep in range(args.numeps):
        runEpisode(args.maxsteps)
        steps[ep] = RLGlue.RL_num_steps()
        rews[ep] = RLGlue.RL_return()
    print 'trial finished, final reward: ' + str(rews[-1])
    #with open(args.path+'/'+args.logname+'_'+str(args.expid)+'_'+str(t)+'.pkl','w') as f:
    #    pickle.dump((steps,rews),f,-1)
    RLGlue.RL_cleanup()
Example #20
print("\n\n----------Stepping through an episode----------")
# We could also start over and do another experiment
taskSpec = RLGlue.RL_init()

# We could run one step at a time instead of one episode at a time
# Start the episode
startResponse = RLGlue.RL_start()

firstObservation = startResponse.o.intArray[0]
firstAction = startResponse.a.intArray[0]
print("First observation and action were: " + str(firstObservation) +
      " and: " + str(firstAction))

# Run one step
stepResponse = RLGlue.RL_step()

# Run until the episode ends
while stepResponse.terminal != 1:
    stepResponse = RLGlue.RL_step()
    # Could optionally print (observation, action) pairs here, e.g.:
    # print("(%d,%d)" % (stepResponse.o.intArray[0], stepResponse.a.intArray[0]))

print("\n\n----------Summary----------")

totalSteps = RLGlue.RL_num_steps()
totalReward = RLGlue.RL_return()
print("It ran for " + str(totalSteps) + " steps, total reward was: " +
      str(totalReward))
RLGlue.RL_cleanup()
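
To keep a per-step trace, the same stepping loop can record the (observation, action) pair from each step response; a minimal sketch, assuming RL_start() has already been called as above:

# Sketch: step through one episode, recording (observation, action) pairs.
trajectory = []
stepResponse = RLGlue.RL_step()
while stepResponse.terminal != 1:
    trajectory.append((stepResponse.o.intArray[0],
                       stepResponse.a.intArray[0]))
    stepResponse = RLGlue.RL_step()
print("Visited " + str(len(trajectory)) + " (observation, action) pairs")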
Example #21
		RLGlue.RL_episode(0)
		num_of_steps = num_of_steps + RLGlue.RL_num_steps()
		reward = reward + RLGlue.RL_return()
	avg_reward_b.append(reward/50)
	avg_steps_b.append(num_of_steps/50)
'''

avg_steps_c = []
avg_reward_c = []
RLGlue.RL_env_message("set-start-state 2")
for i in range(100):
    num_of_steps = 0
    reward = 0
    for j in range(50):
        RLGlue.RL_episode(0)
        num_of_steps = num_of_steps + RLGlue.RL_num_steps()
        reward = reward + RLGlue.RL_return()
    avg_reward_c.append(reward / 50)
    avg_steps_c.append(num_of_steps / 50)
'''

plt.plot(avg_steps_a,'r')
plt.ylabel('Average_Steps')
plt.xlabel('Number of 50 episode runs')
plt.title('Average_steps of A')
plt.show()

plt.plot(avg_reward_a,'r')
plt.ylabel('Average_Reward')
plt.xlabel('Number of 50 episode runs')
plt.title('Average_Reward of A')