def runEpisode(step_limit):
    global which_episode
    global total_win
    global total_draw
    global total_lose
    global ns_epoch
    global pcts_win
    global pcts_win_or_draw
    global pcts_lose

    which_episode += 1

    # Play one game
    terminal = RLGlue.RL_episode(step_limit)

    # Number of steps until the game was decided, and the return
    total_steps = RLGlue.RL_num_steps()
    total_reward = RLGlue.RL_return()

    # Report this episode's result
    r_win = 1.0
    r_draw = -0.5
    r_lose = -1.0
    if total_reward == r_win:
        total_win += 1
    elif total_reward == r_draw:
        total_draw += 1
    elif total_reward == r_lose:
        total_lose += 1
    print("Episode " + str(which_episode) + "\t " + str(total_steps) +
          " steps \t" + str(total_reward) + " total reward\t " +
          str(terminal) + " natural end")

    # Tally the results every 100 episodes
    record_interval = 100
    if which_episode % record_interval == 0:
        line = 'Episode: {}, {} wins, {} draws, {} loses'.format(
            which_episode, total_win, total_draw, total_lose)
        print('---------------------------------------------------------------')
        print(line)
        print('---------------------------------------------------------------')

        # Append the tally to a results file
        with open('result.txt', 'a') as f:
            f.writelines(line + '\n')

        ns_epoch.append(which_episode)
        pcts_win.append(float(total_win) / record_interval * 100)
        pcts_win_or_draw.append(
            float(total_win + total_draw) / record_interval * 100)
        # Bug fix: this previously appended the win rate a second time
        pcts_lose.append(float(total_lose) / record_interval * 100)

        total_win = 0
        total_draw = 0
        total_lose = 0
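# A minimal sketch (not in the original script) of how the lists populated
# above (ns_epoch, pcts_win, pcts_win_or_draw, pcts_lose) might be plotted
# once the run finishes. Assumes matplotlib is available; the labels and
# output filename are illustrative.
import matplotlib.pyplot as plt

def plot_win_rates(ns_epoch, pcts_win, pcts_win_or_draw, pcts_lose):
    plt.plot(ns_epoch, pcts_win, label='win %')
    plt.plot(ns_epoch, pcts_win_or_draw, label='win or draw %')
    plt.plot(ns_epoch, pcts_lose, label='lose %')
    plt.xlabel('Episode')
    plt.ylabel('Rate over last 100 episodes (%)')
    plt.legend()
    plt.savefig('win_rates.png')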
def run_episode(self):
    """Run a single episode."""
    # Update epsilon (three-phase schedule, currently disabled)
    '''
    phase_len = self.episodes / 3
    if self.episode_number == phase_len * 2:
        # Start low phase
        RLGlue.RL_agent_message('set epsilon %f' % (self.epsilon_low))
    elif self.episode_number >= phase_len and self.episode_number < phase_len * 2:
        # In decreasing phase
        epsilon = float(RLGlue.RL_agent_message('get epsilon'))
        epsilon += (self.epsilon_decr - self.epsilon_high) / phase_len
        RLGlue.RL_agent_message('set epsilon %f' % (epsilon))
    elif self.episode_number == 0:
        # Start high phase
        RLGlue.RL_agent_message('set epsilon %f' % (self.epsilon_high))
    '''
    terminal = RLGlue.RL_episode(0)  # 0 - run until terminal
    steps = RLGlue.RL_num_steps()
    reward = RLGlue.RL_return()

    # Running average of return and steps across experiment instances
    self.returns[self.episode_number] = (
        reward + self.returns[self.episode_number] * (self.instance - 1)) / self.instance
    self.steps[self.episode_number] = (
        steps + self.steps[self.episode_number] * (self.instance - 1)) / self.instance
    self.episode_number += 1
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode
    RLGlue.RL_episode(10000)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        print "Episode %d/%d\t %d steps \t %.1f total reward\t" % (
            learningEpisode, whichEpisode, totalSteps, totalReward)
    else:
        print "Evaluation ::\t %d steps \t %.1f total reward" % (totalSteps, totalReward)
        # Append one JSON record per evaluation episode (requires `import json`)
        with open('eval_dump.json', 'a') as f:
            json.dump(
                {
                    "Steps": totalSteps,
                    "Episode": whichEpisode,
                    "Reward": totalReward
                }, f)
            f.write('\n')
    return totalSteps
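# A small sketch (not part of the original script) showing how the JSON-lines
# records written above could be read back for analysis. Assumes each line of
# eval_dump.json holds one JSON object, as written by runEpisode.
import json

def load_eval_dump(path='eval_dump.json'):
    records = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line:
                records.append(json.loads(line))
    return records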
def runEpisode(stepLimit, trial):
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    print "Experiment " + str(trial + 1) + "\t Episode " + str(whichEpisode) + \
        "\t " + str(totalSteps) + " steps \t" + str(totalReward) + \
        " total reward\t " + str(terminal) + " natural end"
    whichEpisode = whichEpisode + 1
def run_experiment(maxsteps=100, numeps=1):
    taskSpec = RLGlue.RL_init()
    for ep in range(numeps):
        terminal = RLGlue.RL_episode(maxsteps)
        totalSteps = RLGlue.RL_num_steps()
        totalReward = RLGlue.RL_return()
        print "Episode " + str(ep) + "\t " + str(totalSteps) + " steps \t" + \
            str(totalReward) + " total reward\t " + str(terminal) + " natural end"
    RLGlue.RL_cleanup()
def runEpisode(stepLimit):
    # A stepLimit of 0 implies no limit
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    print "Episode " + str(whichEpisode) + "\t " + str(totalSteps) + \
        " steps \t" + str(totalReward) + " total reward\t "
    whichEpisode = whichEpisode + 1
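# A minimal driver sketch for the runEpisode helper above, following the usual
# RL-Glue experiment shape (compare the main program further below): init, run
# a batch of episodes, clean up. The episode count is an illustrative value,
# not taken from the original snippet.
whichEpisode = 0
taskSpec = RLGlue.RL_init()
for _ in range(100):
    runEpisode(0)  # 0 = no step limit
RLGlue.RL_cleanup()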
def runEpisode(stepLimit):
    global whichEpisode
    RLGlue.RL_agent_message('reset')
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    # Consolidated from four chained print statements into one
    print "Episode " + str(whichEpisode) + "\t " + str(totalSteps) + \
        " steps \t" + str(totalReward) + " total reward\t " + \
        str(terminal) + " natural end"
    RLGlue.RL_agent_message('episode_end')
    whichEpisode = whichEpisode + 1
def run_episode(self):
    """Run a single episode."""
    terminal = RLGlue.RL_episode(10)
    steps = RLGlue.RL_num_steps()
    reward = RLGlue.RL_return()

    self.total_reward += reward
    # Incremental update of the average across instances:
    # new_avg = old_avg + (x - old_avg) / n
    x = self.total_reward / (self.episode_number + 1)
    self.results[self.episode_number] += (
        x - self.results[self.episode_number]) / self.instance
    self.episode_number += 1
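# A quick sanity check (not part of the original class) of the incremental
# mean used above: folding each sample in via avg += (x - avg) / n yields the
# same result as the batch mean.
def incremental_mean(xs):
    avg = 0.0
    for n, x in enumerate(xs, start=1):
        avg += (x - avg) / n
    return avg

assert abs(incremental_mean([1.0, 2.0, 6.0]) - 3.0) < 1e-12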
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode
    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        print("Episode " + str(learningEpisode) + "\t " + str(totalSteps) +
              " steps \t" + str(totalReward) + " total reward\t ")
    else:
        print("Evaluation ::\t " + str(totalSteps) + " steps \t" +
              str(totalReward) + " total reward\t ")
def demo():
    statistics = []
    episodeLength = 100
    #this_score = evaluateAgent()
    #printScore(0, this_score)
    #statistics.append(this_score)
    for i in range(1, 1000):
        # S (the start-state string) is assumed to be defined at module level
        RLGlue.RL_env_message("set-start-state " + S)
        RLGlue.RL_start()
        RLGlue.RL_episode(episodeLength)
        this_return = RLGlue.RL_return()
        print "%d\t\t%.2f" % (i, this_return)
        statistics.append(this_return)
    saveResultToCSV(statistics, "MyResults_sarsa1000_ver2.csv")
def evaluateAgent():
    total = 0  # renamed from `sum`, which shadowed the builtin
    sum_of_squares = 0
    n = 10
    episodeLength = 100
    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        RLGlue.RL_episode(episodeLength)  # was a hard-coded 100
        this_return = RLGlue.RL_return()
        total += this_return
        sum_of_squares += this_return**2
    mean = total / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)
    RLGlue.RL_agent_message("unfreeze learning")
    return mean, standard_dev
def runEpisode(is_learning_episode):
    # Assumes `dt` is datetime.datetime and `logger` is configured elsewhere
    global whichEpisode, learningEpisode
    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        logger.info("{},{},{},{}".format(
            dt.now().strftime("%Y-%m-%d_%H:%M:%S"),
            learningEpisode, totalSteps, totalReward))
        print "Episode " + str(learningEpisode) + "\t " + str(totalSteps) + \
            " steps \t" + str(totalReward) + " total reward\t " + \
            dt.now().strftime("%Y%m%d_%H%M%S")
    else:
        print "Evaluation ::\t " + str(totalSteps) + " steps \t" + \
            str(totalReward) + " total reward\t "
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode
    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        print "Episode " + str(learningEpisode) + "\t " + str(totalSteps) + \
            " steps \t" + str(totalReward) + " total reward\t "
    else:
        print "Evaluation ::\t " + str(totalSteps) + " steps \t" + \
            str(totalReward) + " total reward\t "

    # Append the reward to a CSV file (requires `import csv`);
    # `with` replaces the original manual open/close
    with open('reward.csv', 'a') as f_csv:
        writer_r = csv.writer(f_csv, lineterminator='\n')
        writer_r.writerow([str(learningEpisode), str(totalReward)])
def evaluateAgent():
    total = 0  # renamed from `sum`, which shadowed the builtin
    sum_of_squares = 0
    n = 10
    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        # We use a cutoff here in case the policy is bad
        # and will never end an episode
        RLGlue.RL_episode(5000)
        this_return = RLGlue.RL_return()
        total += this_return
        sum_of_squares += this_return**2
    mean = total / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)
    RLGlue.RL_agent_message("unfreeze learning")
    return mean, standard_dev
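# A hedged sketch of the usual driver around evaluateAgent (modeled on the
# standard RL-Glue sample experiment, not taken verbatim from this file):
# run learning episodes and evaluate the frozen policy every `eval_every`
# episodes. The episode counts are illustrative.
def run_with_periodic_evaluation(num_episodes=500, eval_every=25):
    RLGlue.RL_init()
    for episode in range(1, num_episodes + 1):
        RLGlue.RL_episode(5000)  # learning episode with a step cutoff
        if episode % eval_every == 0:
            mean, standard_dev = evaluateAgent()
            print "Episode %d\t mean return: %.2f (+/- %.2f)" % (
                episode, mean, standard_dev)
    RLGlue.RL_cleanup()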
def run_episode(training=True):
    global total_episode, learned_episode, total_time, learned_steps, \
        csv_episode, highscore, num_finished_eval_episode, evaluation_scores
    start_time = time.time()
    RLGlue.RL_episode(0)
    num_steps = RLGlue.RL_num_steps()
    total_reward = RLGlue.RL_return()
    total_episode += 1
    elapsed_time = time.time() - start_time
    total_time += elapsed_time
    epoch = int(learned_steps / time_steps_per_epoch)

    if training:
        learned_steps += num_steps
        learned_episode += 1
        sec = int(elapsed_time)
        total_minutes = int(total_time / 60)
        csv_episode.append([learned_episode, total_reward, num_steps,
                            sec, total_minutes, epoch, learned_steps])
        if total_reward > highscore:
            highscore = total_reward
            csv_training_highscore.append(
                [learned_episode, highscore, total_minutes, epoch])
        print "Episode:", learned_episode, "epoch:", epoch, \
            "num_steps:", num_steps, "total_reward:", total_reward, \
            "time:", sec, "sec", "total_time:", total_minutes, "min"
    return num_steps, total_reward
avg_steps_c = []
avg_reward_c = []
RLGlue.RL_env_message("set-start-state 2")
for i in range(50):
    num_of_steps = 0
    reward = 0
    j = 0
    jobs = []
    while j < 20:
        #p = multiprocessing.Process(target=RLGlue.RL_episode, args=(100000,))
        #jobs.append(p)
        #p.start()
        RLGlue.RL_episode(100000)
        j = j + 1
        if RLGlue.RL_return() == 0:
            j = j - 1  # don't count episodes that returned 0
        else:
            num_of_steps = num_of_steps + RLGlue.RL_num_steps()
            reward = reward + RLGlue.RL_return()
            print RLGlue.RL_return()
    # Average over the 20 counted episodes (the original divided by 50,
    # which did not match the inner loop's count)
    avg_reward_c.append(reward / 20)
    avg_steps_c.append(num_of_steps / 20)

with open("values.txt", 'w') as f:
    f.write(str(avg_steps_c))
    f.write(" ")
    f.write(str(avg_reward_c))

plt.plot(avg_steps_a, 'r')
def runEpisode(stepLimit):
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    print "Experiment " + str(args.expid) + "\t Episode " + str(whichEpisode) + \
        "\t " + str(totalSteps) + " steps \t" + str(totalReward) + \
        " total reward\t " + str(terminal) + " natural end"
    whichEpisode = whichEpisode + 1

# Main program starts here
# Remember that a stepLimit of 0 means there is no limit at all!
for t in range(args.numtrials):
    print 'trial: ' + str(t)
    whichEpisode = 0
    taskSpec = RLGlue.RL_init()
    steps = np.zeros(args.numeps)
    rews = np.zeros(args.numeps)
    for ep in range(args.numeps):
        runEpisode(args.maxsteps)
        steps[ep] = RLGlue.RL_num_steps()
        rews[ep] = RLGlue.RL_return()
    print 'trial finished, final reward: ' + str(rews[-1])
    #with open(args.path + '/' + args.logname + '_' + str(args.expid) + '_' + str(t) + '.pkl', 'w') as f:
    #    pickle.dump((steps, rews), f, -1)
    RLGlue.RL_cleanup()
print("\n\n----------Stepping through an episode----------") #We could also start over and do another experiment */ taskSpec = RLGlue.RL_init() #We could run one step at a time instead of one episode at a time */ #Start the episode */ startResponse = RLGlue.RL_start() firstObservation = startResponse.o.intArray[0] firstAction = startResponse.a.intArray[0] print("First observation and action were: " + str(firstObservation) + " and: " + str(firstAction)) #Run one step */ stepResponse = RLGlue.RL_step() #Run until the episode ends*/ while (stepResponse.terminal != 1): stepResponse = RLGlue.RL_step() #if (stepResponse.terminal != 1) #Could optionally print state,action pairs */ #printf("(%d,%d) ",stepResponse.o.intArray[0],stepResponse.a.intArray[0])*/ print("\n\n----------Summary----------") totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() print("It ran for " + str(totalSteps) + " steps, total reward was: " + str(totalReward)) RLGlue.RL_cleanup()
# (fragment: tail of an episode-averaging loop like the one commented out below)
        num_of_steps = num_of_steps + RLGlue.RL_num_steps()
        reward = reward + RLGlue.RL_return()
    avg_reward_b.append(reward / 50)
    avg_steps_b.append(num_of_steps / 50)

'''
avg_steps_c = []
avg_reward_c = []
RLGlue.RL_env_message("set-start-state 2")
for i in range(100):
    num_of_steps = 0
    reward = 0
    for j in range(50):
        RLGlue.RL_episode(0)
        num_of_steps = num_of_steps + RLGlue.RL_num_steps()
        reward = reward + RLGlue.RL_return()
    avg_reward_c.append(reward / 50)
    avg_steps_c.append(num_of_steps / 50)
'''

plt.plot(avg_steps_a, 'r')
plt.ylabel('Average_Steps')
plt.xlabel('Number of 50 episode runs')
plt.title('Average_steps of A')
plt.show()

plt.plot(avg_reward_a, 'r')
plt.ylabel('Average_Reward')
plt.xlabel('Number of 50 episode runs')
plt.title('Average_Reward of A')
plt.show()
""" Manual experiment for testing the environment """ import sys import os import time from rlglue import RLGlue # Initialize RL Glue RLGlue.RL_init() RLGlue.RL_env_message('debug=True') RLGlue.RL_start() running = True reward = 0 while running: result = RLGlue.RL_step() running = not result.terminal steps = RLGlue.RL_num_steps() R = RLGlue.RL_return() print 'Experiment ended after %d steps with a return of %d' % (steps, R) RLGlue.RL_cleanup()