def someEpisodes(game_env, net, discountFactor=0.99, maxSteps=100, avgOver=1, returnEvents=False, exploretoo=True):
    """Return the fitness value for episode(s) of play, given the policy
    defined by a neural network.

    Parameters:
        game_env       -- environment instance; must expose `reset()`,
                          `recordingEnabled` and `_allEvents` (event log).
        net            -- the policy network; `reset()` is called before play.
        discountFactor -- per-step discount applied to rewards.
        maxSteps       -- episode step limit handed to the GameTask.
        avgOver        -- number of episodes to run; fitness is averaged.
        returnEvents   -- if True, also return the environment's event log.
        exploretoo     -- if True, add a tiny (1e-6 per distinct event) bonus
                          so that equally-rewarded policies are ranked by
                          exploration.

    Returns:
        fitness, or (fitness, game_env._allEvents) when returnEvents is True.
    """
    # BUG FIX: removed leftover debugging breakpoint
    # (`import pdb; pdb.set_trace()`) that suspended execution on every call.
    task = GameTask(game_env)
    game_env.recordingEnabled = True
    game_env.reset()
    net.reset()
    task.maxSteps = maxSteps
    # evaluation only: the agent must not learn or log while being scored
    agent = LearningAgent(net)
    agent.learning = False
    agent.logging = False
    exper = EpisodicExperiment(task, agent)
    fitness = 0
    for _ in range(avgOver):
        rs = exper.doEpisodes(1)
        # add a slight bonus for more exploration, if rewards are identical
        if exploretoo:
            fitness += len(set(game_env._allEvents)) * 1e-6
        # the true, discounted reward
        fitness += sum([sum([v * discountFactor ** step for step, v in enumerate(r)]) for r in rs])
    fitness /= avgOver
    if returnEvents:
        return fitness, game_env._allEvents
    else:
        return fitness
def someEpisodes(game_env, net, discountFactor=0.99, maxSteps=100, avgOver=1, returnEvents=False):
    """Score the policy encoded by `net` over one or more recorded episodes.

    The environment's event recording is switched on, the network and
    environment are reset, and `avgOver` episodes are played with learning
    disabled.  The fitness is the discounted episode reward plus a tiny
    exploration bonus (1e-6 per distinct recorded event) that only matters
    as a tie-breaker, averaged over the episodes played.

    Returns the fitness, or (fitness, event log) when `returnEvents` is set.
    """
    episodic_task = GameTask(game_env)
    game_env.recordingEnabled = True
    game_env.reset()
    net.reset()
    episodic_task.maxSteps = maxSteps

    # pure evaluation: no weight updates, no logging
    policy_agent = LearningAgent(net)
    policy_agent.learning = False
    policy_agent.logging = False
    runner = EpisodicExperiment(episodic_task, policy_agent)

    fitness = 0
    for _ in range(avgOver):
        reward_lists = runner.doEpisodes(1)
        # slight exploration bonus, relevant only when rewards are identical
        fitness += len(set(game_env._allEvents)) * 1e-6
        # accumulate the true, discounted reward of each episode
        for episode_rewards in reward_lists:
            discounted = 0
            for step, reward in enumerate(episode_rewards):
                discounted += reward * discountFactor ** step
            fitness += discounted
    fitness /= avgOver

    if returnEvents:
        return fitness, game_env._allEvents
    else:
        return fitness
from scipy import * import sys, time from pybrain.rl.learners.valuebased import ActionValueNetwork from pybrain.rl.agents import LearningAgent from pybrain.rl.learners import Q, SARSA, NFQ from pybrain.rl.experiments.episodic import EpisodicExperiment from pybrain.rl.environments import Task from tasktest import TestTask from envtest import TestEnv env = TestEnv() task = TestTask(env) controller = ActionValueNetwork(200, 3) learner = NFQ() agent = LearningAgent(controller, learner) experiment = EpisodicExperiment(task, agent) i = 0 while True: experiment.doEpisodes(10) print "Learning" agent.learn() agent.reset() i += 1 print "Cycle: %d" % i if i > 60: agent.learning = False
import sys, time from pybrain.rl.learners.valuebased import ActionValueNetwork from pybrain.rl.agents import LearningAgent from pybrain.rl.learners import Q, SARSA, NFQ from pybrain.rl.experiments.episodic import EpisodicExperiment from pybrain.rl.environments import Task from tasktest import TestTask from envtest import TestEnv env = TestEnv() task = TestTask(env) controller = ActionValueNetwork(200, 3) learner = NFQ() agent = LearningAgent(controller, learner) experiment = EpisodicExperiment(task, agent) i = 0 while True: experiment.doEpisodes(10) print "Learning" agent.learn() agent.reset() i += 1 print "Cycle: %d" %i if i > 60: agent.learning = False
# Training phase: run `number_of_runs` episodes of 96 interactions each
# (96 steps — presumably one 15-minute-step day; confirm against the task).
experiment = Experiment(task, agent)
run = 0
while run < number_of_runs:
    experiment.doInteractions(96)
    agent.learn()
    agent.reset()
    if run == 0:
        # keep the logs of the very first (untrained) run for comparison
        first_run_time2 = environment.log_time
        first_run_soc2 = environment.log_soc
        first_run_volt2 = environment.log_volt
    environment.reset()
    run += 1

# Evaluation phase: disable exploration and replay one more day with the
# trained, now-greedy policy; its logs become the "last run" data.
agent.learning = False
experiment = Experiment(task, agent)
experiment.doInteractions(96)
last_run_time2 = environment.log_time
last_run_soc2 = environment.log_soc
last_run_volt2 = environment.log_volt

# Compare state-of-charge trajectories: first (red) vs. trained (green).
plt.plot(first_run_time2, first_run_soc2, 'r--', last_run_time2, last_run_soc2, 'g--')
plt.show()