Example #1
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment
# GameTask is assumed to be defined alongside the game environment in the surrounding project.

def someEpisodes(game_env, net, discountFactor=0.99, maxSteps=100, avgOver=1, returnEvents=False, exploretoo=True):
    """ Return the fitness value for episodes of play (averaged over avgOver runs),
    given the policy defined by a neural network. """

    task = GameTask(game_env)
    game_env.recordingEnabled = True
    game_env.reset()
    net.reset()
    task.maxSteps = maxSteps
    agent = LearningAgent(net)
    agent.learning = False
    agent.logging = False
    exper = EpisodicExperiment(task, agent)
    fitness = 0
    for _ in range(avgOver):
        rs = exper.doEpisodes(1)
        # add a slight bonus for more exploration, if rewards are identical
        if exploretoo:
            fitness += len(set(game_env._allEvents)) * 1e-6
        # the true, discounted reward        
        fitness += sum([sum([v*discountFactor**step for step, v in enumerate(r)]) for r in rs])
    fitness /= avgOver
    if returnEvents:
        return fitness, game_env._allEvents
    else:
        return fitness
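A minimal usage sketch, assuming a hypothetical environment class (MyGameEnv below) that provides the recordingEnabled / reset() / _allEvents interface used above; the class name and network layout are illustrative, not part of the original project:

from pybrain.tools.shortcuts import buildNetwork

env = MyGameEnv()                                # hypothetical game environment
net = buildNetwork(env.outdim, 10, env.indim)    # observations in, actions out
score = someEpisodes(env, net, maxSteps=200, avgOver=3)
print(score)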
Example #2
def someEpisodes(game_env, net, discountFactor=0.99, maxSteps=100, avgOver=1, returnEvents=False):
    """ Return the fitness value for one episode of play, given the policy defined by a neural network. """
    task = GameTask(game_env)
    game_env.recordingEnabled = True
    game_env.reset()
    net.reset()
    task.maxSteps = maxSteps
    agent = LearningAgent(net)
    agent.learning = False
    agent.logging = False
    exper = EpisodicExperiment(task, agent)
    fitness = 0
    for _ in range(avgOver):
        rs = exper.doEpisodes(1)
        # add a slight bonus for more exploration, if rewards are identical
        fitness += len(set(game_env._allEvents)) * 1e-6
        # the true, discounted reward        
        fitness += sum([sum([v*discountFactor**step for step, v in enumerate(r)]) for r in rs])
    fitness /= avgOver
    if returnEvents:
        return fitness, game_env._allEvents
    else:
        return fitness
Example #3
from scipy import *
import sys, time

from pybrain.rl.learners.valuebased import ActionValueNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA, NFQ
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.rl.environments import Task
from tasktest import TestTask
from envtest import TestEnv

env = TestEnv()
task = TestTask(env)

controller = ActionValueNetwork(200, 3)
learner = NFQ()
agent = LearningAgent(controller, learner)

experiment = EpisodicExperiment(task, agent)

i = 0
while True:
    experiment.doEpisodes(10)
    print "Learning"
    agent.learn()
    agent.reset()
    i += 1
    print "Cycle: %d" % i
    # after 60 cycles, disable learning so the agent stops exploring
    if i > 60:
        agent.learning = False
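
A hedged sketch of what the project-local tasktest.TestTask and envtest.TestEnv classes might look like (these stubs are assumptions, not the project's real code); the environment emits a 200-dimensional observation and accepts one of 3 discrete actions, matching ActionValueNetwork(200, 3) above:

from scipy import zeros
from pybrain.rl.environments.environment import Environment
from pybrain.rl.environments.episodic import EpisodicTask

class TestEnv(Environment):
    indim = 1      # one action value (the chosen action index) per step
    outdim = 200   # length of the observation vector

    def __init__(self):
        self.steps = 0

    def getSensors(self):
        return zeros(self.outdim)      # placeholder observation

    def performAction(self, action):
        self.steps += 1                # the stub ignores the action itself

    def reset(self):
        self.steps = 0

class TestTask(EpisodicTask):
    def getReward(self):
        return 0.0                     # placeholder reward signal

    def isFinished(self):
        return self.env.steps >= 50    # end each episode after 50 steps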
Example #4
File: main.py  Project: RolandSaur/msc
experiment = Experiment(task, agent)

k = 0
while k < number_of_runs:
    experiment.doInteractions(96)
    agent.learn()
    agent.reset()
    # log some data from the first and the last run
    if k == 0:  # if it is the first run
        first_run_time2 = environment.log_time
        first_run_soc2 = environment.log_soc
        first_run_volt2 = environment.log_volt
    #if k == number_of_runs - 1: # if it is the last run
    #    last_run_time2 = environment.log_time
    #    last_run_soc2 = environment.log_soc
    #    last_run_volt2 = environment.log_volt
    environment.reset()
    k += 1

# one final run with the greedy policy, logged as the last run
agent.learning = False  # to keep it from exploring
experiment = Experiment(task, agent)
experiment.doInteractions(96)
last_run_time2 = environment.log_time
last_run_soc2 = environment.log_soc
last_run_volt2 = environment.log_volt

plt.plot(first_run_time2, first_run_soc2, 'r--', last_run_time2, last_run_soc2,
         'g--')

plt.show()
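
The excerpt above starts after its setup code; a hedged sketch of roughly what main.py would need beforehand (the environment/task class names, the learner, and number_of_runs are assumptions, not the project's actual code):

import matplotlib.pyplot as plt
from pybrain.rl.experiments import Experiment
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import ActionValueNetwork
from pybrain.rl.learners import NFQ

number_of_runs = 50                                      # assumed number of training runs
environment = BatteryEnv()                               # hypothetical env exposing log_time/log_soc/log_volt
task = ChargeTask(environment)                           # hypothetical task wrapping the environment
controller = ActionValueNetwork(environment.outdim, 3)   # state/action dimensions are assumptions
learner = NFQ()                                          # the learner actually used is unknown
agent = LearningAgent(controller, learner)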