Example #1
def test6():
    """ Now with memory!"""
    from numpy import ndarray
    from examples.gridphysics.mazes import polarmaze_game, cheese_maze  # cheese_maze: assumed to be exported by the same mazes module
    from pybrain.optimization import SNES
    g = VGDLParser().parseGame(polarmaze_game)
    g.buildLevel(cheese_maze)
    game_env = GameEnvironment(g)
    net = buildNet(game_env.outdim, 10, 4, temperature=0.1, recurrent=True)
    
    algo = SNES(lambda x: someEpisodes(game_env, x, avgOver=6, maxSteps=30,
                                       exploretoo=False),
                net, verbose=True, desiredEvaluation=0.85)
    print algo.batchSize
    rows, cols = 2, 3
    episodesPerStep = 5
    for i in range(rows*cols):
        pylab.subplot(rows, cols, i+1)
        algo.learn(episodesPerStep)
        if isinstance(algo.bestEvaluable, ndarray):
            net._setParameters(algo.bestEvaluable)
        else:
            net = algo.bestEvaluable
        plotBackground(game_env)    
        plotTrajectories(game_env, net)
        pylab.title(str((i+1)*episodesPerStep))
        if algo.desiredEvaluation <= algo.bestEvaluation:
            break
        print
    pylab.show()
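The SNES examples on this page (test2, test3, test4, test6) are excerpts from a larger test module and rely on scaffolding the snippets do not show: pylab, VGDLParser, GameEnvironment, the evaluation helper someEpisodes, the plotting helpers plotBackground and plotTrajectories, and a network factory buildNet. The sketch below collects the imports that other examples here use explicitly, plus a hypothetical reconstruction of buildNet; it is a guess at the original, not a copy of it.

# Scaffolding assumed by the SNES examples. buildNet is a hypothetical
# reconstruction; someEpisodes, plotBackground and plotTrajectories are
# module-local helpers that are not reconstructed here.
import pylab
from vgdl.core import VGDLParser
from vgdl.interfaces import GameEnvironment, GameTask

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import TanhLayer, SoftmaxLayer

def buildNet(indim, hidden, outdim, temperature=1., recurrent=False):
    # A small policy network: observations in, a softmax over actions out.
    # The original presumably uses `temperature` to sharpen the softmax;
    # this sketch ignores it.
    return buildNetwork(indim, hidden, outdim,
                        hiddenclass=TanhLayer, outclass=SoftmaxLayer,
                        recurrent=recurrent)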
Example #2
def plotLSPIValues(gametype, layout, discountFactor=0.9, useTD=False, showValue=False):
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)
    
    # transform into an MDP and the mapping to observations
    C = MDPconverter(g)
    Ts, R, fMap = C.convert()    
    
    # find the best least-squares approximation to the policy,
    # given only observations, not the state information
    if useTD:
        # state-based
        _, Tlspi = LSTD_PI_policy(fMap, Ts, R, discountFactor=discountFactor)
    else:
        # state-action-based
        _, Tlspi = LSPI_policy(fMap, Ts, R, discountFactor=discountFactor)
    
    # evaluate the policy
    Vlspi = trueValues(Tlspi, R, discountFactor=discountFactor)
        
    # plot those values    
    featurePlot((g.width, g.height), C.states, Vlspi)
    
    if showValue:
        # expected discounted reward at initial state
        Vinit = Vlspi[C.initIndex()]
        pylab.xlabel("V0=%.4f"%Vinit)
Example #3
def test2():
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.optimization import SNES

    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    game_env = GameEnvironment(g, actionDelay=100, recordingEnabled=True)
    net = buildNet(game_env.outdim, 6, 2)

    algo = SNES(lambda x: someEpisodes(game_env, x),
                net,
                verbose=True,
                desiredEvaluation=0.43)
    rows, cols = 3, 3
    episodesPerStep = 2
    for i in range(rows * cols):
        pylab.subplot(rows, cols, i + 1)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str((i + 1) * episodesPerStep))
        if algo.desiredEvaluation <= algo.bestEvaluation:
            break
        print
    pylab.show()
Example #4
def test3():
    from examples.gridphysics.mazes.simple import consistent_corridor
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES
    g = VGDLParser().parseGame(polarmaze_game)
    g.buildLevel(consistent_corridor)
    game_env = GameEnvironment(g)
    net = buildNet(game_env.outdim, 4, 4, temperature=0.05, recurrent=False)

    algo = SNES(lambda x: someEpisodes(game_env, x),
                net,
                verbose=True,
                desiredEvaluation=0.78)
    rows, cols = 2, 2
    episodesPerStep = 3
    for i in range(rows * cols):
        pylab.subplot(rows, cols, i + 1)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str((i + 1) * episodesPerStep))
        if algo.desiredEvaluation <= algo.bestEvaluation:
            break
        print
    pylab.show()
Example #5
def testAugmented():
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.mdpmap import MDPconverter
    from vgdl.agents import PolicyDrivenAgent
    from vgdl.interfaces import GameEnvironment, GameTask

    zelda_level2 = """
wwwwwwwwwwwww
wA wwk1ww   w
ww  ww    1 w
ww     wwww+w
wwwww1ww  www
wwwww  0  Gww
wwwwwwwwwwwww
"""

    
    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game
    g = VGDLParser().parseGame(rigidzelda_game)
    g.buildLevel(zelda_level2)
    env = GameEnvironment(g, visualize=False,
                          recordingEnabled=True, actionDelay=150)
    C = MDPconverter(g, env=env, verbose=True)
    Ts, R, _ = C.convert()
    print C.states
    print Ts[0]
    print R
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    task = GameTask(env)    
    exper = EpisodicExperiment(task, agent)
    exper.doEpisodes(1)
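The embedded level string uses the usual VGDL ASCII conventions: w for walls, A for the avatar, G for the goal, and the remaining characters (1, 0, k, +) for sprite classes declared in the game description. To simply watch the level without the MDP machinery, the parser's playGame class method (used in another example below) can be called directly; this sketch reuses rigidzelda_game and zelda_level2 from testAugmented above:

# Play the same level interactively (sketch).
from vgdl.core import VGDLParser
from examples.gridphysics.mazes.rigidzelda import rigidzelda_game
VGDLParser.playGame(rigidzelda_game, zelda_level2)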
Example #6
def testRecordingToGif(human=False):
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from vgdl.agents import PolicyDrivenAgent, InteractiveAgent
    from vgdl.tools import makeGifVideo

    game_str, map_str = polarmaze_game, maze_level_2
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    env = GameEnvironment(g,
                          visualize=human,
                          recordingEnabled=True,
                          actionDelay=200)
    task = GameTask(env)
    if human:
        agent = InteractiveAgent()
    else:
        agent = PolicyDrivenAgent.buildOptimal(env)
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(1)
    print res

    actions = [a for _, a, _ in env._allEvents]
    print actions
    makeGifVideo(env, actions, initstate=env._initstate)
Example #7
def test4():
    from numpy import ndarray
    from examples.gridphysics.mazes import polarmaze_game, labyrinth2  # labyrinth2: assumed to be exported by the same mazes module
    from pybrain.optimization import SNES, WeightGuessing
    g = VGDLParser().parseGame(polarmaze_game)
    g.buildLevel(labyrinth2)
    game_env = GameEnvironment(g)
    net = buildNet(game_env.outdim, 5, 4, temperature=0.1, recurrent=False)
    
    algo = SNES(lambda x: someEpisodes(game_env, x, avgOver=3),
                net, verbose=True, desiredEvaluation=0.75)
    #algo = WeightGuessing(lambda x: someEpisodes(game_env, x), net, verbose=True, desiredEvaluation=0.78)
    rows, cols = 2, 2
    episodesPerStep = 4
    for i in range(rows*cols):
        pylab.subplot(rows, cols, i+1)
        algo.learn(episodesPerStep)
        if isinstance(algo.bestEvaluable, ndarray):
            net._setParameters(algo.bestEvaluable)
        else:
            net = algo.bestEvaluable
        plotBackground(game_env)    
        plotTrajectories(game_env, net)
        pylab.title(str((i+1)*episodesPerStep))
        if algo.desiredEvaluation <= algo.bestEvaluation:
            break
        print
    pylab.show()
Example #8
def testRolloutVideo(actions=[0, 0, 2, 2, 0, 3] * 2):
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from vgdl.core import VGDLParser
    from vgdl.tools import makeGifVideo
    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    makeGifVideo(GameEnvironment(g, visualize=True), actions)
Example #9
def testRollout(actions=[0, 0, 2, 2, 0, 3] * 20):        
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from vgdl.core import VGDLParser
    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)    
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    env.rollOut(actions)
Example #10
def _createVGDLGame(gameSpec, levelSpec):
    import uuid
    from vgdl.core import VGDLParser
    # parse the game description and build the level
    game = VGDLParser().parseGame(gameSpec)
    game.buildLevel(levelSpec)
    game.uuid = uuid.uuid4()
    return game
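A hypothetical usage, with the maze assets that other examples on this page import:

# Hypothetical usage of _createVGDLGame.
from examples.gridphysics.mazes import polarmaze_game, maze_level_1
game = _createVGDLGame(polarmaze_game, maze_level_1)
print game.uuid  # each created game carries a fresh identifier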
Example #11
def testLoadSave():
    from vgdl.core import VGDLParser
    from examples.gridphysics.aliens import aliens_level, aliens_game
        
    map_str, game_str = aliens_level, aliens_game
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    
    for _ in range(1000):
        s = g.getFullState()
        g.setFullState(s)
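The loop above only exercises save and load; it never checks that the state actually survives the round trip. A slightly stronger variant, under the assumption (not guaranteed by the original test) that full states compare equal:

# Assumption: getFullState returns a value that compares equal after a
# save/load round trip.
s1 = g.getFullState()
g.setFullState(s1)
assert g.getFullState() == s1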
Example #12
def test2():
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from vgdl.core import VGDLParser
    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    actions = [1, 0, 0, 3, 0, 2, 0, 2, 0, 0, 0]
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    env.rollOut(actions)
    env.reset()
    senv = SubjectiveGame(g, actionDelay=1500)
    senv.rollOut(actions)
Example #13
def testStochMaze():
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes.stochastic import stoch_game, stoch_level
    g = VGDLParser().parseGame(stoch_game)
    g.buildLevel(stoch_level)
    C = MDPconverter(g, verbose=True)
    Ts, R, fMap = C.convert()
    print C.states
    print R
    for T in Ts:
        print T
    print fMap
Example #14
def testMaze():
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    C = MDPconverter(g, verbose=True)
    Ts, R, fMap = C.convert()
    print C.states
    print R
    for T in Ts:
        print T
    print fMap
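For orientation: convert returns one transition matrix per action, a reward vector over the enumerated states, and a feature map from states to observations. A quick way to see the sizes, assuming numpy arrays as the printouts above suggest:

# Inspect the MDP dimensions (assumes Ts holds numpy matrices and R a vector).
print len(Ts), 'actions'
print Ts[0].shape, '= (states x states) transition matrix per action'
print R.shape, '= reward vector over states'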
Example #15
def test4():
    """ Same thing, but animated. """
    from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameEnvironment, GameTask
    from vgdl.agents import PolicyDrivenAgent
    from vgdl.core import VGDLParser
    g = VGDLParser().parseGame(windy_stoch_game)
    g.buildLevel(windy_level)
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(5)
    print res
Example #16
def testPolicyAgent():
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from vgdl.agents import PolicyDrivenAgent
    from vgdl.interfaces import GameEnvironment, GameTask
    game_str, map_str = polarmaze_game, maze_level_2
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)

    env = GameEnvironment(g, visualize=False, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(2)
    print res
Example #17
def test1():
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1    
    
    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    
    game_env = GameEnvironment(g)
    print 'number of observations:', game_env.outdim
    
    net = buildNet(game_env.outdim, 2, 2)
    for i in range(200):
        net.randomize()
        net.reset()
        print someEpisodes(game_env, net),
        if i % 20 == 19:
            print
Example #18
def test3():
    from examples.gridphysics.mazes import polarmaze_game
    from examples.gridphysics.mazes.simple import maze_level_1b
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameTask
    from vgdl.agents import InteractiveAgent, UserTiredException
    game_str, map_str = polarmaze_game, maze_level_1b
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)    
    senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True)
    #senv = GameEnvironment(g, actionDelay=100, recordingEnabled=True, visualize=True)
    task = GameTask(senv)    
    iagent = InteractiveAgent()
    exper = EpisodicExperiment(task, iagent)
    try:
        exper.doEpisodes(1)
    except UserTiredException:
        pass
    print senv._allEvents
Example #19
def plotOptimalValues(gametype, layout, discountFactor=0.9, showValue=False):
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)
    
    # transform into an MDP
    C = MDPconverter(g)
    Ts, R, _ = C.convert()
    
    # find the optimal policy
    _, Topt = policyIteration(Ts, R, discountFactor=discountFactor)
    
    # evaluate the policy
    Vopt = trueValues(Topt, R, discountFactor=discountFactor)
        
    # plot those values    
    featurePlot((g.width, g.height), C.states, Vopt, plotdirections=True)
    
    if showValue:
        # expected discounted reward at initial state
        Vinit = Vopt[C.initIndex()]
        pylab.xlabel("V0=%.4f"%Vinit)
Example #20
def testInteractions():
    from vgdl.core import VGDLParser
    from examples.gridphysics.aliens import aliens_level, aliens_game
    from pygame.locals import K_SPACE
    # from examples.gridphysics.sokoban import so
    from pybrain.rl.agents.agent import Agent
    
    class DummyAgent(Agent):
        total = 4
        def getAction(self):
            # res = randint(0, self.total - 1)
            return 1
        
    map_str, game_str = aliens_level, aliens_game
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    g._initScreen(g.screensize, headless=True)
        
    for _ in range(300):
        win, _ = g.tick(K_SPACE)
        if win is not None:
            break
Example #21
def testInteractions():
    from random import randint
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1    
    from pybrain.rl.agents.agent import Agent
    
    class DummyAgent(Agent):
        total = 4
        def getAction(self):
            res = randint(0, self.total - 1)
            return res    
        
    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = DummyAgent()
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(2)
    print res
Example #22
        s += "w w\n"
    s += "wGw\n"
    for _ in range(length/2-2):
        s += "w w\n"
    s +=     "wAw\n"
    s +=     "w4w\n"
    s +=     "w1w\n"
    s +=     "www\n"
    return s


def ringworld(width):
    assert width > 1
    level = ["w"]*(width+2)+["\n"]
    level += ["w"]+[" "]*width+["w\n"]
    level += ["w"]*(width+2)+["\n"]
    level[int(width*1.5+3.5)] = 'G'    
    #level[-(width+5)] = 'A'    
    level_str = ''.join(level)
    return level_str
    

if __name__ == "__main__":
    print ringworld(9)    
    from vgdl.core import VGDLParser
    g = VGDLParser().parseGame(wrapmaze_game)
    g.buildLevel(ringworld(19))
    g.randomizeAvatar()
    g.startGame()
            
    VGDLParser.playGame(portalmaze_game, portalringworld(19))
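For reference, ringworld builds a single open row whose ends the wrapping maze physics join into a ring; ringworld(9) prints:

wwwwwwwwwww
w    G    w
wwwwwwwwwww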
Example #23
def runLunarLander():
    import random

    # import the lunar lander game definition
    from vgdl.examples.continuousphysics.lander import lander_game, lander_level

    # build the game
    g = VGDLParser().parseGame(lander_game)
    g.buildLevel(lander_level)

    # TODO: Determine how to not need to bring up the pygame display in order to run the game.
    g._initScreen([1, 1])

    ship = g.getAvatars()[0]

    # store initial ship state
    initState = [ship.rect.x, ship.rect.y, ship.speed, ship.orientation]

    print "starting position: " + str(ship)
    print "starting state: " + str(initState)
    # get random actions
    actions = generateInput(ACTIONS)

    states = [initState]
    # move ship based on random actions
    print actions
    for a in actions:
        for i in range(REPEATS):
            ship.action = a
            # assumption: updateGame advances the game one tick and returns
            # whether the episode has ended
            ended = updateGame(g, a)
            if ended:
                print a, i
                break
        states.append(makeState(ship))

    endState = states[-1]

    # confirm final position
    print "first final position after actions: " + str(ship)
    print "final state: " + str(endState)

    # reset the ship to its initial state
    setState(ship, initState)

    # vary action sequence
    # first pick a point to vary
    random.seed(10466)
    varyIndex = random.randint(0, len(actions) - 1)

    # then change that action
    oldAction = actions[varyIndex]
    actions[varyIndex] = BASEDIRS[random.randint(0, len(BASEDIRS) - 1)]

    # print out the change and the full list of actions
    print "changed action " + str(varyIndex) + " to " + str(actions[varyIndex])
    print "new actions: " + str(actions)

    # predict by simple extrapolation where the ship should end up
    predictState = predictOutcome(states, actions, oldAction, varyIndex)
    print "predicted state " + str(predictState)

    # find out where the actual final position is
    for a in actions:
        for i in range(REPEATS):
            ended = updateGame(g, a)  # see assumption above
            if ended:
                print a, i
                break

    endState = makeState(ship)
    print "actual ending position: " + str(ship)
    print "ending state: " + str(endState)

    # get error
    error = [endState[0] - predictState[0], endState[1] - predictState[1]]
    print "prediction error: " + str(error)