def plotLSPIValues(gametype, layout, discountFactor=0.9, useTD=False, showValue=False):
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)

    # transform into an MDP and the mapping to observations
    C = MDPconverter(g)
    Ts, R, fMap = C.convert()

    # find the best least-squares approximation to the policy,
    # given only observations, not the state information
    if useTD:
        # state-based
        _, Tlspi = LSTD_PI_policy(fMap, Ts, R, discountFactor=discountFactor)
    else:
        # state-action-based
        _, Tlspi = LSPI_policy(fMap, Ts, R, discountFactor=discountFactor)

    # evaluate the policy
    Vlspi = trueValues(Tlspi, R, discountFactor=discountFactor)

    # plot those values
    featurePlot((g.width, g.height), C.states, Vlspi)
    if showValue:
        # expected discounted reward at initial state
        Vinit = Vlspi[C.initIndex()]
        pylab.xlabel("V0=%.4f" % Vinit)
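
# A minimal usage sketch for plotLSPIValues, comparing the state-based (LSTD)
# and state-action-based (LSPI) approximations side by side. Assumptions: the
# stock maze examples `polarmaze_game` and `maze_level_1` ship with py-vgdl;
# any (game string, level string) pair works here.
def demoLSPIValues():
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    pylab.subplot(1, 2, 1)
    plotLSPIValues(polarmaze_game, maze_level_1, useTD=True, showValue=True)
    pylab.subplot(1, 2, 2)
    plotLSPIValues(polarmaze_game, maze_level_1, useTD=False, showValue=True)
    pylab.show()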
def testAugmented():
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.mdpmap import MDPconverter
    from vgdl.agents import PolicyDrivenAgent
    # GameEnvironment and GameTask were not imported in the original;
    # assuming they live in vgdl.interfaces, as elsewhere in py-vgdl
    from vgdl.interfaces import GameEnvironment, GameTask

    zelda_level2 = """
wwwwwwwwwwwww
wA wwk1ww   w
ww  ww    1 w
ww     wwww+w
wwwww1ww  www
wwwww  0  Gww
wwwwwwwwwwwww
"""

    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game
    g = VGDLParser().parseGame(rigidzelda_game)
    g.buildLevel(zelda_level2)
    env = GameEnvironment(g, visualize=False,
                          recordingEnabled=True, actionDelay=150)
    C = MDPconverter(g, env=env, verbose=True)
    Ts, R, _ = C.convert()
    print C.states
    print Ts[0]
    print R
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    task = GameTask(env)
    exper = EpisodicExperiment(task, agent)
    exper.doEpisodes(1)
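
# Sanity-check sketch for the tensors printed above (assumption: Ts is the
# list of per-action transition matrices returned by MDPconverter.convert(),
# as dense 2-D arrays): every transition matrix should be row-stochastic,
# i.e. each row a probability distribution over successor states.
def checkTransitionMatrices(Ts, tol=1e-6):
    import numpy
    for T in Ts:
        assert numpy.allclose(T.sum(axis=1), 1, atol=tol), \
            "transition matrix rows must sum to 1"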
def plotBackground(env, known=[[]]):
    # The mutable default argument acts as a one-shot cache: the expensive
    # MDP conversion runs only on the first call, and the resulting
    # (size, states, R) triple is reused on every subsequent call.
    if len(known[0]) == 0:
        from vgdl.mdpmap import MDPconverter
        g = env._game
        C = MDPconverter(g, env=env, verbose=False)
        _, R, _ = C.convert()
        size = (g.width, g.height)
        known[0].append((size, C.states, R))
    featurePlot(*known[0][0])
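
# Demonstration sketch of the caching behavior above (assumption: env is a
# GameEnvironment as in testAugmented). The first call pays for the MDP
# conversion; the second reuses the cached (size, states, R) triple.
def demoBackgroundCache(env):
    import time
    for _ in range(2):
        start = time.time()
        plotBackground(env)
        print "plotBackground took %.3fs" % (time.time() - start)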
def buildOptimal(game_env, discountFactor=0.99):
    """ Given a game environment, find the optimal (state-based) policy
    and return an agent that plays accordingly. """
    from vgdl.mdpmap import MDPconverter
    C = MDPconverter(env=game_env)
    Ts, R, _ = C.convert()
    policy, _ = policyIteration(Ts, R, discountFactor=discountFactor)
    game_env.reset()

    def stateIndex(*_):
        # map the environment's current state to its index in the MDP
        return C.states.index(game_env.getState())

    return PolicyDrivenAgent(policy, stateIndex)
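
# Usage sketch for buildOptimal (assumptions: env is a GameEnvironment
# wrapping a small, fully observable game, and GameTask is available as in
# testAugmented): build the optimal agent, then watch it play one episode.
def demoOptimalAgent(env):
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    agent = buildOptimal(env)
    env.visualize = True
    env.reset()
    exper = EpisodicExperiment(GameTask(env), agent)
    exper.doEpisodes(1)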
def plotOptimalValues(gametype, layout, discountFactor=0.9, showValue=False):
    # build the game
    g = VGDLParser().parseGame(gametype)
    g.buildLevel(layout)

    # transform into an MDP
    C = MDPconverter(g)
    Ts, R, _ = C.convert()

    # find the optimal policy
    _, Topt = policyIteration(Ts, R, discountFactor=discountFactor)

    # evaluate the policy
    Vopt = trueValues(Topt, R, discountFactor=discountFactor)

    # plot those values
    featurePlot((g.width, g.height), C.states, Vopt, plotdirections=True)
    if showValue:
        # expected discounted reward at initial state
        Vinit = Vopt[C.initIndex()]
        pylab.xlabel("V0=%.4f" % Vinit)
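
# Sketch of a discount-factor sweep (assumption: the stock maze examples from
# py-vgdl): lower discount factors concentrate value near the goal, higher
# ones propagate it further across the maze.
def demoDiscountSweep():
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    for i, gamma in enumerate([0.5, 0.9, 0.99]):
        pylab.subplot(1, 3, i + 1)
        plotOptimalValues(polarmaze_game, maze_level_1,
                          discountFactor=gamma, showValue=True)
    pylab.show()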