Python ChainMDP примеры использования

Язык программирования: Python

Пространство имен/Пакет: rlpy.Domains

Класс/Тип: ChainMDP

Примеров на hotexamples.com: 5

Python ChainMDP — найдено 5 примеров. Это лучшие примеры кода на Python для rlpy.Domains.ChainMDP, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать / Скрыть

ChainMDP(4)

s0(1)

state(1)

step(1)

Основные методы

ChainMDP (4)

s0 (1)

state (1)

step (1)

Пример #1

Показать файл

def test_transitions():
    """
    Ensure that actions result in expected state transition behavior.

    The chain built here has 5 states (s0..s4). Starting from s2 the test
    walks LEFT to s0, tries to step LEFT off the edge (the state must
    neither change nor wrap around), then walks RIGHT until s4, where the
    step must be reported as terminal and pay GOAL_REWARD.
    NOTE: assume p_action_failure is the only noise term.

    """
    # [[initialize domain]]
    chainSize = 5
    domain = ChainMDP(chainSize=chainSize)
    domain.s0()  # return value intentionally unused
    domain.state = np.array([2])  # state s2
    left = 0
    right = 1
    # Both actions are expected to be offered after every step.
    all_actions = np.array([left, right])

    # Check basic step: s2 -> s1
    r, ns, terminal, possibleA = domain.step(left)
    assert ns[0] == 1 and not terminal
    assert np.all(possibleA == all_actions)
    assert r == domain.STEP_REWARD

    # Ensure all actions available, even on corner case, to meet domain specs
    r, ns, terminal, possibleA = domain.step(left)
    assert ns[0] == 0 and not terminal
    assert np.all(possibleA == all_actions)
    assert r == domain.STEP_REWARD

    # Ensure state does not change or wrap around per domain spec
    r, ns, terminal, possibleA = domain.step(left)
    assert ns[0] == 0 and not terminal
    assert np.all(possibleA == all_actions)
    assert r == domain.STEP_REWARD

    # Walking back: s0 -> s1
    r, ns, terminal, possibleA = domain.step(right)
    assert ns[0] == 1 and not terminal
    assert np.all(possibleA == all_actions)
    assert r == domain.STEP_REWARD

    # Three more RIGHT steps: s1 -> s2 -> s3 -> s4; only the last is checked
    r, ns, terminal, possibleA = domain.step(right)
    r, ns, terminal, possibleA = domain.step(right)
    r, ns, terminal, possibleA = domain.step(right)

    # Ensure goal state gives proper condition
    assert ns[0] == 4 and terminal
    assert np.all(possibleA == all_actions)
    assert r == domain.GOAL_REWARD

Пример #2

Показать файл

Файл: Experiment2.py Проект: xsongx/angrybirds

def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-sarsa0"):
    """
    Build an Experiment that runs a PosteriorSampling agent on a
    5-state ChainMDP.

    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    @return: the configured Experiment instance
    """
    ## Domain:
    chain = ChainMDP(5)

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    tabular = Tabular(chain, discretization=20)

    ## Policy
    e_greedy = eGreedy(tabular, epsilon=0.2)

    ## Agent
    learner = PosteriorSampling(
        representation=tabular,
        policy=e_greedy,
        discount_factor=chain.discount_factor,
        initial_learn_rate=0.1,
    )

    # All experiment settings are handed over by keyword in a single call.
    return Experiment(
        exp_id=exp_id,
        path=path,
        domain=chain,
        agent=learner,
        checks_per_policy=100,
        max_steps=4000,
        num_policy_checks=10,
    )

Пример #3

Показать файл

def _make_experiment(exp_id=1, path="./Results/Tmp/test_ChainMDP/"):
    """
    Build a small Experiment that runs a SARSA agent on a 5-state ChainMDP.

    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    @return: the configured Experiment instance
    """
    ## Domain:
    n_states = 5
    chain = ChainMDP(chainSize=n_states)

    ## Representation
    tabular = Tabular(chain)

    ## Policy
    e_greedy = eGreedy(tabular, epsilon=0.2)

    ## Agent
    learner = SARSA(
        representation=tabular,
        policy=e_greedy,
        discount_factor=chain.discount_factor,
        learn_rate=0.1,
    )

    # Every setting is forwarded by keyword, including chainSize,
    # representation and policy.
    return Experiment(
        exp_id=exp_id,
        path=path,
        chainSize=n_states,
        domain=chain,
        representation=tabular,
        policy=e_greedy,
        agent=learner,
        checks_per_policy=3,
        max_steps=50,
        num_policy_checks=3,
    )

Пример #4

Показать файл

Файл: test_ChainMDP.py Проект: smcgregor/rlpy

def test_transitions():
    """
    Ensure that actions result in expected state transition behavior.

    The domain is a 5-state chain (s0..s4). From s2 the agent moves LEFT
    down to s0, attempts to leave the chain by selecting LEFT again (the
    state should not change), and then moves RIGHT up to s4, which must
    be flagged terminal and rewarded with GOAL_REWARD.
    NOTE: assume p_action_failure is the only noise term.

    """
    # [[initialize domain]]
    chainSize = 5
    domain = ChainMDP(chainSize=chainSize)
    domain.s0()  # return value intentionally unused
    domain.state = np.array([2])  # state s2
    left = 0
    right = 1
    # Both actions are expected to be offered after every step.
    expected_actions = np.array([left, right])

    # Check basic step: s2 -> s1
    r, ns, terminal, possibleA = domain.step(left)
    assert ns[0] == 1 and not terminal
    assert np.all(possibleA == expected_actions)
    assert r == domain.STEP_REWARD

    # Ensure all actions available, even on corner case, to meet domain specs
    r, ns, terminal, possibleA = domain.step(left)
    assert ns[0] == 0 and not terminal
    assert np.all(possibleA == expected_actions)
    assert r == domain.STEP_REWARD

    # Ensure state does not change or wrap around per domain spec
    r, ns, terminal, possibleA = domain.step(left)
    assert ns[0] == 0 and not terminal
    assert np.all(possibleA == expected_actions)
    assert r == domain.STEP_REWARD

    # Moving back towards the goal: s0 -> s1
    r, ns, terminal, possibleA = domain.step(right)
    assert ns[0] == 1 and not terminal
    assert np.all(possibleA == expected_actions)
    assert r == domain.STEP_REWARD

    # Three further RIGHT steps (s1 -> s4); only the final result is checked
    r, ns, terminal, possibleA = domain.step(right)
    r, ns, terminal, possibleA = domain.step(right)
    r, ns, terminal, possibleA = domain.step(right)

    # Ensure goal state gives proper condition
    assert ns[0] == 4 and terminal
    assert np.all(possibleA == expected_actions)
    assert r == domain.GOAL_REWARD

Пример #5

Показать файл

Файл: ChainMDP.py Проект: incumbent/sdrl

    def makeComponents(self):
        """
        Create the ChainMDP domain and the agent from the current settings.

        The chain size is read from ``self.spChainSize``; the representation,
        policy and agent are obtained from their factories using the stored
        configs and the text of the currently selected item in each list.

        @return: tuple ``(domain, agent)``
        """
        domain = ChainMDP(chainSize=self.spChainSize.value())

        # Representation is built on top of the domain.
        rep_config = self.representationConfig
        rep_name = str(self.lstRepresentation.currentItem().text())
        representation = RepresentationFactory.get(
            config=rep_config, name=rep_name, domain=domain)

        # Policy is built on top of the representation.
        pol_config = self.policyConfig
        pol_name = str(self.lstPolicy.currentItem().text())
        policy = PolicyFactory.get(
            config=pol_config, name=pol_name, representation=representation)

        # Agent ties the representation and the policy together.
        agent_config = self.agentConfig
        agent_name = str(self.lstAgent.currentItem().text())
        agent = AgentFactory.get(
            config=agent_config,
            name=agent_name,
            representation=representation,
            policy=policy)

        return domain, agent