Example 1
def test_fdcheck_dlogpi():
    """Finite differences check for the dlogpi of the gibbs policy"""
    logger = Logger()
    domain = GridWorld()
    representation = Tabular(logger=logger, domain=domain, discretization=20)
    policy = GibbsPolicy(representation=representation, logger=logger)

    def f(theta, s, a):
        policy.representation.theta = theta
        return np.log(policy.prob(s, a))

    def df(theta, s, a):
        policy.representation.theta = theta
        return policy.dlogpi(s, a)

    def df_approx(theta, s, a):
        return approx_fprime(theta, f, 1e-10, s, a)

    thetas = np.random.rand(10, len(representation.theta))
    for i in range(10):
        s = np.array([np.random.randint(4), np.random.randint(5)])
        a = np.random.choice(domain.possibleActions(s))
        for theta in thetas:
            # print "s", s
            # print "a", a
            # print "f", f(theta, s, a)
            # print "df", df(theta, s, a)
            # print "df_approx", df_approx(theta, s, a)
            error = check_grad(f, df, theta, s, a)
        print(error)
            assert np.abs(error) < 1e-6
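
The test above omits its imports. A hedged sketch of what it presumably needs is below: the rlpy module paths follow the import block shown in Example 20, while the Logger import location is an assumption (recent rlpy versions drop the logger arguments entirely). For reference, scipy's check_grad(f, df, x0, *args) returns the 2-norm of the difference between the analytic gradient df and a finite-difference estimate of f's gradient, so a value near zero confirms dlogpi.

import numpy as np
from scipy.optimize import approx_fprime, check_grad
from rlpy.Domains import GridWorld
from rlpy.Representations import Tabular
from rlpy.Policies import GibbsPolicy
from rlpy.Tools import Logger  # assumed location; may differ by rlpy version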
Example 2
    def makeComponents(self):
        map_type = str(self.lstMap.currentItem().text())
        noise = self.spNoise.value()
        maze = os.path.join(GridWorld.default_map_dir, map_type + '.txt')
        domain = GridWorld(maze, noise=noise)
        domain.GOAL_REWARD = self.spGoalReward.value()
        domain.PIT_REWARD = self.spPitReward.value()
        domain.STEP_REWARD = self.spStepReward.value()

        representation = RepresentationFactory.get(
            config=self.representationConfig,
            name=str(self.lstRepresentation.currentItem().text()),
            domain=domain)

        policy = PolicyFactory.get(config=self.policyConfig,
                                   name=str(
                                       self.lstPolicy.currentItem().text()),
                                   representation=representation)

        agent = AgentFactory.get(config=self.agentConfig,
                                 name=str(self.lstAgent.currentItem().text()),
                                 representation=representation,
                                 policy=policy)

        return domain, agent
Example 3
def test_bag_creation():
    """
    Ensure the appropriate number of conjunctions is created, that they are
    instantiated properly, and that there are no duplicates.
    """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname=os.path.join(mapDir, "4x5.txt") # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)
    
    initial_representation = IndependentDiscretization(domain)
    maxBatchDiscovery=np.inf
    batchThreshold=1e-10
    discretization=20
    bagSize=100000 # We add all possible features
    
    rep = OMPTD(domain, initial_representation, discretization, 
                maxBatchDiscovery, batchThreshold, bagSize, sparsify=False)
    assert rep.totalFeatureSize == 9+20
    assert rep.features_num == 9
    
    # Compute full (including non-discovered) feature vec for a few states
    states = np.array([[0,0], [0,1], [1,0], [1,1]])
    s0_unused = domain.s0() # just to initialize domain.state, etc
    rep.calculateFullPhiNormalized(states)
    phi_states = rep.fullphi
    phi_states[phi_states>0] = True
    true_phi_s1 = np.zeros(len(phi_states[0,:]))
    true_phi_s1[0] = True
    true_phi_s1[4] = True # TODO - could be [4] depending on axes, check.
    true_phi_s1[9] = True # The conjunction of [0,0]
    assert np.all(true_phi_s1 == phi_states[0,:]) # expected feature vec returned
    assert sum(phi_states[0,:]) == 3 # 2 original basic feats and 1 conjunction
    
    true_phi_s2 = np.zeros(len(phi_states[0,:]))
    true_phi_s2[0] = True
    true_phi_s2[5] = True # TODO - could be [4] depending on axes, check.
    true_phi_s2[10] = True # The conjunction of [0,1]
    assert np.all(true_phi_s2 == phi_states[1,:]) # expected feature vec returned
    assert sum(phi_states[1,:]) == 3 # 2 original basic feats and 1 conjunction
    
    true_phi_s3 = np.zeros(len(phi_states[0,:]))
    true_phi_s3[1] = True
    true_phi_s3[4] = True # TODO - could be [4] depending on axes, check.
    true_phi_s3[14] = True # The conjunction of [1,0]
    assert np.all(true_phi_s3 == phi_states[2,:]) # expected feature vec returned
    assert sum(phi_states[2,:]) == 3 # 2 original basic feats and 1 conjunction
    
    true_phi_s4 = np.zeros(len(phi_states[0,:]))
    true_phi_s4[1] = True
    true_phi_s4[5] = True # TODO - could be [4] depending on axes, check.
    true_phi_s4[15] = True # The conjunction of [1,1]
    assert np.all(true_phi_s4 == phi_states[3,:]) # expected feature vec returned
    assert sum(phi_states[3,:]) == 3 # 2 original basic feats and 1 conjunction
Example 4
def test_batch_discovery():
    """
    Test feature discovery from features available in the bag, and that the
    appropriate feats are activated in later calls to phi_nonTerminal().
    """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    s0_unused = domain.s0()  # just to initialize domain.state, etc

    initial_representation = IndependentDiscretization(domain)
    maxBatchDiscovery = np.inf
    batchThreshold = 1e-10
    discretization = 20
    bagSize = 100000  # We add all possible features

    rep = OMPTD(domain,
                initial_representation,
                discretization,
                maxBatchDiscovery,
                batchThreshold,
                bagSize,
                sparsify=False)
    states = np.array([[0, 0], [0, 2]])
    activePhi_s1 = rep.phi_nonTerminal(states[0, :])
    activePhi_s2 = rep.phi_nonTerminal(states[1, :])
    phiMatr = np.zeros((2, len(activePhi_s1)))
    phiMatr[0, :] = activePhi_s1
    phiMatr[1, :] = activePhi_s2
    td_errors = np.array([2, 5])
    flagAddedFeat = rep.batchDiscover(td_errors, phiMatr, states)
    assert flagAddedFeat  # should have added at least one
    assert rep.selectedFeatures[-1] == 9  # feat conj that yields state [0,2]
    assert rep.selectedFeatures[-2] == 11  # feat conj that yields state [0,0]

    # Ensure that discovered features are now active
    true_phi_s1 = np.zeros(rep.features_num)
    true_phi_s1[0] = True
    true_phi_s1[4] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s1[10] = True  # The conjunction of [0,0]
    assert np.all(true_phi_s1 == rep.phi_nonTerminal(states[0, :]))

    true_phi_s2 = np.zeros(rep.features_num)
    true_phi_s2[0] = True
    true_phi_s2[6] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s2[9] = True  # The conjunction of [0,2] [[note actual id is 11, but in index 10]]
    assert np.all(true_phi_s2 == rep.phi_nonTerminal(states[1, :]))
Example 5
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-IncrTabularTut"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation  = IncrTabularTut(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = SARSA(representation=representation, policy=policy,
                  discount_factor=domain.discount_factor,
                       learn_rate=0.1)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
Example 6
def test_cell_expansion():
    """ Ensure start with 0 cells, add one for each state uniquely. """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    rep = IncrementalTabular(domain, discretization=100)
    assert rep.features_num == 0  # start with 0 cells
    sOrigin = np.array([0, 0])
    s2 = np.array([1, 2])
    terminal = False  # nonterminal state
    a = 1  # arbitrary action

    # Expect to add feats for these newly seen states
    numAdded = rep.pre_discover(sOrigin, terminal, a, s2, terminal)
    assert numAdded == 2
    assert rep.features_num == 2
    phiVecOrigin = rep.phi(sOrigin, terminal)
    phiVec2 = rep.phi(s2, terminal)
    assert sum(phiVecOrigin) == 1
    assert sum(phiVec2) == 1
    phiVecOrigin2 = rep.phi(np.array([0, 0]), terminal=False)
    assert rep.features_num == 2  # didn't duplicate the feature
    assert sum(phiVecOrigin2) == 1

    # Make sure we don't duplicate feats anywhere
    numAdded = rep.pre_discover(np.array([0, 0]), terminal, a, s2, terminal)
    assert numAdded == 0
    assert rep.features_num == 2
Example 7
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-qlearning"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                       discount_factor=domain.discount_factor,
                       initial_learn_rate=0.1,
                       learn_rate_decay_mode="boyan", boyan_N0=100,
                       lambda_=0.)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
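
A make_experiment module like this one is normally executed as a script. The sketch below shows that driver, assuming the standard rlpy Experiment interface (run, plot, save); the visualization keyword names follow the rlpy GridWorld tutorial and may vary slightly between versions.

if __name__ == '__main__':
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,      # don't render individual steps
                   visualize_learning=False,   # don't render the learned value function
                   visualize_performance=1)    # render one performance run
    experiment.plot()
    experiment.save()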
Example 8
def test_number_of_cells():
    """ Ensure create appropriate # of cells (despite ``discretization``) """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    memory = 30  # Determines number of feats; it is the size of cache
    num_tilings = [2]  # has 2 tilings
    resolutions = [4]  # resolution of staterange / 4
    dimensions = [[0, 1]]  # tiling over dimensions 0 and 1

    rep = TileCoding(domain,
                     memory,
                     num_tilings,
                     resolutions,
                     resolution_matrix=None,
                     dimensions=dimensions,
                     safety="super")  # super safety prevents any collisions
    assert rep.features_num == memory
Example 9
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """

    # Experiment variables
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 10

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = GibbsPolicy(representation)

    # Agent
    opt["agent"] = NaturalActorCritic(policy, representation, domain.discount_factor,
                               0.3, 100, 1000, .7, 0.1)

    experiment = Experiment(**opt)
    return experiment
Example 10
def make_experiment(exp_id=1, path="./Results/Temp", show=False):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)

    # Representation
    representation = Tabular(domain, discretization=20)

    # MDP Solver
    agent = PolicyIteration(exp_id,
                            representation,
                            domain,
                            project_path=path,
                            show=show)

    return MDPSolverExperiment(agent, domain)
Example 11
def test_batch_discovery():
    """
    Test feature discovery from features available in the bag, and that the
    appropriate feats are activated in later calls to phi_nonTerminal().
    """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname=os.path.join(mapDir, "4x5.txt") # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)
    
    s0_unused = domain.s0() # just to initialize domain.state, etc
    
    initial_representation = IndependentDiscretization(domain)
    maxBatchDiscovery=np.inf
    batchThreshold=1e-10
    discretization=20
    bagSize=100000 # We add all possible features
    
    rep = OMPTD(domain, initial_representation, discretization, 
                maxBatchDiscovery, batchThreshold, bagSize, sparsify=False)
    states = np.array([[0,0], [0,2]])
    activePhi_s1 = rep.phi_nonTerminal(states[0,:])
    activePhi_s2 = rep.phi_nonTerminal(states[1,:])
    phiMatr = np.zeros((2, len(activePhi_s1)))
    phiMatr[0,:] = activePhi_s1
    phiMatr[1,:] = activePhi_s2
    td_errors = np.array([2, 5])
    flagAddedFeat = rep.batchDiscover(td_errors, phiMatr, states)
    assert flagAddedFeat # should have added at least one
    assert rep.selectedFeatures[-1] == 9 # feat conj that yields state [0,2]
    assert rep.selectedFeatures[-2] == 11 # feat conj that yields state [0,0]
    
    # Ensure that discovered features are now active
    true_phi_s1 = np.zeros(rep.features_num)
    true_phi_s1[0] = True
    true_phi_s1[4] = True # TODO - could be [4] depending on axes, check.
    true_phi_s1[10] = True # The conjunction of [0,0]
    assert np.all(true_phi_s1 == rep.phi_nonTerminal(states[0,:]))
    
    true_phi_s2 = np.zeros(rep.features_num)
    true_phi_s2[0] = True
    true_phi_s2[6] = True # TODO - could be [4] depending on axes, check.
    true_phi_s2[9] = True # The conjunction of [0,2] [[note actual id is 11, but in index 10]]
    assert np.all(true_phi_s2 == rep.phi_nonTerminal(states[1,:]))
    
Example 12
def test_number_of_cells():
    """ Ensure create appropriate # of cells (despite ``discretization``) """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname=os.path.join(mapDir, "4x5.txt") # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)
    
    rep = Tabular(domain, discretization=100)
    assert rep.features_num == 20
    rep = Tabular(domain, discretization=5)
    assert rep.features_num == 20
Example 13
def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """

    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000000
    opt["num_policy_checks"] = 50

    # Logging

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, 'large_state.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    discover_threshold = 1.
    lambda_ = 0.3
    initial_learn_rate = 0.11
    boyan_N0 = 100

    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain,
                           discover_threshold,
                           initial_rep,
                           sparsify=True,
                           useCache=True,
                           lazy=True,
                           lambda_=lambda_)

    # Policy
    policy = eGreedyDecay(representation, epsilonInit=0.9)

    # Agent
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)

    experiment = Experiment(**opt)
    return experiment
Example 14
    def makeComponents(self):
        map_type = str(self.lstMap.currentItem().text())
        noise = float(self.spNoise.value())
        maze = os.path.join(GridWorld.default_map_dir, map_type+'.txt')
        domain = GridWorld(maze, noise=noise)
        domain.GOAL_REWARD = float(self.spGoalReward.value())
        domain.PIT_REWARD = float(self.spPitReward.value())
        domain.STEP_REWARD = float(self.spStepReward.value())

        representation = RepresentationFactory.get(config=self.representationConfig,
            name=str(self.lstRepresentation.currentItem().text()),
            domain=domain)

        policy = PolicyFactory.get(config=self.policyConfig,
            name=str(self.lstPolicy.currentItem().text()),
            representation=representation)

        agent = AgentFactory.get(config=self.agentConfig,
            name=str(self.lstAgent.currentItem().text()),
            representation=representation,
            policy=policy)

        return domain, agent
Example 15
def test_phi_cells():
    """ Ensure correct features are activated for corresponding state """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    rep = IndependentDiscretization(domain)

    for r in np.arange(4):
        for c in np.arange(5):
            phiVec = rep.phi(np.array([r, c]), terminal=False)
            assert sum(phiVec) == 2  # 1 for each dimension
            assert phiVec[r] == 1  # correct row activated
            assert phiVec[4 + c] == 1  # correct col activated
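
For this 4x5 map, IndependentDiscretization lays out one binary feature per value of each dimension: row indices occupy features 0-3 and column indices occupy features 4-8, giving 9 features in total (contrast with Tabular in Examples 12 and 16, which enumerates all 20 states). A quick sketch under that assumed layout, reusing rep from the test above:

phi = rep.phi(np.array([2, 3]), terminal=False)  # state [row=2, col=3]
assert rep.features_num == 4 + 5
assert phi[2] == 1 and phi[4 + 3] == 1 and sum(phi) == 2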
Example 16
def test_phi_cells():
    """ Ensure correct feature is activated for corresponding state """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname=os.path.join(mapDir, "4x5.txt") # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)
    
    # Allow internal representation to change -- just make sure each state has
    # a unique id that is consistently activated.
    rep = Tabular(domain)
    seenStates = -1 * np.ones(rep.features_num)
    for r in np.arange(4):
        for c in np.arange(5):
            phiVec = rep.phi(np.array([r,c]), terminal=False)
            assert sum(phiVec) == 1 # only 1 active feature
            activeInd = np.where(phiVec > 0)
            assert seenStates[activeInd] != True # haven't seen it before
            seenStates[activeInd] = True
    assert np.all(seenStates == True) # we've covered all states
Example 17
def make_experiment(exp_id=2,
                    path="./Results/MetaRLSarsa",
                    boyan_N0=680.715,
                    discount_factor=0.9,
                    initial_learn_rate=1,
                    lambda_=0.106):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 50
    opt["checks_per_policy"] = 100
    # start_at = np.array([4, 5])

    # Logging

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    map_dir = os.path.expanduser("~/work/clipper/models/rl/GridworldMaps/")
    maze = os.path.join(map_dir, "12x12-Bridge.txt")

    print(maze)
    domain = GridWorld(maze,
                       # random_start=True,
                       noise=0.1,
                       # start_at=np.array([4,6])
                       )

    representation = Tabular(domain)
    policy = eGreedy(representation, epsilon=0.3)
    opt['agent'] = Q_Learning(representation=representation,
                              policy=policy,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              discount_factor=discount_factor)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    # print opt

    return experiment
Example 18
def make_experiment(batch_id, exp_id, grid, max_steps=10000, weight_vec=None):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param batch_id: number used to name the results directory
    @param exp_id: number used to seed the random number generators
    @param grid: path to the GridWorld map file to use
    @param max_steps: total number of learning steps
    @param weight_vec: optional weight vector used as the representation's starting point
    """
    path = ('./Results/Experiment%d' % batch_id)
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain:
    domain = GridWorld(grid, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0)
    opt["checks_per_policy"] = 10
    opt["max_steps"] = max_steps
    opt["num_policy_checks"] = 1
    experiment = Experiment(**opt)

    # Insert weight vector as start point if provided.
    if weight_vec is not None:
        representation.weight_vec = weight_vec
    return experiment
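
The weight_vec hook above allows one run to warm-start from the weights learned in a previous one. A hedged sketch of that chaining follows; it assumes the Experiment object keeps a reference to its agent (as rlpy's Experiment normally does), and the map path and ids are placeholders.

grid = os.path.join(GridWorld.default_map_dir, '4x5.txt')
first = make_experiment(batch_id=1, exp_id=1, grid=grid, max_steps=5000)
first.run()
learned_weights = first.agent.representation.weight_vec
second = make_experiment(batch_id=1, exp_id=2, grid=grid,
                         weight_vec=learned_weights)
second.run()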
Example 19
def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """

    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 50

    # Logging

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join('.', '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain)

    # Policy
    policy = eGreedy(representation, epsilon=0.1)

    # Agent
    opt["agent"] = LSPI(policy, representation, domain.discount_factor,
                        opt["max_steps"], 1000)

    experiment = Experiment(**opt)
    return experiment
Example 20
from rlpy.Agents import Agent, LSPI, NaturalActorCritic, Q_Learning
from rlpy.Domains import GridWorld
from rlpy.Representations import iFDDK, IndependentDiscretization, Tabular
from rlpy.Policies import eGreedy, GibbsPolicy
import os
from typing import Optional

from common import run_cli

MAZE = os.path.join(GridWorld.default_map_dir, '11x11-Rooms.txt')
DOMAIN = GridWorld(MAZE, noise=0.3)
MAX_STEPS = 10000


def select_agent(name: Optional[str], _seed: int) -> Agent:
    tabular = Tabular(DOMAIN, discretization=20)
    if name is None or name == 'lspi':
        policy = eGreedy(tabular, epsilon=0.1)
        return LSPI(policy, tabular, DOMAIN.discount_factor, MAX_STEPS, 1000)
    elif name == 'nac':
        return NaturalActorCritic(GibbsPolicy(tabular),
                                  tabular,
                                  DOMAIN.discount_factor,
                                  forgetting_rate=0.3,
                                  min_steps_between_updates=100,
                                  max_steps_between_updates=1000,
                                  lambda_=0.7,
                                  learn_rate=0.1)
    elif name == 'tabular-q':
        return Q_Learning(
            eGreedy(tabular, epsilon=0.1),
Example 21
def test_bag_creation():
    """
    Ensure the appropriate number of conjunctions is created, that they are
    instantiated properly, and that there are no duplicates.
    """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    initial_representation = IndependentDiscretization(domain)
    maxBatchDiscovery = np.inf
    batchThreshold = 1e-10
    discretization = 20
    bagSize = 100000  # We add all possible features

    rep = OMPTD(domain,
                initial_representation,
                discretization,
                maxBatchDiscovery,
                batchThreshold,
                bagSize,
                sparsify=False)
    assert rep.totalFeatureSize == 9 + 20
    assert rep.features_num == 9

    # Compute full (including non-discovered) feature vec for a few states
    states = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    s0_unused = domain.s0()  # just to initialize domain.state, etc
    rep.calculateFullPhiNormalized(states)
    phi_states = rep.fullphi
    phi_states[phi_states > 0] = True
    true_phi_s1 = np.zeros(len(phi_states[0, :]))
    true_phi_s1[0] = True
    true_phi_s1[4] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s1[9] = True  # The conjunction of [0,0]
    assert np.all(true_phi_s1 == phi_states[0, :])  # expected feature vec returned
    assert sum(phi_states[0, :]) == 3  # 2 original basic feats and 1 conjunction

    true_phi_s2 = np.zeros(len(phi_states[0, :]))
    true_phi_s2[0] = True
    true_phi_s2[5] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s2[10] = True  # The conjunction of [0,1]
    assert np.all(true_phi_s2 == phi_states[1, :])  # expected feature vec returned
    assert sum(phi_states[1, :]) == 3  # 2 original basic feats and 1 conjunction

    true_phi_s3 = np.zeros(len(phi_states[0, :]))
    true_phi_s3[1] = True
    true_phi_s3[4] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s3[14] = True  # The conjunction of [1,0]
    assert np.all(true_phi_s3 == phi_states[2, :])  # expected feature vec returned
    assert sum(phi_states[2, :]) == 3  # 2 original basic feats and 1 conjunction

    true_phi_s4 = np.zeros(len(phi_states[0, :]))
    true_phi_s4[1] = True
    true_phi_s4[5] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s4[15] = True  # The conjunction of [1,1]
    assert np.all(true_phi_s4 == phi_states[3, :])  # expected feature vec returned
    assert sum(phi_states[3, :]) == 3  # 2 original basic feats and 1 conjunction