Example #1
def test_continuous_discr():
    """ Ensure correct discretization in continuous state spaces """
    # NOTE - if possible, test a domain with mixed discr/continuous
    domain = InfiniteTrackCartPole.InfTrackCartPole()  # 2 continuous dims
    rep = IndependentDiscretization(domain, discretization=20)
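    # 2 continuous dims x 20 bins per dim => 2 * 20 = 40 one-hot features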
    assert rep.features_num == 40
    rep = IndependentDiscretization(domain, discretization=50)
    assert rep.features_num == 100
Example #2
def test_number_of_cells():
    """ Ensure create appropriate # of cells (despite ``discretization``) """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    rep = IndependentDiscretization(domain, discretization=100)
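    # GridWorld's state dims are already discrete (4 rows + 5 cols = 9 features),
    # so the ``discretization`` argument should have no effect on features_num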
    assert rep.features_num == 9
    rep = IndependentDiscretization(domain, discretization=5)
    assert rep.features_num == 9
Example #3
def test_phi_cells():
    """ Ensure correct features are activated for corresponding state """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    rep = IndependentDiscretization(domain)

    for r in np.arange(4):
        for c in np.arange(5):
            phiVec = rep.phi(np.array([r, c]), terminal=False)
            assert sum(phiVec) == 2  # 1 for each dimension
            assert phiVec[r] == 1  # correct row activated
            assert phiVec[4 + c] == 1  # correct col activated
Example #4
def test_phi_cells():
    """ Ensure correct features are activated for corresponding state """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    rep = IndependentDiscretization(domain)

    for r in np.arange(4):
        for c in np.arange(5):
            phiVec = rep.phi(np.array([r, c]), terminal=False)
            assert sum(phiVec) == 2  # 1 for each dimension
            assert phiVec[r] == 1  # correct row activated
            assert phiVec[4 + c] == 1  # correct col activated
Example #5
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1.0,
                    lambda_=0.,
                    boyan_N0=20.1,
                    initial_learn_rate=0.330):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1
    sparsify = 1
    ifddeps = 1e-7
    domain = IntruderMonitoring()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain,
                          discover_threshold,
                          initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy,
                         representation,
                         discount_factor=domain.discount_factor,
                         lambda_=lambda_,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan",
                         boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
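
A minimal sketch of how such an experiment is typically driven in rlpy example scripts, assuming the standard Experiment.run / plot / save interface; the visualization flags shown are illustrative:

if __name__ == '__main__':
    experiment = make_experiment(1)
    experiment.run(visualize_steps=False,      # render each learning step?
                   visualize_learning=False,   # render policy / value function?
                   visualize_performance=0)    # number of performance runs to render
    experiment.plot()
    experiment.save()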
Example #6
def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """

    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000000
    opt["num_policy_checks"] = 50

    # Logging

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, 'large_state.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    discover_threshold = 1.
    lambda_ = 0.3
    initial_learn_rate = 0.11
    boyan_N0 = 100

    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain,
                           discover_threshold,
                           initial_rep,
                           sparsify=True,
                           useCache=True,
                           lazy=True,
                           lambda_=lambda_)

    # Policy
    policy = eGreedyDecay(representation, epsilonInit=0.9)

    # Agent
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)

    experiment = Experiment(**opt)
    return experiment
Example #7
def test_batch_discovery():
    """
    Test feature discovery from features available in bag, and that appropriate
    feats are activiated in later calls to phi_nonterminal()
    
    """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    s0_unused = domain.s0()  # just to initialize domain.state, etc

    initial_representation = IndependentDiscretization(domain)
    maxBatchDiscovery = np.inf
    batchThreshold = 1e-10
    discretization = 20
    bagSize = 100000  # We add all possible features

    rep = OMPTD(domain,
                initial_representation,
                discretization,
                maxBatchDiscovery,
                batchThreshold,
                bagSize,
                sparsify=False)
    states = np.array([[0, 0], [0, 2]])
    activePhi_s1 = rep.phi_nonTerminal(states[0, :])
    activePhi_s2 = rep.phi_nonTerminal(states[1, :])
    phiMatr = np.zeros((2, len(activePhi_s1)))
    phiMatr[0, :] = activePhi_s1
    phiMatr[1, :] = activePhi_s2
    td_errors = np.array([2, 5])
    flagAddedFeat = rep.batchDiscover(td_errors, phiMatr, states)
    assert flagAddedFeat  # should have added at least one
    assert rep.selectedFeatures[-1] == 9  # feat conj that yields state [0,2]
    assert rep.selectedFeatures[-2] == 11  # feat conj that yields state [0,0]

    # Ensure that discovered features are now active
    true_phi_s1 = np.zeros(rep.features_num)
    true_phi_s1[0] = True
    true_phi_s1[4] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s1[10] = True  # The conjunction of [0,0]
    assert np.all(true_phi_s1 == rep.phi_nonTerminal(states[0, :]))

    true_phi_s2 = np.zeros(rep.features_num)
    true_phi_s2[0] = True
    true_phi_s2[6] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s2[9] = True  # The conjunction of [0,2] [[note actual id is 11, but in index 10]]
    assert np.all(true_phi_s2 == rep.phi_nonTerminal(states[1, :]))
Example #8
def select_agent(name: Optional[str], _seed: int) -> Agent:
    tabular = Tabular(DOMAIN, discretization=20)
    if name is None or name == 'lspi':
        policy = eGreedy(tabular, epsilon=0.1)
        return LSPI(policy, tabular, DOMAIN.discount_factor, MAX_STEPS, 1000)
    elif name == 'nac':
        return NaturalActorCritic(GibbsPolicy(tabular),
                                  tabular,
                                  DOMAIN.discount_factor,
                                  forgetting_rate=0.3,
                                  min_steps_between_updates=100,
                                  max_steps_between_updates=1000,
                                  lambda_=0.7,
                                  learn_rate=0.1)
    elif name == 'tabular-q':
        return Q_Learning(
            eGreedy(tabular, epsilon=0.1),
            tabular,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.3,
            initial_learn_rate=0.11,
            learn_rate_decay_mode='boyan',
            boyan_N0=100,
        )
    elif name == 'ifddk-q':
        lambda_ = 0.3
        ifddk = iFDDK(
            DOMAIN,
            discovery_threshold=1.0,
            initial_representation=IndependentDiscretization(DOMAIN),
            sparsify=True,
            useCache=True,
            lazy=True,
            lambda_=lambda_,
        )
        return Q_Learning(
            eGreedy(ifddk, epsilon=0.1),
            ifddk,
            discount_factor=DOMAIN.discount_factor,
            lambda_=lambda_,
            initial_learn_rate=0.11,
            learn_rate_decay_mode='boyan',
            boyan_N0=100,
        )
    else:
        raise NotImplementedError()
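
A minimal, hypothetical driver for this selector; DOMAIN and MAX_STEPS are module-level constants assumed to be defined elsewhere in the same script (they are not shown above), and the Experiment options mirror the make_experiment examples:

def run_experiment(agent_name='tabular-q', seed=1):
    # Hypothetical helper: wires the selected agent into an rlpy Experiment,
    # reusing the option names shown in the make_experiment examples above
    agent = select_agent(agent_name, seed)
    experiment = Experiment(agent=agent,
                            domain=DOMAIN,
                            exp_id=seed,
                            max_steps=MAX_STEPS,
                            num_policy_checks=10,
                            checks_per_policy=1,
                            path="./Results/Temp")
    experiment.run()
    experiment.save()
    return experiment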
Example #9
def test_bag_creation():
    """
    Ensure create appropriate # of conjunctions, that they have been 
    instantiated properly, and there are no duplicates. 
    """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    initial_representation = IndependentDiscretization(domain)
    maxBatchDiscovery = np.inf
    batchThreshold = 1e-10
    discretization = 20
    bagSize = 100000  # We add all possible features

    rep = OMPTD(domain,
                initial_representation,
                discretization,
                maxBatchDiscovery,
                batchThreshold,
                bagSize,
                sparsify=False)
    assert rep.totalFeatureSize == 9 + 20
    assert rep.features_num == 9

    # Compute full (including non-discovered) feature vec for a few states
    states = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    s0_unused = domain.s0()  # just to initialize domain.state, etc
    rep.calculateFullPhiNormalized(states)
    phi_states = rep.fullphi
    phi_states[phi_states > 0] = True
    true_phi_s1 = np.zeros(len(phi_states[0, :]))
    true_phi_s1[0] = True
    true_phi_s1[4] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s1[9] = True  # The conjunction of [0,0]
    assert np.all(true_phi_s1 == phi_states[0, :])  # expected feature vec returned
    assert sum(phi_states[0, :]) == 3  # 2 original basic feats and 1 conjunction

    true_phi_s2 = np.zeros(len(phi_states[0, :]))
    true_phi_s2[0] = True
    true_phi_s2[5] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s2[10] = True  # The conjunction of [0,1]
    assert np.all(true_phi_s2 == phi_states[1, :])  # expected feature vec returned
    assert sum(phi_states[1, :]) == 3  # 2 original basic feats and 1 conjunction

    true_phi_s3 = np.zeros(len(phi_states[0, :]))
    true_phi_s3[1] = True
    true_phi_s3[4] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s3[14] = True  # The conjunction of [1,0]
    assert np.all(true_phi_s3 == phi_states[2, :])  # expected feature vec returned
    assert sum(phi_states[2, :]) == 3  # 2 original basic feats and 1 conjunction

    true_phi_s4 = np.zeros(len(phi_states[0, :]))
    true_phi_s4[1] = True
    true_phi_s4[5] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s4[15] = True  # The conjunction of [1,1]
    assert np.all(true_phi_s4 == phi_states[3, :])  # expected feature vec returned
    assert sum(phi_states[3, :]) == 3  # 2 original basic feats and 1 conjunction
Example #10
def select_agent(name: Optional[str], seed: int) -> Agent:
    tabular = Tabular(DOMAIN, discretization=20)
    if name is None or name == 'tabular-lspi':
        policy = eGreedy(tabular, epsilon=0.1)
        return LSPI(policy, tabular, DOMAIN.discount_factor, MAX_STEPS, 1000)
    elif name == 'tabular-q':
        policy = eGreedy(tabular, epsilon=0.1)
        return Q_Learning(policy, tabular, DOMAIN.discount_factor, lambda_=0.3)
    elif name == 'tabular-sarsa':
        policy = eGreedy(tabular, epsilon=0.1)
        return SARSA(policy, tabular, DOMAIN.discount_factor, lambda_=0.3)
    elif name == 'ifdd-q':
        lambda_, boyan_N0 = 0.42, 202
        discretization = 18
        initial_rep = IndependentDiscretization(DOMAIN, discretization=discretization)
        ifdd = iFDD(
            DOMAIN,
            discovery_threshold=8.63917,
            initial_representation=initial_rep,
            useCache=True,
            iFDDPlus=True,
        )
        return Q_Learning(
            eGreedy(ifdd, epsilon=0.1),
            ifdd,
            discount_factor=DOMAIN.discount_factor,
            lambda_=lambda_,
            initial_learn_rate=0.7422,
            learn_rate_decay_mode='boyan',
            boyan_N0=boyan_N0,
        )
    elif name == 'kifdd-q':
        lambda_, boyan_N0 = 0.52738, 389.56
        kernel_resolution = 8.567677
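        # Per-dimension Gaussian kernel width: state-space range divided by kernel_resolution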
        kernel_width = (DOMAIN.statespace_limits[:, 1] - DOMAIN.statespace_limits[:, 0]) \
            / kernel_resolution
        kifdd = KernelizediFDD(
            DOMAIN,
            sparsify=True,
            kernel=gaussian_kernel,
            kernel_args=[kernel_width],
            active_threshold=0.01,
            discover_threshold=0.0807,
            normalization=True,
            max_active_base_feat=10,
            max_base_feat_sim=0.5
        )
        policy = eGreedy(kifdd, epsilon=0.1)
        return Q_Learning(
            policy,
            kifdd,
            discount_factor=DOMAIN.discount_factor,
            lambda_=lambda_,
            initial_learn_rate=0.4244,
            learn_rate_decay_mode='boyan',
            boyan_N0=boyan_N0,
        )
    elif name == 'rbfs-q':
        rbf = RBF(
            DOMAIN,
            num_rbfs=96,
            resolution_max=21.0,
            resolution_min=21.0,
            const_feature=False,
            normalize=True,
            seed=seed,
        )
        policy = eGreedy(rbf, epsilon=0.1)
        return Q_Learning(
            policy,
            rbf,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.1953,
            initial_learn_rate=0.6633,
            learn_rate_decay_mode='boyan',
            boyan_N0=13444.0,
        )
    else:
        raise NotImplementedError()