Example #1
def test_qlearn_valfun_chain():
    """
        Check if SARSA computes the value function of a simple Markov chain correctly.
        This only tests value function estimation, only one action possible
    """
    rep = MockRepresentation()
    pol = eGreedy(rep)
    agent = Q_Learning(pol, rep, 0.9, lambda_=0.)
    for i in range(1000):
        if i % 4 == 3:
            continue
        agent.learn(np.array([i % 4]), [0], 0, 1., np.array([(i + 1) % 4]), [0], 0, (i + 2) % 4 == 0)
    V_true = np.array([2.71, 1.9, 1, 0])
    np.testing.assert_allclose(rep.weight_vec, V_true)
Example #2
def test_qlearn_valfun_chain():
    """
        Check if Q-Learning computes the value function of a simple Markov chain correctly.
        This only tests value-function estimation; only one action is possible.
    """
    rep = MockRepresentation()
    pol = eGreedy(rep)
    agent = Q_Learning(pol, rep, 0.9, lambda_=0.)
    for i in range(1000):
        if i % 4 == 3:
            continue
        agent.learn(np.array([i % 4]), [0], 0, 1., np.array([(i + 1) % 4]),
                    [0], 0, (i + 2) % 4 == 0)
    V_true = np.array([2.71, 1.9, 1, 0])
    np.testing.assert_allclose(rep.weight_vec, V_true)
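The expected weights in the two tests above are just the discounted returns of the 4-state chain with discount factor 0.9: the terminal state is worth 0 and every earlier state is worth 1 plus 0.9 times its successor's value. A minimal NumPy check of that arithmetic (independent of rlpy; the discount and chain length are taken from the tests):

import numpy as np

# Discounted returns of the 4-state chain used above: each non-terminal
# state yields reward 1 and moves to the next state; state 3 is terminal.
gamma = 0.9
V = np.zeros(4)
for s in reversed(range(3)):       # states 2, 1, 0
    V[s] = 1.0 + gamma * V[s + 1]  # Bellman backup with a single action

print(V)  # [2.71 1.9  1.   0.  ] -- matches V_true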
Example #3
def make_experiment(exp_id=1, path="./Results/Experiments/HalfReward/" + getTimeStr() + "/"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    maze = os.path.join(GridWorldInter.default_map_dir, '11x11-RoomsSeg.txt') 
    domain = GridWorldInter(maze, noise=0.01)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation  = Tabular(domain, discretization=20)

    ## Policy
    policy = eGreedy(representation, epsilon=0.1) ## Need to change this back, limiting noise ATM

    ## Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                   discount_factor=domain.discount_factor,
                       initial_learn_rate=0.3)
    opt["checks_per_policy"] = 50
    opt["max_steps"] = 12000
    opt["num_policy_checks"] = 20
    # experiment = ExperimentDelayed(**opt)
    experiment = Experiment(**opt)
    return experiment
Example #4
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        boyan_N0=753,
        initial_learn_rate=.7,
        resolution=25.,
        num_rbfs=206.,
        lambda_=0.75):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution, resolution_min=resolution,
                         const_feature=False, normalize=True, seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #5
    def runTIRL(self, N=5, w=2, pruning=0.5):
        opt = deepcopy(self.opt_template)
        dist = self.getIRLDist(N=N)
        ac = self.getTSCWaypoints(N, w, pruning)
        domain = self.createStateDomain(
            waypoints=ac,
            rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
                x, y, z, w, dist))
        opt["domain"] = domain
        representation = IncrementalTabular(
            domain, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        opt["agent"] = Q_Learning(representation=representation,
                                  policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan",
                                  boyan_N0=100,
                                  lambda_=0.)

        experiment = Experiment(**opt)
        experiment.run(visualize_steps=False,
                       performance_domain=self.createStateDomain(
                           waypoints=self.env_template["consumable"]),
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()

        return np.max(experiment.result["return"]), np.sum(
            experiment.result["return"])
Example #6
    def runIRL(self, N=5):
        opt = deepcopy(self.opt_template)

        dist = self.getIRLDist(N=N)
        bdist = self.getIRLDist(N=N, rand=True)

        #print dist-bdist

        domain = self.createMarkovDomain(
            rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.
            maxEntReward(x, y, z, w, dist - bdist))
        opt["domain"] = domain

        representation = IncrementalTabular(
            domain, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        opt["agent"] = Q_Learning(representation=representation,
                                  policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan",
                                  boyan_N0=100,
                                  lambda_=0.)

        performance_domain = self.createMarkovDomain()

        experiment = Experiment(**opt)
        experiment.run(visualize_steps=False,
                       performance_domain=performance_domain,
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()

        return np.max(experiment.result["return"]), np.sum(
            experiment.result["return"])
Example #7
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.03104970,
        lambda_=0.,
        boyan_N0=1220.247254,
        initial_learn_rate=0.27986823):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    sparsify = 1
    ifddeps = 1e-7
    domain = BlocksWorld(blocks=6, noise=0.3, )
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #8
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-qlearning"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                       discount_factor=domain.discount_factor,
                       initial_learn_rate=0.1,
                       learn_rate_decay_mode="boyan", boyan_N0=100,
                       lambda_=0.)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
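This tutorial setup follows the same pattern as the other examples: fill opt, build the agent, and return Experiment(**opt). A minimal sketch of how it is typically driven, using only the run()/save() calls and the result["return"] field that appear elsewhere on this page (the exact keyword arguments are an assumption based on those examples):

import numpy as np

# Hypothetical driver for the tutorial setup above; the run()/save()
# arguments mirror those used in the other examples on this page.
experiment = make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-qlearning")
experiment.run(visualize_steps=False,      # no step-by-step rendering
               visualize_learning=False,   # no learning visualization
               visualize_performance=0)    # no performance plots
experiment.save()                          # write results under opt["path"]
print(np.max(experiment.result["return"]))  # best policy-check return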
Example #9
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=389.56,
                    lambda_=0.52738,
                    initial_learn_rate=.424409,
                    discretization=30):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 400000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 100

    domain = PuddleGapWorld()
    opt["domain"] = domain
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    # agent           = SARSA(representation,policy,domain,initial_learn_rate=1.,
    # lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #10
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=1240.89223,
                    initial_learn_rate=0.0063744503,
                    discretization=8.):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain

    representation = IndependentDiscretization(domain,
                                               discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #11
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=119,
                    initial_learn_rate=.06,
                    discretization=34):

    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #12
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=753,
                    initial_learn_rate=.7,
                    discretization=20.,
                    lambda_=0.75):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance(episodeCap=1000)
    opt["domain"] = domain
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #13
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.9,
                    boyan_N0=22.36,
                    initial_learn_rate=.068,
                    discretization=9):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = IndependentDiscretization(domain,
                                               discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #14
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1204.,
                    lambda_=0.,
                    boyan_N0=7353.2,
                    initial_learn_rate=.9712):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 500000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 10
    sparsify = 1
    kappa = 1e-7
    domain = PST(NUM_UAV=4)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain,
                          discover_threshold,
                          initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #15
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.0148120884,
        lambda_=0.,
        boyan_N0=460.3858,
        initial_learn_rate=0.8014120,
        discretization=25.):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = True
    kappa = 1e-7
    domain = InfCartPoleBalance()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(
        domain,
        discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                       discount_factor=domain.discount_factor,
                       lambda_=lambda_,
                       initial_learn_rate=initial_learn_rate,
                       learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #16
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=120,
                    initial_learn_rate=.06,
                    discretization=50):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1

    domain = FiftyChain()
    opt["domain"] = domain
    representation = Tabular(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #17
def select_agent(name: Optional[str], _seed: int) -> Agent:
    tabular = Tabular(DOMAIN, discretization=20)
    if name is None or name == 'lspi':
        policy = eGreedy(tabular, epsilon=0.1)
        return LSPI(policy, tabular, DOMAIN.discount_factor, MAX_STEPS, 1000)
    elif name == 'nac':
        return NaturalActorCritic(GibbsPolicy(tabular),
                                  tabular,
                                  DOMAIN.discount_factor,
                                  forgetting_rate=0.3,
                                  min_steps_between_updates=100,
                                  max_steps_between_updates=1000,
                                  lambda_=0.7,
                                  learn_rate=0.1)
    elif name == 'tabular-q':
        return Q_Learning(
            eGreedy(tabular, epsilon=0.1),
            tabular,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.3,
            initial_learn_rate=0.11,
            learn_rate_decay_mode='boyan',
            boyan_N0=100,
        )
    elif name == 'ifddk-q':
        lambda_ = 0.3
        ifddk = iFDDK(
            DOMAIN,
            discovery_threshold=1.0,
            initial_representation=IndependentDiscretization(DOMAIN),
            sparsify=True,
            useCache=True,
            lazy=True,
            lambda_=lambda_,
        )
        return Q_Learning(
            eGreedy(ifddk, epsilon=0.1),
            ifddk,
            discount_factor=DOMAIN.discount_factor,
            lambda_=lambda_,
            initial_learn_rate=0.11,
            learn_rate_decay_mode='boyan',
            boyan_N0=100,
        )
    else:
        raise NotImplementedError()
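A sketch of how select_agent might be wired into an experiment. DOMAIN and MAX_STEPS are module-level globals that the function above relies on but that are not shown in this excerpt, so treat this wiring as an assumption rather than the file's actual driver code:

# Hypothetical usage of select_agent; DOMAIN and MAX_STEPS are assumed to be
# module-level globals defined elsewhere in the same file.
opt = {
    "exp_id": 1,
    "domain": DOMAIN,
    "agent": select_agent("tabular-q", _seed=1),
    "max_steps": MAX_STEPS,
    "num_policy_checks": 20,
    "checks_per_policy": 10,
}
experiment = Experiment(**opt)
experiment.run(visualize_performance=0)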
Example #18
def make_experiment(exp_id=1,
                    path="./Results/Temp",
                    initial_learn_rate=.40,
                    lambda_=0.,
                    resolution=25,
                    num_rbfs=300):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.
    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # import sys
    # import os
    # cur_dir = os.path.expanduser("~/work/clipper/models/rl/")
    # sys.path.append(cur_dir)
    # from Domains import RCCarModified
    # from Policies import RCCarGreedy

    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 200000
    opt["num_policy_checks"] = 15
    opt["checks_per_policy"] = 2
    # Logging

    domain = RCCarLeftTurn(noise=0.)
    opt["domain"] = domain

    # Representation
    kernel = gaussian_kernel
    representation = RandomLocalBases(domain,
                                      kernel,
                                      num=int(num_rbfs),
                                      normalization=True,
                                      resolution_max=resolution,
                                      seed=exp_id)

    policy = eGreedy(representation, epsilon=0.15)
    # if biasedaction > -1:
    #     print "No Random starts with biasing {}".format(i % 4)
    #     policy = BiasedGreedy(representation, epsilon=0.5, biasedaction=biasedaction)

    # Agent

    opt["agent"] = Q_Learning(policy,
                              representation,
                              domain.discount_factor,
                              initial_learn_rate=initial_learn_rate,
                              lambda_=lambda_,
                              learn_rate_decay_mode="const")

    experiment = Experiment(**opt)

    return experiment
Example #19
def make_experiment(exp_id=1,
                    path="./Results/Experiments/",
                    domain_class="GridWorld",
                    mapf='9x9-2Path0.txt',
                    max_steps=5000,
                    num_policy_checks=50,
                    agent_eps=0.1,
                    env_noise=0.1,
                    seg_goal=0.8,
                    step_reward=-0.001,
                    weights=None):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    maze = os.path.join(GridWorldInter.default_map_dir, mapf)

    ## Domain:
    if domain_class == "GridWorld":
        domain = GridWorld(maze, noise=env_noise, step_reward=step_reward)
    elif domain_class == "GridWorldInter":
        domain = GridWorldInter(maze, noise=env_noise, new_goal=seg_goal)

    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain, discretization=20)
    if weights is not None:
        assert domain_class == "GridWorld"  ## ensure that we are transferring to right class
        representation.weight_vec = weights

    ## Policy
    policy = eGreedy(
        representation,
        epsilon=agent_eps)  ## Need to change this back, limiting noise ATM

    ## Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.3)
    opt["checks_per_policy"] = 50
    opt["max_steps"] = max_steps
    opt["num_policy_checks"] = num_policy_checks

    experiment = ExperimentSegment(**opt)
    return experiment
Example #20
def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """

    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000000
    opt["num_policy_checks"] = 50

    # Logging

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, 'large_state.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    discover_threshold = 1.
    lambda_ = 0.3
    initial_learn_rate = 0.11
    boyan_N0 = 100

    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain,
                           discover_threshold,
                           initial_rep,
                           sparsify=True,
                           useCache=True,
                           lazy=True,
                           lambda_=lambda_)

    # Policy
    policy = eGreedyDecay(representation, epsilonInit=0.9)

    # Agent
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)

    experiment = Experiment(**opt)
    return experiment
Example #21
def make_experiment(exp_id=1, path="./Results/Experiments/", 
                    mapf='9x9-2Path0.txt', eval_map='9x9-2Path0.txt',
                    max_eps=10000, num_policy_checks=50, checks_per_policy=50, 
                    agent_eps=0.2, env_noise=0.1, episodeCap=30, 
                    step_reward=-0.1, door_reward=0.1, weights=None):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    :param exp_id: number used to seed the random number generators
    :param path: output directory where logs and results are stored
    :param max_eps: total number of episodes to rollout
    :param episodeCap: total number of steps to take within one episode
    """

    ##
    # from IPython.lib.pretty import pprint
    # print pprint(vars())
    ##
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    maze = os.path.join(GridWorldInter.default_map_dir, mapf) 
    eval_maze = os.path.join(GridWorldInter.default_map_dir, eval_map)

    ## Domain:
    domain = GridWorldTime(maze, noise=env_noise, episodeCap=episodeCap, door_reward=door_reward, step_reward=step_reward)
    eval_domain = GridWorldTime(eval_maze, noise=env_noise, episodeCap=episodeCap, step_reward=step_reward,)
        
    opt["domain"] = domain
    opt["eval_domain"] = eval_domain #TODO: Can change this implementation to have Experiment take care of running default maps

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain, discretization=20)
    if weights is not None:
        representation.weight_vec = weights

    ## Policy
    policy = eGreedy(representation, epsilon=agent_eps) ## Need to change this back, limiting noise ATM

    ## Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                   discount_factor=domain.discount_factor,
                       initial_learn_rate=0.3, learn_rate_decay_mode='const')
    opt["max_eps"] = max_eps
    opt["checks_per_policy"] = checks_per_policy
    opt["num_policy_checks"] = num_policy_checks

    experiment = ExperimentSegment(**opt)
    return experiment
Example #22
def generate_meta_experiment(exp_id,
                             agent_paths,
                             path,
                             unique=True,
                             expdomain=None):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path

    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 40
    opt["checks_per_policy"] = 20
    # start_at = np.array([4, 6])

    agents = load_all_agents(agent_paths,
                             pretrained=True,
                             load_confidence=True)
    for a in agents:
        a.policy.epsilon = 0
    if expdomain:
        actual_domain = expdomain(mapname=mapname,
                                  terrain_augmentation=False,
                                  noise=0.1)
    else:
        actual_domain = GridWorldMixed(mapname=mapname,
                                       terrain_augmentation=False,
                                       noise=0.1)
    domain = PolicyMixer(actual_domain, agents)
    representation = Tabular(domain)
    policy = eGreedy(representation)  # , tau=.1)
    opt['agent'] = Q_Learning(policy,
                              representation,
                              discount_factor=0.9,
                              initial_learn_rate=0.8,
                              lambda_=0.5,
                              learn_rate_decay_mode='boyan',
                              boyan_N0=2380)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])

    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))

    return experiment
Example #23
def make_experiment(arm, exp_id=1, path="./Results/Tutorial/dvrk-planar"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    #u = [{'x': 0.0381381038389, 'y': 0.0348028884984}, {'x': 0.0553447503026, 'y': 0.0523395529395}]
    u = [{
        'x': 0.0193056007411,
        'y': 0.0370999763421
    }, {
        'x': 0.0393056007411,
        'y': 0.0370999763421
    }]

    domain = DVRKPlanarDomain(arm, u[0], u[1])
    opt["domain"] = domain

    # Representation
    representation = RBF(domain,
                         num_rbfs=1000,
                         resolution_max=10,
                         resolution_min=10,
                         const_feature=False,
                         normalize=True,
                         seed=2)

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.875,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=1000,
                              lambda_=0.0)
    opt["checks_per_policy"] = 1
    opt["max_steps"] = 100
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment, domain, policy, representation
Example #24
def get_demonstrations(demonstration_per_policy, max_policy_iter,
                       num_policy_demo_checks, agent):
    """return demonstrations generated from the parallel parking car rlpy simulator"""
    opt = {}
    opt["exp_id"] = 1
    #    opt["path"] = "./Results/gridworld2"
    opt["checks_per_policy"] = 5
    opt["max_steps"] = 1000000
    opt["num_policy_checks"] = 1000
    exp = 0.3
    discretization = 20
    walls = [(-1, -0.3, 0.1, 0.3)]
    domain = RCIRL([(-0.1, -0.25)],
                   wallArray=walls,
                   noise=0,
                   rewardFunction=RCIRL.rcreward)
    domain.episodeCap = 200
    # Representation 10
    representation = RBF(domain,
                         num_rbfs=1000,
                         resolution_max=25,
                         resolution_min=25,
                         const_feature=False,
                         normalize=True,
                         seed=1)  #discretization=discretization)
    # Policy
    policy = eGreedy(representation, epsilon=0.3)

    # Agent
    # opt["agent"]=agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.7,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=700,
                              lambda_=0.)

    opt["domain"] = domain

    pdomain = RCIRL([(-0.1, -0.25)], wallArray=walls, noise=0)

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=pdomain,
                   visualize_learning=False,
                   visualize_performance=1)
    # return experiment
    return [[np.array(y) for y in x]
            for x in experiment.all_experiment_list]
Example #25
def make_experiment(exp_id=2,
					agent_paths=None,
					mapname="12x12-Bridge.txt",
					path="./Results/MetaRLSarsa",
					boyan_N0=1000, 
					discount_factor=0.8286376417073243,
					initial_learn_rate=0.5,
					lambda_=0.2):
	opt = {}
	opt["path"] = path
	opt["exp_id"] = exp_id
	opt["max_steps"] = 30000
	opt["num_policy_checks"] = 5
	opt["checks_per_policy"] = 10
	# start_at = np.array([4, 5])

	# Logging

	# Domain:
	# MAZE                = '/Domains/GridWorldMaps/1x3.txt'
	map_dir = os.path.expanduser("~/work/clipper/models/rl/GridworldMaps/")
	maze = os.path.join(map_dir, mapname)

	domain = GridWorldModified(maze, 
									# random_start=True, 
									noise=0.1,
									# start_at=np.array([4,6])
									)	

	# agent_1 = loadagent("QL") # most likely preloaded
	# agent_2 = loadagent("SARSA")
	# agent_3 = loadagent("NAC")
	# agents = [agent_1, agent_2, agent_3]
	# agents = load_all_agents(agent_paths)

	# domain = PolicyMixer(actual_domain, agents, seed=exp_id)
	representation = Tabular(domain)
	policy = eGreedy(representation, epsilon=0.3)
	opt['agent'] = Q_Learning(representation=representation,
							 policy=policy,
							 learn_rate_decay_mode="boyan",
							 boyan_N0=boyan_N0,
							 lambda_=lambda_,
							 initial_learn_rate=initial_learn_rate,
							 discount_factor=discount_factor)
	opt['domain'] = domain
	experiment = Experiment(**opt)

	return experiment
Example #26
def grid_world1_reward(exp_id=2, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 400

    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise,
        binary=True)

    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=exp)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
Example #27
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=8948708.75,
                    boyan_N0=627.12,
                    lambda_=0.5433,
                    initial_learn_rate=0.59812,
                    kernel_resolution=24.340):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1
    active_threshold = 0.01
    max_base_feat_sim = 0.5
    sparsify = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    # domain = FiniteCartPoleBalanceModern()
    kernel_width = old_div(
        (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]),
        kernel_resolution)
    representation = KernelizediFDD(domain,
                                    sparsify=sparsify,
                                    kernel=gaussian_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=True,
                                    max_active_base_feat=10,
                                    max_base_feat_sim=max_base_feat_sim)
    policy = eGreedy(representation, epsilon=0.1)
    # agent           = SARSA(representation,policy,domain,initial_learn_rate=initial_learn_rate,
    # lambda_=.0, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #28
def make_experiment(exp_id=2,
					path="./Results/MetaRLSarsa",
					boyan_N0=680.715, 
					discount_factor=0.9,
					initial_learn_rate=1,
					lambda_=0.106):
	opt = {}
	opt["path"] = path
	opt["exp_id"] = exp_id
	opt["max_steps"] = 50000
	opt["num_policy_checks"] = 50
	opt["checks_per_policy"] = 100
	# start_at = np.array([4, 5])

	# Logging

	# Domain:
	# MAZE                = '/Domains/GridWorldMaps/1x3.txt'
	map_dir = os.path.expanduser("~/work/clipper/models/rl/GridworldMaps/")
	maze = os.path.join(map_dir, "12x12-Bridge.txt")

	print(maze)
	domain = GridWorld(maze, 
									# random_start=True, 
									noise=0.1,
									# start_at=np.array([4,6])
									)	

	representation = Tabular(domain)
	policy = eGreedy(representation, epsilon=0.3)
	opt['agent'] = Q_Learning(representation=representation,
							 policy=policy,
							 learn_rate_decay_mode="boyan",
							 boyan_N0=boyan_N0,
							 lambda_=lambda_,
							 initial_learn_rate=initial_learn_rate,
							 discount_factor=discount_factor)
	opt['domain'] = domain
	experiment = Experiment(**opt)

	# print opt

	return experiment
Example #29
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1e6,
                    boyan_N0=5e5,
                    lambda_=0.5,
                    initial_learn_rate=0.9,
                    kernel_resolution=10):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1
    active_threshold = 0.01
    max_base_feat_sim = 0.5
    sparsify = 1

    domain = HelicopterHover()
    opt["domain"] = domain
    # domain = FiniteCartPoleBalanceModern()
    kernel_width = (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]) \
        / kernel_resolution
    representation = KernelizediFDD(domain,
                                    sparsify=sparsify,
                                    kernel=linf_triangle_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=True,
                                    max_active_base_feat=10,
                                    max_base_feat_sim=max_base_feat_sim)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #30
    def __init__(self, domain, representation, policy, steps=100000):

        opt = {}
        opt["domain"] = domain
        # Agent
        opt["agent"] = Q_Learning(representation=representation,
                                  policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan",
                                  boyan_N0=100,
                                  lambda_=0.)

        opt["checks_per_policy"] = 10
        opt["max_steps"] = steps
        opt["num_policy_checks"] = 20
        experiment = Experiment(**opt)
        experiment.run()
        self.policy = opt["agent"].policy
        self.domain = domain
Example #31
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        lambda_=0.,
        boyan_N0=143.791,
        initial_learn_rate=0.18696):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 5
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment