Exemplo n.º 1
0
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136,
                    lambda_=0.0985,
                    initial_learn_rate=0.090564,
                    resolution=13.,
                    num_rbfs=9019):
    """Build an HIVTreatment experiment: Q-learning over nonparametric
    local bases with an L-infinity triangle kernel.

    Parameters are tuned hyperparameters for this setup. ``num_rbfs`` is
    accepted for signature compatibility but is not used by this
    representation. Returns a ready-to-run ``Experiment``.
    """
    task = HIVTreatment()
    # Experiment bookkeeping: output location and evaluation schedule.
    opt = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 150000,
        "num_policy_checks": 30,
        "checks_per_policy": 1,
        "domain": task,
    }
    basis = NonparametricLocalBases(task,
                                    kernel=linf_triangle_kernel,
                                    resolution=resolution,
                                    normalization=True)
    # Epsilon-greedy exploration over the learned value function.
    explorer = eGreedy(basis, epsilon=0.1)
    opt["agent"] = Q_LEARNING(explorer,
                              basis,
                              discount_factor=task.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    return Experiment(**opt)
Exemplo n.º 2
0
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.9,
                    boyan_N0=22.36,
                    initial_learn_rate=.068,
                    discretization=9):
    """Build an HIVTreatment experiment: Q-learning over an independent
    per-dimension discretization.

    :param exp_id: seed/identifier for the experiment run.
    :param path: results directory template.
    :param lambda_: eligibility-trace decay for Q-learning.
    :param boyan_N0: Boyan learn-rate-decay parameter.
    :param initial_learn_rate: initial step size.
    :param discretization: bins per state dimension.
    :return: a ready-to-run ``Experiment``.
    """
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = IndependentDiscretization(domain,
                                               discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              # BUG FIX: was hard-coded 0.9, silently ignoring
                              # the lambda_ parameter; default is unchanged.
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Exemplo n.º 3
0
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136,
                    lambda_=0.0985,
                    initial_learn_rate=0.090564,
                    resolution=13.,
                    num_rbfs=9019):
    """Build an HIVTreatment experiment: Q-learning over a normalized RBF
    representation with a fixed resolution.

    Hyperparameters are the tuned values for this configuration; the RBF
    centers are seeded with ``exp_id`` for reproducibility. Returns a
    ready-to-run ``Experiment``.
    """
    task = HIVTreatment()
    # Experiment bookkeeping: output location and evaluation schedule.
    opt = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 150000,
        "num_policy_checks": 30,
        "checks_per_policy": 1,
        "domain": task,
    }
    # Same min and max resolution pins the RBF widths to a single scale.
    basis = RBF(task,
                num_rbfs=int(num_rbfs),
                resolution_max=resolution,
                resolution_min=resolution,
                const_feature=False,
                normalize=True,
                seed=exp_id)
    explorer = eGreedy(basis, epsilon=0.1)
    opt["agent"] = Q_LEARNING(explorer,
                              basis,
                              discount_factor=task.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    return Experiment(**opt)
Exemplo n.º 4
0
 def initConfig(self):
     """Populate the agent, policy, representation and experiment
     configuration dicts with tuned defaults for the HIVTreatment domain."""
     domain = HIVTreatment()
     kernel_resolution = 14.7920
     # Per-dimension kernel width derived from the domain's state-space extents.
     kernel_width = (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]) \
                    / kernel_resolution
     # QLearning and Sarsa share the same tuned TD hyperparameters; copy the
     # template so each entry is an independent dict object.
     td_defaults = {
         'lambda': 0.9,
         'gamma': 0.9,
         'alpha': 0.08,
         'alpha_decay_mode': 'boyan',
         'boyan_N0': 238
     }
     self.agentConfig['QLearning'] = dict(td_defaults)
     self.agentConfig['Sarsa'] = dict(td_defaults)
     self.policyConfig['eGreedy'] = {'epsilon': 0.1}
     self.representationConfig['IndependentDiscretization'] = {
         'discretization': 9
     }
     self.representationConfig['RBF'] = {
         'num_rbfs': 9019,
         'resolution_max': 13,
         'resolution_min': 13
     }
     self.representationConfig['IncrementalTabular'] = {
         'discretization': 35
     }
     self.representationConfig['KernelizediFDD'] = {
         'sparsify': 1,
         'kernel': gaussian_kernel,
         'kernel_args': [kernel_width],
         'active_threshold': 0.01,
         'discover_threshold': 611850.81,
         'max_active_base_feat': 10,
         'max_base_feat_sim': 0.5,
         'kernel_resolution': 14.7920
     }
     self.representationConfig['iFDD'] = {
         'discretization': 18,
         'discover_threshold': 107091
     }
     # Run-length and evaluation cadence for the experiment itself.
     self.experimentConfig["maxSteps"] = 150000
     self.experimentConfig["episodeCap"] = 200
     self.experimentConfig["policyChecks"] = 30
     self.experimentConfig["checksPerPolicy"] = 1
Exemplo n.º 5
0
    def makeComponents(self):
        """Instantiate the domain, representation, policy and agent from the
        currently selected UI list items and return ``(domain, agent)``."""
        domain = HIVTreatment()

        # Each factory resolves the user's selection against its config dict.
        rep_name = str(self.lstRepresentation.currentItem().text())
        representation = RepresentationFactory.get(config=self.representationConfig,
                                                   name=rep_name,
                                                   domain=domain)

        policy_name = str(self.lstPolicy.currentItem().text())
        policy = PolicyFactory.get(config=self.policyConfig,
                                   name=policy_name,
                                   representation=representation)

        agent_name = str(self.lstAgent.currentItem().text())
        agent = AgentFactory.get(config=self.agentConfig,
                                 name=agent_name,
                                 representation=representation,
                                 policy=policy)

        return domain, agent
Exemplo n.º 6
0
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=107091,
                    lambda_=0.245,
                    boyan_N0=514,
                    initial_learn_rate=.327,
                    discretization=18):
    """Build an HIVTreatment experiment: Q-learning over an iFDD+
    representation grown from an independent discretization.

    ``discover_threshold`` controls how aggressively iFDD discovers new
    conjunctive features. Returns a ready-to-run ``Experiment``.
    """
    task = HIVTreatment()
    opt = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 150000,
        "num_policy_checks": 30,
        "checks_per_policy": 1,
        "domain": task,
    }
    # Start from per-dimension features; iFDD adds conjunctions online.
    seed_rep = IndependentDiscretization(task,
                                         discretization=discretization)
    basis = iFDD(task,
                 discover_threshold,
                 seed_rep,
                 sparsify=1,
                 discretization=discretization,
                 useCache=True,
                 iFDDPlus=True)
    explorer = eGreedy(basis, epsilon=0.1)
    opt["agent"] = Q_LEARNING(explorer,
                              basis,
                              discount_factor=task.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    return Experiment(**opt)