# Imports shared by the make_experiment variants below; the module paths assume
# the rlpy package layout and may need adjusting for other RLPy versions.
from rlpy.Domains import HIVTreatment
from rlpy.Agents import Q_Learning
from rlpy.Representations import (NonparametricLocalBases, IndependentDiscretization,
                                  RBF, iFDD)
from rlpy.Representations.kernels import linf_triangle_kernel  # kernel import path may vary by rlpy version
from rlpy.Policies import eGreedy
from rlpy.Experiments import Experiment


def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136, lambda_=0.0985, initial_learn_rate=0.090564,
                    resolution=13., num_rbfs=9019):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    # num_rbfs is unused by this representation; it is kept only for signature
    # parity with the RBF variant below.
    representation = NonparametricLocalBases(domain, kernel=linf_triangle_kernel,
                                             resolution=resolution,
                                             normalization=True)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.9, boyan_N0=22.36, initial_learn_rate=.068,
                    discretization=9):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = IndependentDiscretization(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    # Pass lambda_ through instead of hard-coding 0.9 so the parameter takes effect.
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136, lambda_=0.0985, initial_learn_rate=0.090564,
                    resolution=13., num_rbfs=9019):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution,
                         resolution_min=resolution,
                         const_feature=False,
                         normalize=True,
                         seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def initConfig(self):
    domain = HIVTreatment()
    kernel_resolution = 14.7920
    kernel_width = (domain.statespace_limits[:, 1]
                    - domain.statespace_limits[:, 0]) / kernel_resolution

    self.agentConfig['QLearning'] = {
        'lambda': 0.9,
        'gamma': 0.9,
        'alpha': 0.08,
        'alpha_decay_mode': 'boyan',
        'boyan_N0': 238
    }
    self.agentConfig['Sarsa'] = {
        'lambda': 0.9,
        'gamma': 0.9,
        'alpha': 0.08,
        'alpha_decay_mode': 'boyan',
        'boyan_N0': 238
    }
    self.policyConfig['eGreedy'] = {'epsilon': 0.1}

    self.representationConfig['IndependentDiscretization'] = {
        'discretization': 9
    }
    self.representationConfig['RBF'] = {
        'num_rbfs': 9019,
        'resolution_max': 13,
        'resolution_min': 13
    }
    self.representationConfig['IncrementalTabular'] = {
        'discretization': 35
    }
    self.representationConfig['KernelizediFDD'] = {
        'sparsify': 1,
        'kernel': gaussian_kernel,
        'kernel_args': [kernel_width],
        'active_threshold': 0.01,
        'discover_threshold': 611850.81,
        'max_active_base_feat': 10,
        'max_base_feat_sim': 0.5,
        'kernel_resolution': kernel_resolution
    }
    self.representationConfig['iFDD'] = {
        'discretization': 18,
        'discover_threshold': 107091
    }

    self.experimentConfig["maxSteps"] = 150000
    self.experimentConfig["episodeCap"] = 200
    self.experimentConfig["policyChecks"] = 30
    self.experimentConfig["checksPerPolicy"] = 1

def makeComponents(self):
    domain = HIVTreatment()
    representation = RepresentationFactory.get(
        config=self.representationConfig,
        name=str(self.lstRepresentation.currentItem().text()),
        domain=domain)
    policy = PolicyFactory.get(
        config=self.policyConfig,
        name=str(self.lstPolicy.currentItem().text()),
        representation=representation)
    agent = AgentFactory.get(
        config=self.agentConfig,
        name=str(self.lstAgent.currentItem().text()),
        representation=representation,
        policy=policy)
    return domain, agent
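
# Hedged sketch (hypothetical helper, not in the original GUI code): one way the
# components returned by makeComponents could be wired into an rlpy Experiment,
# reusing the values set in initConfig. The method name runFromGui and the result
# path are illustrative assumptions only.
def runFromGui(self, exp_id=1, path="./Results/Temp/GUI/"):
    domain, agent = self.makeComponents()
    opt = {
        "exp_id": exp_id,
        "path": path,
        "domain": domain,
        "agent": agent,
        "max_steps": self.experimentConfig["maxSteps"],
        "num_policy_checks": self.experimentConfig["policyChecks"],
        "checks_per_policy": self.experimentConfig["checksPerPolicy"],
    }
    experiment = Experiment(**opt)
    experiment.run(visualize_performance=0)  # run headless; save learning curve data
    experiment.save()
    return experiment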

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=107091, lambda_=0.245, boyan_N0=514,
                    initial_learn_rate=.327, discretization=18):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1
    sparsify = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain, discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=True)
    # representation.PRINT_MAX_RELEVANCE = True
    policy = eGreedy(representation, epsilon=0.1)
    # Alternative agent kept from the original, commented out:
    # agent = SARSA(representation, policy, domain,
    #               initial_learn_rate=initial_learn_rate, lambda_=.0,
    #               learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
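
# Usage sketch (not part of the original snippets): how any of the make_experiment
# variants above is typically driven in rlpy example scripts. The run() keyword
# names follow rlpy's Experiment API; adjust if your version differs.
if __name__ == '__main__':
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,      # no step-by-step domain rendering
                   visualize_learning=False,   # no representation plots during learning
                   visualize_performance=0)    # no rendering during policy checks
    experiment.plot()
    experiment.save()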